diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 69e2346dc0b..ab57779b580 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -139,6 +139,12 @@ def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9", [FeatureVMLxForwarding, FeatureT2XtPk, FeatureFP16, FeatureAvoidPartialCPSR]>; +// FIXME: It has not been determined if A15 has these features. +def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15", + "Cortex-A15 ARM processors", + [FeatureVMLxForwarding, + FeatureT2XtPk, FeatureFP16, + FeatureAvoidPartialCPSR]>; class ProcNoItin Features> : Processor; @@ -214,6 +220,10 @@ def : ProcessorModel<"cortex-a9-mp", CortexA9Model, [ProcA9, HasV7Ops, FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureMP, FeatureHasRAS]>; +// FIXME: A15 has currently the same ProcessorModel as A9. +def : ProcessorModel<"cortex-a15", CortexA9Model, + [ProcA15, HasV7Ops, FeatureNEON, FeatureDB, + FeatureDSPThumb2, FeatureHasRAS]>; // V7M Processors. def : ProcNoItin<"cortex-m3", [HasV7Ops, diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index e2f0d7d4bdf..6d475e5a602 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -2430,7 +2430,7 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, if (NumRegs % 2) ++A8UOps; return A8UOps; - } else if (Subtarget.isCortexA9()) { + } else if (Subtarget.isLikeA9()) { int A9UOps = (NumRegs / 2); // If there are odd number of registers or if it's not 64-bit aligned, // then it takes an extra AGU (Address Generation Unit) cycle. @@ -2463,7 +2463,7 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, DefCycle = RegNo / 2 + 1; if (RegNo % 2) ++DefCycle; - } else if (Subtarget.isCortexA9()) { + } else if (Subtarget.isLikeA9()) { DefCycle = RegNo; bool isSLoad = false; @@ -2507,7 +2507,7 @@ ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData, DefCycle = 1; // Result latency is issue cycle + 2: E2. DefCycle += 2; - } else if (Subtarget.isCortexA9()) { + } else if (Subtarget.isLikeA9()) { DefCycle = (RegNo / 2); // If there are odd number of registers or if it's not 64-bit aligned, // then it takes an extra AGU (Address Generation Unit) cycle. @@ -2538,7 +2538,7 @@ ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData, UseCycle = RegNo / 2 + 1; if (RegNo % 2) ++UseCycle; - } else if (Subtarget.isCortexA9()) { + } else if (Subtarget.isLikeA9()) { UseCycle = RegNo; bool isSStore = false; @@ -2579,7 +2579,7 @@ ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData, UseCycle = 2; // Read in E3. UseCycle += 2; - } else if (Subtarget.isCortexA9()) { + } else if (Subtarget.isLikeA9()) { UseCycle = (RegNo / 2); // If there are odd number of registers or if it's not 64-bit aligned, // then it takes an extra AGU (Address Generation Unit) cycle. @@ -2764,7 +2764,7 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget, const MachineInstr *DefMI, const MCInstrDesc *DefMCID, unsigned DefAlign) { int Adjust = 0; - if (Subtarget.isCortexA8() || Subtarget.isCortexA9()) { + if (Subtarget.isCortexA8() || Subtarget.isLikeA9()) { // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] // variants are one cycle cheaper. switch (DefMCID->getOpcode()) { @@ -2791,7 +2791,7 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget, } } - if (DefAlign < 8 && Subtarget.isCortexA9()) { + if (DefAlign < 8 && Subtarget.isLikeA9()) { switch (DefMCID->getOpcode()) { default: break; case ARM::VLD1q8: @@ -2949,7 +2949,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, if (Reg == ARM::CPSR) { if (DefMI->getOpcode() == ARM::FMSTAT) { // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?) - return Subtarget.isCortexA9() ? 1 : 20; + return Subtarget.isLikeA9() ? 1 : 20; } // CPSR set and branch can be paired in the same cycle. @@ -3015,7 +3015,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, if (!UseNode->isMachineOpcode()) { int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx); - if (Subtarget.isCortexA9()) + if (Subtarget.isLikeA9()) return Latency <= 2 ? 1 : Latency - 1; else return Latency <= 3 ? 1 : Latency - 2; @@ -3032,7 +3032,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, UseMCID, UseIdx, UseAlign); if (Latency > 1 && - (Subtarget.isCortexA8() || Subtarget.isCortexA9())) { + (Subtarget.isCortexA8() || Subtarget.isLikeA9())) { // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] // variants are one cycle cheaper. switch (DefMCID.getOpcode()) { @@ -3061,7 +3061,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, } } - if (DefAlign < 8 && Subtarget.isCortexA9()) + if (DefAlign < 8 && Subtarget.isLikeA9()) switch (DefMCID.getOpcode()) { default: break; case ARM::VLD1q8: @@ -3354,9 +3354,9 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI)) return std::make_pair(ExeVFP, (1<getOpcode() == ARM::VMOVRS || MI->getOpcode() == ARM::VMOVSR || MI->getOpcode() == ARM::VMOVS)) diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 9deb96ea9e0..13f6dd228f6 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -476,7 +476,7 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, bool ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const { // CortexA9 has a Write-after-write hazard for NEON registers. - if (!STI.isCortexA9()) + if (!STI.isLikeA9()) return false; switch (RC->getID()) { diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp index a5fd15b6bb9..1240169e84e 100644 --- a/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -47,7 +47,7 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { // Skip over one non-VFP / NEON instruction. if (!LastMI->isBarrier() && // On A9, AGU and NEON/FPU are muxed. - !(STI.isCortexA9() && (LastMI->mayLoad() || LastMI->mayStore())) && + !(STI.isLikeA9() && (LastMI->mayLoad() || LastMI->mayStore())) && (LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) { MachineBasicBlock::iterator I = LastMI; if (I != LastMI->getParent()->begin()) { diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 140662013d2..d3b4d7c97bf 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -335,8 +335,7 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { if (!CheckVMLxHazard) return true; - - if (!Subtarget->isCortexA8() && !Subtarget->isCortexA9()) + if (!Subtarget->isCortexA8() && !Subtarget->isLikeA9()) return true; if (!N->hasOneUse()) @@ -374,7 +373,7 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift, ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt) { - if (!Subtarget->isCortexA9()) + if (!Subtarget->isLikeA9()) return true; if (Shift.hasOneUse()) return true; @@ -486,7 +485,7 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc) { if (N.getOpcode() == ISD::MUL && - (!Subtarget->isCortexA9() || N.hasOneUse())) { + (!Subtarget->isLikeA9() || N.hasOneUse())) { if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { // X * [3,5,9] -> X + X * [2,4,8] etc. int RHSC = (int)RHS->getZExtValue(); @@ -550,7 +549,7 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, // Try matching (R shl C) + (R). if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift && - !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) { + !(Subtarget->isLikeA9() || N.getOperand(0).hasOneUse())) { ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode()); if (ShOpcVal != ARM_AM::no_shift) { // Check to see if the RHS of the shift is a constant, if not, we can't @@ -584,7 +583,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, SDValue &Offset, SDValue &Opc) { if (N.getOpcode() == ISD::MUL && - (!Subtarget->isCortexA9() || N.hasOneUse())) { + (!Subtarget->isLikeA9() || N.hasOneUse())) { if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { // X * [3,5,9] -> X + X * [2,4,8] etc. int RHSC = (int)RHS->getZExtValue(); @@ -650,7 +649,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, } } - if (Subtarget->isCortexA9() && !N.hasOneUse()) { + if (Subtarget->isLikeA9() && !N.hasOneUse()) { // Compute R +/- (R << N) and reuse it. Base = N; Offset = CurDAG->getRegister(0, MVT::i32); @@ -688,7 +687,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, // Try matching (R shl C) + (R). if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift && - !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) { + !(Subtarget->isLikeA9() || N.getOperand(0).hasOneUse())) { ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode()); if (ShOpcVal != ARM_AM::no_shift) { // Check to see if the RHS of the shift is a constant, if not, we can't diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index e51315e49cb..f530c5dfbfd 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -824,7 +824,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) benefitFromCodePlacementOpt = true; // Prefer likely predicted branches to selects on out-of-order cores. - predictableSelectIsExpensive = Subtarget->isCortexA9(); + predictableSelectIsExpensive = Subtarget->isLikeA9(); setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2); } diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index b3940613000..495afa92b58 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -30,7 +30,7 @@ class StringRef; class ARMSubtarget : public ARMGenSubtargetInfo { protected: enum ARMProcFamilyEnum { - Others, CortexA8, CortexA9 + Others, CortexA8, CortexA9, CortexA15 }; /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others. @@ -199,7 +199,9 @@ protected: bool isCortexA8() const { return ARMProcFamily == CortexA8; } bool isCortexA9() const { return ARMProcFamily == CortexA9; } + bool isCortexA15() const { return ARMProcFamily == CortexA15; } bool isCortexM3() const { return CPUString == "cortex-m3"; } + bool isLikeA9() const { return isCortexA9() || isCortexA15(); } bool hasARMOps() const { return !NoARM; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 171c9adfa40..133a7dd8c76 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -150,7 +150,7 @@ bool ARMPassConfig::addPreRegAlloc() { // FIXME: temporarily disabling load / store optimization pass for Thumb1. if (getOptLevel() != CodeGenOpt::None && !getARMSubtarget().isThumb1Only()) addPass(createARMLoadStoreOptimizationPass(true)); - if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9()) + if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isLikeA9()) addPass(createMLxExpansionPass()); return true; } diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp index ad60e3282e6..4ebba0e4d33 100644 --- a/lib/Target/ARM/MLxExpansionPass.cpp +++ b/lib/Target/ARM/MLxExpansionPass.cpp @@ -51,7 +51,7 @@ namespace { const TargetRegisterInfo *TRI; MachineRegisterInfo *MRI; - bool isA9; + bool isLikeA9; unsigned MIIdx; MachineInstr* LastMIs[4]; SmallPtrSet IgnoreStall; @@ -179,8 +179,8 @@ bool MLxExpansion::FindMLxHazard(MachineInstr *MI) { // preserves the in-order retirement of the instructions. // Look at the next few instructions, if *most* of them can cause hazards, // then the scheduler can't *fix* this, we'd better break up the VMLA. - unsigned Limit1 = isA9 ? 1 : 4; - unsigned Limit2 = isA9 ? 1 : 4; + unsigned Limit1 = isLikeA9 ? 1 : 4; + unsigned Limit2 = isLikeA9 ? 1 : 4; for (unsigned i = 1; i <= 4; ++i) { int Idx = ((int)MIIdx - i + 4) % 4; MachineInstr *NextMI = LastMIs[Idx]; @@ -316,7 +316,7 @@ bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) { TRI = Fn.getTarget().getRegisterInfo(); MRI = &Fn.getRegInfo(); const ARMSubtarget *STI = &Fn.getTarget().getSubtarget(); - isA9 = STI->isCortexA9(); + isLikeA9 = STI->isLikeA9(); bool Modified = false; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; diff --git a/test/CodeGen/ARM/a15.ll b/test/CodeGen/ARM/a15.ll new file mode 100644 index 00000000000..6f816c1c2c5 --- /dev/null +++ b/test/CodeGen/ARM/a15.ll @@ -0,0 +1,6 @@ +; RUN: llc < %s -mcpu=cortex-a15 | FileCheck %s + +; CHECK: a +define i32 @a(i32 %x) { + ret i32 %x; +}