mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-27 13:30:05 +00:00
- Add a hook for targets to determine whether an instruction def is
  "long latency" enough to hoist even if it may increase spilling. Reloading a value from a spill slot is often cheaper than performing an expensive computation in the loop. For X86, that means machine LICM will hoist SQRT, DIV, etc. ARM will be somewhat aggressive with VFP and NEON instructions.
- Enable register pressure aware machine LICM by default.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@116781 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
b5a2d3f8e3
commit
11e8b74a7a
@ -24,6 +24,7 @@ class InstrItineraryData;
|
|||||||
class LiveVariables;
|
class LiveVariables;
|
||||||
class MCAsmInfo;
|
class MCAsmInfo;
|
||||||
class MachineMemOperand;
|
class MachineMemOperand;
|
||||||
|
class MachineRegisterInfo;
|
||||||
class MDNode;
|
class MDNode;
|
||||||
class MCInst;
|
class MCInst;
|
||||||
class SDNode;
|
class SDNode;
|
||||||
@ -625,6 +626,19 @@ public:
|
|||||||
int getOperandLatency(const InstrItineraryData *ItinData,
|
int getOperandLatency(const InstrItineraryData *ItinData,
|
||||||
SDNode *DefNode, unsigned DefIdx,
|
SDNode *DefNode, unsigned DefIdx,
|
||||||
SDNode *UseNode, unsigned UseIdx) const;
|
SDNode *UseNode, unsigned UseIdx) const;
|
||||||
|
|
||||||
|
/// hasHighOperandLatency - Compute operand latency between a def of 'Reg'
|
||||||
|
/// and a use in the current loop; return true if the target considers
|
||||||
|
/// it 'high'. This is used by optimization passes such as machine LICM to
|
||||||
|
/// determine whether it makes sense to hoist an instruction out even in
|
||||||
|
/// a high register pressure situation.
|
||||||
|
virtual
|
||||||
|
bool hasHighOperandLatency(const InstrItineraryData *ItinData,
|
||||||
|
const MachineRegisterInfo *MRI,
|
||||||
|
const MachineInstr *DefMI, unsigned DefIdx,
|
||||||
|
const MachineInstr *UseMI, unsigned UseIdx) const {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/// TargetInstrInfoImpl - This is the default implementation of
|
/// TargetInstrInfoImpl - This is the default implementation of
|
||||||
|
@ -43,11 +43,6 @@
|
|||||||
|
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
|
||||||
static cl::opt<bool>
|
|
||||||
TrackRegPressure("rp-aware-machine-licm",
|
|
||||||
cl::desc("Register pressure aware machine LICM"),
|
|
||||||
cl::init(false), cl::Hidden);
|
|
||||||
|
|
||||||
STATISTIC(NumHoisted,
|
STATISTIC(NumHoisted,
|
||||||
"Number of machine instructions hoisted out of loops");
|
"Number of machine instructions hoisted out of loops");
|
||||||
STATISTIC(NumLowRP,
|
STATISTIC(NumLowRP,
|
||||||
@ -124,6 +119,7 @@ namespace {
|
|||||||
RegSeen.clear();
|
RegSeen.clear();
|
||||||
RegPressure.clear();
|
RegPressure.clear();
|
||||||
RegLimit.clear();
|
RegLimit.clear();
|
||||||
|
BackTrace.clear();
|
||||||
for (DenseMap<unsigned,std::vector<const MachineInstr*> >::iterator
|
for (DenseMap<unsigned,std::vector<const MachineInstr*> >::iterator
|
||||||
CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI)
|
CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI)
|
||||||
CI->second.clear();
|
CI->second.clear();
|
||||||
@ -171,9 +167,10 @@ namespace {
|
|||||||
///
|
///
|
||||||
bool IsLoopInvariantInst(MachineInstr &I);
|
bool IsLoopInvariantInst(MachineInstr &I);
|
||||||
|
|
||||||
/// ComputeOperandLatency - Compute operand latency between a def of 'Reg'
|
/// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
|
||||||
/// and an use in the current loop.
|
/// and a use in the current loop; return true if the target considers
|
||||||
int ComputeOperandLatency(MachineInstr &MI, unsigned DefIdx, unsigned Reg);
|
/// it 'high'.
|
||||||
|
bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, unsigned Reg);
|
||||||
|
|
||||||
/// IncreaseHighRegPressure - Visit BBs from preheader to current BB, check
|
/// IncreaseHighRegPressure - Visit BBs from preheader to current BB, check
|
||||||
/// if hoisting an instruction of the given cost matrix can cause high
|
/// if hoisting an instruction of the given cost matrix can cause high
|
||||||
@ -556,28 +553,24 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) {
|
|||||||
if (!Preheader)
|
if (!Preheader)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (TrackRegPressure) {
|
if (IsHeader) {
|
||||||
if (IsHeader) {
|
// Compute registers which are liveout of preheader.
|
||||||
// Compute registers which are liveout of preheader.
|
RegSeen.clear();
|
||||||
RegSeen.clear();
|
BackTrace.clear();
|
||||||
BackTrace.clear();
|
InitRegPressure(Preheader);
|
||||||
InitRegPressure(Preheader);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Remember livein register pressure.
|
|
||||||
BackTrace.push_back(RegPressure);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Remember livein register pressure.
|
||||||
|
BackTrace.push_back(RegPressure);
|
||||||
|
|
||||||
for (MachineBasicBlock::iterator
|
for (MachineBasicBlock::iterator
|
||||||
MII = BB->begin(), E = BB->end(); MII != E; ) {
|
MII = BB->begin(), E = BB->end(); MII != E; ) {
|
||||||
MachineBasicBlock::iterator NextMII = MII; ++NextMII;
|
MachineBasicBlock::iterator NextMII = MII; ++NextMII;
|
||||||
MachineInstr *MI = &*MII;
|
MachineInstr *MI = &*MII;
|
||||||
|
|
||||||
if (TrackRegPressure)
|
UpdateRegPressureBefore(MI);
|
||||||
UpdateRegPressureBefore(MI);
|
|
||||||
Hoist(MI, Preheader);
|
Hoist(MI, Preheader);
|
||||||
if (TrackRegPressure)
|
UpdateRegPressureAfter(MI);
|
||||||
UpdateRegPressureAfter(MI);
|
|
||||||
|
|
||||||
MII = NextMII;
|
MII = NextMII;
|
||||||
}
|
}
|
||||||
@ -591,8 +584,7 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) {
|
|||||||
HoistRegion(Children[I]);
|
HoistRegion(Children[I]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (TrackRegPressure)
|
BackTrace.pop_back();
|
||||||
BackTrace.pop_back();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// InitRegPressure - Find all virtual register references that are liveout of
|
/// InitRegPressure - Find all virtual register references that are liveout of
|
||||||
@ -788,15 +780,14 @@ bool MachineLICM::isLoadFromConstantMemory(MachineInstr *MI) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// ComputeOperandLatency - Compute operand latency between a def of 'Reg'
|
/// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
|
||||||
/// and an use in the current loop.
|
/// and a use in the current loop; return true if the target considers
|
||||||
int MachineLICM::ComputeOperandLatency(MachineInstr &MI,
|
/// it 'high'.
|
||||||
unsigned DefIdx, unsigned Reg) {
|
bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
|
||||||
|
unsigned DefIdx, unsigned Reg) {
|
||||||
if (MRI->use_nodbg_empty(Reg))
|
if (MRI->use_nodbg_empty(Reg))
|
||||||
// No use? Return arbitrary large number!
|
return false;
|
||||||
return 300;
|
|
||||||
|
|
||||||
int Latency = -1;
|
|
||||||
for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
|
for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
|
||||||
E = MRI->use_nodbg_end(); I != E; ++I) {
|
E = MRI->use_nodbg_end(); I != E; ++I) {
|
||||||
MachineInstr *UseMI = &*I;
|
MachineInstr *UseMI = &*I;
|
||||||
@ -810,18 +801,15 @@ int MachineLICM::ComputeOperandLatency(MachineInstr &MI,
|
|||||||
if (MOReg != Reg)
|
if (MOReg != Reg)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
int UseCycle = TII->getOperandLatency(InstrItins, &MI, DefIdx, UseMI, i);
|
if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, UseMI, i))
|
||||||
Latency = std::max(Latency, UseCycle);
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Latency != -1)
|
// Only look at the first in-loop use.
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Latency == -1)
|
return false;
|
||||||
Latency = InstrItins->getOperandCycle(MI.getDesc().getSchedClass(), DefIdx);
|
|
||||||
|
|
||||||
return Latency;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// IncreaseHighRegPressure - Visit BBs from preheader to current BB, check
|
/// IncreaseHighRegPressure - Visit BBs from preheader to current BB, check
|
||||||
@ -855,19 +843,19 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
|
|||||||
if (MI.isImplicitDef())
|
if (MI.isImplicitDef())
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
// FIXME: For now, only hoist re-materilizable instructions. LICM will
|
// If the instruction is cheap, only hoist if it is re-materializable. LICM
|
||||||
// increase register pressure. We want to make sure it doesn't increase
|
// will increase register pressure. It's probably not worth it if the
|
||||||
// spilling.
|
// instruction is cheap.
|
||||||
// Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting
|
// Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting
|
||||||
// these tend to help performance in low register pressure situation. The
|
// these tend to help performance in low register pressure situation. The
|
||||||
// trade off is it may cause spill in high pressure situation. It will end up
|
// trade off is it may cause spill in high pressure situation. It will end up
|
||||||
// adding a store in the loop preheader. But the reload is no more expensive.
|
// adding a store in the loop preheader. But the reload is no more expensive.
|
||||||
// The side benefit is these loads are frequently CSE'ed.
|
// The side benefit is these loads are frequently CSE'ed.
|
||||||
if (!TrackRegPressure || MI.getDesc().isAsCheapAsAMove()) {
|
if (MI.getDesc().isAsCheapAsAMove()) {
|
||||||
if (!TII->isTriviallyReMaterializable(&MI, AA) &&
|
if (!TII->isTriviallyReMaterializable(&MI, AA))
|
||||||
!isLoadFromConstantMemory(&MI))
|
|
||||||
return false;
|
return false;
|
||||||
} else {
|
} else {
|
||||||
|
// Estimate register pressure to determine whether to LICM the instruction.
|
||||||
// In low register pressure situation, we can be more aggressive about
|
// In low register pressure situation, we can be more aggressive about
|
||||||
// hoisting. Also, favors hoisting long latency instructions even in
|
// hoisting. Also, favors hoisting long latency instructions even in
|
||||||
// moderately high pressure situation.
|
// moderately high pressure situation.
|
||||||
@ -880,13 +868,9 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
|
|||||||
if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg))
|
if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg))
|
||||||
continue;
|
continue;
|
||||||
if (MO.isDef()) {
|
if (MO.isDef()) {
|
||||||
if (InstrItins && !InstrItins->isEmpty()) {
|
if (HasHighOperandLatency(MI, i, Reg)) {
|
||||||
int Cycle = ComputeOperandLatency(MI, i, Reg);
|
++NumHighLatency;
|
||||||
if (Cycle > 3) {
|
return true;
|
||||||
// FIXME: Target specific high latency limit?
|
|
||||||
++NumHighLatency;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
|
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
|
||||||
|
@ -1925,3 +1925,23 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
|
|||||||
return getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
|
return getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
|
||||||
UseTID, UseIdx, UseAlign);
|
UseTID, UseIdx, UseAlign);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool ARMBaseInstrInfo::
|
||||||
|
hasHighOperandLatency(const InstrItineraryData *ItinData,
|
||||||
|
const MachineRegisterInfo *MRI,
|
||||||
|
const MachineInstr *DefMI, unsigned DefIdx,
|
||||||
|
const MachineInstr *UseMI, unsigned UseIdx) const {
|
||||||
|
unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
|
||||||
|
unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask;
|
||||||
|
if (Subtarget.isCortexA8() &&
|
||||||
|
(DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
|
||||||
|
// CortexA8 VFP instructions are not pipelined.
|
||||||
|
return true;
|
||||||
|
|
||||||
|
// Hoist VFP / NEON instructions with a latency of 4 or higher.
|
||||||
|
int Latency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
|
||||||
|
if (Latency <= 3)
|
||||||
|
return false;
|
||||||
|
return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
|
||||||
|
UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
|
||||||
|
}
|
||||||
|
@ -377,6 +377,11 @@ private:
|
|||||||
unsigned DefIdx, unsigned DefAlign,
|
unsigned DefIdx, unsigned DefAlign,
|
||||||
const TargetInstrDesc &UseTID,
|
const TargetInstrDesc &UseTID,
|
||||||
unsigned UseIdx, unsigned UseAlign) const;
|
unsigned UseIdx, unsigned UseAlign) const;
|
||||||
|
|
||||||
|
bool hasHighOperandLatency(const InstrItineraryData *ItinData,
|
||||||
|
const MachineRegisterInfo *MRI,
|
||||||
|
const MachineInstr *DefMI, unsigned DefIdx,
|
||||||
|
const MachineInstr *UseMI, unsigned UseIdx) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline
|
static inline
|
||||||
|
@ -3152,6 +3152,41 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
|
|||||||
NopInst.setOpcode(X86::NOOP);
|
NopInst.setOpcode(X86::NOOP);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool X86InstrInfo::
|
||||||
|
hasHighOperandLatency(const InstrItineraryData *ItinData,
|
||||||
|
const MachineRegisterInfo *MRI,
|
||||||
|
const MachineInstr *DefMI, unsigned DefIdx,
|
||||||
|
const MachineInstr *UseMI, unsigned UseIdx) const {
|
||||||
|
switch (DefMI->getOpcode()) {
|
||||||
|
default: return false;
|
||||||
|
case X86::DIVSDrm:
|
||||||
|
case X86::DIVSDrm_Int:
|
||||||
|
case X86::DIVSDrr:
|
||||||
|
case X86::DIVSDrr_Int:
|
||||||
|
case X86::DIVSSrm:
|
||||||
|
case X86::DIVSSrm_Int:
|
||||||
|
case X86::DIVSSrr:
|
||||||
|
case X86::DIVSSrr_Int:
|
||||||
|
case X86::SQRTPDm:
|
||||||
|
case X86::SQRTPDm_Int:
|
||||||
|
case X86::SQRTPDr:
|
||||||
|
case X86::SQRTPDr_Int:
|
||||||
|
case X86::SQRTPSm:
|
||||||
|
case X86::SQRTPSm_Int:
|
||||||
|
case X86::SQRTPSr:
|
||||||
|
case X86::SQRTPSr_Int:
|
||||||
|
case X86::SQRTSDm:
|
||||||
|
case X86::SQRTSDm_Int:
|
||||||
|
case X86::SQRTSDr:
|
||||||
|
case X86::SQRTSDr_Int:
|
||||||
|
case X86::SQRTSSm:
|
||||||
|
case X86::SQRTSSm_Int:
|
||||||
|
case X86::SQRTSSr:
|
||||||
|
case X86::SQRTSSr_Int:
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
/// CGBR - Create Global Base Reg pass. This initializes the PIC
|
/// CGBR - Create Global Base Reg pass. This initializes the PIC
|
||||||
/// global base register for x86-32.
|
/// global base register for x86-32.
|
||||||
|
@ -864,6 +864,11 @@ public:
|
|||||||
unsigned OpNum,
|
unsigned OpNum,
|
||||||
const SmallVectorImpl<MachineOperand> &MOs,
|
const SmallVectorImpl<MachineOperand> &MOs,
|
||||||
unsigned Size, unsigned Alignment) const;
|
unsigned Size, unsigned Alignment) const;
|
||||||
|
|
||||||
|
bool hasHighOperandLatency(const InstrItineraryData *ItinData,
|
||||||
|
const MachineRegisterInfo *MRI,
|
||||||
|
const MachineInstr *DefMI, unsigned DefIdx,
|
||||||
|
const MachineInstr *UseMI, unsigned UseIdx) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
|
MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
|
||||||
|
@ -1,65 +0,0 @@
|
|||||||
; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 -o /dev/null -stats -info-output-file - | grep "Number of re-materialization"
|
|
||||||
|
|
||||||
define i32 @main(i32 %argc, i8** nocapture %argv, double %d1, double %d2) nounwind {
|
|
||||||
entry:
|
|
||||||
br i1 undef, label %smvp.exit, label %bb.i3
|
|
||||||
|
|
||||||
bb.i3: ; preds = %bb.i3, %bb134
|
|
||||||
br i1 undef, label %smvp.exit, label %bb.i3
|
|
||||||
|
|
||||||
smvp.exit: ; preds = %bb.i3
|
|
||||||
%0 = fmul double %d1, 2.400000e-03 ; <double> [#uses=2]
|
|
||||||
br i1 undef, label %bb138.preheader, label %bb159
|
|
||||||
|
|
||||||
bb138.preheader: ; preds = %smvp.exit
|
|
||||||
br label %bb138
|
|
||||||
|
|
||||||
bb138: ; preds = %bb138, %bb138.preheader
|
|
||||||
br i1 undef, label %bb138, label %bb145.loopexit
|
|
||||||
|
|
||||||
bb142: ; preds = %bb.nph218.bb.nph218.split_crit_edge, %phi0.exit
|
|
||||||
%1 = fmul double %d1, -1.200000e-03 ; <double> [#uses=1]
|
|
||||||
%2 = fadd double %d2, %1 ; <double> [#uses=1]
|
|
||||||
%3 = fmul double %2, %d2 ; <double> [#uses=1]
|
|
||||||
%4 = fsub double 0.000000e+00, %3 ; <double> [#uses=1]
|
|
||||||
br i1 %14, label %phi1.exit, label %bb.i35
|
|
||||||
|
|
||||||
bb.i35: ; preds = %bb142
|
|
||||||
%5 = call double @sin(double %15) nounwind readonly ; <double> [#uses=1]
|
|
||||||
%6 = fmul double %5, 0x4031740AFA84AD8A ; <double> [#uses=1]
|
|
||||||
%7 = fsub double 1.000000e+00, undef ; <double> [#uses=1]
|
|
||||||
%8 = fdiv double %7, 6.000000e-01 ; <double> [#uses=1]
|
|
||||||
br label %phi1.exit
|
|
||||||
|
|
||||||
phi1.exit: ; preds = %bb.i35, %bb142
|
|
||||||
%.pn = phi double [ %6, %bb.i35 ], [ 0.000000e+00, %bb142 ] ; <double> [#uses=1]
|
|
||||||
%9 = phi double [ %8, %bb.i35 ], [ 0.000000e+00, %bb142 ] ; <double> [#uses=1]
|
|
||||||
%10 = fmul double %.pn, %9 ; <double> [#uses=1]
|
|
||||||
br i1 %14, label %phi0.exit, label %bb.i
|
|
||||||
|
|
||||||
bb.i: ; preds = %phi1.exit
|
|
||||||
unreachable
|
|
||||||
|
|
||||||
phi0.exit: ; preds = %phi1.exit
|
|
||||||
%11 = fsub double %4, %10 ; <double> [#uses=1]
|
|
||||||
%12 = fadd double 0.000000e+00, %11 ; <double> [#uses=1]
|
|
||||||
store double %12, double* undef, align 4
|
|
||||||
br label %bb142
|
|
||||||
|
|
||||||
bb145.loopexit: ; preds = %bb138
|
|
||||||
br i1 undef, label %bb.nph218.bb.nph218.split_crit_edge, label %bb159
|
|
||||||
|
|
||||||
bb.nph218.bb.nph218.split_crit_edge: ; preds = %bb145.loopexit
|
|
||||||
%13 = fmul double %0, 0x401921FB54442D18 ; <double> [#uses=1]
|
|
||||||
%14 = fcmp ugt double %0, 6.000000e-01 ; <i1> [#uses=2]
|
|
||||||
%15 = fdiv double %13, 6.000000e-01 ; <double> [#uses=1]
|
|
||||||
br label %bb142
|
|
||||||
|
|
||||||
bb159: ; preds = %bb145.loopexit, %smvp.exit, %bb134
|
|
||||||
unreachable
|
|
||||||
|
|
||||||
bb166: ; preds = %bb127
|
|
||||||
unreachable
|
|
||||||
}
|
|
||||||
|
|
||||||
declare double @sin(double) nounwind readonly
|
|
@ -2,17 +2,16 @@
|
|||||||
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim -arm-vdup-splat | FileCheck %s
|
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim -arm-vdup-splat | FileCheck %s
|
||||||
; Modified version of machine-licm.ll with -arm-vdup-splat turned on, 8003375.
|
; Modified version of machine-licm.ll with -arm-vdup-splat turned on, 8003375.
|
||||||
; Eventually this should become the default and be moved into machine-licm.ll.
|
; Eventually this should become the default and be moved into machine-licm.ll.
|
||||||
; FIXME: the vdup should be hoisted out of the loop, 8248029.
|
|
||||||
|
|
||||||
define void @t2(i8* %ptr1, i8* %ptr2) nounwind {
|
define void @t2(i8* %ptr1, i8* %ptr2) nounwind {
|
||||||
entry:
|
entry:
|
||||||
; CHECK: t2:
|
; CHECK: t2:
|
||||||
; CHECK: mov.w r3, #1065353216
|
; CHECK: mov.w r3, #1065353216
|
||||||
|
; CHECK: vdup.32 q{{.*}}, r3
|
||||||
br i1 undef, label %bb1, label %bb2
|
br i1 undef, label %bb1, label %bb2
|
||||||
|
|
||||||
bb1:
|
bb1:
|
||||||
; CHECK-NEXT: %bb1
|
; CHECK-NEXT: %bb1
|
||||||
; CHECK: vdup.32 q{{.*}}, r3
|
|
||||||
%indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ]
|
%indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ]
|
||||||
%tmp1 = shl i32 %indvar, 2
|
%tmp1 = shl i32 %indvar, 2
|
||||||
%gep1 = getelementptr i8* %ptr1, i32 %tmp1
|
%gep1 = getelementptr i8* %ptr1, i32 %tmp1
|
||||||
|
@ -1,6 +1,9 @@
|
|||||||
; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& not grep {Number of register spills}
|
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -stats |& FileCheck %s
|
||||||
|
; Now this test spills one register. But a reload in the loop is cheaper than
|
||||||
|
; the divsd so it's a win.
|
||||||
|
|
||||||
define fastcc void @fourn(double* %data, i32 %isign) nounwind {
|
define fastcc void @fourn(double* %data, i32 %isign) nounwind {
|
||||||
|
; CHECK: fourn
|
||||||
entry:
|
entry:
|
||||||
br label %bb
|
br label %bb
|
||||||
|
|
||||||
@ -11,6 +14,11 @@ bb: ; preds = %bb, %entry
|
|||||||
%1 = icmp sgt i32 %0, 2 ; <i1> [#uses=1]
|
%1 = icmp sgt i32 %0, 2 ; <i1> [#uses=1]
|
||||||
br i1 %1, label %bb30.loopexit, label %bb
|
br i1 %1, label %bb30.loopexit, label %bb
|
||||||
|
|
||||||
|
; CHECK: %bb30.loopexit
|
||||||
|
; CHECK: divsd %xmm0
|
||||||
|
; CHECK: movsd %xmm0, 16(%esp)
|
||||||
|
; CHECK: .align
|
||||||
|
; CHECK-NEXT: %bb3
|
||||||
bb3: ; preds = %bb30.loopexit, %bb25, %bb3
|
bb3: ; preds = %bb30.loopexit, %bb25, %bb3
|
||||||
%2 = load i32* null, align 4 ; <i32> [#uses=1]
|
%2 = load i32* null, align 4 ; <i32> [#uses=1]
|
||||||
%3 = mul i32 %2, 0 ; <i32> [#uses=1]
|
%3 = mul i32 %2, 0 ; <i32> [#uses=1]
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {6 machine-licm}
|
; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {7 machine-licm}
|
||||||
; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 | FileCheck %s
|
; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 | FileCheck %s
|
||||||
; rdar://6627786
|
; rdar://6627786
|
||||||
; rdar://7792037
|
; rdar://7792037
|
||||||
|
Loading…
Reference in New Issue
Block a user