mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-01 00:33:09 +00:00
Clean up spill weight computation. Also some changes to give loop induction
variable increment / decrement slightly higher priority. This has a major impact on some micro-benchmarks. On MultiSource/Applications and spec tests, it's a minor win. It also reduces 256.bzip instruction count by 8%, 55 on 164.gzip on i386 / Darwin. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@82485 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
b2bb7db9e2
commit
cf985a9545
@ -2535,7 +2535,8 @@ void SimpleRegisterCoalescing::releaseMemory() {
|
||||
ReMatDefs.clear();
|
||||
}
|
||||
|
||||
bool SimpleRegisterCoalescing::isZeroLengthInterval(LiveInterval *li) const {
|
||||
/// Returns true if the given live interval is zero length.
|
||||
static bool isZeroLengthInterval(LiveInterval *li, LiveIntervals *li_) {
|
||||
for (LiveInterval::Ranges::const_iterator
|
||||
i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i)
|
||||
if (li_->getPrevIndex(i->end) > i->start)
|
||||
@ -2543,6 +2544,97 @@ bool SimpleRegisterCoalescing::isZeroLengthInterval(LiveInterval *li) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
/// CalculateSpillWeights - Compute spill weights for all virtual register
/// live intervals.
///
/// Pass 1 walks every operand of every instruction in the function and, once
/// per (instruction, virtual register) pair, adds
/// li_->getSpillWeight(HasDef, HasUse, loopDepth+1) to that register's
/// interval weight. The contribution is tripled when the instruction defines
/// the register inside a block the enclosing loop reports as an exit block
/// and the defined live range extends past the end of that block.
///
/// Pass 2 post-processes every virtual register interval: zero-length
/// intervals get weight HUGE_VALF; all others are scaled down if they are
/// re-materializable, scaled up slightly if they carry an allocation hint,
/// and finally divided by the approximate size of the interval.
void SimpleRegisterCoalescing::CalculateSpillWeights() {
|
||||
// Virtual registers already accounted for in the current instruction.
SmallSet<unsigned, 4> Processed;
|
||||
for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
|
||||
mbbi != mbbe; ++mbbi) {
|
||||
MachineBasicBlock* MBB = mbbi;
|
||||
MachineInstrIndex MBBEnd = li_->getMBBEndIdx(MBB);
|
||||
MachineLoop* loop = loopInfo->getLoopFor(MBB);
|
||||
// Blocks outside any loop get depth 0 and are never treated as exits.
unsigned loopDepth = loop ? loop->getLoopDepth() : 0;
|
||||
bool isExit = loop ? loop->isLoopExit(MBB) : false;
|
||||
|
||||
for (MachineBasicBlock::iterator mii = MBB->begin(), mie = MBB->end();
|
||||
mii != mie; ++mii) {
|
||||
MachineInstr *MI = mii;
|
||||
|
||||
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
|
||||
const MachineOperand &mopi = MI->getOperand(i);
|
||||
// Skip non-register operands and the null register.
if (!mopi.isReg() || mopi.getReg() == 0)
|
||||
continue;
|
||||
unsigned Reg = mopi.getReg();
|
||||
// Only virtual registers receive spill weights here.
if (!TargetRegisterInfo::isVirtualRegister(mopi.getReg()))
|
||||
continue;
|
||||
// Multiple uses of reg by the same instruction. It should not
|
||||
// contribute to spill weight again.
|
||||
if (!Processed.insert(Reg))
|
||||
continue;
|
||||
|
||||
// Fold the def/use flags of every later operand of this instruction that
// names the same register, so the register is weighted exactly once.
bool HasDef = mopi.isDef();
|
||||
bool HasUse = mopi.isUse();
|
||||
for (unsigned j = i+1; j != e; ++j) {
|
||||
const MachineOperand &mopj = MI->getOperand(j);
|
||||
if (!mopj.isReg() || mopj.getReg() != Reg)
|
||||
continue;
|
||||
HasDef |= mopj.isDef();
|
||||
HasUse |= mopj.isUse();
|
||||
}
|
||||
|
||||
LiveInterval &RegInt = li_->getInterval(Reg);
|
||||
// Note the depth passed is loopDepth+1, so non-loop blocks pass 1.
float Weight = li_->getSpillWeight(HasDef, HasUse, loopDepth+1);
|
||||
if (HasDef && isExit) {
|
||||
// Looks like this is a loop count variable update.
|
||||
MachineInstrIndex DefIdx =
|
||||
li_->getDefIndex(li_->getInstructionIndex(MI));
|
||||
const LiveRange *DLR =
|
||||
li_->getInterval(Reg).getLiveRangeContaining(DefIdx);
|
||||
// NOTE(review): DLR is dereferenced without a null check; presumably a
// def always has a containing live range here -- confirm.
// Triple the weight only when the defined value lives past this block.
if (DLR->end > MBBEnd)
|
||||
Weight *= 3.0F;
|
||||
}
|
||||
RegInt.weight += Weight;
|
||||
}
|
||||
// Reset the per-instruction set before moving to the next instruction.
Processed.clear();
|
||||
}
|
||||
}
|
||||
|
||||
// Pass 2: adjust and normalize the accumulated weights per interval.
for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) {
|
||||
LiveInterval &LI = *I->second;
|
||||
if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
|
||||
// If the live interval length is essentially zero, i.e. in every live
|
||||
// range the use follows def immediately, it doesn't make sense to spill
|
||||
// it and hope it will be easier to allocate for this li.
|
||||
if (isZeroLengthInterval(&LI, li_)) {
|
||||
LI.weight = HUGE_VALF;
|
||||
// Skip the remaining scaling steps for this interval.
continue;
|
||||
}
|
||||
|
||||
bool isLoad = false;
|
||||
SmallVector<LiveInterval*, 4> SpillIs;
|
||||
if (li_->isReMaterializable(LI, SpillIs, isLoad)) {
|
||||
// If all of the definitions of the interval are re-materializable,
|
||||
// it is a preferred candidate for spilling. If none of the defs are
|
||||
// loads, then it's potentially very cheap to re-materialize.
|
||||
// FIXME: this gets much more complicated once we support non-trivial
|
||||
// re-materialization.
|
||||
if (isLoad)
|
||||
LI.weight *= 0.9F;
|
||||
else
|
||||
LI.weight *= 0.5F;
|
||||
}
|
||||
|
||||
// Slightly prefer live interval that has been assigned a preferred reg.
|
||||
std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(LI.reg);
|
||||
if (Hint.first || Hint.second)
|
||||
LI.weight *= 1.01F;
|
||||
|
||||
// Divide the weight of the interval by its size. This encourages
|
||||
// spilling of intervals that are large and have few uses, and
|
||||
// discourages spilling of small intervals with many uses.
|
||||
LI.weight /= li_->getApproximateInstructionCount(LI) * InstrSlots::NUM;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
|
||||
mf_ = &fn;
|
||||
@ -2581,8 +2673,6 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
|
||||
for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
|
||||
mbbi != mbbe; ++mbbi) {
|
||||
MachineBasicBlock* mbb = mbbi;
|
||||
unsigned loopDepth = loopInfo->getLoopDepth(mbb);
|
||||
|
||||
for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end();
|
||||
mii != mie; ) {
|
||||
MachineInstr *MI = mii;
|
||||
@ -2656,62 +2746,12 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
|
||||
mii = mbbi->erase(mii);
|
||||
++numPeep;
|
||||
} else {
|
||||
SmallSet<unsigned, 4> UniqueUses;
|
||||
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
|
||||
const MachineOperand &mop = MI->getOperand(i);
|
||||
if (mop.isReg() && mop.getReg() &&
|
||||
TargetRegisterInfo::isVirtualRegister(mop.getReg())) {
|
||||
unsigned reg = mop.getReg();
|
||||
// Multiple uses of reg by the same instruction. It should not
|
||||
// contribute to spill weight again.
|
||||
if (UniqueUses.count(reg) != 0)
|
||||
continue;
|
||||
LiveInterval &RegInt = li_->getInterval(reg);
|
||||
RegInt.weight +=
|
||||
li_->getSpillWeight(mop.isDef(), mop.isUse(), loopDepth);
|
||||
UniqueUses.insert(reg);
|
||||
}
|
||||
}
|
||||
++mii;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) {
|
||||
LiveInterval &LI = *I->second;
|
||||
if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
|
||||
// If the live interval length is essentially zero, i.e. in every live
|
||||
// range the use follows def immediately, it doesn't make sense to spill
|
||||
// it and hope it will be easier to allocate for this li.
|
||||
if (isZeroLengthInterval(&LI))
|
||||
LI.weight = HUGE_VALF;
|
||||
else {
|
||||
bool isLoad = false;
|
||||
SmallVector<LiveInterval*, 4> SpillIs;
|
||||
if (li_->isReMaterializable(LI, SpillIs, isLoad)) {
|
||||
// If all of the definitions of the interval are re-materializable,
|
||||
// it is a preferred candidate for spilling. If non of the defs are
|
||||
// loads, then it's potentially very cheap to re-materialize.
|
||||
// FIXME: this gets much more complicated once we support non-trivial
|
||||
// re-materialization.
|
||||
if (isLoad)
|
||||
LI.weight *= 0.9F;
|
||||
else
|
||||
LI.weight *= 0.5F;
|
||||
}
|
||||
}
|
||||
|
||||
// Slightly prefer live interval that has been assigned a preferred reg.
|
||||
std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(LI.reg);
|
||||
if (Hint.first || Hint.second)
|
||||
LI.weight *= 1.01F;
|
||||
|
||||
// Divide the weight of the interval by its size. This encourages
|
||||
// spilling of intervals that are large and have few uses, and
|
||||
// discourages spilling of small intervals with many uses.
|
||||
LI.weight /= li_->getApproximateInstructionCount(LI) * InstrSlots::NUM;
|
||||
}
|
||||
}
|
||||
CalculateSpillWeights();
|
||||
|
||||
DEBUG(dump());
|
||||
return true;
|
||||
|
@ -123,7 +123,6 @@ namespace llvm {
|
||||
/// classes. The registers may be either phys or virt regs.
|
||||
bool differingRegisterClasses(unsigned RegA, unsigned RegB) const;
|
||||
|
||||
|
||||
/// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy. If
|
||||
/// the source value number is defined by a copy from the destination reg
|
||||
/// see if we can merge these two destination reg valno# into a single
|
||||
@ -235,13 +234,15 @@ namespace llvm {
|
||||
|
||||
/// lastRegisterUse - Returns the last use of the specific register between
|
||||
/// cycles Start and End or NULL if there are no uses.
|
||||
MachineOperand *lastRegisterUse(MachineInstrIndex Start, MachineInstrIndex End,
|
||||
unsigned Reg, MachineInstrIndex &LastUseIdx) const;
|
||||
MachineOperand *lastRegisterUse(MachineInstrIndex Start,
|
||||
MachineInstrIndex End, unsigned Reg,
|
||||
MachineInstrIndex &LastUseIdx) const;
|
||||
|
||||
/// CalculateSpillWeights - Compute spill weights for all virtual register
|
||||
/// live intervals.
|
||||
void CalculateSpillWeights();
|
||||
|
||||
void printRegName(unsigned reg) const;
|
||||
|
||||
/// Returns true if the given live interval is zero length.
|
||||
bool isZeroLengthInterval(LiveInterval *li) const;
|
||||
};
|
||||
|
||||
} // End llvm namespace
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc < %s -mtriple=arm-apple-darwin9 -stats |& grep asm-printer | grep 164
|
||||
; RUN: llc < %s -mtriple=arm-apple-darwin9 -stats |& grep asm-printer | grep 161
|
||||
|
||||
%"struct.Adv5::Ekin<3>" = type <{ i8 }>
|
||||
%"struct.Adv5::X::Energyflux<3>" = type { double }
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc < %s -mtriple=arm-apple-darwin
|
||||
; RUN: llc < %s -mtriple=arm-apple-darwin -stats -info-output-file - | grep "Number of re-materialization" | grep 2
|
||||
; RUN: llc < %s -mtriple=arm-apple-darwin -stats -info-output-file - | grep "Number of re-materialization" | grep 5
|
||||
|
||||
%struct.CONTENTBOX = type { i32, i32, i32, i32, i32 }
|
||||
%struct.LOCBOX = type { i32, i32, i32, i32 }
|
||||
|
@ -1,5 +1,4 @@
|
||||
; RUN: llc < %s -march=x86 -stats |& grep {Number of re-materialization} | grep 3
|
||||
; RUN: llc < %s -march=x86 -stats |& grep {Number of dead spill slots removed}
|
||||
; RUN: llc < %s -march=x86 -stats |& grep {Number of re-materialization} | grep 2
|
||||
; rdar://5761454
|
||||
|
||||
%struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
|
||||
|
@ -2,8 +2,9 @@
|
||||
; PR2536
|
||||
|
||||
|
||||
; CHECK: movw %ax
|
||||
; CHECK: movw %cx
|
||||
; CHECK-NEXT: andl $65534, %
|
||||
; CHECK-NEXT: movl %
|
||||
; CHECK-NEXT: movl $17
|
||||
|
||||
@g_5 = external global i16 ; <i16*> [#uses=2]
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of registers downgraded}
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep asm-printer | grep 84
|
||||
; rdar://6802189
|
||||
|
||||
; Test if linearscan is unfavoring registers for allocation to allow more reuse
|
||||
|
36
test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll
Normal file
36
test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll
Normal file
@ -0,0 +1,36 @@
|
||||
; RUN: llc < %s -mtriple=i386-apple-darwin10.0 -relocation-model=pic | FileCheck %s
|
||||
|
||||
define void @dot(i16* nocapture %A, i32 %As, i16* nocapture %B, i32 %Bs, i16* nocapture %C, i32 %N) nounwind ssp {
|
||||
; CHECK: dot:
|
||||
; CHECK: decl %
|
||||
; CHECK-NEXT: jne
|
||||
entry:
|
||||
%0 = icmp sgt i32 %N, 0 ; <i1> [#uses=1]
|
||||
br i1 %0, label %bb, label %bb2
|
||||
|
||||
bb: ; preds = %bb, %entry
|
||||
%i.03 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=3]
|
||||
%sum.04 = phi i32 [ 0, %entry ], [ %10, %bb ] ; <i32> [#uses=1]
|
||||
%1 = mul i32 %i.03, %As ; <i32> [#uses=1]
|
||||
%2 = getelementptr i16* %A, i32 %1 ; <i16*> [#uses=1]
|
||||
%3 = load i16* %2, align 2 ; <i16> [#uses=1]
|
||||
%4 = sext i16 %3 to i32 ; <i32> [#uses=1]
|
||||
%5 = mul i32 %i.03, %Bs ; <i32> [#uses=1]
|
||||
%6 = getelementptr i16* %B, i32 %5 ; <i16*> [#uses=1]
|
||||
%7 = load i16* %6, align 2 ; <i16> [#uses=1]
|
||||
%8 = sext i16 %7 to i32 ; <i32> [#uses=1]
|
||||
%9 = mul i32 %8, %4 ; <i32> [#uses=1]
|
||||
%10 = add i32 %9, %sum.04 ; <i32> [#uses=2]
|
||||
%indvar.next = add i32 %i.03, 1 ; <i32> [#uses=2]
|
||||
%exitcond = icmp eq i32 %indvar.next, %N ; <i1> [#uses=1]
|
||||
br i1 %exitcond, label %bb1.bb2_crit_edge, label %bb
|
||||
|
||||
bb1.bb2_crit_edge: ; preds = %bb
|
||||
%phitmp = trunc i32 %10 to i16 ; <i16> [#uses=1]
|
||||
br label %bb2
|
||||
|
||||
bb2: ; preds = %entry, %bb1.bb2_crit_edge
|
||||
%sum.0.lcssa = phi i16 [ %phitmp, %bb1.bb2_crit_edge ], [ 0, %entry ] ; <i16> [#uses=1]
|
||||
store i16 %sum.0.lcssa, i16* %C, align 2
|
||||
ret void
|
||||
}
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t
|
||||
; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 8
|
||||
; RUN: grep asm-printer %t | grep 182
|
||||
; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 5
|
||||
; RUN: grep asm-printer %t | grep 179
|
||||
|
||||
type { [62 x %struct.Bitvec*] } ; type %0
|
||||
type { i8* } ; type %1
|
||||
|
Loading…
Reference in New Issue
Block a user