Clean up spill weight computation. Also some changes to give loop induction

variable increment / decrement slightly higher priority.

This has major impact on some micro-benchmarks. On MultiSource/Applications
and spec tests, it's a minor win. It also reduces the 256.bzip instruction count
by 8%, 55 on 164.gzip on i386 / Darwin.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@82485 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2009-09-21 21:12:25 +00:00
parent b2bb7db9e2
commit cf985a9545
9 changed files with 145 additions and 68 deletions

View File

@ -2535,7 +2535,8 @@ void SimpleRegisterCoalescing::releaseMemory() {
ReMatDefs.clear(); ReMatDefs.clear();
} }
bool SimpleRegisterCoalescing::isZeroLengthInterval(LiveInterval *li) const { /// Returns true if the given live interval is zero length.
static bool isZeroLengthInterval(LiveInterval *li, LiveIntervals *li_) {
for (LiveInterval::Ranges::const_iterator for (LiveInterval::Ranges::const_iterator
i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i) i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i)
if (li_->getPrevIndex(i->end) > i->start) if (li_->getPrevIndex(i->end) > i->start)
@ -2543,6 +2544,97 @@ bool SimpleRegisterCoalescing::isZeroLengthInterval(LiveInterval *li) const {
return true; return true;
} }
/// CalculateSpillWeights - Compute spill weights for all virtual register
/// live intervals.
///
/// Pass 1 walks every instruction of the function and, for each virtual
/// register the instruction references, adds the value returned by
/// getSpillWeight(HasDef, HasUse, loopDepth+1) to that register's interval
/// weight. A register is counted once per instruction even if it appears in
/// several operands. When the instruction defines the register, the block is
/// one for which MachineLoop::isLoopExit() is true, and the defined value is
/// live past the end of the block, the def is assumed to be a loop counter
/// update and its contribution is tripled.
///
/// Pass 2 post-processes every virtual register interval: zero-length
/// intervals get HUGE_VALF, re-materializable intervals are discounted,
/// intervals with an allocation hint are nudged up, and the result is
/// normalized by the approximate interval size.
void SimpleRegisterCoalescing::CalculateSpillWeights() {
  // Virtual registers already accounted for in the current instruction.
  SmallSet<unsigned, 4> Processed;
  for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
       mbbi != mbbe; ++mbbi) {
    MachineBasicBlock* MBB = mbbi;
    MachineInstrIndex MBBEnd = li_->getMBBEndIdx(MBB);
    MachineLoop* loop = loopInfo->getLoopFor(MBB);
    // Blocks outside of any loop have depth 0 and are never loop exits.
    unsigned loopDepth = loop ? loop->getLoopDepth() : 0;
    bool isExit = loop ? loop->isLoopExit(MBB) : false;

    for (MachineBasicBlock::iterator mii = MBB->begin(), mie = MBB->end();
         mii != mie; ++mii) {
      MachineInstr *MI = mii;

      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
        const MachineOperand &mopi = MI->getOperand(i);
        if (!mopi.isReg() || mopi.getReg() == 0)
          continue;
        unsigned Reg = mopi.getReg();
        if (!TargetRegisterInfo::isVirtualRegister(Reg))
          continue;
        // Multiple uses of reg by the same instruction. It should not
        // contribute to spill weight again.
        if (!Processed.insert(Reg))
          continue;

        // Merge the def / use flags of every later operand that names the
        // same register, so the instruction is weighted once with the
        // combined flags.
        bool HasDef = mopi.isDef();
        bool HasUse = mopi.isUse();
        for (unsigned j = i+1; j != e; ++j) {
          const MachineOperand &mopj = MI->getOperand(j);
          if (!mopj.isReg() || mopj.getReg() != Reg)
            continue;
          HasDef |= mopj.isDef();
          HasUse |= mopj.isUse();
        }

        LiveInterval &RegInt = li_->getInterval(Reg);
        float Weight = li_->getSpillWeight(HasDef, HasUse, loopDepth+1);
        if (HasDef && isExit) {
          // Looks like this is a loop count variable update.
          MachineInstrIndex DefIdx =
            li_->getDefIndex(li_->getInstructionIndex(MI));
          // Reuse the interval fetched above rather than looking it up again.
          // The lookup may find no range containing DefIdx; in that case the
          // weight is left unscaled instead of dereferencing a null pointer.
          const LiveRange *DLR = RegInt.getLiveRangeContaining(DefIdx);
          if (DLR && DLR->end > MBBEnd)
            Weight *= 3.0F;
        }
        RegInt.weight += Weight;
      }
      Processed.clear();
    }
  }

  for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) {
    LiveInterval &LI = *I->second;
    if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
      // If the live interval length is essentially zero, i.e. in every live
      // range the use follows def immediately, it doesn't make sense to spill
      // it and hope it will be easier to allocate for this li.
      if (isZeroLengthInterval(&LI, li_)) {
        LI.weight = HUGE_VALF;
        continue;
      }

      bool isLoad = false;
      SmallVector<LiveInterval*, 4> SpillIs;
      if (li_->isReMaterializable(LI, SpillIs, isLoad)) {
        // If all of the definitions of the interval are re-materializable,
        // it is a preferred candidate for spilling. If none of the defs are
        // loads, then it's potentially very cheap to re-materialize.
        // FIXME: this gets much more complicated once we support non-trivial
        // re-materialization.
        if (isLoad)
          LI.weight *= 0.9F;
        else
          LI.weight *= 0.5F;
      }

      // Slightly prefer live interval that has been assigned a preferred reg.
      std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(LI.reg);
      if (Hint.first || Hint.second)
        LI.weight *= 1.01F;

      // Divide the weight of the interval by its size. This encourages
      // spilling of intervals that are large and have few uses, and
      // discourages spilling of small intervals with many uses.
      LI.weight /= li_->getApproximateInstructionCount(LI) * InstrSlots::NUM;
    }
  }
}
bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
mf_ = &fn; mf_ = &fn;
@ -2581,8 +2673,6 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end(); for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
mbbi != mbbe; ++mbbi) { mbbi != mbbe; ++mbbi) {
MachineBasicBlock* mbb = mbbi; MachineBasicBlock* mbb = mbbi;
unsigned loopDepth = loopInfo->getLoopDepth(mbb);
for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end(); for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end();
mii != mie; ) { mii != mie; ) {
MachineInstr *MI = mii; MachineInstr *MI = mii;
@ -2656,62 +2746,12 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
mii = mbbi->erase(mii); mii = mbbi->erase(mii);
++numPeep; ++numPeep;
} else { } else {
SmallSet<unsigned, 4> UniqueUses;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &mop = MI->getOperand(i);
if (mop.isReg() && mop.getReg() &&
TargetRegisterInfo::isVirtualRegister(mop.getReg())) {
unsigned reg = mop.getReg();
// Multiple uses of reg by the same instruction. It should not
// contribute to spill weight again.
if (UniqueUses.count(reg) != 0)
continue;
LiveInterval &RegInt = li_->getInterval(reg);
RegInt.weight +=
li_->getSpillWeight(mop.isDef(), mop.isUse(), loopDepth);
UniqueUses.insert(reg);
}
}
++mii; ++mii;
} }
} }
} }
for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) { CalculateSpillWeights();
LiveInterval &LI = *I->second;
if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
// If the live interval length is essentially zero, i.e. in every live
// range the use follows def immediately, it doesn't make sense to spill
// it and hope it will be easier to allocate for this li.
if (isZeroLengthInterval(&LI))
LI.weight = HUGE_VALF;
else {
bool isLoad = false;
SmallVector<LiveInterval*, 4> SpillIs;
if (li_->isReMaterializable(LI, SpillIs, isLoad)) {
// If all of the definitions of the interval are re-materializable,
// it is a preferred candidate for spilling. If non of the defs are
// loads, then it's potentially very cheap to re-materialize.
// FIXME: this gets much more complicated once we support non-trivial
// re-materialization.
if (isLoad)
LI.weight *= 0.9F;
else
LI.weight *= 0.5F;
}
}
// Slightly prefer live interval that has been assigned a preferred reg.
std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(LI.reg);
if (Hint.first || Hint.second)
LI.weight *= 1.01F;
// Divide the weight of the interval by its size. This encourages
// spilling of intervals that are large and have few uses, and
// discourages spilling of small intervals with many uses.
LI.weight /= li_->getApproximateInstructionCount(LI) * InstrSlots::NUM;
}
}
DEBUG(dump()); DEBUG(dump());
return true; return true;

View File

@ -123,7 +123,6 @@ namespace llvm {
/// classes. The registers may be either phys or virt regs. /// classes. The registers may be either phys or virt regs.
bool differingRegisterClasses(unsigned RegA, unsigned RegB) const; bool differingRegisterClasses(unsigned RegA, unsigned RegB) const;
/// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy. If /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy. If
/// the source value number is defined by a copy from the destination reg /// the source value number is defined by a copy from the destination reg
/// see if we can merge these two destination reg valno# into a single /// see if we can merge these two destination reg valno# into a single
@ -235,13 +234,15 @@ namespace llvm {
/// lastRegisterUse - Returns the last use of the specific register between /// lastRegisterUse - Returns the last use of the specific register between
/// cycles Start and End or NULL if there are no uses. /// cycles Start and End or NULL if there are no uses.
MachineOperand *lastRegisterUse(MachineInstrIndex Start, MachineInstrIndex End, MachineOperand *lastRegisterUse(MachineInstrIndex Start,
unsigned Reg, MachineInstrIndex &LastUseIdx) const; MachineInstrIndex End, unsigned Reg,
MachineInstrIndex &LastUseIdx) const;
/// CalculateSpillWeights - Compute spill weights for all virtual register
/// live intervals.
void CalculateSpillWeights();
void printRegName(unsigned reg) const; void printRegName(unsigned reg) const;
/// Returns true if the given live interval is zero length.
bool isZeroLengthInterval(LiveInterval *li) const;
}; };
} // End llvm namespace } // End llvm namespace

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=arm-apple-darwin9 -stats |& grep asm-printer | grep 164 ; RUN: llc < %s -mtriple=arm-apple-darwin9 -stats |& grep asm-printer | grep 161
%"struct.Adv5::Ekin<3>" = type <{ i8 }> %"struct.Adv5::Ekin<3>" = type <{ i8 }>
%"struct.Adv5::X::Energyflux<3>" = type { double } %"struct.Adv5::X::Energyflux<3>" = type { double }

View File

@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=arm-apple-darwin ; RUN: llc < %s -mtriple=arm-apple-darwin
; RUN: llc < %s -mtriple=arm-apple-darwin -stats -info-output-file - | grep "Number of re-materialization" | grep 2 ; RUN: llc < %s -mtriple=arm-apple-darwin -stats -info-output-file - | grep "Number of re-materialization" | grep 5
%struct.CONTENTBOX = type { i32, i32, i32, i32, i32 } %struct.CONTENTBOX = type { i32, i32, i32, i32, i32 }
%struct.LOCBOX = type { i32, i32, i32, i32 } %struct.LOCBOX = type { i32, i32, i32, i32 }

View File

@ -1,5 +1,4 @@
; RUN: llc < %s -march=x86 -stats |& grep {Number of re-materialization} | grep 3 ; RUN: llc < %s -march=x86 -stats |& grep {Number of re-materialization} | grep 2
; RUN: llc < %s -march=x86 -stats |& grep {Number of dead spill slots removed}
; rdar://5761454 ; rdar://5761454
%struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* } %struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }

View File

@ -2,8 +2,9 @@
; PR2536 ; PR2536
; CHECK: movw %ax ; CHECK: movw %cx
; CHECK-NEXT: andl $65534, % ; CHECK-NEXT: andl $65534, %
; CHECK-NEXT: movl %
; CHECK-NEXT: movl $17 ; CHECK-NEXT: movl $17
@g_5 = external global i16 ; <i16*> [#uses=2] @g_5 = external global i16 ; <i16*> [#uses=2]

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of registers downgraded} ; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep asm-printer | grep 84
; rdar://6802189 ; rdar://6802189
; Test if linearscan is unfavoring registers for allocation to allow more reuse ; Test if linearscan is unfavoring registers for allocation to allow more reuse

View File

@ -0,0 +1,36 @@
; RUN: llc < %s -mtriple=i386-apple-darwin10.0 -relocation-model=pic | FileCheck %s
; @dot accumulates sext(A[i*As]) * sext(B[i*Bs]) for i = 0 .. N-1 in an i32,
; truncates the sum to i16 and stores it to C (0 is stored when N <= 0).
; The FileCheck lines inside the function expect the generated code to
; contain a decl instruction immediately followed by a jne.
define void @dot(i16* nocapture %A, i32 %As, i16* nocapture %B, i32 %Bs, i16* nocapture %C, i32 %N) nounwind ssp {
; CHECK: dot:
; CHECK: decl %
; CHECK-NEXT: jne
entry:
; Skip the loop entirely when N is not positive.
%0 = icmp sgt i32 %N, 0 ; <i1> [#uses=1]
br i1 %0, label %bb, label %bb2
; Loop body: %i.03 is the induction variable, %sum.04 the running sum.
bb: ; preds = %bb, %entry
%i.03 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=3]
%sum.04 = phi i32 [ 0, %entry ], [ %10, %bb ] ; <i32> [#uses=1]
%1 = mul i32 %i.03, %As ; <i32> [#uses=1]
%2 = getelementptr i16* %A, i32 %1 ; <i16*> [#uses=1]
%3 = load i16* %2, align 2 ; <i16> [#uses=1]
%4 = sext i16 %3 to i32 ; <i32> [#uses=1]
%5 = mul i32 %i.03, %Bs ; <i32> [#uses=1]
%6 = getelementptr i16* %B, i32 %5 ; <i16*> [#uses=1]
%7 = load i16* %6, align 2 ; <i16> [#uses=1]
%8 = sext i16 %7 to i32 ; <i32> [#uses=1]
%9 = mul i32 %8, %4 ; <i32> [#uses=1]
%10 = add i32 %9, %sum.04 ; <i32> [#uses=2]
; Induction variable update and loop exit test (exit once i+1 == N).
%indvar.next = add i32 %i.03, 1 ; <i32> [#uses=2]
%exitcond = icmp eq i32 %indvar.next, %N ; <i1> [#uses=1]
br i1 %exitcond, label %bb1.bb2_crit_edge, label %bb
; Loop exit edge: narrow the accumulated sum to i16.
bb1.bb2_crit_edge: ; preds = %bb
%phitmp = trunc i32 %10 to i16 ; <i16> [#uses=1]
br label %bb2
; Common exit: store the result (or 0 if the loop never ran) to C.
bb2: ; preds = %entry, %bb1.bb2_crit_edge
%sum.0.lcssa = phi i16 [ %phitmp, %bb1.bb2_crit_edge ], [ 0, %entry ] ; <i16> [#uses=1]
store i16 %sum.0.lcssa, i16* %C, align 2
ret void
}

View File

@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t ; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t
; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 8 ; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 5
; RUN: grep asm-printer %t | grep 182 ; RUN: grep asm-printer %t | grep 179
type { [62 x %struct.Bitvec*] } ; type %0 type { [62 x %struct.Bitvec*] } ; type %0
type { i8* } ; type %1 type { i8* } ; type %1