Allow machine-cse to look across MBB boundary when cse'ing instructions that
define physical registers. It's currently very restrictive, only catching
cases where the common subexpression is in an immediate (and only)
predecessor. But it catches a surprisingly large number of cases.

rdar://10660865


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147827 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng
2012-01-10 02:02:58 +00:00
parent 64925c55c6
commit 97b5beb7fe
4 changed files with 107 additions and 18 deletions

View File

@ -26,13 +26,14 @@
#include "llvm/ADT/Statistic.h" #include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h" #include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h" #include "llvm/Support/RecyclingAllocator.h"
using namespace llvm; using namespace llvm;
STATISTIC(NumCoalesces, "Number of copies coalesced"); STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumCSEs, "Number of common subexpression eliminated"); STATISTIC(NumCSEs, "Number of common subexpression eliminated");
STATISTIC(NumPhysCSEs, STATISTIC(NumPhysCSEs,
"Number of physreg referencing common subexpr eliminated"); "Number of physreg referencing common subexpr eliminated");
STATISTIC(NumCrossBBCSEs,
"Number of cross-MBB physreg referencing CS eliminated");
STATISTIC(NumCommutes, "Number of copies coalesced after commuting"); STATISTIC(NumCommutes, "Number of copies coalesced after commuting");
namespace { namespace {
@ -82,9 +83,11 @@ namespace {
MachineBasicBlock::const_iterator E) const ; MachineBasicBlock::const_iterator E) const ;
bool hasLivePhysRegDefUses(const MachineInstr *MI, bool hasLivePhysRegDefUses(const MachineInstr *MI,
const MachineBasicBlock *MBB, const MachineBasicBlock *MBB,
SmallSet<unsigned,8> &PhysRefs) const; SmallSet<unsigned,8> &PhysRefs,
SmallVector<unsigned,2> &PhysDefs) const;
bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
SmallSet<unsigned,8> &PhysRefs) const; SmallSet<unsigned,8> &PhysRefs,
bool &NonLocal) const;
bool isCSECandidate(MachineInstr *MI); bool isCSECandidate(MachineInstr *MI);
bool isProfitableToCSE(unsigned CSReg, unsigned Reg, bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
MachineInstr *CSMI, MachineInstr *MI); MachineInstr *CSMI, MachineInstr *MI);
@ -189,7 +192,8 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
/// instruction does not uses a physical register. /// instruction does not uses a physical register.
bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
const MachineBasicBlock *MBB, const MachineBasicBlock *MBB,
SmallSet<unsigned,8> &PhysRefs) const { SmallSet<unsigned,8> &PhysRefs,
SmallVector<unsigned,2> &PhysDefs) const{
MachineBasicBlock::const_iterator I = MI; I = llvm::next(I); MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i); const MachineOperand &MO = MI->getOperand(i);
@ -207,6 +211,8 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
(MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end()))) (MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end())))
continue; continue;
PhysRefs.insert(Reg); PhysRefs.insert(Reg);
if (MO.isDef())
PhysDefs.push_back(Reg);
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
PhysRefs.insert(*Alias); PhysRefs.insert(*Alias);
} }
@ -215,20 +221,39 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
} }
bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
SmallSet<unsigned,8> &PhysRefs) const { SmallSet<unsigned,8> &PhysRefs,
bool &NonLocal) const {
// For now conservatively returns false if the common subexpression is // For now conservatively returns false if the common subexpression is
// not in the same basic block as the given instruction. // not in the same basic block as the given instruction. The only exception
MachineBasicBlock *MBB = MI->getParent(); // is if the common subexpression is in the sole predecessor block.
if (CSMI->getParent() != MBB) const MachineBasicBlock *MBB = MI->getParent();
return false; const MachineBasicBlock *CSMBB = CSMI->getParent();
bool CrossMBB = false;
if (CSMBB != MBB) {
if (MBB->pred_size() == 1 && *MBB->pred_begin() == CSMBB)
CrossMBB = true;
else
return false;
}
MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I); MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I);
MachineBasicBlock::const_iterator E = MI; MachineBasicBlock::const_iterator E = MI;
MachineBasicBlock::const_iterator EE = CSMBB->end();
unsigned LookAheadLeft = LookAheadLimit; unsigned LookAheadLeft = LookAheadLimit;
while (LookAheadLeft) { while (LookAheadLeft) {
// Skip over dbg_value's. // Skip over dbg_value's.
while (I != E && I->isDebugValue()) while (I != E && I != EE && I->isDebugValue())
++I; ++I;
if (I == EE) {
assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
CrossMBB = false;
NonLocal = true;
I = MBB->begin();
EE = MBB->end();
continue;
}
if (I == E) if (I == E)
return true; return true;
@ -393,16 +418,18 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// If the instruction defines physical registers and the values *may* be // If the instruction defines physical registers and the values *may* be
// used, then it's not safe to replace it with a common subexpression. // used, then it's not safe to replace it with a common subexpression.
// It's also not safe if the instruction uses physical registers. // It's also not safe if the instruction uses physical registers.
bool CrossMBBPhysDef = false;
SmallSet<unsigned,8> PhysRefs; SmallSet<unsigned,8> PhysRefs;
if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs)) { SmallVector<unsigned, 2> PhysDefs;
if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, PhysDefs)) {
FoundCSE = false; FoundCSE = false;
// ... Unless the CS is local and it also defines the physical register // ... Unless the CS is local or is in the sole predecessor block
// which is not clobbered in between and the physical register uses // and it also defines the physical register which is not clobbered
// were not clobbered. // in between and the physical register uses were not clobbered.
unsigned CSVN = VNT.lookup(MI); unsigned CSVN = VNT.lookup(MI);
MachineInstr *CSMI = Exps[CSVN]; MachineInstr *CSMI = Exps[CSVN];
if (PhysRegDefsReach(CSMI, MI, PhysRefs)) if (PhysRegDefsReach(CSMI, MI, PhysRefs, CrossMBBPhysDef))
FoundCSE = true; FoundCSE = true;
} }
@ -457,6 +484,18 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second); MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second);
MRI->clearKillFlags(CSEPairs[i].second); MRI->clearKillFlags(CSEPairs[i].second);
} }
if (CrossMBBPhysDef) {
// Add physical register defs now coming in from a predecessor to MBB
// livein list.
while (!PhysDefs.empty()) {
unsigned LiveIn = PhysDefs.pop_back_val();
if (!MBB->isLiveIn(LiveIn))
MBB->addLiveIn(LiveIn);
}
++NumCrossBBCSEs;
}
MI->eraseFromParent(); MI->eraseFromParent();
++NumCSEs; ++NumCSEs;
if (!PhysRefs.empty()) if (!PhysRefs.empty())

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=arm | FileCheck %s ; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
;rdar://8003725 ;rdar://8003725
@G1 = external global i32 @G1 = external global i32
@ -6,6 +6,7 @@
define i32 @f1(i32 %cond1, i32 %x1, i32 %x2, i32 %x3) { define i32 @f1(i32 %cond1, i32 %x1, i32 %x2, i32 %x3) {
entry: entry:
; CHECK: f1:
; CHECK: cmp ; CHECK: cmp
; CHECK: moveq ; CHECK: moveq
; CHECK-NOT: cmp ; CHECK-NOT: cmp
@ -16,3 +17,31 @@ entry:
%tmp4 = add i32 %tmp2, %tmp3 %tmp4 = add i32 %tmp2, %tmp3
ret i32 %tmp4 ret i32 %tmp4
} }
@foo = external global i32
@bar = external global [250 x i8], align 1
; CSE of cmp across BB boundary
; rdar://10660865
; Test input for cross-block CSE of a compare. The entry block compares the
; value loaded from @foo against 0 and branches on the result; the block
; for.body.lr.ph, whose only listed predecessor is %entry, compares the same
; loaded value against 1. The FileCheck directives in the entry block require
; a cmp, then a poplt, then no further cmp before the movle.
define void @f2() nounwind ssp {
entry:
; CHECK: f2:
; CHECK: cmp
; CHECK: poplt
; CHECK-NOT: cmp
; CHECK: movle
%0 = load i32* @foo, align 4
; First compare: %0 > 0 (signed); selects between the two successor blocks.
%cmp28 = icmp sgt i32 %0, 0
br i1 %cmp28, label %for.body.lr.ph, label %for.cond1.preheader
for.body.lr.ph: ; preds = %entry
; Second compare on the same %0, this time against 1; its result feeds the
; select that picks %0 or 1 as the memset length.
%1 = icmp sgt i32 %0, 1
%smax = select i1 %1, i32 %0, i32 1
call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([250 x i8]* @bar, i32 0, i32 0), i8 0, i32 %smax, i32 1, i1 false)
unreachable
for.cond1.preheader: ; preds = %entry
ret void
}
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind

View File

@ -24,7 +24,6 @@ bb7: ; preds = %bb3
bb9: ; preds = %bb7 bb9: ; preds = %bb7
; CHECK: cmp r0, #0 ; CHECK: cmp r0, #0
; CHECK: cmp r0, #0
; CHECK-NEXT: cbnz ; CHECK-NEXT: cbnz
%0 = tail call double @foo(double %b) nounwind readnone ; <double> [#uses=0] %0 = tail call double @foo(double %b) nounwind readnone ; <double> [#uses=0]
br label %bb11 br label %bb11

View File

@ -1,4 +1,4 @@
; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s ; RUN: llc -mtriple=x86_64-apple-macosx < %s | FileCheck %s
; rdar://7610418 ; rdar://7610418
%ptr = type { i8* } %ptr = type { i8* }
@ -77,3 +77,25 @@ bb.nph743.us: ; preds = %for.body53.us, %if.
sw.bb307: ; preds = %sw.bb, %entry sw.bb307: ; preds = %sw.bb, %entry
ret void ret void
} }
; CSE physical register defining instruction across MBB boundary.
; rdar://10660865
; Test input: %a and %b are compared twice, once in the entry block (unsigned
; greater-than) and once in if.end (unsigned less-than). if.end lists %entry
; as its only predecessor. The FileCheck directives require a cmpl followed by
; ja, and then no further cmpl before the sbbl.
define i32 @cross_mbb_phys_cse(i32 %a, i32 %b) nounwind ssp {
entry:
; CHECK: cross_mbb_phys_cse:
; CHECK: cmpl
; CHECK: ja
; First compare of %a and %b; when true, control goes straight to %return.
%cmp = icmp ugt i32 %a, %b
br i1 %cmp, label %return, label %if.end
if.end: ; preds = %entry
; CHECK-NOT: cmpl
; CHECK: sbbl
; Second compare of the same operands with a different predicate; the i1
; result is sign-extended to i32 (0 or -1) and becomes the return value on
; this path.
%cmp1 = icmp ult i32 %a, %b
%. = sext i1 %cmp1 to i32
br label %return
return: ; preds = %if.end, %entry
; Returns 1 when arriving from %entry, otherwise the sign-extended compare.
%retval.0 = phi i32 [ 1, %entry ], [ %., %if.end ]
ret i32 %retval.0
}