diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 8c02cd7ddba..5dd51a7944c 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -26,13 +26,14 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Support/Debug.h" #include "llvm/Support/RecyclingAllocator.h" - using namespace llvm; STATISTIC(NumCoalesces, "Number of copies coalesced"); STATISTIC(NumCSEs, "Number of common subexpression eliminated"); STATISTIC(NumPhysCSEs, "Number of physreg referencing common subexpr eliminated"); +STATISTIC(NumCrossBBCSEs, + "Number of cross-MBB physreg referencing CS eliminated"); STATISTIC(NumCommutes, "Number of copies coalesced after commuting"); namespace { @@ -82,9 +83,11 @@ namespace { MachineBasicBlock::const_iterator E) const ; bool hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, - SmallSet &PhysRefs) const; + SmallSet &PhysRefs, + SmallVector &PhysDefs) const; bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, - SmallSet &PhysRefs) const; + SmallSet &PhysRefs, + bool &NonLocal) const; bool isCSECandidate(MachineInstr *MI); bool isProfitableToCSE(unsigned CSReg, unsigned Reg, MachineInstr *CSMI, MachineInstr *MI); @@ -189,7 +192,8 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg, /// instruction does not uses a physical register. bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, - SmallSet &PhysRefs) const { + SmallSet &PhysRefs, + SmallVector &PhysDefs) const{ MachineBasicBlock::const_iterator I = MI; I = llvm::next(I); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); @@ -207,6 +211,8 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, (MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end()))) continue; PhysRefs.insert(Reg); + if (MO.isDef()) + PhysDefs.push_back(Reg); for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) PhysRefs.insert(*Alias); } @@ -215,20 +221,39 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, } bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, - SmallSet &PhysRefs) const { + SmallSet &PhysRefs, + bool &NonLocal) const { // For now conservatively returns false if the common subexpression is - // not in the same basic block as the given instruction. - MachineBasicBlock *MBB = MI->getParent(); - if (CSMI->getParent() != MBB) - return false; + // not in the same basic block as the given instruction. The only exception + // is if the common subexpression is in the sole predecessor block. + const MachineBasicBlock *MBB = MI->getParent(); + const MachineBasicBlock *CSMBB = CSMI->getParent(); + + bool CrossMBB = false; + if (CSMBB != MBB) { + if (MBB->pred_size() == 1 && *MBB->pred_begin() == CSMBB) + CrossMBB = true; + else + return false; + } MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I); MachineBasicBlock::const_iterator E = MI; + MachineBasicBlock::const_iterator EE = CSMBB->end(); unsigned LookAheadLeft = LookAheadLimit; while (LookAheadLeft) { // Skip over dbg_value's. - while (I != E && I->isDebugValue()) + while (I != E && I != EE && I->isDebugValue()) ++I; + if (I == EE) { + assert(CrossMBB && "Reaching end-of-MBB without finding MI?"); + CrossMBB = false; + NonLocal = true; + I = MBB->begin(); + EE = MBB->end(); + continue; + } + if (I == E) return true; @@ -393,16 +418,18 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // If the instruction defines physical registers and the values *may* be // used, then it's not safe to replace it with a common subexpression. // It's also not safe if the instruction uses physical registers. + bool CrossMBBPhysDef = false; SmallSet PhysRefs; - if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs)) { + SmallVector PhysDefs; + if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, PhysDefs)) { FoundCSE = false; - // ... Unless the CS is local and it also defines the physical register - // which is not clobbered in between and the physical register uses - // were not clobbered. + // ... Unless the CS is local or is in the sole predecessor block + // and it also defines the physical register which is not clobbered + // in between and the physical register uses were not clobbered. unsigned CSVN = VNT.lookup(MI); MachineInstr *CSMI = Exps[CSVN]; - if (PhysRegDefsReach(CSMI, MI, PhysRefs)) + if (PhysRegDefsReach(CSMI, MI, PhysRefs, CrossMBBPhysDef)) FoundCSE = true; } @@ -457,6 +484,18 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second); MRI->clearKillFlags(CSEPairs[i].second); } + + if (CrossMBBPhysDef) { + // Add physical register defs now coming in from a predecessor to MBB + // livein list. + while (!PhysDefs.empty()) { + unsigned LiveIn = PhysDefs.pop_back_val(); + if (!MBB->isLiveIn(LiveIn)) + MBB->addLiveIn(LiveIn); + } + ++NumCrossBBCSEs; + } + MI->eraseFromParent(); ++NumCSEs; if (!PhysRefs.empty()) diff --git a/test/CodeGen/ARM/machine-cse-cmp.ll b/test/CodeGen/ARM/machine-cse-cmp.ll index c77402f3bc1..f566974c235 100644 --- a/test/CodeGen/ARM/machine-cse-cmp.ll +++ b/test/CodeGen/ARM/machine-cse-cmp.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | FileCheck %s +; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s ;rdar://8003725 @G1 = external global i32 @@ -6,6 +6,7 @@ define i32 @f1(i32 %cond1, i32 %x1, i32 %x2, i32 %x3) { entry: +; CHECK: f1: ; CHECK: cmp ; CHECK: moveq ; CHECK-NOT: cmp @@ -16,3 +17,31 @@ entry: %tmp4 = add i32 %tmp2, %tmp3 ret i32 %tmp4 } + +@foo = external global i32 +@bar = external global [250 x i8], align 1 + +; CSE of cmp across BB boundary +; rdar://10660865 +define void @f2() nounwind ssp { +entry: +; CHECK: f2: +; CHECK: cmp +; CHECK: poplt +; CHECK-NOT: cmp +; CHECK: movle + %0 = load i32* @foo, align 4 + %cmp28 = icmp sgt i32 %0, 0 + br i1 %cmp28, label %for.body.lr.ph, label %for.cond1.preheader + +for.body.lr.ph: ; preds = %entry + %1 = icmp sgt i32 %0, 1 + %smax = select i1 %1, i32 %0, i32 1 + call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([250 x i8]* @bar, i32 0, i32 0), i8 0, i32 %smax, i32 1, i1 false) + unreachable + +for.cond1.preheader: ; preds = %entry + ret void +} + +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind diff --git a/test/CodeGen/Thumb2/thumb2-cbnz.ll b/test/CodeGen/Thumb2/thumb2-cbnz.ll index 7993bbf0ed7..893bd0fdaef 100644 --- a/test/CodeGen/Thumb2/thumb2-cbnz.ll +++ b/test/CodeGen/Thumb2/thumb2-cbnz.ll @@ -24,7 +24,6 @@ bb7: ; preds = %bb3 bb9: ; preds = %bb7 ; CHECK: cmp r0, #0 -; CHECK: cmp r0, #0 ; CHECK-NEXT: cbnz %0 = tail call double @foo(double %b) nounwind readnone ; [#uses=0] br label %bb11 diff --git a/test/CodeGen/X86/machine-cse.ll b/test/CodeGen/X86/machine-cse.ll index d819fc8f6ec..a757cde6abe 100644 --- a/test/CodeGen/X86/machine-cse.ll +++ b/test/CodeGen/X86/machine-cse.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-apple-macosx < %s | FileCheck %s ; rdar://7610418 %ptr = type { i8* } @@ -77,3 +77,25 @@ bb.nph743.us: ; preds = %for.body53.us, %if. sw.bb307: ; preds = %sw.bb, %entry ret void } + +; CSE physical register defining instruction across MBB boundary. +; rdar://10660865 +define i32 @cross_mbb_phys_cse(i32 %a, i32 %b) nounwind ssp { +entry: +; CHECK: cross_mbb_phys_cse: +; CHECK: cmpl +; CHECK: ja + %cmp = icmp ugt i32 %a, %b + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry +; CHECK-NOT: cmpl +; CHECK: sbbl + %cmp1 = icmp ult i32 %a, %b + %. = sext i1 %cmp1 to i32 + br label %return + +return: ; preds = %if.end, %entry + %retval.0 = phi i32 [ 1, %entry ], [ %., %if.end ] + ret i32 %retval.0 +}