Roll forward r242871

r242871 missed one place that should be guarded with
TargetRegisterInfo::isPhysicalRegister. This patch fixes that.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@243555 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Jingyue Wu 2015-07-29 18:59:09 +00:00
parent dd741a8795
commit 7e90f694b6
3 changed files with 75 additions and 19 deletions

View File

@ -12,7 +12,8 @@
// it then removes. // it then removes.
// //
// Note that this pass must be run after register allocation, it cannot handle // Note that this pass must be run after register allocation, it cannot handle
// SSA form. // SSA form. It also must handle virtual registers for targets that emit virtual
// ISA (e.g. NVPTX).
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
@ -150,9 +151,13 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
if (!I->isImplicitDef()) if (!I->isImplicitDef())
break; break;
unsigned Reg = I->getOperand(0).getReg(); unsigned Reg = I->getOperand(0).getReg();
for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
SubRegs.isValid(); ++SubRegs) for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
ImpDefRegs.insert(*SubRegs); SubRegs.isValid(); ++SubRegs)
ImpDefRegs.insert(*SubRegs);
} else {
ImpDefRegs.insert(Reg);
}
++I; ++I;
} }
if (ImpDefRegs.empty()) if (ImpDefRegs.empty())
@ -1573,6 +1578,17 @@ static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
return nullptr; return nullptr;
} }
template <class Container>
static void addRegAndItsAliases(unsigned Reg, const TargetRegisterInfo *TRI,
Container &Set) {
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
Set.insert(*AI);
} else {
Set.insert(Reg);
}
}
/// findHoistingInsertPosAndDeps - Find the location to move common instructions /// findHoistingInsertPosAndDeps - Find the location to move common instructions
/// in successors to. The location is usually just before the terminator, /// in successors to. The location is usually just before the terminator,
/// however if the terminator is a conditional branch and its previous /// however if the terminator is a conditional branch and its previous
@ -1598,8 +1614,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
if (!Reg) if (!Reg)
continue; continue;
if (MO.isUse()) { if (MO.isUse()) {
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) addRegAndItsAliases(Reg, TRI, Uses);
Uses.insert(*AI);
} else { } else {
if (!MO.isDead()) if (!MO.isDead())
// Don't try to hoist code in the rare case the terminator defines a // Don't try to hoist code in the rare case the terminator defines a
@ -1608,8 +1623,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
// If the terminator defines a register, make sure we don't hoist // If the terminator defines a register, make sure we don't hoist
// the instruction whose def might be clobbered by the terminator. // the instruction whose def might be clobbered by the terminator.
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) addRegAndItsAliases(Reg, TRI, Defs);
Defs.insert(*AI);
} }
} }
@ -1665,15 +1679,15 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
if (!Reg) if (!Reg)
continue; continue;
if (MO.isUse()) { if (MO.isUse()) {
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) addRegAndItsAliases(Reg, TRI, Uses);
Uses.insert(*AI);
} else { } else {
if (Uses.erase(Reg)) { if (Uses.erase(Reg)) {
for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
Uses.erase(*SubRegs); // Use sub-registers to be conservative for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
Uses.erase(*SubRegs); // Use sub-registers to be conservative
}
} }
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) addRegAndItsAliases(Reg, TRI, Defs);
Defs.insert(*AI);
} }
} }
@ -1800,8 +1814,12 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
unsigned Reg = MO.getReg(); unsigned Reg = MO.getReg();
if (!Reg || !LocalDefsSet.count(Reg)) if (!Reg || !LocalDefsSet.count(Reg))
continue; continue;
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
LocalDefsSet.erase(*AI); for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
LocalDefsSet.erase(*AI);
} else {
LocalDefsSet.erase(Reg);
}
} }
// Track local defs so we can update liveins. // Track local defs so we can update liveins.
@ -1813,8 +1831,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
if (!Reg) if (!Reg)
continue; continue;
LocalDefs.push_back(Reg); LocalDefs.push_back(Reg);
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) addRegAndItsAliases(Reg, TRI, LocalDefsSet);
LocalDefsSet.insert(*AI);
} }
HasDups = true; HasDups = true;

View File

@ -174,7 +174,6 @@ void NVPTXPassConfig::addIRPasses() {
// NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp). // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
disablePass(&PrologEpilogCodeInserterID); disablePass(&PrologEpilogCodeInserterID);
disablePass(&MachineCopyPropagationID); disablePass(&MachineCopyPropagationID);
disablePass(&BranchFolderPassID);
disablePass(&TailDuplicateID); disablePass(&TailDuplicateID);
addPass(createNVPTXImageOptimizerPass()); addPass(createNVPTXImageOptimizerPass());

View File

@ -0,0 +1,40 @@
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -disable-cgp | FileCheck %s
; Disable CGP which also folds branches, so that only BranchFolding is under
; the spotlight.
; (CGP presumably stands for CodeGenPrepare, matching the -disable-cgp flag
; on the command line above -- confirm.)
; The module is built for the 64-bit NVPTX target named by the triple below.
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"
; Both successors of the conditional branch (%then and %else) contain nothing
; but an unconditional jump to %merge. The checks inside the function require
; that the emitted code for foo contains neither a "bra.uni" instruction nor
; any "LBB0_" block label.
define void @foo(i32 %x, float* %output) {
; CHECK-LABEL: .visible .func foo(
; CHECK-NOT: bra.uni
; CHECK-NOT: LBB0_
%1 = icmp eq i32 %x, 1
br i1 %1, label %then, label %else
then:
br label %merge
else:
br label %merge
merge:
store float 2.0, float* %output
ret void
}
; PR24299. no crash
; Regression input for that bug report: the only output check is that the
; kernel's entry label is emitted, i.e. that compilation finishes.
; Control flow: %bb branches on an undef condition to %bb1 or %bb4; %bb1 is a
; self-loop that increments a phi-carried counter (seeded with undef from %bb);
; %bb4 branches only to itself and never returns.
define ptx_kernel void @hoge() #0 {
; CHECK-LABEL: .visible .entry hoge(
bb:
br i1 undef, label %bb1, label %bb4
bb1: ; preds = %bb1, %bb
%tmp = phi i64 [ %tmp2, %bb1 ], [ undef, %bb ]
%tmp2 = add nsw i64 %tmp, 1
%tmp3 = icmp sle i64 %tmp, 0
br i1 %tmp3, label %bb1, label %bb4
bb4: ; preds = %bb4, %bb1, %bb
br label %bb4
}