From 7e90f694b6a2771222dab5b4759158e6da29e2c5 Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Wed, 29 Jul 2015 18:59:09 +0000 Subject: [PATCH] Roll forward r242871 r242871 missed one place that should be guarded with isPhysicalReg. This patch fixes that. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@243555 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/BranchFolding.cpp | 53 ++++++++++++++++--------- lib/Target/NVPTX/NVPTXTargetMachine.cpp | 1 - test/CodeGen/NVPTX/branch-fold.ll | 40 +++++++++++++++++++ 3 files changed, 75 insertions(+), 19 deletions(-) create mode 100644 test/CodeGen/NVPTX/branch-fold.ll diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 618266731c0..fbf1504b548 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -12,7 +12,8 @@ // it then removes. // // Note that this pass must be run after register allocation, it cannot handle -// SSA form. +// SSA form. It also must handle virtual registers for targets that emit virtual +// ISA (e.g. NVPTX). 
// //===----------------------------------------------------------------------===// @@ -150,9 +151,13 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) { if (!I->isImplicitDef()) break; unsigned Reg = I->getOperand(0).getReg(); - for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - ImpDefRegs.insert(*SubRegs); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + ImpDefRegs.insert(*SubRegs); + } else { + ImpDefRegs.insert(Reg); + } ++I; } if (ImpDefRegs.empty()) @@ -1573,6 +1578,17 @@ static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB, return nullptr; } +template <class Container> +static void addRegAndItsAliases(unsigned Reg, const TargetRegisterInfo *TRI, + Container &Set) { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + Set.insert(*AI); + } else { + Set.insert(Reg); + } +} + /// findHoistingInsertPosAndDeps - Find the location to move common instructions /// in successors to. The location is usually just before the terminator, /// however if the terminator is a conditional branch and its previous @@ -1598,8 +1614,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, if (!Reg) continue; if (MO.isUse()) { - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - Uses.insert(*AI); + addRegAndItsAliases(Reg, TRI, Uses); } else { if (!MO.isDead()) // Don't try to hoist code in the rare case the terminator defines a @@ -1608,8 +1623,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, // If the terminator defines a register, make sure we don't hoist // the instruction whose def might be clobbered by the terminator. 
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - Defs.insert(*AI); + addRegAndItsAliases(Reg, TRI, Defs); } } @@ -1665,15 +1679,15 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, if (!Reg) continue; if (MO.isUse()) { - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - Uses.insert(*AI); + addRegAndItsAliases(Reg, TRI, Uses); } else { if (Uses.erase(Reg)) { - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) - Uses.erase(*SubRegs); // Use sub-registers to be conservative + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + Uses.erase(*SubRegs); // Use sub-registers to be conservative + } } - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - Defs.insert(*AI); + addRegAndItsAliases(Reg, TRI, Defs); } } @@ -1800,8 +1814,12 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { unsigned Reg = MO.getReg(); if (!Reg || !LocalDefsSet.count(Reg)) continue; - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - LocalDefsSet.erase(*AI); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + LocalDefsSet.erase(*AI); + } else { + LocalDefsSet.erase(Reg); + } } // Track local defs so we can update liveins. 
@@ -1813,8 +1831,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { if (!Reg) continue; LocalDefs.push_back(Reg); - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - LocalDefsSet.insert(*AI); + addRegAndItsAliases(Reg, TRI, LocalDefsSet); } HasDups = true; diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 37440d0c51f..706314c9ed3 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -174,7 +174,6 @@ void NVPTXPassConfig::addIRPasses() { // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp). disablePass(&PrologEpilogCodeInserterID); disablePass(&MachineCopyPropagationID); - disablePass(&BranchFolderPassID); disablePass(&TailDuplicateID); addPass(createNVPTXImageOptimizerPass()); diff --git a/test/CodeGen/NVPTX/branch-fold.ll b/test/CodeGen/NVPTX/branch-fold.ll new file mode 100644 index 00000000000..2b9cd0a35d9 --- /dev/null +++ b/test/CodeGen/NVPTX/branch-fold.ll @@ -0,0 +1,40 @@ +; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -disable-cgp | FileCheck %s +; Disable CGP which also folds branches, so that only BranchFolding is under +; the spotlight. + +target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" +target triple = "nvptx64-nvidia-cuda" + +define void @foo(i32 %x, float* %output) { +; CHECK-LABEL: .visible .func foo( +; CHECK-NOT: bra.uni +; CHECK-NOT: LBB0_ + %1 = icmp eq i32 %x, 1 + br i1 %1, label %then, label %else + +then: + br label %merge + +else: + br label %merge + +merge: + store float 2.0, float* %output + ret void +} + +; PR24299. no crash +define ptx_kernel void @hoge() #0 { +; CHECK-LABEL: .visible .entry hoge( +bb: + br i1 undef, label %bb1, label %bb4 + +bb1: ; preds = %bb1, %bb + %tmp = phi i64 [ %tmp2, %bb1 ], [ undef, %bb ] + %tmp2 = add nsw i64 %tmp, 1 + %tmp3 = icmp sle i64 %tmp, 0 + br i1 %tmp3, label %bb1, label %bb4 + +bb4: ; preds = %bb4, %bb1, %bb + br label %bb4 +}