From bd24b33e5782997dfa26b0debf934dd364756982 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 30 Apr 2014 15:31:33 +0000 Subject: [PATCH] R600/SI: Use VALU instructions for copying i1 values We can't use SALU instructions for this since they ignore the EXEC mask and are always executed. This fixes several OpenCV tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@207661 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPU.h | 4 + lib/Target/R600/AMDGPUTargetMachine.cpp | 1 + lib/Target/R600/CMakeLists.txt | 1 + lib/Target/R600/SIFixSGPRCopies.cpp | 3 +- lib/Target/R600/SIISelLowering.cpp | 2 +- lib/Target/R600/SIInstructions.td | 11 +- lib/Target/R600/SILowerControlFlow.cpp | 4 +- lib/Target/R600/SILowerI1Copies.cpp | 130 ++++++++++++++++++++++++ lib/Target/R600/SIRegisterInfo.td | 2 + test/CodeGen/R600/valu-i1.ll | 39 +++++++ 10 files changed, 188 insertions(+), 9 deletions(-) create mode 100644 lib/Target/R600/SILowerI1Copies.cpp create mode 100644 test/CodeGen/R600/valu-i1.ll diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h index 3e1848b5f8e..5d0cf81c402 100644 --- a/lib/Target/R600/AMDGPU.h +++ b/lib/Target/R600/AMDGPU.h @@ -37,11 +37,15 @@ FunctionPass *createAMDGPUCFGStructurizerPass(); // SI Passes FunctionPass *createSITypeRewriter(); FunctionPass *createSIAnnotateControlFlowPass(); +FunctionPass *createSILowerI1CopiesPass(); FunctionPass *createSILowerControlFlowPass(TargetMachine &tm); FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm); FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS); FunctionPass *createSIInsertWaits(TargetMachine &tm); +void initializeSILowerI1CopiesPass(PassRegistry &); +extern char &SILowerI1CopiesID; + // Passes common to R600 and SI Pass *createAMDGPUStructurizeCFGPass(); FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm); diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index f0935401ecb..6b68c2abe36 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -154,6 +154,7 @@ AMDGPUPassConfig::addPreISel() { bool AMDGPUPassConfig::addInstSelector() { addPass(createAMDGPUISelDag(getAMDGPUTargetMachine())); + addPass(createSILowerI1CopiesPass()); return false; } diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt index 93a51179754..3c6fa5a7ae4 100644 --- a/lib/Target/R600/CMakeLists.txt +++ b/lib/Target/R600/CMakeLists.txt @@ -45,6 +45,7 @@ add_llvm_target(R600CodeGen SIInstrInfo.cpp SIISelLowering.cpp SILowerControlFlow.cpp + SILowerI1Copies.cpp SIMachineFunctionInfo.cpp SIRegisterInfo.cpp SITypeRewriter.cpp diff --git a/lib/Target/R600/SIFixSGPRCopies.cpp b/lib/Target/R600/SIFixSGPRCopies.cpp index a9a7c5ce0fe..f6b8b783d02 100644 --- a/lib/Target/R600/SIFixSGPRCopies.cpp +++ b/lib/Target/R600/SIFixSGPRCopies.cpp @@ -185,7 +185,8 @@ bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy, const TargetRegisterClass *SrcRC; if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || - DstRC == &AMDGPU::M0RegRegClass) + DstRC == &AMDGPU::M0RegRegClass || + MRI.getRegClass(SrcReg) == &AMDGPU::VReg_1RegClass) return false; SrcRC = TRI->getSubRegClass(MRI.getRegClass(SrcReg), SrcSubReg); diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 8c686c91502..e6880485078 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -29,7 +29,7 @@ using namespace llvm; SITargetLowering::SITargetLowering(TargetMachine &TM) : AMDGPUTargetLowering(TM) { - addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass); + addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass); addRegisterClass(MVT::i64, &AMDGPU::VSrc_64RegClass); addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass); diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 00f9be61e24..27e7abe1a38 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1398,6 +1398,12 @@ def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>; let isCodeGenOnly = 1, isPseudo = 1 in { +def V_MOV_I1 : InstSI < + (outs VReg_1:$dst), + (ins i1imm:$src), + "", [(set i1:$dst, (imm:$src))] +>; + def LOAD_CONST : AMDGPUShaderInst < (outs GPRF32:$dst), (ins i32imm:$src), @@ -1980,11 +1986,6 @@ def : Pat < (V_MOV_B32_e32 fpimm:$imm) >; -def : Pat < - (i1 imm:$imm), - (S_MOV_B64 imm:$imm) ->; - def : Pat < (i64 InlineImm:$imm), (S_MOV_B64 InlineImm:$imm) diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp index d1d925dd231..6601f2a9806 100644 --- a/lib/Target/R600/SILowerControlFlow.cpp +++ b/lib/Target/R600/SILowerControlFlow.cpp @@ -67,7 +67,7 @@ private: static const unsigned SkipThreshold = 12; static char ID; - const TargetRegisterInfo *TRI; + const SIRegisterInfo *TRI; const SIInstrInfo *TII; bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To); @@ -427,7 +427,7 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) { bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { TII = static_cast(MF.getTarget().getInstrInfo()); - TRI = MF.getTarget().getRegisterInfo(); + TRI = static_cast(MF.getTarget().getRegisterInfo()); SIMachineFunctionInfo *MFI = MF.getInfo(); bool HaveKill = false; diff --git a/lib/Target/R600/SILowerI1Copies.cpp b/lib/Target/R600/SILowerI1Copies.cpp new file mode 100644 index 00000000000..766380ead58 --- /dev/null +++ b/lib/Target/R600/SILowerI1Copies.cpp @@ -0,0 +1,130 @@ +//===-- SILowerI1Copies.cpp - Lower I1 Copies -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +/// i1 values are usually inserted by the CFG Structurize pass and they are +/// unique in that they can be copied from VALU to SALU registers. +/// This is not possible for any other value type. Since there are no +/// MOV instructions for i1, we to use V_CMP_* and V_CNDMASK to move the i1. +/// +//===----------------------------------------------------------------------===// +// + +#define DEBUG_TYPE "si-i1-copies" +#include "AMDGPU.h" +#include "SIInstrInfo.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +namespace { + +class SILowerI1Copies : public MachineFunctionPass { +public: + static char ID; + +public: + SILowerI1Copies() : MachineFunctionPass(ID) { + initializeSILowerI1CopiesPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnMachineFunction(MachineFunction &MF) override; + + virtual const char *getPassName() const override { + return "SI Lower il Copies"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; + +} // End anonymous namespace. + +INITIALIZE_PASS_BEGIN(SILowerI1Copies, DEBUG_TYPE, + "SI Lower il Copies", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(SILowerI1Copies, DEBUG_TYPE, + "SI Lower il Copies", false, false) + +char SILowerI1Copies::ID = 0; + +char &llvm::SILowerI1CopiesID = SILowerI1Copies::ID; + +FunctionPass *llvm::createSILowerI1CopiesPass() { + return new SILowerI1Copies(); +} + +bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + const SIInstrInfo *TII = static_cast( + MF.getTarget().getInstrInfo()); + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + + for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); + BI != BE; ++BI) { + + MachineBasicBlock &MBB = *BI; + MachineBasicBlock::iterator I, Next; + for (I = MBB.begin(); I != MBB.end(); I = Next) { + Next = std::next(I); + MachineInstr &MI = *I; + + if (MI.getOpcode() == AMDGPU::V_MOV_I1) { + MI.setDesc(TII->get(AMDGPU::V_MOV_B32_e32)); + continue; + } + + if (MI.getOpcode() != AMDGPU::COPY || + !TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()) || + !TargetRegisterInfo::isVirtualRegister(MI.getOperand(1).getReg())) + continue; + + + const TargetRegisterClass *DstRC = + MRI.getRegClass(MI.getOperand(0).getReg()); + const TargetRegisterClass *SrcRC = + MRI.getRegClass(MI.getOperand(1).getReg()); + + if (DstRC == &AMDGPU::VReg_1RegClass && + TRI->getCommonSubClass(SrcRC, &AMDGPU::SGPR_64RegClass)) { + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CNDMASK_B32_e64)) + .addOperand(MI.getOperand(0)) + .addImm(0) + .addImm(-1) + .addOperand(MI.getOperand(1)) + .addImm(0) + .addImm(0) + .addImm(0) + .addImm(0); + MI.eraseFromParent(); + } else if (TRI->getCommonSubClass(DstRC, &AMDGPU::SGPR_64RegClass) && + SrcRC == &AMDGPU::VReg_1RegClass) { + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CMP_NE_I32_e64)) + .addOperand(MI.getOperand(0)) + .addImm(0) + .addOperand(MI.getOperand(1)) + .addImm(0) + .addImm(0) + .addImm(0) + .addImm(0); + MI.eraseFromParent(); + } + + } + } + return false; +} diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index 6d6d8b9bd84..f1f01deaf36 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -189,6 +189,8 @@ def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add VGPR_256 def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>; +def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)>; + //===----------------------------------------------------------------------===// // [SV]Src_(32|64) register classes, can have either an immediate or an register //===----------------------------------------------------------------------===// diff --git a/test/CodeGen/R600/valu-i1.ll b/test/CodeGen/R600/valu-i1.ll new file mode 100644 index 00000000000..5d5e3ff63a4 --- /dev/null +++ b/test/CodeGen/R600/valu-i1.ll @@ -0,0 +1,39 @@ +; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI %s + +; Make sure the i1 values created by the cfg structurizer pass are +; moved using VALU instructions +; SI-NOT: S_MOV_B64 s[{{[0-9]:[0-9]}}], -1 +; SI: V_MOV_B32_e32 v{{[0-9]}}, -1 +define void @test_if(i32 %a, i32 %b, i32 addrspace(1)* %src, i32 addrspace(1)* %dst) { +entry: + switch i32 %a, label %default [ + i32 0, label %case0 + i32 1, label %case1 + ] + +case0: + %arrayidx1 = getelementptr i32 addrspace(1)* %dst, i32 %b + store i32 0, i32 addrspace(1)* %arrayidx1, align 4 + br label %end + +case1: + %arrayidx5 = getelementptr i32 addrspace(1)* %dst, i32 %b + store i32 1, i32 addrspace(1)* %arrayidx5, align 4 + br label %end + +default: + %cmp8 = icmp eq i32 %a, 2 + %arrayidx10 = getelementptr i32 addrspace(1)* %dst, i32 %b + br i1 %cmp8, label %if, label %else + +if: + store i32 2, i32 addrspace(1)* %arrayidx10, align 4 + br label %end + +else: + store i32 3, i32 addrspace(1)* %arrayidx10, align 4 + br label %end + +end: + ret void +}