R600/SI: Use VALU instructions for copying i1 values

We can't use SALU instructions for this since they ignore the EXEC mask
and are always executed.

This fixes several OpenCV tests.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@207661 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tom Stellard 2014-04-30 15:31:33 +00:00
parent 1d8e31fc7a
commit bd24b33e57
10 changed files with 188 additions and 9 deletions

View File

@ -37,11 +37,15 @@ FunctionPass *createAMDGPUCFGStructurizerPass();
// SI Passes
FunctionPass *createSITypeRewriter();
FunctionPass *createSIAnnotateControlFlowPass();
FunctionPass *createSILowerI1CopiesPass();
FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm);
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
FunctionPass *createSIInsertWaits(TargetMachine &tm);
void initializeSILowerI1CopiesPass(PassRegistry &);
extern char &SILowerI1CopiesID;
// Passes common to R600 and SI
Pass *createAMDGPUStructurizeCFGPass();
FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);

View File

@ -154,6 +154,7 @@ AMDGPUPassConfig::addPreISel() {
bool AMDGPUPassConfig::addInstSelector() {
addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
addPass(createSILowerI1CopiesPass());
return false;
}

View File

@ -45,6 +45,7 @@ add_llvm_target(R600CodeGen
SIInstrInfo.cpp
SIISelLowering.cpp
SILowerControlFlow.cpp
SILowerI1Copies.cpp
SIMachineFunctionInfo.cpp
SIRegisterInfo.cpp
SITypeRewriter.cpp

View File

@ -185,7 +185,8 @@ bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy,
const TargetRegisterClass *SrcRC;
if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
DstRC == &AMDGPU::M0RegRegClass)
DstRC == &AMDGPU::M0RegRegClass ||
MRI.getRegClass(SrcReg) == &AMDGPU::VReg_1RegClass)
return false;
SrcRC = TRI->getSubRegClass(MRI.getRegClass(SrcReg), SrcSubReg);

View File

@ -29,7 +29,7 @@ using namespace llvm;
SITargetLowering::SITargetLowering(TargetMachine &TM) :
AMDGPUTargetLowering(TM) {
addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass);
addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass);
addRegisterClass(MVT::i64, &AMDGPU::VSrc_64RegClass);
addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass);

View File

@ -1398,6 +1398,12 @@ def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>;
let isCodeGenOnly = 1, isPseudo = 1 in {
def V_MOV_I1 : InstSI <
(outs VReg_1:$dst),
(ins i1imm:$src),
"", [(set i1:$dst, (imm:$src))]
>;
def LOAD_CONST : AMDGPUShaderInst <
(outs GPRF32:$dst),
(ins i32imm:$src),
@ -1980,11 +1986,6 @@ def : Pat <
(V_MOV_B32_e32 fpimm:$imm)
>;
def : Pat <
(i1 imm:$imm),
(S_MOV_B64 imm:$imm)
>;
def : Pat <
(i64 InlineImm<i64>:$imm),
(S_MOV_B64 InlineImm<i64>:$imm)

View File

@ -67,7 +67,7 @@ private:
static const unsigned SkipThreshold = 12;
static char ID;
const TargetRegisterInfo *TRI;
const SIRegisterInfo *TRI;
const SIInstrInfo *TII;
bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
@ -427,7 +427,7 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
TII = static_cast<const SIInstrInfo*>(MF.getTarget().getInstrInfo());
TRI = MF.getTarget().getRegisterInfo();
TRI = static_cast<const SIRegisterInfo*>(MF.getTarget().getRegisterInfo());
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
bool HaveKill = false;

View File

@ -0,0 +1,130 @@
//===-- SILowerI1Copies.cpp - Lower I1 Copies -----------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// i1 values are usually inserted by the CFG Structurize pass and they are
/// unique in that they can be copied from VALU to SALU registers.
/// This is not possible for any other value type. Since there are no
/// MOV instructions for i1, we to use V_CMP_* and V_CNDMASK to move the i1.
///
//===----------------------------------------------------------------------===//
//
#define DEBUG_TYPE "si-i1-copies"
#include "AMDGPU.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
namespace {
class SILowerI1Copies : public MachineFunctionPass {
public:
static char ID;
public:
SILowerI1Copies() : MachineFunctionPass(ID) {
initializeSILowerI1CopiesPass(*PassRegistry::getPassRegistry());
}
virtual bool runOnMachineFunction(MachineFunction &MF) override;
virtual const char *getPassName() const override {
return "SI Lower il Copies";
}
virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineDominatorTree>();
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
} // End anonymous namespace.
INITIALIZE_PASS_BEGIN(SILowerI1Copies, DEBUG_TYPE,
"SI Lower il Copies", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(SILowerI1Copies, DEBUG_TYPE,
"SI Lower il Copies", false, false)
char SILowerI1Copies::ID = 0;
char &llvm::SILowerI1CopiesID = SILowerI1Copies::ID;
FunctionPass *llvm::createSILowerI1CopiesPass() {
return new SILowerI1Copies();
}
bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
MF.getTarget().getInstrInfo());
const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI) {
MachineBasicBlock &MBB = *BI;
MachineBasicBlock::iterator I, Next;
for (I = MBB.begin(); I != MBB.end(); I = Next) {
Next = std::next(I);
MachineInstr &MI = *I;
if (MI.getOpcode() == AMDGPU::V_MOV_I1) {
MI.setDesc(TII->get(AMDGPU::V_MOV_B32_e32));
continue;
}
if (MI.getOpcode() != AMDGPU::COPY ||
!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()) ||
!TargetRegisterInfo::isVirtualRegister(MI.getOperand(1).getReg()))
continue;
const TargetRegisterClass *DstRC =
MRI.getRegClass(MI.getOperand(0).getReg());
const TargetRegisterClass *SrcRC =
MRI.getRegClass(MI.getOperand(1).getReg());
if (DstRC == &AMDGPU::VReg_1RegClass &&
TRI->getCommonSubClass(SrcRC, &AMDGPU::SGPR_64RegClass)) {
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CNDMASK_B32_e64))
.addOperand(MI.getOperand(0))
.addImm(0)
.addImm(-1)
.addOperand(MI.getOperand(1))
.addImm(0)
.addImm(0)
.addImm(0)
.addImm(0);
MI.eraseFromParent();
} else if (TRI->getCommonSubClass(DstRC, &AMDGPU::SGPR_64RegClass) &&
SrcRC == &AMDGPU::VReg_1RegClass) {
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CMP_NE_I32_e64))
.addOperand(MI.getOperand(0))
.addImm(0)
.addOperand(MI.getOperand(1))
.addImm(0)
.addImm(0)
.addImm(0)
.addImm(0);
MI.eraseFromParent();
}
}
}
return false;
}

View File

@ -189,6 +189,8 @@ def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add VGPR_256
def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>;
def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)>;
//===----------------------------------------------------------------------===//
// [SV]Src_(32|64) register classes, can have either an immediate or an register
//===----------------------------------------------------------------------===//

View File

@ -0,0 +1,39 @@
; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI %s
; Make sure the i1 values created by the cfg structurizer pass are
; moved using VALU instructions
; SI-NOT: S_MOV_B64 s[{{[0-9]:[0-9]}}], -1
; SI: V_MOV_B32_e32 v{{[0-9]}}, -1
define void @test_if(i32 %a, i32 %b, i32 addrspace(1)* %src, i32 addrspace(1)* %dst) {
entry:
switch i32 %a, label %default [
i32 0, label %case0
i32 1, label %case1
]
case0:
%arrayidx1 = getelementptr i32 addrspace(1)* %dst, i32 %b
store i32 0, i32 addrspace(1)* %arrayidx1, align 4
br label %end
case1:
%arrayidx5 = getelementptr i32 addrspace(1)* %dst, i32 %b
store i32 1, i32 addrspace(1)* %arrayidx5, align 4
br label %end
default:
%cmp8 = icmp eq i32 %a, 2
%arrayidx10 = getelementptr i32 addrspace(1)* %dst, i32 %b
br i1 %cmp8, label %if, label %else
if:
store i32 2, i32 addrspace(1)* %arrayidx10, align 4
br label %end
else:
store i32 3, i32 addrspace(1)* %arrayidx10, align 4
br label %end
end:
ret void
}