mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-26 21:32:10 +00:00
R600/SI: Use VALU instructions for copying i1 values
We can't use SALU instructions for this since they ignore the EXEC mask and are always executed. This fixes several OpenCV tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@207661 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
1d8e31fc7a
commit
bd24b33e57
@ -37,11 +37,15 @@ FunctionPass *createAMDGPUCFGStructurizerPass();
|
||||
// SI Passes
|
||||
FunctionPass *createSITypeRewriter();
|
||||
FunctionPass *createSIAnnotateControlFlowPass();
|
||||
FunctionPass *createSILowerI1CopiesPass();
|
||||
FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
|
||||
FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm);
|
||||
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
|
||||
FunctionPass *createSIInsertWaits(TargetMachine &tm);
|
||||
|
||||
void initializeSILowerI1CopiesPass(PassRegistry &);
|
||||
extern char &SILowerI1CopiesID;
|
||||
|
||||
// Passes common to R600 and SI
|
||||
Pass *createAMDGPUStructurizeCFGPass();
|
||||
FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
|
||||
|
@ -154,6 +154,7 @@ AMDGPUPassConfig::addPreISel() {
|
||||
|
||||
// Adds the instruction-selection passes: the AMDGPU ISel DAG pass followed by
// the pass that lowers i1 copies. Always returns false.
bool AMDGPUPassConfig::addInstSelector() {
|
||||
addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
|
||||
// NOTE(review): createSILowerI1CopiesPass is declared under "SI Passes" but
// is added here without any subtarget check — confirm whether it should only
// run when targeting SI.
addPass(createSILowerI1CopiesPass());
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -45,6 +45,7 @@ add_llvm_target(R600CodeGen
|
||||
SIInstrInfo.cpp
|
||||
SIISelLowering.cpp
|
||||
SILowerControlFlow.cpp
|
||||
SILowerI1Copies.cpp
|
||||
SIMachineFunctionInfo.cpp
|
||||
SIRegisterInfo.cpp
|
||||
SITypeRewriter.cpp
|
||||
|
@ -185,7 +185,8 @@ bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy,
|
||||
const TargetRegisterClass *SrcRC;
|
||||
|
||||
if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
|
||||
DstRC == &AMDGPU::M0RegRegClass)
|
||||
DstRC == &AMDGPU::M0RegRegClass ||
|
||||
MRI.getRegClass(SrcReg) == &AMDGPU::VReg_1RegClass)
|
||||
return false;
|
||||
|
||||
SrcRC = TRI->getSubRegClass(MRI.getRegClass(SrcReg), SrcSubReg);
|
||||
|
@ -29,7 +29,7 @@ using namespace llvm;
|
||||
|
||||
SITargetLowering::SITargetLowering(TargetMachine &TM) :
|
||||
AMDGPUTargetLowering(TM) {
|
||||
addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass);
|
||||
addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass);
|
||||
addRegisterClass(MVT::i64, &AMDGPU::VSrc_64RegClass);
|
||||
|
||||
addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass);
|
||||
|
@ -1398,6 +1398,12 @@ def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>;
|
||||
|
||||
let isCodeGenOnly = 1, isPseudo = 1 in {
|
||||
|
||||
def V_MOV_I1 : InstSI <
|
||||
(outs VReg_1:$dst),
|
||||
(ins i1imm:$src),
|
||||
"", [(set i1:$dst, (imm:$src))]
|
||||
>;
|
||||
|
||||
def LOAD_CONST : AMDGPUShaderInst <
|
||||
(outs GPRF32:$dst),
|
||||
(ins i32imm:$src),
|
||||
@ -1980,11 +1986,6 @@ def : Pat <
|
||||
(V_MOV_B32_e32 fpimm:$imm)
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(i1 imm:$imm),
|
||||
(S_MOV_B64 imm:$imm)
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(i64 InlineImm<i64>:$imm),
|
||||
(S_MOV_B64 InlineImm<i64>:$imm)
|
||||
|
@ -67,7 +67,7 @@ private:
|
||||
static const unsigned SkipThreshold = 12;
|
||||
|
||||
static char ID;
|
||||
const TargetRegisterInfo *TRI;
|
||||
const SIRegisterInfo *TRI;
|
||||
const SIInstrInfo *TII;
|
||||
|
||||
bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
|
||||
@ -427,7 +427,7 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
|
||||
|
||||
bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
|
||||
TII = static_cast<const SIInstrInfo*>(MF.getTarget().getInstrInfo());
|
||||
TRI = MF.getTarget().getRegisterInfo();
|
||||
TRI = static_cast<const SIRegisterInfo*>(MF.getTarget().getRegisterInfo());
|
||||
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
|
||||
bool HaveKill = false;
|
||||
|
130
lib/Target/R600/SILowerI1Copies.cpp
Normal file
130
lib/Target/R600/SILowerI1Copies.cpp
Normal file
@ -0,0 +1,130 @@
|
||||
//===-- SILowerI1Copies.cpp - Lower I1 Copies -----------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
/// i1 values are usually inserted by the CFG Structurize pass and they are
|
||||
/// unique in that they can be copied from VALU to SALU registers.
|
||||
/// This is not possible for any other value type. Since there are no
|
||||
/// MOV instructions for i1, we have to use V_CMP_* and V_CNDMASK to move the i1.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
||||
#define DEBUG_TYPE "si-i1-copies"
|
||||
#include "AMDGPU.h"
|
||||
#include "SIInstrInfo.h"
|
||||
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
|
||||
#include "llvm/CodeGen/MachineDominators.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/IR/LLVMContext.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
class SILowerI1Copies : public MachineFunctionPass {
|
||||
public:
|
||||
static char ID;
|
||||
|
||||
public:
|
||||
SILowerI1Copies() : MachineFunctionPass(ID) {
|
||||
initializeSILowerI1CopiesPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
virtual bool runOnMachineFunction(MachineFunction &MF) override;
|
||||
|
||||
virtual const char *getPassName() const override {
|
||||
return "SI Lower il Copies";
|
||||
}
|
||||
|
||||
virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.addRequired<MachineDominatorTree>();
|
||||
AU.setPreservesCFG();
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
};
|
||||
|
||||
} // End anonymous namespace.
|
||||
|
||||
// Registers SILowerI1Copies under DEBUG_TYPE together with its
// MachineDominatorTree dependency (mirrors addRequired in getAnalysisUsage).
// The description uses "i1" (digit one), matching the pass and file name.
INITIALIZE_PASS_BEGIN(SILowerI1Copies, DEBUG_TYPE,
                      "SI Lower i1 Copies", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(SILowerI1Copies, DEBUG_TYPE,
                    "SI Lower i1 Copies", false, false)
|
||||
|
||||
// Storage for the pass ID that the constructor hands to MachineFunctionPass.
char SILowerI1Copies::ID = 0;
|
||||
|
||||
// Externally visible reference to the pass ID (declared `extern` in the
// llvm namespace alongside the other SI pass declarations).
char &llvm::SILowerI1CopiesID = SILowerI1Copies::ID;
|
||||
|
||||
// Factory for the i1-copy lowering pass; returns a newly allocated
// SILowerI1Copies instance.
// NOTE(review): the caller presumably takes ownership of the returned pointer
// (it is passed straight to addPass) — confirm.
FunctionPass *llvm::createSILowerI1CopiesPass() {
|
||||
return new SILowerI1Copies();
|
||||
}
|
||||
|
||||
/// Rewrites i1 moves in \p MF so that they are performed by VALU instructions.
///
/// Three kinds of instructions are handled:
///  - V_MOV_I1 pseudos are turned into V_MOV_B32_e32 by swapping the
///    instruction descriptor in place.
///  - COPYs into a VReg_1 register from a register whose class shares a
///    sub-class with SGPR_64 are replaced by V_CNDMASK_B32_e64.
///  - COPYs from a VReg_1 register into a register whose class shares a
///    sub-class with SGPR_64 are replaced by V_CMP_NE_I32_e64.
///
/// Only COPYs whose two register operands are both virtual are considered.
///
/// \returns true if at least one instruction was rewritten, false otherwise.
bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
      MF.getTarget().getInstrInfo());
  const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();

  // Set as soon as any instruction is modified, inserted or erased so the
  // pass manager is told the truth about whether the function changed.
  bool Changed = false;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      // Grab the successor first: MI may be erased below.
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (MI.getOpcode() == AMDGPU::V_MOV_I1) {
        MI.setDesc(TII->get(AMDGPU::V_MOV_B32_e32));
        Changed = true;
        continue;
      }

      if (MI.getOpcode() != AMDGPU::COPY ||
          !TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()) ||
          !TargetRegisterInfo::isVirtualRegister(MI.getOperand(1).getReg()))
        continue;

      const TargetRegisterClass *DstRC =
          MRI.getRegClass(MI.getOperand(0).getReg());
      const TargetRegisterClass *SrcRC =
          MRI.getRegClass(MI.getOperand(1).getReg());

      if (DstRC == &AMDGPU::VReg_1RegClass &&
          TRI->getCommonSubClass(SrcRC, &AMDGPU::SGPR_64RegClass)) {
        // SGPR_64-compatible source -> VReg_1 destination: emit a
        // V_CNDMASK_B32_e64 with immediates 0 and -1 and the original source.
        // NOTE(review): the four trailing zero immediates are presumably the
        // VOP3 modifier operands — confirm against the instruction definition.
        BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CNDMASK_B32_e64))
            .addOperand(MI.getOperand(0))
            .addImm(0)
            .addImm(-1)
            .addOperand(MI.getOperand(1))
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0);
        MI.eraseFromParent();
        Changed = true;
      } else if (TRI->getCommonSubClass(DstRC, &AMDGPU::SGPR_64RegClass) &&
                 SrcRC == &AMDGPU::VReg_1RegClass) {
        // VReg_1 source -> SGPR_64-compatible destination: emit a
        // V_CMP_NE_I32_e64 of immediate 0 against the original source.
        // NOTE(review): trailing zero immediates presumably are VOP3 modifier
        // operands here as well — confirm.
        BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CMP_NE_I32_e64))
            .addOperand(MI.getOperand(0))
            .addImm(0)
            .addOperand(MI.getOperand(1))
            .addImm(0)
            .addImm(0)
            .addImm(0)
            .addImm(0);
        MI.eraseFromParent();
        Changed = true;
      }
    }
  }

  return Changed;
}
|
@ -189,6 +189,8 @@ def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add VGPR_256
|
||||
|
||||
def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>;
|
||||
|
||||
def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// [SV]Src_(32|64) register classes, can have either an immediate or a register
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
39
test/CodeGen/R600/valu-i1.ll
Normal file
39
test/CodeGen/R600/valu-i1.ll
Normal file
@ -0,0 +1,39 @@
|
||||
; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI %s
|
||||
|
||||
; Make sure the i1 values created by the cfg structurizer pass are
|
||||
; moved using VALU instructions
|
||||
; SI-NOT: S_MOV_B64 s[{{[0-9]:[0-9]}}], -1
|
||||
; SI: V_MOV_B32_e32 v{{[0-9]}}, -1
|
||||
define void @test_if(i32 %a, i32 %b, i32 addrspace(1)* %src, i32 addrspace(1)* %dst) {
|
||||
entry:
|
||||
switch i32 %a, label %default [
|
||||
i32 0, label %case0
|
||||
i32 1, label %case1
|
||||
]
|
||||
|
||||
case0:
|
||||
%arrayidx1 = getelementptr i32 addrspace(1)* %dst, i32 %b
|
||||
store i32 0, i32 addrspace(1)* %arrayidx1, align 4
|
||||
br label %end
|
||||
|
||||
case1:
|
||||
%arrayidx5 = getelementptr i32 addrspace(1)* %dst, i32 %b
|
||||
store i32 1, i32 addrspace(1)* %arrayidx5, align 4
|
||||
br label %end
|
||||
|
||||
default:
|
||||
%cmp8 = icmp eq i32 %a, 2
|
||||
%arrayidx10 = getelementptr i32 addrspace(1)* %dst, i32 %b
|
||||
br i1 %cmp8, label %if, label %else
|
||||
|
||||
if:
|
||||
store i32 2, i32 addrspace(1)* %arrayidx10, align 4
|
||||
br label %end
|
||||
|
||||
else:
|
||||
store i32 3, i32 addrspace(1)* %arrayidx10, align 4
|
||||
br label %end
|
||||
|
||||
end:
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue
Block a user