From b01bdf87ff5e13eb22fcc20cd395bf282fbf1ecd Mon Sep 17 00:00:00 2001
From: Vincent Lejeune
Date: Fri, 7 Jun 2013 23:30:26 +0000
Subject: [PATCH] R600: Anti dep better handled in tex clause

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183592 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/R600/R600ControlFlowFinalizer.cpp | 10 ++++----
 test/CodeGen/R600/tex-clause-antidep.ll | 24 ++++++++++++++++++++
 2 files changed, 28 insertions(+), 6 deletions(-)
 create mode 100644 test/CodeGen/R600/tex-clause-antidep.ll

diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
index 6e21df8eb9d..ab29d609b51 100644
--- a/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -110,7 +110,7 @@ private:
   }
 
   bool isCompatibleWithClause(const MachineInstr *MI,
-  std::set<unsigned> &DstRegs, std::set<unsigned> &SrcRegs) const {
+      std::set<unsigned> &DstRegs) const {
     unsigned DstMI, SrcMI;
     for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
         E = MI->operands_end(); I != E; ++I) {
@@ -136,9 +136,7 @@ private:
             &AMDGPU::R600_Reg128RegClass);
       }
     }
-    if ((DstRegs.find(SrcMI) == DstRegs.end()) &&
-        (SrcRegs.find(DstMI) == SrcRegs.end())) {
-      SrcRegs.insert(SrcMI);
+    if ((DstRegs.find(SrcMI) == DstRegs.end())) {
       DstRegs.insert(DstMI);
       return true;
     } else
@@ -152,7 +150,7 @@ private:
     std::vector<MachineInstr *> ClauseContent;
     unsigned AluInstCount = 0;
     bool IsTex = TII->usesTextureCache(ClauseHead);
-    std::set<unsigned> DstRegs, SrcRegs;
+    std::set<unsigned> DstRegs;
     for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
       if (IsTrivialInst(I))
         continue;
@@ -161,7 +159,7 @@ private:
       if ((IsTex && !TII->usesTextureCache(I)) ||
           (!IsTex && !TII->usesVertexCache(I)))
         break;
-      if (!isCompatibleWithClause(I, DstRegs, SrcRegs))
+      if (!isCompatibleWithClause(I, DstRegs))
         break;
       AluInstCount ++;
       ClauseContent.push_back(I);
diff --git a/test/CodeGen/R600/tex-clause-antidep.ll b/test/CodeGen/R600/tex-clause-antidep.ll
new file mode 100644
index 00000000000..5979609ce45
--- /dev/null
+++ b/test/CodeGen/R600/tex-clause-antidep.ll
@@ -0,0 +1,24 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: TEX
+;CHECK-NEXT: ALU
+
+define void @test() {
+   %1 = call float @llvm.R600.load.input(i32 0)
+   %2 = call float @llvm.R600.load.input(i32 1)
+   %3 = call float @llvm.R600.load.input(i32 2)
+   %4 = call float @llvm.R600.load.input(i32 3)
+   %5 = insertelement <4 x float> undef, float %1, i32 0
+   %6 = insertelement <4 x float> %5, float %2, i32 1
+   %7 = insertelement <4 x float> %6, float %3, i32 2
+   %8 = insertelement <4 x float> %7, float %4, i32 3
+   %9 = call <4 x float> @llvm.R600.tex(<4 x float> %8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+   %10 = call <4 x float> @llvm.R600.tex(<4 x float> %8, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+   %11 = fadd <4 x float> %9, %10
+   call void @llvm.R600.store.swizzle(<4 x float> %11, i32 0, i32 0)
+   ret void
+}
+
+declare float @llvm.R600.load.input(i32) readnone
+declare <4 x float> @llvm.R600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) readnone
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)