R600: Remove AMDILPeeopholeOptimizer and replace optimizations with tablegen patterns

The BFE optimization was the only one we were actually using, and it was emitting an intrinsic that we don't support. https://bugs.freedesktop.org/show_bug.cgi?id=64201 Reviewed-by: Christian König <christian.koenig@amd.com> NOTE: This is a candidate for the 3.3 branch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181580 91177308-0d34-0410-b5e6-96231b3b80d8
2025-02-16 00:33:10 +00:00 · 2013-05-10 02:09:45 +00:00 · 2013-05-10 02:09:45 +00:00 · 58e87a68a8
commit 58e87a68a8
parent dde6836456
6 changed files with 38 additions and 1217 deletions
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@ -284,6 +284,17 @@ class SHA256MaPattern <Instruction BFI_INT, Instruction XOR> : Pat <
  (BFI_INT (XOR i32:$x, i32:$y), i32:$z, i32:$y)
 >;

+// Bitfield extract patterns
+
+def legalshift32 : ImmLeaf <i32, [{return Imm >=0 && Imm < 32;}]>;
+def bfemask : PatLeaf <(imm), [{return isMask_32(N->getZExtValue());}],
+                            SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(CountTrailingOnes_32(N->getZExtValue()), MVT::i32);}]>>;
+
+class BFEPattern <Instruction BFE> : Pat <
+  (and (srl i32:$x, legalshift32:$y), bfemask:$z),
+  (BFE $x, $y, $z)
+>;
+
 include "R600Instructions.td"

 include "SIInstrInfo.td"
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@ -115,7 +115,6 @@ AMDGPUPassConfig::addPreISel() {
 }

 bool AMDGPUPassConfig::addInstSelector() {
-  addPass(createAMDGPUPeepholeOpt(*TM));
  addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));

  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
--- a/lib/Target/R600/AMDILPeepholeOptimizer.cpp
+++ b/lib/Target/R600/AMDILPeepholeOptimizer.cpp
--- a/lib/Target/R600/CMakeLists.txt
+++ b/lib/Target/R600/CMakeLists.txt
@ -21,7 +21,6 @@ add_llvm_target(R600CodeGen
  AMDILISelDAGToDAG.cpp
  AMDILISelLowering.cpp
  AMDILNIDevice.cpp
-  AMDILPeepholeOptimizer.cpp
  AMDILSIDevice.cpp
  AMDGPUAsmPrinter.cpp
  AMDGPUFrameLowering.cpp
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@ -1615,6 +1615,7 @@ let Predicates = [isEGorCayman] in {
                                               i32:$src2))],
    VecALU
  >;
+  def : BFEPattern <BFE_UINT_eg>;

  def BFI_INT_eg : R600_3OP <0x06, "BFI_INT", [], VecALU>;
  defm : BFIPatterns <BFI_INT_eg>;
--- a/test/CodeGen/R600/bfe_uint.ll
+++ b/test/CodeGen/R600/bfe_uint.ll
@ -0,0 +1,26 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: @bfe_def
+; CHECK: BFE_UINT
+define void @bfe_def(i32 addrspace(1)* %out, i32 %x) {
+entry:
+  %0 = lshr i32 %x, 5
+  %1 = and i32 %0, 15 ; 0xf
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; This program could be implemented using a BFE_UINT instruction, however
+; since the lshr constant + number of bits in the mask is >= 32, it can also be
+; implmented with a LSHR instruction, which is better, because LSHR has less
+; operands and requires less constants.
+
+; CHECK: @bfe_shift
+; CHECK-NOT: BFE_UINT
+define void @bfe_shift(i32 addrspace(1)* %out, i32 %x) {
+entry:
+  %0 = lshr i32 %x, 16
+  %1 = and i32 %0, 65535 ; 0xffff
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}