R600/SI: replace WQM intrinsic

Just enable WQM when we see an LDS interpolation instruction. Signed-off-by: Christian König <christian.koenig@amd.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178019 91177308-0d34-0410-b5e6-96231b3b80d8
2025-04-13 15:37:24 +00:00 · 2013-03-26 14:03:50 +00:00 · 2013-03-26 14:03:50 +00:00 · 03cd75eedb
commit 03cd75eedb
parent e981802d9b
5 changed files with 14 additions and 28 deletions
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@ -208,28 +208,15 @@ SDValue SITargetLowering::LowerFormalArguments(

 MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
-  MachineRegisterInfo & MRI = BB->getParent()->getRegInfo();
-  MachineBasicBlock::iterator I = MI;

  switch (MI->getOpcode()) {
  default:
    return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::BRANCH: return BB;
-  case AMDGPU::SI_WQM:
-    LowerSI_WQM(MI, *BB, I, MRI);
-    break;
  }
  return BB;
 }

-void SITargetLowering::LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
-    MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
-  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WQM_B64), AMDGPU::EXEC)
-          .addReg(AMDGPU::EXEC);
-
-  MI->eraseFromParent();
-}
-
 EVT SITargetLowering::getSetCCResultType(EVT VT) const {
  return MVT::i1;
 }
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@ -24,9 +24,6 @@ class SITargetLowering : public AMDGPUTargetLowering {
  const SIInstrInfo * TII;
  const TargetRegisterInfo * TRI;

-  void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
-              MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
-
  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;

--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@ -1067,17 +1067,6 @@ def LOAD_CONST : AMDGPUShaderInst <
  [(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))]
 >;

-let usesCustomInserter = 1 in {
-
-def SI_WQM : InstSI <
-  (outs),
-  (ins),
-  "SI_WQM",
-  [(int_SI_wqm)]
->;
-
-} // end usesCustomInserter 
-
 // SI Psuedo instructions. These are used by the CFG structurizer pass
 // and should be lowered to ISA instructions prior to codegen.

--- a/lib/Target/R600/SIIntrinsics.td
+++ b/lib/Target/R600/SIIntrinsics.td
@ -18,7 +18,6 @@ let TargetPrefix = "SI", isTarget = 1 in {
  def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
  def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrReadMem]>;
  def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v16i8_ty, llvm_i16_ty, llvm_i32_ty], [IntrReadMem]> ;
-  def int_SI_wqm : Intrinsic <[], [], []>;

  class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_anyvector_ty, llvm_v32i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrReadMem]>;

--- a/lib/Target/R600/SILowerControlFlow.cpp
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@ -410,6 +410,7 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
 bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {

  bool HaveKill = false;
+  bool NeedWQM = false;
  unsigned Depth = 0;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
@ -479,9 +480,22 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
        case AMDGPU::SI_INDIRECT_DST_V16:
          IndirectDst(MI);
          break;
+
+        case AMDGPU::V_INTERP_P1_F32:
+        case AMDGPU::V_INTERP_P2_F32:
+        case AMDGPU::V_INTERP_MOV_F32:
+          NeedWQM = true;
+          break;
+
      }
    }
  }

+  if (NeedWQM) {
+    MachineBasicBlock &MBB = MF.front();
+    BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
+            AMDGPU::EXEC).addReg(AMDGPU::EXEC);
+  }
+
  return true;
 }