R600/SI: Replace v1i32 type with i32 in imageload and sample intrinsics

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188430 91177308-0d34-0410-b5e6-96231b3b80d8
2024-10-06 14:57:41 +00:00 · 2013-08-14 23:24:53 +00:00 · 2013-08-14 23:24:53 +00:00 · e8e33f448e
commit e8e33f448e
parent 68db37b952
5 changed files with 35 additions and 4 deletions
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@ -43,8 +43,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
  addRegisterClass(MVT::i32, &AMDGPU::VSrc_32RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::VSrc_32RegClass);
  addRegisterClass(MVT::v1i32, &AMDGPU::VSrc_32RegClass);
  addRegisterClass(MVT::f64, &AMDGPU::VSrc_64RegClass);
  addRegisterClass(MVT::v2i32, &AMDGPU::VSrc_64RegClass);
  addRegisterClass(MVT::v2f32, &AMDGPU::VSrc_64RegClass);
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@ -27,7 +27,7 @@ def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT",
 >;
 class SDSample<string opcode> : SDNode <opcode,
-  SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVec<1>, SDTCisVT<2, v32i8>,
+  SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVT<2, v32i8>,
                       SDTCisVT<3, i128>, SDTCisVT<4, i32>]>
 >;
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@ -1326,7 +1326,7 @@ def : Pat <
 /* SIsample for simple 1D texture lookup */
 def : Pat <
-  (SIsample v1i32:$addr, v32i8:$rsrc, i128:$sampler, imm),
+  (SIsample i32:$addr, v32i8:$rsrc, i128:$sampler, imm),
  (IMAGE_SAMPLE_V1 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
 >;
--- a/lib/Target/R600/SITypeRewriter.cpp
+++ b/lib/Target/R600/SITypeRewriter.cpp
@ -16,6 +16,9 @@
 ///      legal for some compute APIs, and we don't want to declare it as legal
 ///      in the backend, because we want the legalizer to expand all v16i8
 ///      operations.
 /// v1* => *
 ///   - Having v1* types complicates the legalizer and we can easily replace
 ///     them with the element type.
 //===----------------------------------------------------------------------===//
 #include "AMDGPU.h"
@ -109,6 +112,19 @@ void SITypeRewriter::visitCallInst(CallInst &I) {
      Types.push_back(i128);
      NeedToReplace = true;
      Name = Name + ".i128";
    } else if (Arg->getType()->isVectorTy() &&
               Arg->getType()->getVectorNumElements() == 1 &&
               Arg->getType()->getVectorElementType() ==
                                              Type::getInt32Ty(I.getContext())){
      Type *ElementTy = Arg->getType()->getVectorElementType();
      std::string TypeName = "i32";
      InsertElementInst *Def = dyn_cast<InsertElementInst>(Arg);
      assert(Def);
      Args.push_back(Def->getOperand(1));
      Types.push_back(ElementTy);
      std::string VecTypeName = "v1" + TypeName;
      Name = Name.replace(Name.find(VecTypeName), VecTypeName.length(), TypeName);
      NeedToReplace = true;
    } else {
      Args.push_back(Arg);
      Types.push_back(Arg->getType());
--- a/test/CodeGen/R600/llvm.SI.sample.ll
+++ b/test/CodeGen/R600/llvm.SI.sample.ll
@ -135,6 +135,23 @@ define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
   ret void
 }
 ; Exercise llvm.SI.sample with a <1 x i32> address operand built by a single
 ; insertelement, and verify that a sample instruction with a four-VGPR result
 ; and a write mask of 15 is still selected.
 ; CHECK: @v1
 ; CHECK: IMAGE_SAMPLE VGPR{{[0-9]}}_VGPR{{[0-9]}}_VGPR{{[0-9]}}_VGPR{{[0-9]}}, 15
 define void @v1(i32 %a1) {
 entry:
  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
  %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
  %2 = extractelement <4 x float> %1, i32 0
  %3 = extractelement <4 x float> %1, i32 1
  %4 = extractelement <4 x float> %1, i32 2
  %5 = extractelement <4 x float> %1, i32 3
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %5)
  ret void
 }
 declare <4 x float> @llvm.SI.sample.v1i32(<1 x i32>, <32 x i8>, <16 x i8>, i32) readnone
 declare <4 x float> @llvm.SI.sample.(<4 x i32>, <32 x i8>, <16 x i8>, i32) readnone
 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)