mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-10-06 14:57:41 +00:00
R600/SI: Replace v1i32 type with i32 in imageload and sample intrinsics
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188430 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
68db37b952
commit
e8e33f448e
@ -43,8 +43,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
|
|||||||
addRegisterClass(MVT::i32, &AMDGPU::VSrc_32RegClass);
|
addRegisterClass(MVT::i32, &AMDGPU::VSrc_32RegClass);
|
||||||
addRegisterClass(MVT::f32, &AMDGPU::VSrc_32RegClass);
|
addRegisterClass(MVT::f32, &AMDGPU::VSrc_32RegClass);
|
||||||
|
|
||||||
addRegisterClass(MVT::v1i32, &AMDGPU::VSrc_32RegClass);
|
|
||||||
|
|
||||||
addRegisterClass(MVT::f64, &AMDGPU::VSrc_64RegClass);
|
addRegisterClass(MVT::f64, &AMDGPU::VSrc_64RegClass);
|
||||||
addRegisterClass(MVT::v2i32, &AMDGPU::VSrc_64RegClass);
|
addRegisterClass(MVT::v2i32, &AMDGPU::VSrc_64RegClass);
|
||||||
addRegisterClass(MVT::v2f32, &AMDGPU::VSrc_64RegClass);
|
addRegisterClass(MVT::v2f32, &AMDGPU::VSrc_64RegClass);
|
||||||
|
@ -27,7 +27,7 @@ def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT",
|
|||||||
>;
|
>;
|
||||||
|
|
||||||
class SDSample<string opcode> : SDNode <opcode,
|
class SDSample<string opcode> : SDNode <opcode,
|
||||||
SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVec<1>, SDTCisVT<2, v32i8>,
|
SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVT<2, v32i8>,
|
||||||
SDTCisVT<3, i128>, SDTCisVT<4, i32>]>
|
SDTCisVT<3, i128>, SDTCisVT<4, i32>]>
|
||||||
>;
|
>;
|
||||||
|
|
||||||
|
@ -1326,7 +1326,7 @@ def : Pat <
|
|||||||
|
|
||||||
/* SIsample for simple 1D texture lookup */
|
/* SIsample for simple 1D texture lookup */
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(SIsample v1i32:$addr, v32i8:$rsrc, i128:$sampler, imm),
|
(SIsample i32:$addr, v32i8:$rsrc, i128:$sampler, imm),
|
||||||
(IMAGE_SAMPLE_V1 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
|
(IMAGE_SAMPLE_V1 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
|
@ -16,6 +16,9 @@
|
|||||||
/// legal for some compute APIs, and we don't want to declare it as legal
|
/// legal for some compute APIs, and we don't want to declare it as legal
|
||||||
/// in the backend, because we want the legalizer to expand all v16i8
|
/// in the backend, because we want the legalizer to expand all v16i8
|
||||||
/// operations.
|
/// operations.
|
||||||
|
/// v1* => *
|
||||||
|
/// - Having v1* types complicates the legalizer and we can easily replace
|
||||||
|
///   them with the element type (e.g. <1 x i32> becomes i32).
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
#include "AMDGPU.h"
|
#include "AMDGPU.h"
|
||||||
@ -109,6 +112,19 @@ void SITypeRewriter::visitCallInst(CallInst &I) {
|
|||||||
Types.push_back(i128);
|
Types.push_back(i128);
|
||||||
NeedToReplace = true;
|
NeedToReplace = true;
|
||||||
Name = Name + ".i128";
|
Name = Name + ".i128";
|
||||||
|
} else if (Arg->getType()->isVectorTy() &&
|
||||||
|
Arg->getType()->getVectorNumElements() == 1 &&
|
||||||
|
Arg->getType()->getVectorElementType() ==
|
||||||
|
Type::getInt32Ty(I.getContext())){
|
||||||
|
Type *ElementTy = Arg->getType()->getVectorElementType();
|
||||||
|
std::string TypeName = "i32";
|
||||||
|
InsertElementInst *Def = dyn_cast<InsertElementInst>(Arg);
|
||||||
|
assert(Def);
|
||||||
|
Args.push_back(Def->getOperand(1));
|
||||||
|
Types.push_back(ElementTy);
|
||||||
|
std::string VecTypeName = "v1" + TypeName;
|
||||||
|
Name = Name.replace(Name.find(VecTypeName), VecTypeName.length(), TypeName);
|
||||||
|
NeedToReplace = true;
|
||||||
} else {
|
} else {
|
||||||
Args.push_back(Arg);
|
Args.push_back(Arg);
|
||||||
Types.push_back(Arg->getType());
|
Types.push_back(Arg->getType());
|
||||||
|
@ -135,6 +135,23 @@ define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
|
|||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; CHECK: @v1
|
||||||
|
; CHECK: IMAGE_SAMPLE VGPR{{[0-9]}}_VGPR{{[0-9]}}_VGPR{{[0-9]}}_VGPR{{[0-9]}}, 15
|
||||||
|
define void @v1(i32 %a1) {
|
||||||
|
entry:
|
||||||
|
%0 = insertelement <1 x i32> undef, i32 %a1, i32 0
|
||||||
|
%1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
|
||||||
|
%2 = extractelement <4 x float> %1, i32 0
|
||||||
|
%3 = extractelement <4 x float> %1, i32 1
|
||||||
|
%4 = extractelement <4 x float> %1, i32 2
|
||||||
|
%5 = extractelement <4 x float> %1, i32 3
|
||||||
|
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %5)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
declare <4 x float> @llvm.SI.sample.v1i32(<1 x i32>, <32 x i8>, <16 x i8>, i32) readnone
|
||||||
|
|
||||||
declare <4 x float> @llvm.SI.sample.(<4 x i32>, <32 x i8>, <16 x i8>, i32) readnone
|
declare <4 x float> @llvm.SI.sample.(<4 x i32>, <32 x i8>, <16 x i8>, i32) readnone
|
||||||
|
|
||||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||||
|
Loading…
Reference in New Issue
Block a user