mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-08 18:31:23 +00:00
R600/SI: Stop using i128 as the resource descriptor type
Having i128 as a legal type complicates the legalization phase. v4i32 is already a legal type, so we will use that instead. This fixes several piglit tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206500 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
ec6e62ec98
commit
93ea1378d2
@ -42,9 +42,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
|
||||
addRegisterClass(MVT::v2i32, &AMDGPU::VSrc_64RegClass);
|
||||
addRegisterClass(MVT::v2f32, &AMDGPU::VSrc_64RegClass);
|
||||
|
||||
addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass);
|
||||
addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
|
||||
addRegisterClass(MVT::i128, &AMDGPU::SReg_128RegClass);
|
||||
addRegisterClass(MVT::v4i32, &AMDGPU::VSrc_128RegClass);
|
||||
addRegisterClass(MVT::v4f32, &AMDGPU::VSrc_128RegClass);
|
||||
|
||||
addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass);
|
||||
addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
|
||||
@ -78,8 +77,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
|
||||
setOperationAction(ISD::ADDC, MVT::i32, Legal);
|
||||
setOperationAction(ISD::ADDE, MVT::i32, Legal);
|
||||
|
||||
setOperationAction(ISD::BITCAST, MVT::i128, Legal);
|
||||
|
||||
// We need to custom lower vector loads from local memory
|
||||
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
|
||||
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
|
||||
@ -99,7 +96,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
|
||||
setOperationAction(ISD::STORE, MVT::i1, Custom);
|
||||
setOperationAction(ISD::STORE, MVT::i32, Custom);
|
||||
setOperationAction(ISD::STORE, MVT::i64, Custom);
|
||||
setOperationAction(ISD::STORE, MVT::i128, Custom);
|
||||
setOperationAction(ISD::STORE, MVT::v2i32, Custom);
|
||||
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
|
||||
|
||||
@ -164,7 +160,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
|
||||
setTruncStoreAction(MVT::i32, MVT::i16, Custom);
|
||||
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
|
||||
setTruncStoreAction(MVT::i64, MVT::i32, Expand);
|
||||
setTruncStoreAction(MVT::i128, MVT::i64, Expand);
|
||||
setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
|
||||
setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
|
||||
|
||||
@ -595,7 +590,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
AMDGPU::VGPR2, VT);
|
||||
case AMDGPUIntrinsic::SI_load_const: {
|
||||
SDValue Ops [] = {
|
||||
ResourceDescriptorToi128(Op.getOperand(1), DAG),
|
||||
Op.getOperand(1),
|
||||
Op.getOperand(2)
|
||||
};
|
||||
|
||||
@ -616,7 +611,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
return LowerSampleIntrinsic(AMDGPUISD::SAMPLEL, Op, DAG);
|
||||
case AMDGPUIntrinsic::SI_vs_load_input:
|
||||
return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT,
|
||||
ResourceDescriptorToi128(Op.getOperand(1), DAG),
|
||||
Op.getOperand(1),
|
||||
Op.getOperand(2),
|
||||
Op.getOperand(3));
|
||||
}
|
||||
@ -631,7 +626,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDLoc DL(Op);
|
||||
SDValue Ops [] = {
|
||||
Chain,
|
||||
ResourceDescriptorToi128(Op.getOperand(2), DAG),
|
||||
Op.getOperand(2),
|
||||
Op.getOperand(3),
|
||||
Op.getOperand(4),
|
||||
Op.getOperand(5),
|
||||
@ -799,26 +794,12 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
|
||||
|
||||
}
|
||||
|
||||
SDValue SITargetLowering::ResourceDescriptorToi128(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
|
||||
if (Op.getValueType() == MVT::i128) {
|
||||
return Op;
|
||||
}
|
||||
|
||||
assert(Op.getOpcode() == ISD::UNDEF);
|
||||
|
||||
return DAG.getNode(ISD::BUILD_PAIR, SDLoc(Op), MVT::i128,
|
||||
DAG.getConstant(0, MVT::i64),
|
||||
DAG.getConstant(0, MVT::i64));
|
||||
}
|
||||
|
||||
SDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode,
|
||||
const SDValue &Op,
|
||||
SelectionDAG &DAG) const {
|
||||
return DAG.getNode(Opcode, SDLoc(Op), Op.getValueType(), Op.getOperand(1),
|
||||
Op.getOperand(2),
|
||||
ResourceDescriptorToi128(Op.getOperand(3), DAG),
|
||||
Op.getOperand(3),
|
||||
Op.getOperand(4));
|
||||
}
|
||||
|
||||
|
@ -33,7 +33,6 @@ class SITargetLowering : public AMDGPUTargetLowering {
|
||||
SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue ResourceDescriptorToi128(SDValue Op, SelectionDAG &DAG) const;
|
||||
bool foldImm(SDValue &Operand, int32_t &Immediate,
|
||||
bool &ScalarSlotUsed) const;
|
||||
const TargetRegisterClass *getRegClassForNode(SelectionDAG &DAG,
|
||||
|
@ -17,13 +17,13 @@ def SIadd64bit32bit : SDNode<"ISD::ADD",
|
||||
>;
|
||||
|
||||
def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT",
|
||||
SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisVT<1, i128>, SDTCisVT<2, i32>]>,
|
||||
SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisVT<1, v4i32>, SDTCisVT<2, i32>]>,
|
||||
[SDNPMayLoad, SDNPMemOperand]
|
||||
>;
|
||||
|
||||
def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT",
|
||||
SDTypeProfile<0, 13,
|
||||
[SDTCisVT<0, i128>, // rsrc(SGPR)
|
||||
[SDTCisVT<0, v4i32>, // rsrc(SGPR)
|
||||
SDTCisVT<1, iAny>, // vdata(VGPR)
|
||||
SDTCisVT<2, i32>, // num_channels(imm)
|
||||
SDTCisVT<3, i32>, // vaddr(VGPR)
|
||||
@ -41,13 +41,13 @@ def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT",
|
||||
>;
|
||||
|
||||
def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT",
|
||||
SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisVT<1, i128>, SDTCisVT<2, i16>,
|
||||
SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisVT<1, v4i32>, SDTCisVT<2, i16>,
|
||||
SDTCisVT<3, i32>]>
|
||||
>;
|
||||
|
||||
class SDSample<string opcode> : SDNode <opcode,
|
||||
SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVT<2, v32i8>,
|
||||
SDTCisVT<3, i128>, SDTCisVT<4, i32>]>
|
||||
SDTCisVT<3, v4i32>, SDTCisVT<4, i32>]>
|
||||
>;
|
||||
|
||||
def SIsample : SDSample<"AMDGPUISD::SAMPLE">;
|
||||
|
@ -1456,7 +1456,7 @@ def : Pat <
|
||||
|
||||
/* int_SI_vs_load_input */
|
||||
def : Pat<
|
||||
(SIload_input i128:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr),
|
||||
(SIload_input v4i32:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr),
|
||||
(BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0)
|
||||
>;
|
||||
|
||||
@ -1479,34 +1479,34 @@ def : Pat <
|
||||
|
||||
/* SIsample for simple 1D texture lookup */
|
||||
def : Pat <
|
||||
(SIsample i32:$addr, v32i8:$rsrc, i128:$sampler, imm),
|
||||
(SIsample i32:$addr, v32i8:$rsrc, v4i32:$sampler, imm),
|
||||
(IMAGE_SAMPLE_V4_V1 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
|
||||
>;
|
||||
|
||||
class SamplePattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
|
||||
(name vt:$addr, v32i8:$rsrc, i128:$sampler, imm),
|
||||
(name vt:$addr, v32i8:$rsrc, v4i32:$sampler, imm),
|
||||
(opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
|
||||
>;
|
||||
|
||||
class SampleRectPattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
|
||||
(name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_RECT),
|
||||
(name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_RECT),
|
||||
(opcode 0xf, 1, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
|
||||
>;
|
||||
|
||||
class SampleArrayPattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
|
||||
(name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_ARRAY),
|
||||
(name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_ARRAY),
|
||||
(opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler)
|
||||
>;
|
||||
|
||||
class SampleShadowPattern<SDNode name, MIMG opcode,
|
||||
ValueType vt> : Pat <
|
||||
(name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_SHADOW),
|
||||
(name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_SHADOW),
|
||||
(opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
|
||||
>;
|
||||
|
||||
class SampleShadowArrayPattern<SDNode name, MIMG opcode,
|
||||
ValueType vt> : Pat <
|
||||
(name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_SHADOW_ARRAY),
|
||||
(name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_SHADOW_ARRAY),
|
||||
(opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler)
|
||||
>;
|
||||
|
||||
@ -1695,8 +1695,6 @@ def : BitConvert <i64, v2i32, VReg_64>;
|
||||
|
||||
def : BitConvert <v4f32, v4i32, VReg_128>;
|
||||
def : BitConvert <v4i32, v4f32, VReg_128>;
|
||||
def : BitConvert <v4i32, i128, VReg_128>;
|
||||
def : BitConvert <i128, v4i32, VReg_128>;
|
||||
|
||||
def : BitConvert <v8f32, v8i32, SReg_256>;
|
||||
def : BitConvert <v8i32, v8f32, SReg_256>;
|
||||
@ -1865,19 +1863,19 @@ def : Ext32Pat <anyext>;
|
||||
|
||||
// 1. Offset as 8bit DWORD immediate
|
||||
def : Pat <
|
||||
(SIload_constant i128:$sbase, IMM8bitDWORD:$offset),
|
||||
(SIload_constant v4i32:$sbase, IMM8bitDWORD:$offset),
|
||||
(S_BUFFER_LOAD_DWORD_IMM $sbase, (as_dword_i32imm $offset))
|
||||
>;
|
||||
|
||||
// 2. Offset loaded in a 32bit SGPR
|
||||
def : Pat <
|
||||
(SIload_constant i128:$sbase, imm:$offset),
|
||||
(SIload_constant v4i32:$sbase, imm:$offset),
|
||||
(S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset))
|
||||
>;
|
||||
|
||||
// 3. Offset in a 32bit VGPR
|
||||
def : Pat <
|
||||
(SIload_constant i128:$sbase, i32:$voff),
|
||||
(SIload_constant v4i32:$sbase, i32:$voff),
|
||||
(BUFFER_LOAD_DWORD_OFFEN $sbase, $voff, 0, 0, 0, 0)
|
||||
>;
|
||||
|
||||
@ -1979,7 +1977,6 @@ defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>;
|
||||
defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
|
||||
defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, i64>;
|
||||
defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, v2i32>;
|
||||
defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, i128>;
|
||||
defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v4i32>;
|
||||
defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
|
||||
defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v8i32>;
|
||||
@ -2071,7 +2068,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
|
||||
MUBUF bothen> {
|
||||
|
||||
def : Pat <
|
||||
(vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
|
||||
(vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
|
||||
imm:$offset, 0, 0, imm:$glc, imm:$slc,
|
||||
imm:$tfe)),
|
||||
(offset $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc),
|
||||
@ -2079,7 +2076,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
|
||||
(vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
|
||||
imm, 1, 0, imm:$glc, imm:$slc,
|
||||
imm:$tfe)),
|
||||
(offen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc),
|
||||
@ -2087,7 +2084,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
|
||||
(vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
|
||||
imm:$offset, 0, 1, imm:$glc, imm:$slc,
|
||||
imm:$tfe)),
|
||||
(idxen $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc),
|
||||
@ -2095,7 +2092,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(vt (int_SI_buffer_load_dword i128:$rsrc, v2i32:$vaddr, i32:$soffset,
|
||||
(vt (int_SI_buffer_load_dword v4i32:$rsrc, v2i32:$vaddr, i32:$soffset,
|
||||
imm, 1, 1, imm:$glc, imm:$slc,
|
||||
imm:$tfe)),
|
||||
(bothen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc),
|
||||
@ -2116,7 +2113,7 @@ defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_
|
||||
|
||||
// TBUFFER_STORE_FORMAT_*, addr64=0
|
||||
class MTBUF_StoreResource <ValueType vt, int num_channels, MTBUF opcode> : Pat<
|
||||
(SItbuffer_store i128:$rsrc, vt:$vdata, num_channels, i32:$vaddr,
|
||||
(SItbuffer_store v4i32:$rsrc, vt:$vdata, num_channels, i32:$vaddr,
|
||||
i32:$soffset, imm:$inst_offset, imm:$dfmt,
|
||||
imm:$nfmt, imm:$offen, imm:$idxen,
|
||||
imm:$glc, imm:$slc, imm:$tfe),
|
||||
@ -2240,13 +2237,6 @@ def : Pat<
|
||||
// Miscellaneous Patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def : Pat <
|
||||
(i64 (trunc i128:$x)),
|
||||
(INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
|
||||
(i32 (EXTRACT_SUBREG $x, sub0)), sub0),
|
||||
(i32 (EXTRACT_SUBREG $x, sub1)), sub1)
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(i32 (trunc i64:$a)),
|
||||
(EXTRACT_SUBREG $a, sub0)
|
||||
|
@ -168,7 +168,7 @@ def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, i1], 64,
|
||||
(add SGPR_64Regs, VCCReg, EXECReg)
|
||||
>;
|
||||
|
||||
def SReg_128 : RegisterClass<"AMDGPU", [i128, v4i32], 128, (add SGPR_128)>;
|
||||
def SReg_128 : RegisterClass<"AMDGPU", [v4i32], 128, (add SGPR_128)>;
|
||||
|
||||
def SReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add SGPR_256)>;
|
||||
|
||||
@ -183,14 +183,14 @@ def VReg_96 : RegisterClass<"AMDGPU", [untyped], 96, (add VGPR_96)> {
|
||||
let Size = 96;
|
||||
}
|
||||
|
||||
def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, i128], 128, (add VGPR_128)>;
|
||||
def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>;
|
||||
|
||||
def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add VGPR_256)>;
|
||||
|
||||
def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// [SV]Src_* register classes, can have either an immediate or a register
|
||||
// [SV]Src_(32|64) register classes, can have either an immediate or a register
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>;
|
||||
@ -201,3 +201,9 @@ def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>;
|
||||
|
||||
def VSrc_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SGPR and VGPR register classes
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def VSrc_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128,
|
||||
(add VReg_128, SReg_128)>;
|
||||
|
@ -35,7 +35,7 @@ class SITypeRewriter : public FunctionPass,
|
||||
static char ID;
|
||||
Module *Mod;
|
||||
Type *v16i8;
|
||||
Type *i128;
|
||||
Type *v4i32;
|
||||
|
||||
public:
|
||||
SITypeRewriter() : FunctionPass(ID) { }
|
||||
@ -56,7 +56,7 @@ char SITypeRewriter::ID = 0;
|
||||
bool SITypeRewriter::doInitialization(Module &M) {
|
||||
Mod = &M;
|
||||
v16i8 = VectorType::get(Type::getInt8Ty(M.getContext()), 16);
|
||||
i128 = Type::getIntNTy(M.getContext(), 128);
|
||||
v4i32 = VectorType::get(Type::getInt32Ty(M.getContext()), 4);
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -84,7 +84,8 @@ void SITypeRewriter::visitLoadInst(LoadInst &I) {
|
||||
Type *ElemTy = PtrTy->getPointerElementType();
|
||||
IRBuilder<> Builder(&I);
|
||||
if (ElemTy == v16i8) {
|
||||
Value *BitCast = Builder.CreateBitCast(Ptr, Type::getIntNPtrTy(I.getContext(), 128, 2));
|
||||
Value *BitCast = Builder.CreateBitCast(Ptr,
|
||||
PointerType::get(v4i32,PtrTy->getPointerAddressSpace()));
|
||||
LoadInst *Load = Builder.CreateLoad(BitCast);
|
||||
SmallVector <std::pair<unsigned, MDNode*>, 8> MD;
|
||||
I.getAllMetadataOtherThanDebugLoc(MD);
|
||||
@ -99,6 +100,7 @@ void SITypeRewriter::visitLoadInst(LoadInst &I) {
|
||||
|
||||
void SITypeRewriter::visitCallInst(CallInst &I) {
|
||||
IRBuilder<> Builder(&I);
|
||||
|
||||
SmallVector <Value*, 8> Args;
|
||||
SmallVector <Type*, 8> Types;
|
||||
bool NeedToReplace = false;
|
||||
@ -107,10 +109,10 @@ void SITypeRewriter::visitCallInst(CallInst &I) {
|
||||
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
|
||||
Value *Arg = I.getArgOperand(i);
|
||||
if (Arg->getType() == v16i8) {
|
||||
Args.push_back(Builder.CreateBitCast(Arg, i128));
|
||||
Types.push_back(i128);
|
||||
Args.push_back(Builder.CreateBitCast(Arg, v4i32));
|
||||
Types.push_back(v4i32);
|
||||
NeedToReplace = true;
|
||||
Name = Name + ".i128";
|
||||
Name = Name + ".v4i32";
|
||||
} else if (Arg->getType()->isVectorTy() &&
|
||||
Arg->getType()->getVectorNumElements() == 1 &&
|
||||
Arg->getType()->getVectorElementType() ==
|
||||
@ -144,12 +146,12 @@ void SITypeRewriter::visitCallInst(CallInst &I) {
|
||||
|
||||
void SITypeRewriter::visitBitCast(BitCastInst &I) {
|
||||
IRBuilder<> Builder(&I);
|
||||
if (I.getDestTy() != i128) {
|
||||
if (I.getDestTy() != v4i32) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (BitCastInst *Op = dyn_cast<BitCastInst>(I.getOperand(0))) {
|
||||
if (Op->getSrcTy() == i128) {
|
||||
if (Op->getSrcTy() == v4i32) {
|
||||
I.replaceAllUsesWith(Op->getOperand(0));
|
||||
I.eraseFromParent();
|
||||
}
|
||||
|
@ -297,3 +297,29 @@ entry:
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
|
||||
; When i128 was a legal type this program generated cannot select errors:
|
||||
|
||||
; FUNC-LABEL: @i128-const-store
|
||||
; FIXME: We should be able to do this with one store instruction
|
||||
; EG-CHECK: STORE_RAW
|
||||
; EG-CHECK: STORE_RAW
|
||||
; EG-CHECK: STORE_RAW
|
||||
; EG-CHECK: STORE_RAW
|
||||
; CM-CHECK: STORE_DWORD
|
||||
; CM-CHECK: STORE_DWORD
|
||||
; CM-CHECK: STORE_DWORD
|
||||
; CM-CHECK: STORE_DWORD
|
||||
; SI: BUFFER_STORE_DWORDX2
|
||||
; SI: BUFFER_STORE_DWORDX2
|
||||
define void @i128-const-store(i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
store i32 1, i32 addrspace(1)* %out, align 4
|
||||
%arrayidx2 = getelementptr inbounds i32 addrspace(1)* %out, i64 1
|
||||
store i32 1, i32 addrspace(1)* %arrayidx2, align 4
|
||||
%arrayidx4 = getelementptr inbounds i32 addrspace(1)* %out, i64 2
|
||||
store i32 2, i32 addrspace(1)* %arrayidx4, align 4
|
||||
%arrayidx6 = getelementptr inbounds i32 addrspace(1)* %out, i64 3
|
||||
store i32 2, i32 addrspace(1)* %arrayidx6, align 4
|
||||
ret void
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user