Implement new way of expanding extloads.

Now that the source and destination types can be specified,
allow doing an expansion that doesn't use an EXTLOAD of the
result type. Try to do a legal extload to an intermediate type
and extend that if possible.

This generalizes the special case custom lowering of extloads
R600 has been using to work around this problem.

This also happens to fix a bug that would incorrectly use more
aligned loads than should be used.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225925 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Matt Arsenault 2015-01-14 01:35:17 +00:00
parent 4d430f0b77
commit 8b6a26ca85
11 changed files with 1452 additions and 58 deletions

View File

@ -753,7 +753,7 @@ namespace ISD {
LAST_LOADEXT_TYPE
};
NodeType getExtForLoadExtType(LoadExtType);
NodeType getExtForLoadExtType(bool IsFP, LoadExtType);
//===--------------------------------------------------------------------===//
/// ISD::CondCode enum - These are ordered carefully to make the bitfields

View File

@ -1090,22 +1090,25 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
break;
}
case TargetLowering::Expand:
if (!TLI.isLoadExtLegal(ISD::EXTLOAD, Node->getValueType(0),
SrcVT) && TLI.isTypeLegal(SrcVT)) {
SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr, LD->getMemOperand());
unsigned ExtendOp;
switch (ExtType) {
case ISD::EXTLOAD:
ExtendOp = (SrcVT.isFloatingPoint() ?
ISD::FP_EXTEND : ISD::ANY_EXTEND);
if (!TLI.isLoadExtLegal(ISD::EXTLOAD, Node->getValueType(0), SrcVT)) {
// If the source type is not legal, see if there is a legal extload to
// an intermediate type that we can then extend further.
EVT LoadVT = TLI.getRegisterType(SrcVT.getSimpleVT());
if (TLI.isTypeLegal(SrcVT) || // Same as SrcVT == LoadVT?
TLI.isLoadExtLegal(ExtType, LoadVT, SrcVT)) {
// If we are loading a legal type, this is a non-extload followed by a
// full extend.
ISD::LoadExtType MidExtType =
(LoadVT == SrcVT) ? ISD::NON_EXTLOAD : ExtType;
SDValue Load = DAG.getExtLoad(MidExtType, dl, LoadVT, Chain, Ptr,
SrcVT, LD->getMemOperand());
unsigned ExtendOp =
ISD::getExtForLoadExtType(SrcVT.isFloatingPoint(), ExtType);
Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
Chain = Load.getValue(1);
break;
case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break;
case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
default: llvm_unreachable("Unexpected extend load type!");
}
Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
Chain = Load.getValue(1);
break;
}
assert(!SrcVT.isVector() &&

View File

@ -234,10 +234,10 @@ bool ISD::allOperandsUndef(const SDNode *N) {
return true;
}
ISD::NodeType ISD::getExtForLoadExtType(ISD::LoadExtType ExtType) {
ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) {
switch (ExtType) {
case ISD::EXTLOAD:
return ISD::ANY_EXTEND;
return IsFP ? ISD::FP_EXTEND : ISD::ANY_EXTEND;
case ISD::SEXTLOAD:
return ISD::SIGN_EXTEND;
case ISD::ZEXTLOAD:

View File

@ -216,6 +216,14 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f32, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i32, Custom);
// There are no 64-bit extloads. These should be done as a 32-bit extload and
// an extension to 64-bit.
for (MVT VT : MVT::integer_valuetypes()) {
setLoadExtAction(ISD::EXTLOAD, MVT::i64, VT, Expand);
setLoadExtAction(ISD::SEXTLOAD, MVT::i64, VT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i64, VT, Expand);
}
for (MVT VT : MVT::integer_vector_valuetypes()) {
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Expand);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Expand);
@ -1412,24 +1420,6 @@ SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
EVT MemVT = Load->getMemoryVT();
if (ExtType != ISD::NON_EXTLOAD && !VT.isVector() && VT.getSizeInBits() > 32) {
// We can do the extload to 32-bits, and then need to separately extend to
// 64-bits.
SDValue ExtLoad32 = DAG.getExtLoad(ExtType, DL, MVT::i32,
Load->getChain(),
Load->getBasePtr(),
MemVT,
Load->getMemOperand());
SDValue Ops[] = {
DAG.getNode(ISD::getExtForLoadExtType(ExtType), DL, VT, ExtLoad32),
ExtLoad32.getValue(1)
};
return DAG.getMergeValues(Ops, DL);
}
if (ExtType == ISD::NON_EXTLOAD && VT.getSizeInBits() < 32) {
assert(VT == MVT::i1 && "Only i1 non-extloads expected");
// FIXME: Copied from PPC

View File

@ -131,19 +131,22 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
for (MVT VT : MVT::integer_valuetypes()) {
if (VT == MVT::i64)
continue;
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Legal);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Legal);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Legal);
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Legal);
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Legal);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Legal);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand);
}

View File

@ -22,7 +22,7 @@ define void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* n
; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
; SI: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <2 x i8> addrspace(1)* %in, align 1
%load = load <2 x i8> addrspace(1)* %in, align 2
%cvt = uitofp <2 x i8> %load to <2 x float>
store <2 x float> %cvt, <2 x float> addrspace(1)* %out, align 16
ret void
@ -43,11 +43,7 @@ define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8>
}
; SI-LABEL: {{^}}load_v4i8_to_v4f32:
; We can't use buffer_load_dword here, because the load is byte aligned, and
; buffer_load_dword requires dword alignment.
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: v_or_b32_e32 [[LOADREG:v[0-9]+]]
; SI: buffer_load_dword [[LOADREG:v[0-9]+]]
; SI-NOT: bfe
; SI-NOT: lshr
; SI-DAG: v_cvt_f32_ubyte3_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
@ -56,6 +52,40 @@ define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8>
; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
; SI: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <4 x i8> addrspace(1)* %in, align 4
%cvt = uitofp <4 x i8> %load to <4 x float>
store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
ret void
}
; This should not be adding instructions to shift into the correct
; position in the word for the component.
; SI-LABEL: {{^}}load_v4i8_to_v4f32_unaligned:
; SI: buffer_load_ubyte [[LOADREG0:v[0-9]+]]
; SI: buffer_load_ubyte [[LOADREG1:v[0-9]+]]
; SI: buffer_load_ubyte [[LOADREG2:v[0-9]+]]
; SI: buffer_load_ubyte [[LOADREG3:v[0-9]+]]
; SI: v_lshlrev_b32
; SI: v_or_b32
; SI: v_lshlrev_b32
; SI: v_or_b32
; SI: v_lshlrev_b32
; SI: v_or_b32
; XSI-DAG: v_cvt_f32_ubyte0_e32 v[[HIRESULT:[0-9]+]], [[LOADREG0]]
; XSI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, [[LOADREG1]]
; XSI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, [[LOADREG2]]
; XSI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG3]]
; SI-DAG: v_cvt_f32_ubyte0_e32
; SI-DAG: v_cvt_f32_ubyte1_e32
; SI-DAG: v_cvt_f32_ubyte2_e32
; SI-DAG: v_cvt_f32_ubyte3_e32
; SI: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <4 x i8> addrspace(1)* %in, align 1
%cvt = uitofp <4 x i8> %load to <4 x float>
store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16

View File

@ -0,0 +1,301 @@
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; XUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FIXME: Evergreen broken
; FUNC-LABEL: {{^}}zextload_global_i1_to_i32:
; SI: buffer_load_ubyte
; SI: buffer_store_dword
; SI: s_endpgm
define void @zextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
%a = load i1 addrspace(1)* %in
%ext = zext i1 %a to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_i1_to_i32:
; SI: buffer_load_ubyte
; SI: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1{{$}}
; SI: buffer_store_dword
; SI: s_endpgm
define void @sextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
%a = load i1 addrspace(1)* %in
%ext = sext i1 %a to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v1i1_to_v1i32:
; SI: s_endpgm
define void @zextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
%load = load <1 x i1> addrspace(1)* %in
%ext = zext <1 x i1> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v1i1_to_v1i32:
; SI: s_endpgm
define void @sextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
%load = load <1 x i1> addrspace(1)* %in
%ext = sext <1 x i1> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v2i1_to_v2i32:
; SI: s_endpgm
define void @zextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
%load = load <2 x i1> addrspace(1)* %in
%ext = zext <2 x i1> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v2i1_to_v2i32:
; SI: s_endpgm
define void @sextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
%load = load <2 x i1> addrspace(1)* %in
%ext = sext <2 x i1> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v4i1_to_v4i32:
; SI: s_endpgm
define void @zextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
%load = load <4 x i1> addrspace(1)* %in
%ext = zext <4 x i1> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v4i1_to_v4i32:
; SI: s_endpgm
define void @sextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
%load = load <4 x i1> addrspace(1)* %in
%ext = sext <4 x i1> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v8i1_to_v8i32:
; SI: s_endpgm
define void @zextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
%load = load <8 x i1> addrspace(1)* %in
%ext = zext <8 x i1> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v8i1_to_v8i32:
; SI: s_endpgm
define void @sextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
%load = load <8 x i1> addrspace(1)* %in
%ext = sext <8 x i1> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v16i1_to_v16i32:
; SI: s_endpgm
define void @zextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
%load = load <16 x i1> addrspace(1)* %in
%ext = zext <16 x i1> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v16i1_to_v16i32:
; SI: s_endpgm
define void @sextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
%load = load <16 x i1> addrspace(1)* %in
%ext = sext <16 x i1> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
ret void
}
; XFUNC-LABEL: {{^}}zextload_global_v32i1_to_v32i32:
; XSI: s_endpgm
; define void @zextload_global_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
; %load = load <32 x i1> addrspace(1)* %in
; %ext = zext <32 x i1> %load to <32 x i32>
; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
; ret void
; }
; XFUNC-LABEL: {{^}}sextload_global_v32i1_to_v32i32:
; XSI: s_endpgm
; define void @sextload_global_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
; %load = load <32 x i1> addrspace(1)* %in
; %ext = sext <32 x i1> %load to <32 x i32>
; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
; ret void
; }
; XFUNC-LABEL: {{^}}zextload_global_v64i1_to_v64i32:
; XSI: s_endpgm
; define void @zextload_global_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
; %load = load <64 x i1> addrspace(1)* %in
; %ext = zext <64 x i1> %load to <64 x i32>
; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
; ret void
; }
; XFUNC-LABEL: {{^}}sextload_global_v64i1_to_v64i32:
; XSI: s_endpgm
; define void @sextload_global_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
; %load = load <64 x i1> addrspace(1)* %in
; %ext = sext <64 x i1> %load to <64 x i32>
; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
; ret void
; }
; FUNC-LABEL: {{^}}zextload_global_i1_to_i64:
; SI: buffer_load_ubyte [[LOAD:v[0-9]+]],
; SI: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
; SI: buffer_store_dwordx2
define void @zextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
%a = load i1 addrspace(1)* %in
%ext = zext i1 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_i1_to_i64:
; SI: buffer_load_ubyte [[LOAD:v[0-9]+]],
; SI: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 0, 1{{$}}
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]]
; SI: buffer_store_dwordx2
define void @sextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
%a = load i1 addrspace(1)* %in
%ext = sext i1 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v1i1_to_v1i64:
; SI: s_endpgm
define void @zextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
%load = load <1 x i1> addrspace(1)* %in
%ext = zext <1 x i1> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v1i1_to_v1i64:
; SI: s_endpgm
define void @sextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
%load = load <1 x i1> addrspace(1)* %in
%ext = sext <1 x i1> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v2i1_to_v2i64:
; SI: s_endpgm
define void @zextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
%load = load <2 x i1> addrspace(1)* %in
%ext = zext <2 x i1> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v2i1_to_v2i64:
; SI: s_endpgm
define void @sextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
%load = load <2 x i1> addrspace(1)* %in
%ext = sext <2 x i1> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v4i1_to_v4i64:
; SI: s_endpgm
define void @zextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
%load = load <4 x i1> addrspace(1)* %in
%ext = zext <4 x i1> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v4i1_to_v4i64:
; SI: s_endpgm
define void @sextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
%load = load <4 x i1> addrspace(1)* %in
%ext = sext <4 x i1> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v8i1_to_v8i64:
; SI: s_endpgm
define void @zextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
%load = load <8 x i1> addrspace(1)* %in
%ext = zext <8 x i1> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v8i1_to_v8i64:
; SI: s_endpgm
define void @sextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
%load = load <8 x i1> addrspace(1)* %in
%ext = sext <8 x i1> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v16i1_to_v16i64:
; SI: s_endpgm
define void @zextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
%load = load <16 x i1> addrspace(1)* %in
%ext = zext <16 x i1> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v16i1_to_v16i64:
; SI: s_endpgm
define void @sextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
%load = load <16 x i1> addrspace(1)* %in
%ext = sext <16 x i1> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
}
; XFUNC-LABEL: {{^}}zextload_global_v32i1_to_v32i64:
; XSI: s_endpgm
; define void @zextload_global_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
; %load = load <32 x i1> addrspace(1)* %in
; %ext = zext <32 x i1> %load to <32 x i64>
; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
; ret void
; }
; XFUNC-LABEL: {{^}}sextload_global_v32i1_to_v32i64:
; XSI: s_endpgm
; define void @sextload_global_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
; %load = load <32 x i1> addrspace(1)* %in
; %ext = sext <32 x i1> %load to <32 x i64>
; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
; ret void
; }
; XFUNC-LABEL: {{^}}zextload_global_v64i1_to_v64i64:
; XSI: s_endpgm
; define void @zextload_global_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
; %load = load <64 x i1> addrspace(1)* %in
; %ext = zext <64 x i1> %load to <64 x i64>
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
; ret void
; }
; XFUNC-LABEL: {{^}}sextload_global_v64i1_to_v64i64:
; XSI: s_endpgm
; define void @sextload_global_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
; %load = load <64 x i1> addrspace(1)* %in
; %ext = sext <64 x i1> %load to <64 x i64>
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
; ret void
; }

View File

@ -0,0 +1,301 @@
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; XUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FIXME: cypress is broken because the bigger testcases spill and it's not implemented
; FUNC-LABEL: {{^}}zextload_global_i16_to_i32:
; SI: buffer_load_ushort
; SI: buffer_store_dword
; SI: s_endpgm
define void @zextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
%a = load i16 addrspace(1)* %in
%ext = zext i16 %a to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_i16_to_i32:
; SI: buffer_load_sshort
; SI: buffer_store_dword
; SI: s_endpgm
define void @sextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
%a = load i16 addrspace(1)* %in
%ext = sext i16 %a to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v1i16_to_v1i32:
; SI: buffer_load_ushort
; SI: s_endpgm
define void @zextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
%load = load <1 x i16> addrspace(1)* %in
%ext = zext <1 x i16> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v1i16_to_v1i32:
; SI: buffer_load_sshort
; SI: s_endpgm
define void @sextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
%load = load <1 x i16> addrspace(1)* %in
%ext = sext <1 x i16> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v2i16_to_v2i32:
; SI: s_endpgm
define void @zextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
%load = load <2 x i16> addrspace(1)* %in
%ext = zext <2 x i16> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v2i16_to_v2i32:
; SI: s_endpgm
define void @sextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
%load = load <2 x i16> addrspace(1)* %in
%ext = sext <2 x i16> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v4i16_to_v4i32:
; SI: s_endpgm
define void @zextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
%load = load <4 x i16> addrspace(1)* %in
%ext = zext <4 x i16> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v4i16_to_v4i32:
; SI: s_endpgm
define void @sextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
%load = load <4 x i16> addrspace(1)* %in
%ext = sext <4 x i16> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v8i16_to_v8i32:
; SI: s_endpgm
define void @zextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
%load = load <8 x i16> addrspace(1)* %in
%ext = zext <8 x i16> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v8i16_to_v8i32:
; SI: s_endpgm
define void @sextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
%load = load <8 x i16> addrspace(1)* %in
%ext = sext <8 x i16> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v16i16_to_v16i32:
; SI: s_endpgm
define void @zextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
%load = load <16 x i16> addrspace(1)* %in
%ext = zext <16 x i16> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v16i16_to_v16i32:
; SI: s_endpgm
define void @sextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
%load = load <16 x i16> addrspace(1)* %in
%ext = sext <16 x i16> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v32i16_to_v32i32:
; SI: s_endpgm
define void @zextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
%load = load <32 x i16> addrspace(1)* %in
%ext = zext <32 x i16> %load to <32 x i32>
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v32i16_to_v32i32:
; SI: s_endpgm
define void @sextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
%load = load <32 x i16> addrspace(1)* %in
%ext = sext <32 x i16> %load to <32 x i32>
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v64i16_to_v64i32:
; SI: s_endpgm
define void @zextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
%load = load <64 x i16> addrspace(1)* %in
%ext = zext <64 x i16> %load to <64 x i32>
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v64i16_to_v64i32:
; SI: s_endpgm
define void @sextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
%load = load <64 x i16> addrspace(1)* %in
%ext = sext <64 x i16> %load to <64 x i32>
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_i16_to_i64:
; SI: buffer_load_ushort [[LOAD:v[0-9]+]],
; SI: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
; SI: buffer_store_dwordx2
define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
%a = load i16 addrspace(1)* %in
%ext = zext i16 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_i16_to_i64:
; SI: buffer_load_sshort [[LOAD:v[0-9]+]],
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
; SI: buffer_store_dwordx2
define void @sextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
%a = load i16 addrspace(1)* %in
%ext = sext i16 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v1i16_to_v1i64:
; SI: s_endpgm
define void @zextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
%load = load <1 x i16> addrspace(1)* %in
%ext = zext <1 x i16> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v1i16_to_v1i64:
; SI: s_endpgm
define void @sextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
%load = load <1 x i16> addrspace(1)* %in
%ext = sext <1 x i16> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v2i16_to_v2i64:
; SI: s_endpgm
define void @zextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
%load = load <2 x i16> addrspace(1)* %in
%ext = zext <2 x i16> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v2i16_to_v2i64:
; SI: s_endpgm
define void @sextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
%load = load <2 x i16> addrspace(1)* %in
%ext = sext <2 x i16> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v4i16_to_v4i64:
; SI: s_endpgm
define void @zextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
%load = load <4 x i16> addrspace(1)* %in
%ext = zext <4 x i16> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v4i16_to_v4i64:
; SI: s_endpgm
define void @sextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
%load = load <4 x i16> addrspace(1)* %in
%ext = sext <4 x i16> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v8i16_to_v8i64:
; SI: s_endpgm
define void @zextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
%load = load <8 x i16> addrspace(1)* %in
%ext = zext <8 x i16> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v8i16_to_v8i64:
; SI: s_endpgm
define void @sextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
%load = load <8 x i16> addrspace(1)* %in
%ext = sext <8 x i16> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v16i16_to_v16i64:
; SI: s_endpgm
define void @zextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
%load = load <16 x i16> addrspace(1)* %in
%ext = zext <16 x i16> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v16i16_to_v16i64:
; SI: s_endpgm
define void @sextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
%load = load <16 x i16> addrspace(1)* %in
%ext = sext <16 x i16> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v32i16_to_v32i64:
; SI: s_endpgm
define void @zextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
%load = load <32 x i16> addrspace(1)* %in
%ext = zext <32 x i16> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v32i16_to_v32i64:
; SI: s_endpgm
define void @sextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
%load = load <32 x i16> addrspace(1)* %in
%ext = sext <32 x i16> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v64i16_to_v64i64:
; SI: s_endpgm
define void @zextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
%load = load <64 x i16> addrspace(1)* %in
%ext = zext <64 x i16> %load to <64 x i64>
store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v64i16_to_v64i64:
; SI: s_endpgm
define void @sextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
%load = load <64 x i16> addrspace(1)* %in
%ext = sext <64 x i16> %load to <64 x i64>
store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
ret void
}

View File

@ -0,0 +1,457 @@
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}zextload_global_i32_to_i64:
; SI: buffer_load_dword [[LOAD:v[0-9]+]],
; SI: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%a = load i32 addrspace(1)* %in
%ext = zext i32 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_i32_to_i64:
; SI: buffer_load_dword [[LOAD:v[0-9]+]],
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
; SI: buffer_store_dwordx2
define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%a = load i32 addrspace(1)* %in
%ext = sext i32 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v1i32_to_v1i64:
; SI: buffer_load_dword
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @zextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind {
%load = load <1 x i32> addrspace(1)* %in
%ext = zext <1 x i32> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v1i32_to_v1i64:
; SI: buffer_load_dword
; SI: v_ashrrev_i32
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @sextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind {
%load = load <1 x i32> addrspace(1)* %in
%ext = sext <1 x i32> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v2i32_to_v2i64:
; SI: buffer_load_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @zextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
%load = load <2 x i32> addrspace(1)* %in
%ext = zext <2 x i32> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v2i32_to_v2i64:
; SI: buffer_load_dwordx2
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI: s_endpgm
define void @sextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
%load = load <2 x i32> addrspace(1)* %in
%ext = sext <2 x i32> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v4i32_to_v4i64:
; SI: buffer_load_dwordx4
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @zextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
%load = load <4 x i32> addrspace(1)* %in
%ext = zext <4 x i32> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v4i32_to_v4i64:
; SI: buffer_load_dwordx4
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI: s_endpgm
define void @sextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
%load = load <4 x i32> addrspace(1)* %in
%ext = sext <4 x i32> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v8i32_to_v8i64:
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI: s_endpgm
define void @zextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
%load = load <8 x i32> addrspace(1)* %in
%ext = zext <8 x i32> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v8i32_to_v8i64:
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI: s_endpgm
define void @sextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
%load = load <8 x i32> addrspace(1)* %in
%ext = sext <8 x i32> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v16i32_to_v16i64:
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI: s_endpgm
define void @sextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
%load = load <16 x i32> addrspace(1)* %in
%ext = sext <16 x i32> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v16i32_to_v16i64
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @zextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
%load = load <16 x i32> addrspace(1)* %in
%ext = zext <16 x i32> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v32i32_to_v32i64:
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI: s_endpgm
define void @sextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
%load = load <32 x i32> addrspace(1)* %in
%ext = sext <32 x i32> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v32i32_to_v32i64:
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dwordx2
; SI: s_endpgm
define void @zextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
%load = load <32 x i32> addrspace(1)* %in
%ext = zext <32 x i32> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
ret void
}

View File

@ -0,0 +1,298 @@
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}zextload_global_i8_to_i32:
; SI: buffer_load_ubyte
; SI: buffer_store_dword
; SI: s_endpgm
define void @zextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
%a = load i8 addrspace(1)* %in
%ext = zext i8 %a to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_i8_to_i32:
; SI: buffer_load_sbyte
; SI: buffer_store_dword
; SI: s_endpgm
define void @sextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
%a = load i8 addrspace(1)* %in
%ext = sext i8 %a to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v1i8_to_v1i32:
; SI: s_endpgm
define void @zextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
%load = load <1 x i8> addrspace(1)* %in
%ext = zext <1 x i8> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v1i8_to_v1i32:
; SI: s_endpgm
define void @sextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
%load = load <1 x i8> addrspace(1)* %in
%ext = sext <1 x i8> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v2i8_to_v2i32:
; SI: s_endpgm
define void @zextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
%load = load <2 x i8> addrspace(1)* %in
%ext = zext <2 x i8> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v2i8_to_v2i32:
; SI: s_endpgm
define void @sextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
%load = load <2 x i8> addrspace(1)* %in
%ext = sext <2 x i8> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v4i8_to_v4i32:
; SI: s_endpgm
define void @zextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
%load = load <4 x i8> addrspace(1)* %in
%ext = zext <4 x i8> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v4i8_to_v4i32:
; SI: s_endpgm
define void @sextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
%load = load <4 x i8> addrspace(1)* %in
%ext = sext <4 x i8> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v8i8_to_v8i32:
; SI: s_endpgm
define void @zextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
%load = load <8 x i8> addrspace(1)* %in
%ext = zext <8 x i8> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v8i8_to_v8i32:
; SI: s_endpgm
define void @sextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
%load = load <8 x i8> addrspace(1)* %in
%ext = sext <8 x i8> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v16i8_to_v16i32:
; SI: s_endpgm
define void @zextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
%load = load <16 x i8> addrspace(1)* %in
%ext = zext <16 x i8> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v16i8_to_v16i32:
; SI: s_endpgm
define void @sextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
%load = load <16 x i8> addrspace(1)* %in
%ext = sext <16 x i8> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
ret void
}
; XFUNC-LABEL: {{^}}zextload_global_v32i8_to_v32i32:
; XSI: s_endpgm
; define void @zextload_global_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
; %load = load <32 x i8> addrspace(1)* %in
; %ext = zext <32 x i8> %load to <32 x i32>
; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
; ret void
; }
; XFUNC-LABEL: {{^}}sextload_global_v32i8_to_v32i32:
; XSI: s_endpgm
; define void @sextload_global_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
; %load = load <32 x i8> addrspace(1)* %in
; %ext = sext <32 x i8> %load to <32 x i32>
; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
; ret void
; }
; XFUNC-LABEL: {{^}}zextload_global_v64i8_to_v64i32:
; XSI: s_endpgm
; define void @zextload_global_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
; %load = load <64 x i8> addrspace(1)* %in
; %ext = zext <64 x i8> %load to <64 x i32>
; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
; ret void
; }
; XFUNC-LABEL: {{^}}sextload_global_v64i8_to_v64i32:
; XSI: s_endpgm
; define void @sextload_global_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
; %load = load <64 x i8> addrspace(1)* %in
; %ext = sext <64 x i8> %load to <64 x i32>
; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
; ret void
; }
; FUNC-LABEL: {{^}}zextload_global_i8_to_i64:
; SI: buffer_load_ubyte [[LOAD:v[0-9]+]],
; SI: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
; SI: buffer_store_dwordx2
define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
%a = load i8 addrspace(1)* %in
%ext = zext i8 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_i8_to_i64:
; SI: buffer_load_sbyte [[LOAD:v[0-9]+]],
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
; SI: buffer_store_dwordx2
define void @sextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
%a = load i8 addrspace(1)* %in
%ext = sext i8 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v1i8_to_v1i64:
; SI: s_endpgm
define void @zextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
%load = load <1 x i8> addrspace(1)* %in
%ext = zext <1 x i8> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v1i8_to_v1i64:
; SI: s_endpgm
define void @sextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
%load = load <1 x i8> addrspace(1)* %in
%ext = sext <1 x i8> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v2i8_to_v2i64:
; SI: s_endpgm
define void @zextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
%load = load <2 x i8> addrspace(1)* %in
%ext = zext <2 x i8> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v2i8_to_v2i64:
; SI: s_endpgm
define void @sextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
%load = load <2 x i8> addrspace(1)* %in
%ext = sext <2 x i8> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v4i8_to_v4i64:
; SI: s_endpgm
define void @zextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
%load = load <4 x i8> addrspace(1)* %in
%ext = zext <4 x i8> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v4i8_to_v4i64:
; SI: s_endpgm
define void @sextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
%load = load <4 x i8> addrspace(1)* %in
%ext = sext <4 x i8> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v8i8_to_v8i64:
; SI: s_endpgm
define void @zextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
%load = load <8 x i8> addrspace(1)* %in
%ext = zext <8 x i8> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v8i8_to_v8i64:
; SI: s_endpgm
define void @sextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
%load = load <8 x i8> addrspace(1)* %in
%ext = sext <8 x i8> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}zextload_global_v16i8_to_v16i64:
; SI: s_endpgm
define void @zextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
%load = load <16 x i8> addrspace(1)* %in
%ext = zext <16 x i8> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}sextload_global_v16i8_to_v16i64:
; SI: s_endpgm
define void @sextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
%load = load <16 x i8> addrspace(1)* %in
%ext = sext <16 x i8> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
}
; XFUNC-LABEL: {{^}}zextload_global_v32i8_to_v32i64:
; XSI: s_endpgm
; define void @zextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
; %load = load <32 x i8> addrspace(1)* %in
; %ext = zext <32 x i8> %load to <32 x i64>
; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
; ret void
; }
; XFUNC-LABEL: {{^}}sextload_global_v32i8_to_v32i64:
; XSI: s_endpgm
; define void @sextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
; %load = load <32 x i8> addrspace(1)* %in
; %ext = sext <32 x i8> %load to <32 x i64>
; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
; ret void
; }
; XFUNC-LABEL: {{^}}zextload_global_v64i8_to_v64i64:
; XSI: s_endpgm
; define void @zextload_global_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
; %load = load <64 x i8> addrspace(1)* %in
; %ext = zext <64 x i8> %load to <64 x i64>
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
; ret void
; }
; XFUNC-LABEL: {{^}}sextload_global_v64i8_to_v64i64:
; XSI: s_endpgm
; define void @sextload_global_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
; %load = load <64 x i8> addrspace(1)* %in
; %ext = sext <64 x i8> %load to <64 x i64>
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
; ret void
; }

View File

@ -1,9 +1,8 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
; FIXME: This is probably wrong. This probably needs to expand to 8-bit reads and writes.
; SI-LABEL: {{^}}unaligned_load_store_i32:
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_write_b32
; SI: s_endpgm
define void @unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind {
@ -13,14 +12,26 @@ define void @unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r
}
; SI-LABEL: {{^}}unaligned_load_store_v4i32:
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_write_b32
; SI: ds_write_b32
; SI: ds_write_b32