diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index 925f633c002..952362ed6ce 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -753,7 +753,7 @@ namespace ISD { LAST_LOADEXT_TYPE }; - NodeType getExtForLoadExtType(LoadExtType); + NodeType getExtForLoadExtType(bool IsFP, LoadExtType); //===--------------------------------------------------------------------===// /// ISD::CondCode enum - These are ordered carefully to make the bitfields diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index ce8f96b7605..e5473e35cae 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1090,22 +1090,25 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { break; } case TargetLowering::Expand: - if (!TLI.isLoadExtLegal(ISD::EXTLOAD, Node->getValueType(0), - SrcVT) && TLI.isTypeLegal(SrcVT)) { - SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr, LD->getMemOperand()); - unsigned ExtendOp; - switch (ExtType) { - case ISD::EXTLOAD: - ExtendOp = (SrcVT.isFloatingPoint() ? - ISD::FP_EXTEND : ISD::ANY_EXTEND); + if (!TLI.isLoadExtLegal(ISD::EXTLOAD, Node->getValueType(0), SrcVT)) { + // If the source type is not legal, see if there is a legal extload to + // an intermediate type that we can then extend further. + EVT LoadVT = TLI.getRegisterType(SrcVT.getSimpleVT()); + if (TLI.isTypeLegal(SrcVT) || // Same as SrcVT == LoadVT? + TLI.isLoadExtLegal(ExtType, LoadVT, SrcVT)) { + // If we are loading a legal type, this is a non-extload followed by a + // full extend. + ISD::LoadExtType MidExtType = + (LoadVT == SrcVT) ? 
ISD::NON_EXTLOAD : ExtType; + + SDValue Load = DAG.getExtLoad(MidExtType, dl, LoadVT, Chain, Ptr, + SrcVT, LD->getMemOperand()); + unsigned ExtendOp = + ISD::getExtForLoadExtType(SrcVT.isFloatingPoint(), ExtType); + Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); + Chain = Load.getValue(1); break; - case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break; - case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; - default: llvm_unreachable("Unexpected extend load type!"); } - Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); - Chain = Load.getValue(1); - break; } assert(!SrcVT.isVector() && diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index f271bd5122e..c819516eca0 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -234,10 +234,10 @@ bool ISD::allOperandsUndef(const SDNode *N) { return true; } -ISD::NodeType ISD::getExtForLoadExtType(ISD::LoadExtType ExtType) { +ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) { switch (ExtType) { case ISD::EXTLOAD: - return ISD::ANY_EXTEND; + return IsFP ? ISD::FP_EXTEND : ISD::ANY_EXTEND; case ISD::SEXTLOAD: return ISD::SIGN_EXTEND; case ISD::ZEXTLOAD: diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 1ad2a693da1..206050d54a0 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -216,6 +216,14 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f32, Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i32, Custom); + // There are no 64-bit extloads. These should be done as a 32-bit extload and + // an extension to 64-bit. 
+ for (MVT VT : MVT::integer_valuetypes()) { + setLoadExtAction(ISD::EXTLOAD, MVT::i64, VT, Expand); + setLoadExtAction(ISD::SEXTLOAD, MVT::i64, VT, Expand); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i64, VT, Expand); + } + for (MVT VT : MVT::integer_vector_valuetypes()) { setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Expand); setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Expand); @@ -1412,24 +1420,6 @@ SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); EVT MemVT = Load->getMemoryVT(); - if (ExtType != ISD::NON_EXTLOAD && !VT.isVector() && VT.getSizeInBits() > 32) { - // We can do the extload to 32-bits, and then need to separately extend to - // 64-bits. - - SDValue ExtLoad32 = DAG.getExtLoad(ExtType, DL, MVT::i32, - Load->getChain(), - Load->getBasePtr(), - MemVT, - Load->getMemOperand()); - - SDValue Ops[] = { - DAG.getNode(ISD::getExtForLoadExtType(ExtType), DL, VT, ExtLoad32), - ExtLoad32.getValue(1) - }; - - return DAG.getMergeValues(Ops, DL); - } - if (ExtType == ISD::NON_EXTLOAD && VT.getSizeInBits() < 32) { assert(VT == MVT::i1 && "Only i1 non-extloads expected"); // FIXME: Copied from PPC diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 12a356b4907..e7f9788496c 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -131,19 +131,22 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::BRCOND, MVT::Other, Custom); for (MVT VT : MVT::integer_valuetypes()) { + if (VT == MVT::i64) + continue; + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom); - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Legal); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Legal); setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand); setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); - 
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom); - setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Legal); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Legal); setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand); setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); - setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom); - setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Legal); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Legal); setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand); } diff --git a/test/CodeGen/R600/cvt_f32_ubyte.ll b/test/CodeGen/R600/cvt_f32_ubyte.ll index 90d09d6a807..52bcf5d1d67 100644 --- a/test/CodeGen/R600/cvt_f32_ubyte.ll +++ b/test/CodeGen/R600/cvt_f32_ubyte.ll @@ -22,7 +22,7 @@ define void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* n ; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]] ; SI: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}}, define void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8> addrspace(1)* noalias %in) nounwind { - %load = load <2 x i8> addrspace(1)* %in, align 1 + %load = load <2 x i8> addrspace(1)* %in, align 2 %cvt = uitofp <2 x i8> %load to <2 x float> store <2 x float> %cvt, <2 x float> addrspace(1)* %out, align 16 ret void @@ -43,11 +43,7 @@ define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> } ; SI-LABEL: {{^}}load_v4i8_to_v4f32: -; We can't use buffer_load_dword here, because the load is byte aligned, and -; buffer_load_dword requires dword alignment. 
-; SI: buffer_load_ushort -; SI: buffer_load_ushort -; SI: v_or_b32_e32 [[LOADREG:v[0-9]+]] +; SI: buffer_load_dword [[LOADREG:v[0-9]+]] ; SI-NOT: bfe ; SI-NOT: lshr ; SI-DAG: v_cvt_f32_ubyte3_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]] @@ -56,6 +52,40 @@ define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> ; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]] ; SI: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}}, define void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind { + %load = load <4 x i8> addrspace(1)* %in, align 4 + %cvt = uitofp <4 x i8> %load to <4 x float> + store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16 + ret void +} + +; This should not be adding instructions to shift into the correct +; position in the word for the component. + +; SI-LABEL: {{^}}load_v4i8_to_v4f32_unaligned: +; SI: buffer_load_ubyte [[LOADREG0:v[0-9]+]] +; SI: buffer_load_ubyte [[LOADREG1:v[0-9]+]] +; SI: buffer_load_ubyte [[LOADREG2:v[0-9]+]] +; SI: buffer_load_ubyte [[LOADREG3:v[0-9]+]] + +; SI: v_lshlrev_b32 +; SI: v_or_b32 +; SI: v_lshlrev_b32 +; SI: v_or_b32 +; SI: v_lshlrev_b32 +; SI: v_or_b32 + +; XSI-DAG: v_cvt_f32_ubyte0_e32 v[[HIRESULT:[0-9]+]], [[LOADREG0]] +; XSI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, [[LOADREG1]] +; XSI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, [[LOADREG2]] +; XSI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG3]] + +; SI-DAG: v_cvt_f32_ubyte0_e32 +; SI-DAG: v_cvt_f32_ubyte1_e32 +; SI-DAG: v_cvt_f32_ubyte2_e32 +; SI-DAG: v_cvt_f32_ubyte3_e32 + +; SI: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}}, +define void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind { %load = load <4 x i8> addrspace(1)* %in, align 1 %cvt = uitofp <4 x i8> %load to <4 x float> store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16 diff --git 
a/test/CodeGen/R600/global-extload-i1.ll b/test/CodeGen/R600/global-extload-i1.ll new file mode 100644 index 00000000000..940911e7345 --- /dev/null +++ b/test/CodeGen/R600/global-extload-i1.ll @@ -0,0 +1,301 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; XUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; FIXME: Evergreen broken + +; FUNC-LABEL: {{^}}zextload_global_i1_to_i32: +; SI: buffer_load_ubyte +; SI: buffer_store_dword +; SI: s_endpgm +define void @zextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { + %a = load i1 addrspace(1)* %in + %ext = zext i1 %a to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_i1_to_i32: +; SI: buffer_load_ubyte +; SI: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1{{$}} +; SI: buffer_store_dword +; SI: s_endpgm +define void @sextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { + %a = load i1 addrspace(1)* %in + %ext = sext i1 %a to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v1i1_to_v1i32: +; SI: s_endpgm +define void @zextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <1 x i1> addrspace(1)* %in + %ext = zext <1 x i1> %load to <1 x i32> + store <1 x i32> %ext, <1 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v1i1_to_v1i32: +; SI: s_endpgm +define void @sextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <1 x i1> addrspace(1)* %in + %ext = sext <1 x i1> %load to <1 x i32> + store <1 x i32> %ext, <1 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v2i1_to_v2i32: +; SI: s_endpgm +define void @zextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> 
addrspace(1)* nocapture %in) nounwind { + %load = load <2 x i1> addrspace(1)* %in + %ext = zext <2 x i1> %load to <2 x i32> + store <2 x i32> %ext, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v2i1_to_v2i32: +; SI: s_endpgm +define void @sextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <2 x i1> addrspace(1)* %in + %ext = sext <2 x i1> %load to <2 x i32> + store <2 x i32> %ext, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v4i1_to_v4i32: +; SI: s_endpgm +define void @zextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <4 x i1> addrspace(1)* %in + %ext = zext <4 x i1> %load to <4 x i32> + store <4 x i32> %ext, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v4i1_to_v4i32: +; SI: s_endpgm +define void @sextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <4 x i1> addrspace(1)* %in + %ext = sext <4 x i1> %load to <4 x i32> + store <4 x i32> %ext, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v8i1_to_v8i32: +; SI: s_endpgm +define void @zextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <8 x i1> addrspace(1)* %in + %ext = zext <8 x i1> %load to <8 x i32> + store <8 x i32> %ext, <8 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v8i1_to_v8i32: +; SI: s_endpgm +define void @sextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <8 x i1> addrspace(1)* %in + %ext = sext <8 x i1> %load to <8 x i32> + store <8 x i32> %ext, <8 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v16i1_to_v16i32: +; SI: s_endpgm +define void 
@zextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <16 x i1> addrspace(1)* %in + %ext = zext <16 x i1> %load to <16 x i32> + store <16 x i32> %ext, <16 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v16i1_to_v16i32: +; SI: s_endpgm +define void @sextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <16 x i1> addrspace(1)* %in + %ext = sext <16 x i1> %load to <16 x i32> + store <16 x i32> %ext, <16 x i32> addrspace(1)* %out + ret void +} + +; XFUNC-LABEL: {{^}}zextload_global_v32i1_to_v32i32: +; XSI: s_endpgm +; define void @zextload_global_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind { +; %load = load <32 x i1> addrspace(1)* %in +; %ext = zext <32 x i1> %load to <32 x i32> +; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out +; ret void +; } + +; XFUNC-LABEL: {{^}}sextload_global_v32i1_to_v32i32: +; XSI: s_endpgm +; define void @sextload_global_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind { +; %load = load <32 x i1> addrspace(1)* %in +; %ext = sext <32 x i1> %load to <32 x i32> +; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out +; ret void +; } + +; XFUNC-LABEL: {{^}}zextload_global_v64i1_to_v64i32: +; XSI: s_endpgm +; define void @zextload_global_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind { +; %load = load <64 x i1> addrspace(1)* %in +; %ext = zext <64 x i1> %load to <64 x i32> +; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out +; ret void +; } + +; XFUNC-LABEL: {{^}}sextload_global_v64i1_to_v64i32: +; XSI: s_endpgm +; define void @sextload_global_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind { +; %load = load <64 x i1> addrspace(1)* %in +; %ext = sext <64 x i1> %load to <64 
x i32> +; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out +; ret void +; } + +; FUNC-LABEL: {{^}}zextload_global_i1_to_i64: +; SI: buffer_load_ubyte [[LOAD:v[0-9]+]], +; SI: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}} +; SI: buffer_store_dwordx2 +define void @zextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { + %a = load i1 addrspace(1)* %in + %ext = zext i1 %a to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_i1_to_i64: +; SI: buffer_load_ubyte [[LOAD:v[0-9]+]], +; SI: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 0, 1{{$}} +; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]] +; SI: buffer_store_dwordx2 +define void @sextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { + %a = load i1 addrspace(1)* %in + %ext = sext i1 %a to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v1i1_to_v1i64: +; SI: s_endpgm +define void @zextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <1 x i1> addrspace(1)* %in + %ext = zext <1 x i1> %load to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v1i1_to_v1i64: +; SI: s_endpgm +define void @sextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <1 x i1> addrspace(1)* %in + %ext = sext <1 x i1> %load to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v2i1_to_v2i64: +; SI: s_endpgm +define void @zextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <2 x i1> addrspace(1)* %in + %ext = zext <2 x i1> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v2i1_to_v2i64: +; SI: s_endpgm +define 
void @sextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <2 x i1> addrspace(1)* %in + %ext = sext <2 x i1> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v4i1_to_v4i64: +; SI: s_endpgm +define void @zextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <4 x i1> addrspace(1)* %in + %ext = zext <4 x i1> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v4i1_to_v4i64: +; SI: s_endpgm +define void @sextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <4 x i1> addrspace(1)* %in + %ext = sext <4 x i1> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v8i1_to_v8i64: +; SI: s_endpgm +define void @zextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <8 x i1> addrspace(1)* %in + %ext = zext <8 x i1> %load to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v8i1_to_v8i64: +; SI: s_endpgm +define void @sextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <8 x i1> addrspace(1)* %in + %ext = sext <8 x i1> %load to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v16i1_to_v16i64: +; SI: s_endpgm +define void @zextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <16 x i1> addrspace(1)* %in + %ext = zext <16 x i1> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(1)* %out + ret void +} + +; 
FUNC-LABEL: {{^}}sextload_global_v16i1_to_v16i64: +; SI: s_endpgm +define void @sextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind { + %load = load <16 x i1> addrspace(1)* %in + %ext = sext <16 x i1> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(1)* %out + ret void +} + +; XFUNC-LABEL: {{^}}zextload_global_v32i1_to_v32i64: +; XSI: s_endpgm +; define void @zextload_global_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind { +; %load = load <32 x i1> addrspace(1)* %in +; %ext = zext <32 x i1> %load to <32 x i64> +; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out +; ret void +; } + +; XFUNC-LABEL: {{^}}sextload_global_v32i1_to_v32i64: +; XSI: s_endpgm +; define void @sextload_global_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind { +; %load = load <32 x i1> addrspace(1)* %in +; %ext = sext <32 x i1> %load to <32 x i64> +; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out +; ret void +; } + +; XFUNC-LABEL: {{^}}zextload_global_v64i1_to_v64i64: +; XSI: s_endpgm +; define void @zextload_global_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind { +; %load = load <64 x i1> addrspace(1)* %in +; %ext = zext <64 x i1> %load to <64 x i64> +; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out +; ret void +; } + +; XFUNC-LABEL: {{^}}sextload_global_v64i1_to_v64i64: +; XSI: s_endpgm +; define void @sextload_global_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind { +; %load = load <64 x i1> addrspace(1)* %in +; %ext = sext <64 x i1> %load to <64 x i64> +; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out +; ret void +; } diff --git a/test/CodeGen/R600/global-extload-i16.ll b/test/CodeGen/R600/global-extload-i16.ll new file mode 100644 index 00000000000..6c55955de9c --- /dev/null +++ 
b/test/CodeGen/R600/global-extload-i16.ll @@ -0,0 +1,301 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; XUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; FIXME: cypress is broken because the bigger testcases spill and it's not implemented + +; FUNC-LABEL: {{^}}zextload_global_i16_to_i32: +; SI: buffer_load_ushort +; SI: buffer_store_dword +; SI: s_endpgm +define void @zextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind { + %a = load i16 addrspace(1)* %in + %ext = zext i16 %a to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_i16_to_i32: +; SI: buffer_load_sshort +; SI: buffer_store_dword +; SI: s_endpgm +define void @sextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind { + %a = load i16 addrspace(1)* %in + %ext = sext i16 %a to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v1i16_to_v1i32: +; SI: buffer_load_ushort +; SI: s_endpgm +define void @zextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <1 x i16> addrspace(1)* %in + %ext = zext <1 x i16> %load to <1 x i32> + store <1 x i32> %ext, <1 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v1i16_to_v1i32: +; SI: buffer_load_sshort +; SI: s_endpgm +define void @sextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <1 x i16> addrspace(1)* %in + %ext = sext <1 x i16> %load to <1 x i32> + store <1 x i32> %ext, <1 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v2i16_to_v2i32: +; SI: s_endpgm +define void @zextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <2 x i16> 
addrspace(1)* %in + %ext = zext <2 x i16> %load to <2 x i32> + store <2 x i32> %ext, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v2i16_to_v2i32: +; SI: s_endpgm +define void @sextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <2 x i16> addrspace(1)* %in + %ext = sext <2 x i16> %load to <2 x i32> + store <2 x i32> %ext, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v4i16_to_v4i32: +; SI: s_endpgm +define void @zextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <4 x i16> addrspace(1)* %in + %ext = zext <4 x i16> %load to <4 x i32> + store <4 x i32> %ext, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v4i16_to_v4i32: +; SI: s_endpgm +define void @sextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <4 x i16> addrspace(1)* %in + %ext = sext <4 x i16> %load to <4 x i32> + store <4 x i32> %ext, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v8i16_to_v8i32: +; SI: s_endpgm +define void @zextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <8 x i16> addrspace(1)* %in + %ext = zext <8 x i16> %load to <8 x i32> + store <8 x i32> %ext, <8 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v8i16_to_v8i32: +; SI: s_endpgm +define void @sextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <8 x i16> addrspace(1)* %in + %ext = sext <8 x i16> %load to <8 x i32> + store <8 x i32> %ext, <8 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v16i16_to_v16i32: +; SI: s_endpgm +define void @zextload_global_v16i16_to_v16i32(<16 x i32> 
addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <16 x i16> addrspace(1)* %in + %ext = zext <16 x i16> %load to <16 x i32> + store <16 x i32> %ext, <16 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v16i16_to_v16i32: +; SI: s_endpgm +define void @sextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <16 x i16> addrspace(1)* %in + %ext = sext <16 x i16> %load to <16 x i32> + store <16 x i32> %ext, <16 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v32i16_to_v32i32: +; SI: s_endpgm +define void @zextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <32 x i16> addrspace(1)* %in + %ext = zext <32 x i16> %load to <32 x i32> + store <32 x i32> %ext, <32 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v32i16_to_v32i32: +; SI: s_endpgm +define void @sextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <32 x i16> addrspace(1)* %in + %ext = sext <32 x i16> %load to <32 x i32> + store <32 x i32> %ext, <32 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v64i16_to_v64i32: +; SI: s_endpgm +define void @zextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <64 x i16> addrspace(1)* %in + %ext = zext <64 x i16> %load to <64 x i32> + store <64 x i32> %ext, <64 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v64i16_to_v64i32: +; SI: s_endpgm +define void @sextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <64 x i16> addrspace(1)* %in + %ext = sext <64 x i16> %load to <64 x i32> + store <64 x i32> %ext, <64 x i32> 
addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_i16_to_i64: +; SI: buffer_load_ushort [[LOAD:v[0-9]+]], +; SI: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}} +; SI: buffer_store_dwordx2 +define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind { + %a = load i16 addrspace(1)* %in + %ext = zext i16 %a to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_i16_to_i64: +; SI: buffer_load_sshort [[LOAD:v[0-9]+]], +; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]] +; SI: buffer_store_dwordx2 +define void @sextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind { + %a = load i16 addrspace(1)* %in + %ext = sext i16 %a to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v1i16_to_v1i64: +; SI: s_endpgm +define void @zextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <1 x i16> addrspace(1)* %in + %ext = zext <1 x i16> %load to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v1i16_to_v1i64: +; SI: s_endpgm +define void @sextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <1 x i16> addrspace(1)* %in + %ext = sext <1 x i16> %load to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v2i16_to_v2i64: +; SI: s_endpgm +define void @zextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <2 x i16> addrspace(1)* %in + %ext = zext <2 x i16> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v2i16_to_v2i64: +; SI: s_endpgm +define void @sextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> 
addrspace(1)* nocapture %in) nounwind { + %load = load <2 x i16> addrspace(1)* %in + %ext = sext <2 x i16> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v4i16_to_v4i64: +; SI: s_endpgm +define void @zextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <4 x i16> addrspace(1)* %in + %ext = zext <4 x i16> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v4i16_to_v4i64: +; SI: s_endpgm +define void @sextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <4 x i16> addrspace(1)* %in + %ext = sext <4 x i16> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v8i16_to_v8i64: +; SI: s_endpgm +define void @zextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <8 x i16> addrspace(1)* %in + %ext = zext <8 x i16> %load to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v8i16_to_v8i64: +; SI: s_endpgm +define void @sextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <8 x i16> addrspace(1)* %in + %ext = sext <8 x i16> %load to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v16i16_to_v16i64: +; SI: s_endpgm +define void @zextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <16 x i16> addrspace(1)* %in + %ext = zext <16 x i16> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v16i16_to_v16i64: 
+; SI: s_endpgm +define void @sextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <16 x i16> addrspace(1)* %in + %ext = sext <16 x i16> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v32i16_to_v32i64: +; SI: s_endpgm +define void @zextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <32 x i16> addrspace(1)* %in + %ext = zext <32 x i16> %load to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v32i16_to_v32i64: +; SI: s_endpgm +define void @sextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <32 x i16> addrspace(1)* %in + %ext = sext <32 x i16> %load to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v64i16_to_v64i64: +; SI: s_endpgm +define void @zextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <64 x i16> addrspace(1)* %in + %ext = zext <64 x i16> %load to <64 x i64> + store <64 x i64> %ext, <64 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v64i16_to_v64i64: +; SI: s_endpgm +define void @sextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind { + %load = load <64 x i16> addrspace(1)* %in + %ext = sext <64 x i16> %load to <64 x i64> + store <64 x i64> %ext, <64 x i64> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/global-extload-i32.ll b/test/CodeGen/R600/global-extload-i32.ll new file mode 100644 index 00000000000..762b1d019ce --- /dev/null +++ b/test/CodeGen/R600/global-extload-i32.ll @@ -0,0 +1,457 @@ +; RUN: llc -march=r600 -mcpu=SI 
-verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}zextload_global_i32_to_i64: +; SI: buffer_load_dword [[LOAD:v[0-9]+]], +; SI: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}} +; SI: buffer_store_dwordx2 +; SI: s_endpgm +define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %a = load i32 addrspace(1)* %in + %ext = zext i32 %a to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_i32_to_i64: +; SI: buffer_load_dword [[LOAD:v[0-9]+]], +; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]] +; SI: buffer_store_dwordx2 +define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %a = load i32 addrspace(1)* %in + %ext = sext i32 %a to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v1i32_to_v1i64: +; SI: buffer_load_dword +; SI: buffer_store_dwordx2 +; SI: s_endpgm +define void @zextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind { + %load = load <1 x i32> addrspace(1)* %in + %ext = zext <1 x i32> %load to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v1i32_to_v1i64: +; SI: buffer_load_dword +; SI: v_ashrrev_i32 +; SI: buffer_store_dwordx2 +; SI: s_endpgm +define void @sextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind { + %load = load <1 x i32> addrspace(1)* %in + %ext = sext <1 x i32> %load to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v2i32_to_v2i64: +; SI: buffer_load_dwordx2 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: s_endpgm +define void @zextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* 
%out, <2 x i32> addrspace(1)* nocapture %in) nounwind { + %load = load <2 x i32> addrspace(1)* %in + %ext = zext <2 x i32> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v2i32_to_v2i64: +; SI: buffer_load_dwordx2 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI: s_endpgm +define void @sextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind { + %load = load <2 x i32> addrspace(1)* %in + %ext = sext <2 x i32> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v4i32_to_v4i64: +; SI: buffer_load_dwordx4 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: s_endpgm +define void @zextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind { + %load = load <4 x i32> addrspace(1)* %in + %ext = zext <4 x i32> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v4i32_to_v4i64: +; SI: buffer_load_dwordx4 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI: s_endpgm +define void @sextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind { + %load = load <4 x i32> addrspace(1)* %in + %ext = sext <4 x i32> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v8i32_to_v8i64: +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: 
buffer_load_dword +; SI: buffer_load_dword +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI: s_endpgm +define void @zextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind { + %load = load <8 x i32> addrspace(1)* %in + %ext = zext <8 x i32> %load to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v8i32_to_v8i64: +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword + +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 + +; SI: s_endpgm +define void @sextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind { + %load = load <8 x i32> addrspace(1)* %in + %ext = sext <8 x i32> %load to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v16i32_to_v16i64: +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: 
buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword + +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 + +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 + +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 + +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI: s_endpgm +define void @sextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind { + %load = load <16 x i32> addrspace(1)* %in + %ext = sext <16 x i32> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v16i32_to_v16i64: +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword + +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx2 + +; SI: s_endpgm +define void
@zextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind { + %load = load <16 x i32> addrspace(1)* %in + %ext = zext <16 x i32> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v32i32_to_v32i64: +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword + +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword + +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword + +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword + +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 +; SI-DAG: v_ashrrev_i32 + +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: 
buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 + +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 + +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 + +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 + +; SI: s_endpgm +define void @sextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind { + %load = load <32 x i32> addrspace(1)* %in + %ext = sext <32 x i32> %load to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v32i32_to_v32i64: +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword + +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword + +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword + +; SI: buffer_load_dword +; SI: 
buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword +; SI: buffer_load_dword + +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 + +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 + +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 + +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx2 + +; SI: s_endpgm +define void @zextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind { + %load = load <32 x i32> addrspace(1)* %in + %ext = zext <32 x i32> %load to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/global-extload-i8.ll b/test/CodeGen/R600/global-extload-i8.ll new file mode 100644 index 00000000000..f4188dd0486 --- /dev/null +++ b/test/CodeGen/R600/global-extload-i8.ll @@ -0,0 +1,298 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}zextload_global_i8_to_i32: +; SI: 
buffer_load_ubyte +; SI: buffer_store_dword +; SI: s_endpgm +define void @zextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind { + %a = load i8 addrspace(1)* %in + %ext = zext i8 %a to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_i8_to_i32: +; SI: buffer_load_sbyte +; SI: buffer_store_dword +; SI: s_endpgm +define void @sextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind { + %a = load i8 addrspace(1)* %in + %ext = sext i8 %a to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v1i8_to_v1i32: +; SI: s_endpgm +define void @zextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <1 x i8> addrspace(1)* %in + %ext = zext <1 x i8> %load to <1 x i32> + store <1 x i32> %ext, <1 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v1i8_to_v1i32: +; SI: s_endpgm +define void @sextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <1 x i8> addrspace(1)* %in + %ext = sext <1 x i8> %load to <1 x i32> + store <1 x i32> %ext, <1 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v2i8_to_v2i32: +; SI: s_endpgm +define void @zextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <2 x i8> addrspace(1)* %in + %ext = zext <2 x i8> %load to <2 x i32> + store <2 x i32> %ext, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v2i8_to_v2i32: +; SI: s_endpgm +define void @sextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <2 x i8> addrspace(1)* %in + %ext = sext <2 x i8> %load to <2 x i32> + store <2 x i32> %ext, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: 
{{^}}zextload_global_v4i8_to_v4i32: +; SI: s_endpgm +define void @zextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <4 x i8> addrspace(1)* %in + %ext = zext <4 x i8> %load to <4 x i32> + store <4 x i32> %ext, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v4i8_to_v4i32: +; SI: s_endpgm +define void @sextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <4 x i8> addrspace(1)* %in + %ext = sext <4 x i8> %load to <4 x i32> + store <4 x i32> %ext, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v8i8_to_v8i32: +; SI: s_endpgm +define void @zextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <8 x i8> addrspace(1)* %in + %ext = zext <8 x i8> %load to <8 x i32> + store <8 x i32> %ext, <8 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v8i8_to_v8i32: +; SI: s_endpgm +define void @sextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <8 x i8> addrspace(1)* %in + %ext = sext <8 x i8> %load to <8 x i32> + store <8 x i32> %ext, <8 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v16i8_to_v16i32: +; SI: s_endpgm +define void @zextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <16 x i8> addrspace(1)* %in + %ext = zext <16 x i8> %load to <16 x i32> + store <16 x i32> %ext, <16 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v16i8_to_v16i32: +; SI: s_endpgm +define void @sextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <16 x i8> addrspace(1)* %in + %ext = sext <16 x i8> %load to <16 x i32> + 
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out + ret void +} + +; XFUNC-LABEL: {{^}}zextload_global_v32i8_to_v32i32: +; XSI: s_endpgm +; define void @zextload_global_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind { +; %load = load <32 x i8> addrspace(1)* %in +; %ext = zext <32 x i8> %load to <32 x i32> +; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out +; ret void +; } + +; XFUNC-LABEL: {{^}}sextload_global_v32i8_to_v32i32: +; XSI: s_endpgm +; define void @sextload_global_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind { +; %load = load <32 x i8> addrspace(1)* %in +; %ext = sext <32 x i8> %load to <32 x i32> +; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out +; ret void +; } + +; XFUNC-LABEL: {{^}}zextload_global_v64i8_to_v64i32: +; XSI: s_endpgm +; define void @zextload_global_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind { +; %load = load <64 x i8> addrspace(1)* %in +; %ext = zext <64 x i8> %load to <64 x i32> +; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out +; ret void +; } + +; XFUNC-LABEL: {{^}}sextload_global_v64i8_to_v64i32: +; XSI: s_endpgm +; define void @sextload_global_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind { +; %load = load <64 x i8> addrspace(1)* %in +; %ext = sext <64 x i8> %load to <64 x i32> +; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out +; ret void +; } + +; FUNC-LABEL: {{^}}zextload_global_i8_to_i64: +; SI: buffer_load_ubyte [[LOAD:v[0-9]+]], +; SI: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}} +; SI: buffer_store_dwordx2 +define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind { + %a = load i8 addrspace(1)* %in + %ext = zext i8 %a to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_i8_to_i64: +; SI: buffer_load_sbyte [[LOAD:v[0-9]+]], +; SI: 
v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]] +; SI: buffer_store_dwordx2 +define void @sextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind { + %a = load i8 addrspace(1)* %in + %ext = sext i8 %a to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v1i8_to_v1i64: +; SI: s_endpgm +define void @zextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <1 x i8> addrspace(1)* %in + %ext = zext <1 x i8> %load to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v1i8_to_v1i64: +; SI: s_endpgm +define void @sextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <1 x i8> addrspace(1)* %in + %ext = sext <1 x i8> %load to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v2i8_to_v2i64: +; SI: s_endpgm +define void @zextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <2 x i8> addrspace(1)* %in + %ext = zext <2 x i8> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v2i8_to_v2i64: +; SI: s_endpgm +define void @sextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <2 x i8> addrspace(1)* %in + %ext = sext <2 x i8> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v4i8_to_v4i64: +; SI: s_endpgm +define void @zextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <4 x i8> addrspace(1)* %in + %ext = zext <4 x i8> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(1)* %out + ret void +} 
+ +; FUNC-LABEL: {{^}}sextload_global_v4i8_to_v4i64: +; SI: s_endpgm +define void @sextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <4 x i8> addrspace(1)* %in + %ext = sext <4 x i8> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v8i8_to_v8i64: +; SI: s_endpgm +define void @zextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <8 x i8> addrspace(1)* %in + %ext = zext <8 x i8> %load to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v8i8_to_v8i64: +; SI: s_endpgm +define void @sextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <8 x i8> addrspace(1)* %in + %ext = sext <8 x i8> %load to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}zextload_global_v16i8_to_v16i64: +; SI: s_endpgm +define void @zextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <16 x i8> addrspace(1)* %in + %ext = zext <16 x i8> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sextload_global_v16i8_to_v16i64: +; SI: s_endpgm +define void @sextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <16 x i8> addrspace(1)* %in + %ext = sext <16 x i8> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(1)* %out + ret void +} + +; XFUNC-LABEL: {{^}}zextload_global_v32i8_to_v32i64: +; XSI: s_endpgm +; define void @zextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind { +; %load = load <32 x i8> addrspace(1)* %in +; %ext = zext 
<32 x i8> %load to <32 x i64> +; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out +; ret void +; } + +; XFUNC-LABEL: {{^}}sextload_global_v32i8_to_v32i64: +; XSI: s_endpgm +; define void @sextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind { +; %load = load <32 x i8> addrspace(1)* %in +; %ext = sext <32 x i8> %load to <32 x i64> +; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out +; ret void +; } + +; XFUNC-LABEL: {{^}}zextload_global_v64i8_to_v64i64: +; XSI: s_endpgm +; define void @zextload_global_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind { +; %load = load <64 x i8> addrspace(1)* %in +; %ext = zext <64 x i8> %load to <64 x i64> +; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out +; ret void +; } + +; XFUNC-LABEL: {{^}}sextload_global_v64i8_to_v64i64: +; XSI: s_endpgm +; define void @sextload_global_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind { +; %load = load <64 x i8> addrspace(1)* %in +; %ext = sext <64 x i8> %load to <64 x i64> +; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out +; ret void +; } diff --git a/test/CodeGen/R600/unaligned-load-store.ll b/test/CodeGen/R600/unaligned-load-store.ll index ea3523c504a..47fba78544d 100644 --- a/test/CodeGen/R600/unaligned-load-store.ll +++ b/test/CodeGen/R600/unaligned-load-store.ll @@ -1,9 +1,8 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s -; FIXME: This is probably wrong. This probably needs to expand to 8-bit reads and writes. 
; SI-LABEL: {{^}}unaligned_load_store_i32: -; SI: ds_read_u16 -; SI: ds_read_u16 +; SI: ds_read_u8 +; SI: ds_read_u8 ; SI: ds_write_b32 ; SI: s_endpgm define void @unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind { @@ -13,14 +12,26 @@ define void @unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r } ; SI-LABEL: {{^}}unaligned_load_store_v4i32: -; SI: ds_read_u16 -; SI: ds_read_u16 -; SI: ds_read_u16 -; SI: ds_read_u16 -; SI: ds_read_u16 -; SI: ds_read_u16 -; SI: ds_read_u16 -; SI: ds_read_u16 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 + +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 + +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 + +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 + ; SI: ds_write_b32 ; SI: ds_write_b32 ; SI: ds_write_b32