diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index f9a0d2901a2..9afbc5ef66e 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -3098,7 +3098,10 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
     unsigned MemVTWidth = MemVT.getSizeInBits();
     if (MemVT.getSizeInBits() <= WidenEltWidth)
       break;
-    if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
+    auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT);
+    if ((Action == TargetLowering::TypeLegal ||
+         Action == TargetLowering::TypePromoteInteger) &&
+        (WidenWidth % MemVTWidth) == 0 &&
         isPowerOf2_32(WidenWidth / MemVTWidth) &&
         (MemVTWidth <= Width ||
          (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
diff --git a/test/CodeGen/SystemZ/vec-move-02.ll b/test/CodeGen/SystemZ/vec-move-02.ll
index e43676055fa..dcaf0acccb2 100644
--- a/test/CodeGen/SystemZ/vec-move-02.ll
+++ b/test/CodeGen/SystemZ/vec-move-02.ll
@@ -109,3 +109,66 @@ define <16 x i8> @f11(i8 *%base, i64 %index) {
   %ret = load <16 x i8>, <16 x i8> *%ptr, align 1
   ret <16 x i8> %ret
 }
+
+; Test v2i8 loads, which should use VLREPH.
+define <2 x i8> @f12(<2 x i8> *%ptr) {
+; CHECK-LABEL: f12:
+; CHECK: vlreph %v24, 0(%r2)
+; CHECK: br %r14
+  %ret = load <2 x i8>, <2 x i8> *%ptr
+  ret <2 x i8> %ret
+}
+
+; Test v4i8 loads, which should use VLREPF.
+define <4 x i8> @f13(<4 x i8> *%ptr) {
+; CHECK-LABEL: f13:
+; CHECK: vlrepf %v24, 0(%r2)
+; CHECK: br %r14
+  %ret = load <4 x i8>, <4 x i8> *%ptr
+  ret <4 x i8> %ret
+}
+
+; Test v8i8 loads, which should use VLREPG.
+define <8 x i8> @f14(<8 x i8> *%ptr) {
+; CHECK-LABEL: f14:
+; CHECK: vlrepg %v24, 0(%r2)
+; CHECK: br %r14
+  %ret = load <8 x i8>, <8 x i8> *%ptr
+  ret <8 x i8> %ret
+}
+
+; Test v2i16 loads, which should use VLREPF.
+define <2 x i16> @f15(<2 x i16> *%ptr) {
+; CHECK-LABEL: f15:
+; CHECK: vlrepf %v24, 0(%r2)
+; CHECK: br %r14
+  %ret = load <2 x i16>, <2 x i16> *%ptr
+  ret <2 x i16> %ret
+}
+
+; Test v4i16 loads, which should use VLREPG.
+define <4 x i16> @f16(<4 x i16> *%ptr) {
+; CHECK-LABEL: f16:
+; CHECK: vlrepg %v24, 0(%r2)
+; CHECK: br %r14
+  %ret = load <4 x i16>, <4 x i16> *%ptr
+  ret <4 x i16> %ret
+}
+
+; Test v2i32 loads, which should use VLREPG.
+define <2 x i32> @f17(<2 x i32> *%ptr) {
+; CHECK-LABEL: f17:
+; CHECK: vlrepg %v24, 0(%r2)
+; CHECK: br %r14
+  %ret = load <2 x i32>, <2 x i32> *%ptr
+  ret <2 x i32> %ret
+}
+
+; Test v2f32 loads, which should use VLREPG.
+define <2 x float> @f18(<2 x float> *%ptr) {
+; CHECK-LABEL: f18:
+; CHECK: vlrepg %v24, 0(%r2)
+; CHECK: br %r14
+  %ret = load <2 x float>, <2 x float> *%ptr
+  ret <2 x float> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-move-03.ll b/test/CodeGen/SystemZ/vec-move-03.ll
index 1b1f96163a0..f40e2cb2bf2 100644
--- a/test/CodeGen/SystemZ/vec-move-03.ll
+++ b/test/CodeGen/SystemZ/vec-move-03.ll
@@ -109,3 +109,66 @@ define void @f11(<16 x i8> %val, i8 *%base, i64 %index) {
   store <16 x i8> %val, <16 x i8> *%ptr, align 1
   ret void
 }
+
+; Test v2i8 stores, which should use VSTEH.
+define void @f12(<2 x i8> %val, <2 x i8> *%ptr) {
+; CHECK-LABEL: f12:
+; CHECK: vsteh %v24, 0(%r2), 0
+; CHECK: br %r14
+  store <2 x i8> %val, <2 x i8> *%ptr
+  ret void
+}
+
+; Test v4i8 stores, which should use VSTEF.
+define void @f13(<4 x i8> %val, <4 x i8> *%ptr) {
+; CHECK-LABEL: f13:
+; CHECK: vstef %v24, 0(%r2)
+; CHECK: br %r14
+  store <4 x i8> %val, <4 x i8> *%ptr
+  ret void
+}
+
+; Test v8i8 stores, which should use VSTEG.
+define void @f14(<8 x i8> %val, <8 x i8> *%ptr) {
+; CHECK-LABEL: f14:
+; CHECK: vsteg %v24, 0(%r2)
+; CHECK: br %r14
+  store <8 x i8> %val, <8 x i8> *%ptr
+  ret void
+}
+
+; Test v2i16 stores, which should use VSTEF.
+define void @f15(<2 x i16> %val, <2 x i16> *%ptr) {
+; CHECK-LABEL: f15:
+; CHECK: vstef %v24, 0(%r2), 0
+; CHECK: br %r14
+  store <2 x i16> %val, <2 x i16> *%ptr
+  ret void
+}
+
+; Test v4i16 stores, which should use VSTEG.
+define void @f16(<4 x i16> %val, <4 x i16> *%ptr) {
+; CHECK-LABEL: f16:
+; CHECK: vsteg %v24, 0(%r2)
+; CHECK: br %r14
+  store <4 x i16> %val, <4 x i16> *%ptr
+  ret void
+}
+
+; Test v2i32 stores, which should use VSTEG.
+define void @f17(<2 x i32> %val, <2 x i32> *%ptr) {
+; CHECK-LABEL: f17:
+; CHECK: vsteg %v24, 0(%r2), 0
+; CHECK: br %r14
+  store <2 x i32> %val, <2 x i32> *%ptr
+  ret void
+}
+
+; Test v2f32 stores, which should use VSTEG.
+define void @f18(<2 x float> %val, <2 x float> *%ptr) {
+; CHECK-LABEL: f18:
+; CHECK: vsteg %v24, 0(%r2), 0
+; CHECK: br %r14
+  store <2 x float> %val, <2 x float> *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/vec-move-15.ll b/test/CodeGen/SystemZ/vec-move-15.ll
index bf375e111cc..503627c163c 100644
--- a/test/CodeGen/SystemZ/vec-move-15.ll
+++ b/test/CodeGen/SystemZ/vec-move-15.ll
@@ -71,8 +71,7 @@ define <2 x i64> @f7(<2 x i1> *%ptr) {
 ; Test a v2i8->v2i64 extension.
 define <2 x i64> @f8(<2 x i8> *%ptr) {
 ; CHECK-LABEL: f8:
-; CHECK: vlrepb [[REG1:%v[0-9]+]], 0(%r2)
-; CHECK: vleb [[REG1]], 1(%r2), 1
+; CHECK: vlreph [[REG1:%v[0-9]+]], 0(%r2)
 ; CHECK: vuphb [[REG2:%v[0-9]+]], [[REG1]]
 ; CHECK: vuphh [[REG3:%v[0-9]+]], [[REG2]]
 ; CHECK: vuphf %v24, [[REG3]]
diff --git a/test/CodeGen/SystemZ/vec-move-16.ll b/test/CodeGen/SystemZ/vec-move-16.ll
index 152b0d4d88c..cd257739680 100644
--- a/test/CodeGen/SystemZ/vec-move-16.ll
+++ b/test/CodeGen/SystemZ/vec-move-16.ll
@@ -71,8 +71,7 @@ define <2 x i64> @f7(<2 x i1> *%ptr) {
 ; Test a v2i8->v2i64 extension.
 define <2 x i64> @f8(<2 x i8> *%ptr) {
 ; CHECK-LABEL: f8:
-; CHECK: vlrepb [[REG1:%v[0-9]+]], 0(%r2)
-; CHECK: vleb [[REG1]], 1(%r2), 1
+; CHECK: vlreph [[REG1:%v[0-9]+]], 0(%r2)
 ; CHECK: vuplhb [[REG2:%v[0-9]+]], [[REG1]]
 ; CHECK: vuplhh [[REG3:%v[0-9]+]], [[REG2]]
 ; CHECK: vuplhf %v24, [[REG3]]
diff --git a/test/CodeGen/SystemZ/vec-move-17.ll b/test/CodeGen/SystemZ/vec-move-17.ll
new file mode 100644
index 00000000000..e7fc06c9260
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-move-17.ll
@@ -0,0 +1,104 @@
+; Test vector truncating stores.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a v16i8->v16i1 truncation.
+define void @f1(<16 x i8> %val, <16 x i1> *%ptr) {
+; No expected output, but must compile.
+  %trunc = trunc <16 x i8> %val to <16 x i1>
+  store <16 x i1> %trunc, <16 x i1> *%ptr
+  ret void
+}
+
+; Test a v8i16->v8i1 truncation.
+define void @f2(<8 x i16> %val, <8 x i1> *%ptr) {
+; No expected output, but must compile.
+  %trunc = trunc <8 x i16> %val to <8 x i1>
+  store <8 x i1> %trunc, <8 x i1> *%ptr
+  ret void
+}
+
+; Test a v8i16->v8i8 truncation, which should use VPKH and then VSTEG.
+define void @f3(<8 x i16> %val, <8 x i8> *%ptr) {
+; CHECK-LABEL: f3:
+; CHECK: vpkh [[REG1:%v[0-9]+]], %v24, %v24
+; CHECK: vsteg [[REG1]], 0(%r2)
+; CHECK: br %r14
+  %trunc = trunc <8 x i16> %val to <8 x i8>
+  store <8 x i8> %trunc, <8 x i8> *%ptr
+  ret void
+}
+
+; Test a v4i32->v4i1 truncation.
+define void @f4(<4 x i32> %val, <4 x i1> *%ptr) {
+; No expected output, but must compile.
+  %trunc = trunc <4 x i32> %val to <4 x i1>
+  store <4 x i1> %trunc, <4 x i1> *%ptr
+  ret void
+}
+
+; Test a v4i32->v4i8 truncation. At the moment we use a VPERM rather than
+; a chain of packs.
+define void @f5(<4 x i32> %val, <4 x i8> *%ptr) {
+; CHECK-LABEL: f5:
+; CHECK: vperm [[REG:%v[0-9]+]],
+; CHECK: vstef [[REG]], 0(%r2)
+; CHECK: br %r14
+  %trunc = trunc <4 x i32> %val to <4 x i8>
+  store <4 x i8> %trunc, <4 x i8> *%ptr
+  ret void
+}
+
+; Test a v4i32->v4i16 truncation, which should use VPKF and then VSTEG.
+define void @f6(<4 x i32> %val, <4 x i16> *%ptr) {
+; CHECK-LABEL: f6:
+; CHECK: vpkf [[REG1:%v[0-9]+]], %v24, %v24
+; CHECK: vsteg [[REG1]], 0(%r2)
+; CHECK: br %r14
+  %trunc = trunc <4 x i32> %val to <4 x i16>
+  store <4 x i16> %trunc, <4 x i16> *%ptr
+  ret void
+}
+
+; Test a v2i64->v2i1 truncation.
+define void @f7(<2 x i64> %val, <2 x i1> *%ptr) {
+; No expected output, but must compile.
+  %trunc = trunc <2 x i64> %val to <2 x i1>
+  store <2 x i1> %trunc, <2 x i1> *%ptr
+  ret void
+}
+
+; Test a v2i64->v2i8 truncation. At the moment we use a VPERM rather than
+; a chain of packs.
+define void @f8(<2 x i64> %val, <2 x i8> *%ptr) {
+; CHECK-LABEL: f8:
+; CHECK: vperm [[REG:%v[0-9]+]],
+; CHECK: vsteh [[REG]], 0(%r2)
+; CHECK: br %r14
+  %trunc = trunc <2 x i64> %val to <2 x i8>
+  store <2 x i8> %trunc, <2 x i8> *%ptr
+  ret void
+}
+
+; Test a v2i64->v2i16 truncation. At the moment we use a VPERM rather than
+; a chain of packs.
+define void @f9(<2 x i64> %val, <2 x i16> *%ptr) {
+; CHECK-LABEL: f9:
+; CHECK: vperm [[REG:%v[0-9]+]],
+; CHECK: vstef [[REG]], 0(%r2)
+; CHECK: br %r14
+  %trunc = trunc <2 x i64> %val to <2 x i16>
+  store <2 x i16> %trunc, <2 x i16> *%ptr
+  ret void
+}
+
+; Test a v2i64->v2i32 truncation, which should use VPKG and then VSTEG.
+define void @f10(<2 x i64> %val, <2 x i32> *%ptr) {
+; CHECK-LABEL: f10:
+; CHECK: vpkg [[REG1:%v[0-9]+]], %v24, %v24
+; CHECK: vsteg [[REG1]], 0(%r2)
+; CHECK: br %r14
+  %trunc = trunc <2 x i64> %val to <2 x i32>
+  store <2 x i32> %trunc, <2 x i32> *%ptr
+  ret void
+}