mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-08 03:30:22 +00:00
538287dea2
The ABI allows sub-128 vectors to be passed and returned in registers, with the vector occupying the upper part of a register. We therefore want to legalize those types by widening the vector rather than promoting the elements. The patch includes some simple tests for sub-128 vectors and also tests that we can recognize various pack sequences, some of which use sub-128 vectors as temporary results. One of these forms is based on the pack sequences generated by llvmpipe when no intrinsics are used. Signed unpacks are recognized as BUILD_VECTORs whose elements are individually sign-extended. Unsigned unpacks can have the equivalent form with zero extension, but they also occur as shuffles in which some elements are zero. Based on a patch by Richard Sandiford. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@236525 91177308-0d34-0410-b5e6-96231b3b80d8
434 lines
20 KiB
LLVM
434 lines
20 KiB
LLVM
; Test various representations of pack-like operations.
|
|
;
|
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
|
|
|
|
; Pack two <4 x i32> inputs into one <8 x i16>.  Every word is reinterpreted
; as a <2 x i16> pair, element 1 of each pair is kept (the low-order halfword
; on big-endian s390x), and the pieces are concatenated through sub-128-bit
; temporaries.  The whole sequence is expected to collapse to a single VPKF.
define <8 x i16> @f1(<4 x i32> %val0, <4 x i32> %val1) {
; CHECK-LABEL: f1:
; CHECK: vpkf %v24, %v24, %v26
; CHECK: br %r14
  ; Split both inputs into their individual words.
  %word0 = extractelement <4 x i32> %val0, i32 0
  %word1 = extractelement <4 x i32> %val0, i32 1
  %word2 = extractelement <4 x i32> %val0, i32 2
  %word3 = extractelement <4 x i32> %val0, i32 3
  %word4 = extractelement <4 x i32> %val1, i32 0
  %word5 = extractelement <4 x i32> %val1, i32 1
  %word6 = extractelement <4 x i32> %val1, i32 2
  %word7 = extractelement <4 x i32> %val1, i32 3

  ; View each word as a pair of halfwords.
  %pair0 = bitcast i32 %word0 to <2 x i16>
  %pair1 = bitcast i32 %word1 to <2 x i16>
  %pair2 = bitcast i32 %word2 to <2 x i16>
  %pair3 = bitcast i32 %word3 to <2 x i16>
  %pair4 = bitcast i32 %word4 to <2 x i16>
  %pair5 = bitcast i32 %word5 to <2 x i16>
  %pair6 = bitcast i32 %word6 to <2 x i16>
  %pair7 = bitcast i32 %word7 to <2 x i16>

  ; Mask <1, 3> keeps element 1 of each of the two pairs.
  %lo0 = shufflevector <2 x i16> %pair0, <2 x i16> %pair1,
      <2 x i32> <i32 1, i32 3>
  %lo1 = shufflevector <2 x i16> %pair2, <2 x i16> %pair3,
      <2 x i32> <i32 1, i32 3>
  %lo2 = shufflevector <2 x i16> %pair4, <2 x i16> %pair5,
      <2 x i32> <i32 1, i32 3>
  %lo3 = shufflevector <2 x i16> %pair6, <2 x i16> %pair7,
      <2 x i32> <i32 1, i32 3>

  ; Concatenate 2+2 halfwords, then 4+4 halfwords.
  %quad0 = shufflevector <2 x i16> %lo0, <2 x i16> %lo1,
      <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %quad1 = shufflevector <2 x i16> %lo2, <2 x i16> %lo3,
      <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %packed = shufflevector <4 x i16> %quad0, <4 x i16> %quad1,
      <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %packed
}
|
|
|
|
; The same <4 x i32> -> <8 x i16> pack as f1, written with full-width
; temporaries only: each word is inserted into lane 0 of an otherwise undef
; <4 x i32>, viewed as <8 x i16>, and combined with shuffles whose unused
; lanes are undef.  A single VPKF is still expected.
define <8 x i16> @f2(<4 x i32> %val0, <4 x i32> %val1) {
; CHECK-LABEL: f2:
; CHECK: vpkf %v24, %v24, %v26
; CHECK: br %r14
  ; Split both inputs into their individual words.
  %word0 = extractelement <4 x i32> %val0, i32 0
  %word1 = extractelement <4 x i32> %val0, i32 1
  %word2 = extractelement <4 x i32> %val0, i32 2
  %word3 = extractelement <4 x i32> %val0, i32 3
  %word4 = extractelement <4 x i32> %val1, i32 0
  %word5 = extractelement <4 x i32> %val1, i32 1
  %word6 = extractelement <4 x i32> %val1, i32 2
  %word7 = extractelement <4 x i32> %val1, i32 3

  ; Place every word in lane 0 of its own vector.
  %wide0 = insertelement <4 x i32> undef, i32 %word0, i32 0
  %wide1 = insertelement <4 x i32> undef, i32 %word1, i32 0
  %wide2 = insertelement <4 x i32> undef, i32 %word2, i32 0
  %wide3 = insertelement <4 x i32> undef, i32 %word3, i32 0
  %wide4 = insertelement <4 x i32> undef, i32 %word4, i32 0
  %wide5 = insertelement <4 x i32> undef, i32 %word5, i32 0
  %wide6 = insertelement <4 x i32> undef, i32 %word6, i32 0
  %wide7 = insertelement <4 x i32> undef, i32 %word7, i32 0

  ; Reinterpret each of those vectors as eight halfwords.
  %halves0 = bitcast <4 x i32> %wide0 to <8 x i16>
  %halves1 = bitcast <4 x i32> %wide1 to <8 x i16>
  %halves2 = bitcast <4 x i32> %wide2 to <8 x i16>
  %halves3 = bitcast <4 x i32> %wide3 to <8 x i16>
  %halves4 = bitcast <4 x i32> %wide4 to <8 x i16>
  %halves5 = bitcast <4 x i32> %wide5 to <8 x i16>
  %halves6 = bitcast <4 x i32> %wide6 to <8 x i16>
  %halves7 = bitcast <4 x i32> %wide7 to <8 x i16>

  ; Indices 1 and 9 select halfword 1 of the first and second operand.
  %lo0 = shufflevector <8 x i16> %halves0, <8 x i16> %halves1,
      <8 x i32> <i32 1, i32 9, i32 undef, i32 undef,
                 i32 undef, i32 undef, i32 undef, i32 undef>
  %lo1 = shufflevector <8 x i16> %halves2, <8 x i16> %halves3,
      <8 x i32> <i32 1, i32 9, i32 undef, i32 undef,
                 i32 undef, i32 undef, i32 undef, i32 undef>
  %lo2 = shufflevector <8 x i16> %halves4, <8 x i16> %halves5,
      <8 x i32> <i32 1, i32 9, i32 undef, i32 undef,
                 i32 undef, i32 undef, i32 undef, i32 undef>
  %lo3 = shufflevector <8 x i16> %halves6, <8 x i16> %halves7,
      <8 x i32> <i32 1, i32 9, i32 undef, i32 undef,
                 i32 undef, i32 undef, i32 undef, i32 undef>

  ; Gather the two defined lanes of each operand into lanes 0-3.
  %quad0 = shufflevector <8 x i16> %lo0, <8 x i16> %lo1,
      <8 x i32> <i32 0, i32 1, i32 8, i32 9,
                 i32 undef, i32 undef, i32 undef, i32 undef>
  %quad1 = shufflevector <8 x i16> %lo2, <8 x i16> %lo3,
      <8 x i32> <i32 0, i32 1, i32 8, i32 9,
                 i32 undef, i32 undef, i32 undef, i32 undef>

  ; Final result: lanes 0-3 of each half.
  %packed = shufflevector <8 x i16> %quad0, <8 x i16> %quad1,
      <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %packed
}
|
|
|
|
; The pack expressed directly: view both inputs as <8 x i16> and take every
; odd-numbered halfword of the concatenation with one shuffle.
define <8 x i16> @f3(<4 x i32> %val0, <4 x i32> %val1) {
; CHECK-LABEL: f3:
; CHECK: vpkf %v24, %v24, %v26
; CHECK: br %r14
  %halves0 = bitcast <4 x i32> %val0 to <8 x i16>
  %halves1 = bitcast <4 x i32> %val1 to <8 x i16>
  %packed = shufflevector <8 x i16> %halves0, <8 x i16> %halves1,
      <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  ret <8 x i16> %packed
}
|
|
|
|
; A two-stage <4 x i32> -> <16 x i8> pack: words are first narrowed to
; halfwords, then halfwords to bytes.  The first stage may be emitted as
; either VPKF or VPKH, because the even bytes of its result are thrown away
; by the second stage; the checks accept both.
define <16 x i8> @f4(<4 x i32> %val0, <4 x i32> %val1, <4 x i32> %val2,
                     <4 x i32> %val3) {
; CHECK-LABEL: f4:
; CHECK-DAG: vpk{{[hf]}} [[REG1:%v[0-9]+]], %v24, %v26
; CHECK-DAG: vpk{{[hf]}} [[REG2:%v[0-9]+]], %v28, %v30
; CHECK: vpkh %v24, [[REG1]], [[REG2]]
; CHECK: br %r14
  %halves0 = bitcast <4 x i32> %val0 to <8 x i16>
  %halves1 = bitcast <4 x i32> %val1 to <8 x i16>
  %halves2 = bitcast <4 x i32> %val2 to <8 x i16>
  %halves3 = bitcast <4 x i32> %val3 to <8 x i16>

  ; Stage 1: keep the odd-numbered halfwords of each input pair.
  %narrow0 = shufflevector <8 x i16> %halves0, <8 x i16> %halves1,
      <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %narrow1 = shufflevector <8 x i16> %halves2, <8 x i16> %halves3,
      <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>

  ; Stage 2: keep the odd-numbered bytes of the two stage-1 results.
  %bytes0 = bitcast <8 x i16> %narrow0 to <16 x i8>
  %bytes1 = bitcast <8 x i16> %narrow1 to <16 x i8>
  %packed = shufflevector <16 x i8> %bytes0, <16 x i8> %bytes1,
      <16 x i32> <i32 1, i32 3, i32 5, i32 7,
                  i32 9, i32 11, i32 13, i32 15,
                  i32 17, i32 19, i32 21, i32 23,
                  i32 25, i32 27, i32 29, i32 31>
  ret <16 x i8> %packed
}
|
|
|
|
; Same two-stage pack as f4, but the packed vector is never returned: four
; of its bytes are extracted and stored to consecutive addresses at %base.
; The checks expect each byte to be stored with VSTEB straight from the
; argument register that originally held it (%v24, %v26, %v28, %v30).
define void @f5(<4 x i32> %val0, <4 x i32> %val1, <4 x i32> %val2,
                <4 x i32> %val3, i8 *%base) {
; CHECK-LABEL: f5:
; CHECK-DAG: vsteb %v24, 0(%r2), 11
; CHECK-DAG: vsteb %v26, 1(%r2), 15
; CHECK-DAG: vsteb %v28, 2(%r2), 3
; CHECK-DAG: vsteb %v30, 3(%r2), 7
; CHECK: br %r14
  %halves0 = bitcast <4 x i32> %val0 to <8 x i16>
  %halves1 = bitcast <4 x i32> %val1 to <8 x i16>
  %halves2 = bitcast <4 x i32> %val2 to <8 x i16>
  %halves3 = bitcast <4 x i32> %val3 to <8 x i16>

  ; Stage 1: odd-numbered halfwords of each input pair.
  %narrow0 = shufflevector <8 x i16> %halves0, <8 x i16> %halves1,
      <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %narrow1 = shufflevector <8 x i16> %halves2, <8 x i16> %halves3,
      <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>

  ; Stage 2: odd-numbered bytes of the two stage-1 results.
  %bytes0 = bitcast <8 x i16> %narrow0 to <16 x i8>
  %bytes1 = bitcast <8 x i16> %narrow1 to <16 x i8>
  %packed = shufflevector <16 x i8> %bytes0, <16 x i8> %bytes1,
      <16 x i32> <i32 1, i32 3, i32 5, i32 7,
                  i32 9, i32 11, i32 13, i32 15,
                  i32 17, i32 19, i32 21, i32 23,
                  i32 25, i32 27, i32 29, i32 31>

  ; Destination addresses %base+0 .. %base+3.
  %dst0 = getelementptr i8, i8 *%base, i64 0
  %dst1 = getelementptr i8, i8 *%base, i64 1
  %dst2 = getelementptr i8, i8 *%base, i64 2
  %dst3 = getelementptr i8, i8 *%base, i64 3

  ; One byte is taken from each quarter of the packed vector, i.e. one byte
  ; originating from each of the four inputs.
  %b0 = extractelement <16 x i8> %packed, i32 2
  %b1 = extractelement <16 x i8> %packed, i32 7
  %b2 = extractelement <16 x i8> %packed, i32 8
  %b3 = extractelement <16 x i8> %packed, i32 13

  store i8 %b0, i8 *%dst0
  store i8 %b1, i8 *%dst1
  store i8 %b2, i8 *%dst2
  store i8 %b3, i8 *%dst3

  ret void
}
|
|
|
|
; Another spelling of the <4 x i32> -> <16 x i8> pack, built element by
; element through sub-128-bit temporaries: i32 -> <2 x i16> -> <4 x i8>,
; narrowing at each step and concatenating at the end.  The expected code is
; the same two-level pack sequence as in f4.
define <16 x i8> @f6(<4 x i32> %val0, <4 x i32> %val1, <4 x i32> %val2,
                     <4 x i32> %val3) {
; CHECK-LABEL: f6:
; CHECK-DAG: vpk{{[hf]}} [[REG1:%v[0-9]+]], %v24, %v26
; CHECK-DAG: vpk{{[hf]}} [[REG2:%v[0-9]+]], %v28, %v30
; CHECK: vpkh %v24, [[REG1]], [[REG2]]
; CHECK: br %r14
  ; Split all four inputs into their individual words.
  %word0 = extractelement <4 x i32> %val0, i32 0
  %word1 = extractelement <4 x i32> %val0, i32 1
  %word2 = extractelement <4 x i32> %val0, i32 2
  %word3 = extractelement <4 x i32> %val0, i32 3
  %word4 = extractelement <4 x i32> %val1, i32 0
  %word5 = extractelement <4 x i32> %val1, i32 1
  %word6 = extractelement <4 x i32> %val1, i32 2
  %word7 = extractelement <4 x i32> %val1, i32 3
  %word8 = extractelement <4 x i32> %val2, i32 0
  %word9 = extractelement <4 x i32> %val2, i32 1
  %word10 = extractelement <4 x i32> %val2, i32 2
  %word11 = extractelement <4 x i32> %val2, i32 3
  %word12 = extractelement <4 x i32> %val3, i32 0
  %word13 = extractelement <4 x i32> %val3, i32 1
  %word14 = extractelement <4 x i32> %val3, i32 2
  %word15 = extractelement <4 x i32> %val3, i32 3

  ; View each word as a pair of halfwords.
  %pair0 = bitcast i32 %word0 to <2 x i16>
  %pair1 = bitcast i32 %word1 to <2 x i16>
  %pair2 = bitcast i32 %word2 to <2 x i16>
  %pair3 = bitcast i32 %word3 to <2 x i16>
  %pair4 = bitcast i32 %word4 to <2 x i16>
  %pair5 = bitcast i32 %word5 to <2 x i16>
  %pair6 = bitcast i32 %word6 to <2 x i16>
  %pair7 = bitcast i32 %word7 to <2 x i16>
  %pair8 = bitcast i32 %word8 to <2 x i16>
  %pair9 = bitcast i32 %word9 to <2 x i16>
  %pair10 = bitcast i32 %word10 to <2 x i16>
  %pair11 = bitcast i32 %word11 to <2 x i16>
  %pair12 = bitcast i32 %word12 to <2 x i16>
  %pair13 = bitcast i32 %word13 to <2 x i16>
  %pair14 = bitcast i32 %word14 to <2 x i16>
  %pair15 = bitcast i32 %word15 to <2 x i16>

  ; First narrowing: keep element 1 of each halfword pair.
  %lo0 = shufflevector <2 x i16> %pair0, <2 x i16> %pair1,
      <2 x i32> <i32 1, i32 3>
  %lo1 = shufflevector <2 x i16> %pair2, <2 x i16> %pair3,
      <2 x i32> <i32 1, i32 3>
  %lo2 = shufflevector <2 x i16> %pair4, <2 x i16> %pair5,
      <2 x i32> <i32 1, i32 3>
  %lo3 = shufflevector <2 x i16> %pair6, <2 x i16> %pair7,
      <2 x i32> <i32 1, i32 3>
  %lo4 = shufflevector <2 x i16> %pair8, <2 x i16> %pair9,
      <2 x i32> <i32 1, i32 3>
  %lo5 = shufflevector <2 x i16> %pair10, <2 x i16> %pair11,
      <2 x i32> <i32 1, i32 3>
  %lo6 = shufflevector <2 x i16> %pair12, <2 x i16> %pair13,
      <2 x i32> <i32 1, i32 3>
  %lo7 = shufflevector <2 x i16> %pair14, <2 x i16> %pair15,
      <2 x i32> <i32 1, i32 3>

  ; View each halfword pair as four bytes.
  %quad0 = bitcast <2 x i16> %lo0 to <4 x i8>
  %quad1 = bitcast <2 x i16> %lo1 to <4 x i8>
  %quad2 = bitcast <2 x i16> %lo2 to <4 x i8>
  %quad3 = bitcast <2 x i16> %lo3 to <4 x i8>
  %quad4 = bitcast <2 x i16> %lo4 to <4 x i8>
  %quad5 = bitcast <2 x i16> %lo5 to <4 x i8>
  %quad6 = bitcast <2 x i16> %lo6 to <4 x i8>
  %quad7 = bitcast <2 x i16> %lo7 to <4 x i8>

  ; Second narrowing: keep the odd-numbered bytes.
  %blo0 = shufflevector <4 x i8> %quad0, <4 x i8> %quad1,
      <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %blo1 = shufflevector <4 x i8> %quad2, <4 x i8> %quad3,
      <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %blo2 = shufflevector <4 x i8> %quad4, <4 x i8> %quad5,
      <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %blo3 = shufflevector <4 x i8> %quad6, <4 x i8> %quad7,
      <4 x i32> <i32 1, i32 3, i32 5, i32 7>

  ; Concatenate 4+4 bytes, then 8+8 bytes.
  %octet0 = shufflevector <4 x i8> %blo0, <4 x i8> %blo1,
      <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %octet1 = shufflevector <4 x i8> %blo2, <4 x i8> %blo3,
      <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %packed = shufflevector <8 x i8> %octet0, <8 x i8> %octet1,
      <16 x i32> <i32 0, i32 1, i32 2, i32 3,
                  i32 4, i32 5, i32 6, i32 7,
                  i32 8, i32 9, i32 10, i32 11,
                  i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %packed
}
|
|
|
|
; A three-level <2 x i64> -> <16 x i8> pack over eight inputs, narrowing
; i64 -> i32 -> i16 -> i8 through sub-128-bit temporaries.  The checks
; expect four first-level packs (any of VPKG/VPKF/VPKH), two second-level
; packs (VPKF or VPKH) and a final VPKH into %v24.
define <16 x i8> @f7(<2 x i64> %val0, <2 x i64> %val1, <2 x i64> %val2,
                     <2 x i64> %val3, <2 x i64> %val4, <2 x i64> %val5,
                     <2 x i64> %val6, <2 x i64> %val7) {
; CHECK-LABEL: f7:
; CHECK-DAG: vpk{{[hfg]}} [[REG1:%v[0-9]+]], %v24, %v26
; CHECK-DAG: vpk{{[hfg]}} [[REG2:%v[0-9]+]], %v28, %v30
; CHECK-DAG: vpk{{[hfg]}} [[REG3:%v[0-9]+]], %v25, %v27
; CHECK-DAG: vpk{{[hfg]}} [[REG4:%v[0-9]+]], %v29, %v31
; CHECK-DAG: vpk{{[hf]}} [[REG5:%v[0-9]+]], [[REG1]], [[REG2]]
; CHECK-DAG: vpk{{[hf]}} [[REG6:%v[0-9]+]], [[REG3]], [[REG4]]
; CHECK: vpkh %v24, [[REG5]], [[REG6]]
; CHECK: br %r14
  ; Split all eight inputs into their individual doublewords.
  %dword0 = extractelement <2 x i64> %val0, i32 0
  %dword1 = extractelement <2 x i64> %val0, i32 1
  %dword2 = extractelement <2 x i64> %val1, i32 0
  %dword3 = extractelement <2 x i64> %val1, i32 1
  %dword4 = extractelement <2 x i64> %val2, i32 0
  %dword5 = extractelement <2 x i64> %val2, i32 1
  %dword6 = extractelement <2 x i64> %val3, i32 0
  %dword7 = extractelement <2 x i64> %val3, i32 1
  %dword8 = extractelement <2 x i64> %val4, i32 0
  %dword9 = extractelement <2 x i64> %val4, i32 1
  %dword10 = extractelement <2 x i64> %val5, i32 0
  %dword11 = extractelement <2 x i64> %val5, i32 1
  %dword12 = extractelement <2 x i64> %val6, i32 0
  %dword13 = extractelement <2 x i64> %val6, i32 1
  %dword14 = extractelement <2 x i64> %val7, i32 0
  %dword15 = extractelement <2 x i64> %val7, i32 1

  ; View each doubleword as a pair of words.
  %pair0 = bitcast i64 %dword0 to <2 x i32>
  %pair1 = bitcast i64 %dword1 to <2 x i32>
  %pair2 = bitcast i64 %dword2 to <2 x i32>
  %pair3 = bitcast i64 %dword3 to <2 x i32>
  %pair4 = bitcast i64 %dword4 to <2 x i32>
  %pair5 = bitcast i64 %dword5 to <2 x i32>
  %pair6 = bitcast i64 %dword6 to <2 x i32>
  %pair7 = bitcast i64 %dword7 to <2 x i32>
  %pair8 = bitcast i64 %dword8 to <2 x i32>
  %pair9 = bitcast i64 %dword9 to <2 x i32>
  %pair10 = bitcast i64 %dword10 to <2 x i32>
  %pair11 = bitcast i64 %dword11 to <2 x i32>
  %pair12 = bitcast i64 %dword12 to <2 x i32>
  %pair13 = bitcast i64 %dword13 to <2 x i32>
  %pair14 = bitcast i64 %dword14 to <2 x i32>
  %pair15 = bitcast i64 %dword15 to <2 x i32>

  ; Level 1: keep element 1 of each word pair.
  %lo0 = shufflevector <2 x i32> %pair0, <2 x i32> %pair1,
      <2 x i32> <i32 1, i32 3>
  %lo1 = shufflevector <2 x i32> %pair2, <2 x i32> %pair3,
      <2 x i32> <i32 1, i32 3>
  %lo2 = shufflevector <2 x i32> %pair4, <2 x i32> %pair5,
      <2 x i32> <i32 1, i32 3>
  %lo3 = shufflevector <2 x i32> %pair6, <2 x i32> %pair7,
      <2 x i32> <i32 1, i32 3>
  %lo4 = shufflevector <2 x i32> %pair8, <2 x i32> %pair9,
      <2 x i32> <i32 1, i32 3>
  %lo5 = shufflevector <2 x i32> %pair10, <2 x i32> %pair11,
      <2 x i32> <i32 1, i32 3>
  %lo6 = shufflevector <2 x i32> %pair12, <2 x i32> %pair13,
      <2 x i32> <i32 1, i32 3>
  %lo7 = shufflevector <2 x i32> %pair14, <2 x i32> %pair15,
      <2 x i32> <i32 1, i32 3>

  ; View each word pair as four halfwords.
  %hv0 = bitcast <2 x i32> %lo0 to <4 x i16>
  %hv1 = bitcast <2 x i32> %lo1 to <4 x i16>
  %hv2 = bitcast <2 x i32> %lo2 to <4 x i16>
  %hv3 = bitcast <2 x i32> %lo3 to <4 x i16>
  %hv4 = bitcast <2 x i32> %lo4 to <4 x i16>
  %hv5 = bitcast <2 x i32> %lo5 to <4 x i16>
  %hv6 = bitcast <2 x i32> %lo6 to <4 x i16>
  %hv7 = bitcast <2 x i32> %lo7 to <4 x i16>

  ; Level 2: keep the odd-numbered halfwords.
  %hlo0 = shufflevector <4 x i16> %hv0, <4 x i16> %hv1,
      <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %hlo1 = shufflevector <4 x i16> %hv2, <4 x i16> %hv3,
      <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %hlo2 = shufflevector <4 x i16> %hv4, <4 x i16> %hv5,
      <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %hlo3 = shufflevector <4 x i16> %hv6, <4 x i16> %hv7,
      <4 x i32> <i32 1, i32 3, i32 5, i32 7>

  ; View each halfword group as eight bytes.
  %octet0 = bitcast <4 x i16> %hlo0 to <8 x i8>
  %octet1 = bitcast <4 x i16> %hlo1 to <8 x i8>
  %octet2 = bitcast <4 x i16> %hlo2 to <8 x i8>
  %octet3 = bitcast <4 x i16> %hlo3 to <8 x i8>

  ; Level 3: keep the odd-numbered bytes.
  %blo0 = shufflevector <8 x i8> %octet0, <8 x i8> %octet1,
      <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %blo1 = shufflevector <8 x i8> %octet2, <8 x i8> %octet3,
      <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>

  ; Concatenate the two 8-byte halves.
  %packed = shufflevector <8 x i8> %blo0, <8 x i8> %blo1,
      <16 x i32> <i32 0, i32 1, i32 2, i32 3,
                  i32 4, i32 5, i32 6, i32 7,
                  i32 8, i32 9, i32 10, i32 11,
                  i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %packed
}
|
|
|
|
; A <2 x i64> -> <4 x float> pack whose result is only consumed one lane at
; a time.  The checks require that no VPERM, VPK or VMRH appears before the
; first AEBR, followed by a second AEBR and a MEEBR producing %f0.
define float @f8(i64 %scalar0, i64 %scalar1, i64 %scalar2, i64 %scalar3) {
; CHECK-LABEL: f8:
; CHECK-NOT: vperm
; CHECK-NOT: vpk
; CHECK-NOT: vmrh
; CHECK: aebr {{%f[0-7]}},
; CHECK: aebr {{%f[0-7]}},
; CHECK: meebr %f0,
; CHECK: br %r14
  ; Put each scalar in lane 0 of its own vector, then pair the lanes up.
  %ins0 = insertelement <2 x i64> undef, i64 %scalar0, i32 0
  %ins1 = insertelement <2 x i64> undef, i64 %scalar1, i32 0
  %ins2 = insertelement <2 x i64> undef, i64 %scalar2, i32 0
  %ins3 = insertelement <2 x i64> undef, i64 %scalar3, i32 0
  %pair0 = shufflevector <2 x i64> %ins0, <2 x i64> %ins1,
      <2 x i32> <i32 0, i32 2>
  %pair1 = shufflevector <2 x i64> %ins2, <2 x i64> %ins3,
      <2 x i32> <i32 0, i32 2>

  ; Reinterpret as floats and keep the odd-numbered lanes.
  %fvec0 = bitcast <2 x i64> %pair0 to <4 x float>
  %fvec1 = bitcast <2 x i64> %pair1 to <4 x float>
  %packed = shufflevector <4 x float> %fvec0, <4 x float> %fvec1,
      <4 x i32> <i32 1, i32 3, i32 5, i32 7>

  ; Consume every lane of the pack as a scalar.
  %lane0 = extractelement <4 x float> %packed, i32 0
  %lane1 = extractelement <4 x float> %packed, i32 1
  %lane2 = extractelement <4 x float> %packed, i32 2
  %lane3 = extractelement <4 x float> %packed, i32 3
  %sum0 = fadd float %lane0, %lane2
  %sum1 = fadd float %lane1, %lane3
  %prod = fmul float %sum0, %sum1
  ret float %prod
}
|
|
|
|
; A <2 x double> -> <4 x i32> pack whose result is only consumed one lane at
; a time.  The checks require that no VPERM, VPK or VMRH appears before the
; first AR, followed by a second AR and an OR producing %r2.
define i32 @f9(double %scalar0, double %scalar1, double %scalar2,
               double %scalar3) {
; CHECK-LABEL: f9:
; CHECK-NOT: vperm
; CHECK-NOT: vpk
; CHECK-NOT: vmrh
; CHECK: ar {{%r[0-5]}},
; CHECK: ar {{%r[0-5]}},
; CHECK: or %r2,
; CHECK: br %r14
  ; Put each scalar in lane 0 of its own vector, then pair the lanes up.
  %ins0 = insertelement <2 x double> undef, double %scalar0, i32 0
  %ins1 = insertelement <2 x double> undef, double %scalar1, i32 0
  %ins2 = insertelement <2 x double> undef, double %scalar2, i32 0
  %ins3 = insertelement <2 x double> undef, double %scalar3, i32 0
  %pair0 = shufflevector <2 x double> %ins0, <2 x double> %ins1,
      <2 x i32> <i32 0, i32 2>
  %pair1 = shufflevector <2 x double> %ins2, <2 x double> %ins3,
      <2 x i32> <i32 0, i32 2>

  ; Reinterpret as words and keep the odd-numbered lanes.
  %ivec0 = bitcast <2 x double> %pair0 to <4 x i32>
  %ivec1 = bitcast <2 x double> %pair1 to <4 x i32>
  %packed = shufflevector <4 x i32> %ivec0, <4 x i32> %ivec1,
      <4 x i32> <i32 1, i32 3, i32 5, i32 7>

  ; Consume every lane of the pack as a scalar.
  %lane0 = extractelement <4 x i32> %packed, i32 0
  %lane1 = extractelement <4 x i32> %packed, i32 1
  %lane2 = extractelement <4 x i32> %packed, i32 2
  %lane3 = extractelement <4 x i32> %packed, i32 3
  %sum0 = add i32 %lane0, %lane2
  %sum1 = add i32 %lane1, %lane3
  %merged = or i32 %sum0, %sum1
  ret i32 %merged
}
|