Make better use of instructions that clear high bits; fix various 2-wide shuffle bugs.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@45058 91177308-0d34-0410-b5e6-96231b3b80d8
parent: 1d0ba37099
commit: 7a831ce85f
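In outline, reading from the diff below: the v8i16 / v16i8 "4 wide" shuffle rewrite (RewriteAs4WideShuffle) is generalized into RewriteAsNarrowerShuffle, which can also rewrite v4i32 / v4f32 shuffles as 2-wide v2i64 / v2f64 ones. LowerVECTOR_SHUFFLE now tries it early, so that shuffles against an all-zeros operand can be matched to the zero-extending movd / movq instruction forms (MOVZDI2PDI*, MOVZQI2PQI*, and the new MOVZPQILo2PQI* definitions), which are also added to the load-folding table. Two new tests, vec_shuffle-14.ll and vec_shuffle-15.ll, cover the movd/movq selection and all sixteen 2-wide shuffle masks.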
@@ -3138,8 +3138,6 @@ static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
   return V;
 }
 
-/// is4WideVector - Returns true if the specific v8i16 or v16i8 vector is
-/// actually just a 4 wide vector. e.g. <a, a, y, y, d, d, x, x>
 SDOperand
 X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
   // All zero's are handled with pxor, all one's are handled with pcmpeqd.
@@ -3562,17 +3560,35 @@ SDOperand LowerVECTOR_SHUFFLEv8i16(SDOperand V1, SDOperand V2,
   }
 }
 
-/// RewriteAs4WideShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
-/// ones if possible. This can be done when every pair / quad of shuffle mask
-/// elements point to elements in the right sequence. e.g.
+/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
+/// ones, or rewriting v4i32 / v2f32 as 2 wide ones if possible. This can be
+/// done when every pair / quad of shuffle mask elements point to elements in
+/// the right sequence. e.g.
 /// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15>
 static
-SDOperand RewriteAs4WideShuffle(SDOperand V1, SDOperand V2,
+SDOperand RewriteAsNarrowerShuffle(SDOperand V1, SDOperand V2,
+                                   MVT::ValueType VT,
                                    SDOperand PermMask, SelectionDAG &DAG,
                                    TargetLowering &TLI) {
   unsigned NumElems = PermMask.getNumOperands();
-  unsigned Scale = NumElems / 4;
-  SmallVector<SDOperand, 4> MaskVec;
+  unsigned NewWidth = (NumElems == 4) ? 2 : 4;
+  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NewWidth);
+  MVT::ValueType NewVT = MaskVT;
+  switch (VT) {
+  case MVT::v4f32: NewVT = MVT::v2f64; break;
+  case MVT::v4i32: NewVT = MVT::v2i64; break;
+  case MVT::v8i16: NewVT = MVT::v4i32; break;
+  case MVT::v16i8: NewVT = MVT::v4i32; break;
+  default: assert(false && "Unexpected!");
+  }
+
+  if (NewWidth == 2)
+    if (MVT::isInteger(VT))
+      NewVT = MVT::v2i64;
+    else
+      NewVT = MVT::v2f64;
+  unsigned Scale = NumElems / NewWidth;
+  SmallVector<SDOperand, 8> MaskVec;
   for (unsigned i = 0; i < NumElems; i += Scale) {
     unsigned StartIdx = ~0U;
     for (unsigned j = 0; j < Scale; ++j) {
@@ -3591,10 +3607,11 @@ SDOperand RewriteAs4WideShuffle(SDOperand V1, SDOperand V2,
     MaskVec.push_back(DAG.getConstant(StartIdx / Scale, MVT::i32));
   }
 
-  V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
-  V2 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V2);
-  return DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1, V2,
-                     DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, &MaskVec[0],4));
+  V1 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V1);
+  V2 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V2);
+  return DAG.getNode(ISD::VECTOR_SHUFFLE, NewVT, V1, V2,
+                     DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
+                                 &MaskVec[0], MaskVec.size()));
 }
 
 SDOperand
@@ -3626,6 +3643,35 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
     return PromoteSplat(Op, DAG);
   }
 
+  // If the shuffle can be profitably rewritten as a narrower shuffle, then
+  // do it!
+  if (VT == MVT::v8i16 || VT == MVT::v16i8) {
+    SDOperand NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
+    if (NewOp.Val)
+      return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
+  } else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
+    // FIXME: Figure out a cleaner way to do this.
+    // Try to make use of movq to zero out the top part.
+    if (ISD::isBuildVectorAllZeros(V2.Val)) {
+      SDOperand NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
+      if (NewOp.Val) {
+        SDOperand NewV1 = NewOp.getOperand(0);
+        SDOperand NewV2 = NewOp.getOperand(1);
+        SDOperand NewMask = NewOp.getOperand(2);
+        if (isCommutedMOVL(NewMask.Val, true, false)) {
+          NewOp = CommuteVectorShuffle(NewOp, NewV1, NewV2, NewMask, DAG);
+          NewOp = DAG.getNode(ISD::VECTOR_SHUFFLE, NewOp.getValueType(),
+                              NewV1, NewV2, getMOVLMask(2, DAG));
+          return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
+        }
+      }
+    } else if (ISD::isBuildVectorAllZeros(V1.Val)) {
+      SDOperand NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
+      if (NewOp.Val && X86::isMOVLMask(NewOp.getOperand(2).Val))
+        return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
+    }
+  }
+
   if (X86::isMOVLMask(PermMask.Val))
     return (V1IsUndef) ? V2 : Op;
 
@@ -3654,6 +3700,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
     Commuted = true;
   }
 
+  // FIXME: Figure out a cleaner way to do this.
   if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
     if (V2IsUndef) return V1;
     Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
@@ -3735,13 +3782,6 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
     }
   }
 
-  // If the shuffle can be rewritten as a 4 wide shuffle, then do it!
-  if (VT == MVT::v8i16 || VT == MVT::v16i8) {
-    SDOperand NewOp = RewriteAs4WideShuffle(V1, V2, PermMask, DAG, *this);
-    if (NewOp.Val)
-      return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
-  }
-
   // Handle v8i16 specifically since SSE can do byte extraction and insertion.
   if (VT == MVT::v8i16) {
     SDOperand NewOp = LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this);
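The hunk above elides the body of the inner loop that validates each group of mask elements. As a rough standalone sketch of the check RewriteAsNarrowerShuffle performs (plain C++ with illustrative names such as narrowShuffleMask and UndefIdx; this is not the SelectionDAG code itself): every group of Scale mask indices must be a consecutive, Scale-aligned run, with undef entries acting as wildcards.

#include <cstdio>
#include <cstddef>
#include <optional>
#include <vector>

// Stand-in for an ISD::UNDEF mask element.
constexpr unsigned UndefIdx = ~0U;

// Returns the narrowed mask when the shuffle can be redone with
// Mask.size() / Scale wider elements, std::nullopt otherwise.
std::optional<std::vector<unsigned>>
narrowShuffleMask(const std::vector<unsigned> &Mask, unsigned Scale) {
  std::vector<unsigned> Narrowed;
  for (std::size_t i = 0; i < Mask.size(); i += Scale) {
    unsigned StartIdx = UndefIdx;
    for (unsigned j = 0; j < Scale; ++j) {
      unsigned EltIdx = Mask[i + j];
      if (EltIdx == UndefIdx)
        continue;                              // undef matches any position
      if (StartIdx == UndefIdx)
        StartIdx = EltIdx - (EltIdx % Scale);  // align the run start to Scale
      if (EltIdx != StartIdx + j)
        return std::nullopt;                   // group is not one wide element
    }
    // An all-undef group may narrow to any index; 0 is as good as any.
    Narrowed.push_back(StartIdx == UndefIdx ? 0 : StartIdx / Scale);
  }
  return Narrowed;
}

int main() {
  // A v8i16 mask <2,3, 6,7, 0,1, 4,5> narrows (Scale = 2) to v4i32 <1,3,0,2>.
  auto M = narrowShuffleMask({2, 3, 6, 7, 0, 1, 4, 5}, 2);
  if (M)
    for (unsigned Idx : *M)
      std::printf("%u ", Idx);  // prints: 1 3 0 2
  return 0;
}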
@@ -2224,35 +2224,56 @@ let AddedComplexity = 20 in
                                                  (loadf64 addr:$src))),
                                    MOVL_shuffle_mask)))]>;
 
-let AddedComplexity = 15 in
+// movd / movq to XMM register zero-extends
+let AddedComplexity = 15 in {
 def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (v4i32 (vector_shuffle immAllZerosV,
                                    (v4i32 (scalar_to_vector GR32:$src)),
                                    MOVL_shuffle_mask)))]>;
-let AddedComplexity = 20 in
+// This is X86-64 only.
+def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+                        "mov{d|q}\t{$src, $dst|$dst, $src}",
+                        [(set VR128:$dst,
+                          (v2i64 (vector_shuffle immAllZerosV_bc,
+                                    (v2i64 (scalar_to_vector GR64:$src)),
+                                    MOVL_shuffle_mask)))]>;
+}
+
+let AddedComplexity = 20 in {
 def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (v4i32 (vector_shuffle immAllZerosV,
                                    (v4i32 (scalar_to_vector (loadi32 addr:$src))),
                                    MOVL_shuffle_mask)))]>;
-
-// Moving from XMM to XMM but still clear upper 64 bits.
-let AddedComplexity = 15 in
-def MOVZQI2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                     "movq\t{$src, $dst|$dst, $src}",
-                     [(set VR128:$dst, (int_x86_sse2_movl_dq VR128:$src))]>,
-                   XS, Requires<[HasSSE2]>;
-let AddedComplexity = 20 in
 def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                      "movq\t{$src, $dst|$dst, $src}",
-                     [(set VR128:$dst, (int_x86_sse2_movl_dq
-                                        (bitconvert (memopv2i64 addr:$src))))]>,
-                   XS, Requires<[HasSSE2]>;
+                     [(set VR128:$dst,
+                       (v2i64 (vector_shuffle immAllZerosV_bc,
+                                 (v2i64 (scalar_to_vector (loadi64 addr:$src))),
+                                 MOVL_shuffle_mask)))]>, XS,
+                   Requires<[HasSSE2]>;
+}
+
+// Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in
+// IA32 document. movq xmm1, xmm2 does clear the high bits.
+let AddedComplexity = 15 in
+def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                        "movq\t{$src, $dst|$dst, $src}",
+                    [(set VR128:$dst, (v2i64 (vector_shuffle immAllZerosV_bc,
+                                                VR128:$src,
+                                                MOVL_shuffle_mask)))]>,
+                      XS, Requires<[HasSSE2]>;
+
+let AddedComplexity = 20 in
+def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+                        "movq\t{$src, $dst|$dst, $src}",
+                    [(set VR128:$dst, (v2i64 (vector_shuffle immAllZerosV_bc,
+                                                (memopv2i64 addr:$src),
+                                                MOVL_shuffle_mask)))]>,
+                      XS, Requires<[HasSSE2]>;
 
 //===----------------------------------------------------------------------===//
 // SSE3 Instructions
@@ -2763,13 +2784,13 @@ def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), SSE_splat_mask:$sm),
 
 // Special unary SHUFPSrri case.
 // FIXME: when we want non two-address code, then we should use PSHUFD?
-def : Pat<(vector_shuffle (v4f32 VR128:$src1), (undef),
-          SHUFP_unary_shuffle_mask:$sm),
+def : Pat<(v4f32 (vector_shuffle VR128:$src1, (undef),
+          SHUFP_unary_shuffle_mask:$sm)),
           (SHUFPSrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>,
       Requires<[HasSSE1]>;
 // Special unary SHUFPDrri case.
-def : Pat<(vector_shuffle (v2f64 VR128:$src1), (undef),
-          SHUFP_unary_shuffle_mask:$sm),
+def : Pat<(v2f64 (vector_shuffle VR128:$src1, (undef),
+          SHUFP_unary_shuffle_mask:$sm)),
          (SHUFPDrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>,
      Requires<[HasSSE2]>;
 // Unary v4f32 shuffle with PSHUF* in order to fold a load.
@@ -2778,14 +2799,24 @@ def : Pat<(vector_shuffle (memopv4f32 addr:$src1), (undef),
          (PSHUFDmi addr:$src1, SHUFP_unary_shuffle_mask:$sm)>,
      Requires<[HasSSE2]>;
 // Special binary v4i32 shuffle cases with SHUFPS.
-def : Pat<(vector_shuffle (v4i32 VR128:$src1), (v4i32 VR128:$src2),
-          PSHUFD_binary_shuffle_mask:$sm),
+def : Pat<(v4i32 (vector_shuffle VR128:$src1, (v4i32 VR128:$src2),
+          PSHUFD_binary_shuffle_mask:$sm)),
          (SHUFPSrri VR128:$src1, VR128:$src2, PSHUFD_binary_shuffle_mask:$sm)>,
      Requires<[HasSSE2]>;
-def : Pat<(vector_shuffle (v4i32 VR128:$src1),
-          (bc_v4i32 (memopv2i64 addr:$src2)), PSHUFD_binary_shuffle_mask:$sm),
+def : Pat<(v4i32 (vector_shuffle VR128:$src1,
+          (bc_v4i32 (memopv2i64 addr:$src2)), PSHUFD_binary_shuffle_mask:$sm)),
          (SHUFPSrmi VR128:$src1, addr:$src2, PSHUFD_binary_shuffle_mask:$sm)>,
      Requires<[HasSSE2]>;
+// Special binary v2i64 shuffle cases using SHUFPDrri.
+def : Pat<(v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
+          SHUFP_shuffle_mask:$sm)),
+          (SHUFPDrri VR128:$src1, VR128:$src2, SHUFP_shuffle_mask:$sm)>,
+      Requires<[HasSSE2]>;
+// Special unary SHUFPDrri case.
+def : Pat<(v2f64 (vector_shuffle VR128:$src1, (undef),
+          SHUFP_unary_shuffle_mask:$sm)),
+          (SHUFPDrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>,
+      Requires<[HasSSE2]>;
 
 // vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
 let AddedComplexity = 10 in {
@@ -2888,11 +2919,11 @@ def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
 }
 
 // Set lowest element and zero upper elements.
-let AddedComplexity = 20 in
-def : Pat<(bc_v2i64 (vector_shuffle immAllZerosV_bc,
-          (v2f64 (scalar_to_vector (loadf64 addr:$src))),
-          MOVL_shuffle_mask)),
-          (MOVZQI2PQIrm addr:$src)>, Requires<[HasSSE2]>;
+let AddedComplexity = 15 in
+def : Pat<(v2f64 (vector_shuffle immAllZerosV_bc, VR128:$src,
+          MOVL_shuffle_mask)),
+          (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
+
 
 // FIXME: Temporary workaround since 2-wide shuffle is broken.
 def : Pat<(int_x86_sse2_movs_d VR128:$src1, VR128:$src2),
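Every MOVZ* definition above encodes the same semantics: write the low lane of the destination XMM register and clear all bits above it, which is exactly what shuffling with immAllZerosV under a MOVL mask expresses. A scalar model of that behavior (a minimal sketch, not taken from the commit) shows why one movd or movq can stand in for an explicit zeroing plus insert:

#include <cstdint>
#include <cstdio>

// Scalar stand-in for a 128-bit XMM register.
struct Xmm { uint64_t lo, hi; };

// What the zero-extending moves modeled above do, in scalar form:
Xmm movd_gr32(uint32_t g)  { return { g, 0 }; }     // cf. MOVZDI2PDIrr
Xmm movq_gr64(uint64_t g)  { return { g, 0 }; }     // cf. MOVZQI2PQIrr (x86-64)
Xmm movq_xmm(const Xmm &s) { return { s.lo, 0 }; }  // cf. MOVZPQILo2PQIrr

int main() {
  Xmm v = movq_xmm({ 0x1122334455667788ULL, 0xffffffffffffffffULL });
  std::printf("hi=%016llx lo=%016llx\n",
              (unsigned long long)v.hi, (unsigned long long)v.lo);
  return 0;  // prints hi=0000000000000000: the upper half is cleared
}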
@@ -409,6 +409,9 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
       { X86::MOVSX64rr8,      X86::MOVSX64rm8 },
       { X86::MOVUPDrr,        X86::MOVUPDrm },
       { X86::MOVUPSrr,        X86::MOVUPSrm },
+      { X86::MOVZDI2PDIrr,    X86::MOVZDI2PDIrm },
+      { X86::MOVZQI2PQIrr,    X86::MOVZQI2PQIrm },
+      { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm },
       { X86::MOVZX16rr8,      X86::MOVZX16rm8 },
       { X86::MOVZX32rr16,     X86::MOVZX32rm16 },
       { X86::MOVZX32rr8,      X86::MOVZX32rm8 },
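The table extended here pairs a register-register opcode with the memory-operand form used when a load can be folded into the instruction. Hypothetically, with illustrative types rather than the actual X86RegisterInfo interface, the lookup amounts to:

#include <unordered_map>

// Illustrative opcode tags; the real entries are X86:: enum values.
enum Opcode { MOVZDI2PDIrr, MOVZDI2PDIrm,
              MOVZQI2PQIrr, MOVZQI2PQIrm,
              MOVZPQILo2PQIrr, MOVZPQILo2PQIrm };

// reg form -> mem form, mirroring the three entries added above.
static const std::unordered_map<unsigned, unsigned> RegToMem = {
  { MOVZDI2PDIrr,    MOVZDI2PDIrm    },
  { MOVZQI2PQIrr,    MOVZQI2PQIrm    },
  { MOVZPQILo2PQIrr, MOVZPQILo2PQIrm },
};

// When an operand would otherwise be reloaded from a stack slot, the
// allocator can swap in the memory form and fold the load away.
inline unsigned foldLoad(unsigned RegOpc) {
  auto It = RegToMem.find(RegOpc);
  return It == RegToMem.end() ? RegOpc : It->second;
}

int main() { return foldLoad(MOVZQI2PQIrr) == MOVZQI2PQIrm ? 0 : 1; }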
@@ -1,7 +1,8 @@
 ; RUN: llvm-upgrade < %s | llvm-as | llc -march=x86 -mattr=+sse2 -o %t -f
-; RUN: grep movlhps %t | count 2
+; RUN: grep movlhps %t | count 1
 ; RUN: grep unpcklps %t | count 1
 ; RUN: grep punpckldq %t | count 1
+; RUN: grep movq %t | count 1
 
 <4 x float> %test1(float %a, float %b) {
 	%tmp = insertelement <4 x float> zeroinitializer, float %a, uint 0
@@ -1,4 +1,4 @@
-; RUN: llvm-upgrade < %s | llvm-as | llc -march=x86 -mattr=+sse2 | grep movq | count 1
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=x86 -mattr=+sse2 | grep movsd | count 1
 
 <2 x long> %test(<2 x long>* %p) {
 	%tmp = cast <2 x long>* %p to double*
test/CodeGen/X86/vec_shuffle-14.ll (new file, 42 lines)
@@ -0,0 +1,42 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movd | count 1
+; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2 | grep movd | count 2
+; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2 | grep movq | count 3
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep xor
+
+define <4 x i32> @t1(i32 %a) nounwind {
+entry:
+	%tmp = insertelement <4 x i32> undef, i32 %a, i32 0
+	%tmp6 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %tmp, <4 x i32> < i32 4, i32 1, i32 2, i32 3 >		; <<4 x i32>> [#uses=1]
+	ret <4 x i32> %tmp6
+}
+
+define <2 x i64> @t2(i64 %a) nounwind {
+entry:
+	%tmp = insertelement <2 x i64> undef, i64 %a, i32 0
+	%tmp6 = shufflevector <2 x i64> zeroinitializer, <2 x i64> %tmp, <2 x i32> < i32 2, i32 1 >		; <<4 x i32>> [#uses=1]
+	ret <2 x i64> %tmp6
+}
+
+define <2 x i64> @t3(<2 x i64>* %a) nounwind {
+entry:
+	%tmp4 = load <2 x i64>* %a, align 16		; <<2 x i64>> [#uses=1]
+	%tmp6 = bitcast <2 x i64> %tmp4 to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp7 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %tmp6, <4 x i32> < i32 4, i32 5, i32 2, i32 3 >		; <<4 x i32>> [#uses=1]
+	%tmp8 = bitcast <4 x i32> %tmp7 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	ret <2 x i64> %tmp8
+}
+
+define <2 x i64> @t4(<2 x i64> %a) nounwind {
+entry:
+	%tmp5 = bitcast <2 x i64> %a to <4 x i32>		; <<4 x i32>> [#uses=1]
+	%tmp6 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %tmp5, <4 x i32> < i32 4, i32 5, i32 2, i32 3 >		; <<4 x i32>> [#uses=1]
+	%tmp7 = bitcast <4 x i32> %tmp6 to <2 x i64>		; <<2 x i64>> [#uses=1]
+	ret <2 x i64> %tmp7
+}
+
+define <2 x i64> @t5(<2 x i64> %a) nounwind {
+entry:
+	%tmp6 = shufflevector <2 x i64> zeroinitializer, <2 x i64> %a, <2 x i32> < i32 2, i32 1 >		; <<4 x i32>> [#uses=1]
+	ret <2 x i64> %tmp6
+}
test/CodeGen/X86/vec_shuffle-15.ll (new file, 81 lines)
@@ -0,0 +1,81 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+
+define <2 x i64> @t00(<2 x i64> %a, <2 x i64> %b) nounwind {
+	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 0, i32 0 >
+	ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t01(<2 x i64> %a, <2 x i64> %b) nounwind {
+	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 0, i32 1 >
+	ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t02(<2 x i64> %a, <2 x i64> %b) nounwind {
+	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 0, i32 2 >
+	ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t03(<2 x i64> %a, <2 x i64> %b) nounwind {
+	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 0, i32 3 >
+	ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t10(<2 x i64> %a, <2 x i64> %b) nounwind {
+	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 1, i32 0 >
+	ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t11(<2 x i64> %a, <2 x i64> %b) nounwind {
+	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 1, i32 1 >
+	ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t12(<2 x i64> %a, <2 x i64> %b) nounwind {
+	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 1, i32 2 >
+	ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t13(<2 x i64> %a, <2 x i64> %b) nounwind {
+	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 1, i32 3 >
+	ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t20(<2 x i64> %a, <2 x i64> %b) nounwind {
+	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 2, i32 0 >
+	ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t21(<2 x i64> %a, <2 x i64> %b) nounwind {
+	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 2, i32 1 >
+	ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t22(<2 x i64> %a, <2 x i64> %b) nounwind {
+	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 2, i32 2 >
+	ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t23(<2 x i64> %a, <2 x i64> %b) nounwind {
+	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 2, i32 3 >
+	ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t30(<2 x i64> %a, <2 x i64> %b) nounwind {
+	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 3, i32 0 >
+	ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t31(<2 x i64> %a, <2 x i64> %b) nounwind {
+	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 3, i32 1 >
+	ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t32(<2 x i64> %a, <2 x i64> %b) nounwind {
+	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 3, i32 2 >
+	ret <2 x i64> %tmp
+}
+
+define <2 x i64> @t33(<2 x i64> %a, <2 x i64> %b) nounwind {
+	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 3, i32 3 >
+	ret <2 x i64> %tmp
+}
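vec_shuffle-15.ll enumerates every two-element shuffle mask <i, j> with i, j in 0..3; indices 0 and 1 select from %a, 2 and 3 from %b. A throwaway generator (illustrative only, not part of the commit) makes that coverage explicit:

#include <cstdio>

// Prints the 16 masks covered by t00..t33 above.
int main() {
  for (int i = 0; i < 4; ++i)
    for (int j = 0; j < 4; ++j)
      std::printf("t%d%d: shufflevector mask < i32 %d, i32 %d >\n", i, j, i, j);
  return 0;
}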