mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-19 06:31:18 +00:00
[X86][SSE] Added support for SSE3 lane duplication shuffle instructions
This patch adds shuffle matching for the SSE3 MOVDDUP, MOVSLDUP and MOVSHDUP instructions. Their main benefit is that many single-source shuffles no longer need to use (pre-AVX) dual-source instructions such as SHUFPD/SHUFPS, which cause extra moves and prevent load folds. Adding these instructions uncovered an issue in XFormVExtractWithShuffleIntoLoad, which crashed on single-operand shuffle instructions (now fixed). It also involved fixing getTargetShuffleMask to correctly identify these instructions as unary shuffles. Also adds a missing tablegen pattern for MOVDDUP. Differential Revision: http://reviews.llvm.org/D7042 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@226716 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
50c3bc9956
commit
4269590166
@ -5531,11 +5531,16 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
|
||||
break;
|
||||
case X86ISD::MOVSLDUP:
|
||||
DecodeMOVSLDUPMask(VT, Mask);
|
||||
IsUnary = true;
|
||||
break;
|
||||
case X86ISD::MOVSHDUP:
|
||||
DecodeMOVSHDUPMask(VT, Mask);
|
||||
IsUnary = true;
|
||||
break;
|
||||
case X86ISD::MOVDDUP:
|
||||
DecodeMOVDDUPMask(VT, Mask);
|
||||
IsUnary = true;
|
||||
break;
|
||||
case X86ISD::MOVLHPD:
|
||||
case X86ISD::MOVLPD:
|
||||
case X86ISD::MOVLPS:
|
||||
@ -8284,6 +8289,11 @@ static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||
assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");
|
||||
|
||||
if (isSingleInputShuffleMask(Mask)) {
|
||||
// Use low duplicate instructions for masks that match their pattern.
|
||||
if (Subtarget->hasSSE3())
|
||||
if (isShuffleEquivalent(Mask, 0, 0))
|
||||
return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, V1);
|
||||
|
||||
// Straight shuffle of a single input vector. Simulate this by using the
|
||||
// single input as both of the "inputs" to this instruction.
|
||||
unsigned SHUFPDMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1);
|
||||
@ -8541,6 +8551,14 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||
Mask, Subtarget, DAG))
|
||||
return Broadcast;
|
||||
|
||||
// Use even/odd duplicate instructions for masks that match their pattern.
|
||||
if (Subtarget->hasSSE3()) {
|
||||
if (isShuffleEquivalent(Mask, 0, 0, 2, 2))
|
||||
return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v4f32, V1);
|
||||
if (isShuffleEquivalent(Mask, 1, 1, 3, 3))
|
||||
return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v4f32, V1);
|
||||
}
|
||||
|
||||
if (Subtarget->hasAVX()) {
|
||||
// If we have AVX, we can use VPERMILPS which will allow folding a load
|
||||
// into the shuffle.
|
||||
@ -10266,6 +10284,10 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||
Mask, Subtarget, DAG))
|
||||
return Broadcast;
|
||||
|
||||
// Use low duplicate instructions for masks that match their pattern.
|
||||
if (isShuffleEquivalent(Mask, 0, 0, 2, 2))
|
||||
return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v4f64, V1);
|
||||
|
||||
if (!is128BitLaneCrossingShuffleMask(MVT::v4f64, Mask)) {
|
||||
// Non-half-crossing single input shuffles can be lowered with an
|
||||
// interleaved permutation.
|
||||
@ -10449,6 +10471,13 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||
if (is128BitLaneRepeatedShuffleMask(MVT::v8f32, Mask, RepeatedMask)) {
|
||||
assert(RepeatedMask.size() == 4 &&
|
||||
"Repeated masks must be half the mask width!");
|
||||
|
||||
// Use even/odd duplicate instructions for masks that match their pattern.
|
||||
if (isShuffleEquivalent(Mask, 0, 0, 2, 2, 4, 4, 6, 6))
|
||||
return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v8f32, V1);
|
||||
if (isShuffleEquivalent(Mask, 1, 1, 3, 3, 5, 5, 7, 7))
|
||||
return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v8f32, V1);
|
||||
|
||||
if (isSingleInputShuffleMask(Mask))
|
||||
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f32, V1,
|
||||
getV4X86ShuffleImm8ForMask(RepeatedMask, DAG));
|
||||
@ -22838,7 +22867,8 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
|
||||
: InVec.getOperand(1);
|
||||
|
||||
// If inputs to shuffle are the same for both ops, then allow 2 uses
|
||||
unsigned AllowedUses = InVec.getOperand(0) == InVec.getOperand(1) ? 2 : 1;
|
||||
unsigned AllowedUses = InVec.getNumOperands() > 1 &&
|
||||
InVec.getOperand(0) == InVec.getOperand(1) ? 2 : 1;
|
||||
|
||||
if (LdNode.getOpcode() == ISD::BITCAST) {
|
||||
// Don't duplicate a load with other uses.
|
||||
|
@ -1333,7 +1333,7 @@ let Predicates = [HasAVX] in {
|
||||
(VMOVHPSrm VR128:$src1, addr:$src2)>;
|
||||
|
||||
// VMOVHPD patterns
|
||||
|
||||
|
||||
// FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
|
||||
// is during lowering, where it's not possible to recognize the load fold
|
||||
// cause it has two uses through a bitcast. One use disappears at isel time
|
||||
@ -2743,24 +2743,6 @@ let Predicates = [HasAVX1Only] in {
|
||||
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
// FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
|
||||
// problem is during lowering, where it's not possible to recognize the load
|
||||
// fold cause it has two uses through a bitcast. One use disappears at isel
|
||||
// time and the fold opportunity reappears.
|
||||
def : Pat<(v2f64 (X86Movddup VR128:$src)),
|
||||
(VUNPCKLPDrr VR128:$src, VR128:$src)>;
|
||||
}
|
||||
|
||||
let Predicates = [UseSSE2] in {
|
||||
// FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
|
||||
// problem is during lowering, where it's not possible to recognize the load
|
||||
// fold cause it has two uses through a bitcast. One use disappears at isel
|
||||
// time and the fold opportunity reappears.
|
||||
def : Pat<(v2f64 (X86Movddup VR128:$src)),
|
||||
(UNPCKLPDrr VR128:$src, VR128:$src)>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SSE 1 & 2 - Extract Floating-Point Sign mask
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -5388,10 +5370,10 @@ let Predicates = [UseSSE3] in {
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
multiclass sse3_replicate_dfp<string OpcodeStr> {
|
||||
let hasSideEffects = 0 in
|
||||
def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||
[], IIC_SSE_MOV_LH>, Sched<[WriteFShuffle]>;
|
||||
[(set VR128:$dst, (v2f64 (X86Movddup VR128:$src)))],
|
||||
IIC_SSE_MOV_LH>, Sched<[WriteFShuffle]>;
|
||||
def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst,
|
||||
@ -8161,21 +8143,21 @@ let Predicates = [HasAVX, HasFastMem32] in {
|
||||
(loadv2f64 (add addr:$src, (iPTR 16))),
|
||||
(iPTR 2)),
|
||||
(VMOVUPDYrm addr:$src)>;
|
||||
|
||||
|
||||
def : Pat<(insert_subvector
|
||||
(v32i8 (insert_subvector
|
||||
undef, (bc_v16i8 (loadv2i64 addr:$src)), (iPTR 0))),
|
||||
(bc_v16i8 (loadv2i64 (add addr:$src, (iPTR 16)))),
|
||||
(iPTR 16)),
|
||||
(VMOVDQUYrm addr:$src)>;
|
||||
|
||||
|
||||
def : Pat<(insert_subvector
|
||||
(v16i16 (insert_subvector
|
||||
undef, (bc_v8i16 (loadv2i64 addr:$src)), (iPTR 0))),
|
||||
(bc_v8i16 (loadv2i64 (add addr:$src, (iPTR 16)))),
|
||||
(iPTR 8)),
|
||||
(VMOVDQUYrm addr:$src)>;
|
||||
|
||||
|
||||
def : Pat<(insert_subvector
|
||||
(v8i32 (insert_subvector
|
||||
undef, (bc_v4i32 (loadv2i64 addr:$src)), (iPTR 0))),
|
||||
|
@ -15,37 +15,37 @@ define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
|
||||
ret <16 x i16> %shuffle
|
||||
}
|
||||
|
||||
; CHECK: vmovq
|
||||
; CHECK-NEXT: vunpcklpd %xmm
|
||||
; CHECK-NEXT: vinsertf128 $1
|
||||
define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
}
|
||||
|
||||
; CHECK: vmovq
|
||||
; CHECK-NEXT: vmovddup %xmm
|
||||
; CHECK-NEXT: vinsertf128 $1
|
||||
define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
|
||||
%vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
|
||||
%vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
|
||||
%vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
|
||||
ret <4 x i64> %vecinit6.i
|
||||
}
|
||||
|
||||
; CHECK: vunpcklpd %xmm
|
||||
; CHECK-NEXT: vinsertf128 $1
|
||||
define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
ret <4 x i64> %vecinit6.i
|
||||
}
|
||||
|
||||
; CHECK: vmovddup %xmm
|
||||
; CHECK-NEXT: vinsertf128 $1
|
||||
define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%vecinit.i = insertelement <4 x double> undef, double %q, i32 0
|
||||
%vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
|
||||
%vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
|
||||
%vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
|
||||
ret <4 x double> %vecinit6.i
|
||||
}
|
||||
|
||||
; Test this turns into a broadcast:
|
||||
; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
|
||||
;
|
||||
; CHECK: vbroadcastss
|
||||
define <8 x float> @funcE() nounwind {
|
||||
allocas:
|
||||
|
||||
; Test this turns into a broadcast:
|
||||
; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
|
||||
;
|
||||
; CHECK: vbroadcastss
|
||||
define <8 x float> @funcE() nounwind {
|
||||
allocas:
|
||||
%udx495 = alloca [18 x [18 x float]], align 32
|
||||
br label %for_test505.preheader
|
||||
|
||||
|
@ -314,13 +314,13 @@ define <2 x i64> @_inreg2xi64(<2 x i64> %a) {
|
||||
define <4 x double> @_inreg4xdouble(<4 x double> %a) {
|
||||
%b = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> zeroinitializer
|
||||
ret <4 x double> %b
|
||||
}
|
||||
|
||||
;CHECK-LABEL: _inreg2xdouble:
|
||||
;CHECK: vunpcklpd
|
||||
;CHECK: ret
|
||||
define <2 x double> @_inreg2xdouble(<2 x double> %a) {
|
||||
%b = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> zeroinitializer
|
||||
}
|
||||
|
||||
;CHECK-LABEL: _inreg2xdouble:
|
||||
;CHECK: vmovddup
|
||||
;CHECK: ret
|
||||
define <2 x double> @_inreg2xdouble(<2 x double> %a) {
|
||||
%b = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> zeroinitializer
|
||||
ret <2 x double> %b
|
||||
}
|
||||
|
||||
|
@ -12,15 +12,14 @@ entry:
|
||||
; GNU_SINCOS: callq sincosf
|
||||
; GNU_SINCOS: movss 4(%rsp), %xmm0
|
||||
; GNU_SINCOS: addss (%rsp), %xmm0
|
||||
|
||||
; OSX_SINCOS-LABEL: test1:
|
||||
; OSX_SINCOS: callq ___sincosf_stret
|
||||
; OSX_SINCOS: movaps %xmm0, %xmm1
|
||||
; OSX_SINCOS: shufps {{.*}} ## xmm1 = xmm1[1,1,2,3]
|
||||
; OSX_SINCOS: addss %xmm0, %xmm1
|
||||
|
||||
; OSX_NOOPT: test1
|
||||
; OSX_NOOPT: callq _sinf
|
||||
|
||||
; OSX_SINCOS-LABEL: test1:
|
||||
; OSX_SINCOS: callq ___sincosf_stret
|
||||
; OSX_SINCOS: movshdup {{.*}} xmm1 = xmm0[1,1,3,3]
|
||||
; OSX_SINCOS: addss %xmm1, %xmm0
|
||||
|
||||
; OSX_NOOPT: test1
|
||||
; OSX_NOOPT: callq _sinf
|
||||
; OSX_NOOPT: callq _cosf
|
||||
%call = tail call float @sinf(float %x) nounwind readnone
|
||||
%call1 = tail call float @cosf(float %x) nounwind readnone
|
||||
|
@ -288,28 +288,26 @@ declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
|
||||
|
||||
; This used to compile to insertps $0 + insertps $16. insertps $0 is always
|
||||
; pointless.
|
||||
define <2 x float> @buildvector(<2 x float> %A, <2 x float> %B) nounwind {
|
||||
; X32-LABEL: buildvector:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32-NEXT: movaps %xmm0, %xmm2
|
||||
; X32-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
|
||||
; X32-NEXT: addss %xmm1, %xmm0
|
||||
; X32-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
|
||||
; X32-NEXT: addss %xmm2, %xmm1
|
||||
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: buildvector:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64-NEXT: movaps %xmm0, %xmm2
|
||||
; X64-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
|
||||
; X64-NEXT: addss %xmm1, %xmm0
|
||||
; X64-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
|
||||
; X64-NEXT: addss %xmm2, %xmm1
|
||||
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%tmp7 = extractelement <2 x float> %A, i32 0
|
||||
define <2 x float> @buildvector(<2 x float> %A, <2 x float> %B) nounwind {
|
||||
; X32-LABEL: buildvector:
|
||||
; X32: ## BB#0: ## %entry
|
||||
; X32-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
|
||||
; X32-NEXT: movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
|
||||
; X32-NEXT: addss %xmm1, %xmm0
|
||||
; X32-NEXT: addss %xmm2, %xmm3
|
||||
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[2,3]
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: buildvector:
|
||||
; X64: ## BB#0: ## %entry
|
||||
; X64-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
|
||||
; X64-NEXT: movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
|
||||
; X64-NEXT: addss %xmm1, %xmm0
|
||||
; X64-NEXT: addss %xmm2, %xmm3
|
||||
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[2,3]
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%tmp7 = extractelement <2 x float> %A, i32 0
|
||||
%tmp5 = extractelement <2 x float> %A, i32 1
|
||||
%tmp3 = extractelement <2 x float> %B, i32 0
|
||||
%tmp1 = extractelement <2 x float> %B, i32 1
|
||||
|
@ -9,13 +9,13 @@
|
||||
|
||||
define void @rsqrtss(<4 x float> %a) nounwind uwtable ssp {
|
||||
entry:
|
||||
; CHECK-LABEL: rsqrtss:
|
||||
; CHECK: rsqrtss %xmm0, %xmm0
|
||||
; CHECK-NEXT: cvtss2sd %xmm0
|
||||
; CHECK-NEXT: shufps
|
||||
; CHECK-NEXT: cvtss2sd %xmm0
|
||||
; CHECK-NEXT: movap
|
||||
; CHECK-NEXT: jmp
|
||||
; CHECK-LABEL: rsqrtss:
|
||||
; CHECK: rsqrtss %xmm0, %xmm0
|
||||
; CHECK-NEXT: cvtss2sd %xmm0
|
||||
; CHECK-NEXT: movshdup
|
||||
; CHECK-NEXT: cvtss2sd %xmm0
|
||||
; CHECK-NEXT: movap
|
||||
; CHECK-NEXT: jmp
|
||||
|
||||
%0 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a) nounwind
|
||||
%a.addr.0.extract = extractelement <4 x float> %0, i32 0
|
||||
@ -30,13 +30,13 @@ declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
|
||||
|
||||
define void @rcpss(<4 x float> %a) nounwind uwtable ssp {
|
||||
entry:
|
||||
; CHECK-LABEL: rcpss:
|
||||
; CHECK: rcpss %xmm0, %xmm0
|
||||
; CHECK-NEXT: cvtss2sd %xmm0
|
||||
; CHECK-NEXT: shufps
|
||||
; CHECK-NEXT: cvtss2sd %xmm0
|
||||
; CHECK-NEXT: movap
|
||||
; CHECK-NEXT: jmp
|
||||
; CHECK-LABEL: rcpss:
|
||||
; CHECK: rcpss %xmm0, %xmm0
|
||||
; CHECK-NEXT: cvtss2sd %xmm0
|
||||
; CHECK-NEXT: movshdup
|
||||
; CHECK-NEXT: cvtss2sd %xmm0
|
||||
; CHECK-NEXT: movap
|
||||
; CHECK-NEXT: jmp
|
||||
|
||||
%0 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a) nounwind
|
||||
%a.addr.0.extract = extractelement <4 x float> %0, i32 0
|
||||
@ -50,13 +50,13 @@ declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
|
||||
|
||||
define void @sqrtss(<4 x float> %a) nounwind uwtable ssp {
|
||||
entry:
|
||||
; CHECK-LABEL: sqrtss:
|
||||
; CHECK: sqrtss %xmm0, %xmm0
|
||||
; CHECK-NEXT: cvtss2sd %xmm0
|
||||
; CHECK-NEXT: shufps
|
||||
; CHECK-NEXT: cvtss2sd %xmm0
|
||||
; CHECK-NEXT: movap
|
||||
; CHECK-NEXT: jmp
|
||||
; CHECK-LABEL: sqrtss:
|
||||
; CHECK: sqrtss %xmm0, %xmm0
|
||||
; CHECK-NEXT: cvtss2sd %xmm0
|
||||
; CHECK-NEXT: movshdup
|
||||
; CHECK-NEXT: cvtss2sd %xmm0
|
||||
; CHECK-NEXT: movap
|
||||
; CHECK-NEXT: jmp
|
||||
|
||||
%0 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a) nounwind
|
||||
%a.addr.0.extract = extractelement <4 x float> %0, i32 0
|
||||
|
@ -2,23 +2,21 @@
|
||||
; RUN: llc < %s -mcpu=yonah -march=x86 -mtriple=i386-linux-gnu -o - | FileCheck %s --check-prefix=X32
|
||||
|
||||
; PR7518
|
||||
define void @test1(<2 x float> %Q, float *%P2) nounwind {
|
||||
; X64-LABEL: test1:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: movaps %xmm0, %xmm1
|
||||
; X64-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
|
||||
; X64-NEXT: addss %xmm0, %xmm1
|
||||
; X64-NEXT: movss %xmm1, (%rdi)
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X32-LABEL: test1:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movaps %xmm0, %xmm1
|
||||
; X32-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
|
||||
; X32-NEXT: addss %xmm0, %xmm1
|
||||
; X32-NEXT: movss %xmm1, (%eax)
|
||||
; X32-NEXT: retl
|
||||
define void @test1(<2 x float> %Q, float *%P2) nounwind {
|
||||
; X64-LABEL: test1:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; X64-NEXT: addss %xmm0, %xmm1
|
||||
; X64-NEXT: movss %xmm1, (%rdi)
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X32-LABEL: test1:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; X32-NEXT: addss %xmm0, %xmm1
|
||||
; X32-NEXT: movss %xmm1, (%eax)
|
||||
; X32-NEXT: retl
|
||||
%a = extractelement <2 x float> %Q, i32 0
|
||||
%b = extractelement <2 x float> %Q, i32 1
|
||||
%c = fadd float %a, %b
|
||||
|
@ -115,22 +115,22 @@ define <8 x i8> @foo3_8(<8 x float> %src) {
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
|
||||
; CHECK-WIDE-NEXT: shll $8, %eax
|
||||
; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %ecx
|
||||
; CHECK-WIDE-NEXT: movzbl %cl, %ecx
|
||||
; CHECK-WIDE-NEXT: orl %eax, %ecx
|
||||
; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
|
||||
; CHECK-WIDE-NEXT: shll $8, %eax
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %edx
|
||||
; CHECK-WIDE-NEXT: movzbl %dl, %edx
|
||||
; CHECK-WIDE-NEXT: orl %eax, %edx
|
||||
; CHECK-WIDE-NEXT: vpinsrw $0, %edx, %xmm0, %xmm1
|
||||
; CHECK-WIDE-NEXT: vpinsrw $1, %ecx, %xmm1, %xmm1
|
||||
; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[1,1,2,3]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
|
||||
; CHECK-WIDE-NEXT: shll $8, %eax
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %ecx
|
||||
; CHECK-WIDE-NEXT: movzbl %cl, %ecx
|
||||
; CHECK-WIDE-NEXT: orl %eax, %ecx
|
||||
; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
|
||||
; CHECK-WIDE-NEXT: shll $8, %eax
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %edx
|
||||
; CHECK-WIDE-NEXT: movzbl %dl, %edx
|
||||
; CHECK-WIDE-NEXT: orl %eax, %edx
|
||||
; CHECK-WIDE-NEXT: vpinsrw $0, %edx, %xmm0, %xmm1
|
||||
; CHECK-WIDE-NEXT: vpinsrw $1, %ecx, %xmm1, %xmm1
|
||||
; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
|
||||
; CHECK-WIDE-NEXT: shll $8, %eax
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
|
||||
; CHECK-WIDE-NEXT: movzbl %cl, %ecx
|
||||
; CHECK-WIDE-NEXT: orl %eax, %ecx
|
||||
; CHECK-WIDE-NEXT: vpinsrw $2, %ecx, %xmm1, %xmm1
|
||||
@ -160,13 +160,13 @@ define <4 x i8> @foo3_4(<4 x float> %src) {
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
|
||||
; CHECK-WIDE-NEXT: shll $8, %eax
|
||||
; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %ecx
|
||||
; CHECK-WIDE-NEXT: movzbl %cl, %ecx
|
||||
; CHECK-WIDE-NEXT: orl %eax, %ecx
|
||||
; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
|
||||
; CHECK-WIDE-NEXT: shll $8, %eax
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %edx
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %ecx
|
||||
; CHECK-WIDE-NEXT: movzbl %cl, %ecx
|
||||
; CHECK-WIDE-NEXT: orl %eax, %ecx
|
||||
; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
|
||||
; CHECK-WIDE-NEXT: shll $8, %eax
|
||||
; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %edx
|
||||
; CHECK-WIDE-NEXT: movzbl %dl, %edx
|
||||
; CHECK-WIDE-NEXT: orl %eax, %edx
|
||||
; CHECK-WIDE-NEXT: vpinsrw $0, %edx, %xmm0, %xmm0
|
||||
|
@ -102,28 +102,28 @@ define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: shuffle_v2f64_00:
|
||||
; SSE3: # BB#0:
|
||||
; SSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: shuffle_v2f64_00:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v2f64_00:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v2f64_00:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX-NEXT: retq
|
||||
%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
|
||||
ret <2 x double> %shuffle
|
||||
;
|
||||
; SSE3-LABEL: shuffle_v2f64_00:
|
||||
; SSE3: # BB#0:
|
||||
; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: shuffle_v2f64_00:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v2f64_00:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v2f64_00:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX-NEXT: retq
|
||||
%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
|
||||
ret <2 x double> %shuffle
|
||||
}
|
||||
define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
|
||||
; SSE-LABEL: shuffle_v2f64_10:
|
||||
@ -157,31 +157,28 @@ define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
|
||||
; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0]
|
||||
; SSE2-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: shuffle_v2f64_22:
|
||||
; SSE3: # BB#0:
|
||||
; SSE3-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
|
||||
; SSE3-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: shuffle_v2f64_22:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
|
||||
; SSSE3-NEXT: movapd %xmm1, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v2f64_22:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v2f64_22:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0,0]
|
||||
; AVX-NEXT: retq
|
||||
%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
|
||||
ret <2 x double> %shuffle
|
||||
;
|
||||
; SSE3-LABEL: shuffle_v2f64_22:
|
||||
; SSE3: # BB#0:
|
||||
; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: shuffle_v2f64_22:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v2f64_22:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v2f64_22:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0]
|
||||
; AVX-NEXT: retq
|
||||
%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
|
||||
ret <2 x double> %shuffle
|
||||
}
|
||||
define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
|
||||
; SSE-LABEL: shuffle_v2f64_32:
|
||||
@ -1061,28 +1058,28 @@ define <2 x double> @insert_dup_reg_v2f64(double %a) {
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: insert_dup_reg_v2f64:
|
||||
; SSE3: # BB#0:
|
||||
; SSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: insert_dup_reg_v2f64:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: insert_dup_reg_v2f64:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: insert_dup_reg_v2f64:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX-NEXT: retq
|
||||
%v = insertelement <2 x double> undef, double %a, i32 0
|
||||
%shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
|
||||
;
|
||||
; SSE3-LABEL: insert_dup_reg_v2f64:
|
||||
; SSE3: # BB#0:
|
||||
; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: insert_dup_reg_v2f64:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: insert_dup_reg_v2f64:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: insert_dup_reg_v2f64:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX-NEXT: retq
|
||||
%v = insertelement <2 x double> undef, double %a, i32 0
|
||||
%shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
|
||||
ret <2 x double> %shuffle
|
||||
}
|
||||
define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
|
||||
|
@ -3,13 +3,13 @@
|
||||
|
||||
target triple = "x86_64-unknown-unknown"
|
||||
|
||||
define <4 x double> @shuffle_v4f64_0000(<4 x double> %a, <4 x double> %b) {
|
||||
; AVX1-LABEL: shuffle_v4f64_0000:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
define <4 x double> @shuffle_v4f64_0000(<4 x double> %a, <4 x double> %b) {
|
||||
; AVX1-LABEL: shuffle_v4f64_0000:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v4f64_0000:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
|
||||
@ -18,13 +18,13 @@ define <4 x double> @shuffle_v4f64_0000(<4 x double> %a, <4 x double> %b) {
|
||||
ret <4 x double> %shuffle
|
||||
}
|
||||
|
||||
define <4 x double> @shuffle_v4f64_0001(<4 x double> %a, <4 x double> %b) {
|
||||
; AVX1-LABEL: shuffle_v4f64_0001:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
define <4 x double> @shuffle_v4f64_0001(<4 x double> %a, <4 x double> %b) {
|
||||
; AVX1-LABEL: shuffle_v4f64_0001:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v4f64_0001:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
|
||||
@ -35,13 +35,13 @@ define <4 x double> @shuffle_v4f64_0001(<4 x double> %a, <4 x double> %b) {
|
||||
|
||||
define <4 x double> @shuffle_v4f64_0020(<4 x double> %a, <4 x double> %b) {
|
||||
; AVX1-LABEL: shuffle_v4f64_0020:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
||||
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
||||
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v4f64_0020:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
|
||||
@ -67,13 +67,13 @@ define <4 x double> @shuffle_v4f64_0300(<4 x double> %a, <4 x double> %b) {
|
||||
}
|
||||
|
||||
define <4 x double> @shuffle_v4f64_1000(<4 x double> %a, <4 x double> %b) {
|
||||
; AVX1-LABEL: shuffle_v4f64_1000:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
|
||||
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: shuffle_v4f64_1000:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
|
||||
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v4f64_1000:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
|
||||
@ -83,13 +83,13 @@ define <4 x double> @shuffle_v4f64_1000(<4 x double> %a, <4 x double> %b) {
|
||||
}
|
||||
|
||||
define <4 x double> @shuffle_v4f64_2200(<4 x double> %a, <4 x double> %b) {
|
||||
; AVX1-LABEL: shuffle_v4f64_2200:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v4f64_2200:
|
||||
; AVX1-LABEL: shuffle_v4f64_2200:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
|
||||
; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v4f64_2200:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
|
||||
; AVX2-NEXT: retq
|
||||
@ -138,13 +138,13 @@ define <4 x double> @shuffle_v4f64_0023(<4 x double> %a, <4 x double> %b) {
|
||||
ret <4 x double> %shuffle
|
||||
}
|
||||
|
||||
define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {
|
||||
; ALL-LABEL: shuffle_v4f64_0022:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
|
||||
; ALL-NEXT: retq
|
||||
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
|
||||
ret <4 x double> %shuffle
|
||||
define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {
|
||||
; ALL-LABEL: shuffle_v4f64_0022:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
|
||||
; ALL-NEXT: retq
|
||||
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
|
||||
ret <4 x double> %shuffle
|
||||
}
|
||||
|
||||
define <4 x double> @shuffle_v4f64_1032(<4 x double> %a, <4 x double> %b) {
|
||||
@ -183,13 +183,13 @@ define <4 x double> @shuffle_v4f64_1022(<4 x double> %a, <4 x double> %b) {
|
||||
ret <4 x double> %shuffle
|
||||
}
|
||||
|
||||
define <4 x double> @shuffle_v4f64_0423(<4 x double> %a, <4 x double> %b) {
|
||||
; AVX1-LABEL: shuffle_v4f64_0423:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,0,2,2]
|
||||
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
define <4 x double> @shuffle_v4f64_0423(<4 x double> %a, <4 x double> %b) {
|
||||
; AVX1-LABEL: shuffle_v4f64_0423:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
|
||||
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v4f64_0423:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vbroadcastsd %xmm1, %ymm1
|
||||
@ -199,14 +199,14 @@ define <4 x double> @shuffle_v4f64_0423(<4 x double> %a, <4 x double> %b) {
|
||||
ret <4 x double> %shuffle
|
||||
}
|
||||
|
||||
define <4 x double> @shuffle_v4f64_0462(<4 x double> %a, <4 x double> %b) {
|
||||
; ALL-LABEL: shuffle_v4f64_0462:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,0,2,2]
|
||||
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
|
||||
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
|
||||
; ALL-NEXT: retq
|
||||
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 6, i32 2>
|
||||
define <4 x double> @shuffle_v4f64_0462(<4 x double> %a, <4 x double> %b) {
|
||||
; ALL-LABEL: shuffle_v4f64_0462:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
|
||||
; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
|
||||
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
|
||||
; ALL-NEXT: retq
|
||||
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 6, i32 2>
|
||||
ret <4 x double> %shuffle
|
||||
}
|
||||
|
||||
@ -358,13 +358,13 @@ define <4 x double> @shuffle_v4f64_0415(<4 x double> %a, <4 x double> %b) {
|
||||
ret <4 x double> %shuffle
|
||||
}
|
||||
|
||||
define <4 x i64> @shuffle_v4i64_0000(<4 x i64> %a, <4 x i64> %b) {
|
||||
; AVX1-LABEL: shuffle_v4i64_0000:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
define <4 x i64> @shuffle_v4i64_0000(<4 x i64> %a, <4 x i64> %b) {
|
||||
; AVX1-LABEL: shuffle_v4i64_0000:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v4i64_0000:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
|
||||
@ -373,13 +373,13 @@ define <4 x i64> @shuffle_v4i64_0000(<4 x i64> %a, <4 x i64> %b) {
|
||||
ret <4 x i64> %shuffle
|
||||
}
|
||||
|
||||
define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) {
|
||||
; AVX1-LABEL: shuffle_v4i64_0001:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) {
|
||||
; AVX1-LABEL: shuffle_v4i64_0001:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v4i64_0001:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
|
||||
@ -390,13 +390,13 @@ define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) {
|
||||
|
||||
define <4 x i64> @shuffle_v4i64_0020(<4 x i64> %a, <4 x i64> %b) {
|
||||
; AVX1-LABEL: shuffle_v4i64_0020:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
||||
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
||||
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v4i64_0020:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]
|
||||
@ -438,13 +438,13 @@ define <4 x i64> @shuffle_v4i64_0300(<4 x i64> %a, <4 x i64> %b) {
|
||||
}
|
||||
|
||||
define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x i64> %b) {
|
||||
; AVX1-LABEL: shuffle_v4i64_1000:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
|
||||
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: shuffle_v4i64_1000:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
|
||||
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v4i64_1000:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
|
||||
@ -454,13 +454,13 @@ define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x i64> %b) {
|
||||
}
|
||||
|
||||
define <4 x i64> @shuffle_v4i64_2200(<4 x i64> %a, <4 x i64> %b) {
|
||||
; AVX1-LABEL: shuffle_v4i64_2200:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
|
||||
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v4i64_2200:
|
||||
; AVX1-LABEL: shuffle_v4i64_2200:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
|
||||
; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v4i64_2200:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
|
||||
; AVX2-NEXT: retq
|
||||
@ -500,13 +500,13 @@ define <4 x i64> @shuffle_v4i64_3210(<4 x i64> %a, <4 x i64> %b) {
|
||||
ret <4 x i64> %shuffle
|
||||
}
|
||||
|
||||
define <4 x i64> @shuffle_v4i64_0124(<4 x i64> %a, <4 x i64> %b) {
|
||||
; AVX1-LABEL: shuffle_v4i64_0124:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
|
||||
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3]
|
||||
; AVX1-NEXT: retq
|
||||
define <4 x i64> @shuffle_v4i64_0124(<4 x i64> %a, <4 x i64> %b) {
|
||||
; AVX1-LABEL: shuffle_v4i64_0124:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
|
||||
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v4i64_0124:
|
||||
; AVX2: # BB#0:
|
||||
@ -538,13 +538,13 @@ define <4 x i64> @shuffle_v4i64_0142(<4 x i64> %a, <4 x i64> %b) {
|
||||
define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) {
|
||||
; AVX1-LABEL: shuffle_v4i64_0412:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||
; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,0,2,2]
|
||||
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||
; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
|
||||
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v4i64_0412:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
|
||||
@ -557,13 +557,13 @@ define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) {
|
||||
|
||||
define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {
|
||||
; AVX1-LABEL: shuffle_v4i64_4012:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||
; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
|
||||
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
|
||||
; AVX1-NEXT: retq
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||
; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
|
||||
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v4i64_4012:
|
||||
; AVX2: # BB#0:
|
||||
@ -872,13 +872,13 @@ define <4 x double> @splat_mem_v4f64_2(double* %p) {
|
||||
ret <4 x double> %3
|
||||
}
|
||||
|
||||
define <4 x double> @splat_v4f64(<2 x double> %r) {
|
||||
; AVX1-LABEL: splat_v4f64:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
define <4 x double> @splat_v4f64(<2 x double> %r) {
|
||||
; AVX1-LABEL: splat_v4f64:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: splat_v4f64:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
|
||||
|
@ -145,13 +145,13 @@ define <8 x float> @shuffle_v8f32_70000000(<8 x float> %a, <8 x float> %b) {
|
||||
ret <8 x float> %shuffle
|
||||
}
|
||||
|
||||
define <8 x float> @shuffle_v8f32_01014545(<8 x float> %a, <8 x float> %b) {
|
||||
; ALL-LABEL: shuffle_v8f32_01014545:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
|
||||
; ALL-NEXT: retq
|
||||
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
|
||||
ret <8 x float> %shuffle
|
||||
define <8 x float> @shuffle_v8f32_01014545(<8 x float> %a, <8 x float> %b) {
|
||||
; ALL-LABEL: shuffle_v8f32_01014545:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
|
||||
; ALL-NEXT: retq
|
||||
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
|
||||
ret <8 x float> %shuffle
|
||||
}
|
||||
|
||||
define <8 x float> @shuffle_v8f32_00112233(<8 x float> %a, <8 x float> %b) {
|
||||
@ -199,13 +199,13 @@ define <8 x float> @shuffle_v8f32_81a3c5e7(<8 x float> %a, <8 x float> %b) {
|
||||
|
||||
define <8 x float> @shuffle_v8f32_08080808(<8 x float> %a, <8 x float> %b) {
|
||||
; AVX1-LABEL: shuffle_v8f32_08080808:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
|
||||
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
|
||||
; AVX1-NEXT: retq
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
|
||||
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v8f32_08080808:
|
||||
; AVX2: # BB#0:
|
||||
@ -333,13 +333,13 @@ define <8 x float> @shuffle_v8f32_091b2d3f(<8 x float> %a, <8 x float> %b) {
|
||||
ret <8 x float> %shuffle
|
||||
}
|
||||
|
||||
define <8 x float> @shuffle_v8f32_09ab1def(<8 x float> %a, <8 x float> %b) {
|
||||
; AVX1-LABEL: shuffle_v8f32_09ab1def:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[1,1,2,3]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
|
||||
; AVX1-NEXT: retq
|
||||
define <8 x float> @shuffle_v8f32_09ab1def(<8 x float> %a, <8 x float> %b) {
|
||||
; AVX1-LABEL: shuffle_v8f32_09ab1def:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v8f32_09ab1def:
|
||||
; AVX2: # BB#0:
|
||||
@ -423,13 +423,13 @@ define <8 x float> @shuffle_v8f32_00234467(<8 x float> %a, <8 x float> %b) {
|
||||
ret <8 x float> %shuffle
|
||||
}
|
||||
|
||||
define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) {
|
||||
; ALL-LABEL: shuffle_v8f32_00224466:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
|
||||
; ALL-NEXT: retq
|
||||
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
|
||||
ret <8 x float> %shuffle
|
||||
define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) {
|
||||
; ALL-LABEL: shuffle_v8f32_00224466:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
|
||||
; ALL-NEXT: retq
|
||||
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
|
||||
ret <8 x float> %shuffle
|
||||
}
|
||||
|
||||
define <8 x float> @shuffle_v8f32_10325476(<8 x float> %a, <8 x float> %b) {
|
||||
@ -441,13 +441,13 @@ define <8 x float> @shuffle_v8f32_10325476(<8 x float> %a, <8 x float> %b) {
|
||||
ret <8 x float> %shuffle
|
||||
}
|
||||
|
||||
define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) {
|
||||
; ALL-LABEL: shuffle_v8f32_11335577:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
|
||||
; ALL-NEXT: retq
|
||||
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
|
||||
ret <8 x float> %shuffle
|
||||
define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) {
|
||||
; ALL-LABEL: shuffle_v8f32_11335577:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
|
||||
; ALL-NEXT: retq
|
||||
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
|
||||
ret <8 x float> %shuffle
|
||||
}
|
||||
|
||||
define <8 x float> @shuffle_v8f32_10235467(<8 x float> %a, <8 x float> %b) {
|
||||
@ -937,13 +937,13 @@ define <8 x i32> @shuffle_v8i32_70000000(<8 x i32> %a, <8 x i32> %b) {
|
||||
ret <8 x i32> %shuffle
|
||||
}
|
||||
|
||||
define <8 x i32> @shuffle_v8i32_01014545(<8 x i32> %a, <8 x i32> %b) {
|
||||
; AVX1-LABEL: shuffle_v8i32_01014545:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v8i32_01014545:
|
||||
define <8 x i32> @shuffle_v8i32_01014545(<8 x i32> %a, <8 x i32> %b) {
|
||||
; AVX1-LABEL: shuffle_v8i32_01014545:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v8i32_01014545:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
|
||||
; AVX2-NEXT: retq
|
||||
@ -1001,13 +1001,13 @@ define <8 x i32> @shuffle_v8i32_81a3c5e7(<8 x i32> %a, <8 x i32> %b) {
|
||||
|
||||
define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) {
|
||||
; AVX1-LABEL: shuffle_v8i32_08080808:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
|
||||
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
|
||||
; AVX1-NEXT: retq
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
|
||||
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v8i32_08080808:
|
||||
; AVX2: # BB#0:
|
||||
@ -1172,13 +1172,13 @@ define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) {
|
||||
ret <8 x i32> %shuffle
|
||||
}
|
||||
|
||||
define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) {
|
||||
; AVX1-LABEL: shuffle_v8i32_09ab1def:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[1,1,2,3]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
|
||||
; AVX1-NEXT: retq
|
||||
define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) {
|
||||
; AVX1-LABEL: shuffle_v8i32_09ab1def:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v8i32_09ab1def:
|
||||
; AVX2: # BB#0:
|
||||
@ -1302,13 +1302,13 @@ define <8 x i32> @shuffle_v8i32_00234467(<8 x i32> %a, <8 x i32> %b) {
|
||||
ret <8 x i32> %shuffle
|
||||
}
|
||||
|
||||
define <8 x i32> @shuffle_v8i32_00224466(<8 x i32> %a, <8 x i32> %b) {
|
||||
; AVX1-LABEL: shuffle_v8i32_00224466:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v8i32_00224466:
|
||||
define <8 x i32> @shuffle_v8i32_00224466(<8 x i32> %a, <8 x i32> %b) {
|
||||
; AVX1-LABEL: shuffle_v8i32_00224466:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v8i32_00224466:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
|
||||
; AVX2-NEXT: retq
|
||||
@ -1330,13 +1330,13 @@ define <8 x i32> @shuffle_v8i32_10325476(<8 x i32> %a, <8 x i32> %b) {
|
||||
ret <8 x i32> %shuffle
|
||||
}
|
||||
|
||||
define <8 x i32> @shuffle_v8i32_11335577(<8 x i32> %a, <8 x i32> %b) {
|
||||
; AVX1-LABEL: shuffle_v8i32_11335577:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v8i32_11335577:
|
||||
define <8 x i32> @shuffle_v8i32_11335577(<8 x i32> %a, <8 x i32> %b) {
|
||||
; AVX1-LABEL: shuffle_v8i32_11335577:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v8i32_11335577:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
|
||||
; AVX2-NEXT: retq
|
||||
|
@ -404,15 +404,15 @@ define <8 x double> @shuffle_v8f64_00234467(<8 x double> %a, <8 x double> %b) {
|
||||
ret <8 x double> %shuffle
|
||||
}
|
||||
|
||||
define <8 x double> @shuffle_v8f64_00224466(<8 x double> %a, <8 x double> %b) {
|
||||
; ALL-LABEL: shuffle_v8f64_00224466:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[0,0,2,2]
|
||||
; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
|
||||
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
|
||||
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
|
||||
; ALL-NEXT: retq
|
||||
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
|
||||
define <8 x double> @shuffle_v8f64_00224466(<8 x double> %a, <8 x double> %b) {
|
||||
; ALL-LABEL: shuffle_v8f64_00224466:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vmovddup {{.*#+}} ymm1 = ymm0[0,0,2,2]
|
||||
; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
|
||||
; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
|
||||
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
|
||||
; ALL-NEXT: retq
|
||||
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
|
||||
ret <8 x double> %shuffle
|
||||
}
|
||||
|
||||
@ -559,13 +559,13 @@ define <8 x double> @shuffle_v8f64_00236744(<8 x double> %a, <8 x double> %b) {
|
||||
ret <8 x double> %shuffle
|
||||
}
|
||||
|
||||
define <8 x double> @shuffle_v8f64_00226644(<8 x double> %a, <8 x double> %b) {
|
||||
; ALL-LABEL: shuffle_v8f64_00226644:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[0,0,2,2]
|
||||
; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
|
||||
; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
|
||||
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
|
||||
define <8 x double> @shuffle_v8f64_00226644(<8 x double> %a, <8 x double> %b) {
|
||||
; ALL-LABEL: shuffle_v8f64_00226644:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vmovddup {{.*#+}} ymm1 = ymm0[0,0,2,2]
|
||||
; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
|
||||
; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
|
||||
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
|
||||
; ALL-NEXT: retq
|
||||
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
|
||||
ret <8 x double> %shuffle
|
||||
@ -615,13 +615,13 @@ define <8 x double> @shuffle_v8f64_01235466(<8 x double> %a, <8 x double> %b) {
|
||||
ret <8 x double> %shuffle
|
||||
}
|
||||
|
||||
define <8 x double> @shuffle_v8f64_002u6u44(<8 x double> %a, <8 x double> %b) {
|
||||
; ALL-LABEL: shuffle_v8f64_002u6u44:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[0,0,2,2]
|
||||
; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
|
||||
; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,0,0]
|
||||
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
|
||||
define <8 x double> @shuffle_v8f64_002u6u44(<8 x double> %a, <8 x double> %b) {
|
||||
; ALL-LABEL: shuffle_v8f64_002u6u44:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vmovddup {{.*#+}} ymm1 = ymm0[0,0,2,2]
|
||||
; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
|
||||
; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,0,0]
|
||||
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
|
||||
; ALL-NEXT: retq
|
||||
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
|
||||
ret <8 x double> %shuffle
|
||||
@ -673,13 +673,13 @@ define <8 x double> @shuffle_v8f64_0uu354uu(<8 x double> %a, <8 x double> %b) {
|
||||
}
|
||||
|
||||
define <8 x double> @shuffle_v8f64_uuu3uu66(<8 x double> %a, <8 x double> %b) {
|
||||
; ALL-LABEL: shuffle_v8f64_uuu3uu66:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
|
||||
; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,0,2,2]
|
||||
; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
|
||||
; ALL-NEXT: retq
|
||||
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
|
||||
; ALL-LABEL: shuffle_v8f64_uuu3uu66:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
|
||||
; ALL-NEXT: vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
|
||||
; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
|
||||
; ALL-NEXT: retq
|
||||
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
|
||||
ret <8 x double> %shuffle
|
||||
}
|
||||
|
||||
@ -705,13 +705,13 @@ define <8 x double> @shuffle_v8f64_f511235a(<8 x double> %a, <8 x double> %b) {
|
||||
; ALL-LABEL: shuffle_v8f64_f511235a:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
|
||||
; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm3
|
||||
; ALL-NEXT: vpermpd {{.*#+}} ymm4 = ymm3[0,1,1,3]
|
||||
; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm2[0,1],ymm4[2],ymm2[3]
|
||||
; ALL-NEXT: vpermilpd {{.*#+}} ymm4 = ymm1[0,0,2,2]
|
||||
; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm2[0,1,2],ymm4[3]
|
||||
; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,1]
|
||||
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm3[1],ymm0[2,3]
|
||||
; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm3
|
||||
; ALL-NEXT: vpermpd {{.*#+}} ymm4 = ymm3[0,1,1,3]
|
||||
; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm2[0,1],ymm4[2],ymm2[3]
|
||||
; ALL-NEXT: vmovddup {{.*#+}} ymm4 = ymm1[0,0,2,2]
|
||||
; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm2[0,1,2],ymm4[3]
|
||||
; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,1]
|
||||
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm3[1],ymm0[2,3]
|
||||
; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm1
|
||||
; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[3,1,2,3]
|
||||
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
|
||||
|
@ -1641,25 +1641,23 @@ define <4 x float> @combine_test2b(<4 x float> %a, <4 x float> %b) {
|
||||
; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0]
|
||||
; SSE2-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: combine_test2b:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
|
||||
; SSSE3-NEXT: movapd %xmm1, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: combine_test2b:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
|
||||
; SSE41-NEXT: movapd %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: combine_test2b:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0,0]
|
||||
; AVX-NEXT: retq
|
||||
%1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
|
||||
%2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 0, i32 5>
|
||||
;
|
||||
; SSSE3-LABEL: combine_test2b:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: combine_test2b:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: combine_test2b:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0]
|
||||
; AVX-NEXT: retq
|
||||
%1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
|
||||
%2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 0, i32 5>
|
||||
ret <4 x float> %2
|
||||
}
|
||||
|
||||
@ -2178,23 +2176,23 @@ define <4 x float> @combine_undef_input_test7(<4 x float> %a) {
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: combine_undef_input_test7:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: combine_undef_input_test7:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: combine_undef_input_test7:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX-NEXT: retq
|
||||
%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
|
||||
%2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 1, i32 2, i32 4, i32 5>
|
||||
;
|
||||
; SSSE3-LABEL: combine_undef_input_test7:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: combine_undef_input_test7:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: combine_undef_input_test7:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX-NEXT: retq
|
||||
%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
|
||||
%2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 1, i32 2, i32 4, i32 5>
|
||||
ret <4 x float> %2
|
||||
}
|
||||
|
||||
@ -2203,23 +2201,23 @@ define <4 x float> @combine_undef_input_test8(<4 x float> %a) {
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: combine_undef_input_test8:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: combine_undef_input_test8:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: combine_undef_input_test8:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX-NEXT: retq
|
||||
%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
|
||||
%2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
|
||||
;
|
||||
; SSSE3-LABEL: combine_undef_input_test8:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: combine_undef_input_test8:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: combine_undef_input_test8:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX-NEXT: retq
|
||||
%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
|
||||
%2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
|
||||
ret <4 x float> %2
|
||||
}
|
||||
|
||||
@ -2369,23 +2367,23 @@ define <4 x float> @combine_undef_input_test17(<4 x float> %a) {
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: combine_undef_input_test17:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: combine_undef_input_test17:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: combine_undef_input_test17:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX-NEXT: retq
|
||||
%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
|
||||
%2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 5, i32 6, i32 0, i32 1>
|
||||
;
|
||||
; SSSE3-LABEL: combine_undef_input_test17:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: combine_undef_input_test17:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: combine_undef_input_test17:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX-NEXT: retq
|
||||
%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
|
||||
%2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 5, i32 6, i32 0, i32 1>
|
||||
ret <4 x float> %2
|
||||
}
|
||||
|
||||
@ -2394,23 +2392,23 @@ define <4 x float> @combine_undef_input_test18(<4 x float> %a) {
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: combine_undef_input_test18:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: combine_undef_input_test18:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: combine_undef_input_test18:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX-NEXT: retq
|
||||
%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
|
||||
%2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 4, i32 6, i32 0, i32 5>
|
||||
;
|
||||
; SSSE3-LABEL: combine_undef_input_test18:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: combine_undef_input_test18:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: combine_undef_input_test18:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX-NEXT: retq
|
||||
%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
|
||||
%2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 4, i32 6, i32 0, i32 5>
|
||||
ret <4 x float> %2
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user