[X86][SSE] Shuffle mask decode support for zero extend, scalar float/double moves and integer load instructions

This patch adds shuffle mask decodes for integer zero extends (pmovzx** and movq xmm,xmm) and scalar float/double loads/moves (movss/movsd).

Also adds shuffle mask decodes for integer loads (movd/movq).

Differential Revision: http://reviews.llvm.org/D7228

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@227688 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Simon Pilgrim
2015-01-31 14:09:36 +00:00
parent 1937233a22
commit 982005c23e
10 changed files with 834 additions and 627 deletions

View File

@@ -21,6 +21,92 @@
using namespace llvm;
/// \brief Extracts the src/dst types for a given zero extension instruction.
/// \note While the number of elements in DstVT type correct, the
/// number in the SrcVT type is expanded to fill the src xmm register and the
/// upper elements may not be included in the dst xmm/ymm register.
static void getZeroExtensionTypes(const MCInst *MI, MVT &SrcVT, MVT &DstVT) {
switch (MI->getOpcode()) {
default:
llvm_unreachable("Unknown zero extension instruction");
// i8 zero extension
case X86::PMOVZXBWrm:
case X86::PMOVZXBWrr:
case X86::VPMOVZXBWrm:
case X86::VPMOVZXBWrr:
SrcVT = MVT::v16i8;
DstVT = MVT::v8i16;
break;
case X86::VPMOVZXBWYrm:
case X86::VPMOVZXBWYrr:
SrcVT = MVT::v16i8;
DstVT = MVT::v16i16;
break;
case X86::PMOVZXBDrm:
case X86::PMOVZXBDrr:
case X86::VPMOVZXBDrm:
case X86::VPMOVZXBDrr:
SrcVT = MVT::v16i8;
DstVT = MVT::v4i32;
break;
case X86::VPMOVZXBDYrm:
case X86::VPMOVZXBDYrr:
SrcVT = MVT::v16i8;
DstVT = MVT::v8i32;
break;
case X86::PMOVZXBQrm:
case X86::PMOVZXBQrr:
case X86::VPMOVZXBQrm:
case X86::VPMOVZXBQrr:
SrcVT = MVT::v16i8;
DstVT = MVT::v2i64;
break;
case X86::VPMOVZXBQYrm:
case X86::VPMOVZXBQYrr:
SrcVT = MVT::v16i8;
DstVT = MVT::v4i64;
break;
// i16 zero extension
case X86::PMOVZXWDrm:
case X86::PMOVZXWDrr:
case X86::VPMOVZXWDrm:
case X86::VPMOVZXWDrr:
SrcVT = MVT::v8i16;
DstVT = MVT::v4i32;
break;
case X86::VPMOVZXWDYrm:
case X86::VPMOVZXWDYrr:
SrcVT = MVT::v8i16;
DstVT = MVT::v8i32;
break;
case X86::PMOVZXWQrm:
case X86::PMOVZXWQrr:
case X86::VPMOVZXWQrm:
case X86::VPMOVZXWQrr:
SrcVT = MVT::v8i16;
DstVT = MVT::v2i64;
break;
case X86::VPMOVZXWQYrm:
case X86::VPMOVZXWQYrr:
SrcVT = MVT::v8i16;
DstVT = MVT::v4i64;
break;
// i32 zero extension
case X86::PMOVZXDQrm:
case X86::PMOVZXDQrr:
case X86::VPMOVZXDQrm:
case X86::VPMOVZXDQrr:
SrcVT = MVT::v4i32;
DstVT = MVT::v2i64;
break;
case X86::VPMOVZXDQYrm:
case X86::VPMOVZXDQYrr:
SrcVT = MVT::v4i32;
DstVT = MVT::v4i64;
break;
}
}
//===----------------------------------------------------------------------===//
// Top Level Entrypoint
//===----------------------------------------------------------------------===//
@@ -750,6 +836,92 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::MOVSDrr:
case X86::VMOVSDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::MOVSDrm:
case X86::VMOVSDrm:
DecodeScalarMoveMask(MVT::v2f64, nullptr == Src2Name, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::MOVSSrr:
case X86::VMOVSSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::MOVSSrm:
case X86::VMOVSSrm:
DecodeScalarMoveMask(MVT::v4f32, nullptr == Src2Name, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::MOVPQI2QIrr:
case X86::MOVZPQILo2PQIrr:
case X86::VMOVPQI2QIrr:
case X86::VMOVZPQILo2PQIrr:
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::MOVQI2PQIrm:
case X86::MOVZQI2PQIrm:
case X86::MOVZPQILo2PQIrm:
case X86::VMOVQI2PQIrm:
case X86::VMOVZQI2PQIrm:
case X86::VMOVZPQILo2PQIrm:
DecodeZeroMoveLowMask(MVT::v2i64, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::MOVDI2PDIrm:
case X86::VMOVDI2PDIrm:
DecodeZeroMoveLowMask(MVT::v4i32, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::PMOVZXBWrr:
case X86::PMOVZXBDrr:
case X86::PMOVZXBQrr:
case X86::PMOVZXWDrr:
case X86::PMOVZXWQrr:
case X86::PMOVZXDQrr:
case X86::VPMOVZXBWrr:
case X86::VPMOVZXBDrr:
case X86::VPMOVZXBQrr:
case X86::VPMOVZXWDrr:
case X86::VPMOVZXWQrr:
case X86::VPMOVZXDQrr:
case X86::VPMOVZXBWYrr:
case X86::VPMOVZXBDYrr:
case X86::VPMOVZXBQYrr:
case X86::VPMOVZXWDYrr:
case X86::VPMOVZXWQYrr:
case X86::VPMOVZXDQYrr:
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::PMOVZXBWrm:
case X86::PMOVZXBDrm:
case X86::PMOVZXBQrm:
case X86::PMOVZXWDrm:
case X86::PMOVZXWQrm:
case X86::PMOVZXDQrm:
case X86::VPMOVZXBWrm:
case X86::VPMOVZXBDrm:
case X86::VPMOVZXBQrm:
case X86::VPMOVZXWDrm:
case X86::VPMOVZXWQrm:
case X86::VPMOVZXDQrm:
case X86::VPMOVZXBWYrm:
case X86::VPMOVZXBDYrm:
case X86::VPMOVZXBQYrm:
case X86::VPMOVZXWDYrm:
case X86::VPMOVZXWQYrm:
case X86::VPMOVZXDQYrm: {
MVT SrcVT, DstVT;
getZeroExtensionTypes(MI, SrcVT, DstVT);
DecodeZeroExtendMask(SrcVT, DstVT, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
} break;
}
// The only comments we decode are shuffles, so give up if we were unable to

View File

@@ -399,4 +399,36 @@ void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
}
}
void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT, SmallVectorImpl<int> &Mask) {
unsigned NumSrcElts = SrcVT.getVectorNumElements();
unsigned NumDstElts = DstVT.getVectorNumElements();
unsigned SrcScalarBits = SrcVT.getScalarSizeInBits();
unsigned DstScalarBits = DstVT.getScalarSizeInBits();
unsigned Scale = DstScalarBits / SrcScalarBits;
assert(SrcScalarBits < DstScalarBits &&
"Expected zero extension mask to increase scalar size");
assert(NumSrcElts >= NumDstElts && "Too many zero extension lanes");
for (unsigned i = 0; i != NumDstElts; i++) {
Mask.push_back(i);
for (unsigned j = 1; j != Scale; j++)
Mask.push_back(SM_SentinelZero);
}
}
void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
ShuffleMask.push_back(0);
for (unsigned i = 1; i < NumElts; i++)
ShuffleMask.push_back(SM_SentinelZero);
}
void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl<int> &Mask) {
// First element comes from the first element of second source.
// Remaining elements: Load zero extends / Move copies from first source.
unsigned NumElts = VT.getVectorNumElements();
Mask.push_back(NumElts);
for (unsigned i = 1; i < NumElts; i++)
Mask.push_back(IsLoad ? SM_SentinelZero : i);
}
} // llvm namespace

View File

@@ -90,6 +90,16 @@ void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a VPERMILP variable mask from an IR-level vector constant.
void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a zero extension instruction as a shuffle mask.
void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT,
SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a move lower and zero upper instruction as a shuffle mask.
void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a scalar float move instruction as a shuffle mask.
void DecodeScalarMoveMask(MVT VT, bool IsLoad,
SmallVectorImpl<int> &ShuffleMask);
} // llvm namespace
#endif

View File

@@ -5496,16 +5496,9 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
IsUnary = true;
break;
case X86ISD::MOVSS:
case X86ISD::MOVSD: {
// The index 0 always comes from the first element of the second source,
// this is why MOVSS and MOVSD are used in the first place. The other
// elements come from the other positions of the first source vector
Mask.push_back(NumElems);
for (unsigned i = 1; i != NumElems; ++i) {
Mask.push_back(i);
}
case X86ISD::MOVSD:
DecodeScalarMoveMask(VT, /* IsLoad */ false, Mask);
break;
}
case X86ISD::VPERM2X128:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);

View File

@@ -350,12 +350,12 @@ define <16 x i8> @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20(
; SSE2-NEXT: movdqa %xmm0, %xmm4
; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm2[8],xmm4[9],xmm2[9],xmm4[10],xmm2[10],xmm4[11],xmm2[11],xmm4[12],xmm2[12],xmm4[13],xmm2[13],xmm4[14],xmm2[14],xmm4[15],xmm2[15]
; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[3,2,1,0,4,5,6,7]
; SSE2-NEXT: movsd %xmm4, %xmm3
; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm4[0],xmm3[1]
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE2-NEXT: movsd %xmm0, %xmm1
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT: packuswb %xmm3, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: retq
@@ -800,12 +800,12 @@ define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu(
;
; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
; SSE41: # BB#0:
; SSE41-NEXT: pmovzxbq %xmm0, %xmm0
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
; AVX: # BB#0:
; AVX-NEXT: vpmovzxbq %xmm0, %xmm0
; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <16 x i8> %shuffle
@@ -827,12 +827,12 @@ define <16 x i8> @shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz(
;
; SSE41-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz:
; SSE41: # BB#0:
; SSE41-NEXT: pmovzxbq %xmm0, %xmm0
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz:
; AVX: # BB#0:
; AVX-NEXT: vpmovzxbq %xmm0, %xmm0
; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 1, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
ret <16 x i8> %shuffle
@@ -853,12 +853,12 @@ define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu(
;
; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu:
; SSE41: # BB#0:
; SSE41-NEXT: pmovzxbd %xmm0, %xmm0
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu:
; AVX: # BB#0:
; AVX-NEXT: vpmovzxbd %xmm0, %xmm0
; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef>
ret <16 x i8> %shuffle
@@ -881,12 +881,12 @@ define <16 x i8> @shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz(
;
; SSE41-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz:
; SSE41: # BB#0:
; SSE41-NEXT: pmovzxbd %xmm0, %xmm0
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz:
; AVX: # BB#0:
; AVX-NEXT: vpmovzxbd %xmm0, %xmm0
; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 1, i32 21, i32 22, i32 23, i32 2, i32 25, i32 26, i32 27, i32 3, i32 29, i32 30, i32 31>
ret <16 x i8> %shuffle
@@ -905,12 +905,12 @@ define <16 x i8> @shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu(
;
; SSE41-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu:
; SSE41: # BB#0:
; SSE41-NEXT: pmovzxbw %xmm0, %xmm0
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu:
; AVX: # BB#0:
; AVX-NEXT: vpmovzxbw %xmm0, %xmm0
; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7, i32 undef>
ret <16 x i8> %shuffle
@@ -931,12 +931,12 @@ define <16 x i8> @shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz(
;
; SSE41-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz:
; SSE41: # BB#0:
; SSE41-NEXT: pmovzxbw %xmm0, %xmm0
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz:
; AVX: # BB#0:
; AVX-NEXT: vpmovzxbw %xmm0, %xmm0
; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 1, i32 19, i32 2, i32 21, i32 3, i32 23, i32 4, i32 25, i32 5, i32 27, i32 6, i32 29, i32 7, i32 31>
ret <16 x i8> %shuffle

View File

@@ -211,19 +211,19 @@ define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_03:
; SSE2: # BB#0:
; SSE2-NEXT: movsd %xmm0, %xmm1
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2f64_03:
; SSE3: # BB#0:
; SSE3-NEXT: movsd %xmm0, %xmm1
; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2f64_03:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd %xmm0, %xmm1
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
@@ -242,17 +242,17 @@ define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_21:
; SSE2: # BB#0:
; SSE2-NEXT: movsd %xmm1, %xmm0
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2f64_21:
; SSE3: # BB#0:
; SSE3-NEXT: movsd %xmm1, %xmm0
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2f64_21:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd %xmm1, %xmm0
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2f64_21:
@@ -299,19 +299,19 @@ define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_03:
; SSE2: # BB#0:
; SSE2-NEXT: movsd %xmm0, %xmm1
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_03:
; SSE3: # BB#0:
; SSE3-NEXT: movsd %xmm0, %xmm1
; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_03:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd %xmm0, %xmm1
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
@@ -335,19 +335,19 @@ define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_03_copy:
; SSE2: # BB#0:
; SSE2-NEXT: movsd %xmm1, %xmm2
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_03_copy:
; SSE3: # BB#0:
; SSE3-NEXT: movsd %xmm1, %xmm2
; SSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSE3-NEXT: movaps %xmm2, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_03_copy:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd %xmm1, %xmm2
; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; SSSE3-NEXT: movaps %xmm2, %xmm0
; SSSE3-NEXT: retq
;
@@ -489,17 +489,17 @@ define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_21:
; SSE2: # BB#0:
; SSE2-NEXT: movsd %xmm1, %xmm0
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_21:
; SSE3: # BB#0:
; SSE3-NEXT: movsd %xmm1, %xmm0
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_21:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd %xmm1, %xmm0
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_21:
@@ -522,19 +522,19 @@ define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_21_copy:
; SSE2: # BB#0:
; SSE2-NEXT: movsd %xmm2, %xmm1
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_21_copy:
; SSE3: # BB#0:
; SSE3-NEXT: movsd %xmm2, %xmm1
; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_21_copy:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd %xmm2, %xmm1
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
@@ -650,12 +650,12 @@ define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
; SSE-LABEL: shuffle_v2i64_0z:
; SSE: # BB#0:
; SSE-NEXT: movq %xmm0, %xmm0
; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_0z:
; AVX: # BB#0:
; AVX-NEXT: vmovq %xmm0, %xmm0
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
ret <2 x i64> %shuffle
@@ -693,19 +693,19 @@ define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
; SSE2-LABEL: shuffle_v2i64_z1:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: movsd %xmm1, %xmm0
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_z1:
; SSE3: # BB#0:
; SSE3-NEXT: xorps %xmm1, %xmm1
; SSE3-NEXT: movsd %xmm1, %xmm0
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_z1:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm1
; SSSE3-NEXT: movsd %xmm1, %xmm0
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_z1:
@@ -732,12 +732,12 @@ define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_0z:
; SSE: # BB#0:
; SSE-NEXT: movq %xmm0, %xmm0
; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_0z:
; AVX: # BB#0:
; AVX-NEXT: vmovq %xmm0, %xmm0
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
ret <2 x double> %shuffle
@@ -780,19 +780,19 @@ define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
; SSE2-LABEL: shuffle_v2f64_z1:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: movsd %xmm1, %xmm0
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2f64_z1:
; SSE3: # BB#0:
; SSE3-NEXT: xorps %xmm1, %xmm1
; SSE3-NEXT: movsd %xmm1, %xmm0
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2f64_z1:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm1
; SSSE3-NEXT: movsd %xmm1, %xmm0
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2f64_z1:
@@ -828,12 +828,12 @@ define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
; SSE-LABEL: insert_mem_and_zero_v2i64:
; SSE: # BB#0:
; SSE-NEXT: movq (%rdi), %xmm0
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: retq
;
; AVX-LABEL: insert_mem_and_zero_v2i64:
; AVX: # BB#0:
; AVX-NEXT: vmovq (%rdi), %xmm0
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: retq
%a = load i64* %ptr
%v = insertelement <2 x i64> undef, i64 %a, i32 0
@@ -844,12 +844,12 @@ define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
; SSE-LABEL: insert_reg_and_zero_v2f64:
; SSE: # BB#0:
; SSE-NEXT: movq %xmm0, %xmm0
; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT: retq
;
; AVX-LABEL: insert_reg_and_zero_v2f64:
; AVX: # BB#0:
; AVX-NEXT: vmovq %xmm0, %xmm0
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: retq
%v = insertelement <2 x double> undef, double %a, i32 0
%shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
@@ -859,12 +859,12 @@ define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
; SSE-LABEL: insert_mem_and_zero_v2f64:
; SSE: # BB#0:
; SSE-NEXT: movsd (%rdi), %xmm0
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: retq
;
; AVX-LABEL: insert_mem_and_zero_v2f64:
; AVX: # BB#0:
; AVX-NEXT: vmovsd (%rdi), %xmm0
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: retq
%a = load double* %ptr
%v = insertelement <2 x double> undef, double %a, i32 0
@@ -876,19 +876,19 @@ define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
; SSE2-LABEL: insert_reg_lo_v2i64:
; SSE2: # BB#0:
; SSE2-NEXT: movd %rdi, %xmm1
; SSE2-NEXT: movsd %xmm1, %xmm0
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_reg_lo_v2i64:
; SSE3: # BB#0:
; SSE3-NEXT: movd %rdi, %xmm1
; SSE3-NEXT: movsd %xmm1, %xmm0
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_reg_lo_v2i64:
; SSSE3: # BB#0:
; SSSE3-NEXT: movd %rdi, %xmm1
; SSSE3-NEXT: movsd %xmm1, %xmm0
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_reg_lo_v2i64:
@@ -931,19 +931,19 @@ define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
;
; SSE41-LABEL: insert_mem_lo_v2i64:
; SSE41: # BB#0:
; SSE41-NEXT: movq (%rdi), %xmm1
; SSE41-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_mem_lo_v2i64:
; AVX1: # BB#0:
; AVX1-NEXT: vmovq (%rdi), %xmm1
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_mem_lo_v2i64:
; AVX2: # BB#0:
; AVX2-NEXT: vmovq (%rdi), %xmm1
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT: retq
%a = load i64* %ptr
@@ -972,13 +972,13 @@ define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
; SSE-LABEL: insert_mem_hi_v2i64:
; SSE: # BB#0:
; SSE-NEXT: movq (%rdi), %xmm1
; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: insert_mem_hi_v2i64:
; AVX: # BB#0:
; AVX-NEXT: vmovq (%rdi), %xmm1
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
%a = load i64* %ptr
@@ -990,13 +990,13 @@ define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
; SSE-LABEL: insert_reg_lo_v2f64:
; SSE: # BB#0:
; SSE-NEXT: movsd %xmm0, %xmm1
; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_reg_lo_v2f64:
; AVX: # BB#0:
; AVX-NEXT: vmovsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; AVX-NEXT: retq
%v = insertelement <2 x double> undef, double %a, i32 0
%shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
@@ -1085,7 +1085,7 @@ define <2 x double> @insert_dup_reg_v2f64(double %a) {
define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
; SSE2-LABEL: insert_dup_mem_v2f64:
; SSE2: # BB#0:
; SSE2-NEXT: movsd (%rdi), %xmm0
; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT: retq
;

View File

@@ -441,21 +441,21 @@ define <4 x float> @shuffle_v4f32_4zzz(<4 x float> %a) {
; SSE2-LABEL: shuffle_v4f32_4zzz:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: movss %xmm0, %xmm1
; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4f32_4zzz:
; SSE3: # BB#0:
; SSE3-NEXT: xorps %xmm1, %xmm1
; SSE3-NEXT: movss %xmm0, %xmm1
; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4f32_4zzz:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm1
; SSSE3-NEXT: movss %xmm0, %xmm1
; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
@@ -661,21 +661,21 @@ define <4 x i32> @shuffle_v4i32_4zzz(<4 x i32> %a) {
; SSE2-LABEL: shuffle_v4i32_4zzz:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: movss %xmm0, %xmm1
; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4i32_4zzz:
; SSE3: # BB#0:
; SSE3-NEXT: xorps %xmm1, %xmm1
; SSE3-NEXT: movss %xmm0, %xmm1
; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4i32_4zzz:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm1
; SSSE3-NEXT: movss %xmm0, %xmm1
; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
@@ -698,21 +698,21 @@ define <4 x i32> @shuffle_v4i32_z4zz(<4 x i32> %a) {
; SSE2-LABEL: shuffle_v4i32_z4zz:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: movss %xmm0, %xmm1
; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4i32_z4zz:
; SSE3: # BB#0:
; SSE3-NEXT: xorps %xmm1, %xmm1
; SSE3-NEXT: movss %xmm0, %xmm1
; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4i32_z4zz:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm1
; SSSE3-NEXT: movss %xmm0, %xmm1
; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
; SSSE3-NEXT: retq
;
@@ -737,21 +737,21 @@ define <4 x i32> @shuffle_v4i32_zz4z(<4 x i32> %a) {
; SSE2-LABEL: shuffle_v4i32_zz4z:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: movss %xmm0, %xmm1
; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4i32_zz4z:
; SSE3: # BB#0:
; SSE3-NEXT: xorps %xmm1, %xmm1
; SSE3-NEXT: movss %xmm0, %xmm1
; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4i32_zz4z:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm1
; SSSE3-NEXT: movss %xmm0, %xmm1
; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
; SSSE3-NEXT: retq
;
@@ -1033,12 +1033,12 @@ define <4 x i32> @shuffle_v4i32_0u1u(<4 x i32> %a, <4 x i32> %b) {
;
; SSE41-LABEL: shuffle_v4i32_0u1u:
; SSE41: # BB#0:
; SSE41-NEXT: pmovzxdq %xmm0, %xmm0
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v4i32_0u1u:
; AVX: # BB#0:
; AVX-NEXT: vpmovzxdq %xmm0, %xmm0
; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
ret <4 x i32> %shuffle
@@ -1065,12 +1065,12 @@ define <4 x i32> @shuffle_v4i32_0z1z(<4 x i32> %a) {
;
; SSE41-LABEL: shuffle_v4i32_0z1z:
; SSE41: # BB#0:
; SSE41-NEXT: pmovzxdq %xmm0, %xmm0
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v4i32_0z1z:
; AVX: # BB#0:
; AVX-NEXT: vpmovzxdq %xmm0, %xmm0
; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
ret <4 x i32> %shuffle
@@ -1094,12 +1094,12 @@ define <4 x i32> @insert_reg_and_zero_v4i32(i32 %a) {
define <4 x i32> @insert_mem_and_zero_v4i32(i32* %ptr) {
; SSE-LABEL: insert_mem_and_zero_v4i32:
; SSE: # BB#0:
; SSE-NEXT: movd (%rdi), %xmm0
; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: retq
;
; AVX-LABEL: insert_mem_and_zero_v4i32:
; AVX: # BB#0:
; AVX-NEXT: vmovd (%rdi), %xmm0
; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: retq
%a = load i32* %ptr
%v = insertelement <4 x i32> undef, i32 %a, i32 0
@@ -1111,21 +1111,21 @@ define <4 x float> @insert_reg_and_zero_v4f32(float %a) {
; SSE2-LABEL: insert_reg_and_zero_v4f32:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: movss %xmm0, %xmm1
; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_reg_and_zero_v4f32:
; SSE3: # BB#0:
; SSE3-NEXT: xorps %xmm1, %xmm1
; SSE3-NEXT: movss %xmm0, %xmm1
; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_reg_and_zero_v4f32:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm1
; SSSE3-NEXT: movss %xmm0, %xmm1
; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
@@ -1138,7 +1138,7 @@ define <4 x float> @insert_reg_and_zero_v4f32(float %a) {
; AVX-LABEL: insert_reg_and_zero_v4f32:
; AVX: # BB#0:
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vmovss %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: retq
%v = insertelement <4 x float> undef, float %a, i32 0
%shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
@@ -1148,12 +1148,12 @@ define <4 x float> @insert_reg_and_zero_v4f32(float %a) {
define <4 x float> @insert_mem_and_zero_v4f32(float* %ptr) {
; SSE-LABEL: insert_mem_and_zero_v4f32:
; SSE: # BB#0:
; SSE-NEXT: movss (%rdi), %xmm0
; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: retq
;
; AVX-LABEL: insert_mem_and_zero_v4f32:
; AVX: # BB#0:
; AVX-NEXT: vmovss (%rdi), %xmm0
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: retq
%a = load float* %ptr
%v = insertelement <4 x float> undef, float %a, i32 0
@@ -1165,19 +1165,19 @@ define <4 x i32> @insert_reg_lo_v4i32(i64 %a, <4 x i32> %b) {
; SSE2-LABEL: insert_reg_lo_v4i32:
; SSE2: # BB#0:
; SSE2-NEXT: movd %rdi, %xmm1
; SSE2-NEXT: movsd %xmm1, %xmm0
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_reg_lo_v4i32:
; SSE3: # BB#0:
; SSE3-NEXT: movd %rdi, %xmm1
; SSE3-NEXT: movsd %xmm1, %xmm0
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_reg_lo_v4i32:
; SSSE3: # BB#0:
; SSSE3-NEXT: movd %rdi, %xmm1
; SSSE3-NEXT: movsd %xmm1, %xmm0
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_reg_lo_v4i32:
@@ -1221,19 +1221,19 @@ define <4 x i32> @insert_mem_lo_v4i32(<2 x i32>* %ptr, <4 x i32> %b) {
;
; SSE41-LABEL: insert_mem_lo_v4i32:
; SSE41: # BB#0:
; SSE41-NEXT: movq (%rdi), %xmm1
; SSE41-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_mem_lo_v4i32:
; AVX1: # BB#0:
; AVX1-NEXT: vmovq (%rdi), %xmm1
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_mem_lo_v4i32:
; AVX2: # BB#0:
; AVX2-NEXT: vmovq (%rdi), %xmm1
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT: retq
%a = load <2 x i32>* %ptr
@@ -1263,13 +1263,13 @@ define <4 x i32> @insert_reg_hi_v4i32(i64 %a, <4 x i32> %b) {
define <4 x i32> @insert_mem_hi_v4i32(<2 x i32>* %ptr, <4 x i32> %b) {
; SSE-LABEL: insert_mem_hi_v4i32:
; SSE: # BB#0:
; SSE-NEXT: movq (%rdi), %xmm1
; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: insert_mem_hi_v4i32:
; AVX: # BB#0:
; AVX-NEXT: vmovq (%rdi), %xmm1
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
%a = load <2 x i32>* %ptr
@@ -1281,13 +1281,13 @@ define <4 x i32> @insert_mem_hi_v4i32(<2 x i32>* %ptr, <4 x i32> %b) {
define <4 x float> @insert_reg_lo_v4f32(double %a, <4 x float> %b) {
; SSE-LABEL: insert_reg_lo_v4f32:
; SSE: # BB#0:
; SSE-NEXT: movsd %xmm0, %xmm1
; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_reg_lo_v4f32:
; AVX: # BB#0:
; AVX-NEXT: vmovsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; AVX-NEXT: retq
%a.cast = bitcast double %a to <2 x float>
%v = shufflevector <2 x float> %a.cast, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>

View File

@@ -1829,12 +1829,12 @@ define <8 x i16> @shuffle_v8i16_0uuu1uuu(<8 x i16> %a) {
;
; SSE41-LABEL: shuffle_v8i16_0uuu1uuu:
; SSE41: # BB#0:
; SSE41-NEXT: pmovzxwq %xmm0, %xmm0
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_0uuu1uuu:
; AVX: # BB#0:
; AVX-NEXT: vpmovzxwq %xmm0, %xmm0
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %shuffle
@@ -1857,12 +1857,12 @@ define <8 x i16> @shuffle_v8i16_0zzz1zzz(<8 x i16> %a) {
;
; SSE41-LABEL: shuffle_v8i16_0zzz1zzz:
; SSE41: # BB#0:
; SSE41-NEXT: pmovzxwq %xmm0, %xmm0
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_0zzz1zzz:
; AVX: # BB#0:
; AVX-NEXT: vpmovzxwq %xmm0, %xmm0
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
ret <8 x i16> %shuffle
@@ -1881,12 +1881,12 @@ define <8 x i16> @shuffle_v8i16_0u1u2u3u(<8 x i16> %a) {
;
; SSE41-LABEL: shuffle_v8i16_0u1u2u3u:
; SSE41: # BB#0:
; SSE41-NEXT: pmovzxwd %xmm0, %xmm0
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_0u1u2u3u:
; AVX: # BB#0:
; AVX-NEXT: vpmovzxwd %xmm0, %xmm0
; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef>
ret <8 x i16> %shuffle
@@ -1907,12 +1907,12 @@ define <8 x i16> @shuffle_v8i16_0z1z2z3z(<8 x i16> %a) {
;
; SSE41-LABEL: shuffle_v8i16_0z1z2z3z:
; SSE41: # BB#0:
; SSE41-NEXT: pmovzxwd %xmm0, %xmm0
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_0z1z2z3z:
; AVX: # BB#0:
; AVX-NEXT: vpmovzxwd %xmm0, %xmm0
; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
ret <8 x i16> %shuffle

View File

@@ -794,14 +794,14 @@ define <4 x i64> @insert_reg_and_zero_v4i64(i64 %a) {
define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) {
; AVX1-LABEL: insert_mem_and_zero_v4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vmovq (%rdi), %xmm0
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_mem_and_zero_v4i64:
; AVX2: # BB#0:
; AVX2-NEXT: vmovq (%rdi), %xmm0
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX2-NEXT: retq
@@ -815,7 +815,7 @@ define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
; ALL-LABEL: insert_reg_and_zero_v4f64:
; ALL: # BB#0:
; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; ALL-NEXT: vmovsd %xmm0, %xmm1, %xmm0
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; ALL-NEXT: retq
%v = insertelement <4 x double> undef, double %a, i32 0
%shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
@@ -825,7 +825,7 @@ define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
define <4 x double> @insert_mem_and_zero_v4f64(double* %ptr) {
; ALL-LABEL: insert_mem_and_zero_v4f64:
; ALL: # BB#0:
; ALL-NEXT: vmovsd (%rdi), %xmm0
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT: retq
%a = load double* %ptr
%v = insertelement <4 x double> undef, double %a, i32 0

View File

@@ -1853,7 +1853,7 @@ define <8 x float> @splat_v8f32(<4 x float> %r) {
define <8x float> @concat_v2f32_1(<2 x float>* %tmp64, <2 x float>* %tmp65) {
; ALL-LABEL: concat_v2f32_1:
; ALL: # BB#0: # %entry
; ALL-NEXT: vmovq (%rdi), %xmm0
; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0
; ALL-NEXT: retq
entry:
@@ -1868,7 +1868,7 @@ entry:
define <8x float> @concat_v2f32_2(<2 x float>* %tmp64, <2 x float>* %tmp65) {
; ALL-LABEL: concat_v2f32_2:
; ALL: # BB#0: # %entry
; ALL-NEXT: vmovq (%rdi), %xmm0
; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0
; ALL-NEXT: retq
entry:
@@ -1881,7 +1881,7 @@ entry:
define <8x float> @concat_v2f32_3(<2 x float>* %tmp64, <2 x float>* %tmp65) {
; ALL-LABEL: concat_v2f32_3:
; ALL: # BB#0: # %entry
; ALL-NEXT: vmovq (%rdi), %xmm0
; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0
; ALL-NEXT: retq
entry: