mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-08-15 22:28:18 +00:00
[X86][SSE] Shuffle mask decode support for zero extend, scalar float/double moves and integer load instructions
This patch adds shuffle mask decodes for integer zero extends (pmovzx** and movq xmm,xmm) and scalar float/double loads/moves (movss/movsd). Also adds shuffle mask decodes for integer loads (movd/movq). Differential Revision: http://reviews.llvm.org/D7228 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@227688 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -21,6 +21,92 @@
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
/// \brief Extracts the src/dst types for a given zero extension instruction.
|
||||
/// \note While the number of elements in DstVT type correct, the
|
||||
/// number in the SrcVT type is expanded to fill the src xmm register and the
|
||||
/// upper elements may not be included in the dst xmm/ymm register.
|
||||
static void getZeroExtensionTypes(const MCInst *MI, MVT &SrcVT, MVT &DstVT) {
|
||||
switch (MI->getOpcode()) {
|
||||
default:
|
||||
llvm_unreachable("Unknown zero extension instruction");
|
||||
// i8 zero extension
|
||||
case X86::PMOVZXBWrm:
|
||||
case X86::PMOVZXBWrr:
|
||||
case X86::VPMOVZXBWrm:
|
||||
case X86::VPMOVZXBWrr:
|
||||
SrcVT = MVT::v16i8;
|
||||
DstVT = MVT::v8i16;
|
||||
break;
|
||||
case X86::VPMOVZXBWYrm:
|
||||
case X86::VPMOVZXBWYrr:
|
||||
SrcVT = MVT::v16i8;
|
||||
DstVT = MVT::v16i16;
|
||||
break;
|
||||
case X86::PMOVZXBDrm:
|
||||
case X86::PMOVZXBDrr:
|
||||
case X86::VPMOVZXBDrm:
|
||||
case X86::VPMOVZXBDrr:
|
||||
SrcVT = MVT::v16i8;
|
||||
DstVT = MVT::v4i32;
|
||||
break;
|
||||
case X86::VPMOVZXBDYrm:
|
||||
case X86::VPMOVZXBDYrr:
|
||||
SrcVT = MVT::v16i8;
|
||||
DstVT = MVT::v8i32;
|
||||
break;
|
||||
case X86::PMOVZXBQrm:
|
||||
case X86::PMOVZXBQrr:
|
||||
case X86::VPMOVZXBQrm:
|
||||
case X86::VPMOVZXBQrr:
|
||||
SrcVT = MVT::v16i8;
|
||||
DstVT = MVT::v2i64;
|
||||
break;
|
||||
case X86::VPMOVZXBQYrm:
|
||||
case X86::VPMOVZXBQYrr:
|
||||
SrcVT = MVT::v16i8;
|
||||
DstVT = MVT::v4i64;
|
||||
break;
|
||||
// i16 zero extension
|
||||
case X86::PMOVZXWDrm:
|
||||
case X86::PMOVZXWDrr:
|
||||
case X86::VPMOVZXWDrm:
|
||||
case X86::VPMOVZXWDrr:
|
||||
SrcVT = MVT::v8i16;
|
||||
DstVT = MVT::v4i32;
|
||||
break;
|
||||
case X86::VPMOVZXWDYrm:
|
||||
case X86::VPMOVZXWDYrr:
|
||||
SrcVT = MVT::v8i16;
|
||||
DstVT = MVT::v8i32;
|
||||
break;
|
||||
case X86::PMOVZXWQrm:
|
||||
case X86::PMOVZXWQrr:
|
||||
case X86::VPMOVZXWQrm:
|
||||
case X86::VPMOVZXWQrr:
|
||||
SrcVT = MVT::v8i16;
|
||||
DstVT = MVT::v2i64;
|
||||
break;
|
||||
case X86::VPMOVZXWQYrm:
|
||||
case X86::VPMOVZXWQYrr:
|
||||
SrcVT = MVT::v8i16;
|
||||
DstVT = MVT::v4i64;
|
||||
break;
|
||||
// i32 zero extension
|
||||
case X86::PMOVZXDQrm:
|
||||
case X86::PMOVZXDQrr:
|
||||
case X86::VPMOVZXDQrm:
|
||||
case X86::VPMOVZXDQrr:
|
||||
SrcVT = MVT::v4i32;
|
||||
DstVT = MVT::v2i64;
|
||||
break;
|
||||
case X86::VPMOVZXDQYrm:
|
||||
case X86::VPMOVZXDQYrr:
|
||||
SrcVT = MVT::v4i32;
|
||||
DstVT = MVT::v4i64;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Top Level Entrypoint
|
||||
//===----------------------------------------------------------------------===//
|
||||
@@ -750,6 +836,92 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
|
||||
ShuffleMask);
|
||||
DestName = getRegName(MI->getOperand(0).getReg());
|
||||
break;
|
||||
|
||||
case X86::MOVSDrr:
|
||||
case X86::VMOVSDrr:
|
||||
Src2Name = getRegName(MI->getOperand(2).getReg());
|
||||
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||
// FALL THROUGH.
|
||||
case X86::MOVSDrm:
|
||||
case X86::VMOVSDrm:
|
||||
DecodeScalarMoveMask(MVT::v2f64, nullptr == Src2Name, ShuffleMask);
|
||||
DestName = getRegName(MI->getOperand(0).getReg());
|
||||
break;
|
||||
case X86::MOVSSrr:
|
||||
case X86::VMOVSSrr:
|
||||
Src2Name = getRegName(MI->getOperand(2).getReg());
|
||||
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||
// FALL THROUGH.
|
||||
case X86::MOVSSrm:
|
||||
case X86::VMOVSSrm:
|
||||
DecodeScalarMoveMask(MVT::v4f32, nullptr == Src2Name, ShuffleMask);
|
||||
DestName = getRegName(MI->getOperand(0).getReg());
|
||||
break;
|
||||
|
||||
case X86::MOVPQI2QIrr:
|
||||
case X86::MOVZPQILo2PQIrr:
|
||||
case X86::VMOVPQI2QIrr:
|
||||
case X86::VMOVZPQILo2PQIrr:
|
||||
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||
// FALL THROUGH.
|
||||
case X86::MOVQI2PQIrm:
|
||||
case X86::MOVZQI2PQIrm:
|
||||
case X86::MOVZPQILo2PQIrm:
|
||||
case X86::VMOVQI2PQIrm:
|
||||
case X86::VMOVZQI2PQIrm:
|
||||
case X86::VMOVZPQILo2PQIrm:
|
||||
DecodeZeroMoveLowMask(MVT::v2i64, ShuffleMask);
|
||||
DestName = getRegName(MI->getOperand(0).getReg());
|
||||
break;
|
||||
case X86::MOVDI2PDIrm:
|
||||
case X86::VMOVDI2PDIrm:
|
||||
DecodeZeroMoveLowMask(MVT::v4i32, ShuffleMask);
|
||||
DestName = getRegName(MI->getOperand(0).getReg());
|
||||
break;
|
||||
|
||||
case X86::PMOVZXBWrr:
|
||||
case X86::PMOVZXBDrr:
|
||||
case X86::PMOVZXBQrr:
|
||||
case X86::PMOVZXWDrr:
|
||||
case X86::PMOVZXWQrr:
|
||||
case X86::PMOVZXDQrr:
|
||||
case X86::VPMOVZXBWrr:
|
||||
case X86::VPMOVZXBDrr:
|
||||
case X86::VPMOVZXBQrr:
|
||||
case X86::VPMOVZXWDrr:
|
||||
case X86::VPMOVZXWQrr:
|
||||
case X86::VPMOVZXDQrr:
|
||||
case X86::VPMOVZXBWYrr:
|
||||
case X86::VPMOVZXBDYrr:
|
||||
case X86::VPMOVZXBQYrr:
|
||||
case X86::VPMOVZXWDYrr:
|
||||
case X86::VPMOVZXWQYrr:
|
||||
case X86::VPMOVZXDQYrr:
|
||||
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||
// FALL THROUGH.
|
||||
case X86::PMOVZXBWrm:
|
||||
case X86::PMOVZXBDrm:
|
||||
case X86::PMOVZXBQrm:
|
||||
case X86::PMOVZXWDrm:
|
||||
case X86::PMOVZXWQrm:
|
||||
case X86::PMOVZXDQrm:
|
||||
case X86::VPMOVZXBWrm:
|
||||
case X86::VPMOVZXBDrm:
|
||||
case X86::VPMOVZXBQrm:
|
||||
case X86::VPMOVZXWDrm:
|
||||
case X86::VPMOVZXWQrm:
|
||||
case X86::VPMOVZXDQrm:
|
||||
case X86::VPMOVZXBWYrm:
|
||||
case X86::VPMOVZXBDYrm:
|
||||
case X86::VPMOVZXBQYrm:
|
||||
case X86::VPMOVZXWDYrm:
|
||||
case X86::VPMOVZXWQYrm:
|
||||
case X86::VPMOVZXDQYrm: {
|
||||
MVT SrcVT, DstVT;
|
||||
getZeroExtensionTypes(MI, SrcVT, DstVT);
|
||||
DecodeZeroExtendMask(SrcVT, DstVT, ShuffleMask);
|
||||
DestName = getRegName(MI->getOperand(0).getReg());
|
||||
} break;
|
||||
}
|
||||
|
||||
// The only comments we decode are shuffles, so give up if we were unable to
|
||||
|
@@ -399,4 +399,36 @@ void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
|
||||
}
|
||||
}
|
||||
|
||||
/// Appends the shuffle mask that corresponds to zero extending the low
/// elements of a SrcVT vector into the wider elements of a DstVT vector.
///
/// The mask is expressed in source-element units: for every destination
/// element i it receives the index i followed by (Scale - 1) SM_SentinelZero
/// entries, where Scale is the ratio of destination to source scalar widths.
///
/// \param SrcVT source vector type (narrower scalars).
/// \param DstVT destination vector type (wider scalars).
/// \param Mask  output mask; entries are appended, existing contents are kept.
void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT, SmallVectorImpl<int> &Mask) {
  unsigned NumSrcElts = SrcVT.getVectorNumElements();
  unsigned NumDstElts = DstVT.getVectorNumElements();
  unsigned SrcScalarBits = SrcVT.getScalarSizeInBits();
  unsigned DstScalarBits = DstVT.getScalarSizeInBits();
  unsigned Scale = DstScalarBits / SrcScalarBits;
  assert(SrcScalarBits < DstScalarBits &&
         "Expected zero extension mask to increase scalar size");
  assert(NumSrcElts >= NumDstElts && "Too many zero extension lanes");
  // NumSrcElts is consumed only by the assert above; reference it explicitly
  // so builds that compile asserts out do not warn about an unused variable.
  (void)NumSrcElts;

  for (unsigned i = 0; i != NumDstElts; i++) {
    // The low part of destination element i is source element i ...
    Mask.push_back(i);
    // ... and the remaining (Scale - 1) source-sized slots are zero filled.
    for (unsigned j = 1; j != Scale; j++)
      Mask.push_back(SM_SentinelZero);
  }
}
|
||||
|
||||
/// Appends the shuffle mask of a "move low element, zero the rest" operation
/// on a vector of type VT: index 0 first, then SM_SentinelZero for each of
/// the remaining elements.
void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
  // The lowest element is passed through unchanged.
  ShuffleMask.push_back(0);

  // Every element above the lowest one is zeroed.
  for (unsigned Remaining = VT.getVectorNumElements(); Remaining > 1;
       --Remaining)
    ShuffleMask.push_back(SM_SentinelZero);
}
|
||||
|
||||
/// Appends the shuffle mask of a scalar move (register move or load) on a
/// vector of type VT.
///
/// \param VT     vector type the mask is built for.
/// \param IsLoad true for the load form, whose upper elements are zeroed;
///               false for the register form, whose upper elements are
///               copied from the first source.
/// \param Mask   output mask; entries are appended, existing contents are
///               kept.
void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl<int> &Mask) {
  // First element comes from the first element of second source.
  // Remaining elements: Load zero extends / Move copies from first source.
  unsigned NumElts = VT.getVectorNumElements();
  Mask.push_back(NumElts);
  for (unsigned i = 1; i < NumElts; i++) {
    // Cast both arms to int: otherwise the unsigned index drags the whole
    // conditional (sentinel included) through the usual arithmetic
    // conversions to unsigned before it is stored into the int mask.
    Mask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero)
                          : static_cast<int>(i));
  }
}
|
||||
} // llvm namespace
|
||||
|
@@ -90,6 +90,16 @@ void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
|
||||
/// \brief Decode a VPERMILP variable mask from an IR-level vector constant.
|
||||
void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
|
||||
|
||||
/// \brief Decode a zero extension instruction as a shuffle mask.
|
||||
void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT,
|
||||
SmallVectorImpl<int> &ShuffleMask);
|
||||
|
||||
/// \brief Decode a move lower and zero upper instruction as a shuffle mask.
|
||||
void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
|
||||
|
||||
/// \brief Decode a scalar float move instruction as a shuffle mask.
|
||||
void DecodeScalarMoveMask(MVT VT, bool IsLoad,
|
||||
SmallVectorImpl<int> &ShuffleMask);
|
||||
} // llvm namespace
|
||||
|
||||
#endif
|
||||
|
@@ -5496,16 +5496,9 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
|
||||
IsUnary = true;
|
||||
break;
|
||||
case X86ISD::MOVSS:
|
||||
case X86ISD::MOVSD: {
|
||||
// The index 0 always comes from the first element of the second source,
|
||||
// this is why MOVSS and MOVSD are used in the first place. The other
|
||||
// elements come from the other positions of the first source vector
|
||||
Mask.push_back(NumElems);
|
||||
for (unsigned i = 1; i != NumElems; ++i) {
|
||||
Mask.push_back(i);
|
||||
}
|
||||
case X86ISD::MOVSD:
|
||||
DecodeScalarMoveMask(VT, /* IsLoad */ false, Mask);
|
||||
break;
|
||||
}
|
||||
case X86ISD::VPERM2X128:
|
||||
ImmN = N->getOperand(N->getNumOperands()-1);
|
||||
DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
|
||||
|
@@ -350,12 +350,12 @@ define <16 x i8> @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20(
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm4
|
||||
; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm2[8],xmm4[9],xmm2[9],xmm4[10],xmm2[10],xmm4[11],xmm2[11],xmm4[12],xmm2[12],xmm4[13],xmm2[13],xmm4[14],xmm2[14],xmm4[15],xmm2[15]
|
||||
; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[3,2,1,0,4,5,6,7]
|
||||
; SSE2-NEXT: movsd %xmm4, %xmm3
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm4[0],xmm3[1]
|
||||
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
|
||||
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
|
||||
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
|
||||
; SSE2-NEXT: movsd %xmm0, %xmm1
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: packuswb %xmm3, %xmm1
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
@@ -800,12 +800,12 @@ define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu(
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: pmovzxbq %xmm0, %xmm0
|
||||
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vpmovzxbq %xmm0, %xmm0
|
||||
; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX-NEXT: retq
|
||||
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
ret <16 x i8> %shuffle
|
||||
@@ -827,12 +827,12 @@ define <16 x i8> @shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz(
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: pmovzxbq %xmm0, %xmm0
|
||||
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vpmovzxbq %xmm0, %xmm0
|
||||
; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX-NEXT: retq
|
||||
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 1, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
||||
ret <16 x i8> %shuffle
|
||||
@@ -853,12 +853,12 @@ define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu(
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: pmovzxbd %xmm0, %xmm0
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vpmovzxbd %xmm0, %xmm0
|
||||
; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; AVX-NEXT: retq
|
||||
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef>
|
||||
ret <16 x i8> %shuffle
|
||||
@@ -881,12 +881,12 @@ define <16 x i8> @shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz(
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: pmovzxbd %xmm0, %xmm0
|
||||
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vpmovzxbd %xmm0, %xmm0
|
||||
; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
|
||||
; AVX-NEXT: retq
|
||||
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 1, i32 21, i32 22, i32 23, i32 2, i32 25, i32 26, i32 27, i32 3, i32 29, i32 30, i32 31>
|
||||
ret <16 x i8> %shuffle
|
||||
@@ -905,12 +905,12 @@ define <16 x i8> @shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu(
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: pmovzxbw %xmm0, %xmm0
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vpmovzxbw %xmm0, %xmm0
|
||||
; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX-NEXT: retq
|
||||
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7, i32 undef>
|
||||
ret <16 x i8> %shuffle
|
||||
@@ -931,12 +931,12 @@ define <16 x i8> @shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz(
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: pmovzxbw %xmm0, %xmm0
|
||||
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vpmovzxbw %xmm0, %xmm0
|
||||
; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX-NEXT: retq
|
||||
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 1, i32 19, i32 2, i32 21, i32 3, i32 23, i32 4, i32 25, i32 5, i32 27, i32 6, i32 29, i32 7, i32 31>
|
||||
ret <16 x i8> %shuffle
|
||||
|
@@ -211,19 +211,19 @@ define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
|
||||
define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
|
||||
; SSE2-LABEL: shuffle_v2f64_03:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movsd %xmm0, %xmm1
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: shuffle_v2f64_03:
|
||||
; SSE3: # BB#0:
|
||||
; SSE3-NEXT: movsd %xmm0, %xmm1
|
||||
; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE3-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: shuffle_v2f64_03:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movsd %xmm0, %xmm1
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSSE3-NEXT: movaps %xmm1, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
@@ -242,17 +242,17 @@ define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
|
||||
define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
|
||||
; SSE2-LABEL: shuffle_v2f64_21:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movsd %xmm1, %xmm0
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: shuffle_v2f64_21:
|
||||
; SSE3: # BB#0:
|
||||
; SSE3-NEXT: movsd %xmm1, %xmm0
|
||||
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: shuffle_v2f64_21:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movsd %xmm1, %xmm0
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v2f64_21:
|
||||
@@ -299,19 +299,19 @@ define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
|
||||
define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
|
||||
; SSE2-LABEL: shuffle_v2i64_03:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movsd %xmm0, %xmm1
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE2-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: shuffle_v2i64_03:
|
||||
; SSE3: # BB#0:
|
||||
; SSE3-NEXT: movsd %xmm0, %xmm1
|
||||
; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE3-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: shuffle_v2i64_03:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movsd %xmm0, %xmm1
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSSE3-NEXT: movaps %xmm1, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
@@ -335,19 +335,19 @@ define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
|
||||
define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
|
||||
; SSE2-LABEL: shuffle_v2i64_03_copy:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movsd %xmm1, %xmm2
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
|
||||
; SSE2-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: shuffle_v2i64_03_copy:
|
||||
; SSE3: # BB#0:
|
||||
; SSE3-NEXT: movsd %xmm1, %xmm2
|
||||
; SSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
|
||||
; SSE3-NEXT: movaps %xmm2, %xmm0
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: shuffle_v2i64_03_copy:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movsd %xmm1, %xmm2
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
|
||||
; SSSE3-NEXT: movaps %xmm2, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
@@ -489,17 +489,17 @@ define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
|
||||
define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
|
||||
; SSE2-LABEL: shuffle_v2i64_21:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movsd %xmm1, %xmm0
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: shuffle_v2i64_21:
|
||||
; SSE3: # BB#0:
|
||||
; SSE3-NEXT: movsd %xmm1, %xmm0
|
||||
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: shuffle_v2i64_21:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movsd %xmm1, %xmm0
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v2i64_21:
|
||||
@@ -522,19 +522,19 @@ define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
|
||||
define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
|
||||
; SSE2-LABEL: shuffle_v2i64_21_copy:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movsd %xmm2, %xmm1
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
|
||||
; SSE2-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: shuffle_v2i64_21_copy:
|
||||
; SSE3: # BB#0:
|
||||
; SSE3-NEXT: movsd %xmm2, %xmm1
|
||||
; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
|
||||
; SSE3-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: shuffle_v2i64_21_copy:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movsd %xmm2, %xmm1
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
|
||||
; SSSE3-NEXT: movaps %xmm1, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
@@ -650,12 +650,12 @@ define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
|
||||
define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
|
||||
; SSE-LABEL: shuffle_v2i64_0z:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movq %xmm0, %xmm0
|
||||
; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v2i64_0z:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovq %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; AVX-NEXT: retq
|
||||
%shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
|
||||
ret <2 x i64> %shuffle
|
||||
@@ -693,19 +693,19 @@ define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
|
||||
; SSE2-LABEL: shuffle_v2i64_z1:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE2-NEXT: movsd %xmm1, %xmm0
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: shuffle_v2i64_z1:
|
||||
; SSE3: # BB#0:
|
||||
; SSE3-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE3-NEXT: movsd %xmm1, %xmm0
|
||||
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: shuffle_v2i64_z1:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: xorps %xmm1, %xmm1
|
||||
; SSSE3-NEXT: movsd %xmm1, %xmm0
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v2i64_z1:
|
||||
@@ -732,12 +732,12 @@ define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
|
||||
define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
|
||||
; SSE-LABEL: shuffle_v2f64_0z:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movq %xmm0, %xmm0
|
||||
; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v2f64_0z:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovq %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; AVX-NEXT: retq
|
||||
%shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
|
||||
ret <2 x double> %shuffle
|
||||
@@ -780,19 +780,19 @@ define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
|
||||
; SSE2-LABEL: shuffle_v2f64_z1:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE2-NEXT: movsd %xmm1, %xmm0
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: shuffle_v2f64_z1:
|
||||
; SSE3: # BB#0:
|
||||
; SSE3-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE3-NEXT: movsd %xmm1, %xmm0
|
||||
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: shuffle_v2f64_z1:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: xorps %xmm1, %xmm1
|
||||
; SSSE3-NEXT: movsd %xmm1, %xmm0
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v2f64_z1:
|
||||
@@ -828,12 +828,12 @@ define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
|
||||
define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
|
||||
; SSE-LABEL: insert_mem_and_zero_v2i64:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movq (%rdi), %xmm0
|
||||
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: insert_mem_and_zero_v2i64:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovq (%rdi), %xmm0
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX-NEXT: retq
|
||||
%a = load i64* %ptr
|
||||
%v = insertelement <2 x i64> undef, i64 %a, i32 0
|
||||
@@ -844,12 +844,12 @@ define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
|
||||
define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
|
||||
; SSE-LABEL: insert_reg_and_zero_v2f64:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movq %xmm0, %xmm0
|
||||
; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: insert_reg_and_zero_v2f64:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovq %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||
; AVX-NEXT: retq
|
||||
%v = insertelement <2 x double> undef, double %a, i32 0
|
||||
%shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
|
||||
@@ -859,12 +859,12 @@ define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
|
||||
define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
|
||||
; SSE-LABEL: insert_mem_and_zero_v2f64:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movsd (%rdi), %xmm0
|
||||
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: insert_mem_and_zero_v2f64:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovsd (%rdi), %xmm0
|
||||
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX-NEXT: retq
|
||||
%a = load double* %ptr
|
||||
%v = insertelement <2 x double> undef, double %a, i32 0
|
||||
@@ -876,19 +876,19 @@ define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
|
||||
; SSE2-LABEL: insert_reg_lo_v2i64:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movd %rdi, %xmm1
|
||||
; SSE2-NEXT: movsd %xmm1, %xmm0
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: insert_reg_lo_v2i64:
|
||||
; SSE3: # BB#0:
|
||||
; SSE3-NEXT: movd %rdi, %xmm1
|
||||
; SSE3-NEXT: movsd %xmm1, %xmm0
|
||||
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: insert_reg_lo_v2i64:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movd %rdi, %xmm1
|
||||
; SSSE3-NEXT: movsd %xmm1, %xmm0
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: insert_reg_lo_v2i64:
|
||||
@@ -931,19 +931,19 @@ define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
|
||||
;
|
||||
; SSE41-LABEL: insert_mem_lo_v2i64:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: movq (%rdi), %xmm1
|
||||
; SSE41-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
|
||||
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: insert_mem_lo_v2i64:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovq (%rdi), %xmm1
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: insert_mem_lo_v2i64:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vmovq (%rdi), %xmm1
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
|
||||
; AVX2-NEXT: retq
|
||||
%a = load i64* %ptr
|
||||
@@ -972,13 +972,13 @@ define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
|
||||
define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
|
||||
; SSE-LABEL: insert_mem_hi_v2i64:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movq (%rdi), %xmm1
|
||||
; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
|
||||
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: insert_mem_hi_v2i64:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovq (%rdi), %xmm1
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; AVX-NEXT: retq
|
||||
%a = load i64* %ptr
|
||||
@@ -990,13 +990,13 @@ define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
|
||||
define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
|
||||
; SSE-LABEL: insert_reg_lo_v2f64:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movsd %xmm0, %xmm1
|
||||
; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: insert_reg_lo_v2f64:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovsd %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; AVX-NEXT: retq
|
||||
%v = insertelement <2 x double> undef, double %a, i32 0
|
||||
%shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
|
||||
@@ -1085,7 +1085,7 @@ define <2 x double> @insert_dup_reg_v2f64(double %a) {
|
||||
define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
|
||||
; SSE2-LABEL: insert_dup_mem_v2f64:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movsd (%rdi), %xmm0
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
|
@@ -441,21 +441,21 @@ define <4 x float> @shuffle_v4f32_4zzz(<4 x float> %a) {
|
||||
; SSE2-LABEL: shuffle_v4f32_4zzz:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE2-NEXT: movss %xmm0, %xmm1
|
||||
; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
|
||||
; SSE2-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: shuffle_v4f32_4zzz:
|
||||
; SSE3: # BB#0:
|
||||
; SSE3-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE3-NEXT: movss %xmm0, %xmm1
|
||||
; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
|
||||
; SSE3-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: shuffle_v4f32_4zzz:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: xorps %xmm1, %xmm1
|
||||
; SSSE3-NEXT: movss %xmm0, %xmm1
|
||||
; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
|
||||
; SSSE3-NEXT: movaps %xmm1, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
@@ -661,21 +661,21 @@ define <4 x i32> @shuffle_v4i32_4zzz(<4 x i32> %a) {
|
||||
; SSE2-LABEL: shuffle_v4i32_4zzz:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE2-NEXT: movss %xmm0, %xmm1
|
||||
; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
|
||||
; SSE2-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: shuffle_v4i32_4zzz:
|
||||
; SSE3: # BB#0:
|
||||
; SSE3-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE3-NEXT: movss %xmm0, %xmm1
|
||||
; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
|
||||
; SSE3-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: shuffle_v4i32_4zzz:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: xorps %xmm1, %xmm1
|
||||
; SSSE3-NEXT: movss %xmm0, %xmm1
|
||||
; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
|
||||
; SSSE3-NEXT: movaps %xmm1, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
@@ -698,21 +698,21 @@ define <4 x i32> @shuffle_v4i32_z4zz(<4 x i32> %a) {
|
||||
; SSE2-LABEL: shuffle_v4i32_z4zz:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE2-NEXT: movss %xmm0, %xmm1
|
||||
; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: shuffle_v4i32_z4zz:
|
||||
; SSE3: # BB#0:
|
||||
; SSE3-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE3-NEXT: movss %xmm0, %xmm1
|
||||
; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: shuffle_v4i32_z4zz:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: xorps %xmm1, %xmm1
|
||||
; SSSE3-NEXT: movss %xmm0, %xmm1
|
||||
; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
@@ -737,21 +737,21 @@ define <4 x i32> @shuffle_v4i32_zz4z(<4 x i32> %a) {
|
||||
; SSE2-LABEL: shuffle_v4i32_zz4z:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE2-NEXT: movss %xmm0, %xmm1
|
||||
; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: shuffle_v4i32_zz4z:
|
||||
; SSE3: # BB#0:
|
||||
; SSE3-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE3-NEXT: movss %xmm0, %xmm1
|
||||
; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
|
||||
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: shuffle_v4i32_zz4z:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: xorps %xmm1, %xmm1
|
||||
; SSSE3-NEXT: movss %xmm0, %xmm1
|
||||
; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
|
||||
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
@@ -1033,12 +1033,12 @@ define <4 x i32> @shuffle_v4i32_0u1u(<4 x i32> %a, <4 x i32> %b) {
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v4i32_0u1u:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: pmovzxdq %xmm0, %xmm0
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v4i32_0u1u:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vpmovzxdq %xmm0, %xmm0
|
||||
; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
||||
; AVX-NEXT: retq
|
||||
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
|
||||
ret <4 x i32> %shuffle
|
||||
@@ -1065,12 +1065,12 @@ define <4 x i32> @shuffle_v4i32_0z1z(<4 x i32> %a) {
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v4i32_0z1z:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: pmovzxdq %xmm0, %xmm0
|
||||
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v4i32_0z1z:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vpmovzxdq %xmm0, %xmm0
|
||||
; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
||||
; AVX-NEXT: retq
|
||||
%shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
|
||||
ret <4 x i32> %shuffle
|
||||
@@ -1094,12 +1094,12 @@ define <4 x i32> @insert_reg_and_zero_v4i32(i32 %a) {
|
||||
define <4 x i32> @insert_mem_and_zero_v4i32(i32* %ptr) {
|
||||
; SSE-LABEL: insert_mem_and_zero_v4i32:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movd (%rdi), %xmm0
|
||||
; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: insert_mem_and_zero_v4i32:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovd (%rdi), %xmm0
|
||||
; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: retq
|
||||
%a = load i32* %ptr
|
||||
%v = insertelement <4 x i32> undef, i32 %a, i32 0
|
||||
@@ -1111,21 +1111,21 @@ define <4 x float> @insert_reg_and_zero_v4f32(float %a) {
|
||||
; SSE2-LABEL: insert_reg_and_zero_v4f32:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE2-NEXT: movss %xmm0, %xmm1
|
||||
; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
|
||||
; SSE2-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: insert_reg_and_zero_v4f32:
|
||||
; SSE3: # BB#0:
|
||||
; SSE3-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE3-NEXT: movss %xmm0, %xmm1
|
||||
; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
|
||||
; SSE3-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: insert_reg_and_zero_v4f32:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: xorps %xmm1, %xmm1
|
||||
; SSSE3-NEXT: movss %xmm0, %xmm1
|
||||
; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
|
||||
; SSSE3-NEXT: movaps %xmm1, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
@@ -1138,7 +1138,7 @@ define <4 x float> @insert_reg_and_zero_v4f32(float %a) {
|
||||
; AVX-LABEL: insert_reg_and_zero_v4f32:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vmovss %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
||||
; AVX-NEXT: retq
|
||||
%v = insertelement <4 x float> undef, float %a, i32 0
|
||||
%shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
|
||||
@@ -1148,12 +1148,12 @@ define <4 x float> @insert_reg_and_zero_v4f32(float %a) {
|
||||
define <4 x float> @insert_mem_and_zero_v4f32(float* %ptr) {
|
||||
; SSE-LABEL: insert_mem_and_zero_v4f32:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movss (%rdi), %xmm0
|
||||
; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: insert_mem_and_zero_v4f32:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovss (%rdi), %xmm0
|
||||
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: retq
|
||||
%a = load float* %ptr
|
||||
%v = insertelement <4 x float> undef, float %a, i32 0
|
||||
@@ -1165,19 +1165,19 @@ define <4 x i32> @insert_reg_lo_v4i32(i64 %a, <4 x i32> %b) {
|
||||
; SSE2-LABEL: insert_reg_lo_v4i32:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movd %rdi, %xmm1
|
||||
; SSE2-NEXT: movsd %xmm1, %xmm0
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: insert_reg_lo_v4i32:
|
||||
; SSE3: # BB#0:
|
||||
; SSE3-NEXT: movd %rdi, %xmm1
|
||||
; SSE3-NEXT: movsd %xmm1, %xmm0
|
||||
; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: insert_reg_lo_v4i32:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movd %rdi, %xmm1
|
||||
; SSSE3-NEXT: movsd %xmm1, %xmm0
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: insert_reg_lo_v4i32:
|
||||
@@ -1221,19 +1221,19 @@ define <4 x i32> @insert_mem_lo_v4i32(<2 x i32>* %ptr, <4 x i32> %b) {
|
||||
;
|
||||
; SSE41-LABEL: insert_mem_lo_v4i32:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: movq (%rdi), %xmm1
|
||||
; SSE41-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
|
||||
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: insert_mem_lo_v4i32:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovq (%rdi), %xmm1
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: insert_mem_lo_v4i32:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vmovq (%rdi), %xmm1
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
|
||||
; AVX2-NEXT: retq
|
||||
%a = load <2 x i32>* %ptr
|
||||
@@ -1263,13 +1263,13 @@ define <4 x i32> @insert_reg_hi_v4i32(i64 %a, <4 x i32> %b) {
|
||||
define <4 x i32> @insert_mem_hi_v4i32(<2 x i32>* %ptr, <4 x i32> %b) {
|
||||
; SSE-LABEL: insert_mem_hi_v4i32:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movq (%rdi), %xmm1
|
||||
; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
|
||||
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: insert_mem_hi_v4i32:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovq (%rdi), %xmm1
|
||||
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
|
||||
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; AVX-NEXT: retq
|
||||
%a = load <2 x i32>* %ptr
|
||||
@@ -1281,13 +1281,13 @@ define <4 x i32> @insert_mem_hi_v4i32(<2 x i32>* %ptr, <4 x i32> %b) {
|
||||
define <4 x float> @insert_reg_lo_v4f32(double %a, <4 x float> %b) {
|
||||
; SSE-LABEL: insert_reg_lo_v4f32:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movsd %xmm0, %xmm1
|
||||
; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
|
||||
; SSE-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: insert_reg_lo_v4f32:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovsd %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; AVX-NEXT: retq
|
||||
%a.cast = bitcast double %a to <2 x float>
|
||||
%v = shufflevector <2 x float> %a.cast, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
|
@@ -1829,12 +1829,12 @@ define <8 x i16> @shuffle_v8i16_0uuu1uuu(<8 x i16> %a) {
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v8i16_0uuu1uuu:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: pmovzxwq %xmm0, %xmm0
|
||||
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v8i16_0uuu1uuu:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vpmovzxwq %xmm0, %xmm0
|
||||
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
|
||||
; AVX-NEXT: retq
|
||||
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef>
|
||||
ret <8 x i16> %shuffle
|
||||
@@ -1857,12 +1857,12 @@ define <8 x i16> @shuffle_v8i16_0zzz1zzz(<8 x i16> %a) {
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v8i16_0zzz1zzz:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: pmovzxwq %xmm0, %xmm0
|
||||
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v8i16_0zzz1zzz:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vpmovzxwq %xmm0, %xmm0
|
||||
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
|
||||
; AVX-NEXT: retq
|
||||
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
|
||||
ret <8 x i16> %shuffle
|
||||
@@ -1881,12 +1881,12 @@ define <8 x i16> @shuffle_v8i16_0u1u2u3u(<8 x i16> %a) {
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v8i16_0u1u2u3u:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: pmovzxwd %xmm0, %xmm0
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v8i16_0u1u2u3u:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vpmovzxwd %xmm0, %xmm0
|
||||
; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX-NEXT: retq
|
||||
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef>
|
||||
ret <8 x i16> %shuffle
|
||||
@@ -1907,12 +1907,12 @@ define <8 x i16> @shuffle_v8i16_0z1z2z3z(<8 x i16> %a) {
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v8i16_0z1z2z3z:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: pmovzxwd %xmm0, %xmm0
|
||||
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v8i16_0z1z2z3z:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vpmovzxwd %xmm0, %xmm0
|
||||
; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX-NEXT: retq
|
||||
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
|
||||
ret <8 x i16> %shuffle
|
||||
|
@@ -794,14 +794,14 @@ define <4 x i64> @insert_reg_and_zero_v4i64(i64 %a) {
|
||||
define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) {
|
||||
; AVX1-LABEL: insert_mem_and_zero_v4i64:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovq (%rdi), %xmm0
|
||||
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
|
||||
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: insert_mem_and_zero_v4i64:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vmovq (%rdi), %xmm0
|
||||
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
|
||||
; AVX2-NEXT: retq
|
||||
@@ -815,7 +815,7 @@ define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
|
||||
; ALL-LABEL: insert_reg_and_zero_v4f64:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; ALL-NEXT: vmovsd %xmm0, %xmm1, %xmm0
|
||||
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
||||
; ALL-NEXT: retq
|
||||
%v = insertelement <4 x double> undef, double %a, i32 0
|
||||
%shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
|
||||
@@ -825,7 +825,7 @@ define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
|
||||
define <4 x double> @insert_mem_and_zero_v4f64(double* %ptr) {
|
||||
; ALL-LABEL: insert_mem_and_zero_v4f64:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vmovsd (%rdi), %xmm0
|
||||
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; ALL-NEXT: retq
|
||||
%a = load double* %ptr
|
||||
%v = insertelement <4 x double> undef, double %a, i32 0
|
||||
|
@@ -1853,7 +1853,7 @@ define <8 x float> @splat_v8f32(<4 x float> %r) {
|
||||
define <8x float> @concat_v2f32_1(<2 x float>* %tmp64, <2 x float>* %tmp65) {
|
||||
; ALL-LABEL: concat_v2f32_1:
|
||||
; ALL: # BB#0: # %entry
|
||||
; ALL-NEXT: vmovq (%rdi), %xmm0
|
||||
; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0
|
||||
; ALL-NEXT: retq
|
||||
entry:
|
||||
@@ -1868,7 +1868,7 @@ entry:
|
||||
define <8x float> @concat_v2f32_2(<2 x float>* %tmp64, <2 x float>* %tmp65) {
|
||||
; ALL-LABEL: concat_v2f32_2:
|
||||
; ALL: # BB#0: # %entry
|
||||
; ALL-NEXT: vmovq (%rdi), %xmm0
|
||||
; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0
|
||||
; ALL-NEXT: retq
|
||||
entry:
|
||||
@@ -1881,7 +1881,7 @@ entry:
|
||||
define <8x float> @concat_v2f32_3(<2 x float>* %tmp64, <2 x float>* %tmp65) {
|
||||
; ALL-LABEL: concat_v2f32_3:
|
||||
; ALL: # BB#0: # %entry
|
||||
; ALL-NEXT: vmovq (%rdi), %xmm0
|
||||
; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0
|
||||
; ALL-NEXT: retq
|
||||
entry:
|
||||
|
Reference in New Issue
Block a user