diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 8632886e384..3ba81157448 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1261,6 +1261,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, for (MVT VT : MVT::fp_vector_valuetypes()) setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i32, MVT::v16i8, Legal); + setLoadExtAction(ISD::SEXTLOAD, MVT::v16i32, MVT::v16i8, Legal); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i32, MVT::v16i16, Legal); + setLoadExtAction(ISD::SEXTLOAD, MVT::v16i32, MVT::v16i16, Legal); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v32i16, MVT::v32i8, Legal); + setLoadExtAction(ISD::SEXTLOAD, MVT::v32i16, MVT::v32i8, Legal); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i8, Legal); + setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i8, Legal); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i16, Legal); + setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i16, Legal); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i32, Legal); + setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i32, Legal); + setOperationAction(ISD::BR_CC, MVT::i1, Expand); setOperationAction(ISD::SETCC, MVT::i1, Custom); setOperationAction(ISD::XOR, MVT::i1, Legal); @@ -1479,7 +1492,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SELECT, MVT::v32i1, Custom); setOperationAction(ISD::SELECT, MVT::v64i1, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i1, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i1, Custom); setOperationAction(ISD::VSELECT, MVT::v32i16, Legal); @@ -12093,13 +12110,13 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, } static SDValue LowerZERO_EXTEND_AVX512(SDValue Op, - SelectionDAG &DAG) { + const X86Subtarget *Subtarget, SelectionDAG &DAG) { MVT VT = Op->getSimpleValueType(0); SDValue In = Op->getOperand(0); MVT InVT = In.getSimpleValueType(); SDLoc DL(Op); unsigned int NumElts = VT.getVectorNumElements(); - if (NumElts != 8 && NumElts != 16) + if (NumElts != 8 && NumElts != 16 && !Subtarget->hasBWI()) return SDValue(); if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1) @@ -12137,7 +12154,7 @@ static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget, MVT SVT = In.getSimpleValueType(); if (VT.is512BitVector() || SVT.getVectorElementType() == MVT::i1) - return LowerZERO_EXTEND_AVX512(Op, DAG); + return LowerZERO_EXTEND_AVX512(Op, Subtarget, DAG); if (Subtarget->hasFp256()) { SDValue Res = LowerAVXExtend(Op, DAG, Subtarget); @@ -13876,7 +13893,8 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops); } -static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, const X86Subtarget *Subtarget, +static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, + const X86Subtarget *Subtarget, SelectionDAG &DAG) { MVT VT = Op->getSimpleValueType(0); SDValue In = Op->getOperand(0); @@ -13902,7 +13920,7 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, const X86Subtarget *Subtarget unsigned int NumElts = VT.getVectorNumElements(); - if (NumElts != 8 && NumElts != 16) + if (NumElts != 8 && NumElts != 16 && !Subtarget->hasBWI()) return SDValue(); if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1) { diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 39f45fca30c..8b26603e250 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -5261,79 +5261,146 @@ def : Pat<(v8i32 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))), (VPMOVQDrrkz VK8WM:$mask, VR512:$src)>; -multiclass avx512_extend opc, string OpcodeStr, RegisterClass KRC, - RegisterClass DstRC, RegisterClass SrcRC, SDNode OpNode, - PatFrag mem_frag, X86MemOperand x86memop, - ValueType OpVT, ValueType InVT> { +multiclass avx512_extend_common opc, string OpcodeStr, + X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo, + X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{ - def rr : AVX5128I, EVEX; - - def rrk : AVX5128I, EVEX, EVEX_K; - - def rrkz : AVX5128I, EVEX, EVEX_KZ; + defm rr : AVX512_maskable, + EVEX; let mayLoad = 1 in { - def rm : AVX5128I, - EVEX; - - def rmk : AVX5128I, - EVEX, EVEX_K; - - def rmkz : AVX5128I, - EVEX, EVEX_KZ; + defm rm : AVX512_maskable, + EVEX; } } -defm VPMOVZXBDZ: avx512_extend<0x31, "vpmovzxbd", VK16WM, VR512, VR128X, X86vzext, - loadv2i64, i128mem, v16i32, v16i8>, EVEX_V512, - EVEX_CD8<8, CD8VQ>; -defm VPMOVZXBQZ: avx512_extend<0x32, "vpmovzxbq", VK8WM, VR512, VR128X, X86vzext, - loadv2i64, i128mem, v8i64, v16i8>, EVEX_V512, - EVEX_CD8<8, CD8VO>; -defm VPMOVZXWDZ: avx512_extend<0x33, "vpmovzxwd", VK16WM, VR512, VR256X, X86vzext, - loadv4i64, i256mem, v16i32, v16i16>, EVEX_V512, - EVEX_CD8<16, CD8VH>; -defm VPMOVZXWQZ: avx512_extend<0x34, "vpmovzxwq", VK8WM, VR512, VR128X, X86vzext, - loadv2i64, i128mem, v8i64, v8i16>, EVEX_V512, - EVEX_CD8<16, CD8VQ>; -defm VPMOVZXDQZ: avx512_extend<0x35, "vpmovzxdq", VK8WM, VR512, VR256X, X86vzext, - loadv4i64, i256mem, v8i64, v8i32>, EVEX_V512, - EVEX_CD8<32, CD8VH>; +multiclass avx512_extend_BW opc, string OpcodeStr, SDNode OpNode, + string ExtTy,PatFrag LdFrag = !cast(ExtTy#"extloadvi8")> { + let Predicates = [HasVLX, HasBWI] in { + defm Z128: avx512_extend_common, + EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128; -defm VPMOVSXBDZ: avx512_extend<0x21, "vpmovsxbd", VK16WM, VR512, VR128X, X86vsext, - loadv2i64, i128mem, v16i32, v16i8>, EVEX_V512, - EVEX_CD8<8, CD8VQ>; -defm VPMOVSXBQZ: avx512_extend<0x22, "vpmovsxbq", VK8WM, VR512, VR128X, X86vsext, - loadv2i64, i128mem, v8i64, v16i8>, EVEX_V512, - EVEX_CD8<8, CD8VO>; -defm VPMOVSXWDZ: avx512_extend<0x23, "vpmovsxwd", VK16WM, VR512, VR256X, X86vsext, - loadv4i64, i256mem, v16i32, v16i16>, EVEX_V512, - EVEX_CD8<16, CD8VH>; -defm VPMOVSXWQZ: avx512_extend<0x24, "vpmovsxwq", VK8WM, VR512, VR128X, X86vsext, - loadv2i64, i128mem, v8i64, v8i16>, EVEX_V512, - EVEX_CD8<16, CD8VQ>; -defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VK8WM, VR512, VR256X, X86vsext, - loadv4i64, i256mem, v8i64, v8i32>, EVEX_V512, - EVEX_CD8<32, CD8VH>; + defm Z256: avx512_extend_common, + EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256; + } + let Predicates = [HasBWI] in { + defm Z : avx512_extend_common, + EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512; + } +} + +multiclass avx512_extend_BD opc, string OpcodeStr, SDNode OpNode, + string ExtTy,PatFrag LdFrag = !cast(ExtTy#"extloadvi8")> { + let Predicates = [HasVLX, HasAVX512] in { + defm Z128: avx512_extend_common, + EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128; + + defm Z256: avx512_extend_common, + EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256; + } + let Predicates = [HasAVX512] in { + defm Z : avx512_extend_common, + EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512; + } +} + +multiclass avx512_extend_BQ opc, string OpcodeStr, SDNode OpNode, + string ExtTy,PatFrag LdFrag = !cast(ExtTy#"extloadvi8")> { + let Predicates = [HasVLX, HasAVX512] in { + defm Z128: avx512_extend_common, + EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128; + + defm Z256: avx512_extend_common, + EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256; + } + let Predicates = [HasAVX512] in { + defm Z : avx512_extend_common, + EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512; + } +} + +multiclass avx512_extend_WD opc, string OpcodeStr, SDNode OpNode, + string ExtTy,PatFrag LdFrag = !cast(ExtTy#"extloadvi16")> { + let Predicates = [HasVLX, HasAVX512] in { + defm Z128: avx512_extend_common, + EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128; + + defm Z256: avx512_extend_common, + EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256; + } + let Predicates = [HasAVX512] in { + defm Z : avx512_extend_common, + EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512; + } +} + +multiclass avx512_extend_WQ opc, string OpcodeStr, SDNode OpNode, + string ExtTy,PatFrag LdFrag = !cast(ExtTy#"extloadvi16")> { + let Predicates = [HasVLX, HasAVX512] in { + defm Z128: avx512_extend_common, + EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128; + + defm Z256: avx512_extend_common, + EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256; + } + let Predicates = [HasAVX512] in { + defm Z : avx512_extend_common, + EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512; + } +} + +multiclass avx512_extend_DQ opc, string OpcodeStr, SDNode OpNode, + string ExtTy,PatFrag LdFrag = !cast(ExtTy#"extloadvi32")> { + + let Predicates = [HasVLX, HasAVX512] in { + defm Z128: avx512_extend_common, + EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128; + + defm Z256: avx512_extend_common, + EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256; + } + let Predicates = [HasAVX512] in { + defm Z : avx512_extend_common, + EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512; + } +} + +defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, "z">; +defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, "z">; +defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, "z">; +defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, "z">; +defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, "z">; +defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, "z">; + + +defm VPMOVSXBW: avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, "s">; +defm VPMOVSXBD: avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, "s">; +defm VPMOVSXBQ: avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, "s">; +defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, "s">; +defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, "s">; +defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, "s">; //===----------------------------------------------------------------------===// // GATHER - SCATTER Operations diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index b898981ec13..d3b401e8cfc 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -5850,10 +5850,10 @@ multiclass SS41I_pmovx_rm_all opc, string OpcodeStr, OpndItins SSEItins, OpndItins AVXItins, OpndItins AVX2Itins> { defm NAME : SS41I_pmovx_rrrm; - let Predicates = [HasAVX] in + let Predicates = [HasAVX, NoVLX] in defm V#NAME : SS41I_pmovx_rrrm, VEX; - let Predicates = [HasAVX2] in + let Predicates = [HasAVX2, NoVLX] in defm V#NAME#Y : SS41I_pmovx_rrrm, VEX, VEX_L; } @@ -5988,7 +5988,7 @@ multiclass SS41I_pmovx_avx2_patterns(OpcPrefix#DQYrm) addr:$src)>; } -let Predicates = [HasAVX2] in { +let Predicates = [HasAVX2, NoVLX] in { defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", "s", X86vsext>; defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", "z", X86vzext>; } @@ -6087,7 +6087,7 @@ multiclass SS41I_pmovx_patterns(OpcPrefix#DQrm) addr:$src)>; } -let Predicates = [HasAVX] in { +let Predicates = [HasAVX, NoVLX] in { defm : SS41I_pmovx_patterns<"VPMOVSX", "s", X86vsext, extloadi32i16>; defm : SS41I_pmovx_patterns<"VPMOVZX", "z", X86vzext, loadi16_anyext>; } diff --git a/test/CodeGen/X86/avx512-trunc-ext.ll b/test/CodeGen/X86/avx512-trunc-ext.ll index bb36a1396df..f5e910c4a8e 100644 --- a/test/CodeGen/X86/avx512-trunc-ext.ll +++ b/test/CodeGen/X86/avx512-trunc-ext.ll @@ -1,95 +1,843 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX %s - -; CHECK-LABEL: trunc_16x32_to_16x8 -; CHECK: vpmovdb -; CHECK: ret +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX + + +; KNL-LABEL: trunc_16x32_to_16x8 +; KNL: vpmovdb +; KNL: ret define <16 x i8> @trunc_16x32_to_16x8(<16 x i32> %i) nounwind readnone { %x = trunc <16 x i32> %i to <16 x i8> ret <16 x i8> %x } -; CHECK-LABEL: trunc_8x64_to_8x16 -; CHECK: vpmovqw -; CHECK: ret +; KNL-LABEL: trunc_8x64_to_8x16 +; KNL: vpmovqw +; KNL: ret define <8 x i16> @trunc_8x64_to_8x16(<8 x i64> %i) nounwind readnone { %x = trunc <8 x i64> %i to <8 x i16> ret <8 x i16> %x } +;SKX-LABEL: zext_8x8mem_to_8x16: +;SKX: ## BB#0: +;SKX-NEXT: vpmovw2m %xmm0, %k1 +;SKX-NEXT: vpmovzxbw (%rdi), %xmm0 {%k1} {z} +;SKX-NEXT: retq +define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { + %a = load <8 x i8>,<8 x i8> *%i,align 1 + %x = zext <8 x i8> %a to <8 x i16> + %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer + ret <8 x i16> %ret +} -; CHECK-LABEL: zext_16x8_to_16x32 -; CHECK: vpmovzxbd {{.*}}%zmm -; CHECK: ret +;SKX-LABEL: sext_8x8mem_to_8x16: +;SKX: ## BB#0: +;SKX-NEXT: vpmovw2m %xmm0, %k1 +;SKX-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} +;SKX-NEXT: retq +define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { + %a = load <8 x i8>,<8 x i8> *%i,align 1 + %x = sext <8 x i8> %a to <8 x i16> + %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer + ret <8 x i16> %ret +} + +;SKX-LABEL: zext_16x8mem_to_16x16: +;SKX: ## BB#0: +;SKX-NEXT: vpmovb2m %xmm0, %k1 +;SKX-NEXT: vpmovzxbw (%rdi), %ymm0 {%k1} {z} +;SKX-NEXT: retq +define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { + %a = load <16 x i8>,<16 x i8> *%i,align 1 + %x = zext <16 x i8> %a to <16 x i16> + %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer + ret <16 x i16> %ret +} + +;SKX-LABEL: sext_16x8mem_to_16x16: +;SKX: ## BB#0: +;SKX-NEXT: vpmovb2m %xmm0, %k1 +;SKX-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} +;SKX-NEXT: retq +define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { + %a = load <16 x i8>,<16 x i8> *%i,align 1 + %x = sext <16 x i8> %a to <16 x i16> + %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer + ret <16 x i16> %ret +} + +;SKX-LABEL: zext_16x8_to_16x16: +;SKX: ## BB#0: +;SKX-NEXT: vpmovzxbw %xmm0, %ymm0 +;SKX-NEXT: retq +define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone { + %x = zext <16 x i8> %a to <16 x i16> + ret <16 x i16> %x +} + +;SKX-LABEL: zext_16x8_to_16x16_mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovb2m %xmm1, %k1 +;SKX-NEXT: vpmovzxbw %xmm0, %ymm0 {%k1} {z} +;SKX-NEXT: retq +define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone { + %x = zext <16 x i8> %a to <16 x i16> + %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer + ret <16 x i16> %ret +} + +;SKX-LABEL: sext_16x8_to_16x16: +;SKX: ## BB#0: +;SKX-NEXT: vpmovsxbw %xmm0, %ymm0 +;SKX-NEXT: retq +define <16 x i16> @sext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone { + %x = sext <16 x i8> %a to <16 x i16> + ret <16 x i16> %x +} + +;SKX-LABEL: sext_16x8_to_16x16_mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovb2m %xmm1, %k1 +;SKX-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} +;SKX-NEXT: retq +define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone { + %x = sext <16 x i8> %a to <16 x i16> + %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer + ret <16 x i16> %ret +} + +;SKX-LABEL: zext_32x8mem_to_32x16: +;SKX: ## BB#0: +;SKX-NEXT: vpmovb2m %ymm0, %k1 +;SKX-NEXT: vpmovzxbw (%rdi), %zmm0 {%k1} {z} +;SKX-NEXT: retq +define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone { + %a = load <32 x i8>,<32 x i8> *%i,align 1 + %x = zext <32 x i8> %a to <32 x i16> + %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer + ret <32 x i16> %ret +} + +;SKX-LABEL: sext_32x8mem_to_32x16: +;SKX: ## BB#0: +;SKX-NEXT: vpmovb2m %ymm0, %k1 +;SKX-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} +;SKX-NEXT: retq +define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone { + %a = load <32 x i8>,<32 x i8> *%i,align 1 + %x = sext <32 x i8> %a to <32 x i16> + %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer + ret <32 x i16> %ret +} + +;SKX-LABEL: zext_32x8_to_32x16: +;SKX: ## BB#0: +;SKX-NEXT: vpmovzxbw %ymm0, %zmm0 +;SKX-NEXT: retq +define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone { + %x = zext <32 x i8> %a to <32 x i16> + ret <32 x i16> %x +} + +;SKX-LABEL: zext_32x8_to_32x16_mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovb2m %ymm1, %k1 +;SKX-NEXT: vpmovzxbw %ymm0, %zmm0 {%k1} {z} +;SKX-NEXT: retq +define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone { + %x = zext <32 x i8> %a to <32 x i16> + %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer + ret <32 x i16> %ret +} + +;SKX-LABEL: sext_32x8_to_32x16: +;SKX: ## BB#0: +;SKX-NEXT: vpmovsxbw %ymm0, %zmm0 +;SKX-NEXT: retq +define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone { + %x = sext <32 x i8> %a to <32 x i16> + ret <32 x i16> %x +} + +;SKX-LABEL: sext_32x8_to_32x16_mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovb2m %ymm1, %k1 +;SKX-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z} +;SKX-NEXT: retq +define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone { + %x = sext <32 x i8> %a to <32 x i16> + %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer + ret <32 x i16> %ret +} + +;SKX-LABEL: zext_4x8mem_to_4x32: +;SKX: ## BB#0: +;SKX-NEXT: vpmovd2m %xmm0, %k1 +;SKX-NEXT: vpmovzxbd (%rdi), %xmm0 {%k1} {z} +;SKX-NEXT: retq +define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { + %a = load <4 x i8>,<4 x i8> *%i,align 1 + %x = zext <4 x i8> %a to <4 x i32> + %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer + ret <4 x i32> %ret +} + +;SKX-LABEL: sext_4x8mem_to_4x32: +;SKX: ## BB#0: +;SKX-NEXT: vpmovd2m %xmm0, %k1 +;SKX-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} +;SKX-NEXT: retq +define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { + %a = load <4 x i8>,<4 x i8> *%i,align 1 + %x = sext <4 x i8> %a to <4 x i32> + %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer + ret <4 x i32> %ret +} + +;SKX-LABEL: zext_8x8mem_to_8x32: +;SKX: ## BB#0: +;SKX-NEXT: vpmovw2m %xmm0, %k1 +;SKX-NEXT: vpmovzxbd (%rdi), %ymm0 {%k1} {z} +;SKX-NEXT: retq +define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { + %a = load <8 x i8>,<8 x i8> *%i,align 1 + %x = zext <8 x i8> %a to <8 x i32> + %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer + ret <8 x i32> %ret +} + +;SKX-LABEL: sext_8x8mem_to_8x32: +;SKX: ## BB#0: +;SKX-NEXT: vpmovw2m %xmm0, %k1 +;SKX-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} +;SKX-NEXT: retq +define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { + %a = load <8 x i8>,<8 x i8> *%i,align 1 + %x = sext <8 x i8> %a to <8 x i32> + %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer + ret <8 x i32> %ret +} + +;KNL-LABEL: zext_16x8mem_to_16x32: +;KNL: vpmovzxbd (%rdi), %zmm0 {%k1} {z} +;KNL-NEXT: retq +define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { + %a = load <16 x i8>,<16 x i8> *%i,align 1 + %x = zext <16 x i8> %a to <16 x i32> + %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer + ret <16 x i32> %ret +} + +;KNL-LABEL: sext_16x8mem_to_16x32: +;KNL: vpmovsxbd (%rdi), %zmm0 {%k1} {z} +;KNL-NEXT: retq +define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { + %a = load <16 x i8>,<16 x i8> *%i,align 1 + %x = sext <16 x i8> %a to <16 x i32> + %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer + ret <16 x i32> %ret +} + +;KNL-LABEL: zext_16x8_to_16x32_mask: +;KNL: vpmovzxbd %xmm0, %zmm0 {%k1} {z} +;KNL-NEXT: retq +define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone { + %x = zext <16 x i8> %a to <16 x i32> + %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer + ret <16 x i32> %ret +} + +;KNL-LABEL: sext_16x8_to_16x32_mask: +;KNL: vpmovsxbd %xmm0, %zmm0 {%k1} {z} +;KNL-NEXT: retq +define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone { + %x = sext <16 x i8> %a to <16 x i32> + %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer + ret <16 x i32> %ret +} + +; KNL-LABEL: zext_16x8_to_16x32 +; KNL: vpmovzxbd {{.*}}%zmm +; KNL: ret define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone { %x = zext <16 x i8> %i to <16 x i32> ret <16 x i32> %x } -; CHECK-LABEL: sext_16x8_to_16x32 -; CHECK: vpmovsxbd {{.*}}%zmm -; CHECK: ret +; KNL-LABEL: sext_16x8_to_16x32 +; KNL: vpmovsxbd {{.*}}%zmm +; KNL: ret define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone { %x = sext <16 x i8> %i to <16 x i32> ret <16 x i32> %x } - -; CHECK-LABEL: zext_16x16_to_16x32 -; CHECK: vpmovzxwd {{.*}}%zmm -; CHECK: ret -define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %i) nounwind readnone { - %x = zext <16 x i16> %i to <16 x i32> - ret <16 x i32> %x +;SKX-LABEL: zext_2x8mem_to_2x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovq2m %xmm0, %k1 +;SKX-NEXT: vpmovzxbq (%rdi), %xmm0 {%k1} {z} +;SKX-NEXT: retq +define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone { + %a = load <2 x i8>,<2 x i8> *%i,align 1 + %x = zext <2 x i8> %a to <2 x i64> + %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer + ret <2 x i64> %ret +} +;SKX-LABEL: sext_2x8mem_to_2x64mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovq2m %xmm0, %k1 +;SKX-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} +;SKX-NEXT: retq +define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone { + %a = load <2 x i8>,<2 x i8> *%i,align 1 + %x = sext <2 x i8> %a to <2 x i64> + %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer + ret <2 x i64> %ret +} +;SKX-LABEL: sext_2x8mem_to_2x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovsxbq (%rdi), %xmm0 +;SKX-NEXT: retq +define <2 x i64> @sext_2x8mem_to_2x64(<2 x i8> *%i) nounwind readnone { + %a = load <2 x i8>,<2 x i8> *%i,align 1 + %x = sext <2 x i8> %a to <2 x i64> + ret <2 x i64> %x } -; CHECK-LABEL: zext_8x16_to_8x64 -; CHECK: vpmovzxwq -; CHECK: ret -define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %i) nounwind readnone { - %x = zext <8 x i16> %i to <8 x i64> +;SKX-LABEL: zext_4x8mem_to_4x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovd2m %xmm0, %k1 +;SKX-NEXT: vpmovzxbq (%rdi), %ymm0 {%k1} {z} +;SKX-NEXT: retq +define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { + %a = load <4 x i8>,<4 x i8> *%i,align 1 + %x = zext <4 x i8> %a to <4 x i64> + %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer + ret <4 x i64> %ret +} + +;SKX-LABEL: sext_4x8mem_to_4x64mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovd2m %xmm0, %k1 +;SKX-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} +;SKX-NEXT: retq +define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { + %a = load <4 x i8>,<4 x i8> *%i,align 1 + %x = sext <4 x i8> %a to <4 x i64> + %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer + ret <4 x i64> %ret +} + +;SKX-LABEL: sext_4x8mem_to_4x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovsxbq (%rdi), %ymm0 +;SKX-NEXT: retq +define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readnone { + %a = load <4 x i8>,<4 x i8> *%i,align 1 + %x = sext <4 x i8> %a to <4 x i64> + ret <4 x i64> %x +} + +;KNL-LABEL: zext_8x8mem_to_8x64: +;KNL: vpmovzxbq (%rdi), %zmm0 {%k1} {z} +;KNL-NEXT: retq +define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { + %a = load <8 x i8>,<8 x i8> *%i,align 1 + %x = zext <8 x i8> %a to <8 x i64> + %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer + ret <8 x i64> %ret +} + +;KNL-LABEL: sext_8x8mem_to_8x64mask: +;KNL: vpmovsxbq (%rdi), %zmm0 {%k1} {z} +;KNL-NEXT: retq +define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { + %a = load <8 x i8>,<8 x i8> *%i,align 1 + %x = sext <8 x i8> %a to <8 x i64> + %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer + ret <8 x i64> %ret +} + +;KNL-LABEL: sext_8x8mem_to_8x64: +;KNL: vpmovsxbq (%rdi), %zmm0 +;KNL-NEXT: retq +define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readnone { + %a = load <8 x i8>,<8 x i8> *%i,align 1 + %x = sext <8 x i8> %a to <8 x i64> ret <8 x i64> %x } -;CHECK-LABEL: fptrunc_test -;CHECK: vcvtpd2ps {{.*}}%zmm -;CHECK: ret +;SKX-LABEL: zext_4x16mem_to_4x32: +;SKX: ## BB#0: +;SKX-NEXT: vpmovd2m %xmm0, %k1 +;SKX-NEXT: vpmovzxwd (%rdi), %xmm0 {%k1} {z} +;SKX-NEXT: retq +define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { + %a = load <4 x i16>,<4 x i16> *%i,align 1 + %x = zext <4 x i16> %a to <4 x i32> + %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer + ret <4 x i32> %ret +} + +;SKX-LABEL: sext_4x16mem_to_4x32mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovd2m %xmm0, %k1 +;SKX-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} +;SKX-NEXT: retq +define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { + %a = load <4 x i16>,<4 x i16> *%i,align 1 + %x = sext <4 x i16> %a to <4 x i32> + %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer + ret <4 x i32> %ret +} + +;SKX-LABEL: sext_4x16mem_to_4x32: +;SKX: ## BB#0: +;SKX-NEXT: vpmovsxwd (%rdi), %xmm0 +;SKX-NEXT: retq +define <4 x i32> @sext_4x16mem_to_4x32(<4 x i16> *%i) nounwind readnone { + %a = load <4 x i16>,<4 x i16> *%i,align 1 + %x = sext <4 x i16> %a to <4 x i32> + ret <4 x i32> %x +} + + +;SKX-LABEL: zext_8x16mem_to_8x32: +;SKX: ## BB#0: +;SKX-NEXT: vpmovw2m %xmm0, %k1 +;SKX-NEXT: vpmovzxwd (%rdi), %ymm0 {%k1} {z} +;SKX-NEXT: retq +define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { + %a = load <8 x i16>,<8 x i16> *%i,align 1 + %x = zext <8 x i16> %a to <8 x i32> + %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer + ret <8 x i32> %ret +} + +;SKX-LABEL: sext_8x16mem_to_8x32mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovw2m %xmm0, %k1 +;SKX-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} +;SKX-NEXT: retq +define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { + %a = load <8 x i16>,<8 x i16> *%i,align 1 + %x = sext <8 x i16> %a to <8 x i32> + %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer + ret <8 x i32> %ret +} + +;SKX-LABEL: sext_8x16mem_to_8x32: +;SKX: ## BB#0: +;SKX-NEXT: vpmovsxwd (%rdi), %ymm0 +;SKX-NEXT: retq +define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readnone { + %a = load <8 x i16>,<8 x i16> *%i,align 1 + %x = sext <8 x i16> %a to <8 x i32> + ret <8 x i32> %x +} + +;SKX-LABEL: zext_8x16_to_8x32mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovw2m %xmm1, %k1 +;SKX-NEXT: vpmovzxwd %xmm0, %ymm0 {%k1} {z} +;SKX-NEXT: retq +define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone { + %x = zext <8 x i16> %a to <8 x i32> + %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer + ret <8 x i32> %ret +} + +;SKX-LABEL: zext_8x16_to_8x32: +;SKX: ## BB#0: +;SKX-NEXT: vpmovzxwd %xmm0, %ymm0 +;SKX-NEXT: retq +define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a ) nounwind readnone { + %x = zext <8 x i16> %a to <8 x i32> + ret <8 x i32> %x +} + +;SKX-LABEL: zext_16x16mem_to_16x32: +;KNL-LABEL: zext_16x16mem_to_16x32: +;SKX: ## BB#0: +;SKX-NEXT: vpmovb2m %xmm0, %k1 +;SKX-NEXT: vpmovzxwd (%rdi), %zmm0 {%k1} {z} +;KNL: vpmovzxwd (%rdi), %zmm0 {%k1} {z} +;SKX-NEXT: retq +define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone { + %a = load <16 x i16>,<16 x i16> *%i,align 1 + %x = zext <16 x i16> %a to <16 x i32> + %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer + ret <16 x i32> %ret +} + +;SKX-LABEL: sext_16x16mem_to_16x32mask: +;KNL-LABEL: sext_16x16mem_to_16x32mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovb2m %xmm0, %k1 +;SKX-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} +;KNL: vpmovsxwd (%rdi), %zmm0 {%k1} {z} +;SKX-NEXT: retq +define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone { + %a = load <16 x i16>,<16 x i16> *%i,align 1 + %x = sext <16 x i16> %a to <16 x i32> + %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer + ret <16 x i32> %ret +} + +;SKX-LABEL: sext_16x16mem_to_16x32: +;KNL-LABEL: sext_16x16mem_to_16x32: +;SKX: ## BB#0: +;SKX-NEXT: vpmovsxwd (%rdi), %zmm0 +;KNL: vpmovsxwd (%rdi), %zmm0 +;SKX-NEXT: retq +define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readnone { + %a = load <16 x i16>,<16 x i16> *%i,align 1 + %x = sext <16 x i16> %a to <16 x i32> + ret <16 x i32> %x +} +;SKX-LABEL: zext_16x16_to_16x32mask: +;KNL-LABEL: zext_16x16_to_16x32mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovb2m %xmm1, %k1 +;SKX-NEXT: vpmovzxwd %ymm0, %zmm0 {%k1} {z} +;KNL: vpmovzxwd %ymm0, %zmm0 {%k1} {z} +;SKX-NEXT: retq +define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) nounwind readnone { + %x = zext <16 x i16> %a to <16 x i32> + %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer + ret <16 x i32> %ret +} + +;SKX-LABEL: zext_16x16_to_16x32: +;KNL-LABEL: zext_16x16_to_16x32: +;SKX: ## BB#0: +;SKX-NEXT: vpmovzxwd %ymm0, %zmm0 +;KNL: vpmovzxwd %ymm0, %zmm0 +;SKX-NEXT: retq +define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a ) nounwind readnone { + %x = zext <16 x i16> %a to <16 x i32> + ret <16 x i32> %x +} + +;SKX-LABEL: zext_2x16mem_to_2x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovq2m %xmm0, %k1 +;SKX-NEXT: vpmovzxwq (%rdi), %xmm0 {%k1} {z} +;SKX-NEXT: retq +define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone { + %a = load <2 x i16>,<2 x i16> *%i,align 1 + %x = zext <2 x i16> %a to <2 x i64> + %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer + ret <2 x i64> %ret +} + +;SKX-LABEL: sext_2x16mem_to_2x64mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovq2m %xmm0, %k1 +;SKX-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} +;SKX-NEXT: retq +define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone { + %a = load <2 x i16>,<2 x i16> *%i,align 1 + %x = sext <2 x i16> %a to <2 x i64> + %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer + ret <2 x i64> %ret +} + +;SKX-LABEL: sext_2x16mem_to_2x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovsxwq (%rdi), %xmm0 +;SKX-NEXT: retq +define <2 x i64> @sext_2x16mem_to_2x64(<2 x i16> *%i) nounwind readnone { + %a = load <2 x i16>,<2 x i16> *%i,align 1 + %x = sext <2 x i16> %a to <2 x i64> + ret <2 x i64> %x +} + +;SKX-LABEL: zext_4x16mem_to_4x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovd2m %xmm0, %k1 +;SKX-NEXT: vpmovzxwq (%rdi), %ymm0 {%k1} {z} +;SKX-NEXT: retq +define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { + %a = load <4 x i16>,<4 x i16> *%i,align 1 + %x = zext <4 x i16> %a to <4 x i64> + %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer + ret <4 x i64> %ret +} + +;SKX-LABEL: sext_4x16mem_to_4x64mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovd2m %xmm0, %k1 +;SKX-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} +;SKX-NEXT: retq +define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { + %a = load <4 x i16>,<4 x i16> *%i,align 1 + %x = sext <4 x i16> %a to <4 x i64> + %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer + ret <4 x i64> %ret +} + +;SKX-LABEL: sext_4x16mem_to_4x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovsxwq (%rdi), %ymm0 +;SKX-NEXT: retq +define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readnone { + %a = load <4 x i16>,<4 x i16> *%i,align 1 + %x = sext <4 x i16> %a to <4 x i64> + ret <4 x i64> %x +} + +;SKX-LABEL: zext_8x16mem_to_8x64: +;KNL-LABEL: zext_8x16mem_to_8x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovw2m %xmm0, %k1 +;SKX-NEXT: vpmovzxwq (%rdi), %zmm0 {%k1} {z} +;KNL: vpmovzxwq (%rdi), %zmm0 {%k1} {z} +;SKX-NEXT: retq +define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { + %a = load <8 x i16>,<8 x i16> *%i,align 1 + %x = zext <8 x i16> %a to <8 x i64> + %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer + ret <8 x i64> %ret +} + +;SKX-LABEL: sext_8x16mem_to_8x64mask: +;KNL-LABEL: sext_8x16mem_to_8x64mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovw2m %xmm0, %k1 +;SKX-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} +;KNL: vpmovsxwq (%rdi), %zmm0 {%k1} {z} +;SKX-NEXT: retq +define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { + %a = load <8 x i16>,<8 x i16> *%i,align 1 + %x = sext <8 x i16> %a to <8 x i64> + %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer + ret <8 x i64> %ret +} + +;SKX-LABEL: sext_8x16mem_to_8x64: +;KNL-LABEL: sext_8x16mem_to_8x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovsxwq (%rdi), %zmm0 +;KNL: vpmovsxwq (%rdi), %zmm0 +;SKX-NEXT: retq +define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readnone { + %a = load <8 x i16>,<8 x i16> *%i,align 1 + %x = sext <8 x i16> %a to <8 x i64> + ret <8 x i64> %x +} + +;SKX-LABEL: zext_8x16_to_8x64mask: +;KNL-LABEL: zext_8x16_to_8x64mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovw2m %xmm1, %k1 +;SKX-NEXT: vpmovzxwq %xmm0, %zmm0 {%k1} {z} +;KNL: vpmovzxwq %xmm0, %zmm0 {%k1} {z} +;SKX-NEXT: retq +define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone { + %x = zext <8 x i16> %a to <8 x i64> + %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer + ret <8 x i64> %ret +} + +;SKX-LABEL: zext_8x16_to_8x64: +;KNL-LABEL: zext_8x16_to_8x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovzxwq %xmm0, %zmm0 +;KNL: vpmovzxwq %xmm0, %zmm0 +;SKX-NEXT: retq +; KNL: ret +define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone { + %ret = zext <8 x i16> %a to <8 x i64> + ret <8 x i64> %ret +} + +;SKX-LABEL: zext_2x32mem_to_2x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovq2m %xmm0, %k1 +;SKX-NEXT: vpmovzxdq (%rdi), %xmm0 {%k1} {z} +;SKX-NEXT: retq +define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone { + %a = load <2 x i32>,<2 x i32> *%i,align 1 + %x = zext <2 x i32> %a to <2 x i64> + %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer + ret <2 x i64> %ret +} + +;SKX-LABEL: sext_2x32mem_to_2x64mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovq2m %xmm0, %k1 +;SKX-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} +;SKX-NEXT: retq +define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone { + %a = load <2 x i32>,<2 x i32> *%i,align 1 + %x = sext <2 x i32> %a to <2 x i64> + %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer + ret <2 x i64> %ret +} + +;SKX-LABEL: sext_2x32mem_to_2x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovsxdq (%rdi), %xmm0 +;SKX-NEXT: retq +define <2 x i64> @sext_2x32mem_to_2x64(<2 x i32> *%i) nounwind readnone { + %a = load <2 x i32>,<2 x i32> *%i,align 1 + %x = sext <2 x i32> %a to <2 x i64> + ret <2 x i64> %x +} + +;SKX-LABEL: zext_4x32mem_to_4x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovd2m %xmm0, %k1 +;SKX-NEXT: vpmovzxdq (%rdi), %ymm0 {%k1} {z} +;SKX-NEXT: retq +define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone { + %a = load <4 x i32>,<4 x i32> *%i,align 1 + %x = zext <4 x i32> %a to <4 x i64> + %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer + ret <4 x i64> %ret +} + +;SKX-LABEL: sext_4x32mem_to_4x64mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovd2m %xmm0, %k1 +;SKX-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} +;SKX-NEXT: retq +define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone { + %a = load <4 x i32>,<4 x i32> *%i,align 1 + %x = sext <4 x i32> %a to <4 x i64> + %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer + ret <4 x i64> %ret +} + +;SKX-LABEL: sext_4x32mem_to_4x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovsxdq (%rdi), %ymm0 +;SKX-NEXT: retq +define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32> *%i) nounwind readnone { + %a = load <4 x i32>,<4 x i32> *%i,align 1 + %x = sext <4 x i32> %a to <4 x i64> + ret <4 x i64> %x +} + +;SKX-LABEL: sext_4x32_to_4x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovsxdq %xmm0, %ymm0 +;SKX-NEXT: retq +define <4 x i64> @sext_4x32_to_4x64(<4 x i32> %a) nounwind readnone { + %x = sext <4 x i32> %a to <4 x i64> + ret <4 x i64> %x +} + +;SKX-LABEL: zext_4x32_to_4x64mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovd2m %xmm1, %k1 +;SKX-NEXT: vpmovzxdq %xmm0, %ymm0 {%k1} {z} +;SKX-NEXT: retq +define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind readnone { + %x = zext <4 x i32> %a to <4 x i64> + %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer + ret <4 x i64> %ret +} + +;SKX-LABEL: zext_8x32mem_to_8x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovw2m %xmm0, %k1 +;SKX-NEXT: vpmovzxdq (%rdi), %zmm0 {%k1} {z} +;SKX-NEXT: retq +define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone { + %a = load <8 x i32>,<8 x i32> *%i,align 1 + %x = zext <8 x i32> %a to <8 x i64> + %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer + ret <8 x i64> %ret +} + +;SKX-LABEL: sext_8x32mem_to_8x64mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovw2m %xmm0, %k1 +;SKX-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} +;SKX-NEXT: retq +define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone { + %a = load <8 x i32>,<8 x i32> *%i,align 1 + %x = sext <8 x i32> %a to <8 x i64> + %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer + ret <8 x i64> %ret +} + +;SKX-LABEL: sext_8x32mem_to_8x64: +;KNL-LABEL: sext_8x32mem_to_8x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovsxdq (%rdi), %zmm0 +;KNL: vpmovsxdq (%rdi), %zmm0 +;SKX-NEXT: retq +define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone { + %a = load <8 x i32>,<8 x i32> *%i,align 1 + %x = sext <8 x i32> %a to <8 x i64> + ret <8 x i64> %x +} + +;SKX-LABEL: sext_8x32_to_8x64: +;KNL-LABEL: sext_8x32_to_8x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovsxdq %ymm0, %zmm0 +;KNL: vpmovsxdq %ymm0, %zmm0 +;SKX-NEXT: retq +define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone { + %x = sext <8 x i32> %a to <8 x i64> + ret <8 x i64> %x +} + +;SKX-LABEL: zext_8x32_to_8x64mask: +;KNL-LABEL: zext_8x32_to_8x64mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovw2m %xmm1, %k1 +;SKX-NEXT: vpmovzxdq %ymm0, %zmm0 {%k1} {z} +;KNL: vpmovzxdq %ymm0, %zmm0 {%k1} {z} +;SKX-NEXT: retq +define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind readnone { + %x = zext <8 x i32> %a to <8 x i64> + %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer + ret <8 x i64> %ret +} +;KNL-LABEL: fptrunc_test +;KNL: vcvtpd2ps {{.*}}%zmm +;KNL: ret define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone { %b = fptrunc <8 x double> %a to <8 x float> ret <8 x float> %b } -;CHECK-LABEL: fpext_test -;CHECK: vcvtps2pd {{.*}}%zmm -;CHECK: ret +;KNL-LABEL: fpext_test +;KNL: vcvtps2pd {{.*}}%zmm +;KNL: ret define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone { %b = fpext <8 x float> %a to <8 x double> ret <8 x double> %b } -; CHECK-LABEL: zext_16i1_to_16xi32 -; CHECK: vpbroadcastd LCP{{.*}}(%rip), %zmm0 {%k1} {z} -; CHECK: ret +; KNL-LABEL: zext_16i1_to_16xi32 +; KNL: vpbroadcastd LCP{{.*}}(%rip), %zmm0 {%k1} {z} +; KNL: ret define <16 x i32> @zext_16i1_to_16xi32(i16 %b) { %a = bitcast i16 %b to <16 x i1> %c = zext <16 x i1> %a to <16 x i32> ret <16 x i32> %c } -; CHECK-LABEL: zext_8i1_to_8xi64 -; CHECK: vpbroadcastq LCP{{.*}}(%rip), %zmm0 {%k1} {z} -; CHECK: ret +; KNL-LABEL: zext_8i1_to_8xi64 +; KNL: vpbroadcastq LCP{{.*}}(%rip), %zmm0 {%k1} {z} +; KNL: ret define <8 x i64> @zext_8i1_to_8xi64(i8 %b) { %a = bitcast i8 %b to <8 x i1> %c = zext <8 x i1> %a to <8 x i64> ret <8 x i64> %c } -; CHECK-LABEL: trunc_16i8_to_16i1 -; CHECK: vpmovsxbd -; CHECK: vpandd -; CHECK: vptestmd -; CHECK: ret +; KNL-LABEL: trunc_16i8_to_16i1 +; KNL: vpmovsxbd +; KNL: vpandd +; KNL: vptestmd +; KNL: ret ; SKX-LABEL: trunc_16i8_to_16i1 ; SKX: vpmovb2m %xmm define i16 @trunc_16i8_to_16i1(<16 x i8> %a) { @@ -98,10 +846,10 @@ define i16 @trunc_16i8_to_16i1(<16 x i8> %a) { ret i16 %mask } -; CHECK-LABEL: trunc_16i32_to_16i1 -; CHECK: vpandd -; CHECK: vptestmd -; CHECK: ret +; KNL-LABEL: trunc_16i32_to_16i1 +; KNL: vpandd +; KNL: vptestmd +; KNL: ret ; SKX-LABEL: trunc_16i32_to_16i1 ; SKX: vpmovd2m %zmm define i16 @trunc_16i32_to_16i1(<16 x i32> %a) { @@ -122,11 +870,11 @@ define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) { ret <4 x i32>%res } -; CHECK-LABEL: trunc_8i16_to_8i1 -; CHECK: vpmovsxwq -; CHECK: vpandq LCP{{.*}}(%rip){1to8} -; CHECK: vptestmq -; CHECK: ret +; KNL-LABEL: trunc_8i16_to_8i1 +; KNL: vpmovsxwq +; KNL: vpandq LCP{{.*}}(%rip){1to8} +; KNL: vptestmq +; KNL: ret ; SKX-LABEL: trunc_8i16_to_8i1 ; SKX: vpmovw2m %xmm @@ -136,10 +884,10 @@ define i8 @trunc_8i16_to_8i1(<8 x i16> %a) { ret i8 %mask } -; CHECK-LABEL: sext_8i1_8i32 -; CHECK: vpbroadcastq LCP{{.*}}(%rip), %zmm0 {%k1} {z} +; KNL-LABEL: sext_8i1_8i32 +; KNL: vpbroadcastq LCP{{.*}}(%rip), %zmm0 {%k1} {z} ; SKX: vpmovm2d -; CHECK: ret +; KNL: ret define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind { %x = icmp slt <8 x i32> %a1, %a2 %x1 = xor <8 x i1>%x, @@ -147,18 +895,18 @@ define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind { ret <8 x i32> %y } -; CHECK-LABEL: trunc_v16i32_to_v16i16 -; CHECK: vpmovdw -; CHECK: ret +; KNL-LABEL: trunc_v16i32_to_v16i16 +; KNL: vpmovdw +; KNL: ret define <16 x i16> @trunc_v16i32_to_v16i16(<16 x i32> %x) { %1 = trunc <16 x i32> %x to <16 x i16> ret <16 x i16> %1 } -; CHECK-LABEL: trunc_i32_to_i1 -; CHECK: movw $-4, %ax -; CHECK: kmovw %eax, %k1 -; CKECK: korw +; KNL-LABEL: trunc_i32_to_i1 +; KNL: movw $-4, %ax +; KNL: kmovw %eax, %k1 +; KNL: korw define i16 @trunc_i32_to_i1(i32 %a) { %a_i = trunc i32 %a to i1 %maskv = insertelement <16 x i1> , i1 %a_i, i32 0 @@ -166,35 +914,35 @@ define i16 @trunc_i32_to_i1(i32 %a) { ret i16 %res } -; CHECK-LABEL: sext_8i1_8i16 +; KNL-LABEL: sext_8i1_8i16 ; SKX: vpmovm2w -; CHECK: ret +; KNL: ret define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind { %x = icmp slt <8 x i32> %a1, %a2 %y = sext <8 x i1> %x to <8 x i16> ret <8 x i16> %y } -; CHECK-LABEL: sext_16i1_16i32 +; KNL-LABEL: sext_16i1_16i32 ; SKX: vpmovm2d -; CHECK: ret +; KNL: ret define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind { %x = icmp slt <16 x i32> %a1, %a2 %y = sext <16 x i1> %x to <16 x i32> ret <16 x i32> %y } -; CHECK-LABEL: sext_8i1_8i64 +; KNL-LABEL: sext_8i1_8i64 ; SKX: vpmovm2q -; CHECK: ret +; KNL: ret define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind { %x = icmp slt <8 x i32> %a1, %a2 %y = sext <8 x i1> %x to <8 x i64> ret <8 x i64> %y } -; CHECK-LABEL: @extload_v8i64 -; CHECK: vpmovsxbq +; KNL-LABEL: @extload_v8i64 +; KNL: vpmovsxbq define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) { %sign_load = load <8 x i8>, <8 x i8>* %a %c = sext <8 x i8> %sign_load to <8 x i64> diff --git a/test/MC/X86/x86-64-avx512bw.s b/test/MC/X86/x86-64-avx512bw.s index 8b43dcbbbd7..ba043da6632 100644 --- a/test/MC/X86/x86-64-avx512bw.s +++ b/test/MC/X86/x86-64-avx512bw.s @@ -511,6 +511,78 @@ // CHECK: encoding: [0x62,0xe2,0x15,0x40,0x3a,0x9a,0xc0,0xdf,0xff,0xff] vpminuw -8256(%rdx), %zmm29, %zmm19 +// CHECK: vpmovsxbw %ymm18, %zmm22 +// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x20,0xf2] + vpmovsxbw %ymm18, %zmm22 + +// CHECK: vpmovsxbw %ymm18, %zmm22 {%k5} +// CHECK: encoding: [0x62,0xa2,0x7d,0x4d,0x20,0xf2] + vpmovsxbw %ymm18, %zmm22 {%k5} + +// CHECK: vpmovsxbw %ymm18, %zmm22 {%k5} {z} +// CHECK: encoding: [0x62,0xa2,0x7d,0xcd,0x20,0xf2] + vpmovsxbw %ymm18, %zmm22 {%k5} {z} + +// CHECK: vpmovsxbw (%rcx), %zmm22 +// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x20,0x31] + vpmovsxbw (%rcx), %zmm22 + +// CHECK: vpmovsxbw 291(%rax,%r14,8), %zmm22 +// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x20,0xb4,0xf0,0x23,0x01,0x00,0x00] + vpmovsxbw 291(%rax,%r14,8), %zmm22 + +// CHECK: vpmovsxbw 4064(%rdx), %zmm22 +// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x20,0x72,0x7f] + vpmovsxbw 4064(%rdx), %zmm22 + +// CHECK: vpmovsxbw 4096(%rdx), %zmm22 +// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x20,0xb2,0x00,0x10,0x00,0x00] + vpmovsxbw 4096(%rdx), %zmm22 + +// CHECK: vpmovsxbw -4096(%rdx), %zmm22 +// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x20,0x72,0x80] + vpmovsxbw -4096(%rdx), %zmm22 + +// CHECK: vpmovsxbw -4128(%rdx), %zmm22 +// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x20,0xb2,0xe0,0xef,0xff,0xff] + vpmovsxbw -4128(%rdx), %zmm22 + +// CHECK: vpmovzxbw %ymm26, %zmm24 +// CHECK: encoding: [0x62,0x02,0x7d,0x48,0x30,0xc2] + vpmovzxbw %ymm26, %zmm24 + +// CHECK: vpmovzxbw %ymm26, %zmm24 {%k4} +// CHECK: encoding: [0x62,0x02,0x7d,0x4c,0x30,0xc2] + vpmovzxbw %ymm26, %zmm24 {%k4} + +// CHECK: vpmovzxbw %ymm26, %zmm24 {%k4} {z} +// CHECK: encoding: [0x62,0x02,0x7d,0xcc,0x30,0xc2] + vpmovzxbw %ymm26, %zmm24 {%k4} {z} + +// CHECK: vpmovzxbw (%rcx), %zmm24 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x30,0x01] + vpmovzxbw (%rcx), %zmm24 + +// CHECK: vpmovzxbw 291(%rax,%r14,8), %zmm24 +// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x30,0x84,0xf0,0x23,0x01,0x00,0x00] + vpmovzxbw 291(%rax,%r14,8), %zmm24 + +// CHECK: vpmovzxbw 4064(%rdx), %zmm24 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x30,0x42,0x7f] + vpmovzxbw 4064(%rdx), %zmm24 + +// CHECK: vpmovzxbw 4096(%rdx), %zmm24 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x30,0x82,0x00,0x10,0x00,0x00] + vpmovzxbw 4096(%rdx), %zmm24 + +// CHECK: vpmovzxbw -4096(%rdx), %zmm24 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x30,0x42,0x80] + vpmovzxbw -4096(%rdx), %zmm24 + +// CHECK: vpmovzxbw -4128(%rdx), %zmm24 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x30,0x82,0xe0,0xef,0xff,0xff] + vpmovzxbw -4128(%rdx), %zmm24 + // CHECK: vpmullw %zmm19, %zmm28, %zmm19 // CHECK: encoding: [0x62,0xa1,0x1d,0x40,0xd5,0xdb] vpmullw %zmm19, %zmm28, %zmm19 diff --git a/test/MC/X86/x86-64-avx512bw_vl.s b/test/MC/X86/x86-64-avx512bw_vl.s index f6ee1cc8682..bd16b0ed95c 100644 --- a/test/MC/X86/x86-64-avx512bw_vl.s +++ b/test/MC/X86/x86-64-avx512bw_vl.s @@ -1312,6 +1312,150 @@ // CHECK: encoding: [0x62,0xe2,0x25,0x20,0x3a,0xa2,0xe0,0xef,0xff,0xff] vpminuw -4128(%rdx), %ymm27, %ymm20 +// CHECK: vpmovsxbw %xmm23, %xmm27 +// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x20,0xdf] + vpmovsxbw %xmm23, %xmm27 + +// CHECK: vpmovsxbw %xmm23, %xmm27 {%k7} +// CHECK: encoding: [0x62,0x22,0x7d,0x0f,0x20,0xdf] + vpmovsxbw %xmm23, %xmm27 {%k7} + +// CHECK: vpmovsxbw %xmm23, %xmm27 {%k7} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0x8f,0x20,0xdf] + vpmovsxbw %xmm23, %xmm27 {%k7} {z} + +// CHECK: vpmovsxbw (%rcx), %xmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x20,0x19] + vpmovsxbw (%rcx), %xmm27 + +// CHECK: vpmovsxbw 291(%rax,%r14,8), %xmm27 +// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x20,0x9c,0xf0,0x23,0x01,0x00,0x00] + vpmovsxbw 291(%rax,%r14,8), %xmm27 + +// CHECK: vpmovsxbw 1016(%rdx), %xmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x20,0x5a,0x7f] + vpmovsxbw 1016(%rdx), %xmm27 + +// CHECK: vpmovsxbw 1024(%rdx), %xmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x20,0x9a,0x00,0x04,0x00,0x00] + vpmovsxbw 1024(%rdx), %xmm27 + +// CHECK: vpmovsxbw -1024(%rdx), %xmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x20,0x5a,0x80] + vpmovsxbw -1024(%rdx), %xmm27 + +// CHECK: vpmovsxbw -1032(%rdx), %xmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x20,0x9a,0xf8,0xfb,0xff,0xff] + vpmovsxbw -1032(%rdx), %xmm27 + +// CHECK: vpmovsxbw %xmm23, %ymm21 +// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x20,0xef] + vpmovsxbw %xmm23, %ymm21 + +// CHECK: vpmovsxbw %xmm23, %ymm21 {%k7} +// CHECK: encoding: [0x62,0xa2,0x7d,0x2f,0x20,0xef] + vpmovsxbw %xmm23, %ymm21 {%k7} + +// CHECK: vpmovsxbw %xmm23, %ymm21 {%k7} {z} +// CHECK: encoding: [0x62,0xa2,0x7d,0xaf,0x20,0xef] + vpmovsxbw %xmm23, %ymm21 {%k7} {z} + +// CHECK: vpmovsxbw (%rcx), %ymm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x20,0x29] + vpmovsxbw (%rcx), %ymm21 + +// CHECK: vpmovsxbw 291(%rax,%r14,8), %ymm21 +// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x20,0xac,0xf0,0x23,0x01,0x00,0x00] + vpmovsxbw 291(%rax,%r14,8), %ymm21 + +// CHECK: vpmovsxbw 2032(%rdx), %ymm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x20,0x6a,0x7f] + vpmovsxbw 2032(%rdx), %ymm21 + +// CHECK: vpmovsxbw 2048(%rdx), %ymm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x20,0xaa,0x00,0x08,0x00,0x00] + vpmovsxbw 2048(%rdx), %ymm21 + +// CHECK: vpmovsxbw -2048(%rdx), %ymm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x20,0x6a,0x80] + vpmovsxbw -2048(%rdx), %ymm21 + +// CHECK: vpmovsxbw -2064(%rdx), %ymm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x20,0xaa,0xf0,0xf7,0xff,0xff] + vpmovsxbw -2064(%rdx), %ymm21 + +// CHECK: vpmovzxbw %xmm29, %xmm30 +// CHECK: encoding: [0x62,0x02,0x7d,0x08,0x30,0xf5] + vpmovzxbw %xmm29, %xmm30 + +// CHECK: vpmovzxbw %xmm29, %xmm30 {%k7} +// CHECK: encoding: [0x62,0x02,0x7d,0x0f,0x30,0xf5] + vpmovzxbw %xmm29, %xmm30 {%k7} + +// CHECK: vpmovzxbw %xmm29, %xmm30 {%k7} {z} +// CHECK: encoding: [0x62,0x02,0x7d,0x8f,0x30,0xf5] + vpmovzxbw %xmm29, %xmm30 {%k7} {z} + +// CHECK: vpmovzxbw (%rcx), %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x30,0x31] + vpmovzxbw (%rcx), %xmm30 + +// CHECK: vpmovzxbw 291(%rax,%r14,8), %xmm30 +// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x30,0xb4,0xf0,0x23,0x01,0x00,0x00] + vpmovzxbw 291(%rax,%r14,8), %xmm30 + +// CHECK: vpmovzxbw 1016(%rdx), %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x30,0x72,0x7f] + vpmovzxbw 1016(%rdx), %xmm30 + +// CHECK: vpmovzxbw 1024(%rdx), %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x30,0xb2,0x00,0x04,0x00,0x00] + vpmovzxbw 1024(%rdx), %xmm30 + +// CHECK: vpmovzxbw -1024(%rdx), %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x30,0x72,0x80] + vpmovzxbw -1024(%rdx), %xmm30 + +// CHECK: vpmovzxbw -1032(%rdx), %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x30,0xb2,0xf8,0xfb,0xff,0xff] + vpmovzxbw -1032(%rdx), %xmm30 + +// CHECK: vpmovzxbw %xmm29, %ymm22 +// CHECK: encoding: [0x62,0x82,0x7d,0x28,0x30,0xf5] + vpmovzxbw %xmm29, %ymm22 + +// CHECK: vpmovzxbw %xmm29, %ymm22 {%k2} +// CHECK: encoding: [0x62,0x82,0x7d,0x2a,0x30,0xf5] + vpmovzxbw %xmm29, %ymm22 {%k2} + +// CHECK: vpmovzxbw %xmm29, %ymm22 {%k2} {z} +// CHECK: encoding: [0x62,0x82,0x7d,0xaa,0x30,0xf5] + vpmovzxbw %xmm29, %ymm22 {%k2} {z} + +// CHECK: vpmovzxbw (%rcx), %ymm22 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x30,0x31] + vpmovzxbw (%rcx), %ymm22 + +// CHECK: vpmovzxbw 291(%rax,%r14,8), %ymm22 +// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x30,0xb4,0xf0,0x23,0x01,0x00,0x00] + vpmovzxbw 291(%rax,%r14,8), %ymm22 + +// CHECK: vpmovzxbw 2032(%rdx), %ymm22 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x30,0x72,0x7f] + vpmovzxbw 2032(%rdx), %ymm22 + +// CHECK: vpmovzxbw 2048(%rdx), %ymm22 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x30,0xb2,0x00,0x08,0x00,0x00] + vpmovzxbw 2048(%rdx), %ymm22 + +// CHECK: vpmovzxbw -2048(%rdx), %ymm22 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x30,0x72,0x80] + vpmovzxbw -2048(%rdx), %ymm22 + +// CHECK: vpmovzxbw -2064(%rdx), %ymm22 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x30,0xb2,0xf0,0xf7,0xff,0xff] + vpmovzxbw -2064(%rdx), %ymm22 + // CHECK: vpmullw %xmm26, %xmm19, %xmm29 // CHECK: encoding: [0x62,0x01,0x65,0x00,0xd5,0xea] vpmullw %xmm26, %xmm19, %xmm29 diff --git a/test/MC/X86/x86-64-avx512f_vl.s b/test/MC/X86/x86-64-avx512f_vl.s index cf22322bf52..50077268170 100644 --- a/test/MC/X86/x86-64-avx512f_vl.s +++ b/test/MC/X86/x86-64-avx512f_vl.s @@ -4524,6 +4524,726 @@ // CHECK: encoding: [0x62,0x62,0xd5,0x30,0x3b,0xaa,0xf8,0xfb,0xff,0xff] vpminuq -1032(%rdx){1to4}, %ymm21, %ymm29 +// CHECK: vpmovsxbd %xmm28, %xmm24 +// CHECK: encoding: [0x62,0x02,0x7d,0x08,0x21,0xc4] + vpmovsxbd %xmm28, %xmm24 + +// CHECK: vpmovsxbd %xmm28, %xmm24 {%k1} +// CHECK: encoding: [0x62,0x02,0x7d,0x09,0x21,0xc4] + vpmovsxbd %xmm28, %xmm24 {%k1} + +// CHECK: vpmovsxbd %xmm28, %xmm24 {%k1} {z} +// CHECK: encoding: [0x62,0x02,0x7d,0x89,0x21,0xc4] + vpmovsxbd %xmm28, %xmm24 {%k1} {z} + +// CHECK: vpmovsxbd (%rcx), %xmm24 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x21,0x01] + vpmovsxbd (%rcx), %xmm24 + +// CHECK: vpmovsxbd 291(%rax,%r14,8), %xmm24 +// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x21,0x84,0xf0,0x23,0x01,0x00,0x00] + vpmovsxbd 291(%rax,%r14,8), %xmm24 + +// CHECK: vpmovsxbd 508(%rdx), %xmm24 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x21,0x42,0x7f] + vpmovsxbd 508(%rdx), %xmm24 + +// CHECK: vpmovsxbd 512(%rdx), %xmm24 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x21,0x82,0x00,0x02,0x00,0x00] + vpmovsxbd 512(%rdx), %xmm24 + +// CHECK: vpmovsxbd -512(%rdx), %xmm24 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x21,0x42,0x80] + vpmovsxbd -512(%rdx), %xmm24 + +// CHECK: vpmovsxbd -516(%rdx), %xmm24 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x21,0x82,0xfc,0xfd,0xff,0xff] + vpmovsxbd -516(%rdx), %xmm24 + +// CHECK: vpmovsxbd %xmm20, %ymm24 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x21,0xc4] + vpmovsxbd %xmm20, %ymm24 + +// CHECK: vpmovsxbd %xmm20, %ymm24 {%k3} +// CHECK: encoding: [0x62,0x22,0x7d,0x2b,0x21,0xc4] + vpmovsxbd %xmm20, %ymm24 {%k3} + +// CHECK: vpmovsxbd %xmm20, %ymm24 {%k3} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0xab,0x21,0xc4] + vpmovsxbd %xmm20, %ymm24 {%k3} {z} + +// CHECK: vpmovsxbd (%rcx), %ymm24 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x21,0x01] + vpmovsxbd (%rcx), %ymm24 + +// CHECK: vpmovsxbd 291(%rax,%r14,8), %ymm24 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x21,0x84,0xf0,0x23,0x01,0x00,0x00] + vpmovsxbd 291(%rax,%r14,8), %ymm24 + +// CHECK: vpmovsxbd 1016(%rdx), %ymm24 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x21,0x42,0x7f] + vpmovsxbd 1016(%rdx), %ymm24 + +// CHECK: vpmovsxbd 1024(%rdx), %ymm24 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x21,0x82,0x00,0x04,0x00,0x00] + vpmovsxbd 1024(%rdx), %ymm24 + +// CHECK: vpmovsxbd -1024(%rdx), %ymm24 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x21,0x42,0x80] + vpmovsxbd -1024(%rdx), %ymm24 + +// CHECK: vpmovsxbd -1032(%rdx), %ymm24 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x21,0x82,0xf8,0xfb,0xff,0xff] + vpmovsxbd -1032(%rdx), %ymm24 + +// CHECK: vpmovsxbq %xmm22, %xmm17 +// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x22,0xce] + vpmovsxbq %xmm22, %xmm17 + +// CHECK: vpmovsxbq %xmm22, %xmm17 {%k5} +// CHECK: encoding: [0x62,0xa2,0x7d,0x0d,0x22,0xce] + vpmovsxbq %xmm22, %xmm17 {%k5} + +// CHECK: vpmovsxbq %xmm22, %xmm17 {%k5} {z} +// CHECK: encoding: [0x62,0xa2,0x7d,0x8d,0x22,0xce] + vpmovsxbq %xmm22, %xmm17 {%k5} {z} + +// CHECK: vpmovsxbq (%rcx), %xmm17 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x22,0x09] + vpmovsxbq (%rcx), %xmm17 + +// CHECK: vpmovsxbq 291(%rax,%r14,8), %xmm17 +// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x22,0x8c,0xf0,0x23,0x01,0x00,0x00] + vpmovsxbq 291(%rax,%r14,8), %xmm17 + +// CHECK: vpmovsxbq 254(%rdx), %xmm17 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x22,0x4a,0x7f] + vpmovsxbq 254(%rdx), %xmm17 + +// CHECK: vpmovsxbq 256(%rdx), %xmm17 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x22,0x8a,0x00,0x01,0x00,0x00] + vpmovsxbq 256(%rdx), %xmm17 + +// CHECK: vpmovsxbq -256(%rdx), %xmm17 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x22,0x4a,0x80] + vpmovsxbq -256(%rdx), %xmm17 + +// CHECK: vpmovsxbq -258(%rdx), %xmm17 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x22,0x8a,0xfe,0xfe,0xff,0xff] + vpmovsxbq -258(%rdx), %xmm17 + +// CHECK: vpmovsxbq %xmm26, %ymm28 +// CHECK: encoding: [0x62,0x02,0x7d,0x28,0x22,0xe2] + vpmovsxbq %xmm26, %ymm28 + +// CHECK: vpmovsxbq %xmm26, %ymm28 {%k5} +// CHECK: encoding: [0x62,0x02,0x7d,0x2d,0x22,0xe2] + vpmovsxbq %xmm26, %ymm28 {%k5} + +// CHECK: vpmovsxbq %xmm26, %ymm28 {%k5} {z} +// CHECK: encoding: [0x62,0x02,0x7d,0xad,0x22,0xe2] + vpmovsxbq %xmm26, %ymm28 {%k5} {z} + +// CHECK: vpmovsxbq (%rcx), %ymm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x22,0x21] + vpmovsxbq (%rcx), %ymm28 + +// CHECK: vpmovsxbq 291(%rax,%r14,8), %ymm28 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x22,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpmovsxbq 291(%rax,%r14,8), %ymm28 + +// CHECK: vpmovsxbq 508(%rdx), %ymm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x22,0x62,0x7f] + vpmovsxbq 508(%rdx), %ymm28 + +// CHECK: vpmovsxbq 512(%rdx), %ymm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x22,0xa2,0x00,0x02,0x00,0x00] + vpmovsxbq 512(%rdx), %ymm28 + +// CHECK: vpmovsxbq -512(%rdx), %ymm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x22,0x62,0x80] + vpmovsxbq -512(%rdx), %ymm28 + +// CHECK: vpmovsxbq -516(%rdx), %ymm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x22,0xa2,0xfc,0xfd,0xff,0xff] + vpmovsxbq -516(%rdx), %ymm28 + +// CHECK: vpmovsxdq %xmm26, %xmm23 +// CHECK: encoding: [0x62,0x82,0x7d,0x08,0x25,0xfa] + vpmovsxdq %xmm26, %xmm23 + +// CHECK: vpmovsxdq %xmm26, %xmm23 {%k7} +// CHECK: encoding: [0x62,0x82,0x7d,0x0f,0x25,0xfa] + vpmovsxdq %xmm26, %xmm23 {%k7} + +// CHECK: vpmovsxdq %xmm26, %xmm23 {%k7} {z} +// CHECK: encoding: [0x62,0x82,0x7d,0x8f,0x25,0xfa] + vpmovsxdq %xmm26, %xmm23 {%k7} {z} + +// CHECK: vpmovsxdq (%rcx), %xmm23 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x25,0x39] + vpmovsxdq (%rcx), %xmm23 + +// CHECK: vpmovsxdq 291(%rax,%r14,8), %xmm23 +// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x25,0xbc,0xf0,0x23,0x01,0x00,0x00] + vpmovsxdq 291(%rax,%r14,8), %xmm23 + +// CHECK: vpmovsxdq 1016(%rdx), %xmm23 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x25,0x7a,0x7f] + vpmovsxdq 1016(%rdx), %xmm23 + +// CHECK: vpmovsxdq 1024(%rdx), %xmm23 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x25,0xba,0x00,0x04,0x00,0x00] + vpmovsxdq 1024(%rdx), %xmm23 + +// CHECK: vpmovsxdq -1024(%rdx), %xmm23 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x25,0x7a,0x80] + vpmovsxdq -1024(%rdx), %xmm23 + +// CHECK: vpmovsxdq -1032(%rdx), %xmm23 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x25,0xba,0xf8,0xfb,0xff,0xff] + vpmovsxdq -1032(%rdx), %xmm23 + +// CHECK: vpmovsxdq %xmm28, %ymm18 +// CHECK: encoding: [0x62,0x82,0x7d,0x28,0x25,0xd4] + vpmovsxdq %xmm28, %ymm18 + +// CHECK: vpmovsxdq %xmm28, %ymm18 {%k7} +// CHECK: encoding: [0x62,0x82,0x7d,0x2f,0x25,0xd4] + vpmovsxdq %xmm28, %ymm18 {%k7} + +// CHECK: vpmovsxdq %xmm28, %ymm18 {%k7} {z} +// CHECK: encoding: [0x62,0x82,0x7d,0xaf,0x25,0xd4] + vpmovsxdq %xmm28, %ymm18 {%k7} {z} + +// CHECK: vpmovsxdq (%rcx), %ymm18 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x25,0x11] + vpmovsxdq (%rcx), %ymm18 + +// CHECK: vpmovsxdq 291(%rax,%r14,8), %ymm18 +// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x25,0x94,0xf0,0x23,0x01,0x00,0x00] + vpmovsxdq 291(%rax,%r14,8), %ymm18 + +// CHECK: vpmovsxdq 2032(%rdx), %ymm18 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x25,0x52,0x7f] + vpmovsxdq 2032(%rdx), %ymm18 + +// CHECK: vpmovsxdq 2048(%rdx), %ymm18 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x25,0x92,0x00,0x08,0x00,0x00] + vpmovsxdq 2048(%rdx), %ymm18 + +// CHECK: vpmovsxdq -2048(%rdx), %ymm18 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x25,0x52,0x80] + vpmovsxdq -2048(%rdx), %ymm18 + +// CHECK: vpmovsxdq -2064(%rdx), %ymm18 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x25,0x92,0xf0,0xf7,0xff,0xff] + vpmovsxdq -2064(%rdx), %ymm18 + +// CHECK: vpmovsxwd %xmm18, %xmm17 +// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x23,0xca] + vpmovsxwd %xmm18, %xmm17 + +// CHECK: vpmovsxwd %xmm18, %xmm17 {%k4} +// CHECK: encoding: [0x62,0xa2,0x7d,0x0c,0x23,0xca] + vpmovsxwd %xmm18, %xmm17 {%k4} + +// CHECK: vpmovsxwd %xmm18, %xmm17 {%k4} {z} +// CHECK: encoding: [0x62,0xa2,0x7d,0x8c,0x23,0xca] + vpmovsxwd %xmm18, %xmm17 {%k4} {z} + +// CHECK: vpmovsxwd (%rcx), %xmm17 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x23,0x09] + vpmovsxwd (%rcx), %xmm17 + +// CHECK: vpmovsxwd 291(%rax,%r14,8), %xmm17 +// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x23,0x8c,0xf0,0x23,0x01,0x00,0x00] + vpmovsxwd 291(%rax,%r14,8), %xmm17 + +// CHECK: vpmovsxwd 1016(%rdx), %xmm17 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x23,0x4a,0x7f] + vpmovsxwd 1016(%rdx), %xmm17 + +// CHECK: vpmovsxwd 1024(%rdx), %xmm17 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x23,0x8a,0x00,0x04,0x00,0x00] + vpmovsxwd 1024(%rdx), %xmm17 + +// CHECK: vpmovsxwd -1024(%rdx), %xmm17 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x23,0x4a,0x80] + vpmovsxwd -1024(%rdx), %xmm17 + +// CHECK: vpmovsxwd -1032(%rdx), %xmm17 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x23,0x8a,0xf8,0xfb,0xff,0xff] + vpmovsxwd -1032(%rdx), %xmm17 + +// CHECK: vpmovsxwd %xmm25, %ymm21 +// CHECK: encoding: [0x62,0x82,0x7d,0x28,0x23,0xe9] + vpmovsxwd %xmm25, %ymm21 + +// CHECK: vpmovsxwd %xmm25, %ymm21 {%k5} +// CHECK: encoding: [0x62,0x82,0x7d,0x2d,0x23,0xe9] + vpmovsxwd %xmm25, %ymm21 {%k5} + +// CHECK: vpmovsxwd %xmm25, %ymm21 {%k5} {z} +// CHECK: encoding: [0x62,0x82,0x7d,0xad,0x23,0xe9] + vpmovsxwd %xmm25, %ymm21 {%k5} {z} + +// CHECK: vpmovsxwd (%rcx), %ymm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x23,0x29] + vpmovsxwd (%rcx), %ymm21 + +// CHECK: vpmovsxwd 291(%rax,%r14,8), %ymm21 +// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x23,0xac,0xf0,0x23,0x01,0x00,0x00] + vpmovsxwd 291(%rax,%r14,8), %ymm21 + +// CHECK: vpmovsxwd 2032(%rdx), %ymm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x23,0x6a,0x7f] + vpmovsxwd 2032(%rdx), %ymm21 + +// CHECK: vpmovsxwd 2048(%rdx), %ymm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x23,0xaa,0x00,0x08,0x00,0x00] + vpmovsxwd 2048(%rdx), %ymm21 + +// CHECK: vpmovsxwd -2048(%rdx), %ymm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x23,0x6a,0x80] + vpmovsxwd -2048(%rdx), %ymm21 + +// CHECK: vpmovsxwd -2064(%rdx), %ymm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x23,0xaa,0xf0,0xf7,0xff,0xff] + vpmovsxwd -2064(%rdx), %ymm21 + +// CHECK: vpmovsxwq %xmm20, %xmm29 +// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x24,0xec] + vpmovsxwq %xmm20, %xmm29 + +// CHECK: vpmovsxwq %xmm20, %xmm29 {%k6} +// CHECK: encoding: [0x62,0x22,0x7d,0x0e,0x24,0xec] + vpmovsxwq %xmm20, %xmm29 {%k6} + +// CHECK: vpmovsxwq %xmm20, %xmm29 {%k6} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0x8e,0x24,0xec] + vpmovsxwq %xmm20, %xmm29 {%k6} {z} + +// CHECK: vpmovsxwq (%rcx), %xmm29 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x24,0x29] + vpmovsxwq (%rcx), %xmm29 + +// CHECK: vpmovsxwq 291(%rax,%r14,8), %xmm29 +// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x24,0xac,0xf0,0x23,0x01,0x00,0x00] + vpmovsxwq 291(%rax,%r14,8), %xmm29 + +// CHECK: vpmovsxwq 508(%rdx), %xmm29 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x24,0x6a,0x7f] + vpmovsxwq 508(%rdx), %xmm29 + +// CHECK: vpmovsxwq 512(%rdx), %xmm29 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x24,0xaa,0x00,0x02,0x00,0x00] + vpmovsxwq 512(%rdx), %xmm29 + +// CHECK: vpmovsxwq -512(%rdx), %xmm29 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x24,0x6a,0x80] + vpmovsxwq -512(%rdx), %xmm29 + +// CHECK: vpmovsxwq -516(%rdx), %xmm29 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x24,0xaa,0xfc,0xfd,0xff,0xff] + vpmovsxwq -516(%rdx), %xmm29 + +// CHECK: vpmovsxwq %xmm17, %ymm23 +// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x24,0xf9] + vpmovsxwq %xmm17, %ymm23 + +// CHECK: vpmovsxwq %xmm17, %ymm23 {%k5} +// CHECK: encoding: [0x62,0xa2,0x7d,0x2d,0x24,0xf9] + vpmovsxwq %xmm17, %ymm23 {%k5} + +// CHECK: vpmovsxwq %xmm17, %ymm23 {%k5} {z} +// CHECK: encoding: [0x62,0xa2,0x7d,0xad,0x24,0xf9] + vpmovsxwq %xmm17, %ymm23 {%k5} {z} + +// CHECK: vpmovsxwq (%rcx), %ymm23 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x24,0x39] + vpmovsxwq (%rcx), %ymm23 + +// CHECK: vpmovsxwq 291(%rax,%r14,8), %ymm23 +// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x24,0xbc,0xf0,0x23,0x01,0x00,0x00] + vpmovsxwq 291(%rax,%r14,8), %ymm23 + +// CHECK: vpmovsxwq 1016(%rdx), %ymm23 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x24,0x7a,0x7f] + vpmovsxwq 1016(%rdx), %ymm23 + +// CHECK: vpmovsxwq 1024(%rdx), %ymm23 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x24,0xba,0x00,0x04,0x00,0x00] + vpmovsxwq 1024(%rdx), %ymm23 + +// CHECK: vpmovsxwq -1024(%rdx), %ymm23 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x24,0x7a,0x80] + vpmovsxwq -1024(%rdx), %ymm23 + +// CHECK: vpmovsxwq -1032(%rdx), %ymm23 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x24,0xba,0xf8,0xfb,0xff,0xff] + vpmovsxwq -1032(%rdx), %ymm23 + +// CHECK: vpmovzxbd %xmm17, %xmm24 +// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x31,0xc1] + vpmovzxbd %xmm17, %xmm24 + +// CHECK: vpmovzxbd %xmm17, %xmm24 {%k6} +// CHECK: encoding: [0x62,0x22,0x7d,0x0e,0x31,0xc1] + vpmovzxbd %xmm17, %xmm24 {%k6} + +// CHECK: vpmovzxbd %xmm17, %xmm24 {%k6} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0x8e,0x31,0xc1] + vpmovzxbd %xmm17, %xmm24 {%k6} {z} + +// CHECK: vpmovzxbd (%rcx), %xmm24 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x31,0x01] + vpmovzxbd (%rcx), %xmm24 + +// CHECK: vpmovzxbd 291(%rax,%r14,8), %xmm24 +// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x31,0x84,0xf0,0x23,0x01,0x00,0x00] + vpmovzxbd 291(%rax,%r14,8), %xmm24 + +// CHECK: vpmovzxbd 508(%rdx), %xmm24 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x31,0x42,0x7f] + vpmovzxbd 508(%rdx), %xmm24 + +// CHECK: vpmovzxbd 512(%rdx), %xmm24 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x31,0x82,0x00,0x02,0x00,0x00] + vpmovzxbd 512(%rdx), %xmm24 + +// CHECK: vpmovzxbd -512(%rdx), %xmm24 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x31,0x42,0x80] + vpmovzxbd -512(%rdx), %xmm24 + +// CHECK: vpmovzxbd -516(%rdx), %xmm24 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x31,0x82,0xfc,0xfd,0xff,0xff] + vpmovzxbd -516(%rdx), %xmm24 + +// CHECK: vpmovzxbd %xmm17, %ymm27 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x31,0xd9] + vpmovzxbd %xmm17, %ymm27 + +// CHECK: vpmovzxbd %xmm17, %ymm27 {%k1} +// CHECK: encoding: [0x62,0x22,0x7d,0x29,0x31,0xd9] + vpmovzxbd %xmm17, %ymm27 {%k1} + +// CHECK: vpmovzxbd %xmm17, %ymm27 {%k1} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0xa9,0x31,0xd9] + vpmovzxbd %xmm17, %ymm27 {%k1} {z} + +// CHECK: vpmovzxbd (%rcx), %ymm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x31,0x19] + vpmovzxbd (%rcx), %ymm27 + +// CHECK: vpmovzxbd 291(%rax,%r14,8), %ymm27 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x31,0x9c,0xf0,0x23,0x01,0x00,0x00] + vpmovzxbd 291(%rax,%r14,8), %ymm27 + +// CHECK: vpmovzxbd 1016(%rdx), %ymm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x31,0x5a,0x7f] + vpmovzxbd 1016(%rdx), %ymm27 + +// CHECK: vpmovzxbd 1024(%rdx), %ymm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x31,0x9a,0x00,0x04,0x00,0x00] + vpmovzxbd 1024(%rdx), %ymm27 + +// CHECK: vpmovzxbd -1024(%rdx), %ymm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x31,0x5a,0x80] + vpmovzxbd -1024(%rdx), %ymm27 + +// CHECK: vpmovzxbd -1032(%rdx), %ymm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x31,0x9a,0xf8,0xfb,0xff,0xff] + vpmovzxbd -1032(%rdx), %ymm27 + +// CHECK: vpmovzxbq %xmm19, %xmm19 +// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x32,0xdb] + vpmovzxbq %xmm19, %xmm19 + +// CHECK: vpmovzxbq %xmm19, %xmm19 {%k1} +// CHECK: encoding: [0x62,0xa2,0x7d,0x09,0x32,0xdb] + vpmovzxbq %xmm19, %xmm19 {%k1} + +// CHECK: vpmovzxbq %xmm19, %xmm19 {%k1} {z} +// CHECK: encoding: [0x62,0xa2,0x7d,0x89,0x32,0xdb] + vpmovzxbq %xmm19, %xmm19 {%k1} {z} + +// CHECK: vpmovzxbq (%rcx), %xmm19 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x32,0x19] + vpmovzxbq (%rcx), %xmm19 + +// CHECK: vpmovzxbq 291(%rax,%r14,8), %xmm19 +// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x32,0x9c,0xf0,0x23,0x01,0x00,0x00] + vpmovzxbq 291(%rax,%r14,8), %xmm19 + +// CHECK: vpmovzxbq 254(%rdx), %xmm19 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x32,0x5a,0x7f] + vpmovzxbq 254(%rdx), %xmm19 + +// CHECK: vpmovzxbq 256(%rdx), %xmm19 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x32,0x9a,0x00,0x01,0x00,0x00] + vpmovzxbq 256(%rdx), %xmm19 + +// CHECK: vpmovzxbq -256(%rdx), %xmm19 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x32,0x5a,0x80] + vpmovzxbq -256(%rdx), %xmm19 + +// CHECK: vpmovzxbq -258(%rdx), %xmm19 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x32,0x9a,0xfe,0xfe,0xff,0xff] + vpmovzxbq -258(%rdx), %xmm19 + +// CHECK: vpmovzxbq %xmm19, %ymm24 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x32,0xc3] + vpmovzxbq %xmm19, %ymm24 + +// CHECK: vpmovzxbq %xmm19, %ymm24 {%k2} +// CHECK: encoding: [0x62,0x22,0x7d,0x2a,0x32,0xc3] + vpmovzxbq %xmm19, %ymm24 {%k2} + +// CHECK: vpmovzxbq %xmm19, %ymm24 {%k2} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0xaa,0x32,0xc3] + vpmovzxbq %xmm19, %ymm24 {%k2} {z} + +// CHECK: vpmovzxbq (%rcx), %ymm24 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x32,0x01] + vpmovzxbq (%rcx), %ymm24 + +// CHECK: vpmovzxbq 291(%rax,%r14,8), %ymm24 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x32,0x84,0xf0,0x23,0x01,0x00,0x00] + vpmovzxbq 291(%rax,%r14,8), %ymm24 + +// CHECK: vpmovzxbq 508(%rdx), %ymm24 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x32,0x42,0x7f] + vpmovzxbq 508(%rdx), %ymm24 + +// CHECK: vpmovzxbq 512(%rdx), %ymm24 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x32,0x82,0x00,0x02,0x00,0x00] + vpmovzxbq 512(%rdx), %ymm24 + +// CHECK: vpmovzxbq -512(%rdx), %ymm24 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x32,0x42,0x80] + vpmovzxbq -512(%rdx), %ymm24 + +// CHECK: vpmovzxbq -516(%rdx), %ymm24 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x32,0x82,0xfc,0xfd,0xff,0xff] + vpmovzxbq -516(%rdx), %ymm24 + +// CHECK: vpmovzxdq %xmm21, %xmm25 +// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x35,0xcd] + vpmovzxdq %xmm21, %xmm25 + +// CHECK: vpmovzxdq %xmm21, %xmm25 {%k7} +// CHECK: encoding: [0x62,0x22,0x7d,0x0f,0x35,0xcd] + vpmovzxdq %xmm21, %xmm25 {%k7} + +// CHECK: vpmovzxdq %xmm21, %xmm25 {%k7} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0x8f,0x35,0xcd] + vpmovzxdq %xmm21, %xmm25 {%k7} {z} + +// CHECK: vpmovzxdq (%rcx), %xmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x35,0x09] + vpmovzxdq (%rcx), %xmm25 + +// CHECK: vpmovzxdq 291(%rax,%r14,8), %xmm25 +// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x35,0x8c,0xf0,0x23,0x01,0x00,0x00] + vpmovzxdq 291(%rax,%r14,8), %xmm25 + +// CHECK: vpmovzxdq 1016(%rdx), %xmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x35,0x4a,0x7f] + vpmovzxdq 1016(%rdx), %xmm25 + +// CHECK: vpmovzxdq 1024(%rdx), %xmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x35,0x8a,0x00,0x04,0x00,0x00] + vpmovzxdq 1024(%rdx), %xmm25 + +// CHECK: vpmovzxdq -1024(%rdx), %xmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x35,0x4a,0x80] + vpmovzxdq -1024(%rdx), %xmm25 + +// CHECK: vpmovzxdq -1032(%rdx), %xmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x35,0x8a,0xf8,0xfb,0xff,0xff] + vpmovzxdq -1032(%rdx), %xmm25 + +// CHECK: vpmovzxdq %xmm22, %ymm28 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x35,0xe6] + vpmovzxdq %xmm22, %ymm28 + +// CHECK: vpmovzxdq %xmm22, %ymm28 {%k7} +// CHECK: encoding: [0x62,0x22,0x7d,0x2f,0x35,0xe6] + vpmovzxdq %xmm22, %ymm28 {%k7} + +// CHECK: vpmovzxdq %xmm22, %ymm28 {%k7} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0xaf,0x35,0xe6] + vpmovzxdq %xmm22, %ymm28 {%k7} {z} + +// CHECK: vpmovzxdq (%rcx), %ymm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x35,0x21] + vpmovzxdq (%rcx), %ymm28 + +// CHECK: vpmovzxdq 291(%rax,%r14,8), %ymm28 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x35,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpmovzxdq 291(%rax,%r14,8), %ymm28 + +// CHECK: vpmovzxdq 2032(%rdx), %ymm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x35,0x62,0x7f] + vpmovzxdq 2032(%rdx), %ymm28 + +// CHECK: vpmovzxdq 2048(%rdx), %ymm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x35,0xa2,0x00,0x08,0x00,0x00] + vpmovzxdq 2048(%rdx), %ymm28 + +// CHECK: vpmovzxdq -2048(%rdx), %ymm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x35,0x62,0x80] + vpmovzxdq -2048(%rdx), %ymm28 + +// CHECK: vpmovzxdq -2064(%rdx), %ymm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x35,0xa2,0xf0,0xf7,0xff,0xff] + vpmovzxdq -2064(%rdx), %ymm28 + +// CHECK: vpmovzxwd %xmm17, %xmm24 +// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x33,0xc1] + vpmovzxwd %xmm17, %xmm24 + +// CHECK: vpmovzxwd %xmm17, %xmm24 {%k4} +// CHECK: encoding: [0x62,0x22,0x7d,0x0c,0x33,0xc1] + vpmovzxwd %xmm17, %xmm24 {%k4} + +// CHECK: vpmovzxwd %xmm17, %xmm24 {%k4} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0x8c,0x33,0xc1] + vpmovzxwd %xmm17, %xmm24 {%k4} {z} + +// CHECK: vpmovzxwd (%rcx), %xmm24 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x33,0x01] + vpmovzxwd (%rcx), %xmm24 + +// CHECK: vpmovzxwd 291(%rax,%r14,8), %xmm24 +// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x33,0x84,0xf0,0x23,0x01,0x00,0x00] + vpmovzxwd 291(%rax,%r14,8), %xmm24 + +// CHECK: vpmovzxwd 1016(%rdx), %xmm24 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x33,0x42,0x7f] + vpmovzxwd 1016(%rdx), %xmm24 + +// CHECK: vpmovzxwd 1024(%rdx), %xmm24 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x33,0x82,0x00,0x04,0x00,0x00] + vpmovzxwd 1024(%rdx), %xmm24 + +// CHECK: vpmovzxwd -1024(%rdx), %xmm24 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x33,0x42,0x80] + vpmovzxwd -1024(%rdx), %xmm24 + +// CHECK: vpmovzxwd -1032(%rdx), %xmm24 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x33,0x82,0xf8,0xfb,0xff,0xff] + vpmovzxwd -1032(%rdx), %xmm24 + +// CHECK: vpmovzxwd %xmm29, %ymm26 +// CHECK: encoding: [0x62,0x02,0x7d,0x28,0x33,0xd5] + vpmovzxwd %xmm29, %ymm26 + +// CHECK: vpmovzxwd %xmm29, %ymm26 {%k5} +// CHECK: encoding: [0x62,0x02,0x7d,0x2d,0x33,0xd5] + vpmovzxwd %xmm29, %ymm26 {%k5} + +// CHECK: vpmovzxwd %xmm29, %ymm26 {%k5} {z} +// CHECK: encoding: [0x62,0x02,0x7d,0xad,0x33,0xd5] + vpmovzxwd %xmm29, %ymm26 {%k5} {z} + +// CHECK: vpmovzxwd (%rcx), %ymm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x33,0x11] + vpmovzxwd (%rcx), %ymm26 + +// CHECK: vpmovzxwd 291(%rax,%r14,8), %ymm26 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x33,0x94,0xf0,0x23,0x01,0x00,0x00] + vpmovzxwd 291(%rax,%r14,8), %ymm26 + +// CHECK: vpmovzxwd 2032(%rdx), %ymm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x33,0x52,0x7f] + vpmovzxwd 2032(%rdx), %ymm26 + +// CHECK: vpmovzxwd 2048(%rdx), %ymm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x33,0x92,0x00,0x08,0x00,0x00] + vpmovzxwd 2048(%rdx), %ymm26 + +// CHECK: vpmovzxwd -2048(%rdx), %ymm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x33,0x52,0x80] + vpmovzxwd -2048(%rdx), %ymm26 + +// CHECK: vpmovzxwd -2064(%rdx), %ymm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x33,0x92,0xf0,0xf7,0xff,0xff] + vpmovzxwd -2064(%rdx), %ymm26 + +// CHECK: vpmovzxwq %xmm20, %xmm29 +// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x34,0xec] + vpmovzxwq %xmm20, %xmm29 + +// CHECK: vpmovzxwq %xmm20, %xmm29 {%k2} +// CHECK: encoding: [0x62,0x22,0x7d,0x0a,0x34,0xec] + vpmovzxwq %xmm20, %xmm29 {%k2} + +// CHECK: vpmovzxwq %xmm20, %xmm29 {%k2} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0x8a,0x34,0xec] + vpmovzxwq %xmm20, %xmm29 {%k2} {z} + +// CHECK: vpmovzxwq (%rcx), %xmm29 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x34,0x29] + vpmovzxwq (%rcx), %xmm29 + +// CHECK: vpmovzxwq 291(%rax,%r14,8), %xmm29 +// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x34,0xac,0xf0,0x23,0x01,0x00,0x00] + vpmovzxwq 291(%rax,%r14,8), %xmm29 + +// CHECK: vpmovzxwq 508(%rdx), %xmm29 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x34,0x6a,0x7f] + vpmovzxwq 508(%rdx), %xmm29 + +// CHECK: vpmovzxwq 512(%rdx), %xmm29 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x34,0xaa,0x00,0x02,0x00,0x00] + vpmovzxwq 512(%rdx), %xmm29 + +// CHECK: vpmovzxwq -512(%rdx), %xmm29 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x34,0x6a,0x80] + vpmovzxwq -512(%rdx), %xmm29 + +// CHECK: vpmovzxwq -516(%rdx), %xmm29 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x34,0xaa,0xfc,0xfd,0xff,0xff] + vpmovzxwq -516(%rdx), %xmm29 + +// CHECK: vpmovzxwq %xmm25, %ymm18 +// CHECK: encoding: [0x62,0x82,0x7d,0x28,0x34,0xd1] + vpmovzxwq %xmm25, %ymm18 + +// CHECK: vpmovzxwq %xmm25, %ymm18 {%k1} +// CHECK: encoding: [0x62,0x82,0x7d,0x29,0x34,0xd1] + vpmovzxwq %xmm25, %ymm18 {%k1} + +// CHECK: vpmovzxwq %xmm25, %ymm18 {%k1} {z} +// CHECK: encoding: [0x62,0x82,0x7d,0xa9,0x34,0xd1] + vpmovzxwq %xmm25, %ymm18 {%k1} {z} + +// CHECK: vpmovzxwq (%rcx), %ymm18 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x34,0x11] + vpmovzxwq (%rcx), %ymm18 + +// CHECK: vpmovzxwq 291(%rax,%r14,8), %ymm18 +// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x34,0x94,0xf0,0x23,0x01,0x00,0x00] + vpmovzxwq 291(%rax,%r14,8), %ymm18 + +// CHECK: vpmovzxwq 1016(%rdx), %ymm18 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x34,0x52,0x7f] + vpmovzxwq 1016(%rdx), %ymm18 + +// CHECK: vpmovzxwq 1024(%rdx), %ymm18 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x34,0x92,0x00,0x04,0x00,0x00] + vpmovzxwq 1024(%rdx), %ymm18 + +// CHECK: vpmovzxwq -1024(%rdx), %ymm18 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x34,0x52,0x80] + vpmovzxwq -1024(%rdx), %ymm18 + +// CHECK: vpmovzxwq -1032(%rdx), %ymm18 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x34,0x92,0xf8,0xfb,0xff,0xff] + vpmovzxwq -1032(%rdx), %ymm18 + // CHECK: vpmulld %xmm24, %xmm19, %xmm25 // CHECK: encoding: [0x62,0x02,0x65,0x00,0x40,0xc8] vpmulld %xmm24, %xmm19, %xmm25