From 717d8ad6cf44524036acf9dc3e68dc6b3006f198 Mon Sep 17 00:00:00 2001 From: Asaf Badouh Date: Wed, 22 Jul 2015 12:00:43 +0000 Subject: [PATCH] [X86][AVX512] add reduce/range/scalef/rndScale include encoding and intrinsics Differential Revision: http://reviews.llvm.org/D11222 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@242896 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsX86.td | 79 +++- lib/Target/X86/X86ISelLowering.cpp | 21 +- lib/Target/X86/X86ISelLowering.h | 5 +- lib/Target/X86/X86InstrAVX512.td | 228 ++++++---- lib/Target/X86/X86InstrFragmentsSIMD.td | 7 +- lib/Target/X86/X86IntrinsicsInfo.h | 32 +- test/CodeGen/X86/avx512-intrinsics.ll | 26 ++ test/CodeGen/X86/avx512dq-intrinsics.ll | 123 ++++++ test/CodeGen/X86/avx512dqvl-intrinsics.ll | 111 +++++ test/CodeGen/X86/avx512vl-intrinsics.ll | 52 +++ test/MC/X86/avx512-encodings.s | 336 +++++++++++++++ test/MC/X86/x86-64-avx512dq.s | 464 +++++++++++++++++++++ test/MC/X86/x86-64-avx512dq_vl.s | 480 ++++++++++++++++++++++ test/MC/X86/x86-64-avx512f_vl.s | 240 +++++++++++ 14 files changed, 2109 insertions(+), 95 deletions(-) diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 8eb254f0793..352a592bbd8 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -4221,12 +4221,60 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
[llvm_v8i64_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_rndscale_ps_512: GCCBuiltin<"__builtin_ia32_rndscaleps_mask">, - Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, - llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_rndscale_pd_512: GCCBuiltin<"__builtin_ia32_rndscalepd_mask">, + def int_x86_avx512_mask_rndscale_pd_128 : GCCBuiltin<"__builtin_ia32_rndscalepd_128_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty, + llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_rndscale_pd_256 : GCCBuiltin<"__builtin_ia32_rndscalepd_256_mask">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty, + llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_rndscale_pd_512 : GCCBuiltin<"__builtin_ia32_rndscalepd_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_rndscale_ps_128 : GCCBuiltin<"__builtin_ia32_rndscaleps_128_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, + llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_rndscale_ps_256 : GCCBuiltin<"__builtin_ia32_rndscaleps_256_mask">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty, + llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_rndscale_ps_512 : GCCBuiltin<"__builtin_ia32_rndscaleps_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, + llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_reduce_pd_128 : GCCBuiltin<"__builtin_ia32_reducepd128_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty, + llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_reduce_pd_256 : GCCBuiltin<"__builtin_ia32_reducepd256_mask">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty, + llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_reduce_pd_512 : 
GCCBuiltin<"__builtin_ia32_reducepd512_mask">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, + llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_reduce_ps_128 : GCCBuiltin<"__builtin_ia32_reduceps128_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, + llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_reduce_ps_256 : GCCBuiltin<"__builtin_ia32_reduceps256_mask">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty, + llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_reduce_ps_512 : GCCBuiltin<"__builtin_ia32_reduceps512_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, + llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_x86_avx512_mask_range_pd_128 : GCCBuiltin<"__builtin_ia32_rangepd128_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty, + llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; +def int_x86_avx512_mask_range_pd_256 : GCCBuiltin<"__builtin_ia32_rangepd256_mask">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty, + llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; +def int_x86_avx512_mask_range_pd_512 : GCCBuiltin<"__builtin_ia32_rangepd512_mask">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty, + llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_x86_avx512_mask_range_ps_128 : GCCBuiltin<"__builtin_ia32_rangeps128_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, + llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; +def int_x86_avx512_mask_range_ps_256 : GCCBuiltin<"__builtin_ia32_rangeps256_mask">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty, + llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; +def int_x86_avx512_mask_range_ps_512 : GCCBuiltin<"__builtin_ia32_rangeps512_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty, + llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], 
[IntrNoMem]>; } // Vector load with broadcast @@ -4508,7 +4556,28 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - + def int_x86_avx512_mask_range_ss : GCCBuiltin<"__builtin_ia32_rangess_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_range_sd : GCCBuiltin<"__builtin_ia32_rangesd_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, + llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_reduce_ss : GCCBuiltin<"__builtin_ia32_reducess_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_reduce_sd : GCCBuiltin<"__builtin_ia32_reducesd_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, + llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_scalef_sd : GCCBuiltin<"__builtin_ia32_scalefsd_round">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, + llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_scalef_ss : GCCBuiltin<"__builtin_ia32_scalefss_round">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, + llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_scalef_pd_128 : GCCBuiltin<"__builtin_ia32_scalefpd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 4b904bbcd63..ea05a2f8bcd 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -15460,6 +15460,24 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const 
X86Subtarget *Subtarget Src1, Src2, Rnd), Mask, PassThru, Subtarget, DAG); } + case INTR_TYPE_3OP_MASK_RM: { + SDValue Src1 = Op.getOperand(1); + SDValue Src2 = Op.getOperand(2); + SDValue Imm = Op.getOperand(3); + SDValue PassThru = Op.getOperand(4); + SDValue Mask = Op.getOperand(5); + // We specify 2 possible modes for intrinsics, with/without rounding modes. + // First, we check if the intrinsic have rounding mode (7 operands), + // if not, we set rounding mode to "current". + SDValue Rnd; + if (Op.getNumOperands() == 7) + Rnd = Op.getOperand(6); + else + Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32); + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, + Src1, Src2, Imm, Rnd), + Mask, PassThru, Subtarget, DAG); + } case INTR_TYPE_3OP_MASK: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); @@ -19039,7 +19057,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FNMSUB_RND: return "X86ISD::FNMSUB_RND"; case X86ISD::FMADDSUB_RND: return "X86ISD::FMADDSUB_RND"; case X86ISD::FMSUBADD_RND: return "X86ISD::FMSUBADD_RND"; - case X86ISD::RNDSCALE: return "X86ISD::RNDSCALE"; + case X86ISD::VRNDSCALE: return "X86ISD::VRNDSCALE"; + case X86ISD::VREDUCE: return "X86ISD::VREDUCE"; case X86ISD::PCMPESTRI: return "X86ISD::PCMPESTRI"; case X86ISD::PCMPISTRI: return "X86ISD::PCMPISTRI"; case X86ISD::XTEST: return "X86ISD::XTEST"; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 47b3ce0d4ec..7fab1938682 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -386,6 +386,10 @@ namespace llvm { VFIXUPIMM, //Range Restriction Calculation For Packed Pairs of Float32/64 values VRANGE, + // Reduce - Perform Reduction Transformation on scalar\packed FP + VREDUCE, + // RndScale - Round FP Values To Include A Given Number Of Fraction Bits + VRNDSCALE, // Broadcast scalar to vector VBROADCAST, // Broadcast subvector to vector 
@@ -419,7 +423,6 @@ namespace llvm { FNMSUB_RND, FMADDSUB_RND, FMSUBADD_RND, - RNDSCALE, // Compress and expand COMPRESS, diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 58334a86b66..215dcebe661 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -3394,7 +3394,7 @@ multiclass avx512_fp_scalar opc, string OpcodeStr,X86VectorVTInfo _, } multiclass avx512_fp_scalar_round opc, string OpcodeStr,X86VectorVTInfo _, - SDNode VecNode, OpndItins itins, bit IsCommutable> { + SDNode VecNode, OpndItins itins, bit IsCommutable = 0> { defm rrb : AVX512_maskable_scalar opc, string OpcodeStr, SDNode OpNode, }//let mayLoad = 1 } -multiclass avx512_fp_scalef_all opc, string OpcodeStr, SDNode OpNode> { +multiclass avx512_fp_scalef_scalar opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rr: AVX512_maskable_scalar; + let mayLoad = 1 in { + defm rm: AVX512_maskable_scalar; + }//let mayLoad = 1 +} + +multiclass avx512_fp_scalef_all opc, bits<8> opcScaler, string OpcodeStr, SDNode OpNode> { defm PSZ : avx512_fp_scalef_p, avx512_fp_round_packed, EVEX_V512, EVEX_CD8<32, CD8VF>; defm PDZ : avx512_fp_scalef_p, avx512_fp_round_packed, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + defm SSZ128 : avx512_fp_scalef_scalar, + avx512_fp_scalar_round, + EVEX_4V,EVEX_CD8<32, CD8VT1>; + defm SDZ128 : avx512_fp_scalef_scalar, + avx512_fp_scalar_round, + EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; + // Define only if AVX512VL feature is present. 
let Predicates = [HasVLX] in { defm PSZ128 : avx512_fp_scalef_p, @@ -3588,7 +3609,7 @@ multiclass avx512_fp_scalef_all opc, string OpcodeStr, SDNode OpNode> { EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; } } -defm VSCALEF : avx512_fp_scalef_all<0x2C, "vscalef", X86scalef>, T8PD; +defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef>, T8PD; //===----------------------------------------------------------------------===// // AVX-512 VPTESTM instructions @@ -5481,47 +5502,6 @@ let Predicates = [HasAVX512] in { (VSQRTSDZm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>; } - -multiclass avx512_rndscale opc, string OpcodeStr, - X86MemOperand x86memop, RegisterClass RC, - PatFrag mem_frag, Domain d> { -let ExeDomain = d in { - // Intrinsic operation, reg. - // Vector intrinsic operation, reg - def r : AVX512AIi8, EVEX; - - // Vector intrinsic operation, mem - def m : AVX512AIi8, EVEX; -} // ExeDomain -} - -defm VRNDSCALEPSZ : avx512_rndscale<0x08, "vrndscaleps", f512mem, VR512, - loadv16f32, SSEPackedSingle>, EVEX_V512, - EVEX_CD8<32, CD8VF>; - -def : Pat<(v16f32 (int_x86_avx512_mask_rndscale_ps_512 (v16f32 VR512:$src1), - imm:$src2, (v16f32 VR512:$src1), (i16 -1), - FROUND_CURRENT)), - (VRNDSCALEPSZr VR512:$src1, imm:$src2)>; - - -defm VRNDSCALEPDZ : avx512_rndscale<0x09, "vrndscalepd", f512mem, VR512, - loadv8f64, SSEPackedDouble>, EVEX_V512, - VEX_W, EVEX_CD8<64, CD8VF>; - -def : Pat<(v8f64 (int_x86_avx512_mask_rndscale_pd_512 (v8f64 VR512:$src1), - imm:$src2, (v8f64 VR512:$src1), (i8 -1), - FROUND_CURRENT)), - (VRNDSCALEPDZr VR512:$src1, imm:$src2)>; - multiclass avx512_rndscale_scalar opc, string OpcodeStr, X86VectorVTInfo _> { @@ -5529,20 +5509,20 @@ avx512_rndscale_scalar opc, string OpcodeStr, X86VectorVTInfo _> { defm r : AVX512_maskable_scalar; defm rb : AVX512_maskable_scalar, EVEX_B; let mayLoad = 1 in defm m : AVX512_maskable_scalar; } @@ -5587,29 +5567,6 @@ defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", f32x_info>, defm VRNDSCALESD : 
avx512_rndscale_scalar<0x0B, "vrndscalesd", f64x_info>, VEX_W, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VT1>; -let Predicates = [HasAVX512] in { -def : Pat<(v16f32 (ffloor VR512:$src)), - (VRNDSCALEPSZr VR512:$src, (i32 0x1))>; -def : Pat<(v16f32 (fnearbyint VR512:$src)), - (VRNDSCALEPSZr VR512:$src, (i32 0xC))>; -def : Pat<(v16f32 (fceil VR512:$src)), - (VRNDSCALEPSZr VR512:$src, (i32 0x2))>; -def : Pat<(v16f32 (frint VR512:$src)), - (VRNDSCALEPSZr VR512:$src, (i32 0x4))>; -def : Pat<(v16f32 (ftrunc VR512:$src)), - (VRNDSCALEPSZr VR512:$src, (i32 0x3))>; - -def : Pat<(v8f64 (ffloor VR512:$src)), - (VRNDSCALEPDZr VR512:$src, (i32 0x1))>; -def : Pat<(v8f64 (fnearbyint VR512:$src)), - (VRNDSCALEPDZr VR512:$src, (i32 0xC))>; -def : Pat<(v8f64 (fceil VR512:$src)), - (VRNDSCALEPDZr VR512:$src, (i32 0x2))>; -def : Pat<(v8f64 (frint VR512:$src)), - (VRNDSCALEPDZr VR512:$src, (i32 0x4))>; -def : Pat<(v8f64 (ftrunc VR512:$src)), - (VRNDSCALEPDZr VR512:$src, (i32 0x3))>; -} //------------------------------------------------- // Integer truncate and extend operations //------------------------------------------------- @@ -6321,6 +6278,62 @@ defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", avx512vl_f32_info>, defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>, EVEX, VEX_W; +//handle instruction reg_vec1 = op(reg_vec,imm) +// op(mem_vec,imm) +// op(broadcast(eltVt),imm) +//all instruction created with FROUND_CURRENT +multiclass avx512_unary_fp_packed_imm opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _>{ + defm rri : AVX512_maskable; + let mayLoad = 1 in { + defm rmi : AVX512_maskable; + defm rmbi : AVX512_maskable, EVEX_B; + } +} + +//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} +multiclass avx512_unary_fp_sae_packed_imm opc, string OpcodeStr, + SDNode OpNode, X86VectorVTInfo _>{ + defm rrib : AVX512_maskable, EVEX_B; +} + +multiclass avx512_common_unary_fp_sae_packed_imm opc, SDNode OpNode, Predicate prd>{ + let 
Predicates = [prd] in { + defm Z : avx512_unary_fp_packed_imm, + avx512_unary_fp_sae_packed_imm, + EVEX_V512; + } + let Predicates = [prd, HasVLX] in { + defm Z128 : avx512_unary_fp_packed_imm, + EVEX_V128; + defm Z256 : avx512_unary_fp_packed_imm, + EVEX_V256; + } +} + //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) // op(reg_vec2,mem_vec,imm) // op(reg_vec2,broadcast(eltVt),imm) @@ -6328,27 +6341,27 @@ defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>, multiclass avx512_fp_packed_imm opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _>{ defm rri : AVX512_maskable; let mayLoad = 1 in { defm rmi : AVX512_maskable; defm rmbi : AVX512_maskable, EVEX_B; } } @@ -6388,20 +6401,20 @@ multiclass avx512_fp_scalar_imm opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> { defm rri : AVX512_maskable_scalar; let mayLoad = 1 in { defm rmi : AVX512_maskable_scalar; let isAsmParserOnly = 1 in { @@ -6417,18 +6430,25 @@ multiclass avx512_fp_scalar_imm opc, string OpcodeStr, SDNode OpNode, multiclass avx512_fp_sae_packed_imm opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _>{ defm rrib : AVX512_maskable, EVEX_B; } //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} multiclass avx512_fp_sae_scalar_imm opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> { - defm NAME: avx512_fp_sae_packed_imm; + defm NAME#rrib : AVX512_maskable_scalar, EVEX_B; } multiclass avx512_common_fp_sae_packed_imm opcPs, + bits<8> opcPd, SDNode OpNode, Predicate prd>{ + defm PS : avx512_common_unary_fp_sae_packed_imm, EVEX_CD8<32, CD8VF>; + defm PD : avx512_common_unary_fp_sae_packed_imm,EVEX_CD8<64, CD8VF> , VEX_W; +} + defm VFIXUPIMMPD : avx512_common_fp_sae_packed_imm<"vfixupimmpd", avx512vl_f64_info, 0x54, X86VFixupimm, HasAVX512>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; @@ -6480,6 +6508,9 @@ defm VFIXUPIMMSS: avx512_common_fp_sae_scalar_imm<"vfixupimmss", f32x_info, 0x55, X86VFixupimm, HasAVX512>, AVX512AIi8Base, 
VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; +defm VREDUCE : avx512_common_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56, X86VReduce, HasDQI>,AVX512AIi8Base,EVEX; +defm VRNDSCALE : avx512_common_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09, X86VRndScale, HasAVX512>,AVX512AIi8Base, EVEX; + defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info, 0x50, X86VRange, HasDQI>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; @@ -6494,6 +6525,12 @@ defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info, 0x51, X86VRange, HasDQI>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; +defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info, + 0x57, X86Reduces, HasDQI>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; +defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info, + 0x57, X86Reduces, HasDQI>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; multiclass avx512_shuff_packed_128 opc, SDNode OpNode = X86Shuf128>{ @@ -6505,6 +6542,29 @@ multiclass avx512_shuff_packed_128, EVEX_V256; } } +let Predicates = [HasAVX512] in { +def : Pat<(v16f32 (ffloor VR512:$src)), + (VRNDSCALEPSZrri VR512:$src, (i32 0x1))>; +def : Pat<(v16f32 (fnearbyint VR512:$src)), + (VRNDSCALEPSZrri VR512:$src, (i32 0xC))>; +def : Pat<(v16f32 (fceil VR512:$src)), + (VRNDSCALEPSZrri VR512:$src, (i32 0x2))>; +def : Pat<(v16f32 (frint VR512:$src)), + (VRNDSCALEPSZrri VR512:$src, (i32 0x4))>; +def : Pat<(v16f32 (ftrunc VR512:$src)), + (VRNDSCALEPSZrri VR512:$src, (i32 0x3))>; + +def : Pat<(v8f64 (ffloor VR512:$src)), + (VRNDSCALEPDZrri VR512:$src, (i32 0x1))>; +def : Pat<(v8f64 (fnearbyint VR512:$src)), + (VRNDSCALEPDZrri VR512:$src, (i32 0xC))>; +def : Pat<(v8f64 (fceil VR512:$src)), + (VRNDSCALEPDZrri VR512:$src, (i32 0x2))>; +def : Pat<(v8f64 (frint VR512:$src)), + (VRNDSCALEPDZrri VR512:$src, (i32 0x4))>; +def : Pat<(v8f64 (ftrunc VR512:$src)), + (VRNDSCALEPDZrri VR512:$src, (i32 0x3))>; +} defm VSHUFF32X4 : 
avx512_shuff_packed_128<"vshuff32x4",avx512vl_f32_info, 0x23>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index abb5314e4b7..401b3267368 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -232,6 +232,8 @@ def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisInt<3>]>; def SDTFPBinOpImmRound: SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisInt<3>, SDTCisInt<4>]>; +def SDTFPUnaryOpImmRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisInt<2>, SDTCisInt<3>]>; def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>; def SDTVBroadcastm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>]>; @@ -302,6 +304,8 @@ def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>; def X86VFixupimm : SDNode<"X86ISD::VFIXUPIMM", SDTFPBinOpImmRound>; def X86VRange : SDNode<"X86ISD::VRANGE", SDTFPBinOpImmRound>; +def X86VReduce : SDNode<"X86ISD::VREDUCE", SDTFPUnaryOpImmRound>; +def X86VRndScale : SDNode<"X86ISD::VRNDSCALE", SDTFPUnaryOpImmRound>; def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST", SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, @@ -346,7 +350,8 @@ def X86exp2 : SDNode<"X86ISD::EXP2", STDFp1SrcRm>; def X86rsqrt28s : SDNode<"X86ISD::RSQRT28", STDFp2SrcRm>; def X86rcp28s : SDNode<"X86ISD::RCP28", STDFp2SrcRm>; -def X86RndScale : SDNode<"X86ISD::RNDSCALE", STDFp3SrcRm>; +def X86RndScales : SDNode<"X86ISD::VRNDSCALE", STDFp3SrcRm>; +def X86Reduces : SDNode<"X86ISD::VREDUCE", STDFp3SrcRm>; def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>, diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 7321a784912..1383fa37306 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -22,7 +22,7 @@ enum 
IntrinsicType { INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP, CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI, INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM, - INTR_TYPE_3OP_MASK, FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK, + INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_MASK_RM, FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK, VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, EXPAND_FROM_MEM, BLEND @@ -903,10 +903,32 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_pxor_q_128, INTR_TYPE_2OP_MASK, ISD::XOR, 0), X86_INTRINSIC_DATA(avx512_mask_pxor_q_256, INTR_TYPE_2OP_MASK, ISD::XOR, 0), X86_INTRINSIC_DATA(avx512_mask_pxor_q_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0), + X86_INTRINSIC_DATA(avx512_mask_range_pd_128, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0), + X86_INTRINSIC_DATA(avx512_mask_range_pd_256, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0), + X86_INTRINSIC_DATA(avx512_mask_range_pd_512, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0), + X86_INTRINSIC_DATA(avx512_mask_range_ps_128, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0), + X86_INTRINSIC_DATA(avx512_mask_range_ps_256, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0), + X86_INTRINSIC_DATA(avx512_mask_range_ps_512, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0), + X86_INTRINSIC_DATA(avx512_mask_range_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VRANGE, 0), + X86_INTRINSIC_DATA(avx512_mask_range_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VRANGE, 0), + X86_INTRINSIC_DATA(avx512_mask_reduce_pd_128, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0), + X86_INTRINSIC_DATA(avx512_mask_reduce_pd_256, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0), + X86_INTRINSIC_DATA(avx512_mask_reduce_pd_512, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0), + X86_INTRINSIC_DATA(avx512_mask_reduce_ps_128, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0), + X86_INTRINSIC_DATA(avx512_mask_reduce_ps_256, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0), + 
X86_INTRINSIC_DATA(avx512_mask_reduce_ps_512, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0), + X86_INTRINSIC_DATA(avx512_mask_reduce_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VREDUCE, 0), + X86_INTRINSIC_DATA(avx512_mask_reduce_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VREDUCE, 0), + X86_INTRINSIC_DATA(avx512_mask_rndscale_pd_128, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0), + X86_INTRINSIC_DATA(avx512_mask_rndscale_pd_256, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0), + X86_INTRINSIC_DATA(avx512_mask_rndscale_pd_512, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0), + X86_INTRINSIC_DATA(avx512_mask_rndscale_ps_128, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0), + X86_INTRINSIC_DATA(avx512_mask_rndscale_ps_256, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0), + X86_INTRINSIC_DATA(avx512_mask_rndscale_ps_512, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0), X86_INTRINSIC_DATA(avx512_mask_rndscale_sd, INTR_TYPE_SCALAR_MASK_RM, - X86ISD::RNDSCALE, 0), + X86ISD::VRNDSCALE, 0), X86_INTRINSIC_DATA(avx512_mask_rndscale_ss, INTR_TYPE_SCALAR_MASK_RM, - X86ISD::RNDSCALE, 0), + X86ISD::VRNDSCALE, 0), X86_INTRINSIC_DATA(avx512_mask_scalef_pd_128, INTR_TYPE_2OP_MASK_RM, X86ISD::SCALEF, 0), X86_INTRINSIC_DATA(avx512_mask_scalef_pd_256, INTR_TYPE_2OP_MASK_RM, @@ -919,6 +941,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::SCALEF, 0), X86_INTRINSIC_DATA(avx512_mask_scalef_ps_512, INTR_TYPE_2OP_MASK_RM, X86ISD::SCALEF, 0), + X86_INTRINSIC_DATA(avx512_mask_scalef_sd, INTR_TYPE_SCALAR_MASK_RM, + X86ISD::SCALEF, 0), + X86_INTRINSIC_DATA(avx512_mask_scalef_ss, INTR_TYPE_SCALAR_MASK_RM, + X86ISD::SCALEF, 0), X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0), X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0), X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_512, INTR_TYPE_1OP_MASK_RM, ISD::FSQRT, diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll index d0b0a0075d9..6e50fda7467 100644 --- 
a/test/CodeGen/X86/avx512-intrinsics.ll +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -3352,3 +3352,29 @@ define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_512(<16 x float> %x0, <16 ret <16 x i32> %res2 } + +declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32) +; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ss +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vscalefss {{.*}}{%k1} +; CHECK: vscalefss {rn-sae} +define <4 x float>@test_int_x86_avx512_mask_scalef_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) { + %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4) + %res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 8) + %res2 = fadd <4 x float> %res, %res1 + ret <4 x float> %res2 +} + +declare <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32) +; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_sd +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vscalefsd {{.*}}{%k1} +; CHECK: vscalefsd {rn-sae} +define <2 x double>@test_int_x86_avx512_mask_scalef_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) { + %res = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4) + %res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 8) + %res2 = fadd <2 x double> %res, %res1 + ret <2 x double> %res2 +} diff --git a/test/CodeGen/X86/avx512dq-intrinsics.ll b/test/CodeGen/X86/avx512dq-intrinsics.ll index 04cfeadb134..67a88e15570 100644 --- a/test/CodeGen/X86/avx512dq-intrinsics.ll +++ b/test/CodeGen/X86/avx512dq-intrinsics.ll @@ -1,3 +1,4 @@ + ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq | FileCheck %s declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double>, <8 x i64>, i8, 
i32) @@ -192,3 +193,125 @@ define <8 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_512(<8 x i64> %x0, <8 x f ret <8 x float> %res2 } +declare <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double>, i32, <8 x double>, i8, i32) +; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_pd_512 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vreducepd {{.*}}{%k1} +; CHECK: vreducepd +; CHECK: {sae} +define <8 x double>@test_int_x86_avx512_mask_reduce_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) { + %res = call <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double> %x0, i32 8, <8 x double> %x2, i8 %x3, i32 4) + %res1 = call <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double> %x0, i32 4, <8 x double> %x2, i8 -1, i32 8) + %res2 = fadd <8 x double> %res, %res1 + ret <8 x double> %res2 +} + +declare <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float>, i32, <16 x float>, i16, i32) +; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_ps_512 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vreduceps +; CHECK: {sae} +; CHECK: {%k1} +; CHECK: vreduceps +define <16 x float>@test_int_x86_avx512_mask_reduce_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) { + %res = call <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float> %x0, i32 44, <16 x float> %x2, i16 %x3, i32 8) + %res1 = call <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 -1, i32 4) + %res2 = fadd <16 x float> %res, %res1 + ret <16 x float> %res2 +} + +declare <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8, i32) +; CHECK-LABEL: @test_int_x86_avx512_mask_range_pd_512 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vrangepd +; CHECK: {%k1} +; CHECK: vrangepd +; CHECK: {sae} +define <8 x double>@test_int_x86_avx512_mask_range_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) { + %res = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %x0, <8 x double> %x1, i32 8, <8 x 
double> %x3, i8 %x4, i32 4) + %res1 = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %x0, <8 x double> %x1, i32 4, <8 x double> %x3, i8 -1, i32 8) + %res2 = fadd <8 x double> %res, %res1 + ret <8 x double> %res2 +} + +declare <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16, i32) + +; CHECK-LABEL: @test_int_x86_avx512_mask_range_ps_512 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vrangeps +; CHECK: {%k1} +; CHECK: vrangeps +; CHECK: {sae} +define <16 x float>@test_int_x86_avx512_mask_range_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) { + %res = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %x0, <16 x float> %x1, i32 88, <16 x float> %x3, i16 %x4, i32 4) + %res1 = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %x0, <16 x float> %x1, i32 4, <16 x float> %x3, i16 -1, i32 8) + %res2 = fadd <16 x float> %res, %res1 + ret <16 x float> %res2 +} + +declare <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32, i32) + +; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_ss +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vreducess +; CHECK: {%k1} +; CHECK: vreducess +; CHECK: {sae} +define <4 x float>@test_int_x86_avx512_mask_reduce_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) { + %res = call <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4, i32 4) + %res1 = call <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 4, i32 8) + %res2 = fadd <4 x float> %res, %res1 + ret <4 x float> %res2 +} + +declare <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32, i32) +; CHECK-LABEL: @test_int_x86_avx512_mask_range_ss +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vrangess +; CHECK: {sae} +; CHECK: {%k1} +; CHECK: vrangess +; CHECK: {sae} +define <4 x 
float>@test_int_x86_avx512_mask_range_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) { + %res = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4, i32 8) + %res1 = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 4, i32 8) + %res2 = fadd <4 x float> %res, %res1 + ret <4 x float> %res2 +} + +declare <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32, i32) + +; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_sd +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vreducesd +; CHECK: {%k1} +; CHECK: vreducesd +; CHECK: {sae} +define <2 x double>@test_int_x86_avx512_mask_reduce_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) { + %res = call <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4) + %res1 = call <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 8) + %res2 = fadd <2 x double> %res, %res1 + ret <2 x double> %res2 +} + +declare <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32, i32) +; CHECK-LABEL: @test_int_x86_avx512_mask_range_sd +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vrangesd +; CHECK: {%k1} +; CHECK: vrangesd +; CHECK: {sae} +define <2 x double>@test_int_x86_avx512_mask_range_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) { + %res = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4) + %res1 = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 8) + %res2 = fadd <2 x double> %res, %res1 + ret <2 x double> %res2 +} diff --git a/test/CodeGen/X86/avx512dqvl-intrinsics.ll b/test/CodeGen/X86/avx512dqvl-intrinsics.ll index 
a41560ca465..2fcfac0f1bb 100644 --- a/test/CodeGen/X86/avx512dqvl-intrinsics.ll +++ b/test/CodeGen/X86/avx512dqvl-intrinsics.ll @@ -1537,3 +1537,114 @@ define <4 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_256(<4 x float> %x0, <4 x %res2 = add <4 x i64> %res, %res1 ret <4 x i64> %res2 } + +declare <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double>, i32, <2 x double>, i8) + +; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_pd_128 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vreducepd {{.*}}{%k1} +; CHECK: vreducepd +define <2 x double>@test_int_x86_avx512_mask_reduce_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) { + %res = call <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double> %x0, i32 4, <2 x double> %x2, i8 %x3) + %res1 = call <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double> %x0, i32 8, <2 x double> %x2, i8 -1) + %res2 = fadd <2 x double> %res, %res1 + ret <2 x double> %res2 +} + +declare <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double>, i32, <4 x double>, i8) + +; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_pd_256 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vreducepd {{.*}}{%k1} +; CHECK: vreducepd +define <4 x double>@test_int_x86_avx512_mask_reduce_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) { + %res = call <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double> %x0, i32 4, <4 x double> %x2, i8 %x3) + %res1 = call <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double> %x0, i32 0, <4 x double> %x2, i8 -1) + %res2 = fadd <4 x double> %res, %res1 + ret <4 x double> %res2 +} + +declare <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float>, i32, <4 x float>, i8) +; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_ps_128 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vreduceps {{.*}}{%k1} +; CHECK: vreduceps +define <4 x float>@test_int_x86_avx512_mask_reduce_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) { + %res = call <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float> %x0, i32 4, 
<4 x float> %x2, i8 %x3) + %res1 = call <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float> %x0, i32 88, <4 x float> %x2, i8 -1) + %res2 = fadd <4 x float> %res, %res1 + ret <4 x float> %res2 +} + +declare <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float>, i32, <8 x float>, i8) + +; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_ps_256 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vreduceps {{.*}}{%k1} +; CHECK: vreduceps +define <8 x float>@test_int_x86_avx512_mask_reduce_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) { + %res = call <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 %x3) + %res1 = call <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 -1) + %res2 = fadd <8 x float> %res, %res1 + ret <8 x float> %res2 +} + +declare <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double>, <2 x double>, i32, <2 x double>, i8) + +; CHECK-LABEL: @test_int_x86_avx512_mask_range_pd_128 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vrangepd {{.*}}{%k1} +; CHECK: vrangepd +define <2 x double>@test_int_x86_avx512_mask_range_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) { + %res = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %x0, <2 x double> %x1, i32 4, <2 x double> %x3, i8 %x4) + %res1 = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %x0, <2 x double> %x1, i32 8, <2 x double> %x3, i8 -1) + %res2 = fadd <2 x double> %res, %res1 + ret <2 x double> %res2 +} + +declare <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double>, <4 x double>, i32, <4 x double>, i8) + +; CHECK-LABEL: @test_int_x86_avx512_mask_range_pd_256 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vrangepd {{.*}}{%k1} +; CHECK: vrangepd +define <4 x double>@test_int_x86_avx512_mask_range_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) { + %res = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %x0, <4 x 
double> %x1, i32 4, <4 x double> %x3, i8 %x4) + %res1 = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %x0, <4 x double> %x1, i32 88, <4 x double> %x3, i8 -1) + %res2 = fadd <4 x double> %res, %res1 + ret <4 x double> %res2 +} + +declare <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float>, <4 x float>, i32, <4 x float>, i8) + +; CHECK-LABEL: @test_int_x86_avx512_mask_range_ps_128 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vrangeps {{.*}}{%k1} +; CHECK: vrangeps +define <4 x float>@test_int_x86_avx512_mask_range_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) { + %res = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %x0, <4 x float> %x1, i32 4, <4 x float> %x3, i8 %x4) + %res1 = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %x0, <4 x float> %x1, i32 88, <4 x float> %x3, i8 -1) + %res2 = fadd <4 x float> %res, %res1 + ret <4 x float> %res2 +} + +declare <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float>, <8 x float>, i32, <8 x float>, i8) + +; CHECK-LABEL: @test_int_x86_avx512_mask_range_ps_256 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vrangeps {{.*}}{%k1} +; CHECK: vrangeps +define <8 x float>@test_int_x86_avx512_mask_range_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) { + %res = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %x0, <8 x float> %x1, i32 4, <8 x float> %x3, i8 %x4) + %res1 = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %x0, <8 x float> %x1, i32 88, <8 x float> %x3, i8 -1) + %res2 = fadd <8 x float> %res, %res1 + ret <8 x float> %res2 +} diff --git a/test/CodeGen/X86/avx512vl-intrinsics.ll b/test/CodeGen/X86/avx512vl-intrinsics.ll index 72e1ac6e6f7..46ee51f47b6 100644 --- a/test/CodeGen/X86/avx512vl-intrinsics.ll +++ b/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -3481,3 +3481,55 @@ define <8 x float>@test_int_x86_avx512_mask_cvt_udq2ps_256(<8 x i32> %x0, <8 x f ret <8 x float> %res2 } +declare <2 x double> 
@llvm.x86.avx512.mask.rndscale.pd.128(<2 x double>, i32, <2 x double>, i8) +; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_pd_128 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vrndscalepd {{.*}}{%k1} +; CHECK: vrndscalepd +define <2 x double>@test_int_x86_avx512_mask_rndscale_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) { + %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %x0, i32 4, <2 x double> %x2, i8 %x3) + %res1 = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %x0, i32 88, <2 x double> %x2, i8 -1) + %res2 = fadd <2 x double> %res, %res1 + ret <2 x double> %res2 +} + +declare <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double>, i32, <4 x double>, i8) +; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_pd_256 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vrndscalepd {{.*}}{%k1} +; CHECK: vrndscalepd +define <4 x double>@test_int_x86_avx512_mask_rndscale_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) { + %res = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %x0, i32 4, <4 x double> %x2, i8 %x3) + %res1 = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %x0, i32 88, <4 x double> %x2, i8 -1) + %res2 = fadd <4 x double> %res, %res1 + ret <4 x double> %res2 +} + +declare <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float>, i32, <4 x float>, i8) +; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_ps_128 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vrndscaleps {{.*}}{%k1} +; CHECK: vrndscaleps +define <4 x float>@test_int_x86_avx512_mask_rndscale_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) { + %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %x0, i32 88, <4 x float> %x2, i8 %x3) + %res1 = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %x0, i32 4, <4 x float> %x2, i8 -1) + %res2 = fadd <4 x float> %res, %res1 + ret <4 x float> %res2 +} + +declare <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x 
float>, i32, <8 x float>, i8) + +; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_ps_256 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vrndscaleps {{.*}}{%k1} +; CHECK: vrndscaleps +define <8 x float>@test_int_x86_avx512_mask_rndscale_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) { + %res = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %x0, i32 5, <8 x float> %x2, i8 %x3) + %res1 = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %x0, i32 66, <8 x float> %x2, i8 -1) + %res2 = fadd <8 x float> %res, %res1 + ret <8 x float> %res2 +} diff --git a/test/MC/X86/avx512-encodings.s b/test/MC/X86/avx512-encodings.s index 3bb7a5bcd2c..dc0e626d440 100644 --- a/test/MC/X86/avx512-encodings.s +++ b/test/MC/X86/avx512-encodings.s @@ -12846,6 +12846,342 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2 // CHECK: encoding: [0x62,0xf1,0xfd,0x58,0x5a,0xaa,0xf8,0xfb,0xff,0xff] vcvtpd2ps -1032(%rdx){1to8}, %ymm5 +// CHECK: vscalefsd %xmm21, %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xa2,0xcd,0x00,0x2d,0xed] + vscalefsd %xmm21, %xmm22, %xmm21 + +// CHECK: vscalefsd %xmm21, %xmm22, %xmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0xcd,0x02,0x2d,0xed] + vscalefsd %xmm21, %xmm22, %xmm21 {%k2} + +// CHECK: vscalefsd %xmm21, %xmm22, %xmm21 {%k2} {z} +// CHECK: encoding: [0x62,0xa2,0xcd,0x82,0x2d,0xed] + vscalefsd %xmm21, %xmm22, %xmm21 {%k2} {z} + +// CHECK: vscalefsd {rn-sae}, %xmm21, %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xa2,0xcd,0x10,0x2d,0xed] + vscalefsd {rn-sae}, %xmm21, %xmm22, %xmm21 + +// CHECK: vscalefsd {ru-sae}, %xmm21, %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xa2,0xcd,0x50,0x2d,0xed] + vscalefsd {ru-sae}, %xmm21, %xmm22, %xmm21 + +// CHECK: vscalefsd {rd-sae}, %xmm21, %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xa2,0xcd,0x30,0x2d,0xed] + vscalefsd {rd-sae}, %xmm21, %xmm22, %xmm21 + +// CHECK: vscalefsd {rz-sae}, %xmm21, %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xa2,0xcd,0x70,0x2d,0xed] + vscalefsd {rz-sae}, %xmm21, %xmm22, %xmm21 + +// CHECK: 
vscalefsd (%rcx), %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xe2,0xcd,0x00,0x2d,0x29] + vscalefsd (%rcx), %xmm22, %xmm21 + +// CHECK: vscalefsd 291(%rax,%r14,8), %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xa2,0xcd,0x00,0x2d,0xac,0xf0,0x23,0x01,0x00,0x00] + vscalefsd 291(%rax,%r14,8), %xmm22, %xmm21 + +// CHECK: vscalefsd 1016(%rdx), %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xe2,0xcd,0x00,0x2d,0x6a,0x7f] + vscalefsd 1016(%rdx), %xmm22, %xmm21 + +// CHECK: vscalefsd 1024(%rdx), %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xe2,0xcd,0x00,0x2d,0xaa,0x00,0x04,0x00,0x00] + vscalefsd 1024(%rdx), %xmm22, %xmm21 + +// CHECK: vscalefsd -1024(%rdx), %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xe2,0xcd,0x00,0x2d,0x6a,0x80] + vscalefsd -1024(%rdx), %xmm22, %xmm21 + +// CHECK: vscalefsd -1032(%rdx), %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xe2,0xcd,0x00,0x2d,0xaa,0xf8,0xfb,0xff,0xff] + vscalefsd -1032(%rdx), %xmm22, %xmm21 + +// CHECK: vscalefss %xmm23, %xmm15, %xmm13 +// CHECK: encoding: [0x62,0x32,0x05,0x08,0x2d,0xef] + vscalefss %xmm23, %xmm15, %xmm13 + +// CHECK: vscalefss %xmm23, %xmm15, %xmm13 {%k3} +// CHECK: encoding: [0x62,0x32,0x05,0x0b,0x2d,0xef] + vscalefss %xmm23, %xmm15, %xmm13 {%k3} + +// CHECK: vscalefss %xmm23, %xmm15, %xmm13 {%k3} {z} +// CHECK: encoding: [0x62,0x32,0x05,0x8b,0x2d,0xef] + vscalefss %xmm23, %xmm15, %xmm13 {%k3} {z} + +// CHECK: vscalefss {rn-sae}, %xmm23, %xmm15, %xmm13 +// CHECK: encoding: [0x62,0x32,0x05,0x18,0x2d,0xef] + vscalefss {rn-sae}, %xmm23, %xmm15, %xmm13 + +// CHECK: vscalefss {ru-sae}, %xmm23, %xmm15, %xmm13 +// CHECK: encoding: [0x62,0x32,0x05,0x58,0x2d,0xef] + vscalefss {ru-sae}, %xmm23, %xmm15, %xmm13 + +// CHECK: vscalefss {rd-sae}, %xmm23, %xmm15, %xmm13 +// CHECK: encoding: [0x62,0x32,0x05,0x38,0x2d,0xef] + vscalefss {rd-sae}, %xmm23, %xmm15, %xmm13 + +// CHECK: vscalefss {rz-sae}, %xmm23, %xmm15, %xmm13 +// CHECK: encoding: [0x62,0x32,0x05,0x78,0x2d,0xef] + vscalefss {rz-sae}, %xmm23, %xmm15, %xmm13 + +// CHECK: vscalefss 
(%rcx), %xmm15, %xmm13 +// CHECK: encoding: [0x62,0x72,0x05,0x08,0x2d,0x29] + vscalefss (%rcx), %xmm15, %xmm13 + +// CHECK: vscalefss 291(%rax,%r14,8), %xmm15, %xmm13 +// CHECK: encoding: [0x62,0x32,0x05,0x08,0x2d,0xac,0xf0,0x23,0x01,0x00,0x00] + vscalefss 291(%rax,%r14,8), %xmm15, %xmm13 + +// CHECK: vscalefss 508(%rdx), %xmm15, %xmm13 +// CHECK: encoding: [0x62,0x72,0x05,0x08,0x2d,0x6a,0x7f] + vscalefss 508(%rdx), %xmm15, %xmm13 + +// CHECK: vscalefss 512(%rdx), %xmm15, %xmm13 +// CHECK: encoding: [0x62,0x72,0x05,0x08,0x2d,0xaa,0x00,0x02,0x00,0x00] + vscalefss 512(%rdx), %xmm15, %xmm13 + +// CHECK: vscalefss -512(%rdx), %xmm15, %xmm13 +// CHECK: encoding: [0x62,0x72,0x05,0x08,0x2d,0x6a,0x80] + vscalefss -512(%rdx), %xmm15, %xmm13 + +// CHECK: vscalefss -516(%rdx), %xmm15, %xmm13 +// CHECK: encoding: [0x62,0x72,0x05,0x08,0x2d,0xaa,0xfc,0xfd,0xff,0xff] + vscalefss -516(%rdx), %xmm15, %xmm13 + +// CHECK: vrndscalepd $171, %zmm7, %zmm22 +// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x09,0xf7,0xab] + vrndscalepd $0xab, %zmm7, %zmm22 + +// CHECK: vrndscalepd $171, %zmm7, %zmm22 {%k1} +// CHECK: encoding: [0x62,0xe3,0xfd,0x49,0x09,0xf7,0xab] + vrndscalepd $0xab, %zmm7, %zmm22 {%k1} + +// CHECK: vrndscalepd $171, %zmm7, %zmm22 {%k1} {z} +// CHECK: encoding: [0x62,0xe3,0xfd,0xc9,0x09,0xf7,0xab] + vrndscalepd $0xab, %zmm7, %zmm22 {%k1} {z} + +// CHECK: vrndscalepd $171,{sae}, %zmm7, %zmm22 +// CHECK: encoding: [0x62,0xe3,0xfd,0x18,0x09,0xf7,0xab] + vrndscalepd $0xab,{sae}, %zmm7, %zmm22 + +// CHECK: vrndscalepd $123, %zmm7, %zmm22 +// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x09,0xf7,0x7b] + vrndscalepd $0x7b, %zmm7, %zmm22 + +// CHECK: vrndscalepd $123,{sae}, %zmm7, %zmm22 +// CHECK: encoding: [0x62,0xe3,0xfd,0x18,0x09,0xf7,0x7b] + vrndscalepd $0x7b,{sae}, %zmm7, %zmm22 + +// CHECK: vrndscalepd $123, (%rcx), %zmm22 +// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x09,0x31,0x7b] + vrndscalepd $0x7b, (%rcx), %zmm22 + +// CHECK: vrndscalepd $123, 291(%rax,%r14,8), %zmm22 +// CHECK: 
encoding: [0x62,0xa3,0xfd,0x48,0x09,0xb4,0xf0,0x23,0x01,0x00,0x00,0x7b] + vrndscalepd $0x7b, 291(%rax,%r14,8), %zmm22 + +// CHECK: vrndscalepd $123, (%rcx){1to8}, %zmm22 +// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x09,0x31,0x7b] + vrndscalepd $0x7b, (%rcx){1to8}, %zmm22 + +// CHECK: vrndscalepd $123, 8128(%rdx), %zmm22 +// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x09,0x72,0x7f,0x7b] + vrndscalepd $0x7b, 8128(%rdx), %zmm22 + +// CHECK: vrndscalepd $123, 8192(%rdx), %zmm22 +// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x09,0xb2,0x00,0x20,0x00,0x00,0x7b] + vrndscalepd $0x7b, 8192(%rdx), %zmm22 + +// CHECK: vrndscalepd $123, -8192(%rdx), %zmm22 +// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x09,0x72,0x80,0x7b] + vrndscalepd $0x7b, -8192(%rdx), %zmm22 + +// CHECK: vrndscalepd $123, -8256(%rdx), %zmm22 +// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x09,0xb2,0xc0,0xdf,0xff,0xff,0x7b] + vrndscalepd $0x7b, -8256(%rdx), %zmm22 + +// CHECK: vrndscalepd $123, 1016(%rdx){1to8}, %zmm22 +// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x09,0x72,0x7f,0x7b] + vrndscalepd $0x7b, 1016(%rdx){1to8}, %zmm22 + +// CHECK: vrndscalepd $123, 1024(%rdx){1to8}, %zmm22 +// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x09,0xb2,0x00,0x04,0x00,0x00,0x7b] + vrndscalepd $0x7b, 1024(%rdx){1to8}, %zmm22 + +// CHECK: vrndscalepd $123, -1024(%rdx){1to8}, %zmm22 +// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x09,0x72,0x80,0x7b] + vrndscalepd $0x7b, -1024(%rdx){1to8}, %zmm22 + +// CHECK: vrndscalepd $123, -1032(%rdx){1to8}, %zmm22 +// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x09,0xb2,0xf8,0xfb,0xff,0xff,0x7b] + vrndscalepd $0x7b, -1032(%rdx){1to8}, %zmm22 + +// CHECK: vrndscaleps $171, %zmm7, %zmm13 +// CHECK: encoding: [0x62,0x73,0x7d,0x48,0x08,0xef,0xab] + vrndscaleps $0xab, %zmm7, %zmm13 + +// CHECK: vrndscaleps $171, %zmm7, %zmm13 {%k1} +// CHECK: encoding: [0x62,0x73,0x7d,0x49,0x08,0xef,0xab] + vrndscaleps $0xab, %zmm7, %zmm13 {%k1} + +// CHECK: vrndscaleps $171, %zmm7, %zmm13 {%k1} {z} +// CHECK: encoding: 
[0x62,0x73,0x7d,0xc9,0x08,0xef,0xab] + vrndscaleps $0xab, %zmm7, %zmm13 {%k1} {z} + +// CHECK: vrndscaleps $171,{sae}, %zmm7, %zmm13 +// CHECK: encoding: [0x62,0x73,0x7d,0x18,0x08,0xef,0xab] + vrndscaleps $0xab,{sae}, %zmm7, %zmm13 + +// CHECK: vrndscaleps $123, %zmm7, %zmm13 +// CHECK: encoding: [0x62,0x73,0x7d,0x48,0x08,0xef,0x7b] + vrndscaleps $0x7b, %zmm7, %zmm13 + +// CHECK: vrndscaleps $123,{sae}, %zmm7, %zmm13 +// CHECK: encoding: [0x62,0x73,0x7d,0x18,0x08,0xef,0x7b] + vrndscaleps $0x7b,{sae}, %zmm7, %zmm13 + +// CHECK: vrndscaleps $123, (%rcx), %zmm13 +// CHECK: encoding: [0x62,0x73,0x7d,0x48,0x08,0x29,0x7b] + vrndscaleps $0x7b, (%rcx), %zmm13 + +// CHECK: vrndscaleps $123, 291(%rax,%r14,8), %zmm13 +// CHECK: encoding: [0x62,0x33,0x7d,0x48,0x08,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b] + vrndscaleps $0x7b, 291(%rax,%r14,8), %zmm13 + +// CHECK: vrndscaleps $123, (%rcx){1to16}, %zmm13 +// CHECK: encoding: [0x62,0x73,0x7d,0x58,0x08,0x29,0x7b] + vrndscaleps $0x7b, (%rcx){1to16}, %zmm13 + +// CHECK: vrndscaleps $123, 8128(%rdx), %zmm13 +// CHECK: encoding: [0x62,0x73,0x7d,0x48,0x08,0x6a,0x7f,0x7b] + vrndscaleps $0x7b, 8128(%rdx), %zmm13 + +// CHECK: vrndscaleps $123, 8192(%rdx), %zmm13 +// CHECK: encoding: [0x62,0x73,0x7d,0x48,0x08,0xaa,0x00,0x20,0x00,0x00,0x7b] + vrndscaleps $0x7b, 8192(%rdx), %zmm13 + +// CHECK: vrndscaleps $123, -8192(%rdx), %zmm13 +// CHECK: encoding: [0x62,0x73,0x7d,0x48,0x08,0x6a,0x80,0x7b] + vrndscaleps $0x7b, -8192(%rdx), %zmm13 + +// CHECK: vrndscaleps $123, -8256(%rdx), %zmm13 +// CHECK: encoding: [0x62,0x73,0x7d,0x48,0x08,0xaa,0xc0,0xdf,0xff,0xff,0x7b] + vrndscaleps $0x7b, -8256(%rdx), %zmm13 + +// CHECK: vrndscaleps $123, 508(%rdx){1to16}, %zmm13 +// CHECK: encoding: [0x62,0x73,0x7d,0x58,0x08,0x6a,0x7f,0x7b] + vrndscaleps $0x7b, 508(%rdx){1to16}, %zmm13 + +// CHECK: vrndscaleps $123, 512(%rdx){1to16}, %zmm13 +// CHECK: encoding: [0x62,0x73,0x7d,0x58,0x08,0xaa,0x00,0x02,0x00,0x00,0x7b] + vrndscaleps $0x7b, 512(%rdx){1to16}, %zmm13 + +// 
CHECK: vrndscaleps $123, -512(%rdx){1to16}, %zmm13 +// CHECK: encoding: [0x62,0x73,0x7d,0x58,0x08,0x6a,0x80,0x7b] + vrndscaleps $0x7b, -512(%rdx){1to16}, %zmm13 + +// CHECK: vrndscaleps $123, -516(%rdx){1to16}, %zmm13 +// CHECK: encoding: [0x62,0x73,0x7d,0x58,0x08,0xaa,0xfc,0xfd,0xff,0xff,0x7b] + vrndscaleps $0x7b, -516(%rdx){1to16}, %zmm13 + +// CHECK: vrndscalesd $171, %xmm15, %xmm12, %xmm25 +// CHECK: encoding: [0x62,0x43,0x9d,0x08,0x0b,0xcf,0xab] + vrndscalesd $0xab, %xmm15, %xmm12, %xmm25 + +// CHECK: vrndscalesd $171, %xmm15, %xmm12, %xmm25 {%k6} +// CHECK: encoding: [0x62,0x43,0x9d,0x0e,0x0b,0xcf,0xab] + vrndscalesd $0xab, %xmm15, %xmm12, %xmm25 {%k6} + +// CHECK: vrndscalesd $171, %xmm15, %xmm12, %xmm25 {%k6} {z} +// CHECK: encoding: [0x62,0x43,0x9d,0x8e,0x0b,0xcf,0xab] + vrndscalesd $0xab, %xmm15, %xmm12, %xmm25 {%k6} {z} + +// CHECK: vrndscalesd $171, {sae}, %xmm15, %xmm12, %xmm25 +// CHECK: encoding: [0x62,0x43,0x9d,0x18,0x0b,0xcf,0xab] + vrndscalesd $0xab,{sae}, %xmm15, %xmm12, %xmm25 + +// CHECK: vrndscalesd $123, %xmm15, %xmm12, %xmm25 +// CHECK: encoding: [0x62,0x43,0x9d,0x08,0x0b,0xcf,0x7b] + vrndscalesd $0x7b, %xmm15, %xmm12, %xmm25 + +// CHECK: vrndscalesd $123, {sae}, %xmm15, %xmm12, %xmm25 +// CHECK: encoding: [0x62,0x43,0x9d,0x18,0x0b,0xcf,0x7b] + vrndscalesd $0x7b,{sae}, %xmm15, %xmm12, %xmm25 + +// CHECK: vrndscalesd $123, (%rcx), %xmm12, %xmm25 +// CHECK: encoding: [0x62,0x63,0x9d,0x08,0x0b,0x09,0x7b] + vrndscalesd $0x7b, (%rcx), %xmm12, %xmm25 + +// CHECK: vrndscalesd $123, 291(%rax,%r14,8), %xmm12, %xmm25 +// CHECK: encoding: [0x62,0x23,0x9d,0x08,0x0b,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b] + vrndscalesd $0x7b, 291(%rax,%r14,8), %xmm12, %xmm25 + +// CHECK: vrndscalesd $123, 1016(%rdx), %xmm12, %xmm25 +// CHECK: encoding: [0x62,0x63,0x9d,0x08,0x0b,0x4a,0x7f,0x7b] + vrndscalesd $0x7b, 1016(%rdx), %xmm12, %xmm25 + +// CHECK: vrndscalesd $123, 1024(%rdx), %xmm12, %xmm25 +// CHECK: encoding: [0x62,0x63,0x9d,0x08,0x0b,0x8a,0x00,0x04,0x00,0x00,0x7b] 
+ vrndscalesd $0x7b, 1024(%rdx), %xmm12, %xmm25 + +// CHECK: vrndscalesd $123, -1024(%rdx), %xmm12, %xmm25 +// CHECK: encoding: [0x62,0x63,0x9d,0x08,0x0b,0x4a,0x80,0x7b] + vrndscalesd $0x7b, -1024(%rdx), %xmm12, %xmm25 + +// CHECK: vrndscalesd $123, -1032(%rdx), %xmm12, %xmm25 +// CHECK: encoding: [0x62,0x63,0x9d,0x08,0x0b,0x8a,0xf8,0xfb,0xff,0xff,0x7b] + vrndscalesd $0x7b, -1032(%rdx), %xmm12, %xmm25 + +// CHECK: vrndscaless $171, %xmm17, %xmm11, %xmm11 +// CHECK: encoding: [0x62,0x33,0x25,0x08,0x0a,0xd9,0xab] + vrndscaless $0xab, %xmm17, %xmm11, %xmm11 + +// CHECK: vrndscaless $171, %xmm17, %xmm11, %xmm11 {%k3} +// CHECK: encoding: [0x62,0x33,0x25,0x0b,0x0a,0xd9,0xab] + vrndscaless $0xab, %xmm17, %xmm11, %xmm11 {%k3} + +// CHECK: vrndscaless $171, %xmm17, %xmm11, %xmm11 {%k3} {z} +// CHECK: encoding: [0x62,0x33,0x25,0x8b,0x0a,0xd9,0xab] + vrndscaless $0xab, %xmm17, %xmm11, %xmm11 {%k3} {z} + +// CHECK: vrndscaless $171, {sae}, %xmm17, %xmm11, %xmm11 +// CHECK: encoding: [0x62,0x33,0x25,0x18,0x0a,0xd9,0xab] + vrndscaless $0xab,{sae}, %xmm17, %xmm11, %xmm11 + +// CHECK: vrndscaless $123, %xmm17, %xmm11, %xmm11 +// CHECK: encoding: [0x62,0x33,0x25,0x08,0x0a,0xd9,0x7b] + vrndscaless $0x7b, %xmm17, %xmm11, %xmm11 + +// CHECK: vrndscaless $123, {sae}, %xmm17, %xmm11, %xmm11 +// CHECK: encoding: [0x62,0x33,0x25,0x18,0x0a,0xd9,0x7b] + vrndscaless $0x7b,{sae}, %xmm17, %xmm11, %xmm11 + +// CHECK: vrndscaless $123, (%rcx), %xmm11, %xmm11 +// CHECK: encoding: [0x62,0x73,0x25,0x08,0x0a,0x19,0x7b] + vrndscaless $0x7b, (%rcx), %xmm11, %xmm11 + +// CHECK: vrndscaless $123, 291(%rax,%r14,8), %xmm11, %xmm11 +// CHECK: encoding: [0x62,0x33,0x25,0x08,0x0a,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b] + vrndscaless $0x7b, 291(%rax,%r14,8), %xmm11, %xmm11 + +// CHECK: vrndscaless $123, 508(%rdx), %xmm11, %xmm11 +// CHECK: encoding: [0x62,0x73,0x25,0x08,0x0a,0x5a,0x7f,0x7b] + vrndscaless $0x7b, 508(%rdx), %xmm11, %xmm11 + +// CHECK: vrndscaless $123, 512(%rdx), %xmm11, %xmm11 +// CHECK: 
encoding: [0x62,0x73,0x25,0x08,0x0a,0x9a,0x00,0x02,0x00,0x00,0x7b] + vrndscaless $0x7b, 512(%rdx), %xmm11, %xmm11 + +// CHECK: vrndscaless $123, -512(%rdx), %xmm11, %xmm11 +// CHECK: encoding: [0x62,0x73,0x25,0x08,0x0a,0x5a,0x80,0x7b] + vrndscaless $0x7b, -512(%rdx), %xmm11, %xmm11 + +// CHECK: vrndscaless $123, -516(%rdx), %xmm11, %xmm11 +// CHECK: encoding: [0x62,0x73,0x25,0x08,0x0a,0x9a,0xfc,0xfd,0xff,0xff,0x7b] + vrndscaless $0x7b, -516(%rdx), %xmm11, %xmm11 + // CHECK: vfmadd132ss %xmm22, %xmm17, %xmm30 // CHECK: encoding: [0x62,0x22,0x75,0x00,0x99,0xf6] vfmadd132ss %xmm22, %xmm17, %xmm30 diff --git a/test/MC/X86/x86-64-avx512dq.s b/test/MC/X86/x86-64-avx512dq.s index 4b26f7a0b80..d4e847557bc 100644 --- a/test/MC/X86/x86-64-avx512dq.s +++ b/test/MC/X86/x86-64-avx512dq.s @@ -1391,6 +1391,470 @@ // CHECK: encoding: [0x62,0x63,0x3d,0x00,0x51,0x8a,0xfc,0xfd,0xff,0xff,0x7b] vrangess $0x7b,-516(%rdx), %xmm24, %xmm25 +// CHECK: vreducepd $171, %zmm19, %zmm19 +// CHECK: encoding: [0x62,0xa3,0xfd,0x48,0x56,0xdb,0xab] + vreducepd $0xab, %zmm19, %zmm19 + +// CHECK: vreducepd $171, %zmm19, %zmm19 {%k6} +// CHECK: encoding: [0x62,0xa3,0xfd,0x4e,0x56,0xdb,0xab] + vreducepd $0xab, %zmm19, %zmm19 {%k6} + +// CHECK: vreducepd $171, %zmm19, %zmm19 {%k6} {z} +// CHECK: encoding: [0x62,0xa3,0xfd,0xce,0x56,0xdb,0xab] + vreducepd $0xab, %zmm19, %zmm19 {%k6} {z} + +// CHECK: vreducepd $171,{sae}, %zmm19, %zmm19 +// CHECK: encoding: [0x62,0xa3,0xfd,0x18,0x56,0xdb,0xab] + vreducepd $0xab,{sae}, %zmm19, %zmm19 + +// CHECK: vreducepd $123, %zmm19, %zmm19 +// CHECK: encoding: [0x62,0xa3,0xfd,0x48,0x56,0xdb,0x7b] + vreducepd $0x7b, %zmm19, %zmm19 + +// CHECK: vreducepd $123,{sae}, %zmm19, %zmm19 +// CHECK: encoding: [0x62,0xa3,0xfd,0x18,0x56,0xdb,0x7b] + vreducepd $0x7b,{sae}, %zmm19, %zmm19 + +// CHECK: vreducepd $123, (%rcx), %zmm19 +// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x56,0x19,0x7b] + vreducepd $0x7b,(%rcx), %zmm19 + +// CHECK: vreducepd $123, 291(%rax,%r14,8), %zmm19 +// CHECK: 
encoding: [0x62,0xa3,0xfd,0x48,0x56,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b] + vreducepd $0x7b,291(%rax,%r14,8), %zmm19 + +// CHECK: vreducepd $123, (%rcx){1to8}, %zmm19 +// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x56,0x19,0x7b] + vreducepd $0x7b,(%rcx){1to8}, %zmm19 + +// CHECK: vreducepd $123, 8128(%rdx), %zmm19 +// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x56,0x5a,0x7f,0x7b] + vreducepd $0x7b,8128(%rdx), %zmm19 + +// CHECK: vreducepd $123, 8192(%rdx), %zmm19 +// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x56,0x9a,0x00,0x20,0x00,0x00,0x7b] + vreducepd $0x7b,8192(%rdx), %zmm19 + +// CHECK: vreducepd $123, -8192(%rdx), %zmm19 +// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x56,0x5a,0x80,0x7b] + vreducepd $0x7b,-8192(%rdx), %zmm19 + +// CHECK: vreducepd $123, -8256(%rdx), %zmm19 +// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x56,0x9a,0xc0,0xdf,0xff,0xff,0x7b] + vreducepd $0x7b,-8256(%rdx), %zmm19 + +// CHECK: vreducepd $123, 1016(%rdx){1to8}, %zmm19 +// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x56,0x5a,0x7f,0x7b] + vreducepd $0x7b,1016(%rdx){1to8}, %zmm19 + +// CHECK: vreducepd $123, 1024(%rdx){1to8}, %zmm19 +// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x56,0x9a,0x00,0x04,0x00,0x00,0x7b] + vreducepd $0x7b,1024(%rdx){1to8}, %zmm19 + +// CHECK: vreducepd $123, -1024(%rdx){1to8}, %zmm19 +// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x56,0x5a,0x80,0x7b] + vreducepd $0x7b,-1024(%rdx){1to8}, %zmm19 + +// CHECK: vreducepd $123, -1032(%rdx){1to8}, %zmm19 +// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x56,0x9a,0xf8,0xfb,0xff,0xff,0x7b] + vreducepd $0x7b,-1032(%rdx){1to8}, %zmm19 + +// CHECK: vreduceps $171, %zmm29, %zmm19 +// CHECK: encoding: [0x62,0x83,0x7d,0x48,0x56,0xdd,0xab] + vreduceps $0xab, %zmm29, %zmm19 + +// CHECK: vreduceps $171, %zmm29, %zmm19 {%k3} +// CHECK: encoding: [0x62,0x83,0x7d,0x4b,0x56,0xdd,0xab] + vreduceps $0xab, %zmm29, %zmm19 {%k3} + +// CHECK: vreduceps $171, %zmm29, %zmm19 {%k3} {z} +// CHECK: encoding: [0x62,0x83,0x7d,0xcb,0x56,0xdd,0xab] + vreduceps $0xab, %zmm29, %zmm19 {%k3} 
{z} + +// CHECK: vreduceps $171,{sae}, %zmm29, %zmm19 +// CHECK: encoding: [0x62,0x83,0x7d,0x18,0x56,0xdd,0xab] + vreduceps $0xab,{sae}, %zmm29, %zmm19 + +// CHECK: vreduceps $123, %zmm29, %zmm19 +// CHECK: encoding: [0x62,0x83,0x7d,0x48,0x56,0xdd,0x7b] + vreduceps $0x7b, %zmm29, %zmm19 + +// CHECK: vreduceps $123,{sae}, %zmm29, %zmm19 +// CHECK: encoding: [0x62,0x83,0x7d,0x18,0x56,0xdd,0x7b] + vreduceps $0x7b,{sae}, %zmm29, %zmm19 + +// CHECK: vreduceps $123, (%rcx), %zmm19 +// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x56,0x19,0x7b] + vreduceps $0x7b,(%rcx), %zmm19 + +// CHECK: vreduceps $123, 291(%rax,%r14,8), %zmm19 +// CHECK: encoding: [0x62,0xa3,0x7d,0x48,0x56,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b] + vreduceps $0x7b,291(%rax,%r14,8), %zmm19 + +// CHECK: vreduceps $123, (%rcx){1to16}, %zmm19 +// CHECK: encoding: [0x62,0xe3,0x7d,0x58,0x56,0x19,0x7b] + vreduceps $0x7b,(%rcx){1to16}, %zmm19 + +// CHECK: vreduceps $123, 8128(%rdx), %zmm19 +// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x56,0x5a,0x7f,0x7b] + vreduceps $0x7b,8128(%rdx), %zmm19 + +// CHECK: vreduceps $123, 8192(%rdx), %zmm19 +// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x56,0x9a,0x00,0x20,0x00,0x00,0x7b] + vreduceps $0x7b,8192(%rdx), %zmm19 + +// CHECK: vreduceps $123, -8192(%rdx), %zmm19 +// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x56,0x5a,0x80,0x7b] + vreduceps $0x7b,-8192(%rdx), %zmm19 + +// CHECK: vreduceps $123, -8256(%rdx), %zmm19 +// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x56,0x9a,0xc0,0xdf,0xff,0xff,0x7b] + vreduceps $0x7b,-8256(%rdx), %zmm19 + +// CHECK: vreduceps $123, 508(%rdx){1to16}, %zmm19 +// CHECK: encoding: [0x62,0xe3,0x7d,0x58,0x56,0x5a,0x7f,0x7b] + vreduceps $0x7b,508(%rdx){1to16}, %zmm19 + +// CHECK: vreduceps $123, 512(%rdx){1to16}, %zmm19 +// CHECK: encoding: [0x62,0xe3,0x7d,0x58,0x56,0x9a,0x00,0x02,0x00,0x00,0x7b] + vreduceps $0x7b,512(%rdx){1to16}, %zmm19 + +// CHECK: vreduceps $123, -512(%rdx){1to16}, %zmm19 +// CHECK: encoding: [0x62,0xe3,0x7d,0x58,0x56,0x5a,0x80,0x7b] + vreduceps 
$0x7b,-512(%rdx){1to16}, %zmm19 + +// CHECK: vreduceps $123, -516(%rdx){1to16}, %zmm19 +// CHECK: encoding: [0x62,0xe3,0x7d,0x58,0x56,0x9a,0xfc,0xfd,0xff,0xff,0x7b] + vreduceps $0x7b,-516(%rdx){1to16}, %zmm19 + +// CHECK: vreducesd $171, %xmm25, %xmm17, %xmm17 +// CHECK: encoding: [0x62,0x83,0xf5,0x00,0x57,0xc9,0xab] + vreducesd $0xab, %xmm25, %xmm17, %xmm17 + +// CHECK: vreducesd $171, %xmm25, %xmm17, %xmm17 {%k6} +// CHECK: encoding: [0x62,0x83,0xf5,0x06,0x57,0xc9,0xab] + vreducesd $0xab, %xmm25, %xmm17, %xmm17 {%k6} + +// CHECK: vreducesd $171, %xmm25, %xmm17, %xmm17 {%k6} {z} +// CHECK: encoding: [0x62,0x83,0xf5,0x86,0x57,0xc9,0xab] + vreducesd $0xab, %xmm25, %xmm17, %xmm17 {%k6} {z} + +// CHECK: vreducesd $171,{sae}, %xmm25, %xmm17, %xmm17 +// CHECK: encoding: [0x62,0x83,0xf5,0x10,0x57,0xc9,0xab] + vreducesd $0xab,{sae}, %xmm25, %xmm17, %xmm17 + +// CHECK: vreducesd $123, %xmm25, %xmm17, %xmm17 +// CHECK: encoding: [0x62,0x83,0xf5,0x00,0x57,0xc9,0x7b] + vreducesd $0x7b, %xmm25, %xmm17, %xmm17 + +// CHECK: vreducesd $123,{sae}, %xmm25, %xmm17, %xmm17 +// CHECK: encoding: [0x62,0x83,0xf5,0x10,0x57,0xc9,0x7b] + vreducesd $0x7b,{sae}, %xmm25, %xmm17, %xmm17 + +// CHECK: vreducesd $123, (%rcx), %xmm17, %xmm17 +// CHECK: encoding: [0x62,0xe3,0xf5,0x00,0x57,0x09,0x7b] + vreducesd $0x7b,(%rcx), %xmm17, %xmm17 + +// CHECK: vreducesd $123, 291(%rax,%r14,8), %xmm17, %xmm17 +// CHECK: encoding: [0x62,0xa3,0xf5,0x00,0x57,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b] + vreducesd $0x7b,291(%rax,%r14,8), %xmm17, %xmm17 + +// CHECK: vreducesd $123, 1016(%rdx), %xmm17, %xmm17 +// CHECK: encoding: [0x62,0xe3,0xf5,0x00,0x57,0x4a,0x7f,0x7b] + vreducesd $0x7b,1016(%rdx), %xmm17, %xmm17 + +// CHECK: vreducesd $123, 1024(%rdx), %xmm17, %xmm17 +// CHECK: encoding: [0x62,0xe3,0xf5,0x00,0x57,0x8a,0x00,0x04,0x00,0x00,0x7b] + vreducesd $0x7b,1024(%rdx), %xmm17, %xmm17 + +// CHECK: vreducesd $123, -1024(%rdx), %xmm17, %xmm17 +// CHECK: encoding: [0x62,0xe3,0xf5,0x00,0x57,0x4a,0x80,0x7b] + vreducesd 
$0x7b,-1024(%rdx), %xmm17, %xmm17 + +// CHECK: vreducesd $123, -1032(%rdx), %xmm17, %xmm17 +// CHECK: encoding: [0x62,0xe3,0xf5,0x00,0x57,0x8a,0xf8,0xfb,0xff,0xff,0x7b] + vreducesd $0x7b,-1032(%rdx), %xmm17, %xmm17 + +// CHECK: vreducess $171, %xmm21, %xmm29, %xmm30 +// CHECK: encoding: [0x62,0x23,0x15,0x00,0x57,0xf5,0xab] + vreducess $0xab, %xmm21, %xmm29, %xmm30 + +// CHECK: vreducess $171, %xmm21, %xmm29, %xmm30 {%k1} +// CHECK: encoding: [0x62,0x23,0x15,0x01,0x57,0xf5,0xab] + vreducess $0xab, %xmm21, %xmm29, %xmm30 {%k1} + +// CHECK: vreducess $171, %xmm21, %xmm29, %xmm30 {%k1} {z} +// CHECK: encoding: [0x62,0x23,0x15,0x81,0x57,0xf5,0xab] + vreducess $0xab, %xmm21, %xmm29, %xmm30 {%k1} {z} + +// CHECK: vreducess $171,{sae}, %xmm21, %xmm29, %xmm30 +// CHECK: encoding: [0x62,0x23,0x15,0x10,0x57,0xf5,0xab] + vreducess $0xab,{sae}, %xmm21, %xmm29, %xmm30 + +// CHECK: vreducess $123, %xmm21, %xmm29, %xmm30 +// CHECK: encoding: [0x62,0x23,0x15,0x00,0x57,0xf5,0x7b] + vreducess $0x7b, %xmm21, %xmm29, %xmm30 + +// CHECK: vreducess $123,{sae}, %xmm21, %xmm29, %xmm30 +// CHECK: encoding: [0x62,0x23,0x15,0x10,0x57,0xf5,0x7b] + vreducess $0x7b,{sae}, %xmm21, %xmm29, %xmm30 + +// CHECK: vreducess $123, (%rcx), %xmm29, %xmm30 +// CHECK: encoding: [0x62,0x63,0x15,0x00,0x57,0x31,0x7b] + vreducess $0x7b,(%rcx), %xmm29, %xmm30 + +// CHECK: vreducess $123, 291(%rax,%r14,8), %xmm29, %xmm30 +// CHECK: encoding: [0x62,0x23,0x15,0x00,0x57,0xb4,0xf0,0x23,0x01,0x00,0x00,0x7b] + vreducess $0x7b,291(%rax,%r14,8), %xmm29, %xmm30 + +// CHECK: vreducess $123, 508(%rdx), %xmm29, %xmm30 +// CHECK: encoding: [0x62,0x63,0x15,0x00,0x57,0x72,0x7f,0x7b] + vreducess $0x7b,508(%rdx), %xmm29, %xmm30 + +// CHECK: vreducess $123, 512(%rdx), %xmm29, %xmm30 +// CHECK: encoding: [0x62,0x63,0x15,0x00,0x57,0xb2,0x00,0x02,0x00,0x00,0x7b] + vreducess $0x7b,512(%rdx), %xmm29, %xmm30 + +// CHECK: vreducess $123, -512(%rdx), %xmm29, %xmm30 +// CHECK: encoding: [0x62,0x63,0x15,0x00,0x57,0x72,0x80,0x7b] + vreducess 
$0x7b,-512(%rdx), %xmm29, %xmm30 + +// CHECK: vreducess $123, -516(%rdx), %xmm29, %xmm30 +// CHECK: encoding: [0x62,0x63,0x15,0x00,0x57,0xb2,0xfc,0xfd,0xff,0xff,0x7b] + vreducess $0x7b,-516(%rdx), %xmm29, %xmm30 + +// CHECK: vreducepd $171, %zmm28, %zmm18 +// CHECK: encoding: [0x62,0x83,0xfd,0x48,0x56,0xd4,0xab] + vreducepd $0xab, %zmm28, %zmm18 + +// CHECK: vreducepd $171, %zmm28, %zmm18 {%k5} +// CHECK: encoding: [0x62,0x83,0xfd,0x4d,0x56,0xd4,0xab] + vreducepd $0xab, %zmm28, %zmm18 {%k5} + +// CHECK: vreducepd $171, %zmm28, %zmm18 {%k5} {z} +// CHECK: encoding: [0x62,0x83,0xfd,0xcd,0x56,0xd4,0xab] + vreducepd $0xab, %zmm28, %zmm18 {%k5} {z} + +// CHECK: vreducepd $171,{sae}, %zmm28, %zmm18 +// CHECK: encoding: [0x62,0x83,0xfd,0x18,0x56,0xd4,0xab] + vreducepd $0xab,{sae}, %zmm28, %zmm18 + +// CHECK: vreducepd $123, %zmm28, %zmm18 +// CHECK: encoding: [0x62,0x83,0xfd,0x48,0x56,0xd4,0x7b] + vreducepd $0x7b, %zmm28, %zmm18 + +// CHECK: vreducepd $123,{sae}, %zmm28, %zmm18 +// CHECK: encoding: [0x62,0x83,0xfd,0x18,0x56,0xd4,0x7b] + vreducepd $0x7b,{sae}, %zmm28, %zmm18 + +// CHECK: vreducepd $123, (%rcx), %zmm18 +// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x56,0x11,0x7b] + vreducepd $0x7b,(%rcx), %zmm18 + +// CHECK: vreducepd $123, 4660(%rax,%r14,8), %zmm18 +// CHECK: encoding: [0x62,0xa3,0xfd,0x48,0x56,0x94,0xf0,0x34,0x12,0x00,0x00,0x7b] + vreducepd $0x7b,4660(%rax,%r14,8), %zmm18 + +// CHECK: vreducepd $123, (%rcx){1to8}, %zmm18 +// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x56,0x11,0x7b] + vreducepd $0x7b,(%rcx){1to8}, %zmm18 + +// CHECK: vreducepd $123, 8128(%rdx), %zmm18 +// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x56,0x52,0x7f,0x7b] + vreducepd $0x7b,8128(%rdx), %zmm18 + +// CHECK: vreducepd $123, 8192(%rdx), %zmm18 +// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x56,0x92,0x00,0x20,0x00,0x00,0x7b] + vreducepd $0x7b,8192(%rdx), %zmm18 + +// CHECK: vreducepd $123, -8192(%rdx), %zmm18 +// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x56,0x52,0x80,0x7b] + vreducepd 
$0x7b,-8192(%rdx), %zmm18 + +// CHECK: vreducepd $123, -8256(%rdx), %zmm18 +// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x56,0x92,0xc0,0xdf,0xff,0xff,0x7b] + vreducepd $0x7b,-8256(%rdx), %zmm18 + +// CHECK: vreducepd $123, 1016(%rdx){1to8}, %zmm18 +// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x56,0x52,0x7f,0x7b] + vreducepd $0x7b,1016(%rdx){1to8}, %zmm18 + +// CHECK: vreducepd $123, 1024(%rdx){1to8}, %zmm18 +// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x56,0x92,0x00,0x04,0x00,0x00,0x7b] + vreducepd $0x7b,1024(%rdx){1to8}, %zmm18 + +// CHECK: vreducepd $123, -1024(%rdx){1to8}, %zmm18 +// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x56,0x52,0x80,0x7b] + vreducepd $0x7b,-1024(%rdx){1to8}, %zmm18 + +// CHECK: vreducepd $123, -1032(%rdx){1to8}, %zmm18 +// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x56,0x92,0xf8,0xfb,0xff,0xff,0x7b] + vreducepd $0x7b,-1032(%rdx){1to8}, %zmm18 + +// CHECK: vreduceps $171, %zmm25, %zmm26 +// CHECK: encoding: [0x62,0x03,0x7d,0x48,0x56,0xd1,0xab] + vreduceps $0xab, %zmm25, %zmm26 + +// CHECK: vreduceps $171, %zmm25, %zmm26 {%k3} +// CHECK: encoding: [0x62,0x03,0x7d,0x4b,0x56,0xd1,0xab] + vreduceps $0xab, %zmm25, %zmm26 {%k3} + +// CHECK: vreduceps $171, %zmm25, %zmm26 {%k3} {z} +// CHECK: encoding: [0x62,0x03,0x7d,0xcb,0x56,0xd1,0xab] + vreduceps $0xab, %zmm25, %zmm26 {%k3} {z} + +// CHECK: vreduceps $171,{sae}, %zmm25, %zmm26 +// CHECK: encoding: [0x62,0x03,0x7d,0x18,0x56,0xd1,0xab] + vreduceps $0xab,{sae}, %zmm25, %zmm26 + +// CHECK: vreduceps $123, %zmm25, %zmm26 +// CHECK: encoding: [0x62,0x03,0x7d,0x48,0x56,0xd1,0x7b] + vreduceps $0x7b, %zmm25, %zmm26 + +// CHECK: vreduceps $123,{sae}, %zmm25, %zmm26 +// CHECK: encoding: [0x62,0x03,0x7d,0x18,0x56,0xd1,0x7b] + vreduceps $0x7b,{sae}, %zmm25, %zmm26 + +// CHECK: vreduceps $123, (%rcx), %zmm26 +// CHECK: encoding: [0x62,0x63,0x7d,0x48,0x56,0x11,0x7b] + vreduceps $0x7b,(%rcx), %zmm26 + +// CHECK: vreduceps $123, 4660(%rax,%r14,8), %zmm26 +// CHECK: encoding: 
[0x62,0x23,0x7d,0x48,0x56,0x94,0xf0,0x34,0x12,0x00,0x00,0x7b] + vreduceps $0x7b,4660(%rax,%r14,8), %zmm26 + +// CHECK: vreduceps $123, (%rcx){1to16}, %zmm26 +// CHECK: encoding: [0x62,0x63,0x7d,0x58,0x56,0x11,0x7b] + vreduceps $0x7b,(%rcx){1to16}, %zmm26 + +// CHECK: vreduceps $123, 8128(%rdx), %zmm26 +// CHECK: encoding: [0x62,0x63,0x7d,0x48,0x56,0x52,0x7f,0x7b] + vreduceps $0x7b,8128(%rdx), %zmm26 + +// CHECK: vreduceps $123, 8192(%rdx), %zmm26 +// CHECK: encoding: [0x62,0x63,0x7d,0x48,0x56,0x92,0x00,0x20,0x00,0x00,0x7b] + vreduceps $0x7b,8192(%rdx), %zmm26 + +// CHECK: vreduceps $123, -8192(%rdx), %zmm26 +// CHECK: encoding: [0x62,0x63,0x7d,0x48,0x56,0x52,0x80,0x7b] + vreduceps $0x7b,-8192(%rdx), %zmm26 + +// CHECK: vreduceps $123, -8256(%rdx), %zmm26 +// CHECK: encoding: [0x62,0x63,0x7d,0x48,0x56,0x92,0xc0,0xdf,0xff,0xff,0x7b] + vreduceps $0x7b,-8256(%rdx), %zmm26 + +// CHECK: vreduceps $123, 508(%rdx){1to16}, %zmm26 +// CHECK: encoding: [0x62,0x63,0x7d,0x58,0x56,0x52,0x7f,0x7b] + vreduceps $0x7b,508(%rdx){1to16}, %zmm26 + +// CHECK: vreduceps $123, 512(%rdx){1to16}, %zmm26 +// CHECK: encoding: [0x62,0x63,0x7d,0x58,0x56,0x92,0x00,0x02,0x00,0x00,0x7b] + vreduceps $0x7b,512(%rdx){1to16}, %zmm26 + +// CHECK: vreduceps $123, -512(%rdx){1to16}, %zmm26 +// CHECK: encoding: [0x62,0x63,0x7d,0x58,0x56,0x52,0x80,0x7b] + vreduceps $0x7b,-512(%rdx){1to16}, %zmm26 + +// CHECK: vreduceps $123, -516(%rdx){1to16}, %zmm26 +// CHECK: encoding: [0x62,0x63,0x7d,0x58,0x56,0x92,0xfc,0xfd,0xff,0xff,0x7b] + vreduceps $0x7b,-516(%rdx){1to16}, %zmm26 + +// CHECK: vreducesd $171, %xmm24, %xmm19, %xmm25 +// CHECK: encoding: [0x62,0x03,0xe5,0x00,0x57,0xc8,0xab] + vreducesd $0xab, %xmm24, %xmm19, %xmm25 + +// CHECK: vreducesd $171, %xmm24, %xmm19, %xmm25 {%k3} +// CHECK: encoding: [0x62,0x03,0xe5,0x03,0x57,0xc8,0xab] + vreducesd $0xab, %xmm24, %xmm19, %xmm25 {%k3} + +// CHECK: vreducesd $171, %xmm24, %xmm19, %xmm25 {%k3} {z} +// CHECK: encoding: [0x62,0x03,0xe5,0x83,0x57,0xc8,0xab] + 
vreducesd $0xab, %xmm24, %xmm19, %xmm25 {%k3} {z} + +// CHECK: vreducesd $171,{sae}, %xmm24, %xmm19, %xmm25 +// CHECK: encoding: [0x62,0x03,0xe5,0x10,0x57,0xc8,0xab] + vreducesd $0xab,{sae}, %xmm24, %xmm19, %xmm25 + +// CHECK: vreducesd $123, %xmm24, %xmm19, %xmm25 +// CHECK: encoding: [0x62,0x03,0xe5,0x00,0x57,0xc8,0x7b] + vreducesd $0x7b, %xmm24, %xmm19, %xmm25 + +// CHECK: vreducesd $123,{sae}, %xmm24, %xmm19, %xmm25 +// CHECK: encoding: [0x62,0x03,0xe5,0x10,0x57,0xc8,0x7b] + vreducesd $0x7b,{sae}, %xmm24, %xmm19, %xmm25 + +// CHECK: vreducesd $123, (%rcx), %xmm19, %xmm25 +// CHECK: encoding: [0x62,0x63,0xe5,0x00,0x57,0x09,0x7b] + vreducesd $0x7b,(%rcx), %xmm19, %xmm25 + +// CHECK: vreducesd $123, 4660(%rax,%r14,8), %xmm19, %xmm25 +// CHECK: encoding: [0x62,0x23,0xe5,0x00,0x57,0x8c,0xf0,0x34,0x12,0x00,0x00,0x7b] + vreducesd $0x7b,4660(%rax,%r14,8), %xmm19, %xmm25 + +// CHECK: vreducesd $123, 1016(%rdx), %xmm19, %xmm25 +// CHECK: encoding: [0x62,0x63,0xe5,0x00,0x57,0x4a,0x7f,0x7b] + vreducesd $0x7b,1016(%rdx), %xmm19, %xmm25 + +// CHECK: vreducesd $123, 1024(%rdx), %xmm19, %xmm25 +// CHECK: encoding: [0x62,0x63,0xe5,0x00,0x57,0x8a,0x00,0x04,0x00,0x00,0x7b] + vreducesd $0x7b,1024(%rdx), %xmm19, %xmm25 + +// CHECK: vreducesd $123, -1024(%rdx), %xmm19, %xmm25 +// CHECK: encoding: [0x62,0x63,0xe5,0x00,0x57,0x4a,0x80,0x7b] + vreducesd $0x7b,-1024(%rdx), %xmm19, %xmm25 + +// CHECK: vreducesd $123, -1032(%rdx), %xmm19, %xmm25 +// CHECK: encoding: [0x62,0x63,0xe5,0x00,0x57,0x8a,0xf8,0xfb,0xff,0xff,0x7b] + vreducesd $0x7b,-1032(%rdx), %xmm19, %xmm25 + +// CHECK: vreducess $171, %xmm21, %xmm24, %xmm30 +// CHECK: encoding: [0x62,0x23,0x3d,0x00,0x57,0xf5,0xab] + vreducess $0xab, %xmm21, %xmm24, %xmm30 + +// CHECK: vreducess $171, %xmm21, %xmm24, %xmm30 {%k2} +// CHECK: encoding: [0x62,0x23,0x3d,0x02,0x57,0xf5,0xab] + vreducess $0xab, %xmm21, %xmm24, %xmm30 {%k2} + +// CHECK: vreducess $171, %xmm21, %xmm24, %xmm30 {%k2} {z} +// CHECK: encoding: 
[0x62,0x23,0x3d,0x82,0x57,0xf5,0xab] + vreducess $0xab, %xmm21, %xmm24, %xmm30 {%k2} {z} + +// CHECK: vreducess $171,{sae}, %xmm21, %xmm24, %xmm30 +// CHECK: encoding: [0x62,0x23,0x3d,0x10,0x57,0xf5,0xab] + vreducess $0xab,{sae}, %xmm21, %xmm24, %xmm30 + +// CHECK: vreducess $123, %xmm21, %xmm24, %xmm30 +// CHECK: encoding: [0x62,0x23,0x3d,0x00,0x57,0xf5,0x7b] + vreducess $0x7b, %xmm21, %xmm24, %xmm30 + +// CHECK: vreducess $123,{sae}, %xmm21, %xmm24, %xmm30 +// CHECK: encoding: [0x62,0x23,0x3d,0x10,0x57,0xf5,0x7b] + vreducess $0x7b,{sae}, %xmm21, %xmm24, %xmm30 + +// CHECK: vreducess $123, (%rcx), %xmm24, %xmm30 +// CHECK: encoding: [0x62,0x63,0x3d,0x00,0x57,0x31,0x7b] + vreducess $0x7b,(%rcx), %xmm24, %xmm30 + +// CHECK: vreducess $123, 4660(%rax,%r14,8), %xmm24, %xmm30 +// CHECK: encoding: [0x62,0x23,0x3d,0x00,0x57,0xb4,0xf0,0x34,0x12,0x00,0x00,0x7b] + vreducess $0x7b,4660(%rax,%r14,8), %xmm24, %xmm30 + +// CHECK: vreducess $123, 508(%rdx), %xmm24, %xmm30 +// CHECK: encoding: [0x62,0x63,0x3d,0x00,0x57,0x72,0x7f,0x7b] + vreducess $0x7b,508(%rdx), %xmm24, %xmm30 + +// CHECK: vreducess $123, 512(%rdx), %xmm24, %xmm30 +// CHECK: encoding: [0x62,0x63,0x3d,0x00,0x57,0xb2,0x00,0x02,0x00,0x00,0x7b] + vreducess $0x7b,512(%rdx), %xmm24, %xmm30 + +// CHECK: vreducess $123, -512(%rdx), %xmm24, %xmm30 +// CHECK: encoding: [0x62,0x63,0x3d,0x00,0x57,0x72,0x80,0x7b] + vreducess $0x7b,-512(%rdx), %xmm24, %xmm30 + +// CHECK: vreducess $123, -516(%rdx), %xmm24, %xmm30 +// CHECK: encoding: [0x62,0x63,0x3d,0x00,0x57,0xb2,0xfc,0xfd,0xff,0xff,0x7b] + vreducess $0x7b,-516(%rdx), %xmm24, %xmm30 + // CHECK: vcvtpd2qq %zmm29, %zmm18 // CHECK: encoding: [0x62,0x81,0xfd,0x48,0x7b,0xd5] vcvtpd2qq %zmm29, %zmm18 diff --git a/test/MC/X86/x86-64-avx512dq_vl.s b/test/MC/X86/x86-64-avx512dq_vl.s index 17c37c08335..e9ffd493fad 100644 --- a/test/MC/X86/x86-64-avx512dq_vl.s +++ b/test/MC/X86/x86-64-avx512dq_vl.s @@ -2208,6 +2208,486 @@ // CHECK: encoding: 
[0x62,0x63,0x45,0x30,0x50,0x82,0xfc,0xfd,0xff,0xff,0x7b] vrangeps $0x7b,-516(%rdx){1to8}, %ymm23, %ymm24 +// CHECK: vreducepd $171, %xmm17, %xmm18 +// CHECK: encoding: [0x62,0xa3,0xfd,0x08,0x56,0xd1,0xab] + vreducepd $0xab, %xmm17, %xmm18 + +// CHECK: vreducepd $171, %xmm17, %xmm18 {%k3} +// CHECK: encoding: [0x62,0xa3,0xfd,0x0b,0x56,0xd1,0xab] + vreducepd $0xab, %xmm17, %xmm18 {%k3} + +// CHECK: vreducepd $171, %xmm17, %xmm18 {%k3} {z} +// CHECK: encoding: [0x62,0xa3,0xfd,0x8b,0x56,0xd1,0xab] + vreducepd $0xab, %xmm17, %xmm18 {%k3} {z} + +// CHECK: vreducepd $123, %xmm17, %xmm18 +// CHECK: encoding: [0x62,0xa3,0xfd,0x08,0x56,0xd1,0x7b] + vreducepd $0x7b, %xmm17, %xmm18 + +// CHECK: vreducepd $123, (%rcx), %xmm18 +// CHECK: encoding: [0x62,0xe3,0xfd,0x08,0x56,0x11,0x7b] + vreducepd $0x7b,(%rcx), %xmm18 + +// CHECK: vreducepd $123, 291(%rax,%r14,8), %xmm18 +// CHECK: encoding: [0x62,0xa3,0xfd,0x08,0x56,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b] + vreducepd $0x7b,291(%rax,%r14,8), %xmm18 + +// CHECK: vreducepd $171, %xmm28, %xmm25 +// CHECK: encoding: [0x62,0x03,0xfd,0x08,0x56,0xcc,0xab] + vreducepd $0xab, %xmm28, %xmm25 + +// CHECK: vreducepd $171, %xmm28, %xmm25 {%k4} +// CHECK: encoding: [0x62,0x03,0xfd,0x0c,0x56,0xcc,0xab] + vreducepd $0xab, %xmm28, %xmm25 {%k4} + +// CHECK: vreducepd $171, %xmm28, %xmm25 {%k4} {z} +// CHECK: encoding: [0x62,0x03,0xfd,0x8c,0x56,0xcc,0xab] + vreducepd $0xab, %xmm28, %xmm25 {%k4} {z} + +// CHECK: vreducepd $123, %xmm28, %xmm25 +// CHECK: encoding: [0x62,0x03,0xfd,0x08,0x56,0xcc,0x7b] + vreducepd $0x7b, %xmm28, %xmm25 + +// CHECK: vreducepd $123, (%rcx), %xmm25 +// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x56,0x09,0x7b] + vreducepd $0x7b,(%rcx), %xmm25 + +// CHECK: vreducepd $123, 4660(%rax,%r14,8), %xmm25 +// CHECK: encoding: [0x62,0x23,0xfd,0x08,0x56,0x8c,0xf0,0x34,0x12,0x00,0x00,0x7b] + vreducepd $0x7b,4660(%rax,%r14,8), %xmm25 + +// CHECK: vreducepd $123, (%rcx){1to2}, %xmm25 +// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x56,0x09,0x7b] + 
vreducepd $0x7b,(%rcx){1to2}, %xmm25 + +// CHECK: vreducepd $123, 2032(%rdx), %xmm25 +// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x56,0x4a,0x7f,0x7b] + vreducepd $0x7b,2032(%rdx), %xmm25 + +// CHECK: vreducepd $123, 2048(%rdx), %xmm25 +// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x56,0x8a,0x00,0x08,0x00,0x00,0x7b] + vreducepd $0x7b,2048(%rdx), %xmm25 + +// CHECK: vreducepd $123, -2048(%rdx), %xmm25 +// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x56,0x4a,0x80,0x7b] + vreducepd $0x7b,-2048(%rdx), %xmm25 + +// CHECK: vreducepd $123, -2064(%rdx), %xmm25 +// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x56,0x8a,0xf0,0xf7,0xff,0xff,0x7b] + vreducepd $0x7b,-2064(%rdx), %xmm25 + +// CHECK: vreducepd $123, 1016(%rdx){1to2}, %xmm25 +// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x56,0x4a,0x7f,0x7b] + vreducepd $0x7b,1016(%rdx){1to2}, %xmm25 + +// CHECK: vreducepd $123, 1024(%rdx){1to2}, %xmm25 +// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x56,0x8a,0x00,0x04,0x00,0x00,0x7b] + vreducepd $0x7b,1024(%rdx){1to2}, %xmm25 + +// CHECK: vreducepd $123, -1024(%rdx){1to2}, %xmm25 +// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x56,0x4a,0x80,0x7b] + vreducepd $0x7b,-1024(%rdx){1to2}, %xmm25 + +// CHECK: vreducepd $123, -1032(%rdx){1to2}, %xmm25 +// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x56,0x8a,0xf8,0xfb,0xff,0xff,0x7b] + vreducepd $0x7b,-1032(%rdx){1to2}, %xmm25 + +// CHECK: vreducepd $171, %ymm17, %ymm28 +// CHECK: encoding: [0x62,0x23,0xfd,0x28,0x56,0xe1,0xab] + vreducepd $0xab, %ymm17, %ymm28 + +// CHECK: vreducepd $171, %ymm17, %ymm28 {%k4} +// CHECK: encoding: [0x62,0x23,0xfd,0x2c,0x56,0xe1,0xab] + vreducepd $0xab, %ymm17, %ymm28 {%k4} + +// CHECK: vreducepd $171, %ymm17, %ymm28 {%k4} {z} +// CHECK: encoding: [0x62,0x23,0xfd,0xac,0x56,0xe1,0xab] + vreducepd $0xab, %ymm17, %ymm28 {%k4} {z} + +// CHECK: vreducepd $123, %ymm17, %ymm28 +// CHECK: encoding: [0x62,0x23,0xfd,0x28,0x56,0xe1,0x7b] + vreducepd $0x7b, %ymm17, %ymm28 + +// CHECK: vreducepd $123, (%rcx), %ymm28 +// CHECK: encoding: 
[0x62,0x63,0xfd,0x28,0x56,0x21,0x7b] + vreducepd $0x7b,(%rcx), %ymm28 + +// CHECK: vreducepd $123, 4660(%rax,%r14,8), %ymm28 +// CHECK: encoding: [0x62,0x23,0xfd,0x28,0x56,0xa4,0xf0,0x34,0x12,0x00,0x00,0x7b] + vreducepd $0x7b,4660(%rax,%r14,8), %ymm28 + +// CHECK: vreducepd $123, (%rcx){1to4}, %ymm28 +// CHECK: encoding: [0x62,0x63,0xfd,0x38,0x56,0x21,0x7b] + vreducepd $0x7b,(%rcx){1to4}, %ymm28 + +// CHECK: vreducepd $123, 4064(%rdx), %ymm28 +// CHECK: encoding: [0x62,0x63,0xfd,0x28,0x56,0x62,0x7f,0x7b] + vreducepd $0x7b,4064(%rdx), %ymm28 + +// CHECK: vreducepd $123, 4096(%rdx), %ymm28 +// CHECK: encoding: [0x62,0x63,0xfd,0x28,0x56,0xa2,0x00,0x10,0x00,0x00,0x7b] + vreducepd $0x7b,4096(%rdx), %ymm28 + +// CHECK: vreducepd $123, -4096(%rdx), %ymm28 +// CHECK: encoding: [0x62,0x63,0xfd,0x28,0x56,0x62,0x80,0x7b] + vreducepd $0x7b,-4096(%rdx), %ymm28 + +// CHECK: vreducepd $123, -4128(%rdx), %ymm28 +// CHECK: encoding: [0x62,0x63,0xfd,0x28,0x56,0xa2,0xe0,0xef,0xff,0xff,0x7b] + vreducepd $0x7b,-4128(%rdx), %ymm28 + +// CHECK: vreducepd $123, 1016(%rdx){1to4}, %ymm28 +// CHECK: encoding: [0x62,0x63,0xfd,0x38,0x56,0x62,0x7f,0x7b] + vreducepd $0x7b,1016(%rdx){1to4}, %ymm28 + +// CHECK: vreducepd $123, 1024(%rdx){1to4}, %ymm28 +// CHECK: encoding: [0x62,0x63,0xfd,0x38,0x56,0xa2,0x00,0x04,0x00,0x00,0x7b] + vreducepd $0x7b,1024(%rdx){1to4}, %ymm28 + +// CHECK: vreducepd $123, -1024(%rdx){1to4}, %ymm28 +// CHECK: encoding: [0x62,0x63,0xfd,0x38,0x56,0x62,0x80,0x7b] + vreducepd $0x7b,-1024(%rdx){1to4}, %ymm28 + +// CHECK: vreducepd $123, -1032(%rdx){1to4}, %ymm28 +// CHECK: encoding: [0x62,0x63,0xfd,0x38,0x56,0xa2,0xf8,0xfb,0xff,0xff,0x7b] + vreducepd $0x7b,-1032(%rdx){1to4}, %ymm28 + +// CHECK: vreduceps $171, %xmm21, %xmm29 +// CHECK: encoding: [0x62,0x23,0x7d,0x08,0x56,0xed,0xab] + vreduceps $0xab, %xmm21, %xmm29 + +// CHECK: vreduceps $171, %xmm21, %xmm29 {%k7} +// CHECK: encoding: [0x62,0x23,0x7d,0x0f,0x56,0xed,0xab] + vreduceps $0xab, %xmm21, %xmm29 {%k7} + +// CHECK: 
vreduceps $171, %xmm21, %xmm29 {%k7} {z} +// CHECK: encoding: [0x62,0x23,0x7d,0x8f,0x56,0xed,0xab] + vreduceps $0xab, %xmm21, %xmm29 {%k7} {z} + +// CHECK: vreduceps $123, %xmm21, %xmm29 +// CHECK: encoding: [0x62,0x23,0x7d,0x08,0x56,0xed,0x7b] + vreduceps $0x7b, %xmm21, %xmm29 + +// CHECK: vreduceps $123, (%rcx), %xmm29 +// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x56,0x29,0x7b] + vreduceps $0x7b,(%rcx), %xmm29 + +// CHECK: vreduceps $123, 4660(%rax,%r14,8), %xmm29 +// CHECK: encoding: [0x62,0x23,0x7d,0x08,0x56,0xac,0xf0,0x34,0x12,0x00,0x00,0x7b] + vreduceps $0x7b,4660(%rax,%r14,8), %xmm29 + +// CHECK: vreduceps $123, (%rcx){1to4}, %xmm29 +// CHECK: encoding: [0x62,0x63,0x7d,0x18,0x56,0x29,0x7b] + vreduceps $0x7b,(%rcx){1to4}, %xmm29 + +// CHECK: vreduceps $123, 2032(%rdx), %xmm29 +// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x56,0x6a,0x7f,0x7b] + vreduceps $0x7b,2032(%rdx), %xmm29 + +// CHECK: vreduceps $123, 2048(%rdx), %xmm29 +// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x56,0xaa,0x00,0x08,0x00,0x00,0x7b] + vreduceps $0x7b,2048(%rdx), %xmm29 + +// CHECK: vreduceps $123, -2048(%rdx), %xmm29 +// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x56,0x6a,0x80,0x7b] + vreduceps $0x7b,-2048(%rdx), %xmm29 + +// CHECK: vreduceps $123, -2064(%rdx), %xmm29 +// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x56,0xaa,0xf0,0xf7,0xff,0xff,0x7b] + vreduceps $0x7b,-2064(%rdx), %xmm29 + +// CHECK: vreduceps $123, 508(%rdx){1to4}, %xmm29 +// CHECK: encoding: [0x62,0x63,0x7d,0x18,0x56,0x6a,0x7f,0x7b] + vreduceps $0x7b,508(%rdx){1to4}, %xmm29 + +// CHECK: vreduceps $123, 512(%rdx){1to4}, %xmm29 +// CHECK: encoding: [0x62,0x63,0x7d,0x18,0x56,0xaa,0x00,0x02,0x00,0x00,0x7b] + vreduceps $0x7b,512(%rdx){1to4}, %xmm29 + +// CHECK: vreduceps $123, -512(%rdx){1to4}, %xmm29 +// CHECK: encoding: [0x62,0x63,0x7d,0x18,0x56,0x6a,0x80,0x7b] + vreduceps $0x7b,-512(%rdx){1to4}, %xmm29 + +// CHECK: vreduceps $123, -516(%rdx){1to4}, %xmm29 +// CHECK: encoding: [0x62,0x63,0x7d,0x18,0x56,0xaa,0xfc,0xfd,0xff,0xff,0x7b] + 
vreduceps $0x7b,-516(%rdx){1to4}, %xmm29 + +// CHECK: vreduceps $171, %ymm23, %ymm25 +// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x56,0xcf,0xab] + vreduceps $0xab, %ymm23, %ymm25 + +// CHECK: vreduceps $171, %ymm23, %ymm25 {%k3} +// CHECK: encoding: [0x62,0x23,0x7d,0x2b,0x56,0xcf,0xab] + vreduceps $0xab, %ymm23, %ymm25 {%k3} + +// CHECK: vreduceps $171, %ymm23, %ymm25 {%k3} {z} +// CHECK: encoding: [0x62,0x23,0x7d,0xab,0x56,0xcf,0xab] + vreduceps $0xab, %ymm23, %ymm25 {%k3} {z} + +// CHECK: vreduceps $123, %ymm23, %ymm25 +// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x56,0xcf,0x7b] + vreduceps $0x7b, %ymm23, %ymm25 + +// CHECK: vreduceps $123, (%rcx), %ymm25 +// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x56,0x09,0x7b] + vreduceps $0x7b,(%rcx), %ymm25 + +// CHECK: vreduceps $123, 4660(%rax,%r14,8), %ymm25 +// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x56,0x8c,0xf0,0x34,0x12,0x00,0x00,0x7b] + vreduceps $0x7b,4660(%rax,%r14,8), %ymm25 + +// CHECK: vreduceps $123, (%rcx){1to8}, %ymm25 +// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x56,0x09,0x7b] + vreduceps $0x7b,(%rcx){1to8}, %ymm25 + +// CHECK: vreduceps $123, 4064(%rdx), %ymm25 +// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x56,0x4a,0x7f,0x7b] + vreduceps $0x7b,4064(%rdx), %ymm25 + +// CHECK: vreduceps $123, 4096(%rdx), %ymm25 +// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x56,0x8a,0x00,0x10,0x00,0x00,0x7b] + vreduceps $0x7b,4096(%rdx), %ymm25 + +// CHECK: vreduceps $123, -4096(%rdx), %ymm25 +// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x56,0x4a,0x80,0x7b] + vreduceps $0x7b,-4096(%rdx), %ymm25 + +// CHECK: vreduceps $123, -4128(%rdx), %ymm25 +// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x56,0x8a,0xe0,0xef,0xff,0xff,0x7b] + vreduceps $0x7b,-4128(%rdx), %ymm25 + +// CHECK: vreduceps $123, 508(%rdx){1to8}, %ymm25 +// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x56,0x4a,0x7f,0x7b] + vreduceps $0x7b,508(%rdx){1to8}, %ymm25 + +// CHECK: vreduceps $123, 512(%rdx){1to8}, %ymm25 +// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x56,0x8a,0x00,0x02,0x00,0x00,0x7b] + 
vreduceps $0x7b,512(%rdx){1to8}, %ymm25 + +// CHECK: vreduceps $123, -512(%rdx){1to8}, %ymm25 +// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x56,0x4a,0x80,0x7b] + vreduceps $0x7b,-512(%rdx){1to8}, %ymm25 + +// CHECK: vreduceps $123, -516(%rdx){1to8}, %ymm25 +// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x56,0x8a,0xfc,0xfd,0xff,0xff,0x7b] + vreduceps $0x7b,-516(%rdx){1to8}, %ymm25 + +// CHECK: vreducepd $123, (%rcx){1to2}, %xmm18 +// CHECK: encoding: [0x62,0xe3,0xfd,0x18,0x56,0x11,0x7b] + vreducepd $0x7b,(%rcx){1to2}, %xmm18 + +// CHECK: vreducepd $123, 2032(%rdx), %xmm18 +// CHECK: encoding: [0x62,0xe3,0xfd,0x08,0x56,0x52,0x7f,0x7b] + vreducepd $0x7b,2032(%rdx), %xmm18 + +// CHECK: vreducepd $123, 2048(%rdx), %xmm18 +// CHECK: encoding: [0x62,0xe3,0xfd,0x08,0x56,0x92,0x00,0x08,0x00,0x00,0x7b] + vreducepd $0x7b,2048(%rdx), %xmm18 + +// CHECK: vreducepd $123, -2048(%rdx), %xmm18 +// CHECK: encoding: [0x62,0xe3,0xfd,0x08,0x56,0x52,0x80,0x7b] + vreducepd $0x7b,-2048(%rdx), %xmm18 + +// CHECK: vreducepd $123, -2064(%rdx), %xmm18 +// CHECK: encoding: [0x62,0xe3,0xfd,0x08,0x56,0x92,0xf0,0xf7,0xff,0xff,0x7b] + vreducepd $0x7b,-2064(%rdx), %xmm18 + +// CHECK: vreducepd $123, 1016(%rdx){1to2}, %xmm18 +// CHECK: encoding: [0x62,0xe3,0xfd,0x18,0x56,0x52,0x7f,0x7b] + vreducepd $0x7b,1016(%rdx){1to2}, %xmm18 + +// CHECK: vreducepd $123, 1024(%rdx){1to2}, %xmm18 +// CHECK: encoding: [0x62,0xe3,0xfd,0x18,0x56,0x92,0x00,0x04,0x00,0x00,0x7b] + vreducepd $0x7b,1024(%rdx){1to2}, %xmm18 + +// CHECK: vreducepd $123, -1024(%rdx){1to2}, %xmm18 +// CHECK: encoding: [0x62,0xe3,0xfd,0x18,0x56,0x52,0x80,0x7b] + vreducepd $0x7b,-1024(%rdx){1to2}, %xmm18 + +// CHECK: vreducepd $123, -1032(%rdx){1to2}, %xmm18 +// CHECK: encoding: [0x62,0xe3,0xfd,0x18,0x56,0x92,0xf8,0xfb,0xff,0xff,0x7b] + vreducepd $0x7b,-1032(%rdx){1to2}, %xmm18 + +// CHECK: vreducepd $171, %ymm29, %ymm25 +// CHECK: encoding: [0x62,0x03,0xfd,0x28,0x56,0xcd,0xab] + vreducepd $0xab, %ymm29, %ymm25 + +// CHECK: vreducepd $171, %ymm29, 
%ymm25 {%k1} +// CHECK: encoding: [0x62,0x03,0xfd,0x29,0x56,0xcd,0xab] + vreducepd $0xab, %ymm29, %ymm25 {%k1} + +// CHECK: vreducepd $171, %ymm29, %ymm25 {%k1} {z} +// CHECK: encoding: [0x62,0x03,0xfd,0xa9,0x56,0xcd,0xab] + vreducepd $0xab, %ymm29, %ymm25 {%k1} {z} + +// CHECK: vreducepd $123, %ymm29, %ymm25 +// CHECK: encoding: [0x62,0x03,0xfd,0x28,0x56,0xcd,0x7b] + vreducepd $0x7b, %ymm29, %ymm25 + +// CHECK: vreducepd $123, (%rcx), %ymm25 +// CHECK: encoding: [0x62,0x63,0xfd,0x28,0x56,0x09,0x7b] + vreducepd $0x7b,(%rcx), %ymm25 + +// CHECK: vreducepd $123, 291(%rax,%r14,8), %ymm25 +// CHECK: encoding: [0x62,0x23,0xfd,0x28,0x56,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b] + vreducepd $0x7b,291(%rax,%r14,8), %ymm25 + +// CHECK: vreducepd $123, (%rcx){1to4}, %ymm25 +// CHECK: encoding: [0x62,0x63,0xfd,0x38,0x56,0x09,0x7b] + vreducepd $0x7b,(%rcx){1to4}, %ymm25 + +// CHECK: vreducepd $123, 4064(%rdx), %ymm25 +// CHECK: encoding: [0x62,0x63,0xfd,0x28,0x56,0x4a,0x7f,0x7b] + vreducepd $0x7b,4064(%rdx), %ymm25 + +// CHECK: vreducepd $123, 4096(%rdx), %ymm25 +// CHECK: encoding: [0x62,0x63,0xfd,0x28,0x56,0x8a,0x00,0x10,0x00,0x00,0x7b] + vreducepd $0x7b,4096(%rdx), %ymm25 + +// CHECK: vreducepd $123, -4096(%rdx), %ymm25 +// CHECK: encoding: [0x62,0x63,0xfd,0x28,0x56,0x4a,0x80,0x7b] + vreducepd $0x7b,-4096(%rdx), %ymm25 + +// CHECK: vreducepd $123, -4128(%rdx), %ymm25 +// CHECK: encoding: [0x62,0x63,0xfd,0x28,0x56,0x8a,0xe0,0xef,0xff,0xff,0x7b] + vreducepd $0x7b,-4128(%rdx), %ymm25 + +// CHECK: vreducepd $123, 1016(%rdx){1to4}, %ymm25 +// CHECK: encoding: [0x62,0x63,0xfd,0x38,0x56,0x4a,0x7f,0x7b] + vreducepd $0x7b,1016(%rdx){1to4}, %ymm25 + +// CHECK: vreducepd $123, 1024(%rdx){1to4}, %ymm25 +// CHECK: encoding: [0x62,0x63,0xfd,0x38,0x56,0x8a,0x00,0x04,0x00,0x00,0x7b] + vreducepd $0x7b,1024(%rdx){1to4}, %ymm25 + +// CHECK: vreducepd $123, -1024(%rdx){1to4}, %ymm25 +// CHECK: encoding: [0x62,0x63,0xfd,0x38,0x56,0x4a,0x80,0x7b] + vreducepd $0x7b,-1024(%rdx){1to4}, %ymm25 + +// 
CHECK: vreducepd $123, -1032(%rdx){1to4}, %ymm25 +// CHECK: encoding: [0x62,0x63,0xfd,0x38,0x56,0x8a,0xf8,0xfb,0xff,0xff,0x7b] + vreducepd $0x7b,-1032(%rdx){1to4}, %ymm25 + +// CHECK: vreduceps $171, %xmm23, %xmm20 +// CHECK: encoding: [0x62,0xa3,0x7d,0x08,0x56,0xe7,0xab] + vreduceps $0xab, %xmm23, %xmm20 + +// CHECK: vreduceps $171, %xmm23, %xmm20 {%k7} +// CHECK: encoding: [0x62,0xa3,0x7d,0x0f,0x56,0xe7,0xab] + vreduceps $0xab, %xmm23, %xmm20 {%k7} + +// CHECK: vreduceps $171, %xmm23, %xmm20 {%k7} {z} +// CHECK: encoding: [0x62,0xa3,0x7d,0x8f,0x56,0xe7,0xab] + vreduceps $0xab, %xmm23, %xmm20 {%k7} {z} + +// CHECK: vreduceps $123, %xmm23, %xmm20 +// CHECK: encoding: [0x62,0xa3,0x7d,0x08,0x56,0xe7,0x7b] + vreduceps $0x7b, %xmm23, %xmm20 + +// CHECK: vreduceps $123, (%rcx), %xmm20 +// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x56,0x21,0x7b] + vreduceps $0x7b,(%rcx), %xmm20 + +// CHECK: vreduceps $123, 291(%rax,%r14,8), %xmm20 +// CHECK: encoding: [0x62,0xa3,0x7d,0x08,0x56,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b] + vreduceps $0x7b,291(%rax,%r14,8), %xmm20 + +// CHECK: vreduceps $123, (%rcx){1to4}, %xmm20 +// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x56,0x21,0x7b] + vreduceps $0x7b,(%rcx){1to4}, %xmm20 + +// CHECK: vreduceps $123, 2032(%rdx), %xmm20 +// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x56,0x62,0x7f,0x7b] + vreduceps $0x7b,2032(%rdx), %xmm20 + +// CHECK: vreduceps $123, 2048(%rdx), %xmm20 +// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x56,0xa2,0x00,0x08,0x00,0x00,0x7b] + vreduceps $0x7b,2048(%rdx), %xmm20 + +// CHECK: vreduceps $123, -2048(%rdx), %xmm20 +// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x56,0x62,0x80,0x7b] + vreduceps $0x7b,-2048(%rdx), %xmm20 + +// CHECK: vreduceps $123, -2064(%rdx), %xmm20 +// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x56,0xa2,0xf0,0xf7,0xff,0xff,0x7b] + vreduceps $0x7b,-2064(%rdx), %xmm20 + +// CHECK: vreduceps $123, 508(%rdx){1to4}, %xmm20 +// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x56,0x62,0x7f,0x7b] + vreduceps $0x7b,508(%rdx){1to4}, %xmm20 + +// 
CHECK: vreduceps $123, 512(%rdx){1to4}, %xmm20 +// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x56,0xa2,0x00,0x02,0x00,0x00,0x7b] + vreduceps $0x7b,512(%rdx){1to4}, %xmm20 + +// CHECK: vreduceps $123, -512(%rdx){1to4}, %xmm20 +// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x56,0x62,0x80,0x7b] + vreduceps $0x7b,-512(%rdx){1to4}, %xmm20 + +// CHECK: vreduceps $123, -516(%rdx){1to4}, %xmm20 +// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x56,0xa2,0xfc,0xfd,0xff,0xff,0x7b] + vreduceps $0x7b,-516(%rdx){1to4}, %xmm20 + +// CHECK: vreduceps $171, %ymm22, %ymm26 +// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x56,0xd6,0xab] + vreduceps $0xab, %ymm22, %ymm26 + +// CHECK: vreduceps $171, %ymm22, %ymm26 {%k6} +// CHECK: encoding: [0x62,0x23,0x7d,0x2e,0x56,0xd6,0xab] + vreduceps $0xab, %ymm22, %ymm26 {%k6} + +// CHECK: vreduceps $171, %ymm22, %ymm26 {%k6} {z} +// CHECK: encoding: [0x62,0x23,0x7d,0xae,0x56,0xd6,0xab] + vreduceps $0xab, %ymm22, %ymm26 {%k6} {z} + +// CHECK: vreduceps $123, %ymm22, %ymm26 +// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x56,0xd6,0x7b] + vreduceps $0x7b, %ymm22, %ymm26 + +// CHECK: vreduceps $123, (%rcx), %ymm26 +// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x56,0x11,0x7b] + vreduceps $0x7b,(%rcx), %ymm26 + +// CHECK: vreduceps $123, 291(%rax,%r14,8), %ymm26 +// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x56,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b] + vreduceps $0x7b,291(%rax,%r14,8), %ymm26 + +// CHECK: vreduceps $123, (%rcx){1to8}, %ymm26 +// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x56,0x11,0x7b] + vreduceps $0x7b,(%rcx){1to8}, %ymm26 + +// CHECK: vreduceps $123, 4064(%rdx), %ymm26 +// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x56,0x52,0x7f,0x7b] + vreduceps $0x7b,4064(%rdx), %ymm26 + +// CHECK: vreduceps $123, 4096(%rdx), %ymm26 +// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x56,0x92,0x00,0x10,0x00,0x00,0x7b] + vreduceps $0x7b,4096(%rdx), %ymm26 + +// CHECK: vreduceps $123, -4096(%rdx), %ymm26 +// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x56,0x52,0x80,0x7b] + vreduceps $0x7b,-4096(%rdx), 
%ymm26 + +// CHECK: vreduceps $123, -4128(%rdx), %ymm26 +// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x56,0x92,0xe0,0xef,0xff,0xff,0x7b] + vreduceps $0x7b,-4128(%rdx), %ymm26 + +// CHECK: vreduceps $123, 508(%rdx){1to8}, %ymm26 +// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x56,0x52,0x7f,0x7b] + vreduceps $0x7b,508(%rdx){1to8}, %ymm26 + +// CHECK: vreduceps $123, 512(%rdx){1to8}, %ymm26 +// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x56,0x92,0x00,0x02,0x00,0x00,0x7b] + vreduceps $0x7b,512(%rdx){1to8}, %ymm26 + +// CHECK: vreduceps $123, -512(%rdx){1to8}, %ymm26 +// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x56,0x52,0x80,0x7b] + vreduceps $0x7b,-512(%rdx){1to8}, %ymm26 + +// CHECK: vreduceps $123, -516(%rdx){1to8}, %ymm26 +// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x56,0x92,0xfc,0xfd,0xff,0xff,0x7b] + vreduceps $0x7b,-516(%rdx){1to8}, %ymm26 + // CHECK: vcvtpd2qq %xmm22, %xmm24 // CHECK: encoding: [0x62,0x21,0xfd,0x08,0x7b,0xc6] vcvtpd2qq %xmm22, %xmm24 diff --git a/test/MC/X86/x86-64-avx512f_vl.s b/test/MC/X86/x86-64-avx512f_vl.s index c746e6627f7..eca2ffbfc09 100644 --- a/test/MC/X86/x86-64-avx512f_vl.s +++ b/test/MC/X86/x86-64-avx512f_vl.s @@ -16285,6 +16285,246 @@ vaddpd {rz-sae}, %zmm2, %zmm1, %zmm1 // CHECK: encoding: [0x62,0x62,0x4d,0x30,0x2c,0x8a,0xfc,0xfd,0xff,0xff] vscalefps -516(%rdx){1to8}, %ymm22, %ymm25 +// CHECK: vrndscalepd $171, %xmm28, %xmm29 +// CHECK: encoding: [0x62,0x03,0xfd,0x08,0x09,0xec,0xab] + vrndscalepd $0xab, %xmm28, %xmm29 + +// CHECK: vrndscalepd $171, %xmm28, %xmm29 {%k4} +// CHECK: encoding: [0x62,0x03,0xfd,0x0c,0x09,0xec,0xab] + vrndscalepd $0xab, %xmm28, %xmm29 {%k4} + +// CHECK: vrndscalepd $171, %xmm28, %xmm29 {%k4} {z} +// CHECK: encoding: [0x62,0x03,0xfd,0x8c,0x09,0xec,0xab] + vrndscalepd $0xab, %xmm28, %xmm29 {%k4} {z} + +// CHECK: vrndscalepd $123, %xmm28, %xmm29 +// CHECK: encoding: [0x62,0x03,0xfd,0x08,0x09,0xec,0x7b] + vrndscalepd $0x7b, %xmm28, %xmm29 + +// CHECK: vrndscalepd $123, (%rcx), %xmm29 +// CHECK: encoding: 
[0x62,0x63,0xfd,0x08,0x09,0x29,0x7b] + vrndscalepd $0x7b, (%rcx), %xmm29 + +// CHECK: vrndscalepd $123, 291(%rax,%r14,8), %xmm29 +// CHECK: encoding: [0x62,0x23,0xfd,0x08,0x09,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b] + vrndscalepd $0x7b, 291(%rax,%r14,8), %xmm29 + +// CHECK: vrndscalepd $123, (%rcx){1to2}, %xmm29 +// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x09,0x29,0x7b] + vrndscalepd $0x7b, (%rcx){1to2}, %xmm29 + +// CHECK: vrndscalepd $123, 2032(%rdx), %xmm29 +// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x09,0x6a,0x7f,0x7b] + vrndscalepd $0x7b, 2032(%rdx), %xmm29 + +// CHECK: vrndscalepd $123, 2048(%rdx), %xmm29 +// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x09,0xaa,0x00,0x08,0x00,0x00,0x7b] + vrndscalepd $0x7b, 2048(%rdx), %xmm29 + +// CHECK: vrndscalepd $123, -2048(%rdx), %xmm29 +// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x09,0x6a,0x80,0x7b] + vrndscalepd $0x7b, -2048(%rdx), %xmm29 + +// CHECK: vrndscalepd $123, -2064(%rdx), %xmm29 +// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x09,0xaa,0xf0,0xf7,0xff,0xff,0x7b] + vrndscalepd $0x7b, -2064(%rdx), %xmm29 + +// CHECK: vrndscalepd $123, 1016(%rdx){1to2}, %xmm29 +// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x09,0x6a,0x7f,0x7b] + vrndscalepd $0x7b, 1016(%rdx){1to2}, %xmm29 + +// CHECK: vrndscalepd $123, 1024(%rdx){1to2}, %xmm29 +// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x09,0xaa,0x00,0x04,0x00,0x00,0x7b] + vrndscalepd $0x7b, 1024(%rdx){1to2}, %xmm29 + +// CHECK: vrndscalepd $123, -1024(%rdx){1to2}, %xmm29 +// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x09,0x6a,0x80,0x7b] + vrndscalepd $0x7b, -1024(%rdx){1to2}, %xmm29 + +// CHECK: vrndscalepd $123, -1032(%rdx){1to2}, %xmm29 +// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x09,0xaa,0xf8,0xfb,0xff,0xff,0x7b] + vrndscalepd $0x7b, -1032(%rdx){1to2}, %xmm29 + +// CHECK: vrndscalepd $171, %ymm22, %ymm17 +// CHECK: encoding: [0x62,0xa3,0xfd,0x28,0x09,0xce,0xab] + vrndscalepd $0xab, %ymm22, %ymm17 + +// CHECK: vrndscalepd $171, %ymm22, %ymm17 {%k7} +// CHECK: encoding: [0x62,0xa3,0xfd,0x2f,0x09,0xce,0xab] 
+ vrndscalepd $0xab, %ymm22, %ymm17 {%k7} + +// CHECK: vrndscalepd $171, %ymm22, %ymm17 {%k7} {z} +// CHECK: encoding: [0x62,0xa3,0xfd,0xaf,0x09,0xce,0xab] + vrndscalepd $0xab, %ymm22, %ymm17 {%k7} {z} + +// CHECK: vrndscalepd $123, %ymm22, %ymm17 +// CHECK: encoding: [0x62,0xa3,0xfd,0x28,0x09,0xce,0x7b] + vrndscalepd $0x7b, %ymm22, %ymm17 + +// CHECK: vrndscalepd $123, (%rcx), %ymm17 +// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x09,0x09,0x7b] + vrndscalepd $0x7b, (%rcx), %ymm17 + +// CHECK: vrndscalepd $123, 291(%rax,%r14,8), %ymm17 +// CHECK: encoding: [0x62,0xa3,0xfd,0x28,0x09,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b] + vrndscalepd $0x7b, 291(%rax,%r14,8), %ymm17 + +// CHECK: vrndscalepd $123, (%rcx){1to4}, %ymm17 +// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x09,0x09,0x7b] + vrndscalepd $0x7b, (%rcx){1to4}, %ymm17 + +// CHECK: vrndscalepd $123, 4064(%rdx), %ymm17 +// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x09,0x4a,0x7f,0x7b] + vrndscalepd $0x7b, 4064(%rdx), %ymm17 + +// CHECK: vrndscalepd $123, 4096(%rdx), %ymm17 +// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x09,0x8a,0x00,0x10,0x00,0x00,0x7b] + vrndscalepd $0x7b, 4096(%rdx), %ymm17 + +// CHECK: vrndscalepd $123, -4096(%rdx), %ymm17 +// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x09,0x4a,0x80,0x7b] + vrndscalepd $0x7b, -4096(%rdx), %ymm17 + +// CHECK: vrndscalepd $123, -4128(%rdx), %ymm17 +// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x09,0x8a,0xe0,0xef,0xff,0xff,0x7b] + vrndscalepd $0x7b, -4128(%rdx), %ymm17 + +// CHECK: vrndscalepd $123, 1016(%rdx){1to4}, %ymm17 +// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x09,0x4a,0x7f,0x7b] + vrndscalepd $0x7b, 1016(%rdx){1to4}, %ymm17 + +// CHECK: vrndscalepd $123, 1024(%rdx){1to4}, %ymm17 +// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x09,0x8a,0x00,0x04,0x00,0x00,0x7b] + vrndscalepd $0x7b, 1024(%rdx){1to4}, %ymm17 + +// CHECK: vrndscalepd $123, -1024(%rdx){1to4}, %ymm17 +// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x09,0x4a,0x80,0x7b] + vrndscalepd $0x7b, -1024(%rdx){1to4}, %ymm17 + +// CHECK: 
vrndscalepd $123, -1032(%rdx){1to4}, %ymm17 +// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x09,0x8a,0xf8,0xfb,0xff,0xff,0x7b] + vrndscalepd $0x7b, -1032(%rdx){1to4}, %ymm17 + +// CHECK: vrndscaleps $171, %xmm26, %xmm22 +// CHECK: encoding: [0x62,0x83,0x7d,0x08,0x08,0xf2,0xab] + vrndscaleps $0xab, %xmm26, %xmm22 + +// CHECK: vrndscaleps $171, %xmm26, %xmm22 {%k4} +// CHECK: encoding: [0x62,0x83,0x7d,0x0c,0x08,0xf2,0xab] + vrndscaleps $0xab, %xmm26, %xmm22 {%k4} + +// CHECK: vrndscaleps $171, %xmm26, %xmm22 {%k4} {z} +// CHECK: encoding: [0x62,0x83,0x7d,0x8c,0x08,0xf2,0xab] + vrndscaleps $0xab, %xmm26, %xmm22 {%k4} {z} + +// CHECK: vrndscaleps $123, %xmm26, %xmm22 +// CHECK: encoding: [0x62,0x83,0x7d,0x08,0x08,0xf2,0x7b] + vrndscaleps $0x7b, %xmm26, %xmm22 + +// CHECK: vrndscaleps $123, (%rcx), %xmm22 +// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x08,0x31,0x7b] + vrndscaleps $0x7b, (%rcx), %xmm22 + +// CHECK: vrndscaleps $123, 291(%rax,%r14,8), %xmm22 +// CHECK: encoding: [0x62,0xa3,0x7d,0x08,0x08,0xb4,0xf0,0x23,0x01,0x00,0x00,0x7b] + vrndscaleps $0x7b, 291(%rax,%r14,8), %xmm22 + +// CHECK: vrndscaleps $123, (%rcx){1to4}, %xmm22 +// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x08,0x31,0x7b] + vrndscaleps $0x7b, (%rcx){1to4}, %xmm22 + +// CHECK: vrndscaleps $123, 2032(%rdx), %xmm22 +// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x08,0x72,0x7f,0x7b] + vrndscaleps $0x7b, 2032(%rdx), %xmm22 + +// CHECK: vrndscaleps $123, 2048(%rdx), %xmm22 +// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x08,0xb2,0x00,0x08,0x00,0x00,0x7b] + vrndscaleps $0x7b, 2048(%rdx), %xmm22 + +// CHECK: vrndscaleps $123, -2048(%rdx), %xmm22 +// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x08,0x72,0x80,0x7b] + vrndscaleps $0x7b, -2048(%rdx), %xmm22 + +// CHECK: vrndscaleps $123, -2064(%rdx), %xmm22 +// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x08,0xb2,0xf0,0xf7,0xff,0xff,0x7b] + vrndscaleps $0x7b, -2064(%rdx), %xmm22 + +// CHECK: vrndscaleps $123, 508(%rdx){1to4}, %xmm22 +// CHECK: encoding: 
[0x62,0xe3,0x7d,0x18,0x08,0x72,0x7f,0x7b] + vrndscaleps $0x7b, 508(%rdx){1to4}, %xmm22 + +// CHECK: vrndscaleps $123, 512(%rdx){1to4}, %xmm22 +// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x08,0xb2,0x00,0x02,0x00,0x00,0x7b] + vrndscaleps $0x7b, 512(%rdx){1to4}, %xmm22 + +// CHECK: vrndscaleps $123, -512(%rdx){1to4}, %xmm22 +// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x08,0x72,0x80,0x7b] + vrndscaleps $0x7b, -512(%rdx){1to4}, %xmm22 + +// CHECK: vrndscaleps $123, -516(%rdx){1to4}, %xmm22 +// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x08,0xb2,0xfc,0xfd,0xff,0xff,0x7b] + vrndscaleps $0x7b, -516(%rdx){1to4}, %xmm22 + +// CHECK: vrndscaleps $171, %ymm17, %ymm19 +// CHECK: encoding: [0x62,0xa3,0x7d,0x28,0x08,0xd9,0xab] + vrndscaleps $0xab, %ymm17, %ymm19 + +// CHECK: vrndscaleps $171, %ymm17, %ymm19 {%k7} +// CHECK: encoding: [0x62,0xa3,0x7d,0x2f,0x08,0xd9,0xab] + vrndscaleps $0xab, %ymm17, %ymm19 {%k7} + +// CHECK: vrndscaleps $171, %ymm17, %ymm19 {%k7} {z} +// CHECK: encoding: [0x62,0xa3,0x7d,0xaf,0x08,0xd9,0xab] + vrndscaleps $0xab, %ymm17, %ymm19 {%k7} {z} + +// CHECK: vrndscaleps $123, %ymm17, %ymm19 +// CHECK: encoding: [0x62,0xa3,0x7d,0x28,0x08,0xd9,0x7b] + vrndscaleps $0x7b, %ymm17, %ymm19 + +// CHECK: vrndscaleps $123, (%rcx), %ymm19 +// CHECK: encoding: [0x62,0xe3,0x7d,0x28,0x08,0x19,0x7b] + vrndscaleps $0x7b, (%rcx), %ymm19 + +// CHECK: vrndscaleps $123, 291(%rax,%r14,8), %ymm19 +// CHECK: encoding: [0x62,0xa3,0x7d,0x28,0x08,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b] + vrndscaleps $0x7b, 291(%rax,%r14,8), %ymm19 + +// CHECK: vrndscaleps $123, (%rcx){1to8}, %ymm19 +// CHECK: encoding: [0x62,0xe3,0x7d,0x38,0x08,0x19,0x7b] + vrndscaleps $0x7b, (%rcx){1to8}, %ymm19 + +// CHECK: vrndscaleps $123, 4064(%rdx), %ymm19 +// CHECK: encoding: [0x62,0xe3,0x7d,0x28,0x08,0x5a,0x7f,0x7b] + vrndscaleps $0x7b, 4064(%rdx), %ymm19 + +// CHECK: vrndscaleps $123, 4096(%rdx), %ymm19 +// CHECK: encoding: [0x62,0xe3,0x7d,0x28,0x08,0x9a,0x00,0x10,0x00,0x00,0x7b] + vrndscaleps $0x7b, 4096(%rdx), %ymm19 
+ +// CHECK: vrndscaleps $123, -4096(%rdx), %ymm19 +// CHECK: encoding: [0x62,0xe3,0x7d,0x28,0x08,0x5a,0x80,0x7b] + vrndscaleps $0x7b, -4096(%rdx), %ymm19 + +// CHECK: vrndscaleps $123, -4128(%rdx), %ymm19 +// CHECK: encoding: [0x62,0xe3,0x7d,0x28,0x08,0x9a,0xe0,0xef,0xff,0xff,0x7b] + vrndscaleps $0x7b, -4128(%rdx), %ymm19 + +// CHECK: vrndscaleps $123, 508(%rdx){1to8}, %ymm19 +// CHECK: encoding: [0x62,0xe3,0x7d,0x38,0x08,0x5a,0x7f,0x7b] + vrndscaleps $0x7b, 508(%rdx){1to8}, %ymm19 + +// CHECK: vrndscaleps $123, 512(%rdx){1to8}, %ymm19 +// CHECK: encoding: [0x62,0xe3,0x7d,0x38,0x08,0x9a,0x00,0x02,0x00,0x00,0x7b] + vrndscaleps $0x7b, 512(%rdx){1to8}, %ymm19 + +// CHECK: vrndscaleps $123, -512(%rdx){1to8}, %ymm19 +// CHECK: encoding: [0x62,0xe3,0x7d,0x38,0x08,0x5a,0x80,0x7b] + vrndscaleps $0x7b, -512(%rdx){1to8}, %ymm19 + +// CHECK: vrndscaleps $123, -516(%rdx){1to8}, %ymm19 +// CHECK: encoding: [0x62,0xe3,0x7d,0x38,0x08,0x9a,0xfc,0xfd,0xff,0xff,0x7b] + vrndscaleps $0x7b, -516(%rdx){1to8}, %ymm19 + // CHECK: vcvtps2pd %xmm27, %xmm20 // CHECK: encoding: [0x62,0x81,0x7c,0x08,0x5a,0xe3] vcvtps2pd %xmm27, %xmm20