From e1a621d84ff8d35bdbf27931352fa78305cabb2a Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Thu, 23 Jan 2014 14:27:26 +0000 Subject: [PATCH] AVX-512: added VPERM2D VPERM2Q VPERM2PS VPERM2PD instructions, they give better sequences than VPERMI git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199893 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 5 +- lib/Target/X86/X86ISelLowering.h | 1 + lib/Target/X86/X86InstrAVX512.td | 302 ++++++++++++------------ lib/Target/X86/X86InstrFragmentsSIMD.td | 1 + lib/Target/X86/X86InstrInfo.cpp | 6 +- test/CodeGen/X86/avx512-shuffle.ll | 14 +- 6 files changed, 171 insertions(+), 158 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 82ac9f90826..ede17c1750a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -7588,8 +7588,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // Bitcast is for VPERMPS since mask is v8i32 but node takes v8f32 return DAG.getNode(X86ISD::VPERMV, dl, VT, DAG.getNode(ISD::BITCAST, dl, VT, Mask), V1); - return DAG.getNode(X86ISD::VPERMV3, dl, VT, - DAG.getNode(ISD::BITCAST, dl, VT, Mask), V1, V2); + return DAG.getNode(X86ISD::VPERMV3, dl, VT, V1, + DAG.getNode(ISD::BITCAST, dl, VT, Mask), V2); } //===--------------------------------------------------------------------===// @@ -14023,6 +14023,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128"; case X86ISD::VPERMV: return "X86ISD::VPERMV"; case X86ISD::VPERMV3: return "X86ISD::VPERMV3"; + case X86ISD::VPERMIV3: return "X86ISD::VPERMIV3"; case X86ISD::VPERMI: return "X86ISD::VPERMI"; case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ"; case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS"; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index e14c78f5add..9b32d121010 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -339,6 +339,7 @@ namespace llvm { VPERMILP, VPERMV, VPERMV3, + VPERMIV3, VPERMI, VPERM2X128, VBROADCAST, diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index ff419f52bdd..0ae0da45ba7 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -369,10 +369,10 @@ multiclass avx512_fp_broadcast opc, string OpcodeStr, RegisterClass DestRC, RegisterClass SrcRC, X86MemOperand x86memop> { def rr : AVX5128I, EVEX; def rm : AVX5128I, EVEX; + !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),[]>, EVEX; } let ExeDomain = SSEPackedSingle in { defm VBROADCASTSSZ : avx512_fp_broadcast<0x18, "vbroadcastss", VR512, @@ -399,12 +399,12 @@ def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src), multiclass avx512_int_broadcast_reg opc, string OpcodeStr, RegisterClass SrcRC, RegisterClass KRC> { def Zrr : AVX5128I, EVEX, EVEX_V512; def Zkrr : AVX5128I, EVEX, EVEX_V512, EVEX_KZ; } @@ -444,25 +444,25 @@ multiclass avx512_int_broadcast_rm opc, string OpcodeStr, RegisterClass DstRC, ValueType OpVT, ValueType SrcVT, RegisterClass KRC> { def rr : AVX5128I, EVEX; def krr : AVX5128I, EVEX, EVEX_KZ; let mayLoad = 1 in { def rm : AVX5128I, EVEX; def krm : AVX5128I, EVEX, EVEX_KZ; } @@ -512,7 +512,7 @@ multiclass avx512_mask_broadcast opc, string OpcodeStr, RegisterClass DstRC, RegisterClass KRC, ValueType OpVT, ValueType SrcVT> { def rr : AVX512XS8I, EVEX; } @@ -531,14 +531,14 @@ multiclass avx512_perm_imm opc, string OpcodeStr, RegisterClass RC, def ri : AVX512AIi8, EVEX; def mi : AVX512AIi8, EVEX; @@ -557,14 +557,14 @@ multiclass avx512_perm opc, string OpcodeStr, RegisterClass RC, def rr : AVX5128I, EVEX_4V; def rm : AVX5128I, EVEX_4V; @@ -584,34 +584,42 @@ defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, memopv8f64, f512mem, // -- VPERM2I - 3 source operands form -- multiclass avx512_perm_3src opc, string OpcodeStr, RegisterClass RC, PatFrag mem_frag, X86MemOperand x86memop, - ValueType OpVT> { + SDNode OpNode, ValueType OpVT> { let Constraints = "$src1 = $dst" in { def rr : AVX5128I, + (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>, EVEX_4V; def rm : AVX5128I, EVEX_4V; } } defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv16i32, i512mem, - v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>; + X86VPermiv3, v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VPERMI2Q : avx512_perm_3src<0x76, "vpermi2q", VR512, memopv8i64, i512mem, - v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + X86VPermiv3, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, memopv16f32, i512mem, - v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>; + X86VPermiv3, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64, i512mem, - v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + X86VPermiv3, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPERM2D : avx512_perm_3src<0x7E, "vperm2d", VR512, memopv16i32, i512mem, + X86VPermv3, v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VPERM2Q : avx512_perm_3src<0x7E, "vperm2q", VR512, memopv8i64, i512mem, + X86VPermv3, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPERM2PS : avx512_perm_3src<0x7F, "vperm2ps", VR512, memopv16f32, i512mem, + X86VPermv3, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VPERM2PD : avx512_perm_3src<0x7F, "vperm2pd", VR512, memopv8f64, i512mem, + X86VPermv3, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; //===----------------------------------------------------------------------===// // AVX-512 - BLEND using mask // @@ -622,14 +630,14 @@ multiclass avx512_blendmask opc, string OpcodeStr, def rr : AVX5128I, EVEX_4V, EVEX_K; let mayLoad = 1 in def rm : AVX5128I, EVEX_4V, EVEX_K; } @@ -731,12 +739,12 @@ multiclass avx512_icmp_packed opc, string OpcodeStr, RegisterClass KRC, SDNode OpNode, ValueType vt> { def rr : AVX512BI, EVEX_4V; def rm : AVX512BI, EVEX_4V; } @@ -813,17 +821,17 @@ multiclass avx512_cmp_packed; def rrib: AVX512PIi8<0xC2, MRMSrcReg, (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc), !strconcat("vcmp${cc}", suffix, - "\t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"), + " \t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"), [], d>, EVEX_B; def rmi : AVX512PIi8<0xC2, MRMSrcMem, (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, AVXCC:$cc), - !strconcat("vcmp", suffix, - "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), + !strconcat("vcmp${cc}", suffix, + " \t{$src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [(set KRC:$dst, (X86cmpm (vt RC:$src1), (memop addr:$src2), imm:$cc))], d>; @@ -832,11 +840,11 @@ multiclass avx512_cmp_packed; + " \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>; def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc), !strconcat("vcmp", suffix, - "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>; + " \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>; } } @@ -896,14 +904,14 @@ multiclass avx512_mask_mov opc_kk, bits<8> opc_km, bits<8> opc_mk, ValueType vt, X86MemOperand x86memop> { let hasSideEffects = 0 in { def kk : I; + !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>; let mayLoad = 1 in def km : I; let mayStore = 1 in def mk : I; + !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>; } } @@ -912,9 +920,9 @@ multiclass avx512_mask_mov_gpr opc_kr, bits<8> opc_rk, RegisterClass KRC, RegisterClass GRC> { let hasSideEffects = 0 in { def kr : I; + !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>; def rk : I; + !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>; } } @@ -986,7 +994,7 @@ multiclass avx512_mask_unop opc, string OpcodeStr, RegisterClass KRC, SDPatternOperator OpNode> { let Predicates = [HasAVX512] in def rr : I; } @@ -1023,7 +1031,7 @@ multiclass avx512_mask_binop opc, string OpcodeStr, let Predicates = [HasAVX512] in def rr : I; } @@ -1098,7 +1106,7 @@ multiclass avx512_mask_unpck opc, string OpcodeStr, let Predicates = [HasAVX512] in def rr : I; + " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; } multiclass avx512_mask_unpck_bw opc, string OpcodeStr> { @@ -1127,7 +1135,7 @@ multiclass avx512_mask_testop opc, string OpcodeStr, RegisterClass KRC, SDNode OpNode> { let Predicates = [HasAVX512], Defs = [EFLAGS] in def rr : I; } @@ -1148,7 +1156,7 @@ multiclass avx512_mask_shiftop opc, string OpcodeStr, RegisterClass KRC, let Predicates = [HasAVX512] in def ri : Ii8; } @@ -1200,25 +1208,25 @@ def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))), multiclass avx512_mov_packed opc, RegisterClass RC, RegisterClass KRC, X86MemOperand x86memop, PatFrag ld_frag, - string asm, Domain d> { + string asm, Domain d, bit IsReMaterializable = 1> { let hasSideEffects = 0 in def rr : AVX512PI, + !strconcat(asm, " \t{$src, $dst|$dst, $src}"), [], d>, EVEX; -let canFoldAsLoad = 1 in +let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in def rm : AVX512PI, EVEX; let Constraints = "$src1 = $dst" in { def rrk : AVX512PI, + " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), [], d>, EVEX, EVEX_K; def rmk : AVX512PI, EVEX, EVEX_K; } } @@ -1234,7 +1242,7 @@ defm VMOVUPSZ : avx512_mov_packed<0x10, VR512, VK16WM, f512mem, loadv16f32, "vmovups", SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VMOVUPDZ : avx512_mov_packed<0x10, VR512, VK8WM, f512mem, loadv8f64, - "vmovupd", SSEPackedDouble>, + "vmovupd", SSEPackedDouble, 0>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; def VMOVAPSZmr : AVX512PI<0x29, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src), @@ -1301,25 +1309,25 @@ multiclass avx512_mov_int load_opc, bits<8> store_opc, string asm, PatFrag ld_frag, X86MemOperand x86memop> { let hasSideEffects = 0 in def rr : AVX512XSI, EVEX; + !strconcat(asm, " \t{$src, $dst|$dst, $src}"), []>, EVEX; let canFoldAsLoad = 1 in def rm : AVX512XSI, EVEX; let mayStore = 1 in def mr : AVX512XSI, EVEX; + !strconcat(asm, " \t{$src, $dst|$dst, $src}"), []>, EVEX; let Constraints = "$src1 = $dst" in { def rrk : AVX512XSI, + " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), []>, EVEX, EVEX_K; def rmk : AVX512XSI, EVEX, EVEX_K; } } @@ -1465,7 +1473,7 @@ multiclass avx512_move_scalar { let hasSideEffects = 0 in { def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2), - !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + !strconcat(asm, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128X:$dst, (vt (OpNode VR128X:$src1, (scalar_to_vector RC:$src2))))], IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG; @@ -1473,14 +1481,14 @@ multiclass avx512_move_scalar , EVEX_4V, VEX_LIG, EVEX_K; def rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), - !strconcat(asm, "\t{$src, $dst|$dst, $src}"), + !strconcat(asm, " \t{$src, $dst|$dst, $src}"), [(set RC:$dst, (mem_pat addr:$src))], IIC_SSE_MOV_S_RM>, EVEX, VEX_LIG; def mr: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), - !strconcat(asm, "\t{$src, $dst|$dst, $src}"), + !strconcat(asm, " \t{$src, $dst|$dst, $src}"), [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, EVEX, VEX_LIG; } //hasSideEffects = 0 @@ -1728,17 +1736,17 @@ multiclass avx512_binop_rm opc, string OpcodeStr, SDNode OpNode, let isCommutable = IsCommutable in def rr : AVX512BI, EVEX_4V; def rm : AVX512BI, EVEX_4V; def rmb : AVX512BI opc, string OpcodeStr, let isCommutable = IsCommutable in def rr : AVX512BI, EVEX_4V, VEX_W; def rm : AVX512BI, EVEX_4V, VEX_W; } @@ -1889,12 +1897,12 @@ multiclass avx512_unpack_int opc, string OpcodeStr, SDNode OpNode, X86MemOperand x86memop> { def rr : AVX512BI, EVEX_4V; def rm : AVX512BI, EVEX_4V; @@ -1921,14 +1929,14 @@ multiclass avx512_pshuf_imm opc, string OpcodeStr, RegisterClass RC, def ri : AVX512Ii8, EVEX; def mi : AVX512Ii8, EVEX; @@ -2013,17 +2021,17 @@ multiclass avx512_fp_packed opc, string OpcodeStr, SDNode OpNode, Domain d, OpndItins itins, bit commutable> { let isCommutable = commutable in def rr : PI, EVEX_4V, TB; let mayLoad = 1 in { def rm : PI, EVEX_4V, TB; def rmb : PI opc, string OpcodeStr, RegisterClass KRC, SDNode OpNode, ValueType vt> { def rr : AVX5128I, EVEX_4V; def rm : AVX5128I, EVEX_4V; } @@ -2142,23 +2150,23 @@ multiclass avx512_shift_rmi opc, Format ImmFormR, Format ImmFormM, RegisterClass KRC> { def ri : AVX512BIi8, EVEX_4V; def rik : AVX512BIi8, EVEX_4V, EVEX_K; def mi: AVX512BIi8, EVEX_4V; def mik: AVX512BIi8, EVEX_4V, EVEX_K; } @@ -2168,24 +2176,24 @@ multiclass avx512_shift_rrm opc, string OpcodeStr, SDNode OpNode, // src2 is always 128-bit def rr : AVX512BI, EVEX_4V; def rrk : AVX512BI, EVEX_4V, EVEX_K; def rm : AVX512BI, EVEX_4V; def rmk : AVX512BI, EVEX_4V, EVEX_K; } @@ -2239,13 +2247,13 @@ multiclass avx512_var_shift opc, string OpcodeStr, SDNode OpNode, X86MemOperand x86memop, PatFrag mem_frag> { def rr : AVX5128I, EVEX_4V; def rm : AVX5128I, EVEX_4V; @@ -2277,10 +2285,10 @@ defm VPSRAVQZ : avx512_var_shift<0x46, "vpsravq", sra, VR512, v8i64, multiclass avx512_movddup { def rr : AVX512PDI<0x12, MRMSrcReg, (outs RC:$dst), (ins RC:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), [(set RC:$dst, (VT (X86Movddup RC:$src)))]>, EVEX; def rm : AVX512PDI<0x12, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), [(set RC:$dst, (VT (X86Movddup (memop_frag addr:$src))))]>, EVEX; } @@ -2297,11 +2305,11 @@ multiclass avx512_replicate_sfp op, SDNode OpNode, string OpcodeStr, ValueType vt, RegisterClass RC, PatFrag mem_frag, X86MemOperand x86memop> { def rr : AVX512XSI, EVEX; let mayLoad = 1 in def rm : AVX512XSI, EVEX; } @@ -2355,18 +2363,18 @@ multiclass avx512_fma3p_rm opc, string OpcodeStr, string BrdcstStr, SDNode OpNode, ValueType OpVT> { def r: AVX512FMA3; let mayLoad = 1 in def m: AVX512FMA3; def mb: AVX512FMA3, EVEX_B; @@ -2434,11 +2442,11 @@ multiclass avx512_fma3p_m132 opc, string OpcodeStr, let mayLoad = 1 in def m: AVX512FMA3; def mb: AVX512FMA3, EVEX_B; @@ -2509,14 +2517,14 @@ multiclass avx512_fma3s_rm opc, string OpcodeStr, SDNode OpNode, def r : AVX512FMA3; let mayLoad = 1 in def m : AVX512FMA3; @@ -2549,12 +2557,12 @@ multiclass avx512_vcvtsi opc, RegisterClass SrcRC, RegisterClass DstRC, X86MemOperand x86memop, string asm> { let hasSideEffects = 0 in { def rr : SI, + !strconcat(asm," \t{$src, $src1, $dst|$dst, $src1, $src}"), []>, EVEX_4V; let mayLoad = 1 in def rm : SI, + !strconcat(asm," \t{$src, $src1, $dst|$dst, $src1, $src}"), []>, EVEX_4V; } // hasSideEffects = 0 } @@ -2622,12 +2630,12 @@ multiclass avx512_cvt_s_int opc, RegisterClass SrcRC, RegisterClass DstR string asm> { let hasSideEffects = 0 in { def rr : SI, EVEX, VEX_LIG, Requires<[HasAVX512]>; let mayLoad = 1 in def rm : SI, EVEX, VEX_LIG, + !strconcat(asm," \t{$src, $dst|$dst, $src}"), []>, EVEX, VEX_LIG, Requires<[HasAVX512]>; } // hasSideEffects = 0 } @@ -2725,10 +2733,10 @@ multiclass avx512_cvt_s opc, RegisterClass SrcRC, RegisterClass DstRC, SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag, string asm> { def rr : SI, EVEX; def rm : SI, EVEX; } @@ -2807,15 +2815,15 @@ multiclass avx512_vcvt_fp_with_rc opc, string asm, RegisterClass SrcRC, Domain d> { let hasSideEffects = 0 in { def rr : AVX512PI, EVEX; def rrb : AVX512PI, EVEX, EVEX_B, EVEX_RC; let mayLoad = 1 in def rm : AVX512PI, EVEX; } // hasSideEffects = 0 @@ -2827,12 +2835,12 @@ multiclass avx512_vcvt_fp opc, string asm, RegisterClass SrcRC, Domain d> { let hasSideEffects = 0 in { def rr : AVX512PI, EVEX; let mayLoad = 1 in def rm : AVX512PI, EVEX; } // hasSideEffects = 0 @@ -2933,14 +2941,14 @@ multiclass avx512_vcvt_fp2int opc, string asm, RegisterClass SrcRC, X86MemOperand x86memop, Domain d> { let hasSideEffects = 0 in { def rr : AVX512PI, EVEX; def rrb : AVX512PI, EVEX, EVEX_B, EVEX_RC; let mayLoad = 1 in def rm : AVX512PI, EVEX; } // hasSideEffects = 0 } @@ -3053,12 +3061,12 @@ multiclass avx512_fp14_s opc, string OpcodeStr, RegisterClass RC, def rr : AVX5128I, EVEX_4V; + " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V; let mayLoad = 1 in { def rm : AVX5128I, EVEX_4V; + " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V; } } } @@ -3098,11 +3106,11 @@ multiclass avx512_fp14_p opc, string OpcodeStr, SDNode OpNode, PatFrag mem_frag, ValueType OpVt> { def r : AVX5128I, EVEX; def m : AVX5128I, EVEX; } @@ -3136,17 +3144,17 @@ multiclass avx512_fp28_s opc, string OpcodeStr, RegisterClass RC, def rr : AVX5128I, EVEX_4V; + " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V; def rrb : AVX5128I, EVEX_4V, EVEX_B; let mayLoad = 1 in { def rm : AVX5128I, EVEX_4V; + " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V; } } } @@ -3190,14 +3198,14 @@ multiclass avx512_fp28_p opc, string OpcodeStr, let hasSideEffects = 0, Predicates = [HasERI] in { def r : AVX5128I, EVEX; def rb : AVX5128I, EVEX, EVEX_B; def m : AVX5128I, EVEX; } } @@ -3228,25 +3236,25 @@ multiclass avx512_sqrt_packed opc, string OpcodeStr, SDNode OpNode, Intrinsic V16F32Int, Intrinsic V8F64Int, OpndItins itins_s, OpndItins itins_d> { def PSZrr :AVX512PSI, EVEX, EVEX_V512; let mayLoad = 1 in def PSZrm : AVX512PSI, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>; def PDZrr : AVX512PDI, EVEX, EVEX_V512; let mayLoad = 1 in def PDZrm : AVX512PDI, EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>; @@ -3493,14 +3501,14 @@ let ExeDomain = d in { def r : AVX512AIi8, EVEX; // Vector intrinsic operation, mem def m : AVX512AIi8, EVEX; } // ExeDomain } @@ -3531,13 +3539,13 @@ let ExeDomain = d in { def r : AVX512AIi8, EVEX_4V; def m : AVX512AIi8, EVEX_4V; } // ExeDomain } @@ -3600,17 +3608,17 @@ multiclass avx512_trunc_sat opc, string OpcodeStr, RegisterClass KRC, X86MemOperand x86memop> { def rr : AVX512XS8I, EVEX; def krr : AVX512XS8I, EVEX, EVEX_KZ; def mr : AVX512XS8I, EVEX; } defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM, @@ -3666,11 +3674,11 @@ multiclass avx512_extend opc, string OpcodeStr, RegisterClass DstRC, def rr : AVX5128I, EVEX; def rm : AVX5128I, EVEX; @@ -3718,7 +3726,7 @@ let mayLoad = 1, def rm : AVX5128I, EVEX, EVEX_K; } defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", VK8WM, VR512, vy64xmem>, @@ -3747,7 +3755,7 @@ let mayStore = 1, Constraints = "$mask = $mask_wb" in def mr : AVX5128I, EVEX, EVEX_K; } @@ -3780,14 +3788,14 @@ multiclass avx512_shufp, EVEX_4V, Sched<[WriteShuffleLd, ReadAfterLd]>; def rri : AVX512PIi8<0xC6, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$src3), !strconcat(OpcodeStr, - "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2, (i8 imm:$src3))))], d, IIC_SSE_SHUFP>, EVEX_4V, Sched<[WriteShuffle]>; @@ -3815,13 +3823,13 @@ multiclass avx512_alignr, EVEX_4V; let mayLoad = 1 in def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$src3), !strconcat(OpcodeStr, - "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>, EVEX_4V; } defm VALIGND : avx512_alignr<"valignd", VR512, i512mem>, @@ -3841,11 +3849,11 @@ def : Pat<(v8i64 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))), multiclass avx512_vpabs opc, string OpcodeStr, RegisterClass RC, X86MemOperand x86memop> { def rr : AVX5128I, + !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>, EVEX; def rm : AVX5128I, + !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>, EVEX; } @@ -3867,30 +3875,30 @@ multiclass avx512_conflict opc, string OpcodeStr, X86MemOperand x86scalar_mop, string BrdcstStr> { def rr : AVX5128I, EVEX; def rm : AVX5128I, EVEX; def rmb : AVX5128I, EVEX, EVEX_B; def rrkz : AVX5128I, EVEX, EVEX_KZ; def rmkz : AVX5128I, EVEX, EVEX_KZ; def rmbkz : AVX5128I, EVEX, EVEX_KZ, EVEX_B; @@ -3899,16 +3907,16 @@ multiclass avx512_conflict opc, string OpcodeStr, def rrk : AVX5128I, EVEX, EVEX_K; def rmk : AVX5128I, EVEX, EVEX_K; def rmbk : AVX5128I, EVEX, EVEX_K, EVEX_B; } diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 1db704eab90..663e96c6e00 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -224,6 +224,7 @@ def X86VPermilp : SDNode<"X86ISD::VPERMILP", SDTShuff2OpI>; def X86VPermv : SDNode<"X86ISD::VPERMV", SDTShuff2Op>; def X86VPermi : SDNode<"X86ISD::VPERMI", SDTShuff2OpI>; def X86VPermv3 : SDNode<"X86ISD::VPERMV3", SDTShuff3Op>; +def X86VPermiv3 : SDNode<"X86ISD::VPERMIV3", SDTShuff3Op>; def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index ed2bf9cf6a7..2bf39728381 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -1540,8 +1540,8 @@ static bool isFrameLoadOpcode(int Opcode) { case X86::VMOVDQAYrm: case X86::MMX_MOVD64rm: case X86::MMX_MOVQ64rm: - case X86::VMOVDQA32rm: - case X86::VMOVDQA64rm: + case X86::VMOVAPSZrm: + case X86::VMOVUPSZrm: return true; } } @@ -1567,6 +1567,8 @@ static bool isFrameStoreOpcode(int Opcode) { case X86::VMOVAPSYmr: case X86::VMOVAPDYmr: case X86::VMOVDQAYmr: + case X86::VMOVUPSZmr: + case X86::VMOVAPSZmr: case X86::MMX_MOVD64mr: case X86::MMX_MOVQ64mr: case X86::MMX_MOVNTQmr: diff --git a/test/CodeGen/X86/avx512-shuffle.ll b/test/CodeGen/X86/avx512-shuffle.ll index 84a87e23f33..7356c1c05e3 100644 --- a/test/CodeGen/X86/avx512-shuffle.ll +++ b/test/CodeGen/X86/avx512-shuffle.ll @@ -49,7 +49,7 @@ define <8 x double> @test4(<8 x double> %a) nounwind { } ; CHECK-LABEL: test5: -; CHECK: vpermi2pd +; CHECK: vperm2pd ; CHECK: ret define <8 x double> @test5(<8 x double> %a, <8 x double> %b) nounwind { %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> @@ -65,7 +65,7 @@ define <8 x i64> @test6(<8 x i64> %a) nounwind { } ; CHECK-LABEL: test7: -; CHECK: vpermi2q +; CHECK: vperm2q ; CHECK: ret define <8 x i64> @test7(<8 x i64> %a, <8 x i64> %b) nounwind { %c = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> @@ -73,7 +73,7 @@ define <8 x i64> @test7(<8 x i64> %a, <8 x i64> %b) nounwind { } ; CHECK-LABEL: test8: -; CHECK: vpermi2d +; CHECK: vperm2d ; CHECK: ret define <16 x i32> @test8(<16 x i32> %a, <16 x i32> %b) nounwind { %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> @@ -81,7 +81,7 @@ define <16 x i32> @test8(<16 x i32> %a, <16 x i32> %b) nounwind { } ; CHECK-LABEL: test9: -; CHECK: vpermi2ps +; CHECK: vperm2ps ; CHECK: ret define <16 x float> @test9(<16 x float> %a, <16 x float> %b) nounwind { %c = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> @@ -89,7 +89,7 @@ define <16 x float> @test9(<16 x float> %a, <16 x float> %b) nounwind { } ; CHECK-LABEL: test10: -; CHECK: vpermi2ps ( +; CHECK: vperm2ps ( ; CHECK: ret define <16 x float> @test10(<16 x float> %a, <16 x float>* %b) nounwind { %c = load <16 x float>* %b @@ -98,7 +98,7 @@ define <16 x float> @test10(<16 x float> %a, <16 x float>* %b) nounwind { } ; CHECK-LABEL: test11: -; CHECK: vpermi2d ( +; CHECK: vperm2d ; CHECK: ret define <16 x i32> @test11(<16 x i32> %a, <16 x i32>* %b) nounwind { %c = load <16 x i32>* %b @@ -202,7 +202,7 @@ define <16 x float> @test23(<16 x float> %a, <16 x float> %c) { } ; CHECK-LABEL: @test24 -; CHECK: vpermi2d +; CHECK: vperm2d ; CHECK: ret define <16 x i32> @test24(<16 x i32> %a, <16 x i32> %b) nounwind { %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32>