diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index d29754333fa..2b66e3d9f4a 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -3047,30 +3047,15 @@ let TargetPrefix = "x86" in { // AVX-512 conflict detection let TargetPrefix = "x86" in { - def int_x86_avx512_conflict_d_512 : GCCBuiltin<"__builtin_ia32_conflictd512">, - Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty], - []>; - def int_x86_avx512_conflict_d_mask_512 : - GCCBuiltin<"__builtin_ia32_mask_conflictd512">, + def int_x86_avx512_mask_conflict_d_512 : + GCCBuiltin<"__builtin_ia32_vpconflictsi_512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, - llvm_v16i1_ty, llvm_v16i32_ty], + llvm_v16i32_ty, llvm_i16_ty], []>; - def int_x86_avx512_conflict_d_maskz_512: - GCCBuiltin<"__builtin_ia32_maskz_conflictd512">, - Intrinsic<[llvm_v16i32_ty], [llvm_v16i1_ty, llvm_v16i32_ty], - []>; - - def int_x86_avx512_conflict_q_512 : GCCBuiltin<"__builtin_ia32_conflictq512">, - Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty], - []>; - def int_x86_avx512_conflict_q_mask_512 : - GCCBuiltin<"__builtin_ia32_mask_conflictq512">, + def int_x86_avx512_mask_conflict_q_512 : + GCCBuiltin<"__builtin_ia32_vpconflictdi_512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, - llvm_v8i1_ty, llvm_v8i64_ty], - []>; - def int_x86_avx512_conflict_q_maskz_512: - GCCBuiltin<"__builtin_ia32_maskz_conflictq512">, - Intrinsic<[llvm_v8i64_ty], [llvm_v8i1_ty, llvm_v8i64_ty], + llvm_v8i64_ty, llvm_i8_ty], []>; } diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 974518a59a0..9bf3b5b91b0 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -3459,18 +3459,17 @@ defm VPABSQ : avx512_vpabs<0x1F, "vpabsq", VR512, i512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; multiclass avx512_conflict opc, string OpcodeStr, - RegisterClass RC, RegisterClass KRC, PatFrag memop_frag, - X86MemOperand x86memop, PatFrag scalar_mfrag, - X86MemOperand x86scalar_mop, string BrdcstStr, - Intrinsic Int, Intrinsic maskInt, Intrinsic maskzInt> { + RegisterClass RC, RegisterClass KRC, + X86MemOperand x86memop, + X86MemOperand x86scalar_mop, string BrdcstStr> { def rr : AVX5128I, EVEX; + []>, EVEX; def rm : AVX5128I, EVEX; + []>, EVEX; def rmb : AVX5128I opc, string OpcodeStr, (ins KRC:$mask, RC:$src), !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"), - [(set RC:$dst, (maskzInt KRC:$mask, RC:$src))]>, EVEX, EVEX_KZ; + []>, EVEX, EVEX_KZ; def rmkz : AVX5128I, - EVEX, EVEX_KZ; + []>, EVEX, EVEX_KZ; def rmbkz : AVX5128I opc, string OpcodeStr, (ins RC:$src1, KRC:$mask, RC:$src2), !strconcat(OpcodeStr, "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), - [(set RC:$dst, (maskInt RC:$src1, KRC:$mask, RC:$src2))]>, EVEX, EVEX_K; + []>, EVEX, EVEX_K; def rmk : AVX5128I, EVEX, EVEX_K; + []>, EVEX, EVEX_K; def rmbk : AVX5128I opc, string OpcodeStr, let Predicates = [HasCDI] in { defm VPCONFLICTD : avx512_conflict<0xC4, "vpconflictd", VR512, VK16WM, - memopv16i32, i512mem, loadi32, i32mem, "{1to16}", - int_x86_avx512_conflict_d_512, - int_x86_avx512_conflict_d_mask_512, - int_x86_avx512_conflict_d_maskz_512>, + i512mem, i32mem, "{1to16}">, EVEX_V512, EVEX_CD8<32, CD8VF>; + defm VPCONFLICTQ : avx512_conflict<0xC4, "vpconflictq", VR512, VK8WM, - memopv8i64, i512mem, loadi64, i64mem, "{1to8}", - int_x86_avx512_conflict_q_512, - int_x86_avx512_conflict_q_mask_512, - int_x86_avx512_conflict_q_maskz_512>, + i512mem, i64mem, "{1to8}">, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + } + +def : Pat<(int_x86_avx512_mask_conflict_d_512 VR512:$src2, VR512:$src1, + GR16:$mask), + (VPCONFLICTDrrk VR512:$src1, + (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>; + +def : Pat<(int_x86_avx512_mask_conflict_q_512 VR512:$src2, VR512:$src1, + GR8:$mask), + (VPCONFLICTQrrk VR512:$src1, + (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 45af24b50d6..ae4982f404c 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -3029,6 +3029,22 @@ unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg) { (X86::VK8RegClass.contains(SrcReg) || X86::VK16RegClass.contains(SrcReg))) return X86::KMOVWkk; + if ((X86::VK8RegClass.contains(DestReg) || + X86::VK16RegClass.contains(DestReg)) && + (X86::GR32RegClass.contains(SrcReg) || + X86::GR16RegClass.contains(SrcReg) || + X86::GR8RegClass.contains(SrcReg))) { + SrcReg = getX86SubSuperRegister(SrcReg, MVT::i32); + return X86::KMOVWkr; + } + if ((X86::GR32RegClass.contains(DestReg) || + X86::GR16RegClass.contains(DestReg) || + X86::GR8RegClass.contains(DestReg)) && + (X86::VK8RegClass.contains(SrcReg) || + X86::VK16RegClass.contains(SrcReg))) { + DestReg = getX86SubSuperRegister(DestReg, MVT::i32); + return X86::KMOVWrk; + } return 0; } diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll index 47b384bcfa1..b2e639024fd 100644 --- a/test/CodeGen/X86/avx512-intrinsics.ll +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -319,27 +319,37 @@ define <8 x i64> @test_x86_pmins_q(<8 x i64> %a0, <8 x i64> %a1) { declare <8 x i64> @llvm.x86.avx512.pmins.q(<8 x i64>, <8 x i64>) nounwind readonly define <16 x i32> @test_conflict_d(<16 x i32> %a) { + ; CHECK: movw $-1, %ax + ; CHECK: vpxor ; CHECK: vpconflictd - %res = call <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32> %a) + %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1) ret <16 x i32> %res } -declare <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32>) nounwind readonly -define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) { - ; CHECK: vpconflictd %zmm0, %zmm0 {%k1} {z} - %vmask = bitcast i16 %mask to <16 x i1> - %res = call <16 x i32> @llvm.x86.avx512.conflict.d.maskz.512(<16 x i1> %vmask, <16 x i32> %a) - ret <16 x i32> %res -} -declare <16 x i32> @llvm.x86.avx512.conflict.d.maskz.512(<16 x i1>,<16 x i32>) nounwind readonly +declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly -define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { - ; CHECK: vpconflictq {{.*}} {%k1} - %vmask = bitcast i8 %mask to <8 x i1> - %res = call <8 x i64> @llvm.x86.avx512.conflict.q.mask.512(<8 x i64> %b, <8 x i1> %vmask, <8 x i64> %a) +define <8 x i64> @test_conflict_q(<8 x i64> %a) { + ; CHECK: movb $-1, %al + ; CHECK: vpxor + ; CHECK: vpconflictq + %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1) + ret <8 x i64> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly + + +define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) { + ; CHECK: vpconflictd + %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask) + ret <16 x i32> %res +} + +define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { + ; CHECK: vpconflictq + %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask) ret <8 x i64> %res } -declare <8 x i64> @llvm.x86.avx512.conflict.q.mask.512(<8 x i64>, <8 x i1>,<8 x i64>) nounwind readonly define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) { ; CHECK: vblendmps @@ -347,6 +357,7 @@ define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x %res = call <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x i1> %m0, <16 x float> %a1, <16 x float> %a2) ; <<16 x float>> [#uses=1] ret <16 x float> %res } + declare <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x i1> %a0, <16 x float> %a1, <16 x float> %a2) nounwind readonly define <8 x double> @test_x86_mask_blend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) {