AVX-512: Changed intrinsics of VPCONFLICT to match GCC builtin form

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@196914 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Elena Demikhovsky 2013-12-10 11:58:35 +00:00
parent 9f84f21a4c
commit 89458ced87
4 changed files with 70 additions and 54 deletions

View File

@ -3047,30 +3047,15 @@ let TargetPrefix = "x86" in {
// AVX-512 conflict detection
let TargetPrefix = "x86" in {
def int_x86_avx512_conflict_d_512 : GCCBuiltin<"__builtin_ia32_conflictd512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty],
[]>;
def int_x86_avx512_conflict_d_mask_512 :
GCCBuiltin<"__builtin_ia32_mask_conflictd512">,
def int_x86_avx512_mask_conflict_d_512 :
GCCBuiltin<"__builtin_ia32_vpconflictsi_512_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_v16i1_ty, llvm_v16i32_ty],
llvm_v16i32_ty, llvm_i16_ty],
[]>;
def int_x86_avx512_conflict_d_maskz_512:
GCCBuiltin<"__builtin_ia32_maskz_conflictd512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i1_ty, llvm_v16i32_ty],
[]>;
def int_x86_avx512_conflict_q_512 : GCCBuiltin<"__builtin_ia32_conflictq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty],
[]>;
def int_x86_avx512_conflict_q_mask_512 :
GCCBuiltin<"__builtin_ia32_mask_conflictq512">,
def int_x86_avx512_mask_conflict_q_512 :
GCCBuiltin<"__builtin_ia32_vpconflictdi_512_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_v8i1_ty, llvm_v8i64_ty],
[]>;
def int_x86_avx512_conflict_q_maskz_512:
GCCBuiltin<"__builtin_ia32_maskz_conflictq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i1_ty, llvm_v8i64_ty],
llvm_v8i64_ty, llvm_i8_ty],
[]>;
}

View File

@ -3459,18 +3459,17 @@ defm VPABSQ : avx512_vpabs<0x1F, "vpabsq", VR512, i512mem>, EVEX_V512, VEX_W,
EVEX_CD8<64, CD8VF>;
multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
RegisterClass RC, RegisterClass KRC, PatFrag memop_frag,
X86MemOperand x86memop, PatFrag scalar_mfrag,
X86MemOperand x86scalar_mop, string BrdcstStr,
Intrinsic Int, Intrinsic maskInt, Intrinsic maskzInt> {
RegisterClass RC, RegisterClass KRC,
X86MemOperand x86memop,
X86MemOperand x86scalar_mop, string BrdcstStr> {
def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src),
!strconcat(OpcodeStr, "\t{$src, ${dst} |${dst}, $src}"),
[(set RC:$dst, (Int RC:$src))]>, EVEX;
[]>, EVEX;
def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
(ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, ${dst}|${dst}, $src}"),
[(set RC:$dst, (Int (memop_frag addr:$src)))]>, EVEX;
[]>, EVEX;
def rmb : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
(ins x86scalar_mop:$src),
!strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
@ -3480,13 +3479,12 @@ multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
(ins KRC:$mask, RC:$src),
!strconcat(OpcodeStr,
"\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
[(set RC:$dst, (maskzInt KRC:$mask, RC:$src))]>, EVEX, EVEX_KZ;
[]>, EVEX, EVEX_KZ;
def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
(ins KRC:$mask, x86memop:$src),
!strconcat(OpcodeStr,
"\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
[(set RC:$dst, (maskzInt KRC:$mask, (memop_frag addr:$src)))]>,
EVEX, EVEX_KZ;
[]>, EVEX, EVEX_KZ;
def rmbkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
(ins KRC:$mask, x86scalar_mop:$src),
!strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
@ -3499,12 +3497,12 @@ multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
(ins RC:$src1, KRC:$mask, RC:$src2),
!strconcat(OpcodeStr,
"\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
[(set RC:$dst, (maskInt RC:$src1, KRC:$mask, RC:$src2))]>, EVEX, EVEX_K;
[]>, EVEX, EVEX_K;
def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, KRC:$mask, x86memop:$src2),
!strconcat(OpcodeStr,
"\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
[(set RC:$dst, (maskInt RC:$src1, KRC:$mask, (memop_frag addr:$src2)))]>, EVEX, EVEX_K;
[]>, EVEX, EVEX_K;
def rmbk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, KRC:$mask, x86scalar_mop:$src2),
!strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
@ -3515,16 +3513,22 @@ multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
let Predicates = [HasCDI] in {
defm VPCONFLICTD : avx512_conflict<0xC4, "vpconflictd", VR512, VK16WM,
memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
int_x86_avx512_conflict_d_512,
int_x86_avx512_conflict_d_mask_512,
int_x86_avx512_conflict_d_maskz_512>,
i512mem, i32mem, "{1to16}">,
EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPCONFLICTQ : avx512_conflict<0xC4, "vpconflictq", VR512, VK8WM,
memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
int_x86_avx512_conflict_q_512,
int_x86_avx512_conflict_q_mask_512,
int_x86_avx512_conflict_q_maskz_512>,
i512mem, i64mem, "{1to8}">,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
// Selection pattern for the masked 32-bit-element conflict intrinsic whose
// mask operand arrives in a 16-bit general-purpose register (GR16).
// The GR16 mask is moved into the VK16WM register class with COPY_TO_REGCLASS
// and typed as v16i1 before being handed to VPCONFLICTDrrk.
// Operand order differs between the two sides: the intrinsic's first operand
// ($src2) becomes the instruction's last operand, and the intrinsic's second
// operand ($src1) becomes the instruction's first operand.
def : Pat<(int_x86_avx512_mask_conflict_d_512 VR512:$src2, VR512:$src1,
GR16:$mask),
(VPCONFLICTDrrk VR512:$src1,
(v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;
// Selection pattern for the masked 64-bit-element conflict intrinsic whose
// mask operand arrives in an 8-bit general-purpose register (GR8).
// The GR8 mask is moved into the VK8WM register class with COPY_TO_REGCLASS
// and typed as v8i1 before being handed to VPCONFLICTQrrk.
// Operand order differs between the two sides: the intrinsic's first operand
// ($src2) becomes the instruction's last operand, and the intrinsic's second
// operand ($src1) becomes the instruction's first operand.
def : Pat<(int_x86_avx512_mask_conflict_q_512 VR512:$src2, VR512:$src1,
GR8:$mask),
(VPCONFLICTQrrk VR512:$src1,
(v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;

View File

@ -3029,6 +3029,22 @@ unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg) {
(X86::VK8RegClass.contains(SrcReg) ||
X86::VK16RegClass.contains(SrcReg)))
return X86::KMOVWkk;
// Mask register <- general-purpose register: when the destination is in VK8
// or VK16 and the source is in GR32, GR16 or GR8, select KMOVWkr.
// SrcReg is overwritten with the register getX86SubSuperRegister returns for
// MVT::i32; SrcReg is a reference parameter, so the caller sees the new value.
// NOTE(review): if GR8 can contain high-byte registers (AH/BH/...), the low
// bits of the widened register would not hold the source value -- confirm
// such registers cannot reach this path.
if ((X86::VK8RegClass.contains(DestReg) ||
X86::VK16RegClass.contains(DestReg)) &&
(X86::GR32RegClass.contains(SrcReg) ||
X86::GR16RegClass.contains(SrcReg) ||
X86::GR8RegClass.contains(SrcReg))) {
SrcReg = getX86SubSuperRegister(SrcReg, MVT::i32);
return X86::KMOVWkr;
}
// General-purpose register <- mask register: the mirror case. When the
// destination is in GR32, GR16 or GR8 and the source is in VK8 or VK16,
// select KMOVWrk. DestReg (also a reference parameter) is overwritten with
// the register getX86SubSuperRegister returns for MVT::i32.
if ((X86::GR32RegClass.contains(DestReg) ||
X86::GR16RegClass.contains(DestReg) ||
X86::GR8RegClass.contains(DestReg)) &&
(X86::VK8RegClass.contains(SrcReg) ||
X86::VK16RegClass.contains(SrcReg))) {
DestReg = getX86SubSuperRegister(DestReg, MVT::i32);
return X86::KMOVWrk;
}
return 0;
}

View File

@ -319,27 +319,37 @@ define <8 x i64> @test_x86_pmins_q(<8 x i64> %a0, <8 x i64> %a1) {
declare <8 x i64> @llvm.x86.avx512.pmins.q(<8 x i64>, <8 x i64>) nounwind readonly
define <16 x i32> @test_conflict_d(<16 x i32> %a) {
; CHECK: movw $-1, %ax
; CHECK: vpxor
; CHECK: vpconflictd
%res = call <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32> %a)
%res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32>) nounwind readonly
define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) {
; CHECK: vpconflictd %zmm0, %zmm0 {%k1} {z}
%vmask = bitcast i16 %mask to <16 x i1>
%res = call <16 x i32> @llvm.x86.avx512.conflict.d.maskz.512(<16 x i1> %vmask, <16 x i32> %a)
ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.conflict.d.maskz.512(<16 x i1>,<16 x i32>) nounwind readonly
declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; CHECK: vpconflictq {{.*}} {%k1}
%vmask = bitcast i8 %mask to <8 x i1>
%res = call <8 x i64> @llvm.x86.avx512.conflict.q.mask.512(<8 x i64> %b, <8 x i1> %vmask, <8 x i64> %a)
; Calls the masked 64-bit conflict intrinsic with an all-ones i8 mask (-1) and
; a zeroinitializer second operand. The expected output materialises the mask
; with a movb of -1 into %al, emits a vpxor, and then emits vpconflictq.
; NOTE(review): the vpxor presumably zeroes the register used for the second
; (zeroinitializer) operand -- confirm against the generated assembly.
define <8 x i64> @test_conflict_q(<8 x i64> %a) {
; CHECK: movb $-1, %al
; CHECK: vpxor
; CHECK: vpconflictq
%res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
; Calls the masked 32-bit conflict intrinsic with the caller-supplied i16 %mask
; and a zeroinitializer second operand. The test only verifies that a
; vpconflictd instruction is emitted; it does not pin registers or the mask
; annotation.
; NOTE(review): the second operand is presumably the pass-through value, which
; would make a zero pass-through equivalent to zero-masking -- confirm.
define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) {
; CHECK: vpconflictd
%res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}
; Calls the masked 64-bit conflict intrinsic with %a as the first operand, %b
; as the second operand and the caller-supplied i8 %mask. The test only
; verifies that a vpconflictq instruction is emitted.
define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; CHECK: vpconflictq
%res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.conflict.q.mask.512(<8 x i64>, <8 x i1>,<8 x i64>) nounwind readonly
define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK: vblendmps
@ -347,6 +357,7 @@ define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x
%res = call <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x i1> %m0, <16 x float> %a1, <16 x float> %a2) ; <<16 x float>> [#uses=1]
ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x i1> %a0, <16 x float> %a1, <16 x float> %a2) nounwind readonly
define <8 x double> @test_x86_mask_blend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) {