AVX-512: Changed intrinsics of VPCONFLICT to match GCC builtin form

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@196914 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Elena Demikhovsky 2013-12-10 11:58:35 +00:00
parent 9f84f21a4c
commit 89458ced87
4 changed files with 70 additions and 54 deletions

View File

@ -3047,30 +3047,15 @@ let TargetPrefix = "x86" in {
// AVX-512 conflict detection // AVX-512 conflict detection
let TargetPrefix = "x86" in { let TargetPrefix = "x86" in {
def int_x86_avx512_conflict_d_512 : GCCBuiltin<"__builtin_ia32_conflictd512">, def int_x86_avx512_mask_conflict_d_512 :
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty], GCCBuiltin<"__builtin_ia32_vpconflictsi_512_mask">,
[]>;
def int_x86_avx512_conflict_d_mask_512 :
GCCBuiltin<"__builtin_ia32_mask_conflictd512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_v16i1_ty, llvm_v16i32_ty], llvm_v16i32_ty, llvm_i16_ty],
[]>; []>;
def int_x86_avx512_conflict_d_maskz_512: def int_x86_avx512_mask_conflict_q_512 :
GCCBuiltin<"__builtin_ia32_maskz_conflictd512">, GCCBuiltin<"__builtin_ia32_vpconflictdi_512_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i1_ty, llvm_v16i32_ty],
[]>;
def int_x86_avx512_conflict_q_512 : GCCBuiltin<"__builtin_ia32_conflictq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty],
[]>;
def int_x86_avx512_conflict_q_mask_512 :
GCCBuiltin<"__builtin_ia32_mask_conflictq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_v8i1_ty, llvm_v8i64_ty], llvm_v8i64_ty, llvm_i8_ty],
[]>;
def int_x86_avx512_conflict_q_maskz_512:
GCCBuiltin<"__builtin_ia32_maskz_conflictq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i1_ty, llvm_v8i64_ty],
[]>; []>;
} }

View File

@ -3459,18 +3459,17 @@ defm VPABSQ : avx512_vpabs<0x1F, "vpabsq", VR512, i512mem>, EVEX_V512, VEX_W,
EVEX_CD8<64, CD8VF>; EVEX_CD8<64, CD8VF>;
multiclass avx512_conflict<bits<8> opc, string OpcodeStr, multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
RegisterClass RC, RegisterClass KRC, PatFrag memop_frag, RegisterClass RC, RegisterClass KRC,
X86MemOperand x86memop, PatFrag scalar_mfrag, X86MemOperand x86memop,
X86MemOperand x86scalar_mop, string BrdcstStr, X86MemOperand x86scalar_mop, string BrdcstStr> {
Intrinsic Int, Intrinsic maskInt, Intrinsic maskzInt> {
def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src), (ins RC:$src),
!strconcat(OpcodeStr, "\t{$src, ${dst} |${dst}, $src}"), !strconcat(OpcodeStr, "\t{$src, ${dst} |${dst}, $src}"),
[(set RC:$dst, (Int RC:$src))]>, EVEX; []>, EVEX;
def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
(ins x86memop:$src), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, ${dst}|${dst}, $src}"), !strconcat(OpcodeStr, "\t{$src, ${dst}|${dst}, $src}"),
[(set RC:$dst, (Int (memop_frag addr:$src)))]>, EVEX; []>, EVEX;
def rmb : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), def rmb : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
(ins x86scalar_mop:$src), (ins x86scalar_mop:$src),
!strconcat(OpcodeStr, "\t{${src}", BrdcstStr, !strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
@ -3480,13 +3479,12 @@ multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
(ins KRC:$mask, RC:$src), (ins KRC:$mask, RC:$src),
!strconcat(OpcodeStr, !strconcat(OpcodeStr,
"\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"), "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
[(set RC:$dst, (maskzInt KRC:$mask, RC:$src))]>, EVEX, EVEX_KZ; []>, EVEX, EVEX_KZ;
def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
(ins KRC:$mask, x86memop:$src), (ins KRC:$mask, x86memop:$src),
!strconcat(OpcodeStr, !strconcat(OpcodeStr,
"\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"), "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
[(set RC:$dst, (maskzInt KRC:$mask, (memop_frag addr:$src)))]>, []>, EVEX, EVEX_KZ;
EVEX, EVEX_KZ;
def rmbkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), def rmbkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
(ins KRC:$mask, x86scalar_mop:$src), (ins KRC:$mask, x86scalar_mop:$src),
!strconcat(OpcodeStr, "\t{${src}", BrdcstStr, !strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
@ -3499,12 +3497,12 @@ multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
(ins RC:$src1, KRC:$mask, RC:$src2), (ins RC:$src1, KRC:$mask, RC:$src2),
!strconcat(OpcodeStr, !strconcat(OpcodeStr,
"\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
[(set RC:$dst, (maskInt RC:$src1, KRC:$mask, RC:$src2))]>, EVEX, EVEX_K; []>, EVEX, EVEX_K;
def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, KRC:$mask, x86memop:$src2), (ins RC:$src1, KRC:$mask, x86memop:$src2),
!strconcat(OpcodeStr, !strconcat(OpcodeStr,
"\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
[(set RC:$dst, (maskInt RC:$src1, KRC:$mask, (memop_frag addr:$src2)))]>, EVEX, EVEX_K; []>, EVEX, EVEX_K;
def rmbk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), def rmbk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, KRC:$mask, x86scalar_mop:$src2), (ins RC:$src1, KRC:$mask, x86scalar_mop:$src2),
!strconcat(OpcodeStr, "\t{${src2}", BrdcstStr, !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
@ -3515,16 +3513,22 @@ multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
let Predicates = [HasCDI] in { let Predicates = [HasCDI] in {
defm VPCONFLICTD : avx512_conflict<0xC4, "vpconflictd", VR512, VK16WM, defm VPCONFLICTD : avx512_conflict<0xC4, "vpconflictd", VR512, VK16WM,
memopv16i32, i512mem, loadi32, i32mem, "{1to16}", i512mem, i32mem, "{1to16}">,
int_x86_avx512_conflict_d_512,
int_x86_avx512_conflict_d_mask_512,
int_x86_avx512_conflict_d_maskz_512>,
EVEX_V512, EVEX_CD8<32, CD8VF>; EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPCONFLICTQ : avx512_conflict<0xC4, "vpconflictq", VR512, VK8WM, defm VPCONFLICTQ : avx512_conflict<0xC4, "vpconflictq", VR512, VK8WM,
memopv8i64, i512mem, loadi64, i64mem, "{1to8}", i512mem, i64mem, "{1to8}">,
int_x86_avx512_conflict_q_512,
int_x86_avx512_conflict_q_mask_512,
int_x86_avx512_conflict_q_maskz_512>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
} }
// Selection pattern for the masked dword conflict intrinsic: a call to
// int_x86_avx512_mask_conflict_d_512 is selected to VPCONFLICTDrrk.
// Note the operand reordering: the intrinsic's first operand ($src2) is
// passed as the instruction's last operand, and the intrinsic's second
// operand ($src1) is passed as the instruction's first operand.
// The mask is matched in a GR16 register and copied into the VK16WM
// register class (typed as v16i1) to serve as the instruction's mask operand.
def : Pat<(int_x86_avx512_mask_conflict_d_512 VR512:$src2, VR512:$src1,
GR16:$mask),
(VPCONFLICTDrrk VR512:$src1,
(v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;
// Selection pattern for the masked qword conflict intrinsic: a call to
// int_x86_avx512_mask_conflict_q_512 is selected to VPCONFLICTQrrk.
// As in the dword pattern, the intrinsic's first operand ($src2) becomes the
// instruction's last operand and the second operand ($src1) becomes its first.
// The mask is matched in a GR8 register and copied into the VK8WM register
// class (typed as v8i1) to serve as the instruction's mask operand.
def : Pat<(int_x86_avx512_mask_conflict_q_512 VR512:$src2, VR512:$src1,
GR8:$mask),
(VPCONFLICTQrrk VR512:$src1,
(v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;

View File

@ -3029,6 +3029,22 @@ unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg) {
(X86::VK8RegClass.contains(SrcReg) || (X86::VK8RegClass.contains(SrcReg) ||
X86::VK16RegClass.contains(SrcReg))) X86::VK16RegClass.contains(SrcReg)))
return X86::KMOVWkk; return X86::KMOVWkk;
if ((X86::VK8RegClass.contains(DestReg) ||
X86::VK16RegClass.contains(DestReg)) &&
(X86::GR32RegClass.contains(SrcReg) ||
X86::GR16RegClass.contains(SrcReg) ||
X86::GR8RegClass.contains(SrcReg))) {
SrcReg = getX86SubSuperRegister(SrcReg, MVT::i32);
return X86::KMOVWkr;
}
if ((X86::GR32RegClass.contains(DestReg) ||
X86::GR16RegClass.contains(DestReg) ||
X86::GR8RegClass.contains(DestReg)) &&
(X86::VK8RegClass.contains(SrcReg) ||
X86::VK16RegClass.contains(SrcReg))) {
DestReg = getX86SubSuperRegister(DestReg, MVT::i32);
return X86::KMOVWrk;
}
return 0; return 0;
} }

View File

@ -319,27 +319,37 @@ define <8 x i64> @test_x86_pmins_q(<8 x i64> %a0, <8 x i64> %a1) {
declare <8 x i64> @llvm.x86.avx512.pmins.q(<8 x i64>, <8 x i64>) nounwind readonly declare <8 x i64> @llvm.x86.avx512.pmins.q(<8 x i64>, <8 x i64>) nounwind readonly
define <16 x i32> @test_conflict_d(<16 x i32> %a) { define <16 x i32> @test_conflict_d(<16 x i32> %a) {
; CHECK: movw $-1, %ax
; CHECK: vpxor
; CHECK: vpconflictd ; CHECK: vpconflictd
%res = call <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32> %a) %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res ret <16 x i32> %res
} }
declare <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32>) nounwind readonly
define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) { declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
; CHECK: vpconflictd %zmm0, %zmm0 {%k1} {z}
%vmask = bitcast i16 %mask to <16 x i1>
%res = call <16 x i32> @llvm.x86.avx512.conflict.d.maskz.512(<16 x i1> %vmask, <16 x i32> %a)
ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.conflict.d.maskz.512(<16 x i1>,<16 x i32>) nounwind readonly
define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { define <8 x i64> @test_conflict_q(<8 x i64> %a) {
; CHECK: vpconflictq {{.*}} {%k1} ; CHECK: movb $-1, %al
%vmask = bitcast i8 %mask to <8 x i1> ; CHECK: vpxor
%res = call <8 x i64> @llvm.x86.avx512.conflict.q.mask.512(<8 x i64> %b, <8 x i1> %vmask, <8 x i64> %a) ; CHECK: vpconflictq
%res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
; Calls the masked dword conflict intrinsic with %a as the first operand, an
; all-zero vector as the second operand, and the caller-supplied i16 %mask.
; The only expectation below is that a vpconflictd instruction appears in the
; output; the operands and mask/zeroing decorations are not verified.
; NOTE(review): the name says "maskz", but nothing here pins a {z} form -
; confirm whether a stricter expectation is wanted.
define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) {
; CHECK: vpconflictd
%res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}
define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; CHECK: vpconflictq
%res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
ret <8 x i64> %res ret <8 x i64> %res
} }
declare <8 x i64> @llvm.x86.avx512.conflict.q.mask.512(<8 x i64>, <8 x i1>,<8 x i64>) nounwind readonly
define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) { define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK: vblendmps ; CHECK: vblendmps
@ -347,6 +357,7 @@ define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x
%res = call <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x i1> %m0, <16 x float> %a1, <16 x float> %a2) ; <<16 x float>> [#uses=1] %res = call <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x i1> %m0, <16 x float> %a1, <16 x float> %a2) ; <<16 x float>> [#uses=1]
ret <16 x float> %res ret <16 x float> %res
} }
declare <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x i1> %a0, <16 x float> %a1, <16 x float> %a2) nounwind readonly declare <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x i1> %a0, <16 x float> %a1, <16 x float> %a2) nounwind readonly
define <8 x double> @test_x86_mask_blend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) { define <8 x double> @test_x86_mask_blend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) {