mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-28 19:31:58 +00:00
AVX-512: changed intrinsics for mask operations
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@196918 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
89458ced87
commit
8a8581ca4b
@ -2643,37 +2643,30 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
// Mask instructions
|
||||
// 16-bit mask
|
||||
def int_x86_kadd_v16i1 : GCCBuiltin<"__builtin_ia32_kaddw">,
|
||||
Intrinsic<[llvm_v16i1_ty], [llvm_v16i1_ty, llvm_v16i1_ty],
|
||||
def int_x86_avx512_kand_w : GCCBuiltin<"__builtin_ia32_kandhi">,
|
||||
Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_kand_v16i1 : GCCBuiltin<"__builtin_ia32_kandw">,
|
||||
Intrinsic<[llvm_v16i1_ty], [llvm_v16i1_ty, llvm_v16i1_ty],
|
||||
def int_x86_avx512_kandn_w : GCCBuiltin<"__builtin_ia32_kandnhi">,
|
||||
Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_kandn_v16i1 : GCCBuiltin<"__builtin_ia32_kandnw">,
|
||||
Intrinsic<[llvm_v16i1_ty], [llvm_v16i1_ty, llvm_v16i1_ty],
|
||||
def int_x86_avx512_knot_w : GCCBuiltin<"__builtin_ia32_knothi">,
|
||||
Intrinsic<[llvm_i16_ty], [llvm_i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_kor_w : GCCBuiltin<"__builtin_ia32_korhi">,
|
||||
Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_knot_v16i1 : GCCBuiltin<"__builtin_ia32_knotw">,
|
||||
Intrinsic<[llvm_v16i1_ty], [llvm_v16i1_ty], [IntrNoMem]>;
|
||||
def int_x86_kor_v16i1 : GCCBuiltin<"__builtin_ia32_korw">,
|
||||
Intrinsic<[llvm_v16i1_ty], [llvm_v16i1_ty, llvm_v16i1_ty],
|
||||
def int_x86_avx512_kxor_w : GCCBuiltin<"__builtin_ia32_kxorhi">,
|
||||
Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_kxor_v16i1 : GCCBuiltin<"__builtin_ia32_kxorw">,
|
||||
Intrinsic<[llvm_v16i1_ty], [llvm_v16i1_ty, llvm_v16i1_ty],
|
||||
def int_x86_avx512_kxnor_w : GCCBuiltin<"__builtin_ia32_kxnorhi">,
|
||||
Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_kxnor_v16i1 : GCCBuiltin<"__builtin_ia32_kxnorw">,
|
||||
Intrinsic<[llvm_v16i1_ty], [llvm_v16i1_ty, llvm_v16i1_ty],
|
||||
def int_x86_avx512_kunpck_bw : GCCBuiltin<"__builtin_ia32_kunpckhi">,
|
||||
Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_mask2int_v16i1 : GCCBuiltin<"__builtin_ia32_mask2intw">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_v16i1_ty], [IntrNoMem]>;
|
||||
def int_x86_int2mask_v16i1 : GCCBuiltin<"__builtin_ia32_int2maskw">,
|
||||
Intrinsic<[llvm_v16i1_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_kunpck_v16i1 : GCCBuiltin<"__builtin_ia32_kunpckbw">,
|
||||
Intrinsic<[llvm_v16i1_ty], [llvm_v8i1_ty, llvm_v8i1_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_kortestz : GCCBuiltin<"__builtin_ia32_kortestz">,
|
||||
def int_x86_avx512_kortestz_w : GCCBuiltin<"__builtin_ia32_kortestzhi">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_kortestc : GCCBuiltin<"__builtin_ia32_kortestc">,
|
||||
def int_x86_avx512_kortestc_w : GCCBuiltin<"__builtin_ia32_kortestchi">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty],
|
||||
[IntrNoMem]>;
|
||||
}
|
||||
|
@ -11490,9 +11490,9 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
|
||||
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
|
||||
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
|
||||
}
|
||||
case Intrinsic::x86_avx512_kortestz:
|
||||
case Intrinsic::x86_avx512_kortestc: {
|
||||
unsigned X86CC = (IntNo == Intrinsic::x86_avx512_kortestz)? X86::COND_E: X86::COND_B;
|
||||
case Intrinsic::x86_avx512_kortestz_w:
|
||||
case Intrinsic::x86_avx512_kortestc_w: {
|
||||
unsigned X86CC = (IntNo == Intrinsic::x86_avx512_kortestz_w)? X86::COND_E: X86::COND_B;
|
||||
SDValue LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(1));
|
||||
SDValue RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(2));
|
||||
SDValue CC = DAG.getConstant(X86CC, MVT::i8);
|
||||
|
@ -898,6 +898,15 @@ multiclass avx512_mask_unop_w<bits<8> opc, string OpcodeStr,
|
||||
|
||||
defm KNOT : avx512_mask_unop_w<0x44, "knot", not>;
|
||||
|
||||
multiclass avx512_mask_unop_int<string IntName, string InstName> {
|
||||
let Predicates = [HasAVX512] in
|
||||
def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_w")
|
||||
(i16 GR16:$src)),
|
||||
(COPY_TO_REGCLASS (!cast<Instruction>(InstName##"Wrr")
|
||||
(v16i1 (COPY_TO_REGCLASS GR16:$src, VK16))), GR16)>;
|
||||
}
|
||||
defm : avx512_mask_unop_int<"knot", "KNOT">;
|
||||
|
||||
def : Pat<(xor VK16:$src1, (v16i1 immAllOnesV)), (KNOTWrr VK16:$src1)>;
|
||||
def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)),
|
||||
(COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src1, VK16)), VK8)>;
|
||||
@ -908,7 +917,7 @@ def : Pat<(not VK8:$src),
|
||||
(KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
|
||||
|
||||
// Mask binary operation
|
||||
// - KADD, KAND, KANDN, KOR, KXNOR, KXOR
|
||||
// - KAND, KANDN, KOR, KXNOR, KXOR
|
||||
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
|
||||
RegisterClass KRC, SDPatternOperator OpNode> {
|
||||
let Predicates = [HasAVX512] in
|
||||
@ -928,7 +937,6 @@ def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
|
||||
def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
|
||||
|
||||
let isCommutable = 1 in {
|
||||
defm KADD : avx512_mask_binop_w<0x4a, "kadd", add>;
|
||||
defm KAND : avx512_mask_binop_w<0x41, "kand", and>;
|
||||
let isCommutable = 0 in
|
||||
defm KANDN : avx512_mask_binop_w<0x42, "kandn", andn>;
|
||||
@ -939,17 +947,19 @@ let isCommutable = 1 in {
|
||||
|
||||
multiclass avx512_mask_binop_int<string IntName, string InstName> {
|
||||
let Predicates = [HasAVX512] in
|
||||
def : Pat<(!cast<Intrinsic>("int_x86_"##IntName##"_v16i1")
|
||||
VK16:$src1, VK16:$src2),
|
||||
(!cast<Instruction>(InstName##"Wrr") VK16:$src1, VK16:$src2)>;
|
||||
def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_w")
|
||||
(i16 GR16:$src1), (i16 GR16:$src2)),
|
||||
(COPY_TO_REGCLASS (!cast<Instruction>(InstName##"Wrr")
|
||||
(v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
|
||||
(v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))), GR16)>;
|
||||
}
|
||||
|
||||
defm : avx512_mask_binop_int<"kadd", "KADD">;
|
||||
defm : avx512_mask_binop_int<"kand", "KAND">;
|
||||
defm : avx512_mask_binop_int<"kandn", "KANDN">;
|
||||
defm : avx512_mask_binop_int<"kor", "KOR">;
|
||||
defm : avx512_mask_binop_int<"kxnor", "KXNOR">;
|
||||
defm : avx512_mask_binop_int<"kxor", "KXOR">;
|
||||
|
||||
// With AVX-512, 8-bit mask is promoted to 16-bit mask.
|
||||
multiclass avx512_binop_pat<SDPatternOperator OpNode, Instruction Inst> {
|
||||
let Predicates = [HasAVX512] in
|
||||
@ -967,15 +977,15 @@ defm : avx512_binop_pat<xor, KXORWrr>;
|
||||
|
||||
// Mask unpacking
|
||||
multiclass avx512_mask_unpck<bits<8> opc, string OpcodeStr,
|
||||
RegisterClass KRC1, RegisterClass KRC2> {
|
||||
RegisterClass KRC> {
|
||||
let Predicates = [HasAVX512] in
|
||||
def rr : I<opc, MRMSrcReg, (outs KRC1:$dst), (ins KRC2:$src1, KRC2:$src2),
|
||||
def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
|
||||
}
|
||||
|
||||
multiclass avx512_mask_unpck_bw<bits<8> opc, string OpcodeStr> {
|
||||
defm BW : avx512_mask_unpck<opc, !strconcat(OpcodeStr, "bw"), VK16, VK8>,
|
||||
defm BW : avx512_mask_unpck<opc, !strconcat(OpcodeStr, "bw"), VK16>,
|
||||
VEX_4V, VEX_L, OpSize, TB;
|
||||
}
|
||||
|
||||
@ -983,12 +993,14 @@ defm KUNPCK : avx512_mask_unpck_bw<0x4b, "kunpck">;
|
||||
|
||||
multiclass avx512_mask_unpck_int<string IntName, string InstName> {
|
||||
let Predicates = [HasAVX512] in
|
||||
def : Pat<(!cast<Intrinsic>("int_x86_"##IntName##"_v16i1")
|
||||
VK8:$src1, VK8:$src2),
|
||||
(!cast<Instruction>(InstName##"BWrr") VK8:$src1, VK8:$src2)>;
|
||||
def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_bw")
|
||||
(i16 GR16:$src1), (i16 GR16:$src2)),
|
||||
(COPY_TO_REGCLASS (!cast<Instruction>(InstName##"BWrr")
|
||||
(v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
|
||||
(v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))), GR16)>;
|
||||
}
|
||||
defm : avx512_mask_unpck_int<"kunpck", "KUNPCK">;
|
||||
|
||||
defm : avx512_mask_unpck_int<"kunpck", "KUNPCK">;
|
||||
// Mask bit testing
|
||||
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
|
||||
SDNode OpNode> {
|
||||
|
@ -1,23 +1,51 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
|
||||
|
||||
declare i32 @llvm.x86.avx512.kortestz(i16, i16) nounwind readnone
|
||||
; CHECK: test_kortestz
|
||||
declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
|
||||
; CHECK-LABEL: test_kortestz
|
||||
; CHECK: kortestw
|
||||
; CHECK: sete
|
||||
define i32 @test_kortestz(i16 %a0, i16 %a1) {
|
||||
%res = call i32 @llvm.x86.avx512.kortestz(i16 %a0, i16 %a1)
|
||||
%res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1)
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
declare i32 @llvm.x86.avx512.kortestc(i16, i16) nounwind readnone
|
||||
; CHECK: test_kortestc
|
||||
declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
|
||||
; CHECK-LABEL: test_kortestc
|
||||
; CHECK: kortestw
|
||||
; CHECK: sbbl
|
||||
define i32 @test_kortestc(i16 %a0, i16 %a1) {
|
||||
%res = call i32 @llvm.x86.avx512.kortestc(i16 %a0, i16 %a1)
|
||||
%res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1)
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone
|
||||
; CHECK-LABEL: test_kand
|
||||
; CHECK: kandw
|
||||
; CHECK: kandw
|
||||
define i16 @test_kand(i16 %a0, i16 %a1) {
|
||||
%t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8)
|
||||
%t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1)
|
||||
ret i16 %t2
|
||||
}
|
||||
|
||||
declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone
|
||||
; CHECK-LABEL: test_knot
|
||||
; CHECK: knotw
|
||||
define i16 @test_knot(i16 %a0) {
|
||||
%res = call i16 @llvm.x86.avx512.knot.w(i16 %a0)
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone
|
||||
|
||||
; CHECK-LABEL: unpckbw_test
|
||||
; CHECK: kunpckbw
|
||||
; CHECK:ret
|
||||
define i16 @unpckbw_test(i16 %a0, i16 %a1) {
|
||||
%res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
|
||||
; CHECK: vrcp14ps
|
||||
%res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1]
|
||||
|
@ -33,19 +33,6 @@ define i16 @mand16(i16 %x, i16 %y) {
|
||||
ret i16 %ret
|
||||
}
|
||||
|
||||
; CHECK: unpckbw_test
|
||||
; CHECK: kunpckbw
|
||||
; CHECK:ret
|
||||
declare <16 x i1> @llvm.x86.kunpck.v16i1(<8 x i1>, <8 x i1>) nounwind readnone
|
||||
|
||||
define i16 @unpckbw_test(i8 %x, i8 %y) {
|
||||
%m0 = bitcast i8 %x to <8 x i1>
|
||||
%m1 = bitcast i8 %y to <8 x i1>
|
||||
%k = tail call <16 x i1> @llvm.x86.kunpck.v16i1(<8 x i1> %m0, <8 x i1> %m1)
|
||||
%r = bitcast <16 x i1> %k to i16
|
||||
ret i16 %r
|
||||
}
|
||||
|
||||
; CHECK: shuf_test1
|
||||
; CHECK: kshiftrw $8
|
||||
; CHECK:ret
|
||||
|
Loading…
Reference in New Issue
Block a user