AVX-512: changed intrinsics for mask operations

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@196918 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Elena Demikhovsky 2013-12-10 13:53:10 +00:00
parent 89458ced87
commit 8a8581ca4b
5 changed files with 78 additions and 58 deletions

View File

@ -2643,37 +2643,30 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Mask instructions // Mask instructions
// 16-bit mask // 16-bit mask
def int_x86_kadd_v16i1 : GCCBuiltin<"__builtin_ia32_kaddw">, def int_x86_avx512_kand_w : GCCBuiltin<"__builtin_ia32_kandhi">,
Intrinsic<[llvm_v16i1_ty], [llvm_v16i1_ty, llvm_v16i1_ty], Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
[IntrNoMem]>; [IntrNoMem]>;
def int_x86_kand_v16i1 : GCCBuiltin<"__builtin_ia32_kandw">, def int_x86_avx512_kandn_w : GCCBuiltin<"__builtin_ia32_kandnhi">,
Intrinsic<[llvm_v16i1_ty], [llvm_v16i1_ty, llvm_v16i1_ty], Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
[IntrNoMem]>; [IntrNoMem]>;
def int_x86_kandn_v16i1 : GCCBuiltin<"__builtin_ia32_kandnw">, def int_x86_avx512_knot_w : GCCBuiltin<"__builtin_ia32_knothi">,
Intrinsic<[llvm_v16i1_ty], [llvm_v16i1_ty, llvm_v16i1_ty], Intrinsic<[llvm_i16_ty], [llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_kor_w : GCCBuiltin<"__builtin_ia32_korhi">,
Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
[IntrNoMem]>; [IntrNoMem]>;
def int_x86_knot_v16i1 : GCCBuiltin<"__builtin_ia32_knotw">, def int_x86_avx512_kxor_w : GCCBuiltin<"__builtin_ia32_kxorhi">,
Intrinsic<[llvm_v16i1_ty], [llvm_v16i1_ty], [IntrNoMem]>; Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
def int_x86_kor_v16i1 : GCCBuiltin<"__builtin_ia32_korw">,
Intrinsic<[llvm_v16i1_ty], [llvm_v16i1_ty, llvm_v16i1_ty],
[IntrNoMem]>; [IntrNoMem]>;
def int_x86_kxor_v16i1 : GCCBuiltin<"__builtin_ia32_kxorw">, def int_x86_avx512_kxnor_w : GCCBuiltin<"__builtin_ia32_kxnorhi">,
Intrinsic<[llvm_v16i1_ty], [llvm_v16i1_ty, llvm_v16i1_ty], Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
[IntrNoMem]>; [IntrNoMem]>;
def int_x86_kxnor_v16i1 : GCCBuiltin<"__builtin_ia32_kxnorw">, def int_x86_avx512_kunpck_bw : GCCBuiltin<"__builtin_ia32_kunpckhi">,
Intrinsic<[llvm_v16i1_ty], [llvm_v16i1_ty, llvm_v16i1_ty], Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
[IntrNoMem]>; [IntrNoMem]>;
def int_x86_mask2int_v16i1 : GCCBuiltin<"__builtin_ia32_mask2intw">, def int_x86_avx512_kortestz_w : GCCBuiltin<"__builtin_ia32_kortestzhi">,
Intrinsic<[llvm_i32_ty], [llvm_v16i1_ty], [IntrNoMem]>;
def int_x86_int2mask_v16i1 : GCCBuiltin<"__builtin_ia32_int2maskw">,
Intrinsic<[llvm_v16i1_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_x86_kunpck_v16i1 : GCCBuiltin<"__builtin_ia32_kunpckbw">,
Intrinsic<[llvm_v16i1_ty], [llvm_v8i1_ty, llvm_v8i1_ty],
[IntrNoMem]>;
def int_x86_avx512_kortestz : GCCBuiltin<"__builtin_ia32_kortestz">,
Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty], Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty],
[IntrNoMem]>; [IntrNoMem]>;
def int_x86_avx512_kortestc : GCCBuiltin<"__builtin_ia32_kortestc">, def int_x86_avx512_kortestc_w : GCCBuiltin<"__builtin_ia32_kortestchi">,
Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty], Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty],
[IntrNoMem]>; [IntrNoMem]>;
} }

View File

@ -11490,9 +11490,9 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test); SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
} }
case Intrinsic::x86_avx512_kortestz: case Intrinsic::x86_avx512_kortestz_w:
case Intrinsic::x86_avx512_kortestc: { case Intrinsic::x86_avx512_kortestc_w: {
unsigned X86CC = (IntNo == Intrinsic::x86_avx512_kortestz)? X86::COND_E: X86::COND_B; unsigned X86CC = (IntNo == Intrinsic::x86_avx512_kortestz_w)? X86::COND_E: X86::COND_B;
SDValue LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(1)); SDValue LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(1));
SDValue RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(2)); SDValue RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(2));
SDValue CC = DAG.getConstant(X86CC, MVT::i8); SDValue CC = DAG.getConstant(X86CC, MVT::i8);

View File

@ -898,6 +898,15 @@ multiclass avx512_mask_unop_w<bits<8> opc, string OpcodeStr,
defm KNOT : avx512_mask_unop_w<0x44, "knot", not>; defm KNOT : avx512_mask_unop_w<0x44, "knot", not>;
// Maps the unary 16-bit mask intrinsic int_x86_avx512_<IntName>_w, which
// takes an i16 in GR16, onto the <InstName>Wrr instruction: the operand is
// copied into VK16 as v16i1, the instruction is applied, and the result is
// copied back to GR16. The pattern is predicated on HasAVX512.
multiclass avx512_mask_unop_int<string IntName, string InstName> {
let Predicates = [HasAVX512] in
def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_w")
(i16 GR16:$src)),
(COPY_TO_REGCLASS (!cast<Instruction>(InstName##"Wrr")
(v16i1 (COPY_TO_REGCLASS GR16:$src, VK16))), GR16)>;
}
// Instantiate the pattern for int_x86_avx512_knot_w -> KNOTWrr.
defm : avx512_mask_unop_int<"knot", "KNOT">;
def : Pat<(xor VK16:$src1, (v16i1 immAllOnesV)), (KNOTWrr VK16:$src1)>; def : Pat<(xor VK16:$src1, (v16i1 immAllOnesV)), (KNOTWrr VK16:$src1)>;
def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)), def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)),
(COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src1, VK16)), VK8)>; (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src1, VK16)), VK8)>;
@ -908,7 +917,7 @@ def : Pat<(not VK8:$src),
(KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>; (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
// Mask binary operation // Mask binary operation
// - KADD, KAND, KANDN, KOR, KXNOR, KXOR // - KAND, KANDN, KOR, KXNOR, KXOR
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr, multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
RegisterClass KRC, SDPatternOperator OpNode> { RegisterClass KRC, SDPatternOperator OpNode> {
let Predicates = [HasAVX512] in let Predicates = [HasAVX512] in
@ -928,7 +937,6 @@ def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>; def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
let isCommutable = 1 in { let isCommutable = 1 in {
defm KADD : avx512_mask_binop_w<0x4a, "kadd", add>;
defm KAND : avx512_mask_binop_w<0x41, "kand", and>; defm KAND : avx512_mask_binop_w<0x41, "kand", and>;
let isCommutable = 0 in let isCommutable = 0 in
defm KANDN : avx512_mask_binop_w<0x42, "kandn", andn>; defm KANDN : avx512_mask_binop_w<0x42, "kandn", andn>;
@ -939,17 +947,19 @@ let isCommutable = 1 in {
multiclass avx512_mask_binop_int<string IntName, string InstName> { multiclass avx512_mask_binop_int<string IntName, string InstName> {
let Predicates = [HasAVX512] in let Predicates = [HasAVX512] in
def : Pat<(!cast<Intrinsic>("int_x86_"##IntName##"_v16i1") def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_w")
VK16:$src1, VK16:$src2), (i16 GR16:$src1), (i16 GR16:$src2)),
(!cast<Instruction>(InstName##"Wrr") VK16:$src1, VK16:$src2)>; (COPY_TO_REGCLASS (!cast<Instruction>(InstName##"Wrr")
(v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
(v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))), GR16)>;
} }
defm : avx512_mask_binop_int<"kadd", "KADD">;
defm : avx512_mask_binop_int<"kand", "KAND">; defm : avx512_mask_binop_int<"kand", "KAND">;
defm : avx512_mask_binop_int<"kandn", "KANDN">; defm : avx512_mask_binop_int<"kandn", "KANDN">;
defm : avx512_mask_binop_int<"kor", "KOR">; defm : avx512_mask_binop_int<"kor", "KOR">;
defm : avx512_mask_binop_int<"kxnor", "KXNOR">; defm : avx512_mask_binop_int<"kxnor", "KXNOR">;
defm : avx512_mask_binop_int<"kxor", "KXOR">; defm : avx512_mask_binop_int<"kxor", "KXOR">;
// With AVX-512, 8-bit mask is promoted to 16-bit mask. // With AVX-512, 8-bit mask is promoted to 16-bit mask.
multiclass avx512_binop_pat<SDPatternOperator OpNode, Instruction Inst> { multiclass avx512_binop_pat<SDPatternOperator OpNode, Instruction Inst> {
let Predicates = [HasAVX512] in let Predicates = [HasAVX512] in
@ -967,15 +977,15 @@ defm : avx512_binop_pat<xor, KXORWrr>;
// Mask unpacking // Mask unpacking
multiclass avx512_mask_unpck<bits<8> opc, string OpcodeStr, multiclass avx512_mask_unpck<bits<8> opc, string OpcodeStr,
RegisterClass KRC1, RegisterClass KRC2> { RegisterClass KRC> {
let Predicates = [HasAVX512] in let Predicates = [HasAVX512] in
def rr : I<opc, MRMSrcReg, (outs KRC1:$dst), (ins KRC2:$src1, KRC2:$src2), def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
!strconcat(OpcodeStr, !strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
} }
multiclass avx512_mask_unpck_bw<bits<8> opc, string OpcodeStr> { multiclass avx512_mask_unpck_bw<bits<8> opc, string OpcodeStr> {
defm BW : avx512_mask_unpck<opc, !strconcat(OpcodeStr, "bw"), VK16, VK8>, defm BW : avx512_mask_unpck<opc, !strconcat(OpcodeStr, "bw"), VK16>,
VEX_4V, VEX_L, OpSize, TB; VEX_4V, VEX_L, OpSize, TB;
} }
@ -983,12 +993,14 @@ defm KUNPCK : avx512_mask_unpck_bw<0x4b, "kunpck">;
multiclass avx512_mask_unpck_int<string IntName, string InstName> { multiclass avx512_mask_unpck_int<string IntName, string InstName> {
let Predicates = [HasAVX512] in let Predicates = [HasAVX512] in
def : Pat<(!cast<Intrinsic>("int_x86_"##IntName##"_v16i1") def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_bw")
VK8:$src1, VK8:$src2), (i16 GR16:$src1), (i16 GR16:$src2)),
(!cast<Instruction>(InstName##"BWrr") VK8:$src1, VK8:$src2)>; (COPY_TO_REGCLASS (!cast<Instruction>(InstName##"BWrr")
(v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
(v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))), GR16)>;
} }
defm : avx512_mask_unpck_int<"kunpck", "KUNPCK">;
defm : avx512_mask_unpck_int<"kunpck", "KUNPCK">;
// Mask bit testing // Mask bit testing
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC, multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
SDNode OpNode> { SDNode OpNode> {

View File

@ -1,23 +1,51 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
declare i32 @llvm.x86.avx512.kortestz(i16, i16) nounwind readnone declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
; CHECK: test_kortestz ; CHECK-LABEL: test_kortestz
; CHECK: kortestw ; CHECK: kortestw
; CHECK: sete ; CHECK: sete
define i32 @test_kortestz(i16 %a0, i16 %a1) { define i32 @test_kortestz(i16 %a0, i16 %a1) {
%res = call i32 @llvm.x86.avx512.kortestz(i16 %a0, i16 %a1) %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1)
ret i32 %res ret i32 %res
} }
declare i32 @llvm.x86.avx512.kortestc(i16, i16) nounwind readnone declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
; CHECK: test_kortestc ; CHECK-LABEL: test_kortestc
; CHECK: kortestw ; CHECK: kortestw
; CHECK: sbbl ; CHECK: sbbl
define i32 @test_kortestc(i16 %a0, i16 %a1) { define i32 @test_kortestc(i16 %a0, i16 %a1) {
%res = call i32 @llvm.x86.avx512.kortestc(i16 %a0, i16 %a1) %res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1)
ret i32 %res ret i32 %res
} }
; Exercises the 16-bit mask AND intrinsic, which takes and returns i16.
; The intrinsic is called twice (first with the constant 8, then with %a1),
; and the directives below expect one kandw per call.
declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone
; CHECK-LABEL: test_kand
; CHECK: kandw
; CHECK: kandw
define i16 @test_kand(i16 %a0, i16 %a1) {
%t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8)
%t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1)
ret i16 %t2
}
; Exercises the 16-bit mask NOT intrinsic (one i16 operand, i16 result);
; the directives below expect a knotw in the generated code.
declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone
; CHECK-LABEL: test_knot
; CHECK: knotw
define i16 @test_knot(i16 %a0) {
%res = call i16 @llvm.x86.avx512.knot.w(i16 %a0)
ret i16 %res
}
; Exercises the mask unpack intrinsic, which takes two i16 operands and
; returns i16; the directives below expect a kunpckbw followed by ret.
declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone
; CHECK-LABEL: unpckbw_test
; CHECK: kunpckbw
; CHECK:ret
define i16 @unpckbw_test(i16 %a0, i16 %a1) {
%res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
ret i16 %res
}
define <16 x float> @test_rcp_ps_512(<16 x float> %a0) { define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
; CHECK: vrcp14ps ; CHECK: vrcp14ps
%res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1] %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1]

View File

@ -33,19 +33,6 @@ define i16 @mand16(i16 %x, i16 %y) {
ret i16 %ret ret i16 %ret
} }
; CHECK: unpckbw_test
; CHECK: kunpckbw
; CHECK:ret
declare <16 x i1> @llvm.x86.kunpck.v16i1(<8 x i1>, <8 x i1>) nounwind readnone
define i16 @unpckbw_test(i8 %x, i8 %y) {
%m0 = bitcast i8 %x to <8 x i1>
%m1 = bitcast i8 %y to <8 x i1>
%k = tail call <16 x i1> @llvm.x86.kunpck.v16i1(<8 x i1> %m0, <8 x i1> %m1)
%r = bitcast <16 x i1> %k to i16
ret i16 %r
}
; CHECK: shuf_test1 ; CHECK: shuf_test1
; CHECK: kshiftrw $8 ; CHECK: kshiftrw $8
; CHECK:ret ; CHECK:ret