mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-07 01:38:26 +00:00
[AVX512] Added intrinsics for 128- and 256-bit versions of VPCMPEQ{B,W,D,Q}
Fixed the lowering of these intrinsics for the cases where the mask type is v2i1 or v4i1. The cmp intrinsics are now lowered as follows: (i8 (int_x86_avx512_mask_pcmpeq_q_128 (v2i64 %a), (v2i64 %b), (i8 %mask))) -> (i8 (bitcast (v8i1 (insert_subvector undef, (v2i1 (and (PCMPEQM %a, %b), (extract_subvector (v8i1 (bitcast %mask)), 0))), 0)))) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218669 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
cfa5724d50
commit
175ff01f0f
@ -3249,6 +3249,34 @@ let TargetPrefix = "x86" in {
|
||||
def int_x86_avx512_mask_pcmpeq_q_512 : GCCBuiltin<"__builtin_ia32_pcmpeqq512_mask">,
|
||||
Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
// 256-bit
|
||||
def int_x86_avx512_mask_pcmpeq_b_256 : GCCBuiltin<"__builtin_ia32_pcmpeqb256_mask">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pcmpeq_w_256 : GCCBuiltin<"__builtin_ia32_pcmpeqw256_mask">,
|
||||
Intrinsic<[llvm_i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pcmpeq_d_256 : GCCBuiltin<"__builtin_ia32_pcmpeqd256_mask">,
|
||||
Intrinsic<[llvm_i8_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pcmpeq_q_256 : GCCBuiltin<"__builtin_ia32_pcmpeqq256_mask">,
|
||||
Intrinsic<[llvm_i8_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
// 128-bit
|
||||
def int_x86_avx512_mask_pcmpeq_b_128 : GCCBuiltin<"__builtin_ia32_pcmpeqb128_mask">,
|
||||
Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pcmpeq_w_128 : GCCBuiltin<"__builtin_ia32_pcmpeqw128_mask">,
|
||||
Intrinsic<[llvm_i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pcmpeq_d_128 : GCCBuiltin<"__builtin_ia32_pcmpeqd128_mask">,
|
||||
Intrinsic<[llvm_i8_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_mask_pcmpeq_q_128 : GCCBuiltin<"__builtin_ia32_pcmpeqq128_mask">,
|
||||
Intrinsic<[llvm_i8_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
}
|
||||
|
||||
// Misc.
|
||||
|
@ -1560,6 +1560,7 @@ void X86TargetLowering::resetOperationActions() {
|
||||
|
||||
setOperationAction(ISD::SETCC, MVT::v4i1, Custom);
|
||||
setOperationAction(ISD::SETCC, MVT::v2i1, Custom);
|
||||
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Legal);
|
||||
}
|
||||
|
||||
// SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion
|
||||
@ -15867,6 +15868,8 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
|
||||
EVT VT = Op.getValueType();
|
||||
EVT MaskVT = EVT::getVectorVT(*DAG.getContext(),
|
||||
MVT::i1, VT.getVectorNumElements());
|
||||
EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
|
||||
Mask.getValueType().getSizeInBits());
|
||||
SDLoc dl(Op);
|
||||
|
||||
assert(MaskVT.isSimple() && "invalid mask type");
|
||||
@ -15874,19 +15877,22 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
|
||||
if (isAllOnes(Mask))
|
||||
return Op;
|
||||
|
||||
// When MaskVT is v2i1 or v4i1, only the low 2 or 4 elements of the bitcast
|
||||
// mask are needed; they are extracted with EXTRACT_SUBVECTOR.
|
||||
SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
|
||||
DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask),
|
||||
DAG.getIntPtrConstant(0));
|
||||
|
||||
switch (Op.getOpcode()) {
|
||||
default: break;
|
||||
case X86ISD::PCMPEQM:
|
||||
case X86ISD::PCMPGTM:
|
||||
case X86ISD::CMPM:
|
||||
case X86ISD::CMPMU:
|
||||
return DAG.getNode(ISD::AND, dl, VT, Op,
|
||||
DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask));
|
||||
return DAG.getNode(ISD::AND, dl, VT, Op, VMask);
|
||||
}
|
||||
|
||||
return DAG.getNode(ISD::VSELECT, dl, VT,
|
||||
DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask),
|
||||
Op, PreservedSrc);
|
||||
return DAG.getNode(ISD::VSELECT, dl, VT, VMask, Op, PreservedSrc);
|
||||
}
|
||||
|
||||
static unsigned getOpcodeForFMAIntrinsic(unsigned IntNo) {
|
||||
@ -15953,13 +15959,28 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
|
||||
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
|
||||
Op.getOperand(2), Op.getOperand(3));
|
||||
case CMP_MASK: {
|
||||
// Comparison intrinsics with masks.
|
||||
// Example of transformation:
|
||||
// (i8 (int_x86_avx512_mask_pcmpeq_q_128
|
||||
// (v2i64 %a), (v2i64 %b), (i8 %mask))) ->
|
||||
// (i8 (bitcast
|
||||
// (v8i1 (insert_subvector undef,
|
||||
// (v2i1 (and (PCMPEQM %a, %b),
|
||||
// (extract_subvector
|
||||
// (v8i1 (bitcast %mask)), 0))), 0))))
|
||||
EVT VT = Op.getOperand(1).getValueType();
|
||||
EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
|
||||
VT.getVectorNumElements());
|
||||
SDValue Mask = Op.getOperand(3);
|
||||
EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
|
||||
Mask.getValueType().getSizeInBits());
|
||||
SDValue Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT,
|
||||
Op.getOperand(1), Op.getOperand(2));
|
||||
SDValue Res = getVectorMaskingNode(Cmp, Op.getOperand(3),
|
||||
DAG.getTargetConstant(0, MaskVT), DAG);
|
||||
SDValue CmpMask = getVectorMaskingNode(Cmp, Op.getOperand(3),
|
||||
DAG.getTargetConstant(0, MaskVT), DAG);
|
||||
SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT,
|
||||
DAG.getUNDEF(BitcastVT), CmpMask,
|
||||
DAG.getIntPtrConstant(0));
|
||||
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
|
||||
}
|
||||
case COMI: { // Comparison intrinsics
|
||||
|
@ -1768,6 +1768,17 @@ def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))),
|
||||
def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
|
||||
(v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>;
|
||||
|
||||
let Predicates = [HasVLX] in {
|
||||
def : Pat<(v8i1 (insert_subvector undef, (v4i1 VK4:$src), (iPTR 0))),
|
||||
(v8i1 (COPY_TO_REGCLASS VK4:$src, VK8))>;
|
||||
def : Pat<(v8i1 (insert_subvector undef, (v2i1 VK2:$src), (iPTR 0))),
|
||||
(v8i1 (COPY_TO_REGCLASS VK2:$src, VK8))>;
|
||||
def : Pat<(v4i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))),
|
||||
(v4i1 (COPY_TO_REGCLASS VK8:$src, VK4))>;
|
||||
def : Pat<(v2i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))),
|
||||
(v2i1 (COPY_TO_REGCLASS VK8:$src, VK2))>;
|
||||
}
|
||||
|
||||
def : Pat<(v8i1 (X86vshli VK8:$src, (i8 imm:$imm))),
|
||||
(v8i1 (COPY_TO_REGCLASS (KSHIFTLWri (COPY_TO_REGCLASS VK8:$src, VK16), (I8Imm $imm)), VK8))>;
|
||||
|
||||
|
@ -156,9 +156,17 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
|
||||
X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
|
||||
X86_INTRINSIC_DATA(avx2_vperm2i128, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_128, CMP_MASK, X86ISD::PCMPEQM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_256, CMP_MASK, X86ISD::PCMPEQM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_512, CMP_MASK, X86ISD::PCMPEQM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_d_128, CMP_MASK, X86ISD::PCMPEQM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_d_256, CMP_MASK, X86ISD::PCMPEQM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_d_512, CMP_MASK, X86ISD::PCMPEQM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_q_128, CMP_MASK, X86ISD::PCMPEQM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_q_256, CMP_MASK, X86ISD::PCMPEQM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_q_512, CMP_MASK, X86ISD::PCMPEQM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_w_128, CMP_MASK, X86ISD::PCMPEQM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_w_256, CMP_MASK, X86ISD::PCMPEQM, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_w_512, CMP_MASK, X86ISD::PCMPEQM, 0),
|
||||
X86_INTRINSIC_DATA(avx_hadd_pd_256, INTR_TYPE_2OP, X86ISD::FHADD, 0),
|
||||
X86_INTRINSIC_DATA(avx_hadd_ps_256, INTR_TYPE_2OP, X86ISD::FHADD, 0),
|
||||
|
70
test/CodeGen/X86/avx512bwvl-intrinsics.ll
Normal file
70
test/CodeGen/X86/avx512bwvl-intrinsics.ll
Normal file
@ -0,0 +1,70 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512vl --show-mc-encoding| FileCheck %s
|
||||
|
||||
; 256-bit
|
||||
|
||||
define i32 @test_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b) {
|
||||
; CHECK-LABEL: test_pcmpeq_b_256
|
||||
; CHECK: vpcmpeqb %ymm1, %ymm0, %k0 ##
|
||||
%res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1)
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
define i32 @test_mask_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpeq_b_256
|
||||
; CHECK: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} ##
|
||||
%res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
declare i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8>, <32 x i8>, i32)
|
||||
|
||||
define i16 @test_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b) {
|
||||
; CHECK-LABEL: test_pcmpeq_w_256
|
||||
; CHECK: vpcmpeqw %ymm1, %ymm0, %k0 ##
|
||||
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
define i16 @test_mask_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpeq_w_256
|
||||
; CHECK: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} ##
|
||||
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask)
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
declare i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16>, <16 x i16>, i16)
|
||||
|
||||
; 128-bit
|
||||
|
||||
define i16 @test_pcmpeq_b_128(<16 x i8> %a, <16 x i8> %b) {
|
||||
; CHECK-LABEL: test_pcmpeq_b_128
|
||||
; CHECK: vpcmpeqb %xmm1, %xmm0, %k0 ##
|
||||
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8> %a, <16 x i8> %b, i16 -1)
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
define i16 @test_mask_pcmpeq_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpeq_b_128
|
||||
; CHECK: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} ##
|
||||
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask)
|
||||
ret i16 %res
|
||||
}
|
||||
|
||||
declare i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8>, <16 x i8>, i16)
|
||||
|
||||
define i8 @test_pcmpeq_w_128(<8 x i16> %a, <8 x i16> %b) {
|
||||
; CHECK-LABEL: test_pcmpeq_w_128
|
||||
; CHECK: vpcmpeqw %xmm1, %xmm0, %k0 ##
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16> %a, <8 x i16> %b, i8 -1)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
define i8 @test_mask_pcmpeq_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpeq_w_128
|
||||
; CHECK: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} ##
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16>, <8 x i16>, i8)
|
||||
|
69
test/CodeGen/X86/avx512vl-intrinsics.ll
Normal file
69
test/CodeGen/X86/avx512vl-intrinsics.ll
Normal file
@ -0,0 +1,69 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s
|
||||
|
||||
; 256-bit
|
||||
|
||||
define i8 @test_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b) {
|
||||
; CHECK-LABEL: test_pcmpeq_d_256
|
||||
; CHECK: vpcmpeqd %ymm1, %ymm0, %k0 ##
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
define i8 @test_mask_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpeq_d_256
|
||||
; CHECK: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ##
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32>, <8 x i32>, i8)
|
||||
|
||||
define i8 @test_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b) {
|
||||
; CHECK-LABEL: test_pcmpeq_q_256
|
||||
; CHECK: vpcmpeqq %ymm1, %ymm0, %k0 ##
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
define i8 @test_mask_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpeq_q_256
|
||||
; CHECK: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ##
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64>, <4 x i64>, i8)
|
||||
|
||||
; 128-bit
|
||||
|
||||
define i8 @test_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b) {
|
||||
; CHECK-LABEL: test_pcmpeq_d_128
|
||||
; CHECK: vpcmpeqd %xmm1, %xmm0, %k0 ##
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
define i8 @test_mask_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpeq_d_128
|
||||
; CHECK: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ##
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32>, <4 x i32>, i8)
|
||||
|
||||
define i8 @test_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b) {
|
||||
; CHECK-LABEL: test_pcmpeq_q_128
|
||||
; CHECK: vpcmpeqq %xmm1, %xmm0, %k0 ##
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
define i8 @test_mask_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
|
||||
; CHECK-LABEL: test_mask_pcmpeq_q_128
|
||||
; CHECK: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ##
|
||||
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
|
||||
ret i8 %res
|
||||
}
|
||||
|
||||
declare i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64>, <2 x i64>, i8)
|
Loading…
x
Reference in New Issue
Block a user