mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-06 09:44:39 +00:00
AVX-512: added VPSHUFB instruction - all SKX forms
Added intrinsics and encoding tests.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240277 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
e16fa7fec8
commit
114489ab24
@ -1397,6 +1397,24 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
Intrinsic<[llvm_v8i64_ty],
|
||||
[llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
// Masked byte shuffle, 128-bit form. Bound to the GCC builtin
// __builtin_ia32_pshufb128_mask. Returns v16i8 and takes three v16i8 operands
// followed by an i16 mask (16 bits for 16 byte lanes). Marked IntrNoMem.
// NOTE(review): the three vector operands are presumably the two shuffle
// inputs plus a pass-through value -- confirm against the lowering table.
def int_x86_avx512_mask_pshuf_b_128 :
|
||||
GCCBuiltin<"__builtin_ia32_pshufb128_mask">,
|
||||
Intrinsic<[llvm_v16i8_ty],
|
||||
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
// Masked byte shuffle, 256-bit form. Bound to the GCC builtin
// __builtin_ia32_pshufb256_mask. Returns v32i8 and takes three v32i8 operands
// followed by an i32 mask (32 bits for 32 byte lanes). Marked IntrNoMem.
// NOTE(review): the three vector operands are presumably the two shuffle
// inputs plus a pass-through value -- confirm against the lowering table.
def int_x86_avx512_mask_pshuf_b_256 :
|
||||
GCCBuiltin<"__builtin_ia32_pshufb256_mask">,
|
||||
Intrinsic<[llvm_v32i8_ty],
|
||||
[llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
// Masked byte shuffle, 512-bit form. Bound to the GCC builtin
// __builtin_ia32_pshufb512_mask. Returns v64i8 and takes three v64i8 operands
// followed by an i64 mask (64 bits for 64 byte lanes). Marked IntrNoMem.
// NOTE(review): the three vector operands are presumably the two shuffle
// inputs plus a pass-through value -- confirm against the lowering table.
def int_x86_avx512_mask_pshuf_b_512 :
|
||||
GCCBuiltin<"__builtin_ia32_pshufb512_mask">,
|
||||
Intrinsic<[llvm_v64i8_ty],
|
||||
[llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
|
||||
[IntrNoMem]>;
|
||||
}
|
||||
|
||||
// Vector blend
|
||||
|
@ -3870,6 +3870,19 @@ defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
|
||||
X86PShufhw>, EVEX, AVX512XSIi8Base, VEX_W;
|
||||
// VPSHUFL: instantiates avx512_shift_rmi_w for the "vpshuflw" mnemonic at
// opcode 0x70, using the X86PShuflw node, with MRMSrcReg and MRMSrcMem forms.
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
|
||||
X86PShuflw>, EVEX, AVX512XDIi8Base, VEX_W;
|
||||
|
||||
// Instantiates a byte-element instruction at all three vector widths by
// reusing the avx512_var_shift multiclass.
//   opc       - 8-bit opcode forwarded to avx512_var_shift.
//   OpcodeStr - assembly mnemonic.
//   OpNode    - SDNode forwarded to avx512_var_shift.
multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode> {
|
||||
// 512-bit form (v64i8): only HasBWI is required.
let Predicates = [HasBWI] in
|
||||
defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, v64i8_info>, EVEX_V512;
|
||||
|
||||
// 256-bit (v32i8) and 128-bit (v16i8) forms: require HasVLX as well as HasBWI.
let Predicates = [HasVLX, HasBWI] in {
|
||||
defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, v32i8x_info>, EVEX_V256;
|
||||
defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, v16i8x_info>, EVEX_V128;
|
||||
}
|
||||
}
|
||||
|
||||
// VPSHUFB: opcode 0x00, mnemonic "vpshufb", X86pshufb node; avx512_pshufb_sizes
// expands this into the Z, Z256 and Z128 variants.
defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AVX-512 - MOVDDUP
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -582,6 +582,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx512_mask_por_q_128, INTR_TYPE_2OP_MASK, ISD::OR, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_por_q_256, INTR_TYPE_2OP_MASK, ISD::OR, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_por_q_512, INTR_TYPE_2OP_MASK, ISD::OR, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pshuf_b_128, INTR_TYPE_2OP_MASK,
|
||||
X86ISD::PSHUFB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pshuf_b_256, INTR_TYPE_2OP_MASK,
|
||||
X86ISD::PSHUFB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pshuf_b_512, INTR_TYPE_2OP_MASK,
|
||||
X86ISD::PSHUFB, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psll_d, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_psll_q, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_pslli_d, VSHIFT_MASK, X86ISD::VSHLI, 0),
|
||||
|
@ -957,3 +957,16 @@ define <32 x i16>@test_int_x86_avx512_mask_pavg_w_512(<32 x i16> %x0, <32 x i16>
|
||||
%res2 = add <32 x i16> %res, %res1
|
||||
ret <32 x i16> %res2
|
||||
}
|
||||
|
||||
; Masked vpshufb on 512-bit vectors. The intrinsic is invoked once with the
; variable i64 mask %x3 and once with an all-ones mask (-1); the two results
; are added and returned. The directives below require that no call remains in
; the output, that a kmov instruction appears, and that a zmm vpshufb carrying
; {%k1} follows it.
declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
|
||||
|
||||
; CHECK-LABEL: @test_int_x86_avx512_mask_pshuf_b_512
|
||||
; CHECK-NOT: call
|
||||
; CHECK: kmov
|
||||
; CHECK: vpshufb %zmm{{.*}}{%k1}
|
||||
define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
|
||||
%res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
|
||||
%res1 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
|
||||
%res2 = add <64 x i8> %res, %res1
|
||||
ret <64 x i8> %res2
|
||||
}
|
@ -3007,3 +3007,29 @@ define <16 x i16>@test_int_x86_avx512_mask_pavg_w_256(<16 x i16> %x0, <16 x i16>
|
||||
%res2 = add <16 x i16> %res, %res1
|
||||
ret <16 x i16> %res2
|
||||
}
|
||||
|
||||
; Masked vpshufb on 128-bit vectors. The intrinsic is invoked once with the
; variable i16 mask %x3 and once with an all-ones mask (-1); the two results
; are added and returned. The directives below require that no call remains in
; the output, that a kmov instruction appears, and that an xmm vpshufb carrying
; {%k1} follows it.
declare <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
|
||||
|
||||
; CHECK-LABEL: @test_int_x86_avx512_mask_pshuf_b_128
|
||||
; CHECK-NOT: call
|
||||
; CHECK: kmov
|
||||
; CHECK: vpshufb %xmm{{.*}}{%k1}
|
||||
define <16 x i8>@test_int_x86_avx512_mask_pshuf_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
|
||||
%res = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
|
||||
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
|
||||
%res2 = add <16 x i8> %res, %res1
|
||||
ret <16 x i8> %res2
|
||||
}
|
||||
|
||||
; Masked vpshufb on 256-bit vectors. The intrinsic is invoked once with the
; variable i32 mask %x3 and once with an all-ones mask (-1); the two results
; are added and returned. The directives below require that no call remains in
; the output, that a kmov instruction appears, and that a ymm vpshufb carrying
; {%k1} follows it.
declare <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
|
||||
|
||||
; CHECK-LABEL: @test_int_x86_avx512_mask_pshuf_b_256
|
||||
; CHECK-NOT: call
|
||||
; CHECK: kmov
|
||||
; CHECK: vpshufb %ymm{{.*}}{%k1}
|
||||
define <32 x i8>@test_int_x86_avx512_mask_pshuf_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
|
||||
%res = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
|
||||
%res1 = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
|
||||
%res2 = add <32 x i8> %res, %res1
|
||||
ret <32 x i8> %res2
|
||||
}
|
||||
|
@ -3560,3 +3560,38 @@
|
||||
// CHECK: encoding: [0x62,0x61,0x15,0x40,0xe3,0xaa,0xc0,0xdf,0xff,0xff]
|
||||
vpavgw -8256(%rdx), %zmm29, %zmm29
|
||||
|
||||
// vpshufb, 512-bit (zmm) forms. Register operands are tested plain, with a
// {%k7} mask, and with {%k7} {z}; memory operands follow.
// CHECK: vpshufb %zmm20, %zmm26, %zmm22
|
||||
// CHECK: encoding: [0x62,0xa2,0x2d,0x40,0x00,0xf4]
|
||||
vpshufb %zmm20, %zmm26, %zmm22
|
||||
|
||||
// CHECK: vpshufb %zmm20, %zmm26, %zmm22 {%k7}
|
||||
// CHECK: encoding: [0x62,0xa2,0x2d,0x47,0x00,0xf4]
|
||||
vpshufb %zmm20, %zmm26, %zmm22 {%k7}
|
||||
|
||||
// CHECK: vpshufb %zmm20, %zmm26, %zmm22 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0xa2,0x2d,0xc7,0x00,0xf4]
|
||||
vpshufb %zmm20, %zmm26, %zmm22 {%k7} {z}
|
||||
|
||||
// CHECK: vpshufb (%rcx), %zmm26, %zmm22
|
||||
// CHECK: encoding: [0x62,0xe2,0x2d,0x40,0x00,0x31]
|
||||
vpshufb (%rcx), %zmm26, %zmm22
|
||||
|
||||
// CHECK: vpshufb 291(%rax,%r14,8), %zmm26, %zmm22
|
||||
// CHECK: encoding: [0x62,0xa2,0x2d,0x40,0x00,0xb4,0xf0,0x23,0x01,0x00,0x00]
|
||||
vpshufb 291(%rax,%r14,8), %zmm26, %zmm22
|
||||
|
||||
// Displacement boundary cases: 8128 (127 * 64) and -8192 (-128 * 64) are
// encoded in a single displacement byte (0x7f / 0x80), while 8192 and -8256
// take a four-byte displacement.
// CHECK: vpshufb 8128(%rdx), %zmm26, %zmm22
|
||||
// CHECK: encoding: [0x62,0xe2,0x2d,0x40,0x00,0x72,0x7f]
|
||||
vpshufb 8128(%rdx), %zmm26, %zmm22
|
||||
|
||||
// CHECK: vpshufb 8192(%rdx), %zmm26, %zmm22
|
||||
// CHECK: encoding: [0x62,0xe2,0x2d,0x40,0x00,0xb2,0x00,0x20,0x00,0x00]
|
||||
vpshufb 8192(%rdx), %zmm26, %zmm22
|
||||
|
||||
// CHECK: vpshufb -8192(%rdx), %zmm26, %zmm22
|
||||
// CHECK: encoding: [0x62,0xe2,0x2d,0x40,0x00,0x72,0x80]
|
||||
vpshufb -8192(%rdx), %zmm26, %zmm22
|
||||
|
||||
// CHECK: vpshufb -8256(%rdx), %zmm26, %zmm22
|
||||
// CHECK: encoding: [0x62,0xe2,0x2d,0x40,0x00,0xb2,0xc0,0xdf,0xff,0xff]
|
||||
vpshufb -8256(%rdx), %zmm26, %zmm22
|
||||
|
@ -6510,3 +6510,76 @@
|
||||
// CHECK: vpavgw -4128(%rdx), %ymm23, %ymm21
|
||||
// CHECK: encoding: [0x62,0xe1,0x45,0x20,0xe3,0xaa,0xe0,0xef,0xff,0xff]
|
||||
vpavgw -4128(%rdx), %ymm23, %ymm21
|
||||
|
||||
// vpshufb, 128-bit (xmm) forms. Register operands are tested plain, with a
// {%k4} mask, and with {%k4} {z}; memory operands follow.
// CHECK: vpshufb %xmm27, %xmm24, %xmm23
|
||||
// CHECK: encoding: [0x62,0x82,0x3d,0x00,0x00,0xfb]
|
||||
vpshufb %xmm27, %xmm24, %xmm23
|
||||
|
||||
// CHECK: vpshufb %xmm27, %xmm24, %xmm23 {%k4}
|
||||
// CHECK: encoding: [0x62,0x82,0x3d,0x04,0x00,0xfb]
|
||||
vpshufb %xmm27, %xmm24, %xmm23 {%k4}
|
||||
|
||||
// CHECK: vpshufb %xmm27, %xmm24, %xmm23 {%k4} {z}
|
||||
// CHECK: encoding: [0x62,0x82,0x3d,0x84,0x00,0xfb]
|
||||
vpshufb %xmm27, %xmm24, %xmm23 {%k4} {z}
|
||||
|
||||
// CHECK: vpshufb (%rcx), %xmm24, %xmm23
|
||||
// CHECK: encoding: [0x62,0xe2,0x3d,0x00,0x00,0x39]
|
||||
vpshufb (%rcx), %xmm24, %xmm23
|
||||
|
||||
// CHECK: vpshufb 291(%rax,%r14,8), %xmm24, %xmm23
|
||||
// CHECK: encoding: [0x62,0xa2,0x3d,0x00,0x00,0xbc,0xf0,0x23,0x01,0x00,0x00]
|
||||
vpshufb 291(%rax,%r14,8), %xmm24, %xmm23
|
||||
|
||||
// Displacement boundary cases: 2032 (127 * 16) and -2048 (-128 * 16) are
// encoded in a single displacement byte (0x7f / 0x80), while 2048 and -2064
// take a four-byte displacement.
// CHECK: vpshufb 2032(%rdx), %xmm24, %xmm23
|
||||
// CHECK: encoding: [0x62,0xe2,0x3d,0x00,0x00,0x7a,0x7f]
|
||||
vpshufb 2032(%rdx), %xmm24, %xmm23
|
||||
|
||||
// CHECK: vpshufb 2048(%rdx), %xmm24, %xmm23
|
||||
// CHECK: encoding: [0x62,0xe2,0x3d,0x00,0x00,0xba,0x00,0x08,0x00,0x00]
|
||||
vpshufb 2048(%rdx), %xmm24, %xmm23
|
||||
|
||||
// CHECK: vpshufb -2048(%rdx), %xmm24, %xmm23
|
||||
// CHECK: encoding: [0x62,0xe2,0x3d,0x00,0x00,0x7a,0x80]
|
||||
vpshufb -2048(%rdx), %xmm24, %xmm23
|
||||
|
||||
// CHECK: vpshufb -2064(%rdx), %xmm24, %xmm23
|
||||
// CHECK: encoding: [0x62,0xe2,0x3d,0x00,0x00,0xba,0xf0,0xf7,0xff,0xff]
|
||||
vpshufb -2064(%rdx), %xmm24, %xmm23
|
||||
|
||||
// vpshufb, 256-bit (ymm) forms. Register operands are tested plain, with a
// {%k4} mask, and with {%k4} {z}; memory operands follow.
// CHECK: vpshufb %ymm17, %ymm18, %ymm19
|
||||
// CHECK: encoding: [0x62,0xa2,0x6d,0x20,0x00,0xd9]
|
||||
vpshufb %ymm17, %ymm18, %ymm19
|
||||
|
||||
// CHECK: vpshufb %ymm17, %ymm18, %ymm19 {%k4}
|
||||
// CHECK: encoding: [0x62,0xa2,0x6d,0x24,0x00,0xd9]
|
||||
vpshufb %ymm17, %ymm18, %ymm19 {%k4}
|
||||
|
||||
// CHECK: vpshufb %ymm17, %ymm18, %ymm19 {%k4} {z}
|
||||
// CHECK: encoding: [0x62,0xa2,0x6d,0xa4,0x00,0xd9]
|
||||
vpshufb %ymm17, %ymm18, %ymm19 {%k4} {z}
|
||||
|
||||
// CHECK: vpshufb (%rcx), %ymm18, %ymm19
|
||||
// CHECK: encoding: [0x62,0xe2,0x6d,0x20,0x00,0x19]
|
||||
vpshufb (%rcx), %ymm18, %ymm19
|
||||
|
||||
// CHECK: vpshufb 291(%rax,%r14,8), %ymm18, %ymm19
|
||||
// CHECK: encoding: [0x62,0xa2,0x6d,0x20,0x00,0x9c,0xf0,0x23,0x01,0x00,0x00]
|
||||
vpshufb 291(%rax,%r14,8), %ymm18, %ymm19
|
||||
|
||||
// Displacement boundary cases: 4064 (127 * 32) and -4096 (-128 * 32) are
// encoded in a single displacement byte (0x7f / 0x80), while 4096 and -4128
// take a four-byte displacement.
// CHECK: vpshufb 4064(%rdx), %ymm18, %ymm19
|
||||
// CHECK: encoding: [0x62,0xe2,0x6d,0x20,0x00,0x5a,0x7f]
|
||||
vpshufb 4064(%rdx), %ymm18, %ymm19
|
||||
|
||||
// CHECK: vpshufb 4096(%rdx), %ymm18, %ymm19
|
||||
// CHECK: encoding: [0x62,0xe2,0x6d,0x20,0x00,0x9a,0x00,0x10,0x00,0x00]
|
||||
vpshufb 4096(%rdx), %ymm18, %ymm19
|
||||
|
||||
// CHECK: vpshufb -4096(%rdx), %ymm18, %ymm19
|
||||
// CHECK: encoding: [0x62,0xe2,0x6d,0x20,0x00,0x5a,0x80]
|
||||
vpshufb -4096(%rdx), %ymm18, %ymm19
|
||||
|
||||
// CHECK: vpshufb -4128(%rdx), %ymm18, %ymm19
|
||||
// CHECK: encoding: [0x62,0xe2,0x6d,0x20,0x00,0x9a,0xe0,0xef,0xff,0xff]
|
||||
vpshufb -4128(%rdx), %ymm18, %ymm19
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user