AVX-512: added VPSHUFB instruction - all SKX forms

Added intrinsics and encoding tests.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240277 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Elena Demikhovsky 2015-06-22 13:00:42 +00:00
parent e16fa7fec8
commit 114489ab24
7 changed files with 184 additions and 0 deletions

View File

@ -1397,6 +1397,24 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v8i64_ty],
[llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
[IntrNoMem]>;
// Masked byte-shuffle intrinsic, 128-bit form. Takes three <16 x i8> vectors
// and an i16 mask (16 bits, matching the 16 byte elements) and returns a
// <16 x i8> vector. Exposed to front ends as __builtin_ia32_pshufb128_mask
// and declared IntrNoMem.
// NOTE(review): operand roles are presumably (source, shuffle control,
// pass-through, write-mask), as for the other masked intrinsics -- confirm.
def int_x86_avx512_mask_pshuf_b_128 :
GCCBuiltin<"__builtin_ia32_pshufb128_mask">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty],
[IntrNoMem]>;
// Masked byte-shuffle intrinsic, 256-bit form. Takes three <32 x i8> vectors
// and an i32 mask (32 bits, matching the 32 byte elements) and returns a
// <32 x i8> vector. Exposed to front ends as __builtin_ia32_pshufb256_mask
// and declared IntrNoMem.
// NOTE(review): operand roles are presumably (source, shuffle control,
// pass-through, write-mask), as for the other masked intrinsics -- confirm.
def int_x86_avx512_mask_pshuf_b_256 :
GCCBuiltin<"__builtin_ia32_pshufb256_mask">,
Intrinsic<[llvm_v32i8_ty],
[llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty],
[IntrNoMem]>;
// Masked byte-shuffle intrinsic, 512-bit form. Takes three <64 x i8> vectors
// and an i64 mask (64 bits, matching the 64 byte elements) and returns a
// <64 x i8> vector. Exposed to front ends as __builtin_ia32_pshufb512_mask
// and declared IntrNoMem.
// NOTE(review): operand roles are presumably (source, shuffle control,
// pass-through, write-mask), as for the other masked intrinsics -- confirm.
def int_x86_avx512_mask_pshuf_b_512 :
GCCBuiltin<"__builtin_ia32_pshufb512_mask">,
Intrinsic<[llvm_v64i8_ty],
[llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
[IntrNoMem]>;
}
// Vector blend

View File

@ -3870,6 +3870,19 @@ defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
X86PShufhw>, EVEX, AVX512XSIi8Base, VEX_W;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
X86PShuflw>, EVEX, AVX512XDIi8Base, VEX_W;
// Instantiates one instruction family at three vector widths by forwarding
// opc, OpcodeStr and OpNode to the avx512_var_shift multiclass with byte
// element vector infos:
//   Z    - v64i8_info,  EVEX_V512, predicated on HasBWI
//   Z256 - v32i8x_info, EVEX_V256, predicated on HasVLX and HasBWI
//   Z128 - v16i8x_info, EVEX_V128, predicated on HasVLX and HasBWI
// NOTE(review): avx512_var_shift is defined elsewhere; the register/memory and
// masked variants it produces are not visible in this hunk.
multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode> {
// 512-bit form only needs the byte/word extension.
let Predicates = [HasBWI] in
defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, v64i8_info>, EVEX_V512;
// 256-bit and 128-bit forms additionally require the vector-length extension.
let Predicates = [HasVLX, HasBWI] in {
defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, v32i8x_info>, EVEX_V256;
defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, v16i8x_info>, EVEX_V128;
}
}
// VPSHUFB: opcode 0x00, mnemonic "vpshufb", DAG node X86pshufb.
defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb>;
//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//

View File

@ -582,6 +582,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_por_q_128, INTR_TYPE_2OP_MASK, ISD::OR, 0),
X86_INTRINSIC_DATA(avx512_mask_por_q_256, INTR_TYPE_2OP_MASK, ISD::OR, 0),
X86_INTRINSIC_DATA(avx512_mask_por_q_512, INTR_TYPE_2OP_MASK, ISD::OR, 0),
X86_INTRINSIC_DATA(avx512_mask_pshuf_b_128, INTR_TYPE_2OP_MASK,
X86ISD::PSHUFB, 0),
X86_INTRINSIC_DATA(avx512_mask_pshuf_b_256, INTR_TYPE_2OP_MASK,
X86ISD::PSHUFB, 0),
X86_INTRINSIC_DATA(avx512_mask_pshuf_b_512, INTR_TYPE_2OP_MASK,
X86ISD::PSHUFB, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_d, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_q, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_mask_pslli_d, VSHIFT_MASK, X86ISD::VSHLI, 0),

View File

@ -957,3 +957,16 @@ define <32 x i16>@test_int_x86_avx512_mask_pavg_w_512(<32 x i16> %x0, <32 x i16>
%res2 = add <32 x i16> %res, %res1
ret <32 x i16> %res2
}
; Lowering test for the 512-bit masked byte-shuffle intrinsic. The intrinsic is
; invoked twice on the same vector operands: once with the variable mask %x3
; and once with an all-ones mask (i64 -1). The two results are added so that
; both invocations contribute to the returned value.
; The expectations below require that no "call" text precedes the kmov, and
; that a vpshufb on a zmm register with the {%k1} write-mask follows it.
declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
; CHECK-LABEL: @test_int_x86_avx512_mask_pshuf_b_512
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vpshufb %zmm{{.*}}{%k1}
define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; Variable-mask form.
%res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
; All-ones-mask form.
%res1 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
%res2 = add <64 x i8> %res, %res1
ret <64 x i8> %res2
}

View File

@ -3007,3 +3007,29 @@ define <16 x i16>@test_int_x86_avx512_mask_pavg_w_256(<16 x i16> %x0, <16 x i16>
%res2 = add <16 x i16> %res, %res1
ret <16 x i16> %res2
}
; Lowering test for the 128-bit masked byte-shuffle intrinsic. The intrinsic is
; invoked twice on the same vector operands: once with the variable mask %x3
; and once with an all-ones mask (i16 -1). The two results are added so that
; both invocations contribute to the returned value.
; The expectations below require that no "call" text precedes the kmov, and
; that a vpshufb on an xmm register with the {%k1} write-mask follows it.
declare <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
; CHECK-LABEL: @test_int_x86_avx512_mask_pshuf_b_128
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vpshufb %xmm{{.*}}{%k1}
define <16 x i8>@test_int_x86_avx512_mask_pshuf_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
; Variable-mask form.
%res = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
; All-ones-mask form.
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
%res2 = add <16 x i8> %res, %res1
ret <16 x i8> %res2
}
; Lowering test for the 256-bit masked byte-shuffle intrinsic. The intrinsic is
; invoked twice on the same vector operands: once with the variable mask %x3
; and once with an all-ones mask (i32 -1). The two results are added so that
; both invocations contribute to the returned value.
; The expectations below require that no "call" text precedes the kmov, and
; that a vpshufb on a ymm register with the {%k1} write-mask follows it.
declare <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
; CHECK-LABEL: @test_int_x86_avx512_mask_pshuf_b_256
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vpshufb %ymm{{.*}}{%k1}
define <32 x i8>@test_int_x86_avx512_mask_pshuf_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
; Variable-mask form.
%res = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
; All-ones-mask form.
%res1 = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
%res2 = add <32 x i8> %res, %res1
ret <32 x i8> %res2
}

View File

@ -3560,3 +3560,38 @@
// CHECK: encoding: [0x62,0x61,0x15,0x40,0xe3,0xaa,0xc0,0xdf,0xff,0xff]
vpavgw -8256(%rdx), %zmm29, %zmm29
// vpshufb, 512-bit (zmm) forms. Each instruction is preceded by its expected
// printed form and encoding bytes. Covered operands: register source
// (unmasked, {%k7}, and {%k7} {z}), then memory sources (plain base,
// base+index*scale+displacement, and four base+displacement cases).
// Displacements 8128 and -8192 are expected as a single byte (0x7f, 0x80);
// 8192 and -8256 are expected as four displacement bytes.
// CHECK: vpshufb %zmm20, %zmm26, %zmm22
// CHECK: encoding: [0x62,0xa2,0x2d,0x40,0x00,0xf4]
vpshufb %zmm20, %zmm26, %zmm22
// CHECK: vpshufb %zmm20, %zmm26, %zmm22 {%k7}
// CHECK: encoding: [0x62,0xa2,0x2d,0x47,0x00,0xf4]
vpshufb %zmm20, %zmm26, %zmm22 {%k7}
// CHECK: vpshufb %zmm20, %zmm26, %zmm22 {%k7} {z}
// CHECK: encoding: [0x62,0xa2,0x2d,0xc7,0x00,0xf4]
vpshufb %zmm20, %zmm26, %zmm22 {%k7} {z}
// CHECK: vpshufb (%rcx), %zmm26, %zmm22
// CHECK: encoding: [0x62,0xe2,0x2d,0x40,0x00,0x31]
vpshufb (%rcx), %zmm26, %zmm22
// CHECK: vpshufb 291(%rax,%r14,8), %zmm26, %zmm22
// CHECK: encoding: [0x62,0xa2,0x2d,0x40,0x00,0xb4,0xf0,0x23,0x01,0x00,0x00]
vpshufb 291(%rax,%r14,8), %zmm26, %zmm22
// CHECK: vpshufb 8128(%rdx), %zmm26, %zmm22
// CHECK: encoding: [0x62,0xe2,0x2d,0x40,0x00,0x72,0x7f]
vpshufb 8128(%rdx), %zmm26, %zmm22
// CHECK: vpshufb 8192(%rdx), %zmm26, %zmm22
// CHECK: encoding: [0x62,0xe2,0x2d,0x40,0x00,0xb2,0x00,0x20,0x00,0x00]
vpshufb 8192(%rdx), %zmm26, %zmm22
// CHECK: vpshufb -8192(%rdx), %zmm26, %zmm22
// CHECK: encoding: [0x62,0xe2,0x2d,0x40,0x00,0x72,0x80]
vpshufb -8192(%rdx), %zmm26, %zmm22
// CHECK: vpshufb -8256(%rdx), %zmm26, %zmm22
// CHECK: encoding: [0x62,0xe2,0x2d,0x40,0x00,0xb2,0xc0,0xdf,0xff,0xff]
vpshufb -8256(%rdx), %zmm26, %zmm22

View File

@ -6510,3 +6510,76 @@
// CHECK: vpavgw -4128(%rdx), %ymm23, %ymm21
// CHECK: encoding: [0x62,0xe1,0x45,0x20,0xe3,0xaa,0xe0,0xef,0xff,0xff]
vpavgw -4128(%rdx), %ymm23, %ymm21
// vpshufb, 128-bit (xmm) forms. Each instruction is preceded by its expected
// printed form and encoding bytes. Covered operands: register source
// (unmasked, {%k4}, and {%k4} {z}), then memory sources (plain base,
// base+index*scale+displacement, and four base+displacement cases).
// Displacements 2032 and -2048 are expected as a single byte (0x7f, 0x80);
// 2048 and -2064 are expected as four displacement bytes.
// CHECK: vpshufb %xmm27, %xmm24, %xmm23
// CHECK: encoding: [0x62,0x82,0x3d,0x00,0x00,0xfb]
vpshufb %xmm27, %xmm24, %xmm23
// CHECK: vpshufb %xmm27, %xmm24, %xmm23 {%k4}
// CHECK: encoding: [0x62,0x82,0x3d,0x04,0x00,0xfb]
vpshufb %xmm27, %xmm24, %xmm23 {%k4}
// CHECK: vpshufb %xmm27, %xmm24, %xmm23 {%k4} {z}
// CHECK: encoding: [0x62,0x82,0x3d,0x84,0x00,0xfb]
vpshufb %xmm27, %xmm24, %xmm23 {%k4} {z}
// CHECK: vpshufb (%rcx), %xmm24, %xmm23
// CHECK: encoding: [0x62,0xe2,0x3d,0x00,0x00,0x39]
vpshufb (%rcx), %xmm24, %xmm23
// CHECK: vpshufb 291(%rax,%r14,8), %xmm24, %xmm23
// CHECK: encoding: [0x62,0xa2,0x3d,0x00,0x00,0xbc,0xf0,0x23,0x01,0x00,0x00]
vpshufb 291(%rax,%r14,8), %xmm24, %xmm23
// CHECK: vpshufb 2032(%rdx), %xmm24, %xmm23
// CHECK: encoding: [0x62,0xe2,0x3d,0x00,0x00,0x7a,0x7f]
vpshufb 2032(%rdx), %xmm24, %xmm23
// CHECK: vpshufb 2048(%rdx), %xmm24, %xmm23
// CHECK: encoding: [0x62,0xe2,0x3d,0x00,0x00,0xba,0x00,0x08,0x00,0x00]
vpshufb 2048(%rdx), %xmm24, %xmm23
// CHECK: vpshufb -2048(%rdx), %xmm24, %xmm23
// CHECK: encoding: [0x62,0xe2,0x3d,0x00,0x00,0x7a,0x80]
vpshufb -2048(%rdx), %xmm24, %xmm23
// CHECK: vpshufb -2064(%rdx), %xmm24, %xmm23
// CHECK: encoding: [0x62,0xe2,0x3d,0x00,0x00,0xba,0xf0,0xf7,0xff,0xff]
vpshufb -2064(%rdx), %xmm24, %xmm23
// vpshufb, 256-bit (ymm) forms. Each instruction is preceded by its expected
// printed form and encoding bytes. Covered operands: register source
// (unmasked, {%k4}, and {%k4} {z}), then memory sources (plain base,
// base+index*scale+displacement, and four base+displacement cases).
// Displacements 4064 and -4096 are expected as a single byte (0x7f, 0x80);
// 4096 and -4128 are expected as four displacement bytes.
// CHECK: vpshufb %ymm17, %ymm18, %ymm19
// CHECK: encoding: [0x62,0xa2,0x6d,0x20,0x00,0xd9]
vpshufb %ymm17, %ymm18, %ymm19
// CHECK: vpshufb %ymm17, %ymm18, %ymm19 {%k4}
// CHECK: encoding: [0x62,0xa2,0x6d,0x24,0x00,0xd9]
vpshufb %ymm17, %ymm18, %ymm19 {%k4}
// CHECK: vpshufb %ymm17, %ymm18, %ymm19 {%k4} {z}
// CHECK: encoding: [0x62,0xa2,0x6d,0xa4,0x00,0xd9]
vpshufb %ymm17, %ymm18, %ymm19 {%k4} {z}
// CHECK: vpshufb (%rcx), %ymm18, %ymm19
// CHECK: encoding: [0x62,0xe2,0x6d,0x20,0x00,0x19]
vpshufb (%rcx), %ymm18, %ymm19
// CHECK: vpshufb 291(%rax,%r14,8), %ymm18, %ymm19
// CHECK: encoding: [0x62,0xa2,0x6d,0x20,0x00,0x9c,0xf0,0x23,0x01,0x00,0x00]
vpshufb 291(%rax,%r14,8), %ymm18, %ymm19
// CHECK: vpshufb 4064(%rdx), %ymm18, %ymm19
// CHECK: encoding: [0x62,0xe2,0x6d,0x20,0x00,0x5a,0x7f]
vpshufb 4064(%rdx), %ymm18, %ymm19
// CHECK: vpshufb 4096(%rdx), %ymm18, %ymm19
// CHECK: encoding: [0x62,0xe2,0x6d,0x20,0x00,0x9a,0x00,0x10,0x00,0x00]
vpshufb 4096(%rdx), %ymm18, %ymm19
// CHECK: vpshufb -4096(%rdx), %ymm18, %ymm19
// CHECK: encoding: [0x62,0xe2,0x6d,0x20,0x00,0x5a,0x80]
vpshufb -4096(%rdx), %ymm18, %ymm19
// CHECK: vpshufb -4128(%rdx), %ymm18, %ymm19
// CHECK: encoding: [0x62,0xe2,0x6d,0x20,0x00,0x9a,0xe0,0xef,0xff,0xff]
vpshufb -4128(%rdx), %ymm18, %ymm19