AVX : Fix ISA disabling in case AVX512VL , some instructions should be disabled only if AVX512BW present.

Tests added.

Differential Revision: http://reviews.llvm.org/D11122

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@242270 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Igor Breger 2015-07-15 07:08:10 +00:00
parent 4c9cd28947
commit 368de4c9d6
3 changed files with 246 additions and 30 deletions

View File

@ -767,6 +767,7 @@ def HasDQI : Predicate<"Subtarget->hasDQI()">,
def NoDQI : Predicate<"!Subtarget->hasDQI()">;
def HasBWI : Predicate<"Subtarget->hasBWI()">,
AssemblerPredicate<"FeatureBWI", "AVX-512 BW ISA">;
def NoBWI : Predicate<"!Subtarget->hasBWI()">;
def HasVLX : Predicate<"Subtarget->hasVLX()">,
AssemblerPredicate<"FeatureVLX", "AVX-512 VL ISA">;
def NoVLX : Predicate<"!Subtarget->hasVLX()">;

View File

@ -2845,8 +2845,8 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
ValueType OpVT128, ValueType OpVT256,
OpndItins itins, bit IsCommutable = 0> {
let Predicates = [HasAVX, NoVLX] in
OpndItins itins, bit IsCommutable = 0, Predicate prd> {
let Predicates = [HasAVX, prd] in
defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
VR128, loadv2i64, i128mem, itins, IsCommutable, 0>, VEX_4V;
@ -2854,7 +2854,7 @@ let Constraints = "$src1 = $dst" in
defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
memopv2i64, i128mem, itins, IsCommutable, 1>;
let Predicates = [HasAVX2, NoVLX] in
let Predicates = [HasAVX2, prd] in
defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
OpVT256, VR256, loadv4i64, i256mem, itins,
IsCommutable, 0>, VEX_4V, VEX_L;
@ -2863,13 +2863,13 @@ let Predicates = [HasAVX2, NoVLX] in
// These are ordered here for pattern ordering requirements with the fp versions
defm PAND : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64,
SSE_VEC_BIT_ITINS_P, 1>;
SSE_VEC_BIT_ITINS_P, 1, NoVLX>;
defm POR : PDI_binop_all<0xEB, "por", or, v2i64, v4i64,
SSE_VEC_BIT_ITINS_P, 1>;
SSE_VEC_BIT_ITINS_P, 1, NoVLX>;
defm PXOR : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64,
SSE_VEC_BIT_ITINS_P, 1>;
SSE_VEC_BIT_ITINS_P, 1, NoVLX>;
defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64,
SSE_VEC_BIT_ITINS_P, 0>;
SSE_VEC_BIT_ITINS_P, 0, NoVLX>;
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Logical Instructions
@ -4010,39 +4010,39 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
} // ExeDomain = SSEPackedInt
defm PADDB : PDI_binop_all<0xFC, "paddb", add, v16i8, v32i8,
SSE_INTALU_ITINS_P, 1>;
SSE_INTALU_ITINS_P, 1, NoBWI>;
defm PADDW : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16,
SSE_INTALU_ITINS_P, 1>;
SSE_INTALU_ITINS_P, 1, NoBWI>;
defm PADDD : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32,
SSE_INTALU_ITINS_P, 1>;
SSE_INTALU_ITINS_P, 1, NoVLX>;
defm PADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64,
SSE_INTALUQ_ITINS_P, 1>;
SSE_INTALUQ_ITINS_P, 1, NoVLX>;
defm PMULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16,
SSE_INTMUL_ITINS_P, 1>;
SSE_INTMUL_ITINS_P, 1, NoBWI>;
defm PMULHUW : PDI_binop_all<0xE4, "pmulhuw", mulhu, v8i16, v16i16,
SSE_INTMUL_ITINS_P, 1>;
SSE_INTMUL_ITINS_P, 1, NoBWI>;
defm PMULHW : PDI_binop_all<0xE5, "pmulhw", mulhs, v8i16, v16i16,
SSE_INTMUL_ITINS_P, 1>;
SSE_INTMUL_ITINS_P, 1, NoBWI>;
defm PSUBB : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8,
SSE_INTALU_ITINS_P, 0>;
SSE_INTALU_ITINS_P, 0, NoBWI>;
defm PSUBW : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16,
SSE_INTALU_ITINS_P, 0>;
SSE_INTALU_ITINS_P, 0, NoBWI>;
defm PSUBD : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32,
SSE_INTALU_ITINS_P, 0>;
SSE_INTALU_ITINS_P, 0, NoVLX>;
defm PSUBQ : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64,
SSE_INTALUQ_ITINS_P, 0>;
SSE_INTALUQ_ITINS_P, 0, NoVLX>;
defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8,
SSE_INTALU_ITINS_P, 0>;
SSE_INTALU_ITINS_P, 0, NoBWI>;
defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16,
SSE_INTALU_ITINS_P, 0>;
SSE_INTALU_ITINS_P, 0, NoBWI>;
defm PMINUB : PDI_binop_all<0xDA, "pminub", umin, v16i8, v32i8,
SSE_INTALU_ITINS_P, 1>;
SSE_INTALU_ITINS_P, 1, NoBWI>;
defm PMINSW : PDI_binop_all<0xEA, "pminsw", smin, v8i16, v16i16,
SSE_INTALU_ITINS_P, 1>;
SSE_INTALU_ITINS_P, 1, NoBWI>;
defm PMAXUB : PDI_binop_all<0xDE, "pmaxub", umax, v16i8, v32i8,
SSE_INTALU_ITINS_P, 1>;
SSE_INTALU_ITINS_P, 1, NoBWI>;
defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", smax, v8i16, v16i16,
SSE_INTALU_ITINS_P, 1>;
SSE_INTALU_ITINS_P, 1, NoBWI>;
// Intrinsic forms
defm PSUBSB : PDI_binop_all_int<0xE8, "psubsb", int_x86_sse2_psubs_b,
@ -4239,17 +4239,17 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in {
//===---------------------------------------------------------------------===//
defm PCMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8,
SSE_INTALU_ITINS_P, 1>;
SSE_INTALU_ITINS_P, 1, NoBWI>;
defm PCMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16,
SSE_INTALU_ITINS_P, 1>;
SSE_INTALU_ITINS_P, 1, NoBWI>;
defm PCMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32,
SSE_INTALU_ITINS_P, 1>;
SSE_INTALU_ITINS_P, 1, NoVLX>;
defm PCMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8,
SSE_INTALU_ITINS_P, 0>;
SSE_INTALU_ITINS_P, 0, NoBWI>;
defm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16,
SSE_INTALU_ITINS_P, 0>;
SSE_INTALU_ITINS_P, 0, NoBWI>;
defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32,
SSE_INTALU_ITINS_P, 0>;
SSE_INTALU_ITINS_P, 0, NoVLX>;
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Shuffle Instructions

View File

@ -0,0 +1,215 @@
; check AVX2 instructions that are disabled in case avx512VL/avx512BW present
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=core-avx2 -mattr=+avx2 -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512vl -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512vl -mattr=+avx512bw -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=skx -o /dev/null
define <4 x i64> @vpand_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
; Force the execution domain with an add.
%a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
%x = and <4 x i64> %a2, %b
ret <4 x i64> %x
}
define <2 x i64> @vpand_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
; Force the execution domain with an add.
%a2 = add <2 x i64> %a, <i64 1, i64 1>
%x = and <2 x i64> %a2, %b
ret <2 x i64> %x
}
define <4 x i64> @vpandn_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
; Force the execution domain with an add.
%a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
%y = xor <4 x i64> %a2, <i64 -1, i64 -1, i64 -1, i64 -1>
%x = and <4 x i64> %a, %y
ret <4 x i64> %x
}
define <2 x i64> @vpandn_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
; Force the execution domain with an add.
%a2 = add <2 x i64> %a, <i64 1, i64 1>
%y = xor <2 x i64> %a2, <i64 -1, i64 -1>
%x = and <2 x i64> %a, %y
ret <2 x i64> %x
}
define <4 x i64> @vpor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
; Force the execution domain with an add.
%a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
%x = or <4 x i64> %a2, %b
ret <4 x i64> %x
}
define <4 x i64> @vpxor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
; Force the execution domain with an add.
%a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
%x = xor <4 x i64> %a2, %b
ret <4 x i64> %x
}
define <2 x i64> @vpor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
; Force the execution domain with an add.
%a2 = add <2 x i64> %a, <i64 1, i64 1>
%x = or <2 x i64> %a2, %b
ret <2 x i64> %x
}
define <2 x i64> @vpxor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
; Force the execution domain with an add.
%a2 = add <2 x i64> %a, <i64 1, i64 1>
%x = xor <2 x i64> %a2, %b
ret <2 x i64> %x
}
define <4 x i64> @test_vpaddq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
%x = add <4 x i64> %i, %j
ret <4 x i64> %x
}
define <8 x i32> @test_vpaddd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
%x = add <8 x i32> %i, %j
ret <8 x i32> %x
}
define <16 x i16> @test_vpaddw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
%x = add <16 x i16> %i, %j
ret <16 x i16> %x
}
define <32 x i8> @test_vpaddb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
%x = add <32 x i8> %i, %j
ret <32 x i8> %x
}
define <4 x i64> @test_vpsubq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
%x = sub <4 x i64> %i, %j
ret <4 x i64> %x
}
define <8 x i32> @test_vpsubd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
%x = sub <8 x i32> %i, %j
ret <8 x i32> %x
}
define <16 x i16> @test_vpsubw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
%x = sub <16 x i16> %i, %j
ret <16 x i16> %x
}
define <32 x i8> @test_vpsubb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
%x = sub <32 x i8> %i, %j
ret <32 x i8> %x
}
define <16 x i16> @test_vpmullw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
%x = mul <16 x i16> %i, %j
ret <16 x i16> %x
}
define <8 x i32> @test_vpcmpgtd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
%bincmp = icmp slt <8 x i32> %i, %j
%x = sext <8 x i1> %bincmp to <8 x i32>
ret <8 x i32> %x
}
define <32 x i8> @test_vpcmpeqb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
%bincmp = icmp eq <32 x i8> %i, %j
%x = sext <32 x i1> %bincmp to <32 x i8>
ret <32 x i8> %x
}
define <16 x i16> @test_vpcmpeqw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
%bincmp = icmp eq <16 x i16> %i, %j
%x = sext <16 x i1> %bincmp to <16 x i16>
ret <16 x i16> %x
}
define <32 x i8> @test_vpcmpgtb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
%bincmp = icmp slt <32 x i8> %i, %j
%x = sext <32 x i1> %bincmp to <32 x i8>
ret <32 x i8> %x
}
define <16 x i16> @test_vpcmpgtw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
%bincmp = icmp slt <16 x i16> %i, %j
%x = sext <16 x i1> %bincmp to <16 x i16>
ret <16 x i16> %x
}
define <8 x i32> @test_vpcmpeqd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
%bincmp = icmp eq <8 x i32> %i, %j
%x = sext <8 x i1> %bincmp to <8 x i32>
ret <8 x i32> %x
}
define <2 x i64> @test_vpaddq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
%x = add <2 x i64> %i, %j
ret <2 x i64> %x
}
define <4 x i32> @test_vpaddd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
%x = add <4 x i32> %i, %j
ret <4 x i32> %x
}
define <8 x i16> @test_vpaddw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
%x = add <8 x i16> %i, %j
ret <8 x i16> %x
}
define <16 x i8> @test_vpaddb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
%x = add <16 x i8> %i, %j
ret <16 x i8> %x
}
define <2 x i64> @test_vpsubq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
%x = sub <2 x i64> %i, %j
ret <2 x i64> %x
}
define <4 x i32> @test_vpsubd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
%x = sub <4 x i32> %i, %j
ret <4 x i32> %x
}
define <8 x i16> @test_vpsubw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
%x = sub <8 x i16> %i, %j
ret <8 x i16> %x
}
define <16 x i8> @test_vpsubb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
%x = sub <16 x i8> %i, %j
ret <16 x i8> %x
}
define <8 x i16> @test_vpmullw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
%x = mul <8 x i16> %i, %j
ret <8 x i16> %x
}
define <8 x i16> @test_vpcmpgtw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
%bincmp = icmp slt <8 x i16> %i, %j
%x = sext <8 x i1> %bincmp to <8 x i16>
ret <8 x i16> %x
}
define <16 x i8> @test_vpcmpgtb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
%bincmp = icmp slt <16 x i8> %i, %j
%x = sext <16 x i1> %bincmp to <16 x i8>
ret <16 x i8> %x
}
define <8 x i16> @test_vpcmpeqw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
%bincmp = icmp eq <8 x i16> %i, %j
%x = sext <8 x i1> %bincmp to <8 x i16>
ret <8 x i16> %x
}
define <16 x i8> @test_vpcmpeqb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
%bincmp = icmp eq <16 x i8> %i, %j
%x = sext <16 x i1> %bincmp to <16 x i8>
ret <16 x i8> %x
}