mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-07 14:33:15 +00:00
More AVX2 instructions and intrinsics.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143536 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
94dffd2bb0
commit
205e3378fd
@ -1398,6 +1398,9 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_avx2_pmulu_dq : GCCBuiltin<"__builtin_ia32_pmuludq256">,
|
||||
Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty,
|
||||
llvm_v8i32_ty], [IntrNoMem, Commutative]>;
|
||||
def int_x86_avx2_pmul_dq : GCCBuiltin<"__builtin_ia32_pmuldq256">,
|
||||
Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty,
|
||||
llvm_v8i32_ty], [IntrNoMem, Commutative]>;
|
||||
def int_x86_avx2_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd256">,
|
||||
Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty,
|
||||
llvm_v16i16_ty], [IntrNoMem, Commutative]>;
|
||||
@ -1407,21 +1410,49 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_avx2_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw256">,
|
||||
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
|
||||
llvm_v16i16_ty], [IntrNoMem, Commutative]>;
|
||||
def int_x86_avx2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw256">,
|
||||
Intrinsic<[llvm_v4i64_ty], [llvm_v32i8_ty,
|
||||
llvm_v32i8_ty], [IntrNoMem, Commutative]>;
|
||||
}
|
||||
|
||||
// Vector min, max
|
||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_avx2_pmaxu_b : GCCBuiltin<"__builtin_ia32_pmaxub256">,
|
||||
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
|
||||
llvm_v32i8_ty], [IntrNoMem, Commutative]>;
|
||||
def int_x86_avx2_pmaxu_w : GCCBuiltin<"__builtin_ia32_pmaxuw256">,
|
||||
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
|
||||
llvm_v16i16_ty], [IntrNoMem, Commutative]>;
|
||||
def int_x86_avx2_pmaxu_d : GCCBuiltin<"__builtin_ia32_pmaxud256">,
|
||||
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
|
||||
llvm_v8i32_ty], [IntrNoMem, Commutative]>;
|
||||
def int_x86_avx2_pmaxs_b : GCCBuiltin<"__builtin_ia32_pmaxsb256">,
|
||||
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
|
||||
llvm_v32i8_ty], [IntrNoMem, Commutative]>;
|
||||
def int_x86_avx2_pmaxs_w : GCCBuiltin<"__builtin_ia32_pmaxsw256">,
|
||||
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
|
||||
llvm_v16i16_ty], [IntrNoMem, Commutative]>;
|
||||
def int_x86_avx2_pmaxs_d : GCCBuiltin<"__builtin_ia32_pmaxsd256">,
|
||||
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
|
||||
llvm_v8i32_ty], [IntrNoMem, Commutative]>;
|
||||
def int_x86_avx2_pminu_b : GCCBuiltin<"__builtin_ia32_pminub256">,
|
||||
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
|
||||
llvm_v32i8_ty], [IntrNoMem, Commutative]>;
|
||||
def int_x86_avx2_pminu_w : GCCBuiltin<"__builtin_ia32_pminuw256">,
|
||||
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
|
||||
llvm_v16i16_ty], [IntrNoMem, Commutative]>;
|
||||
def int_x86_avx2_pminu_d : GCCBuiltin<"__builtin_ia32_pminud256">,
|
||||
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
|
||||
llvm_v8i32_ty], [IntrNoMem, Commutative]>;
|
||||
def int_x86_avx2_pmins_b : GCCBuiltin<"__builtin_ia32_pminsb256">,
|
||||
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
|
||||
llvm_v32i8_ty], [IntrNoMem, Commutative]>;
|
||||
def int_x86_avx2_pmins_w : GCCBuiltin<"__builtin_ia32_pminsw256">,
|
||||
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
|
||||
llvm_v16i16_ty], [IntrNoMem, Commutative]>;
|
||||
def int_x86_avx2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw256">,
|
||||
Intrinsic<[llvm_v4i64_ty], [llvm_v32i8_ty,
|
||||
llvm_v32i8_ty], [IntrNoMem, Commutative]>;
|
||||
def int_x86_avx2_pmins_d : GCCBuiltin<"__builtin_ia32_pminsd256">,
|
||||
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
|
||||
llvm_v8i32_ty], [IntrNoMem, Commutative]>;
|
||||
}
|
||||
|
||||
// Integer shift ops.
|
||||
@ -1501,15 +1532,21 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_avx2_pcmpeq_d : GCCBuiltin<"__builtin_ia32_pcmpeqd256">,
|
||||
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_x86_avx2_pcmpeq_q : GCCBuiltin<"__builtin_ia32_pcmpeqq256">,
|
||||
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_x86_avx2_pcmpgt_b : GCCBuiltin<"__builtin_ia32_pcmpgtb256">,
|
||||
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
|
||||
llvm_v32i8_ty], [IntrNoMem]>;
|
||||
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx2_pcmpgt_w : GCCBuiltin<"__builtin_ia32_pcmpgtw256">,
|
||||
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
|
||||
llvm_v16i16_ty], [IntrNoMem]>;
|
||||
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx2_pcmpgt_d : GCCBuiltin<"__builtin_ia32_pcmpgtd256">,
|
||||
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
|
||||
llvm_v8i32_ty], [IntrNoMem]>;
|
||||
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx2_pcmpgt_q : GCCBuiltin<"__builtin_ia32_pcmpgtq256">,
|
||||
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
|
||||
[IntrNoMem]>;
|
||||
}
|
||||
|
||||
// Pack ops.
|
||||
@ -1523,6 +1560,9 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_avx2_packuswb : GCCBuiltin<"__builtin_ia32_packuswb256">,
|
||||
Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty,
|
||||
llvm_v16i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx2_packusdw : GCCBuiltin<"__builtin_ia32_packusdw256">,
|
||||
Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty,
|
||||
llvm_v8i32_ty], [IntrNoMem]>;
|
||||
}
|
||||
|
||||
// Absolute value ops
|
||||
@ -1620,6 +1660,23 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
[IntrNoMem]>;
|
||||
}
|
||||
|
||||
// Vector blend
|
||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_avx2_pblendvb : GCCBuiltin<"__builtin_ia32_pblendvb256">,
|
||||
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
|
||||
llvm_v32i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx2_pblendw : GCCBuiltin<"__builtin_ia32_pblendw256">,
|
||||
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
}
|
||||
|
||||
// Vector load with broadcast
|
||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_avx2_vbroadcasti128 :
|
||||
GCCBuiltin<"__builtin_ia32_vbroadcastsi256">,
|
||||
Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
|
||||
}
|
||||
|
||||
// Misc.
|
||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_avx2_pmovmskb : GCCBuiltin<"__builtin_ia32_pmovmskb256">,
|
||||
@ -1627,6 +1684,11 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_avx2_pshuf_b : GCCBuiltin<"__builtin_ia32_pshufb256">,
|
||||
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
|
||||
llvm_v32i8_ty], [IntrNoMem]>;
|
||||
// 256-bit MPSADBW intrinsic: two v32i8 sources plus an i32 immediate,
// producing v16i16. Not marked Commutative: the immediate selects
// different byte groups from each source, so the operands are not
// interchangeable (the VMPSADBWY instruction is likewise isCommutable = 0).
def int_x86_avx2_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw256">,
|
||||
Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx2_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa256">,
|
||||
Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -6248,6 +6248,22 @@ multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
|
||||
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
|
||||
}
|
||||
|
||||
/// SS41I_binop_rm_int_y - Simple SSE 4.1 binary operator, 256-bit (VR256) form
|
||||
multiclass SS41I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
|
||||
Intrinsic IntId256> {
|
||||
let isCommutable = 1 in
|
||||
def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst),
|
||||
(ins VR256:$src1, VR256:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>, OpSize;
|
||||
def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst),
|
||||
(ins VR256:$src1, i256mem:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set VR256:$dst,
|
||||
(IntId256 VR256:$src1,
|
||||
(bitconvert (memopv32i8 addr:$src2))))]>, OpSize;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
let isCommutable = 0 in
|
||||
defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
|
||||
@ -6279,6 +6295,32 @@ let Predicates = [HasAVX] in {
|
||||
(VPCMPEQQrm VR128:$src1, addr:$src2)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX2] in {
|
||||
let isCommutable = 0 in
|
||||
defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw",
|
||||
int_x86_avx2_packusdw>, VEX_4V;
|
||||
defm VPCMPEQQ : SS41I_binop_rm_int_y<0x29, "vpcmpeqq",
|
||||
int_x86_avx2_pcmpeq_q>, VEX_4V;
|
||||
defm VPMINSB : SS41I_binop_rm_int_y<0x38, "vpminsb",
|
||||
int_x86_avx2_pmins_b>, VEX_4V;
|
||||
defm VPMINSD : SS41I_binop_rm_int_y<0x39, "vpminsd",
|
||||
int_x86_avx2_pmins_d>, VEX_4V;
|
||||
defm VPMINUD : SS41I_binop_rm_int_y<0x3B, "vpminud",
|
||||
int_x86_avx2_pminu_d>, VEX_4V;
|
||||
defm VPMINUW : SS41I_binop_rm_int_y<0x3A, "vpminuw",
|
||||
int_x86_avx2_pminu_w>, VEX_4V;
|
||||
defm VPMAXSB : SS41I_binop_rm_int_y<0x3C, "vpmaxsb",
|
||||
int_x86_avx2_pmaxs_b>, VEX_4V;
|
||||
defm VPMAXSD : SS41I_binop_rm_int_y<0x3D, "vpmaxsd",
|
||||
int_x86_avx2_pmaxs_d>, VEX_4V;
|
||||
defm VPMAXUD : SS41I_binop_rm_int_y<0x3F, "vpmaxud",
|
||||
int_x86_avx2_pmaxu_d>, VEX_4V;
|
||||
defm VPMAXUW : SS41I_binop_rm_int_y<0x3E, "vpmaxuw",
|
||||
int_x86_avx2_pmaxu_w>, VEX_4V;
|
||||
defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq",
|
||||
int_x86_avx2_pmul_dq>, VEX_4V;
|
||||
}
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
let isCommutable = 0 in
|
||||
defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
|
||||
@ -6301,7 +6343,7 @@ def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
|
||||
|
||||
/// SS48I_binop_rm - Simple SSE41 binary operator.
|
||||
multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
ValueType OpVT, bit Is2Addr = 1> {
|
||||
ValueType OpVT, bit Is2Addr = 1> {
|
||||
let isCommutable = 1 in
|
||||
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2),
|
||||
@ -6320,8 +6362,27 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
OpSize;
|
||||
}
|
||||
|
||||
/// SS48I_binop_rm_y - Simple SSE41 binary operator, 256-bit (VR256) form.
|
||||
multiclass SS48I_binop_rm_y<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
ValueType OpVT> {
|
||||
let isCommutable = 1 in
|
||||
def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst),
|
||||
(ins VR256:$src1, VR256:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set VR256:$dst, (OpVT (OpNode VR256:$src1, VR256:$src2)))]>,
|
||||
OpSize;
|
||||
def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst),
|
||||
(ins VR256:$src1, i256mem:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set VR256:$dst, (OpNode VR256:$src1,
|
||||
(bc_v8i32 (memopv4i64 addr:$src2))))]>,
|
||||
OpSize;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX] in
|
||||
defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, 0>, VEX_4V;
|
||||
let Predicates = [HasAVX2] in
|
||||
defm VPMULLD : SS48I_binop_rm_y<0x40, "vpmulld", mul, v8i32>, VEX_4V;
|
||||
let Constraints = "$src1 = $dst" in
|
||||
defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32>;
|
||||
|
||||
@ -6375,6 +6436,15 @@ let Predicates = [HasAVX] in {
|
||||
VR256, memopv32i8, i256mem, 0>, VEX_4V;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX2] in {
|
||||
let isCommutable = 0 in {
|
||||
defm VPBLENDWY : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_avx2_pblendw,
|
||||
VR256, memopv32i8, i256mem, 0>, VEX_4V;
|
||||
defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
|
||||
VR256, memopv32i8, i256mem, 0>, VEX_4V;
|
||||
}
|
||||
}
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
let isCommutable = 0 in {
|
||||
defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
|
||||
@ -6393,7 +6463,6 @@ let Constraints = "$src1 = $dst" in {
|
||||
}
|
||||
|
||||
/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operands
|
||||
let Predicates = [HasAVX] in {
|
||||
multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
|
||||
RegisterClass RC, X86MemOperand x86memop,
|
||||
PatFrag mem_frag, Intrinsic IntId> {
|
||||
@ -6413,8 +6482,8 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
|
||||
RC:$src3))],
|
||||
SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
|
||||
}
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, i128mem,
|
||||
memopv16i8, int_x86_sse41_blendvpd>;
|
||||
defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem,
|
||||
@ -6425,6 +6494,12 @@ defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem,
|
||||
memopv32i8, int_x86_avx_blendv_pd_256>;
|
||||
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
|
||||
memopv32i8, int_x86_avx_blendv_ps_256>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX2] in {
|
||||
defm VPBLENDVBY : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR256, i256mem,
|
||||
memopv32i8, int_x86_avx2_pblendvb>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
def : Pat<(v16i8 (vselect (v16i8 VR128:$mask), (v16i8 VR128:$src1),
|
||||
@ -6503,6 +6578,11 @@ def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
|
||||
"vmovntdqa\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
|
||||
OpSize, VEX;
|
||||
let Predicates = [HasAVX2] in
|
||||
def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
|
||||
"vmovntdqa\t{$src, $dst|$dst, $src}",
|
||||
[(set VR256:$dst, (int_x86_avx2_movntdqa addr:$src))]>,
|
||||
OpSize, VEX;
|
||||
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
|
||||
"movntdqa\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
|
||||
@ -6532,6 +6612,22 @@ multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
|
||||
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
|
||||
}
|
||||
|
||||
/// SS42I_binop_rm_int_y - Simple SSE 4.2 binary operator, 256-bit (VR256) form
|
||||
multiclass SS42I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
|
||||
Intrinsic IntId256> {
|
||||
def Yrr : SS428I<opc, MRMSrcReg, (outs VR256:$dst),
|
||||
(ins VR256:$src1, VR256:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>,
|
||||
OpSize;
|
||||
def Yrm : SS428I<opc, MRMSrcMem, (outs VR256:$dst),
|
||||
(ins VR256:$src1, i256mem:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set VR256:$dst,
|
||||
(IntId256 VR256:$src1,
|
||||
(bitconvert (memopv32i8 addr:$src2))))]>, OpSize;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
defm VPCMPGTQ : SS42I_binop_rm_int<0x37, "vpcmpgtq", int_x86_sse42_pcmpgtq,
|
||||
0>, VEX_4V;
|
||||
@ -6542,6 +6638,11 @@ let Predicates = [HasAVX] in {
|
||||
(VPCMPGTQrm VR128:$src1, addr:$src2)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX2] in {
|
||||
defm VPCMPGTQ : SS42I_binop_rm_int_y<0x37, "vpcmpgtq", int_x86_avx2_pcmpgt_q>,
|
||||
VEX_4V;
|
||||
}
|
||||
|
||||
let Constraints = "$src1 = $dst" in
|
||||
defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>;
|
||||
|
||||
@ -6991,6 +7092,10 @@ def VBROADCASTSD : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
|
||||
def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
|
||||
int_x86_avx_vbroadcastf128_pd_256>;
|
||||
|
||||
let Predicates = [HasAVX2] in
|
||||
def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem,
|
||||
int_x86_avx2_vbroadcasti128>;
|
||||
|
||||
def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
|
||||
(VBROADCASTF128 addr:$src)>;
|
||||
|
||||
|
@ -512,6 +512,119 @@ define <16 x i16> @test_x86_avx2_psign_w(<16 x i16> %a0, <16 x i16> %a1) {
|
||||
declare <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16>, <16 x i16>) nounwind readnone
|
||||
|
||||
|
||||
define <4 x i64> @test_x86_avx2_movntdqa(i8* %a0) {
|
||||
; CHECK: movl
|
||||
; CHECK: vmovntdqa
|
||||
%res = call <4 x i64> @llvm.x86.avx2.movntdqa(i8* %a0) ; <<4 x i64>> [#uses=1]
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
declare <4 x i64> @llvm.x86.avx2.movntdqa(i8*) nounwind readonly
|
||||
|
||||
|
||||
define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
|
||||
; CHECK: vmpsadbw
|
||||
%res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i32 7) ; <<16 x i16>> [#uses=1]
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i32) nounwind readnone
|
||||
|
||||
|
||||
define <16 x i16> @test_x86_avx2_packusdw(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
; CHECK: vpackusdw
|
||||
%res = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1) ; <<16 x i16>> [#uses=1]
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone
|
||||
|
||||
|
||||
define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) {
|
||||
; CHECK: vpblendvb
|
||||
%res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) ; <<32 x i8>> [#uses=1]
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounwind readnone
|
||||
|
||||
|
||||
define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
|
||||
; CHECK: vpblendw
|
||||
%res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 7) ; <<16 x i16>> [#uses=1]
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32) nounwind readnone
|
||||
|
||||
|
||||
define <4 x i64> @test_x86_avx2_pcmpeqq(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; CHECK: vpcmpeqq
|
||||
%res = call <4 x i64> @llvm.x86.avx2.pcmpeq.q(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
declare <4 x i64> @llvm.x86.avx2.pcmpeq.q(<4 x i64>, <4 x i64>) nounwind readnone
|
||||
|
||||
|
||||
define <32 x i8> @test_x86_avx2_pmaxsb(<32 x i8> %a0, <32 x i8> %a1) {
|
||||
; CHECK: vpmaxsb
|
||||
%res = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone
|
||||
|
||||
|
||||
define <8 x i32> @test_x86_avx2_pmaxsd(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
; CHECK: vpmaxsd
|
||||
%res = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone
|
||||
|
||||
|
||||
define <8 x i32> @test_x86_avx2_pmaxud(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
; CHECK: vpmaxud
|
||||
%res = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone
|
||||
|
||||
|
||||
define <16 x i16> @test_x86_avx2_pmaxuw(<16 x i16> %a0, <16 x i16> %a1) {
|
||||
; CHECK: vpmaxuw
|
||||
%res = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readnone
|
||||
|
||||
|
||||
define <32 x i8> @test_x86_avx2_pminsb(<32 x i8> %a0, <32 x i8> %a1) {
|
||||
; CHECK: vpminsb
|
||||
%res = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
declare <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone
|
||||
|
||||
|
||||
define <8 x i32> @test_x86_avx2_pminsd(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
; CHECK: vpminsd
|
||||
%res = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone
|
||||
|
||||
|
||||
define <8 x i32> @test_x86_avx2_pminud(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
; CHECK: vpminud
|
||||
%res = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone
|
||||
|
||||
|
||||
define <16 x i16> @test_x86_avx2_pminuw(<16 x i16> %a0, <16 x i16> %a1) {
|
||||
; CHECK: vpminuw
|
||||
%res = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone
|
||||
|
||||
|
||||
define <8 x i32> @test_x86_avx2_pmovsxbd(<16 x i8> %a0) {
|
||||
; CHECK: vpmovsxbd
|
||||
%res = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
|
||||
@ -606,3 +719,27 @@ define <4 x i64> @test_x86_avx2_pmovzxwq(<8 x i16> %a0) {
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone
|
||||
|
||||
|
||||
define <4 x i64> @test_x86_avx2_pmul.dq(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
; CHECK: vpmuldq
|
||||
%res = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1) ; <<4 x i64>> [#uses=1]
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone
|
||||
|
||||
|
||||
define <4 x i64> @test_x86_avx2_pcmpgtq(<4 x i64> %a0, <4 x i64> %a1) {
|
||||
; CHECK: vpcmpgtq
|
||||
%res = call <4 x i64> @llvm.x86.avx2.pcmpgt.q(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
declare <4 x i64> @llvm.x86.avx2.pcmpgt.q(<4 x i64>, <4 x i64>) nounwind readnone
|
||||
|
||||
|
||||
define <4 x i64> @test_x86_avx2_vbroadcasti128(i8* %a0) {
|
||||
; CHECK: vbroadcasti128
|
||||
%res = call <4 x i64> @llvm.x86.avx2.vbroadcasti128(i8* %a0) ; <<4 x i64>> [#uses=1]
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
declare <4 x i64> @llvm.x86.avx2.vbroadcasti128(i8*) nounwind readonly
|
||||
|
Loading…
x
Reference in New Issue
Block a user