mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-14 02:33:53 +00:00
X86: Now that we have a canonical form for vector integer abs, match it into pabs.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@180600 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
6242fda42a
commit
753981784f
@ -5094,6 +5094,16 @@ multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr,
|
||||
Sched<[WriteVecALULd]>;
|
||||
}
|
||||
|
||||
// Helper fragments to match sext vXi1 to vXiY, i.e. the per-lane sign mask of
// $src as it appears in the canonical vector integer abs expansion.
// Two spellings are matched:
//  - i8 lanes:      a signed greater-than compare of an all-zeros vector
//                   against $src (X86pcmpgt 0, $src);
//  - i16/i32 lanes: an arithmetic right shift of $src by the lane width minus
//                   one (X86vsrai $src, 15 or 31).
// The VR128 fragments serve the 128-bit patterns, the VR256 ones the 256-bit
// patterns.
|
||||
def v16i1sextv16i8 : PatLeaf<(v16i8 (X86pcmpgt (bc_v16i8 (v4i32 immAllZerosV)),
|
||||
VR128:$src))>;
|
||||
def v8i1sextv8i16 : PatLeaf<(v8i16 (X86vsrai VR128:$src, (i32 15)))>;
|
||||
def v4i1sextv4i32 : PatLeaf<(v4i32 (X86vsrai VR128:$src, (i32 31)))>;
|
||||
def v32i1sextv32i8 : PatLeaf<(v32i8 (X86pcmpgt (bc_v32i8 (v8i32 immAllZerosV)),
|
||||
VR256:$src))>;
|
||||
def v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256:$src, (i32 15)))>;
|
||||
def v8i1sextv8i32 : PatLeaf<(v8i32 (X86vsrai VR256:$src, (i32 31)))>;
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb",
|
||||
int_x86_ssse3_pabs_b_128>, VEX;
|
||||
@ -5101,6 +5111,19 @@ let Predicates = [HasAVX] in {
|
||||
int_x86_ssse3_pabs_w_128>, VEX;
|
||||
defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd",
|
||||
int_x86_ssse3_pabs_d_128>, VEX;
|
||||
|
||||
// Select the 128-bit VPABSB/VPABSW/VPABSD register forms for the canonical
// integer abs shape xor(mask, add($src, mask)), where mask is the sign-mask
// fragment of the matching lane width. Both xor operands are matched through
// a bitcast to v2i64.
def : Pat<(xor
|
||||
(bc_v2i64 (v16i1sextv16i8)),
|
||||
(bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))),
|
||||
(VPABSBrr128 VR128:$src)>;
|
||||
def : Pat<(xor
|
||||
(bc_v2i64 (v8i1sextv8i16)),
|
||||
(bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))),
|
||||
(VPABSWrr128 VR128:$src)>;
|
||||
def : Pat<(xor
|
||||
(bc_v2i64 (v4i1sextv4i32)),
|
||||
(bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))),
|
||||
(VPABSDrr128 VR128:$src)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX2] in {
|
||||
@ -5110,6 +5133,19 @@ let Predicates = [HasAVX2] in {
|
||||
int_x86_avx2_pabs_w>, VEX, VEX_L;
|
||||
defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd",
|
||||
int_x86_avx2_pabs_d>, VEX, VEX_L;
|
||||
|
||||
// Select the 256-bit VPABSB/VPABSW/VPABSD register forms for the canonical
// integer abs shape xor(mask, add($src, mask)), where mask is the VR256
// sign-mask fragment of the matching lane width. Both xor operands are
// matched through a bitcast to v4i64.
def : Pat<(xor
|
||||
(bc_v4i64 (v32i1sextv32i8)),
|
||||
(bc_v4i64 (add (v32i8 VR256:$src), (v32i1sextv32i8)))),
|
||||
(VPABSBrr256 VR256:$src)>;
|
||||
def : Pat<(xor
|
||||
(bc_v4i64 (v16i1sextv16i16)),
|
||||
(bc_v4i64 (add (v16i16 VR256:$src), (v16i1sextv16i16)))),
|
||||
(VPABSWrr256 VR256:$src)>;
|
||||
def : Pat<(xor
|
||||
(bc_v4i64 (v8i1sextv8i32)),
|
||||
(bc_v4i64 (add (v8i32 VR256:$src), (v8i1sextv8i32)))),
|
||||
(VPABSDrr256 VR256:$src)>;
|
||||
}
|
||||
|
||||
defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb",
|
||||
@ -5119,6 +5155,21 @@ defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw",
|
||||
defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd",
|
||||
int_x86_ssse3_pabs_d_128>;
|
||||
|
||||
// With SSSE3 available, select the PABSB/PABSW/PABSD register forms for the
// canonical integer abs shape xor(mask, add($src, mask)), where mask is the
// VR128 sign-mask fragment of the matching lane width. Both xor operands are
// matched through a bitcast to v2i64.
let Predicates = [HasSSSE3] in {
|
||||
def : Pat<(xor
|
||||
(bc_v2i64 (v16i1sextv16i8)),
|
||||
(bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))),
|
||||
(PABSBrr128 VR128:$src)>;
|
||||
def : Pat<(xor
|
||||
(bc_v2i64 (v8i1sextv8i16)),
|
||||
(bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))),
|
||||
(PABSWrr128 VR128:$src)>;
|
||||
def : Pat<(xor
|
||||
(bc_v2i64 (v4i1sextv4i32)),
|
||||
(bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))),
|
||||
(PABSDrr128 VR128:$src)>;
|
||||
}
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
// SSSE3 - Packed Binary Operator Instructions
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
@ -1,4 +1,6 @@
|
||||
; RUN: llc < %s -march=x86-64 -mcpu=x86-64 | FileCheck %s -check-prefix=SSE2
|
||||
; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s -check-prefix=SSSE3
|
||||
; RUN: llc < %s -march=x86-64 -mcpu=core-avx2 | FileCheck %s -check-prefix=AVX2
|
||||
|
||||
define <4 x i32> @test1(<4 x i32> %a) nounwind {
|
||||
; SSE2: test1:
|
||||
@ -7,6 +9,14 @@ define <4 x i32> @test1(<4 x i32> %a) nounwind {
|
||||
; SSE2-NEXT: padd
|
||||
; SSE2-NEXT: pxor
|
||||
; SSE2-NEXT: ret
|
||||
|
||||
; SSSE3: test1:
|
||||
; SSSE3: pabsd
|
||||
; SSSE3-NEXT: ret
|
||||
|
||||
; AVX2: test1:
|
||||
; AVX2: vpabsd
|
||||
; AVX2-NEXT: ret
|
||||
%tmp1neg = sub <4 x i32> zeroinitializer, %a
|
||||
%b = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
|
||||
@ -20,36 +30,60 @@ define <4 x i32> @test2(<4 x i32> %a) nounwind {
|
||||
; SSE2-NEXT: padd
|
||||
; SSE2-NEXT: pxor
|
||||
; SSE2-NEXT: ret
|
||||
|
||||
; SSSE3: test2:
|
||||
; SSSE3: pabsd
|
||||
; SSSE3-NEXT: ret
|
||||
|
||||
; AVX2: test2:
|
||||
; AVX2: vpabsd
|
||||
; AVX2-NEXT: ret
|
||||
%tmp1neg = sub <4 x i32> zeroinitializer, %a
|
||||
%b = icmp sge <4 x i32> %a, zeroinitializer
|
||||
%abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
|
||||
ret <4 x i32> %abs
|
||||
}
|
||||
|
||||
define <4 x i32> @test3(<4 x i32> %a) nounwind {
|
||||
define <8 x i16> @test3(<8 x i16> %a) nounwind {
|
||||
; SSE2: test3:
|
||||
; SSE2: movdqa
|
||||
; SSE2-NEXT: psrad $31
|
||||
; SSE2-NEXT: psraw $15
|
||||
; SSE2-NEXT: padd
|
||||
; SSE2-NEXT: pxor
|
||||
; SSE2-NEXT: ret
|
||||
%tmp1neg = sub <4 x i32> zeroinitializer, %a
|
||||
%b = icmp sgt <4 x i32> %a, zeroinitializer
|
||||
%abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
|
||||
ret <4 x i32> %abs
|
||||
|
||||
; SSSE3: test3:
|
||||
; SSSE3: pabsw
|
||||
; SSSE3-NEXT: ret
|
||||
|
||||
; AVX2: test3:
|
||||
; AVX2: vpabsw
|
||||
; AVX2-NEXT: ret
|
||||
%tmp1neg = sub <8 x i16> zeroinitializer, %a
|
||||
%b = icmp sgt <8 x i16> %a, zeroinitializer
|
||||
%abs = select <8 x i1> %b, <8 x i16> %a, <8 x i16> %tmp1neg
|
||||
ret <8 x i16> %abs
|
||||
}
|
||||
|
||||
define <4 x i32> @test4(<4 x i32> %a) nounwind {
|
||||
define <16 x i8> @test4(<16 x i8> %a) nounwind {
|
||||
; SSE2: test4:
|
||||
; SSE2: movdqa
|
||||
; SSE2-NEXT: psrad $31
|
||||
; SSE2: pxor
|
||||
; SSE2-NEXT: pcmpgtb
|
||||
; SSE2-NEXT: padd
|
||||
; SSE2-NEXT: pxor
|
||||
; SSE2-NEXT: ret
|
||||
%tmp1neg = sub <4 x i32> zeroinitializer, %a
|
||||
%b = icmp slt <4 x i32> %a, zeroinitializer
|
||||
%abs = select <4 x i1> %b, <4 x i32> %tmp1neg, <4 x i32> %a
|
||||
ret <4 x i32> %abs
|
||||
|
||||
; SSSE3: test4:
|
||||
; SSSE3: pabsb
|
||||
; SSSE3-NEXT: ret
|
||||
|
||||
; AVX2: test4:
|
||||
; AVX2: vpabsb
|
||||
; AVX2-NEXT: ret
|
||||
%tmp1neg = sub <16 x i8> zeroinitializer, %a
|
||||
%b = icmp slt <16 x i8> %a, zeroinitializer
|
||||
%abs = select <16 x i1> %b, <16 x i8> %tmp1neg, <16 x i8> %a
|
||||
ret <16 x i8> %abs
|
||||
}
|
||||
|
||||
define <4 x i32> @test5(<4 x i32> %a) nounwind {
|
||||
@ -59,8 +93,91 @@ define <4 x i32> @test5(<4 x i32> %a) nounwind {
|
||||
; SSE2-NEXT: padd
|
||||
; SSE2-NEXT: pxor
|
||||
; SSE2-NEXT: ret
|
||||
|
||||
; SSSE3: test5:
|
||||
; SSSE3: pabsd
|
||||
; SSSE3-NEXT: ret
|
||||
|
||||
; AVX2: test5:
|
||||
; AVX2: vpabsd
|
||||
; AVX2-NEXT: ret
|
||||
%tmp1neg = sub <4 x i32> zeroinitializer, %a
|
||||
%b = icmp sle <4 x i32> %a, zeroinitializer
|
||||
%abs = select <4 x i1> %b, <4 x i32> %tmp1neg, <4 x i32> %a
|
||||
ret <4 x i32> %abs
|
||||
}
|
||||
|
||||
; abs of an 8 x i32 vector spelled as select(a > -1, a, 0 - a).
; The SSSE3 run is expected to emit pabsd twice, the AVX2 run a single vpabsd
; on a ymm register.
define <8 x i32> @test6(<8 x i32> %a) nounwind {
|
||||
; SSSE3: test6:
|
||||
; SSSE3: pabsd
|
||||
; SSSE3: pabsd
|
||||
; SSSE3-NEXT: ret
|
||||
|
||||
; AVX2: test6:
|
||||
; AVX2: vpabsd %ymm
|
||||
; AVX2-NEXT: ret
|
||||
%tmp1neg = sub <8 x i32> zeroinitializer, %a
|
||||
%b = icmp sgt <8 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
%abs = select <8 x i1> %b, <8 x i32> %a, <8 x i32> %tmp1neg
|
||||
ret <8 x i32> %abs
|
||||
}
|
||||
|
||||
; abs of an 8 x i32 vector spelled as select(a >= 0, a, 0 - a).
; The SSSE3 run is expected to emit pabsd twice, the AVX2 run a single vpabsd
; on a ymm register.
define <8 x i32> @test7(<8 x i32> %a) nounwind {
|
||||
; SSSE3: test7:
|
||||
; SSSE3: pabsd
|
||||
; SSSE3: pabsd
|
||||
; SSSE3-NEXT: ret
|
||||
|
||||
; AVX2: test7:
|
||||
; AVX2: vpabsd %ymm
|
||||
; AVX2-NEXT: ret
|
||||
%tmp1neg = sub <8 x i32> zeroinitializer, %a
|
||||
%b = icmp sge <8 x i32> %a, zeroinitializer
|
||||
%abs = select <8 x i1> %b, <8 x i32> %a, <8 x i32> %tmp1neg
|
||||
ret <8 x i32> %abs
|
||||
}
|
||||
|
||||
; abs of a 16 x i16 vector spelled as select(a > 0, a, 0 - a).
; The SSSE3 run is expected to emit pabsw twice, the AVX2 run a single vpabsw
; on a ymm register.
define <16 x i16> @test8(<16 x i16> %a) nounwind {
|
||||
; SSSE3: test8:
|
||||
; SSSE3: pabsw
|
||||
; SSSE3: pabsw
|
||||
; SSSE3-NEXT: ret
|
||||
|
||||
; AVX2: test8:
|
||||
; AVX2: vpabsw %ymm
|
||||
; AVX2-NEXT: ret
|
||||
%tmp1neg = sub <16 x i16> zeroinitializer, %a
|
||||
%b = icmp sgt <16 x i16> %a, zeroinitializer
|
||||
%abs = select <16 x i1> %b, <16 x i16> %a, <16 x i16> %tmp1neg
|
||||
ret <16 x i16> %abs
|
||||
}
|
||||
|
||||
; abs of a 32 x i8 vector spelled with the select arms swapped:
; select(a < 0, 0 - a, a).
; The SSSE3 run is expected to emit pabsb twice, the AVX2 run a single vpabsb
; on a ymm register.
define <32 x i8> @test9(<32 x i8> %a) nounwind {
|
||||
; SSSE3: test9:
|
||||
; SSSE3: pabsb
|
||||
; SSSE3: pabsb
|
||||
; SSSE3-NEXT: ret
|
||||
|
||||
; AVX2: test9:
|
||||
; AVX2: vpabsb %ymm
|
||||
; AVX2-NEXT: ret
|
||||
%tmp1neg = sub <32 x i8> zeroinitializer, %a
|
||||
%b = icmp slt <32 x i8> %a, zeroinitializer
|
||||
%abs = select <32 x i1> %b, <32 x i8> %tmp1neg, <32 x i8> %a
|
||||
ret <32 x i8> %abs
|
||||
}
|
||||
|
||||
; abs of an 8 x i32 vector spelled with the select arms swapped:
; select(a <= 0, 0 - a, a).
; The SSSE3 run is expected to emit pabsd twice, the AVX2 run a single vpabsd
; on a ymm register.
define <8 x i32> @test10(<8 x i32> %a) nounwind {
|
||||
; SSSE3: test10:
|
||||
; SSSE3: pabsd
|
||||
; SSSE3: pabsd
|
||||
; SSSE3-NEXT: ret
|
||||
|
||||
; AVX2: test10:
|
||||
; AVX2: vpabsd %ymm
|
||||
; AVX2-NEXT: ret
|
||||
%tmp1neg = sub <8 x i32> zeroinitializer, %a
|
||||
%b = icmp sle <8 x i32> %a, zeroinitializer
|
||||
%abs = select <8 x i1> %b, <8 x i32> %tmp1neg, <8 x i32> %a
|
||||
ret <8 x i32> %abs
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user