mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-08 21:32:39 +00:00
Make X86ISD::ANDNP more general and codegen 256-bit VANDNP. A more
general version of X86ISD::ANDNP also opens up room for a little bit of refactoring.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@135088 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
c1af4772f1
commit
466b022c99
@ -11821,10 +11821,12 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
|
|||||||
if (R.getNode())
|
if (R.getNode())
|
||||||
return R;
|
return R;
|
||||||
|
|
||||||
// Want to form ANDNP nodes, in the hopes of then easily combining them with
|
// Want to form ANDNP nodes:
|
||||||
// OR and AND nodes to form PBLEND/PSIGN.
|
// 1) In the hopes of then easily combining them with OR and AND nodes
|
||||||
|
// to form PBLEND/PSIGN.
|
||||||
|
// 2) To match ANDN packed intrinsics
|
||||||
EVT VT = N->getValueType(0);
|
EVT VT = N->getValueType(0);
|
||||||
if (VT != MVT::v2i64)
|
if (VT != MVT::v2i64 && VT != MVT::v4i64)
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
SDValue N0 = N->getOperand(0);
|
SDValue N0 = N->getOperand(0);
|
||||||
|
@ -47,7 +47,7 @@ def X86pshufb : SDNode<"X86ISD::PSHUFB",
|
|||||||
SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
|
SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
|
||||||
SDTCisSameAs<0,2>]>>;
|
SDTCisSameAs<0,2>]>>;
|
||||||
def X86andnp : SDNode<"X86ISD::ANDNP",
|
def X86andnp : SDNode<"X86ISD::ANDNP",
|
||||||
SDTypeProfile<1, 2, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>,
|
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
|
||||||
SDTCisSameAs<0,2>]>>;
|
SDTCisSameAs<0,2>]>>;
|
||||||
def X86psignb : SDNode<"X86ISD::PSIGNB",
|
def X86psignb : SDNode<"X86ISD::PSIGNB",
|
||||||
SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
|
SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
|
||||||
|
@ -1473,98 +1473,68 @@ let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in
|
|||||||
/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
|
/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
|
||||||
///
|
///
|
||||||
multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
|
multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
|
||||||
SDNode OpNode, int HasPat = 0,
|
SDNode OpNode> {
|
||||||
list<list<dag>> Pattern = []> {
|
|
||||||
let Pattern = []<dag> in {
|
let Pattern = []<dag> in {
|
||||||
defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
|
defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
|
||||||
!strconcat(OpcodeStr, "ps"), f128mem,
|
!strconcat(OpcodeStr, "ps"), f128mem,
|
||||||
!if(HasPat, Pattern[0], // rr
|
[(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))],
|
||||||
[(set VR128:$dst, (v2i64 (OpNode VR128:$src1,
|
[(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
|
||||||
VR128:$src2)))]),
|
(memopv2i64 addr:$src2)))], 0>, VEX_4V;
|
||||||
!if(HasPat, Pattern[2], // rm
|
|
||||||
[(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
|
|
||||||
(memopv2i64 addr:$src2)))]), 0>,
|
|
||||||
VEX_4V;
|
|
||||||
|
|
||||||
defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
|
defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
|
||||||
!strconcat(OpcodeStr, "pd"), f128mem,
|
!strconcat(OpcodeStr, "pd"), f128mem,
|
||||||
!if(HasPat, Pattern[1], // rr
|
[(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
|
||||||
[(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
|
(bc_v2i64 (v2f64 VR128:$src2))))],
|
||||||
(bc_v2i64 (v2f64
|
[(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
|
||||||
VR128:$src2))))]),
|
(memopv2i64 addr:$src2)))], 0>,
|
||||||
!if(HasPat, Pattern[3], // rm
|
OpSize, VEX_4V;
|
||||||
[(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
|
|
||||||
(memopv2i64 addr:$src2)))]), 0>,
|
|
||||||
OpSize, VEX_4V;
|
|
||||||
}
|
}
|
||||||
let Constraints = "$src1 = $dst" in {
|
let Constraints = "$src1 = $dst" in {
|
||||||
defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
|
defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
|
||||||
!strconcat(OpcodeStr, "ps"), f128mem,
|
!strconcat(OpcodeStr, "ps"), f128mem,
|
||||||
!if(HasPat, Pattern[0], // rr
|
[(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))],
|
||||||
[(set VR128:$dst, (v2i64 (OpNode VR128:$src1,
|
[(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
|
||||||
VR128:$src2)))]),
|
(memopv2i64 addr:$src2)))]>, TB;
|
||||||
!if(HasPat, Pattern[2], // rm
|
|
||||||
[(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
|
|
||||||
(memopv2i64 addr:$src2)))])>, TB;
|
|
||||||
|
|
||||||
defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
|
defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
|
||||||
!strconcat(OpcodeStr, "pd"), f128mem,
|
!strconcat(OpcodeStr, "pd"), f128mem,
|
||||||
!if(HasPat, Pattern[1], // rr
|
[(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
|
||||||
[(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
|
(bc_v2i64 (v2f64 VR128:$src2))))],
|
||||||
(bc_v2i64 (v2f64
|
[(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
|
||||||
VR128:$src2))))]),
|
(memopv2i64 addr:$src2)))]>, TB, OpSize;
|
||||||
!if(HasPat, Pattern[3], // rm
|
|
||||||
[(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
|
|
||||||
(memopv2i64 addr:$src2)))])>,
|
|
||||||
TB, OpSize;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms
|
/// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms
|
||||||
///
|
///
|
||||||
multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr,
|
multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr,
|
||||||
SDNode OpNode, int HasNoPat = 0> {
|
SDNode OpNode> {
|
||||||
defm PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
|
defm PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
|
||||||
!strconcat(OpcodeStr, "ps"), f256mem,
|
!strconcat(OpcodeStr, "ps"), f256mem,
|
||||||
!if(HasNoPat, []<dag>, // rr
|
[(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
|
||||||
[(set VR256:$dst, (v4i64 (OpNode VR256:$src1,
|
[(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
|
||||||
VR256:$src2)))]),
|
(memopv4i64 addr:$src2)))], 0>, VEX_4V;
|
||||||
!if(HasNoPat, []<dag>, // rm
|
|
||||||
[(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
|
|
||||||
(memopv4i64 addr:$src2)))]), 0>, VEX_4V;
|
|
||||||
|
|
||||||
defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
|
defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
|
||||||
!strconcat(OpcodeStr, "pd"), f256mem,
|
!strconcat(OpcodeStr, "pd"), f256mem,
|
||||||
!if(HasNoPat, []<dag>, // rr
|
[(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
|
||||||
[(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
|
(bc_v4i64 (v4f64 VR256:$src2))))],
|
||||||
(bc_v4i64 (v4f64 VR256:$src2))))]),
|
[(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
|
||||||
!if(HasNoPat, []<dag>, // rm
|
(memopv4i64 addr:$src2)))], 0>,
|
||||||
[(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
|
OpSize, VEX_4V;
|
||||||
(memopv4i64 addr:$src2)))]), 0>,
|
|
||||||
OpSize, VEX_4V;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// AVX 256-bit packed logical ops forms
|
// AVX 256-bit packed logical ops forms
|
||||||
defm VAND : sse12_fp_packed_logical_y<0x54, "and", and>;
|
defm VAND : sse12_fp_packed_logical_y<0x54, "and", and>;
|
||||||
defm VOR : sse12_fp_packed_logical_y<0x56, "or", or>;
|
defm VOR : sse12_fp_packed_logical_y<0x56, "or", or>;
|
||||||
defm VXOR : sse12_fp_packed_logical_y<0x57, "xor", xor>;
|
defm VXOR : sse12_fp_packed_logical_y<0x57, "xor", xor>;
|
||||||
let isCommutable = 0 in {
|
defm VANDN : sse12_fp_packed_logical_y<0x55, "andn", X86andnp>;
|
||||||
defm VANDN : sse12_fp_packed_logical_y<0x55, "andn", undef /* dummy */, 1>;
|
|
||||||
}
|
|
||||||
|
|
||||||
defm AND : sse12_fp_packed_logical<0x54, "and", and>;
|
defm AND : sse12_fp_packed_logical<0x54, "and", and>;
|
||||||
defm OR : sse12_fp_packed_logical<0x56, "or", or>;
|
defm OR : sse12_fp_packed_logical<0x56, "or", or>;
|
||||||
defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>;
|
defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>;
|
||||||
let isCommutable = 0 in
|
let isCommutable = 0 in
|
||||||
defm ANDN : sse12_fp_packed_logical<0x55, "andn", undef /* dummy */, 1, [
|
defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp>;
|
||||||
// single r+r
|
|
||||||
[(set VR128:$dst, (X86andnp VR128:$src1, VR128:$src2))],
|
|
||||||
// double r+r
|
|
||||||
[],
|
|
||||||
// single r+m
|
|
||||||
[(set VR128:$dst, (X86andnp VR128:$src1, (memopv2i64 addr:$src2)))],
|
|
||||||
// double r+m
|
|
||||||
[]]>;
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// SSE 1 & 2 - Arithmetic Instructions
|
// SSE 1 & 2 - Arithmetic Instructions
|
||||||
@ -3678,6 +3648,7 @@ let Predicates = [HasAVX] in {
|
|||||||
def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>;
|
def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>;
|
||||||
def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>;
|
def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>;
|
||||||
def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>;
|
def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>;
|
||||||
|
def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Move scalar to XMM zero-extended
|
// Move scalar to XMM zero-extended
|
||||||
|
@ -114,3 +114,48 @@ entry:
|
|||||||
ret <8 x float> %1
|
ret <8 x float> %1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; CHECK: vandnpd
|
||||||
|
define <4 x double> @andnotpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
|
||||||
|
entry:
|
||||||
|
%0 = bitcast <4 x double> %x to <4 x i64>
|
||||||
|
%neg.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1>
|
||||||
|
%1 = bitcast <4 x double> %y to <4 x i64>
|
||||||
|
%and.i = and <4 x i64> %1, %neg.i
|
||||||
|
%2 = bitcast <4 x i64> %and.i to <4 x double>
|
||||||
|
ret <4 x double> %2
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: vandnpd (%
|
||||||
|
define <4 x double> @andnotpd256fold(<4 x double> %y, <4 x double>* nocapture %x) nounwind uwtable readonly ssp {
|
||||||
|
entry:
|
||||||
|
%tmp2 = load <4 x double>* %x, align 32
|
||||||
|
%0 = bitcast <4 x double> %y to <4 x i64>
|
||||||
|
%neg.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1>
|
||||||
|
%1 = bitcast <4 x double> %tmp2 to <4 x i64>
|
||||||
|
%and.i = and <4 x i64> %1, %neg.i
|
||||||
|
%2 = bitcast <4 x i64> %and.i to <4 x double>
|
||||||
|
ret <4 x double> %2
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: vandnps
|
||||||
|
define <8 x float> @andnotps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
|
||||||
|
entry:
|
||||||
|
%0 = bitcast <8 x float> %x to <8 x i32>
|
||||||
|
%neg.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
|
||||||
|
%1 = bitcast <8 x float> %y to <8 x i32>
|
||||||
|
%and.i = and <8 x i32> %1, %neg.i
|
||||||
|
%2 = bitcast <8 x i32> %and.i to <8 x float>
|
||||||
|
ret <8 x float> %2
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: vandnps (%
|
||||||
|
define <8 x float> @andnotps256fold(<8 x float> %y, <8 x float>* nocapture %x) nounwind uwtable readonly ssp {
|
||||||
|
entry:
|
||||||
|
%tmp2 = load <8 x float>* %x, align 32
|
||||||
|
%0 = bitcast <8 x float> %y to <8 x i32>
|
||||||
|
%neg.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
|
||||||
|
%1 = bitcast <8 x float> %tmp2 to <8 x i32>
|
||||||
|
%and.i = and <8 x i32> %1, %neg.i
|
||||||
|
%2 = bitcast <8 x i32> %and.i to <8 x float>
|
||||||
|
ret <8 x float> %2
|
||||||
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user