mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-15 04:30:12 +00:00
Implement vector-select support for 256-bit AVX. Refactor the vblend implementation so that TableGen matches the instruction by the node's value type.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@139400 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
468709e43d
commit
8ffad56f8e
@ -1019,6 +1019,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||
setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
|
||||
setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
|
||||
|
||||
setOperationAction(ISD::VSELECT, MVT::v4f64, Custom);
|
||||
setOperationAction(ISD::VSELECT, MVT::v4i64, Custom);
|
||||
setOperationAction(ISD::VSELECT, MVT::v8i32, Custom);
|
||||
setOperationAction(ISD::VSELECT, MVT::v8f32, Custom);
|
||||
|
||||
setOperationAction(ISD::ADD, MVT::v4i64, Custom);
|
||||
setOperationAction(ISD::ADD, MVT::v8i32, Custom);
|
||||
setOperationAction(ISD::ADD, MVT::v16i16, Custom);
|
||||
@ -8706,14 +8711,21 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
|
||||
EVT VT = Op1.getValueType();
|
||||
switch (VT.getSimpleVT().SimpleTy) {
|
||||
default: break;
|
||||
// SSE4:
|
||||
case MVT::v2i64:
|
||||
case MVT::v2f64:
|
||||
return DAG.getNode(X86ISD::BLENDVPD, DL, VT, Ops, array_lengthof(Ops));
|
||||
case MVT::v4i32:
|
||||
case MVT::v4f32:
|
||||
return DAG.getNode(X86ISD::BLENDVPS, DL, VT , Ops, array_lengthof(Ops));
|
||||
case MVT::v16i8:
|
||||
return DAG.getNode(X86ISD::PBLENDVB, DL, VT , Ops, array_lengthof(Ops));
|
||||
case MVT::v8i16:
|
||||
// AVX:
|
||||
case MVT::v4i64:
|
||||
case MVT::v4f64:
|
||||
case MVT::v8i32:
|
||||
case MVT::v8f32:
|
||||
case MVT::v32i8:
|
||||
case MVT::v16i16:
|
||||
return DAG.getNode(X86ISD::BLENDV, DL, VT, Ops, array_lengthof(Ops));
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
@ -9973,7 +9985,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
|
||||
M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
|
||||
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
|
||||
DAG.getConstant(4, MVT::i32));
|
||||
R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, M, Op);
|
||||
R = DAG.getNode(X86ISD::BLENDV, dl, VT, R, M, Op);
|
||||
// a += a
|
||||
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
|
||||
|
||||
@ -9988,12 +10000,12 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
|
||||
M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
|
||||
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
|
||||
DAG.getConstant(2, MVT::i32));
|
||||
R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, M, Op);
|
||||
R = DAG.getNode(X86ISD::BLENDV, dl, VT, R, M, Op);
|
||||
// a += a
|
||||
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
|
||||
|
||||
// return pblendv(r, r+r, a);
|
||||
R = DAG.getNode(X86ISD::PBLENDVB, dl, VT,
|
||||
R = DAG.getNode(X86ISD::BLENDV, dl, VT,
|
||||
R, DAG.getNode(ISD::ADD, dl, VT, R, R), Op);
|
||||
return R;
|
||||
}
|
||||
@ -10631,7 +10643,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
case X86ISD::PSIGNB: return "X86ISD::PSIGNB";
|
||||
case X86ISD::PSIGNW: return "X86ISD::PSIGNW";
|
||||
case X86ISD::PSIGND: return "X86ISD::PSIGND";
|
||||
case X86ISD::PBLENDVB: return "X86ISD::PBLENDVB";
|
||||
case X86ISD::BLENDV: return "X86ISD::BLENDV";
|
||||
case X86ISD::FMAX: return "X86ISD::FMAX";
|
||||
case X86ISD::FMIN: return "X86ISD::FMIN";
|
||||
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
|
||||
@ -13361,7 +13373,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
|
||||
X = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, X);
|
||||
Y = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Y);
|
||||
Mask = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Mask);
|
||||
Mask = DAG.getNode(X86ISD::PBLENDVB, DL, MVT::v16i8, X, Y, Mask);
|
||||
Mask = DAG.getNode(X86ISD::BLENDV, DL, MVT::v16i8, X, Y, Mask);
|
||||
return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Mask);
|
||||
}
|
||||
}
|
||||
|
@ -175,10 +175,8 @@ namespace llvm {
|
||||
/// PSIGNB/W/D - Copy integer sign.
|
||||
PSIGNB, PSIGNW, PSIGND,
|
||||
|
||||
/// BLENDVXX family of opcodes
|
||||
PBLENDVB,
|
||||
BLENDVPD,
|
||||
BLENDVPS,
|
||||
/// BLEND family of opcodes
|
||||
BLENDV,
|
||||
|
||||
/// FMAX, FMIN - Floating point max and min.
|
||||
///
|
||||
|
@ -58,14 +58,8 @@ def X86psignw : SDNode<"X86ISD::PSIGNW",
|
||||
def X86psignd : SDNode<"X86ISD::PSIGND",
|
||||
SDTypeProfile<1, 2, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>,
|
||||
SDTCisSameAs<0,2>]>>;
|
||||
def X86pblendvb : SDNode<"X86ISD::PBLENDVB",
|
||||
SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
|
||||
SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>;
|
||||
def X86blendvpd : SDNode<"X86ISD::BLENDVPD",
|
||||
SDTypeProfile<1, 3, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>,
|
||||
SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>;
|
||||
def X86blendvps : SDNode<"X86ISD::BLENDVPS",
|
||||
SDTypeProfile<1, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>,
|
||||
def X86blendv : SDNode<"X86ISD::BLENDV",
|
||||
SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>,
|
||||
SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>;
|
||||
def X86pextrb : SDNode<"X86ISD::PEXTRB",
|
||||
SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
|
||||
|
@ -5868,12 +5868,37 @@ defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
|
||||
memopv32i8, int_x86_avx_blendv_ps_256>;
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
def : Pat<(X86pblendvb VR128:$src1, VR128:$src2, VR128:$mask),
|
||||
(VPBLENDVBrr VR128:$src1, VR128:$src2, VR128:$mask)>;
|
||||
def : Pat<(X86blendvpd VR128:$src1, VR128:$src2, VR128:$mask),
|
||||
(VBLENDVPDrr VR128:$src1, VR128:$src2, VR128:$mask)>;
|
||||
def : Pat<(X86blendvps VR128:$src1, VR128:$src2, VR128:$mask),
|
||||
(VBLENDVPSrr VR128:$src1, VR128:$src2, VR128:$mask)>;
|
||||
def : Pat<(v16i8 (X86blendv (v16i8 VR128:$src1), (v16i8 VR128:$src2),
|
||||
VR128:$mask)),
|
||||
(VPBLENDVBrr VR128:$src1, VR128:$src2, VR128:$mask)>;
|
||||
def : Pat<(v4i32 (X86blendv (v4i32 VR128:$src1), (v4i32 VR128:$src2),
|
||||
VR128:$mask)),
|
||||
(VBLENDVPSrr VR128:$src1, VR128:$src2, VR128:$mask)>;
|
||||
def : Pat<(v4f32 (X86blendv (v4f32 VR128:$src1), (v4f32 VR128:$src2),
|
||||
VR128:$mask)),
|
||||
(VBLENDVPSrr VR128:$src1, VR128:$src2, VR128:$mask)>;
|
||||
def : Pat<(v2i64 (X86blendv (v2i64 VR128:$src1), (v2i64 VR128:$src2),
|
||||
VR128:$mask)),
|
||||
(VBLENDVPDrr VR128:$src1, VR128:$src2, VR128:$mask)>;
|
||||
def : Pat<(v2f64 (X86blendv (v2f64 VR128:$src1), (v2f64 VR128:$src2),
|
||||
VR128:$mask)),
|
||||
(VBLENDVPDrr VR128:$src1, VR128:$src2, VR128:$mask)>;
|
||||
|
||||
|
||||
def : Pat<(v8i32 (X86blendv (v8i32 VR256:$src1), (v8i32 VR256:$src2),
|
||||
VR256:$mask)),
|
||||
(VBLENDVPSYrr VR256:$src1, VR256:$src2, VR256:$mask)>;
|
||||
def : Pat<(v8f32 (X86blendv (v8f32 VR256:$src1), (v8f32 VR256:$src2),
|
||||
VR256:$mask)),
|
||||
(VBLENDVPSYrr VR256:$src1, VR256:$src2, VR256:$mask)>;
|
||||
|
||||
|
||||
def : Pat<(v4i64 (X86blendv (v4i64 VR256:$src1), (v4i64 VR256:$src2),
|
||||
VR256:$mask)),
|
||||
(VBLENDVPDYrr VR256:$src1, VR256:$src2, VR256:$mask)>;
|
||||
def : Pat<(v4f64 (X86blendv (v4f64 VR256:$src1), (v4f64 VR256:$src2),
|
||||
VR256:$mask)),
|
||||
(VBLENDVPDYrr VR256:$src1, VR256:$src2, VR256:$mask)>;
|
||||
}
|
||||
|
||||
/// SS41I_ternary_int - SSE 4.1 ternary operator
|
||||
@ -5901,12 +5926,16 @@ defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
|
||||
defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
|
||||
|
||||
let Predicates = [HasSSE41] in {
|
||||
def : Pat<(X86pblendvb VR128:$src1, VR128:$src2, XMM0),
|
||||
def : Pat<(v16i8 (X86blendv (v16i8 VR128:$src1), (v16i8 VR128:$src2), XMM0)),
|
||||
(PBLENDVBrr0 VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(X86blendvpd VR128:$src1, VR128:$src2, XMM0),
|
||||
(BLENDVPDrr0 VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(X86blendvps VR128:$src1, VR128:$src2, XMM0),
|
||||
def : Pat<(v4i32 (X86blendv (v4i32 VR128:$src1), (v4i32 VR128:$src2), XMM0)),
|
||||
(BLENDVPSrr0 VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v4f32 (X86blendv (v4f32 VR128:$src1), (v4f32 VR128:$src2), XMM0)),
|
||||
(BLENDVPSrr0 VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v2i64 (X86blendv (v2i64 VR128:$src1), (v2i64 VR128:$src2), XMM0)),
|
||||
(BLENDVPDrr0 VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v2f64 (X86blendv (v2f64 VR128:$src1), (v2f64 VR128:$src2), XMM0)),
|
||||
(BLENDVPDrr0 VR128:$src1, VR128:$src2)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX] in
|
||||
|
@ -1,5 +1,7 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -promote-elements -mattr=+avx | FileCheck %s
|
||||
|
||||
; AVX128 tests:
|
||||
|
||||
;CHECK: vsel_float
|
||||
;CHECK: vblendvps
|
||||
;CHECK: ret
|
||||
@ -45,3 +47,39 @@ define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
|
||||
}
|
||||
|
||||
|
||||
; AVX256 tests:
|
||||
|
||||
|
||||
;CHECK: vsel_float
|
||||
;CHECK: vblendvps
|
||||
;CHECK: ret
|
||||
define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
|
||||
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x float> %v1, <8 x float> %v2
|
||||
ret <8 x float> %vsel
|
||||
}
|
||||
|
||||
;CHECK: vsel_i32
|
||||
;CHECK: vblendvps
|
||||
;CHECK: ret
|
||||
define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
|
||||
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i32> %v1, <8 x i32> %v2
|
||||
ret <8 x i32> %vsel
|
||||
}
|
||||
|
||||
;CHECK: vsel_double
|
||||
;CHECK: vblendvpd
|
||||
;CHECK: ret
|
||||
define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
|
||||
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x double> %v1, <8 x double> %v2
|
||||
ret <8 x double> %vsel
|
||||
}
|
||||
|
||||
;CHECK: vsel_i64
|
||||
;CHECK: vblendvpd
|
||||
;CHECK: ret
|
||||
define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
|
||||
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i64> %v1, <8 x i64> %v2
|
||||
ret <8 x i64> %vsel
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user