diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 171349c0663..0d658492e5d 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1019,6 +1019,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SELECT, MVT::v4i64, Custom); setOperationAction(ISD::SELECT, MVT::v8f32, Custom); + setOperationAction(ISD::VSELECT, MVT::v4f64, Custom); + setOperationAction(ISD::VSELECT, MVT::v4i64, Custom); + setOperationAction(ISD::VSELECT, MVT::v8i32, Custom); + setOperationAction(ISD::VSELECT, MVT::v8f32, Custom); + setOperationAction(ISD::ADD, MVT::v4i64, Custom); setOperationAction(ISD::ADD, MVT::v8i32, Custom); setOperationAction(ISD::ADD, MVT::v16i16, Custom); @@ -8706,14 +8711,21 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op1.getValueType(); switch (VT.getSimpleVT().SimpleTy) { default: break; + // SSE4: case MVT::v2i64: case MVT::v2f64: - return DAG.getNode(X86ISD::BLENDVPD, DL, VT, Ops, array_lengthof(Ops)); case MVT::v4i32: case MVT::v4f32: - return DAG.getNode(X86ISD::BLENDVPS, DL, VT , Ops, array_lengthof(Ops)); case MVT::v16i8: - return DAG.getNode(X86ISD::PBLENDVB, DL, VT , Ops, array_lengthof(Ops)); + case MVT::v8i16: + // AVX: + case MVT::v4i64: + case MVT::v4f64: + case MVT::v8i32: + case MVT::v8f32: + case MVT::v32i8: + case MVT::v16i16: + return DAG.getNode(X86ISD::BLENDV, DL, VT, Ops, array_lengthof(Ops)); } return SDValue(); @@ -9973,7 +9985,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M, DAG.getConstant(4, MVT::i32)); - R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, M, Op); + R = DAG.getNode(X86ISD::BLENDV, dl, VT, R, M, Op); // a += a Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op); @@ -9988,12 +10000,12 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M, DAG.getConstant(2, MVT::i32)); - R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, M, Op); + R = DAG.getNode(X86ISD::BLENDV, dl, VT, R, M, Op); // a += a Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op); // return pblendv(r, r+r, a); - R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, + R = DAG.getNode(X86ISD::BLENDV, dl, VT, R, DAG.getNode(ISD::ADD, dl, VT, R, R), Op); return R; } @@ -10631,7 +10643,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::PSIGNB: return "X86ISD::PSIGNB"; case X86ISD::PSIGNW: return "X86ISD::PSIGNW"; case X86ISD::PSIGND: return "X86ISD::PSIGND"; - case X86ISD::PBLENDVB: return "X86ISD::PBLENDVB"; + case X86ISD::BLENDV: return "X86ISD::BLENDV"; case X86ISD::FMAX: return "X86ISD::FMAX"; case X86ISD::FMIN: return "X86ISD::FMIN"; case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; @@ -13361,7 +13373,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, X = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, X); Y = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Y); Mask = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Mask); - Mask = DAG.getNode(X86ISD::PBLENDVB, DL, MVT::v16i8, X, Y, Mask); + Mask = DAG.getNode(X86ISD::BLENDV, DL, MVT::v16i8, X, Y, Mask); return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Mask); } } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 3051e16485d..bd04de150d9 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -175,10 +175,8 @@ namespace llvm { /// PSIGNB/W/D - Copy integer sign. PSIGNB, PSIGNW, PSIGND, - /// BLENDVXX family of opcodes - PBLENDVB, - BLENDVPD, - BLENDVPS, + /// BLEND family of opcodes + BLENDV, /// FMAX, FMIN - Floating point max and min. /// diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index c2db9177cde..f25435f85eb 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -58,14 +58,8 @@ def X86psignw : SDNode<"X86ISD::PSIGNW", def X86psignd : SDNode<"X86ISD::PSIGND", SDTypeProfile<1, 2, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; -def X86pblendvb : SDNode<"X86ISD::PBLENDVB", - SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>; -def X86blendvpd : SDNode<"X86ISD::BLENDVPD", - SDTypeProfile<1, 3, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>; -def X86blendvps : SDNode<"X86ISD::BLENDVPS", - SDTypeProfile<1, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>, +def X86blendv : SDNode<"X86ISD::BLENDV", + SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>; def X86pextrb : SDNode<"X86ISD::PEXTRB", SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 5071a36192f..cf363354c78 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -5868,12 +5868,37 @@ defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem, memopv32i8, int_x86_avx_blendv_ps_256>; let Predicates = [HasAVX] in { - def : Pat<(X86pblendvb VR128:$src1, VR128:$src2, VR128:$mask), - (VPBLENDVBrr VR128:$src1, VR128:$src2, VR128:$mask)>; - def : Pat<(X86blendvpd VR128:$src1, VR128:$src2, VR128:$mask), - (VBLENDVPDrr VR128:$src1, VR128:$src2, VR128:$mask)>; - def : Pat<(X86blendvps VR128:$src1, VR128:$src2, VR128:$mask), - (VBLENDVPSrr VR128:$src1, VR128:$src2, VR128:$mask)>; +def : Pat<(v16i8 (X86blendv (v16i8 VR128:$src1), (v16i8 VR128:$src2), + VR128:$mask)), + (VPBLENDVBrr VR128:$src1, VR128:$src2, VR128:$mask)>; +def : Pat<(v4i32 (X86blendv (v4i32 VR128:$src1), (v4i32 VR128:$src2), + VR128:$mask)), + (VBLENDVPSrr VR128:$src1, VR128:$src2, VR128:$mask)>; +def : Pat<(v4f32 (X86blendv (v4f32 VR128:$src1), (v4f32 VR128:$src2), + VR128:$mask)), + (VBLENDVPSrr VR128:$src1, VR128:$src2, VR128:$mask)>; +def : Pat<(v2i64 (X86blendv (v2i64 VR128:$src1), (v2i64 VR128:$src2), + VR128:$mask)), + (VBLENDVPDrr VR128:$src1, VR128:$src2, VR128:$mask)>; +def : Pat<(v2f64 (X86blendv (v2f64 VR128:$src1), (v2f64 VR128:$src2), + VR128:$mask)), + (VBLENDVPDrr VR128:$src1, VR128:$src2, VR128:$mask)>; + + +def : Pat<(v8i32 (X86blendv (v8i32 VR256:$src1), (v8i32 VR256:$src2), + VR256:$mask)), + (VBLENDVPSYrr VR256:$src1, VR256:$src2, VR256:$mask)>; +def : Pat<(v8f32 (X86blendv (v8f32 VR256:$src1), (v8f32 VR256:$src2), + VR256:$mask)), + (VBLENDVPSYrr VR256:$src1, VR256:$src2, VR256:$mask)>; + + +def : Pat<(v4i64 (X86blendv (v4i64 VR256:$src1), (v4i64 VR256:$src2), + VR256:$mask)), + (VBLENDVPDYrr VR256:$src1, VR256:$src2, VR256:$mask)>; +def : Pat<(v4f64 (X86blendv (v4f64 VR256:$src1), (v4f64 VR256:$src2), + VR256:$mask)), + (VBLENDVPDYrr VR256:$src1, VR256:$src2, VR256:$mask)>; } /// SS41I_ternary_int - SSE 4.1 ternary operator @@ -5901,12 +5926,16 @@ defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>; defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>; let Predicates = [HasSSE41] in { - def : Pat<(X86pblendvb VR128:$src1, VR128:$src2, XMM0), + def : Pat<(v16i8 (X86blendv (v16i8 VR128:$src1), (v16i8 VR128:$src2), XMM0)), (PBLENDVBrr0 VR128:$src1, VR128:$src2)>; - def : Pat<(X86blendvpd VR128:$src1, VR128:$src2, XMM0), - (BLENDVPDrr0 VR128:$src1, VR128:$src2)>; - def : Pat<(X86blendvps VR128:$src1, VR128:$src2, XMM0), + def : Pat<(v4i32 (X86blendv (v4i32 VR128:$src1), (v4i32 VR128:$src2), XMM0)), (BLENDVPSrr0 VR128:$src1, VR128:$src2)>; + def : Pat<(v4f32 (X86blendv (v4f32 VR128:$src1), (v4f32 VR128:$src2), XMM0)), + (BLENDVPSrr0 VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (X86blendv (v2i64 VR128:$src1), (v2i64 VR128:$src2), XMM0)), + (BLENDVPDrr0 VR128:$src1, VR128:$src2)>; + def : Pat<(v2f64 (X86blendv (v2f64 VR128:$src1), (v2f64 VR128:$src2), XMM0)), + (BLENDVPDrr0 VR128:$src1, VR128:$src2)>; } let Predicates = [HasAVX] in diff --git a/test/CodeGen/X86/avx-blend.ll b/test/CodeGen/X86/avx-blend.ll index dc0d013698b..e025e26bde9 100644 --- a/test/CodeGen/X86/avx-blend.ll +++ b/test/CodeGen/X86/avx-blend.ll @@ -1,5 +1,7 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -promote-elements -mattr=+avx | FileCheck %s +; AVX128 tests: + ;CHECK: vsel_float ;CHECK: vblendvps ;CHECK: ret @@ -45,3 +47,39 @@ define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) { } +; AVX256 tests: + + +;CHECK: vsel_float +;CHECK: vblendvps +;CHECK: ret +define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) { + %vsel = select <8 x i1> , <8 x float> %v1, <8 x float> %v2 + ret <8 x float> %vsel +} + +;CHECK: vsel_i32 +;CHECK: vblendvps +;CHECK: ret +define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) { + %vsel = select <8 x i1> , <8 x i32> %v1, <8 x i32> %v2 + ret <8 x i32> %vsel +} + +;CHECK: vsel_double +;CHECK: vblendvpd +;CHECK: ret +define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) { + %vsel = select <8 x i1> , <8 x double> %v1, <8 x double> %v2 + ret <8 x double> %vsel +} + +;CHECK: vsel_i64 +;CHECK: vblendvpd +;CHECK: ret +define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) { + %vsel = select <8 x i1> , <8 x i64> %v1, <8 x i64> %v2 + ret <8 x i64> %vsel +} + +