diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index e9178f217a3..0a9f4b4ed71 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1398,8 +1398,8 @@ bool X86::isPSHUFDMask(SDNode *N) {
 bool X86::isSHUFPMask(SDNode *N) {
   assert(N->getOpcode() == ISD::BUILD_VECTOR);
 
-  unsigned NumOperands = N->getNumOperands();
-  if (NumOperands == 2) {
+  unsigned NumElems = N->getNumOperands();
+  if (NumElems == 2) {
     // The only case that ought be handled by SHUFPD is
     // Dest { 2, 1 } <= shuffle( Dest { 1, 0 }, Src { 3, 2 }
     // Expect bit 0 == 1, bit1 == 2
@@ -1411,21 +1411,21 @@ bool X86::isSHUFPMask(SDNode *N) {
             cast<ConstantSDNode>(Bit1)->getValue() == 2);
   }
 
-  if (NumOperands != 4) return false;
+  if (NumElems != 4) return false;
 
   // Each half must refer to only one of the vector.
   SDOperand Elt = N->getOperand(0);
   assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
-  for (unsigned i = 1; i != NumOperands / 2; ++i) {
+  for (unsigned i = 1; i != NumElems / 2; ++i) {
     assert(isa<ConstantSDNode>(N->getOperand(i)) &&
            "Invalid VECTOR_SHUFFLE mask!");
     if (cast<ConstantSDNode>(N->getOperand(i))->getValue() !=
         cast<ConstantSDNode>(Elt)->getValue()) return false;
   }
-  Elt = N->getOperand(NumOperands / 2);
+  Elt = N->getOperand(NumElems / 2);
   assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
-  for (unsigned i = NumOperands / 2; i != NumOperands; ++i) {
+  for (unsigned i = NumElems / 2; i != NumElems; ++i) {
     assert(isa<ConstantSDNode>(N->getOperand(i)) &&
            "Invalid VECTOR_SHUFFLE mask!");
     if (cast<ConstantSDNode>(N->getOperand(i))->getValue() !=
@@ -1530,20 +1530,23 @@ unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
   return Mask;
 }
 
-/// isZeroVector - Return true if all elements of BUILD_VECTOR are 0 or +0.0.
+/// isZeroVector - Return true if this build_vector is an all-zero vector.
+///
 bool X86::isZeroVector(SDNode *N) {
-  for (SDNode::op_iterator I = N->op_begin(), E = N->op_end();
-       I != E; ++I) {
-    if (ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(*I)) {
-      if (!FPC->isExactlyValue(+0.0))
+  if (MVT::isInteger(N->getOperand(0).getValueType())) {
+    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+      if (!isa<ConstantSDNode>(N->getOperand(i)) ||
+          cast<ConstantSDNode>(N->getOperand(i))->getValue() != 0)
         return false;
-    } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(*I)) {
-      if (!C->isNullValue())
+  } else {
+    assert(MVT::isFloatingPoint(N->getOperand(0).getValueType()) &&
+           "Vector of non-int, non-float values?");
+    // See if this is all zeros.
+    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+      if (!isa<ConstantFPSDNode>(N->getOperand(i)) ||
+          !cast<ConstantFPSDNode>(N->getOperand(i))->isExactlyValue(0.0))
         return false;
-    } else
-      return false;
   }
-
   return true;
 }
@@ -2318,7 +2321,7 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
   }
   case ISD::SCALAR_TO_VECTOR: {
     SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
-    return DAG.getNode(X86ISD::SCALAR_TO_VECTOR, Op.getValueType(), AnyExt);
+    return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
   }
   case ISD::VECTOR_SHUFFLE: {
     SDOperand V1 = Op.getOperand(0);
@@ -2338,6 +2341,9 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
       return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
                          DAG.getNode(ISD::UNDEF, V1.getValueType()),
                          PermMask);
+    } else if (NumElems == 2) {
+      // All v2f64 cases are handled.
+      return SDOperand();
     } else if (X86::isPSHUFDMask(PermMask.Val)) {
       if (V2.getOpcode() == ISD::UNDEF)
         // Leave the VECTOR_SHUFFLE alone. It matches PSHUFD.
@@ -2347,9 +2353,6 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
       return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
                          DAG.getNode(ISD::UNDEF, V1.getValueType()),
                          PermMask);
-    } else if (NumElems == 2) {
-      // All v2f64 cases are handled.
-      return SDOperand();
     } else if (X86::isSHUFPMask(PermMask.Val)) {
       SDOperand Elt = PermMask.getOperand(0);
       if (cast<ConstantSDNode>(Elt)->getValue() >= NumElems) {
@@ -2370,22 +2373,32 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
     abort();
   }
   case ISD::BUILD_VECTOR: {
-    bool isZero = true;
+    SDOperand Elt0 = Op.getOperand(0);
+    bool Elt0IsZero = (isa<ConstantSDNode>(Elt0) &&
+                       cast<ConstantSDNode>(Elt0)->getValue() == 0) ||
+                      (isa<ConstantFPSDNode>(Elt0) &&
+                       cast<ConstantFPSDNode>(Elt0)->isExactlyValue(0.0));
+    bool RestAreZero = true;
     unsigned NumElems = Op.getNumOperands();
-    for (unsigned i = 0; i < NumElems; ++i) {
+    for (unsigned i = 1; i < NumElems; ++i) {
       SDOperand V = Op.getOperand(i);
       if (ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(V)) {
         if (!FPC->isExactlyValue(+0.0))
-          isZero = false;
+          RestAreZero = false;
       } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(V)) {
         if (!C->isNullValue())
-          isZero = false;
+          RestAreZero = false;
       } else
-        isZero = false;
+        RestAreZero = false;
+    }
+
+    if (RestAreZero) {
+      if (Elt0IsZero) return Op;
+
+      // Zero-extend a scalar to a vector.
+      return DAG.getNode(X86ISD::ZEXT_S2VEC, Op.getValueType(), Elt0);
     }
-    if (isZero)
-      return Op;
     return SDOperand();
   }
   }
@@ -2421,7 +2434,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::LOAD_PACK:        return "X86ISD::LOAD_PACK";
   case X86ISD::GlobalBaseReg:    return "X86ISD::GlobalBaseReg";
   case X86ISD::Wrapper:          return "X86ISD::Wrapper";
-  case X86ISD::SCALAR_TO_VECTOR: return "X86ISD::SCALAR_TO_VECTOR";
+  case X86ISD::S2VEC:            return "X86ISD::S2VEC";
+  case X86ISD::ZEXT_S2VEC:       return "X86ISD::ZEXT_S2VEC";
   }
 }
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 854f76da2be..1dc90e536e1 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -146,12 +146,13 @@ namespace llvm {
       /// TargetExternalSymbol, and TargetGlobalAddress.
      Wrapper,
 
-      /// SCALAR_TO_VECTOR - X86 version of SCALAR_TO_VECTOR. The destination base
-      /// type does not have to match the operand type.
-      SCALAR_TO_VECTOR,
+      /// S2VEC - X86 version of SCALAR_TO_VECTOR. The destination base type
+      /// does not have to match the operand type.
+      S2VEC,
 
-      /// UNPCKLP - X86 unpack and interleave low instructions.
-      UNPCKLP,
+      /// ZEXT_S2VEC - SCALAR_TO_VECTOR with zero extension. The destination
+      /// base type does not have to match the operand type.
+      ZEXT_S2VEC,
     };
 
   // X86 specific condition code. These correspond to X86_*_COND in
@@ -209,7 +210,8 @@ namespace llvm {
     /// instructions.
     unsigned getShuffleSHUFImmediate(SDNode *N);
 
-    /// isZeroVector - Return true if all elements of BUILD_VECTOR are 0 or +0.0.
+    /// isZeroVector - Return true if this build_vector is an all-zero vector.
+    ///
     bool isZeroVector(SDNode *N);
 }
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 1de163a01ee..20e1cd0ecd7 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -31,7 +31,8 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
       oc == X86::FpMOV || oc == X86::MOVSSrr || oc == X86::MOVSDrr ||
       oc == X86::FsMOVAPSrr || oc == X86::FsMOVAPDrr ||
       oc == X86::MOVAPSrr || oc == X86::MOVAPDrr ||
-      oc == X86::FR32ToV4F32 || oc == X86::FR64ToV2F64) {
+      oc == X86::MOVSS128rr || oc == X86::MOVSD128rr ||
+      oc == X86::MOVD128rr || oc == X86::MOVQ128rr) {
     assert(MI.getNumOperands() == 2 &&
            MI.getOperand(0).isRegister() &&
            MI.getOperand(1).isRegister() &&
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 664b06170e9..3dadd2631a4 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -17,19 +17,16 @@
 // SSE specific DAG Nodes.
 //===----------------------------------------------------------------------===//
 
-def SDTX86Unpcklp : SDTypeProfile<1, 2,
-                                  [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>]>;
-
 def X86loadp   : SDNode<"X86ISD::LOAD_PACK", SDTLoad,
                         [SDNPHasChain]>;
 def X86fand    : SDNode<"X86ISD::FAND",      SDTFPBinOp,
                         [SDNPCommutative, SDNPAssociative]>;
 def X86fxor    : SDNode<"X86ISD::FXOR",      SDTFPBinOp,
                         [SDNPCommutative, SDNPAssociative]>;
-def X86s2vec   : SDNode<"X86ISD::SCALAR_TO_VECTOR",
+def X86s2vec   : SDNode<"X86ISD::S2VEC",
                         SDTypeProfile<1, 1, []>, []>;
-def X86unpcklp : SDNode<"X86ISD::UNPCKLP",
-                        SDTX86Unpcklp, []>;
+def X86zexts2vec : SDNode<"X86ISD::ZEXT_S2VEC",
+                          SDTypeProfile<1, 1, []>, []>;
 
 //===----------------------------------------------------------------------===//
 // SSE pattern fragments
@@ -156,6 +153,25 @@ def MOVSDmr : SDI<0x11, MRMDestMem, (ops f64mem:$dst, FR64:$src),
                   "movsd {$src, $dst|$dst, $src}",
                   [(store FR64:$src, addr:$dst)]>;
 
+// FR32 / FR64 to 128-bit vector conversion.
+def MOVSS128rr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, FR32:$src),
+                     "movss {$src, $dst|$dst, $src}",
+                     [(set VR128:$dst,
+                       (v4f32 (scalar_to_vector FR32:$src)))]>;
+def MOVSS128rm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
+                     "movss {$src, $dst|$dst, $src}",
+                     [(set VR128:$dst,
+                       (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>;
+def MOVSD128rr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, FR64:$src),
+                     "movsd {$src, $dst|$dst, $src}",
+                     [(set VR128:$dst,
+                       (v2f64 (scalar_to_vector FR64:$src)))]>;
+def MOVSD128rm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
+                     "movsd {$src, $dst|$dst, $src}",
+                     [(set VR128:$dst,
+                       (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
+
+
 // Conversion instructions
 def CVTTSS2SIrr: SSI<0x2C, MRMSrcReg, (ops R32:$dst, FR32:$src),
                      "cvttss2si {$src, $dst|$dst, $src}",
@@ -788,7 +804,10 @@ def MOVD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
                     [(set VR128:$dst,
                       (v4i32 (scalar_to_vector R32:$src)))]>;
 def MOVD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
-                    "movd {$src, $dst|$dst, $src}", []>;
+                    "movd {$src, $dst|$dst, $src}",
+                    [(set VR128:$dst,
+                      (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
+
 def MOVD128mr : PDI<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
                     "movd {$src, $dst|$dst, $src}", []>;
@@ -808,8 +827,8 @@ def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
                     (v2i64 (scalar_to_vector VR64:$src)))]>, XS,
                   Requires<[HasSSE2]>;
 def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
-                  "movq {$src, $dst|$dst, $src}", []>, XS;
-
+                  "movq {$src, $dst|$dst, $src}", []>, XS,
+                  Requires<[HasSSE2]>;
 def MOVQ128mr : PDI<0xD6, MRMSrcMem, (ops i64mem:$dst, VR128:$src),
                     "movq {$src, $dst|$dst, $src}", []>;
@@ -870,15 +889,32 @@ def VZEROv4f32 : PSI<0x57, MRMInitReg, (ops VR128:$dst),
 def VZEROv2f64 : PDI<0x57, MRMInitReg, (ops VR128:$dst),
                      "xorpd $dst, $dst", [(set VR128:$dst, (v2f64 vecimm0))]>;
 
-def FR32ToV4F32 : PSI<0x28, MRMSrcReg, (ops VR128:$dst, FR32:$src),
-                      "movaps {$src, $dst|$dst, $src}",
-                      [(set VR128:$dst,
-                        (v4f32 (scalar_to_vector FR32:$src)))]>;
+// Scalar to 128-bit vector with zero extension.
+// Three-operand (but two-address) aliases.
+let isTwoAddress = 1 in {
+def MOVZSS128rr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR32:$src2),
+                      "movss {$src2, $dst|$dst, $src2}", []>;
+def MOVZSD128rr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR64:$src2),
+                      "movsd {$src2, $dst|$dst, $src2}", []>;
+def MOVZD128rr  : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, R32:$src2),
+                      "movd {$src2, $dst|$dst, $src2}", []>;
+def MOVZQ128rr  : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR64:$src2),
+                    "movq {$src2, $dst|$dst, $src2}", []>;
+}
 
-def FR64ToV2F64 : PDI<0x28, MRMSrcReg, (ops VR128:$dst, FR64:$src),
-                      "movapd {$src, $dst|$dst, $src}",
+// Loading from memory automatically zeroes the upper bits.
+def MOVZSS128rm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
+                      "movss {$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
-                        (v2f64 (scalar_to_vector FR64:$src)))]>;
+                        (v4f32 (X86zexts2vec (loadf32 addr:$src))))]>;
+def MOVZSD128rm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
+                      "movsd {$src, $dst|$dst, $src}",
+                      [(set VR128:$dst,
+                        (v2f64 (X86zexts2vec (loadf64 addr:$src))))]>;
+def MOVZD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
+                     "movd {$src, $dst|$dst, $src}",
+                     [(set VR128:$dst,
+                       (v4i32 (X86zexts2vec (loadi32 addr:$src))))]>;
 
 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
@@ -922,6 +958,20 @@ def : Pat<(v16i8 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
 def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
 def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
 
+// Zero a VR128, then do a MOVS* to the lower bits.
+def : Pat<(v2f64 (X86zexts2vec FR64:$src)),
+          (MOVZSD128rr (VZEROv2f64), FR64:$src)>;
+def : Pat<(v4f32 (X86zexts2vec FR32:$src)),
+          (MOVZSS128rr (VZEROv4f32), FR32:$src)>;
+def : Pat<(v2i64 (X86zexts2vec VR64:$src)),
+          (MOVZQ128rr (VZEROv2i64), VR64:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v4i32 (X86zexts2vec R32:$src)),
+          (MOVZD128rr (VZEROv4i32), R32:$src)>;
+def : Pat<(v8i16 (X86zexts2vec R16:$src)),
+          (MOVZD128rr (VZEROv8i16), (MOVZX32rr16 R16:$src))>;
+def : Pat<(v16i8 (X86zexts2vec R8:$src)),
+          (MOVZD128rr (VZEROv16i8), (MOVZX32rr8 R8:$src))>;
+
 // Splat v4f32 / v4i32
 def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), SHUFP_splat_mask:$sm),
           (v4f32 (SHUFPSrr VR128:$src, VR128:$src, SHUFP_splat_mask:$sm))>,