X86: select SCALAR_TO_VECTOR for MMX / SSE2 integer vectors.

What this patch does:
  * X86ISelLowering: marks ISD::SCALAR_TO_VECTOR as Custom for v16i8 and
    v8i16.  LowerOperation any-extends the scalar operand to i32 and wraps
    it in the new target node X86ISD::SCALAR_TO_VECTOR, whose result element
    type does not have to match the operand type.
  * X86InstrMMX.td / X86InstrSSE.td: gives MOVD64rr, MOVD128rr and MOVQ128rr
    scalar_to_vector selection patterns, and maps X86ISD::SCALAR_TO_VECTOR
    (X86s2vec) for v8i16 / v16i8 onto MOVD128rr.
  * X86InstrSSE.td: moves the 128-bit undef patterns into a new
    "Non-Instruction Patterns" section and adds store patterns for the
    128-bit integer vector types (selected as MOVAPSmr).

Review notes:
  * MOVQ128rm keeps its Requires<[HasSSE2]> predicate; it is an SSE2
    instruction like MOVQ128rr and must not lose the guard.
  * The 128-bit integer store patterns are guarded by Requires<[HasSSE2]>,
    consistent with the undef and X86s2vec patterns next to them.
  * The reformatting of the setOperationAction / SDNode lines is
    whitespace-only (column re-alignment for the longer opcode name).

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 174c7979043..1d4221414ff 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -265,19 +265,19 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
     addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
 
     // FIXME: add MMX packed arithmetics
-    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8,  Expand);
-    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Expand);
-    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Expand);
+    setOperationAction(ISD::BUILD_VECTOR,     MVT::v8i8,  Expand);
+    setOperationAction(ISD::BUILD_VECTOR,     MVT::v4i16, Expand);
+    setOperationAction(ISD::BUILD_VECTOR,     MVT::v2i32, Expand);
   }
 
   if (TM.getSubtarget<X86Subtarget>().hasSSE1()) {
     addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
 
-    setOperationAction(ISD::ADD         , MVT::v4f32, Legal);
-    setOperationAction(ISD::SUB         , MVT::v4f32, Legal);
-    setOperationAction(ISD::MUL         , MVT::v4f32, Legal);
-    setOperationAction(ISD::LOAD        , MVT::v4f32, Legal);
-    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Expand);
+    setOperationAction(ISD::ADD,              MVT::v4f32, Legal);
+    setOperationAction(ISD::SUB,              MVT::v4f32, Legal);
+    setOperationAction(ISD::MUL,              MVT::v4f32, Legal);
+    setOperationAction(ISD::LOAD,             MVT::v4f32, Legal);
+    setOperationAction(ISD::BUILD_VECTOR,     MVT::v4f32, Expand);
   }
 
   if (TM.getSubtarget<X86Subtarget>().hasSSE2()) {
@@ -288,15 +288,17 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
     addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);
 
 
-    setOperationAction(ISD::ADD         , MVT::v2f64, Legal);
-    setOperationAction(ISD::SUB         , MVT::v2f64, Legal);
-    setOperationAction(ISD::MUL         , MVT::v2f64, Legal);
-    setOperationAction(ISD::LOAD        , MVT::v2f64, Legal);
-    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Expand);
-    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Expand);
-    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Expand);
-    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Expand);
-    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Expand);
+    setOperationAction(ISD::ADD,              MVT::v2f64, Legal);
+    setOperationAction(ISD::SUB,              MVT::v2f64, Legal);
+    setOperationAction(ISD::MUL,              MVT::v2f64, Legal);
+    setOperationAction(ISD::LOAD,             MVT::v2f64, Legal);
+    setOperationAction(ISD::BUILD_VECTOR,     MVT::v2f64, Expand);
+    setOperationAction(ISD::BUILD_VECTOR,     MVT::v16i8, Expand);
+    setOperationAction(ISD::BUILD_VECTOR,     MVT::v8i16, Expand);
+    setOperationAction(ISD::BUILD_VECTOR,     MVT::v4i32, Expand);
+    setOperationAction(ISD::BUILD_VECTOR,     MVT::v2i64, Expand);
+    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
+    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
   }
 
   computeRegisterProperties();
@@ -2135,6 +2137,10 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
                        Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16),
                        Copy.getValue(1));
   }
+  case ISD::SCALAR_TO_VECTOR: {
+    SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
+    return DAG.getNode(X86ISD::SCALAR_TO_VECTOR, Op.getValueType(), AnyExt);
+  }
   }
 }
 
@@ -2168,6 +2174,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::LOAD_PACK:          return "X86ISD::LOAD_PACK";
   case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
   case X86ISD::Wrapper:            return "X86ISD::Wrapper";
+  case X86ISD::SCALAR_TO_VECTOR:   return "X86ISD::SCALAR_TO_VECTOR";
   }
 }
 
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 823fa6a0144..bdbe46d2b02 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -145,6 +145,10 @@ namespace llvm {
       /// TCPWrapper - A wrapper node for TargetConstantPool,
       /// TargetExternalSymbol, and TargetGlobalAddress.
       Wrapper,
+
+      /// SCALAR_TO_VECTOR - X86 version of SCALAR_TO_VECTOR. The destination base
+      /// type does not have to match the operand type.
+      SCALAR_TO_VECTOR,
     };
 
     // X86 specific condition code. These correspond to X86_*_COND in
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 244cac69d1f..38b40a73342 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -24,7 +24,9 @@ def : Pat<(v2i32 (undef)), (IMPLICIT_DEF_VR64)>, Requires<[HasMMX]>;
 
 // Move Instructions
 def MOVD64rr : I<0x6E, MRMSrcReg, (ops VR64:$dst, R32:$src),
-                 "movd {$src, $dst|$dst, $src}", []>, TB,
+                 "movd {$src, $dst|$dst, $src}",
+                 [(set VR64:$dst,
+                   (v2i32 (scalar_to_vector R32:$src)))]>, TB,
                Requires<[HasMMX]>;
 def MOVD64rm : I<0x6E, MRMSrcMem, (ops VR64:$dst, i32mem:$src),
                  "movd {$src, $dst|$dst, $src}", []>, TB,
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -17,12 +17,14 @@
 // SSE specific DAG Nodes.
 //===----------------------------------------------------------------------===//
 
-def X86loadp : SDNode<"X86ISD::LOAD_PACK", SDTLoad,
-                      [SDNPHasChain]>;
-def X86fand  : SDNode<"X86ISD::FAND",      SDTFPBinOp,
-                      [SDNPCommutative, SDNPAssociative]>;
-def X86fxor  : SDNode<"X86ISD::FXOR",      SDTFPBinOp,
-                      [SDNPCommutative, SDNPAssociative]>;
+def X86loadp   : SDNode<"X86ISD::LOAD_PACK", SDTLoad,
+                        [SDNPHasChain]>;
+def X86fand    : SDNode<"X86ISD::FAND",      SDTFPBinOp,
+                        [SDNPCommutative, SDNPAssociative]>;
+def X86fxor    : SDNode<"X86ISD::FXOR",      SDTFPBinOp,
+                        [SDNPCommutative, SDNPAssociative]>;
+def X86s2vec   : SDNode<"X86ISD::SCALAR_TO_VECTOR",
+                        SDTypeProfile<1, 1, []>, []>;
 
 //===----------------------------------------------------------------------===//
 // SSE pattern fragments
@@ -347,12 +349,6 @@ def IMPLICIT_DEF_VR128 : I<0, Pseudo, (ops VR128:$dst),
                          [(set VR128:$dst, (v4f32 (undef)))]>,
                        Requires<[HasSSE1]>;
 
-def : Pat<(v2f64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
-def : Pat<(v16i8 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
-def : Pat<(v8i16 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
-def : Pat<(v4i32 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
-
 // Move Instructions
 def MOVAPSrr : PSI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                    "movaps {$src, $dst|$dst, $src}", []>;
@@ -700,7 +696,9 @@ def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
 
 // Move Instructions
 def MOVD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
-                  "movd {$src, $dst|$dst, $src}", []>;
+                  "movd {$src, $dst|$dst, $src}",
+                  [(set VR128:$dst,
+                    (v4i32 (scalar_to_vector R32:$src)))]>;
 def MOVD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
                   "movd {$src, $dst|$dst, $src}", []>;
 def MOVD128mr : PDI<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
@@ -708,7 +706,9 @@ def MOVD128mr : PDI<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
 
 // SSE2 instructions with XS prefix
 def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
-                  "movq {$src, $dst|$dst, $src}", []>, XS,
+                  "movq {$src, $dst|$dst, $src}",
+                  [(set VR128:$dst,
+                    (v2i64 (scalar_to_vector VR64:$src)))]>, XS,
                 Requires<[HasSSE2]>;
 def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
                   "movq {$src, $dst|$dst, $src}", []>, XS,
@@ -731,3 +731,32 @@ def FR64ToV2F64 : PDI<0x28, MRMSrcReg, (ops VR128:$dst, FR64:$src),
                       "movapd {$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (v2f64 (scalar_to_vector FR64:$src)))]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// 128-bit vector undefs.
+def : Pat<(v2f64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
+def : Pat<(v16i8 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
+def : Pat<(v8i16 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
+def : Pat<(v4i32 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
+def : Pat<(v2i64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
+
+// Store 128-bit integer vector values.
+def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+          (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+          (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(store (v4i32 VR128:$src), addr:$dst),
+          (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(store (v2i64 VR128:$src), addr:$dst),
+          (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+
+// Scalar to v8i16 / v16i8. The source may be a R32, but only the lower 8 or
+// 16 bits matter.
+def : Pat<(v8i16 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
+      Requires<[HasSSE2]>;
+def : Pat<(v16i8 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
+      Requires<[HasSSE2]>;
+