diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 498efd53d38..d413e6c34a3 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2623,6 +2623,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, switch(Opc) { default: llvm_unreachable("Unknown x86 shuffle node"); case X86ISD::MOVLHPS: + case X86ISD::MOVLHPD: case X86ISD::MOVSS: case X86ISD::MOVSD: case X86ISD::PUNPCKLDQ: @@ -5004,6 +5005,22 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { return DAG.getVectorShuffle(VT, dl, LoShuffle, HiShuffle, &MaskOps[0]); } +static +SDValue getMOVLowToHigh(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, + bool HasSSE2) { + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + EVT VT = Op.getValueType(); + + assert(VT != MVT::v2i64 && "unsupported shuffle type"); + + if (HasSSE2 && VT == MVT::v2f64) + return getTargetShuffleNode(X86ISD::MOVLHPD, dl, VT, V1, V2, DAG); + + // v4f32 or v4i32 + return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V2, DAG); +} + SDValue X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); @@ -5110,12 +5127,16 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { } // FIXME: fold these into legal mask. 
- if (!isMMX && (X86::isMOVSHDUPMask(SVOp) || - X86::isMOVSLDUPMask(SVOp) || - X86::isMOVHLPSMask(SVOp) || - X86::isMOVLHPSMask(SVOp) || - X86::isMOVLPMask(SVOp))) - return Op; + if (!isMMX) { + if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp)) + return getMOVLowToHigh(Op, dl, DAG, HasSSE2); + + if (X86::isMOVSHDUPMask(SVOp) || + X86::isMOVSLDUPMask(SVOp) || + X86::isMOVHLPSMask(SVOp) || + X86::isMOVLPMask(SVOp)) + return Op; + } if (ShouldXformToMOVHLPS(SVOp) || ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp)) @@ -8362,13 +8383,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::SHUFPS: return "X86ISD::SHUFPS"; case X86ISD::SHUFPD: return "X86ISD::SHUFPD"; case X86ISD::MOVLHPS: return "X86ISD::MOVLHPS"; - case X86ISD::MOVHLPS: return "X86ISD::MOVHLPS"; case X86ISD::MOVLHPD: return "X86ISD::MOVLHPD"; + case X86ISD::MOVHLPS: return "X86ISD::MOVHLPS"; case X86ISD::MOVHLPD: return "X86ISD::MOVHLPD"; - case X86ISD::MOVHPS: return "X86ISD::MOVHPS"; - case X86ISD::MOVLPS: return "X86ISD::MOVLPS"; - case X86ISD::MOVHPD: return "X86ISD::MOVHPD"; - case X86ISD::MOVLPD: return "X86ISD::MOVLPD"; case X86ISD::MOVDDUP: return "X86ISD::MOVDDUP"; case X86ISD::MOVSHDUP: return "X86ISD::MOVSHDUP"; case X86ISD::MOVSLDUP: return "X86ISD::MOVSLDUP"; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index c2f5349d76a..7cd9a157a3b 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -266,13 +266,9 @@ namespace llvm { MOVSHDUP_LD, MOVSLDUP_LD, MOVLHPS, - MOVHLPS, MOVLHPD, + MOVHLPS, MOVHLPD, - MOVHPS, - MOVHPD, - MOVLPS, - MOVLPD, MOVSD, MOVSS, UNPCKLPS, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 7ddf6897327..5b6b65edc2b 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -166,14 +166,11 @@ def X86Movsd : SDNode<"X86ISD::MOVSD", SDTShuff2Op>; def X86Movss : 
SDNode<"X86ISD::MOVSS", SDTShuff2Op>; def X86Movlhps : SDNode<"X86ISD::MOVLHPS", SDTShuff2Op>; -def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>; def X86Movlhpd : SDNode<"X86ISD::MOVLHPD", SDTShuff2Op>; + +def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>; def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>; -def X86MovhpsLd : SDNode<"X86ISD::MOVHPS", SDTShuff2OpLd, - [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; -def X86MovhpdLd : SDNode<"X86ISD::MOVHPD", SDTShuff2OpLd, - [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def X86MovlpsLd : SDNode<"X86ISD::MOVLPS", SDTShuff2OpLd, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def X86MovlpdLd : SDNode<"X86ISD::MOVLPD", SDTShuff2OpLd, diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index d9e7b40acdd..e9f054ba34a 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -5664,22 +5664,6 @@ def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))), def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))), (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>; -// Shuffle with MOVLHPS instruction -def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)), - (VMOVLHPSrr (v4f32 VR128:$src1), VR128:$src2)>, Requires<[HasAVX]>; -def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)), - (MOVLHPSrr (v4f32 VR128:$src1), VR128:$src2)>; - -def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)), - (VMOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>, Requires<[HasAVX]>; -def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)), - (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>; - -def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)), - (VMOVLHPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>; -def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)), - (MOVLHPSrr VR128:$src1, VR128:$src2)>; - // Shuffle with MOVHLPS instruction def : Pat<(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)), (MOVHLPSrr VR128:$src1, VR128:$src2)>; @@ -5817,28 +5801,25 @@ def : 
Pat<(v2i64 (X86Punpckhqdq VR128:$src1, (memopv2i64 addr:$src2))), def : Pat<(v2i64 (X86Punpckhqdq VR128:$src1, VR128:$src2)), (PUNPCKHQDQrr VR128:$src1, VR128:$src2)>; -// Shuffle with MOVHPS -def : Pat<(v4f32 (X86MovhpsLd VR128:$src1, addr:$src2)), +// Shuffle with MOVLHPS +def : Pat<(X86Movlhps VR128:$src1, + (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))), (MOVHPSrm VR128:$src1, addr:$src2)>; -def : Pat<(v4i32 (X86MovhpsLd VR128:$src1, addr:$src2)), +def : Pat<(X86Movlhps VR128:$src1, + (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), (MOVHPSrm VR128:$src1, addr:$src2)>; +def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)), + (MOVLHPSrr VR128:$src1, VR128:$src2)>; +def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)), + (MOVLHPSrr VR128:$src1, VR128:$src2)>; +def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)), + (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>; -// Shuffle with MOVHPD -def : Pat<(v2f64 (X86MovhpdLd VR128:$src1, addr:$src2)), +// Shuffle with MOVLHPD +def : Pat<(v2f64 (X86Movlhpd VR128:$src1, + (scalar_to_vector (loadf64 addr:$src2)))), (MOVHPDrm VR128:$src1, addr:$src2)>; -// Shuffle with MOVLPS -def : Pat<(v4f32 (X86MovlpsLd VR128:$src1, addr:$src2)), - (MOVLPSrm VR128:$src1, addr:$src2)>; -def : Pat<(v4i32 (X86MovlpsLd VR128:$src1, addr:$src2)), - (MOVLPSrm VR128:$src1, addr:$src2)>; - -// Shuffle with MOVLPD -def : Pat<(v2f64 (X86MovlpdLd VR128:$src1, addr:$src2)), - (MOVLPDrm VR128:$src1, addr:$src2)>; -def : Pat<(v2i64 (X86MovlpdLd VR128:$src1, addr:$src2)), - (MOVLPDrm VR128:$src1, addr:$src2)>; - // Shuffle with MOVSS def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))), (MOVSSrr VR128:$src1, FR32:$src2)>; @@ -5928,13 +5909,3 @@ def : Pat<(store (f64 (vector_extract (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))),addr:$dst), (MOVHPDmr addr:$dst, VR128:$src)>; -def : Pat<(store (v2f64 (X86MovlpdLd VR128:$src1, addr:$src2)),addr:$src2), - (MOVLPDmr addr:$src2, VR128:$src1)>; -def : Pat<(store (v2i64 
(X86MovlpdLd VR128:$src1, addr:$src2)),addr:$src2), - (MOVLPDmr addr:$src2, VR128:$src1)>; - -def : Pat<(store (v4f32 (X86MovlpsLd VR128:$src1, addr:$src2)),addr:$src2), - (MOVLPSmr addr:$src2, VR128:$src1)>; -def : Pat<(store (v4i32 (X86MovlpsLd VR128:$src1, addr:$src2)),addr:$src2), - (MOVLPSmr addr:$src2, VR128:$src1)>; -