diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 3b11ef4e369..1b671590401 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2528,6 +2528,21 @@ bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) { isUndefOrEqual(N->getMaskElt(3), 3); } +/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form +/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, +/// <2, 3, 2, 3> +bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) { + unsigned NumElems = N->getValueType(0).getVectorNumElements(); + + if (NumElems != 4) + return false; + + return isUndefOrEqual(N->getMaskElt(0), 2) && + isUndefOrEqual(N->getMaskElt(1), 3) && + isUndefOrEqual(N->getMaskElt(2), 2) && + isUndefOrEqual(N->getMaskElt(3), 3); +} + /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. bool X86::isMOVLPMask(ShuffleVectorSDNode *N) { @@ -2547,10 +2562,9 @@ bool X86::isMOVLPMask(ShuffleVectorSDNode *N) { return true; } -/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} -/// and MOVLHPS. -bool X86::isMOVHPMask(ShuffleVectorSDNode *N) { +/// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to MOVLHPS. +bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) { unsigned NumElems = N->getValueType(0).getVectorNumElements(); if (NumElems != 2 && NumElems != 4) @@ -2567,21 +2581,6 @@ bool X86::isMOVHPMask(ShuffleVectorSDNode *N) { return true; } -/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form -/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, -/// <2, 3, 2, 3> -bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) { - unsigned NumElems = N->getValueType(0).getVectorNumElements(); - - if (NumElems != 4) - return false; - - return isUndefOrEqual(N->getMaskElt(0), 2) && - isUndefOrEqual(N->getMaskElt(1), 3) && - isUndefOrEqual(N->getMaskElt(2), 2) && - isUndefOrEqual(N->getMaskElt(3), 3); -} - /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKL. static bool isUNPCKLMask(const SmallVectorImpl &Mask, EVT VT, @@ -4275,7 +4274,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if (!isMMX && (X86::isMOVSHDUPMask(SVOp) || X86::isMOVSLDUPMask(SVOp) || X86::isMOVHLPSMask(SVOp) || - X86::isMOVHPMask(SVOp) || + X86::isMOVLHPSMask(SVOp) || X86::isMOVLPMask(SVOp))) return Op; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 0755284a9e2..7b4ab62fddd 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -286,7 +286,7 @@ namespace llvm { /// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for MOVHP{S|D}. /// as well as MOVLHPS. - bool isMOVHPMask(ShuffleVectorSDNode *N); + bool isMOVLHPSMask(ShuffleVectorSDNode *N); /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKL. diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index b8d2ee384f1..7d60507476a 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -225,9 +225,9 @@ def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs), return X86::isMOVHLPS_v_undef_Mask(cast(N)); }]>; -def movhp : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVHPMask(cast(N)); +def movlhps : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVLHPSMask(cast(N)); }]>; def movlp : PatFrag<(ops node:$lhs, node:$rhs), @@ -735,7 +735,7 @@ let Constraints = "$src1 = $dst" in { (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "movhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (movhp VR128:$src1, + (movlhps VR128:$src1, (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>; } // AddedComplexity } // Constraints = "$src1 = $dst" @@ -760,7 +760,7 @@ def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movlhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (movhp VR128:$src1, VR128:$src2)))]>; + (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>; def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), @@ -1494,7 +1494,7 @@ let Constraints = "$src1 = $dst" in { (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "movhpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2f64 (movhp VR128:$src1, + (v2f64 (movlhps VR128:$src1, (scalar_to_vector (loadf64 addr:$src2)))))]>; } // AddedComplexity } // Constraints = "$src1 = $dst" @@ -3035,7 +3035,7 @@ def : Pat<(v4i32 (unpckh_undef VR128:$src, (undef))), let AddedComplexity = 20 in { // vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS -def : Pat<(v4i32 (movhp VR128:$src1, VR128:$src2)), +def : Pat<(v4i32 (movlhps VR128:$src1, VR128:$src2)), (MOVLHPSrr VR128:$src1, VR128:$src2)>; // vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS @@ -3051,48 +3051,26 @@ def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))), let AddedComplexity = 20 in { // vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS -// vector_shuffle v1, (load v2) <6, 7, 2, 3> using MOVHPS def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))), (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))), (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v4f32 (movhlps (load addr:$src1), VR128:$src2)), - (MOVHPSrm VR128:$src2, addr:$src1)>, Requires<[HasSSE1]>; -def : Pat<(v2f64 (movhlps (load addr:$src1), VR128:$src2)), - (MOVHPDrm VR128:$src2, addr:$src1)>, Requires<[HasSSE2]>; - def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))), (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))), (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v4i32 (movhp VR128:$src1, (load addr:$src2))), - (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; -def : Pat<(v2i64 (movhp VR128:$src1, (load addr:$src2))), - (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; } // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS -// (store (vector_shuffle (load addr), v2, <0, 1, 4, 5>), addr) using MOVHPS def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; -def : Pat<(store (v4f32 (movhlps (load addr:$src1), VR128:$src2)), addr:$src1), - (MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; -def : Pat<(store (v2f64 (movhlps (load addr:$src1), VR128:$src2)), addr:$src1), - (MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; - def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1), (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; -def : Pat<(store (v4i32 (movhp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), - addr:$src1), - (MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; -def : Pat<(store (v2i64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1), - (MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; - let AddedComplexity = 15 in { // Setting the lowest element in the vector. diff --git a/test/CodeGen/X86/vec_shuffle-3.ll b/test/CodeGen/X86/vec_shuffle-3.ll index eedbce720ef..f4930b08450 100644 --- a/test/CodeGen/X86/vec_shuffle-3.ll +++ b/test/CodeGen/X86/vec_shuffle-3.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t ; RUN: grep movlhps %t | count 1 -; RUN: grep movhps %t | count 1 +; RUN: grep movhlps %t | count 1 define <4 x float> @test1(<4 x float>* %x, <4 x float>* %y) { %tmp = load <4 x float>* %y ; <<4 x float>> [#uses=2]