From 7d9061e3009af718544fde64ea88343fa9cd2ada Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Thu, 30 Mar 2006 19:54:57 +0000 Subject: [PATCH] Make sure all possible shuffles are matched. Use pshufd, pshufhw, and pshuflw to shuffle v4f32 if shufps doesn't match. Use shufps to shuffle v4i32 if pshufd, pshufhw, and pshuflw don't match. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27259 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 53 +++++++++++++++--------- lib/Target/X86/X86InstrSSE.td | 66 +++++++++++++++++++++++++----- 2 files changed, 89 insertions(+), 30 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 072e1990e79..7f2954c67b2 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1486,23 +1486,17 @@ bool X86::isSHUFPMask(SDNode *N) { if (NumElems != 4) return false; // Each half must refer to only one of the vector. - SDOperand Elt = N->getOperand(0); - assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!"); - for (unsigned i = 1; i < NumElems / 2; ++i) { + for (unsigned i = 0; i < 2; ++i) { assert(isa<ConstantSDNode>(N->getOperand(i)) && "Invalid VECTOR_SHUFFLE mask!"); - if (cast<ConstantSDNode>(N->getOperand(i))->getValue() != - cast<ConstantSDNode>(Elt)->getValue()) - return false; + unsigned Val = cast<ConstantSDNode>(N->getOperand(i))->getValue(); + if (Val >= 4) return false; } - Elt = N->getOperand(NumElems / 2); - assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!"); - for (unsigned i = NumElems / 2 + 1; i < NumElems; ++i) { + for (unsigned i = 2; i < 4; ++i) { assert(isa<ConstantSDNode>(N->getOperand(i)) && "Invalid VECTOR_SHUFFLE mask!"); - if (cast<ConstantSDNode>(N->getOperand(i))->getValue() != - cast<ConstantSDNode>(Elt)->getValue()) - return false; + unsigned Val = cast<ConstantSDNode>(N->getOperand(i))->getValue(); + if (Val < 4) return false; } return true; @@ -2489,11 +2483,7 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { unsigned NumElems = PermMask.getNumOperands(); // Splat && PSHUFD's 2nd vector must be 
undef. - if (X86::isSplatMask(PermMask.Val) || - ((MVT::isInteger(VT) && - (X86::isPSHUFDMask(PermMask.Val) || - X86::isPSHUFHWMask(PermMask.Val) || - X86::isPSHUFLWMask(PermMask.Val))))) { + if (X86::isSplatMask(PermMask.Val)) { if (V2.getOpcode() != ISD::UNDEF) return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); @@ -2505,9 +2495,34 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { // Leave the VECTOR_SHUFFLE alone. It matches {P}UNPCKL*. return SDOperand(); - if (NumElems == 2 || - X86::isSHUFPMask(PermMask.Val)) { + if (NumElems == 2) return NormalizeVectorShuffle(V1, V2, PermMask, VT, DAG); + + // If VT is integer, try PSHUF* first, then SHUFP*. + if (MVT::isInteger(VT)) { + if (X86::isPSHUFDMask(PermMask.Val) || + X86::isPSHUFHWMask(PermMask.Val) || + X86::isPSHUFLWMask(PermMask.Val)) { + if (V2.getOpcode() != ISD::UNDEF) + return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, + DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); + return SDOperand(); + } + + if (X86::isSHUFPMask(PermMask.Val)) + return NormalizeVectorShuffle(V1, V2, PermMask, VT, DAG); + } else { + // Floating point cases in the other order. 
+ if (X86::isSHUFPMask(PermMask.Val)) + return NormalizeVectorShuffle(V1, V2, PermMask, VT, DAG); + if (X86::isPSHUFDMask(PermMask.Val) || + X86::isPSHUFHWMask(PermMask.Val) || + X86::isPSHUFLWMask(PermMask.Val)) { + if (V2.getOpcode() != ISD::UNDEF) + return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, + DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); + return SDOperand(); + } } assert(0 && "Unexpected VECTOR_SHUFFLE to lower"); diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 661df4b3fe2..2245a2218fa 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -106,14 +106,32 @@ def PSHUFLW_shuffle_mask : PatLeaf<(build_vector), [{ return X86::isPSHUFLWMask(N); }], SHUFFLE_get_pshuflw_imm>; +// Only use PSHUF* for v4f32 if SHUFP does not match. +def PSHUFD_fp_shuffle_mask : PatLeaf<(build_vector), [{ + return !X86::isSHUFPMask(N) && + X86::isPSHUFDMask(N); +}], SHUFFLE_get_shuf_imm>; + +def PSHUFHW_fp_shuffle_mask : PatLeaf<(build_vector), [{ + return !X86::isSHUFPMask(N) && + X86::isPSHUFHWMask(N); +}], SHUFFLE_get_pshufhw_imm>; + +def PSHUFLW_fp_shuffle_mask : PatLeaf<(build_vector), [{ + return !X86::isSHUFPMask(N) && + X86::isPSHUFLWMask(N); +}], SHUFFLE_get_pshuflw_imm>; + def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{ return X86::isSHUFPMask(N); }], SHUFFLE_get_shuf_imm>; -// Only use SHUFP for v4i32 if no other options are available. -// FIXME: add tblgen hook to reduce the complexity of pattern. -def SHUFP_v4i32_shuffle_mask : PatLeaf<(build_vector), [{ - return !X86::isUNPCKHMask(N) && !X86::isPSHUFDMask(N) && X86::isSHUFPMask(N); +// Only use SHUFP for v4i32 if PSHUF* do not match. 
+def SHUFP_int_shuffle_mask : PatLeaf<(build_vector), [{ + return !X86::isPSHUFDMask(N) && + !X86::isPSHUFHWMask(N) && + !X86::isPSHUFLWMask(N) && + X86::isSHUFPMask(N); }], SHUFFLE_get_shuf_imm>; //===----------------------------------------------------------------------===// @@ -1278,14 +1296,14 @@ def PSHUFHWrm : Ii8<0x70, MRMDestMem, // SSE2 with ImmT == Imm8 and XD prefix. def PSHUFLWrr : Ii8<0x70, MRMDestReg, (ops VR128:$dst, VR128:$src1, i32i8imm:$src2), - "pshufLw {$src2, $src1, $dst|$dst, $src1, $src2}", + "pshuflw {$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (v8i16 (vector_shuffle VR128:$src1, (undef), PSHUFLW_shuffle_mask:$src2)))]>, XD, Requires<[HasSSE2]>; def PSHUFLWrm : Ii8<0x70, MRMDestMem, (ops VR128:$dst, i128mem:$src1, i32i8imm:$src2), - "pshufLw {$src2, $src1, $dst|$dst, $src1, $src2}", + "pshuflw {$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (v8i16 (vector_shuffle (bc_v8i16 (loadv2i64 addr:$src1)), (undef), PSHUFLW_shuffle_mask:$src2)))]>, @@ -1593,15 +1611,41 @@ def : Pat<(vector_shuffle (v4f32 VR128:$src), (undef), SSE_splat_mask:$sm), (v4f32 (SHUFPSrr VR128:$src, VR128:$src, SSE_splat_mask:$sm))>, Requires<[HasSSE1]>; -// Shuffle v4i32 if others do not match +// Shuffle v4i32 with SHUFP* if others do not match. def : Pat<(vector_shuffle (v4i32 VR128:$src1), (v4i32 VR128:$src2), - SHUFP_shuffle_mask:$sm), + SHUFP_int_shuffle_mask:$sm), (v4i32 (SHUFPSrr VR128:$src1, VR128:$src2, - SHUFP_v4i32_shuffle_mask:$sm))>, Requires<[HasSSE2]>; + SHUFP_int_shuffle_mask:$sm))>, Requires<[HasSSE2]>; def : Pat<(vector_shuffle (v4i32 VR128:$src1), (load addr:$src2), - SHUFP_shuffle_mask:$sm), + SHUFP_int_shuffle_mask:$sm), (v4i32 (SHUFPSrm VR128:$src1, addr:$src2, - SHUFP_v4i32_shuffle_mask:$sm))>, Requires<[HasSSE2]>; + SHUFP_int_shuffle_mask:$sm))>, Requires<[HasSSE2]>; + +// Shuffle v4f32 with PSHUF* if others do not match. 
+def : Pat<(vector_shuffle (v4f32 VR128:$src1), (undef), + PSHUFD_fp_shuffle_mask:$sm), + (v4f32 (PSHUFDrr VR128:$src1, PSHUFD_fp_shuffle_mask:$sm))>, + Requires<[HasSSE2]>; +def : Pat<(vector_shuffle (loadv4f32 addr:$src1), (undef), + PSHUFD_fp_shuffle_mask:$sm), + (v4f32 (PSHUFDrm addr:$src1, PSHUFD_fp_shuffle_mask:$sm))>, + Requires<[HasSSE2]>; +def : Pat<(vector_shuffle (v4f32 VR128:$src1), (undef), + PSHUFHW_fp_shuffle_mask:$sm), + (v4f32 (PSHUFHWrr VR128:$src1, PSHUFHW_fp_shuffle_mask:$sm))>, + Requires<[HasSSE2]>; +def : Pat<(vector_shuffle (loadv4f32 addr:$src1), (undef), + PSHUFHW_fp_shuffle_mask:$sm), + (v4f32 (PSHUFHWrm addr:$src1, PSHUFHW_fp_shuffle_mask:$sm))>, + Requires<[HasSSE2]>; +def : Pat<(vector_shuffle (v4f32 VR128:$src1), (undef), + PSHUFLW_fp_shuffle_mask:$sm), + (v4f32 (PSHUFLWrr VR128:$src1, PSHUFLW_fp_shuffle_mask:$sm))>, + Requires<[HasSSE2]>; +def : Pat<(vector_shuffle (loadv4f32 addr:$src1), (undef), + PSHUFLW_fp_shuffle_mask:$sm), + (v4f32 (PSHUFLWrm addr:$src1, PSHUFLW_fp_shuffle_mask:$sm))>, + Requires<[HasSSE2]>; // Logical ops def : Pat<(and (bc_v4i32 (v4f32 VR128:$src1)), (loadv4i32 addr:$src2)),