From f686d9b71f0aa99578dac9a4dd50b57ece6945fa Mon Sep 17 00:00:00 2001
From: Evan Cheng
Date: Fri, 27 Oct 2006 21:08:32 +0000
Subject: [PATCH] Fixed a significant bug where unpcklpd is incorrectly used to extract element 1 from a v2f64 value.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@31228 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 11 +++++++++++
 lib/Target/X86/X86ISelLowering.h   |  4 ++++
 lib/Target/X86/X86InstrSSE.td      | 16 ++++++++++------
 3 files changed, 25 insertions(+), 6 deletions(-)

# NOTE(review): reader's summary of the C++ hunks that follow.
# - X86ISelLowering.cpp gains X86::isSplatLoMask(N). It asserts that N is an
#   ISD::BUILD_VECTOR node, then walks every operand and returns false as soon
#   as isUndefOrEqual(N->getOperand(i), 0) fails; otherwise it returns true.
# - Going by the helper's name, a mask is accepted only when each entry is
#   undef or index 0. isUndefOrEqual's body is not part of this patch, so
#   confirm against the full file. Note that an all-undef mask would also pass.
# - X86ISelLowering.h declares the new predicate right after isSplatMask.
# - The predicate is consumed by the X86InstrSSE.td part of this patch, where
#   the splat PatLeaf stops calling X86::isSplatMask and calls this instead.

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index fb2b92ab725..d442a18850b 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2868,6 +2868,17 @@ bool X86::isSplatMask(SDNode *N) {
   return ::isSplatMask(N);
 }
 
+/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a splat of zero element.
+bool X86::isSplatLoMask(SDNode *N) {
+  assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
+    if (!isUndefOrEqual(N->getOperand(i), 0))
+      return false;
+  return true;
+}
+
 /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
 /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
 /// instructions.
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 8fd2b52e1cc..6de2964ec78 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -225,6 +225,10 @@ namespace llvm {
    /// specifies a splat of a single element.
    bool isSplatMask(SDNode *N);
 
+   /// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
+   /// specifies a splat of zero element.
+   bool isSplatLoMask(SDNode *N);
+
    /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
    /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
    /// instructions.
# NOTE(review): reader's summary of the X86InstrSSE.td hunks that follow.
# - The PatLeaf SSE_splat_v2_mask is renamed to SSE_splat_lo_mask, and its
#   predicate changes from X86::isSplatMask(N) to X86::isSplatLoMask(N).
# - MOVDDUPrr / MOVDDUPrm and the UNPCKLPDrr / PUNPCKLQDQrr splat patterns are
#   updated to reference the renamed leaf; nothing else in them changes.
# - Two patterns are added under the same AddedComplexity = 10 block: v2f64 and
#   v2i64 shuffles of ($src, undef) whose mask matches UNPCKH_shuffle_mask now
#   select UNPCKHPDrr / PUNPCKHQDQrr with $src as both operands.
# - Presumably the new patterns are what now handle a splat of element 1, which
#   the commit subject says was wrongly sent to unpcklpd. UNPCKH_shuffle_mask
#   is defined outside this patch, so confirm against the full .td file.
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index c2fe9acb5b8..7361c70cdfe 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -104,8 +104,8 @@ def SSE_splat_mask : PatLeaf<(build_vector), [{
   return X86::isSplatMask(N);
 }], SHUFFLE_get_shuf_imm>;
 
-def SSE_splat_v2_mask : PatLeaf<(build_vector), [{
-  return X86::isSplatMask(N);
+def SSE_splat_lo_mask : PatLeaf<(build_vector), [{
+  return X86::isSplatLoMask(N);
 }]>;
 
 def MOVHLPS_shuffle_mask : PatLeaf<(build_vector), [{
@@ -812,13 +812,13 @@ def MOVDDUPrr : S3DI<0x12, MRMSrcReg, (ops VR128:$dst, VR128:$src),
                       "movddup {$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (v2f64 (vector_shuffle
                                                 VR128:$src, (undef),
-                                                SSE_splat_v2_mask)))]>;
+                                                SSE_splat_lo_mask)))]>;
 def MOVDDUPrm : S3DI<0x12, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
                   "movddup {$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (v2f64 (vector_shuffle
                                          (scalar_to_vector (loadf64 addr:$src)),
                                              (undef),
-                                             SSE_splat_v2_mask)))]>;
+                                             SSE_splat_lo_mask)))]>;
 
 // SSE2 instructions without OpSize prefix
 def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
@@ -1908,10 +1908,14 @@ def : Pat<(v4f32 (vector_shuffle immAllZerosV,
 
 // Splat v2f64 / v2i64
 let AddedComplexity = 10 in {
-def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), SSE_splat_v2_mask:$sm),
+def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), SSE_splat_lo_mask:$sm),
           (UNPCKLPDrr VR128:$src, VR128:$src)>,   Requires<[HasSSE2]>;
-def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), SSE_splat_v2_mask:$sm),
+def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), UNPCKH_shuffle_mask:$sm),
+          (UNPCKHPDrr VR128:$src, VR128:$src)>,   Requires<[HasSSE2]>;
+def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), SSE_splat_lo_mask:$sm),
           (PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), UNPCKH_shuffle_mask:$sm),
+          (PUNPCKHQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
 }
 
 // Splat v4f32