Fix PR10492 by teaching MOVHLPS and MOVLPS mask matching to be more strict.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137324 91177308-0d34-0410-b5e6-96231b3b80d8
2025-02-21 06:30:16 +00:00 · 2011-08-11 18:59:13 +00:00 · 2011-08-11 18:59:13 +00:00 · 59353b436a
commit 59353b436a
parent 2b7b238e84
2 changed files with 17 additions and 2 deletions
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@ -3863,7 +3863,10 @@ static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) {
 /// V1 (and in order), and the upper half elements should come from the upper
 /// half of V2 (and in order).
 static bool ShouldXformToMOVHLPS(ShuffleVectorSDNode *Op) {
-  if (Op->getValueType(0).getVectorNumElements() != 4)
+  EVT VT = Op->getValueType(0);
  if (VT.getSizeInBits() != 128)
    return false;
  if (VT.getVectorNumElements() != 4)
    return false;
  for (unsigned i = 0, e = 2; i != e; ++i)
    if (!isUndefOrEqual(Op->getMaskElt(i), i+2))
@ -3895,6 +3898,10 @@ static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) {
 /// MOVLP, it must be either a vector load or a scalar load to vector.
 static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
                               ShuffleVectorSDNode *Op) {
  EVT VT = Op->getValueType(0);
  if (VT.getSizeInBits() != 128)
    return false;
  if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
    return false;
  // Is V2 is a vector load, don't do this transformation. We will try to use
@ -3902,7 +3909,7 @@ static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
  if (ISD::isNON_EXTLoad(V2))
    return false;
-  unsigned NumElems = Op->getValueType(0).getVectorNumElements();
+  unsigned NumElems = VT.getVectorNumElements();
  if (NumElems != 2 && NumElems != 4)
    return false;
--- a/test/CodeGen/X86/avx-basic.ll
+++ b/test/CodeGen/X86/avx-basic.ll
@ -42,3 +42,11 @@ allocas:
  store <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <8 x i32>* %ptr2vec615, align 32
  ret void
 }
 ;;; Regression test for PR10492: just make sure this 256-bit <4 x i64> shuffle (elements 2 and 3 of %a, rest from undef) doesn't crash
 ; CHECK: _ISelCrash
 define <4 x i64> @ISelCrash(<4 x i64> %a) nounwind uwtable readnone ssp {
 entry:
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 4>
  ret <4 x i64> %shuffle
 }