Fix PR12359

- In addition to the undefined case, if V2 is a zero vector, skip the 2nd
  PSHUFB and the POR as well, since PSHUFB will zero elements with negative indices.

  Patch by Sriram Murali <sriram.murali@intel.com>



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163018 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Michael Liao 2012-08-31 20:12:31 +00:00
parent 3185f9a2ea
commit 265bcb1e5b
2 changed files with 15 additions and 3 deletions

View File

@ -5881,8 +5881,6 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
DebugLoc dl = SVOp->getDebugLoc();
ArrayRef<int> MaskVals = SVOp->getMask();
bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
// If we have SSSE3, case 1 is generated when all result bytes come from
// one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is
// present, fall back to case 3.
@ -5906,7 +5904,11 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
DAG.getNode(ISD::BUILD_VECTOR, dl,
MVT::v16i8, &pshufbMask[0], 16));
if (V2IsUndef)
// As PSHUFB will zero elements with negative indices, it's safe to ignore
// the 2nd operand if it's undefined or zero.
if (V2.getOpcode() == ISD::UNDEF ||
ISD::isBuildVectorAllZeros(V2.getNode()))
return V1;
// Calculate the shuffle mask for the second input, shuffle it, and

View File

@ -0,0 +1,10 @@
; RUN: llc -asm-verbose -mtriple=x86_64-unknown-unknown -mcpu=corei7 < %s | FileCheck %s
;
; Regression test for PR12359.
; Shuffles a <16 x i8> input against an all-zero second operand. Mask indices
; 0-15 pick bytes from %inval1; index 16 picks the first byte of the
; zeroinitializer operand. The directives below expect the entry block to
; consist of a single pshufb immediately followed by ret, i.e. no second
; shuffle or OR is emitted for the zero operand.
define <16 x i8> @shuf(<16 x i8> %inval1) {
entry:
%0 = shufflevector <16 x i8> %inval1, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 4, i32 3, i32 2, i32 16, i32 16, i32 3, i32 4, i32 0, i32 4, i32 3, i32 2, i32 16, i32 16, i32 3, i32 4>
ret <16 x i8> %0
; CHECK: shuf
; CHECK: # BB#0: # %entry
; CHECK-NEXT: pshufb
; CHECK-NEXT: ret
}