X86 pmovsx/pmovzx ignore the upper half of their inputs.

rdar://problem/6945110


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@131493 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Stuart Hastings 2011-05-17 22:13:31 +00:00
parent c81c9709ef
commit ca1ef48585
3 changed files with 38 additions and 1 deletions

View File

@ -588,6 +588,28 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
case Intrinsic::x86_sse41_pmovsxbw:
case Intrinsic::x86_sse41_pmovsxwd:
case Intrinsic::x86_sse41_pmovsxdq:
case Intrinsic::x86_sse41_pmovzxbw:
case Intrinsic::x86_sse41_pmovzxwd:
case Intrinsic::x86_sse41_pmovzxdq: {
  // pmovsx/pmovzx ignore the upper half of their inputs, so only the low
  // VWidth/2 elements of the vector operand are demanded.
  Value *Src = II->getArgOperand(0);
  unsigned VWidth = cast<VectorType>(Src->getType())->getNumElements();
  unsigned LowHalfElts = VWidth / 2;

  // One bit per input element; bits [0, LowHalfElts) mark the demanded ones.
  // getBitsSet is a static factory, so build the mask from it directly.
  APInt InputDemandedElts(APInt::getBitsSet(VWidth, 0, LowHalfElts));
  APInt UndefElts(VWidth, 0);

  // A non-null result is a replacement for the operand that only has to be
  // correct in the demanded elements; install it and return the call
  // (presumably so the caller treats the instruction as changed -- confirm
  // against the rest of visitCallInst).
  if (Value *TmpV = SimplifyDemandedVectorElts(Src, InputDemandedElts,
                                               UndefElts)) {
    II->setArgOperand(0, TmpV);
    return II;
  }
  break;
}
case Intrinsic::ppc_altivec_vperm:
// Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getArgOperand(2))) {

View File

@ -867,7 +867,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
if (Depth == 10)
return 0;
// If multiple users are using the root value, procede with
// If multiple users are using the root value, proceed with
// simplification conservatively assuming that all elements
// are needed.
if (!V->hasOneUse()) {

View File

@ -0,0 +1,15 @@
; RUN: opt -instcombine -S < %s | FileCheck %s
; <rdar://problem/6945110>
;
; Exercises instcombine on a call to llvm.x86.sse41.pmovzxwd whose <8 x i16>
; operand is assembled by two shuffles. The intrinsic returns <4 x i32>, i.e.
; it has half as many result elements as input elements.
define <4 x i32> @kernel3_vertical(<4 x i16> * %src, <8 x i16> * %foo) nounwind {
entry:
%tmp = load <4 x i16>* %src
%tmp1 = load <8 x i16>* %foo
; The widening shuffle below is expected to remain in the output.
; CHECK: shufflevector
; Widen %tmp from 4 to 8 elements; the upper four lanes are undef.
%tmp2 = shufflevector <4 x i16> %tmp, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; No further shuffle may appear after the first one: the blend below is
; expected to be removed.
; CHECK-NOT: shufflevector
; Blend: lanes 0-3 come from %tmp2 (mask indices 8-11), lanes 4-7 come from
; %tmp1 (mask indices 4-7). Only the upper half depends on %tmp1.
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
%0 = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %tmp3)
ret <4 x i32> %0
}
; Declaration of the SSE4.1 intrinsic under test.
declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone