diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 726105f75d6..83653fd6d5b 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -588,6 +588,28 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } + + case Intrinsic::x86_sse41_pmovsxbw: + case Intrinsic::x86_sse41_pmovsxwd: + case Intrinsic::x86_sse41_pmovsxdq: + case Intrinsic::x86_sse41_pmovzxbw: + case Intrinsic::x86_sse41_pmovzxwd: + case Intrinsic::x86_sse41_pmovzxdq: { + unsigned VWidth = + cast(II->getArgOperand(0)->getType())->getNumElements(); + unsigned LowHalfElts = VWidth / 2; + APInt InputDemandedElts(VWidth, 0); + InputDemandedElts = InputDemandedElts.getBitsSet(VWidth, 0, LowHalfElts); + APInt UndefElts(VWidth, 0); + if (Value *TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), + InputDemandedElts, + UndefElts)) { + II->setArgOperand(0, TmpV); + return II; + } + break; + } + case Intrinsic::ppc_altivec_vperm: // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant. if (ConstantVector *Mask = dyn_cast(II->getArgOperand(2))) { diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 6e727ce6e35..4c9fed3abc3 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -867,7 +867,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, if (Depth == 10) return 0; - // If multiple users are using the root value, procede with + // If multiple users are using the root value, proceed with // simplification conservatively assuming that all elements // are needed. if (!V->hasOneUse()) { diff --git a/test/CodeGen/X86/2011-05-17-pmovzxwd.ll b/test/CodeGen/X86/2011-05-17-pmovzxwd.ll new file mode 100644 index 00000000000..9ef67fcb4ed --- /dev/null +++ b/test/CodeGen/X86/2011-05-17-pmovzxwd.ll @@ -0,0 +1,15 @@ +; RUN: opt -instcombine -S < %s | FileCheck %s +; + +define <4 x i32> @kernel3_vertical(<4 x i16> * %src, <8 x i16> * %foo) nounwind { +entry: + %tmp = load <4 x i16>* %src + %tmp1 = load <8 x i16>* %foo +; CHECK: shufflevector + %tmp2 = shufflevector <4 x i16> %tmp, <4 x i16> undef, <8 x i32> +; CHECK-NOT: shufflevector + %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> + %0 = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %tmp3) + ret <4 x i32> %0 +} +declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone