mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-08 21:32:39 +00:00
X86 pmovsx/pmovzx ignore the upper half of their inputs.
rdar://problem/6945110
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@131493 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
c81c9709ef
commit
ca1ef48585
@ -588,6 +588,28 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
case Intrinsic::x86_sse41_pmovsxbw:
|
||||
case Intrinsic::x86_sse41_pmovsxwd:
|
||||
case Intrinsic::x86_sse41_pmovsxdq:
|
||||
case Intrinsic::x86_sse41_pmovzxbw:
|
||||
case Intrinsic::x86_sse41_pmovzxwd:
|
||||
case Intrinsic::x86_sse41_pmovzxdq: {
|
||||
unsigned VWidth =
|
||||
cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
|
||||
unsigned LowHalfElts = VWidth / 2;
|
||||
APInt InputDemandedElts(VWidth, 0);
|
||||
InputDemandedElts = InputDemandedElts.getBitsSet(VWidth, 0, LowHalfElts);
|
||||
APInt UndefElts(VWidth, 0);
|
||||
if (Value *TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0),
|
||||
InputDemandedElts,
|
||||
UndefElts)) {
|
||||
II->setArgOperand(0, TmpV);
|
||||
return II;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case Intrinsic::ppc_altivec_vperm:
|
||||
// Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
|
||||
if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getArgOperand(2))) {
|
||||
|
@ -867,7 +867,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
|
||||
if (Depth == 10)
|
||||
return 0;
|
||||
|
||||
// If multiple users are using the root value, procede with
|
||||
// If multiple users are using the root value, proceed with
|
||||
// simplification conservatively assuming that all elements
|
||||
// are needed.
|
||||
if (!V->hasOneUse()) {
|
||||
|
15
test/CodeGen/X86/2011-05-17-pmovzxwd.ll
Normal file
15
test/CodeGen/X86/2011-05-17-pmovzxwd.ll
Normal file
@ -0,0 +1,15 @@
|
||||
; RUN: opt -instcombine -S < %s | FileCheck %s
|
||||
; <rdar://problem/6945110>
|
||||
|
||||
; Test input: a pmovzxwd call whose <8 x i16> operand is assembled by two
; shuffles. The FileCheck directives inside the body expect a shufflevector
; to appear in the output, with no further shufflevector after it.
define <4 x i32> @kernel3_vertical(<4 x i16> * %src, <8 x i16> * %foo) nounwind {
|
||||
entry:
|
||||
%tmp = load <4 x i16>* %src
|
||||
%tmp1 = load <8 x i16>* %foo
|
||||
; CHECK: shufflevector
|
||||
; Widen %tmp to eight lanes: lanes 0-3 are %tmp, lanes 4-7 are undef.
%tmp2 = shufflevector <4 x i16> %tmp, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NOT: shufflevector
|
||||
; Mask indices 8-11 select lanes 0-3 of %tmp2 (the second operand); indices
; 4-7 select lanes 4-7 of %tmp1. So only the upper half of the result depends
; on %tmp1.
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
|
||||
; The intrinsic takes eight i16 lanes and returns four i32 lanes.
%0 = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %tmp3)
|
||||
ret <4 x i32> %0
|
||||
}
|
||||
declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
|
Loading…
x
Reference in New Issue
Block a user