Explicitly zero out parts of a vector which are required to be zero by the algorithm in LowerUINT_TO_FP_i32. This only has a substantial effect on the generated code when the input is extracted from a vector register; other ways of loading an i32 do the appropriate zeroing implicitly. Fixes PR10802.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@138768 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Eli Friedman 2011-08-29 21:15:46 +00:00
parent d33b276bcb
commit f3704769bb
2 changed files with 31 additions and 3 deletions

View File

@ -7713,6 +7713,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
SDValue Load = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
Op.getOperand(0));
// Zero out the upper parts of the register.
Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget->hasSSE2(), DAG);
Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Load),
DAG.getIntPtrConstant(0));

View File

@ -1,8 +1,33 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movsd | count 1
; rdar://6504833
; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
define float @f(i32 %x) nounwind readnone {
; rdar://6504833
; Scalar case: the i32 argument is converted to float with uitofp and returned.
; The directives below expect, in order, movd, orpd, subsd, cvtsd2ss, movss
; and flds ahead of the ret in the generated code.
define float @test1(i32 %x) nounwind readnone {
; CHECK: test1
; CHECK: movd
; CHECK: orpd
; CHECK: subsd
; CHECK: cvtsd2ss
; CHECK: movss
; CHECK: flds
; CHECK: ret
entry:
%0 = uitofp i32 %x to float
ret float %0
}
; PR10802
; Vector-source case: the i32 being converted is element 0 of the <4 x i32>
; argument. The directives below expect a register to be cleared with xorps
; (captured as ZERO) and then named as the last operand of a movss before the
; orps, i.e. the explicit zeroing must show up in the generated code.
define float @test2(<4 x i32> %x) nounwind readnone ssp {
; CHECK: test2
; CHECK: xorps [[ZERO:%xmm[0-9]+]]
; CHECK: movss {{.*}}, [[ZERO]]
; CHECK: orps
; CHECK: subsd
; CHECK: cvtsd2ss
; CHECK: movss
; CHECK: flds
; CHECK: ret
entry:
%vecext = extractelement <4 x i32> %x, i32 0
%conv = uitofp i32 %vecext to float
ret float %conv
}