From f3704769bb2e8170769840528869ae5696545481 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Mon, 29 Aug 2011 21:15:46 +0000 Subject: [PATCH] Explicitly zero out parts of a vector which are required to be zero by the algorithm in LowerUINT_TO_FP_i32. This only has a substantial effect on the generated code when the input is extracted from a vector register; other ways of loading an i32 do the appropriate zeroing implicitly. Fixes PR10802. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@138768 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 3 +++ test/CodeGen/X86/uint_to_fp-2.ll | 31 +++++++++++++++++++++++++++--- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 9c940c49ca3..bd89bf999d0 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -7713,6 +7713,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, SDValue Load = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Op.getOperand(0)); + // Zero out the upper parts of the register. + Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget->hasSSE2(), DAG); + Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Load), DAG.getIntPtrConstant(0)); diff --git a/test/CodeGen/X86/uint_to_fp-2.ll b/test/CodeGen/X86/uint_to_fp-2.ll index da5105d8a4e..7536fb8f52c 100644 --- a/test/CodeGen/X86/uint_to_fp-2.ll +++ b/test/CodeGen/X86/uint_to_fp-2.ll @@ -1,8 +1,33 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movsd | count 1 -; rdar://6504833 +; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s -define float @f(i32 %x) nounwind readnone { +; rdar://6504833 +define float @test1(i32 %x) nounwind readnone { +; CHECK: test1 +; CHECK: movd +; CHECK: orpd +; CHECK: subsd +; CHECK: cvtsd2ss +; CHECK: movss +; CHECK: flds +; CHECK: ret entry: %0 = uitofp i32 %x to float ret float %0 } + +; PR10802 +define float @test2(<4 x i32> %x) nounwind readnone ssp { +; CHECK: test2 +; CHECK: xorps [[ZERO:%xmm[0-9]+]] +; CHECK: movss {{.*}}, [[ZERO]] +; CHECK: orps +; CHECK: subsd +; CHECK: cvtsd2ss +; CHECK: movss +; CHECK: flds +; CHECK: ret +entry: + %vecext = extractelement <4 x i32> %x, i32 0 + %conv = uitofp i32 %vecext to float + ret float %conv +}