From 3a9e7690ba99c27d9b09fa8e61fb9f7ba01364c9 Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Tue, 5 Oct 2010 17:24:05 +0000 Subject: [PATCH] Use a more efficient lowering of uint64_t --> float that can take advantage of hardware signed integer conversion without having to do a double cast (uint64_t --> double --> float). This is based on the algorithm from compiler_rt's __floatundisf for X86-64. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@115634 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 34 +++++++++++++++++++----- test/CodeGen/X86/uint64-to-float.ll | 21 +++++++++++++++ 2 files changed, 49 insertions(+), 6 deletions(-) create mode 100644 test/CodeGen/X86/uint64-to-float.ll diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index e71e2adf9ad..95aa445f4f1 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2126,12 +2126,35 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub); } - // Implementation of unsigned i64 to f32. This implementation has the - // advantage of performing rounding correctly. + // Implementation of unsigned i64 to f32. // TODO: Generalize this for use with other types. if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) { + // For unsigned conversions, convert them to signed conversions using the + // algorithm from the x86_64 __floatundidf in compiler_rt. + if (!isSigned) { + SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0); + + SDValue ShiftConst = DAG.getConstant(1, TLI.getShiftAmountTy()); + SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst); + SDValue AndConst = DAG.getConstant(1, MVT::i64); + SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst); + SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, Shr); + + SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Or); + SDValue Slow = DAG.getNode(ISD::FADD, dl, MVT::f32, SignCvt, SignCvt); + + // TODO: This really should be implemented using a branch rather than a + // select. We happen to get lucky and machinesink does the right + // thing most of the time. This would be a good candidate for a + //pseudo-op, or, even better, for whole-function isel. + SDValue SignBitTest = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64), + Op0, DAG.getConstant(0, MVT::i64), ISD::SETLT); + return DAG.getNode(ISD::SELECT, dl, MVT::f32, SignBitTest, Slow, Fast); + } + + // Otherwise, implement the fully general conversion. EVT SHVT = TLI.getShiftAmountTy(); - + SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, DAG.getConstant(UINT64_C(0xfffffffffffff800), MVT::i64)); SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, @@ -2143,9 +2166,9 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Sel = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ne, Or, Op0); SDValue Ge = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64), Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64), - ISD::SETUGE); + ISD::SETUGE); SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0); - + SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2, DAG.getConstant(32, SHVT)); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sh); @@ -2158,7 +2181,6 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2); return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd, DAG.getIntPtrConstant(0)); - } SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); diff --git a/test/CodeGen/X86/uint64-to-float.ll b/test/CodeGen/X86/uint64-to-float.ll new file mode 100644 index 00000000000..d9f753c7a88 --- /dev/null +++ b/test/CodeGen/X86/uint64-to-float.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s +; Verify that we are using the efficient uitofp --> sitofp lowering illustrated +; by the compiler_rt implementation of __floatundisf. +; + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +; CHECK: testq %rdi, %rdi +; CHECK-NEXT: jns LBB0_2 +; CHECK: shrq +; CHECK-NEXT: andq +; CHECK-NEXT: orq +; CHECK-NEXT: cvtsi2ss +; CHECK: LBB0_2 +; CHECK-NEXT: cvtsi2ss +define float @test(i64 %a) { +entry: + %b = uitofp i64 %a to float + ret float %b +}