Use a more efficient lowering of uint64_t --> float that can take advantage of hardware signed integer conversion without

having to do a double cast (uint64_t --> double --> float). This is based on the algorithm from compiler_rt's __floatundisf for X86-64. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@115634 91177308-0d34-0410-b5e6-96231b3b80d8
2025-11-23 16:19:52 +00:00 · 2010-10-05 17:24:05 +00:00
parent 64227940e7
commit 3a9e7690ba
2 changed files with 49 additions and 6 deletions
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2126,10 +2126,33 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
    return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub);
  }
-  // Implementation of unsigned i64 to f32.  This implementation has the
+  // Implementation of unsigned i64 to f32.
  // This has the advantage of performing rounding correctly.
  // TODO: Generalize this for use with other types.
  if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) {
    // For unsigned conversions, convert them to signed conversions using the
    // algorithm from the x86_64 __floatundisf in compiler_rt.
    if (!isSigned) {
      SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0);
      SDValue ShiftConst = DAG.getConstant(1, TLI.getShiftAmountTy());
      SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst);
      SDValue AndConst = DAG.getConstant(1, MVT::i64);
      SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst);
      SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, Shr);
      SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Or);
      SDValue Slow = DAG.getNode(ISD::FADD, dl, MVT::f32, SignCvt, SignCvt);
      // TODO: This really should be implemented using a branch rather than a
      // select.  We happen to get lucky and machinesink does the right
      // thing most of the time.  This would be a good candidate for a
      // pseudo-op, or, even better, for whole-function isel.
      SDValue SignBitTest = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),  
        Op0, DAG.getConstant(0, MVT::i64), ISD::SETLT);
      return DAG.getNode(ISD::SELECT, dl, MVT::f32, SignBitTest, Slow, Fast);
    }
    // Otherwise, implement the fully general conversion.
    EVT SHVT = TLI.getShiftAmountTy();
    SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
@@ -2158,7 +2181,6 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
    SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2);
    return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd,
                       DAG.getIntPtrConstant(0));
  }
  SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
--- a/test/CodeGen/X86/uint64-to-float.ll
+++ b/test/CodeGen/X86/uint64-to-float.ll
@@ -0,0 +1,21 @@
 ; RUN: llc < %s -march=x86-64 | FileCheck %s
 ; Verify that we are using the efficient uitofp --> sitofp lowering illustrated
 ; by the compiler_rt implementation of __floatundisf.
 ; <rdar://problem/8493982>
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
 ; CHECK: testq %rdi, %rdi
 ; CHECK-NEXT: jns LBB0_2
 ; CHECK: shrq
 ; CHECK-NEXT: andq
 ; CHECK-NEXT: orq
 ; CHECK-NEXT: cvtsi2ss
 ; CHECK: LBB0_2
 ; CHECK-NEXT: cvtsi2ss
 ; Takes an i64 argument, converts it to float with a single unsigned
 ; integer-to-floating-point instruction (uitofp), and returns the result.
 ; This uitofp is the operation whose lowering this test file exercises.
 define float @test(i64 %a) {
 entry:
  %b = uitofp i64 %a to float
  ret float %b
 }