Use a more efficient lowering of uint64_t --> float that can take advantage of hardware signed integer conversion without
having to do a double cast (uint64_t --> double --> float). This is based on the algorithm from compiler_rt's __floatundisf
for X86-64.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@115634 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Owen Anderson 2010-10-05 17:24:05 +00:00
parent 64227940e7
commit 3a9e7690ba
2 changed files with 49 additions and 6 deletions

View File

@ -2126,12 +2126,35 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub); return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub);
} }
// Implementation of unsigned i64 to f32. This implementation has the // Implementation of unsigned i64 to f32.
// advantage of performing rounding correctly.
// TODO: Generalize this for use with other types. // TODO: Generalize this for use with other types.
if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) { if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) {
// For unsigned conversions, convert them to signed conversions using the
// algorithm from the x86_64 __floatundisf in compiler_rt.
if (!isSigned) {
SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0);
SDValue ShiftConst = DAG.getConstant(1, TLI.getShiftAmountTy());
SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst);
SDValue AndConst = DAG.getConstant(1, MVT::i64);
SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst);
SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, Shr);
SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Or);
SDValue Slow = DAG.getNode(ISD::FADD, dl, MVT::f32, SignCvt, SignCvt);
// TODO: This really should be implemented using a branch rather than a
// select. We happen to get lucky and machinesink does the right
// thing most of the time. This would be a good candidate for a
//pseudo-op, or, even better, for whole-function isel.
SDValue SignBitTest = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
Op0, DAG.getConstant(0, MVT::i64), ISD::SETLT);
return DAG.getNode(ISD::SELECT, dl, MVT::f32, SignBitTest, Slow, Fast);
}
// Otherwise, implement the fully general conversion.
EVT SHVT = TLI.getShiftAmountTy(); EVT SHVT = TLI.getShiftAmountTy();
SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
DAG.getConstant(UINT64_C(0xfffffffffffff800), MVT::i64)); DAG.getConstant(UINT64_C(0xfffffffffffff800), MVT::i64));
SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And,
@ -2143,9 +2166,9 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
SDValue Sel = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ne, Or, Op0); SDValue Sel = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ne, Or, Op0);
SDValue Ge = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64), SDValue Ge = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64), Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64),
ISD::SETUGE); ISD::SETUGE);
SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0); SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0);
SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2, SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2,
DAG.getConstant(32, SHVT)); DAG.getConstant(32, SHVT));
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sh); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sh);
@ -2158,7 +2181,6 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2); SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2);
return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd, return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd,
DAG.getIntPtrConstant(0)); DAG.getIntPtrConstant(0));
} }
SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);

View File

@ -0,0 +1,21 @@
; RUN: llc < %s -march=x86-64 | FileCheck %s
; Verify that we are using the efficient uitofp --> sitofp lowering illustrated
; by the compiler_rt implementation of __floatundisf.
; <rdar://problem/8493982>
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.0.0"
; CHECK: testq %rdi, %rdi
; CHECK-NEXT: jns LBB0_2
; CHECK: shrq
; CHECK-NEXT: andq
; CHECK-NEXT: orq
; CHECK-NEXT: cvtsi2ss
; CHECK: LBB0_2
; CHECK-NEXT: cvtsi2ss
; Test input: converts the i64 argument to float with an unsigned conversion
; (uitofp) and returns the result. The FileCheck directives earlier in this
; file match the x86-64 assembly that llc emits for this function.
define float @test(i64 %a) {
entry:
; Unsigned i64 -> f32 conversion; this is the operation whose lowering is
; being verified.
%b = uitofp i64 %a to float
ret float %b
}