Fix uint64->{float, double} conversion to do rounding correctly in 32-bit.

The implementation in LegalizeIntegerTypes to handle this as 
sint64->float + appropriate power of 2 is subject to double rounding,
considered incorrect by numerics people.  Use this implementation only
when it is safe.  This leads to using library calls in some cases
that produced inline code before, but it's correct now.
(EVTToAPFloatSemantics belongs somewhere else, any suggestions?)

Add a correctly rounding (though not particularly fast) conversion
that uses X87 80-bit computations for x86-32.

7885399, 5901940.  This shows up in gcc.c-torture/execute/ieee/rbug.c
in the gcc testsuite on some platforms.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@103883 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Dale Johannesen 2010-05-15 18:51:12 +00:00
parent 789955127e
commit 8d908ebd19
2 changed files with 76 additions and 24 deletions

View File

@@ -2314,13 +2314,29 @@ SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), InL); return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), InL);
} }
/// EVTToAPFloatSemantics - Map a scalar floating-point value type to the
/// APFloat semantics object that describes its bit-level format.  Aborts
/// (llvm_unreachable) if VT is not a recognized FP type.
static const fltSemantics *EVTToAPFloatSemantics(EVT VT) {
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:     return &APFloat::IEEEsingle;
  case MVT::f64:     return &APFloat::IEEEdouble;
  case MVT::f80:     return &APFloat::x87DoubleExtended;
  case MVT::f128:    return &APFloat::IEEEquad;
  case MVT::ppcf128: return &APFloat::PPCDoubleDouble;
  default:           llvm_unreachable("Unknown FP format");
  }
}
SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
SDValue Op = N->getOperand(0); SDValue Op = N->getOperand(0);
EVT SrcVT = Op.getValueType(); EVT SrcVT = Op.getValueType();
EVT DstVT = N->getValueType(0); EVT DstVT = N->getValueType(0);
DebugLoc dl = N->getDebugLoc(); DebugLoc dl = N->getDebugLoc();
if (TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){ // The following optimization is valid only if every value in SrcVT (when
// treated as signed) is representable in DstVT. Check that the mantissa
// size of DstVT is >= than the number of bits in SrcVT -1.
const fltSemantics *sem = EVTToAPFloatSemantics(DstVT);
if (APFloat::semanticsPrecision(*sem) >= SrcVT.getSizeInBits()-1 &&
TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){
// Do a signed conversion then adjust the result. // Do a signed conversion then adjust the result.
SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op); SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op);
SignedConv = TLI.LowerOperation(SignedConv, DAG); SignedConv = TLI.LowerOperation(SignedConv, DAG);

View File

@@ -145,13 +145,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand); setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
} else if (!UseSoftFloat) { } else if (!UseSoftFloat) {
if (X86ScalarSSEf64) { // We have an algorithm for SSE2->double, and we turn this into a
// We have an impenetrably clever algorithm for ui64->double only. // 64-bit FILD followed by conditional FADD for other targets.
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
}
// We have an algorithm for SSE2, and we turn this into a 64-bit // We have an algorithm for SSE2, and we turn this into a 64-bit
// FILD for other targets. // FILD for other targets.
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom); setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
} }
// Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
@@ -5462,7 +5461,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
} }
SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
SDValue StackSlot, SDValue StackSlot,
SelectionDAG &DAG) const { SelectionDAG &DAG) const {
// Build the FILD // Build the FILD
DebugLoc dl = Op.getDebugLoc(); DebugLoc dl = Op.getDebugLoc();
@@ -5636,35 +5635,72 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SDValue N0 = Op.getOperand(0); SDValue N0 = Op.getOperand(0);
DebugLoc dl = Op.getDebugLoc(); DebugLoc dl = Op.getDebugLoc();
// Now not UINT_TO_FP is legal (it's marked custom), dag combiner won't // Since UINT_TO_FP is legal (it's marked custom), dag combiner won't
// optimize it to a SINT_TO_FP when the sign bit is known zero. Perform // optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
// the optimization here. // the optimization here.
if (DAG.SignBitIsZero(N0)) if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0); return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0);
EVT SrcVT = N0.getValueType(); EVT SrcVT = N0.getValueType();
if (SrcVT == MVT::i64) { EVT DstVT = Op.getValueType();
// We only handle SSE2 f64 target here; caller can expand the rest. if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64)
if (Op.getValueType() != MVT::f64 || !X86ScalarSSEf64)
return SDValue();
return LowerUINT_TO_FP_i64(Op, DAG); return LowerUINT_TO_FP_i64(Op, DAG);
} else if (SrcVT == MVT::i32 && X86ScalarSSEf64) { else if (SrcVT == MVT::i32 && X86ScalarSSEf64)
return LowerUINT_TO_FP_i32(Op, DAG); return LowerUINT_TO_FP_i32(Op, DAG);
}
assert(SrcVT == MVT::i32 && "Unknown UINT_TO_FP to lower!");
// Make a 64-bit buffer, and use it to build an FILD. // Make a 64-bit buffer, and use it to build an FILD.
SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64); SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64);
SDValue WordOff = DAG.getConstant(4, getPointerTy()); if (SrcVT == MVT::i32) {
SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl, SDValue WordOff = DAG.getConstant(4, getPointerTy());
getPointerTy(), StackSlot, WordOff); SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl,
SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), getPointerTy(), StackSlot, WordOff);
SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
StackSlot, NULL, 0, false, false, 0);
SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32),
OffsetSlot, NULL, 0, false, false, 0);
SDValue Fild = BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
return Fild;
}
assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
StackSlot, NULL, 0, false, false, 0); StackSlot, NULL, 0, false, false, 0);
SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32), // For i64 source, we need to add the appropriate power of 2 if the input
OffsetSlot, NULL, 0, false, false, 0); // was negative. This is the same as the optimization in
return BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG); // DAGTypeLegalizer::ExpandIntOp_UNIT_TO_FP, and for it to be safe here,
// we must be careful to do the computation in x87 extended precision, not
// in SSE. (The generic code can't know it's OK to do this, or how to.)
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) };
SDValue Fild = DAG.getNode(X86ISD::FILD, dl, Tys, Ops, 3);
APInt FF(32, 0x5F800000ULL);
// Check whether the sign bit is set.
SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(MVT::i64),
Op.getOperand(0), DAG.getConstant(0, MVT::i64),
ISD::SETLT);
// Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
SDValue FudgePtr = DAG.getConstantPool(
ConstantInt::get(*DAG.getContext(), FF.zext(64)),
getPointerTy());
// Get a pointer to FF if the sign bit was set, or to 0 otherwise.
SDValue Zero = DAG.getIntPtrConstant(0);
SDValue Four = DAG.getIntPtrConstant(4);
SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet,
Zero, Four);
FudgePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(), FudgePtr, Offset);
// Load the value out, extending it from f32 to f80.
// FIXME: Avoid the extend by constructing the right constant pool?
SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(),
FudgePtr, PseudoSourceValue::getConstantPool(),
0, MVT::f32, false, false, 4);
// Extend everything to 80 bits to force it to be done on x87.
SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0));
} }
std::pair<SDValue,SDValue> X86TargetLowering:: std::pair<SDValue,SDValue> X86TargetLowering::