Implement correctly-rounded decimal->binary conversion, i.e. conversion

from user input strings. Such conversions are more intricate and subtle than they may appear; it is unlikely I have got it completely right first time. I would appreciate being informed of any bugs and incorrect roundings you might discover. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@42912 91177308-0d34-0410-b5e6-96231b3b80d8
2025-04-11 16:37:42 +00:00 · 2007-10-12 16:02:31 +00:00 · 2007-10-12 16:02:31 +00:00 · 96c7471b39
commit 96c7471b39
parent 5b8e0c5fd3
2 changed files with 364 additions and 13 deletions
--- a/include/llvm/ADT/APFloat.h
+++ b/include/llvm/ADT/APFloat.h
@ -60,7 +60,10 @@
    if the requested precision is less than the natural precision the
    output is correctly rounded for the specified rounding mode.

-    Conversion to and from decimal text is not currently implemented.
+    It also reads decimal floating point numbers and correctly rounds
+    according to the specified rounding mode.
+
+    Conversion to decimal text is not currently implemented.

    Non-zero finite numbers are represented internally as a sign bit,
    a 16-bit signed exponent, and the significand as an array of
@ -83,13 +86,12 @@

    Some features that may or may not be worth adding:

-    Conversions to and from decimal strings (hard).
+    Binary to decimal conversion (hard).

    Optional ability to detect underflow tininess before rounding.

    New formats: x87 in single and double precision mode (IEEE apart
-    from extended exponent range) and IBM two-double extended
-    precision (hard).
+    from extended exponent range) (hard).

    New operations: sqrt, IEEE remainder, C90 fmod, nextafter,
    nexttoward.
@ -186,10 +188,12 @@ namespace llvm {
    opStatus multiply(const APFloat &, roundingMode);
    opStatus divide(const APFloat &, roundingMode);
    opStatus mod(const APFloat &, roundingMode);
-    void copySign(const APFloat &);
    opStatus fusedMultiplyAdd(const APFloat &, const APFloat &, roundingMode);
-    void changeSign();    // neg
-    void clearSign();     // abs
+
+    /* Sign operations.  */
+    void changeSign();
+    void clearSign();
+    void copySign(const APFloat &);

    /* Conversions.  */
    opStatus convert(const fltSemantics &, roundingMode);
@ -272,8 +276,12 @@ namespace llvm {
    opStatus convertFromUnsignedParts(const integerPart *, unsigned int,
                                      roundingMode);
    opStatus convertFromHexadecimalString(const char *, roundingMode);
+    opStatus convertFromDecimalString (const char *, roundingMode);
    char *convertNormalToHexString(char *, unsigned int, bool,
                                   roundingMode) const;
+    opStatus roundSignificandWithExponent(const integerPart *, unsigned int,
+                                          int, roundingMode);
+
    APInt convertFloatAPFloatToAPInt() const;
    APInt convertDoubleAPFloatToAPInt() const;
    APInt convertF80LongDoubleAPFloatToAPInt() const;
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@ -52,6 +52,23 @@ namespace llvm {
  // onto the usual format above.  For now only storage of constants of
  // this type is supported, no arithmetic.
  const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106 };
+
+  /* A tight upper bound on number of parts required to hold the value
+     pow(5, power) is
+
+       power * 1024 / (441 * integerPartWidth) + 1
+       
+     However, whilst the result may require only this many parts,
+     because we are multiplying two values to get it, the
+     multiplication may require an extra part with the excess part
+     being zero (consider the trivial case of 1 * 1, tcFullMultiply
+     requires two parts to hold the single-part result).  So we add an
+     extra one to guarantee enough space whilst multiplying.  */
+  const unsigned int maxExponent = 16383;
+  const unsigned int maxPrecision = 113;
+  const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
+  const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 1024)
+                                                / (441 * integerPartWidth));
 }

 /* Put a bunch of private, handy routines in an anonymous namespace.  */
@ -76,7 +93,7 @@ namespace {
  }

  unsigned int
-  hexDigitValue (unsigned int c)
+  hexDigitValue(unsigned int c)
  {
    unsigned int r;

@ -239,6 +256,142 @@ namespace {
    return moreSignificant;
  }

+  /* The error from the true value, in half-ulps, on multiplying two
+     floating point numbers, which differ from the value they
+     approximate by at most HUE1 and HUE2 half-ulps, is strictly less
+     than the returned value.
+
+     See "How to Read Floating Point Numbers Accurately" by William D
+     Clinger.  */
+  unsigned int
+  HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
+  {
+    assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
+
+    if (HUerr1 + HUerr2 == 0)
+      return inexactMultiply * 2;  /* <= inexactMultiply half-ulps.  */
+    else
+      return inexactMultiply + 2 * (HUerr1 + HUerr2);
+  }
+
+  /* The number of ulps from the boundary (zero, or half if ISNEAREST)
+     when the least significant BITS are truncated.  BITS cannot be
+     zero.  */
+  integerPart
+  ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
+  {
+    unsigned int count, partBits;
+    integerPart part, boundary;
+
+    assert (bits != 0);
+
+    bits--;
+    count = bits / integerPartWidth;
+    partBits = bits % integerPartWidth + 1;
+
+    part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
+
+    if (isNearest)
+      boundary = (integerPart) 1 << (partBits - 1);
+    else
+      boundary = 0;
+
+    if (count == 0) {
+      if (part - boundary <= boundary - part)
+        return part - boundary;
+      else
+        return boundary - part;
+    }
+
+    if (part == boundary) {
+      while (--count)
+        if (parts[count])
+          return ~(integerPart) 0; /* A lot.  */
+
+      return parts[0];
+    } else if (part == boundary - 1) {
+      while (--count)
+        if (~parts[count])
+          return ~(integerPart) 0; /* A lot.  */
+
+      return -parts[0];
+    }
+
+    return ~(integerPart) 0; /* A lot.  */
+  }
+
+  /* Place pow(5, power) in DST, and return the number of parts used.
+     DST must be at least one part larger than size of the answer.  */
+  static unsigned int
+  powerOf5(integerPart *dst, unsigned int power)
+  {
+    /* A tight upper bound on number of parts required to hold the
+       value pow(5, power) is
+
+         power * 65536 / (28224 * integerPartWidth) + 1
+
+       However, whilst the result may require only N parts, because we
+       are multiplying two values to get it, the multiplication may
+       require N + 1 parts with the excess part being zero (consider
+       the trivial case of 1 * 1, the multiplier requires two parts to
+       hold the single-part result).  So we add two to guarantee
+       enough space whilst multiplying.  */
+    static integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
+                                              15625, 78125 };
+    static integerPart pow5s[maxPowerOfFiveParts * 2 + 5] = { 78125 * 5 };
+    static unsigned int partsCount[16] = { 1 };
+
+    integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
+    unsigned int result;
+
+    assert(power <= maxExponent);
+
+    p1 = dst;
+    p2 = scratch;
+
+    *p1 = firstEightPowers[power & 7];
+    power >>= 3;
+
+    result = 1;
+    pow5 = pow5s;
+
+    for (unsigned int n = 0; power; power >>= 1, n++) {
+      unsigned int pc;
+
+      pc = partsCount[n];
+
+      /* Calculate pow(5,pow(2,n+3)) if we haven't yet.  */
+      if (pc == 0) {
+        pc = partsCount[n - 1];
+        APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
+        pc *= 2;
+        if (pow5[pc - 1] == 0)
+          pc--;
+        partsCount[n] = pc;
+      }
+
+      if (power & 1) {
+        integerPart *tmp;
+
+        APInt::tcFullMultiply(p2, p1, pow5, result, pc);
+        result += pc;
+        if (p2[result - 1] == 0)
+          result--;
+
+        /* Now result is in p1 with partsCount parts and p2 is scratch
+           space.  */
+        tmp = p1, p1 = p2, p2 = tmp;
+      }
+
+      pow5 += pc;
+    }
+
+    if (p1 != dst)
+      APInt::tcAssign(dst, p1, result);
+
+    return result;
+  }
+
  /* Zero at the end to avoid modular arithmetic when adding one; used
     when rounding up during hexadecimal output.  */
  static const char hexDigitsLower[] = "0123456789abcdef0";
@ -650,6 +803,9 @@ APFloat::divideSignificand(const APFloat &rhs)
    APInt::tcShiftLeft(dividend, partsCount, bit);
  }

+  /* Ensure the dividend >= divisor initially for the loop below.
+     Incidentally, this means that the division loop below is
+     guaranteed to set the integer bit to one.  */
  if(APInt::tcCompare(dividend, divisor, partsCount) < 0) {
    exponent--;
    APInt::tcShiftLeft(dividend, partsCount, 1);
@ -865,7 +1021,7 @@ APFloat::normalize(roundingMode rounding_mode,

      /* Keep OMSB up-to-date.  */
      if(omsb > (unsigned) exponentChange)
-        omsb -= (unsigned) exponentChange;
+        omsb -= exponentChange;
      else
        omsb = 0;
    }
@ -916,7 +1072,6 @@ APFloat::normalize(roundingMode rounding_mode,

  /* We have a non-zero denormal.  */
  assert(omsb < semantics->precision);
-  assert(exponent == semantics->minExponent);

  /* Canonicalize zeroes.  */
  if(omsb == 0)
@ -1750,6 +1905,195 @@ APFloat::convertFromHexadecimalString(const char *p,
  return normalize(rounding_mode, lost_fraction);
 }

+APFloat::opStatus
+APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
+                                      unsigned sigPartCount, int exp,
+                                      roundingMode rounding_mode)
+{
+  unsigned int parts, pow5PartCount;
+  fltSemantics calcSemantics = { 32767, -32767, 0 };
+  integerPart pow5Parts[maxPowerOfFiveParts];
+  bool isNearest;
+
+  isNearest = (rounding_mode == rmNearestTiesToEven
+               || rounding_mode == rmNearestTiesToAway);
+
+  parts = partCountForBits(semantics->precision + 11);
+
+  /* Calculate pow(5, abs(exp)).  */
+  pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
+
+  for (;; parts *= 2) {
+    opStatus sigStatus, powStatus;
+    unsigned int excessPrecision, truncatedBits;
+
+    calcSemantics.precision = parts * integerPartWidth - 1;
+    excessPrecision = calcSemantics.precision - semantics->precision;
+    truncatedBits = excessPrecision;
+
+    APFloat decSig(calcSemantics, fcZero, sign);
+    APFloat pow5(calcSemantics, fcZero, false);
+
+    sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
+                                                rmNearestTiesToEven);
+    powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
+                                              rmNearestTiesToEven);
+    /* Add exp, as 10^n = 5^n * 2^n.  */
+    decSig.exponent += exp;
+
+    lostFraction calcLostFraction;
+    integerPart HUerr, HUdistance, powHUerr;
+
+    if (exp >= 0) {
+      /* multiplySignificand leaves the precision-th bit set to 1.  */
+      calcLostFraction = decSig.multiplySignificand(pow5, NULL);
+      powHUerr = powStatus != opOK;
+    } else {
+      calcLostFraction = decSig.divideSignificand(pow5);
+      /* Denormal numbers have less precision.  */
+      if (decSig.exponent < semantics->minExponent) {
+        excessPrecision += (semantics->minExponent - decSig.exponent);
+        truncatedBits = excessPrecision;
+        if (excessPrecision > calcSemantics.precision)
+          excessPrecision = calcSemantics.precision;
+      }
+      /* Extra half-ulp lost in reciprocal of exponent.  */
+      powHUerr = 1 + powStatus != opOK;
+    }
+
+    /* Both multiplySignificand and divideSignificand return the
+       result with the integer bit set.  */
+    assert (APInt::tcExtractBit
+            (decSig.significandParts(), calcSemantics.precision - 1) == 1);
+
+    HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
+                       powHUerr);
+    HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
+                                      excessPrecision, isNearest);
+
+    /* Are we guaranteed to round correctly if we truncate?  */
+    if (HUdistance >= HUerr) {
+      APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
+                       calcSemantics.precision - excessPrecision,
+                       excessPrecision);
+      /* Take the exponent of decSig.  If we tcExtract-ed less bits
+         above we must adjust our exponent to compensate for the
+         implicit right shift.  */
+      exponent = (decSig.exponent + semantics->precision
+                  - (calcSemantics.precision - excessPrecision));
+      calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
+                                                       decSig.partCount(),
+                                                       truncatedBits);
+      return normalize(rounding_mode, calcLostFraction);
+    }
+  }
+}
+
+APFloat::opStatus
+APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode)
+{
+  const char *dot, *firstSignificantDigit;
+  integerPart val, maxVal, decValue;
+  opStatus fs;
+
+  /* Skip leading zeroes and any decimal point.  */
+  p = skipLeadingZeroesAndAnyDot(p, &dot);
+  firstSignificantDigit = p;
+
+  /* The maximum number that can be multiplied by ten with any digit
+     added without overflowing an integerPart.  */
+  maxVal = (~ (integerPart) 0 - 9) / 10;
+
+  val = 0;
+  while (val <= maxVal) {
+    if (*p == '.') {
+      assert(dot == 0);
+      dot = p++;
+    }
+
+    decValue = digitValue(*p);
+    if (decValue == -1U)
+      break;
+    p++;
+    val = val * 10 + decValue;
+  }
+
+  integerPart *decSignificand;
+  unsigned int partCount, maxPartCount;
+
+  partCount = 0;
+  maxPartCount = 4;
+  decSignificand = new integerPart[maxPartCount];
+  decSignificand[partCount++] = val;
+
+  /* Now continue to do single-part arithmetic for as long as we can.
+     Then do a part multiplication, and repeat.  */
+  while (decValue != -1U) {
+    integerPart multiplier;
+
+    val = 0;
+    multiplier = 1;
+
+    while (multiplier <= maxVal) {
+      if (*p == '.') {
+        assert(dot == 0);
+        dot = p++;
+      }
+
+      decValue = digitValue(*p);
+      if (decValue == -1U)
+        break;
+      p++;
+      multiplier *= 10;
+      val = val * 10 + decValue;
+    }
+
+    if (partCount == maxPartCount) {
+      integerPart *newDecSignificand;
+      newDecSignificand = new integerPart[maxPartCount = partCount * 2];
+      APInt::tcAssign(newDecSignificand, decSignificand, partCount);
+      delete [] decSignificand;
+      decSignificand = newDecSignificand;
+    }
+
+    APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
+                          partCount, partCount + 1, false);
+
+    /* If we used another part (likely), increase the count.  */
+    if (decSignificand[partCount] != 0)
+      partCount++;
+  }
+
+  /* Now decSignificand contains the supplied significand ignoring the
+     decimal point.  Figure out our effective exponent, which is the
+     specified exponent adjusted for any decimal point.  */
+
+  if (p == firstSignificantDigit) {
+    /* Ignore the exponent if we are zero - we cannot overflow.  */
+    category = fcZero;
+    fs = opOK;
+  } else {
+    int decimalExponent;
+
+    if (dot)
+      decimalExponent = dot + 1 - p;
+    else
+      decimalExponent = 0;
+
+    /* Add the given exponent.  */
+    if (*p == 'e' || *p == 'E')
+      decimalExponent = totalExponent(p, decimalExponent);
+
+    category = fcNormal;
+    fs = roundSignificandWithExponent(decSignificand, partCount,
+                                      decimalExponent, rounding_mode);
+  }
+
+  delete [] decSignificand;
+
+  return fs;
+}
+
 APFloat::opStatus
 APFloat::convertFromString(const char *p, roundingMode rounding_mode)
 {
@ -1763,9 +2107,8 @@ APFloat::convertFromString(const char *p, roundingMode rounding_mode)

  if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X'))
    return convertFromHexadecimalString(p + 2, rounding_mode);
-
-  assert(0 && "Decimal to binary conversions not yet implemented");
-  abort();
+  else
+    return convertFromDecimalString(p, rounding_mode);
 }

 /* Write out a hexadecimal representation of the floating point value