mirror of
https://github.com/autc04/Retro68.git
synced 2024-11-24 23:32:06 +00:00
1952 lines
68 KiB
C
1952 lines
68 KiB
C
/* Copyright (C) 2007-2015 Free Software Foundation, Inc.
|
|
|
|
This file is part of GCC.
|
|
|
|
GCC is free software; you can redistribute it and/or modify it under
|
|
the terms of the GNU General Public License as published by the Free
|
|
Software Foundation; either version 3, or (at your option) any later
|
|
version.
|
|
|
|
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
for more details.
|
|
|
|
Under Section 7 of GPL version 3, you are granted additional
|
|
permissions described in the GCC Runtime Library Exception, version
|
|
3.1, as published by the Free Software Foundation.
|
|
|
|
You should have received a copy of the GNU General Public License and
|
|
a copy of the GCC Runtime Library Exception along with this program;
|
|
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
<http://www.gnu.org/licenses/>. */
|
|
|
|
#define BID_128RES
|
|
|
|
#include "bid_internal.h"
|
|
|
|
/*****************************************************************************
|
|
* BID128_round_integral_exact
|
|
****************************************************************************/
|
|
|
|
BID128_FUNCTION_ARG1 (bid128_round_integral_exact, x)
|
|
|
|
UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull}
|
|
};
|
|
UINT64 x_sign;
|
|
UINT64 x_exp;
|
|
int exp; // unbiased exponent
|
|
// Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
|
|
UINT64 tmp64;
|
|
BID_UI64DOUBLE tmp1;
|
|
unsigned int x_nr_bits;
|
|
int q, ind, shift;
|
|
UINT128 C1;
|
|
UINT256 fstar;
|
|
UINT256 P256;
|
|
|
|
// check for NaN or Infinity
|
|
if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
|
|
// x is special
|
|
if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
|
|
// if x = NaN, then res = Q (x)
|
|
// check first for non-canonical NaN payload
|
|
if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
|
|
(((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
|
|
(x.w[0] > 0x38c15b09ffffffffull))) {
|
|
x.w[1] = x.w[1] & 0xffffc00000000000ull;
|
|
x.w[0] = 0x0ull;
|
|
}
|
|
if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
|
|
// set invalid flag
|
|
*pfpsf |= INVALID_EXCEPTION;
|
|
// return quiet (x)
|
|
res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
|
|
res.w[0] = x.w[0];
|
|
} else { // x is QNaN
|
|
// return x
|
|
res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
|
|
res.w[0] = x.w[0];
|
|
}
|
|
BID_RETURN (res)
|
|
} else { // x is not a NaN, so it must be infinity
|
|
if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf
|
|
// return +inf
|
|
res.w[1] = 0x7800000000000000ull;
|
|
res.w[0] = 0x0000000000000000ull;
|
|
} else { // x is -inf
|
|
// return -inf
|
|
res.w[1] = 0xf800000000000000ull;
|
|
res.w[0] = 0x0000000000000000ull;
|
|
}
|
|
BID_RETURN (res);
|
|
}
|
|
}
|
|
// unpack x
|
|
x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
|
|
C1.w[1] = x.w[1] & MASK_COEFF;
|
|
C1.w[0] = x.w[0];
|
|
|
|
// check for non-canonical values (treated as zero)
|
|
if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
|
|
// non-canonical
|
|
x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
|
|
C1.w[1] = 0; // significand high
|
|
C1.w[0] = 0; // significand low
|
|
} else { // G0_G1 != 11
|
|
x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
|
|
if (C1.w[1] > 0x0001ed09bead87c0ull ||
|
|
(C1.w[1] == 0x0001ed09bead87c0ull
|
|
&& C1.w[0] > 0x378d8e63ffffffffull)) {
|
|
// x is non-canonical if coefficient is larger than 10^34 -1
|
|
C1.w[1] = 0;
|
|
C1.w[0] = 0;
|
|
} else { // canonical
|
|
;
|
|
}
|
|
}
|
|
|
|
// test for input equal to zero
|
|
if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
|
|
// x is 0
|
|
// return 0 preserving the sign bit and the preferred exponent
|
|
// of MAX(Q(x), 0)
|
|
if (x_exp <= (0x1820ull << 49)) {
|
|
res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
|
|
} else {
|
|
res.w[1] = x_sign | x_exp;
|
|
}
|
|
res.w[0] = 0x0000000000000000ull;
|
|
BID_RETURN (res);
|
|
}
|
|
// x is not special and is not zero
|
|
|
|
switch (rnd_mode) {
|
|
case ROUNDING_TO_NEAREST:
|
|
case ROUNDING_TIES_AWAY:
|
|
// if (exp <= -(p+1)) return 0.0
|
|
if (x_exp <= 0x2ffa000000000000ull) { // 0x2ffa000000000000ull == -35
|
|
res.w[1] = x_sign | 0x3040000000000000ull;
|
|
res.w[0] = 0x0000000000000000ull;
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
BID_RETURN (res);
|
|
}
|
|
break;
|
|
case ROUNDING_DOWN:
|
|
// if (exp <= -p) return -1.0 or +0.0
|
|
if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffa000000000000ull == -34
|
|
if (x_sign) {
|
|
// if negative, return negative 1, because we know coefficient
|
|
// is non-zero (would have been caught above)
|
|
res.w[1] = 0xb040000000000000ull;
|
|
res.w[0] = 0x0000000000000001ull;
|
|
} else {
|
|
// if positive, return positive 0, because we know coefficient is
|
|
// non-zero (would have been caught above)
|
|
res.w[1] = 0x3040000000000000ull;
|
|
res.w[0] = 0x0000000000000000ull;
|
|
}
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
BID_RETURN (res);
|
|
}
|
|
break;
|
|
case ROUNDING_UP:
|
|
// if (exp <= -p) return -0.0 or +1.0
|
|
if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34
|
|
if (x_sign) {
|
|
// if negative, return negative 0, because we know the coefficient
|
|
// is non-zero (would have been caught above)
|
|
res.w[1] = 0xb040000000000000ull;
|
|
res.w[0] = 0x0000000000000000ull;
|
|
} else {
|
|
// if positive, return positive 1, because we know coefficient is
|
|
// non-zero (would have been caught above)
|
|
res.w[1] = 0x3040000000000000ull;
|
|
res.w[0] = 0x0000000000000001ull;
|
|
}
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
BID_RETURN (res);
|
|
}
|
|
break;
|
|
case ROUNDING_TO_ZERO:
|
|
// if (exp <= -p) return -0.0 or +0.0
|
|
if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34
|
|
res.w[1] = x_sign | 0x3040000000000000ull;
|
|
res.w[0] = 0x0000000000000000ull;
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
BID_RETURN (res);
|
|
}
|
|
break;
|
|
}
|
|
|
|
// q = nr. of decimal digits in x
|
|
// determine first the nr. of bits in x
|
|
if (C1.w[1] == 0) {
|
|
if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
|
|
// split the 64-bit value in two 32-bit halves to avoid rounding errors
|
|
if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
|
|
tmp1.d = (double) (C1.w[0] >> 32); // exact conversion
|
|
x_nr_bits =
|
|
33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
|
} else { // x < 2^32
|
|
tmp1.d = (double) (C1.w[0]); // exact conversion
|
|
x_nr_bits =
|
|
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
|
}
|
|
} else { // if x < 2^53
|
|
tmp1.d = (double) C1.w[0]; // exact conversion
|
|
x_nr_bits =
|
|
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
|
}
|
|
} else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
|
|
tmp1.d = (double) C1.w[1]; // exact conversion
|
|
x_nr_bits =
|
|
65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
|
}
|
|
|
|
q = nr_digits[x_nr_bits - 1].digits;
|
|
if (q == 0) {
|
|
q = nr_digits[x_nr_bits - 1].digits1;
|
|
if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
|
|
(C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
|
|
C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
|
|
q++;
|
|
}
|
|
exp = (x_exp >> 49) - 6176;
|
|
if (exp >= 0) { // -exp <= 0
|
|
// the argument is an integer already
|
|
res.w[1] = x.w[1];
|
|
res.w[0] = x.w[0];
|
|
BID_RETURN (res);
|
|
}
|
|
// exp < 0
|
|
switch (rnd_mode) {
|
|
case ROUNDING_TO_NEAREST:
|
|
if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
|
|
// need to shift right -exp digits from the coefficient; exp will be 0
|
|
ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
|
|
// chop off ind digits from the lower part of C1
|
|
// C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits
|
|
tmp64 = C1.w[0];
|
|
if (ind <= 19) {
|
|
C1.w[0] = C1.w[0] + midpoint64[ind - 1];
|
|
} else {
|
|
C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
|
|
C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
|
|
}
|
|
if (C1.w[0] < tmp64)
|
|
C1.w[1]++;
|
|
// calculate C* and f*
|
|
// C* is actually floor(C*) in this case
|
|
// C* and f* need shifting and masking, as shown by
|
|
// shiftright128[] and maskhigh128[]
|
|
// 1 <= x <= 34
|
|
// kx = 10^(-x) = ten2mk128[ind - 1]
|
|
// C* = (C1 + 1/2 * 10^x) * 10^(-x)
|
|
// the approximation of 10^(-x) was rounded up to 118 bits
|
|
__mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
|
|
// determine the value of res and fstar
|
|
|
|
// determine inexactness of the rounding of C*
|
|
// if (0 < f* - 1/2 < 10^(-x)) then
|
|
// the result is exact
|
|
// else // if (f* - 1/2 > T*) then
|
|
// the result is inexact
|
|
// Note: we are going to use ten2mk128[] instead of ten2mk128trunc[]
|
|
|
|
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
|
|
// redundant shift = shiftright128[ind - 1]; // shift = 0
|
|
res.w[1] = P256.w[3];
|
|
res.w[0] = P256.w[2];
|
|
// redundant fstar.w[3] = 0;
|
|
// redundant fstar.w[2] = 0;
|
|
fstar.w[1] = P256.w[1];
|
|
fstar.w[0] = P256.w[0];
|
|
// fraction f* < 10^(-x) <=> midpoint
|
|
// f* is in the right position to be compared with
|
|
// 10^(-x) from ten2mk128[]
|
|
// if 0 < fstar < 10^(-x), subtract 1 if odd (for rounding to even)
|
|
if ((res.w[0] & 0x0000000000000001ull) && // is result odd?
|
|
((fstar.w[1] < (ten2mk128[ind - 1].w[1]))
|
|
|| ((fstar.w[1] == ten2mk128[ind - 1].w[1])
|
|
&& (fstar.w[0] < ten2mk128[ind - 1].w[0])))) {
|
|
// subract 1 to make even
|
|
if (res.w[0]-- == 0) {
|
|
res.w[1]--;
|
|
}
|
|
}
|
|
if (fstar.w[1] > 0x8000000000000000ull ||
|
|
(fstar.w[1] == 0x8000000000000000ull
|
|
&& fstar.w[0] > 0x0ull)) {
|
|
// f* > 1/2 and the result may be exact
|
|
tmp64 = fstar.w[1] - 0x8000000000000000ull; // f* - 1/2
|
|
if (tmp64 > ten2mk128[ind - 1].w[1] ||
|
|
(tmp64 == ten2mk128[ind - 1].w[1] &&
|
|
fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
|
|
// set the inexact flag
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
} // else the result is exact
|
|
} else { // the result is inexact; f2* <= 1/2
|
|
// set the inexact flag
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
}
|
|
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
|
|
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
|
|
res.w[1] = (P256.w[3] >> shift);
|
|
res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
|
|
// redundant fstar.w[3] = 0;
|
|
fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
|
|
fstar.w[1] = P256.w[1];
|
|
fstar.w[0] = P256.w[0];
|
|
// fraction f* < 10^(-x) <=> midpoint
|
|
// f* is in the right position to be compared with
|
|
// 10^(-x) from ten2mk128[]
|
|
if ((res.w[0] & 0x0000000000000001ull) && // is result odd?
|
|
fstar.w[2] == 0 && (fstar.w[1] < ten2mk128[ind - 1].w[1] ||
|
|
(fstar.w[1] == ten2mk128[ind - 1].w[1] &&
|
|
fstar.w[0] < ten2mk128[ind - 1].w[0]))) {
|
|
// subract 1 to make even
|
|
if (res.w[0]-- == 0) {
|
|
res.w[1]--;
|
|
}
|
|
}
|
|
if (fstar.w[2] > onehalf128[ind - 1] ||
|
|
(fstar.w[2] == onehalf128[ind - 1]
|
|
&& (fstar.w[1] || fstar.w[0]))) {
|
|
// f2* > 1/2 and the result may be exact
|
|
// Calculate f2* - 1/2
|
|
tmp64 = fstar.w[2] - onehalf128[ind - 1];
|
|
if (tmp64 || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
|
|
(fstar.w[1] == ten2mk128[ind - 1].w[1] &&
|
|
fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
|
|
// set the inexact flag
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
} // else the result is exact
|
|
} else { // the result is inexact; f2* <= 1/2
|
|
// set the inexact flag
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
}
|
|
} else { // 22 <= ind - 1 <= 33
|
|
shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
|
|
res.w[1] = 0;
|
|
res.w[0] = P256.w[3] >> shift;
|
|
fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
|
|
fstar.w[2] = P256.w[2];
|
|
fstar.w[1] = P256.w[1];
|
|
fstar.w[0] = P256.w[0];
|
|
// fraction f* < 10^(-x) <=> midpoint
|
|
// f* is in the right position to be compared with
|
|
// 10^(-x) from ten2mk128[]
|
|
if ((res.w[0] & 0x0000000000000001ull) && // is result odd?
|
|
fstar.w[3] == 0 && fstar.w[2] == 0 &&
|
|
(fstar.w[1] < ten2mk128[ind - 1].w[1] ||
|
|
(fstar.w[1] == ten2mk128[ind - 1].w[1] &&
|
|
fstar.w[0] < ten2mk128[ind - 1].w[0]))) {
|
|
// subract 1 to make even
|
|
if (res.w[0]-- == 0) {
|
|
res.w[1]--;
|
|
}
|
|
}
|
|
if (fstar.w[3] > onehalf128[ind - 1] ||
|
|
(fstar.w[3] == onehalf128[ind - 1] &&
|
|
(fstar.w[2] || fstar.w[1] || fstar.w[0]))) {
|
|
// f2* > 1/2 and the result may be exact
|
|
// Calculate f2* - 1/2
|
|
tmp64 = fstar.w[3] - onehalf128[ind - 1];
|
|
if (tmp64 || fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1]
|
|
|| (fstar.w[1] == ten2mk128[ind - 1].w[1]
|
|
&& fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
|
|
// set the inexact flag
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
} // else the result is exact
|
|
} else { // the result is inexact; f2* <= 1/2
|
|
// set the inexact flag
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
}
|
|
}
|
|
res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
|
|
BID_RETURN (res);
|
|
} else { // if ((q + exp) < 0) <=> q < -exp
|
|
// the result is +0 or -0
|
|
res.w[1] = x_sign | 0x3040000000000000ull;
|
|
res.w[0] = 0x0000000000000000ull;
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
BID_RETURN (res);
|
|
}
|
|
break;
|
|
case ROUNDING_TIES_AWAY:
|
|
if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
|
|
// need to shift right -exp digits from the coefficient; exp will be 0
|
|
ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
|
|
// chop off ind digits from the lower part of C1
|
|
// C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits
|
|
tmp64 = C1.w[0];
|
|
if (ind <= 19) {
|
|
C1.w[0] = C1.w[0] + midpoint64[ind - 1];
|
|
} else {
|
|
C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
|
|
C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
|
|
}
|
|
if (C1.w[0] < tmp64)
|
|
C1.w[1]++;
|
|
// calculate C* and f*
|
|
// C* is actually floor(C*) in this case
|
|
// C* and f* need shifting and masking, as shown by
|
|
// shiftright128[] and maskhigh128[]
|
|
// 1 <= x <= 34
|
|
// kx = 10^(-x) = ten2mk128[ind - 1]
|
|
// C* = (C1 + 1/2 * 10^x) * 10^(-x)
|
|
// the approximation of 10^(-x) was rounded up to 118 bits
|
|
__mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
|
|
// the top Ex bits of 10^(-x) are T* = ten2mk128trunc[ind], e.g.
|
|
// if x=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999
|
|
// if (0 < f* < 10^(-x)) then the result is a midpoint
|
|
// if floor(C*) is even then C* = floor(C*) - logical right
|
|
// shift; C* has p decimal digits, correct by Prop. 1)
|
|
// else if floor(C*) is odd C* = floor(C*)-1 (logical right
|
|
// shift; C* has p decimal digits, correct by Pr. 1)
|
|
// else
|
|
// C* = floor(C*) (logical right shift; C has p decimal digits,
|
|
// correct by Property 1)
|
|
// n = C* * 10^(e+x)
|
|
|
|
// determine also the inexactness of the rounding of C*
|
|
// if (0 < f* - 1/2 < 10^(-x)) then
|
|
// the result is exact
|
|
// else // if (f* - 1/2 > T*) then
|
|
// the result is inexact
|
|
// Note: we are going to use ten2mk128[] instead of ten2mk128trunc[]
|
|
// shift right C* by Ex-128 = shiftright128[ind]
|
|
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
|
|
// redundant shift = shiftright128[ind - 1]; // shift = 0
|
|
res.w[1] = P256.w[3];
|
|
res.w[0] = P256.w[2];
|
|
// redundant fstar.w[3] = 0;
|
|
// redundant fstar.w[2] = 0;
|
|
fstar.w[1] = P256.w[1];
|
|
fstar.w[0] = P256.w[0];
|
|
if (fstar.w[1] > 0x8000000000000000ull ||
|
|
(fstar.w[1] == 0x8000000000000000ull
|
|
&& fstar.w[0] > 0x0ull)) {
|
|
// f* > 1/2 and the result may be exact
|
|
tmp64 = fstar.w[1] - 0x8000000000000000ull; // f* - 1/2
|
|
if ((tmp64 > ten2mk128[ind - 1].w[1] ||
|
|
(tmp64 == ten2mk128[ind - 1].w[1] &&
|
|
fstar.w[0] >= ten2mk128[ind - 1].w[0]))) {
|
|
// set the inexact flag
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
} // else the result is exact
|
|
} else { // the result is inexact; f2* <= 1/2
|
|
// set the inexact flag
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
}
|
|
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
|
|
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
|
|
res.w[1] = (P256.w[3] >> shift);
|
|
res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
|
|
// redundant fstar.w[3] = 0;
|
|
fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
|
|
fstar.w[1] = P256.w[1];
|
|
fstar.w[0] = P256.w[0];
|
|
if (fstar.w[2] > onehalf128[ind - 1] ||
|
|
(fstar.w[2] == onehalf128[ind - 1]
|
|
&& (fstar.w[1] || fstar.w[0]))) {
|
|
// f2* > 1/2 and the result may be exact
|
|
// Calculate f2* - 1/2
|
|
tmp64 = fstar.w[2] - onehalf128[ind - 1];
|
|
if (tmp64 || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
|
|
(fstar.w[1] == ten2mk128[ind - 1].w[1] &&
|
|
fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
|
|
// set the inexact flag
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
} // else the result is exact
|
|
} else { // the result is inexact; f2* <= 1/2
|
|
// set the inexact flag
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
}
|
|
} else { // 22 <= ind - 1 <= 33
|
|
shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
|
|
res.w[1] = 0;
|
|
res.w[0] = P256.w[3] >> shift;
|
|
fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
|
|
fstar.w[2] = P256.w[2];
|
|
fstar.w[1] = P256.w[1];
|
|
fstar.w[0] = P256.w[0];
|
|
if (fstar.w[3] > onehalf128[ind - 1] ||
|
|
(fstar.w[3] == onehalf128[ind - 1] &&
|
|
(fstar.w[2] || fstar.w[1] || fstar.w[0]))) {
|
|
// f2* > 1/2 and the result may be exact
|
|
// Calculate f2* - 1/2
|
|
tmp64 = fstar.w[3] - onehalf128[ind - 1];
|
|
if (tmp64 || fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1]
|
|
|| (fstar.w[1] == ten2mk128[ind - 1].w[1]
|
|
&& fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
|
|
// set the inexact flag
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
} // else the result is exact
|
|
} else { // the result is inexact; f2* <= 1/2
|
|
// set the inexact flag
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
}
|
|
}
|
|
// if the result was a midpoint, it was already rounded away from zero
|
|
res.w[1] |= x_sign | 0x3040000000000000ull;
|
|
BID_RETURN (res);
|
|
} else { // if ((q + exp) < 0) <=> q < -exp
|
|
// the result is +0 or -0
|
|
res.w[1] = x_sign | 0x3040000000000000ull;
|
|
res.w[0] = 0x0000000000000000ull;
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
BID_RETURN (res);
|
|
}
|
|
break;
|
|
case ROUNDING_DOWN:
|
|
if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
|
|
// need to shift right -exp digits from the coefficient; exp will be 0
|
|
ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
|
|
// (number of digits to be chopped off)
|
|
// chop off ind digits from the lower part of C1
|
|
// FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
|
|
// FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
|
|
// FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
|
|
// FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
|
|
// tmp64 = C1.w[0];
|
|
// if (ind <= 19) {
|
|
// C1.w[0] = C1.w[0] + midpoint64[ind - 1];
|
|
// } else {
|
|
// C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
|
|
// C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
|
|
// }
|
|
// if (C1.w[0] < tmp64) C1.w[1]++;
|
|
// if carry-out from C1.w[0], increment C1.w[1]
|
|
// calculate C* and f*
|
|
// C* is actually floor(C*) in this case
|
|
// C* and f* need shifting and masking, as shown by
|
|
// shiftright128[] and maskhigh128[]
|
|
// 1 <= x <= 34
|
|
// kx = 10^(-x) = ten2mk128[ind - 1]
|
|
// C* = (C1 + 1/2 * 10^x) * 10^(-x)
|
|
// the approximation of 10^(-x) was rounded up to 118 bits
|
|
__mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
|
|
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
|
|
res.w[1] = P256.w[3];
|
|
res.w[0] = P256.w[2];
|
|
// redundant fstar.w[3] = 0;
|
|
// redundant fstar.w[2] = 0;
|
|
// redundant fstar.w[1] = P256.w[1];
|
|
// redundant fstar.w[0] = P256.w[0];
|
|
// fraction f* > 10^(-x) <=> inexact
|
|
// f* is in the right position to be compared with
|
|
// 10^(-x) from ten2mk128[]
|
|
if ((P256.w[1] > ten2mk128[ind - 1].w[1])
|
|
|| (P256.w[1] == ten2mk128[ind - 1].w[1]
|
|
&& (P256.w[0] >= ten2mk128[ind - 1].w[0]))) {
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
// if positive, the truncated value is already the correct result
|
|
if (x_sign) { // if negative
|
|
if (++res.w[0] == 0) {
|
|
res.w[1]++;
|
|
}
|
|
}
|
|
}
|
|
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
|
|
shift = shiftright128[ind - 1]; // 0 <= shift <= 102
|
|
res.w[1] = (P256.w[3] >> shift);
|
|
res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
|
|
// redundant fstar.w[3] = 0;
|
|
fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
|
|
fstar.w[1] = P256.w[1];
|
|
fstar.w[0] = P256.w[0];
|
|
// fraction f* > 10^(-x) <=> inexact
|
|
// f* is in the right position to be compared with
|
|
// 10^(-x) from ten2mk128[]
|
|
if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
|
|
(fstar.w[1] == ten2mk128[ind - 1].w[1] &&
|
|
fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
// if positive, the truncated value is already the correct result
|
|
if (x_sign) { // if negative
|
|
if (++res.w[0] == 0) {
|
|
res.w[1]++;
|
|
}
|
|
}
|
|
}
|
|
} else { // 22 <= ind - 1 <= 33
|
|
shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
|
|
res.w[1] = 0;
|
|
res.w[0] = P256.w[3] >> shift;
|
|
fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
|
|
fstar.w[2] = P256.w[2];
|
|
fstar.w[1] = P256.w[1];
|
|
fstar.w[0] = P256.w[0];
|
|
// fraction f* > 10^(-x) <=> inexact
|
|
// f* is in the right position to be compared with
|
|
// 10^(-x) from ten2mk128[]
|
|
if (fstar.w[3] || fstar.w[2]
|
|
|| fstar.w[1] > ten2mk128[ind - 1].w[1]
|
|
|| (fstar.w[1] == ten2mk128[ind - 1].w[1]
|
|
&& fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
// if positive, the truncated value is already the correct result
|
|
if (x_sign) { // if negative
|
|
if (++res.w[0] == 0) {
|
|
res.w[1]++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
|
|
BID_RETURN (res);
|
|
} else { // if exp < 0 and q + exp <= 0
|
|
if (x_sign) { // negative rounds down to -1.0
|
|
res.w[1] = 0xb040000000000000ull;
|
|
res.w[0] = 0x0000000000000001ull;
|
|
} else { // positive rpunds down to +0.0
|
|
res.w[1] = 0x3040000000000000ull;
|
|
res.w[0] = 0x0000000000000000ull;
|
|
}
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
BID_RETURN (res);
|
|
}
|
|
break;
|
|
case ROUNDING_UP:
|
|
if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
|
|
// need to shift right -exp digits from the coefficient; exp will be 0
|
|
ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
|
|
// (number of digits to be chopped off)
|
|
// chop off ind digits from the lower part of C1
|
|
// FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
|
|
// FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
|
|
// FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
|
|
// FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
|
|
// tmp64 = C1.w[0];
|
|
// if (ind <= 19) {
|
|
// C1.w[0] = C1.w[0] + midpoint64[ind - 1];
|
|
// } else {
|
|
// C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
|
|
// C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
|
|
// }
|
|
// if (C1.w[0] < tmp64) C1.w[1]++;
|
|
// if carry-out from C1.w[0], increment C1.w[1]
|
|
// calculate C* and f*
|
|
// C* is actually floor(C*) in this case
|
|
// C* and f* need shifting and masking, as shown by
|
|
// shiftright128[] and maskhigh128[]
|
|
// 1 <= x <= 34
|
|
// kx = 10^(-x) = ten2mk128[ind - 1]
|
|
// C* = C1 * 10^(-x)
|
|
// the approximation of 10^(-x) was rounded up to 118 bits
|
|
__mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
|
|
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
|
|
res.w[1] = P256.w[3];
|
|
res.w[0] = P256.w[2];
|
|
// redundant fstar.w[3] = 0;
|
|
// redundant fstar.w[2] = 0;
|
|
// redundant fstar.w[1] = P256.w[1];
|
|
// redundant fstar.w[0] = P256.w[0];
|
|
// fraction f* > 10^(-x) <=> inexact
|
|
// f* is in the right position to be compared with
|
|
// 10^(-x) from ten2mk128[]
|
|
if ((P256.w[1] > ten2mk128[ind - 1].w[1])
|
|
|| (P256.w[1] == ten2mk128[ind - 1].w[1]
|
|
&& (P256.w[0] >= ten2mk128[ind - 1].w[0]))) {
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
// if negative, the truncated value is already the correct result
|
|
if (!x_sign) { // if positive
|
|
if (++res.w[0] == 0) {
|
|
res.w[1]++;
|
|
}
|
|
}
|
|
}
|
|
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
|
|
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
|
|
res.w[1] = (P256.w[3] >> shift);
|
|
res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
|
|
// redundant fstar.w[3] = 0;
|
|
fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
|
|
fstar.w[1] = P256.w[1];
|
|
fstar.w[0] = P256.w[0];
|
|
// fraction f* > 10^(-x) <=> inexact
|
|
// f* is in the right position to be compared with
|
|
// 10^(-x) from ten2mk128[]
|
|
if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
|
|
(fstar.w[1] == ten2mk128[ind - 1].w[1] &&
|
|
fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
// if negative, the truncated value is already the correct result
|
|
if (!x_sign) { // if positive
|
|
if (++res.w[0] == 0) {
|
|
res.w[1]++;
|
|
}
|
|
}
|
|
}
|
|
} else { // 22 <= ind - 1 <= 33
|
|
shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
|
|
res.w[1] = 0;
|
|
res.w[0] = P256.w[3] >> shift;
|
|
fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
|
|
fstar.w[2] = P256.w[2];
|
|
fstar.w[1] = P256.w[1];
|
|
fstar.w[0] = P256.w[0];
|
|
// fraction f* > 10^(-x) <=> inexact
|
|
// f* is in the right position to be compared with
|
|
// 10^(-x) from ten2mk128[]
|
|
if (fstar.w[3] || fstar.w[2]
|
|
|| fstar.w[1] > ten2mk128[ind - 1].w[1]
|
|
|| (fstar.w[1] == ten2mk128[ind - 1].w[1]
|
|
&& fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
// if negative, the truncated value is already the correct result
|
|
if (!x_sign) { // if positive
|
|
if (++res.w[0] == 0) {
|
|
res.w[1]++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
|
|
BID_RETURN (res);
|
|
} else { // if exp < 0 and q + exp <= 0
|
|
if (x_sign) { // negative rounds up to -0.0
|
|
res.w[1] = 0xb040000000000000ull;
|
|
res.w[0] = 0x0000000000000000ull;
|
|
} else { // positive rpunds up to +1.0
|
|
res.w[1] = 0x3040000000000000ull;
|
|
res.w[0] = 0x0000000000000001ull;
|
|
}
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
BID_RETURN (res);
|
|
}
|
|
break;
|
|
case ROUNDING_TO_ZERO:
|
|
if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
|
|
// need to shift right -exp digits from the coefficient; exp will be 0
|
|
ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
|
|
// (number of digits to be chopped off)
|
|
// chop off ind digits from the lower part of C1
|
|
// FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
|
|
// FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
|
|
// FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
|
|
// FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
|
|
//tmp64 = C1.w[0];
|
|
// if (ind <= 19) {
|
|
// C1.w[0] = C1.w[0] + midpoint64[ind - 1];
|
|
// } else {
|
|
// C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
|
|
// C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
|
|
// }
|
|
// if (C1.w[0] < tmp64) C1.w[1]++;
|
|
// if carry-out from C1.w[0], increment C1.w[1]
|
|
// calculate C* and f*
|
|
// C* is actually floor(C*) in this case
|
|
// C* and f* need shifting and masking, as shown by
|
|
// shiftright128[] and maskhigh128[]
|
|
// 1 <= x <= 34
|
|
// kx = 10^(-x) = ten2mk128[ind - 1]
|
|
// C* = (C1 + 1/2 * 10^x) * 10^(-x)
|
|
// the approximation of 10^(-x) was rounded up to 118 bits
|
|
__mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
|
|
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
|
|
res.w[1] = P256.w[3];
|
|
res.w[0] = P256.w[2];
|
|
// redundant fstar.w[3] = 0;
|
|
// redundant fstar.w[2] = 0;
|
|
// redundant fstar.w[1] = P256.w[1];
|
|
// redundant fstar.w[0] = P256.w[0];
|
|
// fraction f* > 10^(-x) <=> inexact
|
|
// f* is in the right position to be compared with
|
|
// 10^(-x) from ten2mk128[]
|
|
if ((P256.w[1] > ten2mk128[ind - 1].w[1])
|
|
|| (P256.w[1] == ten2mk128[ind - 1].w[1]
|
|
&& (P256.w[0] >= ten2mk128[ind - 1].w[0]))) {
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
}
|
|
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
|
|
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
|
|
res.w[1] = (P256.w[3] >> shift);
|
|
res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
|
|
// redundant fstar.w[3] = 0;
|
|
fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
|
|
fstar.w[1] = P256.w[1];
|
|
fstar.w[0] = P256.w[0];
|
|
// fraction f* > 10^(-x) <=> inexact
|
|
// f* is in the right position to be compared with
|
|
// 10^(-x) from ten2mk128[]
|
|
if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
|
|
(fstar.w[1] == ten2mk128[ind - 1].w[1] &&
|
|
fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
}
|
|
} else { // 22 <= ind - 1 <= 33
|
|
shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
|
|
res.w[1] = 0;
|
|
res.w[0] = P256.w[3] >> shift;
|
|
fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
|
|
fstar.w[2] = P256.w[2];
|
|
fstar.w[1] = P256.w[1];
|
|
fstar.w[0] = P256.w[0];
|
|
// fraction f* > 10^(-x) <=> inexact
|
|
// f* is in the right position to be compared with
|
|
// 10^(-x) from ten2mk128[]
|
|
if (fstar.w[3] || fstar.w[2]
|
|
|| fstar.w[1] > ten2mk128[ind - 1].w[1]
|
|
|| (fstar.w[1] == ten2mk128[ind - 1].w[1]
|
|
&& fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
}
|
|
}
|
|
res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
|
|
BID_RETURN (res);
|
|
} else { // if exp < 0 and q + exp <= 0 the result is +0 or -0
|
|
res.w[1] = x_sign | 0x3040000000000000ull;
|
|
res.w[0] = 0x0000000000000000ull;
|
|
*pfpsf |= INEXACT_EXCEPTION;
|
|
BID_RETURN (res);
|
|
}
|
|
break;
|
|
}
|
|
|
|
BID_RETURN (res);
|
|
}
|
|
|
|
/*****************************************************************************
|
|
* BID128_round_integral_nearest_even
|
|
****************************************************************************/
|
|
|
|
BID128_FUNCTION_ARG1_NORND (bid128_round_integral_nearest_even, x)
|
|
|
|
UINT128 res;
|
|
UINT64 x_sign;
|
|
UINT64 x_exp;
|
|
int exp; // unbiased exponent
|
|
// Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
|
|
UINT64 tmp64;
|
|
BID_UI64DOUBLE tmp1;
|
|
unsigned int x_nr_bits;
|
|
int q, ind, shift;
|
|
UINT128 C1;
|
|
// UINT128 res is C* at first - represents up to 34 decimal digits ~ 113 bits
|
|
UINT256 fstar;
|
|
UINT256 P256;
|
|
|
|
// check for NaN or Infinity
|
|
if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
|
|
// x is special
|
|
if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
|
|
// if x = NaN, then res = Q (x)
|
|
// check first for non-canonical NaN payload
|
|
if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
|
|
(((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
|
|
(x.w[0] > 0x38c15b09ffffffffull))) {
|
|
x.w[1] = x.w[1] & 0xffffc00000000000ull;
|
|
x.w[0] = 0x0ull;
|
|
}
|
|
if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
|
|
// set invalid flag
|
|
*pfpsf |= INVALID_EXCEPTION;
|
|
// return quiet (x)
|
|
res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
|
|
res.w[0] = x.w[0];
|
|
} else { // x is QNaN
|
|
// return x
|
|
res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
|
|
res.w[0] = x.w[0];
|
|
}
|
|
BID_RETURN (res)
|
|
} else { // x is not a NaN, so it must be infinity
|
|
if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf
|
|
// return +inf
|
|
res.w[1] = 0x7800000000000000ull;
|
|
res.w[0] = 0x0000000000000000ull;
|
|
} else { // x is -inf
|
|
// return -inf
|
|
res.w[1] = 0xf800000000000000ull;
|
|
res.w[0] = 0x0000000000000000ull;
|
|
}
|
|
BID_RETURN (res);
|
|
}
|
|
}
|
|
// unpack x
|
|
x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
|
|
C1.w[1] = x.w[1] & MASK_COEFF;
|
|
C1.w[0] = x.w[0];
|
|
|
|
// check for non-canonical values (treated as zero)
|
|
if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
|
|
// non-canonical
|
|
x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
|
|
C1.w[1] = 0; // significand high
|
|
C1.w[0] = 0; // significand low
|
|
} else { // G0_G1 != 11
|
|
x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
|
|
if (C1.w[1] > 0x0001ed09bead87c0ull ||
|
|
(C1.w[1] == 0x0001ed09bead87c0ull
|
|
&& C1.w[0] > 0x378d8e63ffffffffull)) {
|
|
// x is non-canonical if coefficient is larger than 10^34 -1
|
|
C1.w[1] = 0;
|
|
C1.w[0] = 0;
|
|
} else { // canonical
|
|
;
|
|
}
|
|
}
|
|
|
|
// test for input equal to zero
|
|
if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
|
|
// x is 0
|
|
// return 0 preserving the sign bit and the preferred exponent
|
|
// of MAX(Q(x), 0)
|
|
if (x_exp <= (0x1820ull << 49)) {
|
|
res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
|
|
} else {
|
|
res.w[1] = x_sign | x_exp;
|
|
}
|
|
res.w[0] = 0x0000000000000000ull;
|
|
BID_RETURN (res);
|
|
}
|
|
// x is not special and is not zero
|
|
|
|
// if (exp <= -(p+1)) return 0
|
|
if (x_exp <= 0x2ffa000000000000ull) { // 0x2ffa000000000000ull == -35
|
|
res.w[1] = x_sign | 0x3040000000000000ull;
|
|
res.w[0] = 0x0000000000000000ull;
|
|
BID_RETURN (res);
|
|
}
|
|
// q = nr. of decimal digits in x
|
|
// determine first the nr. of bits in x
|
|
if (C1.w[1] == 0) {
|
|
if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
|
|
// split the 64-bit value in two 32-bit halves to avoid rounding errors
|
|
if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
|
|
tmp1.d = (double) (C1.w[0] >> 32); // exact conversion
|
|
x_nr_bits =
|
|
33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
|
} else { // x < 2^32
|
|
tmp1.d = (double) (C1.w[0]); // exact conversion
|
|
x_nr_bits =
|
|
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
|
}
|
|
} else { // if x < 2^53
|
|
tmp1.d = (double) C1.w[0]; // exact conversion
|
|
x_nr_bits =
|
|
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
|
}
|
|
} else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
|
|
tmp1.d = (double) C1.w[1]; // exact conversion
|
|
x_nr_bits =
|
|
65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
|
}
|
|
|
|
q = nr_digits[x_nr_bits - 1].digits;
|
|
if (q == 0) {
|
|
q = nr_digits[x_nr_bits - 1].digits1;
|
|
if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi
|
|
|| (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
|
|
C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
|
|
q++;
|
|
}
|
|
exp = (x_exp >> 49) - 6176;
|
|
if (exp >= 0) { // -exp <= 0
|
|
// the argument is an integer already
|
|
res.w[1] = x.w[1];
|
|
res.w[0] = x.w[0];
|
|
BID_RETURN (res);
|
|
} else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
|
|
// need to shift right -exp digits from the coefficient; the exp will be 0
|
|
ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
|
|
// chop off ind digits from the lower part of C1
|
|
// C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits
|
|
tmp64 = C1.w[0];
|
|
if (ind <= 19) {
|
|
C1.w[0] = C1.w[0] + midpoint64[ind - 1];
|
|
} else {
|
|
C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
|
|
C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
|
|
}
|
|
if (C1.w[0] < tmp64)
|
|
C1.w[1]++;
|
|
// calculate C* and f*
|
|
// C* is actually floor(C*) in this case
|
|
// C* and f* need shifting and masking, as shown by
|
|
// shiftright128[] and maskhigh128[]
|
|
// 1 <= x <= 34
|
|
// kx = 10^(-x) = ten2mk128[ind - 1]
|
|
// C* = (C1 + 1/2 * 10^x) * 10^(-x)
|
|
// the approximation of 10^(-x) was rounded up to 118 bits
|
|
__mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
|
|
// determine the value of res and fstar
|
|
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
|
|
// redundant shift = shiftright128[ind - 1]; // shift = 0
|
|
res.w[1] = P256.w[3];
|
|
res.w[0] = P256.w[2];
|
|
// redundant fstar.w[3] = 0;
|
|
// redundant fstar.w[2] = 0;
|
|
// redundant fstar.w[1] = P256.w[1];
|
|
// redundant fstar.w[0] = P256.w[0];
|
|
// fraction f* < 10^(-x) <=> midpoint
|
|
// f* is in the right position to be compared with
|
|
// 10^(-x) from ten2mk128[]
|
|
// if 0 < fstar < 10^(-x), subtract 1 if odd (for rounding to even)
|
|
if ((res.w[0] & 0x0000000000000001ull) && // is result odd?
|
|
((P256.w[1] < (ten2mk128[ind - 1].w[1]))
|
|
|| ((P256.w[1] == ten2mk128[ind - 1].w[1])
|
|
&& (P256.w[0] < ten2mk128[ind - 1].w[0])))) {
|
|
// subract 1 to make even
|
|
if (res.w[0]-- == 0) {
|
|
res.w[1]--;
|
|
}
|
|
}
|
|
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
|
|
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
|
|
res.w[1] = (P256.w[3] >> shift);
|
|
res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
|
|
// redundant fstar.w[3] = 0;
|
|
fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
|
|
fstar.w[1] = P256.w[1];
|
|
fstar.w[0] = P256.w[0];
|
|
// fraction f* < 10^(-x) <=> midpoint
|
|
// f* is in the right position to be compared with
|
|
// 10^(-x) from ten2mk128[]
|
|
if ((res.w[0] & 0x0000000000000001ull) && // is result odd?
|
|
fstar.w[2] == 0 && (fstar.w[1] < ten2mk128[ind - 1].w[1] ||
|
|
(fstar.w[1] == ten2mk128[ind - 1].w[1] &&
|
|
fstar.w[0] < ten2mk128[ind - 1].w[0]))) {
|
|
// subract 1 to make even
|
|
if (res.w[0]-- == 0) {
|
|
res.w[1]--;
|
|
}
|
|
}
|
|
} else { // 22 <= ind - 1 <= 33
|
|
shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
|
|
res.w[1] = 0;
|
|
res.w[0] = P256.w[3] >> shift;
|
|
fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
|
|
fstar.w[2] = P256.w[2];
|
|
fstar.w[1] = P256.w[1];
|
|
fstar.w[0] = P256.w[0];
|
|
// fraction f* < 10^(-x) <=> midpoint
|
|
// f* is in the right position to be compared with
|
|
// 10^(-x) from ten2mk128[]
|
|
if ((res.w[0] & 0x0000000000000001ull) && // is result odd?
|
|
fstar.w[3] == 0 && fstar.w[2] == 0
|
|
&& (fstar.w[1] < ten2mk128[ind - 1].w[1]
|
|
|| (fstar.w[1] == ten2mk128[ind - 1].w[1]
|
|
&& fstar.w[0] < ten2mk128[ind - 1].w[0]))) {
|
|
// subract 1 to make even
|
|
if (res.w[0]-- == 0) {
|
|
res.w[1]--;
|
|
}
|
|
}
|
|
}
|
|
res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
|
|
BID_RETURN (res);
|
|
} else { // if ((q + exp) < 0) <=> q < -exp
|
|
// the result is +0 or -0
|
|
res.w[1] = x_sign | 0x3040000000000000ull;
|
|
res.w[0] = 0x0000000000000000ull;
|
|
BID_RETURN (res);
|
|
}
|
|
}
|
|
|
|
/*****************************************************************************
|
|
* BID128_round_integral_negative
|
|
****************************************************************************/
|
|
|
|
BID128_FUNCTION_ARG1_NORND (bid128_round_integral_negative, x)
|
|
|
|
UINT128 res;
|
|
UINT64 x_sign;
|
|
UINT64 x_exp;
|
|
int exp; // unbiased exponent
|
|
// Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo
|
|
// (all are UINT64)
|
|
BID_UI64DOUBLE tmp1;
|
|
unsigned int x_nr_bits;
|
|
int q, ind, shift;
|
|
UINT128 C1;
|
|
// UINT128 res is C* at first - represents up to 34 decimal digits ~
|
|
// 113 bits
|
|
UINT256 fstar;
|
|
UINT256 P256;
|
|
|
|
// check for NaN or Infinity
|
|
if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
|
|
// x is special
|
|
if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
|
|
// if x = NaN, then res = Q (x)
|
|
// check first for non-canonical NaN payload
|
|
if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
|
|
(((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
|
|
(x.w[0] > 0x38c15b09ffffffffull))) {
|
|
x.w[1] = x.w[1] & 0xffffc00000000000ull;
|
|
x.w[0] = 0x0ull;
|
|
}
|
|
if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
|
|
// set invalid flag
|
|
*pfpsf |= INVALID_EXCEPTION;
|
|
// return quiet (x)
|
|
res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
|
|
res.w[0] = x.w[0];
|
|
} else { // x is QNaN
|
|
// return x
|
|
res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
|
|
res.w[0] = x.w[0];
|
|
}
|
|
BID_RETURN (res)
|
|
} else { // x is not a NaN, so it must be infinity
|
|
if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf
|
|
// return +inf
|
|
res.w[1] = 0x7800000000000000ull;
|
|
res.w[0] = 0x0000000000000000ull;
|
|
} else { // x is -inf
|
|
// return -inf
|
|
res.w[1] = 0xf800000000000000ull;
|
|
res.w[0] = 0x0000000000000000ull;
|
|
}
|
|
BID_RETURN (res);
|
|
}
|
|
}
|
|
// unpack x
|
|
x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
|
|
C1.w[1] = x.w[1] & MASK_COEFF;
|
|
C1.w[0] = x.w[0];
|
|
|
|
// check for non-canonical values (treated as zero)
|
|
if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
|
|
// non-canonical
|
|
x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
|
|
C1.w[1] = 0; // significand high
|
|
C1.w[0] = 0; // significand low
|
|
} else { // G0_G1 != 11
|
|
x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
|
|
if (C1.w[1] > 0x0001ed09bead87c0ull ||
|
|
(C1.w[1] == 0x0001ed09bead87c0ull
|
|
&& C1.w[0] > 0x378d8e63ffffffffull)) {
|
|
// x is non-canonical if coefficient is larger than 10^34 -1
|
|
C1.w[1] = 0;
|
|
C1.w[0] = 0;
|
|
} else { // canonical
|
|
;
|
|
}
|
|
}
|
|
|
|
// test for input equal to zero
|
|
if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
|
|
// x is 0
|
|
// return 0 preserving the sign bit and the preferred exponent
|
|
// of MAX(Q(x), 0)
|
|
if (x_exp <= (0x1820ull << 49)) {
|
|
res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
|
|
} else {
|
|
res.w[1] = x_sign | x_exp;
|
|
}
|
|
res.w[0] = 0x0000000000000000ull;
|
|
BID_RETURN (res);
|
|
}
|
|
// x is not special and is not zero
|
|
|
|
// if (exp <= -p) return -1.0 or +0.0
|
|
if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34
|
|
if (x_sign) {
|
|
// if negative, return negative 1, because we know the coefficient
|
|
// is non-zero (would have been caught above)
|
|
res.w[1] = 0xb040000000000000ull;
|
|
res.w[0] = 0x0000000000000001ull;
|
|
} else {
|
|
// if positive, return positive 0, because we know coefficient is
|
|
// non-zero (would have been caught above)
|
|
res.w[1] = 0x3040000000000000ull;
|
|
res.w[0] = 0x0000000000000000ull;
|
|
}
|
|
BID_RETURN (res);
|
|
}
|
|
// q = nr. of decimal digits in x
|
|
// determine first the nr. of bits in x
|
|
if (C1.w[1] == 0) {
|
|
if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
|
|
// split the 64-bit value in two 32-bit halves to avoid rounding errors
|
|
if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
|
|
tmp1.d = (double) (C1.w[0] >> 32); // exact conversion
|
|
x_nr_bits =
|
|
33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
|
} else { // x < 2^32
|
|
tmp1.d = (double) (C1.w[0]); // exact conversion
|
|
x_nr_bits =
|
|
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
|
}
|
|
} else { // if x < 2^53
|
|
tmp1.d = (double) C1.w[0]; // exact conversion
|
|
x_nr_bits =
|
|
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
|
}
|
|
} else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
|
|
tmp1.d = (double) C1.w[1]; // exact conversion
|
|
x_nr_bits =
|
|
65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
|
}
|
|
|
|
q = nr_digits[x_nr_bits - 1].digits;
|
|
if (q == 0) {
|
|
q = nr_digits[x_nr_bits - 1].digits1;
|
|
if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
|
|
(C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
|
|
C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
|
|
q++;
|
|
}
|
|
exp = (x_exp >> 49) - 6176;
|
|
if (exp >= 0) { // -exp <= 0
|
|
// the argument is an integer already
|
|
res.w[1] = x.w[1];
|
|
res.w[0] = x.w[0];
|
|
BID_RETURN (res);
|
|
} else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
|
|
// need to shift right -exp digits from the coefficient; the exp will be 0
|
|
ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
|
|
// (number of digits to be chopped off)
|
|
// chop off ind digits from the lower part of C1
|
|
// FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
|
|
// FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
|
|
// FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
|
|
// FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
|
|
//tmp64 = C1.w[0];
|
|
// if (ind <= 19) {
|
|
// C1.w[0] = C1.w[0] + midpoint64[ind - 1];
|
|
// } else {
|
|
// C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
|
|
// C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
|
|
// }
|
|
// if (C1.w[0] < tmp64) C1.w[1]++;
|
|
// if carry-out from C1.w[0], increment C1.w[1]
|
|
// calculate C* and f*
|
|
// C* is actually floor(C*) in this case
|
|
// C* and f* need shifting and masking, as shown by
|
|
// shiftright128[] and maskhigh128[]
|
|
// 1 <= x <= 34
|
|
// kx = 10^(-x) = ten2mk128[ind - 1]
|
|
// C* = (C1 + 1/2 * 10^x) * 10^(-x)
|
|
// the approximation of 10^(-x) was rounded up to 118 bits
|
|
__mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
|
|
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
|
|
res.w[1] = P256.w[3];
|
|
res.w[0] = P256.w[2];
|
|
// if positive, the truncated value is already the correct result
|
|
if (x_sign) { // if negative
|
|
// redundant fstar.w[3] = 0;
|
|
// redundant fstar.w[2] = 0;
|
|
// redundant fstar.w[1] = P256.w[1];
|
|
// redundant fstar.w[0] = P256.w[0];
|
|
// fraction f* > 10^(-x) <=> inexact
|
|
// f* is in the right position to be compared with
|
|
// 10^(-x) from ten2mk128[]
|
|
if ((P256.w[1] > ten2mk128[ind - 1].w[1])
|
|
|| (P256.w[1] == ten2mk128[ind - 1].w[1]
|
|
&& (P256.w[0] >= ten2mk128[ind - 1].w[0]))) {
|
|
if (++res.w[0] == 0) {
|
|
res.w[1]++;
|
|
}
|
|
}
|
|
}
|
|
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
|
|
shift = shiftright128[ind - 1]; // 0 <= shift <= 102
|
|
res.w[1] = (P256.w[3] >> shift);
|
|
res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
|
|
// if positive, the truncated value is already the correct result
|
|
if (x_sign) { // if negative
|
|
// redundant fstar.w[3] = 0;
|
|
fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
|
|
fstar.w[1] = P256.w[1];
|
|
fstar.w[0] = P256.w[0];
|
|
// fraction f* > 10^(-x) <=> inexact
|
|
// f* is in the right position to be compared with
|
|
// 10^(-x) from ten2mk128[]
|
|
if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
|
|
(fstar.w[1] == ten2mk128[ind - 1].w[1] &&
|
|
fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
|
|
if (++res.w[0] == 0) {
|
|
res.w[1]++;
|
|
}
|
|
}
|
|
}
|
|
} else { // 22 <= ind - 1 <= 33
|
|
shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
|
|
res.w[1] = 0;
|
|
res.w[0] = P256.w[3] >> shift;
|
|
// if positive, the truncated value is already the correct result
|
|
if (x_sign) { // if negative
|
|
fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
|
|
fstar.w[2] = P256.w[2];
|
|
fstar.w[1] = P256.w[1];
|
|
fstar.w[0] = P256.w[0];
|
|
// fraction f* > 10^(-x) <=> inexact
|
|
// f* is in the right position to be compared with
|
|
// 10^(-x) from ten2mk128[]
|
|
if (fstar.w[3] || fstar.w[2]
|
|
|| fstar.w[1] > ten2mk128[ind - 1].w[1]
|
|
|| (fstar.w[1] == ten2mk128[ind - 1].w[1]
|
|
&& fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
|
|
if (++res.w[0] == 0) {
|
|
res.w[1]++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
|
|
BID_RETURN (res);
|
|
} else { // if exp < 0 and q + exp <= 0
|
|
if (x_sign) { // negative rounds down to -1.0
|
|
res.w[1] = 0xb040000000000000ull;
|
|
res.w[0] = 0x0000000000000001ull;
|
|
} else { // positive rpunds down to +0.0
|
|
res.w[1] = 0x3040000000000000ull;
|
|
res.w[0] = 0x0000000000000000ull;
|
|
}
|
|
BID_RETURN (res);
|
|
}
|
|
}
|
|
|
|
/*****************************************************************************
|
|
* BID128_round_integral_positive
|
|
****************************************************************************/
|
|
|
|
BID128_FUNCTION_ARG1_NORND (bid128_round_integral_positive, x)
|
|
|
|
UINT128 res;
|
|
UINT64 x_sign;
|
|
UINT64 x_exp;
|
|
int exp; // unbiased exponent
|
|
// Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo
|
|
// (all are UINT64)
|
|
BID_UI64DOUBLE tmp1;
|
|
unsigned int x_nr_bits;
|
|
int q, ind, shift;
|
|
UINT128 C1;
|
|
// UINT128 res is C* at first - represents up to 34 decimal digits ~
|
|
// 113 bits
|
|
UINT256 fstar;
|
|
UINT256 P256;
|
|
|
|
// check for NaN or Infinity
|
|
if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
|
|
// x is special
|
|
if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
|
|
// if x = NaN, then res = Q (x)
|
|
// check first for non-canonical NaN payload
|
|
if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
|
|
(((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
|
|
(x.w[0] > 0x38c15b09ffffffffull))) {
|
|
x.w[1] = x.w[1] & 0xffffc00000000000ull;
|
|
x.w[0] = 0x0ull;
|
|
}
|
|
if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
|
|
// set invalid flag
|
|
*pfpsf |= INVALID_EXCEPTION;
|
|
// return quiet (x)
|
|
res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
|
|
res.w[0] = x.w[0];
|
|
} else { // x is QNaN
|
|
// return x
|
|
res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
|
|
res.w[0] = x.w[0];
|
|
}
|
|
BID_RETURN (res)
|
|
} else { // x is not a NaN, so it must be infinity
|
|
if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf
|
|
// return +inf
|
|
res.w[1] = 0x7800000000000000ull;
|
|
res.w[0] = 0x0000000000000000ull;
|
|
} else { // x is -inf
|
|
// return -inf
|
|
res.w[1] = 0xf800000000000000ull;
|
|
res.w[0] = 0x0000000000000000ull;
|
|
}
|
|
BID_RETURN (res);
|
|
}
|
|
}
|
|
// unpack x
|
|
x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
|
|
C1.w[1] = x.w[1] & MASK_COEFF;
|
|
C1.w[0] = x.w[0];
|
|
|
|
// check for non-canonical values (treated as zero)
|
|
if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
|
|
// non-canonical
|
|
x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
|
|
C1.w[1] = 0; // significand high
|
|
C1.w[0] = 0; // significand low
|
|
} else { // G0_G1 != 11
|
|
x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
|
|
if (C1.w[1] > 0x0001ed09bead87c0ull ||
|
|
(C1.w[1] == 0x0001ed09bead87c0ull
|
|
&& C1.w[0] > 0x378d8e63ffffffffull)) {
|
|
// x is non-canonical if coefficient is larger than 10^34 -1
|
|
C1.w[1] = 0;
|
|
C1.w[0] = 0;
|
|
} else { // canonical
|
|
;
|
|
}
|
|
}
|
|
|
|
// test for input equal to zero
|
|
if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
|
|
// x is 0
|
|
// return 0 preserving the sign bit and the preferred exponent
|
|
// of MAX(Q(x), 0)
|
|
if (x_exp <= (0x1820ull << 49)) {
|
|
res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
|
|
} else {
|
|
res.w[1] = x_sign | x_exp;
|
|
}
|
|
res.w[0] = 0x0000000000000000ull;
|
|
BID_RETURN (res);
|
|
}
|
|
// x is not special and is not zero
|
|
|
|
// if (exp <= -p) return -0.0 or +1.0
|
|
if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34
|
|
if (x_sign) {
|
|
// if negative, return negative 0, because we know the coefficient
|
|
// is non-zero (would have been caught above)
|
|
res.w[1] = 0xb040000000000000ull;
|
|
res.w[0] = 0x0000000000000000ull;
|
|
} else {
|
|
// if positive, return positive 1, because we know coefficient is
|
|
// non-zero (would have been caught above)
|
|
res.w[1] = 0x3040000000000000ull;
|
|
res.w[0] = 0x0000000000000001ull;
|
|
}
|
|
BID_RETURN (res);
|
|
}
|
|
// q = nr. of decimal digits in x
|
|
// determine first the nr. of bits in x
|
|
if (C1.w[1] == 0) {
|
|
if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
|
|
// split 64-bit value in two 32-bit halves to avoid rounding errors
|
|
if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
|
|
tmp1.d = (double) (C1.w[0] >> 32); // exact conversion
|
|
x_nr_bits =
|
|
33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
|
} else { // x < 2^32
|
|
tmp1.d = (double) (C1.w[0]); // exact conversion
|
|
x_nr_bits =
|
|
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
|
}
|
|
} else { // if x < 2^53
|
|
tmp1.d = (double) C1.w[0]; // exact conversion
|
|
x_nr_bits =
|
|
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
|
}
|
|
} else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
|
|
tmp1.d = (double) C1.w[1]; // exact conversion
|
|
x_nr_bits =
|
|
65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
|
}
|
|
|
|
q = nr_digits[x_nr_bits - 1].digits;
|
|
if (q == 0) {
|
|
q = nr_digits[x_nr_bits - 1].digits1;
|
|
if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
|
|
(C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
|
|
C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
|
|
q++;
|
|
}
|
|
exp = (x_exp >> 49) - 6176;
|
|
if (exp >= 0) { // -exp <= 0
|
|
// the argument is an integer already
|
|
res.w[1] = x.w[1];
|
|
res.w[0] = x.w[0];
|
|
BID_RETURN (res);
|
|
} else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
|
|
// need to shift right -exp digits from the coefficient; exp will be 0
|
|
ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
|
|
// (number of digits to be chopped off)
|
|
// chop off ind digits from the lower part of C1
|
|
// FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
|
|
// FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
|
|
// FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
|
|
// FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
|
|
// tmp64 = C1.w[0];
|
|
// if (ind <= 19) {
|
|
// C1.w[0] = C1.w[0] + midpoint64[ind - 1];
|
|
// } else {
|
|
// C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
|
|
// C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
|
|
// }
|
|
// if (C1.w[0] < tmp64) C1.w[1]++;
|
|
// if carry-out from C1.w[0], increment C1.w[1]
|
|
// calculate C* and f*
|
|
// C* is actually floor(C*) in this case
|
|
// C* and f* need shifting and masking, as shown by
|
|
// shiftright128[] and maskhigh128[]
|
|
// 1 <= x <= 34
|
|
// kx = 10^(-x) = ten2mk128[ind - 1]
|
|
// C* = C1 * 10^(-x)
|
|
// the approximation of 10^(-x) was rounded up to 118 bits
|
|
__mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
|
|
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
|
|
res.w[1] = P256.w[3];
|
|
res.w[0] = P256.w[2];
|
|
// if negative, the truncated value is already the correct result
|
|
if (!x_sign) { // if positive
|
|
// redundant fstar.w[3] = 0;
|
|
// redundant fstar.w[2] = 0;
|
|
// redundant fstar.w[1] = P256.w[1];
|
|
// redundant fstar.w[0] = P256.w[0];
|
|
// fraction f* > 10^(-x) <=> inexact
|
|
// f* is in the right position to be compared with
|
|
// 10^(-x) from ten2mk128[]
|
|
if ((P256.w[1] > ten2mk128[ind - 1].w[1])
|
|
|| (P256.w[1] == ten2mk128[ind - 1].w[1]
|
|
&& (P256.w[0] >= ten2mk128[ind - 1].w[0]))) {
|
|
if (++res.w[0] == 0) {
|
|
res.w[1]++;
|
|
}
|
|
}
|
|
}
|
|
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
|
|
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
|
|
res.w[1] = (P256.w[3] >> shift);
|
|
res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
|
|
// if negative, the truncated value is already the correct result
|
|
if (!x_sign) { // if positive
|
|
// redundant fstar.w[3] = 0;
|
|
fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
|
|
fstar.w[1] = P256.w[1];
|
|
fstar.w[0] = P256.w[0];
|
|
// fraction f* > 10^(-x) <=> inexact
|
|
// f* is in the right position to be compared with
|
|
// 10^(-x) from ten2mk128[]
|
|
if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
|
|
(fstar.w[1] == ten2mk128[ind - 1].w[1] &&
|
|
fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
|
|
if (++res.w[0] == 0) {
|
|
res.w[1]++;
|
|
}
|
|
}
|
|
}
|
|
} else { // 22 <= ind - 1 <= 33
|
|
shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
|
|
res.w[1] = 0;
|
|
res.w[0] = P256.w[3] >> shift;
|
|
// if negative, the truncated value is already the correct result
|
|
if (!x_sign) { // if positive
|
|
fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
|
|
fstar.w[2] = P256.w[2];
|
|
fstar.w[1] = P256.w[1];
|
|
fstar.w[0] = P256.w[0];
|
|
// fraction f* > 10^(-x) <=> inexact
|
|
// f* is in the right position to be compared with
|
|
// 10^(-x) from ten2mk128[]
|
|
if (fstar.w[3] || fstar.w[2]
|
|
|| fstar.w[1] > ten2mk128[ind - 1].w[1]
|
|
|| (fstar.w[1] == ten2mk128[ind - 1].w[1]
|
|
&& fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
|
|
if (++res.w[0] == 0) {
|
|
res.w[1]++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
|
|
BID_RETURN (res);
|
|
} else { // if exp < 0 and q + exp <= 0
|
|
if (x_sign) { // negative rounds up to -0.0
|
|
res.w[1] = 0xb040000000000000ull;
|
|
res.w[0] = 0x0000000000000000ull;
|
|
} else { // positive rpunds up to +1.0
|
|
res.w[1] = 0x3040000000000000ull;
|
|
res.w[0] = 0x0000000000000001ull;
|
|
}
|
|
BID_RETURN (res);
|
|
}
|
|
}
|
|
|
|
/*****************************************************************************
|
|
* BID128_round_integral_zero
|
|
****************************************************************************/
|
|
|
|
BID128_FUNCTION_ARG1_NORND (bid128_round_integral_zero, x)
|
|
|
|
UINT128 res;
|
|
UINT64 x_sign;
|
|
UINT64 x_exp;
|
|
int exp; // unbiased exponent
|
|
// Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo
|
|
// (all are UINT64)
|
|
BID_UI64DOUBLE tmp1;
|
|
unsigned int x_nr_bits;
|
|
int q, ind, shift;
|
|
UINT128 C1;
|
|
// UINT128 res is C* at first - represents up to 34 decimal digits ~
|
|
// 113 bits
|
|
UINT256 P256;
|
|
|
|
// check for NaN or Infinity
|
|
if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
|
|
// x is special
|
|
if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
|
|
// if x = NaN, then res = Q (x)
|
|
// check first for non-canonical NaN payload
|
|
if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
|
|
(((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
|
|
(x.w[0] > 0x38c15b09ffffffffull))) {
|
|
x.w[1] = x.w[1] & 0xffffc00000000000ull;
|
|
x.w[0] = 0x0ull;
|
|
}
|
|
if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
|
|
// set invalid flag
|
|
*pfpsf |= INVALID_EXCEPTION;
|
|
// return quiet (x)
|
|
res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
|
|
res.w[0] = x.w[0];
|
|
} else { // x is QNaN
|
|
// return x
|
|
res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
|
|
res.w[0] = x.w[0];
|
|
}
|
|
BID_RETURN (res)
|
|
} else { // x is not a NaN, so it must be infinity
|
|
if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf
|
|
// return +inf
|
|
res.w[1] = 0x7800000000000000ull;
|
|
res.w[0] = 0x0000000000000000ull;
|
|
} else { // x is -inf
|
|
// return -inf
|
|
res.w[1] = 0xf800000000000000ull;
|
|
res.w[0] = 0x0000000000000000ull;
|
|
}
|
|
BID_RETURN (res);
|
|
}
|
|
}
|
|
// unpack x
|
|
x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
|
|
C1.w[1] = x.w[1] & MASK_COEFF;
|
|
C1.w[0] = x.w[0];
|
|
|
|
// check for non-canonical values (treated as zero)
|
|
if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
|
|
// non-canonical
|
|
x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
|
|
C1.w[1] = 0; // significand high
|
|
C1.w[0] = 0; // significand low
|
|
} else { // G0_G1 != 11
|
|
x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
|
|
if (C1.w[1] > 0x0001ed09bead87c0ull ||
|
|
(C1.w[1] == 0x0001ed09bead87c0ull
|
|
&& C1.w[0] > 0x378d8e63ffffffffull)) {
|
|
// x is non-canonical if coefficient is larger than 10^34 -1
|
|
C1.w[1] = 0;
|
|
C1.w[0] = 0;
|
|
} else { // canonical
|
|
;
|
|
}
|
|
}
|
|
|
|
// test for input equal to zero
|
|
if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
|
|
// x is 0
|
|
// return 0 preserving the sign bit and the preferred exponent
|
|
// of MAX(Q(x), 0)
|
|
if (x_exp <= (0x1820ull << 49)) {
|
|
res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
|
|
} else {
|
|
res.w[1] = x_sign | x_exp;
|
|
}
|
|
res.w[0] = 0x0000000000000000ull;
|
|
BID_RETURN (res);
|
|
}
|
|
// x is not special and is not zero
|
|
|
|
// if (exp <= -p) return -0.0 or +0.0
|
|
if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34
|
|
res.w[1] = x_sign | 0x3040000000000000ull;
|
|
res.w[0] = 0x0000000000000000ull;
|
|
BID_RETURN (res);
|
|
}
|
|
// q = nr. of decimal digits in x
|
|
// determine first the nr. of bits in x
|
|
if (C1.w[1] == 0) {
|
|
if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
|
|
// split the 64-bit value in two 32-bit halves to avoid rounding errors
|
|
if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
|
|
tmp1.d = (double) (C1.w[0] >> 32); // exact conversion
|
|
x_nr_bits =
|
|
33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
|
} else { // x < 2^32
|
|
tmp1.d = (double) (C1.w[0]); // exact conversion
|
|
x_nr_bits =
|
|
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
|
}
|
|
} else { // if x < 2^53
|
|
tmp1.d = (double) C1.w[0]; // exact conversion
|
|
x_nr_bits =
|
|
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
|
}
|
|
} else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
|
|
tmp1.d = (double) C1.w[1]; // exact conversion
|
|
x_nr_bits =
|
|
65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
|
}
|
|
|
|
q = nr_digits[x_nr_bits - 1].digits;
|
|
if (q == 0) {
|
|
q = nr_digits[x_nr_bits - 1].digits1;
|
|
if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
|
|
(C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
|
|
C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
|
|
q++;
|
|
}
|
|
exp = (x_exp >> 49) - 6176;
|
|
if (exp >= 0) { // -exp <= 0
|
|
// the argument is an integer already
|
|
res.w[1] = x.w[1];
|
|
res.w[0] = x.w[0];
|
|
BID_RETURN (res);
|
|
} else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
|
|
// need to shift right -exp digits from the coefficient; the exp will be 0
|
|
ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
|
|
// (number of digits to be chopped off)
|
|
// chop off ind digits from the lower part of C1
|
|
// FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
|
|
// FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
|
|
// FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
|
|
// FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
|
|
//tmp64 = C1.w[0];
|
|
// if (ind <= 19) {
|
|
// C1.w[0] = C1.w[0] + midpoint64[ind - 1];
|
|
// } else {
|
|
// C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
|
|
// C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
|
|
// }
|
|
// if (C1.w[0] < tmp64) C1.w[1]++;
|
|
// if carry-out from C1.w[0], increment C1.w[1]
|
|
// calculate C* and f*
|
|
// C* is actually floor(C*) in this case
|
|
// C* and f* need shifting and masking, as shown by
|
|
// shiftright128[] and maskhigh128[]
|
|
// 1 <= x <= 34
|
|
// kx = 10^(-x) = ten2mk128[ind - 1]
|
|
// C* = (C1 + 1/2 * 10^x) * 10^(-x)
|
|
// the approximation of 10^(-x) was rounded up to 118 bits
|
|
__mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
|
|
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
|
|
res.w[1] = P256.w[3];
|
|
res.w[0] = P256.w[2];
|
|
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
|
|
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
|
|
res.w[1] = (P256.w[3] >> shift);
|
|
res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
|
|
} else { // 22 <= ind - 1 <= 33
|
|
shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
|
|
res.w[1] = 0;
|
|
res.w[0] = P256.w[3] >> shift;
|
|
}
|
|
res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
|
|
BID_RETURN (res);
|
|
} else { // if exp < 0 and q + exp <= 0 the result is +0 or -0
|
|
res.w[1] = x_sign | 0x3040000000000000ull;
|
|
res.w[0] = 0x0000000000000000ull;
|
|
BID_RETURN (res);
|
|
}
|
|
}
|
|
|
|
/*****************************************************************************
|
|
* BID128_round_integral_nearest_away
|
|
****************************************************************************/
|
|
|
|
BID128_FUNCTION_ARG1_NORND (bid128_round_integral_nearest_away, x)
|
|
|
|
UINT128 res;
|
|
UINT64 x_sign;
|
|
UINT64 x_exp;
|
|
int exp; // unbiased exponent
|
|
// Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo
|
|
// (all are UINT64)
|
|
UINT64 tmp64;
|
|
BID_UI64DOUBLE tmp1;
|
|
unsigned int x_nr_bits;
|
|
int q, ind, shift;
|
|
UINT128 C1;
|
|
// UINT128 res is C* at first - represents up to 34 decimal digits ~
|
|
// 113 bits
|
|
// UINT256 fstar;
|
|
UINT256 P256;
|
|
|
|
// check for NaN or Infinity
|
|
if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
|
|
// x is special
|
|
if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
|
|
// if x = NaN, then res = Q (x)
|
|
// check first for non-canonical NaN payload
|
|
if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
|
|
(((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
|
|
(x.w[0] > 0x38c15b09ffffffffull))) {
|
|
x.w[1] = x.w[1] & 0xffffc00000000000ull;
|
|
x.w[0] = 0x0ull;
|
|
}
|
|
if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
|
|
// set invalid flag
|
|
*pfpsf |= INVALID_EXCEPTION;
|
|
// return quiet (x)
|
|
res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
|
|
res.w[0] = x.w[0];
|
|
} else { // x is QNaN
|
|
// return x
|
|
res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
|
|
res.w[0] = x.w[0];
|
|
}
|
|
BID_RETURN (res)
|
|
} else { // x is not a NaN, so it must be infinity
|
|
if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf
|
|
// return +inf
|
|
res.w[1] = 0x7800000000000000ull;
|
|
res.w[0] = 0x0000000000000000ull;
|
|
} else { // x is -inf
|
|
// return -inf
|
|
res.w[1] = 0xf800000000000000ull;
|
|
res.w[0] = 0x0000000000000000ull;
|
|
}
|
|
BID_RETURN (res);
|
|
}
|
|
}
|
|
// unpack x
|
|
x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
|
|
C1.w[1] = x.w[1] & MASK_COEFF;
|
|
C1.w[0] = x.w[0];
|
|
|
|
// check for non-canonical values (treated as zero)
|
|
if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
|
|
// non-canonical
|
|
x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
|
|
C1.w[1] = 0; // significand high
|
|
C1.w[0] = 0; // significand low
|
|
} else { // G0_G1 != 11
|
|
x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
|
|
if (C1.w[1] > 0x0001ed09bead87c0ull ||
|
|
(C1.w[1] == 0x0001ed09bead87c0ull
|
|
&& C1.w[0] > 0x378d8e63ffffffffull)) {
|
|
// x is non-canonical if coefficient is larger than 10^34 -1
|
|
C1.w[1] = 0;
|
|
C1.w[0] = 0;
|
|
} else { // canonical
|
|
;
|
|
}
|
|
}
|
|
|
|
// test for input equal to zero
|
|
if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
|
|
// x is 0
|
|
// return 0 preserving the sign bit and the preferred exponent
|
|
// of MAX(Q(x), 0)
|
|
if (x_exp <= (0x1820ull << 49)) {
|
|
res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
|
|
} else {
|
|
res.w[1] = x_sign | x_exp;
|
|
}
|
|
res.w[0] = 0x0000000000000000ull;
|
|
BID_RETURN (res);
|
|
}
|
|
// x is not special and is not zero
|
|
|
|
// if (exp <= -(p+1)) return 0.0
|
|
if (x_exp <= 0x2ffa000000000000ull) { // 0x2ffa000000000000ull == -35
|
|
res.w[1] = x_sign | 0x3040000000000000ull;
|
|
res.w[0] = 0x0000000000000000ull;
|
|
BID_RETURN (res);
|
|
}
|
|
// q = nr. of decimal digits in x
|
|
// determine first the nr. of bits in x
|
|
if (C1.w[1] == 0) {
|
|
if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
|
|
// split the 64-bit value in two 32-bit halves to avoid rounding errors
|
|
if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
|
|
tmp1.d = (double) (C1.w[0] >> 32); // exact conversion
|
|
x_nr_bits =
|
|
33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
|
} else { // x < 2^32
|
|
tmp1.d = (double) (C1.w[0]); // exact conversion
|
|
x_nr_bits =
|
|
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
|
}
|
|
} else { // if x < 2^53
|
|
tmp1.d = (double) C1.w[0]; // exact conversion
|
|
x_nr_bits =
|
|
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
|
}
|
|
} else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
|
|
tmp1.d = (double) C1.w[1]; // exact conversion
|
|
x_nr_bits =
|
|
65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
|
}
|
|
|
|
q = nr_digits[x_nr_bits - 1].digits;
|
|
if (q == 0) {
|
|
q = nr_digits[x_nr_bits - 1].digits1;
|
|
if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
|
|
(C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
|
|
C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
|
|
q++;
|
|
}
|
|
exp = (x_exp >> 49) - 6176;
|
|
if (exp >= 0) { // -exp <= 0
|
|
// the argument is an integer already
|
|
res.w[1] = x.w[1];
|
|
res.w[0] = x.w[0];
|
|
BID_RETURN (res);
|
|
} else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
|
|
// need to shift right -exp digits from the coefficient; the exp will be 0
|
|
ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
|
|
// chop off ind digits from the lower part of C1
|
|
// C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits
|
|
tmp64 = C1.w[0];
|
|
if (ind <= 19) {
|
|
C1.w[0] = C1.w[0] + midpoint64[ind - 1];
|
|
} else {
|
|
C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
|
|
C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
|
|
}
|
|
if (C1.w[0] < tmp64)
|
|
C1.w[1]++;
|
|
// calculate C* and f*
|
|
// C* is actually floor(C*) in this case
|
|
// C* and f* need shifting and masking, as shown by
|
|
// shiftright128[] and maskhigh128[]
|
|
// 1 <= x <= 34
|
|
// kx = 10^(-x) = ten2mk128[ind - 1]
|
|
// C* = (C1 + 1/2 * 10^x) * 10^(-x)
|
|
// the approximation of 10^(-x) was rounded up to 118 bits
|
|
__mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
|
|
// the top Ex bits of 10^(-x) are T* = ten2mk128trunc[ind], e.g.
|
|
// if x=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999
|
|
// if (0 < f* < 10^(-x)) then the result is a midpoint
|
|
// if floor(C*) is even then C* = floor(C*) - logical right
|
|
// shift; C* has p decimal digits, correct by Prop. 1)
|
|
// else if floor(C*) is odd C* = floor(C*)-1 (logical right
|
|
// shift; C* has p decimal digits, correct by Pr. 1)
|
|
// else
|
|
// C* = floor(C*) (logical right shift; C has p decimal digits,
|
|
// correct by Property 1)
|
|
// n = C* * 10^(e+x)
|
|
|
|
// shift right C* by Ex-128 = shiftright128[ind]
|
|
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
|
|
res.w[1] = P256.w[3];
|
|
res.w[0] = P256.w[2];
|
|
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
|
|
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
|
|
res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
|
|
res.w[1] = (P256.w[3] >> shift);
|
|
} else { // 22 <= ind - 1 <= 33
|
|
shift = shiftright128[ind - 1]; // 2 <= shift <= 38
|
|
res.w[1] = 0;
|
|
res.w[0] = (P256.w[3] >> (shift - 64)); // 2 <= shift - 64 <= 38
|
|
}
|
|
// if the result was a midpoint, it was already rounded away from zero
|
|
res.w[1] |= x_sign | 0x3040000000000000ull;
|
|
BID_RETURN (res);
|
|
} else { // if ((q + exp) < 0) <=> q < -exp
|
|
// the result is +0 or -0
|
|
res.w[1] = x_sign | 0x3040000000000000ull;
|
|
res.w[0] = 0x0000000000000000ull;
|
|
BID_RETURN (res);
|
|
}
|
|
}
|