mirror of
https://github.com/autc04/Retro68.git
synced 2024-11-24 07:31:32 +00:00
1222 lines
40 KiB
C
1222 lines
40 KiB
C
|
/* Copyright (C) 2007, 2009 Free Software Foundation, Inc.
|
||
|
|
||
|
This file is part of GCC.
|
||
|
|
||
|
GCC is free software; you can redistribute it and/or modify it under
|
||
|
the terms of the GNU General Public License as published by the Free
|
||
|
Software Foundation; either version 3, or (at your option) any later
|
||
|
version.
|
||
|
|
||
|
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||
|
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||
|
for more details.
|
||
|
|
||
|
Under Section 7 of GPL version 3, you are granted additional
|
||
|
permissions described in the GCC Runtime Library Exception, version
|
||
|
3.1, as published by the Free Software Foundation.
|
||
|
|
||
|
You should have received a copy of the GNU General Public License and
|
||
|
a copy of the GCC Runtime Library Exception along with this program;
|
||
|
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||
|
<http://www.gnu.org/licenses/>. */
|
||
|
|
||
|
#include "bid_internal.h"
|
||
|
|
||
|
/*****************************************************************************
|
||
|
* BID64_round_integral_exact
|
||
|
****************************************************************************/
|
||
|
|
||
|
#if DECIMAL_CALL_BY_REFERENCE
|
||
|
void
|
||
|
bid64_round_integral_exact (UINT64 * pres,
|
||
|
UINT64 *
|
||
|
px _RND_MODE_PARAM _EXC_FLAGS_PARAM
|
||
|
_EXC_MASKS_PARAM _EXC_INFO_PARAM) {
|
||
|
UINT64 x = *px;
|
||
|
#if !DECIMAL_GLOBAL_ROUNDING
|
||
|
unsigned int rnd_mode = *prnd_mode;
|
||
|
#endif
|
||
|
#else
|
||
|
UINT64
|
||
|
bid64_round_integral_exact (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM
|
||
|
_EXC_MASKS_PARAM _EXC_INFO_PARAM) {
|
||
|
#endif
|
||
|
|
||
|
UINT64 res = 0xbaddbaddbaddbaddull;
|
||
|
UINT64 x_sign;
|
||
|
int exp; // unbiased exponent
|
||
|
// Note: C1 represents the significand (UINT64)
|
||
|
BID_UI64DOUBLE tmp1;
|
||
|
int x_nr_bits;
|
||
|
int q, ind, shift;
|
||
|
UINT64 C1;
|
||
|
// UINT64 res is C* at first - represents up to 16 decimal digits <= 54 bits
|
||
|
UINT128 fstar = { {0x0ull, 0x0ull} };
|
||
|
UINT128 P128;
|
||
|
|
||
|
x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
|
||
|
|
||
|
// check for NaNs and infinities
|
||
|
if ((x & MASK_NAN) == MASK_NAN) { // check for NaN
|
||
|
if ((x & 0x0003ffffffffffffull) > 999999999999999ull)
|
||
|
x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits
|
||
|
else
|
||
|
x = x & 0xfe03ffffffffffffull; // clear G6-G12
|
||
|
if ((x & MASK_SNAN) == MASK_SNAN) { // SNaN
|
||
|
// set invalid flag
|
||
|
*pfpsf |= INVALID_EXCEPTION;
|
||
|
// return quiet (SNaN)
|
||
|
res = x & 0xfdffffffffffffffull;
|
||
|
} else { // QNaN
|
||
|
res = x;
|
||
|
}
|
||
|
BID_RETURN (res);
|
||
|
} else if ((x & MASK_INF) == MASK_INF) { // check for Infinity
|
||
|
res = x_sign | 0x7800000000000000ull;
|
||
|
BID_RETURN (res);
|
||
|
}
|
||
|
// unpack x
|
||
|
if ((x & MASK_STEERING_BITS) == MASK_STEERING_BITS) {
|
||
|
// if the steering bits are 11 (condition will be 0), then
|
||
|
// the exponent is G[0:w+1]
|
||
|
exp = ((x & MASK_BINARY_EXPONENT2) >> 51) - 398;
|
||
|
C1 = (x & MASK_BINARY_SIG2) | MASK_BINARY_OR2;
|
||
|
if (C1 > 9999999999999999ull) { // non-canonical
|
||
|
C1 = 0;
|
||
|
}
|
||
|
} else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
|
||
|
exp = ((x & MASK_BINARY_EXPONENT1) >> 53) - 398;
|
||
|
C1 = (x & MASK_BINARY_SIG1);
|
||
|
}
|
||
|
|
||
|
// if x is 0 or non-canonical return 0 preserving the sign bit and
|
||
|
// the preferred exponent of MAX(Q(x), 0)
|
||
|
if (C1 == 0) {
|
||
|
if (exp < 0)
|
||
|
exp = 0;
|
||
|
res = x_sign | (((UINT64) exp + 398) << 53);
|
||
|
BID_RETURN (res);
|
||
|
}
|
||
|
// x is a finite non-zero number (not 0, non-canonical, or special)
|
||
|
|
||
|
switch (rnd_mode) {
|
||
|
case ROUNDING_TO_NEAREST:
|
||
|
case ROUNDING_TIES_AWAY:
|
||
|
// return 0 if (exp <= -(p+1))
|
||
|
if (exp <= -17) {
|
||
|
res = x_sign | 0x31c0000000000000ull;
|
||
|
*pfpsf |= INEXACT_EXCEPTION;
|
||
|
BID_RETURN (res);
|
||
|
}
|
||
|
break;
|
||
|
case ROUNDING_DOWN:
|
||
|
// return 0 if (exp <= -p)
|
||
|
if (exp <= -16) {
|
||
|
if (x_sign) {
|
||
|
res = 0xb1c0000000000001ull;
|
||
|
} else {
|
||
|
res = 0x31c0000000000000ull;
|
||
|
}
|
||
|
*pfpsf |= INEXACT_EXCEPTION;
|
||
|
BID_RETURN (res);
|
||
|
}
|
||
|
break;
|
||
|
case ROUNDING_UP:
|
||
|
// return 0 if (exp <= -p)
|
||
|
if (exp <= -16) {
|
||
|
if (x_sign) {
|
||
|
res = 0xb1c0000000000000ull;
|
||
|
} else {
|
||
|
res = 0x31c0000000000001ull;
|
||
|
}
|
||
|
*pfpsf |= INEXACT_EXCEPTION;
|
||
|
BID_RETURN (res);
|
||
|
}
|
||
|
break;
|
||
|
case ROUNDING_TO_ZERO:
|
||
|
// return 0 if (exp <= -p)
|
||
|
if (exp <= -16) {
|
||
|
res = x_sign | 0x31c0000000000000ull;
|
||
|
*pfpsf |= INEXACT_EXCEPTION;
|
||
|
BID_RETURN (res);
|
||
|
}
|
||
|
break;
|
||
|
} // end switch ()
|
||
|
|
||
|
// q = nr. of decimal digits in x (1 <= q <= 54)
|
||
|
// determine first the nr. of bits in x
|
||
|
if (C1 >= 0x0020000000000000ull) { // x >= 2^53
|
||
|
q = 16;
|
||
|
} else { // if x < 2^53
|
||
|
tmp1.d = (double) C1; // exact conversion
|
||
|
x_nr_bits =
|
||
|
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
||
|
q = nr_digits[x_nr_bits - 1].digits;
|
||
|
if (q == 0) {
|
||
|
q = nr_digits[x_nr_bits - 1].digits1;
|
||
|
if (C1 >= nr_digits[x_nr_bits - 1].threshold_lo)
|
||
|
q++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (exp >= 0) { // -exp <= 0
|
||
|
// the argument is an integer already
|
||
|
res = x;
|
||
|
BID_RETURN (res);
|
||
|
}
|
||
|
|
||
|
switch (rnd_mode) {
|
||
|
case ROUNDING_TO_NEAREST:
|
||
|
if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
|
||
|
// need to shift right -exp digits from the coefficient; exp will be 0
|
||
|
ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x'
|
||
|
// chop off ind digits from the lower part of C1
|
||
|
// C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits
|
||
|
// FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
|
||
|
C1 = C1 + midpoint64[ind - 1];
|
||
|
// calculate C* and f*
|
||
|
// C* is actually floor(C*) in this case
|
||
|
// C* and f* need shifting and masking, as shown by
|
||
|
// shiftright128[] and maskhigh128[]
|
||
|
// 1 <= x <= 16
|
||
|
// kx = 10^(-x) = ten2mk64[ind - 1]
|
||
|
// C* = (C1 + 1/2 * 10^x) * 10^(-x)
|
||
|
// the approximation of 10^(-x) was rounded up to 64 bits
|
||
|
__mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]);
|
||
|
|
||
|
// if (0 < f* < 10^(-x)) then the result is a midpoint
|
||
|
// if floor(C*) is even then C* = floor(C*) - logical right
|
||
|
// shift; C* has p decimal digits, correct by Prop. 1)
|
||
|
// else if floor(C*) is odd C* = floor(C*)-1 (logical right
|
||
|
// shift; C* has p decimal digits, correct by Pr. 1)
|
||
|
// else
|
||
|
// C* = floor(C*) (logical right shift; C has p decimal digits,
|
||
|
// correct by Property 1)
|
||
|
// n = C* * 10^(e+x)
|
||
|
|
||
|
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
|
||
|
res = P128.w[1];
|
||
|
fstar.w[1] = 0;
|
||
|
fstar.w[0] = P128.w[0];
|
||
|
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
|
||
|
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
|
||
|
res = (P128.w[1] >> shift);
|
||
|
fstar.w[1] = P128.w[1] & maskhigh128[ind - 1];
|
||
|
fstar.w[0] = P128.w[0];
|
||
|
}
|
||
|
// if (0 < f* < 10^(-x)) then the result is a midpoint
|
||
|
// since round_to_even, subtract 1 if current result is odd
|
||
|
if ((res & 0x0000000000000001ull) && (fstar.w[1] == 0)
|
||
|
&& (fstar.w[0] < ten2mk64[ind - 1])) {
|
||
|
res--;
|
||
|
}
|
||
|
// determine inexactness of the rounding of C*
|
||
|
// if (0 < f* - 1/2 < 10^(-x)) then
|
||
|
// the result is exact
|
||
|
// else // if (f* - 1/2 > T*) then
|
||
|
// the result is inexact
|
||
|
if (ind - 1 <= 2) {
|
||
|
if (fstar.w[0] > 0x8000000000000000ull) {
|
||
|
// f* > 1/2 and the result may be exact
|
||
|
// fstar.w[0] - 0x8000000000000000ull is f* - 1/2
|
||
|
if ((fstar.w[0] - 0x8000000000000000ull) > ten2mk64[ind - 1]) {
|
||
|
// set the inexact flag
|
||
|
*pfpsf |= INEXACT_EXCEPTION;
|
||
|
} // else the result is exact
|
||
|
} else { // the result is inexact; f2* <= 1/2
|
||
|
// set the inexact flag
|
||
|
*pfpsf |= INEXACT_EXCEPTION;
|
||
|
}
|
||
|
} else { // if 3 <= ind - 1 <= 21
|
||
|
if (fstar.w[1] > onehalf128[ind - 1] ||
|
||
|
(fstar.w[1] == onehalf128[ind - 1] && fstar.w[0])) {
|
||
|
// f2* > 1/2 and the result may be exact
|
||
|
// Calculate f2* - 1/2
|
||
|
if (fstar.w[1] > onehalf128[ind - 1]
|
||
|
|| fstar.w[0] > ten2mk64[ind - 1]) {
|
||
|
// set the inexact flag
|
||
|
*pfpsf |= INEXACT_EXCEPTION;
|
||
|
} // else the result is exact
|
||
|
} else { // the result is inexact; f2* <= 1/2
|
||
|
// set the inexact flag
|
||
|
*pfpsf |= INEXACT_EXCEPTION;
|
||
|
}
|
||
|
}
|
||
|
// set exponent to zero as it was negative before.
|
||
|
res = x_sign | 0x31c0000000000000ull | res;
|
||
|
BID_RETURN (res);
|
||
|
} else { // if exp < 0 and q + exp < 0
|
||
|
// the result is +0 or -0
|
||
|
res = x_sign | 0x31c0000000000000ull;
|
||
|
*pfpsf |= INEXACT_EXCEPTION;
|
||
|
BID_RETURN (res);
|
||
|
}
|
||
|
break;
|
||
|
case ROUNDING_TIES_AWAY:
|
||
|
if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
|
||
|
// need to shift right -exp digits from the coefficient; exp will be 0
|
||
|
ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x'
|
||
|
// chop off ind digits from the lower part of C1
|
||
|
// C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits
|
||
|
// FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
|
||
|
C1 = C1 + midpoint64[ind - 1];
|
||
|
// calculate C* and f*
|
||
|
// C* is actually floor(C*) in this case
|
||
|
// C* and f* need shifting and masking, as shown by
|
||
|
// shiftright128[] and maskhigh128[]
|
||
|
// 1 <= x <= 16
|
||
|
// kx = 10^(-x) = ten2mk64[ind - 1]
|
||
|
// C* = (C1 + 1/2 * 10^x) * 10^(-x)
|
||
|
// the approximation of 10^(-x) was rounded up to 64 bits
|
||
|
__mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]);
|
||
|
|
||
|
// if (0 < f* < 10^(-x)) then the result is a midpoint
|
||
|
// C* = floor(C*) - logical right shift; C* has p decimal digits,
|
||
|
// correct by Prop. 1)
|
||
|
// else
|
||
|
// C* = floor(C*) (logical right shift; C has p decimal digits,
|
||
|
// correct by Property 1)
|
||
|
// n = C* * 10^(e+x)
|
||
|
|
||
|
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
|
||
|
res = P128.w[1];
|
||
|
fstar.w[1] = 0;
|
||
|
fstar.w[0] = P128.w[0];
|
||
|
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
|
||
|
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
|
||
|
res = (P128.w[1] >> shift);
|
||
|
fstar.w[1] = P128.w[1] & maskhigh128[ind - 1];
|
||
|
fstar.w[0] = P128.w[0];
|
||
|
}
|
||
|
// midpoints are already rounded correctly
|
||
|
// determine inexactness of the rounding of C*
|
||
|
// if (0 < f* - 1/2 < 10^(-x)) then
|
||
|
// the result is exact
|
||
|
// else // if (f* - 1/2 > T*) then
|
||
|
// the result is inexact
|
||
|
if (ind - 1 <= 2) {
|
||
|
if (fstar.w[0] > 0x8000000000000000ull) {
|
||
|
// f* > 1/2 and the result may be exact
|
||
|
// fstar.w[0] - 0x8000000000000000ull is f* - 1/2
|
||
|
if ((fstar.w[0] - 0x8000000000000000ull) > ten2mk64[ind - 1]) {
|
||
|
// set the inexact flag
|
||
|
*pfpsf |= INEXACT_EXCEPTION;
|
||
|
} // else the result is exact
|
||
|
} else { // the result is inexact; f2* <= 1/2
|
||
|
// set the inexact flag
|
||
|
*pfpsf |= INEXACT_EXCEPTION;
|
||
|
}
|
||
|
} else { // if 3 <= ind - 1 <= 21
|
||
|
if (fstar.w[1] > onehalf128[ind - 1] ||
|
||
|
(fstar.w[1] == onehalf128[ind - 1] && fstar.w[0])) {
|
||
|
// f2* > 1/2 and the result may be exact
|
||
|
// Calculate f2* - 1/2
|
||
|
if (fstar.w[1] > onehalf128[ind - 1]
|
||
|
|| fstar.w[0] > ten2mk64[ind - 1]) {
|
||
|
// set the inexact flag
|
||
|
*pfpsf |= INEXACT_EXCEPTION;
|
||
|
} // else the result is exact
|
||
|
} else { // the result is inexact; f2* <= 1/2
|
||
|
// set the inexact flag
|
||
|
*pfpsf |= INEXACT_EXCEPTION;
|
||
|
}
|
||
|
}
|
||
|
// set exponent to zero as it was negative before.
|
||
|
res = x_sign | 0x31c0000000000000ull | res;
|
||
|
BID_RETURN (res);
|
||
|
} else { // if exp < 0 and q + exp < 0
|
||
|
// the result is +0 or -0
|
||
|
res = x_sign | 0x31c0000000000000ull;
|
||
|
*pfpsf |= INEXACT_EXCEPTION;
|
||
|
BID_RETURN (res);
|
||
|
}
|
||
|
break;
|
||
|
case ROUNDING_DOWN:
|
||
|
if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
|
||
|
// need to shift right -exp digits from the coefficient; exp will be 0
|
||
|
ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x'
|
||
|
// chop off ind digits from the lower part of C1
|
||
|
// C1 fits in 64 bits
|
||
|
// calculate C* and f*
|
||
|
// C* is actually floor(C*) in this case
|
||
|
// C* and f* need shifting and masking, as shown by
|
||
|
// shiftright128[] and maskhigh128[]
|
||
|
// 1 <= x <= 16
|
||
|
// kx = 10^(-x) = ten2mk64[ind - 1]
|
||
|
// C* = C1 * 10^(-x)
|
||
|
// the approximation of 10^(-x) was rounded up to 64 bits
|
||
|
__mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]);
|
||
|
|
||
|
// C* = floor(C*) (logical right shift; C has p decimal digits,
|
||
|
// correct by Property 1)
|
||
|
// if (0 < f* < 10^(-x)) then the result is exact
|
||
|
// n = C* * 10^(e+x)
|
||
|
|
||
|
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
|
||
|
res = P128.w[1];
|
||
|
fstar.w[1] = 0;
|
||
|
fstar.w[0] = P128.w[0];
|
||
|
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
|
||
|
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
|
||
|
res = (P128.w[1] >> shift);
|
||
|
fstar.w[1] = P128.w[1] & maskhigh128[ind - 1];
|
||
|
fstar.w[0] = P128.w[0];
|
||
|
}
|
||
|
// if (f* > 10^(-x)) then the result is inexact
|
||
|
if ((fstar.w[1] != 0) || (fstar.w[0] >= ten2mk64[ind - 1])) {
|
||
|
if (x_sign) {
|
||
|
// if negative and not exact, increment magnitude
|
||
|
res++;
|
||
|
}
|
||
|
*pfpsf |= INEXACT_EXCEPTION;
|
||
|
}
|
||
|
// set exponent to zero as it was negative before.
|
||
|
res = x_sign | 0x31c0000000000000ull | res;
|
||
|
BID_RETURN (res);
|
||
|
} else { // if exp < 0 and q + exp <= 0
|
||
|
// the result is +0 or -1
|
||
|
if (x_sign) {
|
||
|
res = 0xb1c0000000000001ull;
|
||
|
} else {
|
||
|
res = 0x31c0000000000000ull;
|
||
|
}
|
||
|
*pfpsf |= INEXACT_EXCEPTION;
|
||
|
BID_RETURN (res);
|
||
|
}
|
||
|
break;
|
||
|
case ROUNDING_UP:
|
||
|
if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
|
||
|
// need to shift right -exp digits from the coefficient; exp will be 0
|
||
|
ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x'
|
||
|
// chop off ind digits from the lower part of C1
|
||
|
// C1 fits in 64 bits
|
||
|
// calculate C* and f*
|
||
|
// C* is actually floor(C*) in this case
|
||
|
// C* and f* need shifting and masking, as shown by
|
||
|
// shiftright128[] and maskhigh128[]
|
||
|
// 1 <= x <= 16
|
||
|
// kx = 10^(-x) = ten2mk64[ind - 1]
|
||
|
// C* = C1 * 10^(-x)
|
||
|
// the approximation of 10^(-x) was rounded up to 64 bits
|
||
|
__mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]);
|
||
|
|
||
|
// C* = floor(C*) (logical right shift; C has p decimal digits,
|
||
|
// correct by Property 1)
|
||
|
// if (0 < f* < 10^(-x)) then the result is exact
|
||
|
// n = C* * 10^(e+x)
|
||
|
|
||
|
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
|
||
|
res = P128.w[1];
|
||
|
fstar.w[1] = 0;
|
||
|
fstar.w[0] = P128.w[0];
|
||
|
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
|
||
|
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
|
||
|
res = (P128.w[1] >> shift);
|
||
|
fstar.w[1] = P128.w[1] & maskhigh128[ind - 1];
|
||
|
fstar.w[0] = P128.w[0];
|
||
|
}
|
||
|
// if (f* > 10^(-x)) then the result is inexact
|
||
|
if ((fstar.w[1] != 0) || (fstar.w[0] >= ten2mk64[ind - 1])) {
|
||
|
if (!x_sign) {
|
||
|
// if positive and not exact, increment magnitude
|
||
|
res++;
|
||
|
}
|
||
|
*pfpsf |= INEXACT_EXCEPTION;
|
||
|
}
|
||
|
// set exponent to zero as it was negative before.
|
||
|
res = x_sign | 0x31c0000000000000ull | res;
|
||
|
BID_RETURN (res);
|
||
|
} else { // if exp < 0 and q + exp <= 0
|
||
|
// the result is -0 or +1
|
||
|
if (x_sign) {
|
||
|
res = 0xb1c0000000000000ull;
|
||
|
} else {
|
||
|
res = 0x31c0000000000001ull;
|
||
|
}
|
||
|
*pfpsf |= INEXACT_EXCEPTION;
|
||
|
BID_RETURN (res);
|
||
|
}
|
||
|
break;
|
||
|
case ROUNDING_TO_ZERO:
|
||
|
if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
|
||
|
// need to shift right -exp digits from the coefficient; exp will be 0
|
||
|
ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x'
|
||
|
// chop off ind digits from the lower part of C1
|
||
|
// C1 fits in 127 bits
|
||
|
// calculate C* and f*
|
||
|
// C* is actually floor(C*) in this case
|
||
|
// C* and f* need shifting and masking, as shown by
|
||
|
// shiftright128[] and maskhigh128[]
|
||
|
// 1 <= x <= 16
|
||
|
// kx = 10^(-x) = ten2mk64[ind - 1]
|
||
|
// C* = C1 * 10^(-x)
|
||
|
// the approximation of 10^(-x) was rounded up to 64 bits
|
||
|
__mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]);
|
||
|
|
||
|
// C* = floor(C*) (logical right shift; C has p decimal digits,
|
||
|
// correct by Property 1)
|
||
|
// if (0 < f* < 10^(-x)) then the result is exact
|
||
|
// n = C* * 10^(e+x)
|
||
|
|
||
|
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
|
||
|
res = P128.w[1];
|
||
|
fstar.w[1] = 0;
|
||
|
fstar.w[0] = P128.w[0];
|
||
|
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
|
||
|
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
|
||
|
res = (P128.w[1] >> shift);
|
||
|
fstar.w[1] = P128.w[1] & maskhigh128[ind - 1];
|
||
|
fstar.w[0] = P128.w[0];
|
||
|
}
|
||
|
// if (f* > 10^(-x)) then the result is inexact
|
||
|
if ((fstar.w[1] != 0) || (fstar.w[0] >= ten2mk64[ind - 1])) {
|
||
|
*pfpsf |= INEXACT_EXCEPTION;
|
||
|
}
|
||
|
// set exponent to zero as it was negative before.
|
||
|
res = x_sign | 0x31c0000000000000ull | res;
|
||
|
BID_RETURN (res);
|
||
|
} else { // if exp < 0 and q + exp < 0
|
||
|
// the result is +0 or -0
|
||
|
res = x_sign | 0x31c0000000000000ull;
|
||
|
*pfpsf |= INEXACT_EXCEPTION;
|
||
|
BID_RETURN (res);
|
||
|
}
|
||
|
break;
|
||
|
} // end switch ()
|
||
|
BID_RETURN (res);
|
||
|
}
|
||
|
|
||
|
/*****************************************************************************
|
||
|
* BID64_round_integral_nearest_even
|
||
|
****************************************************************************/
|
||
|
|
||
|
#if DECIMAL_CALL_BY_REFERENCE
|
||
|
void
|
||
|
bid64_round_integral_nearest_even (UINT64 * pres,
|
||
|
UINT64 *
|
||
|
px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
||
|
_EXC_INFO_PARAM) {
|
||
|
UINT64 x = *px;
|
||
|
#else
|
||
|
UINT64
|
||
|
bid64_round_integral_nearest_even (UINT64 x _EXC_FLAGS_PARAM
|
||
|
_EXC_MASKS_PARAM _EXC_INFO_PARAM) {
|
||
|
#endif
|
||
|
|
||
|
UINT64 res = 0xbaddbaddbaddbaddull;
|
||
|
UINT64 x_sign;
|
||
|
int exp; // unbiased exponent
|
||
|
// Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
|
||
|
BID_UI64DOUBLE tmp1;
|
||
|
int x_nr_bits;
|
||
|
int q, ind, shift;
|
||
|
UINT64 C1;
|
||
|
UINT128 fstar;
|
||
|
UINT128 P128;
|
||
|
|
||
|
x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
|
||
|
|
||
|
// check for NaNs and infinities
|
||
|
if ((x & MASK_NAN) == MASK_NAN) { // check for NaN
|
||
|
if ((x & 0x0003ffffffffffffull) > 999999999999999ull)
|
||
|
x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits
|
||
|
else
|
||
|
x = x & 0xfe03ffffffffffffull; // clear G6-G12
|
||
|
if ((x & MASK_SNAN) == MASK_SNAN) { // SNaN
|
||
|
// set invalid flag
|
||
|
*pfpsf |= INVALID_EXCEPTION;
|
||
|
// return quiet (SNaN)
|
||
|
res = x & 0xfdffffffffffffffull;
|
||
|
} else { // QNaN
|
||
|
res = x;
|
||
|
}
|
||
|
BID_RETURN (res);
|
||
|
} else if ((x & MASK_INF) == MASK_INF) { // check for Infinity
|
||
|
res = x_sign | 0x7800000000000000ull;
|
||
|
BID_RETURN (res);
|
||
|
}
|
||
|
// unpack x
|
||
|
if ((x & MASK_STEERING_BITS) == MASK_STEERING_BITS) {
|
||
|
// if the steering bits are 11 (condition will be 0), then
|
||
|
// the exponent is G[0:w+1]
|
||
|
exp = ((x & MASK_BINARY_EXPONENT2) >> 51) - 398;
|
||
|
C1 = (x & MASK_BINARY_SIG2) | MASK_BINARY_OR2;
|
||
|
if (C1 > 9999999999999999ull) { // non-canonical
|
||
|
C1 = 0;
|
||
|
}
|
||
|
} else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
|
||
|
exp = ((x & MASK_BINARY_EXPONENT1) >> 53) - 398;
|
||
|
C1 = (x & MASK_BINARY_SIG1);
|
||
|
}
|
||
|
|
||
|
// if x is 0 or non-canonical
|
||
|
if (C1 == 0) {
|
||
|
if (exp < 0)
|
||
|
exp = 0;
|
||
|
res = x_sign | (((UINT64) exp + 398) << 53);
|
||
|
BID_RETURN (res);
|
||
|
}
|
||
|
// x is a finite non-zero number (not 0, non-canonical, or special)
|
||
|
|
||
|
// return 0 if (exp <= -(p+1))
|
||
|
if (exp <= -17) {
|
||
|
res = x_sign | 0x31c0000000000000ull;
|
||
|
BID_RETURN (res);
|
||
|
}
|
||
|
// q = nr. of decimal digits in x (1 <= q <= 54)
|
||
|
// determine first the nr. of bits in x
|
||
|
if (C1 >= 0x0020000000000000ull) { // x >= 2^53
|
||
|
q = 16;
|
||
|
} else { // if x < 2^53
|
||
|
tmp1.d = (double) C1; // exact conversion
|
||
|
x_nr_bits =
|
||
|
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
||
|
q = nr_digits[x_nr_bits - 1].digits;
|
||
|
if (q == 0) {
|
||
|
q = nr_digits[x_nr_bits - 1].digits1;
|
||
|
if (C1 >= nr_digits[x_nr_bits - 1].threshold_lo)
|
||
|
q++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (exp >= 0) { // -exp <= 0
|
||
|
// the argument is an integer already
|
||
|
res = x;
|
||
|
BID_RETURN (res);
|
||
|
} else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
|
||
|
// need to shift right -exp digits from the coefficient; the exp will be 0
|
||
|
ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x'
|
||
|
// chop off ind digits from the lower part of C1
|
||
|
// C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits
|
||
|
// FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
|
||
|
C1 = C1 + midpoint64[ind - 1];
|
||
|
// calculate C* and f*
|
||
|
// C* is actually floor(C*) in this case
|
||
|
// C* and f* need shifting and masking, as shown by
|
||
|
// shiftright128[] and maskhigh128[]
|
||
|
// 1 <= x <= 16
|
||
|
// kx = 10^(-x) = ten2mk64[ind - 1]
|
||
|
// C* = (C1 + 1/2 * 10^x) * 10^(-x)
|
||
|
// the approximation of 10^(-x) was rounded up to 64 bits
|
||
|
__mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]);
|
||
|
|
||
|
// if (0 < f* < 10^(-x)) then the result is a midpoint
|
||
|
// if floor(C*) is even then C* = floor(C*) - logical right
|
||
|
// shift; C* has p decimal digits, correct by Prop. 1)
|
||
|
// else if floor(C*) is odd C* = floor(C*)-1 (logical right
|
||
|
// shift; C* has p decimal digits, correct by Pr. 1)
|
||
|
// else
|
||
|
// C* = floor(C*) (logical right shift; C has p decimal digits,
|
||
|
// correct by Property 1)
|
||
|
// n = C* * 10^(e+x)
|
||
|
|
||
|
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
|
||
|
res = P128.w[1];
|
||
|
fstar.w[1] = 0;
|
||
|
fstar.w[0] = P128.w[0];
|
||
|
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
|
||
|
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
|
||
|
res = (P128.w[1] >> shift);
|
||
|
fstar.w[1] = P128.w[1] & maskhigh128[ind - 1];
|
||
|
fstar.w[0] = P128.w[0];
|
||
|
}
|
||
|
// if (0 < f* < 10^(-x)) then the result is a midpoint
|
||
|
// since round_to_even, subtract 1 if current result is odd
|
||
|
if ((res & 0x0000000000000001ull) && (fstar.w[1] == 0)
|
||
|
&& (fstar.w[0] < ten2mk64[ind - 1])) {
|
||
|
res--;
|
||
|
}
|
||
|
// set exponent to zero as it was negative before.
|
||
|
res = x_sign | 0x31c0000000000000ull | res;
|
||
|
BID_RETURN (res);
|
||
|
} else { // if exp < 0 and q + exp < 0
|
||
|
// the result is +0 or -0
|
||
|
res = x_sign | 0x31c0000000000000ull;
|
||
|
BID_RETURN (res);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*****************************************************************************
|
||
|
* BID64_round_integral_negative
|
||
|
*****************************************************************************/
|
||
|
|
||
|
#if DECIMAL_CALL_BY_REFERENCE
|
||
|
void
|
||
|
bid64_round_integral_negative (UINT64 * pres,
|
||
|
UINT64 *
|
||
|
px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
||
|
_EXC_INFO_PARAM) {
|
||
|
UINT64 x = *px;
|
||
|
#else
|
||
|
UINT64
|
||
|
bid64_round_integral_negative (UINT64 x _EXC_FLAGS_PARAM
|
||
|
_EXC_MASKS_PARAM _EXC_INFO_PARAM) {
|
||
|
#endif
|
||
|
|
||
|
UINT64 res = 0xbaddbaddbaddbaddull;
|
||
|
UINT64 x_sign;
|
||
|
int exp; // unbiased exponent
|
||
|
// Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
|
||
|
BID_UI64DOUBLE tmp1;
|
||
|
int x_nr_bits;
|
||
|
int q, ind, shift;
|
||
|
UINT64 C1;
|
||
|
// UINT64 res is C* at first - represents up to 34 decimal digits ~ 113 bits
|
||
|
UINT128 fstar;
|
||
|
UINT128 P128;
|
||
|
|
||
|
x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
|
||
|
|
||
|
// check for NaNs and infinities
|
||
|
if ((x & MASK_NAN) == MASK_NAN) { // check for NaN
|
||
|
if ((x & 0x0003ffffffffffffull) > 999999999999999ull)
|
||
|
x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits
|
||
|
else
|
||
|
x = x & 0xfe03ffffffffffffull; // clear G6-G12
|
||
|
if ((x & MASK_SNAN) == MASK_SNAN) { // SNaN
|
||
|
// set invalid flag
|
||
|
*pfpsf |= INVALID_EXCEPTION;
|
||
|
// return quiet (SNaN)
|
||
|
res = x & 0xfdffffffffffffffull;
|
||
|
} else { // QNaN
|
||
|
res = x;
|
||
|
}
|
||
|
BID_RETURN (res);
|
||
|
} else if ((x & MASK_INF) == MASK_INF) { // check for Infinity
|
||
|
res = x_sign | 0x7800000000000000ull;
|
||
|
BID_RETURN (res);
|
||
|
}
|
||
|
// unpack x
|
||
|
if ((x & MASK_STEERING_BITS) == MASK_STEERING_BITS) {
|
||
|
// if the steering bits are 11 (condition will be 0), then
|
||
|
// the exponent is G[0:w+1]
|
||
|
exp = ((x & MASK_BINARY_EXPONENT2) >> 51) - 398;
|
||
|
C1 = (x & MASK_BINARY_SIG2) | MASK_BINARY_OR2;
|
||
|
if (C1 > 9999999999999999ull) { // non-canonical
|
||
|
C1 = 0;
|
||
|
}
|
||
|
} else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
|
||
|
exp = ((x & MASK_BINARY_EXPONENT1) >> 53) - 398;
|
||
|
C1 = (x & MASK_BINARY_SIG1);
|
||
|
}
|
||
|
|
||
|
// if x is 0 or non-canonical
|
||
|
if (C1 == 0) {
|
||
|
if (exp < 0)
|
||
|
exp = 0;
|
||
|
res = x_sign | (((UINT64) exp + 398) << 53);
|
||
|
BID_RETURN (res);
|
||
|
}
|
||
|
// x is a finite non-zero number (not 0, non-canonical, or special)
|
||
|
|
||
|
// return 0 if (exp <= -p)
|
||
|
if (exp <= -16) {
|
||
|
if (x_sign) {
|
||
|
res = 0xb1c0000000000001ull;
|
||
|
} else {
|
||
|
res = 0x31c0000000000000ull;
|
||
|
}
|
||
|
BID_RETURN (res);
|
||
|
}
|
||
|
// q = nr. of decimal digits in x (1 <= q <= 54)
|
||
|
// determine first the nr. of bits in x
|
||
|
if (C1 >= 0x0020000000000000ull) { // x >= 2^53
|
||
|
q = 16;
|
||
|
} else { // if x < 2^53
|
||
|
tmp1.d = (double) C1; // exact conversion
|
||
|
x_nr_bits =
|
||
|
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
||
|
q = nr_digits[x_nr_bits - 1].digits;
|
||
|
if (q == 0) {
|
||
|
q = nr_digits[x_nr_bits - 1].digits1;
|
||
|
if (C1 >= nr_digits[x_nr_bits - 1].threshold_lo)
|
||
|
q++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (exp >= 0) { // -exp <= 0
|
||
|
// the argument is an integer already
|
||
|
res = x;
|
||
|
BID_RETURN (res);
|
||
|
} else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
|
||
|
// need to shift right -exp digits from the coefficient; the exp will be 0
|
||
|
ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x'
|
||
|
// chop off ind digits from the lower part of C1
|
||
|
// C1 fits in 64 bits
|
||
|
// calculate C* and f*
|
||
|
// C* is actually floor(C*) in this case
|
||
|
// C* and f* need shifting and masking, as shown by
|
||
|
// shiftright128[] and maskhigh128[]
|
||
|
// 1 <= x <= 16
|
||
|
// kx = 10^(-x) = ten2mk64[ind - 1]
|
||
|
// C* = C1 * 10^(-x)
|
||
|
// the approximation of 10^(-x) was rounded up to 64 bits
|
||
|
__mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]);
|
||
|
|
||
|
// C* = floor(C*) (logical right shift; C has p decimal digits,
|
||
|
// correct by Property 1)
|
||
|
// if (0 < f* < 10^(-x)) then the result is exact
|
||
|
// n = C* * 10^(e+x)
|
||
|
|
||
|
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
|
||
|
res = P128.w[1];
|
||
|
fstar.w[1] = 0;
|
||
|
fstar.w[0] = P128.w[0];
|
||
|
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
|
||
|
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
|
||
|
res = (P128.w[1] >> shift);
|
||
|
fstar.w[1] = P128.w[1] & maskhigh128[ind - 1];
|
||
|
fstar.w[0] = P128.w[0];
|
||
|
}
|
||
|
// if (f* > 10^(-x)) then the result is inexact
|
||
|
if (x_sign
|
||
|
&& ((fstar.w[1] != 0) || (fstar.w[0] >= ten2mk64[ind - 1]))) {
|
||
|
// if negative and not exact, increment magnitude
|
||
|
res++;
|
||
|
}
|
||
|
// set exponent to zero as it was negative before.
|
||
|
res = x_sign | 0x31c0000000000000ull | res;
|
||
|
BID_RETURN (res);
|
||
|
} else { // if exp < 0 and q + exp <= 0
|
||
|
// the result is +0 or -1
|
||
|
if (x_sign) {
|
||
|
res = 0xb1c0000000000001ull;
|
||
|
} else {
|
||
|
res = 0x31c0000000000000ull;
|
||
|
}
|
||
|
BID_RETURN (res);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*****************************************************************************
|
||
|
* BID64_round_integral_positive
|
||
|
****************************************************************************/
|
||
|
|
||
|
#if DECIMAL_CALL_BY_REFERENCE
|
||
|
void
|
||
|
bid64_round_integral_positive (UINT64 * pres,
|
||
|
UINT64 *
|
||
|
px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
||
|
_EXC_INFO_PARAM) {
|
||
|
UINT64 x = *px;
|
||
|
#else
|
||
|
UINT64
|
||
|
bid64_round_integral_positive (UINT64 x _EXC_FLAGS_PARAM
|
||
|
_EXC_MASKS_PARAM _EXC_INFO_PARAM) {
|
||
|
#endif
|
||
|
|
||
|
UINT64 res = 0xbaddbaddbaddbaddull;
|
||
|
UINT64 x_sign;
|
||
|
int exp; // unbiased exponent
|
||
|
// Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
|
||
|
BID_UI64DOUBLE tmp1;
|
||
|
int x_nr_bits;
|
||
|
int q, ind, shift;
|
||
|
UINT64 C1;
|
||
|
// UINT64 res is C* at first - represents up to 34 decimal digits ~ 113 bits
|
||
|
UINT128 fstar;
|
||
|
UINT128 P128;
|
||
|
|
||
|
x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
|
||
|
|
||
|
// check for NaNs and infinities
|
||
|
if ((x & MASK_NAN) == MASK_NAN) { // check for NaN
|
||
|
if ((x & 0x0003ffffffffffffull) > 999999999999999ull)
|
||
|
x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits
|
||
|
else
|
||
|
x = x & 0xfe03ffffffffffffull; // clear G6-G12
|
||
|
if ((x & MASK_SNAN) == MASK_SNAN) { // SNaN
|
||
|
// set invalid flag
|
||
|
*pfpsf |= INVALID_EXCEPTION;
|
||
|
// return quiet (SNaN)
|
||
|
res = x & 0xfdffffffffffffffull;
|
||
|
} else { // QNaN
|
||
|
res = x;
|
||
|
}
|
||
|
BID_RETURN (res);
|
||
|
} else if ((x & MASK_INF) == MASK_INF) { // check for Infinity
|
||
|
res = x_sign | 0x7800000000000000ull;
|
||
|
BID_RETURN (res);
|
||
|
}
|
||
|
// unpack x
|
||
|
if ((x & MASK_STEERING_BITS) == MASK_STEERING_BITS) {
|
||
|
// if the steering bits are 11 (condition will be 0), then
|
||
|
// the exponent is G[0:w+1]
|
||
|
exp = ((x & MASK_BINARY_EXPONENT2) >> 51) - 398;
|
||
|
C1 = (x & MASK_BINARY_SIG2) | MASK_BINARY_OR2;
|
||
|
if (C1 > 9999999999999999ull) { // non-canonical
|
||
|
C1 = 0;
|
||
|
}
|
||
|
} else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
|
||
|
exp = ((x & MASK_BINARY_EXPONENT1) >> 53) - 398;
|
||
|
C1 = (x & MASK_BINARY_SIG1);
|
||
|
}
|
||
|
|
||
|
// if x is 0 or non-canonical
|
||
|
if (C1 == 0) {
|
||
|
if (exp < 0)
|
||
|
exp = 0;
|
||
|
res = x_sign | (((UINT64) exp + 398) << 53);
|
||
|
BID_RETURN (res);
|
||
|
}
|
||
|
// x is a finite non-zero number (not 0, non-canonical, or special)
|
||
|
|
||
|
// return 0 if (exp <= -p)
|
||
|
if (exp <= -16) {
|
||
|
if (x_sign) {
|
||
|
res = 0xb1c0000000000000ull;
|
||
|
} else {
|
||
|
res = 0x31c0000000000001ull;
|
||
|
}
|
||
|
BID_RETURN (res);
|
||
|
}
|
||
|
// q = nr. of decimal digits in x (1 <= q <= 54)
|
||
|
// determine first the nr. of bits in x
|
||
|
if (C1 >= 0x0020000000000000ull) { // x >= 2^53
|
||
|
q = 16;
|
||
|
} else { // if x < 2^53
|
||
|
tmp1.d = (double) C1; // exact conversion
|
||
|
x_nr_bits =
|
||
|
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
||
|
q = nr_digits[x_nr_bits - 1].digits;
|
||
|
if (q == 0) {
|
||
|
q = nr_digits[x_nr_bits - 1].digits1;
|
||
|
if (C1 >= nr_digits[x_nr_bits - 1].threshold_lo)
|
||
|
q++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (exp >= 0) { // -exp <= 0
|
||
|
// the argument is an integer already
|
||
|
res = x;
|
||
|
BID_RETURN (res);
|
||
|
} else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
|
||
|
// need to shift right -exp digits from the coefficient; the exp will be 0
|
||
|
ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x'
|
||
|
// chop off ind digits from the lower part of C1
|
||
|
// C1 fits in 64 bits
|
||
|
// calculate C* and f*
|
||
|
// C* is actually floor(C*) in this case
|
||
|
// C* and f* need shifting and masking, as shown by
|
||
|
// shiftright128[] and maskhigh128[]
|
||
|
// 1 <= x <= 16
|
||
|
// kx = 10^(-x) = ten2mk64[ind - 1]
|
||
|
// C* = C1 * 10^(-x)
|
||
|
// the approximation of 10^(-x) was rounded up to 64 bits
|
||
|
__mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]);
|
||
|
|
||
|
// C* = floor(C*) (logical right shift; C has p decimal digits,
|
||
|
// correct by Property 1)
|
||
|
// if (0 < f* < 10^(-x)) then the result is exact
|
||
|
// n = C* * 10^(e+x)
|
||
|
|
||
|
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
|
||
|
res = P128.w[1];
|
||
|
fstar.w[1] = 0;
|
||
|
fstar.w[0] = P128.w[0];
|
||
|
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
|
||
|
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
|
||
|
res = (P128.w[1] >> shift);
|
||
|
fstar.w[1] = P128.w[1] & maskhigh128[ind - 1];
|
||
|
fstar.w[0] = P128.w[0];
|
||
|
}
|
||
|
// if (f* > 10^(-x)) then the result is inexact
|
||
|
if (!x_sign
|
||
|
&& ((fstar.w[1] != 0) || (fstar.w[0] >= ten2mk64[ind - 1]))) {
|
||
|
// if positive and not exact, increment magnitude
|
||
|
res++;
|
||
|
}
|
||
|
// set exponent to zero as it was negative before.
|
||
|
res = x_sign | 0x31c0000000000000ull | res;
|
||
|
BID_RETURN (res);
|
||
|
} else { // if exp < 0 and q + exp <= 0
|
||
|
// the result is -0 or +1
|
||
|
if (x_sign) {
|
||
|
res = 0xb1c0000000000000ull;
|
||
|
} else {
|
||
|
res = 0x31c0000000000001ull;
|
||
|
}
|
||
|
BID_RETURN (res);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*****************************************************************************
|
||
|
* BID64_round_integral_zero
|
||
|
****************************************************************************/
|
||
|
|
||
|
#if DECIMAL_CALL_BY_REFERENCE
|
||
|
void
|
||
|
bid64_round_integral_zero (UINT64 * pres,
|
||
|
UINT64 *
|
||
|
px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
||
|
_EXC_INFO_PARAM) {
|
||
|
UINT64 x = *px;
|
||
|
#else
|
||
|
UINT64
|
||
|
bid64_round_integral_zero (UINT64 x _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
||
|
_EXC_INFO_PARAM) {
|
||
|
#endif
|
||
|
|
||
|
UINT64 res = 0xbaddbaddbaddbaddull;
|
||
|
UINT64 x_sign;
|
||
|
int exp; // unbiased exponent
|
||
|
// Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
|
||
|
BID_UI64DOUBLE tmp1;
|
||
|
int x_nr_bits;
|
||
|
int q, ind, shift;
|
||
|
UINT64 C1;
|
||
|
// UINT64 res is C* at first - represents up to 34 decimal digits ~ 113 bits
|
||
|
UINT128 P128;
|
||
|
|
||
|
x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
|
||
|
|
||
|
// check for NaNs and infinities
|
||
|
if ((x & MASK_NAN) == MASK_NAN) { // check for NaN
|
||
|
if ((x & 0x0003ffffffffffffull) > 999999999999999ull)
|
||
|
x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits
|
||
|
else
|
||
|
x = x & 0xfe03ffffffffffffull; // clear G6-G12
|
||
|
if ((x & MASK_SNAN) == MASK_SNAN) { // SNaN
|
||
|
// set invalid flag
|
||
|
*pfpsf |= INVALID_EXCEPTION;
|
||
|
// return quiet (SNaN)
|
||
|
res = x & 0xfdffffffffffffffull;
|
||
|
} else { // QNaN
|
||
|
res = x;
|
||
|
}
|
||
|
BID_RETURN (res);
|
||
|
} else if ((x & MASK_INF) == MASK_INF) { // check for Infinity
|
||
|
res = x_sign | 0x7800000000000000ull;
|
||
|
BID_RETURN (res);
|
||
|
}
|
||
|
// unpack x
|
||
|
if ((x & MASK_STEERING_BITS) == MASK_STEERING_BITS) {
|
||
|
// if the steering bits are 11 (condition will be 0), then
|
||
|
// the exponent is G[0:w+1]
|
||
|
exp = ((x & MASK_BINARY_EXPONENT2) >> 51) - 398;
|
||
|
C1 = (x & MASK_BINARY_SIG2) | MASK_BINARY_OR2;
|
||
|
if (C1 > 9999999999999999ull) { // non-canonical
|
||
|
C1 = 0;
|
||
|
}
|
||
|
} else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
|
||
|
exp = ((x & MASK_BINARY_EXPONENT1) >> 53) - 398;
|
||
|
C1 = (x & MASK_BINARY_SIG1);
|
||
|
}
|
||
|
|
||
|
// if x is 0 or non-canonical
|
||
|
if (C1 == 0) {
|
||
|
if (exp < 0)
|
||
|
exp = 0;
|
||
|
res = x_sign | (((UINT64) exp + 398) << 53);
|
||
|
BID_RETURN (res);
|
||
|
}
|
||
|
// x is a finite non-zero number (not 0, non-canonical, or special)
|
||
|
|
||
|
// return 0 if (exp <= -p)
|
||
|
if (exp <= -16) {
|
||
|
res = x_sign | 0x31c0000000000000ull;
|
||
|
BID_RETURN (res);
|
||
|
}
|
||
|
// q = nr. of decimal digits in x (1 <= q <= 54)
|
||
|
// determine first the nr. of bits in x
|
||
|
if (C1 >= 0x0020000000000000ull) { // x >= 2^53
|
||
|
q = 16;
|
||
|
} else { // if x < 2^53
|
||
|
tmp1.d = (double) C1; // exact conversion
|
||
|
x_nr_bits =
|
||
|
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
||
|
q = nr_digits[x_nr_bits - 1].digits;
|
||
|
if (q == 0) {
|
||
|
q = nr_digits[x_nr_bits - 1].digits1;
|
||
|
if (C1 >= nr_digits[x_nr_bits - 1].threshold_lo)
|
||
|
q++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (exp >= 0) { // -exp <= 0
|
||
|
// the argument is an integer already
|
||
|
res = x;
|
||
|
BID_RETURN (res);
|
||
|
} else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
|
||
|
// need to shift right -exp digits from the coefficient; the exp will be 0
|
||
|
ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x'
|
||
|
// chop off ind digits from the lower part of C1
|
||
|
// C1 fits in 127 bits
|
||
|
// calculate C* and f*
|
||
|
// C* is actually floor(C*) in this case
|
||
|
// C* and f* need shifting and masking, as shown by
|
||
|
// shiftright128[] and maskhigh128[]
|
||
|
// 1 <= x <= 16
|
||
|
// kx = 10^(-x) = ten2mk64[ind - 1]
|
||
|
// C* = C1 * 10^(-x)
|
||
|
// the approximation of 10^(-x) was rounded up to 64 bits
|
||
|
__mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]);
|
||
|
|
||
|
// C* = floor(C*) (logical right shift; C has p decimal digits,
|
||
|
// correct by Property 1)
|
||
|
// if (0 < f* < 10^(-x)) then the result is exact
|
||
|
// n = C* * 10^(e+x)
|
||
|
|
||
|
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
|
||
|
res = P128.w[1];
|
||
|
// redundant fstar.w[1] = 0;
|
||
|
// redundant fstar.w[0] = P128.w[0];
|
||
|
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
|
||
|
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
|
||
|
res = (P128.w[1] >> shift);
|
||
|
// redundant fstar.w[1] = P128.w[1] & maskhigh128[ind - 1];
|
||
|
// redundant fstar.w[0] = P128.w[0];
|
||
|
}
|
||
|
// if (f* > 10^(-x)) then the result is inexact
|
||
|
// if ((fstar.w[1] != 0) || (fstar.w[0] >= ten2mk64[ind-1])){
|
||
|
// // redundant
|
||
|
// }
|
||
|
// set exponent to zero as it was negative before.
|
||
|
res = x_sign | 0x31c0000000000000ull | res;
|
||
|
BID_RETURN (res);
|
||
|
} else { // if exp < 0 and q + exp < 0
|
||
|
// the result is +0 or -0
|
||
|
res = x_sign | 0x31c0000000000000ull;
|
||
|
BID_RETURN (res);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*****************************************************************************
|
||
|
* BID64_round_integral_nearest_away
|
||
|
****************************************************************************/
|
||
|
|
||
|
#if DECIMAL_CALL_BY_REFERENCE
|
||
|
void
|
||
|
bid64_round_integral_nearest_away (UINT64 * pres,
|
||
|
UINT64 *
|
||
|
px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
|
||
|
_EXC_INFO_PARAM) {
|
||
|
UINT64 x = *px;
|
||
|
#else
|
||
|
UINT64
|
||
|
bid64_round_integral_nearest_away (UINT64 x _EXC_FLAGS_PARAM
|
||
|
_EXC_MASKS_PARAM _EXC_INFO_PARAM) {
|
||
|
#endif
|
||
|
|
||
|
UINT64 res = 0xbaddbaddbaddbaddull;
|
||
|
UINT64 x_sign;
|
||
|
int exp; // unbiased exponent
|
||
|
// Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
|
||
|
BID_UI64DOUBLE tmp1;
|
||
|
int x_nr_bits;
|
||
|
int q, ind, shift;
|
||
|
UINT64 C1;
|
||
|
UINT128 P128;
|
||
|
|
||
|
x_sign = x & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
|
||
|
|
||
|
// check for NaNs and infinities
|
||
|
if ((x & MASK_NAN) == MASK_NAN) { // check for NaN
|
||
|
if ((x & 0x0003ffffffffffffull) > 999999999999999ull)
|
||
|
x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits
|
||
|
else
|
||
|
x = x & 0xfe03ffffffffffffull; // clear G6-G12
|
||
|
if ((x & MASK_SNAN) == MASK_SNAN) { // SNaN
|
||
|
// set invalid flag
|
||
|
*pfpsf |= INVALID_EXCEPTION;
|
||
|
// return quiet (SNaN)
|
||
|
res = x & 0xfdffffffffffffffull;
|
||
|
} else { // QNaN
|
||
|
res = x;
|
||
|
}
|
||
|
BID_RETURN (res);
|
||
|
} else if ((x & MASK_INF) == MASK_INF) { // check for Infinity
|
||
|
res = x_sign | 0x7800000000000000ull;
|
||
|
BID_RETURN (res);
|
||
|
}
|
||
|
// unpack x
|
||
|
if ((x & MASK_STEERING_BITS) == MASK_STEERING_BITS) {
|
||
|
// if the steering bits are 11 (condition will be 0), then
|
||
|
// the exponent is G[0:w+1]
|
||
|
exp = ((x & MASK_BINARY_EXPONENT2) >> 51) - 398;
|
||
|
C1 = (x & MASK_BINARY_SIG2) | MASK_BINARY_OR2;
|
||
|
if (C1 > 9999999999999999ull) { // non-canonical
|
||
|
C1 = 0;
|
||
|
}
|
||
|
} else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
|
||
|
exp = ((x & MASK_BINARY_EXPONENT1) >> 53) - 398;
|
||
|
C1 = (x & MASK_BINARY_SIG1);
|
||
|
}
|
||
|
|
||
|
// if x is 0 or non-canonical
|
||
|
if (C1 == 0) {
|
||
|
if (exp < 0)
|
||
|
exp = 0;
|
||
|
res = x_sign | (((UINT64) exp + 398) << 53);
|
||
|
BID_RETURN (res);
|
||
|
}
|
||
|
// x is a finite non-zero number (not 0, non-canonical, or special)
|
||
|
|
||
|
// return 0 if (exp <= -(p+1))
|
||
|
if (exp <= -17) {
|
||
|
res = x_sign | 0x31c0000000000000ull;
|
||
|
BID_RETURN (res);
|
||
|
}
|
||
|
// q = nr. of decimal digits in x (1 <= q <= 54)
|
||
|
// determine first the nr. of bits in x
|
||
|
if (C1 >= 0x0020000000000000ull) { // x >= 2^53
|
||
|
q = 16;
|
||
|
} else { // if x < 2^53
|
||
|
tmp1.d = (double) C1; // exact conversion
|
||
|
x_nr_bits =
|
||
|
1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
|
||
|
q = nr_digits[x_nr_bits - 1].digits;
|
||
|
if (q == 0) {
|
||
|
q = nr_digits[x_nr_bits - 1].digits1;
|
||
|
if (C1 >= nr_digits[x_nr_bits - 1].threshold_lo)
|
||
|
q++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (exp >= 0) { // -exp <= 0
|
||
|
// the argument is an integer already
|
||
|
res = x;
|
||
|
BID_RETURN (res);
|
||
|
} else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
|
||
|
// need to shift right -exp digits from the coefficient; the exp will be 0
|
||
|
ind = -exp; // 1 <= ind <= 16; ind is a synonym for 'x'
|
||
|
// chop off ind digits from the lower part of C1
|
||
|
// C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits
|
||
|
// FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
|
||
|
C1 = C1 + midpoint64[ind - 1];
|
||
|
// calculate C* and f*
|
||
|
// C* is actually floor(C*) in this case
|
||
|
// C* and f* need shifting and masking, as shown by
|
||
|
// shiftright128[] and maskhigh128[]
|
||
|
// 1 <= x <= 16
|
||
|
// kx = 10^(-x) = ten2mk64[ind - 1]
|
||
|
// C* = (C1 + 1/2 * 10^x) * 10^(-x)
|
||
|
// the approximation of 10^(-x) was rounded up to 64 bits
|
||
|
__mul_64x64_to_128 (P128, C1, ten2mk64[ind - 1]);
|
||
|
|
||
|
// if (0 < f* < 10^(-x)) then the result is a midpoint
|
||
|
// C* = floor(C*) - logical right shift; C* has p decimal digits,
|
||
|
// correct by Prop. 1)
|
||
|
// else
|
||
|
// C* = floor(C*) (logical right shift; C has p decimal digits,
|
||
|
// correct by Property 1)
|
||
|
// n = C* * 10^(e+x)
|
||
|
|
||
|
if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
|
||
|
res = P128.w[1];
|
||
|
} else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
|
||
|
shift = shiftright128[ind - 1]; // 3 <= shift <= 63
|
||
|
res = (P128.w[1] >> shift);
|
||
|
}
|
||
|
// midpoints are already rounded correctly
|
||
|
// set exponent to zero as it was negative before.
|
||
|
res = x_sign | 0x31c0000000000000ull | res;
|
||
|
BID_RETURN (res);
|
||
|
} else { // if exp < 0 and q + exp < 0
|
||
|
// the result is +0 or -0
|
||
|
res = x_sign | 0x31c0000000000000ull;
|
||
|
BID_RETURN (res);
|
||
|
}
|
||
|
}
|