floating point update...

This commit is contained in:
Kelvin Sherlock
2016-11-13 11:02:57 -05:00
parent 3c1a33323e
commit 9a98bb40b6
3 changed files with 482 additions and 367 deletions
+249
View File
@@ -0,0 +1,249 @@
#include "floating_point.h"
#include <cstdint>
namespace floating_point {
// Decode a 4-byte single-precision value (host byte order) at vp into this
// object.
//
// On return:
//   sign - bit 31 of the input.
//   exp  - unbiased exponent (0 for zero, infinity and NaN; -126 for denormals).
//   sig  - fraction moved up by 40 bits so it sits directly below bit 63, with
//          bit 63 holding the leading one for normal numbers.  For NaN, sig is
//          the unshifted fraction with bit 22 cleared.
//   one  - true when the leading one bit is present.
//   nan / inf - classification flags; both are reset before decoding so a
//          previously used object does not keep stale flags.
void info::read_single(const void *vp) {
	uint32_t i;
	std::memcpy(&i, vp, 4);

	// Start from a clean classification: read_* may be called on a reused object.
	nan = false;
	inf = false;

	sign = i >> 31;
	one = 1;
	exp = (i >> 23) & ((1 << 8) - 1);
	sig = i & ((1 << 23) - 1);

	if (exp == 255) {
		// all-ones exponent: infinity (zero fraction) or NaN (non-zero fraction).
		exp = 0;
		if (sig == 0) inf = true;
		else {
			nan = true;
			sig &= ~(UINT64_C(1) << 22);
		}
		return;
	}

	if (exp == 0) {
		// zero exponent field: no leading one.
		one = 0;
		// +/-0.0 -- must not fall through to the normal path, which would set
		// bit 63 and turn zero into 1.0.
		if (sig == 0) return;

		// denormalized: same fraction alignment as a normal number.
		sig <<= 40;
		exp = -126;
		return;
	}

	exp -= 127; // remove bias
	// adjust to 64 bit significand and add the leading one.
	sig <<= 40;
	sig |= (UINT64_C(1) << 63);
}
// Decode an 8-byte double-precision value (host byte order) at vp into this
// object.
//
// On return:
//   sign - bit 63 of the input.
//   exp  - unbiased exponent (0 for zero, infinity and NaN; -1022 for denormals).
//   sig  - fraction moved up by 11 bits so it sits directly below bit 63, with
//          bit 63 holding the leading one for normal numbers.  For NaN, sig is
//          the unshifted fraction with bit 51 cleared.
//   one  - true when the leading one bit is present.
//   nan / inf - classification flags; both are reset before decoding so a
//          previously used object does not keep stale flags.
void info::read_double(const void *vp) {
	uint64_t i;
	std::memcpy(&i, vp, 8);

	// Start from a clean classification: read_* may be called on a reused object.
	nan = false;
	inf = false;

	sign = i >> 63;
	one = 1;
	exp = (i >> 52) & ((1 << 11) - 1);
	sig = i & ((UINT64_C(1) << 52) - 1);

	if (exp == 2047) {
		// all-ones exponent: infinity (zero fraction) or NaN (non-zero fraction).
		exp = 0;
		if (sig == 0) inf = true;
		else {
			nan = true;
			sig &= ~(UINT64_C(1) << 51);
		}
		return;
	}

	if (exp == 0) {
		// zero exponent field: no leading one.
		one = 0;
		// +/-0.0 -- must not fall through to the normal path, which would set
		// bit 63 and turn zero into 1.0.
		if (sig == 0) return;

		// denormalized: the fraction must be aligned exactly like the normal
		// case (shift of 11); a shift of 10 would halve the value.
		sig <<= 11;
		exp = -1022;
		return;
	}

	exp -= 1023; // remove bias
	// adjust to 64 bit significand and add the leading one.
	sig <<= 11;
	sig |= (UINT64_C(1) << 63);
}
// Decode a 10-byte extended value at vp into this object.
//
// Layout read from memory depends on endian::native:
//   little: 8 significand bytes at offset 0, 2 sign/exponent bytes at offset 8.
//   big:    2 sign/exponent bytes at offset 0, 8 significand bytes at offset 2.
//
// On return:
//   sign - bit 15 of the sign/exponent word.
//   exp  - unbiased exponent; left at 0 when the stored exponent field is 0
//          or all ones.
//   sig  - the stored 64-bit significand (explicit leading one in bit 63);
//          for infinity/NaN bit 63 is removed, and for NaN bit 62 as well.
//   one  - bit 63 of the stored significand.
//   nan / inf - classification flags; both are reset before decoding so a
//          previously used object does not keep stale flags.
void info::read_extended(const void *vp) {
	uint64_t i;
	uint16_t sexp;

	const uint8_t *bytes = static_cast<const uint8_t *>(vp);
	if (endian::native == endian::little) {
		std::memcpy(&i, bytes, 8);
		std::memcpy(&sexp, bytes + 8, 2);
	} else {
		std::memcpy(&sexp, bytes, 2);
		std::memcpy(&i, bytes + 2, 8);
	}

	// Start from a clean classification: read_* may be called on a reused object.
	nan = false;
	inf = false;

	sign = (sexp >> 15) & 0x01;
	exp = sexp & ((1 << 15) - 1);
	one = i >> 63;
	sig = i; // includes 1. i & ((UINT64_C(1) << 63) - 1);

	if (exp == 32767) {
		// all-ones exponent: infinity (zero fraction) or NaN (non-zero fraction).
		exp = 0;
		sig &= ((UINT64_C(1) << 63) - 1);
		if (sig == 0) inf = true;
		else {
			nan = true;
			sig &= ((UINT64_C(1) << 62) - 1);
		}
		return;
	}

	// NOTE(review): the denormal adjustment below is compiled out, so a value
	// with a zero exponent field and a non-zero significand keeps exp == 0.
	// The correct minimum exponent depends on which extended format is
	// intended -- confirm before enabling.
#if 0
	if (exp == 0 && sig != 0) {
		// denormalized.
		exp -= 16382;
		return;
	}
#endif

	if (exp) exp -= 16383; // remove bias
}
// Encode this object as a 4-byte single-precision value (host byte order)
// and store it at vp.
//
//   nan       -> exponent all ones, quiet bit set, low 8 bits of sig as the
//                payload (a zero payload is replaced by 1).
//   inf, or exp above max_exp -> infinity.
//   exp too small for a normal number, or no leading one -> signed zero.
//   otherwise -> biased exponent plus the top significand_bits of the
//                fraction (truncated, leading one dropped).
void info::write_single(void *vp) const {
	using namespace single_traits;

	uint32_t i = 0;
	if (sign) i |= sign_bit;

	if (nan) {
		// todo -- better signalling vs quiet...
		i |= nan_exp;
		i |= quiet_nan; // nan bit.
		unsigned tmp = sig & 0xff;
		if (!tmp) tmp = 1;
		i |= tmp;
	}
	else if (inf || exp > max_exp ) {
		i |= nan_exp; // also infinite.
	}
	else if (exp < min_exp || exp + static_cast<int>(bias) <= 0 || !one) {
		// todo -- could de-normalize here...
		// if too small -> 0.
		// A biased exponent of 0 is the subnormal encoding; writing a normal
		// number's fraction there would store a different value, so it is
		// flushed to zero as well.
		// no need to modify i!
	}
	else {
		// de-normalized numbers handled above (as 0)
		uint32_t e = exp + bias;
		e <<= significand_bits;
		i |= e;

		uint32_t s = sig >> (63 - significand_bits);
		// and clear 1-bit
		s &= significand_mask;
		i |= s;
	}
	std::memcpy(vp, &i, 4);
}
// Encode this object as an 8-byte double-precision value (host byte order)
// and store it at vp.
//
//   nan       -> exponent all ones, quiet bit set, low 8 bits of sig as the
//                payload (a zero payload is replaced by 1).
//   inf, or exp above max_exp -> infinity.
//   exp too small for a normal number, or no leading one -> signed zero.
//   otherwise -> biased exponent plus the top significand_bits of the
//                fraction (truncated, leading one dropped).
void info::write_double(void *vp) const {
	using namespace double_traits;

	uint64_t i = 0;
	if (sign) i |= sign_bit;

	if (nan) {
		// todo -- better signalling vs quiet...
		i |= nan_exp;
		i |= quiet_nan; // nan bit.
		unsigned tmp = sig & 0xff;
		if (!tmp) tmp = 1;
		i |= tmp;
	}
	else if (inf || exp > max_exp ) {
		i |= nan_exp;
	}
	else if (exp < min_exp || exp + static_cast<int>(bias) <= 0 || !one) {
		// if too small -> 0.
		// A biased exponent of 0 is the subnormal encoding; writing a normal
		// number's fraction there would store a different value, so it is
		// flushed to zero as well.
		// no need to modify i!
	}
	else {
		// de-normalized numbers handled above (as 0)
		uint64_t e = exp + bias;
		e <<= significand_bits;
		i |= e;

		uint64_t s = sig >> (63 - significand_bits);
		// and clear 1-bit
		s &= significand_mask;
		i |= s;
	}
	std::memcpy(vp, &i, 8);
}
void info::write_extended(void *vp) const {
//...
using namespace extended_traits;
uint64_t i = 0;
uint16_t sexp = 0;
if (sign) sexp |= sign_bit;
if (nan) {
// todo -- better signalling vs quiet...
sexp |= nan_exp;
i |= quiet_nan; // nan bit.
i |= one_bit;
unsigned tmp = sig & 0xff;
if (!tmp) tmp = 1;
i |= tmp;
}
else if (inf || exp > max_exp ) {
sexp |= nan_exp;
i |= one_bit;
}
else if (exp < min_exp || !one) {
// if too small -> 0.
// no need to modify i!
}
else {
// de-normalized numbers handled above (as 0)
uint64_t e = exp + bias;
sexp |= e;
i = sig; // 1-bit already set.
}
uint8_t *cp = (uint8_t *)vp;
if (endian::native == endian::little) {
std::memcpy(cp + 0, &i, 8);
std::memcpy(cp + 8, &sexp, 2);
} else {
std::memcpy(cp + 0, &sexp, 2);
std::memcpy(cp + 2, &i, 8);
}
}
}
+163 -336
View File
@@ -9,426 +9,253 @@
//#include <inttypes.h> //#include <inttypes.h>
//#include "endian.h"
enum class endian { enum class endian {
big = 1234, little = 3412,
little = 3412, big = 1234,
native = little native = little
}; };
namespace single_traits { namespace floating_point {
constexpr size_t bias = 127;
constexpr size_t exponent_bits = 8;
constexpr size_t significand_bits = 23;
constexpr int max_exp = 127;
constexpr int min_exp = -127;
constexpr uint32_t significand_mask = ((1 << significand_bits) - 1); template<size_t size>
constexpr uint32_t sign_bit = UINT32_C(1) << 31; void reverse_bytes(void *vp) {
constexpr uint32_t nan_exp = UINT32_C(255) << significand_bits; char *cp = (char *)vp;
for (size_t i = 0; i < size / 2; ++i)
std::swap(cp[i], cp[size - i - 1]);
}
constexpr uint32_t quiet_nan = UINT32_C(0x02) << (significand_bits - 2); template<size_t size>
constexpr uint32_t signaling_nan = UINT32_C(0x01) << (significand_bits - 2); void reverse_bytes_if(void *vp, std::true_type) {
} reverse_bytes<size>(vp);
}
namespace double_traits { template<size_t size>
constexpr size_t bias = 1023; void reverse_bytes_if(void *vp, std::false_type) {
constexpr size_t exponent_bits = 11; }
constexpr size_t significand_bits = 52;
constexpr int max_exp = 1023;
constexpr int min_exp = -1023;
constexpr uint64_t significand_mask = ((UINT64_C(1) << significand_bits) - 1);
constexpr uint64_t sign_bit = UINT64_C(1) << 63;
constexpr uint64_t nan_exp = UINT64_C(2047) << significand_bits;
constexpr uint64_t quiet_nan = UINT64_C(0x02) << (significand_bits - 2);
constexpr uint64_t signaling_nan = UINT64_C(0x01) << (significand_bits - 2);
}
namespace extended_traits {
constexpr size_t bias = 16383;
constexpr size_t exponent_bits = 15;
constexpr size_t significand_bits = 63; // does not include explicit 1.
constexpr int max_exp = 16383;
constexpr int min_exp = -16383;
constexpr uint64_t significand_mask = ((UINT64_C(1) << significand_bits) - 1);
constexpr uint64_t quiet_nan = UINT64_C(0x02) << (significand_bits - 2);
constexpr uint64_t signaling_nan = UINT64_C(0x01) << (significand_bits - 2);
constexpr uint64_t one_bit = UINT64_C(0x8000000000000000);
// stored separately. namespace single_traits {
constexpr uint16_t sign_bit = 0x8000; constexpr size_t bias = 127;
constexpr uint16_t nan_exp = 0x7fff; constexpr size_t exponent_bits = 8;
constexpr size_t significand_bits = 23;
constexpr int max_exp = 127;
constexpr int min_exp = -127;
} constexpr uint32_t significand_mask = ((1 << significand_bits) - 1);
constexpr uint32_t sign_bit = UINT32_C(1) << 31;
constexpr uint32_t nan_exp = UINT32_C(255) << significand_bits;
class fpinfo { constexpr uint32_t quiet_nan = UINT32_C(0x02) << (significand_bits - 2);
constexpr uint32_t signaling_nan = UINT32_C(0x01) << (significand_bits - 2);
}
public: namespace double_traits {
constexpr size_t bias = 1023;
constexpr size_t exponent_bits = 11;
constexpr size_t significand_bits = 52;
constexpr int max_exp = 1023;
constexpr int min_exp = -1023;
bool sign = false; constexpr uint64_t significand_mask = ((UINT64_C(1) << significand_bits) - 1);
bool one = false; constexpr uint64_t sign_bit = UINT64_C(1) << 63;
int exp = 0; constexpr uint64_t nan_exp = UINT64_C(2047) << significand_bits;
uint64_t sig = 0; // includes explicit 1 bit, adjusted to 63 bits of fraction.
bool nan = false; constexpr uint64_t quiet_nan = UINT64_C(0x02) << (significand_bits - 2);
bool inf = false; constexpr uint64_t signaling_nan = UINT64_C(0x01) << (significand_bits - 2);
}
namespace extended_traits {
constexpr size_t bias = 16383;
constexpr size_t exponent_bits = 15;
constexpr size_t significand_bits = 63; // does not include explicit 1.
constexpr int max_exp = 16383;
constexpr int min_exp = -16383;
constexpr uint64_t significand_mask = ((UINT64_C(1) << significand_bits) - 1);
constexpr uint64_t quiet_nan = UINT64_C(0x02) << (significand_bits - 2);
constexpr uint64_t signaling_nan = UINT64_C(0x01) << (significand_bits - 2);
constexpr uint64_t one_bit = UINT64_C(0x8000000000000000);
// stored separately.
constexpr uint16_t sign_bit = 0x8000;
constexpr uint16_t nan_exp = 0x7fff;
template<size_t size, endian byteorder> }
fpinfo(void *data);
fpinfo(float f); template<size_t _size, endian _byte_order>
fpinfo(double d); struct format {
fpinfo(long double ld); static constexpr size_t size = _size;
fpinfo() = default; static constexpr endian byte_order = _byte_order;
template<size_t size, endian byteorder>
void write(void *vp) const;
void write(float &x) const;
void write(double &x) const;
void write(long double &x) const;
#if 0
enum {
fp_zero,
fp_infinite,
fp_quiet_nan,
fp_signaling_nan,
fp_normal,
fp_subnormal
}; };
#endif
private:
//static constexpr uint64_t one_bit = UINT64_C(0x8000000000000000); class info {
template<size_t size, endian byteorder> void init(const void *vp); private:
}; void read_single(const void *);
void read_double(const void *);
void read_extended(const void *);
template<>
void fpinfo::init<4, endian::native>(const void *vp) {
uint32_t i; void write_single(void *) const;
std::memcpy(&i, vp, 4); void write_double(void *) const;
void write_extended(void *) const;
public:
sign = i >> 31; bool sign = false;
one = 1; bool one = false;
exp = (i >> 23) & ((1 << 8) - 1); int exp = 0;
sig = i & ((1 << 23) - 1); uint64_t sig = 0; // includes explicit 1 bit, adjusted to 63 bits of fraction.
if (exp == 255) { bool nan = false;
exp = 0; bool inf = false;
if (sig == 0) inf = true;
else {
nan = true; template<class T, typename = std::enable_if<std::is_floating_point<T>::value> >
sig &= ~(UINT64_C(1) << 22); void read(T x)
{ read(format<sizeof(x), endian::native>{}, &x); }
template<size_t size, endian byte_order>
void read(format<size, byte_order>, const void *vp) {
uint8_t buffer[size];
static_assert(byte_order != endian::native, "byte order");
std::memcpy(buffer, vp, size);
reverse_bytes<size>(buffer);
read(format<size, endian::native>{}, buffer);
} }
return;
}
if (exp == 0 && sig != 0) { void read(format<4, endian::native>, const void *vp) {
// denormalized. read_single(vp);
one = 0;
sig <<= 40; //?
exp = -126;
return;
}
if (exp) exp -= 127; // bias
// adjust to 64 bit significand.
sig <<= 40;
if (one) sig |= (UINT64_C(1) << 63);
}
template<>
void fpinfo::init<8, endian::native>(const void *vp) {
uint64_t i;
std::memcpy(&i, vp, 8);
sign = i >> 63;
one = 1;
exp = (i >> 52) & ((1 << 11) - 1);
sig = i & ((UINT64_C(1) << 52) - 1);
if (exp == 2047) {
exp = 0;
if (sig == 0) inf = true;
else {
nan = true;
sig &= ~(UINT64_C(1) << 51);
} }
return;
}
if (exp == 0 && sig != 0) {
// denormalized.
one = 0;
sig <<= 10; //?
exp = -1022;
return;
}
if (exp) exp -= 1023; // bias
sig <<= 11;
if (one) sig |= (UINT64_C(1) << 63);
}
// read a macintosh extended... void read(format<8, endian::native>, const void *vp) {
template<> read_double(vp);
void fpinfo::init<10, endian::big>(const void *vp) {
}
// some c compilers actually generate 128-bit floats....
template<>
void fpinfo::init<16, endian::native>(const void *vp) {
uint64_t i;
uint32_t sexp;
if (endian::native == endian::little) {
std::memcpy(&i, (const uint8_t *)vp, 8);
std::memcpy(&sexp, (const uint8_t *)vp + 8, 4);
} else {
std::memcpy(&sexp, (const uint8_t *)vp + 4, 4);
std::memcpy(&i, (const uint8_t *)vp + 8, 8);
}
sign = (sexp >> 15) & 0x01;
exp = sexp & ((1 << 15) - 1);
one = i >> 63;
sig = i; // includes 1. i & ((UINT64_C(1) << 63) - 1);
if (exp == 32767) {
exp = 0;
sig &= ((UINT64_C(1) << 63) - 1);
if (sig == 0) inf = true;
else {
nan = true;
sig &= ((UINT64_C(1) << 62) - 1);
} }
return;
}
if (exp == 0 && sig != 0) { void read(format<10, endian::native>, const void *vp) {
// denormalized. read_extended(vp);
exp -= 16382; }
return;
}
// void read(format<12, endian::native>, const void *vp) {
// todo -- padding?
read_extended(vp);
}
if (exp) exp -= 16383; void read(format<16, endian::native>, const void *vp) {
} // todo -- padding?
read_extended(vp);
}
fpinfo::fpinfo(float x) { template<class T, typename = std::enable_if<std::is_floating_point<T>::value> >
init<sizeof(x), endian::native>(&x); void write(T &x) const
} { write(format<sizeof(x), endian::native>{}, &x); }
fpinfo::fpinfo(double x) { template<size_t size, endian byte_order>
init<sizeof(x), endian::native>(&x); void write(format<size, byte_order>, void *vp) const {
}
uint8_t buffer[size];
static_assert(byte_order != endian::native, "byte order");
fpinfo::fpinfo(long double x) { write(format<size, endian::native>{}, buffer);
init<sizeof(x), endian::native>(&x);
}
//template<size_t size, endian byteorder> void init(const void *vp); reverse_bytes<size>(buffer);
std::memcpy(vp, buffer, size);
}
template<>
void fpinfo::write<4, endian::native>(void *vp) const {
using namespace single_traits; void write(format<4, endian::native>, void *vp) const {
write_single(vp);
}
uint32_t i = 0;
if (sign) i |= sign_bit; void write(format<8, endian::native>, void *vp) const {
write_double(vp);
}
if (nan) { void write(format<10, endian::native>, void *vp) const {
// todo -- better signalling vs quiet... write_extended(vp);
i |= nan_exp; }
i |= quiet_nan; // nan bit.
unsigned tmp = sig & 0xff; void write(format<12, endian::native>, void *vp) const {
if (!tmp) tmp = 1; // todo -- padding?
i |= tmp; write_extended(vp);
} std::memset((uint8_t *)vp + 10, 0, 12-10);
else if (inf || exp > max_exp ) { }
i |= nan_exp; // also infinite.
}
else if (exp < min_exp || !one) {
// todo -- could de-normalize here...
// if too small -> 0.
// no need to modify i!
}
else {
// de-normalized numbers handled above (as 0)
uint32_t e = exp + bias;
e <<= significand_bits;
i |= e;
uint32_t s = sig >> (63 - significand_bits); void write(format<16, endian::native>, void *vp) const {
// and clear 1-bit // todo -- padding?
s &= significand_mask; write_extended(vp);
i |= s; std::memset((uint8_t *)vp + 10, 0, 16-10);
} }
std::memcpy(vp, &i, 4);
}
template<>
void fpinfo::write<8, endian::native>(void *vp) const {
using namespace double_traits;
uint64_t i = 0; template<class T, typename = std::enable_if<std::is_floating_point<T>::value> >
info(T x) { read(x); }
info() = default;
if (sign) i |= sign_bit;
if (nan) { #if 0
// todo -- better signalling vs quiet... enum {
i |= nan_exp; fp_zero,
i |= quiet_nan; // nan bit. fp_infinite,
fp_quiet_nan,
fp_signaling_nan,
fp_normal,
fp_subnormal
};
#endif
unsigned tmp = sig & 0xff;
if (!tmp) tmp = 1;
i |= tmp;
}
else if (inf || exp > max_exp ) {
i |= nan_exp;
}
else if (exp < min_exp || !one) {
// if too small -> 0.
// no need to modify i!
}
else {
// de-normalized numbers handled above (as 0)
uint64_t e = exp + bias;
e <<= significand_bits;
i |= e;
uint64_t s = sig >> (63 - significand_bits); };
// and clear 1-bit
s &= significand_mask;
i |= s;
}
std::memcpy(vp, &i, 8);
}
template<>
void fpinfo::write<10, endian::big>(void *vp) const {
using namespace extended_traits;
uint64_t i = 0;
uint16_t sexp = 0;
if (sign) sexp |= sign_bit;
if (nan) {
// todo -- better signalling vs quiet...
sexp |= nan_exp;
i |= quiet_nan; // nan bit.
i |= one_bit;
unsigned tmp = sig & 0xff;
if (!tmp) tmp = 1;
i |= tmp;
}
else if (inf || exp > max_exp ) {
sexp |= nan_exp;
i |= one_bit;
}
else if (exp < min_exp || !one) {
// if too small -> 0.
// no need to modify i!
}
else {
// de-normalized numbers handled above (as 0)
uint64_t e = exp + bias;
sexp |= e;
i = sig; // 1-bit already set.
}
std::memcpy(vp, &i, 10);
}
template<>
void fpinfo::write<16, endian::native>(void *vp) const {
}
void fpinfo::write(float &x) const {
write<sizeof(x), endian::native>(&x);
}
void fpinfo::write(double &x) const {
write<sizeof(x), endian::native>(&x);
}
void fpinfo::write(long double &x) const {
write<sizeof(x), endian::native>(&x);
}
namespace its_complicated {
/* /*
std::string to_string(const fpinfo &fpi) std::string to_string(const info &fpi)
{ {
} }
*/ */
inline int fpclassify(const fpinfo &fpi) { inline int fpclassify(const info &fpi) {
if (fpi.nan) return FP_NAN; if (fpi.nan) return FP_NAN;
if (fpi.inf) return FP_INFINITE; if (fpi.inf) return FP_INFINITE;
if (fpi.sig == 0) return FP_ZERO; if (fpi.sig == 0) return FP_ZERO;
return fpi.sig >> 63 ? FP_NORMAL : FP_SUBNORMAL; return fpi.sig >> 63 ? FP_NORMAL : FP_SUBNORMAL;
} }
inline int signbit(const fpinfo &fpi) { inline int signbit(const info &fpi) {
return fpi.sign; return fpi.sign;
} }
inline int isnan(const fpinfo &fpi) { inline int isnan(const info &fpi) {
return fpi.nan; return fpi.nan;
} }
inline int isinf(const fpinfo &fpi) { inline int isinf(const info &fpi) {
return fpi.inf; return fpi.inf;
} }
inline int isfinite(const fpinfo &fpi) { inline int isfinite(const info &fpi) {
if (fpi.nan || fpi.inf) return false; if (fpi.nan || fpi.inf) return false;
return true; return true;
} }
inline int isnormal(const fpinfo &fpi) { inline int isnormal(const info &fpi) {
if (fpi.nan || fpi.inf) return false; if (fpi.nan || fpi.inf) return false;
return fpi.sig >> 63; return fpi.sig >> 63;
} }
} }
#endif #endif
+70 -31
View File
@@ -1,30 +1,51 @@
#define CATCH_CONFIG_MAIN #define CATCH_CONFIG_MAIN
#include "catch.hpp" #include "catch.hpp"
#include "../toolbox/floating_point.h" #include "floating_point.h"
#include <cmath> #include <cmath>
#include <cstring> #include <cstring>
namespace fp = floating_point;
void bitdump(const void *vp, unsigned bytes) {
const uint8_t *p = (const uint8_t *)vp;
p += bytes;
for (unsigned i = 0; i < bytes; ++i) {
uint8_t c = *(--p);
for (int j = 0x80; j; j >>= 1) {
printf ("%d", c & j ? 1 : 0);
}
printf(" ");
}
printf("\n");
}
TEST_CASE( "1.0 is handled", "[floating point info]") { TEST_CASE( "1.0 is handled", "[floating point info]") {
fpinfo fpi; fp::info fpi;
fpi = fpinfo((float)1.0); fpi = fp::info((float)1.0);
REQUIRE(fpi.nan == false); REQUIRE(fpi.nan == false);
REQUIRE(fpi.inf == false); REQUIRE(fpi.inf == false);
REQUIRE(fpi.sign == 0); REQUIRE(fpi.sign == 0);
REQUIRE(fpi.exp == 0); REQUIRE(fpi.exp == 0);
REQUIRE(fpi.sig == 0x8000000000000000); REQUIRE(fpi.sig == 0x8000000000000000);
fpi = fpinfo((double)1.0); fpi = fp::info((double)1.0);
REQUIRE(fpi.nan == false); REQUIRE(fpi.nan == false);
REQUIRE(fpi.inf == false); REQUIRE(fpi.inf == false);
REQUIRE(fpi.sign == 0); REQUIRE(fpi.sign == 0);
REQUIRE(fpi.exp == 0); REQUIRE(fpi.exp == 0);
REQUIRE(fpi.sig == 0x8000000000000000); REQUIRE(fpi.sig == 0x8000000000000000);
fpi = fpinfo((long double)1.0); fpi = fp::info((long double)1.0);
REQUIRE(fpi.nan == false); REQUIRE(fpi.nan == false);
REQUIRE(fpi.inf == false); REQUIRE(fpi.inf == false);
REQUIRE(fpi.sign == 0); REQUIRE(fpi.sign == 0);
@@ -35,23 +56,23 @@ TEST_CASE( "1.0 is handled", "[floating point info]") {
TEST_CASE( "-1.0 is handled", "[floating point info]") { TEST_CASE( "-1.0 is handled", "[floating point info]") {
fpinfo fpi; fp::info fpi;
fpi = fpinfo((float)-1.0); fpi = fp::info((float)-1.0);
REQUIRE(fpi.nan == false); REQUIRE(fpi.nan == false);
REQUIRE(fpi.inf == false); REQUIRE(fpi.inf == false);
REQUIRE(fpi.sign == 1); REQUIRE(fpi.sign == 1);
REQUIRE(fpi.exp == 0); REQUIRE(fpi.exp == 0);
REQUIRE(fpi.sig == 0x8000000000000000); REQUIRE(fpi.sig == 0x8000000000000000);
fpi = fpinfo((double)-1.0); fpi = fp::info((double)-1.0);
REQUIRE(fpi.nan == false); REQUIRE(fpi.nan == false);
REQUIRE(fpi.inf == false); REQUIRE(fpi.inf == false);
REQUIRE(fpi.sign == 1); REQUIRE(fpi.sign == 1);
REQUIRE(fpi.exp == 0); REQUIRE(fpi.exp == 0);
REQUIRE(fpi.sig == 0x8000000000000000); REQUIRE(fpi.sig == 0x8000000000000000);
fpi = fpinfo((long double)-1.0); fpi = fp::info((long double)-1.0);
REQUIRE(fpi.nan == false); REQUIRE(fpi.nan == false);
REQUIRE(fpi.inf == false); REQUIRE(fpi.inf == false);
REQUIRE(fpi.sign == 1); REQUIRE(fpi.sign == 1);
@@ -62,23 +83,23 @@ TEST_CASE( "-1.0 is handled", "[floating point info]") {
TEST_CASE( "2.0 is handled", "[floating point info]") { TEST_CASE( "2.0 is handled", "[floating point info]") {
fpinfo fpi; fp::info fpi;
fpi = fpinfo((float)2.0); fpi = fp::info((float)2.0);
REQUIRE(fpi.nan == false); REQUIRE(fpi.nan == false);
REQUIRE(fpi.inf == false); REQUIRE(fpi.inf == false);
REQUIRE(fpi.sign == 0); REQUIRE(fpi.sign == 0);
REQUIRE(fpi.exp == 1); REQUIRE(fpi.exp == 1);
REQUIRE(fpi.sig == 0x8000000000000000); REQUIRE(fpi.sig == 0x8000000000000000);
fpi = fpinfo((double)2.0); fpi = fp::info((double)2.0);
REQUIRE(fpi.nan == false); REQUIRE(fpi.nan == false);
REQUIRE(fpi.inf == false); REQUIRE(fpi.inf == false);
REQUIRE(fpi.sign == 0); REQUIRE(fpi.sign == 0);
REQUIRE(fpi.exp == 1); REQUIRE(fpi.exp == 1);
REQUIRE(fpi.sig == 0x8000000000000000); REQUIRE(fpi.sig == 0x8000000000000000);
fpi = fpinfo((long double)2.0); fpi = fp::info((long double)2.0);
REQUIRE(fpi.nan == false); REQUIRE(fpi.nan == false);
REQUIRE(fpi.inf == false); REQUIRE(fpi.inf == false);
REQUIRE(fpi.sign == 0); REQUIRE(fpi.sign == 0);
@@ -89,23 +110,23 @@ TEST_CASE( "2.0 is handled", "[floating point info]") {
TEST_CASE( "NaN("") is handled", "[floating point info]") { TEST_CASE( "NaN("") is handled", "[floating point info]") {
fpinfo fpi; fp::info fpi;
fpi = fpinfo(nanf("")); fpi = fp::info(nanf(""));
REQUIRE(fpi.nan == true); REQUIRE(fpi.nan == true);
REQUIRE(fpi.inf == false); REQUIRE(fpi.inf == false);
REQUIRE(fpi.sign == 0); REQUIRE(fpi.sign == 0);
REQUIRE(fpi.exp == 0); REQUIRE(fpi.exp == 0);
REQUIRE(fpi.sig == 0x0000000000000000); REQUIRE(fpi.sig == 0x0000000000000000);
fpi = fpinfo(nan("")); fpi = fp::info(nan(""));
REQUIRE(fpi.nan == true); REQUIRE(fpi.nan == true);
REQUIRE(fpi.inf == false); REQUIRE(fpi.inf == false);
REQUIRE(fpi.sign == 0); REQUIRE(fpi.sign == 0);
REQUIRE(fpi.exp == 0); REQUIRE(fpi.exp == 0);
REQUIRE(fpi.sig == 0x0000000000000000); REQUIRE(fpi.sig == 0x0000000000000000);
fpi = fpinfo(nanl("")); fpi = fp::info(nanl(""));
REQUIRE(fpi.nan == true); REQUIRE(fpi.nan == true);
REQUIRE(fpi.inf == false); REQUIRE(fpi.inf == false);
REQUIRE(fpi.sign == 0); REQUIRE(fpi.sign == 0);
@@ -116,23 +137,23 @@ TEST_CASE( "NaN("") is handled", "[floating point info]") {
TEST_CASE( "NaN(255) is handled", "[floating point info]") { TEST_CASE( "NaN(255) is handled", "[floating point info]") {
fpinfo fpi; fp::info fpi;
fpi = fpinfo(nanf("255")); fpi = fp::info(nanf("255"));
REQUIRE(fpi.nan == true); REQUIRE(fpi.nan == true);
REQUIRE(fpi.inf == false); REQUIRE(fpi.inf == false);
REQUIRE(fpi.sign == 0); REQUIRE(fpi.sign == 0);
REQUIRE(fpi.exp == 0); REQUIRE(fpi.exp == 0);
REQUIRE(fpi.sig == 255); REQUIRE(fpi.sig == 255);
fpi = fpinfo(nan("255")); fpi = fp::info(nan("255"));
REQUIRE(fpi.nan == true); REQUIRE(fpi.nan == true);
REQUIRE(fpi.inf == false); REQUIRE(fpi.inf == false);
REQUIRE(fpi.sign == 0); REQUIRE(fpi.sign == 0);
REQUIRE(fpi.exp == 0); REQUIRE(fpi.exp == 0);
REQUIRE(fpi.sig == 255); REQUIRE(fpi.sig == 255);
fpi = fpinfo(nanl("255")); fpi = fp::info(nanl("255"));
REQUIRE(fpi.nan == true); REQUIRE(fpi.nan == true);
REQUIRE(fpi.inf == false); REQUIRE(fpi.inf == false);
REQUIRE(fpi.sign == 0); REQUIRE(fpi.sign == 0);
@@ -143,23 +164,23 @@ TEST_CASE( "NaN(255) is handled", "[floating point info]") {
TEST_CASE( "Inf is handled", "[floating point info]") { TEST_CASE( "Inf is handled", "[floating point info]") {
fpinfo fpi; fp::info fpi;
fpi = fpinfo((float)1.0/0.0); fpi = fp::info((float)1.0/0.0);
REQUIRE(fpi.nan == false); REQUIRE(fpi.nan == false);
REQUIRE(fpi.inf == true); REQUIRE(fpi.inf == true);
REQUIRE(fpi.sign == 0); REQUIRE(fpi.sign == 0);
REQUIRE(fpi.exp == 0); REQUIRE(fpi.exp == 0);
REQUIRE(fpi.sig == 0); REQUIRE(fpi.sig == 0);
fpi = fpinfo((double)1.0/0.0); fpi = fp::info((double)1.0/0.0);
REQUIRE(fpi.nan == false); REQUIRE(fpi.nan == false);
REQUIRE(fpi.inf == true); REQUIRE(fpi.inf == true);
REQUIRE(fpi.sign == 0); REQUIRE(fpi.sign == 0);
REQUIRE(fpi.exp == 0); REQUIRE(fpi.exp == 0);
REQUIRE(fpi.sig == 0); REQUIRE(fpi.sig == 0);
fpi = fpinfo((long double)1.0/0.0); fpi = fp::info((long double)1.0/0.0);
REQUIRE(fpi.nan == false); REQUIRE(fpi.nan == false);
REQUIRE(fpi.inf == true); REQUIRE(fpi.inf == true);
REQUIRE(fpi.sign == 0); REQUIRE(fpi.sign == 0);
@@ -173,17 +194,20 @@ TEST_CASE( "Re-cast 0.0", "[floating point info]") {
float target_f = 0.0; float target_f = 0.0;
double target_d = 0.0; double target_d = 0.0;
double target_ld = 0.0; double target_ld = 0.0;
long double ld;
double d; double d;
float f; float f;
fpinfo fpi(0.0l); fp::info fpi(0.0l);
fpi.write(f); fpi.write(f);
fpi.write(d); fpi.write(d);
fpi.write(ld);
REQUIRE(memcmp(&target_f, &f, sizeof(f)) == 0); REQUIRE(memcmp(&target_f, &f, sizeof(f)) == 0);
REQUIRE(memcmp(&target_d, &d, sizeof(d)) == 0); REQUIRE(memcmp(&target_d, &d, sizeof(d)) == 0);
REQUIRE(memcmp(&target_ld, &ld, sizeof(ld)) == 0);
} }
@@ -192,17 +216,20 @@ TEST_CASE( "Re-cast 1.0", "[floating point info]") {
float target_f = 1.0; float target_f = 1.0;
double target_d = 1.0; double target_d = 1.0;
double target_ld = 1.0; double target_ld = 1.0;
long double ld;
double d; double d;
float f; float f;
fpinfo fpi(1.0l); fp::info fpi(1.0l);
fpi.write(f); fpi.write(f);
fpi.write(d); fpi.write(d);
fpi.write(ld);
REQUIRE(memcmp(&target_f, &f, sizeof(f)) == 0); REQUIRE(memcmp(&target_f, &f, sizeof(f)) == 0);
REQUIRE(memcmp(&target_d, &d, sizeof(d)) == 0); REQUIRE(memcmp(&target_d, &d, sizeof(d)) == 0);
REQUIRE(memcmp(&target_ld, &ld, sizeof(ld)) == 0);
} }
@@ -211,17 +238,20 @@ TEST_CASE( "Re-cast -1.0", "[floating point info]") {
float target_f = -1.0; float target_f = -1.0;
double target_d = -1.0; double target_d = -1.0;
double target_ld = -1.0; double target_ld = -1.0;
long double ld;
double d; double d;
float f; float f;
fpinfo fpi(-1.0l); fp::info fpi(-1.0l);
fpi.write(f); fpi.write(f);
fpi.write(d); fpi.write(d);
fpi.write(ld);
REQUIRE(memcmp(&target_f, &f, sizeof(f)) == 0); REQUIRE(memcmp(&target_f, &f, sizeof(f)) == 0);
REQUIRE(memcmp(&target_d, &d, sizeof(d)) == 0); REQUIRE(memcmp(&target_d, &d, sizeof(d)) == 0);
REQUIRE(memcmp(&target_ld, &ld, sizeof(ld)) == 0);
} }
@@ -231,17 +261,20 @@ TEST_CASE( "Re-cast 1000.0", "[floating point info]") {
float target_f = 1000.0; float target_f = 1000.0;
double target_d = 1000.0; double target_d = 1000.0;
double target_ld = 1000.0; double target_ld = 1000.0;
long double ld;
double d; double d;
float f; float f;
fpinfo fpi(1000.0l); fp::info fpi(1000.0l);
fpi.write(f); fpi.write(f);
fpi.write(d); fpi.write(d);
fpi.write(ld);
REQUIRE(memcmp(&target_f, &f, sizeof(f)) == 0); REQUIRE(memcmp(&target_f, &f, sizeof(f)) == 0);
REQUIRE(memcmp(&target_d, &d, sizeof(d)) == 0); REQUIRE(memcmp(&target_d, &d, sizeof(d)) == 0);
REQUIRE(memcmp(&target_ld, &ld, sizeof(ld)) == 0);
} }
@@ -251,17 +284,20 @@ TEST_CASE( "Re-cast Inf", "[floating point info]") {
float target_f = 1.0/0.0; float target_f = 1.0/0.0;
double target_d = 1.0/0.0; double target_d = 1.0/0.0;
double target_ld = 1.0/0.0; double target_ld = 1.0/0.0;
long double ld;
double d; double d;
float f; float f;
fpinfo fpi(1.0/0.0l); fp::info fpi(1.0/0.0l);
fpi.write(f); fpi.write(f);
fpi.write(d); fpi.write(d);
fpi.write(ld);
REQUIRE(memcmp(&target_f, &f, sizeof(f)) == 0); REQUIRE(memcmp(&target_f, &f, sizeof(f)) == 0);
REQUIRE(memcmp(&target_d, &d, sizeof(d)) == 0); REQUIRE(memcmp(&target_d, &d, sizeof(d)) == 0);
REQUIRE(memcmp(&target_ld, &ld, sizeof(ld)) == 0);
} }
@@ -270,15 +306,18 @@ TEST_CASE( "Re-cast NaN", "[floating point info]") {
float target_f = nanf("16"); float target_f = nanf("16");
double target_d = nan("16"); double target_d = nan("16");
double target_ld = nanl("16"); double target_ld = nanl("16");
long double ld;
double d; double d;
float f; float f;
fpinfo fpi(nanl("16")); fp::info fpi(nanl("16"));
fpi.write(f); fpi.write(f);
fpi.write(d); fpi.write(d);
fpi.write(ld);
REQUIRE(memcmp(&target_f, &f, sizeof(f)) == 0); REQUIRE(memcmp(&target_f, &f, sizeof(f)) == 0);
REQUIRE(memcmp(&target_d, &d, sizeof(d)) == 0); REQUIRE(memcmp(&target_d, &d, sizeof(d)) == 0);
REQUIRE(memcmp(&target_ld, &ld, sizeof(ld)) == 0);
} }