fcmp and various other changes

- Read the 68881 docs for fcmp more carefully, and tweaked it to more
  accurately implement the (documented) behavior
- Implemented de-un-normalizing for all x80 softfloat ops
- Some other changes I don't remember - it's my emulator, I can be vague
  if I want XD
This commit is contained in:
Peter Rutenbar 2014-12-10 22:22:45 -05:00
parent aea23c6dfc
commit 9c3640cf48
4 changed files with 186 additions and 61 deletions

View File

@ -3142,6 +3142,32 @@ flag float64_lt_quiet( float64 a, float64 b )
#ifdef FLOATX80
// [shoebill]
// The 68881 accepts "unnormalized" 80-bit floats: values whose exponent is
// >0 but whose explicit integer bit (bit 63 of the significand) is 0. The
// rest of the x80 code expects regular encodings, so this helper rewrites
// such a value as the normal or subnormal number it is treated as being
// equivalent to.
//
// a: the extended-precision value to canonicalize.
//
// Returns `a' unchanged when its exponent is 0x7FFF (NaN/infinity).
// Otherwise returns a value with the same sign whose significand has been
// shifted left one bit per exponent decrement, stopping as soon as the
// explicit bit becomes 1 or the exponent reaches 0. Inputs that already have
// the explicit bit set, or an exponent of 0, come back re-packed but
// otherwise unmodified.
INLINE floatx80 floatx80_de_unnormalize( floatx80 a )
{
    flag aSign;
    int16 aExp;
    bits64 aSig;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );

    // NaN/inf don't count
    if (aExp == 0x7FFF)
        return a;

    // A zero significand can never gain an explicit bit, so the loop below
    // would just count the exponent down to 0 one step at a time (up to
    // 32766 iterations) and end at (exp=0, sig=0). Produce that directly.
    if (aSig == 0)
        return packFloatx80(aSign, 0, 0);

    // Trade exponent for significand bits until the value is normal
    // (explicit bit set) or the exponent has run out (subnormal).
    while ((aExp > 0) && ((aSig >> 63)==0)) {
        aExp--;
        aSig <<= 1;
    }

    return packFloatx80(aSign, aExp, aSig);
}
/*----------------------------------------------------------------------------
| Returns the result of converting the extended double-precision floating-
| point value `a' to the 32-bit two's complement integer format. The
@ -3158,6 +3184,9 @@ int32 floatx80_to_int32( floatx80 a )
int32 aExp, shiftCount;
bits64 aSig;
// [shoebill]
a = floatx80_de_unnormalize(a);
aSig = extractFloatx80Frac( a );
aExp = extractFloatx80Exp( a );
aSign = extractFloatx80Sign( a );
@ -3188,6 +3217,9 @@ int32 floatx80_to_int32_round_to_zero( floatx80 a )
int32 aExp, shiftCount;
bits64 aSig, savedASig;
int32 z;
// [shoebill]
a = floatx80_de_unnormalize(a);
aSig = extractFloatx80Frac( a );
aExp = extractFloatx80Exp( a );
@ -3232,6 +3264,9 @@ int64 floatx80_to_int64( floatx80 a )
flag aSign;
int32 aExp, shiftCount;
bits64 aSig, aSigExtra;
// [shoebill]
a = floatx80_de_unnormalize(a);
aSig = extractFloatx80Frac( a );
aExp = extractFloatx80Exp( a );
@ -3273,6 +3308,9 @@ int64 floatx80_to_int64_round_to_zero( floatx80 a )
int32 aExp, shiftCount;
bits64 aSig;
int64 z;
// [shoebill]
a = floatx80_de_unnormalize(a);
aSig = extractFloatx80Frac( a );
aExp = extractFloatx80Exp( a );
@ -3313,6 +3351,9 @@ float32 floatx80_to_float32( floatx80 a )
flag aSign;
int32 aExp;
bits64 aSig;
// [shoebill]
a = floatx80_de_unnormalize(a);
aSig = extractFloatx80Frac( a );
aExp = extractFloatx80Exp( a );
@ -3341,6 +3382,9 @@ float64 floatx80_to_float64( floatx80 a )
flag aSign;
int32 aExp;
bits64 aSig, zSig;
// [shoebill]
a = floatx80_de_unnormalize(a);
aSig = extractFloatx80Frac( a );
aExp = extractFloatx80Exp( a );
@ -3372,22 +3416,15 @@ float128 floatx80_to_float128( floatx80 a )
int16 aExp;
bits64 aSig, zSig0, zSig1;
// [shoebill]
a = floatx80_de_unnormalize(a);
aSig = extractFloatx80Frac( a );
aExp = extractFloatx80Exp( a );
aSign = extractFloatx80Sign( a );
if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
return commonNaNToFloat128( floatx80ToCommonNaN( a ) );
}
// [shoebill]
// If the x80 has a non-zero exponent and its explicit bit is 0,
// then Motorola's 68881 docs call it "unnormal". packFloat128 can't
// handle that, so we need to de-unnormalize it to get a regular
// normal or subnormal number.
while ((aExp > 0) && ((aSig >> 63)==0)) {
aExp--;
aSig <<= 1;
}
// [shoebill]
// Don't chew off the x80's explicit bit without checking
@ -3420,6 +3457,9 @@ floatx80 floatx80_round_to_int( floatx80 a )
bits64 lastBitMask, roundBitsMask;
int8 roundingMode;
floatx80 z;
// [shoebill]
a = floatx80_de_unnormalize(a);
aExp = extractFloatx80Exp( a );
if ( 0x403E <= aExp ) {
@ -3653,6 +3693,10 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
floatx80 floatx80_add( floatx80 a, floatx80 b )
{
flag aSign, bSign;
// [shoebill]
a = floatx80_de_unnormalize(a);
b = floatx80_de_unnormalize(b);
aSign = extractFloatx80Sign( a );
bSign = extractFloatx80Sign( b );
@ -3674,6 +3718,10 @@ floatx80 floatx80_add( floatx80 a, floatx80 b )
floatx80 floatx80_sub( floatx80 a, floatx80 b )
{
flag aSign, bSign;
// [shoebill]
a = floatx80_de_unnormalize(a);
b = floatx80_de_unnormalize(b);
aSign = extractFloatx80Sign( a );
bSign = extractFloatx80Sign( b );
@ -3698,6 +3746,10 @@ floatx80 floatx80_mul( floatx80 a, floatx80 b )
int32 aExp, bExp, zExp;
bits64 aSig, bSig, zSig0, zSig1;
floatx80 z;
// [shoebill]
a = floatx80_de_unnormalize(a);
b = floatx80_de_unnormalize(b);
aSig = extractFloatx80Frac( a );
aExp = extractFloatx80Exp( a );
@ -3759,6 +3811,10 @@ floatx80 floatx80_div( floatx80 a, floatx80 b )
bits64 aSig, bSig, zSig0, zSig1;
bits64 rem0, rem1, rem2, term0, term1, term2;
floatx80 z;
// [shoebill]
a = floatx80_de_unnormalize(a);
b = floatx80_de_unnormalize(b);
aSig = extractFloatx80Frac( a );
aExp = extractFloatx80Exp( a );
@ -3839,6 +3895,10 @@ floatx80 floatx80_rem( floatx80 a, floatx80 b )
bits64 aSig0, aSig1, bSig;
bits64 q, term0, term1, alternateASig0, alternateASig1;
floatx80 z;
// [shoebill]
a = floatx80_de_unnormalize(a);
b = floatx80_de_unnormalize(b);
aSig0 = extractFloatx80Frac( a );
aExp = extractFloatx80Exp( a );
@ -3936,6 +3996,9 @@ floatx80 floatx80_sqrt( floatx80 a )
bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
floatx80 z;
// [shoebill]
a = floatx80_de_unnormalize(a);
aSig0 = extractFloatx80Frac( a );
aExp = extractFloatx80Exp( a );
@ -4002,7 +4065,10 @@ floatx80 floatx80_sqrt( floatx80 a )
flag floatx80_eq( floatx80 a, floatx80 b )
{
// [shoebill]
a = floatx80_de_unnormalize(a);
b = floatx80_de_unnormalize(b);
if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
|| ( ( extractFloatx80Exp( b ) == 0x7FFF )
@ -4033,6 +4099,10 @@ flag floatx80_eq( floatx80 a, floatx80 b )
flag floatx80_le( floatx80 a, floatx80 b )
{
flag aSign, bSign;
// [shoebill]
a = floatx80_de_unnormalize(a);
b = floatx80_de_unnormalize(b);
if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
@ -4067,6 +4137,10 @@ flag floatx80_lt( floatx80 a, floatx80 b )
{
flag aSign, bSign;
// [shoebill]
a = floatx80_de_unnormalize(a);
b = floatx80_de_unnormalize(b);
if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
|| ( ( extractFloatx80Exp( b ) == 0x7FFF )
@ -4098,7 +4172,10 @@ flag floatx80_lt( floatx80 a, floatx80 b )
flag floatx80_eq_signaling( floatx80 a, floatx80 b )
{
// [shoebill]
a = floatx80_de_unnormalize(a);
b = floatx80_de_unnormalize(b);
if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
|| ( ( extractFloatx80Exp( b ) == 0x7FFF )
@ -4126,6 +4203,10 @@ flag floatx80_eq_signaling( floatx80 a, floatx80 b )
flag floatx80_le_quiet( floatx80 a, floatx80 b )
{
flag aSign, bSign;
// [shoebill]
a = floatx80_de_unnormalize(a);
b = floatx80_de_unnormalize(b);
if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
@ -4163,6 +4244,10 @@ flag floatx80_lt_quiet( floatx80 a, floatx80 b )
{
flag aSign, bSign;
// [shoebill]
a = floatx80_de_unnormalize(a);
b = floatx80_de_unnormalize(b);
if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
|| ( ( extractFloatx80Exp( b ) == 0x7FFF )

View File

@ -1398,7 +1398,7 @@ static void inst_cas (void) {
~decompose(ext, 0000 000 uuu 000 ccc);
const uint8_t sz = (1 << s) >> 1; // (01 -> byte, 10 -> word, 11 -> long)
call_ea_read(M, sz);
/* The dest/source/result operands are reversed here from inst_cmp */
@ -2747,26 +2747,30 @@ static void inst_bchg_immediate (void) {
/*
 * EXT/EXTB: sign-extend the low part of data register r in place.
 *
 * The 3-bit field `o' decoded from the opcode selects the variant:
 *   010 - byte -> word
 *   011 - word -> long
 *   111 - byte -> long
 * Any other value of `o' matches no case, so the register is left untouched
 * (the flag updates after the switch still run).
 *
 * V and C are always cleared. Z and N are derived from the extended result
 * at the result's size.
 *
 * NOTE(review): each case also calls set_sr_z()/set_sr_n() itself, passing
 * the raw register value rather than an `== 0' test, and the size-generic
 * update at the end of the function then sets Z and N again. This listing
 * appears to interleave removed and added diff lines - confirm which set of
 * flag updates is the live one.
 */
static void inst_ext (void) {
~decompose(shoe.op, 0100 100 ooo 000 rrr);
switch (o) {
case ~b(010): { // byte -> word
// The int8_t cast performs the sign extension; only the low word is stored.
uint16_t val = (int8_t)get_d(r, 1);
set_d(r, val, 2);
set_sr_z(get_d(r, 2));
set_sr_n(mib(shoe.d[r], 2));
break;
} case ~b(011): { // word -> long
// The int16_t cast performs the sign extension to the full 32 bits.
uint32_t val = (int16_t)get_d(r, 2);
set_d(r, val, 4);
set_sr_z(get_d(r, 4));
set_sr_n(mib(shoe.d[r], 4));
break;
} case ~b(111): { // byte -> long
// The int8_t cast performs the sign extension to the full 32 bits.
uint32_t val = (int8_t)get_d(r, 1);
set_d(r, val, 4);
set_sr_z(get_d(r, 4));
set_sr_n(mib(shoe.d[r], 4));
break;
}
}
set_sr_v(0);
set_sr_c(0);
// if the LSb of o is 1, then result size == long
// (2+2*(o&1) evaluates to 2 for the word-sized result, 4 for the long ones)
set_sr_z(get_d(r, 2+2*(o&1))==0);
// presumably mib() extracts the most significant bit at the given size - TODO confirm
set_sr_n(mib(shoe.d[r], 2+2*(o&1)));
}
static void inst_andi_to_sr (void) {

View File

@ -777,7 +777,7 @@ void dis_long_div () {
sprintf(dest, "d%u", R);
if (Q != R)
sprintf(dest+2, ":d%u", Q);
sprintf(dis.str, "div%c%s.l %s,%s", "us"[u], s?"":"l", decode_ea_rw(M, 4), dest);
sprintf(dis.str, "div%c%s.l %s,%s", "us"[u], s?"l":"", decode_ea_rw(M, 4), dest);
}
void dis_cmpm () {

View File

@ -397,6 +397,8 @@ static void _fpu_read_ea_commit(const uint8_t format)
assert(!"size==1, reg==a7");
}
static long double _floatx80_to_long_double(floatx80 f80);
static void _fpu_write_ea(uint8_t mr, uint8_t format, floatx80 *f, uint8_t K)
{
fpu_get_state_ptr();
@ -421,7 +423,10 @@ static void _fpu_write_ea(uint8_t mr, uint8_t format, floatx80 *f, uint8_t K)
const _Bool is_nan = ((f->high << 1) == 0xfffe) && f->low;
slog("inst_f fpu_write_ea EA=%u/%u data=%Lf format=%u\n", m, r, 666.0L, format);
{
const long double tmp = _floatx80_to_long_double(*f);
slog("FPU: fpu_write_ea EA=%u/%u data=%Lf format=%u\n", m, r, tmp, format);
}
/* Initialize softfloat's exceptions bits/rounding mode */
@ -496,11 +501,12 @@ static void _fpu_write_ea(uint8_t mr, uint8_t format, floatx80 *f, uint8_t K)
/* Copy the formatted data into *addr */
slog("inst_f fpu_write_ea: addr=0x08x data=0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
addr,
ptr[0], ptr[1], ptr[2], ptr[3],
ptr[4], ptr[5], ptr[6], ptr[7],
ptr[8], ptr[9], ptr[10], ptr[11]);
{
slog("FPU: fpu_write_ea: addr=0x%08x data=", addr);
for (i=0; i<size; i++)
printf("%02x", ptr[i]);
printf("\n");
}
for (i=0; i<size; i++) {
lset(addr + i, 1, buf[i]);
@ -1352,22 +1358,35 @@ static void inst_fmath_fatanh ()
/*
 * FCMP: set the Z and N condition codes as if computing (dest - source),
 * without writing anything back to the destination register.
 *
 * Both operands are first classified as zero / infinity / negative.
 *  - If neither operand is a zero or an infinity, they are compared
 *    directly: equal sets Z, dest below source sets N.
 *  - Otherwise the six classification flags are packed into a 6-bit index
 *    and Z and N are read from two precomputed 64-bit bit tables.
 *
 * cc_z/cc_n are only ever set to 1 on the direct-compare path, never cleared;
 * presumably the caller has already zeroed the condition-code byte - verify
 * against inst_fmath.
 *
 * NOTE(review): this listing appears to interleave removed and added diff
 * lines. In particular the float128_sub() assignment near the end may belong
 * to the pre-change implementation - confirm before relying on fpu->result.
 */
static void inst_fmath_fcmp ()
{
fpu_get_state_ptr();
/* Classify both operands up front */
const uint8_t source_zero = _float128_is_zero(fpu->source);
const uint8_t source_inf = _float128_is_infinity(fpu->source);
const uint8_t source_sign = _float128_is_neg(fpu->source);
const uint8_t dest_zero = _float128_is_zero(fpu->dest);
const uint8_t dest_inf = _float128_is_infinity(fpu->dest);
const uint8_t dest_sign = _float128_is_neg(fpu->dest);
/* If the operands are "in-range", then we actually need to compare them */
if (!(source_zero | source_inf | dest_zero | dest_inf)) {
// dest - source.
// dest == source -> Z
// dest < source -> N
if (float128_eq(fpu->dest, fpu->source))
cc_z = 1;
// Only reached when the operands are not equal, so this
// less-or-equal test acts as a strict less-than.
else if (float128_le(fpu->dest, fpu->source))
cc_n = 1;
}
else {
/*
 * Table index, most significant bit first:
 *   source_zero, source_inf, source_sign, dest_zero, dest_inf, dest_sign
 */
const uint8_t offset = ((source_zero << 5) | (source_inf << 4) | (source_sign << 3) |
(dest_zero << 2) | (dest_inf << 1) | (dest_sign << 0));
/* Precomputed answers for all the possible combinations */
/* Bit `offset' of each constant is the flag value for that combination */
cc_z = (0x0000303008040201ULL >> offset) & 1;
cc_n = (0x00002a2a083b0a3bULL >> offset) & 1;
slog("FPU: fcmp: hit uncommon case: offset=0x%x z=%u n=%u\n", offset, cc_z, cc_n);
}
/* Don't write the result back to the register */
fpu->write_back = 0;
fpu->result = float128_sub(fpu->dest, fpu->source);
/*
* The 68881 docs say fcmp doesn't throw any exceptions
* based on the result, but I'm not sure I believe it.
if (float_exception_flags & float_flag_invalid)
es_operr = 1;
if (float_exception_flags & float_flag_inexact)
es_inex2 = 1;
*/
}
static void inst_fmath_fcos ()
@ -2254,11 +2273,16 @@ static void inst_fmath_ftst ()
{
fpu_get_state_ptr();
const _Bool source_zero = _float128_is_zero(fpu->source);
const _Bool source_inf = _float128_is_infinity(fpu->source);
const _Bool source_sign = _float128_is_neg(fpu->source);
cc_z = source_zero;
cc_i = source_inf;
cc_n = source_sign;
/* Don't write the result back to the register */
fpu->write_back = 0;
/* ftst just sets the cond codes according to the source */
fpu->result = fpu->source;
}
static void inst_fmath_ftwotox ()
@ -2343,9 +2367,6 @@ static void _fmath_set_condition_codes (floatx80 val)
const uint32_t exp = val.high & 0x7fff;
const _Bool sign = val.high >> 15;
/* Clear the whole CC register byte */
fpu->fpsr.raw &= 0x00ffffff;
/* Check for zero */
cc_z = ((exp == 0) && (frac == 0));
@ -2576,6 +2597,9 @@ static void inst_fmath (const uint16_t ext)
es_snan = 0; // Set if one of the inputs was a signaling NaN
es_bsun = 0; // never set here
/* Clear the whole CC register byte */
fpu->fpsr.raw &= 0x00ffffff;
fpu->write_back = 1; // let "do-write-back" be the default behavior
fpu->fmath_op = e;
@ -2623,6 +2647,12 @@ static void inst_fmath (const uint16_t ext)
(((_fmath_flags[e] & FMATH_TYPE_DYADIC) &&
float128_is_nan(fpu->dest)))) {
_fmath_handle_nans();
/*
* The result is guaranteed to be NaN, so we need
* to set cc_nan for ftst and fcmp, who bypass
* _fmath_set_condition_codes()
*/
cc_nan = 1;
goto computation_done;
}
@ -2643,13 +2673,12 @@ static void inst_fmath (const uint16_t ext)
*/
computation_done:
/* Convert the 128-bit result to the specified precision */
/*
* FIXME: If fpu->write_back==0, should we still go through rounding?
* The condition codes will still need to be set. Should they
* be set based on the intermediate result or rounded result?
/*
* Convert the 128-bit result to the specified precision.
* FIXME: do we need to do rounding for ftst/fcmp?
*/
rounded_result = _fmath_round_intermediate_result();
if (fpu->write_back)
rounded_result = _fmath_round_intermediate_result();
/* Update the accrued exception bits */
@ -2687,6 +2716,12 @@ computation_done:
throwable <<= 1;
}
/*
* FIXME: are condition codes ever set if an exception is thrown?
* I think not, so clear the whole CC register byte
*/
fpu->fpsr.raw &= 0x00ffffff;
/*
* Convert the exception bit position
* to the correct vector number, and throw
@ -2699,22 +2734,22 @@ computation_done:
/*
* Otherwise, no exceptions to throw!
* Calculate the condition codes from the result.
*/
_fmath_set_condition_codes(rounded_result);
/*
* We're definitely running to completion now,
* so commit ea-read changes
*/
_fpu_read_ea_commit(source_specifier);
/* Write back the result, and we're done! */
if (fpu->write_back) {
fpu->fp[dest_register] = rounded_result;
/* Calculate the condition codes from the result. */
_fmath_set_condition_codes(rounded_result);
long double tmp = _floatx80_to_long_double(rounded_result);
slog("FPU: result = %Lf\n", tmp);
/* Write back the result, and we're done! */
if (fpu->write_back) {
fpu->fp[dest_register] = rounded_result;
long double tmp = _floatx80_to_long_double(rounded_result);
slog("FPU: result = %Lf\n", tmp);
}
}
}
@ -2839,7 +2874,7 @@ static void inst_fmovem_control (const uint16_t ext)
uint8_t newround = fpu->fpcr.b._mc_rnd;
if (round != newround) {
slog("inst_fmovem_control: HEY: round %u -> %u\n", round, newround);
slog("FPU: inst_fmovem_control: HEY: round %u -> %u\n", round, newround);
}
}
if (S) fpu->fpsr.raw = buf[i++];
@ -2859,7 +2894,7 @@ static void inst_fmovem_control (const uint16_t ext)
slog("inst_fmove_control: notice: (EA = %u/%u %08x CSI = %u%u%u)\n", m, r, (uint32_t)shoe.dat, C, S, I);
slog("FPU: inst_fmove_control: notice: (EA = %u/%u %08x CSI = %u%u%u)\n", m, r, (uint32_t)shoe.dat, C, S, I);
}
@ -2911,7 +2946,7 @@ static void inst_fmovem (const uint16_t ext)
assert(p); // assert post-increment mask
}
slog("inst_fmovem: pre=%08x mask=%08x EA=%u/%u addr=0x%08x size=%u %s\n", pre_mask, mask, m, r, addr, size, d?"to mem":"from mem");
slog("FPU: inst_fmovem: pre=%08x mask=%08x EA=%u/%u addr=0x%08x size=%u %s\n", pre_mask, mask, m, r, addr, size, d?"to mem":"from mem");
if (d) {
// Write those registers
@ -2969,6 +3004,7 @@ static void inst_fmovem (const uint16_t ext)
* other FPU instructions.
*/
static _Bool fpu_test_cc(uint8_t cc)
{
fpu_get_state_ptr();
@ -3136,7 +3172,7 @@ void inst_frestore () {
if (m==3) {
shoe.a[r] += size;
slog("frestore: changing shoe.a[%u] += %u\n", r, size);
slog("FPU: frestore: changing shoe.a[%u] += %u\n", r, size);
}
}