fcmp and various other changes

- Read the 68881 docs for fcmp more carefully, and tweaked it to more accurately implement the (documented) behavior
- Implemented de-un-normalizing for all x80 softfloat ops
- Some other changes I don't remember - it's my emulator, I can be vague if I want XD
2014-12-10 22:22:45 -05:00 · 2014-12-10 22:22:45 -05:00 · 9c3640cf48
parent aea23c6dfc
commit 9c3640cf48
4 changed files with 186 additions and 61 deletions
--- a/core/SoftFloat/softfloat.c
+++ b/core/SoftFloat/softfloat.c
@ -3142,6 +3142,32 @@ flag float64_lt_quiet( float64 a, float64 b )

 #ifdef FLOATX80

+// [shoebill]
+// 68881 supports "unnormalized" 80 bit floats - floats where the exponent
+// is >0 and the explicit bit is 0. We can easily convert this to a regular
+// normal or subnormal number. NaNs and infinities are returned unchanged.
+INLINE floatx80 floatx80_de_unnormalize( floatx80 a )
+{
+    bits64 aSig = extractFloatx80Frac( a );
+    int16 aExp = extractFloatx80Exp( a );
+    flag aSign = extractFloatx80Sign( a );
+    
+    // NaN/inf don't count
+    if (aExp == 0x7FFF)
+        return a;
+    // A zero significand can never gain an explicit bit, so the value is a
+    // signed zero; return it directly instead of looping aExp down to 0.
+    if (aSig == 0)
+        return packFloatx80(aSign, 0, 0);
+    
+    while ((aExp > 0) && ((aSig >> 63)==0)) {
+        aExp--;
+        aSig <<= 1;
+    }
+    
+    return packFloatx80(aSign, aExp, aSig);
+}
+
 /*----------------------------------------------------------------------------
 | Returns the result of converting the extended double-precision floating-
 | point value `a' to the 32-bit two's complement integer format.  The
@ -3158,6 +3184,9 @@ int32 floatx80_to_int32( floatx80 a )
    int32 aExp, shiftCount;
    bits64 aSig;

+    // [shoebill]
+    a = floatx80_de_unnormalize(a);
+    
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
@ -3188,6 +3217,9 @@ int32 floatx80_to_int32_round_to_zero( floatx80 a )
    int32 aExp, shiftCount;
    bits64 aSig, savedASig;
    int32 z;
+    
+    // [shoebill]
+    a = floatx80_de_unnormalize(a);

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
@ -3232,6 +3264,9 @@ int64 floatx80_to_int64( floatx80 a )
    flag aSign;
    int32 aExp, shiftCount;
    bits64 aSig, aSigExtra;
+    
+    // [shoebill]
+    a = floatx80_de_unnormalize(a);

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
@ -3273,6 +3308,9 @@ int64 floatx80_to_int64_round_to_zero( floatx80 a )
    int32 aExp, shiftCount;
    bits64 aSig;
    int64 z;
+    
+    // [shoebill]
+    a = floatx80_de_unnormalize(a);

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
@ -3313,6 +3351,9 @@ float32 floatx80_to_float32( floatx80 a )
    flag aSign;
    int32 aExp;
    bits64 aSig;
+    
+    // [shoebill]
+    a = floatx80_de_unnormalize(a);

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
@ -3341,6 +3382,9 @@ float64 floatx80_to_float64( floatx80 a )
    flag aSign;
    int32 aExp;
    bits64 aSig, zSig;
+    
+    // [shoebill]
+    a = floatx80_de_unnormalize(a);

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
@ -3372,22 +3416,15 @@ float128 floatx80_to_float128( floatx80 a )
    int16 aExp;
    bits64 aSig, zSig0, zSig1;

+    // [shoebill]
+    a = floatx80_de_unnormalize(a);
+    
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
        return commonNaNToFloat128( floatx80ToCommonNaN( a ) );
    }
-    
-    // [shoebill]
-    // If the x80 has a non-zero exponent and its explicit bit is 0,
-    // then Motorola's 68881 docs call it "unnormal". packFloat128 can't
-    // handle that, so we need to de-unnormalize it to get a regular
-    // normal or subnormal number.
-    while ((aExp > 0) && ((aSig >> 63)==0)) {
-        aExp--;
-        aSig <<= 1;
-    }

    // [shoebill]
    // Don't chew off the x80's explicit bit without checking
@ -3420,6 +3457,9 @@ floatx80 floatx80_round_to_int( floatx80 a )
    bits64 lastBitMask, roundBitsMask;
    int8 roundingMode;
    floatx80 z;
+    
+    // [shoebill]
+    a = floatx80_de_unnormalize(a);

    aExp = extractFloatx80Exp( a );
    if ( 0x403E <= aExp ) {
@ -3653,6 +3693,10 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
 floatx80 floatx80_add( floatx80 a, floatx80 b )
 {
    flag aSign, bSign;
+    
+    // [shoebill]
+    a = floatx80_de_unnormalize(a);
+    b = floatx80_de_unnormalize(b);

    aSign = extractFloatx80Sign( a );
    bSign = extractFloatx80Sign( b );
@ -3674,6 +3718,10 @@ floatx80 floatx80_add( floatx80 a, floatx80 b )
 floatx80 floatx80_sub( floatx80 a, floatx80 b )
 {
    flag aSign, bSign;
+    
+    // [shoebill]
+    a = floatx80_de_unnormalize(a);
+    b = floatx80_de_unnormalize(b);

    aSign = extractFloatx80Sign( a );
    bSign = extractFloatx80Sign( b );
@ -3698,6 +3746,10 @@ floatx80 floatx80_mul( floatx80 a, floatx80 b )
    int32 aExp, bExp, zExp;
    bits64 aSig, bSig, zSig0, zSig1;
    floatx80 z;
+    
+    // [shoebill]
+    a = floatx80_de_unnormalize(a);
+    b = floatx80_de_unnormalize(b);

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
@ -3759,6 +3811,10 @@ floatx80 floatx80_div( floatx80 a, floatx80 b )
    bits64 aSig, bSig, zSig0, zSig1;
    bits64 rem0, rem1, rem2, term0, term1, term2;
    floatx80 z;
+    
+    // [shoebill]
+    a = floatx80_de_unnormalize(a);
+    b = floatx80_de_unnormalize(b);

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
@ -3839,6 +3895,10 @@ floatx80 floatx80_rem( floatx80 a, floatx80 b )
    bits64 aSig0, aSig1, bSig;
    bits64 q, term0, term1, alternateASig0, alternateASig1;
    floatx80 z;
+    
+    // [shoebill]
+    a = floatx80_de_unnormalize(a);
+    b = floatx80_de_unnormalize(b);

    aSig0 = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
@ -3936,6 +3996,9 @@ floatx80 floatx80_sqrt( floatx80 a )
    bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
    floatx80 z;
+    
+    // [shoebill]
+    a = floatx80_de_unnormalize(a);

    aSig0 = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
@ -4002,7 +4065,10 @@ floatx80 floatx80_sqrt( floatx80 a )

 flag floatx80_eq( floatx80 a, floatx80 b )
 {
-
+    // [shoebill]
+    a = floatx80_de_unnormalize(a);
+    b = floatx80_de_unnormalize(b);
+    
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
@ -4033,6 +4099,10 @@ flag floatx80_eq( floatx80 a, floatx80 b )
 flag floatx80_le( floatx80 a, floatx80 b )
 {
    flag aSign, bSign;
+    
+    // [shoebill]
+    a = floatx80_de_unnormalize(a);
+    b = floatx80_de_unnormalize(b);

    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
@ -4067,6 +4137,10 @@ flag floatx80_lt( floatx80 a, floatx80 b )
 {
    flag aSign, bSign;

+    // [shoebill]
+    a = floatx80_de_unnormalize(a);
+    b = floatx80_de_unnormalize(b);
+    
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
@ -4098,7 +4172,10 @@ flag floatx80_lt( floatx80 a, floatx80 b )

 flag floatx80_eq_signaling( floatx80 a, floatx80 b )
 {
-
+    // [shoebill]
+    a = floatx80_de_unnormalize(a);
+    b = floatx80_de_unnormalize(b);
+    
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
@ -4126,6 +4203,10 @@ flag floatx80_eq_signaling( floatx80 a, floatx80 b )
 flag floatx80_le_quiet( floatx80 a, floatx80 b )
 {
    flag aSign, bSign;
+    
+    // [shoebill]
+    a = floatx80_de_unnormalize(a);
+    b = floatx80_de_unnormalize(b);

    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
@ -4163,6 +4244,10 @@ flag floatx80_lt_quiet( floatx80 a, floatx80 b )
 {
    flag aSign, bSign;

+    // [shoebill]
+    a = floatx80_de_unnormalize(a);
+    b = floatx80_de_unnormalize(b);
+    
    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
--- a/core/cpu.c
+++ b/core/cpu.c
@ -1398,7 +1398,7 @@ static void inst_cas (void) {
    ~decompose(ext, 0000 000 uuu 000 ccc);
    
    const uint8_t sz = (1 << s) >> 1; // (01 -> byte, 10 -> word, 11 -> long)
-
+    
    call_ea_read(M, sz);
    
    /* The dest/source/result operands are reversed here from inst_cmp */
@ -2747,26 +2747,30 @@ static void inst_bchg_immediate (void) {
    
 static void inst_ext (void) {
    ~decompose(shoe.op, 0100 100 ooo 000 rrr);
+    // Sign-extend d[r] in place; Z is set only when the extended result is 0
    switch (o) {
        case ~b(010): { // byte -> word
            uint16_t val = (int8_t)get_d(r, 1);
            set_d(r, val, 2);
+            set_sr_z(get_d(r, 2) == 0);
+            set_sr_n(mib(shoe.d[r], 2));
            break;
        } case ~b(011): { // word -> long
            uint32_t val = (int16_t)get_d(r, 2);
            set_d(r, val, 4);
+            set_sr_z(get_d(r, 4) == 0);
+            set_sr_n(mib(shoe.d[r], 4));
            break;
        } case ~b(111): { // byte -> long
            uint32_t val = (int8_t)get_d(r, 1);
            set_d(r, val, 4);
+            set_sr_z(get_d(r, 4) == 0);
+            set_sr_n(mib(shoe.d[r], 4));
            break;
        }
    }
    set_sr_v(0);
    set_sr_c(0);
-    // if the LSb of o is 1, then result size == long
-    set_sr_z(get_d(r, 2+2*(o&1))==0);
-    set_sr_n(mib(shoe.d[r], 2+2*(o&1))); 
 }
    
 static void inst_andi_to_sr (void) {
--- a/core/dis.c
+++ b/core/dis.c
@ -777,7 +777,7 @@ void dis_long_div () {
    sprintf(dest, "d%u", R);
    if (Q != R) 
        sprintf(dest+2, ":d%u", Q);
-    sprintf(dis.str, "div%c%s.l %s,%s", "us"[u], s?"":"l", decode_ea_rw(M, 4), dest);
+    sprintf(dis.str, "div%c%s.l %s,%s", "us"[u], s?"l":"", decode_ea_rw(M, 4), dest);
 }

 void dis_cmpm () {
--- a/core/fpu.c
+++ b/core/fpu.c
@ -397,6 +397,8 @@ static void _fpu_read_ea_commit(const uint8_t format)
        assert(!"size==1, reg==a7");
 }

+static long double _floatx80_to_long_double(floatx80 f80);
+
 static void _fpu_write_ea(uint8_t mr, uint8_t format, floatx80 *f, uint8_t K)
 {
    fpu_get_state_ptr();
@ -421,7 +423,10 @@ static void _fpu_write_ea(uint8_t mr, uint8_t format, floatx80 *f, uint8_t K)
    
    const _Bool is_nan = ((f->high << 1) == 0xfffe) && f->low;
    
-    slog("inst_f fpu_write_ea EA=%u/%u data=%Lf format=%u\n", m, r, 666.0L, format);
+    {
+        const long double tmp = _floatx80_to_long_double(*f);
+        slog("FPU: fpu_write_ea EA=%u/%u data=%Lf format=%u\n", m, r, tmp, format);
+    }
    
    /* Initialize softfloat's exceptions bits/rounding mode */
    
@ -496,11 +501,12 @@ static void _fpu_write_ea(uint8_t mr, uint8_t format, floatx80 *f, uint8_t K)
    
    /* Copy the formatted data into *addr */
    
-    slog("inst_f  fpu_write_ea: addr=0x08x data=0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
-         addr,
-         ptr[0], ptr[1], ptr[2], ptr[3],
-         ptr[4], ptr[5], ptr[6], ptr[7],
-         ptr[8], ptr[9], ptr[10], ptr[11]);
+    {
+        slog("FPU:  fpu_write_ea: addr=0x%08x data=", addr);
+        for (i=0; i<size; i++)
+            printf("%02x", ptr[i]);
+        printf("\n");
+    }
    
    for (i=0; i<size; i++) {
        lset(addr + i, 1, buf[i]);
@ -1352,22 +1358,35 @@ static void inst_fmath_fatanh ()
 static void inst_fmath_fcmp ()
 {
    fpu_get_state_ptr();
+    const uint8_t source_zero = _float128_is_zero(fpu->source);
+    const uint8_t source_inf = _float128_is_infinity(fpu->source);
+    const uint8_t source_sign = _float128_is_neg(fpu->source);
+    const uint8_t dest_zero = _float128_is_zero(fpu->dest);
+    const uint8_t dest_inf = _float128_is_infinity(fpu->dest);
+    const uint8_t dest_sign = _float128_is_neg(fpu->dest);
+    
+    /* If the operands are "in-range", then we actually need to compare them */
+    if (!(source_zero | source_inf | dest_zero | dest_inf)) {
+        // The flags describe (dest - source):
+        // dest == source -> Z
+        // dest < source -> N
+        if (float128_eq(fpu->dest, fpu->source))
+            cc_z = 1;
+        else if (float128_le(fpu->dest, fpu->source)) // equality was ruled out above, so le means strictly less
+            cc_n = 1;
+    }
+    else {
+        const uint8_t offset = ((source_zero << 5) | (source_inf << 4) | (source_sign << 3) |
+                                (dest_zero << 2) | (dest_inf << 1) | (dest_sign << 0));
+        /* Precomputed answers for every combination: bit number `offset` of each constant is the flag */
+        cc_z = (0x0000303008040201ULL >> offset) & 1;
+        cc_n = (0x00002a2a083b0a3bULL >> offset) & 1;
+        
+        slog("FPU: fcmp: hit uncommon case: offset=0x%x z=%u n=%u\n", offset, cc_z, cc_n);
+    }
    
    /* Don't write the result back to the register */
    fpu->write_back = 0;
-    
-    fpu->result = float128_sub(fpu->dest, fpu->source);
-    
-    /*
-     * The 68881 docs say fcmp doesn't throw any exceptions
-     * based on the result, but I'm not sure I believe it.
-     
-     if (float_exception_flags & float_flag_invalid)
-        es_operr = 1;
-     
-     if (float_exception_flags & float_flag_inexact)
-        es_inex2 = 1;
-     */
 }

 static void inst_fmath_fcos ()
@ -2254,11 +2273,16 @@ static void inst_fmath_ftst ()
 {
    fpu_get_state_ptr();
    
+    const _Bool source_zero = _float128_is_zero(fpu->source);
+    const _Bool source_inf = _float128_is_infinity(fpu->source);
+    const _Bool source_sign = _float128_is_neg(fpu->source);
+    
+    cc_z = source_zero;
+    cc_i = source_inf;
+    cc_n = source_sign;
+    
    /* Don't write the result back to the register */
    fpu->write_back = 0;
-    
-    /* ftst just sets the cond codes according to the source */
-    fpu->result = fpu->source;
 }

 static void inst_fmath_ftwotox ()
@ -2343,9 +2367,6 @@ static void _fmath_set_condition_codes (floatx80 val)
    const uint32_t exp = val.high & 0x7fff;
    const _Bool sign = val.high >> 15;
    
-    /* Clear the whole CC register byte */
-    fpu->fpsr.raw &= 0x00ffffff;
-    
    /* Check for zero */
    cc_z = ((exp == 0) && (frac == 0));
    
@ -2576,6 +2597,9 @@ static void inst_fmath (const uint16_t ext)
    es_snan = 0;  // Set if one of the inputs was a signaling NaN
    es_bsun = 0;  // never set here
    
+    /* Clear the whole CC register byte */
+    fpu->fpsr.raw &= 0x00ffffff;
+    
    fpu->write_back = 1; // let "do-write-back" be the default behavior
    fpu->fmath_op = e;
    
@ -2623,6 +2647,12 @@ static void inst_fmath (const uint16_t ext)
             (((_fmath_flags[e] & FMATH_TYPE_DYADIC) &&
               float128_is_nan(fpu->dest)))) {
        _fmath_handle_nans();
+        /*
+         * The result is guaranteed to be NaN, so we need
+         * to set cc_nan for ftst and fcmp, who bypass
+         * _fmath_set_condition_codes()
+         */
+        cc_nan = 1;
        goto computation_done;
    }
    
@ -2643,13 +2673,12 @@ static void inst_fmath (const uint16_t ext)
     */
 computation_done:
    
-    /* Convert the 128-bit result to the specified precision */
-    /*
-     * FIXME: If fpu->write_back==0, should we still go through rounding?
-     *        The condition codes will still need to be set. Should they
-     *        be set based on the intermediate result or rounded result?
+    /* 
+     * Convert the 128-bit result to the specified precision.
+     * FIXME: do we need to do rounding for ftst/fcmp?
     */
-    rounded_result = _fmath_round_intermediate_result();
+    if (fpu->write_back)
+        rounded_result = _fmath_round_intermediate_result();
    
    
    /* Update the accrued exception bits */
@ -2687,6 +2716,12 @@ computation_done:
            throwable <<= 1;
        }
        
+        /*
+         * FIXME: are condition codes ever set if an exception is thrown?
+         * I think not, so clear the whole CC register byte
+         */
+        fpu->fpsr.raw &= 0x00ffffff;
+        
        /*
         * Convert the exception bit position
         * to the correct vector number, and throw
@ -2699,22 +2734,22 @@ computation_done:
    
    /*
     * Otherwise, no exceptions to throw!
-     * Calculate the condition codes from the result.
-     */
-    _fmath_set_condition_codes(rounded_result);
-    
-    /*
     * We're definitely running to completion now,
     * so commit ea-read changes
     */
    _fpu_read_ea_commit(source_specifier);
    
-    /* Write back the result, and we're done! */
    if (fpu->write_back) {
-        fpu->fp[dest_register] = rounded_result;
+        /* Calculate the condition codes from the result. */
+        _fmath_set_condition_codes(rounded_result);
        
-        long double tmp = _floatx80_to_long_double(rounded_result);
-        slog("FPU: result = %Lf\n", tmp);
+        /* Write back the result, and we're done! */
+        if (fpu->write_back) {
+            fpu->fp[dest_register] = rounded_result;
+            
+            long double tmp = _floatx80_to_long_double(rounded_result);
+            slog("FPU: result = %Lf\n", tmp);
+        }
    }
 }

@ -2839,7 +2874,7 @@ static void inst_fmovem_control (const uint16_t ext)
            uint8_t newround = fpu->fpcr.b._mc_rnd;
            
            if (round != newround) {
-                slog("inst_fmovem_control: HEY: round %u -> %u\n", round, newround);
+                slog("FPU: inst_fmovem_control: HEY: round %u -> %u\n", round, newround);
            }
        }
        if (S) fpu->fpsr.raw = buf[i++];
@ -2859,7 +2894,7 @@ static void inst_fmovem_control (const uint16_t ext)
    
    
    
-    slog("inst_fmove_control: notice: (EA = %u/%u %08x CSI = %u%u%u)\n", m, r, (uint32_t)shoe.dat, C, S, I);
+    slog("FPU: inst_fmove_control: notice: (EA = %u/%u %08x CSI = %u%u%u)\n", m, r, (uint32_t)shoe.dat, C, S, I);
    
    
 }
@ -2911,7 +2946,7 @@ static void inst_fmovem (const uint16_t ext)
        assert(p); // assert post-increment mask
    }
    
-    slog("inst_fmovem: pre=%08x mask=%08x EA=%u/%u addr=0x%08x size=%u %s\n", pre_mask, mask, m, r, addr, size, d?"to mem":"from mem");
+    slog("FPU: inst_fmovem: pre=%08x mask=%08x EA=%u/%u addr=0x%08x size=%u %s\n", pre_mask, mask, m, r, addr, size, d?"to mem":"from mem");
    
    if (d) {
        // Write those registers
@ -2969,6 +3004,7 @@ static void inst_fmovem (const uint16_t ext)
 * other FPU instructions.
 */

+
 static _Bool fpu_test_cc(uint8_t cc)
 {
    fpu_get_state_ptr();
@ -3136,7 +3172,7 @@ void inst_frestore () {
    
    if (m==3) {
        shoe.a[r] += size;
-        slog("frestore: changing shoe.a[%u] += %u\n", r, size);
+        slog("FPU: frestore: changing shoe.a[%u] += %u\n", r, size);
    }
 }