More half-baked newfpu goodness

Once the framework for handling exceptions, accrued exceptions, condition codes, and rounding is done, I can start implementing the actual "fmath" instructions. Everything not handled by softfloat will be implemented imprecisely and hackily, using the best available native float math (e.g. cosf(), cos(), or cosl()).
2025-08-05 04:25:56 +00:00 · 2014-09-28 16:51:08 -04:00
parent bb2ec0a27d
commit bb6d1e719f
2 changed files with 548 additions and 27 deletions
--- a/core/SoftFloat/test.c
+++ b/core/SoftFloat/test.c
@@ -1,23 +0,0 @@
-#include <stdio.h>
-#include <stdint.h>
-#include <math.h>
-#include "softfloat.h"
-
-floatx80 f[8];
-
-int main (void)
-{
-    float128 a = int64_to_float128(123);
-    float128 b = int64_to_float128(123);
-    float128 c = float128_mul(a, b);
-    
-    int32_t c_int = float128_to_int32(c);
-    
-    
-    printf("123 * 123 = %d (expected %d)\n", c_int, 123 * 123);
-    
-    printf("%f\n", a);
-    
-    
-    return 0;
-}
--- a/core/newfpu.c
+++ b/core/newfpu.c
@@ -131,6 +131,40 @@ typedef struct {
    
 } fpu_state_t;

+enum rounding_precision_t { /* precision a result is rounded to before it is written back */
+    prec_extended = 0,
+    prec_single = 1,
+    prec_double = 2, /* NOTE(review): values presumably mirror the fpcr precision field (mc_prec) -- confirm */
+};
+
+enum rounding_mode_t { /* rounding modes using the fpcr.mc_rnd numbering (not softfloat's) */
+    mode_nearest = 0,
+    mode_zero = 1,
+    mode_neg = 2, /* presumably: round toward minus infinity -- confirm */
+    mode_pos = 3 /* presumably: round toward plus infinity -- confirm */
+};
+
+/*
+ * Operand data formats, by format number:
+ * 0 L     long word integer
+ * 1 S     single precision real
+ * 2 X     extended precision real
+ * 3 P{#k} packed decimal real with static k factor
+ * 4 W     word integer
+ * 5 D     double precision real
+ * 6 B     byte integer
+ * 7 P{Dn} packed decimal real with dynamic k factor
+ */
+static const uint8_t _format_sizes[8] = {4, 4, 12, 12, 2, 8, 1, 12}; /* operand size in bytes, indexed by format number */
+enum { /* symbolic names for the format numbers listed above */
+    format_L = 0,
+    format_S = 1,
+    format_X = 2,
+    format_Ps = 3,
+    format_W = 4,
+    format_D = 5,
+    format_B = 6,
+    format_Pd = 7
+} fpu_formats; /* NOTE(review): this declares a variable named fpu_formats of an anonymous enum type, not a type -- confirm intent */

 #define fpu_get_state_ptr() fpu_state_t *fpu = (fpu_state_t*)shoe.fpu_state
 #define nextword() ({const uint16_t w=lget(shoe.pc,2); if (shoe.abort) {return;}; shoe.pc+=2; w;})
@@ -189,10 +223,215 @@ static _Bool _bsun_test()
    return 1;
 }

+#pragma mark Float format translators
+
+/* Convert a signed byte integer operand to the float128 intermediate format. */
+static float128 _int8_to_intermediate(int8_t byte)
+{
+    const int32_t widened = (int32_t)byte; /* sign-extending widen */
+    return int32_to_float128(widened);
+}
+
+/* Convert a signed word integer operand to the float128 intermediate format. */
+static float128 _int16_to_intermediate(int16_t sh)
+{
+    const int32_t widened = (int32_t)sh; /* sign-extending widen */
+    return int32_to_float128(widened);
+}
+
+/* Convert a signed long word integer operand to the float128 intermediate format. */
+static float128 _int32_to_intermediate(int32_t in)
+{
+    const float128 converted = int32_to_float128(in);
+    return converted;
+}
+
+/*
+ * Convert a single precision operand, passed as its raw 32-bit pattern,
+ * to the float128 intermediate format.
+ * (assumes softfloat's float32 is a 32-bit container for the raw bits -- TODO confirm)
+ */
+static float128 _float_to_intermediate(uint32_t f)
+{
+    /* The cast below only makes sense if both types have the same width */
+    assert(sizeof(float32) == sizeof(uint32_t));
+    
+    const float32 single = (float32)f;
+    return float32_to_float128(single);
+}
+
+/*
+ * _double_to_intermediate(d): d needs to be 68k-native order (8 bytes)
+ *
+ * Converts a double precision operand to the float128 intermediate format.
+ * The big-endian bytes are assembled one at a time instead of being loaded
+ * through a (uint64_t*) cast: d is a plain byte buffer with no alignment
+ * guarantee, and reading it through a uint64_t pointer violates strict aliasing.
+ */
+static float128 _double_to_intermediate(uint8_t *d)
+{
+    assert(sizeof(uint64_t) == sizeof(float64));
+    
+    uint64_t bits = 0;
+    uint32_t i;
+    
+    /* d[0] is the most significant byte */
+    for (i = 0; i < 8; i++)
+        bits = (bits << 8) | d[i];
+    
+    return float64_to_float128((float64)bits);
+}
+
+/*
+ * _extended_to_intermediate(e): e needs to be 68k-native order (12 bytes)
+ *
+ * Converts an extended precision operand to the float128 intermediate format.
+ * Bytes 0-1 become floatx80.high and bytes 4-11 become floatx80.low;
+ * bytes 2-3 are not used by this conversion.
+ */
+static float128 _extended_to_intermediate(uint8_t *e)
+{
+    /*
+     * softfloat floatx80 format:
+     * uint64_t low; // the 64-bit significand
+     * uint16_t high; // the sign bit and the exponent
+     */
+    uint64_t significand = 0;
+    uint32_t i;
+    
+    /*
+     * Assemble the big-endian significand byte by byte; &e[4] is not
+     * guaranteed to be aligned for a direct uint64_t load.
+     */
+    for (i = 4; i < 12; i++)
+        significand = (significand << 8) | e[i];
+    
+    floatx80 x80 = {
+        .high = (e[0] << 8) | e[1],
+        .low = significand
+    };
+    return floatx80_to_float128(x80);
+}
+
+/*
+ * Set softfloat's rounding mode
+ * (fpcr.mc_rnd and softfloat use different values for these modes)
+ *
+ * mode: the rounding mode, in fpcr.mc_rnd numbering
+ */
+static void _set_rounding_mode(enum rounding_mode_t mode)
+{
+    /*
+     * mode_neg rounds toward minus infinity (softfloat's "down") and
+     * mode_pos rounds toward plus infinity (softfloat's "up").
+     * Designated initializers keep each pairing explicit.
+     */
+    const int8 rounding_map[4] = {
+        [mode_nearest] = float_round_nearest_even,
+        [mode_zero]    = float_round_to_zero,
+        [mode_neg]     = float_round_down,
+        [mode_pos]     = float_round_up
+    };
+    
+    float_rounding_mode = rounding_map[mode];
+}
+
+#pragma mark EA routines
+
+/*
+ * Read the source operand of an FPU instruction from the effective address
+ * encoded in the low six bits of shoe.op, and convert it to the float128
+ * intermediate format.
+ *
+ *   format: operand data format (format_L, format_S, ...; indexes _format_sizes)
+ *   result: receives the converted operand when the function returns true
+ *
+ * Note: fpu_read_ea modifies shoe.pc, and fpu_read_ea_commit
+ *        modifies shoe.a[x] for pre/post-inc/decrement
+ *        (this function never writes shoe.a[x] itself)
+ * Returns false if we're aborting, or if an illegal instruction was thrown
+ */
+static _Bool _fpu_read_ea(const uint8_t format, float128 *result)
+{
+    fpu_get_state_ptr();
+    
+    ~decompose(shoe.op, 0000 0000 00 mmmrrr);
+    
+    const uint8_t size = _format_sizes[format];
+    uint32_t addr = 0;
+    
+    /*
+     * Step 1: find the effective address, store it in addr
+     *         (or the actual data, if unavailable)
+     */
+    
+    switch (m) {
+        case 0:
+            /* The operand lives in data register r; no memory access needed */
+            if (format == format_S)
+                *result = _float_to_intermediate(shoe.d[r]);
+            else if (format == format_B)
+                *result = _int8_to_intermediate(shoe.d[r] & 0xff);
+            else if (format == format_W)
+                *result = _int16_to_intermediate(shoe.d[r] & 0xffff);
+            else if (format == format_L)
+                *result = int32_to_float128(shoe.d[r]);
+            else {
+                /*
+                 * No other format can be used with a data register
+                 * (because they require >4 bytes)
+                 */
+                throw_illegal_instruction();
+                return 0;
+            }
+            goto got_data;
+            
+        case 1:
+            /* Address registers can't be used */
+            throw_illegal_instruction();
+            return 0;
+        
+        case 3:
+            /* Operand is at the address currently held in a[r] */
+            addr = shoe.a[r];
+            /* byte-sized accesses through a7 are not handled here */
+            assert(!( r==7 && size==1));
+            goto got_address;
+            
+        case 4:
+            /* Operand is at a[r] minus the operand size */
+            addr = shoe.a[r] - size;
+            /* byte-sized accesses through a7 are not handled here */
+            assert(!( r==7 && size==1));
+            goto got_address;
+            
+        case 7:
+            if (r == 4) {
+                /*
+                 * The operand is stored in the instruction stream at pc.
+                 * NOTE(review): for a byte operand this reads the byte at
+                 * pc and advances pc by 1, leaving pc odd -- confirm that
+                 * this matches how byte immediates are encoded.
+                 */
+                addr = shoe.pc;
+                shoe.pc += size;
+                goto got_address;
+            }
+            
+            // fall through to default:
+            
+        default: {
+            /* Every other mode: let the generic EA code compute the address */
+            ~decompose(shoe.op, 0000 0000 00 MMMMMM);
+            shoe.mr = M;
+            ea_addr();
+            if (shoe.abort)
+                return 0;
+            
+            addr = (uint32_t)shoe.dat;
+            goto got_address;
+        }
+
+    }
+    
+got_address:
+    
+    /*
+     * Step 2: Load the data from the effective address
+     */
+    
+    if (size <= 4) {
+        const uint32_t raw = lget(addr, size);
+        if (shoe.abort)
+            return 0;
+        
+        switch (format) {
+            case format_B:
+                *result = _int8_to_intermediate(raw & 0xff);
+                break;
+            case format_W:
+                *result = _int16_to_intermediate(raw & 0xffff);
+                break;
+            case format_L:
+                *result = _int32_to_intermediate(raw);
+                break;
+            case format_S:
+                /*
+                 * raw is the bit pattern of a single precision float,
+                 * not an integer value, so it must go through the float
+                 * translator (same as the data register case above).
+                 */
+                *result = _float_to_intermediate(raw);
+                break;
+            default:
+                assert(0); /* never get here */
+        }
+    }
+    else { // if (size > 4) -> if format is double, extended, or packed
+        uint8_t buf[12];
+        uint32_t i;
+        
+        /* Copy the operand one byte at a time, bailing out if a read aborts */
+        for (i = 0; i < size; i++) {
+            buf[i] = lget(addr + i, 1);
+            if (shoe.abort)
+                return 0;
+        }
+        
+        switch (format) {
+            case format_D:
+                *result = _double_to_intermediate(buf);
+                break;
+            case format_X:
+                *result = _extended_to_intermediate(buf);
+                break;
+            case format_Ps:
+            case format_Pd:
+                // FIXME: implement packed formats
+                assert(!"Somebody tried to use a packed format!\n");
+                // throw_illegal_instruction();
+                // return 0;
+                // (falls through to the default assert below)
+            default:
+                assert(0); // never get here
+        }
+    }
+    
+got_data:
+    
+    return 1;
+}
+
 #pragma mark Second-hop instructions

 static void inst_fmath (const uint16_t ext)
 {
+    fpu_get_state_ptr();
+    
    ~decompose(shoe.op, 1111 001 000 MMMMMM);
    ~decompose(ext, 0 a 0 sss ddd eeeeeee);
    
@@ -206,8 +445,7 @@ static void inst_fmath (const uint16_t ext)
    float128 source, result;
    
    if (src_in_ea) {
-        source = _fpu_read_ea(M, source_specifier);
-        if (shoe.abort)
+        if (!_fpu_read_ea(source_specifier, &source))
            return ;
    }
    else
@@ -215,22 +453,328 @@ static void inst_fmath (const uint16_t ext)
    
    float128 dest = floatx80_to_float128(fpu->fp[dest_register]);
    
+    /*
+     * Thoughts on the meaning of the bits in the fmath opcode
+     * Bits 6543210
+     *
+     * The 6th bit (Bxxxxx) is only implemented on 68040,
+     * and it is only used to force the rounding mode.
+     * If bit 6 is set, then bit 2 controls whether it rounds to
+     * single or double. (Bit 2 is unchanged from the extended version
+     * for single-rounding, and flipped for double-rounding)
+     *
+     * Wait no, this doesn't work for fsqrt.
+     * Maybe the bits don't have any specific meaning...
+     */
+    
+    /*
+     * We'll shrink the precision and perform rounding
+     * just prior to writing back the result.
+     * Certain instructions override the precision
+     * in fpcr, so keep track of the preferred prec here.
+     */
+    enum rounding_precision_t rounding_prec = mc_prec;
+    
+    /*
+     * For all the intermediate calculations, we
+     * probably want to use nearest-rounding mode.
+     */
+    _set_rounding_mode(mode_nearest);
+    
+    /* Reset softfloat's exception flags */
+    float_exception_flags = 0;
+    
+    /* Reset fpsr's exception flags */
+    es_inex1 = 0; // this is only set for imprecisely-rounded packed inputs (not implemented)
+    es_inex2 = 0; // set if we ever lose precision (during the op or during rounding)
+    es_dz = 0;    // set if we divided by zero
+    es_unfl = 0;  // set if we underflowed (inex2 should be set too, I think)
+    es_ovfl = 0;  // set if we overflowed (inex2 should be set too, I think)
+    es_operr = 0; // ?
+    es_snan = 0;  // Set if one of the inputs was a signaling NaN
+    es_bsun = 0;  // never set here
+    
+    
+    switch (e) {
+        case ~b(1000000): // fsmove
+        case ~b(1000100): // fdmove
+            /* These are only legal on 68040 */
+            throw_illegal_instruction();
+            return ;
+            
+            if (e == ~b(1000000)) rounding_prec = prec_single;
+            else if (e == ~b(1000100)) rounding_prec = prec_double;
+            else assert(0);
+        case ~b(0000000): // fmove
+            result = source;
+            
+            break;
+            
+        case ~b(0000001): // fint
+            break;
+            
+        case ~b(0000010): // fsinh
+            break;
+            
+        case ~b(0000011): // fintrz
+            break;
+            
+        case ~b(1000001): // fssqrt
+        case ~b(1000101): // fdsqrt
+            /* These are only legal on 68040 */
+            throw_illegal_instruction();
+            return ;
+            
+            if (e == ~b(1000001)) rounding_prec = prec_single;
+            else if (e == ~b(1000101)) rounding_prec = prec_double;
+            else assert(0);
+        case ~b(0000100): // fsqrt;
+            
+            break;
+            
+        case ~b(0000110): // flognp1
+            break;
+            
+        case ~b(0001000): // fetoxm1
+            break;
+            
+        case ~b(0001001): // ftanh
+            break;
+            
+        case ~b(0001010): // fatan
+            break;
+            
+        case ~b(0001100): // fasin
+            break;
+            
+        case ~b(0001101): // fatanh
+            break;
+            
+        case ~b(0001110): // fsin
+            break;
+            
+        case ~b(0001111): // ftan
+            break;
+            
+        case ~b(0010000): // fetox
+            break;
+            
+        case ~b(0010001): // ftwotox
+            break;
+            
+        case ~b(0010010): // ftentox
+            break;
+            
+        case ~b(0010100): // flogn
+            break;
+            
+        case ~b(0010101): // flog10
+            break;
+            
+        case ~b(0010110): // flog2
+            break;
+            
+        case ~b(1011000): // fsabs
+        case ~b(1011100): // fdabs
+            /* These are only legal on 68040 */
+            throw_illegal_instruction();
+            return ;
+            
+            if (e == ~b(1011000)) rounding_prec = prec_single;
+            else if (e == ~b(1011100)) rounding_prec = prec_double;
+            else assert(0);
+        case ~b(0011000): // fabs
+            
+            break;
+            
+        case ~b(0011001): // fcosh
+            break;
+            
+        case ~b(1011010): // fsneg
+        case ~b(1011110): // fdneg
+            /* These are only legal on 68040 */
+            throw_illegal_instruction();
+            return ;
+            
+            if (e == ~b(1011010)) rounding_prec = prec_single;
+            else if (e == ~b(1011110)) rounding_prec = prec_double;
+            else assert(0);
+        case ~b(0011010): // fneg
+            
+            break;
+            
+        case ~b(0011100): // facos
+            break;
+            
+        case ~b(0011101): // fcos
+            break;
+            
+        case ~b(0011110): // fgetexp
+            break;
+            
+        case ~b(0011111): // fgetman
+            break;
+            
+        case ~b(0100001): // fmod
+            break;
+            
+        case ~b(1100010): // fsadd
+        case ~b(1100110): // fdadd
+            /* These are only legal on 68040 */
+            throw_illegal_instruction();
+            return ;
+            
+            if (e == ~b(1100010)) rounding_prec = prec_single;
+            else if (e == ~b(1100110)) rounding_prec = prec_double;
+            else assert(0);
+        case ~b(0100010): // fadd
+            
+            break;
+            
+        case ~b(0100100): // fsgldiv
+            break;
+            
+        case ~b(0100111): // fsglmul
+            break;
+            
+        case ~b(0100101): // frem
+            break;
+            
+        case ~b(0100110): // fscale
+            break;
+            
+        case ~b(0111000): // fcmp
+            break;
+            
+        case ~b(0111010): // ftst
+            break;
+            
+        case ~b(1100000): // fsdiv
+        case ~b(1100100): // fddiv
+            /* These are only legal on 68040 */
+            throw_illegal_instruction();
+            return ;
+            
+            if (e == ~b(1100000)) rounding_prec = prec_single;
+            else if (e == ~b(1100100)) rounding_prec = prec_double;
+            else assert(0);
+        case ~b(0100000): // fdiv
+            
+            break;
+            
+        case ~b(1100011): // fsmul
+        case ~b(1100111): // fdmul
+            /* These are only legal on 68040 */
+            throw_illegal_instruction();
+            return ;
+            
+            if (e == ~b(1100011)) rounding_prec = prec_single;
+            else if (e == ~b(1100111)) rounding_prec = prec_double;
+            else assert(0);
+        case ~b(0100011): // fmul
+            
+            break;
+            
+        case ~b(1101000): // fssub
+        case ~b(1101100): // fdsub
+            /* These are only legal on 68040 */
+            throw_illegal_instruction();
+            return ;
+            
+            if (e == ~b(1101000)) rounding_prec = prec_single;
+            else if (e == ~b(1101100)) rounding_prec = prec_double;
+            else assert(0);
+        case ~b(0101000): // fsub
+
+            break;
+    }
+    
+    /*
+     * Now update all the exception flags,
+     * and determine whether we want to
+     * actually throw an exception.
+     */
+    
+    /*
+     es_inex1 = 0;
+     es_inex2 = 0;
+     es_dz = 0;
+     es_unfl = 0;
+     es_ovfl = 0;
+     es_operr = 0;
+     es_snan = 0;
+     es_bsun = 0;
+     */
+    if (float_exception_flags & float_flag_divbyzero)
+        es_dz = 1;
+    if (float_exception_flags & float_flag_overflow)
+        es_ovfl = 1;
+    if (float_exception_flags & float_flag_underflow) {
+        /* Wait... this might not be right */
+        es_unfl = 1;
+        es_inex2 = 1;
+    }
+         
+    /*
+     Accrued    -iff-     exception
+     
+     invalid              bsun or snan or operr
+     overflow             overflow
+     underflow            (underflow AND inex2)
+     divzero              divzero
+     inexact              inex1 or inex2 or overflow
+     */
+    
+    /*
+     Exception  -> Accrued
+     
+     bsun       -> invalid (can't happen here)
+     snan       -> invalid (we can check for this manually)
+     operr      -> invalid
+     overflow   -> overflow
+     underflow...
+     divzero    -> divzero
+     inex1      -> inexact
+     inex2      -> inexact
+     overflow   -> inexact
+     */
+    
+    /*
+     bsun     -> only set for f*cc* instructions
+     snan     -> was one of the operands a signaling NaN?
+     operr    -> 
+     overflow -> The result didn't fit, rounded to +/- infinity
+     underflow-> The result didn't fit, rounded to +/- 0
+     divzero  -> divided by zero
+     inex1    -> The input (packed) format was imprecisely converted
+     inex2    -> The resulting output couldn't be precisely represented
+     */
+    
+    // Presumably if overflow or underflow happen, inex2 must be set?
+    // Underflow and overflow can't be set at the same time?
+     
    
    assert(!"fmath");
 }

 static void inst_fmove (const uint16_t ext)
 {
+    fpu_get_state_ptr();
+    
    assert(!"fmove"); /* stub: not implemented yet, so this fails whenever assertions are enabled */
 }

 static void inst_fmovem_control (const uint16_t ext)
 {
+    fpu_get_state_ptr();
+    
    assert(!"fmovem_control"); /* stub: not implemented yet, so this fails whenever assertions are enabled */
 }

 static void inst_fmovem (const uint16_t ext)
 {
+    fpu_get_state_ptr();
+    
    assert(!"fmovem"); /* stub: not implemented yet, so this fails whenever assertions are enabled */
 }

@@ -472,8 +1016,8 @@ void inst_ftrapcc () {
 }

 void inst_fnop() {
-    // This is technically fbcc, so we should set fpiar too
-    fpu->fpiar = shoe.orig_pc;
+    // This is technically fbcc, so hand the whole instruction to inst_fbcc()
+    inst_fbcc();
 }

 void inst_fpu_other () {