From bb6d1e719f3f0c79cf95498da573fccfe4b96244 Mon Sep 17 00:00:00 2001
From: pruten <pruten@gmail.com>
Date: Sun, 28 Sep 2014 16:51:08 -0400
Subject: [PATCH] More half-baked newfpu goodness

Once the framework for handling exceptions, accrued exceptions,
condition codes, and rounding is done, I can start implementing
actual "fmath" instructions. Everything not handled by softfloat
will be imprecisely and hackily implemented by using the best
available native float math (e.g. cosf(), cos(), or cosl())
---
 core/SoftFloat/test.c |  23 --
 core/newfpu.c         | 552 +++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 548 insertions(+), 27 deletions(-)
 delete mode 100644 core/SoftFloat/test.c

diff --git a/core/SoftFloat/test.c b/core/SoftFloat/test.c
deleted file mode 100644
index 3627648..0000000
--- a/core/SoftFloat/test.c
+++ /dev/null
@@ -1,23 +0,0 @@
-#include <stdio.h>
-#include <stdint.h>
-#include <math.h>
-#include "softfloat.h"
-
-floatx80 f[8];
-
-int main (void)
-{
-    float128 a = int64_to_float128(123);
-    float128 b = int64_to_float128(123);
-    float128 c = float128_mul(a, b);
-    
-    int32_t c_int = float128_to_int32(c);
-    
-    
-    printf("123 * 123 = %d (expected %d)\n", c_int, 123 * 123);
-    
-    printf("%f\n", a);
-    
-    
-    return 0;
-}
\ No newline at end of file
diff --git a/core/newfpu.c b/core/newfpu.c
index 2ca265a..3bbb1d9 100644
--- a/core/newfpu.c
+++ b/core/newfpu.c
@@ -131,6 +131,40 @@ typedef struct {
     
 } fpu_state_t;
 
+enum rounding_precision_t {
+    prec_extended = 0,
+    prec_single = 1,
+    prec_double = 2,
+};
+
+enum rounding_mode_t {
+    mode_nearest = 0,
+    mode_zero = 1,
+    mode_neg = 2,
+    mode_pos = 3
+};
+
+/*
+ * 0 L     long word integer
+ * 1 S     single precision real
+ * 2 X     extended precision real
+ * 3 P{#k} packed decimal real with static k factor
+ * 4 W     word integer
+ * 5 D     double precision real
+ * 6 B     byte integer
+ * 7 P{Dn} packed decimal real with dynamic k factor
+ */
+static const uint8_t _format_sizes[8] = {4, 4, 12, 12, 2, 8, 1, 12};
+enum {
+    format_L = 0,
+    format_S = 1,
+    format_X = 2,
+    format_Ps = 3,
+    format_W = 4,
+    format_D = 5,
+    format_B = 6,
+    format_Pd = 7
+} fpu_formats;
 
 #define fpu_get_state_ptr() fpu_state_t *fpu = (fpu_state_t*)shoe.fpu_state
 #define nextword() ({const uint16_t w=lget(shoe.pc,2); if (shoe.abort) {return;}; shoe.pc+=2; w;})
@@ -189,10 +223,215 @@ static _Bool _bsun_test()
     return 1;
 }
 
+#pragma mark Float format translators
+
+static float128 _int8_to_intermediate(int8_t byte)
+{
+    return int32_to_float128((int32_t)byte);
+}
+
+static float128 _int16_to_intermediate(int16_t sh)
+{
+    return int32_to_float128((int32_t)sh);
+}
+
+static float128 _int32_to_intermediate(int32_t in)
+{
+    return int32_to_float128(in);
+}
+
+static float128 _float_to_intermediate(uint32_t f)
+{
+    assert(sizeof(uint32_t) == sizeof(float32));
+    return float32_to_float128((float32)f);
+}
+
+/*
+ * _double_to_intermediate(d): d needs to be 68k-native order (8 bytes)
+ */
+static float128 _double_to_intermediate(uint8_t *d)
+{
+    assert(sizeof(uint64_t) == sizeof(float64));
+    
+    return float64_to_float128((float64) ntohll(*(uint64_t*)d));
+}
+
+/*
+ * _extended_to_intermediate(e): e needs to be 68k-native order (12 bytes)
+ */
+static float128 _extended_to_intermediate(uint8_t *e)
+{
+    /*
+     * softfloat floatx80 format:
+     * uint64_t low; // the low part of the extended float (significand, low exponent bits)
+     * uint16_t high; // the high part, sign, high exponent bits
+     */
+    floatx80 x80 = {
+        .high = (e[0] << 8) | e[1],
+        .low = ntohll(*(uint64_t*)&e[4])
+    };
+    return floatx80_to_float128(x80);
+}
+
+/*
+ * Set softfloat's rounding mode
+ * (fpcr.mc_rnd and softfloat use different values for these modes)
+ */
+static void _set_rounding_mode(enum rounding_mode_t mode)
+{
+    const int8 rounding_map[4] = {
+        float_round_nearest_even, float_round_to_zero,
+        float_round_up, float_round_down
+    };
+    
+    float_rounding_mode = rounding_map[mode];
+}
+
+#pragma mark EA routines
+
+/*
+ * Note: fpu_read_ea modifies shoe.pc, and fpu_read_ea_commit
+ *        modifies shoe.a[x] for pre/post-inc/decrement
+ * Returns false if we're aborting
+ */
+static _Bool _fpu_read_ea(const uint8_t format, float128 *result)
+{
+    fpu_get_state_ptr();
+    
+    ~decompose(shoe.op, 0000 0000 00 mmmrrr);
+    
+    const uint8_t size = _format_sizes[format];
+    uint32_t addr = 0;
+    
+    /*
+     * Step 1: find the effective address, store it in addr
+     *         (or the actual data, if unavailable)
+     */
+    
+    switch (m) {
+        case 0:
+            if (format == format_S)
+                *result = _float_to_intermediate(shoe.d[r]);
+            else if (format == format_B)
+                *result = _int8_to_intermediate(shoe.d[r] & 0xff);
+            else if (format == format_W)
+                *result = _int16_to_intermediate(shoe.d[r] & 0xffff);
+            else if (format == format_L)
+                *result = int32_to_float128(shoe.d[r]);
+            else {
+                /*
+                 * No other format can be used with a data register
+                 * (because they require >4 bytes)
+                 */
+                throw_illegal_instruction();
+                return 0;
+            }
+            goto got_data;
+            
+        case 1:
+            /* Address regisers can't be used */
+            throw_illegal_instruction();
+            return 0;
+        
+        case 3:
+            addr = shoe.a[r];
+            assert(!( r==7 && size==1));
+            goto got_address;
+            
+        case 4:
+            addr = shoe.a[r] - size;
+            assert(!( r==7 && size==1));
+            goto got_address;
+            
+        case 7:
+            if (r == 4) {
+                addr = shoe.pc;
+                shoe.pc += size;
+                goto got_address;
+            }
+            
+            // fall through to default:
+            
+        default: {
+            ~decompose(shoe.op, 0000 0000 00 MMMMMM);
+            shoe.mr = M;
+            ea_addr();
+            if (shoe.abort)
+                return 0;
+            
+            addr = (uint32_t)shoe.dat;
+            goto got_address;
+        }
+
+    }
+    
+got_address:
+    
+    /*
+     * Step 2: Load the data from the effective address
+     */
+    
+    if (size <= 4) {
+        const uint32_t raw = lget(addr, size);
+        if (shoe.abort)
+            return 0;
+        
+        switch (format) {
+            case format_B:
+                *result = _int8_to_intermediate(raw & 0xff);
+                break;
+            case format_W:
+                *result = _int16_to_intermediate(raw & 0xffff);
+                break;
+            case format_L:
+                *result = _int32_to_intermediate(raw);
+                break;
+            case format_S:
+                *result = _int32_to_intermediate(raw);
+                break;
+            default:
+                assert(0); /* never get here */
+        }
+    }
+    else { // if (size > 4) -> if format is double, extended, or packed
+        uint8_t buf[12];
+        uint32_t i;
+        
+        for (i = 0; i < size; i++) {
+            buf[i] = lget(addr + i, 1);
+            if (shoe.abort)
+                return 0;
+        }
+        
+        switch (format) {
+            case format_D:
+                *result = _double_to_intermediate(buf);
+                break;
+            case format_X:
+                *result = _extended_to_intermediate(buf);
+                break;
+            case format_Ps:
+            case format_Pd:
+                // FIXME: implement packed formats
+                assert(!"Somebody tried to use a packed format!\n");
+                // throw_illegal_instruction();
+                // return 0;
+            default:
+                assert(0); // never get here
+        }
+    }
+    
+got_data:
+    
+    return 1;
+}
+
 #pragma mark Second-hop instructions
 
 static void inst_fmath (const uint16_t ext)
 {
+    fpu_get_state_ptr();
+    
     ~decompose(shoe.op, 1111 001 000 MMMMMM);
     ~decompose(ext, 0 a 0 sss ddd eeeeeee);
     
@@ -206,8 +445,7 @@ static void inst_fmath (const uint16_t ext)
     float128 source, result;
     
     if (src_in_ea) {
-        source = _fpu_read_ea(M, source_specifier);
-        if (shoe.abort)
+        if (!_fpu_read_ea(source_specifier, &source))
             return ;
     }
     else
@@ -215,22 +453,328 @@ static void inst_fmath (const uint16_t ext)
     
     float128 dest = floatx80_to_float128(fpu->fp[dest_register]);
     
+    /*
+     * Thoughts on the meaning of the bits in the fmath opcode
+     * Bits 6543210
+     *
+     * The 6th bit (Bxxxxx) is only implemented on 68040,
+     * and it is only used to force the rounding mode.
+     * If bit 6 is set, then bit 2 controls whether it rounds to
+     * single or double. (Bit 2 is unchanged from the extended version
+     * for single-rounding, and flipped for double-rounding)
+     *
+     * Wait no, this doesn't work for fsqrt.
+     * Maybe the bits don't have any specific meaning...
+     */
+    
+    /*
+     * We'll shrink the precision and perform rounding
+     * just prior to writing back the result.
+     * Certain instructions override the precision
+     * in fpcr, so keep track of the prefered prec here.
+     */
+    enum rounding_precision_t rounding_prec = mc_prec;
+    
+    /*
+     * For all the intermediate calculations, we
+     * probably want to use nearest-rounding mode.
+     */
+    _set_rounding_mode(mode_nearest);
+    
+    /* Reset softfloat's exception flags */
+    float_exception_flags = 0;
+    
+    /* Reset fpsr's exception flags */
+    es_inex1 = 0; // this is only set for imprecisely-rounded packed inputs (not implemented)
+    es_inex2 = 0; // set if we ever lose precision (during the op or during rounding)
+    es_dz = 0;    // set if we divided by zero
+    es_unfl = 0;  // set if we underflowed (inex2 should be set too, I think)
+    es_ovfl = 0;  // set if we overflowed (inex2 should be set too, I think)
+    es_operr = 0; // ?
+    es_snan = 0;  // Set if one of the inputs was a signaling NaN
+    es_bsun = 0;  // never set here
+    
+    
+    switch (e) {
+        case ~b(1000000): // fsmove
+        case ~b(1000100): // fdmove
+            /* These are only legal on 68040 */
+            throw_illegal_instruction();
+            return ;
+            
+            if (e == ~b(1000000)) rounding_prec = prec_single;
+            else if (e == ~b(1000100)) rounding_prec = prec_double;
+            else assert(0);
+        case ~b(0000000): // fmove
+            result = source;
+            
+            break;
+            
+        case ~b(0000001): // fint
+            break;
+            
+        case ~b(0000010): // fsinh
+            break;
+            
+        case ~b(0000011): // fintrz
+            break;
+            
+        case ~b(1000001): // fssqrt
+        case ~b(1000101): // fdsqrt
+            /* These are only legal on 68040 */
+            throw_illegal_instruction();
+            return ;
+            
+            if (e == ~b(1000001)) rounding_prec = prec_single;
+            else if (e == ~b(1000101)) rounding_prec = prec_double;
+            else assert(0);
+        case ~b(0000100): // fsqrt;
+            
+            break;
+            
+        case ~b(0000110): // flognp1
+            break;
+            
+        case ~b(0001000): // fetoxm1
+            break;
+            
+        case ~b(0001001): // ftanh
+            break;
+            
+        case ~b(0001010): // fatan
+            break;
+            
+        case ~b(0001100): // fasin
+            break;
+            
+        case ~b(0001101): // fatanh
+            break;
+            
+        case ~b(0001110): // fsin
+            break;
+            
+        case ~b(0001111): // ftan
+            break;
+            
+        case ~b(0010000): // fetox
+            break;
+            
+        case ~b(0010001): // ftwotox
+            break;
+            
+        case ~b(0010010): // ftentox
+            break;
+            
+        case ~b(0010100): // flogn
+            break;
+            
+        case ~b(0010101): // flog10
+            break;
+            
+        case ~b(0010110): // flog2
+            break;
+            
+        case ~b(1011000): // fsabs
+        case ~b(1011100): // fdabs
+            /* These are only legal on 68040 */
+            throw_illegal_instruction();
+            return ;
+            
+            if (e == ~b(1011000)) rounding_prec = prec_single;
+            else if (e == ~b(1011100)) rounding_prec = prec_double;
+            else assert(0);
+        case ~b(0011000): // fabs
+            
+            break;
+            
+        case ~b(0011001): // fcosh
+            break;
+            
+        case ~b(1011010): // fsneg
+        case ~b(1011110): // fdneg
+            /* These are only legal on 68040 */
+            throw_illegal_instruction();
+            return ;
+            
+            if (e == ~b(1011010)) rounding_prec = prec_single;
+            else if (e == ~b(1011110)) rounding_prec = prec_double;
+            else assert(0);
+        case ~b(0011010): // fneg
+            
+            break;
+            
+        case ~b(0011100): // facos
+            break;
+            
+        case ~b(0011101): // fcos
+            break;
+            
+        case ~b(0011110): // fgetexp
+            break;
+            
+        case ~b(0011111): // fgetman
+            break;
+            
+        case ~b(0100001): // fmod
+            break;
+            
+        case ~b(1100010): // fsadd
+        case ~b(1100110): // fdadd
+            /* These are only legal on 68040 */
+            throw_illegal_instruction();
+            return ;
+            
+            if (e == ~b(1100010)) rounding_prec = prec_single;
+            else if (e == ~b(1100110)) rounding_prec = prec_double;
+            else assert(0);
+        case ~b(0100010): // fadd
+            
+            break;
+            
+        case ~b(0100100): // fsgldiv
+            break;
+            
+        case ~b(0100111): // fsglmul
+            break;
+            
+        case ~b(0100101): // frem
+            break;
+            
+        case ~b(0100110): // fscale
+            break;
+            
+        case ~b(0111000): // fcmp
+            break;
+            
+        case ~b(0111010): // ftst
+            break;
+            
+        case ~b(1100000): // fsdiv
+        case ~b(1100100): // fddiv
+            /* These are only legal on 68040 */
+            throw_illegal_instruction();
+            return ;
+            
+            if (e == ~b(1100000)) rounding_prec = prec_single;
+            else if (e == ~b(1100100)) rounding_prec = prec_double;
+            else assert(0);
+        case ~b(0100000): // fdiv
+            
+            break;
+            
+        case ~b(1100011): // fsmul
+        case ~b(1100111): // fdmul
+            /* These are only legal on 68040 */
+            throw_illegal_instruction();
+            return ;
+            
+            if (e == ~b(1100011)) rounding_prec = prec_single;
+            else if (e == ~b(1100111)) rounding_prec = prec_double;
+            else assert(0);
+        case ~b(0100011): // fmul
+            
+            break;
+            
+        case ~b(1101000): // fssub
+        case ~b(1101100): // fdsub
+            /* These are only legal on 68040 */
+            throw_illegal_instruction();
+            return ;
+            
+            if (e == ~b(1101000)) rounding_prec = prec_single;
+            else if (e == ~b(1101100)) rounding_prec = prec_double;
+            else assert(0);
+        case ~b(0101000): // fsub
+
+            break;
+    }
+    
+    /*
+     * Now update all the exception flags,
+     * and determine whether we want to
+     * actually throw an exception.
+     */
+    
+    /*
+     es_inex1 = 0;
+     es_inex2 = 0;
+     es_dz = 0;
+     es_unfl = 0;
+     es_ovfl = 0;
+     es_operr = 0;
+     es_snan = 0;
+     es_bsun = 0;
+     */
+    if (float_exception_flags & float_flag_divbyzero)
+        es_dz = 1;
+    if (float_exception_flags & float_flag_overflow)
+        es_ovfl = 1;
+    if (float_exception_flags & float_flag_underflow) {
+        /* Wait... this might not be right */
+        es_unfl = 1;
+        es_inex2 = 1;
+    }
+         
+    /*
+     Accrued    -iff-     exception
+     
+     invalid              bsun or snan or operr
+     overflow             overflow
+     underflow            (underflow AND inex2)
+     divzero              divzero
+     inexact              inex1 or inex2 or overflow
+     */
+    
+    /*
+     Exception  -> Accrued
+     
+     bsun       -> invalid (can't happen here)
+     snan       -> invalid (we can check for this manually)
+     operr      -> invalid
+     overflow   -> overflow
+     underflow...
+     divzero    -> divzero
+     inex1      -> inexact
+     inex2      -> inexact
+     overflow   -> inexact
+     */
+    
+    /*
+     bsun     -> only set for f*cc* instructions
+     snan     -> was one of the operands a signaling NaN?
+     operr    -> 
+     overflow -> The result didn't fit, rounded to +/- infinity
+     underflow-> The result didn't fit, rounded to +/- 0
+     divzero  -> divided by zero
+     inex1    -> The input (packed) format was imprecisely converted
+     inex2    -> The resulting output couldn't be precisely represented
+     */
+    
+    // Presumably if overflow or underflow happen, inex2 must be set?
+    // Underflow and overflow can't be set at the same time?
+     
     
     assert(!"fmath");
 }
 
 static void inst_fmove (const uint16_t ext)
 {
+    fpu_get_state_ptr();
+    
     assert(!"fmove");
 }
 
 static void inst_fmovem_control (const uint16_t ext)
 {
+    fpu_get_state_ptr();
+    
     assert(!"fmovem_control");
 }
 
 static void inst_fmovem (const uint16_t ext)
 {
+    fpu_get_state_ptr();
+    
     assert(!"fmovem");
 }
 
@@ -472,8 +1016,8 @@ void inst_ftrapcc () {
 }
 
 void inst_fnop() {
-    // This is technically fbcc, so we should set fpiar too
-    fpu->fpiar = shoe.orig_pc;
+    // This is technically fbcc
+    inst_fbcc();
 }
 
 void inst_fpu_other () {