From 937a28a8bb243738f2cd4df8bde5965e71346635 Mon Sep 17 00:00:00 2001
From: Peter Rutenbar <pruten@gmail.com>
Date: Sun, 9 Nov 2014 10:56:58 -0500
Subject: [PATCH] Implemented frem, fmod, fsglmul/div, and fixed fabs

- And also fixed an assert
- New frem impl is slower, but easier to get the quotient bits.
---
 core/newfpu.c | 260 +++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 238 insertions(+), 22 deletions(-)

diff --git a/core/newfpu.c b/core/newfpu.c
index b0dccf2..6c24aee 100644
--- a/core/newfpu.c
+++ b/core/newfpu.c
@@ -1005,6 +1005,34 @@ static void inst_fmath_fmovecr (void)
     return $map_str;
 })
 
+static _Bool _float128_is_zero (float128 f)
+{
+    return ((f.high << 1) == 0) && (f.low == 0);
+}
+
+static _Bool _float128_is_neg (float128 f)
+{
+    return f.high >> 63;
+}
+
+static _Bool _float128_is_infinity (float128 f)
+{
+    const uint64_t frac_a = f.high & 0x0000ffffffffffff;
+    const uint64_t frac_b = f.low;
+    const uint16_t exp = (f.high >> 48) & 0x7fff;
+    
+    return (exp == 0x7fff) && ((frac_a | frac_b) == 0);
+}
+
+static _Bool _float128_is_nan (float128 f)
+{
+    const uint64_t frac_a = f.high & 0x0000ffffffffffff;
+    const uint64_t frac_b = f.low;
+    const uint16_t exp = (f.high >> 48) & 0x7fff;
+    
+    return (exp == 0x7fff) && ((frac_a | frac_b) != 0);
+}
+
 static void inst_fmath_fabs ()
 {
     fpu_get_state_ptr();
@@ -1039,6 +1067,14 @@ static void inst_fmath_fadd ()
     /* Throw inex2 if the result is inexact */
     if (float_exception_flags & float_flag_inexact)
         es_inex2 = 1;
+    
+    /* Throw ovfl if the op overflowed */
+    if (float_exception_flags & float_flag_overflow)
+        es_ovfl = 1;
+    
+    /* Throw unfl if the op overflowed */
+    if (float_exception_flags & float_flag_underflow)
+        es_unfl = 1;
 }
 
 static void inst_fmath_fasin ()
@@ -1114,6 +1150,14 @@ static void inst_fmath_fdiv ()
     /* Throw inex2 if the result is inexact */
     if (float_exception_flags & float_flag_inexact)
         es_inex2 = 1;
+    
+    /* Throw ovfl if the op overflowed */
+    if (float_exception_flags & float_flag_overflow)
+        es_ovfl = 1;
+    
+    /* Throw unfl if the op overflowed */
+    if (float_exception_flags & float_flag_underflow)
+        es_unfl = 1;
 }
 
 static void inst_fmath_fetox ()
@@ -1228,7 +1272,60 @@ static void inst_fmath_fmod ()
 {
     fpu_get_state_ptr();
     
-    assert(!"fmath: fmod not implemented");
+    const _Bool source_zero = _float128_is_zero(fpu->source);
+    const _Bool source_inf = _float128_is_infinity(fpu->source);
+    const _Bool dest_zero = _float128_is_zero(fpu->dest);
+    const _Bool dest_inf = _float128_is_infinity(fpu->dest);
+    
+    /* I just assume the quotient/sign are 0 for the following cases */
+    qu_quotient = 0;
+    qu_s = 0;
+    
+    /* If source is zero, result is nan */
+    if (source_zero) {
+        fpu->result = _nan128;
+        es_operr = 1;
+        return ;
+    }
+    /* If dest (but not source) is zero, result is that zero */
+    else if (dest_zero) {
+        fpu->result = fpu->dest;
+        return ;
+    }
+    /* If dest is infinity, result is nan */
+    else if (dest_inf) {
+        fpu->result = _nan128;
+        es_operr = 1;
+        return ;
+    }
+    /* If source, but not dest, is infinity, result is dest */
+    else if (source_inf) {
+        fpu->result = fpu->dest;
+        return ;
+    }
+    
+    /* -- We're past the edge cases, do the actual op -- */
+    
+    const signed char old_round_mode = float_rounding_mode;
+    
+    /* fmod uses round-to-zero */
+    float_rounding_mode = float_round_to_zero;
+    
+    float128 N = float128_div(fpu->dest, fpu->source);
+    N = float128_round_to_int(N);
+    
+    float_rounding_mode = old_round_mode;
+    
+    fpu->result = float128_sub(fpu->dest, float128_mul(fpu->source, N));
+    
+    /* FIXME: not sure how to set unfl reliably */
+    
+    _Bool sign = N.high >> 63; /* Remember the sign */
+    N.high <<= 1; /* Clear the sign */
+    N.high >>= 1;
+    uint32_t final = float128_to_int32(N); /* Get the integer of the quotient */
+    qu_quotient = final & 0x7f;
+    qu_s = sign;
 }
 
 static void inst_fmath_fmove ()
@@ -1254,6 +1351,14 @@ static void inst_fmath_fmul ()
     /* Throw inex2 if the result is inexact */
     if (float_exception_flags & float_flag_inexact)
         es_inex2 = 1;
+    
+    /* Throw ovfl if the op overflowed */
+    if (float_exception_flags & float_flag_overflow)
+        es_ovfl = 1;
+    
+    /* Throw unfl if the op overflowed */
+    if (float_exception_flags & float_flag_underflow)
+        es_unfl = 1;
 }
 
 static void inst_fmath_fneg ()
@@ -1263,6 +1368,11 @@ static void inst_fmath_fneg ()
     /* Flip the sign bit */
     fpu->result = fpu->dest;
     fpu->result.high ^= (1ULL << 63);
+    
+    /* 
+     * FIXME: you're supposed to throw UNFL if this is a
+     *        denormalized number, I think.
+     */
 }
 
 static uint8_t __find_quotient(float128 dest, float128 source, float128 result)
@@ -1289,34 +1399,69 @@ static uint8_t __find_quotient(float128 dest, float128 source, float128 result)
     return (uint8_t)final;
 }
 
+const float128 _nan128 = {
+    .high = 0xFFFF800000000000ULL,
+    .low = 0
+};
+
 static void inst_fmath_frem ()
 {
     fpu_get_state_ptr();
-    float128 tmp;
     
-    fpu->result = float128_rem(fpu->dest, fpu->source);
+    const _Bool source_zero = _float128_is_zero(fpu->source);
+    const _Bool source_inf = _float128_is_infinity(fpu->source);
+    const _Bool dest_zero = _float128_is_zero(fpu->dest);
+    const _Bool dest_inf = _float128_is_infinity(fpu->dest);
     
-    /*
-     * Throw operr (and return NaN) if source is zero,
-     * or if dest is infinity.
-     */
-    if (float_exception_flags & float_flag_invalid)
+    /* I just assume the quotient/sign are 0 for the following cases */
+    qu_quotient = 0;
+    qu_s = 0;
+    
+    /* If source is zero, result is nan */
+    if (source_zero) {
+        fpu->result = _nan128;
         es_operr = 1;
+        return ;
+    }
+    /* If dest (but not source) is zero, result is that zero */
+    else if (dest_zero) {
+        fpu->result = fpu->dest;
+        return ;
+    }
+    /* If dest is infinity, result is nan */
+    else if (dest_inf) {
+        fpu->result = _nan128;
+        es_operr = 1;
+        return ;
+    }
+    /* If source, but not dest, is infinity, result is dest */
+    else if (source_inf) {
+        fpu->result = fpu->dest;
+        return ;
+    }
     
-    // FIXME: !! set quotient byte!
-    // you may be able to use the local "q" bits64 in
-    // float128_rem()
-    uint8_t quo = __find_quotient(fpu->dest, fpu->source, fpu->result);
-    printf("FPU: __find_quotient = 0x%02x\n", quo);
+    /* -- We're past the edge cases, do the actual op -- */
     
+    const signed char old_round_mode = float_rounding_mode;
     
-    qu_quotient = quo & 0x7f;
-    qu_s = quo >> 7;
-    /*
-     * errata: 68kprm has typesetting issues (or typos?)
-     *         if source==inf, don't set operr!
-     */
+    /* frem uses round-to-nearest */
+    float_rounding_mode = float_round_nearest_even;
     
+    float128 N = float128_div(fpu->dest, fpu->source);
+    N = float128_round_to_int(N);
+    
+    float_rounding_mode = old_round_mode;
+    
+    fpu->result = float128_sub(fpu->dest, float128_mul(fpu->source, N));
+    
+    /* FIXME: not sure how to set unfl reliably */
+    
+    _Bool sign = N.high >> 63; /* Remember the sign */
+    N.high <<= 1; /* Clear the sign */
+    N.high >>= 1;
+    uint32_t final = float128_to_int32(N); /* Get the integer of the quotient */
+    qu_quotient = final & 0x7f;
+    qu_s = sign;
 }
 
 static void inst_fmath_fscale ()
@@ -1330,14 +1475,77 @@ static void inst_fmath_fsgldiv ()
 {
     fpu_get_state_ptr();
     
-    assert(!"fmath: fsgldiv not implemented");
+    float128 source = fpu->source;
+    float128 dest = fpu->dest;
+    
+    /* Dump the low 88 bits of the source/dest mantissas */
+    source.low = 0;
+    source.high &= 0xffffffffff000000;
+    dest.low = 0;
+    dest.high &= 0xffffffffff000000;
+    
+    fpu->result = float128_div(dest, source);
+    
+    /* Throw operr (and return NaN) if both operands are zero */
+    if (float_exception_flags & float_flag_invalid)
+        es_operr = 1;
+    
+    /* Throw divide-by-zero if dividend is zero */
+    if (float_exception_flags & float_flag_divbyzero)
+        es_dz = 1;
+    
+    /* Throw inex2 if the result is inexact */
+    if (float_exception_flags & float_flag_inexact)
+        es_inex2 = 1;
+    
+    /* Throw ovfl if the op overflowed */
+    if (float_exception_flags & float_flag_overflow)
+        es_ovfl = 1;
+    
+    /* Throw unfl if the op overflowed */
+    if (float_exception_flags & float_flag_underflow)
+        es_unfl = 1;
 }
 
 static void inst_fmath_fsglmul ()
 {
     fpu_get_state_ptr();
     
-    assert(!"fmath: fsglmul not implemented");
+    /*
+     * As far as I can tell, fsglmul/fsgldiv use an ALU
+     * for the mantissa that is only 24-bits wide. Everything
+     * else is done with regular internal precision.
+     */
+    
+    float128 source = fpu->source;
+    float128 dest = fpu->dest;
+    
+    /* Dump the low 88 bits of the source/dest mantissas */
+    source.low = 0;
+    source.high &= 0xffffffffff000000;
+    dest.low = 0;
+    dest.high &= 0xffffffffff000000;
+    
+    fpu->result = float128_mul(dest, source);
+    
+    /*
+     * Throw operr (and return NaN) if one operand is infinity
+     * and the other is zero.
+     */
+    if (float_exception_flags & float_flag_invalid)
+        es_operr = 1;
+    
+    /* Throw inex2 if the result is inexact */
+    if (float_exception_flags & float_flag_inexact)
+        es_inex2 = 1;
+    
+    /* Throw ovfl if the op overflowed */
+    if (float_exception_flags & float_flag_overflow)
+        es_ovfl = 1;
+    
+    /* Throw unfl if the op overflowed */
+    if (float_exception_flags & float_flag_underflow)
+        es_unfl = 1;
 }
 
 static void inst_fmath_fsin ()
@@ -1396,6 +1604,14 @@ static void inst_fmath_fsub ()
     /* Throw inex2 if the result is inexact */
     if (float_exception_flags & float_flag_inexact)
         es_inex2 = 1;
+    
+    /* Throw ovfl if the op overflowed */
+    if (float_exception_flags & float_flag_overflow)
+        es_ovfl = 1;
+    
+    /* Throw unfl if the op overflowed */
+    if (float_exception_flags & float_flag_underflow)
+        es_unfl = 1;
 }
 
 static void inst_fmath_ftan ()
@@ -2276,7 +2492,7 @@ void inst_frestore () {
         size = 0xb8; // BUSY state frame
     else {
         slog("Frestore encountered an unknown state frame 0x%04x\n", word);
-        assert("inst_frestore: bad state frame");
+        assert(!"inst_frestore: bad state frame");
         return ;
     }