shoebill/core/fpu.c

3301 lines
95 KiB
C

/*
* Copyright (c) 2013-2014, Peter Rutenbar <pruten@gmail.com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "../core/shoebill.h"
#include "../core/SoftFloat/softfloat.h"
#pragma mark Structures and macros
/*
 * Shorthand accessors for the individual bitfields of fpcr and fpsr.
 * Every macro expands to an lvalue of the form fpu->fpcr.b.X / fpu->fpsr.b.X,
 * so a local pointer named `fpu` must be in scope wherever they are used
 * (see fpu_get_state_ptr()).
 */
// Mode control byte
#define mc_rnd (fpu->fpcr.b._mc_rnd)
#define mc_prec (fpu->fpcr.b._mc_prec)
// Exception enable byte
#define ee_inex1 (fpu->fpcr.b._ee_inex1)
#define ee_inex2 (fpu->fpcr.b._ee_inex2)
#define ee_dz (fpu->fpcr.b._ee_dz)
#define ee_unfl (fpu->fpcr.b._ee_unfl)
#define ee_ovfl (fpu->fpcr.b._ee_ovfl)
#define ee_operr (fpu->fpcr.b._ee_operr)
#define ee_snan (fpu->fpcr.b._ee_snan)
#define ee_bsun (fpu->fpcr.b._ee_bsun)
// Accrued exception byte
#define ae_inex (fpu->fpsr.b._ae_inex)
#define ae_dz (fpu->fpsr.b._ae_dz)
#define ae_unfl (fpu->fpsr.b._ae_unfl)
#define ae_ovfl (fpu->fpsr.b._ae_ovfl)
#define ae_iop (fpu->fpsr.b._ae_iop)
// Exception status byte
#define es_inex1 (fpu->fpsr.b._es_inex1)
#define es_inex2 (fpu->fpsr.b._es_inex2)
#define es_dz (fpu->fpsr.b._es_dz)
#define es_unfl (fpu->fpsr.b._es_unfl)
#define es_ovfl (fpu->fpsr.b._es_ovfl)
#define es_operr (fpu->fpsr.b._es_operr)
#define es_snan (fpu->fpsr.b._es_snan)
#define es_bsun (fpu->fpsr.b._es_bsun)
// Quotient byte
#define qu_quotient (fpu->fpsr.b._qu_quotient)
#define qu_s (fpu->fpsr.b._qu_s) /* quotient sign */
// Condition codes
#define cc_nan (fpu->fpsr.b._cc_nan)
#define cc_i (fpu->fpsr.b._cc_i)
#define cc_z (fpu->fpsr.b._cc_z)
#define cc_n (fpu->fpsr.b._cc_n)
/*
 * Complete emulated FPU state: the instruction address register, the
 * control (fpcr) and status (fpsr) registers, the eight data registers,
 * and scratch state shared between the fmath instruction helpers.
 *
 * fpcr and fpsr are unions of a bitfield view (.b) and a raw integer
 * view (.raw). Code elsewhere masks .raw with 0x0000ff00 to select the
 * exception byte, which relies on the bitfields being declared
 * lowest-bit-first.
 * NOTE(review): bitfield allocation order is implementation-defined --
 * confirm for every supported compiler/ABI.
 */
typedef struct {
uint32_t fpiar; // FPU iaddr
union { // fpcr, fpu control register
struct {
// Mode control byte
uint16_t _mc_zero : 4; // zero/dummy
uint16_t _mc_rnd : 2; // rounding mode
uint16_t _mc_prec : 2; // rounding precision
// Exception enable byte
uint16_t _ee_inex1 : 1; // inexact decimal input
uint16_t _ee_inex2 : 1; // inexact operation
uint16_t _ee_dz : 1; // divide by zero
uint16_t _ee_unfl : 1; // underflow
uint16_t _ee_ovfl : 1; // overflow
uint16_t _ee_operr : 1; // operand error
uint16_t _ee_snan : 1; // signalling not a number
uint16_t _ee_bsun : 1; // branch/set on unordered
} b;
uint16_t raw;
} fpcr;
union { // fpsr, fpu status register
struct {
// Accrued exception byte
uint32_t _dummy1 : 3; // dummy/zero
uint32_t _ae_inex : 1; // inexact
uint32_t _ae_dz : 1; // divide by zero
uint32_t _ae_unfl : 1; // underflow
uint32_t _ae_ovfl : 1; // overflow
uint32_t _ae_iop : 1; // invalid operation
// Exception status byte
uint32_t _es_inex1 : 1; // inexact decimal input
uint32_t _es_inex2 : 1; // inexact operation
uint32_t _es_dz : 1; // divide by zero
uint32_t _es_unfl : 1; // underflow
uint32_t _es_ovfl : 1; // overflow
uint32_t _es_operr : 1; // operand error
uint32_t _es_snan : 1; // signalling not a number
uint32_t _es_bsun : 1; // branch/set on unordered
// Quotient byte
uint32_t _qu_quotient : 7;
uint32_t _qu_s : 1;
// Condition code byte
uint32_t _cc_nan : 1; // not a number
uint32_t _cc_i : 1; // infinity
uint32_t _cc_z : 1; // zero
uint32_t _cc_n : 1; // negative
uint32_t _dummy2 : 4; // dummy/zero
} b;
uint32_t raw;
} fpsr;
floatx80 fp[8]; // 80 bit floating point general registers
// State for the static fmath instruction implementations
float128 source, dest, result; // operands and result of the current fmath op
_Bool write_back;
uint16_t extension_word; // only set in inst_fmath(), only used by fsincos
uint8_t fmath_op; // opcode (also the ROM offset for fmovecr)
} fpu_state_t;
/* Values of the fpcr rounding-precision field (mc_prec). */
enum rounding_precision_t {
prec_extended = 0,
prec_single = 1,
prec_double = 2,
};
/* Values of the fpcr rounding-mode field (mc_rnd). */
enum rounding_mode_t {
mode_nearest = 0, // round to nearest
mode_zero = 1, // round toward zero
mode_neg = 2, // round toward negative infinity
mode_pos = 3 // round toward positive infinity
};
/*
 * Operand size in bytes for each of the eight data-format codes,
 * indexed by the format number:
 *
 * 0 L long word integer
 * 1 S single precision real
 * 2 X extended precision real
 * 3 P{#k} packed decimal real with static k factor
 * 4 W word integer
 * 5 D double precision real
 * 6 B byte integer
 * 7 P{Dn} packed decimal real with dynamic k factor
 */
static const uint8_t _format_sizes[8] = {4, 4, 12, 12, 2, 8, 1, 12};
/*
 * Symbolic names for the data-format codes used to index _format_sizes.
 * NOTE(review): as written this declares a file-scope *object* named
 * fpu_formats of an anonymous enum type, not a type name -- confirm
 * whether a typedef/tag was intended.
 */
enum {
format_L = 0,
format_S = 1,
format_X = 2,
format_Ps = 3,
format_W = 4,
format_D = 5,
format_B = 6,
format_Pd = 7
} fpu_formats;
/* Declares the local `fpu` pointer that the mc_/ee_/ae_/es_/cc_ accessor macros expect. */
#define fpu_get_state_ptr() fpu_state_t *fpu = (fpu_state_t*)shoe.fpu_state
/*
 * Fetch the next instruction word/long at shoe.pc and advance shoe.pc.
 * Both contain a bare `return;` on abort, so they can only be used
 * inside functions returning void.
 */
#define nextword() ({const uint16_t w = pccache_nextword(shoe.pc); if sunlikely(shoe.abort) return; shoe.pc += 2; w;})
#define nextlong() ({const uint32_t L = pccache_nextlong(shoe.pc); if sunlikely(shoe.abort) return; shoe.pc += 4; L;})
/* Throws a privilege violation and returns from the enclosing (void) function unless sr_s() is set. */
#define verify_supervisor() {if (!sr_s()) {throw_privilege_violation(); return;}}
#pragma mark FPU exception stuff
/* Exception vector numbers that the FPU code can raise. */
enum fpu_vector_t {
fpu_vector_ftrapcc = 7,
fpu_vector_fline = 11,
fpu_vector_coprocessor_protocol_violation = 13, // won't be using this one
fpu_vector_bsun = 48,
fpu_vector_inexact = 49,
fpu_vector_divide_by_zero = 50,
fpu_vector_underflow = 51,
fpu_vector_operr = 52,
fpu_vector_overflow = 53,
fpu_vector_snan = 54
};
/*
 * Map the exception bit positions (in fpsr and fpcr)
 * to their corresponding exception vector numbers.
 * Index 0 is the highest-priority exception (bsun) and index 7 the
 * lowest (inex1); the values match the fpu_vector_t constants.
 */
const uint8_t _exception_bit_to_vector[8] = {
48, // bsun
54, // snan
52, // operr
53, // ovfl
51, // unfl
50, // dz
49, // inex2
49, // inex1
};
/*
 * Raise an FPU exception for the given vector by calling
 * throw_frame_zero() with the status register and PC saved in
 * shoe.orig_sr / shoe.orig_pc.
 */
static void throw_fpu_pre_instruction_exception(enum fpu_vector_t vector)
{
throw_frame_zero(shoe.orig_sr, shoe.orig_pc, vector);
}
/*
* Note: I may be able to get away without implementing the
* mid-instruction exception.
*/
/*
* _bsun_test() is called by every inst_f*cc instruction
* to test whether the bsun exception is enabled, throw an
* exception if so, and otherwise just set the appropriate
* bit in fpsr, and update the accrued exception byte.
*/
/*
 * Record a BSUN (branch/set on unordered) condition.
 * Always sets the BSUN status bit and the accrued IOP bit. If the BSUN
 * exception is enabled in fpcr, a pre-instruction exception is thrown.
 * Returns 1 if an exception was thrown, 0 otherwise.
 */
static _Bool _bsun_test()
{
    fpu_get_state_ptr();

    /* Status bit first, then the accrued byte (BSUN counts as IOP) */
    es_bsun = 1;
    ae_iop = 1;

    if (ee_bsun) {
        throw_fpu_pre_instruction_exception(fpu_vector_bsun);
        return 1;
    }

    /* Exception disabled: nothing was thrown */
    return 0;
}
/*
 * Local wrapper around throw_illegal_instruction(); gives the FPU code a
 * single place to hook (e.g. re-enable the assert) while debugging.
 */
static void _throw_illegal_instruction()
{
// assert(!"throw_illegal_instruction!");
throw_illegal_instruction();
}
#pragma mark Float format translators (to/from big-endian motorola format)
/*
 * Convert *f to an integer and store its low 8 bits in ptr[0].
 * NOTE(review): the 32-bit result is truncated, not saturated, to a byte.
 */
static void _floatx80_to_int8(floatx80 *f, uint8_t *ptr)
{
    const uint32_t value = floatx80_to_int32(*f);

    ptr[0] = (uint8_t)(value & 0xff);
}
/*
 * Convert *f to an integer and store its low 16 bits in ptr[0..1],
 * most significant byte first.
 */
static void _floatx80_to_int16(floatx80 *f, uint8_t *ptr)
{
    const uint32_t value = floatx80_to_int32(*f);

    ptr[1] = (uint8_t)(value & 0xff);
    ptr[0] = (uint8_t)((value >> 8) & 0xff);
}
/*
 * Convert *f to a 32-bit integer and store it in ptr[0..3],
 * most significant byte first.
 */
static void _floatx80_to_int32(floatx80 *f, uint8_t *ptr)
{
    const uint32_t value = floatx80_to_int32(*f);
    uint32_t k;

    for (k = 0; k < 4; k++)
        ptr[k] = (uint8_t)((value >> (24 - 8 * k)) & 0xff);
}
/*
 * Convert *f to single precision and store the 4-byte encoding in
 * ptr[0..3], most significant byte first.
 */
static void _floatx80_to_single(floatx80 *f, uint8_t *ptr)
{
    const float32 bits = floatx80_to_float32(*f);
    uint32_t k;

    for (k = 0; k < 4; k++)
        ptr[k] = (uint8_t)((bits >> (24 - 8 * k)) & 0xff);
}
/*
 * Convert *f to double precision and store the 8-byte encoding in
 * ptr[0..7], most significant byte first.
 */
static void _floatx80_to_double(floatx80 *f, uint8_t *ptr)
{
    const float64 bits = floatx80_to_float64(*f);
    uint32_t k;

    for (k = 0; k < 8; k++)
        ptr[k] = (uint8_t)((bits >> (56 - 8 * k)) & 0xff);
}
/*
 * Serialize *f into the 12-byte extended format at ptr[0..11]:
 * 2 bytes of f->high, 2 zero bytes, then the 8 bytes of f->low,
 * each field most significant byte first.
 */
static void _floatx80_to_extended(floatx80 *f, uint8_t *ptr)
{
    uint32_t k;

    /* Sign/exponent word */
    ptr[0] = (uint8_t)((f->high >> 8) & 0xff);
    ptr[1] = (uint8_t)(f->high & 0xff);

    /* Two bytes of padding */
    ptr[2] = 0;
    ptr[3] = 0;

    /* 64-bit significand */
    for (k = 0; k < 8; k++)
        ptr[4 + k] = (uint8_t)((f->low >> (56 - 8 * k)) & 0xff);
}
/* Sign-extend a byte integer to 32 bits and convert it to the float128 intermediate format. */
static float128 _int8_to_intermediate(int8_t byte)
{
return int32_to_float128((int32_t)byte);
}
/* Sign-extend a word integer to 32 bits and convert it to the float128 intermediate format. */
static float128 _int16_to_intermediate(int16_t sh)
{
return int32_to_float128((int32_t)sh);
}
/* Convert a long-word integer to the float128 intermediate format. */
static float128 _int32_to_intermediate(int32_t in)
{
return int32_to_float128(in);
}
/*
 * Convert a single-precision value, passed as its raw 32-bit encoding
 * in host order, to the float128 intermediate format.
 */
static float128 _single_to_intermediate(uint32_t f)
{
// float32 must be a plain 32-bit container for the cast below to be a bit-for-bit reinterpretation
assert(sizeof(uint32_t) == sizeof(float32));
return float32_to_float128((float32)f);
}
/*
* _double_to_intermediate(d): d needs to be 68k-native order (8 bytes)
*/
static float128 _double_to_intermediate(uint8_t *d)
{
assert(sizeof(uint64_t) == sizeof(float64));
return float64_to_float128((float64) ntohll(*(uint64_t*)d));
}
/*
 * _extended_to_intermediate(e): e needs to be 68k-native order (12 bytes)
 *
 * Converts a 12-byte extended-precision encoding to the float128
 * intermediate format. Bytes 0-1 hold the sign/exponent word, bytes 2-3
 * are ignored, and bytes 4-11 hold the 64-bit significand. The
 * significand is assembled byte by byte because e+4 is generally not
 * 8-byte aligned.
 */
static float128 _extended_to_intermediate(uint8_t *e)
{
    /*
     * softfloat floatx80 format:
     * uint64_t low; // the low part of the extended float (significand, low exponent bits)
     * uint16_t high; // the high part, sign, high exponent bits
     */
    uint64_t significand = 0;
    uint32_t i;

    for (i = 0; i < 8; i++)
        significand = (significand << 8) | e[4 + i];

    floatx80 x80 = {
        .high = (e[0] << 8) | e[1],
        .low = significand
    };
    return floatx80_to_float128(x80);
}
/*
 * Deserialize a 12-byte, 68k-native-order extended value into *f.
 * Bytes 0-1 are the sign/exponent word, bytes 2-3 are ignored and
 * bytes 4-11 are the significand. Assembled byte by byte so that
 * `bytes` may have any alignment.
 */
static void _extended_to_floatx80(uint8_t *bytes, floatx80 *f)
{
    uint64_t significand = 0;
    uint32_t i;

    for (i = 0; i < 8; i++)
        significand = (significand << 8) | bytes[4 + i];

    f->high = (bytes[0] << 8) | bytes[1];
    f->low = significand;
}
/*
 * Set softfloat's rounding mode
 * (fpcr.mc_rnd and softfloat use different values for these modes)
 *
 * The table is indexed by enum rounding_mode_t:
 *   mode_nearest (0) -> round to nearest even
 *   mode_zero    (1) -> round toward zero
 *   mode_neg     (2) -> round toward -infinity (softfloat "down")
 *   mode_pos     (3) -> round toward +infinity (softfloat "up")
 */
static void _set_rounding_mode(enum rounding_mode_t mode)
{
    const int8_t rounding_map[4] = {
        float_round_nearest_even, float_round_to_zero,
        float_round_down, float_round_up
    };

    /* Mask defensively: the fpcr field is only two bits wide */
    float_rounding_mode = rounding_map[mode & 3];
}
#pragma mark EA routines
/*
 * Commit the address-register side effect of a read through the EA in
 * shoe.op: post-increment (mode 3) adds the operand size to An,
 * pre-decrement (mode 4) subtracts it. Other modes leave registers alone.
 */
static void _fpu_read_ea_commit(const uint8_t format)
{
    ~decompose(shoe.op, 0000 0000 00 mmmrrr);
    const uint8_t step = _format_sizes[format];
    const _Bool post_increment = (m == 3);
    const _Bool pre_decrement = (m == 4);

    if (post_increment)
        shoe.a[r] += step;
    else if (pre_decrement)
        shoe.a[r] -= step;

    /*
     * Note: still unsure about what happens when
     * mode=pre/postincdecrement, size==1, and register==a7
     * (is the change +-2 bytes? or 1?)
     */
    if ((post_increment || pre_decrement) && (step == 1) && (r == 7))
        assert(!"size==1, reg==a7");
}
static long double _floatx80_to_long_double(floatx80 f80);

/*
 * _fpu_write_ea(): convert *f to `format` and store it at the effective
 * address described by mr (mode in bits 5-3, register in bits 2-0).
 *
 *   mr     - 6-bit EA mode/register field
 *   format - destination data format (format_B/W/L/S/D/X; packed formats
 *            are not implemented and trip an assert)
 *   f      - value to store
 *   K      - k-factor for packed formats (currently unused)
 *
 * Side effects, in order:
 *   - throws an illegal-instruction exception for address-register
 *     direct, data-register direct with a >4 byte format, and mode 7
 *     with register > 1;
 *   - writes the converted bytes to the data register or to memory
 *     (returns early if a memory write sets shoe.abort);
 *   - rewrites the fpsr exception status byte from softfloat's exception
 *     flags, then ORs the result into the accrued exception byte
 *     (condition codes are not modified);
 *   - if any set status bit is also enabled in fpcr, throws the
 *     highest-priority exception and returns without updating An;
 *   - otherwise applies the post-increment / pre-decrement to An.
 */
static void _fpu_write_ea(uint8_t mr, uint8_t format, floatx80 *f, uint8_t K)
{
    fpu_get_state_ptr();
    const uint8_t m = mr >> 3;
    const uint8_t r = mr & 7;
    const uint8_t size = _format_sizes[format];
    uint8_t buf[12], *ptr = &buf[0];
    uint32_t addr, i;

    (void)K; /* only meaningful for the (unimplemented) packed formats */

    if ((m == 1) ||
        ((m == 0) && (size > 4))) {
        /* If mode==a-reg, or mode==data reg and the size is > 4 bytes, no dice */
        _throw_illegal_instruction();
        return ;
    }
    else if ((m == 7) && (r > 1)) {
        /* If this is otherwise an illegal addr mode... */
        _throw_illegal_instruction();
        return ;
    }

    /*
     * Signaling NaN: maximum exponent (sign ignored), quiet bit (bit 62)
     * clear, and a non-zero fraction below it. The explicit integer bit
     * (bit 63) is excluded so infinities are not mistaken for NaNs.
     */
    const _Bool is_snan = ((f->high & 0x7fff) == 0x7fff) &&
                          ((f->low & 0x4000000000000000ULL) == 0) &&
                          ((f->low & 0x3fffffffffffffffULL) != 0);

    {
        const long double tmp = _floatx80_to_long_double(*f);
        slog("FPU: fpu_write_ea EA=%u/%u data=%Lf format=%u\n", m, r, tmp, format);
    }

    /* Initialize softfloat's exceptions bits/rounding mode */
    float_exception_flags = 0;
    _set_rounding_mode(mc_rnd);

    /* Convert to the appropriate format */
    switch (format) {
        case format_B:
            _floatx80_to_int8(f, ptr);
            break;
        case format_W:
            _floatx80_to_int16(f, ptr);
            break;
        case format_L:
            _floatx80_to_int32(f, ptr);
            break;
        case format_S:
            _floatx80_to_single(f, ptr);
            break;
        case format_D:
            _floatx80_to_double(f, ptr);
            break;
        case format_X:
            _floatx80_to_extended(f, ptr);
            break;
        default:
            assert(!"unsupported format (packed something!)");
    }

    /* Write to memory */
    switch (m) {
        case 0: {
            /* Data register direct: buf holds the value big-endian */
            if (format == format_B)
                set_d(r, ptr[0], 1);
            else if (format == format_W)
                set_d(r, ((uint32_t)ptr[0] << 8) | ptr[1], 2);
            else if ((format == format_L) || (format == format_S))
                set_d(r, ((uint32_t)ptr[0] << 24) | ((uint32_t)ptr[1] << 16) |
                         ((uint32_t)ptr[2] << 8) | ptr[3], 4);
            else
                assert(!"how did I get here?");
            goto done;
        }
        case 1:
            /* Rejected above; unreachable */
            assert(!"how did I get here again!");
            /* fallthrough */
        case 2:
            addr = shoe.a[r];
            break;
        case 3:
            addr = shoe.a[r];
            assert(!( r==7 && size==1));
            break;
        case 4: // pre-decrement
            addr = shoe.a[r] - size;
            assert(!( r==7 && size==1));
            break;
        default:
            call_ea_addr(mr);
            addr = (uint32_t)shoe.dat;
            break;
    }

    /* Copy the formatted data into *addr, one byte at a time */
    for (i=0; i<size; i++) {
        lset(addr + i, 1, buf[i]);
        if (shoe.abort) return ;
    }

done:
    /*
     * Set exception bits and update pre/post/blah registers.
     * note: condition codes are not modified
     */
    es_bsun = 0;
    es_snan = 0;
    es_operr = 0;
    es_ovfl = 0;
    es_unfl = 0;
    es_dz = 0;
    es_inex2 = 0;
    es_inex1 = 0;

    switch (format) {
        case format_B:
        case format_W:
        case format_L:
            /* Set snan, operr, and/or inex2 */
            es_snan = is_snan;
            es_operr = ((float_exception_flags & float_flag_invalid) != 0);
            es_inex2 = ((float_exception_flags & float_flag_inexact) != 0);
            break;
        case format_S:
        case format_D:
        case format_X:
            /* Set snan, ovfl, unfl, and/or inex2 */
            es_snan = is_snan;
            es_ovfl = ((float_exception_flags & float_flag_overflow) != 0);
            es_unfl = ((float_exception_flags & float_flag_underflow) != 0);
            es_inex2 = ((float_exception_flags & float_flag_inexact) != 0);
            break;
        case format_Pd:
        case format_Ps:
            /* Set snan, operr, and/or inex2 */
            assert(!"you better implement packed formats");
            break;
        default:
            break;
    }

    /* Update the accrued exception bits */
    ae_iop |= es_bsun | es_snan | es_operr;
    ae_ovfl |= es_ovfl;
    ae_unfl |= (es_unfl & es_inex2); // yes, &
    ae_dz |= es_dz;
    ae_inex |= es_inex1 | es_inex2 | es_ovfl;

    /* Are any exceptions both set and enabled? */
    if (fpu->fpsr.raw & fpu->fpcr.raw & 0x0000ff00) {
        /*
         * Then we need to throw an exception.
         * The exception is sent to the vector for
         * the highest priority exception, and the priority
         * order is (high->low) bsun, snan, operr, ovfl, unfl, dz, inex2/1
         * (which is the order of the bits in fpsr/fpcr).
         * Scan from the top bit down and stop at the first bit set.
         */
        uint8_t throwable = (fpu->fpsr.raw & fpu->fpcr.raw) >> 8;
        uint8_t bit = 0;

        assert(throwable);
        while (!(throwable & 0x80)) {
            throwable <<= 1;
            bit++;
        }

        /*
         * Convert the exception bit position
         * to the correct vector number, and throw
         * a (pre-instruction) exception.
         */
        throw_fpu_pre_instruction_exception(_exception_bit_to_vector[bit]);
        return ;
    }

    /* Finalize registers, and we're done */
    if (m == 3)
        shoe.a[r] += size;
    else if (m == 4)
        shoe.a[r] -= size;
}
/*
 * _fpu_read_ea(): read an operand of the given format from the effective
 * address encoded in the low 6 bits of shoe.op, and store it, converted
 * to the float128 intermediate format, in *result.
 *
 * Note: fpu_read_ea modifies shoe.pc (for immediate operands and whatever
 * ea_addr() consumes), and fpu_read_ea_commit modifies shoe.a[x] for
 * pre/post-inc/decrement -- this function itself never writes shoe.a[].
 *
 * Returns 1 on success. Returns 0 if we're aborting: an illegal
 * instruction was thrown, or shoe.abort was set by a memory access.
 */
static _Bool _fpu_read_ea(const uint8_t format, float128 *result)
{
fpu_get_state_ptr();
~decompose(shoe.op, 0000 0000 00 mmmrrr);
const uint8_t size = _format_sizes[format];
uint32_t addr = 0;
/*
* Step 1: find the effective address, store it in addr
* (or the actual data, if unavailable)
*/
slog("FPU: read_ea: mr=%u%u f=%c ", m, r, "lsxpwdb?"[format]);
switch (m) {
case 0:
// Data register direct: the operand is taken straight from Dn
if (format == format_S)
*result = _single_to_intermediate(shoe.d[r]);
else if (format == format_B)
*result = _int8_to_intermediate(shoe.d[r] & 0xff);
else if (format == format_W)
*result = _int16_to_intermediate(shoe.d[r] & 0xffff);
else if (format == format_L)
*result = int32_to_float128(shoe.d[r]);
else {
/*
* No other format can be used with a data register
* (because they require >4 bytes)
*/
_throw_illegal_instruction();
return 0;
}
slog("raw=0x%x", chop(shoe.d[r], size));
goto got_data;
case 1:
/* Address registers can't be used */
_throw_illegal_instruction();
return 0;
case 3:
// Post-increment: read at An (the increment is applied by _fpu_read_ea_commit)
addr = shoe.a[r];
assert(!( r==7 && size==1));
goto got_address;
case 4:
// Pre-decrement: read at An - size (An itself is updated by _fpu_read_ea_commit)
addr = shoe.a[r] - size;
assert(!( r==7 && size==1));
goto got_address;
case 7:
if (r == 4) {
// Immediate: the operand bytes follow in the instruction stream
// NOTE(review): pc advances by `size`, which is 1 for format_B -- confirm byte immediates
addr = shoe.pc;
shoe.pc += size;
goto got_address;
}
// fall through to default:
default: {
// Every other mode: let the generic EA decoder compute the address into shoe.dat
~decompose(shoe.op, 0000 0000 00 MMMMMM);
shoe.mr = M;
ea_addr();
if (shoe.abort)
return 0;
addr = (uint32_t)shoe.dat;
goto got_address;
}
}
got_address:
/*
* Step 2: Load the data from the effective address
*/
//slog("raw=0x");
if (size <= 4) {
// Small formats are fetched with a single lget() of `size` bytes
const uint32_t raw = lget(addr, size);
if (shoe.abort)
return 0;
//printf("%x ", raw);
switch (format) {
case format_B:
*result = _int8_to_intermediate(raw & 0xff);
break;
case format_W:
*result = _int16_to_intermediate(raw & 0xffff);
break;
case format_L:
*result = _int32_to_intermediate(raw);
break;
case format_S:
*result = _single_to_intermediate(raw);
break;
default:
assert(0); /* never get here */
}
}
else { // if (size > 4) -> if format is double, extended, or packed
// Large formats are fetched byte by byte into buf, in memory order
uint8_t buf[12];
uint32_t i;
for (i = 0; i < size; i++) {
buf[i] = lget(addr + i, 1);
slog("%02x", buf[i]);
if (shoe.abort)
return 0;
}
switch (format) {
case format_D:
*result = _double_to_intermediate(buf);
break;
case format_X:
*result = _extended_to_intermediate(buf);
break;
case format_Ps:
// case format_Pd: // not possible as a src specifier
// FIXME: implement packed formats
assert(!"Somebody tried to use a packed format!\n");
// _throw_illegal_instruction();
// return 0;
default:
assert(0); // never get here
}
}
got_data:
//printf("\n");
return 1;
}
#pragma mark Hacky low-precision transcendental implementations
/*
 * Build a float128 from its fields.
 * s -> sign (any non-zero value means negative), e -> biased exponent
 * ma -> 48 high bits of the mantissa
 * mb -> 64 low bits of the mantissa
 *
 * Layout of .high: bit 63 sign, bits 62-48 exponent, bits 47-0 ma.
 * The sign is therefore placed at bit 15 of the 16-bit sign/exponent
 * word before that word is shifted up by 48. Each argument is evaluated
 * exactly once; e and ma are masked to their field widths.
 */
#define _assemble_float128(s, e, ma, mb) ({ \
    const uint64_t _ma = (ma), _mb = (mb); \
    const uint64_t _e = (e), _s = (s); \
    float128 f = { \
        .high = ((uint64_t)(_s != 0) << 15) | (_e & 0x7fff), \
        .low = _mb \
    }; \
    f.high = ((f.high) << 48) | (_ma & 0x0000ffffffffffffULL); \
    f; \
})
/*
 * Select the host-specific implementation of the "hacky" transcendental
 * helpers. The X86 variant converts through the host's native double
 * (NATIVE) and calls the C math library.
 */
#define HACKY_MATH_X86
#ifdef HACKY_MATH_X86
#define NATIVE double
/*
 * Round a float128 to float64 and reinterpret the resulting bit pattern
 * as a host double. Byte 0 of the double receives the least significant
 * byte of the pattern (little-endian host layout).
 */
double _to_native(float128 f128)
{
    const float64 bits = float128_to_float64(f128);
    double native;
    uint8_t *dst = (uint8_t*)&native;
    uint32_t k;

    for (k = 0; k < 8; k++)
        dst[k] = (uint8_t)((bits >> (8 * k)) & 0xff);

    return native;
}
/*
 * Reinterpret a host double as a float64 bit pattern and widen it to
 * float128. Byte 7 of the double is taken as the most significant byte
 * of the pattern (little-endian host layout).
 */
float128 _from_native(double n)
{
    const uint8_t *src = (uint8_t*)&n;
    float64 bits = 0;
    int k;

    for (k = 7; k >= 0; k--)
        bits = (bits << 8) | src[k];

    return float64_to_float128(bits);
}
#include <math.h>
/*
 * Thin aliases for the host math library, operating on NATIVE (double).
 * expm1/log1p/log2 use the dedicated C99 functions rather than
 * exp(a)-1, log(a+1) and log(a)/log(2): the composed forms cancel to 0
 * for small |a| (where the 1.0 swallows the argument) and log(a)/log(2)
 * is not exact for powers of two.
 */
#define _native_cos(a) cos(a)
#define _native_acos(a) acos(a)
#define _native_cosh(a) cosh(a)
#define _native_sin(a) sin(a)
#define _native_asin(a) asin(a)
#define _native_sinh(a) sinh(a)
#define _native_tan(a) tan(a)
#define _native_atan(a) atan(a)
#define _native_tanh(a) tanh(a)
#define _native_atanh(a) atanh(a)
#define _native_pow(a, b) pow((a), (b))
#define _native_exp(a) exp(a)
#define _native_expm1(a) expm1(a)
#define _native_log10(a) log10(a)
#define _native_log2(a) log2(a)
#define _native_log(a) log(a)
#define _native_log1p(a) log1p(a)
double _native_tentox(a) {
/*
* This is a dumb workaround for a clang bug on OS X 10.10
* Clang wants to optimize pow(10.0, a) to __exp(a), but
* __exp doesn't exist in the 10.8 SDK. So I'm using powl()
* instead, which doesn't have an equivalent optimized function.
*/
return (double)powl(10.0, (long double)a);
}
/* Constants in the NATIVE type, for use with the _native_* macros. */
const double _native_e = 2.71828182845904509;
const double _native_10 = 10.0;
const double _native_2 = 2.0;
const double _native_1 = 1.0;
/* Only the X86 variant exists; any other configuration is a build error. */
#elif (defined(HACKY_MATH_PPC))
#error "PowerPC hacky math isn't implemented yet"
#else
#error "You need to define HACKY_MATH_X86, or implement one for your arch"
#endif
/*
 * Low-precision transcendental helpers. Each one narrows its float128
 * argument to the host's native type with _to_native(), applies the
 * matching _native_* operation, and widens the answer back with
 * _from_native().
 */
static float128 _hack_cos (float128 x)
{
    const NATIVE arg = _to_native(x);
    return _from_native(_native_cos(arg));
}

static float128 _hack_acos (float128 x)
{
    const NATIVE arg = _to_native(x);
    return _from_native(_native_acos(arg));
}

static float128 _hack_cosh (float128 x)
{
    const NATIVE arg = _to_native(x);
    return _from_native(_native_cosh(arg));
}

static float128 _hack_sin (float128 x)
{
    const NATIVE arg = _to_native(x);
    return _from_native(_native_sin(arg));
}

static float128 _hack_asin (float128 x)
{
    const NATIVE arg = _to_native(x);
    return _from_native(_native_asin(arg));
}

static float128 _hack_sinh (float128 x)
{
    const NATIVE arg = _to_native(x);
    return _from_native(_native_sinh(arg));
}

static float128 _hack_tan (float128 x)
{
    const NATIVE arg = _to_native(x);
    return _from_native(_native_tan(arg));
}

static float128 _hack_atan (float128 x)
{
    const NATIVE arg = _to_native(x);
    return _from_native(_native_atan(arg));
}

static float128 _hack_tanh (float128 x)
{
    const NATIVE arg = _to_native(x);
    return _from_native(_native_tanh(arg));
}

static float128 _hack_atanh (float128 x)
{
    const NATIVE arg = _to_native(x);
    return _from_native(_native_atanh(arg));
}

/* e^x */
static float128 _hack_etox (float128 x)
{
    const NATIVE arg = _to_native(x);
    return _from_native(_native_exp(arg));
}

/* e^x - 1 */
static float128 _hack_etoxm1 (float128 x)
{
    const NATIVE arg = _to_native(x);
    return _from_native(_native_expm1(arg));
}

static float128 _hack_log10 (float128 x)
{
    const NATIVE arg = _to_native(x);
    return _from_native(_native_log10(arg));
}

static float128 _hack_log2 (float128 x)
{
    const NATIVE arg = _to_native(x);
    return _from_native(_native_log2(arg));
}

/* ln(x) */
static float128 _hack_logn (float128 x)
{
    const NATIVE arg = _to_native(x);
    return _from_native(_native_log(arg));
}

/* ln(x + 1) */
static float128 _hack_lognp1 (float128 x)
{
    const NATIVE arg = _to_native(x);
    return _from_native(_native_log1p(arg));
}

/* 10^x */
static float128 _hack_tentox (float128 x)
{
    const NATIVE arg = _to_native(x);
    return _from_native(_native_tentox(arg));
}

/* 2^x */
static float128 _hack_twotox (float128 x)
{
    const NATIVE arg = _to_native(x);
    return _from_native(_native_pow(_native_2, arg));
}
#pragma mark FMATH! and all its helpers
/*
 * Function prototypes from SoftFloat/softfloat-specialize.h
 * (declared here by hand rather than by including that header).
 */
char float128_is_nan(float128 a);
char float128_is_signaling_nan (float128 a);
/*
 * FMOVECR: load a constant from the FPU's constant ROM.
 * The ROM offset is taken from fpu->fmath_op and the selected constant
 * is stored in fpu->result as a float128. Offsets that are not listed
 * below produce +0.0.
 */
static void inst_fmath_fmovecr (void)
{
fpu_get_state_ptr();
/*
* FYI: these constants are stored in the "intermediate" 85-bit
* format in the 6888x rom. This has the side effect that
* they are rounded according to fpcr.mc_rnd.
* We emulate the intermediate 85-bit format with float128.
*/
switch (fpu->fmath_op) {
case 0x00: // pi
fpu->result = _assemble_float128(0, 0x4000, 0x921fb54442d1, 0x8469898cc51701b8);
break;
case 0x0b: // log_10(2)
fpu->result = _assemble_float128(0, 0x3ffd, 0x34413509f79f, 0xef311f12b35816f9);
break;
case 0x0c: // e
fpu->result = _assemble_float128(0, 0x4000, 0x5bf0a8b14576, 0x95355fb8ac404e7a);
break;
case 0x0d: // log_2(e)
fpu->result = _assemble_float128(0, 0x3fff, 0x71547652b82f, 0xe1777d0ffda0d23a);
break;
case 0x0e: // log_10(e)
// NOTE: 68881 doesn't set inex2 for this one
// Also note: that's bogus. 68881 uses 3 trailing mantissa bits to do rounding,
// and those bits are non-zero for this number, so it must actually be stored
// incorrectly in the ROM.
// I'll emulate this by truncating the float128 mantissa.
// fpu->result = _assemble_float128(0, 0x3ffd, 0xbcb7b1526e50, 0xe32a6ab7555f5a67);
fpu->result = _assemble_float128(0, 0x3ffd, 0xbcb7b1526e50, 0xe32a000000000000);
break;
case 0x0f: // 0.0
fpu->result = _assemble_float128(0, 0, 0, 0);
break;
case 0x30: // ln(2)
fpu->result = _assemble_float128(0, 0x3ffe, 0x62e42fefa39e, 0xf35793c7673007e5);
break;
case 0x31: // ln(10)
fpu->result = _assemble_float128(0, 0x4000, 0x26bb1bbb5551, 0x582dd4adac5705a6);
break;
case 0x32: // 1 (68kprm has typesetting issues everywhere. This one says 100, but means 10^0.)
fpu->result = _assemble_float128(0, 0x3fff, 0x0, 0x0);
break;
case 0x33: // 10
fpu->result = _assemble_float128(0, 0x4002, 0x400000000000, 0x0);
break;
case 0x34: // 10^2
fpu->result = _assemble_float128(0, 0x4005, 0x900000000000, 0x0);
break;
case 0x35: // 10^4
fpu->result = _assemble_float128(0, 0x400c, 0x388000000000, 0x0);
break;
case 0x36: // 10^8
fpu->result = _assemble_float128(0, 0x4019, 0x7d7840000000, 0x0);
break;
case 0x37: // 10^16
fpu->result = _assemble_float128(0, 0x4034, 0x1c37937e0800, 0x0);
break;
case 0x38: // 10^32
fpu->result = _assemble_float128(0, 0x4069, 0x3b8b5b5056e1, 0x6b3be04000000000);
break;
case 0x39: // 10^64
fpu->result = _assemble_float128(0, 0x40d3, 0x84f03e93ff9f, 0x4daa797ed6e38ed6);
break;
case 0x3a: // 10^128
fpu->result = _assemble_float128(0, 0x41a8, 0x27748f9301d3, 0x19bf8cde66d86d62);
break;
case 0x3b: // 10^256
fpu->result = _assemble_float128(0, 0x4351, 0x54fdd7f73bf3, 0xbd1bbb77203731fd);
break;
case 0x3c: // 10^512
fpu->result = _assemble_float128(0, 0x46a3, 0xc633415d4c1d, 0x238d98cab8a978a0);
break;
case 0x3d: // 10^1024
fpu->result = _assemble_float128(0, 0x4d48, 0x92eceb0d02ea, 0x182eca1a7a51e316);
break;
case 0x3e: // 10^2048
fpu->result = _assemble_float128(0, 0x5a92, 0x3d1676bb8a7a, 0xbbc94e9a519c6535);
break;
case 0x3f: // 10^4096
fpu->result = _assemble_float128(0, 0x7525, 0x88c0a4051441, 0x2f3592982a7f0094);
break;
default:
/*
* I wanted to include the actual values for the other ROM offsets,
* but they might be proprietary. Most of them are 0 anyways, and some
* cause FPU exceptions, even with all exceptions disabled... (?)
* 68040 FPSP just returns 0, so we'll do that too.
*/
fpu->result = _assemble_float128(0, 0, 0, 0);
return ;
}
}
/*
 * This is a quick macro.pl macro that builds the lookup tables for the
 * fmath instructions, all indexed by the 7-bit fmath opcode:
 *
 *   _fmath_map[128]   - pointer to the inst_fmath_<name> implementation,
 *                       or NULL for unassigned opcodes
 *   _fmath_flags[128] - bit 0 set for dyadic instructions,
 *                       bit 1 set for 68040-only encodings
 *   _fmath_names[128] - mnemonic, or "f???" for unassigned opcodes
 *
 * It is probably slightly slower to use a jump table rather than a big
 * switch statement, but I think it looks cleaner.
 */
~newmacro(create_fmath_jump_table, 0, {
my $name_map = {};
my $op_map = {};
my $add = sub {
my $op = shift;
my $name = lc(shift);
my $mode = 'foo';
my $arch = 68881;
foreach my $arg (@_) {
if (($arg eq 'monadic') or ($arg eq 'dyadic')) {
$mode = $arg;
}
elsif ($arg == 68040) {
$arch = $arg;
}
else {
croak("bad arg $arg");
}
}
croak("didn't specify mode") if ($mode eq "foo");
croak("dup $op $name") if exists $op_map->{$op};
croak("bogus") if ($op > 127);
$op_map->{$op} = {op => $op, name => $name, mode => $mode, arch => $arch};
$name_map->{$name} = $op_map->{$op};
};
$add->(~b(1000000), 'fmove', 'monadic', 68040);
$add->(~b(1000100), 'fmove', 'monadic', 68040);
$add->(~b(0000000), 'fmove', 'monadic');
$add->(~b(0000001), 'fint', 'monadic');
$add->(~b(0000010), 'fsinh', 'monadic');
$add->(~b(0000011), 'fintrz', 'monadic');
$add->(~b(1000001), 'fsqrt', 'monadic', 68040);
$add->(~b(1000101), 'fsqrt', 'monadic', 68040);
$add->(~b(0000100), 'fsqrt', 'monadic');
$add->(~b(0000110), 'flognp1', 'monadic');
$add->(~b(0001000), 'fetoxm1', 'monadic');
$add->(~b(0001001), 'ftanh', 'monadic');
$add->(~b(0001010), 'fatan', 'monadic');
$add->(~b(0001100), 'fasin', 'monadic');
$add->(~b(0001101), 'fatanh', 'monadic');
$add->(~b(0001110), 'fsin', 'monadic');
$add->(~b(0001111), 'ftan', 'monadic');
$add->(~b(0010000), 'fetox', 'monadic');
$add->(~b(0010001), 'ftwotox', 'monadic');
$add->(~b(0010010), 'ftentox', 'monadic');
$add->(~b(0010100), 'flogn', 'monadic');
$add->(~b(0010101), 'flog10', 'monadic');
$add->(~b(0010110), 'flog2', 'monadic');
$add->(~b(1011000), 'fabs', 'monadic', 68040);
$add->(~b(1011100), 'fabs', 'monadic', 68040);
$add->(~b(0011000), 'fabs', 'monadic');
$add->(~b(0011001), 'fcosh', 'monadic');
$add->(~b(1011010), 'fneg', 'monadic', 68040);
$add->(~b(1011110), 'fneg', 'monadic', 68040);
$add->(~b(0011010), 'fneg', 'monadic');
$add->(~b(0011100), 'facos', 'monadic');
$add->(~b(0011101), 'fcos', 'monadic');
$add->(~b(0011110), 'fgetexp', 'monadic');
$add->(~b(0011111), 'fgetman', 'monadic');
$add->(~b(1100000), 'fdiv', 'dyadic', 68040);
$add->(~b(1100100), 'fdiv', 'dyadic', 68040);
$add->(~b(0100000), 'fdiv', 'dyadic');
$add->(~b(0100001), 'fmod', 'dyadic');
$add->(~b(1100010), 'fadd', 'dyadic', 68040);
$add->(~b(1100110), 'fadd', 'dyadic', 68040);
$add->(~b(0100010), 'fadd', 'dyadic');
$add->(~b(1100011), 'fmul', 'dyadic', 68040);
$add->(~b(1100111), 'fmul', 'dyadic', 68040);
$add->(~b(0100011), 'fmul', 'dyadic');
$add->(~b(0100100), 'fsgldiv', 'dyadic');
$add->(~b(0100101), 'frem', 'dyadic');
$add->(~b(0100110), 'fscale', 'dyadic');
$add->(~b(0100111), 'fsglmul', 'dyadic');
$add->(~b(1101000), 'fsub', 'dyadic', 68040);
$add->(~b(1101100), 'fsub', 'dyadic', 68040);
$add->(~b(0101000), 'fsub', 'dyadic');
$add->(~b(0110000), 'fsincos', 'monadic');
$add->(~b(0110001), 'fsincos', 'monadic');
$add->(~b(0110010), 'fsincos', 'monadic');
$add->(~b(0110011), 'fsincos', 'monadic');
$add->(~b(0110100), 'fsincos', 'monadic');
$add->(~b(0110101), 'fsincos', 'monadic');
$add->(~b(0110110), 'fsincos', 'monadic');
$add->(~b(0110111), 'fsincos', 'monadic');
$add->(~b(0111000), 'fcmp', 'dyadic');
$add->(~b(0111010), 'ftst', 'monadic');
my $map_str = "fmath_impl_t *_fmath_map[128] = {\n";
my @inst_flags = (0) x 128;
for (my $i=0; $i < 128; $i++) {
my $func_ptr = "NULL";
if (exists $op_map->{$i}) {
$func_ptr = 'inst_fmath_' . $op_map->{$i}->{name};
if ($op_map->{$i}->{mode} eq 'dyadic') {
$inst_flags[$i] |= 1;
}
if ($op_map->{$i}->{arch} == 68040) {
$inst_flags[$i] |= 2;
}
}
$map_str .= "\t" . $func_ptr . ",\n";
}
$map_str .= "};\n\nuint8_t _fmath_flags[128] = {\n";
for (my $i=0; $i < 128; $i++) {
$map_str .= "\t" . sprintf('0x%02x', $inst_flags[$i]) . ",\n";
}
$map_str .= "};\n";
$map_str .= "const char *_fmath_names[128] = {\n";
for (my $i=0; $i < 128; $i++) {
my $name = "f???";
if (exists $op_map->{$i}) {
$name = $op_map->{$i}->{name};
}
$map_str .= "\t\"" . $name . "\",\n";
}
$map_str .= "};\n";
return $map_str;
})
/* True when f is +0 or -0, i.e. every bit other than the sign is clear. */
static _Bool _float128_is_zero (float128 f)
{
    const uint64_t high_without_sign = f.high << 1;

    if (high_without_sign != 0)
        return 0;
    return f.low == 0;
}
/* True when the sign bit (bit 63 of f.high) is set. */
static _Bool _float128_is_neg (float128 f)
{
    const _Bool sign = (f.high >> 63) != 0;

    return sign;
}
/* True when f is +/-infinity: maximum exponent and an all-zero fraction. */
static _Bool _float128_is_infinity (float128 f)
{
    const uint16_t exponent = (f.high >> 48) & 0x7fff;
    const uint64_t fraction_high = f.high & 0x0000ffffffffffff;

    if (exponent != 0x7fff)
        return 0;
    return (fraction_high == 0) && (f.low == 0);
}
/* True when f is a NaN: maximum exponent and a non-zero fraction. */
static _Bool _float128_is_nan (float128 f)
{
    const uint16_t exponent = (f.high >> 48) & 0x7fff;
    const uint64_t fraction_high = f.high & 0x0000ffffffffffff;

    if (exponent != 0x7fff)
        return 0;
    return (fraction_high != 0) || (f.low != 0);
}
/* NaN returned for invalid operations: sign set, maximum exponent, top fraction bit set. */
const float128 _nan128 = {
.high = 0xFFFF800000000000ULL,
.low = 0
};
/* +1.0 (biased exponent 0x3fff, zero fraction) */
const float128 _one128 = {
.high = 0x3fff000000000000ULL,
.low = 0
};
/* +0.0 */
const float128 _zero128 = {
.high = 0,
.low = 0
};
static void inst_fmath_fabs ()
{
fpu_get_state_ptr();
/* Clear the sign bit */
fpu->result = fpu->source;
fpu->result.high <<= 1;
fpu->result.high >>= 1;
}
/*
 * FACOS: result = acos(source).
 *   source == +/-0        -> +pi/2
 *   |source| <= 1 fails   -> NaN, operr set
 *   otherwise             -> low-precision host acos()
 */
static void inst_fmath_facos ()
{
    fpu_get_state_ptr();

    /* acos(0) is exactly +pi/2 */
    if (_float128_is_zero(fpu->source)) {
        fpu->result = _assemble_float128(0, 0x3fff, 0x921fb54442d1, 0x8469898cc51701b8);
        return;
    }

    /* |source|, for the domain check */
    float128 magnitude = fpu->source;
    magnitude.high &= 0x7fffffffffffffffULL;

    /* Outside [-1, 1]: operand error */
    if (!float128_le(magnitude, _one128)) {
        fpu->result = _nan128;
        es_operr = 1;
        return ;
    }

    fpu->result = _hack_acos(fpu->source);
    /* Set inex2?? */
}
/*
 * FADD: result = dest + source, then translate softfloat's exception
 * flags into the fpsr exception status bits (bits are only ever set
 * here, never cleared).
 */
static void inst_fmath_fadd ()
{
    fpu_get_state_ptr();

    fpu->result = float128_add(fpu->dest, fpu->source);

    const int flags = float_exception_flags;

    /*
     * operr: invalid operation, e.g. adding infinities of opposite
     * sign (softfloat returns NaN in that case).
     */
    if (flags & float_flag_invalid)
        es_operr = 1;

    /* inex2: the result had to be rounded */
    if (flags & float_flag_inexact)
        es_inex2 = 1;

    /* ovfl: the operation overflowed */
    if (flags & float_flag_overflow)
        es_ovfl = 1;

    /* unfl: the operation underflowed */
    if (flags & float_flag_underflow)
        es_unfl = 1;
}
/*
 * FASIN: result = asin(source).
 *   source == +/-0        -> source (sign preserved)
 *   |source| <= 1 fails   -> NaN, operr set
 *   otherwise             -> low-precision host asin()
 */
static void inst_fmath_fasin ()
{
    fpu_get_state_ptr();

    /* asin(+/-0) is +/-0 */
    if (_float128_is_zero(fpu->source)) {
        fpu->result = fpu->source;
        return;
    }

    /* |source|, for the domain check */
    float128 magnitude = fpu->source;
    magnitude.high &= 0x7fffffffffffffffULL;

    /* Outside [-1, 1]: operand error */
    if (!float128_le(magnitude, _one128)) {
        fpu->result = _nan128;
        es_operr = 1;
        return ;
    }

    fpu->result = _hack_asin(fpu->source);
    /* Set inex2?? */
    /* Set unfl?? */
}
/*
 * FATAN: result = atan(source).
 *   source == +-0    -> source
 *   source == +-inf  -> +-pi/2 (sign of source)
 *   otherwise        -> _hack_atan(source)
 */
static void inst_fmath_fatan ()
{
    fpu_get_state_ptr();

    const _Bool is_zero = _float128_is_zero(fpu->source);
    const _Bool is_inf = _float128_is_infinity(fpu->source);
    const _Bool negative = _float128_is_neg(fpu->source);

    if (is_zero) {
        fpu->result = fpu->source;
    }
    else if (is_inf) {
        /* pi/2 carrying the sign of the source */
        fpu->result = _assemble_float128(negative, 0x3fff, 0x921fb54442d1, 0x8469898cc51701b8);
    }
    else {
        fpu->result = _hack_atan(fpu->source);
        /* Set inex2?? */
        /* Set unfl?? */
    }
}
/*
 * FATANH: result = atanh(source).
 *   source == +-0    -> source
 *   |source| == 1.0  -> +-inf (sign of source), dz set
 *   |source| > 1.0   -> NaN, operr set
 *   otherwise        -> _hack_atanh(source)
 */
static void inst_fmath_fatanh ()
{
    fpu_get_state_ptr();

    const _Bool is_zero = _float128_is_zero(fpu->source);
    const _Bool negative = _float128_is_neg(fpu->source);

    /* |source|: same bits with the sign (bit 63 of .high) cleared */
    float128 magnitude = fpu->source;
    magnitude.high &= 0x7fffffffffffffffULL;

    if (is_zero) {
        fpu->result = fpu->source;
    }
    else if (float128_eq(magnitude, _one128)) {
        es_dz = 1;
        fpu->result = _assemble_float128(negative, 0x7fff, 0, 0);
    }
    else if (!float128_le(magnitude, _one128)) {
        es_operr = 1;
        fpu->result = _nan128;
    }
    else {
        fpu->result = _hack_atanh(fpu->source);
    }
}
/*
 * FCMP: compare dest against source and set the Z / N condition codes
 * as if computing (dest - source). Nothing is written back to a register.
 *
 * When either operand is a zero or an infinity, the answer is looked up
 * in two precomputed 64-bit tables indexed by a 6-bit value built from
 * {source_zero, source_inf, source_sign, dest_zero, dest_inf, dest_sign}.
 */
static void inst_fmath_fcmp ()
{
    fpu_get_state_ptr();

    const uint8_t src_zero = _float128_is_zero(fpu->source);
    const uint8_t src_inf = _float128_is_infinity(fpu->source);
    const uint8_t src_neg = _float128_is_neg(fpu->source);

    const uint8_t dst_zero = _float128_is_zero(fpu->dest);
    const uint8_t dst_inf = _float128_is_infinity(fpu->dest);
    const uint8_t dst_neg = _float128_is_neg(fpu->dest);

    /* Don't write the result back to the register */
    fpu->write_back = 0;

    if (src_zero || src_inf || dst_zero || dst_inf) {
        /* Special operand involved: use the precomputed answers */
        const uint8_t offset = ((src_zero << 5) | (src_inf << 4) | (src_neg << 3) |
                                (dst_zero << 2) | (dst_inf << 1) | (dst_neg << 0));

        cc_z = (0x0000303008040201ULL >> offset) & 1;
        cc_n = (0x00002a2a083b0a3bULL >> offset) & 1;

        slog("FPU: fcmp: hit uncommon case: offset=0x%x z=%u n=%u\n", offset, cc_z, cc_n);
        return;
    }

    /*
     * Both operands are "in-range": actually compare them.
     *   dest == source -> Z
     *   dest <  source -> N
     */
    if (float128_eq(fpu->dest, fpu->source))
        cc_z = 1;
    else if (float128_le(fpu->dest, fpu->source))
        cc_n = 1;
}
/*
 * FCOS: result = cos(source).
 *   source == +-0    -> +1.0
 *   source == +-inf  -> NaN, operr set
 *   otherwise        -> _hack_cos(source)
 */
static void inst_fmath_fcos ()
{
    fpu_get_state_ptr();

    const _Bool is_zero = _float128_is_zero(fpu->source);
    const _Bool is_inf = _float128_is_infinity(fpu->source);

    if (is_zero) {
        fpu->result = _one128;
    }
    else if (is_inf) {
        fpu->result = _nan128;
        es_operr = 1;
    }
    else {
        fpu->result = _hack_cos(fpu->source);
        /* Set inex2?? */
    }
}
/*
 * FCOSH: result = cosh(source).
 *   source == +-0    -> +1.0
 *   source == +-inf  -> +inf
 *   otherwise        -> _hack_cosh(source)
 */
static void inst_fmath_fcosh ()
{
    fpu_get_state_ptr();

    const _Bool is_zero = _float128_is_zero(fpu->source);
    const _Bool is_inf = _float128_is_infinity(fpu->source);

    if (is_zero)
        fpu->result = _one128;
    else if (is_inf)
        fpu->result = _assemble_float128(0, 0x7fff, 0, 0);
    else
        fpu->result = _hack_cosh(fpu->source);
}
/*
 * FDIV: result = dest / source (SoftFloat float128 division).
 * Afterwards the SoftFloat exception flags are mirrored into the
 * FPSR exception status bits.
 */
static void inst_fmath_fdiv ()
{
    fpu_get_state_ptr();

    fpu->result = float128_div(fpu->dest, fpu->source);

    /* Snapshot of SoftFloat's global exception flags */
    const int raised = float_exception_flags;

    /* operr (result NaN): invalid division, e.g. both operands zero */
    if (raised & float_flag_invalid) {
        es_operr = 1;
    }

    /* dz: the divisor (source) is zero */
    if (raised & float_flag_divbyzero) {
        es_dz = 1;
    }

    /* inex2: the result is inexact */
    if (raised & float_flag_inexact) {
        es_inex2 = 1;
    }

    /* ovfl: the op overflowed */
    if (raised & float_flag_overflow) {
        es_ovfl = 1;
    }

    /* unfl: the op underflowed */
    if (raised & float_flag_underflow) {
        es_unfl = 1;
    }
}
/*
 * FETOX: result = e^source.
 *   source == +-0  -> +1.0
 *   source == -inf -> +0.0
 *   source == +inf -> +inf
 *   otherwise      -> _hack_etox(source)
 */
static void inst_fmath_fetox ()
{
    fpu_get_state_ptr();

    const _Bool is_zero = _float128_is_zero(fpu->source);
    const _Bool is_inf = _float128_is_infinity(fpu->source);
    const _Bool negative = _float128_is_neg(fpu->source);

    if (is_zero)
        fpu->result = _one128;
    else if (is_inf)
        fpu->result = negative ? _zero128 : fpu->source;
    else
        fpu->result = _hack_etox(fpu->source);
}
/*
 * FETOXM1: result = e^source - 1.
 *   source == +-0  -> source
 *   source == -inf -> -1.0
 *   source == +inf -> +inf
 *   otherwise      -> _hack_etoxm1(source)
 */
static void inst_fmath_fetoxm1 ()
{
    fpu_get_state_ptr();

    const _Bool is_zero = _float128_is_zero(fpu->source);
    const _Bool is_inf = _float128_is_infinity(fpu->source);
    const _Bool negative = _float128_is_neg(fpu->source);

    /* -1.0: sign set, biased exponent 0x3fff, zero fraction */
    const float128 minus_one = _assemble_float128(1, 0x3fff, 0, 0);

    if (is_zero)
        fpu->result = fpu->source;
    else if (is_inf && negative)
        fpu->result = minus_one;
    else if (is_inf)
        fpu->result = fpu->source;
    else
        fpu->result = _hack_etoxm1(fpu->source);
}
static void inst_fmath_fgetexp ()
{
fpu_get_state_ptr();
/* If source is INF, set operr and return NaN */
if (((fpu->source.high << 1) == 0xfffe000000000000ULL) && (fpu->source.low == 0)) {
es_operr = 1;
fpu->result.high = 0xffff000000000000ULL;
fpu->result.low = 0xc000000000000000ULL;
return ;
}
/*
* If source is 0, return source.
* According to 68881 docs, the result needs to have
* the same sign as the source (why?)
*/
if (((fpu->source.high << 1) == 0) && (fpu->source.low == 0)) {
fpu->result = fpu->source;
return ;
}
/*
* Otherwise, extract the biased exponent, convert it
* to a two's complement integer, and store that value
* as a float.
*/
const uint32_t biased = (fpu->source.high << 1) >> 49;
fpu->result = int32_to_float128(((int32_t)biased) - 16383);
}
/*
 * FGETMAN: result = the mantissa of source.
 *   source == +-inf -> NaN, operr set
 *   source == +-0   -> source
 *   otherwise       -> source with its exponent field forced to 0x3fff
 */
static void inst_fmath_fgetman ()
{
    fpu_get_state_ptr();

    const _Bool is_zero = _float128_is_zero(fpu->source);
    const _Bool is_inf = _float128_is_infinity(fpu->source);

    if (is_inf) {
        fpu->result = _nan128;
        es_operr = 1;
        return;
    }

    fpu->result = fpu->source;
    if (is_zero)
        return;

    /*
     * fgetman "returns" the value of the source's mantissa,
     * which I think just means it resets the exponent to 0x3fff (biased 0)
     * FIXME: test this
     *
     * Keep the sign and fraction bits, replace the exponent field.
     */
    fpu->result.high = (fpu->result.high & 0x8000ffffffffffffULL) | 0x3fff000000000000ULL;
}
/*
 * FINT: round source to an integral value using the current SoftFloat
 * rounding mode; inex2 is set when SoftFloat reports inexact.
 */
static void inst_fmath_fint ()
{
    fpu_get_state_ptr();

    fpu->result = float128_round_to_int(fpu->source);

    const int raised = float_exception_flags;
    if (raised & float_flag_inexact) {
        es_inex2 = 1;
    }
}
static void inst_fmath_fintrz ()
{
fpu_get_state_ptr();
/* Same as fint, but force the round-to-zero mode */
const signed char old_round_mode = float_rounding_mode;
float_rounding_mode = float_round_to_zero;
fpu->result = float128_round_to_int(fpu->source);
float_rounding_mode = old_round_mode;
/* Throw inex2 if the result is inexact */
if (float_exception_flags & float_flag_inexact)
es_inex2 = 1;
}
/*
 * FLOG10: result = log10(source).
 *   source == +-0       -> -inf, dz set
 *   source negative     -> NaN, operr set
 *   source == +inf      -> +inf
 *   otherwise           -> _hack_log10(source)
 */
static void inst_fmath_flog10 ()
{
    fpu_get_state_ptr();

    const _Bool is_zero = _float128_is_zero(fpu->source);
    const _Bool is_inf = _float128_is_infinity(fpu->source);
    const _Bool negative = _float128_is_neg(fpu->source);

    if (is_zero) {
        fpu->result = _assemble_float128(1, 0x7fff, 0, 0);
        es_dz = 1;
    }
    else if (negative) {
        fpu->result = _nan128;
        es_operr = 1;
    }
    else if (is_inf) {
        fpu->result = fpu->source;
    }
    else {
        fpu->result = _hack_log10(fpu->source);
    }
}
/*
 * FLOG2: result = log2(source).
 *   source == +-0       -> -inf, dz set
 *   source negative     -> NaN, operr set
 *   source == +inf      -> +inf
 *   otherwise           -> _hack_log2(source)
 */
static void inst_fmath_flog2 ()
{
    fpu_get_state_ptr();

    const _Bool is_zero = _float128_is_zero(fpu->source);
    const _Bool is_inf = _float128_is_infinity(fpu->source);
    const _Bool negative = _float128_is_neg(fpu->source);

    if (is_zero) {
        fpu->result = _assemble_float128(1, 0x7fff, 0, 0);
        es_dz = 1;
    }
    else if (negative) {
        fpu->result = _nan128;
        es_operr = 1;
    }
    else if (is_inf) {
        fpu->result = fpu->source;
    }
    else {
        fpu->result = _hack_log2(fpu->source);
    }
}
/*
 * FLOGNP1: result = ln(source + 1).
 *   source == +-0   -> source
 *   source == -1.0  -> -inf, dz set
 *   source <  -1.0  -> NaN, operr set
 *   source == +inf  -> +inf
 *   otherwise       -> _hack_lognp1(source)
 */
static void inst_fmath_flognp1 ()
{
    fpu_get_state_ptr();

    const _Bool is_zero = _float128_is_zero(fpu->source);
    const _Bool is_inf = _float128_is_infinity(fpu->source);

    /* -1.0: sign set, biased exponent 0x3fff, zero fraction */
    const float128 minus_one = _assemble_float128(1, 0x3fff, 0, 0);

    if (is_zero) {
        fpu->result = fpu->source;
    }
    else if (float128_eq(minus_one, fpu->source)) {
        es_dz = 1;
        fpu->result = _assemble_float128(1, 0x7fff, 0, 0);
    }
    else if (float128_lt(fpu->source, minus_one)) {
        es_operr = 1;
        fpu->result = _nan128;
    }
    else if (is_inf) {
        fpu->result = fpu->source;
    }
    else {
        fpu->result = _hack_lognp1(fpu->source);
    }
}
/*
 * FLOGN: result = ln(source).
 *   source == +-0       -> -inf, dz set
 *   source negative     -> NaN, operr set
 *   source == +inf      -> +inf
 *   otherwise           -> _hack_logn(source)
 */
static void inst_fmath_flogn ()
{
    fpu_get_state_ptr();

    const _Bool is_zero = _float128_is_zero(fpu->source);
    const _Bool is_inf = _float128_is_infinity(fpu->source);
    const _Bool negative = _float128_is_neg(fpu->source);

    if (is_zero) {
        fpu->result = _assemble_float128(1, 0x7fff, 0, 0);
        es_dz = 1;
    }
    else if (negative) {
        fpu->result = _nan128;
        es_operr = 1;
    }
    else if (is_inf) {
        fpu->result = fpu->source;
    }
    else {
        fpu->result = _hack_logn(fpu->source);
    }
}
/* FMOVE: the result is simply a copy of the source operand */
static void inst_fmath_fmove ()
{
fpu_get_state_ptr();
/* No arithmetic and no exception bits are touched here */
fpu->result = fpu->source;
}
/*
 * FMUL: result = dest * source (SoftFloat float128 multiplication).
 * Afterwards the SoftFloat exception flags are mirrored into the
 * FPSR exception status bits.
 */
static void inst_fmath_fmul ()
{
    fpu_get_state_ptr();

    fpu->result = float128_mul(fpu->dest, fpu->source);

    /* Snapshot of SoftFloat's global exception flags */
    const int raised = float_exception_flags;

    /*
     * operr (result NaN): one operand is infinity and the
     * other is zero.
     */
    if (raised & float_flag_invalid) {
        es_operr = 1;
    }

    /* inex2: the result is inexact */
    if (raised & float_flag_inexact) {
        es_inex2 = 1;
    }

    /* ovfl: the op overflowed */
    if (raised & float_flag_overflow) {
        es_ovfl = 1;
    }

    /* unfl: the op underflowed */
    if (raised & float_flag_underflow) {
        es_unfl = 1;
    }
}
/* FNEG: result = -source, obtained by toggling the sign bit */
static void inst_fmath_fneg ()
{
    fpu_get_state_ptr();

    fpu->result = fpu->source;

    /* Bit 63 of the high word is the sign */
    fpu->result.high ^= 0x8000000000000000ULL;

    /*
     * FIXME: you're supposed to throw UNFL if this is a
     * denormalized number, I think.
     */
}
static void inst_fmath_frem ()
{
fpu_get_state_ptr();
const _Bool source_zero = _float128_is_zero(fpu->source);
const _Bool source_inf = _float128_is_infinity(fpu->source);
const _Bool dest_zero = _float128_is_zero(fpu->dest);
const _Bool dest_inf = _float128_is_infinity(fpu->dest);
/* I just assume the quotient/sign are 0 for the following cases */
qu_quotient = 0;
qu_s = 0;
/* If source is zero, result is nan */
if (source_zero) {
fpu->result = _nan128;
es_operr = 1;
return ;
}
/* If dest (but not source) is zero, result is that zero */
else if (dest_zero) {
fpu->result = fpu->dest;
return ;
}
/* If dest is infinity, result is nan */
else if (dest_inf) {
fpu->result = _nan128;
es_operr = 1;
return ;
}
/* If source, but not dest, is infinity, result is dest */
else if (source_inf) {
fpu->result = fpu->dest;
return ;
}
/* -- We're past the edge cases, do the actual op -- */
const signed char old_round_mode = float_rounding_mode;
/* frem uses round-to-nearest */
float_rounding_mode = float_round_nearest_even;
float128 N = float128_div(fpu->dest, fpu->source);
N = float128_round_to_int(N);
float_rounding_mode = old_round_mode;
fpu->result = float128_sub(fpu->dest, float128_mul(fpu->source, N));
/* FIXME: not sure how to set unfl reliably */
_Bool sign = N.high >> 63; /* Remember the sign */
N.high <<= 1; /* Clear the sign */
N.high >>= 1;
uint32_t final = float128_to_int32(N); /* Get the integer of the quotient */
qu_quotient = final & 0x7f;
qu_s = sign;
}
static void inst_fmath_fmod ()
{
fpu_get_state_ptr();
const _Bool source_zero = _float128_is_zero(fpu->source);
const _Bool source_inf = _float128_is_infinity(fpu->source);
const _Bool dest_zero = _float128_is_zero(fpu->dest);
const _Bool dest_inf = _float128_is_infinity(fpu->dest);
/* I just assume the quotient/sign are 0 for the following cases */
qu_quotient = 0;
qu_s = 0;
/* If source is zero, result is nan */
if (source_zero) {
fpu->result = _nan128;
es_operr = 1;
return ;
}
/* If dest (but not source) is zero, result is that zero */
else if (dest_zero) {
fpu->result = fpu->dest;
return ;
}
/* If dest is infinity, result is nan */
else if (dest_inf) {
fpu->result = _nan128;
es_operr = 1;
return ;
}
/* If source, but not dest, is infinity, result is dest */
else if (source_inf) {
fpu->result = fpu->dest;
return ;
}
/* -- We're past the edge cases, do the actual op -- */
const signed char old_round_mode = float_rounding_mode;
/* fmod uses round-to-zero */
float_rounding_mode = float_round_to_zero;
float128 N = float128_div(fpu->dest, fpu->source);
N = float128_round_to_int(N);
float_rounding_mode = old_round_mode;
fpu->result = float128_sub(fpu->dest, float128_mul(fpu->source, N));
/* FIXME: not sure how to set unfl reliably */
_Bool sign = N.high >> 63; /* Remember the sign */
N.high <<= 1; /* Clear the sign */
N.high >>= 1;
uint32_t final = float128_to_int32(N); /* Get the integer of the quotient */
qu_quotient = final & 0x7f;
qu_s = sign;
}
/*
 * FSCALE: result = dest * 2^trunc(source), implemented by adding the
 * truncated source to dest's biased exponent field.
 *
 *   source == +-inf      -> NaN, operr set
 *   dest == +-inf or +-0 -> dest
 *   exponent too large   -> +-inf (sign of dest), ovfl set
 *   exponent below zero  -> +-0   (sign of dest), unfl set
 *
 * Known limitation: any new biased exponent below zero is flushed to
 * zero even where a subnormal result would be representable.
 */
static void inst_fmath_fscale ()
{
    fpu_get_state_ptr();

    const _Bool source_inf = _float128_is_infinity(fpu->source);

    const _Bool dest_zero = _float128_is_zero(fpu->dest);
    const _Bool dest_inf = _float128_is_infinity(fpu->dest);
    /* Scaling by a power of two never changes the sign of dest */
    const _Bool dest_sign = _float128_is_neg(fpu->dest);

    int32_t factor, orig_exponent, exponent;
    int8_t saved_flags;

    /* If the source is inf, the result is nan and set operr */
    if (source_inf) {
        fpu->result = _nan128;
        es_operr = 1;
        return ;
    }
    /* Else, if dest is inf or zero, the result is dest */
    else if (dest_inf || dest_zero) {
        fpu->result = fpu->dest;
        return ;
    }

    /*
     * If the source has a huge magnitude, it's definitely
     * going to over/underflow
     */
    if (float128_le(int32_to_float128(65536), fpu->source))
        goto overflow;
    else if (float128_le(fpu->source, int32_to_float128(-65536)))
        goto underflow;

    /*
     * Find the scaling factor by truncating the source. SoftFloat
     * raises its inexact flag whenever the source has a fractional
     * part; that is not an error for fscale, so the flags are saved
     * and restored around the conversion instead of asserting on them.
     */
    saved_flags = float_exception_flags;
    factor = float128_to_int32_round_to_zero(fpu->source);
    float_exception_flags = saved_flags;

    orig_exponent = (int32_t)((fpu->dest.high >> 48) & 0x7fff);
    exponent = factor + orig_exponent;

    if (exponent < 0)
        goto underflow;
    else if (exponent >= 0x7fff)
        goto overflow;
    else if ((exponent == 0) && (orig_exponent > 0)) {
        uint64_t m_high;
        /*
         * Edge case: if the 80-bit input was {exp=1, mantissa=1},
         * the 128-bit version will be {exp=1, mantissa=0}.
         * If we're subtracting 1, the result will be {exp=0, mantissa=0}
         * which is not correct. We need to account for the implicit bit.
         *
         * A normal value is 1.frac * 2^(exp - bias), a subnormal is
         * 0.frac * 2^(1 - bias). So the subnormal fraction is the
         * original fraction with the implicit 1 made explicit (bit 48
         * of .high), all shifted right by one.
         */
        fpu->result = fpu->dest;
        fpu->result.low >>= 1;
        fpu->result.low |= (fpu->result.high << 63);
        m_high = ((fpu->result.high & 0x0000ffffffffffffULL) | 0x0001000000000000ULL) >> 1;
        fpu->result.high &= 0x8000000000000000ULL;
        fpu->result.high |= m_high;
    }
    else if ((orig_exponent == 0) && (exponent > 0)) {
        int32_t i;
        /*
         * Edge case 2: if the original value was subnormal, and we're
         * adding to the exponent, then the result may be normal or
         * subnormal, and we need to adjust the implicit bit accordingly.
         */
        fpu->result = fpu->dest;
        float128 _two128 = int32_to_float128(2);
        for (i=0; i < exponent; i++)
            fpu->result = float128_mul(fpu->result, _two128);
        /*
         * FIXME: this is a really bad implementation, but I doubt anyone
         * will ever hit this condition. It's also probably not exactly
         * correct. The motorola docs say that 68881 will never return an
         * unnormal number as the result of an operation, but if you add
         * an arbitrary value to the exponent of a subnormal number, you
         * can get an unnormal number. Does the 68881 specially handle this?
         */
    }
    else {
        /* Common case: just replace the exponent field */
        fpu->result = fpu->dest;
        fpu->result.high &= 0x8000ffffffffffffULL;
        fpu->result.high |= (((uint64_t)exponent) << 48);
    }

    return ;

overflow:
    /* result is +-inf (sign of dest), set overflow */
    fpu->result = _assemble_float128(dest_sign, 0x7fff, 0, 0);
    es_ovfl = 1;
    return ;

underflow:
    /* result is +-zero (sign of dest), set underflow */
    fpu->result = _assemble_float128(dest_sign, 0, 0, 0);
    es_unfl = 1;
    return ;
}
/*
 * FSGLDIV: single-precision divide. Both operands have the low 88 bits
 * of their mantissas cleared before result = dest / source is computed
 * with SoftFloat; the exception flags are then mirrored into the FPSR.
 */
static void inst_fmath_fsgldiv ()
{
    fpu_get_state_ptr();

    float128 divisor = fpu->source;
    float128 dividend = fpu->dest;

    /* Dump the low 88 bits of the source/dest mantissas */
    divisor.high &= 0xffffffffff000000ULL;
    divisor.low = 0;
    dividend.high &= 0xffffffffff000000ULL;
    dividend.low = 0;

    fpu->result = float128_div(dividend, divisor);

    /* Snapshot of SoftFloat's global exception flags */
    const int raised = float_exception_flags;

    /* operr (result NaN): invalid division, e.g. both operands zero */
    if (raised & float_flag_invalid) {
        es_operr = 1;
    }

    /* dz: the divisor (source) is zero */
    if (raised & float_flag_divbyzero) {
        es_dz = 1;
    }

    /* inex2: the result is inexact */
    if (raised & float_flag_inexact) {
        es_inex2 = 1;
    }

    /* ovfl: the op overflowed */
    if (raised & float_flag_overflow) {
        es_ovfl = 1;
    }

    /* unfl: the op underflowed */
    if (raised & float_flag_underflow) {
        es_unfl = 1;
    }
}
/*
 * FSGLMUL: single-precision multiply.
 *
 * As far as I can tell, fsglmul/fsgldiv use an ALU for the mantissa
 * that is only 24-bits wide. Everything else is done with regular
 * internal precision. So both operands have the low 88 bits of their
 * mantissas cleared before result = dest * source is computed.
 */
static void inst_fmath_fsglmul ()
{
    fpu_get_state_ptr();

    float128 multiplier = fpu->source;
    float128 multiplicand = fpu->dest;

    /* Dump the low 88 bits of the source/dest mantissas */
    multiplier.high &= 0xffffffffff000000ULL;
    multiplier.low = 0;
    multiplicand.high &= 0xffffffffff000000ULL;
    multiplicand.low = 0;

    fpu->result = float128_mul(multiplicand, multiplier);

    /* Snapshot of SoftFloat's global exception flags */
    const int raised = float_exception_flags;

    /*
     * operr (result NaN): one operand is infinity and the
     * other is zero.
     */
    if (raised & float_flag_invalid) {
        es_operr = 1;
    }

    /* inex2: the result is inexact */
    if (raised & float_flag_inexact) {
        es_inex2 = 1;
    }

    /* ovfl: the op overflowed */
    if (raised & float_flag_overflow) {
        es_ovfl = 1;
    }

    /* unfl: the op underflowed */
    if (raised & float_flag_underflow) {
        es_unfl = 1;
    }
}
/*
 * FSIN: result = sin(source).
 *   source == +-0    -> source
 *   source == +-inf  -> NaN, operr set
 *   otherwise        -> _hack_sin(source)
 */
static void inst_fmath_fsin ()
{
    fpu_get_state_ptr();

    const _Bool is_zero = _float128_is_zero(fpu->source);
    const _Bool is_inf = _float128_is_infinity(fpu->source);

    if (is_zero) {
        fpu->result = fpu->source;
    }
    else if (is_inf) {
        fpu->result = _nan128;
        es_operr = 1;
    }
    else {
        fpu->result = _hack_sin(fpu->source);
        /* Set inex2?? */
    }
}
/*
 * FSINCOS: result = sin(source); cos(source) is additionally written
 * straight into the FP register named by the low 3 bits of the
 * extension word.
 *   source == +-inf -> both NaN, operr set
 *   source == +-0   -> sin = source, cos = +1.0
 *
 * This is incredibly inefficient, but it's fine for now
 *   floatx80 -> float128 -> float64 -> native -> sin -> float128 -> floatx80
 *   floatx80 -> float128 -> float64 -> native -> cos -> float128 -> floatx80
 */
static void inst_fmath_fsincos ()
{
    fpu_get_state_ptr();

    const _Bool is_zero = _float128_is_zero(fpu->source);
    const _Bool is_inf = _float128_is_infinity(fpu->source);

    /* Register that receives the cosine */
    const uint16_t cos_reg = fpu->extension_word & 0x7;
    float128 cosine;

    if (is_inf) {
        fpu->result = _nan128;
        cosine = _nan128;
        es_operr = 1;
    }
    else if (is_zero) {
        fpu->result = fpu->source;
        cosine = _one128;
    }
    else {
        fpu->result = _hack_sin(fpu->source);
        cosine = _hack_cos(fpu->source);
    }

    /* FIXME: 68kprm doesn't clarify how the cosine result is rounded */
    fpu->fp[cos_reg] = float128_to_floatx80(cosine);
}
/*
 * FSINH: result = sinh(source). Zeros and infinities are returned
 * unchanged; everything else goes through _hack_sinh().
 */
static void inst_fmath_fsinh ()
{
    fpu_get_state_ptr();

    const _Bool is_zero = _float128_is_zero(fpu->source);
    const _Bool is_inf = _float128_is_infinity(fpu->source);

    if (is_zero || is_inf)
        fpu->result = fpu->source;
    else
        fpu->result = _hack_sinh(fpu->source);
}
/*
 * FSQRT: result = sqrt(source) (SoftFloat float128 square root), with
 * the invalid / inexact flags mirrored into operr / inex2.
 */
static void inst_fmath_fsqrt ()
{
    fpu_get_state_ptr();

    fpu->result = float128_sqrt(fpu->source);

    /* Snapshot of SoftFloat's global exception flags */
    const int raised = float_exception_flags;

    /* operr (result NaN): the operand is < 0 */
    if (raised & float_flag_invalid) {
        es_operr = 1;
    }

    /* inex2: the result is inexact */
    if (raised & float_flag_inexact) {
        es_inex2 = 1;
    }
}
/*
 * FSUB: result = dest - source (SoftFloat float128 subtraction).
 * Afterwards the SoftFloat exception flags are mirrored into the
 * FPSR exception status bits.
 */
static void inst_fmath_fsub ()
{
    fpu_get_state_ptr();

    fpu->result = float128_sub(fpu->dest, fpu->source);

    /* Snapshot of SoftFloat's global exception flags */
    const int raised = float_exception_flags;

    /*
     * operr (result NaN): operands are infinities with equal
     * signs. (I *think* softfloat is doing this correctly - the
     * code's hard to read.)
     *
     * Both 68kprm and 68881 docs say that (+inf) - (-inf) = (-inf)
     * but I presume that's a typo, and it's supposed to be (+inf)
     */
    if (raised & float_flag_invalid) {
        es_operr = 1;
    }

    /* inex2: the result is inexact */
    if (raised & float_flag_inexact) {
        es_inex2 = 1;
    }

    /* ovfl: the op overflowed */
    if (raised & float_flag_overflow) {
        es_ovfl = 1;
    }

    /* unfl: the op underflowed */
    if (raised & float_flag_underflow) {
        es_unfl = 1;
    }
}
/*
 * FTAN: result = tan(source).
 *   source == +-0    -> source
 *   source == +-inf  -> NaN, operr set
 *   otherwise        -> _hack_tan(source)
 */
static void inst_fmath_ftan ()
{
    fpu_get_state_ptr();

    const _Bool is_zero = _float128_is_zero(fpu->source);
    const _Bool is_inf = _float128_is_infinity(fpu->source);

    if (is_zero) {
        fpu->result = fpu->source;
    }
    else if (is_inf) {
        fpu->result = _nan128;
        es_operr = 1;
    }
    else {
        fpu->result = _hack_tan(fpu->source);
        /* Set inex2?? */
    }
}
/*
 * FTANH: result = tanh(source).
 *   source == +-0    -> source
 *   source == +-inf  -> +-1.0 (sign of source)
 *   otherwise        -> _hack_tanh(source)
 */
static void inst_fmath_ftanh ()
{
    fpu_get_state_ptr();

    const _Bool is_zero = _float128_is_zero(fpu->source);
    const _Bool is_inf = _float128_is_infinity(fpu->source);
    const _Bool negative = _float128_is_neg(fpu->source);

    if (is_zero)
        fpu->result = fpu->source;
    else if (is_inf)
        fpu->result = _assemble_float128(negative, 0x3fff, 0, 0);
    else
        fpu->result = _hack_tanh(fpu->source);
}
/*
 * FTENTOX: result = 10^source.
 *   source == +-0  -> +1.0
 *   source == -inf -> +0.0
 *   source == +inf -> +inf
 *   otherwise      -> _hack_tentox(source)
 *
 * Original author's note:
 * _hack_ftentox() is broken on clang 3.5 on osx 10.10
 * (tries to optimize pow(10.0, x) to __exp10(x), and __exp10
 * isn't implemented in the 10.8 SDK)
 */
static void inst_fmath_ftentox ()
{
    fpu_get_state_ptr();

    const _Bool is_zero = _float128_is_zero(fpu->source);
    const _Bool is_inf = _float128_is_infinity(fpu->source);
    const _Bool negative = _float128_is_neg(fpu->source);

    if (is_zero)
        fpu->result = _one128;
    else if (is_inf)
        fpu->result = negative ? _zero128 : fpu->source;
    else
        fpu->result = _hack_tentox(fpu->source);
}
/*
 * FTST: classify the source operand into the Z / I / N condition
 * codes. No register is modified.
 */
static void inst_fmath_ftst ()
{
    fpu_get_state_ptr();

    const _Bool is_negative = _float128_is_neg(fpu->source);
    const _Bool is_infinite = _float128_is_infinity(fpu->source);
    const _Bool is_zero = _float128_is_zero(fpu->source);

    /* Don't write the result back to the register */
    fpu->write_back = 0;

    cc_n = is_negative;
    cc_i = is_infinite;
    cc_z = is_zero;
}
/*
 * FTWOTOX: result = 2^source.
 *   source == +-0  -> +1.0
 *   source == -inf -> +0.0
 *   source == +inf -> +inf
 *   otherwise      -> _hack_twotox(source)
 */
static void inst_fmath_ftwotox ()
{
    fpu_get_state_ptr();

    const _Bool is_zero = _float128_is_zero(fpu->source);
    const _Bool is_inf = _float128_is_infinity(fpu->source);
    const _Bool negative = _float128_is_neg(fpu->source);

    if (is_zero)
        fpu->result = _one128;
    else if (is_inf)
        fpu->result = negative ? _zero128 : fpu->source;
    else
        fpu->result = _hack_twotox(fpu->source);
}
/* Function type of an fmath operation handler: no arguments, no return value */
typedef void (fmath_impl_t)(void);
/* Flag bit tested in _fmath_flags[]: the op also reads a destination operand */
#define FMATH_TYPE_DYADIC 1
/* Flag bit tested in _fmath_flags[]: ops carrying it are rejected as illegal instructions by inst_fmath */
#define FMATH_TYPE_68040 2
/*
 * NOTE(review): source-preprocessor macro. Presumably emits the
 * _fmath_map / _fmath_flags / _fmath_names tables used below -
 * confirm against the macro's definition.
 */
~create_fmath_jump_table()
/*
 * Take fpu->result, and round and crop it to the
 * preferred precision, then return the result as
 * a floatx80. (Set all the appropriate exception bits
 * too)
 *
 * ALSO!! This checks and sets underflow/overflow
 *
 * The SoftFloat exception flags are cleared on entry, and the rounding
 * mode is set from the FPCR (mc_rnd). For double and single precision
 * the value is first narrowed (which is where inexact / underflow /
 * overflow are sampled) and then widened back to floatx80.
 *
 * An unrecognised precision mode trips the assert; in builds where
 * asserts are compiled out it is handled like extended precision so
 * that the return value is never left uninitialized.
 */
static floatx80 _fmath_round_intermediate_result ()
{
    fpu_get_state_ptr();

    floatx80 final;

    float_exception_flags = 0; // (so we can know if the result is inexact)
    _set_rounding_mode(mc_rnd); // Set the preferred rounding mode

    if (mc_prec == prec_double) { // double precision
        float64 tmp = float128_to_float64(fpu->result);
        es_inex2 |= ((float_exception_flags & float_flag_inexact) != 0);
        es_unfl |= ((float_exception_flags & float_flag_underflow) != 0);
        es_ovfl |= ((float_exception_flags & float_flag_overflow) != 0);
        final = float64_to_floatx80(tmp);
    }
    else if (mc_prec == prec_single) { // single precision
        float32 tmp = float128_to_float32(fpu->result);
        es_inex2 |= ((float_exception_flags & float_flag_inexact) != 0);
        es_unfl |= ((float_exception_flags & float_flag_underflow) != 0);
        es_ovfl |= ((float_exception_flags & float_flag_overflow) != 0);
        final = float32_to_floatx80(tmp);
    }
    else { // extended precision (or, with asserts disabled, a bogus mode)
        assert((mc_prec == prec_extended) || !"bogus precision mode???");
        final = float128_to_floatx80(fpu->result);
        es_inex2 |= ((float_exception_flags & float_flag_inexact) != 0);
        es_unfl |= ((float_exception_flags & float_flag_underflow) != 0);
        es_ovfl |= ((float_exception_flags & float_flag_overflow) != 0);
    }

    return final;
}
/*
 * Derive the four FPSR condition codes (Z, N, NAN, I) from a floatx80
 * value. The explicit integer bit (top bit of val.low) is ignored when
 * telling NaN from infinity.
 */
static void _fmath_set_condition_codes (floatx80 val)
{
    fpu_get_state_ptr();

    const uint32_t biased_exp = val.high & 0x7fff;
    const uint64_t mantissa = val.low;
    const _Bool negative = val.high >> 15;

    /* Exponent field all ones: the value is either NaN or infinity */
    const _Bool max_exp = (biased_exp == 0x7fff);
    /* Fraction bits below the explicit integer bit */
    const uint64_t fraction = mantissa << 1;

    /* Check for zero */
    cc_z = ((biased_exp == 0) && (mantissa == 0));

    /* Check for negative */
    cc_n = negative;

    /* Check for NaN */
    cc_nan = (max_exp && (fraction != 0));

    /* Check for infinity */
    cc_i = (max_exp && (fraction == 0));
}
/*
 * Produce the result for an operation where at least one operand is a
 * NaN. For dyadic ops a NaN in dest wins over a NaN in source. The
 * result is always quieted, and snan records whether any participating
 * operand was a signaling NaN.
 */
static void _fmath_handle_nans ()
{
    fpu_get_state_ptr();

    /* fpu->dest only takes part for two-operand instructions */
    const _Bool is_dyadic = _fmath_flags[fpu->fmath_op] & FMATH_TYPE_DYADIC;

    const _Bool is_source_nan = float128_is_nan(fpu->source);
    const _Bool is_dest_nan = is_dyadic && float128_is_nan(fpu->dest);

    const _Bool any_signaling = float128_is_signaling_nan(fpu->source) ||
                                (is_dyadic && float128_is_signaling_nan(fpu->dest));

    /*
     * If the dest is NaN, or both are NaN, let the result be set to dest.
     * (with signaling disabled)
     */
    if (!is_dest_nan) {
        assert(is_source_nan);
        fpu->result = fpu->source;
    }
    else {
        fpu->result = fpu->dest;
    }

    /* Set the snan exception status bit */
    es_snan = any_signaling;

    /*
     * Silence the result: bit 47 of .high distinguishes
     *   Signaling -> 0
     *   Non-signaling -> 1
     */
    fpu->result.high |= (1ULL << 47);
}
/*
 * Disassemble one fmath-class FPU instruction into `output`.
 *
 *   op     - first instruction word (the low 6 bits are the EA field M)
 *   ext    - extension word: a = source-is-EA flag, sss = source
 *            specifier, ddd = destination register, eeeeeee = opcode
 *   output - caller-provided buffer receiving the NUL-terminated text;
 *            it is written with sprintf, so the caller must size it
 *
 * Recognised forms: fmovecr (a set, sss == 7), fsincos, ftst, and the
 * generic "f<inst>.<fmt> <source>,<dest>". Unknown opcodes are printed
 * as "fmath???".
 */
void dis_fmath (uint16_t op, uint16_t ext, char *output)
{
    ~decompose(op, 1111 001 000 MMMMMM);
    ~decompose(ext, 0 a 0 sss ddd eeeeeee);

    const uint8_t src_in_ea = a;
    const uint8_t source_specifier = s;
    const uint8_t dest_register = d;
    const uint8_t extension = e;

    /* If this is fmovecr */
    if (src_in_ea && (source_specifier == 7)) {
        /* For fmovecr the opcode field is an offset into the constant ROM */
        const char *name = NULL;
        switch (extension) {
            case 0x00: name = "pi"; break;
            case 0x0b: name = "log_10(2)"; break;
            case 0x0c: name = "e"; break; /* Euler's number */
            case 0x0d: name = "log_2(e)"; break;
            case 0x0e: name = "log_10(e)"; break;
            case 0x0f: name = "0.0"; break;
            case 0x30: name = "ln(2)"; break;
            case 0x31: name = "ln(10)"; break;
            case 0x32: name = "1.0"; break;
            case 0x33: name = "10.0"; break;
            case 0x34: name = "10.0^2"; break;
            case 0x35: name = "10.0^4"; break;
            case 0x36: name = "10.0^8"; break;
            case 0x37: name = "10.0^16"; break;
            case 0x38: name = "10.0^32"; break;
            case 0x39: name = "10.0^64"; break;
            case 0x3a: name = "10.0^128"; break;
            case 0x3b: name = "10.0^256"; break;
            case 0x3c: name = "10.0^512"; break;
            case 0x3d: name = "10.0^1024"; break;
            case 0x3e: name = "10.0^2048"; break;
            case 0x3f: name = "10.0^4096"; break;
        }
        /* Offsets without a known name are printed numerically */
        if (name == NULL)
            sprintf(output, "fmovecr.x #%u,fp%u", extension, dest_register);
        else
            sprintf(output, "fmovecr.x %s,fp%u", name, dest_register);
        return;
    }

    if (_fmath_map[e] == NULL) {
        /* This instruction isn't defined */
        sprintf(output, "fmath???");
    }
    else if (_fmath_map[e] == inst_fmath_fsincos) {
        /* fsincos.<fmt> <ea>,FPc:FPs  (FPc is the low 3 bits of the opcode field) */
        if (src_in_ea)
            sprintf(output, "fsincos.%c %s,fp%u:fp%u", "lsxpwdb?"[source_specifier],
                    decode_ea_rw(M, _format_sizes[source_specifier]), e & 7, dest_register);
        else
            sprintf(output, "fsincos.x fp%u,fp%u:fp%u", source_specifier, e & 7, dest_register);
    }
    else if (_fmath_map[e] == inst_fmath_ftst) {
        /* ftst.<fmt> <source> */
        if (src_in_ea)
            sprintf(output, "ftst.%c %s", "lsxpwdb?"[source_specifier],
                    decode_ea_rw(M, _format_sizes[source_specifier]));
        else
            /* The register being tested lives in the source specifier field */
            sprintf(output, "ftst.x fp%u", source_specifier);
    }
    else {
        /* f<inst>.<fmt> <source>,<dest> */
        if (src_in_ea)
            sprintf(output, "%s.%c %s,fp%u", _fmath_names[e], "lsxpwdb?"[source_specifier],
                    decode_ea_rw(M, _format_sizes[source_specifier]), dest_register);
        else
            sprintf(output, "%s.x fp%u,fp%u", _fmath_names[e],
                    source_specifier, dest_register);
    }
}
/*
 * Debug helper: convert a float128 to the host's long double by going
 * through floatx80 and copying its 10 bytes (64-bit mantissa in bytes
 * 0..7, sign/exponent word in bytes 8..9, least significant byte first)
 * over the long double's storage. The SoftFloat exception flags are
 * preserved across the conversion.
 *
 * NOTE(review): this byte layout matches a little-endian x87 80-bit
 * long double; other host formats are not handled - confirm targets.
 */
static long double _float128_to_long_double(float128 f128)
{
    long double result;
    uint8_t *bytes = (uint8_t*)&result;
    unsigned i;

    /* Don't let the conversion disturb the emulated exception state */
    const int8_t saved_flags = float_exception_flags;
    const floatx80 f80 = float128_to_floatx80(f128);
    float_exception_flags = saved_flags;

    /* Mantissa, least significant byte first */
    for (i = 0; i < 8; i++)
        bytes[i] = (f80.low >> (8 * i)) & 0xff;

    /* Sign + exponent word */
    bytes[8] = (f80.high >> 0) & 0xff;
    bytes[9] = (f80.high >> 8) & 0xff;

    return result;
}
/*
 * Debug helper: reinterpret a floatx80 as the host's long double by
 * copying its 10 bytes (64-bit mantissa in bytes 0..7, sign/exponent
 * word in bytes 8..9, least significant byte first) over the long
 * double's storage.
 *
 * NOTE(review): this byte layout matches a little-endian x87 80-bit
 * long double; other host formats are not handled - confirm targets.
 */
static long double _floatx80_to_long_double(floatx80 f80)
{
    long double result;
    uint8_t *bytes = (uint8_t*)&result;
    unsigned i;

    /* Mantissa, least significant byte first */
    for (i = 0; i < 8; i++)
        bytes[i] = (f80.low >> (8 * i)) & 0xff;

    /* Sign + exponent word */
    bytes[8] = (f80.high >> 0) & 0xff;
    bytes[9] = (f80.high >> 8) & 0xff;

    return result;
}
/*
 * Execute one fmath-class FPU instruction (or fmovecr), described by the
 * opcode in shoe.op and the extension word `ext`.
 *
 * Outline:
 *   1. Reject ops flagged FMATH_TYPE_68040 and ops with no _fmath_map[]
 *      entry (fmovecr excepted) with an illegal instruction exception.
 *   2. Load fpu->dest (dyadic ops only) and fpu->source as float128.
 *   3. If an operand is NaN, let _fmath_handle_nans() produce the result;
 *      otherwise call the per-op helper in _fmath_map[].
 *   4. Round the result if fpu->write_back is set, fold the exception
 *      status bits into the accrued exception bits, and either throw the
 *      highest-priority enabled exception or commit the EA read, set the
 *      condition codes and store the result in the destination register.
 *
 * Returns early, without writing a result, when _fpu_read_ea() fails or
 * when an exception is thrown.
 */
static void inst_fmath (const uint16_t ext)
{
fpu_get_state_ptr();
floatx80 rounded_result;
~decompose(shoe.op, 1111 001 000 MMMMMM);
~decompose(ext, 0 a 0 sss ddd eeeeeee);
const uint8_t src_in_ea = a;
const uint8_t source_specifier = s;
const uint8_t dest_register = d;
const uint8_t extension = e;
slog("FPU:---\n");
/* Throw illegal instruction for 040-only ops */
if (_fmath_flags[e] & FMATH_TYPE_68040) {
_throw_illegal_instruction();
return;
}
/*
 * All the documented fmath ops have an implementation in
 * _fmath_map[]. If it's NULL, it's not documented; throw
 * an exception.
 * This probably matches the 68040's behavior (I haven't
 * checked), but the 68881 doesn't do this.
 * The 68881 throws an illegal instruction exception for all
 * opcodes where the high (6th) bit of e is set.
 * All other instructions seem to short circuit to the
 * nearest documented instruction.
 * FIXME: consider implementing this behavior.
 */
if (_fmath_map[e] == NULL) {
/* Unless this is fmovecr, where the extension doesn't matter */
if (!(src_in_ea && (source_specifier == 7))) {
_throw_illegal_instruction();
return ;
}
}
/* We only need to load the dest reg for dyadic ops */
if (_fmath_flags[e] & FMATH_TYPE_DYADIC)
fpu->dest = floatx80_to_float128(fpu->fp[dest_register]);
/*
 * We'll shrink the precision and perform rounding
 * just prior to writing back the result.
 * Certain instructions override the precision
 * in fpcr, so keep track of the preferred precision here.
 */
enum rounding_precision_t rounding_prec = mc_prec;
/*
 * For all the intermediate calculations, we
 * probably want to use nearest-rounding mode.
 */
_set_rounding_mode(mode_nearest);
/* Reset fpsr's exception flags */
es_inex1 = 0; // this is only set for imprecisely-rounded packed inputs (not implemented)
es_inex2 = 0; // set if we ever lose precision (during the op or during rounding)
es_dz = 0; // set if we divided by zero
es_unfl = 0; // set if we underflowed (inex2 should be set too, I think)
es_ovfl = 0; // set if we overflowed (inex2 should be set too, I think)
es_operr = 0; // set if there was an instruction specific operand error
es_snan = 0; // Set if one of the inputs was a signaling NaN
es_bsun = 0; // never set here
/* Clear the whole CC register byte */
fpu->fpsr.raw &= 0x00ffffff;
fpu->write_back = 1; // let "do-write-back" be the default behavior
fpu->fmath_op = e;
/* Handle fmovecr */
if (src_in_ea && (source_specifier == 7)) { // fmovecr
/*
 * 68kprm says M should be ~b(000000), but apparently
 * any value will work for fmovecr
 */
slog("FPU: fmovecr %u,fp%u\n", e, dest_register);
inst_fmath_fmovecr();
goto computation_done;
}
/*
 * Read in the source from the EA or from a register.
 * In either case, convert the value to a float128
 * (that's our version of the 85-bit "intermediate" format).
 */
if (src_in_ea) {
/* A zero return from _fpu_read_ea() ends the instruction here */
if (!_fpu_read_ea(source_specifier, &fpu->source))
return ;
slog("FPU: %s.%c ", _fmath_names[e], "lsxpwdb?"[source_specifier]);
}
else {
fpu->source = floatx80_to_float128(fpu->fp[source_specifier]);
/* NOTE(review): this format string has one conversion; the second argument is unused */
slog("FPU: %s.x ", _fmath_names[e], "lsxpwdb?"[source_specifier]);
}
/*{
long double tmp = _float128_to_long_double(fpu->source);
printf("%Lf,fp%u\n", tmp, dest_register);
}*/
/* fsincos needs this to know which register to write cos */
fpu->extension_word = ext;
/*
 * If the source is NaN, or this is a dyadic (two-operand)
 * instruction and the second operand (fpu->dest) is NaN,
 * then the result is predetermined: NaN
 */
if (float128_is_nan(fpu->source) ||
(((_fmath_flags[e] & FMATH_TYPE_DYADIC) &&
float128_is_nan(fpu->dest)))) {
_fmath_handle_nans();
/*
 * The result is guaranteed to be NaN, so we need
 * to set cc_nan for ftst and fcmp, which bypass
 * _fmath_set_condition_codes()
 */
cc_nan = 1;
goto computation_done;
}
/* Reset softfloat's exception flags */
float_exception_flags = 0;
/*
 * Otherwise, call the extension-specific helper function.
 * Guarantees: Neither source nor dest are NaN
 * SoftFloat's exception flags have been cleared
 */
_fmath_map[e]();
/*
 * At this point, the "computation"-phase (I forget what the correct
 * 6888x term is) is over. Now we check exception bits, throw exceptions,
 * compute condition codes, and round and store the result.
 */
computation_done:
/*
 * Convert the 128-bit result to the specified precision.
 * rounded_result is only assigned (and only read below) when
 * fpu->write_back is set.
 * FIXME: do we need to do rounding for ftst/fcmp?
 */
if (fpu->write_back)
rounded_result = _fmath_round_intermediate_result();
/* Update the accrued exception bits */
assert(!es_bsun); // no fmath op can throw es_bsun
ae_iop |= es_bsun | es_snan | es_operr;
ae_ovfl |= es_ovfl;
ae_unfl |= (es_unfl & es_inex2); // yes, &
ae_dz |= es_dz;
ae_inex |= es_inex1 | es_inex2 | es_ovfl;
slog("FPU: bsun=%u snan=%u operr=%u ovfl=%u unfl=%u dz=%u inex1=%u inex2=%u\n",
es_bsun, es_snan, es_operr, es_ovfl, es_unfl, es_dz, es_inex1, es_inex2);
/* Are any exceptions both set and enabled? */
if (fpu->fpsr.raw & fpu->fpcr.raw & 0x0000ff00) {
/*
 * Then we need to throw an exception.
 * The exception is sent to the vector for
 * the highest priority exception, and the priority
 * order is (high->low) bsun, snan, operr, ovfl, unfl, dz, inex2/1
 * (which is the order of the bits in fpsr/fpcr).
 * Iterate over the bits in order, and throw the
 * exception to whichever bit is set first.
 */
uint8_t i, throwable = (fpu->fpsr.raw & fpu->fpcr.raw) >> 8;
slog("FPU: throw exception! 0x%08x\n", throwable);
assert(throwable);
/* Shift left until the top bit is set; i counts the shifts */
for (i=0; 1; i++) {
if (throwable & 0x80)
break;
throwable <<= 1;
}
/*
 * FIXME: are condition codes ever set if an exception is thrown?
 * I think not, so clear the whole CC register byte
 */
fpu->fpsr.raw &= 0x00ffffff;
/*
 * Convert the exception bit position
 * to the correct vector number, and throw
 * a (pre-instruction) exception.
 */
throw_fpu_pre_instruction_exception(_exception_bit_to_vector[i]);
return ;
}
/*
 * Otherwise, no exceptions to throw!
 * We're definitely running to completion now,
 * so commit ea-read changes
 */
_fpu_read_ea_commit(source_specifier);
if (fpu->write_back) {
/* Calculate the condition codes from the result. */
_fmath_set_condition_codes(rounded_result);
/* Write back the result, and we're done! */
/* NOTE(review): this inner test repeats the enclosing condition */
if (fpu->write_back) {
fpu->fp[dest_register] = rounded_result;
long double tmp = _floatx80_to_long_double(rounded_result);
slog("FPU: result = %Lf\n", tmp);
}
}
}
#pragma mark Second-hop non-fmath instructions
/*
 * fmove from an FP register to an effective address.
 * (inst_fmath takes care of every other fmove form.)
 *
 * The f and K fields of the extension word are handed to
 * _fpu_write_ea() untouched; s picks the FP register to store.
 */
static void inst_fmove (const uint16_t ext)
{
    fpu_get_state_ptr();

    ~decompose(shoe.op, 1111 001 000 MMMMMM);
    ~decompose(shoe.op, 1111 001 000 mmmrrr);
    ~decompose(ext, 011 fff sss KKKKKKK);

    const uint8_t format_field = f;
    const uint8_t reg_index = s;

    _fpu_write_ea(M, format_field, &fpu->fp[reg_index], K);
}
/*
 * Move any combination of the FPU control registers (fpcr, fpsr, fpiar)
 * to or from the effective address encoded in shoe.op.
 *
 * Extension word fields:
 *   d        direction: 1 = registers -> EA, 0 = EA -> registers
 *   C, S, I  select fpcr, fpsr and fpiar respectively
 *
 * The selected registers are transferred as consecutive 32-bit values in
 * the order fpcr, fpsr, fpiar.
 *
 * Register-direct EAs are only accepted for a single register, and an
 * address register only for fpiar; anything else raises an illegal
 * instruction exception. For registers -> EA, mode 7 is only accepted
 * for r == 0 or r == 1.
 *
 * Returns early (without committing (An)+ / -(An) updates) if a memory
 * access sets shoe.abort.
 */
static void inst_fmovem_control (const uint16_t ext)
{
    fpu_get_state_ptr();

    ~decompose(shoe.op, 1111 001 000 mmmrrr);
    ~decompose(shoe.op, 1111 001 000 MMMMMM);
    ~decompose(ext, 10d CSI 0000000000);

    const uint32_t count = C + S + I;
    const uint32_t size = count * 4;

    uint32_t addr, buf[3];
    uint32_t i;

    /* I don't know if this is even a valid instruction */
    if (count == 0)
        return ;

    /* data and addr reg modes are valid, but only if count==1 */
    if ((m == 0 || m == 1) && (count > 1)) {
        _throw_illegal_instruction();
        return ;
    }

    /* Only fpiar may be moved to or from an address register */
    if ((m == 1) && !I) {
        _throw_illegal_instruction();
        return ;
    }

    if (d) { // reg to memory
        i = 0;
        if (C) buf[i++] = fpu->fpcr.raw;
        if (S) buf[i++] = fpu->fpsr.raw;
        if (I) buf[i++] = fpu->fpiar;

        if (m == 0) {
            /* count == 1 is guaranteed by the check above */
            shoe.d[r] = buf[0];
            return ;
        }
        else if (m == 1) {
            shoe.a[r] = buf[0];
            return ;
        }
        else if (m == 3)
            addr = shoe.a[r];
        else if (m == 4)
            addr = shoe.a[r] - size;
        else {
            /*
             * Within mode 7 only r == 0 and r == 1 are accepted as a
             * destination. (The previous test, r!=0 || r!=1, was always
             * true and rejected those two as well.)
             */
            if ((m == 7) && (r != 0) && (r != 1)) {
                /* Not allowed for reg->mem */
                _throw_illegal_instruction();
                return ;
            }
            call_ea_addr(M);
            addr = shoe.dat;
        }

        for (i = 0; i < count; i++) {
            lset(addr + (i*4), 4, buf[i]);
            if (shoe.abort)
                return ;
        }
    }
    else { // mem to reg
        /*
         * For register-direct sources just pick up the value and fall
         * through to the control-register update below. (Returning here,
         * as the code used to, meant the control register was never
         * written.)
         */
        if (m == 0) // data reg
            buf[0] = shoe.d[r];
        else if (m == 1) // addr reg
            buf[0] = shoe.a[r];
        else {
            if (m == 3) // post-increment
                addr = shoe.a[r];
            else if (m == 4) // pre-decrement
                addr = shoe.a[r] - size;
            else if (M == 0x3c) // immediate
                addr = shoe.pc;
            else {
                call_ea_addr(M); // call_ea_addr() should work for all other modes
                addr = shoe.dat;
            }

            for (i = 0; i < count; i++) {
                buf[i] = lget(addr + (i*4), 4);
                if (shoe.abort)
                    return ;
            }
        }

        i = 0;
        if (C) {
            uint8_t round = fpu->fpcr.b._mc_rnd;
            fpu->fpcr.raw = buf[i++];
            uint8_t newround = fpu->fpcr.b._mc_rnd;

            if (round != newround) {
                slog("FPU: inst_fmovem_control: HEY: round %u -> %u\n", round, newround);
            }
        }
        if (S) fpu->fpsr.raw = buf[i++];
        if (I) fpu->fpiar = buf[i++];

        // Commit immediate-EA-mode PC change
        if (M == 0x3c)
            shoe.pc += size;
    }

    // Commit pre/post-inc/decrement
    if (m == 3)
        shoe.a[r] += size;
    if (m == 4)
        shoe.a[r] -= size;

    slog("FPU: inst_fmove_control: notice: (EA = %u/%u %08x CSI = %u%u%u)\n", m, r, (uint32_t)shoe.dat, C, S, I);
}
/*
 * fmovem: move a set of FP data registers to or from memory, 12 bytes per
 * register.
 *
 * Extension word fields:
 *   d  direction: nonzero = registers -> memory, zero = memory -> registers
 *   p  checked by assertion: clear for -(An), set for every other mode
 *   s  nonzero = take the register list from the low byte of d[y],
 *      zero = use the literal list L
 *
 * For -(An) the register list is bit-reversed before use. The mask is
 * scanned from bit 7 downwards, bit 7 corresponding to fp[0].
 *
 * Returns early, without committing the (An)+ / -(An) update, if a memory
 * access sets shoe.abort.
 */
static void inst_fmovem (const uint16_t ext)
{
    fpu_get_state_ptr();

    ~decompose(shoe.op, 1111 001 000 mmmrrr);
    ~decompose(shoe.op, 1111 001 000 MMMMMM);
    ~decompose(ext, 11 d ps 000 LLLLLLLL); // Static register mask
    ~decompose(ext, 11 0 00 000 0yyy0000); // Register for dynamic mode

    /* Register list exactly as encoded (before any reversal) */
    const uint8_t raw_mask = s ? shoe.d[y] : L;
    uint32_t bit;

    /* One 12-byte slot per selected register */
    uint32_t count = 0;
    for (bit = 0; bit < 8; bit++) {
        if (raw_mask & (1u << bit))
            count++;
    }
    const uint32_t size = count * 12;

    /* Pre-decrement mode uses the list mirrored end-for-end */
    uint8_t mask = raw_mask;
    if (m == 4) {
        mask = 0;
        for (bit = 0; bit < 8; bit++) {
            if (raw_mask & (1u << bit))
                mask |= (uint8_t)(0x80 >> bit);
        }
    }

    /* Work out where the transfer starts */
    uint32_t addr;
    switch (m) {
        case 3:
            addr = shoe.a[r];
            assert(p); // assert post-increment mask
            break;
        case 4:
            addr = shoe.a[r] - size;
            assert(!p); // assert pre-decrement mask
            break;
        default:
            call_ea_addr(M);
            addr = shoe.dat;
            assert(p); // assert post-increment mask
            break;
    }

    slog("FPU: inst_fmovem: pre=%08x mask=%08x EA=%u/%u addr=0x%08x size=%u %s\n", raw_mask, mask, m, r, addr, size, d?"to mem":"from mem");

    uint32_t reg;
    for (reg = 0; reg < 8; reg++) {
        uint8_t image[12];
        uint32_t off;

        if ((mask & (0x80 >> reg)) == 0)
            continue;

        if (d) {
            /* Register -> memory, one byte at a time */
            _floatx80_to_extended(&fpu->fp[reg], image);
            for (off = 0; off < 12; off++) {
                slog(" %02x", image[off]);
                lset(addr, 1, image[off]);
                addr++;
                if (shoe.abort)
                    return ;
            }
            slog("\n");
        }
        else {
            /* Memory -> register: gather all 12 bytes, then convert */
            for (off = 0; off < 12; off++) {
                image[off] = lget(addr, 1);
                addr++;
                if (shoe.abort)
                    return ;
            }
            _extended_to_floatx80(image, &fpu->fp[reg]);
        }
    }

    /* The transfer finished; commit the address register update */
    switch (m) {
        case 3:
            shoe.a[r] += size;
            break;
        case 4:
            shoe.a[r] -= size;
            break;
        default:
            break;
    }
}
#pragma mark First-hop decoder table inst implementations
/*
* The table generated by decoder_gen.c will refer directly
* to these instructions. inst_fpu_other() will handle all
* other FPU instructions.
*/
/*
 * Evaluate an FPU conditional predicate against the current cc_z, cc_n
 * and cc_nan condition code bits.
 *
 * Only the low four bits of cc take part; callers handle any higher bit
 * themselves. Returns 1 if the predicate holds, 0 otherwise.
 */
static _Bool fpu_test_cc(uint8_t cc)
{
    fpu_get_state_ptr();

    const _Bool zero = cc_z;
    const _Bool negative = cc_n;
    const _Bool unordered = cc_nan;
    _Bool holds;

    switch (cc & 0x0f) {
        case 0: // false
            holds = 0;
            break;
        case 1: // equal
            holds = zero;
            break;
        case 2: // greater than
            holds = !(unordered || zero || negative);
            break;
        case 3: // greater than or equal
            holds = zero || !(unordered || negative);
            break;
        case 4: // less than
            holds = negative && !(unordered || zero);
            break;
        case 5: // less than or equal
            holds = zero || (negative && !unordered);
            break;
        case 6: // greater or less than
            holds = !(unordered || zero);
            break;
        case 7: // ordered
            holds = !unordered;
            break;
        case 8: // unordered
            holds = unordered;
            break;
        case 9: // not (greater or less than)
            holds = unordered || zero;
            break;
        case 10: // not (less than or equal)
            holds = unordered || !(negative || zero);
            break;
        case 11: // not (less than)
            holds = unordered || zero || !negative;
            break;
        case 12: // not (greater than or equal)
            holds = unordered || (negative && !zero);
            break;
        case 13: // not (greater than)
            holds = unordered || zero || negative;
            break;
        case 14: // not equal
            holds = !zero;
            break;
        case 15: // true
            holds = 1;
            break;
        default: // cc & 0x0f cannot produce any other value
            assert(0);
            holds = 0;
            break;
    }

    return holds;
}
/*
 * fscc: write 0xff to the byte-sized EA operand if the FPU condition in
 * the extension word holds, otherwise write 0.
 */
void inst_fscc () {
    fpu_get_state_ptr();

    // Record the instruction address first: fscc can throw an exception
    fpu->fpiar = shoe.orig_pc;

    const uint16_t ext = nextword();

    ~decompose(shoe.op, 1111 001 001 MMMMMM);
    ~decompose(ext, 0000 0000 000 b cccc);

    /*
     * With b set and cc_nan set, _bsun_test() is consulted; a nonzero
     * return ends the instruction without writing anything
     * (the pre-instruction exception case).
     */
    if (b && cc_nan) {
        if (_bsun_test())
            return ;
    }

    if (fpu_test_cc(c))
        shoe.dat = 0xff;
    else
        shoe.dat = 0;

    call_ea_write(M, 1);
}
/*
 * fbcc: branch on an FPU condition.
 *
 * Opcode fields: s selects a 16-bit (0) or 32-bit (1) displacement,
 * b requests the BSUN check when cc_nan is set, c is the predicate.
 *
 * If the predicate holds, the displacement is read and added to
 * shoe.orig_pc + 2; otherwise the displacement words are skipped.
 */
void inst_fbcc () {
    fpu_get_state_ptr();

    // fbcc can throw an exception
    fpu->fpiar = shoe.orig_pc;

    ~decompose(shoe.op, 1111 001 01 s 0bcccc); // b => raise BSUN if NaN

    /* Size in bytes of the displacement that follows the opcode */
    const uint8_t sz = 2 << s;

    /*
     * inst_f*cc instructions throw a pre-instruction exception
     * if b && cc_nan
     */
    if (b && cc_nan && _bsun_test())
        return ;

    if (fpu_test_cc(c)) {
        const uint16_t ext = nextword();
        uint32_t displacement;

        if (s) {
            const uint16_t ext2 = nextword();
            /*
             * Widen before shifting: ext would otherwise be promoted to
             * a signed int, and shifting a value with bit 15 set left by
             * 16 overflows it (undefined behavior).
             */
            displacement = ((uint32_t)ext << 16) | ext2;
        }
        else
            displacement = (int16_t)ext; // sign-extend the 16-bit form

        shoe.pc = shoe.orig_pc + 2 + displacement;
    }
    else
        shoe.pc += sz;
}
/*
 * fsave (supervisor only): store an FPU state frame at the EA.
 *
 * Only the 16-bit frame header (0xfd18) is actually written. For -(An),
 * the address register is lowered by the full 0x1c-byte frame size once
 * the write has succeeded. fpiar is left alone.
 */
void inst_fsave () {
    fpu_get_state_ptr();

    verify_supervisor();

    // Don't modify fpiar for fsave

    ~decompose(shoe.op, 1111 001 100 MMMMMM);
    ~decompose(shoe.op, 1111 001 100 mmmrrr);

    const uint32_t size = 0x1c; // IDLE frame
    const uint16_t frame_header = 0xfd18;
    const _Bool predecrement = (m == 4);

    uint32_t frame_addr;
    if (predecrement) {
        frame_addr = shoe.a[r] - size;
    }
    else {
        call_ea_addr(M);
        frame_addr = shoe.dat;
    }

    lset(frame_addr, 2, frame_header);
    if (shoe.abort)
        return ;

    /* Commit the pre-decrement only after the write went through */
    if (predecrement)
        shoe.a[r] = frame_addr;
}
/*
 * frestore (supervisor only): read the first word of a state frame at
 * the EA and, for (An)+, step the address register past the frame.
 *
 * Nothing beyond that first word is read; it is used only to determine
 * the frame size. An unrecognised frame trips an assertion. fpiar is
 * left alone.
 */
void inst_frestore () {
    fpu_get_state_ptr();

    verify_supervisor();

    // Don't modify fpiar for frestore

    ~decompose(shoe.op, 1111 001 101 MMMMMM);
    ~decompose(shoe.op, 1111 001 101 mmmrrr);

    const _Bool postincrement = (m == 3);
    uint32_t addr, size;

    if (postincrement) {
        addr = shoe.a[r];
    }
    else {
        call_ea_addr(M);
        addr = shoe.dat;
    }

    const uint16_t word = lget(addr, 2);
    if (shoe.abort)
        return ;

    const uint16_t high_byte = word & 0xff00;
    const uint16_t low_byte = word & 0xff;

    // XXX: These frame sizes are different on 68881/68882/68040
    if (high_byte == 0x0000) {
        size = 4; // NULL state frame
    }
    else if (low_byte == 0x0018) {
        size = 0x1c; // IDLE state frame
    }
    else if (low_byte == 0x00b4) {
        size = 0xb8; // BUSY state frame
    }
    else {
        slog("Frestore encountered an unknown state frame 0x%04x\n", word);
        assert(!"inst_frestore: bad state frame");
        return ;
    }

    if (postincrement) {
        shoe.a[r] += size;
        slog("FPU: frestore: changing shoe.a[%u] += %u\n", r, size);
    }
}
/*
 * fdbcc: test an FPU condition, decrement and branch.
 *
 * If the predicate holds, the displacement word is skipped. Otherwise
 * the low word of d[r] is decremented and, unless it wrapped to 0xffff,
 * execution continues at shoe.orig_pc + 2 + displacement.
 *
 * NOTE(review): the displacement word sits at orig_pc + 4 here (opcode,
 * then condition word); confirm against the 6888x documentation that
 * orig_pc + 2 is the intended base for the branch target.
 */
void inst_fdbcc () {
    fpu_get_state_ptr();

    ~decompose(shoe.op, 1111 001 001 001 rrr);

    // fdbcc can throw an exception
    fpu->fpiar = shoe.orig_pc;

    const uint16_t ext = nextword();
    ~decompose(ext, 0000 0000 000 b cccc);

    /*
     * inst_f*cc instructions throw a pre-instruction exception
     * if b && cc_nan
     */
    if (b && cc_nan && _bsun_test())
        return ;

    /* Condition met: step over the displacement and carry on */
    if (fpu_test_cc(c)) {
        shoe.pc += 2;
        return ;
    }

    const int16_t branch_offset = nextword();
    const uint16_t counter = get_d(r, 2) - 1;
    set_d(r, counter, 2);

    /* Counter ran out: fall through to the next instruction */
    if (counter == 0xffff)
        return ;

    shoe.pc = shoe.orig_pc + 2 + branch_offset;
}
/*
 * ftrapcc: trap on an FPU condition.
 *
 * The instruction consists of the opcode word, the condition word and an
 * optional 16- or 32-bit operand whose size is encoded in the opcode's
 * low three bits (xyz). If the predicate holds, a format-two frame is
 * thrown through vector 7 with the address of the following instruction;
 * otherwise execution continues at that address.
 */
void inst_ftrapcc () {
    fpu_get_state_ptr();

    ~decompose(shoe.op, 1111 001 001 111 xyz);

    // ftrapcc can throw an exception
    fpu->fpiar = shoe.orig_pc;

    // (xyz) == (100) -> sz=0
    // (xyz) == (010) -> sz=2
    // (xyz) == (011) -> sz=4
    const uint32_t sz = y << (z+1);

    /*
     * The next instruction starts after the opcode word (2 bytes), the
     * condition word (2 bytes) and the optional operand (sz bytes).
     * Leaving out the condition word would put the PC inside this
     * instruction.
     */
    const uint32_t next_pc = shoe.orig_pc + 4 + sz;

    const uint16_t ext = nextword();
    ~decompose(ext, 0000 0000 000 b cccc);

    /*
     * inst_f*cc instructions throw a pre-instruction exception
     * if b && cc_nan
     */
    if (b && cc_nan && _bsun_test())
        return ;

    if (fpu_test_cc(c))
        throw_frame_two(shoe.sr, next_pc, 7, shoe.orig_pc);
    else
        shoe.pc = next_pc;
}
/*
 * fnop: has no logic of its own; it simply runs inst_fbcc().
 */
void inst_fnop() {
// This is technically fbcc
inst_fbcc();
}
/*
 * Second-hop decoder for FPU instructions: fetch the extension word and
 * dispatch on its top three bits (c).
 *
 * fpiar is set to shoe.orig_pc before dispatching to the fmath and fmove
 * handlers; it is left untouched for the fmovem variants.
 */
void inst_fpu_other () {
    fpu_get_state_ptr();

    ~decompose(shoe.op, 1111 001 000 MMMMMM);

    const uint16_t ext = nextword();
    ~decompose(ext, ccc xxx yyy eeeeeee);

    switch (c) {
        case 0: // Reg to reg
        case 2: // Memory->reg & movec
            fpu->fpiar = shoe.orig_pc; // fmath() can throw an exception
            inst_fmath(ext);
            break;

        case 1: // unused
            _throw_illegal_instruction();
            break;

        case 3: // reg->mem
            fpu->fpiar = shoe.orig_pc; // fmove() can throw an exception
            inst_fmove(ext);
            break;

        case 4: // mem -> sys ctl registers
        case 5: // sys ctl registers -> mem
            // fmovem_control() cannot throw an FPU exception (don't modify fpiar)
            inst_fmovem_control(ext);
            break;

        case 6: // movem to fp registers
        case 7: // movem to memory
            // fmovem() cannot throw an FPU exception (don't modify fpiar)
            inst_fmovem(ext);
            break;

        default:
            assert(0); // never get here
            break;
    }
}
#pragma mark FPU-state initialization and reset
/*
 * Allocate a fresh fpu_state_t from shoe.pool, clear it to all-zero bytes
 * and install it as shoe.fpu_state.
 */
void fpu_initialize()
{
    fpu_state_t *fpu = (fpu_state_t*)p_alloc(shoe.pool, sizeof(fpu_state_t));

    /*
     * memset takes (dest, value, length). With value and length swapped,
     * the call wrote zero bytes and left the state uninitialized.
     */
    memset(fpu, 0, sizeof(fpu_state_t));

    shoe.fpu_state = fpu;
}
/*
 * Reset the FPU: release the current shoe.fpu_state and build a new one
 * with fpu_initialize().
 */
void fpu_reset()
{
p_free(shoe.fpu_state);
fpu_initialize();
}