Unbroke shoebill, speed improvements

- Fixed inst_ext, which was completely broken since
  last commit, whoops!
- Added a bunch of __builtin_expect()s in all the
  critical paths, and now Marathon runs quite noticeably
  faster.
- Fixed (I think) all the SDL makefiles
This commit is contained in:
Peter Rutenbar 2015-01-26 22:26:30 -05:00
parent 9c3640cf48
commit 76f2b35170
9 changed files with 143 additions and 152 deletions

View File

@ -631,10 +631,6 @@ static floatx80
int8 roundingMode;
flag roundNearestEven, increment, isTiny;
int64 roundIncrement, roundMask, roundBits;
// [shoebill]
// printf("roundAndPackFloatx80: prec=%d sign=%d exp=%u sig0=0x%016llx sig1=0x%016llx\n",
// roundingPrecision, zSign, zExp, zSig0, zSig1);
roundingMode = float_rounding_mode;
roundNearestEven = ( roundingMode == float_round_nearest_even );
@ -3191,9 +3187,7 @@ int32 floatx80_to_int32( floatx80 a )
aExp = extractFloatx80Exp( a );
aSign = extractFloatx80Sign( a );
if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
// [shoebill] there is no way 0x4037 is the right constant to use here.
// 1.0 has exp=0x3fff, 0x4037 - 0x3fff == 56.
// (aSig >> 56) shifts out the mantissa bits, plus 8 more.
shiftCount = 0x4037 - aExp;
if ( shiftCount <= 0 ) shiftCount = 1;
shift64RightJamming( aSig, shiftCount, &aSig );

View File

@ -115,15 +115,15 @@ void *_cpu_thread (void *arg)
pthread_mutex_lock(&shoe.cpu_thread_lock);
while (1) {
if (shoe.cpu_thread_notifications) {
if sunlikely(shoe.cpu_thread_notifications) {
// If there's an interrupt pending
if (shoe.cpu_thread_notifications & 0xff) {
if slikely(shoe.cpu_thread_notifications & 0xff) {
// process_pending_interrupt() may clear SHOEBILL_STATE_STOPPED
process_pending_interrupt();
}
if (shoe.cpu_thread_notifications & SHOEBILL_STATE_RETURN) {
if sunlikely(shoe.cpu_thread_notifications & SHOEBILL_STATE_RETURN) {
pthread_mutex_unlock(&shoe.cpu_thread_lock);
return NULL;
}

View File

@ -55,9 +55,9 @@ static const _cc_func evaluate_cc[16] = {
};
#define nextword() ({const uint16_t w=lget(shoe.pc,2); if (shoe.abort) {return;}; shoe.pc+=2; w;})
#define nextlong() ({const uint32_t L=lget(shoe.pc,4); if (shoe.abort) {return;}; shoe.pc+=4; L;})
#define verify_supervisor() {if (!sr_s()) {throw_privilege_violation(); return;}}
#define nextword() ({const uint16_t w=lget(shoe.pc,2); if sunlikely(shoe.abort) {return;}; shoe.pc+=2; w;})
#define nextlong() ({const uint32_t L=lget(shoe.pc,4); if sunlikely(shoe.abort) {return;}; shoe.pc+=4; L;})
#define verify_supervisor() {if sunlikely(!sr_s()) {throw_privilege_violation(); return;}}
static void inst_callm(void) {
@ -466,9 +466,9 @@ static void inst_abcd (void) {
// FIXME: these addresses aren't predecremented (check whether a7 is incremented 2 bytes)
assert(!"acbd is broken");
packed_x = lget(shoe.a[x], 1);
if (shoe.abort) return ;
if sunlikely(shoe.abort) return ;
packed_y = lget(shoe.a[y], 1);
if (shoe.abort) return ;
if sunlikely(shoe.abort) return ;
}
else {
packed_x = shoe.d[x] & 0xff;
@ -494,7 +494,7 @@ static void inst_abcd (void) {
if (m) {
lset(shoe.a[x]-1, 1, packed_sum);
if (shoe.abort) return ;
if sunlikely(shoe.abort) return ;
shoe.a[x]--;
if (x != y)
@ -580,10 +580,10 @@ static void inst_stop (void) {
static void inst_rtr (void) {
const uint16_t ccr = lget(shoe.a[7], 2);
if (shoe.abort) return ;
if sunlikely(shoe.abort) return ;
const uint32_t pc = lget(shoe.a[7]+2, 4);
if (shoe.abort) return ;
if sunlikely(shoe.abort) return ;
shoe.a[7] += 6;
@ -595,7 +595,7 @@ static void inst_rtr (void) {
static void inst_rtd (void) {
const int16_t disp = nextword();
const uint32_t new_pc = lget(shoe.a[7], 4);
if (shoe.abort) return ;
if sunlikely(shoe.abort) return ;
shoe.pc = new_pc;
shoe.a[7] += 4;
@ -606,13 +606,13 @@ static void inst_rte (void) {
verify_supervisor();
const uint16_t sr = lget(shoe.a[7], 2);
if (shoe.abort) return ;
if sunlikely(shoe.abort) return ;
const uint32_t pc = lget(shoe.a[7]+2, 4);
if (shoe.abort) return ;
if sunlikely(shoe.abort) return ;
const uint16_t format_word = lget(shoe.a[7]+6, 2);
if (shoe.abort) return ;
if sunlikely(shoe.abort) return ;
// slog("rte: sr=0x%04x pc=0x%08x format=0x%04x, post-pop a7=0x%08x\n", sr, pc, format_word, shoe.a[7]+8);
@ -819,9 +819,9 @@ static void inst_addx (void) {
const uint32_t predec_x = shoe.a[x] - sz;
const uint32_t S = lget(predec_y, 4);
if (shoe.abort) return ;
if sunlikely(shoe.abort) return ;
const uint32_t D = lget(predec_x, 4);
if (shoe.abort) return ;
if sunlikely(shoe.abort) return ;
// Sm = mib(S + extend_bit, sz); // FIXME: sure you want to do this?
Sm = mib(S, sz);
@ -832,7 +832,7 @@ static void inst_addx (void) {
const uint32_t chopped_R = chop(R, sz);
lset(chopped_R, sz, shoe.dat);
if (shoe.abort) return ;
if sunlikely(shoe.abort) return ;
shoe.a[y] = predec_y;
shoe.a[x] = predec_x;
@ -926,10 +926,10 @@ static void inst_cmpm (void) {
const uint32_t ay = shoe.a[y] + ((x==y) ? sz : 0); // if x==y, then we pop the stack twice
const uint32_t dst = lget(ax, sz);
if (shoe.abort) return ;
if sunlikely(shoe.abort) return ;
const uint32_t src = lget(ay, sz);
if (shoe.abort) return ;
if sunlikely(shoe.abort) return ;
// *I believe* that if x==y, then that register will only be incremented *once*
// WRONG!
@ -1202,7 +1202,7 @@ static void inst_move (void) {
// So in this case, we need to switch back to the original a7 (set_sr(shoe.orig_sr)) before modifying it
// (This is hacky)
if (shoe.abort) {
if sunlikely(shoe.abort) {
if (m == 4 || m == 3) {
const uint16_t new_sr = shoe.sr;
const uint8_t delta = ((r==7) && (sz==1)) ? 2 : sz;
@ -1357,12 +1357,12 @@ static void inst_moves (void) {
assert(! ((a && (M&7)==r) && (((M>>3) == 3) || ((M>>3) == 4))) );
lset_fc(addr, sz, data, fc);
if (shoe.abort)
if sunlikely(shoe.abort)
return ;
}
else {
uint32_t data = lget_fc(addr, sz, fc);
if (shoe.abort)
if sunlikely(shoe.abort)
return ;
// if destination is address register, data is sign extended to 32 bits
@ -1758,7 +1758,7 @@ static void inst_pea (void) {
call_ea_addr(M);
// push it onto the stack
lset(shoe.a[7]-4, 4, shoe.dat);
if (shoe.abort) return ;
if sunlikely(shoe.abort) return ;
// decrement the stack pointer if lset didn't abort
shoe.a[7] -= 4;
}
@ -1775,10 +1775,10 @@ static void inst_subx (void) {
const uint32_t predecrement_y = shoe.a[y]-predecrement_sz;
const uint32_t src = lget(predecrement_x, sz);
if (shoe.abort) return ;
if sunlikely(shoe.abort) return ;
const uint32_t dst = lget(predecrement_y, sz);
if (shoe.abort) return ;
if sunlikely(shoe.abort) return ;
const uint32_t result = dst - src - (sr_x()?1:0);
@ -1868,7 +1868,7 @@ static void inst_bsr (void) {
}
lset(shoe.a[7]-4, 4, shoe.pc);
if (shoe.abort) return ;
if sunlikely(shoe.abort) return ;
shoe.a[7] -= 4;
shoe.pc = new_pc;
}
@ -1942,7 +1942,7 @@ static void inst_jsr (void) {
// my quadra 800 doesn't object when a7 is odd...
// so, no extra error checking needed
lset(shoe.a[7]-4, 4, shoe.pc);
if (shoe.abort) return ;
if sunlikely(shoe.abort) return ;
// slog("jsr: writing pc (0x%08x) to *0x%08x (phys=0x%08x)\n", shoe.pc, shoe.a[7]-4, shoe.physical_addr);
@ -1997,7 +1997,7 @@ static void inst_link_word (void) {
// push the contents of the address register onto the stack
lset(shoe.a[7]-4, 4, shoe.a[r]);
if (shoe.abort) return ;
if sunlikely(shoe.abort) return ;
shoe.a[7] -= 4;
// load the updated stack pointer into the address register
@ -2011,7 +2011,7 @@ static void inst_unlk (void) {
~decompose(shoe.op, 0100 1110 0101 1 rrr);
const uint32_t pop = lget(shoe.a[r], 4);
if (shoe.abort) return ;
if sunlikely(shoe.abort) return ;
// loads the stack pointer from the address register
shoe.a[7] = shoe.a[r]+4;
@ -2022,7 +2022,7 @@ static void inst_unlk (void) {
static void inst_rts (void) {
const uint32_t pop = lget(shoe.a[7], 4);
if (shoe.abort) return ;
if sunlikely(shoe.abort) return ;
shoe.a[7] += 4;
shoe.pc = pop;
@ -2059,7 +2059,7 @@ static void inst_link_long (void) {
// push the contents of the address register onto the stack
lset(shoe.a[7]-4, 4, shoe.a[r]);
if (shoe.abort) return ;
if sunlikely(shoe.abort) return ;
shoe.a[7] -= 4;
// load the updated stack pointer into the address register
@ -2093,7 +2093,7 @@ static void inst_movem (void) {
tmp = lget(shoe.dat, 4);
else
tmp = (uint32_t)((int32_t)((int16_t)lget(shoe.dat, 2))); // sign-extend if short-mode
if (shoe.abort) goto abort;
if sunlikely(shoe.abort) goto abort;
shoe.d[i] = tmp;
shoe.dat += sz;
}
@ -2107,7 +2107,7 @@ static void inst_movem (void) {
tmp = lget(shoe.dat, 4);
else
tmp = (uint32_t)((int32_t)((int16_t)lget(shoe.dat, 2))); // sign-extend if short-mode
if (shoe.abort) goto abort;
if sunlikely(shoe.abort) goto abort;
shoe.a[i] = tmp;
shoe.dat += sz;
}
@ -2152,7 +2152,7 @@ static void inst_movem (void) {
// FIXME: determine what happens when the predecrementing address register is written
if ((dfield >> i) & 1) {
lset(addr, sz, shoe.d[i]);
if (shoe.abort)
if sunlikely(shoe.abort)
goto abort; // FIXME: figure out how to abort cleanly
addr += sz;
}
@ -2175,7 +2175,7 @@ static void inst_movem (void) {
}
lset(addr, sz, data);
if (shoe.abort)
if sunlikely(shoe.abort)
goto abort; // FIXME: figure out how to abort cleanly
addr += sz;
}
@ -2233,26 +2233,26 @@ static void inst_movep (void) {
switch (m) {
case 0: { // word, mem->reg
uint16_t val = lget(addr, 1);
if (shoe.abort) return;
if sunlikely(shoe.abort) return;
val = (val << 8) | lget(addr + 2, 1);
if (shoe.abort) return;
if sunlikely(shoe.abort) return;
set_d(d, val, 2);
break;
}
case 1: { // long, mem->reg
uint32_t val = lget(addr, 1);
if (shoe.abort) return;
if sunlikely(shoe.abort) return;
val = (val << 8) | lget(addr + 2, 1);
if (shoe.abort) return;
if sunlikely(shoe.abort) return;
val = (val << 8) | lget(addr + 4, 1);
if (shoe.abort) return;
if sunlikely(shoe.abort) return;
val = (val << 8) | lget(addr + 6, 1);
if (shoe.abort) return;
if sunlikely(shoe.abort) return;
shoe.d[d] = val;
break;
@ -2260,26 +2260,26 @@ static void inst_movep (void) {
case 2: { // word, reg->mem
const uint16_t val = shoe.d[d];
lset(addr + 0, 1, (val >> 8) & 0xff);
if (shoe.abort) return;
if sunlikely(shoe.abort) return;
lset(addr + 2, 1, (val >> 0) & 0xff);
if (shoe.abort) return;
if sunlikely(shoe.abort) return;
break;
}
case 3: { // long, reg->mem
const uint32_t val = shoe.d[d];
lset(addr + 0, 1, (val >> 24) & 0xff);
if (shoe.abort) return;
if sunlikely(shoe.abort) return;
lset(addr + 2, 1, (val >> 16) & 0xff);
if (shoe.abort) return;
if sunlikely(shoe.abort) return;
lset(addr + 4, 1, (val >> 8) & 0xff);
if (shoe.abort) return;
if sunlikely(shoe.abort) return;
lset(addr + 6, 1, (val >> 0) & 0xff);
if (shoe.abort) return;
if sunlikely(shoe.abort) return;
break;
}
@ -2330,7 +2330,7 @@ void write_bitfield(const uint32_t width, const uint32_t offset, const uint32_t
//slog("write_bitfield: byte_mask = 0x%02x field_mask = 0x%02x bit_offset=%u byte_offset=%u\n", byte_mask, field_mask, bit_offset, byte_offset);
//slog("write_bitfield: changing byte at 0x%08x from 0x%02x to 0x%02x\n",
//first_byte_addr, old_byte, new_byte);
if (shoe.abort) return ;
if sunlikely(shoe.abort) return ;
}
else {
uint32_t boff = bit_offset;
@ -2340,7 +2340,7 @@ void write_bitfield(const uint32_t width, const uint32_t offset, const uint32_t
uint32_t remaining_field = field<<(32-width); // left-aligned
while (remaining_width > 0) {
const uint8_t byte = lget(addr, 1);
if (shoe.abort) return ;
if sunlikely(shoe.abort) return ;
const uint8_t mask = ~~(((uint8_t)((0xff) << (8-curwidth))) >> boff);
const uint8_t field_chunk = remaining_field >> (32-curwidth);
@ -2351,7 +2351,7 @@ void write_bitfield(const uint32_t width, const uint32_t offset, const uint32_t
lset(addr, 1, (byte & mask) | rotated_chunk);
//slog("write_bitfield: changing byte at 0x%08x from 0x%02x to 0x%02x\n",
//addr, byte, (byte & mask) | rotated_chunk);
if (shoe.abort) return ;
if sunlikely(shoe.abort) return ;
addr++;
remaining_field <<= curwidth;
@ -2399,10 +2399,10 @@ uint32_t extract_bitfield(const uint32_t width, const uint32_t offset, const uin
field = bitchop(lget(first_byte_addr, 1), 8-bit_offset);
//slog("debug: extract_bitfield: first byte field (low %u bits): 0x%02x\n", 8-bit_offset, field);
if (shoe.abort) return 0;
if sunlikely(shoe.abort) return 0;
if (width > (8-bit_offset)) { // if the data isn't entirely contained in the first byte
uint32_t last_long = lget(first_byte_addr+1, 4);
if (shoe.abort) return 0;
if sunlikely(shoe.abort) return 0;
field = (field<<(width - (8-bit_offset))) | // first_byte, left shifted
(last_long >> (32 - (width - (8-bit_offset)))); // last_long, right shifted
}
@ -2429,7 +2429,7 @@ static void inst_bfextu (void) {
}
const uint32_t field = extract_bitfield(width, offset, M, ea);
if (shoe.abort) return;
if sunlikely(shoe.abort) return;
shoe.d[r] = field;
@ -2455,9 +2455,9 @@ static void inst_bfchg (void) {
}
const uint32_t field = extract_bitfield(width, offset, M, ea);
if (shoe.abort) return;
if sunlikely(shoe.abort) return;
write_bitfield(width, offset, M, ea, ~~field);
if (shoe.abort) return;
if sunlikely(shoe.abort) return;
set_sr_c(0);
set_sr_v(0);
@ -2481,7 +2481,7 @@ static void inst_bfexts (void) {
}
const uint32_t field = extract_bitfield(width, offset, M, ea);
if (shoe.abort) return;
if sunlikely(shoe.abort) return;
const uint32_t mib = (field >> (width-1))&1;
const uint32_t maskA = (((uint32_t)0) - mib) << 1;
@ -2511,9 +2511,9 @@ static void inst_bfclr (void) {
}
const uint32_t field = extract_bitfield(width, offset, M, ea);
if (shoe.abort) return;
if sunlikely(shoe.abort) return;
write_bitfield(width, offset, M, ea, 0);
if (shoe.abort) return;
if sunlikely(shoe.abort) return;
set_sr_c(0);
set_sr_v(0);
@ -2537,9 +2537,9 @@ static void inst_bfset (void) {
}
const uint32_t field = extract_bitfield(width, offset, M, ea);
if (shoe.abort) return;
if sunlikely(shoe.abort) return;
write_bitfield(width, offset, M, ea, 0xffffffff);
if (shoe.abort) return;
if sunlikely(shoe.abort) return;
set_sr_c(0);
set_sr_v(0);
@ -2563,7 +2563,7 @@ static void inst_bftst (void) {
}
const uint32_t field = extract_bitfield(width, offset, M, ea);
if (shoe.abort) return;
if sunlikely(shoe.abort) return;
set_sr_c(0);
set_sr_v(0);
@ -2587,7 +2587,7 @@ static void inst_bfffo (void) {
}
const uint32_t field = extract_bitfield(width, offset, M, ea);
if (shoe.abort) return;
if sunlikely(shoe.abort) return;
uint32_t i;
for (i=1; (i<=width) && ((field>>(width-i))&1)==0; i++) ;
@ -2618,7 +2618,7 @@ static void inst_bfins (void) {
}
write_bitfield(width, offset, M, ea, field);
if (shoe.abort) return;
if sunlikely(shoe.abort) return;
set_sr_c(0);
set_sr_v(0);
@ -2752,20 +2752,20 @@ static void inst_ext (void) {
case ~b(010): { // byte -> word
uint16_t val = (int8_t)get_d(r, 1);
set_d(r, val, 2);
set_sr_z(get_d(r, 2));
set_sr_n(mib(shoe.d[r], 2));
set_sr_z(!val);
set_sr_n(val >> 15);
break;
} case ~b(011): { // word -> long
uint32_t val = (int16_t)get_d(r, 2);
set_d(r, val, 4);
set_sr_z(get_d(r, 4));
set_sr_n(mib(shoe.d[r], 4));
set_sr_z(!val);
set_sr_n(val >> 31);
break;
} case ~b(111): { // byte -> long
uint32_t val = (int8_t)get_d(r, 1);
set_d(r, val, 4);
set_sr_z(get_d(r, 4));
set_sr_n(mib(shoe.d[r], 4));
set_sr_z(!val);
set_sr_n(val >> 31);
break;
}
}
@ -2980,14 +2980,16 @@ static void inst_trap (void) {
set_sr_s(1);
push_a7(vector_offset, 2);
if (shoe.abort) goto fail;
if sunlikely(shoe.abort) goto fail;
push_a7(shoe.pc, 4);
if (shoe.abort) goto fail;
if sunlikely(shoe.abort) goto fail;
push_a7(shoe.orig_sr, 2);
if (shoe.abort) goto fail;
if sunlikely(shoe.abort) goto fail;
const uint32_t newpc = lget(shoe.vbr + vector_offset, 4);
if (shoe.abort) goto fail;
if sunlikely(shoe.abort) goto fail;
shoe.pc = newpc;
return ;
@ -3007,7 +3009,7 @@ void cpu_step()
shoe.orig_sr = shoe.sr;
// Is this an odd address? Throw an address exception!
if (shoe.pc & 1) {
if sunlikely(shoe.pc & 1) {
// throw_address_error(shoe.pc, 0);
// I'm leaving this assert in here for now because it almost always indicates a bug in the emulator when it fires
assert(!"odd PC address (probably a bug)");
@ -3018,7 +3020,7 @@ void cpu_step()
shoe.op = lget(shoe.pc, 2);
// If there was an exception, then the pc changed. Restart execution from the beginning.
if (shoe.abort) {
if sunlikely(shoe.abort) {
shoe.abort = 0;
return ;
}

View File

@ -34,17 +34,13 @@
void _physical_get_ram (void)
{
uint64_t *addr;
if (shoe.physical_addr < shoe.physical_mem_size)
if slikely(shoe.physical_addr < shoe.physical_mem_size)
addr = (uint64_t*)&shoe.physical_mem_base[shoe.physical_addr];
else
addr = (uint64_t*)&shoe.physical_mem_base[shoe.physical_addr % shoe.physical_mem_size];
const uint8_t bits = (8 - shoe.physical_size) * 8;
shoe.physical_dat = ntohll(*addr) >> bits;
if ((shoe.physical_addr >= 256) && (shoe.physical_addr < 0x4000)) {
slog("LOMEM get: *0x%08x = 0x%x\n", shoe.physical_addr, (uint32_t)shoe.physical_dat);
}
}
void _physical_get_rom (void)
@ -99,7 +95,7 @@ void _physical_get_io (void)
void _physical_get_super_slot (void)
{
const uint32_t slot = shoe.physical_addr >> 28;
if (shoe.slots[slot].connected)
if slikely(shoe.slots[slot].connected)
shoe.physical_dat = shoe.slots[slot].read_func(shoe.physical_addr,
shoe.physical_size,
slot);
@ -111,7 +107,7 @@ void _physical_get_super_slot (void)
void _physical_get_standard_slot (void)
{
const uint32_t slot = (shoe.physical_addr >> 24) & 0xf;
if (shoe.slots[slot].connected)
if slikely(shoe.slots[slot].connected)
shoe.physical_dat = shoe.slots[slot].read_func(shoe.physical_addr,
shoe.physical_size,
slot);
@ -342,7 +338,7 @@ static void translate_logical_addr()
// TODO: Check limit here
// If root descriptor is invalid, throw a bus error
if (rp_dt(rootp) == 0) {
if sunlikely(rp_dt(rootp) == 0) {
throw_bus_error(shoe.logical_addr, shoe.logical_is_write);
return ;
}
@ -377,7 +373,7 @@ static void translate_logical_addr()
const uint8_t dt = desc_dt(desc, desc_size);
// If this descriptor is invalid, throw a bus error
if (dt == 0) {
if sunlikely(dt == 0) {
throw_bus_error(shoe.logical_addr, shoe.logical_is_write);
return ;
}
@ -395,7 +391,7 @@ static void translate_logical_addr()
get_desc(desc & 0xfffffff0, (4 << desc_size));
// I think it's possible for an indirect descriptor to point to an invalid descriptor...
if (desc_dt(desc, desc_size) == 0) {
if sunlikely(desc_dt(desc, desc_size) == 0) {
throw_bus_error(shoe.logical_addr, shoe.logical_is_write);
return ;
}
@ -426,7 +422,7 @@ search_done:
wp |= desc_wp(desc, desc_size); // or in the wp flag for this page descriptor
// And finally throw a bus error
if (wp && shoe.logical_is_write) {
if sunlikely(wp && shoe.logical_is_write) {
throw_bus_error(shoe.logical_addr, shoe.logical_is_write);
return ;
}
@ -467,11 +463,11 @@ void logical_get (void)
{
// If address translation isn't enabled, this is a physical address
if (!tc_enable()) {
if sunlikely(!tc_enable()) {
shoe.physical_addr = shoe.logical_addr;
shoe.physical_size = shoe.logical_size;
physical_get();
if (shoe.abort) {
if sunlikely(shoe.abort) {
shoe.abort = 0;
throw_long_bus_error(shoe.logical_addr, 0);
return ;
@ -491,21 +487,21 @@ void logical_get (void)
shoe.logical_is_write = 0;
// Common case: the read is contained entirely within a page
if (!((pageoffset + logical_size - 1) >> ps)) {
if (!check_pmmu_cache()) {
if slikely(!((pageoffset + logical_size - 1) >> ps)) {
if sunlikely(!check_pmmu_cache()) {
translate_logical_addr();
if (shoe.abort)
if sunlikely(shoe.abort)
return ;
}
if (shoe.physical_addr < shoe.physical_mem_size) {
if slikely(shoe.physical_addr < shoe.physical_mem_size) {
// Fast path
shoe.logical_dat = ntohll(*(uint64_t*)&shoe.physical_mem_base[shoe.physical_addr]) >> ((8-logical_size)*8);
}
else {
shoe.physical_size = logical_size;
physical_get();
if (shoe.abort) {
if sunlikely(shoe.abort) {
shoe.abort = 0;
throw_long_bus_error(logical_addr, 0);
return ;
@ -521,9 +517,9 @@ void logical_get (void)
shoe.logical_addr = addr_a;
shoe.logical_size = size_a;
if (!check_pmmu_cache()) {
if sunlikely(!check_pmmu_cache()) {
translate_logical_addr();
if (shoe.abort)
if sunlikely(shoe.abort)
return ;
}
@ -531,16 +527,16 @@ void logical_get (void)
shoe.logical_addr = addr_b;
shoe.logical_size = size_b;
if (!check_pmmu_cache()) {
if sunlikely(!check_pmmu_cache()) {
translate_logical_addr();
if (shoe.abort)
if sunlikely(shoe.abort)
return ;
}
const uint32_t p_addr_b = shoe.physical_addr;
shoe.physical_size = size_b;
physical_get();
if (shoe.abort) {
if sunlikely(shoe.abort) {
shoe.abort = 0;
throw_long_bus_error(shoe.logical_addr, 0);
return ;
@ -550,7 +546,7 @@ void logical_get (void)
shoe.physical_addr = p_addr_a;
shoe.physical_size = size_a;
physical_get();
if (shoe.abort) {
if sunlikely(shoe.abort) {
shoe.abort = 0;
throw_long_bus_error(shoe.logical_addr, 0);
return ;
@ -563,7 +559,7 @@ void logical_get (void)
void logical_set (void)
{
// If address translation isn't enabled, this is a physical address
if (!tc_enable()) {
if sunlikely(!tc_enable()) {
shoe.physical_addr = shoe.logical_addr;
shoe.physical_size = shoe.logical_size;
shoe.physical_dat = shoe.logical_dat;
@ -583,11 +579,11 @@ void logical_set (void)
shoe.logical_is_write = 1;
// Common case: this write is contained entirely in one page
if (!((pageoffset + logical_size - 1) >> ps)) {
if slikely(!((pageoffset + logical_size - 1) >> ps)) {
// Common case: the write is contained entirely within a page
if (!check_pmmu_cache()) {
if sunlikely(!check_pmmu_cache()) {
translate_logical_addr();
if (shoe.abort)
if sunlikely(shoe.abort)
return ;
}
@ -605,18 +601,18 @@ void logical_set (void)
shoe.logical_addr = addr_a;
shoe.logical_size = size_a;
if (!check_pmmu_cache()) {
if sunlikely(!check_pmmu_cache()) {
translate_logical_addr();
if (shoe.abort)
if sunlikely(shoe.abort)
return ;
}
const uint32_t p_addr_a = shoe.physical_addr;
shoe.logical_addr = addr_b;
shoe.logical_size = size_b;
if (!check_pmmu_cache()) {
if sunlikely(!check_pmmu_cache()) {
translate_logical_addr();
if (shoe.abort)
if sunlikely(shoe.abort)
return ;
}
const uint32_t p_addr_b = shoe.physical_addr;
@ -639,8 +635,8 @@ void logical_set (void)
/* --- EA routines --- */
#pragma mark EA routines
#define nextword(pc) ({const uint16_t w=lget((pc),2);if (shoe.abort){return;}(pc)+=2; w;})
#define nextlong(pc) ({const uint32_t L=lget((pc),4);if (shoe.abort){return;}(pc)+=4; L;})
#define nextword(pc) ({const uint16_t w=lget((pc),2);if sunlikely(shoe.abort){return;}(pc)+=2; w;})
#define nextlong(pc) ({const uint32_t L=lget((pc),4);if sunlikely(shoe.abort){return;}(pc)+=4; L;})
// ea_decode_extended() - find the EA for those hideous 68020 addr modes
static void ea_decode_extended()
@ -760,7 +756,7 @@ static void ea_decode_extended()
case ~b(1001): case ~b(1010): case ~b(1011): {
// Indirect preindexed
const uint32_t intermediate = lget(base_addr + base_disp + index_val, 4);
if (shoe.abort) return ;
if sunlikely(shoe.abort) return ;
shoe.extended_addr = intermediate + outer_disp;
shoe.extended_len = mypc - start_pc;
// slog("addr=0x%x len=%u\n", shoe.extended_addr, shoe.extended_len);
@ -770,7 +766,7 @@ static void ea_decode_extended()
case ~b(0101): case ~b(0110): case ~b(0111): {
// Indirect postindexed
const uint32_t intermediate = lget(base_addr + base_disp, 4);
if (shoe.abort) return ;
if sunlikely(shoe.abort) return ;
shoe.extended_addr = intermediate + index_val + outer_disp;
shoe.extended_len = mypc - start_pc;
return ;
@ -847,7 +843,7 @@ void _ea_011_write (void)
const uint8_t delta = ((reg==7) && (shoe.sz==1)) ? 2 : shoe.sz;
lset(shoe.a[reg], shoe.sz, shoe.dat);
if (!shoe.abort)
if slikely(!shoe.abort)
shoe.a[reg] += delta;
}
@ -870,7 +866,7 @@ void _ea_100_write (void)
const uint8_t delta = ((reg==7) && (shoe.sz==1)) ? 2 : shoe.sz;
lset(shoe.a[reg] - delta, shoe.sz, shoe.dat);
if (!shoe.abort)
if slikely(!shoe.abort)
shoe.a[reg] -= delta;
}
@ -902,7 +898,7 @@ void _ea_101_addr (void)
void _ea_110_read (void)
{
ea_decode_extended();
if (!shoe.abort)
if slikely(!shoe.abort)
shoe.dat = lget(shoe.extended_addr, shoe.sz);
shoe.uncommitted_ea_read_pc = shoe.pc + shoe.extended_len;
}
@ -913,16 +909,16 @@ void _ea_110_read_commit (void)
void _ea_110_write (void)
{
ea_decode_extended();
if (!shoe.abort) {
if slikely(!shoe.abort) {
lset(shoe.extended_addr, shoe.sz, shoe.dat);
if (!shoe.abort)
if slikely(!shoe.abort)
shoe.pc += shoe.extended_len;
}
}
void _ea_110_addr (void)
{
ea_decode_extended();
if (!shoe.abort) {
if slikely(!shoe.abort) {
shoe.dat = shoe.extended_addr;
shoe.pc += shoe.extended_len;
}
@ -998,7 +994,7 @@ void _ea_111_010_addr (void)
void _ea_111_011_read (void)
{
ea_decode_extended();
if (!shoe.abort)
if slikely(!shoe.abort)
shoe.dat = lget(shoe.extended_addr, shoe.sz);
shoe.uncommitted_ea_read_pc = shoe.pc + shoe.extended_len;
}
@ -1009,7 +1005,7 @@ void _ea_111_011_read_commit (void)
void _ea_111_011_addr (void)
{
ea_decode_extended();
if (!shoe.abort) {
if slikely(!shoe.abort) {
shoe.dat = shoe.extended_addr;
shoe.pc += shoe.extended_len;
}

View File

@ -79,6 +79,9 @@
#endif
#define slikely(e) (__builtin_expect(!!(e), 1))
#define sunlikely(e) (__builtin_expect(!!(e), 0))
/*
* core_api.c stuff
*/
@ -876,7 +879,12 @@ extern const physical_get_ptr physical_get_jump_table[16];
extern const physical_set_ptr physical_set_jump_table[16];
#define physical_set() physical_set_jump_table[shoe.physical_addr >> 28]()
#define pset(addr, s, val) {shoe.physical_addr=(addr); shoe.physical_size=(s); shoe.physical_dat=(val); physical_set();}
#define pset(addr, s, val) do { \
shoe.physical_addr=(addr); \
shoe.physical_size=(s); \
shoe.physical_dat=(val); \
physical_set(); \
} while (0)
#define physical_get() physical_get_jump_table[shoe.physical_addr >> 28]()
#define pget(addr, s) ({shoe.physical_addr=(addr); shoe.physical_size=(s); physical_get(); shoe.physical_dat;})
@ -889,26 +897,17 @@ void logical_get (void);
logical_get(); \
shoe.logical_dat; \
})
#define lget(addr, s) ({ \
shoe.logical_addr=(addr); \
shoe.logical_size=(s); \
shoe.logical_fc = (sr_s() ? 5 : 1); \
logical_get(); \
shoe.logical_dat; \
})
#define lget(addr, s) lget_fc((addr), (s), (sr_s() ? 5 : 1))
void logical_set (void);
#define lset_fc(addr, s, val, fc) {\
#define lset_fc(addr, s, val, fc) do { \
shoe.logical_addr=(addr); \
shoe.logical_size=(s); \
shoe.logical_dat=(val); \
shoe.logical_fc = (fc); \
logical_set();\
}
#define lset(addr, s, val) { \
lset_fc((addr), (s), (val), sr_s() ? 5 : 1) \
}
} while (0)
#define lset(addr, s, val) lset_fc((addr), (s), (val), sr_s() ? 5 : 1)
typedef void (*_ea_func) (void);
extern const _ea_func ea_read_jump_table[64];
@ -922,10 +921,10 @@ extern const _ea_func ea_addr_jump_table[64];
#define ea_addr() ea_addr_jump_table[shoe.mr]()
#define call_ea_read(M, s) {shoe.mr=(M);shoe.sz=(s);ea_read();if (shoe.abort) return;}
#define call_ea_write(M, s) {shoe.mr=(M);shoe.sz=(s);ea_write();if (shoe.abort) return;}
#define call_ea_read_commit(M, s) {shoe.mr=(M);shoe.sz=(s);ea_read_commit();if (shoe.abort) return;}
#define call_ea_addr(M) {shoe.mr=(M);ea_addr();if (shoe.abort) return;}
#define call_ea_read(M, s) {shoe.mr=(M);shoe.sz=(s);ea_read();if sunlikely(shoe.abort) return;}
#define call_ea_write(M, s) {shoe.mr=(M);shoe.sz=(s);ea_write();if sunlikely(shoe.abort) return;}
#define call_ea_read_commit(M, s) {shoe.mr=(M);shoe.sz=(s);ea_read_commit();if sunlikely(shoe.abort) return;}
#define call_ea_addr(M) {shoe.mr=(M);ea_addr();if sunlikely(shoe.abort) return;}
#define push_a7(_dat, _sz) {shoe.a[7]-=(_sz);lset(shoe.a[7], (_sz), (_dat));}

View File

@ -145,7 +145,7 @@ uint32_t nubus_video_read_func(const uint32_t rawaddr, const uint32_t size,
const uint32_t addr = rawaddr & 0x00ffffff;
// ROM and control registers
if ((addr >> 20) == 0xf) {
if sunlikely((addr >> 20) == 0xf) {
slog("nubus_video_read_func: got a read to 0x%08x sz=%u\n", rawaddr, size);
@ -172,7 +172,7 @@ uint32_t nubus_video_read_func(const uint32_t rawaddr, const uint32_t size,
// Else, this is video ram
uint32_t i, result = 0;
if (addr < (ctx->pixels * 4)) {
if slikely(addr < (ctx->pixels * 4)) {
for (i=0; i<size; i++)
result = (result << 8) | ((uint8_t*)ctx->direct_buf)[addr + i];
}
@ -234,7 +234,7 @@ void nubus_video_write_func(const uint32_t rawaddr, const uint32_t size,
uint32_t i;
// ROM and control registers
if ((addr >> 20) == 0xf) {
if sunlikely((addr >> 20) == 0xf) {
slog("nubus_video_write_func: got a write to 0x%08x sz=%u data=0x%x\n", rawaddr, size, data);
@ -340,7 +340,7 @@ void nubus_video_write_func(const uint32_t rawaddr, const uint32_t size,
// Else, this is video ram
if (addr < (ctx->pixels * 4)) {
if slikely(addr < (ctx->pixels * 4)) {
uint32_t mydata, myaddr;
for (myaddr = addr + size, mydata = data; addr < myaddr; ) {
((uint8_t*)ctx->direct_buf)[--myaddr] = mydata & 0xff;

View File

@ -8,7 +8,7 @@ for i in adb fpu mc68851 mem via floppy core_api cpu dis; do
files="$files $i.post.c"
done
for i in atrap_tab coff exception macii_symbols redblack scsi video filesystem alloc_pool toby_frame_buffer ethernet sound; do
for i in SoftFloat/softfloat atrap_tab coff exception macii_symbols redblack scsi video filesystem alloc_pool toby_frame_buffer ethernet sound; do
files="$files ../core/$i.c"
done

View File

@ -8,7 +8,7 @@ for i in adb fpu mc68851 mem via floppy core_api cpu dis; do
files="$files $i.post.c"
done
for i in atrap_tab coff exception macii_symbols redblack scsi video filesystem alloc_pool toby_frame_buffer ethernet sound; do
for i in SoftFloat/softfloat atrap_tab coff exception macii_symbols redblack scsi video filesystem alloc_pool toby_frame_buffer ethernet sound; do
files="$files ../core/$i.c"
done

View File

@ -6,4 +6,4 @@ gcc -O1 ..\core\decoder_gen.c -o decoder_gen
decoder_gen inst .
decoder_gen dis .
gcc -O3 -flto -mno-ms-bitfields sdl.c adb.post.c fpu.post.c mc68851.post.c mem.post.c via.post.c floppy.post.c core_api.post.c cpu.post.c dis.post.c ..\core\atrap_tab.c ..\core\coff.c ..\core\exception.c ..\core\macii_symbols.c ..\core\redblack.c ..\core\scsi.c ..\core\video.c ..\core\filesystem.c ..\core\alloc_pool.c ..\core\toby_frame_buffer.c ..\core\ethernet.c ..\core\sound.c -lmingw32 -lopengl32 -lsdl2main -lsdl2 -o shoebill
gcc -O3 -flto -mno-ms-bitfields sdl.c adb.post.c fpu.post.c mc68851.post.c mem.post.c via.post.c floppy.post.c core_api.post.c cpu.post.c dis.post.c ..\core\atrap_tab.c ..\core\coff.c ..\core\exception.c ..\core\macii_symbols.c ..\core\redblack.c ..\core\scsi.c ..\core\video.c ..\core\filesystem.c ..\core\alloc_pool.c ..\core\toby_frame_buffer.c ..\core\ethernet.c ..\core\sound.c ..\core\SoftFloat\softfloat.c -lmingw32 -lopengl32 -lsdl2main -lsdl2 -o shoebill