diff --git a/core/SoftFloat/softfloat.c b/core/SoftFloat/softfloat.c index e4d3aab..7c45b61 100644 --- a/core/SoftFloat/softfloat.c +++ b/core/SoftFloat/softfloat.c @@ -631,10 +631,6 @@ static floatx80 int8 roundingMode; flag roundNearestEven, increment, isTiny; int64 roundIncrement, roundMask, roundBits; - - // [shoebill] - // printf("roundAndPackFloatx80: prec=%d sign=%d exp=%u sig0=0x%016llx sig1=0x%016llx\n", - // roundingPrecision, zSign, zExp, zSig0, zSig1); roundingMode = float_rounding_mode; roundNearestEven = ( roundingMode == float_round_nearest_even ); @@ -3191,9 +3187,7 @@ int32 floatx80_to_int32( floatx80 a ) aExp = extractFloatx80Exp( a ); aSign = extractFloatx80Sign( a ); if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; - // [shoebill] there is no way 0x4037 is the right constant to use here. - // 1.0 has exp=0x3fff, 0x4037 - 0x3fff == 56. - // (aSig >> 56) shifts out the mantissa bits, plus 8 more. + shiftCount = 0x4037 - aExp; if ( shiftCount <= 0 ) shiftCount = 1; shift64RightJamming( aSig, shiftCount, &aSig ); diff --git a/core/core_api.c b/core/core_api.c index 4233853..d3a4d9c 100644 --- a/core/core_api.c +++ b/core/core_api.c @@ -115,15 +115,15 @@ void *_cpu_thread (void *arg) pthread_mutex_lock(&shoe.cpu_thread_lock); while (1) { - if (shoe.cpu_thread_notifications) { + if sunlikely(shoe.cpu_thread_notifications) { // If there's an interrupt pending - if (shoe.cpu_thread_notifications & 0xff) { + if slikely(shoe.cpu_thread_notifications & 0xff) { // process_pending_interrupt() may clear SHOEBILL_STATE_STOPPED process_pending_interrupt(); } - if (shoe.cpu_thread_notifications & SHOEBILL_STATE_RETURN) { + if sunlikely(shoe.cpu_thread_notifications & SHOEBILL_STATE_RETURN) { pthread_mutex_unlock(&shoe.cpu_thread_lock); return NULL; } diff --git a/core/cpu.c b/core/cpu.c index 240df22..bffcbb7 100644 --- a/core/cpu.c +++ b/core/cpu.c @@ -55,9 +55,9 @@ static const _cc_func evaluate_cc[16] = { }; -#define nextword() ({const uint16_t 
w=lget(shoe.pc,2); if (shoe.abort) {return;}; shoe.pc+=2; w;}) -#define nextlong() ({const uint32_t L=lget(shoe.pc,4); if (shoe.abort) {return;}; shoe.pc+=4; L;}) -#define verify_supervisor() {if (!sr_s()) {throw_privilege_violation(); return;}} +#define nextword() ({const uint16_t w=lget(shoe.pc,2); if sunlikely(shoe.abort) {return;}; shoe.pc+=2; w;}) +#define nextlong() ({const uint32_t L=lget(shoe.pc,4); if sunlikely(shoe.abort) {return;}; shoe.pc+=4; L;}) +#define verify_supervisor() {if sunlikely(!sr_s()) {throw_privilege_violation(); return;}} static void inst_callm(void) { @@ -466,9 +466,9 @@ static void inst_abcd (void) { // FIXME: these addresses aren't predecremented (check whether a7 is incremented 2 bytes) assert(!"acbd is broken"); packed_x = lget(shoe.a[x], 1); - if (shoe.abort) return ; + if sunlikely(shoe.abort) return ; packed_y = lget(shoe.a[y], 1); - if (shoe.abort) return ; + if sunlikely(shoe.abort) return ; } else { packed_x = shoe.d[x] & 0xff; @@ -494,7 +494,7 @@ static void inst_abcd (void) { if (m) { lset(shoe.a[x]-1, 1, packed_sum); - if (shoe.abort) return ; + if sunlikely(shoe.abort) return ; shoe.a[x]--; if (x != y) @@ -580,10 +580,10 @@ static void inst_stop (void) { static void inst_rtr (void) { const uint16_t ccr = lget(shoe.a[7], 2); - if (shoe.abort) return ; + if sunlikely(shoe.abort) return ; const uint32_t pc = lget(shoe.a[7]+2, 4); - if (shoe.abort) return ; + if sunlikely(shoe.abort) return ; shoe.a[7] += 6; @@ -595,7 +595,7 @@ static void inst_rtr (void) { static void inst_rtd (void) { const int16_t disp = nextword(); const uint32_t new_pc = lget(shoe.a[7], 4); - if (shoe.abort) return ; + if sunlikely(shoe.abort) return ; shoe.pc = new_pc; shoe.a[7] += 4; @@ -606,13 +606,13 @@ static void inst_rte (void) { verify_supervisor(); const uint16_t sr = lget(shoe.a[7], 2); - if (shoe.abort) return ; + if sunlikely(shoe.abort) return ; const uint32_t pc = lget(shoe.a[7]+2, 4); - if (shoe.abort) return ; + if sunlikely(shoe.abort) 
return ; const uint16_t format_word = lget(shoe.a[7]+6, 2); - if (shoe.abort) return ; + if sunlikely(shoe.abort) return ; // slog("rte: sr=0x%04x pc=0x%08x format=0x%04x, post-pop a7=0x%08x\n", sr, pc, format_word, shoe.a[7]+8); @@ -819,9 +819,9 @@ static void inst_addx (void) { const uint32_t predec_x = shoe.a[x] - sz; const uint32_t S = lget(predec_y, 4); - if (shoe.abort) return ; + if sunlikely(shoe.abort) return ; const uint32_t D = lget(predec_x, 4); - if (shoe.abort) return ; + if sunlikely(shoe.abort) return ; // Sm = mib(S + extend_bit, sz); // FIXME: sure you want to do this? Sm = mib(S, sz); @@ -832,7 +832,7 @@ static void inst_addx (void) { const uint32_t chopped_R = chop(R, sz); lset(chopped_R, sz, shoe.dat); - if (shoe.abort) return ; + if sunlikely(shoe.abort) return ; shoe.a[y] = predec_y; shoe.a[x] = predec_x; @@ -926,10 +926,10 @@ static void inst_cmpm (void) { const uint32_t ay = shoe.a[y] + ((x==y) ? sz : 0); // if x==y, then we pop the stack twice const uint32_t dst = lget(ax, sz); - if (shoe.abort) return ; + if sunlikely(shoe.abort) return ; const uint32_t src = lget(ay, sz); - if (shoe.abort) return ; + if sunlikely(shoe.abort) return ; // *I believe* that if x==y, then that register will only be incremented *once* // WRONG! @@ -1202,7 +1202,7 @@ static void inst_move (void) { // So in this case, we need to switch back to the original a7 (set_sr(shoe.orig_sr)) before modifying it // (This is hacky) - if (shoe.abort) { + if sunlikely(shoe.abort) { if (m == 4 || m == 3) { const uint16_t new_sr = shoe.sr; const uint8_t delta = ((r==7) && (sz==1)) ? 2 : sz; @@ -1357,12 +1357,12 @@ static void inst_moves (void) { assert(! 
((a && (M&7)==r) && (((M>>3) == 3) || ((M>>3) == 4))) ); lset_fc(addr, sz, data, fc); - if (shoe.abort) + if sunlikely(shoe.abort) return ; } else { uint32_t data = lget_fc(addr, sz, fc); - if (shoe.abort) + if sunlikely(shoe.abort) return ; // if destination is address register, data is sign extended to 32 bits @@ -1758,7 +1758,7 @@ static void inst_pea (void) { call_ea_addr(M); // push it onto the stack lset(shoe.a[7]-4, 4, shoe.dat); - if (shoe.abort) return ; + if sunlikely(shoe.abort) return ; // decrement the stack pointer if lset didn't abort shoe.a[7] -= 4; } @@ -1775,10 +1775,10 @@ static void inst_subx (void) { const uint32_t predecrement_y = shoe.a[y]-predecrement_sz; const uint32_t src = lget(predecrement_x, sz); - if (shoe.abort) return ; + if sunlikely(shoe.abort) return ; const uint32_t dst = lget(predecrement_y, sz); - if (shoe.abort) return ; + if sunlikely(shoe.abort) return ; const uint32_t result = dst - src - (sr_x()?1:0); @@ -1868,7 +1868,7 @@ static void inst_bsr (void) { } lset(shoe.a[7]-4, 4, shoe.pc); - if (shoe.abort) return ; + if sunlikely(shoe.abort) return ; shoe.a[7] -= 4; shoe.pc = new_pc; } @@ -1942,7 +1942,7 @@ static void inst_jsr (void) { // my quadra 800 doesn't object when a7 is odd... 
// so, no extra error checking needed lset(shoe.a[7]-4, 4, shoe.pc); - if (shoe.abort) return ; + if sunlikely(shoe.abort) return ; // slog("jsr: writing pc (0x%08x) to *0x%08x (phys=0x%08x)\n", shoe.pc, shoe.a[7]-4, shoe.physical_addr); @@ -1997,7 +1997,7 @@ static void inst_link_word (void) { // push the contents of the address register onto the stack lset(shoe.a[7]-4, 4, shoe.a[r]); - if (shoe.abort) return ; + if sunlikely(shoe.abort) return ; shoe.a[7] -= 4; // load the updated stack pointer into the address register @@ -2011,7 +2011,7 @@ static void inst_unlk (void) { ~decompose(shoe.op, 0100 1110 0101 1 rrr); const uint32_t pop = lget(shoe.a[r], 4); - if (shoe.abort) return ; + if sunlikely(shoe.abort) return ; // loads the stack pointer from the address register shoe.a[7] = shoe.a[r]+4; @@ -2022,7 +2022,7 @@ static void inst_unlk (void) { static void inst_rts (void) { const uint32_t pop = lget(shoe.a[7], 4); - if (shoe.abort) return ; + if sunlikely(shoe.abort) return ; shoe.a[7] += 4; shoe.pc = pop; @@ -2059,7 +2059,7 @@ static void inst_link_long (void) { // push the contents of the address register onto the stack lset(shoe.a[7]-4, 4, shoe.a[r]); - if (shoe.abort) return ; + if sunlikely(shoe.abort) return ; shoe.a[7] -= 4; // load the updated stack pointer into the address register @@ -2093,7 +2093,7 @@ static void inst_movem (void) { tmp = lget(shoe.dat, 4); else tmp = (uint32_t)((int32_t)((int16_t)lget(shoe.dat, 2))); // sign-extend if short-mode - if (shoe.abort) goto abort; + if sunlikely(shoe.abort) goto abort; shoe.d[i] = tmp; shoe.dat += sz; } @@ -2107,7 +2107,7 @@ static void inst_movem (void) { tmp = lget(shoe.dat, 4); else tmp = (uint32_t)((int32_t)((int16_t)lget(shoe.dat, 2))); // sign-extend if short-mode - if (shoe.abort) goto abort; + if sunlikely(shoe.abort) goto abort; shoe.a[i] = tmp; shoe.dat += sz; } @@ -2152,7 +2152,7 @@ static void inst_movem (void) { // FIXME: determine what happens when the predecrementing address register is 
written if ((dfield >> i) & 1) { lset(addr, sz, shoe.d[i]); - if (shoe.abort) + if sunlikely(shoe.abort) goto abort; // FIXME: figure out how to abort cleanly addr += sz; } @@ -2175,7 +2175,7 @@ static void inst_movem (void) { } lset(addr, sz, data); - if (shoe.abort) + if sunlikely(shoe.abort) goto abort; // FIXME: figure out how to abort cleanly addr += sz; } @@ -2233,26 +2233,26 @@ static void inst_movep (void) { switch (m) { case 0: { // word, mem->reg uint16_t val = lget(addr, 1); - if (shoe.abort) return; + if sunlikely(shoe.abort) return; val = (val << 8) | lget(addr + 2, 1); - if (shoe.abort) return; + if sunlikely(shoe.abort) return; set_d(d, val, 2); break; } case 1: { // long, mem->reg uint32_t val = lget(addr, 1); - if (shoe.abort) return; + if sunlikely(shoe.abort) return; val = (val << 8) | lget(addr + 2, 1); - if (shoe.abort) return; + if sunlikely(shoe.abort) return; val = (val << 8) | lget(addr + 4, 1); - if (shoe.abort) return; + if sunlikely(shoe.abort) return; val = (val << 8) | lget(addr + 6, 1); - if (shoe.abort) return; + if sunlikely(shoe.abort) return; shoe.d[d] = val; break; @@ -2260,26 +2260,26 @@ static void inst_movep (void) { case 2: { // word, reg->mem const uint16_t val = shoe.d[d]; lset(addr + 0, 1, (val >> 8) & 0xff); - if (shoe.abort) return; + if sunlikely(shoe.abort) return; lset(addr + 2, 1, (val >> 0) & 0xff); - if (shoe.abort) return; + if sunlikely(shoe.abort) return; break; } case 3: { // long, reg->mem const uint32_t val = shoe.d[d]; lset(addr + 0, 1, (val >> 24) & 0xff); - if (shoe.abort) return; + if sunlikely(shoe.abort) return; lset(addr + 2, 1, (val >> 16) & 0xff); - if (shoe.abort) return; + if sunlikely(shoe.abort) return; lset(addr + 4, 1, (val >> 8) & 0xff); - if (shoe.abort) return; + if sunlikely(shoe.abort) return; lset(addr + 6, 1, (val >> 0) & 0xff); - if (shoe.abort) return; + if sunlikely(shoe.abort) return; break; } @@ -2330,7 +2330,7 @@ void write_bitfield(const uint32_t width, const uint32_t offset, 
const uint32_t //slog("write_bitfield: byte_mask = 0x%02x field_mask = 0x%02x bit_offset=%u byte_offset=%u\n", byte_mask, field_mask, bit_offset, byte_offset); //slog("write_bitfield: changing byte at 0x%08x from 0x%02x to 0x%02x\n", //first_byte_addr, old_byte, new_byte); - if (shoe.abort) return ; + if sunlikely(shoe.abort) return ; } else { uint32_t boff = bit_offset; @@ -2340,7 +2340,7 @@ void write_bitfield(const uint32_t width, const uint32_t offset, const uint32_t uint32_t remaining_field = field<<(32-width); // left-aligned while (remaining_width > 0) { const uint8_t byte = lget(addr, 1); - if (shoe.abort) return ; + if sunlikely(shoe.abort) return ; const uint8_t mask = ~~(((uint8_t)((0xff) << (8-curwidth))) >> boff); const uint8_t field_chunk = remaining_field >> (32-curwidth); @@ -2351,7 +2351,7 @@ void write_bitfield(const uint32_t width, const uint32_t offset, const uint32_t lset(addr, 1, (byte & mask) | rotated_chunk); //slog("write_bitfield: changing byte at 0x%08x from 0x%02x to 0x%02x\n", //addr, byte, (byte & mask) | rotated_chunk); - if (shoe.abort) return ; + if sunlikely(shoe.abort) return ; addr++; remaining_field <<= curwidth; @@ -2399,10 +2399,10 @@ uint32_t extract_bitfield(const uint32_t width, const uint32_t offset, const uin field = bitchop(lget(first_byte_addr, 1), 8-bit_offset); //slog("debug: extract_bitfield: first byte field (low %u bits): 0x%02x\n", 8-bit_offset, field); - if (shoe.abort) return 0; + if sunlikely(shoe.abort) return 0; if (width > (8-bit_offset)) { // if the data isn't entirely contained in the first byte uint32_t last_long = lget(first_byte_addr+1, 4); - if (shoe.abort) return 0; + if sunlikely(shoe.abort) return 0; field = (field<<(width - (8-bit_offset))) | // first_byte, left shifted (last_long >> (32 - (width - (8-bit_offset)))); // last_long, right shifted } @@ -2429,7 +2429,7 @@ static void inst_bfextu (void) { } const uint32_t field = extract_bitfield(width, offset, M, ea); - if (shoe.abort) return; + if 
sunlikely(shoe.abort) return; shoe.d[r] = field; @@ -2455,9 +2455,9 @@ static void inst_bfchg (void) { } const uint32_t field = extract_bitfield(width, offset, M, ea); - if (shoe.abort) return; + if sunlikely(shoe.abort) return; write_bitfield(width, offset, M, ea, ~~field); - if (shoe.abort) return; + if sunlikely(shoe.abort) return; set_sr_c(0); set_sr_v(0); @@ -2481,7 +2481,7 @@ static void inst_bfexts (void) { } const uint32_t field = extract_bitfield(width, offset, M, ea); - if (shoe.abort) return; + if sunlikely(shoe.abort) return; const uint32_t mib = (field >> (width-1))&1; const uint32_t maskA = (((uint32_t)0) - mib) << 1; @@ -2511,9 +2511,9 @@ static void inst_bfclr (void) { } const uint32_t field = extract_bitfield(width, offset, M, ea); - if (shoe.abort) return; + if sunlikely(shoe.abort) return; write_bitfield(width, offset, M, ea, 0); - if (shoe.abort) return; + if sunlikely(shoe.abort) return; set_sr_c(0); set_sr_v(0); @@ -2537,9 +2537,9 @@ static void inst_bfset (void) { } const uint32_t field = extract_bitfield(width, offset, M, ea); - if (shoe.abort) return; + if sunlikely(shoe.abort) return; write_bitfield(width, offset, M, ea, 0xffffffff); - if (shoe.abort) return; + if sunlikely(shoe.abort) return; set_sr_c(0); set_sr_v(0); @@ -2563,7 +2563,7 @@ static void inst_bftst (void) { } const uint32_t field = extract_bitfield(width, offset, M, ea); - if (shoe.abort) return; + if sunlikely(shoe.abort) return; set_sr_c(0); set_sr_v(0); @@ -2587,7 +2587,7 @@ static void inst_bfffo (void) { } const uint32_t field = extract_bitfield(width, offset, M, ea); - if (shoe.abort) return; + if sunlikely(shoe.abort) return; uint32_t i; for (i=1; (i<=width) && ((field>>(width-i))&1)==0; i++) ; @@ -2618,7 +2618,7 @@ static void inst_bfins (void) { } write_bitfield(width, offset, M, ea, field); - if (shoe.abort) return; + if sunlikely(shoe.abort) return; set_sr_c(0); set_sr_v(0); @@ -2752,20 +2752,20 @@ static void inst_ext (void) { case ~b(010): { // byte -> word 
uint16_t val = (int8_t)get_d(r, 1); set_d(r, val, 2); - set_sr_z(get_d(r, 2)); - set_sr_n(mib(shoe.d[r], 2)); + set_sr_z(!val); + set_sr_n(val >> 15); break; } case ~b(011): { // word -> long uint32_t val = (int16_t)get_d(r, 2); set_d(r, val, 4); - set_sr_z(get_d(r, 4)); - set_sr_n(mib(shoe.d[r], 4)); + set_sr_z(!val); + set_sr_n(val >> 31); break; } case ~b(111): { // byte -> long uint32_t val = (int8_t)get_d(r, 1); set_d(r, val, 4); - set_sr_z(get_d(r, 4)); - set_sr_n(mib(shoe.d[r], 4)); + set_sr_z(!val); + set_sr_n(val >> 31); break; } } @@ -2980,14 +2980,16 @@ static void inst_trap (void) { set_sr_s(1); push_a7(vector_offset, 2); - if (shoe.abort) goto fail; + if sunlikely(shoe.abort) goto fail; + push_a7(shoe.pc, 4); - if (shoe.abort) goto fail; + if sunlikely(shoe.abort) goto fail; + push_a7(shoe.orig_sr, 2); - if (shoe.abort) goto fail; + if sunlikely(shoe.abort) goto fail; const uint32_t newpc = lget(shoe.vbr + vector_offset, 4); - if (shoe.abort) goto fail; + if sunlikely(shoe.abort) goto fail; shoe.pc = newpc; return ; @@ -3007,7 +3009,7 @@ void cpu_step() shoe.orig_sr = shoe.sr; // Is this an odd address? Throw an address exception! - if (shoe.pc & 1) { + if sunlikely(shoe.pc & 1) { // throw_address_error(shoe.pc, 0); // I'm leaving this assert in here for now because it almost always indicates a bug in the emulator when it fires assert(!"odd PC address (probably a bug)"); @@ -3018,7 +3020,7 @@ void cpu_step() shoe.op = lget(shoe.pc, 2); // If there was an exception, then the pc changed. Restart execution from the beginning. 
- if (shoe.abort) { + if sunlikely(shoe.abort) { shoe.abort = 0; return ; } diff --git a/core/mem.c b/core/mem.c index bd1d0ef..af3e94c 100644 --- a/core/mem.c +++ b/core/mem.c @@ -34,17 +34,13 @@ void _physical_get_ram (void) { uint64_t *addr; - if (shoe.physical_addr < shoe.physical_mem_size) + if slikely(shoe.physical_addr < shoe.physical_mem_size) addr = (uint64_t*)&shoe.physical_mem_base[shoe.physical_addr]; else addr = (uint64_t*)&shoe.physical_mem_base[shoe.physical_addr % shoe.physical_mem_size]; const uint8_t bits = (8 - shoe.physical_size) * 8; shoe.physical_dat = ntohll(*addr) >> bits; - - if ((shoe.physical_addr >= 256) && (shoe.physical_addr < 0x4000)) { - slog("LOMEM get: *0x%08x = 0x%x\n", shoe.physical_addr, (uint32_t)shoe.physical_dat); - } } void _physical_get_rom (void) @@ -99,7 +95,7 @@ void _physical_get_io (void) void _physical_get_super_slot (void) { const uint32_t slot = shoe.physical_addr >> 28; - if (shoe.slots[slot].connected) + if slikely(shoe.slots[slot].connected) shoe.physical_dat = shoe.slots[slot].read_func(shoe.physical_addr, shoe.physical_size, slot); @@ -111,7 +107,7 @@ void _physical_get_super_slot (void) void _physical_get_standard_slot (void) { const uint32_t slot = (shoe.physical_addr >> 24) & 0xf; - if (shoe.slots[slot].connected) + if slikely(shoe.slots[slot].connected) shoe.physical_dat = shoe.slots[slot].read_func(shoe.physical_addr, shoe.physical_size, slot); @@ -342,7 +338,7 @@ static void translate_logical_addr() // TODO: Check limit here // If root descriptor is invalid, throw a bus error - if (rp_dt(rootp) == 0) { + if sunlikely(rp_dt(rootp) == 0) { throw_bus_error(shoe.logical_addr, shoe.logical_is_write); return ; } @@ -377,7 +373,7 @@ static void translate_logical_addr() const uint8_t dt = desc_dt(desc, desc_size); // If this descriptor is invalid, throw a bus error - if (dt == 0) { + if sunlikely(dt == 0) { throw_bus_error(shoe.logical_addr, shoe.logical_is_write); return ; } @@ -395,7 +391,7 @@ static void 
translate_logical_addr() get_desc(desc & 0xfffffff0, (4 << desc_size)); // I think it's possible for an indirect descriptor to point to an invalid descriptor... - if (desc_dt(desc, desc_size) == 0) { + if sunlikely(desc_dt(desc, desc_size) == 0) { throw_bus_error(shoe.logical_addr, shoe.logical_is_write); return ; } @@ -426,7 +422,7 @@ search_done: wp |= desc_wp(desc, desc_size); // or in the wp flag for this page descriptor // And finally throw a bus error - if (wp && shoe.logical_is_write) { + if sunlikely(wp && shoe.logical_is_write) { throw_bus_error(shoe.logical_addr, shoe.logical_is_write); return ; } @@ -467,11 +463,11 @@ void logical_get (void) { // If address translation isn't enabled, this is a physical address - if (!tc_enable()) { + if sunlikely(!tc_enable()) { shoe.physical_addr = shoe.logical_addr; shoe.physical_size = shoe.logical_size; physical_get(); - if (shoe.abort) { + if sunlikely(shoe.abort) { shoe.abort = 0; throw_long_bus_error(shoe.logical_addr, 0); return ; @@ -491,21 +487,21 @@ void logical_get (void) shoe.logical_is_write = 0; // Common case: the read is contained entirely within a page - if (!((pageoffset + logical_size - 1) >> ps)) { - if (!check_pmmu_cache()) { + if slikely(!((pageoffset + logical_size - 1) >> ps)) { + if sunlikely(!check_pmmu_cache()) { translate_logical_addr(); - if (shoe.abort) + if sunlikely(shoe.abort) return ; } - if (shoe.physical_addr < shoe.physical_mem_size) { + if slikely(shoe.physical_addr < shoe.physical_mem_size) { // Fast path shoe.logical_dat = ntohll(*(uint64_t*)&shoe.physical_mem_base[shoe.physical_addr]) >> ((8-logical_size)*8); } else { shoe.physical_size = logical_size; physical_get(); - if (shoe.abort) { + if sunlikely(shoe.abort) { shoe.abort = 0; throw_long_bus_error(logical_addr, 0); return ; @@ -521,9 +517,9 @@ void logical_get (void) shoe.logical_addr = addr_a; shoe.logical_size = size_a; - if (!check_pmmu_cache()) { + if sunlikely(!check_pmmu_cache()) { translate_logical_addr(); - if 
(shoe.abort) + if sunlikely(shoe.abort) return ; } @@ -531,16 +527,16 @@ void logical_get (void) shoe.logical_addr = addr_b; shoe.logical_size = size_b; - if (!check_pmmu_cache()) { + if sunlikely(!check_pmmu_cache()) { translate_logical_addr(); - if (shoe.abort) + if sunlikely(shoe.abort) return ; } const uint32_t p_addr_b = shoe.physical_addr; shoe.physical_size = size_b; physical_get(); - if (shoe.abort) { + if sunlikely(shoe.abort) { shoe.abort = 0; throw_long_bus_error(shoe.logical_addr, 0); return ; @@ -550,7 +546,7 @@ void logical_get (void) shoe.physical_addr = p_addr_a; shoe.physical_size = size_a; physical_get(); - if (shoe.abort) { + if sunlikely(shoe.abort) { shoe.abort = 0; throw_long_bus_error(shoe.logical_addr, 0); return ; @@ -563,7 +559,7 @@ void logical_get (void) void logical_set (void) { // If address translation isn't enabled, this is a physical address - if (!tc_enable()) { + if sunlikely(!tc_enable()) { shoe.physical_addr = shoe.logical_addr; shoe.physical_size = shoe.logical_size; shoe.physical_dat = shoe.logical_dat; @@ -583,11 +579,11 @@ void logical_set (void) shoe.logical_is_write = 1; // Common case: this write is contained entirely in one page - if (!((pageoffset + logical_size - 1) >> ps)) { + if slikely(!((pageoffset + logical_size - 1) >> ps)) { // Common case: the write is contained entirely within a page - if (!check_pmmu_cache()) { + if sunlikely(!check_pmmu_cache()) { translate_logical_addr(); - if (shoe.abort) + if sunlikely(shoe.abort) return ; } @@ -605,18 +601,18 @@ void logical_set (void) shoe.logical_addr = addr_a; shoe.logical_size = size_a; - if (!check_pmmu_cache()) { + if sunlikely(!check_pmmu_cache()) { translate_logical_addr(); - if (shoe.abort) + if sunlikely(shoe.abort) return ; } const uint32_t p_addr_a = shoe.physical_addr; shoe.logical_addr = addr_b; shoe.logical_size = size_b; - if (!check_pmmu_cache()) { + if sunlikely(!check_pmmu_cache()) { translate_logical_addr(); - if (shoe.abort) + if 
sunlikely(shoe.abort) return ; } const uint32_t p_addr_b = shoe.physical_addr; @@ -639,8 +635,8 @@ void logical_set (void) /* --- EA routines --- */ #pragma mark EA routines -#define nextword(pc) ({const uint16_t w=lget((pc),2);if (shoe.abort){return;}(pc)+=2; w;}) -#define nextlong(pc) ({const uint32_t L=lget((pc),4);if (shoe.abort){return;}(pc)+=4; L;}) +#define nextword(pc) ({const uint16_t w=lget((pc),2);if sunlikely(shoe.abort){return;}(pc)+=2; w;}) +#define nextlong(pc) ({const uint32_t L=lget((pc),4);if sunlikely(shoe.abort){return;}(pc)+=4; L;}) // ea_decode_extended() - find the EA for those hiddeous 68020 addr modes static void ea_decode_extended() @@ -760,7 +756,7 @@ static void ea_decode_extended() case ~b(1001): case ~b(1010): case ~b(1011): { // Indirect preindexed const uint32_t intermediate = lget(base_addr + base_disp + index_val, 4); - if (shoe.abort) return ; + if sunlikely(shoe.abort) return ; shoe.extended_addr = intermediate + outer_disp; shoe.extended_len = mypc - start_pc; // slog("addr=0x%x len=%u\n", shoe.extended_addr, shoe.extended_len); @@ -770,7 +766,7 @@ static void ea_decode_extended() case ~b(0101): case ~b(0110): case ~b(0111): { // Indirect postindexed const uint32_t intermediate = lget(base_addr + base_disp, 4); - if (shoe.abort) return ; + if sunlikely(shoe.abort) return ; shoe.extended_addr = intermediate + index_val + outer_disp; shoe.extended_len = mypc - start_pc; return ; @@ -847,7 +843,7 @@ void _ea_011_write (void) const uint8_t delta = ((reg==7) && (shoe.sz==1)) ? 2 : shoe.sz; lset(shoe.a[reg], shoe.sz, shoe.dat); - if (!shoe.abort) + if slikely(!shoe.abort) shoe.a[reg] += delta; } @@ -870,7 +866,7 @@ void _ea_100_write (void) const uint8_t delta = ((reg==7) && (shoe.sz==1)) ? 
2 : shoe.sz; lset(shoe.a[reg] - delta, shoe.sz, shoe.dat); - if (!shoe.abort) + if slikely(!shoe.abort) shoe.a[reg] -= delta; } @@ -902,7 +898,7 @@ void _ea_101_addr (void) void _ea_110_read (void) { ea_decode_extended(); - if (!shoe.abort) + if slikely(!shoe.abort) shoe.dat = lget(shoe.extended_addr, shoe.sz); shoe.uncommitted_ea_read_pc = shoe.pc + shoe.extended_len; } @@ -913,16 +909,16 @@ void _ea_110_read_commit (void) void _ea_110_write (void) { ea_decode_extended(); - if (!shoe.abort) { + if slikely(!shoe.abort) { lset(shoe.extended_addr, shoe.sz, shoe.dat); - if (!shoe.abort) + if slikely(!shoe.abort) shoe.pc += shoe.extended_len; } } void _ea_110_addr (void) { ea_decode_extended(); - if (!shoe.abort) { + if slikely(!shoe.abort) { shoe.dat = shoe.extended_addr; shoe.pc += shoe.extended_len; } @@ -998,7 +994,7 @@ void _ea_111_010_addr (void) void _ea_111_011_read (void) { ea_decode_extended(); - if (!shoe.abort) + if slikely(!shoe.abort) shoe.dat = lget(shoe.extended_addr, shoe.sz); shoe.uncommitted_ea_read_pc = shoe.pc + shoe.extended_len; } @@ -1009,7 +1005,7 @@ void _ea_111_011_read_commit (void) void _ea_111_011_addr (void) { ea_decode_extended(); - if (!shoe.abort) { + if slikely(!shoe.abort) { shoe.dat = shoe.extended_addr; shoe.pc += shoe.extended_len; } diff --git a/core/shoebill.h b/core/shoebill.h index 30e02fe..5b6d79b 100644 --- a/core/shoebill.h +++ b/core/shoebill.h @@ -79,6 +79,9 @@ #endif +#define slikely(e) (__builtin_expect(!!(e), 1)) +#define sunlikely(e) (__builtin_expect(!!(e), 0)) + /* * core_api.c stuff */ @@ -876,7 +879,12 @@ extern const physical_get_ptr physical_get_jump_table[16]; extern const physical_set_ptr physical_set_jump_table[16]; #define physical_set() physical_set_jump_table[shoe.physical_addr >> 28]() -#define pset(addr, s, val) {shoe.physical_addr=(addr); shoe.physical_size=(s); shoe.physical_dat=(val); physical_set();} +#define pset(addr, s, val) do { \ + shoe.physical_addr=(addr); \ + shoe.physical_size=(s); \ + 
shoe.physical_dat=(val); \ + physical_set(); \ +} while (0) #define physical_get() physical_get_jump_table[shoe.physical_addr >> 28]() #define pget(addr, s) ({shoe.physical_addr=(addr); shoe.physical_size=(s); physical_get(); shoe.physical_dat;}) @@ -889,26 +897,17 @@ void logical_get (void); logical_get(); \ shoe.logical_dat; \ }) - -#define lget(addr, s) ({ \ - shoe.logical_addr=(addr); \ - shoe.logical_size=(s); \ - shoe.logical_fc = (sr_s() ? 5 : 1); \ - logical_get(); \ - shoe.logical_dat; \ -}) +#define lget(addr, s) lget_fc((addr), (s), (sr_s() ? 5 : 1)) void logical_set (void); -#define lset_fc(addr, s, val, fc) {\ +#define lset_fc(addr, s, val, fc) do { \ shoe.logical_addr=(addr); \ shoe.logical_size=(s); \ shoe.logical_dat=(val); \ shoe.logical_fc = (fc); \ logical_set();\ -} -#define lset(addr, s, val) { \ - lset_fc((addr), (s), (val), sr_s() ? 5 : 1) \ -} +} while (0) +#define lset(addr, s, val) lset_fc((addr), (s), (val), sr_s() ? 5 : 1) typedef void (*_ea_func) (void); extern const _ea_func ea_read_jump_table[64]; @@ -922,10 +921,10 @@ extern const _ea_func ea_addr_jump_table[64]; #define ea_addr() ea_addr_jump_table[shoe.mr]() -#define call_ea_read(M, s) {shoe.mr=(M);shoe.sz=(s);ea_read();if (shoe.abort) return;} -#define call_ea_write(M, s) {shoe.mr=(M);shoe.sz=(s);ea_write();if (shoe.abort) return;} -#define call_ea_read_commit(M, s) {shoe.mr=(M);shoe.sz=(s);ea_read_commit();if (shoe.abort) return;} -#define call_ea_addr(M) {shoe.mr=(M);ea_addr();if (shoe.abort) return;} +#define call_ea_read(M, s) {shoe.mr=(M);shoe.sz=(s);ea_read();if sunlikely(shoe.abort) return;} +#define call_ea_write(M, s) {shoe.mr=(M);shoe.sz=(s);ea_write();if sunlikely(shoe.abort) return;} +#define call_ea_read_commit(M, s) {shoe.mr=(M);shoe.sz=(s);ea_read_commit();if sunlikely(shoe.abort) return;} +#define call_ea_addr(M) {shoe.mr=(M);ea_addr();if sunlikely(shoe.abort) return;} #define push_a7(_dat, _sz) {shoe.a[7]-=(_sz);lset(shoe.a[7], (_sz), (_dat));} diff --git 
a/core/video.c b/core/video.c index 58ee7e8..6d7c71c 100644 --- a/core/video.c +++ b/core/video.c @@ -145,7 +145,7 @@ uint32_t nubus_video_read_func(const uint32_t rawaddr, const uint32_t size, const uint32_t addr = rawaddr & 0x00ffffff; // ROM and control registers - if ((addr >> 20) == 0xf) { + if sunlikely((addr >> 20) == 0xf) { slog("nubus_video_read_func: got a read to 0x%08x sz=%u\n", rawaddr, size); @@ -172,7 +172,7 @@ uint32_t nubus_video_read_func(const uint32_t rawaddr, const uint32_t size, // Else, this is video ram uint32_t i, result = 0; - if (addr < (ctx->pixels * 4)) { + if slikely(addr < (ctx->pixels * 4)) { for (i=0; i<size; i++) result = (result << 8) | ((uint8_t*)ctx->direct_buf)[addr + i]; } @@ -234,7 +234,7 @@ void nubus_video_write_func(const uint32_t rawaddr, const uint32_t size, uint32_t i; // ROM and control registers - if ((addr >> 20) == 0xf) { + if sunlikely((addr >> 20) == 0xf) { slog("nubus_video_write_func: got a write to 0x%08x sz=%u data=0x%x\n", rawaddr, size, data); @@ -340,7 +340,7 @@ void nubus_video_write_func(const uint32_t rawaddr, const uint32_t size, // Else, this is video ram - if (addr < (ctx->pixels * 4)) { + if slikely(addr < (ctx->pixels * 4)) { uint32_t mydata, myaddr; for (myaddr = addr + size, mydata = data; addr < myaddr; ) { ((uint8_t*)ctx->direct_buf)[--myaddr] = mydata & 0xff; diff --git a/sdl-gui/lin_build.sh b/sdl-gui/lin_build.sh index da2227c..f543072 100755 --- a/sdl-gui/lin_build.sh +++ b/sdl-gui/lin_build.sh @@ -8,7 +8,7 @@ for i in adb fpu mc68851 mem via floppy core_api cpu dis; do files="$files $i.post.c" done -for i in atrap_tab coff exception macii_symbols redblack scsi video filesystem alloc_pool toby_frame_buffer ethernet sound; do +for i in SoftFloat/softfloat atrap_tab coff exception macii_symbols redblack scsi video filesystem alloc_pool toby_frame_buffer ethernet sound; do files="$files ../core/$i.c" done diff --git a/sdl-gui/osx_build.sh b/sdl-gui/osx_build.sh index 16f14f3..eb13bab 100755 --- a/sdl-gui/osx_build.sh +++ b/sdl-gui/osx_build.sh @@ 
-8,7 +8,7 @@ for i in adb fpu mc68851 mem via floppy core_api cpu dis; do files="$files $i.post.c" done -for i in atrap_tab coff exception macii_symbols redblack scsi video filesystem alloc_pool toby_frame_buffer ethernet sound; do +for i in SoftFloat/softfloat atrap_tab coff exception macii_symbols redblack scsi video filesystem alloc_pool toby_frame_buffer ethernet sound; do files="$files ../core/$i.c" done diff --git a/sdl-gui/win_build.bat b/sdl-gui/win_build.bat index 4891afc..b25163d 100755 --- a/sdl-gui/win_build.bat +++ b/sdl-gui/win_build.bat @@ -6,4 +6,4 @@ gcc -O1 ..\core\decoder_gen.c -o decoder_gen decoder_gen inst . decoder_gen dis . -gcc -O3 -flto -mno-ms-bitfields sdl.c adb.post.c fpu.post.c mc68851.post.c mem.post.c via.post.c floppy.post.c core_api.post.c cpu.post.c dis.post.c ..\core\atrap_tab.c ..\core\coff.c ..\core\exception.c ..\core\macii_symbols.c ..\core\redblack.c ..\core\scsi.c ..\core\video.c ..\core\filesystem.c ..\core\alloc_pool.c ..\core\toby_frame_buffer.c ..\core\ethernet.c ..\core\sound.c -lmingw32 -lopengl32 -lsdl2main -lsdl2 -o shoebill \ No newline at end of file +gcc -O3 -flto -mno-ms-bitfields sdl.c adb.post.c fpu.post.c mc68851.post.c mem.post.c via.post.c floppy.post.c core_api.post.c cpu.post.c dis.post.c ..\core\atrap_tab.c ..\core\coff.c ..\core\exception.c ..\core\macii_symbols.c ..\core\redblack.c ..\core\scsi.c ..\core\video.c ..\core\filesystem.c ..\core\alloc_pool.c ..\core\toby_frame_buffer.c ..\core\ethernet.c ..\core\sound.c ..\core\SoftFloat\softfloat.c -lmingw32 -lopengl32 -lsdl2main -lsdl2 -o shoebill