diff --git a/core/core_api.c b/core/core_api.c index d3a4d9c..0ba4029 100644 --- a/core/core_api.c +++ b/core/core_api.c @@ -738,6 +738,8 @@ uint32_t shoebill_initialize(shoebill_config_t *config) init_scsi_bus_state(); init_iwm_state(); + /* Invalidate the pc cache */ + invalidate_pccache(); set_sr(0x2000); shoe.pc = pc; @@ -799,6 +801,9 @@ void shoebill_restart (void) // clear the pmmu cache memset(shoe.pmmu_cache, 0, sizeof(shoe.pmmu_cache)); + // Invalidate the pc cache + invalidate_pccache(); + // Reset all CPU registers memset(shoe.d, 0, sizeof(shoe.d)); memset(shoe.a, 0, sizeof(shoe.a)); @@ -813,6 +818,8 @@ void shoebill_restart (void) // Reset all pmmu registers shoe.crp = shoe.srp = shoe.drp = 0; shoe.tc = 0; + shoe.tc_pagesize = shoe.tc_pagemask = 0; + shoe.tc_ps = shoe.tc_is = shoe.tc_is_plus_ps = shoe.tc_enable = shoe.tc_sre = 0; shoe.pcsr = 0; shoe.ac = 0; memset(shoe.bad, 0, sizeof(shoe.bad)); diff --git a/core/cpu.c b/core/cpu.c index bffcbb7..21fbe62 100644 --- a/core/cpu.c +++ b/core/cpu.c @@ -32,31 +32,17 @@ global_shoebill_context_t shoe; -static _Bool _cc_t() {return 1;} -static _Bool _cc_f() {return 0;} -static _Bool _cc_hi() {return !sr_c() && !sr_z();} -static _Bool _cc_ls() {return sr_c() || sr_z();} -static _Bool _cc_cc() {return !sr_c();} -static _Bool _cc_cs() {return sr_c();} -static _Bool _cc_ne() {return !sr_z();} -static _Bool _cc_eq() {return sr_z();} -static _Bool _cc_vc() {return !sr_v();} -static _Bool _cc_vs() {return sr_v();} -static _Bool _cc_pl() {return !sr_n();} -static _Bool _cc_mi() {return sr_n();} -static _Bool _cc_ge() {return (sr_n() && sr_v()) || (!sr_n() && !sr_v());} -static _Bool _cc_lt() {return (sr_n() && !sr_v()) || (!sr_n() && sr_v());} -static _Bool _cc_gt() {return (sr_n() && sr_v() && !sr_z()) || (!sr_n() && !sr_v() && !sr_z());} -static _Bool _cc_le() {return sr_z() || (sr_n() && !sr_v()) || (!sr_n() && sr_v());} -typedef _Bool (*_cc_func)(); -static const _cc_func evaluate_cc[16] = { - _cc_t, _cc_f, 
_cc_hi, _cc_ls, _cc_cc, _cc_cs, _cc_ne, _cc_eq, - _cc_vc, _cc_vs, _cc_pl, _cc_mi, _cc_ge, _cc_lt, _cc_gt, _cc_le +/* Precomputed results for condition code tests: bit i of cc_consts[c] is the outcome of condition c (68k order: T, F, HI, LS, CC, CS, NE, EQ, VC, VS, PL, MI, GE, LT, GT, LE) when (shoe.sr & 0xf) == i, i.e. i = N<<3 | Z<<2 | V<<1 | C */ +static const uint16_t cc_consts[16] = { + 0xffff, 0x0000, 0x0505, 0xfafa, 0x5555, 0xaaaa, 0x0f0f, 0xf0f0, + 0x3333, 0xcccc, 0x00ff, 0xff00, 0xcc33, 0x33cc, 0x0c03, 0xf3fc }; +#define evaluate_cc(c) ((cc_consts[(c)] >> (shoe.sr & 0xf)) & 1) -#define nextword() ({const uint16_t w=lget(shoe.pc,2); if sunlikely(shoe.abort) {return;}; shoe.pc+=2; w;}) -#define nextlong() ({const uint32_t L=lget(shoe.pc,4); if sunlikely(shoe.abort) {return;}; shoe.pc+=4; L;}) +#define nextword() ({const uint16_t w = pccache_nextword(shoe.pc); if sunlikely(shoe.abort) {return;} shoe.pc += 2; w;}) +#define nextlong() ({const uint32_t L = pccache_nextlong(shoe.pc); if sunlikely(shoe.abort) {return;} shoe.pc += 4; L;}) + #define verify_supervisor() {if sunlikely(!sr_s()) {throw_privilege_violation(); return;}} @@ -105,7 +91,7 @@ static void inst_trapcc (void) { const uint32_t sz = y << (z+1); // too clever const uint32_t next_pc = shoe.pc + sz; - if (evaluate_cc[c]()) + if (evaluate_cc(c)) throw_frame_two(shoe.sr, next_pc, 7, shoe.orig_pc); else shoe.pc = next_pc; @@ -388,7 +374,7 @@ static void inst_divu (void) { const uint32_t dividend = shoe.d[r]; const uint16_t divisor = (uint16_t)shoe.dat; - if (divisor == 0) { + if sunlikely(divisor == 0) { throw_frame_two(shoe.orig_sr, shoe.uncommitted_ea_read_pc, 5, shoe.orig_pc); return ; } @@ -420,7 +406,7 @@ static void inst_divs (void) { const uint16_t u_divisor = (uint16_t)shoe.dat; const int16_t s_divisor = (int16_t)shoe.dat; - if (s_divisor == 0) { + if sunlikely(s_divisor == 0) { throw_frame_two(shoe.orig_sr, shoe.uncommitted_ea_read_pc, 5, shoe.orig_pc); return ; } @@ -1029,7 +1015,7 @@ static void inst_long_div (void) { call_ea_read(M, 4); const uint32_t divisor = shoe.dat; - if (divisor == 0) { + if sunlikely(divisor == 0) { throw_frame_two(shoe.orig_sr, 
shoe.uncommitted_ea_read_pc, 5, shoe.orig_pc); return ; } @@ -1253,21 +1239,6 @@ static void inst_movec (void) { else reg[r] = shoe.cacr; return ; } - /* // These are >'020 registers - case 0x003: { // TC - // TC is a 16 bit register, but movec is always a 32bit - if (x) shoe.tc = reg[r] & ~b(1100 0000 0000 0000); - else reg[r] = shoe.tc & ~b(1100 0000 0000 0000); - return ; - } - case 0x006: // DTT0 - if (x) shoe.dtt0 = reg[r] & ~b(11111111 11111111 11100011 01100100); - else reg[r] = shoe.dtt0 & ~b(11111111 11111111 11100011 01100100); - return ; - case 0x007: // DTT1 - if (x) shoe.dtt1 = reg[r] & ~b(11111111 11111111 11100011 01100100); - else reg[r] = shoe.dtt1 & ~b(11111111 11111111 11100011 01100100); - return ; */ case 0x801: // VBR if (x) shoe.vbr = reg[r]; else reg[r] = shoe.vbr; @@ -1325,7 +1296,7 @@ static void inst_moves (void) { */ // For now, only supporting fc 1 (user data space) - if (fc != 1) { + if sunlikely(fc != 1) { slog("inst_moves: error: hit fc=%u\n", fc); assert(!"inst_moves: error, hit weird function code"); return ; @@ -1838,7 +1809,7 @@ static void inst_suba (void) { static void inst_dbcc (void) { ~decompose(shoe.op, 0101 cccc 11001 rrr); - if (evaluate_cc[c]()) { + if (evaluate_cc(c)) { shoe.pc += 2; } else { @@ -1877,7 +1848,7 @@ static void inst_bcc (void) { const uint32_t orig_pc = shoe.pc; ~decompose(shoe.op, 0110 cccc dddddddd); - if (evaluate_cc[c]()) { + if (evaluate_cc(c)) { if (d == 0) { const int16_t ext = (int16_t)nextword(); shoe.pc = orig_pc + ext; @@ -1899,7 +1870,7 @@ static void inst_bcc (void) { static void inst_scc (void) { ~decompose(shoe.op, 0101 cccc 11 MMMMMM); - shoe.dat = evaluate_cc[c]() ? 0xff : 0; + shoe.dat = evaluate_cc(c) ? 0xff : 0; call_ea_write(M, 1); } @@ -3008,25 +2979,14 @@ void cpu_step() shoe.orig_pc = shoe.pc; shoe.orig_sr = shoe.sr; - // Is this an odd address? Throw an address exception! 
- if sunlikely(shoe.pc & 1) { - // throw_address_error(shoe.pc, 0); - // I'm leaving this assert in here for now because it almost always indicates a bug in the emulator when it fires - assert(!"odd PC address (probably a bug)"); - return ; - } - // Fetch the next instruction word - shoe.op = lget(shoe.pc, 2); + shoe.op = pccache_nextword(shoe.pc); - // If there was an exception, then the pc changed. Restart execution from the beginning. - if sunlikely(shoe.abort) { - shoe.abort = 0; - return ; + // If the fetch succeeded, execute it + if slikely(!shoe.abort) { + shoe.pc += 2; + inst_instruction_to_pointer[inst_opcode_map[shoe.op]](); } - shoe.pc+=2; - - inst_instruction_to_pointer[inst_opcode_map[shoe.op]](); /* The abort flag indicates that a routine should stop trying to execute the instruction and return immediately to cpu_step(), usually to begin diff --git a/core/fpu.c b/core/fpu.c index 7fa7601..5cf8f51 100644 --- a/core/fpu.c +++ b/core/fpu.c @@ -172,8 +172,8 @@ enum { } fpu_formats; #define fpu_get_state_ptr() fpu_state_t *fpu = (fpu_state_t*)shoe.fpu_state -#define nextword() ({const uint16_t w=lget(shoe.pc,2); if (shoe.abort) {return;}; shoe.pc+=2; w;}) -#define nextlong() ({const uint32_t L=lget(shoe.pc,4); if (shoe.abort) {return;}; shoe.pc+=4; L;}) +#define nextword() ({const uint16_t w = pccache_nextword(shoe.pc); if sunlikely(shoe.abort) return; shoe.pc += 2; w;}) +#define nextlong() ({const uint32_t L = pccache_nextlong(shoe.pc); if sunlikely(shoe.abort) return; shoe.pc += 4; L;}) #define verify_supervisor() {if (!sr_s()) {throw_privilege_violation(); return;}} #pragma mark FPU exception stuff diff --git a/core/mc68851.c b/core/mc68851.c index 617e9c7..1c1e83f 100644 --- a/core/mc68851.c +++ b/core/mc68851.c @@ -83,6 +83,9 @@ void inst_mc68851_pflushr(uint16_t ext){ // Just nuke the entire cache memset(shoe.pmmu_cache[0].valid_map, 0, PMMU_CACHE_SIZE/8); memset(shoe.pmmu_cache[1].valid_map, 0, PMMU_CACHE_SIZE/8); + + /* Invalidate the pc cache */ + 
invalidate_pccache(); } void inst_mc68851_pflush(uint16_t ext){ @@ -91,6 +94,9 @@ void inst_mc68851_pflush(uint16_t ext){ memset(shoe.pmmu_cache[0].valid_map, 0, PMMU_CACHE_SIZE/8); memset(shoe.pmmu_cache[1].valid_map, 0, PMMU_CACHE_SIZE/8); // slog("%s: Error, not implemented!\n", __func__); + + /* Invalidate the pc cache */ + invalidate_pccache(); } void inst_mc68851_pmove(uint16_t ext){ @@ -100,6 +106,12 @@ void inst_mc68851_pmove(uint16_t ext){ ~decompose(shoe.op, 1111 000 000 MMMMMM); ~decompose(ext, fff ppp w 0000 nnn 00); + /* + * For simplicity, just blow away pccache whenever + * the PMMU state changes at all + */ + if (!w) + invalidate_pccache(); // instruction format #1 @@ -119,7 +131,16 @@ void inst_mc68851_pmove(uint16_t ext){ switch (p) { case 0: // tc - if (!w) shoe.tc = shoe.dat & 0x83FFFFFF; + if (!w) { + shoe.tc = shoe.dat & 0x83FFFFFF; + shoe.tc_is = (shoe.tc >> 16) & 0xf; + shoe.tc_ps = (shoe.tc >> 20) & 0xf; + shoe.tc_pagesize = 1 << shoe.tc_ps; + shoe.tc_pagemask = shoe.tc_pagesize - 1; + shoe.tc_is_plus_ps = shoe.tc_is + shoe.tc_ps; + shoe.tc_enable = (shoe.tc >> 31) & 1; + shoe.tc_sre = (shoe.tc >> 25) & 1; + } else { shoe.dat = shoe.tc; //if (!tc_fcl()) assert(!"pmove->tc: function codes not supported\n"); @@ -193,7 +214,7 @@ static int64_t ptest_search(const uint32_t _logical_addr, const uint64_t rootp) uint8_t i; uint64_t desc = rootp; // Initial descriptor is the root pointer descriptor uint8_t desc_size = 1; // And the root pointer descriptor is always 8 bytes (1==8 bytes, 0==4 bytes) - uint8_t used_bits = tc_is(); // Keep track of how many bits will be the effective "page size" + uint8_t used_bits = shoe.tc_is; // Keep track of how many bits will be the effective "page size" // (If the table search terminates early (before used_bits == ts_ps()), // then this will be the effective page size. 
That is, the number of bits // we or into the physical addr from the virtual addr) @@ -315,7 +336,7 @@ void inst_mc68851_ptest(uint16_t ext){ ~decompose(shoe.op, 1111 0000 00 MMMMMM); ~decompose(ext, 100 LLL R AAAA FFFFF); // Erata in 68kPRM - F is 6 bits, and A is 3 - assert(tc_enable()); // XXX: Throws some exception if tc_enable isn't set + assert(shoe.tc_enable); // XXX: Throws some exception if tc_enable isn't set assert(tc_fcl() == 0); // XXX: I can't handle function code lookups, and I don't want to assert(L == 7); // XXX: Not currently handling searching to a particular level diff --git a/core/mem.c b/core/mem.c index af3e94c..923d986 100644 --- a/core/mem.c +++ b/core/mem.c @@ -289,33 +289,52 @@ const physical_set_ptr physical_set_jump_table[16] = { } -static _Bool check_pmmu_cache(void) +static _Bool check_pmmu_cache_write(void) { - const _Bool use_srp = (tc_sre() && (shoe.logical_fc >= 4)); + const _Bool use_srp = (shoe.tc_sre && (shoe.logical_fc >= 5)); // logical addr [is]xxxxxxxxxxxx[ps] -> value xxxxxxxxxxxx - const uint32_t value = (shoe.logical_addr << tc_is()) >> (tc_is() + tc_ps()); + const uint32_t value = (shoe.logical_addr << shoe.tc_is) >> shoe.tc_is_plus_ps; // value xxx[xxxxxxxxx] -> key xxxxxxxxx - const uint32_t key = value & (PMMU_CACHE_SIZE-1); // low PMMU_CACHE_KEY_BITS bits + const uint32_t key = value & (PMMU_CACHE_SIZE - 1); // low PMMU_CACHE_KEY_BITS bits const pmmu_cache_entry_t entry = shoe.pmmu_cache[use_srp].entry[key]; - const _Bool is_set = (shoe.pmmu_cache[use_srp].valid_map[key/8] >> (key & 7)) & 1; - const _Bool values_match = (entry.logical_value == value); - const _Bool first_modify = !(shoe.logical_is_write && !entry.modified); - const _Bool not_write_protected = !(shoe.logical_is_write && entry.wp); + const _Bool is_set = (shoe.pmmu_cache[use_srp].valid_map[key >> 3] >> (key & 7)) & 1; const uint32_t ps_mask = 0xffffffff >> entry.used_bits; const uint32_t v_mask = ~~ps_mask; shoe.physical_addr = 
((entry.physical_addr<<8) & v_mask) | (shoe.logical_addr & ps_mask); - return is_set && values_match && first_modify && not_write_protected; + return is_set && (entry.logical_value == value) && entry.modified && !entry.wp; +} + +static _Bool check_pmmu_cache_read(void) +{ + const _Bool use_srp = (shoe.tc_sre && (shoe.logical_fc >= 5)); + + // logical addr [is]xxxxxxxxxxxx[ps] -> value xxxxxxxxxxxx + const uint32_t value = (shoe.logical_addr << shoe.tc_is) >> shoe.tc_is_plus_ps; + // value xxx[xxxxxxxxx] -> key xxxxxxxxx + const uint32_t key = value & (PMMU_CACHE_SIZE - 1); // low PMMU_CACHE_KEY_BITS bits + + const pmmu_cache_entry_t entry = shoe.pmmu_cache[use_srp].entry[key]; + + const _Bool is_set = (shoe.pmmu_cache[use_srp].valid_map[key >> 3] >> (key & 7)) & 1; + + const uint32_t ps_mask = 0xffffffff >> entry.used_bits; + const uint32_t v_mask = ~~ps_mask; + + shoe.physical_addr = ((entry.physical_addr<<8) & v_mask) | (shoe.logical_addr & ps_mask); + return is_set && (entry.logical_value == value); } static void translate_logical_addr() { - const uint8_t use_srp = (tc_sre() && (shoe.logical_fc >= 4)); + const uint8_t use_srp = (shoe.tc_sre && (shoe.logical_fc >= 5)); + assert((0x66 >> shoe.logical_fc) & 1); // we only support these FCs for now + uint64_t *rootp_ptr = (use_srp ? (&shoe.srp) : (&shoe.crp)); const uint64_t rootp = *rootp_ptr; uint8_t desc_did_change = 0; @@ -325,7 +344,7 @@ static void translate_logical_addr() uint8_t i; uint64_t desc = rootp; // Initial descriptor is the root pointer descriptor uint8_t desc_size = 1; // And the root pointer descriptor is always 8 bytes (1==8 bytes, 0==4 bytes) - uint8_t used_bits = tc_is(); // Keep track of how many bits will be the effective "page size" + uint8_t used_bits = shoe.tc_is; // Keep track of how many bits will be the effective "page size" // (If the table search terminates early (before used_bits == ts_ps()), // then (32 - used_bits) will be the effective page size. 
That is, the number of bits // we or into the physical addr from the virtual addr) @@ -443,7 +462,7 @@ search_done: /* --- insert this translation into pmmu_cache --- */ // logical addr [is]xxxxxxxxxxxx[ps] -> value xxxxxxxxxxxx - const uint32_t value = (shoe.logical_addr << tc_is()) >> (tc_is() + tc_ps()); + const uint32_t value = (shoe.logical_addr << shoe.tc_is) >> shoe.tc_is_plus_ps; // value xxx[xxxxxxxxx] -> key xxxxxxxxx const uint32_t key = value & (PMMU_CACHE_SIZE-1); // low PMMU_CACHE_KEY_BITS bits @@ -463,7 +482,7 @@ void logical_get (void) { // If address translation isn't enabled, this is a physical address - if sunlikely(!tc_enable()) { + if sunlikely(!shoe.tc_enable) { shoe.physical_addr = shoe.logical_addr; shoe.physical_size = shoe.logical_size; physical_get(); @@ -479,16 +498,13 @@ void logical_get (void) const uint32_t logical_size = shoe.logical_size; const uint32_t logical_addr = shoe.logical_addr; - const uint16_t ps = tc_ps(); // log2 of the page size - const uint32_t pagesize = 1 << ps; // the page size - const uint32_t pagemask = pagesize-1; // a mask of the page bits + const uint32_t pagemask = shoe.tc_pagemask; const uint32_t pageoffset = logical_addr & pagemask; - shoe.logical_is_write = 0; - // Common case: the read is contained entirely within a page - if slikely(!((pageoffset + logical_size - 1) >> ps)) { - if sunlikely(!check_pmmu_cache()) { + if slikely(!((pageoffset + logical_size - 1) >> shoe.tc_ps)) { + if sunlikely(!check_pmmu_cache_read()) { + shoe.logical_is_write = 0; translate_logical_addr(); if sunlikely(shoe.abort) return ; @@ -515,9 +531,11 @@ void logical_get (void) const uint32_t size_a = logical_size - size_b; const uint32_t addr_b = addr_a + size_a; + shoe.logical_is_write = 0; + shoe.logical_addr = addr_a; shoe.logical_size = size_a; - if sunlikely(!check_pmmu_cache()) { + if sunlikely(!check_pmmu_cache_read()) { translate_logical_addr(); if sunlikely(shoe.abort) return ; @@ -527,7 +545,7 @@ void logical_get (void) 
shoe.logical_addr = addr_b; shoe.logical_size = size_b; - if sunlikely(!check_pmmu_cache()) { + if sunlikely(!check_pmmu_cache_read()) { translate_logical_addr(); if sunlikely(shoe.abort) return ; @@ -559,7 +577,7 @@ void logical_get (void) void logical_set (void) { // If address translation isn't enabled, this is a physical address - if sunlikely(!tc_enable()) { + if sunlikely(!shoe.tc_enable) { shoe.physical_addr = shoe.logical_addr; shoe.physical_size = shoe.logical_size; shoe.physical_dat = shoe.logical_dat; @@ -570,18 +588,16 @@ void logical_set (void) const uint32_t logical_size = shoe.logical_size; const uint32_t logical_addr = shoe.logical_addr; - const uint16_t ps = tc_ps(); // log2 of the page size - const uint32_t pagesize = 1 << ps; // the page size - const uint32_t pagemask = pagesize-1; // a mask of the page bits + const uint32_t pagemask = shoe.tc_pagemask; const uint32_t pageoffset = logical_addr & pagemask; // Make the translate function fail if the page is write-protected shoe.logical_is_write = 1; // Common case: this write is contained entirely in one page - if slikely(!((pageoffset + logical_size - 1) >> ps)) { + if slikely(!((pageoffset + logical_size - 1) >> shoe.tc_ps)) { // Common case: the write is contained entirely within a page - if sunlikely(!check_pmmu_cache()) { + if sunlikely(!check_pmmu_cache_write()) { translate_logical_addr(); if sunlikely(shoe.abort) return ; @@ -601,7 +617,7 @@ void logical_set (void) shoe.logical_addr = addr_a; shoe.logical_size = size_a; - if sunlikely(!check_pmmu_cache()) { + if sunlikely(!check_pmmu_cache_write()) { translate_logical_addr(); if sunlikely(shoe.abort) return ; @@ -610,7 +626,7 @@ void logical_set (void) shoe.logical_addr = addr_b; shoe.logical_size = size_b; - if sunlikely(!check_pmmu_cache()) { + if sunlikely(!check_pmmu_cache_write()) { translate_logical_addr(); if sunlikely(shoe.abort) return ; @@ -631,12 +647,163 @@ void logical_set (void) } } +/* --- PC cache routines --- */ +#pragma 
mark PC cache routines
+
+/* Big-endian loads assembled from bytes: no host alignment or aliasing assumptions */
+static inline uint16_t pccache_be16(const uint8_t *p) { return (uint16_t)((p[0] << 8) | p[1]); }
+static inline uint32_t pccache_be32(const uint8_t *p) { return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) | ((uint32_t)p[2] << 8) | p[3]; }
+
+/*
+ * Map a physical address to the host memory backing it: RAM below 0x40000000
+ * (addresses past the end of RAM wrap around) or ROM below 0x50000000.
+ * Returns NULL for any other region; the callers treat that as an unsupported
+ * instruction fetch.
+ */
+static inline uint8_t *pccache_host_ptr(uint32_t paddr)
+{
+    if (paddr < 0x40000000) {
+        /* Address in RAM */
+        if sunlikely(paddr >= shoe.physical_mem_size)
+            paddr %= shoe.physical_mem_size;
+        return &shoe.physical_mem_base[paddr];
+    }
+    if (paddr < 0x50000000) {
+        /* Address in ROM */
+        return &shoe.physical_rom_base[paddr & (shoe.physical_rom_size - 1)];
+    }
+    return NULL;
+}
+
+/*
+ * pccache_miss() - slow path of pccache_nextword() (PMMU enabled, pc even).
+ * Translates pc via check_pmmu_cache_read(), falling back to
+ * translate_logical_addr(), then points shoe.pccache_ptr at the host memory
+ * backing pc's page so later fetches from that logical page skip translation.
+ * Returns the word at pc in host byte order.  If translation aborts, the pc
+ * cache is invalidated and 0 is returned with shoe.abort still set.
+ */
+static uint16_t pccache_miss(const uint32_t pc)
+{
+    const uint32_t pageoffset = pc & shoe.tc_pagemask;
+    uint8_t *page;
+
+    /* Program-space function codes: 6 -> supervisor, 2 -> user */
+    shoe.logical_fc = sr_s() ? 6 : 2;
+    shoe.logical_addr = pc;
+    if sunlikely(!check_pmmu_cache_read()) {
+        shoe.logical_is_write = 0;
+        translate_logical_addr();
+        if sunlikely(shoe.abort)
+            goto fail;
+    }
+
+    /* XOR strips pc's in-page offset, leaving the physical base of the page */
+    page = pccache_host_ptr(shoe.physical_addr ^ pageoffset);
+
+    /*
+     * Only RAM and ROM are supported for now.  Other regions could be served
+     * by calling physical_get() and leaving the cache invalid, but A/UX is
+     * not expected to execute outside RAM/ROM.
+     */
+    if sunlikely(!page) {
+        assert(!"pccache_miss: neither RAM nor ROM!\n");
+        goto fail;
+    }
+
+    shoe.pccache_use_srp = shoe.tc_sre && sr_s();
+    shoe.pccache_logical_page = pc ^ pageoffset;
+    shoe.pccache_ptr = page;
+    return pccache_be16(page + pageoffset);
+
+fail:
+    invalidate_pccache();
+    return 0;
+}
+
+/*
+ * pccache_nextword() - fetch the 16-bit instruction word at logical address
+ * pc, in host byte order.  With the PMMU enabled, a hit in the one-page pc
+ * cache is a plain host-memory read and anything else goes to pccache_miss();
+ * with it disabled, pc is used directly as a physical address.  Only RAM and
+ * ROM are supported, and an odd pc trips an assert.  Returns 0 on failure;
+ * translation faults are reported through shoe.abort.
+ */
+uint16_t pccache_nextword(const uint32_t pc)
+{
+    if sunlikely(pc & 1) {
+        assert(!"odd pc address!\n");
+        return 0;
+    }
+
+    if slikely(shoe.tc_enable) {
+        const uint32_t pc_offset = pc & shoe.tc_pagemask;
+        const uint32_t pc_page = pc ^ pc_offset;
+        const uint32_t use_srp = shoe.tc_sre && sr_s();
+
+        /* Hit: same root pointer and same logical page as the cached one */
+        if slikely((shoe.pccache_use_srp == use_srp) && (shoe.pccache_logical_page == pc_page))
+            return pccache_be16(shoe.pccache_ptr + pc_offset);
+
+        return pccache_miss(pc);
+    }
+
+    /* PMMU disabled: pc is already a physical address */
+    const uint8_t *src = pccache_host_ptr(pc);
+    if sunlikely(!src) {
+        assert(!"!tc_enable: neither RAM nor ROM\n");
+        return 0;
+    }
+    return pccache_be16(src);
+}
+
+/*
+ * pccache_nextlong() - fetch the 32-bit value at logical address pc, in host
+ * byte order.  Fast path: PMMU enabled, pc cache hit, and all four bytes
+ * inside the cached page.  A miss or a page-straddling read is assembled from
+ * two pccache_nextword() calls; if the first sets shoe.abort, 0 is returned
+ * without attempting the second.  An odd pc trips an assert.
+ */
+uint32_t pccache_nextlong(const uint32_t pc)
+{
+    /* Reject an odd pc before any memory is read */
+    if sunlikely(pc & 1) {
+        assert(!"odd pc address!\n");
+        return 0;
+    }
+
+    if slikely(shoe.tc_enable) {
+        const uint32_t pc_offset = pc & shoe.tc_pagemask;
+        const uint32_t pc_page = pc ^ pc_offset;
+        const uint32_t use_srp = shoe.tc_sre && sr_s();
+
+        /* If the cache exists, is valid, and the read is contained entirely within 1 page */
+        if slikely((shoe.pccache_use_srp == use_srp) && (shoe.pccache_logical_page == pc_page) && !((pc_offset + 3) >> shoe.tc_ps))
+            return pccache_be32(shoe.pccache_ptr + pc_offset);
+
+        /* Cast first: a promoted uint16_t shifted left by 16 could overflow int */
+        const uint32_t result_high = (uint32_t)pccache_nextword(pc) << 16;
+        if sunlikely(shoe.abort)
+            return 0;
+
+        return result_high | pccache_nextword(pc + 2);
+    }
+
+    /* PMMU disabled: pc is already a physical address */
+    const uint8_t *src = pccache_host_ptr(pc);
+    if sunlikely(!src) {
+        assert(!"!tc_enable: neither RAM nor ROM\n");
+        return 0;
+    }
+    return pccache_be32(src);
+}
+
 /* --- EA routines --- */
 #pragma mark EA routines
 
-#define nextword(pc) ({const uint16_t w=lget((pc),2);if sunlikely(shoe.abort){return;}(pc)+=2; w;})
-#define nextlong(pc) ({const uint32_t L=lget((pc),4);if sunlikely(shoe.abort){return;}(pc)+=4; L;})
+#define nextword(pc) ({const uint16_t w = pccache_nextword(pc); if sunlikely(shoe.abort) return; (pc) += 2; w;})
+#define nextlong(pc) ({const uint32_t L = pccache_nextlong(pc); if sunlikely(shoe.abort) return; (pc) += 4; L;})
 
 // ea_decode_extended() - find the EA for those hiddeous 68020 addr modes
 static void ea_decode_extended()
diff --git a/core/shoebill.h b/core/shoebill.h
index 5b6d79b..dfc4746 100644
--- a/core/shoebill.h
+++ b/core/shoebill.h
@@ -261,11 +261,11 @@ uint8_t* shoebill_extract_kernel(const char *disk_path, const char *kernel_path,
     #define set_sr_t1(b) {shoe.sr &= (~(1<<15)); shoe.sr |= (((b)!=0)<<15);}
 
     // MMU
-    #define tc_enable() (shoe.tc >> 31)
-    #define tc_sre() ((shoe.tc >> 25) & 1)
+    #define _tc_enable() (shoe.tc >> 31) // _tc_enable,sre,ps,is are all extracted in shoe.tc_*
+    #define _tc_sre() ((shoe.tc >> 25) & 1)
     #define tc_fcl() ((shoe.tc >> 24) & 1)
-    #define tc_ps() ((shoe.tc >> 20) & 0xf)
-    #define tc_is() ((shoe.tc >> 16) & 0xf)
+    #define _tc_ps() ((shoe.tc >> 20) & 0xf)
+    #define _tc_is() ((shoe.tc >> 16) & 0xf)
     #define tc_tia() ((shoe.tc >> 12) & 0xf)
     #define tc_tib() ((shoe.tc >> 8) & 0xf)
     #define tc_tic() ((shoe.tc >> 4) & 0xf)
@@ -738,6
+738,11 @@ typedef struct { _Bool logical_is_write; // <- boolean: true iff the operation is logical_set() uint8_t logical_fc; // logical function code +#define invalidate_pccache() do {shoe.pccache_use_srp = 2;} while (0) + uint32_t pccache_use_srp; // 1 -> use srp, 0 -> use crp, other -> pccache is invalid + uint32_t pccache_logical_page; + uint8_t *pccache_ptr; + // -- PMMU caching structures --- #define PMMU_CACHE_KEY_BITS 10 #define PMMU_CACHE_SIZE (1<