Major speed improvements (25-50%)

There may be bugs lurking in it. On my Core i7 macbook pro,
shoebill now runs so fast that SetUpTimeK() on A/UX 3 hangs.
(SetUpTimeK tries to time a dbra loop, and refuses to accept any
speed faster than a certain threshold, which shoebill is now
surpassing. If you see A/UX hanging early in boot, it's probably
that.)

- Added a new specialized cache for instruction stream reads
-- This also lets us distinguish between data and instruction
   reads, which the 68020 does. Instruction reads are now done
   with the correct function code (2 or 6), although that
   doesn't fix or improve anything currently
- Added an obvious condition code optimization, dunno how I missed
  it earlier
- Other little changes
This commit is contained in:
Peter Rutenbar 2015-01-29 00:19:57 -05:00
parent 76f2b35170
commit 476a8bb570
7 changed files with 270 additions and 103 deletions

View File

@ -738,6 +738,8 @@ uint32_t shoebill_initialize(shoebill_config_t *config)
init_scsi_bus_state(); init_scsi_bus_state();
init_iwm_state(); init_iwm_state();
/* Invalidate the pc cache */
invalidate_pccache();
set_sr(0x2000); set_sr(0x2000);
shoe.pc = pc; shoe.pc = pc;
@ -799,6 +801,9 @@ void shoebill_restart (void)
// clear the pmmu cache // clear the pmmu cache
memset(shoe.pmmu_cache, 0, sizeof(shoe.pmmu_cache)); memset(shoe.pmmu_cache, 0, sizeof(shoe.pmmu_cache));
// Invalidate the pc cache
invalidate_pccache();
// Reset all CPU registers // Reset all CPU registers
memset(shoe.d, 0, sizeof(shoe.d)); memset(shoe.d, 0, sizeof(shoe.d));
memset(shoe.a, 0, sizeof(shoe.a)); memset(shoe.a, 0, sizeof(shoe.a));
@ -813,6 +818,8 @@ void shoebill_restart (void)
// Reset all pmmu registers // Reset all pmmu registers
shoe.crp = shoe.srp = shoe.drp = 0; shoe.crp = shoe.srp = shoe.drp = 0;
shoe.tc = 0; shoe.tc = 0;
shoe.tc_pagesize = shoe.tc_pagemask = 0;
shoe.tc_ps = shoe.tc_is = shoe.tc_is_plus_ps = shoe.tc_enable = shoe.tc_sre = 0;
shoe.pcsr = 0; shoe.pcsr = 0;
shoe.ac = 0; shoe.ac = 0;
memset(shoe.bad, 0, sizeof(shoe.bad)); memset(shoe.bad, 0, sizeof(shoe.bad));

View File

@ -32,31 +32,17 @@
global_shoebill_context_t shoe; global_shoebill_context_t shoe;
static _Bool _cc_t() {return 1;} /* Precomputed results for condition code tests */
static _Bool _cc_f() {return 0;} static const uint16_t cc_consts[16] = {
static _Bool _cc_hi() {return !sr_c() && !sr_z();} 0xffff, 0x0000, 0x0505, 0xfafa, 0x5555, 0xaaaa, 0x0f0f, 0xf0f0,
static _Bool _cc_ls() {return sr_c() || sr_z();} 0x3333, 0xcccc, 0x00ff, 0xff00, 0xcc33, 0x33cc, 0x0c03, 0xf3fc
static _Bool _cc_cc() {return !sr_c();}
static _Bool _cc_cs() {return sr_c();}
static _Bool _cc_ne() {return !sr_z();}
static _Bool _cc_eq() {return sr_z();}
static _Bool _cc_vc() {return !sr_v();}
static _Bool _cc_vs() {return sr_v();}
static _Bool _cc_pl() {return !sr_n();}
static _Bool _cc_mi() {return sr_n();}
static _Bool _cc_ge() {return (sr_n() && sr_v()) || (!sr_n() && !sr_v());}
static _Bool _cc_lt() {return (sr_n() && !sr_v()) || (!sr_n() && sr_v());}
static _Bool _cc_gt() {return (sr_n() && sr_v() && !sr_z()) || (!sr_n() && !sr_v() && !sr_z());}
static _Bool _cc_le() {return sr_z() || (sr_n() && !sr_v()) || (!sr_n() && sr_v());}
typedef _Bool (*_cc_func)();
static const _cc_func evaluate_cc[16] = {
_cc_t, _cc_f, _cc_hi, _cc_ls, _cc_cc, _cc_cs, _cc_ne, _cc_eq,
_cc_vc, _cc_vs, _cc_pl, _cc_mi, _cc_ge, _cc_lt, _cc_gt, _cc_le
}; };
#define evaluate_cc(c) ((cc_consts[(c)] >> (shoe.sr & 0xf)) & 1)
#define nextword() ({const uint16_t w=lget(shoe.pc,2); if sunlikely(shoe.abort) {return;}; shoe.pc+=2; w;}) #define nextword() ({const uint16_t w = pccache_nextword(shoe.pc); if sunlikely(shoe.abort) {return;} shoe.pc += 2; w;})
#define nextlong() ({const uint32_t L=lget(shoe.pc,4); if sunlikely(shoe.abort) {return;}; shoe.pc+=4; L;}) #define nextlong() ({const uint32_t L = pccache_nextlong(shoe.pc); if sunlikely(shoe.abort) {return;} shoe.pc += 4; L;})
#define verify_supervisor() {if sunlikely(!sr_s()) {throw_privilege_violation(); return;}} #define verify_supervisor() {if sunlikely(!sr_s()) {throw_privilege_violation(); return;}}
@ -105,7 +91,7 @@ static void inst_trapcc (void) {
const uint32_t sz = y << (z+1); // too clever const uint32_t sz = y << (z+1); // too clever
const uint32_t next_pc = shoe.pc + sz; const uint32_t next_pc = shoe.pc + sz;
if (evaluate_cc[c]()) if (evaluate_cc(c))
throw_frame_two(shoe.sr, next_pc, 7, shoe.orig_pc); throw_frame_two(shoe.sr, next_pc, 7, shoe.orig_pc);
else else
shoe.pc = next_pc; shoe.pc = next_pc;
@ -388,7 +374,7 @@ static void inst_divu (void) {
const uint32_t dividend = shoe.d[r]; const uint32_t dividend = shoe.d[r];
const uint16_t divisor = (uint16_t)shoe.dat; const uint16_t divisor = (uint16_t)shoe.dat;
if (divisor == 0) { if sunlikely(divisor == 0) {
throw_frame_two(shoe.orig_sr, shoe.uncommitted_ea_read_pc, 5, shoe.orig_pc); throw_frame_two(shoe.orig_sr, shoe.uncommitted_ea_read_pc, 5, shoe.orig_pc);
return ; return ;
} }
@ -420,7 +406,7 @@ static void inst_divs (void) {
const uint16_t u_divisor = (uint16_t)shoe.dat; const uint16_t u_divisor = (uint16_t)shoe.dat;
const int16_t s_divisor = (int16_t)shoe.dat; const int16_t s_divisor = (int16_t)shoe.dat;
if (s_divisor == 0) { if sunlikely(s_divisor == 0) {
throw_frame_two(shoe.orig_sr, shoe.uncommitted_ea_read_pc, 5, shoe.orig_pc); throw_frame_two(shoe.orig_sr, shoe.uncommitted_ea_read_pc, 5, shoe.orig_pc);
return ; return ;
} }
@ -1029,7 +1015,7 @@ static void inst_long_div (void) {
call_ea_read(M, 4); call_ea_read(M, 4);
const uint32_t divisor = shoe.dat; const uint32_t divisor = shoe.dat;
if (divisor == 0) { if sunlikely(divisor == 0) {
throw_frame_two(shoe.orig_sr, shoe.uncommitted_ea_read_pc, 5, shoe.orig_pc); throw_frame_two(shoe.orig_sr, shoe.uncommitted_ea_read_pc, 5, shoe.orig_pc);
return ; return ;
} }
@ -1253,21 +1239,6 @@ static void inst_movec (void) {
else reg[r] = shoe.cacr; else reg[r] = shoe.cacr;
return ; return ;
} }
/* // These are >'020 registers
case 0x003: { // TC
// TC is a 16 bit register, but movec is always a 32bit
if (x) shoe.tc = reg[r] & ~b(1100 0000 0000 0000);
else reg[r] = shoe.tc & ~b(1100 0000 0000 0000);
return ;
}
case 0x006: // DTT0
if (x) shoe.dtt0 = reg[r] & ~b(11111111 11111111 11100011 01100100);
else reg[r] = shoe.dtt0 & ~b(11111111 11111111 11100011 01100100);
return ;
case 0x007: // DTT1
if (x) shoe.dtt1 = reg[r] & ~b(11111111 11111111 11100011 01100100);
else reg[r] = shoe.dtt1 & ~b(11111111 11111111 11100011 01100100);
return ; */
case 0x801: // VBR case 0x801: // VBR
if (x) shoe.vbr = reg[r]; if (x) shoe.vbr = reg[r];
else reg[r] = shoe.vbr; else reg[r] = shoe.vbr;
@ -1325,7 +1296,7 @@ static void inst_moves (void) {
*/ */
// For now, only supporting fc 1 (user data space) // For now, only supporting fc 1 (user data space)
if (fc != 1) { if sunlikely(fc != 1) {
slog("inst_moves: error: hit fc=%u\n", fc); slog("inst_moves: error: hit fc=%u\n", fc);
assert(!"inst_moves: error, hit weird function code"); assert(!"inst_moves: error, hit weird function code");
return ; return ;
@ -1838,7 +1809,7 @@ static void inst_suba (void) {
static void inst_dbcc (void) { static void inst_dbcc (void) {
~decompose(shoe.op, 0101 cccc 11001 rrr); ~decompose(shoe.op, 0101 cccc 11001 rrr);
if (evaluate_cc[c]()) { if (evaluate_cc(c)) {
shoe.pc += 2; shoe.pc += 2;
} }
else { else {
@ -1877,7 +1848,7 @@ static void inst_bcc (void) {
const uint32_t orig_pc = shoe.pc; const uint32_t orig_pc = shoe.pc;
~decompose(shoe.op, 0110 cccc dddddddd); ~decompose(shoe.op, 0110 cccc dddddddd);
if (evaluate_cc[c]()) { if (evaluate_cc(c)) {
if (d == 0) { if (d == 0) {
const int16_t ext = (int16_t)nextword(); const int16_t ext = (int16_t)nextword();
shoe.pc = orig_pc + ext; shoe.pc = orig_pc + ext;
@ -1899,7 +1870,7 @@ static void inst_bcc (void) {
static void inst_scc (void) { static void inst_scc (void) {
~decompose(shoe.op, 0101 cccc 11 MMMMMM); ~decompose(shoe.op, 0101 cccc 11 MMMMMM);
shoe.dat = evaluate_cc[c]() ? 0xff : 0; shoe.dat = evaluate_cc(c) ? 0xff : 0;
call_ea_write(M, 1); call_ea_write(M, 1);
} }
@ -3008,25 +2979,14 @@ void cpu_step()
shoe.orig_pc = shoe.pc; shoe.orig_pc = shoe.pc;
shoe.orig_sr = shoe.sr; shoe.orig_sr = shoe.sr;
// Is this an odd address? Throw an address exception!
if sunlikely(shoe.pc & 1) {
// throw_address_error(shoe.pc, 0);
// I'm leaving this assert in here for now because it almost always indicates a bug in the emulator when it fires
assert(!"odd PC address (probably a bug)");
return ;
}
// Fetch the next instruction word // Fetch the next instruction word
shoe.op = lget(shoe.pc, 2); shoe.op = pccache_nextword(shoe.pc);
// If there was an exception, then the pc changed. Restart execution from the beginning. // If the fetch succeeded, execute it
if sunlikely(shoe.abort) { if slikely(!shoe.abort) {
shoe.abort = 0; shoe.pc += 2;
return ; inst_instruction_to_pointer[inst_opcode_map[shoe.op]]();
} }
shoe.pc+=2;
inst_instruction_to_pointer[inst_opcode_map[shoe.op]]();
/* The abort flag indicates that a routine should stop trying to execute the /* The abort flag indicates that a routine should stop trying to execute the
instruction and return immediately to cpu_step(), usually to begin instruction and return immediately to cpu_step(), usually to begin

View File

@ -172,8 +172,8 @@ enum {
} fpu_formats; } fpu_formats;
#define fpu_get_state_ptr() fpu_state_t *fpu = (fpu_state_t*)shoe.fpu_state #define fpu_get_state_ptr() fpu_state_t *fpu = (fpu_state_t*)shoe.fpu_state
#define nextword() ({const uint16_t w=lget(shoe.pc,2); if (shoe.abort) {return;}; shoe.pc+=2; w;}) #define nextword() ({const uint16_t w = pccache_nextword(shoe.pc); if sunlikely(shoe.abort) return; shoe.pc += 2; w;})
#define nextlong() ({const uint32_t L=lget(shoe.pc,4); if (shoe.abort) {return;}; shoe.pc+=4; L;}) #define nextlong() ({const uint32_t L = pccache_nextlong(shoe.pc); if sunlikely(shoe.abort) return; shoe.pc += 4; L;})
#define verify_supervisor() {if (!sr_s()) {throw_privilege_violation(); return;}} #define verify_supervisor() {if (!sr_s()) {throw_privilege_violation(); return;}}
#pragma mark FPU exception stuff #pragma mark FPU exception stuff

View File

@ -83,6 +83,9 @@ void inst_mc68851_pflushr(uint16_t ext){
// Just nuke the entire cache // Just nuke the entire cache
memset(shoe.pmmu_cache[0].valid_map, 0, PMMU_CACHE_SIZE/8); memset(shoe.pmmu_cache[0].valid_map, 0, PMMU_CACHE_SIZE/8);
memset(shoe.pmmu_cache[1].valid_map, 0, PMMU_CACHE_SIZE/8); memset(shoe.pmmu_cache[1].valid_map, 0, PMMU_CACHE_SIZE/8);
/* Invalidate the pc cache */
invalidate_pccache();
} }
void inst_mc68851_pflush(uint16_t ext){ void inst_mc68851_pflush(uint16_t ext){
@ -91,6 +94,9 @@ void inst_mc68851_pflush(uint16_t ext){
memset(shoe.pmmu_cache[0].valid_map, 0, PMMU_CACHE_SIZE/8); memset(shoe.pmmu_cache[0].valid_map, 0, PMMU_CACHE_SIZE/8);
memset(shoe.pmmu_cache[1].valid_map, 0, PMMU_CACHE_SIZE/8); memset(shoe.pmmu_cache[1].valid_map, 0, PMMU_CACHE_SIZE/8);
// slog("%s: Error, not implemented!\n", __func__); // slog("%s: Error, not implemented!\n", __func__);
/* Invalidate the pc cache */
invalidate_pccache();
} }
void inst_mc68851_pmove(uint16_t ext){ void inst_mc68851_pmove(uint16_t ext){
@ -100,6 +106,12 @@ void inst_mc68851_pmove(uint16_t ext){
~decompose(shoe.op, 1111 000 000 MMMMMM); ~decompose(shoe.op, 1111 000 000 MMMMMM);
~decompose(ext, fff ppp w 0000 nnn 00); ~decompose(ext, fff ppp w 0000 nnn 00);
/*
* For simplicity, just blow away pccache whenever
* the PMMU state changes at all
*/
if (!w)
invalidate_pccache();
// instruction format #1 // instruction format #1
@ -119,7 +131,16 @@ void inst_mc68851_pmove(uint16_t ext){
switch (p) { switch (p) {
case 0: // tc case 0: // tc
if (!w) shoe.tc = shoe.dat & 0x83FFFFFF; if (!w) {
shoe.tc = shoe.dat & 0x83FFFFFF;
shoe.tc_is = (shoe.tc >> 16) & 0xf;
shoe.tc_ps = (shoe.tc >> 20) & 0xf;
shoe.tc_pagesize = 1 << shoe.tc_ps;
shoe.tc_pagemask = shoe.tc_pagesize - 1;
shoe.tc_is_plus_ps = shoe.tc_is + shoe.tc_ps;
shoe.tc_enable = (shoe.tc >> 31) & 1;
shoe.tc_sre = (shoe.tc >> 25) & 1;
}
else { else {
shoe.dat = shoe.tc; shoe.dat = shoe.tc;
//if (!tc_fcl()) assert(!"pmove->tc: function codes not supported\n"); //if (!tc_fcl()) assert(!"pmove->tc: function codes not supported\n");
@ -193,7 +214,7 @@ static int64_t ptest_search(const uint32_t _logical_addr, const uint64_t rootp)
uint8_t i; uint8_t i;
uint64_t desc = rootp; // Initial descriptor is the root pointer descriptor uint64_t desc = rootp; // Initial descriptor is the root pointer descriptor
uint8_t desc_size = 1; // And the root pointer descriptor is always 8 bytes (1==8 bytes, 0==4 bytes) uint8_t desc_size = 1; // And the root pointer descriptor is always 8 bytes (1==8 bytes, 0==4 bytes)
uint8_t used_bits = tc_is(); // Keep track of how many bits will be the effective "page size" uint8_t used_bits = shoe.tc_is; // Keep track of how many bits will be the effective "page size"
// (If the table search terminates early (before used_bits == ts_ps()), // (If the table search terminates early (before used_bits == ts_ps()),
// then this will be the effective page size. That is, the number of bits // then this will be the effective page size. That is, the number of bits
// we or into the physical addr from the virtual addr) // we or into the physical addr from the virtual addr)
@ -315,7 +336,7 @@ void inst_mc68851_ptest(uint16_t ext){
~decompose(shoe.op, 1111 0000 00 MMMMMM); ~decompose(shoe.op, 1111 0000 00 MMMMMM);
~decompose(ext, 100 LLL R AAAA FFFFF); // Erata in 68kPRM - F is 6 bits, and A is 3 ~decompose(ext, 100 LLL R AAAA FFFFF); // Erata in 68kPRM - F is 6 bits, and A is 3
assert(tc_enable()); // XXX: Throws some exception if tc_enable isn't set assert(shoe.tc_enable); // XXX: Throws some exception if tc_enable isn't set
assert(tc_fcl() == 0); // XXX: I can't handle function code lookups, and I don't want to assert(tc_fcl() == 0); // XXX: I can't handle function code lookups, and I don't want to
assert(L == 7); // XXX: Not currently handling searching to a particular level assert(L == 7); // XXX: Not currently handling searching to a particular level

View File

@ -289,33 +289,52 @@ const physical_set_ptr physical_set_jump_table[16] = {
} }
static _Bool check_pmmu_cache(void) static _Bool check_pmmu_cache_write(void)
{ {
const _Bool use_srp = (tc_sre() && (shoe.logical_fc >= 4)); const _Bool use_srp = (shoe.tc_sre && (shoe.logical_fc >= 5));
// logical addr [is]xxxxxxxxxxxx[ps] -> value xxxxxxxxxxxx // logical addr [is]xxxxxxxxxxxx[ps] -> value xxxxxxxxxxxx
const uint32_t value = (shoe.logical_addr << tc_is()) >> (tc_is() + tc_ps()); const uint32_t value = (shoe.logical_addr << shoe.tc_is) >> shoe.tc_is_plus_ps;
// value xxx[xxxxxxxxx] -> key xxxxxxxxx // value xxx[xxxxxxxxx] -> key xxxxxxxxx
const uint32_t key = value & (PMMU_CACHE_SIZE-1); // low PMMU_CACHE_KEY_BITS bits const uint32_t key = value & (PMMU_CACHE_SIZE - 1); // low PMMU_CACHE_KEY_BITS bits
const pmmu_cache_entry_t entry = shoe.pmmu_cache[use_srp].entry[key]; const pmmu_cache_entry_t entry = shoe.pmmu_cache[use_srp].entry[key];
const _Bool is_set = (shoe.pmmu_cache[use_srp].valid_map[key/8] >> (key & 7)) & 1; const _Bool is_set = (shoe.pmmu_cache[use_srp].valid_map[key >> 3] >> (key & 7)) & 1;
const _Bool values_match = (entry.logical_value == value);
const _Bool first_modify = !(shoe.logical_is_write && !entry.modified);
const _Bool not_write_protected = !(shoe.logical_is_write && entry.wp);
const uint32_t ps_mask = 0xffffffff >> entry.used_bits; const uint32_t ps_mask = 0xffffffff >> entry.used_bits;
const uint32_t v_mask = ~~ps_mask; const uint32_t v_mask = ~~ps_mask;
shoe.physical_addr = ((entry.physical_addr<<8) & v_mask) | (shoe.logical_addr & ps_mask); shoe.physical_addr = ((entry.physical_addr<<8) & v_mask) | (shoe.logical_addr & ps_mask);
return is_set && values_match && first_modify && not_write_protected; return is_set && (entry.logical_value == value) && entry.modified && !entry.wp;
}
/*
 * check_pmmu_cache_read() - look up shoe.logical_addr in the PMMU
 * translation cache for a read access.
 *
 * Inputs are taken from the global context: shoe.logical_addr,
 * shoe.logical_fc, and the cached TC fields (tc_sre, tc_is, tc_is_plus_ps).
 *
 * shoe.physical_addr is written unconditionally, so it only holds a
 * meaningful translation when this function returns true.
 *
 * Returns true iff the selected cache slot is marked valid and its stored
 * logical value matches the one derived from shoe.logical_addr. Unlike the
 * write variant, the entry's modified / write-protect bits are not consulted.
 */
static _Bool check_pmmu_cache_read(void)
{
// Index 1 of shoe.pmmu_cache is used when SRE is set and the function code is >= 5, index 0 otherwise
const _Bool use_srp = (shoe.tc_sre && (shoe.logical_fc >= 5));
// logical addr [is]xxxxxxxxxxxx[ps] -> value xxxxxxxxxxxx
const uint32_t value = (shoe.logical_addr << shoe.tc_is) >> shoe.tc_is_plus_ps;
// value xxx[xxxxxxxxx] -> key xxxxxxxxx
const uint32_t key = value & (PMMU_CACHE_SIZE - 1); // low PMMU_CACHE_KEY_BITS bits
const pmmu_cache_entry_t entry = shoe.pmmu_cache[use_srp].entry[key];
// valid_map is a bitmap: one bit per cache slot, eight slots per byte
const _Bool is_set = (shoe.pmmu_cache[use_srp].valid_map[key >> 3] >> (key & 7)) & 1;
// ps_mask selects the low address bits that are copied straight from the logical address
const uint32_t ps_mask = 0xffffffff >> entry.used_bits;
// NOTE(review): "~~" is presumably the escaped form of a single "~" for the
// project's macro preprocessor (cf. the "~decompose" macros); in plain C it
// would be a no-op and v_mask would equal ps_mask - confirm before touching.
const uint32_t v_mask = ~~ps_mask;
// Combine the cached physical page bits with the untranslated low bits
shoe.physical_addr = ((entry.physical_addr<<8) & v_mask) | (shoe.logical_addr & ps_mask);
return is_set && (entry.logical_value == value);
}
static void translate_logical_addr() static void translate_logical_addr()
{ {
const uint8_t use_srp = (tc_sre() && (shoe.logical_fc >= 4)); const uint8_t use_srp = (shoe.tc_sre && (shoe.logical_fc >= 5));
assert((0x66 >> shoe.logical_fc) & 1); // we only support these FCs for now
uint64_t *rootp_ptr = (use_srp ? (&shoe.srp) : (&shoe.crp)); uint64_t *rootp_ptr = (use_srp ? (&shoe.srp) : (&shoe.crp));
const uint64_t rootp = *rootp_ptr; const uint64_t rootp = *rootp_ptr;
uint8_t desc_did_change = 0; uint8_t desc_did_change = 0;
@ -325,7 +344,7 @@ static void translate_logical_addr()
uint8_t i; uint8_t i;
uint64_t desc = rootp; // Initial descriptor is the root pointer descriptor uint64_t desc = rootp; // Initial descriptor is the root pointer descriptor
uint8_t desc_size = 1; // And the root pointer descriptor is always 8 bytes (1==8 bytes, 0==4 bytes) uint8_t desc_size = 1; // And the root pointer descriptor is always 8 bytes (1==8 bytes, 0==4 bytes)
uint8_t used_bits = tc_is(); // Keep track of how many bits will be the effective "page size" uint8_t used_bits = shoe.tc_is; // Keep track of how many bits will be the effective "page size"
// (If the table search terminates early (before used_bits == ts_ps()), // (If the table search terminates early (before used_bits == ts_ps()),
// then (32 - used_bits) will be the effective page size. That is, the number of bits // then (32 - used_bits) will be the effective page size. That is, the number of bits
// we or into the physical addr from the virtual addr) // we or into the physical addr from the virtual addr)
@ -443,7 +462,7 @@ search_done:
/* --- insert this translation into pmmu_cache --- */ /* --- insert this translation into pmmu_cache --- */
// logical addr [is]xxxxxxxxxxxx[ps] -> value xxxxxxxxxxxx // logical addr [is]xxxxxxxxxxxx[ps] -> value xxxxxxxxxxxx
const uint32_t value = (shoe.logical_addr << tc_is()) >> (tc_is() + tc_ps()); const uint32_t value = (shoe.logical_addr << shoe.tc_is) >> shoe.tc_is_plus_ps;
// value xxx[xxxxxxxxx] -> key xxxxxxxxx // value xxx[xxxxxxxxx] -> key xxxxxxxxx
const uint32_t key = value & (PMMU_CACHE_SIZE-1); // low PMMU_CACHE_KEY_BITS bits const uint32_t key = value & (PMMU_CACHE_SIZE-1); // low PMMU_CACHE_KEY_BITS bits
@ -463,7 +482,7 @@ void logical_get (void)
{ {
// If address translation isn't enabled, this is a physical address // If address translation isn't enabled, this is a physical address
if sunlikely(!tc_enable()) { if sunlikely(!shoe.tc_enable) {
shoe.physical_addr = shoe.logical_addr; shoe.physical_addr = shoe.logical_addr;
shoe.physical_size = shoe.logical_size; shoe.physical_size = shoe.logical_size;
physical_get(); physical_get();
@ -479,16 +498,13 @@ void logical_get (void)
const uint32_t logical_size = shoe.logical_size; const uint32_t logical_size = shoe.logical_size;
const uint32_t logical_addr = shoe.logical_addr; const uint32_t logical_addr = shoe.logical_addr;
const uint16_t ps = tc_ps(); // log2 of the page size const uint32_t pagemask = shoe.tc_pagemask;
const uint32_t pagesize = 1 << ps; // the page size
const uint32_t pagemask = pagesize-1; // a mask of the page bits
const uint32_t pageoffset = logical_addr & pagemask; const uint32_t pageoffset = logical_addr & pagemask;
shoe.logical_is_write = 0;
// Common case: the read is contained entirely within a page // Common case: the read is contained entirely within a page
if slikely(!((pageoffset + logical_size - 1) >> ps)) { if slikely(!((pageoffset + logical_size - 1) >> shoe.tc_ps)) {
if sunlikely(!check_pmmu_cache()) { if sunlikely(!check_pmmu_cache_read()) {
shoe.logical_is_write = 0;
translate_logical_addr(); translate_logical_addr();
if sunlikely(shoe.abort) if sunlikely(shoe.abort)
return ; return ;
@ -515,9 +531,11 @@ void logical_get (void)
const uint32_t size_a = logical_size - size_b; const uint32_t size_a = logical_size - size_b;
const uint32_t addr_b = addr_a + size_a; const uint32_t addr_b = addr_a + size_a;
shoe.logical_is_write = 0;
shoe.logical_addr = addr_a; shoe.logical_addr = addr_a;
shoe.logical_size = size_a; shoe.logical_size = size_a;
if sunlikely(!check_pmmu_cache()) { if sunlikely(!check_pmmu_cache_read()) {
translate_logical_addr(); translate_logical_addr();
if sunlikely(shoe.abort) if sunlikely(shoe.abort)
return ; return ;
@ -527,7 +545,7 @@ void logical_get (void)
shoe.logical_addr = addr_b; shoe.logical_addr = addr_b;
shoe.logical_size = size_b; shoe.logical_size = size_b;
if sunlikely(!check_pmmu_cache()) { if sunlikely(!check_pmmu_cache_read()) {
translate_logical_addr(); translate_logical_addr();
if sunlikely(shoe.abort) if sunlikely(shoe.abort)
return ; return ;
@ -559,7 +577,7 @@ void logical_get (void)
void logical_set (void) void logical_set (void)
{ {
// If address translation isn't enabled, this is a physical address // If address translation isn't enabled, this is a physical address
if sunlikely(!tc_enable()) { if sunlikely(!shoe.tc_enable) {
shoe.physical_addr = shoe.logical_addr; shoe.physical_addr = shoe.logical_addr;
shoe.physical_size = shoe.logical_size; shoe.physical_size = shoe.logical_size;
shoe.physical_dat = shoe.logical_dat; shoe.physical_dat = shoe.logical_dat;
@ -570,18 +588,16 @@ void logical_set (void)
const uint32_t logical_size = shoe.logical_size; const uint32_t logical_size = shoe.logical_size;
const uint32_t logical_addr = shoe.logical_addr; const uint32_t logical_addr = shoe.logical_addr;
const uint16_t ps = tc_ps(); // log2 of the page size const uint32_t pagemask = shoe.tc_pagemask;
const uint32_t pagesize = 1 << ps; // the page size
const uint32_t pagemask = pagesize-1; // a mask of the page bits
const uint32_t pageoffset = logical_addr & pagemask; const uint32_t pageoffset = logical_addr & pagemask;
// Make the translate function fail if the page is write-protected // Make the translate function fail if the page is write-protected
shoe.logical_is_write = 1; shoe.logical_is_write = 1;
// Common case: this write is contained entirely in one page // Common case: this write is contained entirely in one page
if slikely(!((pageoffset + logical_size - 1) >> ps)) { if slikely(!((pageoffset + logical_size - 1) >> shoe.tc_ps)) {
// Common case: the write is contained entirely within a page // Common case: the write is contained entirely within a page
if sunlikely(!check_pmmu_cache()) { if sunlikely(!check_pmmu_cache_write()) {
translate_logical_addr(); translate_logical_addr();
if sunlikely(shoe.abort) if sunlikely(shoe.abort)
return ; return ;
@ -601,7 +617,7 @@ void logical_set (void)
shoe.logical_addr = addr_a; shoe.logical_addr = addr_a;
shoe.logical_size = size_a; shoe.logical_size = size_a;
if sunlikely(!check_pmmu_cache()) { if sunlikely(!check_pmmu_cache_write()) {
translate_logical_addr(); translate_logical_addr();
if sunlikely(shoe.abort) if sunlikely(shoe.abort)
return ; return ;
@ -610,7 +626,7 @@ void logical_set (void)
shoe.logical_addr = addr_b; shoe.logical_addr = addr_b;
shoe.logical_size = size_b; shoe.logical_size = size_b;
if sunlikely(!check_pmmu_cache()) { if sunlikely(!check_pmmu_cache_write()) {
translate_logical_addr(); translate_logical_addr();
if sunlikely(shoe.abort) if sunlikely(shoe.abort)
return ; return ;
@ -631,12 +647,163 @@ void logical_set (void)
} }
} }
/* --- PC cache routines --- */
#pragma mark PC cache routines
static uint16_t pccache_miss(const uint32_t pc)
{
const uint32_t pagemask = shoe.tc_pagemask;
const uint32_t pageoffset = pc & pagemask;
uint32_t paddr;
/*
* I think the instruction decoder uses these
* these function codes:
* 6 -> supervisor program space,
* 2 -> user program space
*/
shoe.logical_fc = sr_s() ? 6 : 2;
shoe.logical_addr = pc;
if sunlikely(!check_pmmu_cache_read()) {
shoe.logical_is_write = 0;
translate_logical_addr();
if sunlikely(shoe.abort)
goto fail;
}
paddr = shoe.physical_addr ^ pageoffset;
shoe.pccache_use_srp = shoe.tc_sre && sr_s();
shoe.pccache_logical_page = pc ^ pageoffset;
if (paddr < 0x40000000) {
/* Address in RAM */
if sunlikely(paddr >= shoe.physical_mem_size)
paddr %= shoe.physical_mem_size;
shoe.pccache_ptr = &shoe.physical_mem_base[paddr];
return ntohs(*(uint16_t*)(shoe.pccache_ptr + pageoffset));
}
else if (paddr < 0x50000000) {
/* Address in ROM */
shoe.pccache_ptr = &shoe.physical_rom_base[paddr & (shoe.physical_rom_size - 1)];
return ntohs(*(uint16_t*)(shoe.pccache_ptr + pageoffset));
}
/*
* For now, only supporting reads from RAM and ROM.
* This could easily be supported by just calling
* physical_get() and leaving the cache invalid,
* but I don't think A/UX ever tries to execute outside
* RAM/ROM.
*/
assert(!"pccache_miss: neither RAM nor ROM!\n");
fail:
invalidate_pccache();
return 0;
}
uint16_t pccache_nextword(const uint32_t pc)
{
if (sunlikely(pc & 1))
goto odd_addr;
if slikely(shoe.tc_enable) {
const uint32_t pc_offset = pc & shoe.tc_pagemask;
const uint32_t pc_page = pc ^ pc_offset;
const uint32_t use_srp = shoe.tc_sre && sr_s();
/* If the cache exists and is valid */
if slikely((shoe.pccache_use_srp == use_srp) && (shoe.pccache_logical_page == pc_page)) {
// printf("pccache_nextword: hit: pc=%x\n", pc);
return ntohs(*(uint16_t*)(shoe.pccache_ptr + pc_offset));
}
// printf("pccache_nextword: miss: pc=%x\n", pc);
return pccache_miss(pc);
}
else {
uint32_t paddr = pc;
if (paddr < 0x40000000) {
/* Address in RAM */
if sunlikely(paddr >= shoe.physical_mem_size)
paddr %= shoe.physical_mem_size;
return ntohs(*(uint16_t*)(&shoe.physical_mem_base[paddr]));
}
else if (paddr < 0x50000000) {
/* Address in ROM */
return ntohs(*(uint16_t*)&shoe.physical_rom_base[paddr & (shoe.physical_rom_size - 1)]);
}
assert(!"!tc_enable: neither RAM nor RAM\n");
}
odd_addr:
assert(!"odd pc address!\n");
return 0;
}
uint32_t pccache_nextlong(const uint32_t pc)
{
if slikely(shoe.tc_enable) {
const uint32_t lastpage = shoe.pccache_logical_page;
const uint32_t pc_offset = pc & shoe.tc_pagemask;
const uint32_t pc_page = pc ^ pc_offset;
const uint32_t use_srp = shoe.tc_sre && sr_s();
/* If the cache exists, is valid, and the read is contained entirely within 1 page */
if slikely((shoe.pccache_use_srp == use_srp) && (lastpage == pc_page) && !((pc_offset + 3) >> shoe.tc_ps)) {
const uint32_t result = ntohl(*(uint32_t*)(shoe.pccache_ptr + pc_offset));
if (sunlikely(pc_offset & 1))
goto odd_addr;
return result;
}
const uint32_t result_high = pccache_nextword(pc) << 16;
if sunlikely(shoe.abort)
return 0;
return result_high | pccache_nextword(pc + 2);
}
else {
uint32_t paddr = pc;
if sunlikely(paddr & 1)
goto odd_addr;
if (paddr < 0x40000000) {
/* Address in RAM */
if sunlikely(paddr >= shoe.physical_mem_size)
paddr %= shoe.physical_mem_size;
return ntohl(*(uint32_t*)(&shoe.physical_mem_base[paddr]));
}
else if (paddr < 0x50000000) {
/* Address in ROM */
return ntohl(*(uint32_t*)&shoe.physical_rom_base[paddr & (shoe.physical_rom_size - 1)]);
}
assert(!"!tc_enable: neither RAM nor RAM\n");
}
odd_addr:
assert(!"odd pc address!\n");
return 0;
}
/* --- EA routines --- */ /* --- EA routines --- */
#pragma mark EA routines #pragma mark EA routines
#define nextword(pc) ({const uint16_t w=lget((pc),2);if sunlikely(shoe.abort){return;}(pc)+=2; w;}) #define nextword(pc) ({const uint16_t w = pccache_nextword(pc); if sunlikely(shoe.abort) return; (pc) += 2; w;})
#define nextlong(pc) ({const uint32_t L=lget((pc),4);if sunlikely(shoe.abort){return;}(pc)+=4; L;}) #define nextlong(pc) ({const uint32_t L = pccache_nextlong(pc); if sunlikely(shoe.abort) return; (pc) += 4; L;})
// ea_decode_extended() - find the EA for those hiddeous 68020 addr modes // ea_decode_extended() - find the EA for those hiddeous 68020 addr modes
static void ea_decode_extended() static void ea_decode_extended()

View File

@ -261,11 +261,11 @@ uint8_t* shoebill_extract_kernel(const char *disk_path, const char *kernel_path,
#define set_sr_t1(b) {shoe.sr &= (~(1<<15)); shoe.sr |= (((b)!=0)<<15);} #define set_sr_t1(b) {shoe.sr &= (~(1<<15)); shoe.sr |= (((b)!=0)<<15);}
// MMU // MMU
#define tc_enable() (shoe.tc >> 31) #define _tc_enable() (shoe.tc >> 31) // _tc_enable,sre,ps,is are all extracted in shoe.tc_*
#define tc_sre() ((shoe.tc >> 25) & 1) #define _tc_sre() ((shoe.tc >> 25) & 1)
#define tc_fcl() ((shoe.tc >> 24) & 1) #define tc_fcl() ((shoe.tc >> 24) & 1)
#define tc_ps() ((shoe.tc >> 20) & 0xf) #define _tc_ps() ((shoe.tc >> 20) & 0xf)
#define tc_is() ((shoe.tc >> 16) & 0xf) #define _tc_is() ((shoe.tc >> 16) & 0xf)
#define tc_tia() ((shoe.tc >> 12) & 0xf) #define tc_tia() ((shoe.tc >> 12) & 0xf)
#define tc_tib() ((shoe.tc >> 8) & 0xf) #define tc_tib() ((shoe.tc >> 8) & 0xf)
#define tc_tic() ((shoe.tc >> 4) & 0xf) #define tc_tic() ((shoe.tc >> 4) & 0xf)
@ -738,6 +738,11 @@ typedef struct {
_Bool logical_is_write; // <- boolean: true iff the operation is logical_set() _Bool logical_is_write; // <- boolean: true iff the operation is logical_set()
uint8_t logical_fc; // logical function code uint8_t logical_fc; // logical function code
#define invalidate_pccache() do {shoe.pccache_use_srp = 2;} while (0)
uint32_t pccache_use_srp; // 1 -> use srp, 0 -> use crp, other -> pccache is invalid
uint32_t pccache_logical_page;
uint8_t *pccache_ptr;
// -- PMMU caching structures --- // -- PMMU caching structures ---
#define PMMU_CACHE_KEY_BITS 10 #define PMMU_CACHE_KEY_BITS 10
#define PMMU_CACHE_SIZE (1<<PMMU_CACHE_KEY_BITS) #define PMMU_CACHE_SIZE (1<<PMMU_CACHE_KEY_BITS)
@ -773,6 +778,10 @@ typedef struct {
// 68851 registers // 68851 registers
uint64_t crp, srp, drp; // user/supervisor/DMA root pointers uint64_t crp, srp, drp; // user/supervisor/DMA root pointers
uint32_t tc; // translation control uint32_t tc; // translation control
uint32_t tc_pagesize, tc_pagemask; // page size and page mask
uint8_t tc_ps, tc_is, tc_is_plus_ps, tc_enable, tc_sre; // commonly read bits in shoe.tc
uint16_t pcsr; // PMMU cache status uint16_t pcsr; // PMMU cache status
uint16_t ac; // access control uint16_t ac; // access control
uint16_t bad[8]; // breakpoint acknowledge data registers uint16_t bad[8]; // breakpoint acknowledge data registers
@ -872,6 +881,9 @@ void throw_frame_zero(uint16_t sr, uint32_t pc, uint16_t vector_num);
// mem.c functions // mem.c functions
uint16_t pccache_nextword(uint32_t pc);
uint32_t pccache_nextlong(uint32_t pc);
//void physical_get (void); //void physical_get (void);
typedef void (*physical_get_ptr) (void); typedef void (*physical_get_ptr) (void);
typedef void (*physical_set_ptr) (void); typedef void (*physical_set_ptr) (void);

View File

@ -84,7 +84,7 @@ void printregs()
print_mmu_rp(shoe.crp); print_mmu_rp(shoe.crp);
printf("tc: e=%u sre=%u fcl=%u ps=%u is=%u (tia=%u tib=%u tic=%u tid=%u)\n", printf("tc: e=%u sre=%u fcl=%u ps=%u is=%u (tia=%u tib=%u tic=%u tid=%u)\n",
tc_enable(), tc_sre(), tc_fcl(), tc_ps(), tc_is(), tc_tia(), tc_tib(), tc_tic(), tc_tid()); _tc_enable(), _tc_sre(), tc_fcl(), _tc_ps(), _tc_is(), tc_tia(), tc_tib(), tc_tic(), tc_tid());
printf("\n"); printf("\n");
} }