Implement partial block cache invalidation. Rewrite the core cached-block
execution loop as a Duff's device. Gather some predecode time statistics;
these show that only around 2% of total emulation time is spent
predecoding instructions.
This commit is contained in:
gbeauche 2003-11-03 21:28:32 +00:00
parent f0ea192460
commit a42281aad1
7 changed files with 97 additions and 21 deletions

View File

@ -224,6 +224,9 @@ static void *nvram_func(void *arg);
static void *tick_func(void *arg); static void *tick_func(void *arg);
#if EMULATED_PPC #if EMULATED_PPC
static void sigusr2_handler(int sig); static void sigusr2_handler(int sig);
extern void emul_ppc(uint32 start);
extern void init_emul_ppc(void);
extern void exit_emul_ppc(void);
#else #else
static void sigusr2_handler(int sig, sigcontext_struct *sc); static void sigusr2_handler(int sig, sigcontext_struct *sc);
static void sigsegv_handler(int sig, sigcontext_struct *sc); static void sigsegv_handler(int sig, sigcontext_struct *sc);
@ -779,6 +782,11 @@ quit:
static void Quit(void) static void Quit(void)
{ {
#if EMULATED_PPC
// Exit PowerPC emulation
exit_emul_ppc();
#endif
// Stop 60Hz thread // Stop 60Hz thread
if (tick_thread_active) { if (tick_thread_active) {
pthread_cancel(tick_thread); pthread_cancel(tick_thread);
@ -882,8 +890,6 @@ static void Quit(void)
*/ */
#if EMULATED_PPC #if EMULATED_PPC
extern void emul_ppc(uint32 start);
extern void init_emul_ppc(void);
void jump_to_rom(uint32 entry) void jump_to_rom(uint32 entry)
{ {
init_emul_ppc(); init_emul_ppc();

View File

@ -35,6 +35,7 @@ struct basic_block_info
}; };
uintptr pc; uintptr pc;
uintptr end_pc;
int32 count; int32 count;
uint32 size; uint32 size;
uint32 c1; uint32 c1;

View File

@ -552,6 +552,18 @@ void init_emul_ppc(void)
#endif #endif
} }
/*
 *  Deinitialize emulation
 *
 *  Destroys the CPU object(s) and resets the pointers to NULL, so that a
 *  repeated call (or a call on a path where they are already NULL) is a
 *  harmless no-op instead of a double delete.
 */
void exit_emul_ppc(void)
{
	// Deleting a null pointer is well-defined, so no guard is needed here
	delete main_cpu;
	main_cpu = NULL;
#if MULTICORE_CPU
	delete interrupt_cpu;
	interrupt_cpu = NULL;
#endif
}
/* /*
* Emulation loop * Emulation loop
*/ */

View File

@ -45,7 +45,7 @@ private:
entry * active; entry * active;
entry * dormant; entry * dormant;
uint32 cacheline(uint32 addr) const { uint32 cacheline(uintptr addr) const {
return (addr >> 2) & HASH_MASK; return (addr >> 2) & HASH_MASK;
} }
@ -59,6 +59,7 @@ public:
void initialize(); void initialize();
void clear(); void clear();
void clear_range(uintptr start, uintptr end);
block_info *find(uintptr pc); block_info *find(uintptr pc);
entry *first_active() const; entry *first_active() const;
entry *first_dormant() const; entry *first_dormant() const;
@ -116,6 +117,26 @@ void block_cache< block_info, block_allocator >::clear()
dormant = NULL; dormant = NULL;
} }
/*
 *  Invalidate all active blocks that overlap the address range [start, end)
 *
 *  Walks the active list and, for every entry whose [pc, end_pc] span
 *  touches the range, unlinks it (remove_from_cl_list / remove_from_list)
 *  and hands it to delete_blockinfo(). An entry is considered overlapping
 *  when it starts inside the range, ends inside the range, or completely
 *  encloses the range. The last case matters when the invalidated region
 *  is smaller than a cached block: such a block must be dropped as well.
 *
 *  start  first address of the range to invalidate (inclusive)
 *  end    end address of the range to invalidate (exclusive)
 */
template< class block_info, template<class T> class block_allocator >
inline void block_cache< block_info, block_allocator >::clear_range(uintptr start, uintptr end)
{
	entry *p = active;
	while (p) {
		// Fetch the successor first: q may be unlinked and deleted below
		entry *q = p;
		p = p->next;

		const bool starts_inside = (q->pc >= start) && (q->pc < end);
		const bool ends_inside = (q->end_pc >= start) && (q->end_pc < end);
		// Block begins before the range and extends to or past its end
		const bool encloses_range = (q->pc < start) && (q->end_pc >= end);

		if (starts_inside || ends_inside || encloses_range) {
			remove_from_cl_list(q);
			remove_from_list(q);
			delete_blockinfo(q);
		}
	}
}
template< class block_info, template<class T> class block_allocator > template< class block_info, template<class T> class block_allocator >
inline block_info *block_cache< block_info, block_allocator >::new_blockinfo() inline block_info *block_cache< block_info, block_allocator >::new_blockinfo()
{ {

View File

@ -37,7 +37,6 @@ struct powerpc_block_info
uint32 opcode; uint32 opcode;
}; };
uint32 end_pc;
decode_info * di; decode_info * di;
}; };

View File

@ -31,6 +31,17 @@
#define DEBUG 0 #define DEBUG 0
#include "debug.h" #include "debug.h"
// Define to gather some compile time statistics
// When non-zero, the counters below are maintained and a summary is
// printed from the powerpc_cpu destructor.
#define PROFILE_COMPILE_TIME 1
#if PROFILE_COMPILE_TIME
#include <time.h>
// Number of passes through the block predecode step in execute()
static uint32 compile_count = 0;
// Accumulated clock() ticks spent predecoding blocks
static clock_t compile_time = 0;
// clock() value sampled at the end of powerpc_cpu::initialize()
static clock_t emul_start_time = 0;
// clock() value sampled on entry to the powerpc_cpu destructor
static clock_t emul_end_time = 0;
#endif
void powerpc_cpu::set_register(int id, any_register const & value) void powerpc_cpu::set_register(int id, any_register const & value)
{ {
if (id >= powerpc_registers::GPR(0) && id <= powerpc_registers::GPR(31)) { if (id >= powerpc_registers::GPR(0) && id <= powerpc_registers::GPR(31)) {
@ -231,10 +242,34 @@ void powerpc_cpu::initialize()
#if ENABLE_MON #if ENABLE_MON
mon_init(); mon_init();
#endif #endif
#if PROFILE_COMPILE_TIME
emul_start_time = clock();
#endif
} }
powerpc_cpu::~powerpc_cpu() powerpc_cpu::~powerpc_cpu()
{ {
#if PROFILE_COMPILE_TIME
emul_end_time = clock();
const char *type = NULL;
#ifndef PPC_NO_DECODE_CACHE
type = "predecode";
#endif
if (type) {
printf("### Statistics for block %s\n", type);
printf("Total block %s count : %d\n", type, compile_count);
uint32 emul_time = emul_end_time - emul_start_time;
printf("Total emulation time : %.1f sec\n",
double(emul_time) / double(CLOCKS_PER_SEC));
printf("Total %s time : %.1f sec (%.1f%%)\n", type,
double(compile_time) / double(CLOCKS_PER_SEC),
100.0 * double(compile_time) / double(emul_time));
printf("\n");
}
#endif
kill_decode_cache(); kill_decode_cache();
#if ENABLE_MON #if ENABLE_MON
@ -318,6 +353,10 @@ void powerpc_cpu::execute(uint32 entry, bool enable_cache)
#ifndef PPC_NO_DECODE_CACHE #ifndef PPC_NO_DECODE_CACHE
if (enable_cache) { if (enable_cache) {
for (;;) { for (;;) {
#if PROFILE_COMPILE_TIME
compile_count++;
clock_t start_time = clock();
#endif
block_info *bi = block_cache.new_blockinfo(); block_info *bi = block_cache.new_blockinfo();
bi->init(pc()); bi->init(pc());
@ -360,13 +399,14 @@ void powerpc_cpu::execute(uint32 entry, bool enable_cache)
di = bi->di + blocklen; di = bi->di + blocklen;
} }
} while ((ii->cflow & CFLOW_END_BLOCK) == 0); } while ((ii->cflow & CFLOW_END_BLOCK) == 0);
#ifdef PPC_LAZY_PC_UPDATE
bi->end_pc = dpc; bi->end_pc = dpc;
#endif
bi->size = di - bi->di; bi->size = di - bi->di;
block_cache.add_to_cl_list(bi); block_cache.add_to_cl_list(bi);
block_cache.add_to_active_list(bi); block_cache.add_to_active_list(bi);
decode_cache_p += bi->size; decode_cache_p += bi->size;
#if PROFILE_COMPILE_TIME
compile_time += (clock() - start_time);
#endif
// Execute all cached blocks // Execute all cached blocks
for (;;) { for (;;) {
@ -379,21 +419,19 @@ void powerpc_cpu::execute(uint32 entry, bool enable_cache)
di[i].execute(this, di[i].opcode); di[i].execute(this, di[i].opcode);
#else #else
const int r = bi->size % 4; const int r = bi->size % 4;
di += r;
int n = (bi->size + 3) / 4;
switch (r) { switch (r) {
case 3: di->execute(this, di->opcode); di++; case 0: do {
case 2: di->execute(this, di->opcode); di++; di += 4;
case 1: di->execute(this, di->opcode); di++; di[-4].execute(this, di[-4].opcode);
case 0: break; case 3: di[-3].execute(this, di[-3].opcode);
} case 2: di[-2].execute(this, di[-2].opcode);
const int n = bi->size / 4; case 1: di[-1].execute(this, di[-1].opcode);
for (int i = 0; i < n; i++) { } while (--n > 0);
di[0].execute(this, di[0].opcode);
di[1].execute(this, di[1].opcode);
di[2].execute(this, di[2].opcode);
di[3].execute(this, di[3].opcode);
di += 4;
} }
#endif #endif
if (!spcflags().empty()) { if (!spcflags().empty()) {
if (!check_spcflags()) if (!check_spcflags())
return; return;
@ -483,8 +521,8 @@ void powerpc_cpu::invalidate_cache()
void powerpc_cpu::invalidate_cache_range(uintptr start, uintptr end) void powerpc_cpu::invalidate_cache_range(uintptr start, uintptr end)
{ {
D(bug("Invalidate cache block [%08x - %08x]\n", start, end));
#ifndef PPC_NO_DECODE_CACHE #ifndef PPC_NO_DECODE_CACHE
// TODO: partial translation cache invalidatation block_cache.clear_range(start, end);
invalidate_cache();
#endif #endif
} }

View File

@ -1101,7 +1101,6 @@ void powerpc_cpu::execute_mftbr(uint32 opcode)
void powerpc_cpu::execute_invalidate_cache_range() void powerpc_cpu::execute_invalidate_cache_range()
{ {
if (cache_range.start != cache_range.end) { if (cache_range.start != cache_range.end) {
D(bug("Invalidate Cache Block [%08x - %08x]\n", cache_range.start, cache_range.end));
invalidate_cache_range(cache_range.start, cache_range.end); invalidate_cache_range(cache_range.start, cache_range.end);
cache_range.start = cache_range.end = 0; cache_range.start = cache_range.end = 0;
} }