Implement partial block cache invalidation. Rewrite core cached blocks

execution loop with a Duff's device. Gather some predecode time statistics.
These show that only around 2% of total emulation time is spent
predecoding instructions.
This commit is contained in:
gbeauche 2003-11-03 21:28:32 +00:00
parent f0ea192460
commit a42281aad1
7 changed files with 97 additions and 21 deletions

View File

@ -224,6 +224,9 @@ static void *nvram_func(void *arg);
static void *tick_func(void *arg);
#if EMULATED_PPC
static void sigusr2_handler(int sig);
extern void emul_ppc(uint32 start);
extern void init_emul_ppc(void);
extern void exit_emul_ppc(void);
#else
static void sigusr2_handler(int sig, sigcontext_struct *sc);
static void sigsegv_handler(int sig, sigcontext_struct *sc);
@ -779,6 +782,11 @@ quit:
static void Quit(void)
{
#if EMULATED_PPC
// Exit PowerPC emulation
exit_emul_ppc();
#endif
// Stop 60Hz thread
if (tick_thread_active) {
pthread_cancel(tick_thread);
@ -882,8 +890,6 @@ static void Quit(void)
*/
#if EMULATED_PPC
extern void emul_ppc(uint32 start);
extern void init_emul_ppc(void);
void jump_to_rom(uint32 entry)
{
init_emul_ppc();

View File

@ -35,6 +35,7 @@ struct basic_block_info
};
uintptr pc;
uintptr end_pc;
int32 count;
uint32 size;
uint32 c1;

View File

@ -552,6 +552,18 @@ void init_emul_ppc(void)
#endif
}
/*
* Deinitialize emulation
*/
void exit_emul_ppc(void)
{
	// Destroy the CPU object(s) and reset the pointers afterwards, so that
	// a second call is harmless (deleting a null pointer is a no-op) and a
	// late access fails on a null pointer instead of touching freed memory.
	delete main_cpu;
	main_cpu = NULL;
#if MULTICORE_CPU
	delete interrupt_cpu;
	interrupt_cpu = NULL;
#endif
}
/*
* Emulation loop
*/

View File

@ -45,7 +45,7 @@ private:
entry * active;
entry * dormant;
// Compute the hash slot for an address: the two low bits are shifted out
// and the result is masked with HASH_MASK. The argument is a uintptr, the
// same address type that clear_range() and find() take.
uint32 cacheline(uintptr addr) const {
	return (addr >> 2) & HASH_MASK;
}
@ -59,6 +59,7 @@ public:
void initialize();
void clear();
void clear_range(uintptr start, uintptr end);
block_info *find(uintptr pc);
entry *first_active() const;
entry *first_dormant() const;
@ -116,6 +117,26 @@ void block_cache< block_info, block_allocator >::clear()
dormant = NULL;
}
/*
 *  Remove from the cache every active block that overlaps the address
 *  range [start, end).
 *
 *  A block is dropped when its first address (pc) lies in the range, when
 *  its last recorded address (end_pc) lies in the range, or when it starts
 *  before the range and ends at or past its end, i.e. the block encloses
 *  the whole range. Without the last case, invalidating a small window in
 *  the middle of a long block would leave that stale block in the cache.
 */
template< class block_info, template<class T> class block_allocator >
inline void block_cache< block_info, block_allocator >::clear_range(uintptr start, uintptr end)
{
	entry *next;
	for (entry *p = active; p != NULL; p = next) {
		// Fetch the successor first: the current entry may be deleted below
		next = p->next;

		const bool starts_inside = (p->pc >= start) && (p->pc < end);
		const bool ends_inside = (p->end_pc >= start) && (p->end_pc < end);
		const bool spans_range = (p->pc < start) && (p->end_pc >= end);

		if (starts_inside || ends_inside || spans_range) {
			remove_from_cl_list(p);
			remove_from_list(p);
			delete_blockinfo(p);
		}
	}
}
template< class block_info, template<class T> class block_allocator >
inline block_info *block_cache< block_info, block_allocator >::new_blockinfo()
{

View File

@ -37,7 +37,6 @@ struct powerpc_block_info
uint32 opcode;
};
uint32 end_pc;
decode_info * di;
};

View File

@ -31,6 +31,17 @@
#define DEBUG 0
#include "debug.h"
// Set to 1 to gather block predecode ("compile") time statistics; they are
// printed from the powerpc_cpu destructor when the decode cache is enabled
#define PROFILE_COMPILE_TIME 1
#if PROFILE_COMPILE_TIME
#include <time.h>
// Number of blocks predecoded so far (incremented in powerpc_cpu::execute)
static uint32 compile_count = 0;
// Accumulated clock() ticks spent predecoding blocks
static clock_t compile_time = 0;
// clock() value sampled at the end of powerpc_cpu::initialize()
static clock_t emul_start_time = 0;
// clock() value sampled at the start of the powerpc_cpu destructor
static clock_t emul_end_time = 0;
#endif
void powerpc_cpu::set_register(int id, any_register const & value)
{
if (id >= powerpc_registers::GPR(0) && id <= powerpc_registers::GPR(31)) {
@ -231,10 +242,34 @@ void powerpc_cpu::initialize()
#if ENABLE_MON
mon_init();
#endif
#if PROFILE_COMPILE_TIME
emul_start_time = clock();
#endif
}
powerpc_cpu::~powerpc_cpu()
{
#if PROFILE_COMPILE_TIME
emul_end_time = clock();
const char *type = NULL;
#ifndef PPC_NO_DECODE_CACHE
type = "predecode";
#endif
if (type) {
printf("### Statistics for block %s\n", type);
printf("Total block %s count : %d\n", type, compile_count);
uint32 emul_time = emul_end_time - emul_start_time;
printf("Total emulation time : %.1f sec\n",
double(emul_time) / double(CLOCKS_PER_SEC));
printf("Total %s time : %.1f sec (%.1f%%)\n", type,
double(compile_time) / double(CLOCKS_PER_SEC),
100.0 * double(compile_time) / double(emul_time));
printf("\n");
}
#endif
kill_decode_cache();
#if ENABLE_MON
@ -318,6 +353,10 @@ void powerpc_cpu::execute(uint32 entry, bool enable_cache)
#ifndef PPC_NO_DECODE_CACHE
if (enable_cache) {
for (;;) {
#if PROFILE_COMPILE_TIME
compile_count++;
clock_t start_time = clock();
#endif
block_info *bi = block_cache.new_blockinfo();
bi->init(pc());
@ -360,13 +399,14 @@ void powerpc_cpu::execute(uint32 entry, bool enable_cache)
di = bi->di + blocklen;
}
} while ((ii->cflow & CFLOW_END_BLOCK) == 0);
#ifdef PPC_LAZY_PC_UPDATE
bi->end_pc = dpc;
#endif
bi->size = di - bi->di;
block_cache.add_to_cl_list(bi);
block_cache.add_to_active_list(bi);
decode_cache_p += bi->size;
#if PROFILE_COMPILE_TIME
compile_time += (clock() - start_time);
#endif
// Execute all cached blocks
for (;;) {
@ -379,21 +419,19 @@ void powerpc_cpu::execute(uint32 entry, bool enable_cache)
di[i].execute(this, di[i].opcode);
#else
const int r = bi->size % 4;
di += r;
int n = (bi->size + 3) / 4;
switch (r) {
case 3: di->execute(this, di->opcode); di++;
case 2: di->execute(this, di->opcode); di++;
case 1: di->execute(this, di->opcode); di++;
case 0: break;
}
const int n = bi->size / 4;
for (int i = 0; i < n; i++) {
di[0].execute(this, di[0].opcode);
di[1].execute(this, di[1].opcode);
di[2].execute(this, di[2].opcode);
di[3].execute(this, di[3].opcode);
case 0: do {
di += 4;
di[-4].execute(this, di[-4].opcode);
case 3: di[-3].execute(this, di[-3].opcode);
case 2: di[-2].execute(this, di[-2].opcode);
case 1: di[-1].execute(this, di[-1].opcode);
} while (--n > 0);
}
#endif
if (!spcflags().empty()) {
if (!check_spcflags())
return;
@ -483,8 +521,8 @@ void powerpc_cpu::invalidate_cache()
/*
 *  Invalidate the predecoded blocks that overlap the given address range
 *
 *  Only the affected blocks are dropped, through block_cache.clear_range();
 *  the rest of the decode cache is kept. A full invalidate_cache() here
 *  would discard every block and make the range-based flush pointless.
 *  Nothing is flushed when the decode cache is compiled out
 *  (PPC_NO_DECODE_CACHE).
 */
void powerpc_cpu::invalidate_cache_range(uintptr start, uintptr end)
{
	D(bug("Invalidate cache block [%08x - %08x]\n", start, end));
#ifndef PPC_NO_DECODE_CACHE
	block_cache.clear_range(start, end);
#endif
}

View File

@ -1101,7 +1101,6 @@ void powerpc_cpu::execute_mftbr(uint32 opcode)
void powerpc_cpu::execute_invalidate_cache_range()
{
if (cache_range.start != cache_range.end) {
D(bug("Invalidate Cache Block [%08x - %08x]\n", cache_range.start, cache_range.end));
invalidate_cache_range(cache_range.start, cache_range.end);
cache_range.start = cache_range.end = 0;
}