mirror of
https://github.com/kanjitalk755/macemu.git
synced 2024-11-27 02:49:42 +00:00
Implement partial block cache invalidation. Rewrite the core cached-block
execution loop as a Duff's device. Gather some predecode time statistics, which show that only around 2% of total emulation time is spent predecoding instructions.
This commit is contained in:
parent
f0ea192460
commit
a42281aad1
@ -224,6 +224,9 @@ static void *nvram_func(void *arg);
|
||||
static void *tick_func(void *arg);
|
||||
#if EMULATED_PPC
|
||||
static void sigusr2_handler(int sig);
|
||||
extern void emul_ppc(uint32 start);
|
||||
extern void init_emul_ppc(void);
|
||||
extern void exit_emul_ppc(void);
|
||||
#else
|
||||
static void sigusr2_handler(int sig, sigcontext_struct *sc);
|
||||
static void sigsegv_handler(int sig, sigcontext_struct *sc);
|
||||
@ -779,6 +782,11 @@ quit:
|
||||
|
||||
static void Quit(void)
|
||||
{
|
||||
#if EMULATED_PPC
|
||||
// Exit PowerPC emulation
|
||||
exit_emul_ppc();
|
||||
#endif
|
||||
|
||||
// Stop 60Hz thread
|
||||
if (tick_thread_active) {
|
||||
pthread_cancel(tick_thread);
|
||||
@ -882,8 +890,6 @@ static void Quit(void)
|
||||
*/
|
||||
|
||||
#if EMULATED_PPC
|
||||
extern void emul_ppc(uint32 start);
|
||||
extern void init_emul_ppc(void);
|
||||
void jump_to_rom(uint32 entry)
|
||||
{
|
||||
init_emul_ppc();
|
||||
|
@ -35,6 +35,7 @@ struct basic_block_info
|
||||
};
|
||||
|
||||
uintptr pc;
|
||||
uintptr end_pc;
|
||||
int32 count;
|
||||
uint32 size;
|
||||
uint32 c1;
|
||||
|
@ -552,6 +552,18 @@ void init_emul_ppc(void)
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* Deinitialize emulation
|
||||
*/
|
||||
|
||||
void exit_emul_ppc(void)
|
||||
{
|
||||
delete main_cpu;
|
||||
#if MULTICORE_CPU
|
||||
delete interrupt_cpu;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* Emulation loop
|
||||
*/
|
||||
|
@ -45,7 +45,7 @@ private:
|
||||
entry * active;
|
||||
entry * dormant;
|
||||
|
||||
// Reduce an address to a table index: drop the two low bits, then mask
// with HASH_MASK. The parameter is uintptr because a uint32 parameter
// would narrow any wider address before it is hashed.
uint32 cacheline(uintptr addr) const {
	return (addr >> 2) & HASH_MASK;
}
|
||||
|
||||
@ -59,6 +59,7 @@ public:
|
||||
|
||||
void initialize();
|
||||
void clear();
|
||||
void clear_range(uintptr start, uintptr end);
|
||||
block_info *find(uintptr pc);
|
||||
entry *first_active() const;
|
||||
entry *first_dormant() const;
|
||||
@ -116,6 +117,26 @@ void block_cache< block_info, block_allocator >::clear()
|
||||
dormant = NULL;
|
||||
}
|
||||
|
||||
template< class block_info, template<class T> class block_allocator >
|
||||
inline void block_cache< block_info, block_allocator >::clear_range(uintptr start, uintptr end)
|
||||
{
|
||||
if (!active)
|
||||
return;
|
||||
|
||||
entry *q;
|
||||
entry *p = active;
|
||||
while (p) {
|
||||
q = p;
|
||||
p = p->next;
|
||||
if (((q->pc >= start) && (q->pc < end)) ||
|
||||
((q->end_pc >= start) && (q->end_pc < end))) {
|
||||
remove_from_cl_list(q);
|
||||
remove_from_list(q);
|
||||
delete_blockinfo(q);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template< class block_info, template<class T> class block_allocator >
|
||||
inline block_info *block_cache< block_info, block_allocator >::new_blockinfo()
|
||||
{
|
||||
|
@ -37,7 +37,6 @@ struct powerpc_block_info
|
||||
uint32 opcode;
|
||||
};
|
||||
|
||||
uint32 end_pc;
|
||||
decode_info * di;
|
||||
};
|
||||
|
||||
|
@ -31,6 +31,17 @@
|
||||
#define DEBUG 0
|
||||
#include "debug.h"
|
||||
|
||||
// Define to gather some compile time statistics
|
||||
#define PROFILE_COMPILE_TIME 1
|
||||
|
||||
#if PROFILE_COMPILE_TIME
|
||||
#include <time.h>
|
||||
static uint32 compile_count = 0;
|
||||
static clock_t compile_time = 0;
|
||||
static clock_t emul_start_time = 0;
|
||||
static clock_t emul_end_time = 0;
|
||||
#endif
|
||||
|
||||
void powerpc_cpu::set_register(int id, any_register const & value)
|
||||
{
|
||||
if (id >= powerpc_registers::GPR(0) && id <= powerpc_registers::GPR(31)) {
|
||||
@ -231,10 +242,34 @@ void powerpc_cpu::initialize()
|
||||
#if ENABLE_MON
|
||||
mon_init();
|
||||
#endif
|
||||
|
||||
#if PROFILE_COMPILE_TIME
|
||||
emul_start_time = clock();
|
||||
#endif
|
||||
}
|
||||
|
||||
powerpc_cpu::~powerpc_cpu()
|
||||
{
|
||||
#if PROFILE_COMPILE_TIME
|
||||
emul_end_time = clock();
|
||||
|
||||
const char *type = NULL;
|
||||
#ifndef PPC_NO_DECODE_CACHE
|
||||
type = "predecode";
|
||||
#endif
|
||||
if (type) {
|
||||
printf("### Statistics for block %s\n", type);
|
||||
printf("Total block %s count : %d\n", type, compile_count);
|
||||
uint32 emul_time = emul_end_time - emul_start_time;
|
||||
printf("Total emulation time : %.1f sec\n",
|
||||
double(emul_time) / double(CLOCKS_PER_SEC));
|
||||
printf("Total %s time : %.1f sec (%.1f%%)\n", type,
|
||||
double(compile_time) / double(CLOCKS_PER_SEC),
|
||||
100.0 * double(compile_time) / double(emul_time));
|
||||
printf("\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
kill_decode_cache();
|
||||
|
||||
#if ENABLE_MON
|
||||
@ -318,6 +353,10 @@ void powerpc_cpu::execute(uint32 entry, bool enable_cache)
|
||||
#ifndef PPC_NO_DECODE_CACHE
|
||||
if (enable_cache) {
|
||||
for (;;) {
|
||||
#if PROFILE_COMPILE_TIME
|
||||
compile_count++;
|
||||
clock_t start_time = clock();
|
||||
#endif
|
||||
block_info *bi = block_cache.new_blockinfo();
|
||||
bi->init(pc());
|
||||
|
||||
@ -360,13 +399,14 @@ void powerpc_cpu::execute(uint32 entry, bool enable_cache)
|
||||
di = bi->di + blocklen;
|
||||
}
|
||||
} while ((ii->cflow & CFLOW_END_BLOCK) == 0);
|
||||
#ifdef PPC_LAZY_PC_UPDATE
|
||||
bi->end_pc = dpc;
|
||||
#endif
|
||||
bi->size = di - bi->di;
|
||||
block_cache.add_to_cl_list(bi);
|
||||
block_cache.add_to_active_list(bi);
|
||||
decode_cache_p += bi->size;
|
||||
#if PROFILE_COMPILE_TIME
|
||||
compile_time += (clock() - start_time);
|
||||
#endif
|
||||
|
||||
// Execute all cached blocks
|
||||
for (;;) {
|
||||
@ -379,21 +419,19 @@ void powerpc_cpu::execute(uint32 entry, bool enable_cache)
|
||||
di[i].execute(this, di[i].opcode);
|
||||
#else
|
||||
const int r = bi->size % 4;
|
||||
di += r;
|
||||
int n = (bi->size + 3) / 4;
|
||||
switch (r) {
|
||||
case 3: di->execute(this, di->opcode); di++;
|
||||
case 2: di->execute(this, di->opcode); di++;
|
||||
case 1: di->execute(this, di->opcode); di++;
|
||||
case 0: break;
|
||||
}
|
||||
const int n = bi->size / 4;
|
||||
for (int i = 0; i < n; i++) {
|
||||
di[0].execute(this, di[0].opcode);
|
||||
di[1].execute(this, di[1].opcode);
|
||||
di[2].execute(this, di[2].opcode);
|
||||
di[3].execute(this, di[3].opcode);
|
||||
di += 4;
|
||||
case 0: do {
|
||||
di += 4;
|
||||
di[-4].execute(this, di[-4].opcode);
|
||||
case 3: di[-3].execute(this, di[-3].opcode);
|
||||
case 2: di[-2].execute(this, di[-2].opcode);
|
||||
case 1: di[-1].execute(this, di[-1].opcode);
|
||||
} while (--n > 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!spcflags().empty()) {
|
||||
if (!check_spcflags())
|
||||
return;
|
||||
@ -483,8 +521,8 @@ void powerpc_cpu::invalidate_cache()
|
||||
|
||||
void powerpc_cpu::invalidate_cache_range(uintptr start, uintptr end)
{
	D(bug("Invalidate cache block [%08x - %08x]\n", start, end));
#ifndef PPC_NO_DECODE_CACHE
	// Partial invalidation: discard only the cached blocks that touch the
	// given range. A full invalidate_cache() here would throw away every
	// block first and make the ranged clear pointless.
	block_cache.clear_range(start, end);
#endif
}
|
||||
|
@ -1101,7 +1101,6 @@ void powerpc_cpu::execute_mftbr(uint32 opcode)
|
||||
void powerpc_cpu::execute_invalidate_cache_range()
|
||||
{
|
||||
if (cache_range.start != cache_range.end) {
|
||||
D(bug("Invalidate Cache Block [%08x - %08x]\n", cache_range.start, cache_range.end));
|
||||
invalidate_cache_range(cache_range.start, cache_range.end);
|
||||
cache_range.start = cache_range.end = 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user