- Remove dead code in readcpu.cpp concerning CONST_JUMP control flow.

- Replace unused fl_compiled with fl_const_jump
- Implement block inlining enabled with USE_INLINING && USE_CHECKSUM_INFO.
  However, this is currently disabled, as it brings little benefit and makes
  a cache/code generation problem with FPU JIT-compiled code even more visible.
- Actual checksum values are now an integral part of a blockinfo, regardless
  of whether USE_CHECKSUM_INFO is set. This reduces the number of elements in
  that structure and slightly speeds up the checksum calculation of chained
  blocks.
- Don't care about show_checksum() for now.
This commit is contained in:
gbeauche 2002-10-02 15:55:10 +00:00
parent 21909f1eed
commit 94a9038826
4 changed files with 129 additions and 80 deletions

View File

@ -40,6 +40,9 @@ union cacheline {
/* Use chain of checksum_info_t to compute the block checksum */
#define USE_CHECKSUM_INFO 0
/* Use code inlining, aka follow-up of constant jumps */
#define USE_INLINING 0
#define USE_F_ALIAS 1
#define USE_OFFSET 1
#define COMP_DEBUG 1
@ -491,8 +494,6 @@ typedef struct dep_t {
typedef struct checksum_info_t {
uae_u8 *start_p;
uae_u32 length;
uae_u32 c1;
uae_u32 c2;
struct checksum_info_t *next;
} checksum_info;
@ -510,19 +511,13 @@ typedef struct blockinfo_t {
uae_u8* pc_p;
#if USE_CHECKSUM_INFO
checksum_info *csi;
# define CSI_TYPE checksum_info
# define CSI_START_P(csi) (csi)->start_p
# define CSI_LENGTH(csi) (csi)->length
#else
uae_u32 c1;
uae_u32 c2;
#if USE_CHECKSUM_INFO
checksum_info *csi;
#else
uae_u32 len;
uae_u32 min_pcp;
# define CSI_TYPE blockinfo
# define CSI_START_P(csi) (csi)->min_pcp
# define CSI_LENGTH(csi) (csi)->len
#endif
struct blockinfo_t* next_same_cl;

View File

@ -115,6 +115,11 @@ static inline int end_block(uae_u32 opcode)
return (prop[opcode].cflow & fl_end_block);
}
/* Tell whether the control-flow class recorded in prop[] for this opcode
   is exactly fl_const_jump. */
static inline bool is_const_jump(uae_u32 opcode)
{
	if (prop[opcode].cflow != fl_const_jump)
		return false;
	return true;
}
uae_u8* start_pc_p;
uae_u32 start_pc;
uae_u32 current_block_pc_p;
@ -576,6 +581,27 @@ static HardBlockAllocator<blockinfo> BlockInfoAllocator;
static HardBlockAllocator<checksum_info> ChecksumInfoAllocator;
#endif
/* Acquire a checksum_info from ChecksumInfoAllocator and hand it back
   with its `next` link cleared, i.e. as a one-element chain. */
static __inline__ checksum_info *alloc_checksum_info(void)
{
	checksum_info *fresh = ChecksumInfoAllocator.acquire();

	fresh->next = NULL;
	return fresh;
}
/* Give a single checksum_info back to ChecksumInfoAllocator.
   The `next` link is cleared first, so the node is unlinked from any
   chain before it is released. Only this one node is released. */
static __inline__ void free_checksum_info(checksum_info *csi)
{
	checksum_info *const node = csi;

	node->next = NULL;
	ChecksumInfoAllocator.release(node);
}
/* Release every node of the singly linked chain starting at `csi`.
   A NULL `csi` is an empty chain and releases nothing. */
static __inline__ void free_checksum_info_chain(checksum_info *csi)
{
	checksum_info *following;

	for (checksum_info *node = csi; node != NULL; node = following) {
		/* Remember the successor first: free_checksum_info() clears the link. */
		following = node->next;
		free_checksum_info(node);
	}
}
static __inline__ blockinfo *alloc_blockinfo(void)
{
@ -589,12 +615,8 @@ static __inline__ blockinfo *alloc_blockinfo(void)
/* Release a blockinfo to BlockInfoAllocator.
   When USE_CHECKSUM_INFO is enabled, the checksum_info chain hanging off
   bi->csi is released first. */
static __inline__ void free_blockinfo(blockinfo *bi)
{
#if USE_CHECKSUM_INFO
	/* Release the chain exactly once, through the shared helper. Walking
	   and releasing the nodes by hand and then calling the helper on the
	   same head would release every node twice. */
	free_checksum_info_chain(bi->csi);
	bi->csi = NULL;
#endif
	BlockInfoAllocator.release(bi);
}
@ -4611,6 +4633,7 @@ void compiler_init(void)
write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1));
write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS));
write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET));
write_log("<JIT compiler> : block inlining : %s\n", str_on_off(USE_INLINING));
write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA));
// Build compiler tables
@ -5208,34 +5231,46 @@ void alloc_cache(void)
extern cpuop_rettype op_illg_1 (uae_u32 opcode) REGPARAM;
static void calc_checksum(CSI_TYPE* csi, uae_u32* c1, uae_u32* c2)
static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
{
uae_u32 k1=0;
uae_u32 k2=0;
uae_s32 len=CSI_LENGTH(csi);
uae_u32 tmp=(uae_u32)CSI_START_P(csi);
uae_u32* pos;
uae_u32 k1 = 0;
uae_u32 k2 = 0;
len+=(tmp&3);
tmp&=(~3);
pos=(uae_u32*)tmp;
#if USE_CHECKSUM_INFO
checksum_info *csi = bi->csi;
Dif(!csi) abort();
while (csi) {
uae_s32 len = csi->length;
uae_u32 tmp = (uae_u32)csi->start_p;
#else
uae_s32 len = bi->len;
uae_u32 tmp = (uae_u32)bi->min_pcp;
#endif
uae_u32*pos;
if (len<0 || len>MAX_CHECKSUM_LEN) {
*c1=0;
*c2=0;
}
else {
while (len>0) {
k1+=*pos;
k2^=*pos;
pos++;
len-=4;
len += (tmp & 3);
tmp &= ~3;
pos = (uae_u32 *)tmp;
if (len >= 0 && len <= MAX_CHECKSUM_LEN) {
while (len > 0) {
k1 += *pos;
k2 ^= *pos;
pos++;
len -= 4;
}
}
#if USE_CHECKSUM_INFO
csi = csi->next;
}
*c1=k1;
*c2=k2;
}
#endif
*c1 = k1;
*c2 = k2;
}
#if 0
static void show_checksum(CSI_TYPE* csi)
{
uae_u32 k1=0;
@ -5260,6 +5295,7 @@ static void show_checksum(CSI_TYPE* csi)
write_log(" bla\n");
}
}
#endif
int check_for_cache_miss(void)
@ -5320,19 +5356,6 @@ static inline int block_check_checksum(blockinfo* bi)
checksum_count++;
#if USE_CHECKSUM_INFO
checksum_info *csi = bi->csi;
Dif(!csi) abort();
isgood = true;
while (csi && isgood) {
if (csi->c1 || csi->c2)
calc_checksum(csi,&c1,&c2);
else
c1 = c2 = 1; /* Make sure it doesn't match */
isgood = isgood && (c1 == csi->c1 && c2 == csi->c2);
csi = csi->next;
}
#else
if (bi->c1 || bi->c2)
calc_checksum(bi,&c1,&c2);
else {
@ -5340,7 +5363,6 @@ static inline int block_check_checksum(blockinfo* bi)
}
isgood=(c1==bi->c1 && c2==bi->c2);
#endif
if (isgood) {
/* This block is still OK. So we reactivate. Of course, that
@ -5639,9 +5661,17 @@ void build_comp(void)
prop[opcode].cflow = fl_trap; // ILLEGAL instructions do trap
}
/* True when the table68k entry for opcode `opc` is either a Bcc whose
   condition code field is below 2, or a BSR.
   NOTE(review): cc values 0 and 1 are presumably the "true"/"false"
   conditions -- confirm against the table68k definition. */
#define IS_CONST_JUMP(opc) \
( ((table68k[opc].mnemo == i_Bcc) && (table68k[opc].cc < 2)) \
|| (table68k[opc].mnemo == i_BSR) \
)
for (i = 0; tbl[i].opcode < 65536; i++) {
int cflow = table68k[tbl[i].opcode].cflow;
prop[cft_map(tbl[i].opcode)].cflow = cflow;
if (USE_INLINING && IS_CONST_JUMP(tbl[i].opcode))
prop[cft_map(tbl[i].opcode)].cflow = fl_const_jump;
else
prop[cft_map(tbl[i].opcode)].cflow = cflow;
int uses_fpu = tbl[i].specific & 32;
if (uses_fpu && avoid_fpu)
@ -5650,6 +5680,8 @@ void build_comp(void)
compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler;
}
#undef IS_CONST_JUMP
for (i = 0; nftbl[i].opcode < 65536; i++) {
int uses_fpu = tbl[i].specific & 32;
if (uses_fpu && avoid_fpu)
@ -5934,8 +5966,14 @@ static void compile_block(cpu_history* pc_hist, int blocklen)
int r;
int was_comp=0;
uae_u8 liveflags[MAXRUN+1];
#if USE_CHECKSUM_INFO
bool trace_in_rom = isinrom((uintptr)pc_hist[0].location);
uae_u32 max_pcp=(uae_u32)pc_hist[blocklen - 1].location;
uae_u32 min_pcp=max_pcp;
#else
uae_u32 max_pcp=(uae_u32)pc_hist[0].location;
uae_u32 min_pcp=max_pcp;
#endif
uae_u32 cl=cacheline(pc_hist[0].location);
void* specflags=(void*)&regs.spcflags;
blockinfo* bi=NULL;
@ -5979,6 +6017,10 @@ static void compile_block(cpu_history* pc_hist, int blocklen)
remove_deps(bi); /* We are about to create new code */
bi->optlevel=optlev;
bi->pc_p=(uae_u8*)pc_hist[0].location;
#if USE_CHECKSUM_INFO
free_checksum_info_chain(bi->csi);
bi->csi = NULL;
#endif
liveflags[blocklen]=0x1f; /* All flags needed afterwards */
i=blocklen;
@ -5986,10 +6028,25 @@ static void compile_block(cpu_history* pc_hist, int blocklen)
uae_u16* currpcp=pc_hist[i].location;
uae_u32 op=DO_GET_OPCODE(currpcp);
#if USE_CHECKSUM_INFO
trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp);
#if USE_INLINING
if (is_const_jump(op)) {
checksum_info *csi = alloc_checksum_info();
csi->start_p = (uae_u8 *)min_pcp;
csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
csi->next = bi->csi;
bi->csi = csi;
max_pcp = (uae_u32)currpcp;
}
#endif
min_pcp = (uae_u32)currpcp;
#else
if ((uae_u32)currpcp<min_pcp)
min_pcp=(uae_u32)currpcp;
if ((uae_u32)currpcp>max_pcp)
max_pcp=(uae_u32)currpcp;
#endif
liveflags[i]=((liveflags[i+1]&
(~prop[op].set_flags))|
@ -5998,6 +6055,14 @@ static void compile_block(cpu_history* pc_hist, int blocklen)
liveflags[i]&= ~FLAG_Z;
}
#if USE_CHECKSUM_INFO
checksum_info *csi = alloc_checksum_info();
csi->start_p = (uae_u8 *)min_pcp;
csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
csi->next = bi->csi;
bi->csi = csi;
#endif
bi->needed_flags=liveflags[0];
align_target(align_loops);
@ -6244,21 +6309,27 @@ static void compile_block(cpu_history* pc_hist, int blocklen)
big_to_small_state(&live,&(bi->env));
#endif
#if USE_CHECKSUM_INFO
remove_from_list(bi);
if (trace_in_rom) {
// No need to checksum that block trace on cache invalidation
free_checksum_info_chain(bi->csi);
bi->csi = NULL;
add_to_dormant(bi);
}
else {
calc_checksum(bi,&(bi->c1),&(bi->c2));
add_to_active(bi);
}
#else
if (next_pc_p+extra_len>=max_pcp &&
next_pc_p+extra_len<max_pcp+LONGEST_68K_INST)
max_pcp=next_pc_p+extra_len; /* extra_len covers flags magic */
else
max_pcp+=LONGEST_68K_INST;
#if USE_CHECKSUM_INFO
checksum_info *csi = (bi->csi = ChecksumInfoAllocator.acquire());
csi->next = NULL;
csi->length = max_pcp - min_pcp;
csi->start_p = (uae_u8 *)min_pcp;
#else
bi->len=max_pcp-min_pcp;
bi->min_pcp=min_pcp;
#endif
remove_from_list(bi);
if (isinrom(min_pcp) && isinrom(max_pcp)) {
@ -6267,13 +6338,10 @@ static void compile_block(cpu_history* pc_hist, int blocklen)
flight! */
}
else {
#if USE_CHECKSUM_INFO
calc_checksum(csi,&csi->c1,&csi->c2);
#else
calc_checksum(bi,&(bi->c1),&(bi->c2));
#endif
add_to_active(bi);
}
#endif
current_cache_size += get_target() - (uae_u8 *)current_compile_p;

View File

@ -786,20 +786,6 @@ void read_table68k (void)
|| (table68k[opc].mnemo == i_BSR) \
)
#if 0
// gb-- Don't follow false and true branches as we may not be
// able to determine the whole block length in bytes in order
// to compute the block checksum
// We can follow unconditional jumps if neither Lazy Flusher
// nor Dynamic Code Patches feature is enabled
// UPDATE: this is no longer permitted since we can decide
// at runtime whether the JIT compiler is used or not
if (IS_CONST_JUMP(i))
table68k[i].cflow = fl_normal;
#endif
// Fix flags used information for Scc, Bcc, TRAPcc, DBcc instructions
int flags_used = table68k[i].flaglive;
if ( (mnemo == i_Scc)

View File

@ -62,7 +62,7 @@ ENUMDECL {
fl_jump = 2,
fl_return = 3,
fl_trap = 4,
fl_compiled = 8,
fl_const_jump = 8,
/* Instructions that can trap don't mark the end of a block */
fl_end_block = 3