revive and fix almost two-year-old port to x86_64

This commit is contained in:
gbeauche 2004-11-01 16:01:51 +00:00
parent 730364ac1e
commit e58fbc745b
5 changed files with 326 additions and 197 deletions

View File

@ -42,48 +42,62 @@
#define EBP_INDEX 5 #define EBP_INDEX 5
#define ESI_INDEX 6 #define ESI_INDEX 6
#define EDI_INDEX 7 #define EDI_INDEX 7
/* Indices of the additional integer registers R8..R15; only defined when
   building for x86-64. */
#if defined(__x86_64__)
#define R8_INDEX 8
#define R9_INDEX 9
#define R10_INDEX 10
#define R11_INDEX 11
#define R12_INDEX 12
#define R13_INDEX 13
#define R14_INDEX 14
#define R15_INDEX 15
#endif
/* The register in which subroutines return an integer return value */ /* The register in which subroutines return an integer return value */
#define REG_RESULT 0 #define REG_RESULT EAX_INDEX
/* The registers subroutines take their first and second argument in */ /* The registers subroutines take their first and second argument in */
#if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION ) #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
/* Handle the _fastcall parameters of ECX and EDX */ /* Handle the _fastcall parameters of ECX and EDX */
#define REG_PAR1 1 #define REG_PAR1 ECX_INDEX
#define REG_PAR2 2 #define REG_PAR2 EDX_INDEX
#elif defined(__x86_64__)
#define REG_PAR1 EDI_INDEX
#define REG_PAR2 ESI_INDEX
#else #else
#define REG_PAR1 0 #define REG_PAR1 EAX_INDEX
#define REG_PAR2 2 #define REG_PAR2 EDX_INDEX
#endif #endif
/* Three registers that are not used for any of the above */ #define REG_PC_PRE EAX_INDEX /* The register we use for preloading regs.pc_p */
#define REG_NOPAR1 6
#define REG_NOPAR2 5
#define REG_NOPAR3 3
#define REG_PC_PRE 0 /* The register we use for preloading regs.pc_p */
#if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION ) #if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
#define REG_PC_TMP 0 #define REG_PC_TMP EAX_INDEX
#else #else
#define REG_PC_TMP 1 /* Another register that is not the above */ #define REG_PC_TMP ECX_INDEX /* Another register that is not the above */
#endif #endif
#define SHIFTCOUNT_NREG 1 /* Register that can be used for shiftcount. #define SHIFTCOUNT_NREG ECX_INDEX /* Register that can be used for shiftcount.
-1 if any reg will do */ -1 if any reg will do */
#define MUL_NREG1 0 /* %eax will hold the low 32 bits after a 32x32 mul */ #define MUL_NREG1 EAX_INDEX /* %eax will hold the low 32 bits after a 32x32 mul */
#define MUL_NREG2 2 /* %edx will hold the high 32 bits */ #define MUL_NREG2 EDX_INDEX /* %edx will hold the high 32 bits */
uae_s8 always_used[]={4,-1}; uae_s8 always_used[]={4,-1};
#if defined(__x86_64__)
uae_s8 can_byte[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
uae_s8 can_word[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
#else
uae_s8 can_byte[]={0,1,2,3,-1}; uae_s8 can_byte[]={0,1,2,3,-1};
uae_s8 can_word[]={0,1,2,3,5,6,7,-1}; uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
#endif
#if USE_OPTIMIZED_CALLS #if USE_OPTIMIZED_CALLS
/* Make sure interpretive core does not use cpuopti */ /* Make sure interpretive core does not use cpuopti */
uae_u8 call_saved[]={0,0,0,1,1,1,1,1}; uae_u8 call_saved[]={0,0,0,1,1,1,1,1};
#error FIXME: code not ready
#else #else
/* cpuopti mutate instruction handlers to assume registers are saved /* cpuopti mutate instruction handlers to assume registers are saved
by the caller */ by the caller */
uae_u8 call_saved[]={0,0,0,0,1,0,0,0}; uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0};
#endif #endif
/* This *should* be the same as call_saved. But: /* This *should* be the same as call_saved. But:
@ -93,7 +107,7 @@ uae_u8 call_saved[]={0,0,0,0,1,0,0,0};
- Special registers (such like the stack pointer) should not be "preserved" - Special registers (such like the stack pointer) should not be "preserved"
by pushing, even though they are "saved" across function calls by pushing, even though they are "saved" across function calls
*/ */
uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1}; uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1};
/* Whether classes of instructions do or don't clobber the native flags */ /* Whether classes of instructions do or don't clobber the native flags */
#define CLOBBER_MOV #define CLOBBER_MOV
@ -118,8 +132,10 @@ uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1};
#define CLOBBER_TEST clobber_flags() #define CLOBBER_TEST clobber_flags()
#define CLOBBER_CL16 #define CLOBBER_CL16
#define CLOBBER_CL8 #define CLOBBER_CL8
#define CLOBBER_SE32
#define CLOBBER_SE16 #define CLOBBER_SE16
#define CLOBBER_SE8 #define CLOBBER_SE8
#define CLOBBER_ZE32
#define CLOBBER_ZE16 #define CLOBBER_ZE16
#define CLOBBER_ZE8 #define CLOBBER_ZE8
#define CLOBBER_SW16 clobber_flags() #define CLOBBER_SW16 clobber_flags()
@ -130,7 +146,11 @@ uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1};
#define CLOBBER_BSF clobber_flags() #define CLOBBER_BSF clobber_flags()
/* FIXME: disabled until that's proofread. */ /* FIXME: disabled until that's proofread. */
#if 0 #if defined(__x86_64__)
#define USE_NEW_RTASM 1
#endif
#if USE_NEW_RTASM
#if defined(__x86_64__) #if defined(__x86_64__)
#define X86_TARGET_64BIT 1 #define X86_TARGET_64BIT 1
@ -143,6 +163,7 @@ uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1};
#define x86_emit_byte(B) emit_byte(B) #define x86_emit_byte(B) emit_byte(B)
#define x86_emit_word(W) emit_word(W) #define x86_emit_word(W) emit_word(W)
#define x86_emit_long(L) emit_long(L) #define x86_emit_long(L) emit_long(L)
#define x86_emit_quad(Q) emit_quad(Q)
#define x86_get_target() get_target() #define x86_get_target() get_target()
#define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__) #define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__)
@ -155,13 +176,21 @@ static void jit_fail(const char *msg, const char *file, int line, const char *fu
LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r)) LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
{ {
#if defined(__x86_64__)
PUSHQr(r);
#else
PUSHLr(r); PUSHLr(r);
#endif
} }
LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r)) LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r)) LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
{ {
#if defined(__x86_64__)
POPQr(r);
#else
POPLr(r); POPLr(r);
#endif
} }
LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r)) LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
@ -486,6 +515,12 @@ LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
} }
LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s)) LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
/* Low-level emitter: sign-extend the 32-bit value in register s into
   register d. The work is done by the MOVSLQrr assembler macro, which is
   passed the source register first and the destination second.
   NOTE(review): MOVSLQrr is defined outside this view; presumably it encodes
   the x86-64 `movslq` instruction -- confirm against the assembler header. */
LOWFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
{
MOVSLQrr(s, d);
}
LENDFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s)) LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
{ {
MOVSWLrr(s, d); MOVSWLrr(s, d);
@ -2898,12 +2933,19 @@ LENDFUNC(WRITE,READ,0,raw_popfl,(void))
static __inline__ void raw_call_r(R4 r) static __inline__ void raw_call_r(R4 r)
{ {
#if USE_NEW_RTASM
CALLsr(r);
#else
emit_byte(0xff); emit_byte(0xff);
emit_byte(0xd0+r); emit_byte(0xd0+r);
#endif
} }
static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m) static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
{ {
#if USE_NEW_RTASM
CALLsm(base, X86_NOREG, r, m);
#else
int mu; int mu;
switch(m) { switch(m) {
case 1: mu=0; break; case 1: mu=0; break;
@ -2916,16 +2958,24 @@ static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
emit_byte(0x14); emit_byte(0x14);
emit_byte(0x05+8*r+0x40*mu); emit_byte(0x05+8*r+0x40*mu);
emit_long(base); emit_long(base);
#endif
} }
static __inline__ void raw_jmp_r(R4 r) static __inline__ void raw_jmp_r(R4 r)
{ {
#if USE_NEW_RTASM
JMPsr(r);
#else
emit_byte(0xff); emit_byte(0xff);
emit_byte(0xe0+r); emit_byte(0xe0+r);
#endif
} }
static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m) static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
{ {
#if USE_NEW_RTASM
JMPsm(base, X86_NOREG, r, m);
#else
int mu; int mu;
switch(m) { switch(m) {
case 1: mu=0; break; case 1: mu=0; break;
@ -2938,6 +2988,7 @@ static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
emit_byte(0x24); emit_byte(0x24);
emit_byte(0x05+8*r+0x40*mu); emit_byte(0x05+8*r+0x40*mu);
emit_long(base); emit_long(base);
#endif
} }
static __inline__ void raw_jmp_m(uae_u32 base) static __inline__ void raw_jmp_m(uae_u32 base)
@ -2950,35 +3001,43 @@ static __inline__ void raw_jmp_m(uae_u32 base)
static __inline__ void raw_call(uae_u32 t) static __inline__ void raw_call(uae_u32 t)
{ {
#if USE_NEW_RTASM
CALLm(t);
#else
emit_byte(0xe8); emit_byte(0xe8);
emit_long(t-(uae_u32)target-4); emit_long(t-(uae_u32)target-4);
#endif
} }
static __inline__ void raw_jmp(uae_u32 t) static __inline__ void raw_jmp(uae_u32 t)
{ {
#if USE_NEW_RTASM
JMPm(t);
#else
emit_byte(0xe9); emit_byte(0xe9);
emit_long(t-(uae_u32)target-4); emit_long(t-(uae_u32)target-4);
#endif
} }
static __inline__ void raw_jl(uae_u32 t) static __inline__ void raw_jl(uae_u32 t)
{ {
emit_byte(0x0f); emit_byte(0x0f);
emit_byte(0x8c); emit_byte(0x8c);
emit_long(t-(uae_u32)target-4); emit_long(t-(uintptr)target-4);
} }
static __inline__ void raw_jz(uae_u32 t) static __inline__ void raw_jz(uae_u32 t)
{ {
emit_byte(0x0f); emit_byte(0x0f);
emit_byte(0x84); emit_byte(0x84);
emit_long(t-(uae_u32)target-4); emit_long(t-(uintptr)target-4);
} }
static __inline__ void raw_jnz(uae_u32 t) static __inline__ void raw_jnz(uae_u32 t)
{ {
emit_byte(0x0f); emit_byte(0x0f);
emit_byte(0x85); emit_byte(0x85);
emit_long(t-(uae_u32)target-4); emit_long(t-(uintptr)target-4);
} }
static __inline__ void raw_jnz_l_oponly(void) static __inline__ void raw_jnz_l_oponly(void)
@ -3103,11 +3162,11 @@ static __inline__ void raw_flags_to_reg(int r)
{ {
raw_lahf(0); /* Most flags in AH */ raw_lahf(0); /* Most flags in AH */
//raw_setcc(r,0); /* V flag in AL */ //raw_setcc(r,0); /* V flag in AL */
raw_setcc_m((uae_u32)live.state[FLAGTMP].mem,0); raw_setcc_m((uintptr)live.state[FLAGTMP].mem,0);
#if 1 /* Let's avoid those nasty partial register stalls */ #if 1 /* Let's avoid those nasty partial register stalls */
//raw_mov_b_mr((uae_u32)live.state[FLAGTMP].mem,r); //raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r);
raw_mov_b_mr(((uae_u32)live.state[FLAGTMP].mem)+1,r+4); raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,r+4);
//live.state[FLAGTMP].status=CLEAN; //live.state[FLAGTMP].status=CLEAN;
live.state[FLAGTMP].status=INMEM; live.state[FLAGTMP].status=INMEM;
live.state[FLAGTMP].realreg=-1; live.state[FLAGTMP].realreg=-1;
@ -3134,7 +3193,7 @@ static __inline__ void raw_flags_to_reg(int r)
{ {
raw_pushfl(); raw_pushfl();
raw_pop_l_r(r); raw_pop_l_r(r);
raw_mov_l_mr((uae_u32)live.state[FLAGTMP].mem,r); raw_mov_l_mr((uintptr)live.state[FLAGTMP].mem,r);
// live.state[FLAGTMP].status=CLEAN; // live.state[FLAGTMP].status=CLEAN;
live.state[FLAGTMP].status=INMEM; live.state[FLAGTMP].status=INMEM;
live.state[FLAGTMP].realreg=-1; live.state[FLAGTMP].realreg=-1;
@ -3160,10 +3219,10 @@ static __inline__ void raw_reg_to_flags(int r)
static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r) static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
{ {
#if 1 #if 1
raw_mov_l_rm(target,(uae_u32)live.state[r].mem); raw_mov_l_rm(target,(uintptr)live.state[r].mem);
#else #else
raw_mov_b_rm(target,(uae_u32)live.state[r].mem); raw_mov_b_rm(target,(uintptr)live.state[r].mem);
raw_mov_b_rm(target+4,((uae_u32)live.state[r].mem)+1); raw_mov_b_rm(target+4,((uintptr)live.state[r].mem)+1);
#endif #endif
} }
@ -3171,11 +3230,11 @@ static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r) static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
{ {
if (live.nat[target].canbyte) if (live.nat[target].canbyte)
raw_mov_b_rm(target,(uae_u32)live.state[r].mem); raw_mov_b_rm(target,(uintptr)live.state[r].mem);
else if (live.nat[target].canword) else if (live.nat[target].canword)
raw_mov_w_rm(target,(uae_u32)live.state[r].mem); raw_mov_w_rm(target,(uintptr)live.state[r].mem);
else else
raw_mov_l_rm(target,(uae_u32)live.state[r].mem); raw_mov_l_rm(target,(uintptr)live.state[r].mem);
} }
#define NATIVE_FLAG_Z 0x40 #define NATIVE_FLAG_Z 0x40
@ -3355,7 +3414,7 @@ static void vec(int x, struct sigcontext sc)
for (i=0;i<5;i++) for (i=0;i<5;i++)
vecbuf[i]=target[i]; vecbuf[i]=target[i];
emit_byte(0xe9); emit_byte(0xe9);
emit_long((uae_u32)veccode-(uae_u32)target-4); emit_long((uintptr)veccode-(uintptr)target-4);
write_log("Create jump to %p\n",veccode); write_log("Create jump to %p\n",veccode);
write_log("Handled one access!\n"); write_log("Handled one access!\n");
@ -3382,9 +3441,9 @@ static void vec(int x, struct sigcontext sc)
} }
for (i=0;i<5;i++) for (i=0;i<5;i++)
raw_mov_b_mi(sc.eip+i,vecbuf[i]); raw_mov_b_mi(sc.eip+i,vecbuf[i]);
raw_mov_l_mi((uae_u32)&in_handler,0); raw_mov_l_mi((uintptr)&in_handler,0);
emit_byte(0xe9); emit_byte(0xe9);
emit_long(sc.eip+len-(uae_u32)target-4); emit_long(sc.eip+len-(uintptr)target-4);
in_handler=1; in_handler=1;
target=tmp; target=tmp;
} }
@ -3544,19 +3603,21 @@ static void
cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx) cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
{ {
static uae_u8 cpuid_space[256]; static uae_u8 cpuid_space[256];
static uae_u32 s_op, s_eax, s_ebx, s_ecx, s_edx;
uae_u8* tmp=get_target(); uae_u8* tmp=get_target();
s_op = op;
set_target(cpuid_space); set_target(cpuid_space);
raw_push_l_r(0); /* eax */ raw_push_l_r(0); /* eax */
raw_push_l_r(1); /* ecx */ raw_push_l_r(1); /* ecx */
raw_push_l_r(2); /* edx */ raw_push_l_r(2); /* edx */
raw_push_l_r(3); /* ebx */ raw_push_l_r(3); /* ebx */
raw_mov_l_rm(0,(uae_u32)&op); raw_mov_l_rm(0,(uintptr)&s_op);
raw_cpuid(0); raw_cpuid(0);
if (eax != NULL) raw_mov_l_mr((uae_u32)eax,0); raw_mov_l_mr((uintptr)&s_eax,0);
if (ebx != NULL) raw_mov_l_mr((uae_u32)ebx,3); raw_mov_l_mr((uintptr)&s_ebx,3);
if (ecx != NULL) raw_mov_l_mr((uae_u32)ecx,1); raw_mov_l_mr((uintptr)&s_ecx,1);
if (edx != NULL) raw_mov_l_mr((uae_u32)edx,2); raw_mov_l_mr((uintptr)&s_edx,2);
raw_pop_l_r(3); raw_pop_l_r(3);
raw_pop_l_r(2); raw_pop_l_r(2);
raw_pop_l_r(1); raw_pop_l_r(1);
@ -3565,6 +3626,10 @@ cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
set_target(tmp); set_target(tmp);
((cpuop_func*)cpuid_space)(0); ((cpuop_func*)cpuid_space)(0);
if (eax != NULL) *eax = s_eax;
if (ebx != NULL) *ebx = s_ebx;
if (ecx != NULL) *ecx = s_ecx;
if (edx != NULL) *edx = s_edx;
} }
static void static void
@ -4152,7 +4217,7 @@ LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
emit_byte(0xf0); /* f2xm1 */ emit_byte(0xf0); /* f2xm1 */
emit_byte(0xdc); emit_byte(0xdc);
emit_byte(0x05); emit_byte(0x05);
emit_long((uae_u32)&one); /* Add '1' without using extra stack space */ emit_long((uintptr)&one); /* Add '1' without using extra stack space */
emit_byte(0xd9); emit_byte(0xd9);
emit_byte(0xfd); /* and scale it */ emit_byte(0xfd); /* and scale it */
emit_byte(0xdd); emit_byte(0xdd);
@ -4186,7 +4251,7 @@ LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
emit_byte(0xf0); /* f2xm1 */ emit_byte(0xf0); /* f2xm1 */
emit_byte(0xdc); emit_byte(0xdc);
emit_byte(0x05); emit_byte(0x05);
emit_long((uae_u32)&one); /* Add '1' without using extra stack space */ emit_long((uintptr)&one); /* Add '1' without using extra stack space */
emit_byte(0xd9); emit_byte(0xd9);
emit_byte(0xfd); /* and scale it */ emit_byte(0xfd); /* and scale it */
emit_byte(0xdd); emit_byte(0xdd);

View File

@ -41,7 +41,7 @@ extern void compiler_dumpstate(void);
#define TAGMASK 0x0000ffff #define TAGMASK 0x0000ffff
#define TAGSIZE (TAGMASK+1) #define TAGSIZE (TAGMASK+1)
#define MAXRUN 1024 #define MAXRUN 1024
#define cacheline(x) (((uae_u32)x)&TAGMASK) #define cacheline(x) (((uintptr)x)&TAGMASK)
extern uae_u8* start_pc_p; extern uae_u8* start_pc_p;
extern uae_u32 start_pc; extern uae_u32 start_pc;
@ -125,13 +125,17 @@ union cacheline {
#define KILLTHERAT 1 /* Set to 1 to avoid some partial_rat_stalls */ #define KILLTHERAT 1 /* Set to 1 to avoid some partial_rat_stalls */
/* Whether to preserve registers across calls to JIT compiled routines */ /* Whether to preserve registers across calls to JIT compiled routines */
#ifdef X86_ASSEMBLY #if defined(X86_ASSEMBLY) || defined(X86_64_ASSEMBLY)
#define USE_PUSH_POP 0 #define USE_PUSH_POP 0
#else #else
#define USE_PUSH_POP 1 #define USE_PUSH_POP 1
#endif #endif
#if defined(__x86_64__)
#define N_REGS 16 /* really only 15, but they are numbered 0-3,5-15 */
#else
#define N_REGS 8 /* really only 7, but they are numbered 0,1,2,3,5,6,7 */ #define N_REGS 8 /* really only 7, but they are numbered 0,1,2,3,5,6,7 */
#endif
#define N_FREGS 6 /* That leaves us two positions on the stack to play with */ #define N_FREGS 6 /* That leaves us two positions on the stack to play with */
/* Functions exposed to newcpu, or to what was moved from newcpu.c to /* Functions exposed to newcpu, or to what was moved from newcpu.c to
@ -525,7 +529,7 @@ extern void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond);
struct blockinfo_t; struct blockinfo_t;
typedef struct dep_t { typedef struct dep_t {
uintptr* jmp_off; uae_u32* jmp_off;
struct blockinfo_t* target; struct blockinfo_t* target;
struct blockinfo_t* source; struct blockinfo_t* source;
struct dep_t** prev_p; struct dep_t** prev_p;

View File

@ -31,6 +31,15 @@
#error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler" #error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler"
#endif #endif
/* NOTE: support for AMD64 assumes translation cache and other code
* buffers are allocated into a 32-bit address space because (i) B2/JIT
* code is not 64-bit clean and (ii) it's faster to resolve branches
* that way.
*/
#if !defined(__i386__) && !defined(__x86_64__)
#error "Only IA-32 and X86-64 targets are supported with the JIT Compiler"
#endif
#define USE_MATCH 0 #define USE_MATCH 0
/* kludge for Brian, so he can compile under MSVC++ */ /* kludge for Brian, so he can compile under MSVC++ */
@ -107,9 +116,9 @@ static int untranslated_compfn(const void *e1, const void *e2)
} }
#endif #endif
compop_func *compfunctbl[65536]; static compop_func *compfunctbl[65536];
compop_func *nfcompfunctbl[65536]; static compop_func *nfcompfunctbl[65536];
cpuop_func *nfcpufunctbl[65536]; static cpuop_func *nfcpufunctbl[65536];
uae_u8* comp_pc_p; uae_u8* comp_pc_p;
// From newcpu.cpp // From newcpu.cpp
@ -134,7 +143,6 @@ const bool tune_nop_fillers = true; // Tune no-op fillers for architecture
static bool setzflg_uses_bsf = false; // setzflg virtual instruction can use native BSF instruction correctly? static bool setzflg_uses_bsf = false; // setzflg virtual instruction can use native BSF instruction correctly?
static int align_loops = 32; // Align the start of loops static int align_loops = 32; // Align the start of loops
static int align_jumps = 32; // Align the start of jumps static int align_jumps = 32; // Align the start of jumps
static int zero_fd = -1;
static int optcount[10] = { static int optcount[10] = {
10, // How often a block has to be executed before it is translated 10, // How often a block has to be executed before it is translated
0, // How often to use naive translation 0, // How often to use naive translation
@ -177,10 +185,10 @@ static inline unsigned int cft_map (unsigned int f)
uae_u8* start_pc_p; uae_u8* start_pc_p;
uae_u32 start_pc; uae_u32 start_pc;
uae_u32 current_block_pc_p; uae_u32 current_block_pc_p;
uae_u32 current_block_start_target; static uintptr current_block_start_target;
uae_u32 needed_flags; uae_u32 needed_flags;
static uae_u32 next_pc_p; static uintptr next_pc_p;
static uae_u32 taken_pc_p; static uintptr taken_pc_p;
static int branch_cc; static int branch_cc;
static int redo_current_block; static int redo_current_block;
@ -192,6 +200,8 @@ static uae_u8* current_compile_p=NULL;
static uae_u8* max_compile_start; static uae_u8* max_compile_start;
static uae_u8* compiled_code=NULL; static uae_u8* compiled_code=NULL;
static uae_s32 reg_alloc_run; static uae_s32 reg_alloc_run;
const int POPALLSPACE_SIZE = 1024; /* That should be enough space */
static uae_u8* popallspace=NULL;
void* pushall_call_handler=NULL; void* pushall_call_handler=NULL;
static void* popall_do_nothing=NULL; static void* popall_do_nothing=NULL;
@ -447,7 +457,7 @@ static __inline__ void invalidate_block(blockinfo* bi)
static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target) static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target)
{ {
blockinfo* tbi=get_blockinfo_addr((void*)target); blockinfo* tbi=get_blockinfo_addr((void*)(uintptr)target);
Dif(!tbi) { Dif(!tbi) {
write_log("Could not create jmpdep!\n"); write_log("Could not create jmpdep!\n");
@ -711,6 +721,12 @@ static __inline__ void emit_long(uae_u32 x)
target+=4; target+=4;
} }
/*
 * Append the 64-bit value x to the code buffer at `target` and advance
 * `target` past it.
 *
 * The value is written in host byte order. memcpy() is used instead of a
 * store through a casted uae_u64 pointer because `target` is a byte cursor
 * that carries no alignment guarantee once single bytes have been emitted;
 * storing through a misaligned pointer is undefined behavior.
 */
static __inline__ void emit_quad(uae_u64 x)
{
    memcpy((uae_u8 *)target, &x, sizeof(x));
    target += sizeof(x);
}
static __inline__ void emit_block(const uae_u8 *block, uae_u32 blocklen) static __inline__ void emit_block(const uae_u8 *block, uae_u32 blocklen)
{ {
memcpy((uae_u8 *)target,block,blocklen); memcpy((uae_u8 *)target,block,blocklen);
@ -1125,12 +1141,12 @@ static __inline__ void do_load_reg(int n, int r)
else if (r == FLAGX) else if (r == FLAGX)
raw_load_flagx(n, r); raw_load_flagx(n, r);
else else
raw_mov_l_rm(n, (uae_u32) live.state[r].mem); raw_mov_l_rm(n, (uintptr) live.state[r].mem);
} }
static __inline__ void check_load_reg(int n, int r) static __inline__ void check_load_reg(int n, int r)
{ {
raw_mov_l_rm(n, (uae_u32) live.state[r].mem); raw_mov_l_rm(n, (uintptr) live.state[r].mem);
} }
static __inline__ void log_vwrite(int r) static __inline__ void log_vwrite(int r)
@ -1241,9 +1257,9 @@ static void tomem(int r)
if (live.state[r].status==DIRTY) { if (live.state[r].status==DIRTY) {
switch (live.state[r].dirtysize) { switch (live.state[r].dirtysize) {
case 1: raw_mov_b_mr((uae_u32)live.state[r].mem,rr); break; case 1: raw_mov_b_mr((uintptr)live.state[r].mem,rr); break;
case 2: raw_mov_w_mr((uae_u32)live.state[r].mem,rr); break; case 2: raw_mov_w_mr((uintptr)live.state[r].mem,rr); break;
case 4: raw_mov_l_mr((uae_u32)live.state[r].mem,rr); break; case 4: raw_mov_l_mr((uintptr)live.state[r].mem,rr); break;
default: abort(); default: abort();
} }
log_vwrite(r); log_vwrite(r);
@ -1271,7 +1287,7 @@ static __inline__ void writeback_const(int r)
abort(); abort();
} }
raw_mov_l_mi((uae_u32)live.state[r].mem,live.state[r].val); raw_mov_l_mi((uintptr)live.state[r].mem,live.state[r].val);
log_vwrite(r); log_vwrite(r);
live.state[r].val=0; live.state[r].val=0;
set_status(r,INMEM); set_status(r,INMEM);
@ -1404,7 +1420,7 @@ static int alloc_reg_hinted(int r, int size, int willclobber, int hint)
if (size==4 && live.state[r].validsize==2) { if (size==4 && live.state[r].validsize==2) {
log_isused(bestreg); log_isused(bestreg);
log_visused(r); log_visused(r);
raw_mov_l_rm(bestreg,(uae_u32)live.state[r].mem); raw_mov_l_rm(bestreg,(uintptr)live.state[r].mem);
raw_bswap_32(bestreg); raw_bswap_32(bestreg);
raw_zero_extend_16_rr(rr,rr); raw_zero_extend_16_rr(rr,rr);
raw_zero_extend_16_rr(bestreg,bestreg); raw_zero_extend_16_rr(bestreg,bestreg);
@ -1901,9 +1917,9 @@ static void f_tomem(int r)
{ {
if (live.fate[r].status==DIRTY) { if (live.fate[r].status==DIRTY) {
#if USE_LONG_DOUBLE #if USE_LONG_DOUBLE
raw_fmov_ext_mr((uae_u32)live.fate[r].mem,live.fate[r].realreg); raw_fmov_ext_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
#else #else
raw_fmov_mr((uae_u32)live.fate[r].mem,live.fate[r].realreg); raw_fmov_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
#endif #endif
live.fate[r].status=CLEAN; live.fate[r].status=CLEAN;
} }
@ -1913,9 +1929,9 @@ static void f_tomem_drop(int r)
{ {
if (live.fate[r].status==DIRTY) { if (live.fate[r].status==DIRTY) {
#if USE_LONG_DOUBLE #if USE_LONG_DOUBLE
raw_fmov_ext_mr_drop((uae_u32)live.fate[r].mem,live.fate[r].realreg); raw_fmov_ext_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
#else #else
raw_fmov_mr_drop((uae_u32)live.fate[r].mem,live.fate[r].realreg); raw_fmov_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
#endif #endif
live.fate[r].status=INMEM; live.fate[r].status=INMEM;
} }
@ -2023,9 +2039,9 @@ static int f_alloc_reg(int r, int willclobber)
if (!willclobber) { if (!willclobber) {
if (live.fate[r].status!=UNDEF) { if (live.fate[r].status!=UNDEF) {
#if USE_LONG_DOUBLE #if USE_LONG_DOUBLE
raw_fmov_ext_rm(bestreg,(uae_u32)live.fate[r].mem); raw_fmov_ext_rm(bestreg,(uintptr)live.fate[r].mem);
#else #else
raw_fmov_rm(bestreg,(uae_u32)live.fate[r].mem); raw_fmov_rm(bestreg,(uintptr)live.fate[r].mem);
#endif #endif
} }
live.fate[r].status=CLEAN; live.fate[r].status=CLEAN;
@ -2231,7 +2247,7 @@ MIDFUNC(0,duplicate_carry,(void))
{ {
evict(FLAGX); evict(FLAGX);
make_flags_live_internal(); make_flags_live_internal();
COMPCALL(setcc_m)((uae_u32)live.state[FLAGX].mem,2); COMPCALL(setcc_m)((uintptr)live.state[FLAGX].mem,2);
log_vwrite(FLAGX); log_vwrite(FLAGX);
} }
MENDFUNC(0,duplicate_carry,(void)) MENDFUNC(0,duplicate_carry,(void))
@ -2987,6 +3003,38 @@ MIDFUNC(2,mul_32_32,(RW4 d, R4 s))
} }
MENDFUNC(2,mul_32_32,(RW4 d, R4 s)) MENDFUNC(2,mul_32_32,(RW4 d, R4 s))
#if SIZEOF_VOID_P == 8
MIDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
{
    /* Constant source: record the result as a constant for d and emit
       no native code at all. */
    if (isconst(s)) {
        set_const(d,(uae_s32)live.state[s].val);
        return;
    }

    CLOBBER_SE32;

    if (s==d) {
        /* Source and destination are the same virtual register: lock it
           exactly once, as read-modify-write. Locking it twice with
           different sizes would get us in trouble. */
        int nreg=rmw(s,4,4);
        raw_sign_extend_32_rr(nreg,nreg);
        unlock2(nreg);
    }
    else {
        /* Distinct registers: lock the source for reading first, then
           the destination for writing; release in the opposite order. */
        s=readreg(s,4);
        d=writereg(d,4);
        raw_sign_extend_32_rr(d,s);
        unlock2(d);
        unlock2(s);
    }
}
MENDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
#endif
MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s)) MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
{ {
int isrmw; int isrmw;
@ -4884,18 +4932,7 @@ void compiler_init(void)
static bool initialized = false; static bool initialized = false;
if (initialized) if (initialized)
return; return;
#ifndef WIN32
// Open /dev/zero
zero_fd = open("/dev/zero", O_RDWR);
if (zero_fd < 0) {
char str[200];
sprintf(str, GetString(STR_NO_DEV_ZERO_ERR), strerror(errno));
ErrorAlert(str);
QuitEmulator();
}
#endif
#if JIT_DEBUG #if JIT_DEBUG
// JIT debug mode ? // JIT debug mode ?
JITDebug = PrefsFindBool("jitdebug"); JITDebug = PrefsFindBool("jitdebug");
@ -4960,12 +4997,12 @@ void compiler_exit(void)
vm_release(compiled_code, cache_size * 1024); vm_release(compiled_code, cache_size * 1024);
compiled_code = 0; compiled_code = 0;
} }
#ifndef WIN32 // Deallocate popallspace
// Close /dev/zero if (popallspace) {
if (zero_fd > 0) vm_release(popallspace, POPALLSPACE_SIZE);
close(zero_fd); popallspace = 0;
#endif }
#if PROFILE_COMPILE_TIME #if PROFILE_COMPILE_TIME
write_log("### Compile Block statistics\n"); write_log("### Compile Block statistics\n");
@ -5052,13 +5089,13 @@ void init_comp(void)
} }
live.state[PC_P].mem=(uae_u32*)&(regs.pc_p); live.state[PC_P].mem=(uae_u32*)&(regs.pc_p);
live.state[PC_P].needflush=NF_TOMEM; live.state[PC_P].needflush=NF_TOMEM;
set_const(PC_P,(uae_u32)comp_pc_p); set_const(PC_P,(uintptr)comp_pc_p);
live.state[FLAGX].mem=&(regflags.x); live.state[FLAGX].mem=(uae_u32*)&(regflags.x);
live.state[FLAGX].needflush=NF_TOMEM; live.state[FLAGX].needflush=NF_TOMEM;
set_status(FLAGX,INMEM); set_status(FLAGX,INMEM);
live.state[FLAGTMP].mem=&(regflags.cznv); live.state[FLAGTMP].mem=(uae_u32*)&(regflags.cznv);
live.state[FLAGTMP].needflush=NF_TOMEM; live.state[FLAGTMP].needflush=NF_TOMEM;
set_status(FLAGTMP,INMEM); set_status(FLAGTMP,INMEM);
@ -5132,7 +5169,7 @@ void flush(int save_regs)
switch(live.state[i].status) { switch(live.state[i].status) {
case INMEM: case INMEM:
if (live.state[i].val) { if (live.state[i].val) {
raw_add_l_mi((uae_u32)live.state[i].mem,live.state[i].val); raw_add_l_mi((uintptr)live.state[i].mem,live.state[i].val);
log_vwrite(i); log_vwrite(i);
live.state[i].val=0; live.state[i].val=0;
} }
@ -5230,10 +5267,10 @@ static void align_target(uae_u32 a)
return; return;
if (tune_nop_fillers) if (tune_nop_fillers)
raw_emit_nop_filler(a - (((uae_u32)target) & (a - 1))); raw_emit_nop_filler(a - (((uintptr)target) & (a - 1)));
else { else {
/* Fill with NOPs --- makes debugging with gdb easier */ /* Fill with NOPs --- makes debugging with gdb easier */
while ((uae_u32)target&(a-1)) while ((uintptr)target&(a-1))
*target++=0x90; *target++=0x90;
} }
} }
@ -5301,15 +5338,15 @@ void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond)
static uae_u32 get_handler_address(uae_u32 addr) static uae_u32 get_handler_address(uae_u32 addr)
{ {
uae_u32 cl=cacheline(addr); uae_u32 cl=cacheline(addr);
blockinfo* bi=get_blockinfo_addr_new((void*)addr,0); blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
return (uae_u32)&(bi->direct_handler_to_use); return (uintptr)&(bi->direct_handler_to_use);
} }
static uae_u32 get_handler(uae_u32 addr) static uae_u32 get_handler(uae_u32 addr)
{ {
uae_u32 cl=cacheline(addr); uae_u32 cl=cacheline(addr);
blockinfo* bi=get_blockinfo_addr_new((void*)addr,0); blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
return (uae_u32)bi->direct_handler_to_use; return (uintptr)bi->direct_handler_to_use;
} }
static void load_handler(int reg, uae_u32 addr) static void load_handler(int reg, uae_u32 addr)
@ -5321,12 +5358,18 @@ static void load_handler(int reg, uae_u32 addr)
* if that assumption is wrong! No branches, no second chances, just * if that assumption is wrong! No branches, no second chances, just
* straight go-for-it attitude */ * straight go-for-it attitude */
static void writemem_real(int address, int source, int offset, int size, int tmp, int clobber) static void writemem_real(int address, int source, int size, int tmp, int clobber)
{ {
int f=tmp; int f=tmp;
if (clobber) if (clobber)
f=source; f=source;
#if SIZEOF_VOID_P == 8
/* HACK: address calculation is suboptimal and possibly broken */
sign_extend_32_rr(address, address);
#endif
switch(size) { switch(size) {
case 1: mov_b_bRr(address,source,MEMBaseDiff); break; case 1: mov_b_bRr(address,source,MEMBaseDiff); break;
case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,MEMBaseDiff); break; case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,MEMBaseDiff); break;
@ -5338,13 +5381,13 @@ static void writemem_real(int address, int source, int offset, int size, int tmp
void writebyte(int address, int source, int tmp) void writebyte(int address, int source, int tmp)
{ {
writemem_real(address,source,20,1,tmp,0); writemem_real(address,source,1,tmp,0);
} }
static __inline__ void writeword_general(int address, int source, int tmp, static __inline__ void writeword_general(int address, int source, int tmp,
int clobber) int clobber)
{ {
writemem_real(address,source,16,2,tmp,clobber); writemem_real(address,source,2,tmp,clobber);
} }
void writeword_clobber(int address, int source, int tmp) void writeword_clobber(int address, int source, int tmp)
@ -5360,7 +5403,7 @@ void writeword(int address, int source, int tmp)
static __inline__ void writelong_general(int address, int source, int tmp, static __inline__ void writelong_general(int address, int source, int tmp,
int clobber) int clobber)
{ {
writemem_real(address,source,12,4,tmp,clobber); writemem_real(address,source,4,tmp,clobber);
} }
void writelong_clobber(int address, int source, int tmp) void writelong_clobber(int address, int source, int tmp)
@ -5379,13 +5422,18 @@ void writelong(int address, int source, int tmp)
* if that assumption is wrong! No branches, no second chances, just * if that assumption is wrong! No branches, no second chances, just
* straight go-for-it attitude */ * straight go-for-it attitude */
static void readmem_real(int address, int dest, int offset, int size, int tmp) static void readmem_real(int address, int dest, int size, int tmp)
{ {
int f=tmp; int f=tmp;
if (size==4 && address!=dest) if (size==4 && address!=dest)
f=dest; f=dest;
#if SIZEOF_VOID_P == 8
/* HACK: address calculation is suboptimal and possibly broken */
sign_extend_32_rr(address, address);
#endif
switch(size) { switch(size) {
case 1: mov_b_brR(dest,address,MEMBaseDiff); break; case 1: mov_b_brR(dest,address,MEMBaseDiff); break;
case 2: mov_w_brR(dest,address,MEMBaseDiff); bswap_16(dest); break; case 2: mov_w_brR(dest,address,MEMBaseDiff); bswap_16(dest); break;
@ -5396,17 +5444,17 @@ static void readmem_real(int address, int dest, int offset, int size, int tmp)
void readbyte(int address, int dest, int tmp) void readbyte(int address, int dest, int tmp)
{ {
readmem_real(address,dest,8,1,tmp); readmem_real(address,dest,1,tmp);
} }
void readword(int address, int dest, int tmp) void readword(int address, int dest, int tmp)
{ {
readmem_real(address,dest,4,2,tmp); readmem_real(address,dest,2,tmp);
} }
void readlong(int address, int dest, int tmp) void readlong(int address, int dest, int tmp)
{ {
readmem_real(address,dest,0,4,tmp); readmem_real(address,dest,4,tmp);
} }
void get_n_addr(int address, int dest, int tmp) void get_n_addr(int address, int dest, int tmp)
@ -5628,15 +5676,15 @@ static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
Dif(!csi) abort(); Dif(!csi) abort();
while (csi) { while (csi) {
uae_s32 len = csi->length; uae_s32 len = csi->length;
uae_u32 tmp = (uae_u32)csi->start_p; uintptr tmp = (uintptr)csi->start_p;
#else #else
uae_s32 len = bi->len; uae_s32 len = bi->len;
uae_u32 tmp = (uae_u32)bi->min_pcp; uintptr tmp = (uintptr)bi->min_pcp;
#endif #endif
uae_u32*pos; uae_u32*pos;
len += (tmp & 3); len += (tmp & 3);
tmp &= ~3; tmp &= ~((uintptr)3);
pos = (uae_u32 *)tmp; pos = (uae_u32 *)tmp;
if (len >= 0 && len <= MAX_CHECKSUM_LEN) { if (len >= 0 && len <= MAX_CHECKSUM_LEN) {
@ -5663,7 +5711,7 @@ static void show_checksum(CSI_TYPE* csi)
uae_u32 k1=0; uae_u32 k1=0;
uae_u32 k2=0; uae_u32 k2=0;
uae_s32 len=CSI_LENGTH(csi); uae_s32 len=CSI_LENGTH(csi);
uae_u32 tmp=(uae_u32)CSI_START_P(csi); uae_u32 tmp=(uintptr)CSI_START_P(csi);
uae_u32* pos; uae_u32* pos;
len+=(tmp&3); len+=(tmp&3);
@ -5855,12 +5903,16 @@ static __inline__ void match_states(blockinfo* bi)
} }
} }
static uae_u8 popallspace[1024]; /* That should be enough space */
static __inline__ void create_popalls(void) static __inline__ void create_popalls(void)
{ {
int i,r; int i,r;
if ((popallspace = alloc_code(POPALLSPACE_SIZE)) == NULL) {
write_log("FATAL: Could not allocate popallspace!\n");
abort();
}
vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_WRITE);
current_compile_p=popallspace; current_compile_p=popallspace;
set_target(current_compile_p); set_target(current_compile_p);
#if USE_PUSH_POP #if USE_PUSH_POP
@ -5874,7 +5926,7 @@ static __inline__ void create_popalls(void)
if (need_to_preserve[i]) if (need_to_preserve[i])
raw_pop_l_r(i); raw_pop_l_r(i);
} }
raw_jmp((uae_u32)do_nothing); raw_jmp((uintptr)do_nothing);
align_target(align_jumps); align_target(align_jumps);
popall_execute_normal=get_target(); popall_execute_normal=get_target();
@ -5882,7 +5934,7 @@ static __inline__ void create_popalls(void)
if (need_to_preserve[i]) if (need_to_preserve[i])
raw_pop_l_r(i); raw_pop_l_r(i);
} }
raw_jmp((uae_u32)execute_normal); raw_jmp((uintptr)execute_normal);
align_target(align_jumps); align_target(align_jumps);
popall_cache_miss=get_target(); popall_cache_miss=get_target();
@ -5890,7 +5942,7 @@ static __inline__ void create_popalls(void)
if (need_to_preserve[i]) if (need_to_preserve[i])
raw_pop_l_r(i); raw_pop_l_r(i);
} }
raw_jmp((uae_u32)cache_miss); raw_jmp((uintptr)cache_miss);
align_target(align_jumps); align_target(align_jumps);
popall_recompile_block=get_target(); popall_recompile_block=get_target();
@ -5898,7 +5950,7 @@ static __inline__ void create_popalls(void)
if (need_to_preserve[i]) if (need_to_preserve[i])
raw_pop_l_r(i); raw_pop_l_r(i);
} }
raw_jmp((uae_u32)recompile_block); raw_jmp((uintptr)recompile_block);
align_target(align_jumps); align_target(align_jumps);
popall_exec_nostats=get_target(); popall_exec_nostats=get_target();
@ -5906,7 +5958,7 @@ static __inline__ void create_popalls(void)
if (need_to_preserve[i]) if (need_to_preserve[i])
raw_pop_l_r(i); raw_pop_l_r(i);
} }
raw_jmp((uae_u32)exec_nostats); raw_jmp((uintptr)exec_nostats);
align_target(align_jumps); align_target(align_jumps);
popall_check_checksum=get_target(); popall_check_checksum=get_target();
@ -5914,7 +5966,7 @@ static __inline__ void create_popalls(void)
if (need_to_preserve[i]) if (need_to_preserve[i])
raw_pop_l_r(i); raw_pop_l_r(i);
} }
raw_jmp((uae_u32)check_checksum); raw_jmp((uintptr)check_checksum);
align_target(align_jumps); align_target(align_jumps);
current_compile_p=get_target(); current_compile_p=get_target();
@ -5937,11 +5989,11 @@ static __inline__ void create_popalls(void)
} }
#endif #endif
r=REG_PC_TMP; r=REG_PC_TMP;
raw_mov_l_rm(r,(uae_u32)&regs.pc_p); raw_mov_l_rm(r,(uintptr)&regs.pc_p);
raw_and_l_ri(r,TAGMASK); raw_and_l_ri(r,TAGMASK);
raw_jmp_m_indexed((uae_u32)cache_tags,r,4); raw_jmp_m_indexed((uintptr)cache_tags,r,SIZEOF_VOID_P);
#ifdef X86_ASSEMBLY #if defined(X86_ASSEMBLY) || defined(X86_64_ASSEMBLY)
align_target(align_jumps); align_target(align_jumps);
m68k_compile_execute = (void (*)(void))get_target(); m68k_compile_execute = (void (*)(void))get_target();
for (i=N_REGS;i--;) { for (i=N_REGS;i--;) {
@ -5949,27 +6001,30 @@ static __inline__ void create_popalls(void)
raw_push_l_r(i); raw_push_l_r(i);
} }
align_target(align_loops); align_target(align_loops);
uae_u32 dispatch_loop = (uae_u32)get_target(); uae_u32 dispatch_loop = (uintptr)get_target();
r=REG_PC_TMP; r=REG_PC_TMP;
raw_mov_l_rm(r,(uae_u32)&regs.pc_p); raw_mov_l_rm(r,(uintptr)&regs.pc_p);
raw_and_l_ri(r,TAGMASK); raw_and_l_ri(r,TAGMASK);
raw_call_m_indexed((uae_u32)cache_tags,r,4); raw_call_m_indexed((uintptr)cache_tags,r,SIZEOF_VOID_P);
raw_cmp_l_mi((uae_u32)&regs.spcflags,0); raw_cmp_l_mi((uintptr)&regs.spcflags,0);
raw_jcc_b_oponly(NATIVE_CC_EQ); raw_jcc_b_oponly(NATIVE_CC_EQ);
emit_byte(dispatch_loop-((uae_u32)get_target()+1)); emit_byte(dispatch_loop-((uintptr)get_target()+1));
raw_call((uae_u32)m68k_do_specialties); raw_call((uintptr)m68k_do_specialties);
raw_test_l_rr(REG_RESULT,REG_RESULT); raw_test_l_rr(REG_RESULT,REG_RESULT);
raw_jcc_b_oponly(NATIVE_CC_EQ); raw_jcc_b_oponly(NATIVE_CC_EQ);
emit_byte(dispatch_loop-((uae_u32)get_target()+1)); emit_byte(dispatch_loop-((uintptr)get_target()+1));
raw_cmp_b_mi((uae_u32)&quit_program,0); raw_cmp_b_mi((uintptr)&quit_program,0);
raw_jcc_b_oponly(NATIVE_CC_EQ); raw_jcc_b_oponly(NATIVE_CC_EQ);
emit_byte(dispatch_loop-((uae_u32)get_target()+1)); emit_byte(dispatch_loop-((uintptr)get_target()+1));
for (i=0;i<N_REGS;i++) { for (i=0;i<N_REGS;i++) {
if (need_to_preserve[i]) if (need_to_preserve[i])
raw_pop_l_r(i); raw_pop_l_r(i);
} }
raw_ret(); raw_ret();
#endif #endif
// no need to further write into popallspace
vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_EXECUTE);
} }
static __inline__ void reset_lists(void) static __inline__ void reset_lists(void)
@ -5989,15 +6044,15 @@ static void prepare_block(blockinfo* bi)
set_target(current_compile_p); set_target(current_compile_p);
align_target(align_jumps); align_target(align_jumps);
bi->direct_pen=(cpuop_func *)get_target(); bi->direct_pen=(cpuop_func *)get_target();
raw_mov_l_rm(0,(uae_u32)&(bi->pc_p)); raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
raw_mov_l_mr((uae_u32)&regs.pc_p,0); raw_mov_l_mr((uintptr)&regs.pc_p,0);
raw_jmp((uae_u32)popall_execute_normal); raw_jmp((uintptr)popall_execute_normal);
align_target(align_jumps); align_target(align_jumps);
bi->direct_pcc=(cpuop_func *)get_target(); bi->direct_pcc=(cpuop_func *)get_target();
raw_mov_l_rm(0,(uae_u32)&(bi->pc_p)); raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
raw_mov_l_mr((uae_u32)&regs.pc_p,0); raw_mov_l_mr((uintptr)&regs.pc_p,0);
raw_jmp((uae_u32)popall_check_checksum); raw_jmp((uintptr)popall_check_checksum);
current_compile_p=get_target(); current_compile_p=get_target();
bi->deplist=NULL; bi->deplist=NULL;
@ -6342,21 +6397,25 @@ int failure;
#define TARGET_M68K 0 #define TARGET_M68K 0
#define TARGET_POWERPC 1 #define TARGET_POWERPC 1
#define TARGET_X86 2 #define TARGET_X86 2
#define TARGET_X86_64 3
#if defined(i386) || defined(__i386__) #if defined(i386) || defined(__i386__)
#define TARGET_NATIVE TARGET_X86 #define TARGET_NATIVE TARGET_X86
#endif #endif
#if defined(powerpc) || defined(__powerpc__) #if defined(powerpc) || defined(__powerpc__)
#define TARGET_NATIVE TARGET_POWERPC #define TARGET_NATIVE TARGET_POWERPC
#endif #endif
#if defined(x86_64) || defined(__x86_64__)
#define TARGET_NATIVE TARGET_X86_64
#endif
#ifdef ENABLE_MON #ifdef ENABLE_MON
static uae_u32 mon_read_byte_jit(uae_u32 addr) static uae_u32 mon_read_byte_jit(uintptr addr)
{ {
uae_u8 *m = (uae_u8 *)addr; uae_u8 *m = (uae_u8 *)addr;
return (uae_u32)(*m); return (uintptr)(*m);
} }
static void mon_write_byte_jit(uae_u32 addr, uae_u32 b) static void mon_write_byte_jit(uintptr addr, uae_u32 b)
{ {
uae_u8 *m = (uae_u8 *)addr; uae_u8 *m = (uae_u8 *)addr;
*m = b; *m = b;
@ -6373,11 +6432,12 @@ void disasm_block(int target, uint8 * start, size_t length)
sprintf(disasm_str, "%s $%x $%x", sprintf(disasm_str, "%s $%x $%x",
target == TARGET_M68K ? "d68" : target == TARGET_M68K ? "d68" :
target == TARGET_X86 ? "d86" : target == TARGET_X86 ? "d86" :
target == TARGET_X86_64 ? "d8664" :
target == TARGET_POWERPC ? "d" : "x", target == TARGET_POWERPC ? "d" : "x",
start, start + length - 1); start, start + length - 1);
uae_u32 (*old_mon_read_byte)(uae_u32) = mon_read_byte; uae_u32 (*old_mon_read_byte)(uintptr) = mon_read_byte;
void (*old_mon_write_byte)(uae_u32, uae_u32) = mon_write_byte; void (*old_mon_write_byte)(uintptr, uae_u32) = mon_write_byte;
mon_read_byte = mon_read_byte_jit; mon_read_byte = mon_read_byte_jit;
mon_write_byte = mon_write_byte_jit; mon_write_byte = mon_write_byte_jit;
@ -6390,12 +6450,12 @@ void disasm_block(int target, uint8 * start, size_t length)
#endif #endif
} }
static inline void disasm_native_block(uint8 *start, size_t length) static void disasm_native_block(uint8 *start, size_t length)
{ {
disasm_block(TARGET_NATIVE, start, length); disasm_block(TARGET_NATIVE, start, length);
} }
static inline void disasm_m68k_block(uint8 *start, size_t length) static void disasm_m68k_block(uint8 *start, size_t length)
{ {
disasm_block(TARGET_M68K, start, length); disasm_block(TARGET_M68K, start, length);
} }
@ -6429,9 +6489,9 @@ void compiler_dumpstate(void)
write_log("### Block in Mac address space\n"); write_log("### Block in Mac address space\n");
write_log("M68K block : %p\n", write_log("M68K block : %p\n",
(void *)get_virtual_address(last_regs_pc_p)); (void *)(uintptr)get_virtual_address(last_regs_pc_p));
write_log("Native block : %p (%d bytes)\n", write_log("Native block : %p (%d bytes)\n",
(void *)get_virtual_address(last_compiled_block_addr), (void *)(uintptr)get_virtual_address(last_compiled_block_addr),
get_blockinfo_addr(last_regs_pc_p)->direct_handler_size); get_blockinfo_addr(last_regs_pc_p)->direct_handler_size);
write_log("\n"); write_log("\n");
} }
@ -6455,11 +6515,11 @@ static void compile_block(cpu_history* pc_hist, int blocklen)
uae_u8 liveflags[MAXRUN+1]; uae_u8 liveflags[MAXRUN+1];
#if USE_CHECKSUM_INFO #if USE_CHECKSUM_INFO
bool trace_in_rom = isinrom((uintptr)pc_hist[0].location); bool trace_in_rom = isinrom((uintptr)pc_hist[0].location);
uae_u32 max_pcp=(uae_u32)pc_hist[blocklen - 1].location; uintptr max_pcp=(uintptr)pc_hist[blocklen - 1].location;
uae_u32 min_pcp=max_pcp; uintptr min_pcp=max_pcp;
#else #else
uae_u32 max_pcp=(uae_u32)pc_hist[0].location; uintptr max_pcp=(uintptr)pc_hist[0].location;
uae_u32 min_pcp=max_pcp; uintptr min_pcp=max_pcp;
#endif #endif
uae_u32 cl=cacheline(pc_hist[0].location); uae_u32 cl=cacheline(pc_hist[0].location);
void* specflags=(void*)&regs.spcflags; void* specflags=(void*)&regs.spcflags;
@ -6499,7 +6559,7 @@ static void compile_block(cpu_history* pc_hist, int blocklen)
optlev++; optlev++;
bi->count=optcount[optlev]-1; bi->count=optcount[optlev]-1;
} }
current_block_pc_p=(uae_u32)pc_hist[0].location; current_block_pc_p=(uintptr)pc_hist[0].location;
remove_deps(bi); /* We are about to create new code */ remove_deps(bi); /* We are about to create new code */
bi->optlevel=optlev; bi->optlevel=optlev;
@ -6524,15 +6584,15 @@ static void compile_block(cpu_history* pc_hist, int blocklen)
csi->length = max_pcp - min_pcp + LONGEST_68K_INST; csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
csi->next = bi->csi; csi->next = bi->csi;
bi->csi = csi; bi->csi = csi;
max_pcp = (uae_u32)currpcp; max_pcp = (uintptr)currpcp;
} }
#endif #endif
min_pcp = (uae_u32)currpcp; min_pcp = (uintptr)currpcp;
#else #else
if ((uae_u32)currpcp<min_pcp) if ((uintptr)currpcp<min_pcp)
min_pcp=(uae_u32)currpcp; min_pcp=(uintptr)currpcp;
if ((uae_u32)currpcp>max_pcp) if ((uintptr)currpcp>max_pcp)
max_pcp=(uae_u32)currpcp; max_pcp=(uintptr)currpcp;
#endif #endif
liveflags[i]=((liveflags[i+1]& liveflags[i]=((liveflags[i+1]&
@ -6558,19 +6618,19 @@ static void compile_block(cpu_history* pc_hist, int blocklen)
bi->direct_handler=(cpuop_func *)get_target(); bi->direct_handler=(cpuop_func *)get_target();
set_dhtu(bi,bi->direct_handler); set_dhtu(bi,bi->direct_handler);
bi->status=BI_COMPILING; bi->status=BI_COMPILING;
current_block_start_target=(uae_u32)get_target(); current_block_start_target=(uintptr)get_target();
log_startblock(); log_startblock();
if (bi->count>=0) { /* Need to generate countdown code */ if (bi->count>=0) { /* Need to generate countdown code */
raw_mov_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location); raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
raw_sub_l_mi((uae_u32)&(bi->count),1); raw_sub_l_mi((uintptr)&(bi->count),1);
raw_jl((uae_u32)popall_recompile_block); raw_jl((uintptr)popall_recompile_block);
} }
if (optlev==0) { /* No need to actually translate */ if (optlev==0) { /* No need to actually translate */
/* Execute normally without keeping stats */ /* Execute normally without keeping stats */
raw_mov_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location); raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
raw_jmp((uae_u32)popall_exec_nostats); raw_jmp((uintptr)popall_exec_nostats);
} }
else { else {
reg_alloc_run=0; reg_alloc_run=0;
@ -6584,8 +6644,8 @@ static void compile_block(cpu_history* pc_hist, int blocklen)
#if JIT_DEBUG #if JIT_DEBUG
if (JITDebug) { if (JITDebug) {
raw_mov_l_mi((uae_u32)&last_regs_pc_p,(uae_u32)pc_hist[0].location); raw_mov_l_mi((uintptr)&last_regs_pc_p,(uintptr)pc_hist[0].location);
raw_mov_l_mi((uae_u32)&last_compiled_block_addr,(uae_u32)current_block_start_target); raw_mov_l_mi((uintptr)&last_compiled_block_addr,current_block_start_target);
} }
#endif #endif
@ -6636,12 +6696,12 @@ static void compile_block(cpu_history* pc_hist, int blocklen)
#if USE_NORMAL_CALLING_CONVENTION #if USE_NORMAL_CALLING_CONVENTION
raw_push_l_r(REG_PAR1); raw_push_l_r(REG_PAR1);
#endif #endif
raw_mov_l_mi((uae_u32)&regs.pc_p, raw_mov_l_mi((uintptr)&regs.pc_p,
(uae_u32)pc_hist[i].location); (uintptr)pc_hist[i].location);
raw_call((uae_u32)cputbl[opcode]); raw_call((uintptr)cputbl[opcode]);
#if PROFILE_UNTRANSLATED_INSNS #if PROFILE_UNTRANSLATED_INSNS
// raw_cputbl_count[] is indexed with plain opcode (in m68k order) // raw_cputbl_count[] is indexed with plain opcode (in m68k order)
raw_add_l_mi((uae_u32)&raw_cputbl_count[cft_map(opcode)],1); raw_add_l_mi((uintptr)&raw_cputbl_count[cft_map(opcode)],1);
#endif #endif
#if USE_NORMAL_CALLING_CONVENTION #if USE_NORMAL_CALLING_CONVENTION
raw_inc_sp(4); raw_inc_sp(4);
@ -6650,13 +6710,13 @@ static void compile_block(cpu_history* pc_hist, int blocklen)
if (i < blocklen - 1) { if (i < blocklen - 1) {
uae_s8* branchadd; uae_s8* branchadd;
raw_mov_l_rm(0,(uae_u32)specflags); raw_mov_l_rm(0,(uintptr)specflags);
raw_test_l_rr(0,0); raw_test_l_rr(0,0);
raw_jz_b_oponly(); raw_jz_b_oponly();
branchadd=(uae_s8 *)get_target(); branchadd=(uae_s8 *)get_target();
emit_byte(0); emit_byte(0);
raw_jmp((uae_u32)popall_do_nothing); raw_jmp((uintptr)popall_do_nothing);
*branchadd=(uae_u32)get_target()-(uae_u32)branchadd-1; *branchadd=(uintptr)get_target()-(uintptr)branchadd-1;
} }
} }
} }
@ -6690,8 +6750,8 @@ static void compile_block(cpu_history* pc_hist, int blocklen)
log_flush(); log_flush();
if (next_pc_p) { /* A branch was registered */ if (next_pc_p) { /* A branch was registered */
uae_u32 t1=next_pc_p; uintptr t1=next_pc_p;
uae_u32 t2=taken_pc_p; uintptr t2=taken_pc_p;
int cc=branch_cc; int cc=branch_cc;
uae_u32* branchadd; uae_u32* branchadd;
@ -6716,28 +6776,28 @@ static void compile_block(cpu_history* pc_hist, int blocklen)
/* predicted outcome */ /* predicted outcome */
tbi=get_blockinfo_addr_new((void*)t1,1); tbi=get_blockinfo_addr_new((void*)t1,1);
match_states(tbi); match_states(tbi);
raw_cmp_l_mi((uae_u32)specflags,0); raw_cmp_l_mi((uintptr)specflags,0);
raw_jcc_l_oponly(4); raw_jcc_l_oponly(4);
tba=(uae_u32*)get_target(); tba=(uae_u32*)get_target();
emit_long(get_handler(t1)-((uae_u32)tba+4)); emit_long(get_handler(t1)-((uintptr)tba+4));
raw_mov_l_mi((uae_u32)&regs.pc_p,t1); raw_mov_l_mi((uintptr)&regs.pc_p,t1);
raw_jmp((uae_u32)popall_do_nothing); raw_jmp((uintptr)popall_do_nothing);
create_jmpdep(bi,0,tba,t1); create_jmpdep(bi,0,tba,t1);
align_target(align_jumps); align_target(align_jumps);
/* not-predicted outcome */ /* not-predicted outcome */
*branchadd=(uae_u32)get_target()-((uae_u32)branchadd+4); *branchadd=(uintptr)get_target()-((uintptr)branchadd+4);
live=tmp; /* Ouch again */ live=tmp; /* Ouch again */
tbi=get_blockinfo_addr_new((void*)t2,1); tbi=get_blockinfo_addr_new((void*)t2,1);
match_states(tbi); match_states(tbi);
//flush(1); /* Can only get here if was_comp==1 */ //flush(1); /* Can only get here if was_comp==1 */
raw_cmp_l_mi((uae_u32)specflags,0); raw_cmp_l_mi((uintptr)specflags,0);
raw_jcc_l_oponly(4); raw_jcc_l_oponly(4);
tba=(uae_u32*)get_target(); tba=(uae_u32*)get_target();
emit_long(get_handler(t2)-((uae_u32)tba+4)); emit_long(get_handler(t2)-((uintptr)tba+4));
raw_mov_l_mi((uae_u32)&regs.pc_p,t2); raw_mov_l_mi((uintptr)&regs.pc_p,t2);
raw_jmp((uae_u32)popall_do_nothing); raw_jmp((uintptr)popall_do_nothing);
create_jmpdep(bi,1,tba,t2); create_jmpdep(bi,1,tba,t2);
} }
else else
@ -6751,9 +6811,9 @@ static void compile_block(cpu_history* pc_hist, int blocklen)
r=live.state[PC_P].realreg; r=live.state[PC_P].realreg;
raw_and_l_ri(r,TAGMASK); raw_and_l_ri(r,TAGMASK);
int r2 = (r==0) ? 1 : 0; int r2 = (r==0) ? 1 : 0;
raw_mov_l_ri(r2,(uae_u32)popall_do_nothing); raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
raw_cmp_l_mi((uae_u32)specflags,0); raw_cmp_l_mi((uintptr)specflags,0);
raw_cmov_l_rm_indexed(r2,(uae_u32)cache_tags,r,4,4); raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,SIZEOF_VOID_P);
raw_jmp_r(r2); raw_jmp_r(r2);
} }
else if (was_comp && isconst(PC_P)) { else if (was_comp && isconst(PC_P)) {
@ -6761,25 +6821,25 @@ static void compile_block(cpu_history* pc_hist, int blocklen)
uae_u32* tba; uae_u32* tba;
blockinfo* tbi; blockinfo* tbi;
tbi=get_blockinfo_addr_new((void*)v,1); tbi=get_blockinfo_addr_new((void*)(uintptr)v,1);
match_states(tbi); match_states(tbi);
raw_cmp_l_mi((uae_u32)specflags,0); raw_cmp_l_mi((uintptr)specflags,0);
raw_jcc_l_oponly(4); raw_jcc_l_oponly(4);
tba=(uae_u32*)get_target(); tba=(uae_u32*)get_target();
emit_long(get_handler(v)-((uae_u32)tba+4)); emit_long(get_handler(v)-((uintptr)tba+4));
raw_mov_l_mi((uae_u32)&regs.pc_p,v); raw_mov_l_mi((uintptr)&regs.pc_p,v);
raw_jmp((uae_u32)popall_do_nothing); raw_jmp((uintptr)popall_do_nothing);
create_jmpdep(bi,0,tba,v); create_jmpdep(bi,0,tba,v);
} }
else { else {
r=REG_PC_TMP; r=REG_PC_TMP;
raw_mov_l_rm(r,(uae_u32)&regs.pc_p); raw_mov_l_rm(r,(uintptr)&regs.pc_p);
raw_and_l_ri(r,TAGMASK); raw_and_l_ri(r,TAGMASK);
int r2 = (r==0) ? 1 : 0; int r2 = (r==0) ? 1 : 0;
raw_mov_l_ri(r2,(uae_u32)popall_do_nothing); raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
raw_cmp_l_mi((uae_u32)specflags,0); raw_cmp_l_mi((uintptr)specflags,0);
raw_cmov_l_rm_indexed(r2,(uae_u32)cache_tags,r,4,4); raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,SIZEOF_VOID_P);
raw_jmp_r(r2); raw_jmp_r(r2);
} }
} }
@ -6850,8 +6910,8 @@ static void compile_block(cpu_history* pc_hist, int blocklen)
/* This is the non-direct handler */ /* This is the non-direct handler */
bi->handler= bi->handler=
bi->handler_to_use=(cpuop_func *)get_target(); bi->handler_to_use=(cpuop_func *)get_target();
raw_cmp_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location); raw_cmp_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
raw_jnz((uae_u32)popall_cache_miss); raw_jnz((uintptr)popall_cache_miss);
comp_pc_p=(uae_u8*)pc_hist[0].location; comp_pc_p=(uae_u8*)pc_hist[0].location;
bi->status=BI_FINALIZING; bi->status=BI_FINALIZING;
@ -6859,7 +6919,7 @@ static void compile_block(cpu_history* pc_hist, int blocklen)
match_states(bi); match_states(bi);
flush(1); flush(1);
raw_jmp((uae_u32)bi->direct_handler); raw_jmp((uintptr)bi->direct_handler);
current_compile_p=get_target(); current_compile_p=get_target();
raise_in_cl_list(bi); raise_in_cl_list(bi);
@ -6925,7 +6985,7 @@ void execute_normal(void)
typedef void (*compiled_handler)(void); typedef void (*compiled_handler)(void);
#ifdef X86_ASSEMBLY #if defined(X86_ASSEMBLY) || defined(X86_64_ASSEMBLY)
void (*m68k_compile_execute)(void) = NULL; void (*m68k_compile_execute)(void) = NULL;
#else #else
void m68k_do_compile_execute(void) void m68k_do_compile_execute(void)

View File

@ -1327,7 +1327,7 @@ void m68k_do_execute (void)
} }
} }
#if USE_JIT && !defined(X86_ASSEMBLY) #if USE_JIT && !(defined(X86_ASSEMBLY) || defined(X86_64_ASSEMBLY))
void m68k_compile_execute (void) void m68k_compile_execute (void)
{ {
for (;;) { for (;;) {

View File

@ -287,7 +287,7 @@ extern void m68k_record_step(uaecptr);
extern void m68k_do_execute(void); extern void m68k_do_execute(void);
extern void m68k_execute(void); extern void m68k_execute(void);
#if USE_JIT #if USE_JIT
#ifdef X86_ASSEMBLY #if defined(X86_ASSEMBLY) || defined(X86_64_ASSEMBLY)
/* This is generated code */ /* This is generated code */
extern void (*m68k_compile_execute)(void); extern void (*m68k_compile_execute)(void);
#else #else