mirror of https://github.com/kanjitalk755/macemu.git
synced 2024-11-18 18:05:21 +00:00
- Rewrite raw_init_cpu() to match more details from kernel sources.
- Add the possibility to tune code alignment to the underlying processor. However, this is turned off as I don't see much improvement, and align_jumps = 64 for Athlon looks suspicious to me.
- Remove two extra align_target() calls that are already covered.
- Remove unused may_trap() predicate.
parent feca66d43e
commit ecd3db832e
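For orientation before the diff: the rewritten raw_init_cpu() reads CPUID leaf 0 for the maximum supported level and the vendor string (returned in EBX:EDX:ECX), then leaf 1 for the family/model/stepping fields and the feature bits (CMOV is EDX bit 15), the same layout the kernel sources use. Below is a minimal standalone sketch of that decoding; it uses GCC's <cpuid.h> intrinsic instead of the JIT's raw_* emitters, and the printed format is illustrative, not taken from the commit.

#include <cpuid.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    unsigned int eax, ebx, ecx, edx;
    char vendor[13];

    /* Leaf 0: maximum CPUID level in EAX, vendor string in EBX:EDX:ECX */
    if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx))
        return 1;
    memcpy(vendor + 0, &ebx, 4);
    memcpy(vendor + 4, &edx, 4);
    memcpy(vendor + 8, &ecx, 4);
    vendor[12] = '\0';
    printf("Max CPUID level=%u vendor=%s\n", eax, vendor);

    /* Leaf 1: family/model/stepping in EAX, feature flags in EDX */
    if (eax >= 1 && __get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
        unsigned int family = (eax >> 8) & 15;  /* c->x86 */
        unsigned int model  = (eax >> 4) & 15;  /* c->x86_model */
        unsigned int mask   = eax & 15;         /* c->x86_mask */
        printf("family=%u model=%u stepping=%u cmov=%s\n",
               family, model, mask, (edx & (1 << 15)) ? "yes" : "no");
    }
    return 0;
}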
@@ -2343,94 +2343,227 @@ static void vec(int x, struct sigcontext sc)
  * Checking for CPU features                                             *
  *************************************************************************/
 
-typedef struct {
-    uae_u32 eax;
-    uae_u32 ecx;
-    uae_u32 edx;
-    uae_u32 ebx;
-} x86_regs;
-
-static uae_u8 cpuid_space[256];
-static uae_u32 cpuid_ptr;
-static uae_u32 cpuid_level;
-
-static x86_regs cpuid(uae_u32 level)
-{
-    x86_regs answer;
-    uae_u8* tmp=get_target();
-
-    cpuid_ptr=(uae_u32)&answer;
-    cpuid_level=level;
-
-    set_target(cpuid_space);
-    raw_push_l_r(0); /* eax */
-    raw_push_l_r(1); /* ecx */
-    raw_push_l_r(2); /* edx */
-    raw_push_l_r(3); /* ebx */
-    raw_push_l_r(7); /* edi */
-    raw_mov_l_rm(0,(uae_u32)&cpuid_level);
-    raw_cpuid(0);
-    raw_mov_l_rm(7,(uae_u32)&cpuid_ptr);
-    raw_mov_l_Rr(7,0,0);
-    raw_mov_l_Rr(7,1,4);
-    raw_mov_l_Rr(7,2,8);
-    raw_mov_l_Rr(7,3,12);
-    raw_pop_l_r(7);
-    raw_pop_l_r(3);
-    raw_pop_l_r(2);
-    raw_pop_l_r(1);
-    raw_pop_l_r(0);
-    raw_ret();
-    set_target(tmp);
-
-    ((cpuop_func*)cpuid_space)(0);
-    return answer;
-}
-
-static void raw_init_cpu(void)
-{
-    x86_regs x;
-    uae_u32 maxlev;
-
-    x=cpuid(0);
-    maxlev=x.eax;
-    write_log("Max CPUID level=%d Processor is %c%c%c%c%c%c%c%c%c%c%c%c\n",
-          maxlev,
-          x.ebx,
-          x.ebx>>8,
-          x.ebx>>16,
-          x.ebx>>24,
-          x.edx,
-          x.edx>>8,
-          x.edx>>16,
-          x.edx>>24,
-          x.ecx,
-          x.ecx>>8,
-          x.ecx>>16,
-          x.ecx>>24
-          );
-    have_rat_stall=(x.ecx==0x6c65746e);
-
-    if (maxlev>=1) {
-        x=cpuid(1);
-        if (x.edx&(1<<15))
-            have_cmov=1;
-    }
-    if (!have_cmov)
-        have_rat_stall=0;
-#if 0 /* For testing of non-cmov code! */
-    have_cmov=0;
-#endif
-#if 1 /* It appears that partial register writes are a bad idea even on
-     AMD K7 cores, even though they are not supposed to have the
-     dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
-    if (have_cmov)
-        have_rat_stall=1;
-#endif
-}
+struct cpuinfo_x86 {
+    uae_u8  x86;            // CPU family
+    uae_u8  x86_vendor;     // CPU vendor
+    uae_u8  x86_processor;  // CPU canonical processor type
+    uae_u8  x86_brand_id;   // CPU BrandID if supported, yield 0 otherwise
+    uae_u32 x86_hwcap;
+    uae_u8  x86_model;
+    uae_u8  x86_mask;
+    int     cpuid_level;    // Maximum supported CPUID level, -1=no CPUID
+    char    x86_vendor_id[16];
+};
+struct cpuinfo_x86 cpuinfo;
+
+enum {
+    X86_VENDOR_INTEL     = 0,
+    X86_VENDOR_CYRIX     = 1,
+    X86_VENDOR_AMD       = 2,
+    X86_VENDOR_UMC       = 3,
+    X86_VENDOR_NEXGEN    = 4,
+    X86_VENDOR_CENTAUR   = 5,
+    X86_VENDOR_RISE      = 6,
+    X86_VENDOR_TRANSMETA = 7,
+    X86_VENDOR_NSC       = 8,
+    X86_VENDOR_UNKNOWN   = 0xff
+};
+
+/* This could be so much easier if it could make assumptions about the
+   compiler... */
+enum {
+    X86_PROCESSOR_I386,     /* 80386 */
+    X86_PROCESSOR_I486,     /* 80486DX, 80486SX, 80486DX[24] */
+    X86_PROCESSOR_PENTIUM,
+    X86_PROCESSOR_PENTIUMPRO,
+    X86_PROCESSOR_K6,
+    X86_PROCESSOR_ATHLON,
+    X86_PROCESSOR_PENTIUM4,
+    X86_PROCESSOR_max
+};
+
+static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
+    "80386",
+    "80486",
+    "Pentium",
+    "PentiumPro",
+    "K6",
+    "Athlon",
+    "Pentium4"
+};
+
+static struct ptt {
+    const int align_loop;
+    const int align_loop_max_skip;
+    const int align_jump;
+    const int align_jump_max_skip;
+    const int align_func;
+}
+x86_alignments[X86_PROCESSOR_max] = {
+    {  4,  3,  4,  3,  4 },
+    { 16, 15, 16, 15, 16 },
+    { 16,  7, 16,  7, 16 },
+    { 16, 15, 16,  7, 16 },
+    { 32,  7, 32,  7, 32 },
+    { 16,  7, 64,  7, 16 },
+    {  0,  0,  0,  0,  0 }
+};
+
+static void
+x86_get_cpu_vendor(struct cpuinfo_x86 *c)
+{
+    char *v = c->x86_vendor_id;
+
+    if (!strcmp(v, "GenuineIntel"))
+        c->x86_vendor = X86_VENDOR_INTEL;
+    else if (!strcmp(v, "AuthenticAMD"))
+        c->x86_vendor = X86_VENDOR_AMD;
+    else if (!strcmp(v, "CyrixInstead"))
+        c->x86_vendor = X86_VENDOR_CYRIX;
+    else if (!strcmp(v, "Geode by NSC"))
+        c->x86_vendor = X86_VENDOR_NSC;
+    else if (!strcmp(v, "UMC UMC UMC "))
+        c->x86_vendor = X86_VENDOR_UMC;
+    else if (!strcmp(v, "CentaurHauls"))
+        c->x86_vendor = X86_VENDOR_CENTAUR;
+    else if (!strcmp(v, "NexGenDriven"))
+        c->x86_vendor = X86_VENDOR_NEXGEN;
+    else if (!strcmp(v, "RiseRiseRise"))
+        c->x86_vendor = X86_VENDOR_RISE;
+    else if (!strcmp(v, "GenuineTMx86") ||
+             !strcmp(v, "TransmetaCPU"))
+        c->x86_vendor = X86_VENDOR_TRANSMETA;
+    else
+        c->x86_vendor = X86_VENDOR_UNKNOWN;
+}
+
+static void
+cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
+{
+    static uae_u8 cpuid_space[256];
+    uae_u8* tmp=get_target();
+
+    set_target(cpuid_space);
+    raw_push_l_r(0); /* eax */
+    raw_push_l_r(1); /* ecx */
+    raw_push_l_r(2); /* edx */
+    raw_push_l_r(3); /* ebx */
+    raw_mov_l_rm(0,(uae_u32)&op);
+    raw_cpuid(0);
+    if (eax != NULL) raw_mov_l_mr((uae_u32)eax,0);
+    if (ebx != NULL) raw_mov_l_mr((uae_u32)ebx,3);
+    if (ecx != NULL) raw_mov_l_mr((uae_u32)ecx,1);
+    if (edx != NULL) raw_mov_l_mr((uae_u32)edx,2);
+    raw_pop_l_r(3);
+    raw_pop_l_r(2);
+    raw_pop_l_r(1);
+    raw_pop_l_r(0);
+    raw_ret();
+    set_target(tmp);
+
+    ((cpuop_func*)cpuid_space)(0);
+}
+
+static void
+raw_init_cpu(void)
+{
+    struct cpuinfo_x86 *c = &cpuinfo;
+
+    /* Defaults */
+    c->x86_vendor = X86_VENDOR_UNKNOWN;
+    c->cpuid_level = -1;            /* CPUID not detected */
+    c->x86_model = c->x86_mask = 0; /* So far unknown... */
+    c->x86_vendor_id[0] = '\0';     /* Unset */
+    c->x86_hwcap = 0;
+
+    /* Get vendor name */
+    c->x86_vendor_id[12] = '\0';
+    cpuid(0x00000000,
+          (uae_u32 *)&c->cpuid_level,
+          (uae_u32 *)&c->x86_vendor_id[0],
+          (uae_u32 *)&c->x86_vendor_id[8],
+          (uae_u32 *)&c->x86_vendor_id[4]);
+    x86_get_cpu_vendor(c);
+
+    /* Intel-defined flags: level 0x00000001 */
+    c->x86_brand_id = 0;
+    if ( c->cpuid_level >= 0x00000001 ) {
+        uae_u32 tfms, brand_id;
+        cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
+        c->x86 = (tfms >> 8) & 15;
+        c->x86_model = (tfms >> 4) & 15;
+        c->x86_brand_id = brand_id & 0xff;
+        if ( (c->x86_vendor == X86_VENDOR_AMD) &&
+             (c->x86 == 0xf)) {
+            /* AMD Extended Family and Model Values */
+            c->x86 += (tfms >> 20) & 0xff;
+            c->x86_model += (tfms >> 12) & 0xf0;
+        }
+        c->x86_mask = tfms & 15;
+    } else {
+        /* Have CPUID level 0 only - unheard of */
+        c->x86 = 4;
+    }
+
+    /* Canonicalize processor ID */
+    c->x86_processor = X86_PROCESSOR_max;
+    switch (c->x86) {
+    case 3:
+        c->x86_processor = X86_PROCESSOR_I386;
+        break;
+    case 4:
+        c->x86_processor = X86_PROCESSOR_I486;
+        break;
+    case 5:
+        if (c->x86_vendor == X86_VENDOR_AMD)
+            c->x86_processor = X86_PROCESSOR_K6;
+        else
+            c->x86_processor = X86_PROCESSOR_PENTIUM;
+        break;
+    case 6:
+        if (c->x86_vendor == X86_VENDOR_AMD)
+            c->x86_processor = X86_PROCESSOR_ATHLON;
+        else
+            c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
+        break;
+    case 15:
+        if (c->x86_vendor == X86_VENDOR_INTEL) {
+            /* Assume any BrandID >= 8 and family == 15 yields a Pentium 4 */
+            if (c->x86_brand_id >= 8)
+                c->x86_processor = X86_PROCESSOR_PENTIUM4;
+        }
+        break;
+    }
+    if (c->x86_processor == X86_PROCESSOR_max) {
+        fprintf(stderr, "Error: unknown processor type\n");
+        fprintf(stderr, "  Family  : %d\n", c->x86);
+        fprintf(stderr, "  Model   : %d\n", c->x86_model);
+        fprintf(stderr, "  Mask    : %d\n", c->x86_mask);
+        if (c->x86_brand_id)
+            fprintf(stderr, "  BrandID : %02x\n", c->x86_brand_id);
+        abort();
+    }
+
+    /* Have CMOV support? */
+    have_cmov = (c->x86_hwcap & (1 << 15)) && true;
+
+    /* Can the host CPU suffer from partial register stalls? */
+    have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
+#if 1
+    /* It appears that partial register writes are a bad idea even on
+       AMD K7 cores, even though they are not supposed to have the
+       dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
+    if (c->x86_processor == X86_PROCESSOR_ATHLON)
+        have_rat_stall = true;
+#endif
+
+    /* Alignments */
+    if (tune_alignment) {
+        align_loops = x86_alignments[c->x86_processor].align_loop;
+        align_jumps = x86_alignments[c->x86_processor].align_jump;
+    }
+
+    write_log("Max CPUID level=%d Processor is %s [%s]\n",
+              c->cpuid_level, c->x86_vendor_id,
+              x86_processor_string_table[c->x86_processor]);
+}
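The x86_alignments table added above is what the new (but disabled) tune_alignment path consults: each canonical processor type carries preferred loop and jump alignments, and align_target() pads the output buffer up to the next such boundary before a label is emitted. A hedged sketch of that mechanism follows; align_emit_ptr and the condensed two-column table are illustrative stand-ins, since the real JIT keeps its output pointer behind get_target()/set_target().

#include <stdint.h>

/* Loop/jump alignment preferences, condensed from x86_alignments above. */
struct alignments { int align_loop; int align_jump; };

static const struct alignments table[] = {
    {  4,  4 },   /* 80386 */
    { 16, 16 },   /* 80486 */
    { 16, 16 },   /* Pentium */
    { 16, 16 },   /* PentiumPro */
    { 32, 32 },   /* K6 */
    { 16, 64 },   /* Athlon: the align_jump = 64 the commit message distrusts */
    {  0,  0 },   /* Pentium4 */
};

/* Pad the emit pointer up to the next 'alignment' boundary with NOPs.
   'alignment' must be a power of two; values <= 1 leave the buffer alone. */
static uint8_t *align_emit_ptr(uint8_t *p, int alignment)
{
    if (alignment > 1) {
        while ((uintptr_t)p & (uintptr_t)(alignment - 1))
            *p++ = 0x90;   /* x86 NOP */
    }
    return p;
}

With tune_alignment left false, align_loops and align_jumps keep their defaults of 32, which is what the later hunks substitute for the hard-coded align_target(32) calls.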
@@ -88,6 +88,9 @@ static bool lazy_flush = true; // Flag: lazy translation cache invalidation
 static bool avoid_fpu = true;       // Flag: compile FPU instructions ?
 static bool have_cmov = false;      // target has CMOV instructions ?
 static bool have_rat_stall = true;  // target has partial register stalls ?
+static bool tune_alignment = false; // Tune code alignments for running CPU ?
+static int align_loops = 32;        // Align the start of loops
+static int align_jumps = 32;        // Align the start of jumps
 static int zero_fd = -1;
 static int optcount[10] = {
     10,   // How often a block has to be executed before it is translated
@@ -104,18 +107,11 @@ struct op_properties {
 };
 static op_properties prop[65536];
 
-// gb-- Control Flow Predicates
-
 static inline int end_block(uae_u32 opcode)
 {
     return (prop[opcode].cflow & fl_end_block);
 }
 
-static inline bool may_trap(uae_u32 opcode)
-{
-    return (prop[opcode].cflow & fl_trap);
-}
-
 uae_u8* start_pc_p;
 uae_u32 start_pc;
 uae_u32 current_block_pc_p;
@@ -4562,6 +4558,7 @@ void compiler_init(void)
     raw_init_cpu();
     write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
     write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");
+    write_log("<JIT compiler> : alignment for loops, jumps are %d, %d\n", align_loops, align_jumps);
 
     // Translation cache flush mechanism
     lazy_flush = PrefsFindBool("jitlazyflush");
@@ -5407,54 +5404,55 @@ static __inline__ void create_popalls(void)
        registers before jumping back to the various get-out routines.
        This generates the code for it.
     */
-    popall_do_nothing=current_compile_p;
+    align_target(align_jumps);
+    popall_do_nothing=get_target();
     for (i=0;i<N_REGS;i++) {
         if (need_to_preserve[i])
             raw_pop_l_r(i);
     }
     raw_jmp((uae_u32)do_nothing);
-    align_target(32);
 
+    align_target(align_jumps);
     popall_execute_normal=get_target();
     for (i=0;i<N_REGS;i++) {
         if (need_to_preserve[i])
             raw_pop_l_r(i);
     }
     raw_jmp((uae_u32)execute_normal);
-    align_target(32);
 
+    align_target(align_jumps);
     popall_cache_miss=get_target();
     for (i=0;i<N_REGS;i++) {
         if (need_to_preserve[i])
             raw_pop_l_r(i);
     }
     raw_jmp((uae_u32)cache_miss);
-    align_target(32);
 
+    align_target(align_jumps);
     popall_recompile_block=get_target();
     for (i=0;i<N_REGS;i++) {
         if (need_to_preserve[i])
             raw_pop_l_r(i);
     }
     raw_jmp((uae_u32)recompile_block);
-    align_target(32);
 
+    align_target(align_jumps);
     popall_exec_nostats=get_target();
     for (i=0;i<N_REGS;i++) {
         if (need_to_preserve[i])
             raw_pop_l_r(i);
     }
     raw_jmp((uae_u32)exec_nostats);
-    align_target(32);
 
+    align_target(align_jumps);
     popall_check_checksum=get_target();
     for (i=0;i<N_REGS;i++) {
         if (need_to_preserve[i])
             raw_pop_l_r(i);
     }
     raw_jmp((uae_u32)check_checksum);
-    align_target(32);
 
+    align_target(align_jumps);
     current_compile_p=get_target();
 #else
     popall_exec_nostats=(void *)exec_nostats;
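For intuition about what this hunk rearranges: each popall_* stub simply pops the preserved registers and jumps to one of the C get-out routines, and the change only moves the alignment padding in front of each stub (align_target(align_jumps)) instead of after it. Here is a hedged sketch of the bytes one such stub boils down to; emit_popall_stub and its arguments are illustrative names, not the JIT's API.

#include <stdint.h>
#include <stddef.h>

/* Emit "pop <reg>" for each preserved register, then "jmp target".
   Encodings: POP r32 is 0x58+reg, JMP rel32 is 0xE9 followed by a
   32-bit displacement relative to the end of the instruction. */
static uint8_t *emit_popall_stub(uint8_t *p, const int *regs, size_t n,
                                 const uint8_t *target)
{
    for (size_t i = 0; i < n; i++)
        *p++ = (uint8_t)(0x58 + regs[i]);   /* pop r32 */
    *p++ = 0xE9;                            /* jmp rel32 */
    int32_t disp = (int32_t)(target - (p + 4));
    *p++ = (uint8_t)disp;
    *p++ = (uint8_t)(disp >> 8);
    *p++ = (uint8_t)(disp >> 16);
    *p++ = (uint8_t)(disp >> 24);
    return p;
}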
@@ -5496,19 +5494,17 @@ static void prepare_block(blockinfo* bi)
     int i;
 
     set_target(current_compile_p);
-    align_target(32);
+    align_target(align_jumps);
     bi->direct_pen=(cpuop_func *)get_target();
     raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
     raw_mov_l_mr((uae_u32)&regs.pc_p,0);
     raw_jmp((uae_u32)popall_execute_normal);
 
-    align_target(32);
+    align_target(align_jumps);
     bi->direct_pcc=(cpuop_func *)get_target();
     raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
     raw_mov_l_mr((uae_u32)&regs.pc_p,0);
     raw_jmp((uae_u32)popall_check_checksum);
-
-    align_target(32);
     current_compile_p=get_target();
 
     bi->deplist=NULL;
@@ -5920,7 +5916,7 @@ static void compile_block(cpu_history* pc_hist, int blocklen)
 
     bi->needed_flags=liveflags[0];
 
-    align_target(32);
+    align_target(align_loops);
     was_comp=0;
 
     bi->direct_handler=(cpuop_func *)get_target();
@@ -6095,7 +6091,7 @@ static void compile_block(cpu_history* pc_hist, int blocklen)
             raw_jmp((uae_u32)popall_do_nothing);
             create_jmpdep(bi,0,tba,t1);
 
-            align_target(16);
+            align_target(align_jumps);
             /* not-predicted outcome */
             *branchadd=(uae_u32)get_target()-((uae_u32)branchadd+4);
             live=tmp; /* Ouch again */
@@ -6201,7 +6197,7 @@ static void compile_block(cpu_history* pc_hist, int blocklen)
 #endif
 
         log_dump();
-        align_target(32);
+        align_target(align_jumps);
 
         /* This is the non-direct handler */
         bi->handler=
@@ -6217,9 +6213,7 @@ static void compile_block(cpu_history* pc_hist, int blocklen)
 
         raw_jmp((uae_u32)bi->direct_handler);
 
-        align_target(32);
         current_compile_p=get_target();
-
         raise_in_cl_list(bi);
 
         /* We will flush soon, anyway, so let's do it now */