From 294664b726db861678b66850db4274b55a782a42 Mon Sep 17 00:00:00 2001 From: gbeauche <> Date: Mon, 6 Feb 2006 23:06:54 +0000 Subject: [PATCH] Fix SAHF_SETO_PROFITABLE code for x86-64 platforms. This was only an experiment. Improvement was marginal: only +3% on AMD64 (an Athlon 64 3200+). However, it may be interesting to test it on EM64T (e.g. newer P4s) since an older P3/800, hence in 32-bit mode, got a +15% improvement in Speedometer 4 benchmarks. Rationale: lahf/seto sequences avoid loads/stores to the stack (push/pop), so they were hoped to be faster. Anyhow, SAHF_SETO_PROFITABLE can only be enabled manually at this time. Edit your generated Makefile for testing, but first make sure your CPU supports lahf in 64-bit mode (lahf_lm flag in /proc/cpuinfo). --- BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp b/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp index a7a57986..c474e47e 100644 --- a/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp +++ b/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp @@ -52,6 +52,11 @@ #define R14_INDEX 14 #define R15_INDEX 15 #endif +/* XXX this has to match X86_Reg8H_Base + 4 */ +#define AH_INDEX (0x10+4+EAX_INDEX) +#define CH_INDEX (0x10+4+ECX_INDEX) +#define DH_INDEX (0x10+4+EDX_INDEX) +#define BH_INDEX (0x10+4+EBX_INDEX) /* The register in which subroutines return an integer return value */ #define REG_RESULT EAX_INDEX @@ -2414,7 +2419,7 @@ LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s)) LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s)) { emit_byte(0x88); - emit_byte(0x05+8*s); + emit_byte(0x05+8*(s&0xf)); /* XXX this handles %ah case (defined as 0x10+4) and others */ emit_long(d); } LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s)) @@ -3221,7 +3226,6 @@ static __inline__ void raw_emit_nop_filler(int nbytes) #ifdef SAHF_SETO_PROFITABLE #define FLAG_NREG1 0 /* Set to -1 if any register will do */ - static 
__inline__ void raw_flags_to_reg(int r) { raw_lahf(0); /* Most flags in AH */ @@ -3230,7 +3234,7 @@ static __inline__ void raw_flags_to_reg(int r) #if 1 /* Let's avoid those nasty partial register stalls */ //raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r); - raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,r+4); + raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,AH_INDEX); //live.state[FLAGTMP].status=CLEAN; live.state[FLAGTMP].status=INMEM; live.state[FLAGTMP].realreg=-1;