Fix SAHF_SETO_PROFITABLE code for x86-64 platforms.

This was only an experiment. Improvement was marginal: only +3% on AMD64
(an Athlon 64 3200+). However, it may be interesting to test it on EM64T
(e.g. newer P4s) since an older P3/800, hence in 32-bit mode, got a +15%
improvement in Speedometer 4 benchmarks.

Rationale: lahf/seto sequences avoid loads/stores to the stack (push/pop),
so they were expected to be faster.

Anyhow, SAHF_SETO_PROFITABLE can only be enabled manually at this time.
Edit your generated Makefile for testing, but first make sure your CPU
supports lahf in 64-bit mode (lahf_lm flag in /proc/cpuinfo).
This commit is contained in:
gbeauche 2006-02-06 23:06:54 +00:00
parent 3b94dfb1a9
commit 294664b726

View File

@ -52,6 +52,11 @@
#define R14_INDEX 14
#define R15_INDEX 15
#endif
/*
 * Indices for the high-byte 8-bit registers (%ah, %ch, %dh, %bh).
 * Each one is 0x10+4 plus the index of the matching 32-bit register, which
 * places it outside the range of the plain register indices.
 * raw_mov_b_mr() masks its source register with 0xf before building the
 * ModRM byte, so these values are accepted there.
 *
 * XXX this has to match X86_Reg8H_Base + 4
 */
#define AH_INDEX (0x10+4+EAX_INDEX)
#define CH_INDEX (0x10+4+ECX_INDEX)
#define DH_INDEX (0x10+4+EDX_INDEX)
#define BH_INDEX (0x10+4+EBX_INDEX)
/* The register in which subroutines return an integer return value */
#define REG_RESULT EAX_INDEX
@ -2414,7 +2419,7 @@ LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
/*
 * Emit a byte store of register s to the memory location d
 * (opcode 0x88, MOV r/m8, r8, followed by a 32-bit displacement).
 *
 * d: address written as the 32-bit displacement of the instruction.
 * s: source 8-bit register index. High-byte registers such as AH_INDEX
 *    (defined as 0x10+4+base) are accepted: the index is masked with 0xf
 *    before being placed in the ModRM reg field.
 *
 * Exactly one ModRM byte must be emitted; emitting a second, unmasked one
 * would corrupt the instruction stream.
 *
 * NOTE(review): ModRM 0x05 (mod=00, r/m=101) means "disp32" in 32-bit mode
 * but RIP-relative in 64-bit mode -- confirm which one callers rely on.
 */
LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
{
	emit_byte(0x88);
	emit_byte(0x05+8*(s&0xf)); /* XXX this handles %ah case (defined as 0x10+4) and others */
	emit_long(d);
}
LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
@ -3221,7 +3226,6 @@ static __inline__ void raw_emit_nop_filler(int nbytes)
#ifdef SAHF_SETO_PROFITABLE
#define FLAG_NREG1 0 /* Set to -1 if any register will do */
static __inline__ void raw_flags_to_reg(int r)
{
raw_lahf(0); /* Most flags in AH */
@ -3230,7 +3234,7 @@ static __inline__ void raw_flags_to_reg(int r)
#if 1 /* Let's avoid those nasty partial register stalls */
//raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r);
raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,r+4);
raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,AH_INDEX);
//live.state[FLAGTMP].status=CLEAN;
live.state[FLAGTMP].status=INMEM;
live.state[FLAGTMP].realreg=-1;