Workaround change in flags handling for BSF instruction on Pentium 4.

i.e. we currently disable translation of ADDX/SUBX/B<CHG,CLR,SET,TST> instructions
in that case. In other words, better (much?) slower than inaccurate. :-(
This commit is contained in:
gbeauche 2003-03-13 15:57:01 +00:00
parent a8e76deb69
commit 0cfa3126b3
4 changed files with 64 additions and 20 deletions

View File

@ -2691,6 +2691,32 @@ raw_init_cpu(void)
x86_processor_string_table[c->x86_processor]);
}
/* Probe whether the target CPU leaves CF/OF/SF untouched on BSF (older
   Intel behaviour) or defines all flags (e.g. Pentium 4).  The JIT's
   setzflg code relies on BSF clobbering only ZF, so it may be used only
   when this probe reports no mismatch.  Returns true when BSF preserves
   the other flags as expected. */
static bool target_check_bsf(void)
{
	bool mismatch = false;
	for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
	for (int g_CF = 0; g_CF <= 1; g_CF++) {
	for (int g_OF = 0; g_OF <= 1; g_OF++) {
	for (int g_SF = 0; g_SF <= 1; g_SF++) {
		for (int value = -1; value <= 1; value++) {
			/* Register-sized operands: push/pop of a 32-bit GPR is
			   invalid in 64-bit mode, so `int' would break on x86-64. */
			unsigned long flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
			unsigned long tmp = value;
			/* Load candidate flags, execute BSF, read the flags back.
			   "cc" is the documented GCC clobber for the condition
			   codes ("flags" is not a recognized clobber name). */
			__asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
				: "+r" (flags), "+r" (tmp) : : "cc");
			int OF = (flags >> 11) & 1;
			int SF = (flags >> 7) & 1;
			int ZF = (flags >> 6) & 1;
			int CF = flags & 1;
			/* Expected ZF: set iff the BSF source operand was zero. */
			tmp = (value == 0);
			if (ZF != (int)tmp || SF != g_SF || OF != g_OF || CF != g_CF)
				mismatch = true;
		}
	}}}}
	if (mismatch)
		write_log("Target CPU defines all flags on BSF instruction\n");
	return !mismatch;
}
/*************************************************************************
* FPU stuff *

View File

@ -335,7 +335,8 @@ DECLARE_MIDFUNC(setcc(W1 d, IMM cc));
DECLARE_MIDFUNC(setcc_m(IMM d, IMM cc));
DECLARE_MIDFUNC(cmov_l_rr(RW4 d, R4 s, IMM cc));
DECLARE_MIDFUNC(cmov_l_rm(RW4 d, IMM s, IMM cc));
DECLARE_MIDFUNC(bsf_l_rr(W4 d, R4 s));
/* Set native Z flag only if register is zero */
DECLARE_MIDFUNC(setzflg_l(RW4 r));
DECLARE_MIDFUNC(pop_m(IMM d));
DECLARE_MIDFUNC(push_m(IMM d));
DECLARE_MIDFUNC(pop_l(W4 d));

View File

@ -130,6 +130,7 @@ static bool have_cmov = false; // target has CMOV instructions ?
static bool have_rat_stall = true; // target has partial register stalls ?
const bool tune_alignment = true; // Tune code alignments for running CPU ?
const bool tune_nop_fillers = true; // Tune no-op fillers for architecture
static bool setzflg_uses_bsf = false; // setzflg virtual instruction can use native BSF instruction correctly?
static int align_loops = 32; // Align the start of loops
static int align_jumps = 32; // Align the start of jumps
static int zero_fd = -1;
@ -2670,16 +2671,23 @@ MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
}
MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
MIDFUNC(2,bsf_l_rr,(W4 d, R4 s))
MIDFUNC(1,setzflg_l,(RW4 r))
{
CLOBBER_BSF;
s=readreg(s,4);
d=writereg(d,4);
raw_bsf_l_rr(d,s);
unlock2(s);
unlock2(d);
if (setzflg_uses_bsf) {
CLOBBER_BSF;
r=rmw(r,4,4);
raw_bsf_l_rr(r,r);
unlock2(r);
}
else {
/* Errr, not implemented yet in a generic way. And actually,
that should not be generated for now, if BSF doesn't
preserve flags but ZF. */
write_log("attempt to make unsupported setzflg()\n");
abort();
}
}
MENDFUNC(2,bsf_l_rr,(W4 d, R4 s))
MENDFUNC(1,setzflg_l,(RW4 r))
MIDFUNC(2,imul_32_32,(RW4 d, R4 s))
{
@ -4664,6 +4672,7 @@ void compiler_init(void)
// Initialize target CPU (check for features, e.g. CMOV, rat stalls)
raw_init_cpu();
setzflg_uses_bsf = target_check_bsf();
write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");
write_log("<JIT compiler> : alignment for loops, jumps are %d, %d\n", align_loops, align_jumps);
@ -5747,7 +5756,8 @@ void build_comp(void)
prop[cft_map(tbl[i].opcode)].cflow = cflow;
int uses_fpu = tbl[i].specific & 32;
if (uses_fpu && avoid_fpu)
int uses_setzflg = tbl[i].specific & 64;
if ((uses_fpu && avoid_fpu) || (uses_setzflg && !setzflg_uses_bsf))
compfunctbl[cft_map(tbl[i].opcode)] = NULL;
else
compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler;
@ -5755,7 +5765,8 @@ void build_comp(void)
for (i = 0; nftbl[i].opcode < 65536; i++) {
int uses_fpu = tbl[i].specific & 32;
if (uses_fpu && avoid_fpu)
int uses_setzflg = tbl[i].specific & 64;
if ((uses_fpu && avoid_fpu) || (uses_setzflg && !setzflg_uses_bsf))
nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL;
else
nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler;

View File

@ -41,6 +41,7 @@
#define uses_cmov global_cmov=1
#define mayfail global_mayfail=1
#define uses_fpu global_fpu=1
#define uses_setzflg global_setzflg=1
int hack_opcode;
@ -52,6 +53,7 @@ static int global_cmov;
static int long_opcode;
static int global_mayfail;
static int global_fpu;
static int global_setzflg;
static char endstr[1000];
static char lines[100000];
@ -1086,6 +1088,7 @@ genflags (flagtypes type, wordsizes size, char *value, char *src, char *dst)
case flag_addx:
case flag_subx:
uses_setzflg;
uses_cmov;
comprintf("\tdont_care_flags();\n");
{
@ -1122,7 +1125,7 @@ genflags (flagtypes type, wordsizes size, char *value, char *src, char *dst)
}
comprintf("\tif (needed_flags&FLAG_Z) {\n"
"\tcmov_l_rr(zero,one,5);\n"
"\tbsf_l_rr(zero,zero);\n"
"\tsetzflg_l(zero);\n"
"\t}\n");
comprintf("\tlive_flags();\n");
comprintf("\tend_needflags();\n");
@ -1381,14 +1384,16 @@ gen_opcode (unsigned long int opcode)
case i_BTST: op="bt"; need_write=0; break;
}
comprintf("\t%s_l_rr(dst,s);\n" /* Answer now in C */
"\tsbb_l(s,s);\n" /* s is 0 if bit was 0,
-1 otherwise */
"\tmake_flags_live();\n" /* Get the flags back */
"\tdont_care_flags();\n"
"\tstart_needflags();\n"
"\tbsf_l_rr(s,s);\n"
"\tlive_flags();\n"
"\tend_needflags();\n",op);
"\tsbb_l(s,s);\n" /* s is 0 if bit was 0, -1 otherwise */
"\tmake_flags_live();\n" /* Get the flags back */
"\tdont_care_flags();\n",op);
if (!noflags) {
uses_setzflg;
comprintf("\tstart_needflags();\n"
"\tsetzflg_l(s);\n"
"\tlive_flags();\n"
"\tend_needflags();\n");
}
if (need_write)
genastore ("dst", curi->dmode, "dstreg", curi->size, "dst");
}
@ -2941,6 +2946,7 @@ generate_one_opcode (int rp, int noflags)
if (global_isaddx) flags|=8;
if (global_iscjump) flags|=16;
if (global_fpu) flags|=32;
if (global_setzflg) flags|=64;
comprintf ("}\n");