diff --git a/BasiliskII/src/uae_cpu/Makefile.am b/BasiliskII/src/uae_cpu/Makefile.am new file mode 100644 index 00000000..fa42287d --- /dev/null +++ b/BasiliskII/src/uae_cpu/Makefile.am @@ -0,0 +1,80 @@ +# +# Note: this Makefile only contains rules for the source +# generator tools (each is built, and must be run, as <name>$(EXEEXT)). +# + +# +# suppress warnings about overriding LDFLAGS and CPPFLAGS +# +AUTOMAKE_OPTIONS = -Wno-gnu + +AM_CPPFLAGS = $(DEFINES) \ + "-I$(srcdir)/../include" \ + "-I$(srcdir)/../Unix" \ + "-I$(builddir)/.." \ + "-I$(builddir)" \ + "-I$(srcdir)" + +CC = $(CC_FOR_BUILD) +CXX = $(CXX_FOR_BUILD) + +LDFLAGS = $(LDFLAGS_FOR_BUILD) +CPPFLAGS = $(CPPFLAGS_FOR_BUILD) +CFLAGS = $(CFLAGS_FOR_BUILD) +CXXFLAGS = $(CXXFLAGS_FOR_BUILD) +LIBS=-lm + +CFLAGS_NOWARN = $(DBGSP) +AM_CFLAGS = $(CFLAGS_NOWARN) $(WFLAGS) +AM_CXXFLAGS = $(CFLAGS_NOWARN) $(WFLAGS) + +noinst_PROGRAMS = build68k gencpu +if USE_JIT +noinst_PROGRAMS += gencomp +endif + +BUILT_SOURCES = \ + cpudefs.cpp \ + cpuemu.cpp \ + cpustbl.cpp \ + cpufunctbl.cpp \ + cputbl.h \ + $(empty) + +build68k_SOURCES = build68k.c +gencpu_SOURCES = gencpu.c m68k.h readcpu.cpp readcpu.h cpudefs.cpp +gencomp_SOURCES = +if GENCOMP_ARCH_X86 +gencomp_SOURCES += compiler/gencomp.c +endif +if GENCOMP_ARCH_ARM +gencomp_SOURCES += compiler/gencomp_arm.c +endif +gencomp_SOURCES += readcpu.cpp cpudefs.cpp + +if USE_JIT +BUILT_SOURCES += compemu.cpp compstbl.cpp comptbl.h +endif + + +cpudefs.cpp: build68k$(EXEEXT) $(srcdir)/table68k + $(AM_V_GEN)./build68k$(EXEEXT) <$(srcdir)/table68k > $@ +cpuemu.cpp: gencpu$(EXEEXT) + $(AM_V_GEN)./gencpu$(EXEEXT) +cpustbl.cpp cpufunctbl.cpp cputbl.h: cpuemu.cpp +compemu.cpp: gencomp$(EXEEXT) + $(AM_V_GEN)./gencomp$(EXEEXT) +compstbl.cpp comptbl.h: compemu.cpp + +CLEANFILES = $(BUILT_SOURCES) + +EXTRA_DIST = \ + table68k \ + compiler/codegen_arm.cpp compiler/codegen_arm.h \ + compiler/compemu_midfunc_arm.cpp compiler/compemu_midfunc_arm.h \ + compiler/compemu_midfunc_arm2.cpp compiler/compemu_midfunc_arm2.h \ + compiler/test_codegen_arm.c \ + 
compiler/codegen_x86.cpp compiler/codegen_x86.h \ + compiler/compemu_midfunc_x86.cpp compiler/compemu_midfunc_x86.h \ + compiler/test_codegen_x86.cpp \ + $(empty) diff --git a/BasiliskII/src/uae_cpu/aranym_glue.cpp b/BasiliskII/src/uae_cpu/aranym_glue.cpp new file mode 100644 index 00000000..02f7b149 --- /dev/null +++ b/BasiliskII/src/uae_cpu/aranym_glue.cpp @@ -0,0 +1,326 @@ +/* + * aranym_glue.cpp - CPU interface + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "sysdeps.h" + +#include "cpu_emulation.h" +#include "newcpu.h" +#include "hardware.h" +#include "scc.h" +#include "input.h" +#ifdef USE_JIT +# include "compiler/compemu.h" +#endif +#include "nf_objs.h" + +#include "debug.h" + +// RAM and ROM pointers +memptr RAMBase = 0; // RAM base (Atari address space) gb-- init is important +uint8 *RAMBaseHost; // RAM base (host address space) +uint32 RAMSize = 0x00e00000; // Size of RAM + +memptr ROMBase = 0x00e00000; // ROM base (Atari address space) +uint8 *ROMBaseHost; // ROM base (host address space) +uint32 ROMSize = 0x00100000; // Size of ROM + +uint32 RealROMSize; // Real size of ROM + +memptr HWBase = 0x00f00000; // HW base (Atari address space) +uint8 *HWBaseHost; // HW base (host address space) +uint32 HWSize = 0x00100000; // Size of HW space + +memptr FastRAMBase = 0x01000000; // Fast-RAM base (Atari address space) +uint8 *FastRAMBaseHost; // Fast-RAM base (host address space) + +#ifdef HW_SIGSEGV +uint8 *FakeIOBaseHost; +#endif + +#ifdef FIXED_VIDEORAM +memptr VideoRAMBase = ARANYMVRAMSTART; // VideoRAM base (Atari address space) +#else +memptr VideoRAMBase; // VideoRAM base (Atari address space) +#endif +uint8 *VideoRAMBaseHost;// VideoRAM base (host address space) +//uint32 VideoRAMSize; // Size of VideoRAM + +#ifndef NOT_MALLOC +uintptr MEMBaseDiff; // Global offset between a Atari address and its Host equivalent +uintptr ROMBaseDiff; +uintptr FastRAMBaseDiff; +#endif + +uintptr VMEMBaseDiff; // Global offset between a Atari VideoRAM address and /dev/fb0 mmap + + +#if defined(ENABLE_EXCLUSIVE_SPCFLAGS) && !defined(HAVE_HARDWARE_LOCKS) +SDL_mutex *spcflags_lock; +#endif +#if defined(ENABLE_REALSTOP) +SDL_cond *stop_condition; +#endif + + +/* + * Initialize 680x0 emulation + */ + +bool 
InitMEM() { + InitMEMBaseDiff(RAMBaseHost, RAMBase); + InitROMBaseDiff(ROMBaseHost, ROMBase); + InitFastRAMBaseDiff(FastRAMBaseHost, FastRAMBase); + InitVMEMBaseDiff(VideoRAMBaseHost, VideoRAMBase); + return true; +} + +bool Init680x0(void) +{ + init_m68k(); + +#if defined(ENABLE_EXCLUSIVE_SPCFLAGS) && !defined(HAVE_HARDWARE_LOCKS) + if ((spcflags_lock = SDL_CreateMutex()) == NULL) { + panicbug("Error by SDL_CreateMutex()"); + exit(EXIT_FAILURE); + } +#endif + +#if ENABLE_REALSTOP + if ((stop_condition = SDL_CreateCond()) == NULL) { + panicbug("Error by SDL_CreateCond()"); + exit(EXIT_FAILURE); + } +#endif + +#ifdef USE_JIT + if (bx_options.jit.jit) compiler_init(); +#endif + return true; +} + +/* + * Instr. RESET + */ + +void AtariReset(void) +{ + // reset Atari hardware here + HWReset(); + // reset NatFeats here + NFReset(); + // reset the input devices (input.cpp) + InputReset(); + +} + +/* + * Reset CPU + */ + +void Reset680x0(void) +{ + m68k_reset(); +} + +/* + * Deinitialize 680x0 emulation + */ + +void Exit680x0(void) +{ +#ifdef USE_JIT + if (bx_options.jit.jit) compiler_exit(); +#endif + exit_m68k(); +} + + +/* + * Reset and start 680x0 emulation + */ + +void Start680x0(void) +{ + m68k_reset(); +#ifdef USE_JIT + if (bx_options.jit.jit) { + m68k_compile_execute(); + } + else +#endif + m68k_execute(); +} + +/* + * Restart running 680x0 emulation safely from different thread + */ +void Restart680x0(void) +{ + quit_program = 2; + TriggerNMI(); +} + +/* + * Quit 680x0 emulation safely from different thread + */ +void Quit680x0(void) +{ + quit_program = 1; + TriggerNMI(); +} + + +int MFPdoInterrupt(void) +{ + return getMFP()->doInterrupt(); +} + +int SCCdoInterrupt(void) +{ + return getSCC()->doInterrupt(); +} + +/* + * Trigger interrupts + */ +void TriggerInternalIRQ(void) +{ + SPCFLAGS_SET( SPCFLAG_INTERNAL_IRQ ); +} + +void TriggerInt3(void) +{ + SPCFLAGS_SET( SPCFLAG_INT3 ); +} + +void TriggerVBL(void) +{ + SPCFLAGS_SET( SPCFLAG_VBL ); +} + +void 
TriggerInt5(void) +{ + SPCFLAGS_SET( SPCFLAG_INT5 ); +} + +void TriggerSCC(bool enable) +{ + if (enable) + SPCFLAGS_SET( SPCFLAG_SCC ); + else + SPCFLAGS_CLEAR( SPCFLAG_SCC ); +} + +void TriggerMFP(bool enable) +{ + if (enable) + SPCFLAGS_SET( SPCFLAG_MFP ); + else + SPCFLAGS_CLEAR( SPCFLAG_MFP ); +} + +void TriggerNMI(void) +{ + SPCFLAGS_SET( SPCFLAG_BRK ); // use _BRK for NMI +} + +#ifndef REBOOT_OR_HALT +#define REBOOT_OR_HALT 0 // halt by default +#endif + +#if REBOOT_OR_HALT == 1 +# define CPU_MSG "CPU: Rebooting" +# define CPU_ACTION Restart680x0() +#else +# define CPU_MSG "CPU: Halting" +# define CPU_ACTION Quit680x0() +#endif + +#ifdef ENABLE_EPSLIMITER + +#ifndef EPS_LIMIT +# define EPS_LIMIT 10000 /* this might be too high if ARAnyM is slowed down by printing the bus errors on console */ +#endif + +void check_eps_limit(uaecptr pc) +{ + static long last_exception_time=-1; + static long exception_per_sec=0; + static long exception_per_sec_pc=0; + static uaecptr prevpc = 0; + + if (bx_options.cpu.eps_enabled) { + if (last_exception_time == -1) { + last_exception_time = SDL_GetTicks(); + } + + exception_per_sec++; + + if (pc == prevpc) { + /* BUS ERRORs occur at the same PC - watch out! */ + exception_per_sec_pc++; + } + else { + exception_per_sec_pc = 0; + prevpc = pc; + } + + if (SDL_GetTicks() - last_exception_time > 1000) { + last_exception_time = SDL_GetTicks(); + if (exception_per_sec_pc > bx_options.cpu.eps_max || + exception_per_sec > EPS_LIMIT /* make it configurable */) { + panicbug("CPU: Exception per second limit reached: %ld/%ld", + exception_per_sec_pc, exception_per_sec); + /* would be cool to open SDL dialog here: */ + /* [Exception per seconds limit reached. XXXXX exception + occured in the last second. The limit is set to YYYYY + in your config file. 
Do you want to continue emulation, + reset ARAnyM or quit ?][Continue] [Reset] [Quit] + */ + panicbug(CPU_MSG); + CPU_ACTION; + } + exception_per_sec = 0; + exception_per_sec_pc = 0; + } + } +} +#endif + +void report_double_bus_error() +{ + panicbug("CPU: Double bus fault detected !"); + /* would be cool to open SDL dialog here: */ + /* [Double bus fault detected. The emulated system crashed badly. + Do you want to reset ARAnyM or quit ?] [Reset] [Quit]" + */ + panicbug(CPU_MSG); + CPU_ACTION; +} + +#ifdef FLIGHT_RECORDER +extern bool cpu_flight_recorder_active; +void cpu_flight_recorder(int activate) { cpu_flight_recorder_active = activate; } +#endif diff --git a/BasiliskII/src/uae_cpu/basilisk_glue.cpp b/BasiliskII/src/uae_cpu/basilisk_glue.cpp index b29c7702..9a794b48 100644 --- a/BasiliskII/src/uae_cpu/basilisk_glue.cpp +++ b/BasiliskII/src/uae_cpu/basilisk_glue.cpp @@ -56,8 +56,12 @@ uintptr MEMBaseDiff; // Global offset between a Mac address and its Host equiva bool UseJIT = false; #endif +// #if defined(ENABLE_EXCLUSIVE_SPCFLAGS) && !defined(HAVE_HARDWARE_LOCKS) +B2_mutex *spcflags_lock = NULL; +// #endif + // From newcpu.cpp -extern bool quit_program; +extern int quit_program; /* @@ -66,6 +70,7 @@ extern bool quit_program; bool Init680x0(void) { + spcflags_lock = B2_create_mutex(); #if REAL_ADDRESSING // Mac address space = host address space RAMBaseMac = (uintptr)RAMBaseHost; @@ -160,6 +165,7 @@ void TriggerInterrupt(void) void TriggerNMI(void) { //!! 
not implemented yet + // SPCFLAGS_SET( SPCFLAG_BRK ); // use _BRK for NMI } @@ -200,7 +206,7 @@ void Execute68kTrap(uint16 trap, struct M68kRegisters *r) // Execute trap m68k_setpc(m68k_areg(regs, 7)); fill_prefetch_0(); - quit_program = false; + quit_program = 0; m68k_execute(); // Clean up stack @@ -215,7 +221,7 @@ void Execute68kTrap(uint16 trap, struct M68kRegisters *r) r->d[i] = m68k_dreg(regs, i); for (i=0; i<7; i++) r->a[i] = m68k_areg(regs, i); - quit_program = false; + quit_program = 0; } @@ -247,7 +253,7 @@ void Execute68k(uint32 addr, struct M68kRegisters *r) // Execute routine m68k_setpc(addr); fill_prefetch_0(); - quit_program = false; + quit_program = 0; m68k_execute(); // Clean up stack @@ -262,5 +268,18 @@ void Execute68k(uint32 addr, struct M68kRegisters *r) r->d[i] = m68k_dreg(regs, i); for (i=0; i<7; i++) r->a[i] = m68k_areg(regs, i); - quit_program = false; + quit_program = 0; +} + +void report_double_bus_error() +{ +#if 0 + panicbug("CPU: Double bus fault detected !"); + /* would be cool to open SDL dialog here: */ + /* [Double bus fault detected. The emulated system crashed badly. + Do you want to reset ARAnyM or quit ?] [Reset] [Quit]" + */ + panicbug(CPU_MSG); + CPU_ACTION; +#endif } diff --git a/BasiliskII/src/uae_cpu/build68k.c b/BasiliskII/src/uae_cpu/build68k.c index 8ec3ab55..e996758d 100644 --- a/BasiliskII/src/uae_cpu/build68k.c +++ b/BasiliskII/src/uae_cpu/build68k.c @@ -1,32 +1,44 @@ +/* + * build68k.c - m68k CPU builder + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. 
+ * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ /* * UAE - The Un*x Amiga Emulator * * Read 68000 CPU specs from file "table68k" and build table68k.c * * Copyright 1995,1996 Bernd Schmidt - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include -#include -#include - -#include "sysdeps.h" #include "readcpu.h" +#include +#include +#include +#include +#include +#undef abort + static FILE *tablef; static int nextch = 0; @@ -65,15 +77,15 @@ static int nextchtohex(void) } } -int main(int argc, char **argv) +int main() { int no_insns = 0; printf ("#include \"sysdeps.h\"\n"); printf ("#include \"readcpu.h\"\n"); printf ("struct instr_def defs68k[] = {\n"); -#ifdef WIN32 - tablef = fopen(argc > 1 ? argv[1] : "table68k","r"); +#if 0 + tablef = fopen("table68k","r"); if (tablef == NULL) { fprintf(stderr, "table68k not found\n"); exit(1); @@ -122,8 +134,8 @@ int main(int argc, char **argv) case 'r': currbit = bitr; break; case 'R': currbit = bitR; break; case 'z': currbit = bitz; break; - case 'E': currbit = bitE; break; - case 'p': currbit = bitp; break; + case 'E': currbit = bitE; break; + case 'p': currbit = bitp; break; default: abort(); } if (!(bitmask & 1)) { @@ -138,6 +150,7 @@ int main(int argc, char **argv) patbits[i] = nextch; getnextch(); } + (void) patbits; while (isspace(nextch) || nextch == ':') /* Get CPU and privilege level */ getnextch(); @@ -172,6 +185,8 @@ int main(int argc, char **argv) getnextch(); switch(nextch){ case '-': flagset[i] = fa_unset; break; + case '/': flagset[i] = fa_isjmp; break; + case '+': flagset[i] = fa_isbranch; break; case '0': flagset[i] = fa_zero; break; case '1': flagset[i] = fa_one; break; case 'x': flagset[i] = fa_dontcare; break; @@ -191,6 +206,8 @@ int main(int argc, char **argv) getnextch(); switch(nextch){ case '-': flaguse[i] = fu_unused; break; + case '/': flaguse[i] = fu_isjmp; break; + case '+': flaguse[i] = fu_maybecc; break; case '?': flaguse[i] = fu_unknown; break; default: flaguse[i] = fu_used; break; } @@ -235,7 +252,7 @@ int main(int 
argc, char **argv) if (nextch != ':') abort(); - fgets(opcstr, 250, tablef); + if (fgets(opcstr, 250, tablef) == NULL) abort(); /* read unconditionally: inside assert() the call vanishes under NDEBUG */ getnextch(); { int j; @@ -243,12 +260,12 @@ int main(int argc, char **argv) char *opstrp = opcstr, *osendp; int slen = 0; - while (isspace(*opstrp)) + while (isspace((unsigned char)*opstrp)) opstrp++; osendp = opstrp; while (*osendp) { - if (!isspace (*osendp)) + if (!isspace ((unsigned char)*osendp)) slen = osendp - opstrp + 1; osendp++; } @@ -271,6 +288,5 @@ int main(int argc, char **argv) } } printf("};\nint n_defs68k = %d;\n", no_insns); - fflush(stdout); return 0; } diff --git a/BasiliskII/src/uae_cpu/compiler/codegen_arm.cpp b/BasiliskII/src/uae_cpu/compiler/codegen_arm.cpp new file mode 100644 index 00000000..334ae753 --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/codegen_arm.cpp @@ -0,0 +1,2730 @@ +/* + * compiler/codegen_arm.cpp - ARM code generator + * + * Copyright (c) 2013 Jens Heitmann of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * JIT compiler m68k -> ARM + * + * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * Adaptation for Basilisk II and improvements, copyright 2000-2004 Gwenole Beauchesne + * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Current state: + * - Experimental + * - Still optimizable + * - Not clock cycle optimized + * - as a first step this compiler emulates x86 instructions to be compatible + * with gencomp. Better would be a specialized version of gencomp compiling + * 68k instructions to ARM compatible instructions. This is a step for the + * future + * + */ + +#include "flags_arm.h" + +// Declare the built-in __clear_cache function. +extern void __clear_cache (char*, char*); + +/************************************************************************* + * Some basic information about the target CPU * + *************************************************************************/ + +#define R0_INDEX 0 +#define R1_INDEX 1 +#define R2_INDEX 2 +#define R3_INDEX 3 +#define R4_INDEX 4 +#define R5_INDEX 5 +#define R6_INDEX 6 +#define R7_INDEX 7 +#define R8_INDEX 8 +#define R9_INDEX 9 +#define R10_INDEX 10 +#define R11_INDEX 11 +#define R12_INDEX 12 +#define R13_INDEX 13 +#define R14_INDEX 14 +#define R15_INDEX 15 + +#define RSP_INDEX 13 +#define RLR_INDEX 14 +#define RPC_INDEX 15 + +/* The register in which subroutines return an integer return value */ +#define REG_RESULT R0_INDEX + +/* The registers subroutines take their first and second argument in */ +#define REG_PAR1 R0_INDEX +#define REG_PAR2 R1_INDEX + +#define REG_WORK1 R2_INDEX +#define REG_WORK2 R3_INDEX + +//#define REG_DATAPTR R10_INDEX + +#define REG_PC_PRE R0_INDEX /* The register we use for preloading regs.pc_p */ +#define REG_PC_TMP R1_INDEX /* Another register that is not the above */ + +#define SHIFTCOUNT_NREG R1_INDEX /* Register that can be used for shiftcount. + -1 if any reg will do. 
Normally this can be set to -1 but compemu_support is tied to 1 */ +#define MUL_NREG1 R0_INDEX /* %r4 will hold the low 32 bits after a 32x32 mul */ +#define MUL_NREG2 R1_INDEX /* %r5 will hold the high 32 bits */ + +#define STACK_ALIGN 4 +#define STACK_OFFSET sizeof(void *) +#define STACK_SHADOW_SPACE 0 + +uae_s8 always_used[]={2,3,-1}; +uae_s8 can_byte[]={0,1,4,5,6,7,8,9,10,11,12,-1}; +uae_s8 can_word[]={0,1,4,5,6,7,8,9,10,11,12,-1}; + +uae_u8 call_saved[]={0,0,0,0,1,1,1,1,1,1,1,1,0,1,1,1}; + +/* This *should* be the same as call_saved. But: + - We might not really know which registers are saved, and which aren't, + so we need to preserve some, but don't want to rely on everyone else + also saving those registers + - Special registers (such like the stack pointer) should not be "preserved" + by pushing, even though they are "saved" across function calls +*/ +static const uae_u8 need_to_preserve[]={0,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0}; +static const uae_u32 PRESERVE_MASK = ((1<=-128 && x<=127); +} + +static inline int is8bit(uae_s32 x) +{ + return (x>=-255 && x<=255); +} + +static inline int isword(uae_s32 x) +{ + return (x>=-32768 && x<=32767); +} + +#define jit_unimplemented(fmt, ...) 
do{ panicbug("**** Unimplemented ****"); panicbug(fmt, ## __VA_ARGS__); abort(); }while (0) + +#if 0 /* currently unused */ +static void jit_fail(const char *msg, const char *file, int line, const char *function) +{ + panicbug("JIT failure in function %s from file %s at line %d: %s", + function, file, line, msg); + abort(); +} +#endif + +LOWFUNC(NONE,WRITE,1,raw_push_l_r,(RR4 r)) +{ + PUSH(r); +} + +LOWFUNC(NONE,READ,1,raw_pop_l_r,(RR4 r)) +{ + POP(r); +} + +LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, RR1 s)) +{ + MVN_ri(REG_WORK1, 0); // mvn r2,#0 + LSL_rri(REG_WORK2, d, 24); // lsl r3, %[d], #24 + ORR_rrrLSRi(REG_WORK2, REG_WORK2, REG_WORK1, 8); // orr r3, r3, r2, lsr #8 + LSL_rri(REG_WORK1, s, 24); // lsl r2, %[s], #24 + + ADCS_rrr(REG_WORK2, REG_WORK2, REG_WORK1); // adcs r3, r3, r2 + + BIC_rri(d, d, 0xFF); // bic %[d],%[d],#0xFF + ORR_rrrLSRi(d, d, REG_WORK2, 24); // orr %[d],%[d], R3 LSR #24 +} + +LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, RR2 s)) +{ + MVN_ri(REG_WORK1, 0); // mvn r2,#0 + LSL_rri(REG_WORK2, d, 16); // lsl r3, %[d], #16 + ORR_rrrLSRi(REG_WORK2, REG_WORK2, REG_WORK1, 16); // orr r3, r3, r2, lsr #16 + LSL_rri(REG_WORK1, s, 16); // lsl r2, %[s], #16 + + ADCS_rrr(REG_WORK2, REG_WORK2, REG_WORK1); // adds r3, r3, r2 +#ifdef ARMV6_ASSEMBLY + PKHTB_rrrASRi(d,d,REG_WORK2,16); +#else + BIC_rri(d, d, 0xff); // bic %[d],%[d],#0xff + BIC_rri(d, d, 0xff00); // bic %[d],%[d],#0xff00 + ORR_rrrLSRi(d, d, REG_WORK2, 16); // orr %[d], %[d], r3, lsr #16 +#endif +} + +LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, RR4 s)) +{ + ADCS_rrr(d, d, s); // adcs %[d],%[d],%[s] +} + +LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, RR1 s)) +{ + LSL_rri(REG_WORK1, s, 24); // lsl r2, %[s], #24 + LSL_rri(REG_WORK2, d, 24); // lsl r3, %[d], #24 + + ADDS_rrr(REG_WORK2, REG_WORK2, REG_WORK1); // adds r3, r3, r2 + + BIC_rri(d, d, 0xFF); // bic %[d],%[d],#0xFF + ORR_rrrLSRi(d, d, REG_WORK2, 24); // orr %[d],%[d], r3 LSR #24 +} + +LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, RR2 s)) +{ + LSL_rri(REG_WORK1, s, 16); // 
lsl r2, %[s], #16 + LSL_rri(REG_WORK2, d, 16); // lsl r3, %[d], #16 + + ADDS_rrr(REG_WORK2, REG_WORK2, REG_WORK1); // adds r3, r3, r2 + +#ifdef ARMV6_ASSEMBLY + PKHTB_rrrASRi(d,d,REG_WORK2,16); +#else + BIC_rri(d, d, 0xff); // bic %[d],%[d],#0xff + BIC_rri(d, d, 0xff00); // bic %[d],%[d],#0xff00 + ORR_rrrLSRi(d, d, REG_WORK2, 16); // orr r7, r7, r3, LSR #16 +#endif +} + +LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, RR4 s)) +{ + ADDS_rrr(d, d, s); // adds %[d], %[d], %[s] +} + +LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_word_offs(i); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldrh r2, [pc, #offs] +#else +# ifdef ARMV6_ASSEMBLY + LDRH_rRI(REG_WORK1, RPC_INDEX, 24); // ldrh r2, [pc, #24] ; +# else + LDRH_rRI(REG_WORK1, RPC_INDEX, 16); // ldrh r2, [pc, #16] ; +# endif +#endif + LSL_rri(REG_WORK2, d, 16); // lsl r3, %[d], #16 + LSL_rri(REG_WORK1, REG_WORK1, 16); // lsl r2, r2, #16 + + ADDS_rrr(REG_WORK2, REG_WORK2, REG_WORK1); // adds r3, r3, r2 + +#ifdef ARMV6_ASSEMBLY + PKHTB_rrrASRi(d,d,REG_WORK2,16); +#else + BIC_rri(d, d, 0xff); // bic %[d],%[d],#0xff + BIC_rri(d, d, 0xff00); // bic %[d],%[d],#0xff00 + ORR_rrrLSRi(d, d, REG_WORK2, 16); // orr %[d],%[d], r3, LSR #16 +#endif + +#if !defined(USE_DATA_BUFFER) + B_i(0); // b + + //: + emit_word(i); + skip_word(0); + //: +#endif +} + +LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i)) +{ + LSL_rri(REG_WORK2, d, 24); // lsl r3, %[d], #24 + + ADDS_rri(REG_WORK2, REG_WORK2, i << 24); // adds r3, r3, #0x12000000 + + BIC_rri(d, d, 0xFF); // bic %[d],%[d], #0xFF + ORR_rrrLSRi(d, d, REG_WORK2, 24); // orr %[d],%[d], r3, lsr #24 +} + +LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(i); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] + ADDS_rrr(d, d, REG_WORK1); // adds %[d], %[d], r2 +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #4] ; + ADDS_rrr(d, d, REG_WORK1); // adds %[d], %[d], r2 + 
B_i(0); // b + + //: + emit_long(i); + //: +#endif +} + +LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, RR1 s)) +{ + MVN_rrLSLi(REG_WORK1, s, 24); // mvn r2, %[s], lsl #24 + MVN_rrLSRi(REG_WORK1, REG_WORK1, 24); // mvn r2, %[s], lsr #24 + AND_rrr(d, d, REG_WORK1); // and %[d], %[d], r2 + + LSLS_rri(REG_WORK1, d, 24); // lsls r2, %[d], #24 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + BIC_rri(REG_WORK1, REG_WORK1, ARM_CV_FLAGS); // bic r2, r2, #0x30000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} + +LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, RR2 s)) +{ + MVN_rrLSLi(REG_WORK1, s, 16); // mvn r2, %[s], lsl #16 + MVN_rrLSRi(REG_WORK1, REG_WORK1, 16); // mvn r2, %[s], lsr #16 + AND_rrr(d, d, REG_WORK1); // and %[d], %[d], r2 + + LSLS_rri(REG_WORK1, d, 16); // lsls r2, %[d], #16 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + BIC_rri(REG_WORK1, REG_WORK1, ARM_CV_FLAGS); // bic r2, r2, #0x30000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} + +LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, RR4 s)) +{ + ANDS_rrr(d, d, s); // ands r7, r7, r6 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + BIC_rri(REG_WORK1, REG_WORK1, ARM_CV_FLAGS); // bic r2, r2, #0x30000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} + +LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(i); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 16); // ldr r2, [pc, #16] ; +#endif + ANDS_rrr(d, d, REG_WORK1); // ands %[d], %[d], r2 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + BIC_rri(REG_WORK1, REG_WORK1, ARM_CV_FLAGS); // bic r2, r2, #0x30000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 + +#if !defined(USE_DATA_BUFFER) + B_i(0); // b + + //: + emit_long(i); + //: +#endif +} + +LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, RR4 s)) +{ + MOV_rr(REG_WORK1, s); // mov r2,%[s] + RSB_rri(REG_WORK2, REG_WORK1, 0); // rsb r3,r2,#0 + AND_rrr(REG_WORK1, REG_WORK1, REG_WORK2); // and r2,r2,r3 + CLZ_rr(REG_WORK2, REG_WORK1); // clz r3,r2 + 
MOV_ri(d, 32); // mov %[d],#32 + SUB_rrr(d, d, REG_WORK2); // sub %[d],%[d],r3 + + MRS_CPSR(REG_WORK2); // mrs r3,cpsr + TEQ_ri(d, 0); // teq %[d],#0 + CC_SUBS_rri(NATIVE_CC_NE, d,d,1); // sub %[d],%[d],#1 + CC_BIC_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_Z_FLAG); // bic r3,r3,#0x40000000 + CC_ORR_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_Z_FLAG); // orr r3,r3,#0x40000000 + MSR_CPSR_r(REG_WORK2); // msr cpsr,r3 +} + +LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r)) +{ +#if defined(ARMV6_ASSEMBLY) + REVSH_rr(REG_WORK1,r); // revsh r2,%[r] + UXTH_rr(REG_WORK1, REG_WORK1); // utxh r2,r2 + LSR_rri(r, r, 16); + ORR_rrrLSLi(r, REG_WORK1, r, 16); // orr %[r], %[r], r2 +#else + MOV_rr(REG_WORK1, r); // mov r2, r6 + BIC_rri(REG_WORK1, REG_WORK1, 0xff0000); // bic r2, r2, #0xff0000 + BIC_rri(REG_WORK1, REG_WORK1, 0xff000000); // bic r2, r2, #0xff000000 + + EOR_rrr(r, r, REG_WORK1); // eor r6, r6, r2 + + ORR_rrrLSRi(r, r, REG_WORK1, 8); // orr r6, r6, r2, lsr #8 + BIC_rri(REG_WORK1, REG_WORK1, 0xff00); // bic r2, r2, #0xff00 + ORR_rrrLSLi(r,r,REG_WORK1, 8); // orr r6, r6, r2, lsl #8 +#endif +} + +LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r)) +{ +#if defined(ARMV6_ASSEMBLY) + REV_rr(r,r); // rev %[r],%[r] +#else + EOR_rrrRORi(REG_WORK1, r, r, 16); // eor r2, r6, r6, ror #16 + BIC_rri(REG_WORK1, REG_WORK1, 0xff0000); // bic r2, r2, #0xff0000 + ROR_rri(r, r, 8); // ror r6, r6, #8 + EOR_rrrLSRi(r, r, REG_WORK1, 8); // eor r6, r6, r2, lsr #8 +#endif +} + +LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(RR4 r, IMM i)) +{ + int imm = (1 << (i & 0x1f)); + + MRS_CPSR(REG_WORK2); // mrs r3, CPSR + TST_ri(r, imm); // tst r6, #0x1000000 + CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG); // bic r3, r3, #0x20000000 + CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG); // orr r3, r3, #0x20000000 + MSR_CPSR_r(REG_WORK2); // msr CPSR_fc, r3 +} + +LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(RR4 r, RR4 b)) +{ + AND_rri(REG_WORK2, b, 0x1f); // and r3, r7, #0x1f + LSR_rrr(REG_WORK1, r, REG_WORK2); // lsr 
r2, r6, r3 + + MRS_CPSR(REG_WORK2); // mrs r3, CPSR + TST_ri(REG_WORK1, 1); // tst r2, #1 + CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG); // orr r3, r3, #0x20000000 + CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG); // bic r3, r3, #0x20000000 + MSR_CPSR_r(REG_WORK2); // msr CPSR_fc, r3 +} + +LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, RR4 b)) +{ + MOV_ri(REG_WORK1, 1); // mov r2, #1 + AND_rri(REG_WORK2, b, 0x1f); // and r3, r7, #0x1f + LSL_rrr(REG_WORK1, REG_WORK1, REG_WORK2); // lsl r2, r2, r3 + + MRS_CPSR(REG_WORK2); // mrs r3, CPSR + TST_rr(r, REG_WORK1); // tst r6, r2 + CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG); // orr r3, r3, #0x20000000 + CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG); // bic r3, r3, #0x20000000 + EOR_rrr(r, r, REG_WORK1); // eor r6, r6, r2 + MSR_CPSR_r(REG_WORK2); // msr CPSR_fc, r3 +} + +LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, RR4 b)) +{ + MOV_ri(REG_WORK1, 1); // mov r2, #1 + AND_rri(REG_WORK2, b, 0x1f); // and r3, r7, #0x1f + LSL_rrr(REG_WORK1, REG_WORK1, REG_WORK2); // lsl r2, r2, r3 + + MRS_CPSR(REG_WORK2); // mrs r3, CPSR + TST_rr(r, REG_WORK1); // tst r6, r2 + CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG); // orr r3, r3, #0x20000000 + CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG); // bic r3, r3, #0x20000000 + BIC_rrr(r, r, REG_WORK1); // bic r6, r6, r2 + MSR_CPSR_r(REG_WORK2); // msr CPSR_fc, r3 +} + +LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, RR4 b)) +{ + MOV_ri(REG_WORK1, 1); // mov r2, #1 + AND_rri(REG_WORK2, b, 0x1f); // and r3, r7, #0x1f + LSL_rrr(REG_WORK1, REG_WORK1, REG_WORK2); // lsl r2, r2, r3 + + MRS_CPSR(REG_WORK2); // mrs r3, CPSR + TST_rr(r, REG_WORK1); // tst r6, r2 + CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG); // orr r3, r3, #0x20000000 + CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG); // bic r3, r3, #0x20000000 + ORR_rrr(r, r, REG_WORK1); // orr r6, r6, r2 + MSR_CPSR_r(REG_WORK2); // msr CPSR_fc, r3 +} + 
+LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, RR4 s, IMM cc)) +{ + switch (cc) { + case 9: // LS + BEQ_i(0); // beq Z != 0 + BCC_i(0); // bcc C == 0 + + //: + MOV_rr(d, s); // mov r7,r6 + break; + + case 8: // HI + BEQ_i(1); // beq Z != 0 + BCS_i(0); // bcs C != 0 + MOV_rr(d, s); // mov r7,#0 + break; + + default: + CC_MOV_rr(cc, d, s); // MOVcc R7,#1 + break; + } + //: +} + +LOWFUNC(WRITE,NONE,2,raw_cmp_b,(RR1 d, RR1 s)) +{ +#if defined(ARMV6_ASSEMBLY) + SXTB_rr(REG_WORK1, d); // sxtb r2,%[d] + SXTB_rr(REG_WORK2, s); // sxtb r3,%[s] +#else + LSL_rri(REG_WORK1, d, 24); // lsl r2,r6,#24 + LSL_rri(REG_WORK2, s, 24); // lsl r3,r7,#24 +#endif + CMP_rr(REG_WORK1, REG_WORK2); // cmp r2, r3 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); // eor r2, r2, #0x20000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} + +LOWFUNC(WRITE,NONE,2,raw_cmp_w,(RR2 d, RR2 s)) +{ +#if defined(ARMV6_ASSEMBLY) + SXTH_rr(REG_WORK1, d); // sxtb r2,%[d] + SXTH_rr(REG_WORK2, s); // sxtb r3,%[s] +#else + LSL_rri(REG_WORK1, d, 16); // lsl r6, r1, #16 + LSL_rri(REG_WORK2, s, 16); // lsl r7, r2, #16 +#endif + + CMP_rr(REG_WORK1, REG_WORK2); // cmp r7, r6, asr #16 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); // eor r2, r2, #0x20000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} + +LOWFUNC(WRITE,NONE,2,raw_cmp_l,(RR4 d, RR4 s)) +{ + CMP_rr(d, s); // cmp r7, r6 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); // eor r2, r2, #0x20000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} + +LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, RR4 s)) +{ + SMULL_rrrr(REG_WORK1, REG_WORK2, d, s); // smull r2,r3,r7,r6 + MOV_rr(d, REG_WORK1); // mov r7,r2 +} + +LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s)) +{ + SMULL_rrrr(REG_WORK1, REG_WORK2, d, s); // smull r2,r3,r7,r6 + MOV_rr(MUL_NREG1, REG_WORK1); // mov r7,r2 + MOV_rr(MUL_NREG2, REG_WORK2); +} + +LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, RR4 s, IMM 
offset)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(offset); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] + ADD_rrr(d, s, REG_WORK1); // add r7, r6, r2 +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #4] ; + ADD_rrr(d, s, REG_WORK1); // add r7, r6, r2 + B_i(0); // b + + //: + emit_long(offset); + //: +#endif +} + +LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, RR4 s, RR4 index, IMM factor, IMM offset)) +{ + int shft; + switch(factor) { + case 1: shft=0; break; + case 2: shft=1; break; + case 4: shft=2; break; + case 8: shft=3; break; + default: abort(); + } + +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(offset); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // LDR R2,[PC, #offs] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 8); // LDR R2,[PC, #8] +#endif + ADD_rrr(REG_WORK1, s, REG_WORK1); // ADD R7,R6,R2 + ADD_rrrLSLi(d, REG_WORK1, index, shft); // ADD R7,R7,R5,LSL #2 +#if !defined(USE_DATA_BUFFER) + B_i(0); // B jp + + emit_long(offset); + //; +#endif +} + +LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, RR4 s, RR4 index, IMM factor)) +{ + int shft; + switch(factor) { + case 1: shft=0; break; + case 2: shft=1; break; + case 4: shft=2; break; + case 8: shft=3; break; + default: abort(); + } + + ADD_rrrLSLi(d, s, index, shft); // ADD R7,R6,R5,LSL #2 +} + +LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, RR4 s, IMM offset)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(offset); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 12); // ldr r2, [pc, #12] ; +#endif + LDRB_rRR(REG_WORK1, REG_WORK1, s); // ldrb r2, [r2, r6] + + BIC_rri(d, d, 0xff); // bic r7, r7, #0xff + ORR_rrr(d, d, REG_WORK1); // orr r7, r7, r2 +#if !defined(USE_DATA_BUFFER) + B_i(0); // b + + //: + emit_long(offset); + //: +#endif +} + +LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(RR4 d, RR1 s, IMM offset)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(offset); + LDR_rRI(REG_WORK1, 
RPC_INDEX, offs); // ldr r2,[pc, #offs] + STRB_rRR(s, d, REG_WORK1); // strb r6,[r7, r2] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2,[pc,#4] + STRB_rRR(s, d, REG_WORK1); // strb r6,[r7, r2] + B_i(0); // b + + //: + emit_long(offset); + //: +#endif +} + +LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(d); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 8); // ldr r2, [pc, #8] ; +#endif + MOV_ri(REG_WORK2, s & 0xFF); // mov r3, #0x34 + STRB_rR(REG_WORK2, REG_WORK1); // strb r3, [r2] +#if !defined(USE_DATA_BUFFER) + B_i(0); // b + + //d: + emit_long(d); + + //: +#endif +} + +LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, RR1 s)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(d); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] + STRB_rR(s, REG_WORK1); // strb r6, [r2] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #4] ; + STRB_rR(s, REG_WORK1); // strb r6, [r2] + B_i(0); // b + + //: + emit_long(d); + //: +#endif +} + +LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s)) +{ + BIC_rri(d, d, 0xff); // bic %[d], %[d], #0xff + ORR_rri(d, d, (s & 0xff)); // orr %[d], %[d], #%[s] +} + +LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(s); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 12); // ldr r2, [pc, #12] ; +#endif + LDRB_rR(REG_WORK2, REG_WORK1); // ldrb r2, [r2] + BIC_rri(d, d, 0xff); // bic r7, r7, #0xff + ORR_rrr(d, REG_WORK2, d); // orr r7, r2, r7 +#if !defined(USE_DATA_BUFFER) + B_i(0); // b + + //: + emit_long(s); + //: +#endif +} + +LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, RR1 s)) +{ + AND_rri(REG_WORK1, s, 0xff); // and r2,r2, #0xff + BIC_rri(d, d, 0x0ff); // bic %[d], %[d], #0xff + ORR_rrr(d, d, REG_WORK1); // orr %[d], %[d], r2 +} + +LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, RR4 s, IMM 
offset)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(offset); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] + LDR_rRR(d, REG_WORK1, s); // ldr r7, [r2, r6] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #4] ; + LDR_rRR(d, REG_WORK1, s); // ldr r7, [r2, r6] + + B_i(0); // b + + emit_long(offset); //: + //: +#endif +} + +LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(RR4 d, RR4 s, IMM offset)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(offset); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2,[pc, #offs] + STR_rRR(s, d, REG_WORK1); // str R6,[R7, r2] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2,[pc,#4] ; + STR_rRR(s, d, REG_WORK1); // str R6,[R7, r2] + B_i(0); // b + + //: + emit_long(offset); + //: +#endif +} + +LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s)) +{ + // TODO: optimize imm + +#if defined(USE_DATA_BUFFER) + data_check_end(8, 12); + long offs = data_long_offs(d); + + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; d + + offs = data_long_offs(s); + LDR_rRI(REG_WORK2, RPC_INDEX, offs); // ldr r3, [pc, #offs] ; s + + STR_rR(REG_WORK2, REG_WORK1); // str r3, [r2] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 8); // ldr r2, [pc, #8] ; + LDR_rRI(REG_WORK2, RPC_INDEX, 8); // ldr r3, [pc, #8] ; + STR_rR(REG_WORK2, REG_WORK1); // str r3, [r2] + B_i(1); // b + + emit_long(d); //: + emit_long(s); //: + + //: +#endif +} + +LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, RR4 s, IMM offset)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(offset); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] +#else +# ifdef ARMV6_ASSEMBLY + LDR_rRI(REG_WORK1, RPC_INDEX, 8); // ldr r2, [pc, #16] ; +# else + LDR_rRI(REG_WORK1, RPC_INDEX, 16); // ldr r2, [pc, #16] ; +# endif +#endif + LDRH_rRR(REG_WORK1, REG_WORK1, s); // ldrh r2, [r2, r6] + +#ifdef ARMV6_ASSEMBLY + PKHBT_rrr(d,REG_WORK1,d); +#else + BIC_rri(d, d, 0xff); // bic r7, r7, #0xff + BIC_rri(d, d, 0xff00); // bic r7, r7, #0xff00 + 
ORR_rrr(d, d, REG_WORK1); // orr r7, r7, r2 +#endif + +#if !defined(USE_DATA_BUFFER) + B_i(0); // b + + emit_long(offset); //: + //: +#endif +} + +LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(RR4 d, RR2 s, IMM offset)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(offset); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2,[pc, #offs] + STRH_rRR(s, d, REG_WORK1); // strh r6,[r7, r2] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2,[pc,#4] + STRH_rRR(s, d, REG_WORK1); // strh r6,[r7, r2] + B_i(0); // b + + //: + emit_long(offset); + //: +#endif +} + +LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, RR2 s)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(d); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc,#offs] + STRH_rR(s, REG_WORK1); // strh r3, [r2] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #4] ; + STRH_rR(s, REG_WORK1); // strh r3, [r2] + B_i(0); // b + + //: + emit_long(d); + //: +#endif +} + +LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_word_offs(s); + LDR_rRI(REG_WORK2, RPC_INDEX, offs); // ldrh r3, [pc, #offs] +#else +# ifdef ARMV6_ASSEMBLY + LDRH_rRI(REG_WORK2, RPC_INDEX, 12); // ldrh r3, [pc, #12] ; +# else + LDRH_rRI(REG_WORK2, RPC_INDEX, 4); // ldrh r3, [pc, #12] ; +# endif +#endif + +#ifdef ARMV6_ASSEMBLY + PKHBT_rrr(d,REG_WORK2,d); +#else + BIC_rri(REG_WORK1, d, 0xff); // bic r2, r7, #0xff + BIC_rri(REG_WORK1, REG_WORK1, 0xff00); // bic r2, r2, #0xff00 + ORR_rrr(d, REG_WORK2, REG_WORK1); // orr r7, r3, r2 +#endif + +#if !defined(USE_DATA_BUFFER) + B_i(0); // b + + //: + emit_word(s); + skip_word(0); + //: +#endif +} + +LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s)) +{ + // TODO: optimize imm + +#if defined(USE_DATA_BUFFER) + data_check_end(8, 12); + long offs = data_long_offs(d); + + LDR_rRI(REG_WORK2, RPC_INDEX, offs); // ldr r3, [pc, #offs] ; + + offs = data_word_offs(s); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; + + 
STRH_rR(REG_WORK1, REG_WORK2); // strh r2, [r3] +#else + LDR_rRI(REG_WORK2, RPC_INDEX, 8); // ldr r3, [pc, #8] ; + LDRH_rRI(REG_WORK1, RPC_INDEX, 8); // ldrh r2, [pc, #8] ; + STRH_rR(REG_WORK1, REG_WORK2); // strh r2, [r3] + B_i(1); // b + + //mem: + emit_long(d); + //imm: + emit_word(s); + skip_word(0); // Alignment + + //: +#endif +} + +LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, RR4 s)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(d); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] + STR_rR(s, REG_WORK1); // str r3, [r2] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #4] ; + STR_rR(s, REG_WORK1); // str r3, [r2] + B_i(0); // b + + //: + emit_long(d); + //: +#endif +} + +LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(RR4 d, IMM i, IMM offset)) +{ + Dif(!isbyte(offset)) abort(); + +#if defined(USE_DATA_BUFFER) + long offs = data_word_offs(i); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] +#else + LDRH_rRI(REG_WORK1, RPC_INDEX, 4); // ldrh r2, [pc, #4] ; +#endif + if (offset >= 0) + STRH_rRI(REG_WORK1, d, offset); // strh r2, [r7, #0x54] + else + STRH_rRi(REG_WORK1, d, -offset);// strh r2, [r7, #-0x54] +#if !defined(USE_DATA_BUFFER) + B_i(0); // b + + //: + emit_word(i); + skip_word(0); + //: +#endif +} + +LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(s); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 12); // ldr r2, [pc, #12] ; +#endif + LDRH_rR(REG_WORK1, REG_WORK1); // ldrh r2, [r2] + LSR_rri(d, d, 16); // lsr r7, r7, #16 + ORR_rrrLSLi(d, REG_WORK1, d, 16); // orr r7, r2, r7, lsl #16 +#if !defined(USE_DATA_BUFFER) + B_i(0); // b + + //: + emit_long(s); + //: +#endif +} + +LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, RR2 s)) +{ + LSL_rri(REG_WORK1, s, 16); // lsl r2, r6, #16 + ORR_rrrLSRi(d, REG_WORK1, d, 16); // orr r7, r2, r7, lsr #16 + ROR_rri(d, d, 16); // ror r7, r7, #16 +} + 
+LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, RR4 s, IMM offset)) +{ + Dif(!isbyte(offset)) abort(); + + if (offset >= 0) + LDRH_rRI(REG_WORK1, s, offset); // ldrh r2, [r6, #12] + else + LDRH_rRi(REG_WORK1, s, -offset); // ldrh r2, [r6, #-12] + +#ifdef ARMV6_ASSEMBLY + PKHBT_rrr(d,REG_WORK1,d); +#else + BIC_rri(d, d, 0xff); // bic r7, r7, #0xff + BIC_rri(d, d, 0xff00); // bic r7, r7, #0xff00 + ORR_rrr(d, d, REG_WORK1); // orr r7, r7, r2 +#endif +} + +LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(RR4 d, RR2 s, IMM offset)) +{ + Dif(!isbyte(offset)) abort(); + + if (offset >= 0) + STRH_rRI(s, d, offset); // strh r6, [r7, #0x7f] + else + STRH_rRi(s, d, -offset);// strh r6, [r7, #-0x7f] +} + +LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(s); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [r10, #offs] + LDR_rR(d, REG_WORK1); // ldr r7, [r2] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #4] ; + LDR_rR(d, REG_WORK1); // ldr r7, [r2] + B_i(0); // b + + emit_long(s); //: + + //: +#endif +} + +LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, MEMR base, RR4 index, IMM factor)) +{ + int shft; + switch(factor) { + case 1: shft=0; break; + case 2: shft=1; break; + case 4: shft=2; break; + case 8: shft=3; break; + default: abort(); + } + +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(base); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] + LDR_rRR_LSLi(d, REG_WORK1, index, shft); // ldr %[d], [r2, %[index], lsl #[shift]] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #4] ; + LDR_rRR_LSLi(d, REG_WORK1, index, shft); // ldr %[d], [r2, %[index], lsl #[shift]] + + B_i(0); // b + emit_long(base); //: + //: +#endif +} + +LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(RR4 d, IMM i, IMM offset8)) +{ + Dif(!isbyte(offset8)) abort(); + +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(i); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); 
// ldr r2, [pc, #4] ; +#endif + if (offset8 >= 0) + STR_rRI(REG_WORK1, d, offset8); // str r2, [r7, #0x54] + else + STR_rRi(REG_WORK1, d, -offset8); // str r2, [r7, #-0x54] +#if !defined(USE_DATA_BUFFER) + B_i(0); // b + + //: + emit_long(i); + //: +#endif +} + +LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, RR4 s, IMM offset)) +{ + Dif(!isbyte(offset)) abort(); + + if (offset >= 0) { + LDR_rRI(d, s, offset); // ldr r2, [r1, #-12] + } else + LDR_rRi(d, s, -offset); // ldr r2, [r1, #12] +} + +LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, RR4 s)) +{ + MOV_rr(d, s); // mov %[d], %[s] +} + +LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(RR4 d, RR4 s, IMM offset)) +{ + Dif(!isbyte(offset)) abort(); + + if (offset >= 0) + STR_rRI(s, d, offset); // str r6, [r7, #12] + else + STR_rRi(s, d, -offset); // str r6, [r7, #-12] +} + +LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s)) +{ + UMULL_rrrr(REG_WORK1, REG_WORK2, d, s); // umull r2,r3,r7,r6 + MOV_rr(MUL_NREG1, REG_WORK1); // mov r7,r2 + MOV_rr(MUL_NREG2, REG_WORK2); +} + +LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, RR1 s)) +{ + AND_rri(REG_WORK1, s, 0xFF); // and r2, %[s], 0xFF + ORR_rrr(d, d, REG_WORK1); // orr %[d], %[d], r2 + LSLS_rri(REG_WORK1, d, 24); // lsls r2, %[d], #24 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + BIC_rri(REG_WORK1, REG_WORK1, ARM_CV_FLAGS); // bic r2, r2, #0x30000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} + +LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, RR2 s)) +{ +#if defined(ARMV6_ASSEMBLY) + UXTH_rr(REG_WORK1, s); // UXTH r2, %[s] +#else + BIC_rri(REG_WORK1, s, 0xff000000); // bic r2, %[s], #0xff000000 + BIC_rri(REG_WORK1, REG_WORK1, 0x00ff0000); // bic r2, r2, #0x00ff0000 +#endif + ORR_rrr(d, d, REG_WORK1); // orr %[d], %[d], r2 + LSLS_rri(REG_WORK1, d, 16); // lsls r2, %[d], #16 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + BIC_rri(REG_WORK1, REG_WORK1, ARM_CV_FLAGS); // bic r2, r2, #0x30000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} + +LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, RR4 s)) +{ + ORRS_rrr(d, d, s); // orrs r7, r7, 
r6 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + BIC_rri(REG_WORK1, REG_WORK1, ARM_CV_FLAGS); // bic r2, r2, #0x30000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} + +LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(i); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // LDR r2, [pc, #offs] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 16); // LDR r2, [pc,#16] ; +#endif + ORRS_rrr(d, d, REG_WORK1); // ORRS r7,r7,r2 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + BIC_rri(REG_WORK1, REG_WORK1, ARM_CV_FLAGS); // bic r2, r2, #0x30000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 + +#if !defined(USE_DATA_BUFFER) + B_i(0); // b + + // value: + emit_long(i); + //jp: +#endif +} + +LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i)) +{ + // TODO: Check if the Bittest is necessary. compemu.c seems to do it itself, but meanwhile make sure, that carry is set correctly + int imm = 32 - (i & 0x1f); + + MOV_rrLSLi(REG_WORK1, r, 24); // mov r2,r7,lsl #24 + ORR_rrrLSRi(REG_WORK1, REG_WORK1, REG_WORK1, 16); // orr r2,r2,r2,lsr #16 + ORR_rrrLSRi(REG_WORK1, REG_WORK1, REG_WORK1, 8); // orr r2,r2,r2,lsr #8 + + RORS_rri(REG_WORK1, REG_WORK1, imm); // rors r2,r2,#(32 - (i & 0x1f)) + + MRS_CPSR(REG_WORK2); // mrs r3,cpsr + TST_ri(REG_WORK1, 1); // tst r2,#1 + CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG); // orr r3,r3,#0x20000000 + CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG); // bic r3,r3,#0x20000000 + MSR_CPSR_r(REG_WORK2); + + AND_rri(REG_WORK1, REG_WORK1, 0xff); // and r2,r2,#0xff + BIC_rri(r, r, 0xff); // bic r7,r7,#0xff + ORR_rrr(r, r, REG_WORK1); // orr r7,r7,r2 +} + +LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, RR1 r)) +{ + // TODO: Check if the Bittest is necessary. 
compemu.c seems to do it itself, but meanwhile make sure, that carry is set correctly + + MOV_ri(REG_WORK2, 32); // mov r3,#32 + AND_rri(REG_WORK1, r, 0x1f); // and r2,r6,#0x1f + SUB_rrr(REG_WORK2, REG_WORK2, REG_WORK1); // sub r3,r3,r2 + + MOV_rrLSLi(REG_WORK1, d, 24); // mov r2,r7,lsl #24 + ORR_rrrLSRi(REG_WORK1, REG_WORK1, REG_WORK1, 16); // orr r2,r2,r2,lsr #16 + ORR_rrrLSRi(REG_WORK1, REG_WORK1, REG_WORK1, 8); // orr r2,r2,r2,lsr #8 + + RORS_rrr(REG_WORK1, REG_WORK1, REG_WORK2); // rors r2,r2,r3 + + MRS_CPSR(REG_WORK2); // mrs r3,cpsr + TST_ri(REG_WORK1, 1); // tst r2,#1 + CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG); // orr r3,r3,#0x20000000 + CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG); // bic r3,r3,#0x20000000 + MSR_CPSR_r(REG_WORK2); + + AND_rri(REG_WORK1, REG_WORK1, 0xff); // and r2,r2,#0xff + BIC_rri(d, d, 0xff); // bic r7,r7,#0xff + + ORR_rrr(d, d, REG_WORK1); // orr r7,r7,r2 +} + +LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i)) +{ + // TODO: Check if the Bittest is necessary. compemu.c seems to do it itself, but meanwhile make sure, that carry is set correctly + int imm = 32 - (i & 0x1f); + + MOV_rrLSLi(REG_WORK1, r, 16); // mov r2,r7,lsl #16 + ORR_rrrLSRi(REG_WORK1, REG_WORK1, REG_WORK1, 16); // orr r2,r2,r2,lsr #16 + + RORS_rri(REG_WORK1, REG_WORK1, imm); // rors r2,r2,#(32 - (i & 0x1f)) + + MRS_CPSR(REG_WORK2); // mrs r3,cpsr + TST_ri(REG_WORK1, 1); // tst r2,#1 + CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG); // orr r3,r3,#0x20000000 + CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG); // bic r3,r3,#0x20000000 + MSR_CPSR_r(REG_WORK2); + + BIC_rri(r, r, 0xff00); // bic r2,r2,#0xff00 + BIC_rri(r, r, 0xff); // bic r2,r2,#0xff + + ORR_rrrLSRi(r, r, REG_WORK1, 16); // orr r7,r7,r2,lsr #16 +} + +LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, RR1 r)) +{ + // TODO: Check if the Bittest is necessary. 
compemu.c seems to do it itself, but meanwhile make sure, that carry is set correctly + + MOV_ri(REG_WORK2, 32); // mov r3,#32 + AND_rri(REG_WORK1, r, 0x1f); // and r2,r6,#0x1f + SUB_rrr(REG_WORK2, REG_WORK2, REG_WORK1); // sub r3,r3,r2 + + MOV_rrLSLi(REG_WORK1, d, 16); // mov r2,r7,lsl #16 + ORR_rrrLSRi(REG_WORK1, REG_WORK1, REG_WORK1, 16); // orr r2,r2,r2,lsr #16 + + RORS_rrr(REG_WORK1, REG_WORK1, REG_WORK2); // rors r2,r2,r3 + + MRS_CPSR(REG_WORK2); // mrs r3,cpsr + TST_ri(REG_WORK1, 1); // tst r2,#1 + CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG); // orr r3,r3,#0x20000000 + CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG); // bic r3,r3,#0x20000000 + MSR_CPSR_r(REG_WORK2); + + BIC_rri(d, d, 0xff00); // bic r2,r2,#0xff00 + BIC_rri(d, d, 0xff); // bic r2,r2,#0xff + + ORR_rrrLSRi(d, d, REG_WORK1, 16); // orr r2,r2,r7,lsr #16 +} + +LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i)) +{ + // TODO: Check if the Bittest is necessary. compemu.c seems to do it itself, but meanwhile make sure, that carry is set correctly + int imm = 32 - (i & 0x1f); + + RORS_rri(r, r, imm); // rors r7,r7,#(32 - (i & 0x1f)) + + MRS_CPSR(REG_WORK2); // mrs r3,cpsr + TST_ri(r, 1); // tst r7,#1 + CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG); // orr r3,r3,#0x20000000 + CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG); // bic r3,r3,#0x20000000 + MSR_CPSR_r(REG_WORK2); +} + +LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i)) +{ + RORS_rri(r, r, i & 0x1F); // RORS r7,r7,#12 +} + +LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, RR1 r)) +{ + // TODO: Check if the Bittest is necessary. 
compemu.c seems to do it itself, but meanwhile make sure, that carry is set correctly + + MOV_ri(REG_WORK1, 32); // mov r2,#32 + AND_rri(REG_WORK2, r, 0x1f); // and r3,r6,#0x1f + SUB_rrr(REG_WORK1, REG_WORK1, REG_WORK2); // sub r2,r2,r3 + + RORS_rrr(d, d, REG_WORK1); // rors r7,r7,r2 + + MRS_CPSR(REG_WORK2); // mrs r3,cpsr + TST_ri(d, 1); // tst r7,#1 + CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG); // orr r3,r3,#0x20000000 + CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG); // bic r3,r3,#0x20000000 + MSR_CPSR_r(REG_WORK2); +} + +LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, RR1 r)) +{ + RORS_rrr(d, d, r); // RORS r7,r7,r6 +} + +LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i)) +{ + MOV_rrLSLi(REG_WORK1, r, 24); // mov r2,r7,lsl #24 + ORR_rrrLSRi(REG_WORK1, REG_WORK1, REG_WORK1, 16); // orr r2,r2,r2,lsr #16 + ORR_rrrLSRi(REG_WORK1, REG_WORK1, REG_WORK1, 8); // orr r2,r2,r2,lsr #8 + + RORS_rri(REG_WORK1, REG_WORK1, i & 0x1f); // rors r2,r2,#12 + + AND_rri(REG_WORK1, REG_WORK1, 0xff); // and r2,r2,#0xff + BIC_rri(r, r, 0xff); // bic r7,r7,#0xff + ORR_rrr(r, r, REG_WORK1); // orr r7,r7,r2 +} + +LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, RR1 r)) +{ + MOV_rrLSLi(REG_WORK1, d, 24); // mov r2,r7,lsl #24 + ORR_rrrLSRi(REG_WORK1, REG_WORK1, REG_WORK1, 16); // orr r2,r2,r2,lsr #16 + ORR_rrrLSRi(REG_WORK1, REG_WORK1, REG_WORK1, 8); // orr r2,r2,r2,lsr #8 + + RORS_rrr(REG_WORK1, REG_WORK1, r); // rors r2,r2,r6 + + AND_rri(REG_WORK1, REG_WORK1, 0xff); // and r2,r2,#0xff + BIC_rri(d, d, 0xff); // bic r7,r7,#0xff + ORR_rrr(d, d, REG_WORK1); // orr r7,r7,r2 +} + +LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i)) +{ + MOV_rrLSLi(REG_WORK1, r, 16); // mov r2,r7,lsl #16 + ORR_rrrLSRi(REG_WORK1, REG_WORK1, REG_WORK1, 16); // orr r2,r2,r2,lsr #16 + + RORS_rri(REG_WORK1, REG_WORK1, i & 0x1f); // RORS r2,r2,#12 + + BIC_rri(r, r, 0xff00); // bic r7,r7,#0xff00 + BIC_rri(r, r, 0xff); // bic r7,r7,#0xff + + ORR_rrrLSRi(r, r, REG_WORK1, 16); // orr r7,r7,r2,lsr #16 +} + 
+LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, RR1 r)) +{ + MOV_rrLSLi(REG_WORK1, d, 16); // mov r2,r7,lsl #16 + ORR_rrrLSRi(REG_WORK1, REG_WORK1, REG_WORK1, 16); // orr r2,r2,r2,lsr #16 + + RORS_rrr(REG_WORK1, REG_WORK1, r); // RORS r2,r2,r6 + + BIC_rri(d, d, 0xff00); // bic r7,r7,#0xff00 + BIC_rri(d, d, 0xff); // bic r7,r7,#0xff + + ORR_rrrLSRi(d, d, REG_WORK1, 16); // orr r7,r7,r2,lsr #16 +} + +LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, RR1 s)) +{ + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); // eor r2, r2, #0x20000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 + + LSL_rri(REG_WORK2, d, 24); // lsl r3, %[d], #24 + LSL_rri(REG_WORK1, s, 24); // lsl r2, r6, #24 + + SBCS_rrr(REG_WORK2, REG_WORK2, REG_WORK1); // subs r3, r3, r2 + BIC_rri(d, d, 0xFF); + ORR_rrrLSRi(d, d, REG_WORK2, 24); // orr r7, r7, r3 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); // eor r2, r2, #0x20000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} + +LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, RR4 s)) +{ + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); // eor r2, r2, #0x20000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 + + SBCS_rrr(d, d, s); // sbcs r7, r7, r6 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); // eor r2, r2, #0x20000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} + +LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, RR2 s)) +{ + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); // eor r2, r2, #0x20000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 + + LSL_rri(REG_WORK2, d, 16); // lsl r3, %[d], #24 + LSL_rri(REG_WORK1, s, 16); // lsl r2, r6, #16 + + SBCS_rrr(REG_WORK2, REG_WORK2, REG_WORK1); // subs r3, r3, r2 + BIC_rri(d,d, 0xff); + BIC_rri(d,d, 0xff00); + ORR_rrrLSRi(d, d, REG_WORK2, 16); // orr r7, r7, r3 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); // eor r2, r2, #0x20000000 + 
MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} + +LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc)) +{ + switch (cc) { + case 9: // LS + BEQ_i(0); // beq + BCC_i(1); // bcs + + MOV_ri(d, 1); // mov r7,#0 + B_i(0); // b + + //: + MOV_ri(d, 0); // mov r7,#1 + break; + + case 8: // HI + BEQ_i(2); // beq Z != 0 + BCS_i(1); // bcc C = 0 + + //: + MOV_ri(d, 1); // mov r7,#0 + B_i(0); // b + + //: + MOV_ri(d, 0); // mov r7,#1 + break; + + default: + CC_MOV_ri(cc, d, 1); // MOVcc R7,#1 + CC_MOV_ri(cc^1, d, 0); // MOVcc^1 R7,#0 + break; + } + //: +} + +LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc)) +{ + switch (cc) { + case 9: // LS + BEQ_i(0); // beq + BCC_i(1); // bcs + + MOV_ri(REG_WORK1, 1); // mov r2,#0 + B_i(0); // b + + //: + MOV_ri(REG_WORK1, 0); // mov r2,#1 + break; + + case 8: // HI + BEQ_i(2); // beq Z != 0 + BCS_i(1); // bcc C = 0 + + MOV_ri(REG_WORK1, 1); // mov r2,#0 + B_i(0); // b + + //: + MOV_ri(REG_WORK1, 0); // mov r2,#1 + break; + + default: + CC_MOV_ri(cc, REG_WORK1, 1); // MOVcc R2,#1 + CC_MOV_ri(cc^1, REG_WORK1, 0); // MOVcc^1 R2,#0 + break; + } + //: +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(d); + LDR_rRI(REG_WORK2, RPC_INDEX, offs); // LDR R3,[PC, #offs] +#else + LDR_rRI(REG_WORK2, RPC_INDEX, 4); // LDR R3,[PC, #4] +#endif + STRB_rR(REG_WORK1, REG_WORK2); // STRB R2,[R3] +#if !defined(USE_DATA_BUFFER) + B_i(0); // B + + emit_long(d); + //: +#endif +} + +LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i)) +{ + LSL_rri(REG_WORK1, r, 24); // LSL r2,r7,#24 + + LSLS_rri(REG_WORK1, REG_WORK1, i & 0x1f); // LSLS r2,r2,#12 + + BIC_rri(r, r, 0xff); // BIC r7,r7,0xff + ORR_rrrLSRi(r, r, REG_WORK1, 24); // ORR r7,r7,r2,lsr #24 +} + +LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, RR1 r)) +{ + LSL_rri(REG_WORK1, d, 24); // LSL r2,r7,#24 + LSLS_rrr(REG_WORK1, REG_WORK1, r); // LSLS r2,r2,r6 + BIC_rri(d, d, 0xff); // BIC r7,r7,#0xff + ORR_rrrLSRi(d, d, REG_WORK1, 24); // ORR r7,r7,r2,lsr #24 +} + +LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i)) +{ + 
LSLS_rri(r,r, i & 0x1f); // lsls r7,r7,#12 +} + +LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, RR1 r)) +{ + LSLS_rrr(d, d, r); +} + +LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i)) +{ + LSL_rri(REG_WORK1, r, 16); // LSL r2,r7,#16 + LSLS_rri(REG_WORK1, REG_WORK1, i&0x1f); // LSLS r2,r2,#12 + + ORR_rrrLSRi(REG_WORK1, REG_WORK1, r, 16); // ORR r2,r2,r7,lsr #16 + + ROR_rri(r, REG_WORK1, 16); // ROR r7,r2,#16 +} + +LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, RR1 r)) +{ + LSL_rri(REG_WORK1, d, 16); // LSL r2,r7,#16 + LSLS_rrr(REG_WORK1, REG_WORK1, r); // LSLS r2,r2,r6 + ORR_rrrLSRi(REG_WORK1, REG_WORK1, d, 16); // ORR r2,r2,r7,lsr #16 + ROR_rri(d, REG_WORK1, 16); // ROR r7,r2,#16 +} + +LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i)) +{ + LSL_rri(REG_WORK1, r, 24); // lsl r2,r7,#24 + ASR_rri(REG_WORK1, REG_WORK1, 24); // asr r2,r2,#24 + + ASRS_rri(REG_WORK1, REG_WORK1, i & 0x1f); // asrs r2,r2,#12 + + AND_rri(REG_WORK1, REG_WORK1, 0xff); // and r2,r2,#0xff + BIC_rri(r,r, 0xff); // bic r7,r7,#0xff + ORR_rrr(r,r,REG_WORK1); // orr r7,r7,r2 +} + +LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, RR1 r)) +{ + LSL_rri(REG_WORK1, d, 24); // lsl r2,r7,#24 + ASR_rri(REG_WORK1, REG_WORK1, 24); // asr r2,r2,#24 + + ASRS_rrr(REG_WORK1, REG_WORK1, r); // asrs r2,r2,r6 + + AND_rri(REG_WORK1, REG_WORK1, 0xff); // and r2,r2,#0xff + BIC_rri(d,d, 0xff); // bic r7,r7,#0xff + + ORR_rrr(d,d,REG_WORK1); // orr r7,r7,r2 +} + +LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i)) +{ + LSL_rri(REG_WORK1, r, 16); // lsl r2,r7,#16 + ASR_rri(REG_WORK1, REG_WORK1, 16); // asr r2,r2,#16 + + ASRS_rri(REG_WORK1, REG_WORK1, i & 0x1f); // asrs r2,r2,#12 + +#if defined(ARMV6_ASSEMBLY) + UXTH_rr(REG_WORK1, REG_WORK1); +#else + BIC_rri(REG_WORK1, REG_WORK1, 0xff000000); + BIC_rri(REG_WORK1, REG_WORK1, 0xff0000); +#endif + + BIC_rri(r,r,0xff00); // bic r7,r7,#0xff00 + BIC_rri(r,r,0xff); // bic r7,r7,#0xff + + ORR_rrr(r,r,REG_WORK1); // orr r7,r7,r2 +} + +LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, RR1 r)) +{ + 
LSL_rri(REG_WORK1, d, 16); // lsl r2,r7,#16 + ASR_rri(REG_WORK1, REG_WORK1, 16); // asr r2,r2,#16 + + ASRS_rrr(REG_WORK1, REG_WORK1, r); // asrs r2,r2,r6 + +#if defined(ARMV6_ASSEMBLY) + UXTH_rr(REG_WORK1, REG_WORK1); +#else + BIC_rri(REG_WORK1, REG_WORK1, 0xff000000); // bic r2,r2,#0xff000000 + BIC_rri(REG_WORK1, REG_WORK1, 0xff0000); // bic r2,r2,#0xff0000 +#endif + + BIC_rri(d,d, 0xff00); // bic r7,r7,#0xff00 + BIC_rri(d,d, 0xff); // bic r7,r7,#0xff + + ORR_rrr(d,d,REG_WORK1); // orr r7,r7,r2 +} + +LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i)) +{ + ASRS_rri(r, r, i & 0x1f); // ASRS r7,r7,#12 +} + +LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, RR1 r)) +{ + ASRS_rrr(d, d, r); // ASRS r7,r7,r6 +} + +LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i)) +{ + AND_rri(REG_WORK1, r, 0xff); // AND r2,r7,#0xFF + + LSRS_rri(REG_WORK1, REG_WORK1, i & 0x1f); // LSRS r2,r2,r6 + + BIC_rri(r, r, 0xFF); // BIC r7,r7,#0xff + ORR_rrr(r, r, REG_WORK1); // ORR r7,r7,r2 +} + +LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, RR1 r)) +{ + AND_rri(REG_WORK1, d, 0xff); // AND r2,r7,#0xFF + + LSRS_rrr(REG_WORK1, REG_WORK1, r); // LSRS r2,r2,r6 + + BIC_rri(d, d, 0xFF); // BIC r7,r7,#0xff + ORR_rrr(d, d, REG_WORK1); // ORR r7,r7,r2 +} + +LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i)) +{ + LSRS_rri(r, r, i & 0x1f); // LSRS r7,r7,#12 +} + +LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i)) +{ +#if defined(ARMV6_ASSEMBLY) + UXTH_rr(REG_WORK1, r); +#else + BIC_rri(REG_WORK1, r, 0xff0000); // BIC r2,r7,#0xff0000 + BIC_rri(REG_WORK1, REG_WORK1, 0xff000000); // BIC r2,r2,#0xff000000 +#endif + + LSRS_rri(REG_WORK1, REG_WORK1, i & 0x1f); // LSRS r2,r2,#12 + + BIC_rri(r, r, 0xFF); // BIC r7,r7,#0xff + BIC_rri(r, r, 0xFF00); // BIC r7,r7,#0xff00 + ORR_rrr(r, r, REG_WORK1); // ORR r7,r7,r2 +} + +LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, RR1 r)) +{ +#if defined(ARMV6_ASSEMBLY) + UXTH_rr(REG_WORK1, d); +#else + BIC_rri(REG_WORK1, d, 0xff0000); // BIC r2,r7,#0xff0000 + BIC_rri(REG_WORK1, REG_WORK1, 
0xff000000); // BIC r2,r2,#0xff000000 +#endif + + LSRS_rrr(REG_WORK1, REG_WORK1, r); // LSRS r2,r2,r6 + + BIC_rri(d, d, 0xFF); // BIC r7,r7,#0xff + BIC_rri(d, d, 0xFF00); // BIC r7,r7,#0xff00 + ORR_rrr(d, d, REG_WORK1); // ORR r7,r7,r2 +} + +LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, RR1 r)) +{ + LSRS_rrr(d, d, r); +} + +LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, RR1 s)) +{ + LSL_rri(REG_WORK1, s, 24); // lsl r2, r6, #24 + LSL_rri(REG_WORK2, d, 24); // lsl r3, r7, #24 + + SUBS_rrr(REG_WORK2, REG_WORK2, REG_WORK1); // subs r3, r3, r2 + BIC_rri(d, d, 0xFF); + ORR_rrrLSRi(d, d, REG_WORK2, 24); // orr r7, r7, r3 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); // eor r2, r2, #0x20000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} + +LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i)) +{ + LSL_rri(REG_WORK2, d, 24); // lsl r3, r7, #24 + + SUBS_rri(REG_WORK2, REG_WORK2, i << 24); // subs r3, r3, #0x12000000 + BIC_rri(d, d, 0xFF); // bic r7, r7, #0xFF + ORR_rrrLSRi(d, d, REG_WORK2, 24); // orr r7, r7, r3, lsr #24 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); // eor r2, r2, #0x20000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} + +LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, RR4 s)) +{ + SUBS_rrr(d, d, s); // subs r7, r7, r6 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); // eor r2, r2, #0x20000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} + +LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(i); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 16); // ldr r2, [pc, #16] ; +#endif + SUBS_rrr(d, d, REG_WORK1); // subs r7, r7, r2 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); // eor r2, r2, #0x20000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 + +#if !defined(USE_DATA_BUFFER) + B_i(0); // b + + //: + 
emit_long(i); + //: +#endif +} + +LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, RR2 s)) +{ + LSL_rri(REG_WORK1, s, 16); // lsl r2, r6, #16 + LSL_rri(REG_WORK2, d, 16); // lsl r3, r7, #16 + + SUBS_rrr(REG_WORK2, REG_WORK2, REG_WORK1); // subs r3, r3, r2 + BIC_rri(d, d, 0xff); + BIC_rri(d, d, 0xff00); + ORR_rrrLSRi(d, d, REG_WORK2, 16); // orr r7, r7, r3 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); // eor r2, r2, #0x20000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} + +LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i)) +{ + // TODO: optimize_imm + +#if defined(USE_DATA_BUFFER) + long offs = data_word_offs(i); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; +#else + LDRH_rRI(REG_WORK1, RPC_INDEX, 36); // ldrh r2, [pc, #36] ; +#endif + LSL_rri(REG_WORK1, REG_WORK1, 16); // lsl r2, r2, #16 + LSL_rri(REG_WORK2, d, 16); // lsl r3, r6, #16 + + SUBS_rrr(REG_WORK2, REG_WORK2, REG_WORK1); // subs r3, r3, r2 + BIC_rri(d, d, 0xff); + BIC_rri(d, d, 0xff00); + ORR_rrrLSRi(d, d, REG_WORK2, 16); // orr r6, r3, r6, lsr #16 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); // eor r2, r2, #0x20000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 + +#if !defined(USE_DATA_BUFFER) + B_i(0); // b + + emit_word(i); + skip_word(0); //: + + //: +#endif +} + +LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(RR1 d, RR1 s)) +{ +#if defined(ARMV6_ASSEMBLY) + SXTB_rr(REG_WORK1, s); + SXTB_rr(REG_WORK2, d); +#else + LSL_rri(REG_WORK1, s, 24); // lsl r2, r6, #24 + LSL_rri(REG_WORK2, d, 24); // lsl r3, r7, #24 +#endif + + TST_rr(REG_WORK2, REG_WORK1); // tst r3, r2 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + BIC_rri(REG_WORK1, REG_WORK1, ARM_CV_FLAGS); // bic r2, r2, #0x30000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} + +LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(RR4 d, IMM i)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(i); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] +#else + 
LDR_rRI(REG_WORK1, RPC_INDEX, 16); // ldr r2, [pc, #16] ; +#endif + TST_rr(d, REG_WORK1); // tst r7, r2 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + BIC_rri(REG_WORK1, REG_WORK1, ARM_CV_FLAGS); // bic r2, r2, #0x30000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 + +#if !defined(USE_DATA_BUFFER) + B_i(0); // b + + //: + emit_long(i); + //: +#endif +} + +LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(RR4 d, RR4 s)) +{ + TST_rr(d, s); // tst r7, r6 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + BIC_rri(REG_WORK1, REG_WORK1, ARM_CV_FLAGS); // bic r2, r2, #0x30000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} + +LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(RR2 d, RR2 s)) +{ +#ifdef ARMV6_ASSEMBLY + SXTH_rr(REG_WORK1, s); + SXTH_rr(REG_WORK2, d); +#else + LSL_rri(REG_WORK1, s, 16); // lsl r2, r6, #16 + LSL_rri(REG_WORK2, d, 16); // lsl r3, r7, #16 +#endif + + TST_rr(REG_WORK2, REG_WORK1); // tst r3, r2 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + BIC_rri(REG_WORK1, REG_WORK1, ARM_CV_FLAGS); // bic r2, r2, #0x30000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} + +LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, RR1 s)) +{ + AND_rri(REG_WORK1, s, 0xFF); // and r2, %[s], 0xFF + EOR_rrr(d, d, REG_WORK1); // eor %[d], %[d], r2 + LSLS_rri(REG_WORK1, d, 24); // lsls r2, %[d], #24 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + BIC_rri(REG_WORK1, REG_WORK1, ARM_CV_FLAGS); // bic r2, r2, #0x30000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} + +LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, RR2 s)) +{ +#if defined(ARMV6_ASSEMBLY) + UXTH_rr(REG_WORK1, s); // UXTH r2, %[s] +#else + BIC_rri(REG_WORK1, s, 0xff000000); // bic r2, %[s], #0xff000000 + BIC_rri(REG_WORK1, REG_WORK1, 0x00ff0000); // bic r2, r2, #0x00ff0000 +#endif + EOR_rrr(d, d, REG_WORK1); // eor %[d], %[d], r2 + LSLS_rri(REG_WORK1, d, 16); // lsls r2, %[d], #16 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + BIC_rri(REG_WORK1, REG_WORK1, ARM_CV_FLAGS); // bic r2, r2, #0x30000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} + 
/*
 * 32-bit XOR: d ^= s.  EORS updates the condition flags from the result;
 * afterwards C and V (ARM_CV_FLAGS) are cleared in CPSR via REG_WORK1.
 */
LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, RR4 s))
{
	EORS_rrr(d, d, s);								// eors d, d, s

	MRS_CPSR(REG_WORK1);							// mrs r2, CPSR
	BIC_rri(REG_WORK1, REG_WORK1, ARM_CV_FLAGS);	// bic r2, r2, #0x30000000
	MSR_CPSR_r(REG_WORK1);							// msr CPSR_fc, r2
}

/*
 * Sign-extend the low 16 bits of s into d.
 * Without ARMv6 SXTH: shift the halfword to the top, then arithmetic-shift back.
 */
LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, RR2 s))
{
#if defined(ARMV6_ASSEMBLY)
	SXTH_rr(d, s);			// sxth d, s
#else
	LSL_rri(d, s, 16);		// lsl d, s, #16
	ASR_rri(d, d, 16);		// asr d, d, #16
#endif
}

/*
 * Sign-extend the low 8 bits of s into d.
 * Without ARMv6 SXTB: rotate the low byte into bits 31..24, then
 * arithmetic-shift it back down so the sign bit fills the upper 24 bits.
 */
LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, RR1 s))
{
#if defined(ARMV6_ASSEMBLY)
	SXTB_rr(d, s);			// sxtb d, s
#else
	ROR_rri(d, s, 8);		// ror d, s, #8
	ASR_rri(d, d, 24);		// asr d, d, #24
#endif
}

/*
 * Zero-extend the low 8 bits of s into d.
 * Without ARMv6 UXTB: rotate the low byte into bits 31..24, then
 * logical-shift it back down so the upper 24 bits become zero.
 */
LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, RR1 s))
{
#if defined(ARMV6_ASSEMBLY)
	UXTB_rr(d, s);			// uxtb d, s
#else
	ROR_rri(d, s, 8);		// ror d, s, #8
	LSR_rri(d, d, 24);		// lsr d, d, #24
#endif
}

/*
 * Zero-extend the low 16 bits of s into d.
 * Without ARMv6 UXTH the upper halfword is cleared with two BICs, one byte
 * at a time (each mask fits an ARM 8-bit rotated immediate).
 */
LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, RR2 s))
{
#if defined(ARMV6_ASSEMBLY)
	UXTH_rr(d, s);					// uxth d, s
#else
	BIC_rri(d, s, 0xff000000);		// bic d, s, #0xff000000
	BIC_rri(d, d, 0x00ff0000);		// bic d, d, #0x00ff0000
#endif
}

/*
 * Emit code that subtracts `off` from the register RSP_INDEX.
 * Nothing is emitted when off == 0.
 *
 * The constant is placed inline after the code:
 *     ldr  r2, [pc, #4]    ; pc reads 8 bytes ahead, so this addresses the literal
 *     sub  RSP, RSP, r2
 *     b    past the literal
 *     .word off            ; literal
 * (assumes every emitter macro outputs exactly one 32-bit instruction word)
 */
static inline void raw_dec_sp(int off)
{
	if (off) {
		LDR_rRI(REG_WORK1, RPC_INDEX, 4);			// ldr r2, [pc, #4]   ; load literal below
		SUB_rrr(RSP_INDEX, RSP_INDEX, REG_WORK1);	// sub RSP, RSP, r2
		B_i(0);										// b   over the literal
		// literal:
		emit_long(off);
	}
}

/*
 * Emit code that adds `off` to the register RSP_INDEX.
 * Nothing is emitted when off == 0.  Same inline-literal layout as raw_dec_sp.
 */
static inline void raw_inc_sp(int off)
{
	if (off) {
		LDR_rRI(REG_WORK1, RPC_INDEX, 4);			// ldr r2, [pc, #4]   ; load literal below
		ADD_rrr(RSP_INDEX, RSP_INDEX, REG_WORK1);	// add RSP, RSP, r2
		B_i(0);										// b   over the literal
		// literal:
		emit_long(off);
	}
}

/* Emit a push of the register set selected by PRESERVE_MASK. */
static inline void raw_push_regs_to_preserve(void) {
	PUSH_REGS(PRESERVE_MASK);
}

/* Emit the matching pop of the register set selected by PRESERVE_MASK. */
static inline void raw_pop_preserved_regs(void) {
	POP_REGS(PRESERVE_MASK);
}

// Verify!!!
+/* FLAGX is byte sized, and we *do* write it at that size */ +static inline void raw_load_flagx(uae_u32 t) +{ + raw_mov_l_rm(t,(uintptr)live.state[FLAGX].mem); +} + +static inline void raw_flags_evicted(int r) +{ + //live.state[FLAGTMP].status=CLEAN; + live.state[FLAGTMP].status=INMEM; + live.state[FLAGTMP].realreg=-1; + /* We just "evicted" FLAGTMP. */ + if (live.nat[r].nholds!=1) { + /* Huh? */ + abort(); + } + live.nat[r].nholds=0; +} + +static inline void raw_flags_init(void) { +} + +static __inline__ void raw_flags_set_zero(int s, int tmp) +{ + raw_mov_l_rr(tmp,s); + MRS_CPSR(s); + BIC_rri(s,s,ARM_Z_FLAG); + AND_rri(tmp,tmp,ARM_Z_FLAG); + EOR_rri(tmp,tmp,ARM_Z_FLAG); + ORR_rrr(s,s,tmp); + MSR_CPSR_r(s); +} + +static inline void raw_flags_to_reg(int r) +{ + MRS_CPSR(r); + raw_mov_l_mr((uintptr)live.state[FLAGTMP].mem,r); + raw_flags_evicted(r); +} + +static inline void raw_reg_to_flags(int r) +{ + MSR_CPSR_r(r); // msr CPSR_fc, %r +} + +/* Apparently, there are enough instructions between flag store and + flag reload to avoid the partial memory stall */ +static inline void raw_load_flagreg(uae_u32 t) +{ + raw_mov_l_rm(t,(uintptr)live.state[FLAGTMP].mem); +} + +/* %eax register is clobbered if target processor doesn't support fucomi */ +#define FFLAG_NREG_CLOBBER_CONDITION !have_cmov +#define FFLAG_NREG R0_INDEX +#define FLAG_NREG2 -1 +#define FLAG_NREG1 -1 +#define FLAG_NREG3 -1 + +static inline void raw_fflags_into_flags(int r) +{ + jit_unimplemented("raw_fflags_into_flags %x", r); +} + +static inline void raw_fp_init(void) +{ + int i; + + for (i=0;i=1) { +// emit_byte(0xde); +// emit_byte(0xd9); + live.tos-=2; + } + while (live.tos>=0) { +// emit_byte(0xdd); +// emit_byte(0xd8); + live.tos--; + } + raw_fp_init(); +} + +LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMPTRW m, FR r)) +{ + jit_unimplemented("raw_fmov_mr_drop %x %x", m, r); +} + +LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMPTRW m, FR r)) +{ + jit_unimplemented("raw_fmov_mr %x %x", m, r); +} + 
+LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMPTRR m)) +{ + jit_unimplemented("raw_fmov_rm %x %x", r, m); +} + +LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s)) +{ + jit_unimplemented("raw_fmov_rr %x %x", d, s); +} + +static inline void raw_emit_nop_filler(int nbytes) +{ + nbytes >>= 2; + while(nbytes--) { NOP(); } +} + +static inline void raw_emit_nop(void) +{ + NOP(); +} + +#ifdef UAE +static +#endif +void compiler_status() { + jit_log("compiled code starts at %p, current at %p (size 0x%x)", compiled_code, current_compile_p, (unsigned int)(current_compile_p - compiled_code)); +} + +// +// ARM doesn't have bsf, but clz is a good alternative instruction for it +// +static bool target_check_bsf(void) +{ + return false; +} + +static void raw_init_cpu(void) +{ + /* Have CMOV support, because ARM support conditions for all instructions */ + have_cmov = true; + + align_loops = 0; + align_jumps = 0; + + raw_flags_init(); +} + +// +// Arm instructions +// +LOWFUNC(WRITE,NONE,2,raw_ADD_l_rr,(RW4 d, RR4 s)) +{ + ADD_rrr(d, d, s); +} + +LOWFUNC(WRITE,NONE,2,raw_ADD_l_rri,(RW4 d, RR4 s, IMM i)) +{ + ADD_rri(d, s, i); +} + +LOWFUNC(WRITE,NONE,2,raw_SUB_l_rri,(RW4 d, RR4 s, IMM i)) +{ + SUB_rri(d, s, i); +} + +LOWFUNC(WRITE,NONE,2,raw_AND_b_rr,(RW1 d, RR1 s)) +{ + MVN_rrLSLi(REG_WORK1, s, 24); // mvn r2, %[s], lsl #24 + MVN_rrLSRi(REG_WORK1, REG_WORK1, 24); // mvn r2, %[s], lsr #24 + AND_rrr(d, d, REG_WORK1); // and %[d], %[d], r2 +} + +LOWFUNC(WRITE,NONE,2,raw_AND_l_rr,(RW4 d, RR4 s)) +{ + AND_rrr(d, d, s); +} + +LOWFUNC(WRITE,NONE,2,raw_AND_l_ri,(RW4 d, IMM i)) +{ + AND_rri(d, d, i); +} + +LOWFUNC(WRITE,NONE,2,raw_AND_w_rr,(RW2 d, RR2 s)) +{ + MVN_rrLSLi(REG_WORK1, s, 16); // mvn r2, %[s], lsl #16 + MVN_rrLSRi(REG_WORK1, REG_WORK1, 16); // mvn r2, %[s], lsr #16 + AND_rrr(d, d, REG_WORK1); // and %[d], %[d], r2 +} + +LOWFUNC(WRITE,NONE,2,raw_EOR_b_rr,(RW1 d, RR1 s)) +{ +#if defined(ARMV6_ASSEMBLY) + UXTB_rr(REG_WORK1, s); // UXTH r2, %[s] +#else + AND_rri(REG_WORK1, s, 0xFF); // and 
r2, %[s], 0xFF +#endif + EOR_rrr(d, d, REG_WORK1); // eor %[d], %[d], r2 +} + +LOWFUNC(WRITE,NONE,2,raw_EOR_l_rr,(RW4 d, RR4 s)) +{ + EOR_rrr(d, d, s); // eors r7, r7, r6 +} + +LOWFUNC(WRITE,NONE,2,raw_EOR_w_rr,(RW2 d, RR2 s)) +{ +#if defined(ARMV6_ASSEMBLY) + UXTH_rr(REG_WORK1, s); // UXTH r2, %[s] + EOR_rrr(d, d, REG_WORK1); // eor %[d], %[d], r2 +#else + LSL_rri(REG_WORK1, s, 16); // bic r2, %[s], #0xff000000 + EOR_rrrLSRi(d, d, REG_WORK1, 16); // orr %[d], %[d], r2 +#endif +} + +LOWFUNC(WRITE,NONE,2,raw_LDR_l_ri,(RW4 d, IMM i)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(i); + LDR_rRI(d, RPC_INDEX, offs); // ldr r2, [pc, #offs] +#else + LDR_rR(d, RPC_INDEX); + B_i(0); + emit_long(i); +#endif +} + +LOWFUNC(WRITE,NONE,2,raw_MOV_l_ri8,(RW4 d, IMM i)) +{ + MOV_ri(d, i); +} + +LOWFUNC(WRITE,NONE,2,raw_ORR_b_rr,(RW1 d, RR1 s)) +{ +#if defined(ARMV6_ASSEMBLY) + UXTB_rr(REG_WORK1, s); // UXTH r2, %[s] +#else + AND_rri(REG_WORK1, s, 0xFF); // and r2, %[s], 0xFF +#endif + ORR_rrr(d, d, REG_WORK1); // orr %[d], %[d], r2 +} + +LOWFUNC(WRITE,NONE,2,raw_ORR_l_rr,(RW4 d, RR4 s)) +{ + ORR_rrr(d, d, s); +} + +LOWFUNC(WRITE,NONE,2,raw_ORR_w_rr,(RW2 d, RR2 s)) +{ +#if defined(ARMV6_ASSEMBLY) + UXTH_rr(REG_WORK1, s); // UXTH r2, %[s] + ORR_rrr(d, d, REG_WORK1); // orr %[d], %[d], r2 +#else + LSL_rri(REG_WORK1, s, 16); // bic r2, %[s], #0xff000000 + ORR_rrrLSRi(d, d, REG_WORK1, 16); // orr %[d], %[d], r2 +#endif +} + +LOWFUNC(WRITE,NONE,2,raw_ROR_l_ri,(RW4 r, IMM i)) +{ + ROR_rri(r, r, i); +} + +// +// compuemu_support used raw calls +// +LOWFUNC(WRITE,RMW,2,compemu_raw_add_l_mi,(IMM d, IMM s)) +{ +#if defined(USE_DATA_BUFFER) + data_check_end(8, 24); + long target = data_long(d, 24); + long offs = get_data_offset(target); + + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; d + LDR_rR(REG_WORK2, REG_WORK1); // ldr r3, [r2] + + offs = data_long_offs(s); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; s + + ADD_rrr(REG_WORK2, REG_WORK2, 
REG_WORK1); // adds r3, r3, r2 + + offs = get_data_offset(target); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; d + STR_rR(REG_WORK2, REG_WORK1); // str r3, [r2] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 20); // ldr r2, [pc, #20] ; + LDR_rR(REG_WORK2, REG_WORK1); // ldr r3, [r2] + + LDR_rRI(REG_WORK1, RPC_INDEX, 16); // ldr r2, [pc, #16] ; + + ADD_rrr(REG_WORK2, REG_WORK2, REG_WORK1); // adds r3, r3, r2 + + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #4] ; + STR_rR(REG_WORK2, REG_WORK1); // str r3, [r2] + + B_i(1); // b + + //: + emit_long(d); + //: + emit_long(s); + //: +#endif +} + +LOWFUNC(WRITE,NONE,2,compemu_raw_and_l_ri,(RW4 d, IMM i)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(i); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; + AND_rrr(d, d, REG_WORK1); // ands %[d], %[d], r2 +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #16] ; + AND_rrr(d, d, REG_WORK1); // ands %[d], %[d], r2 + B_i(0); + emit_long(i); +#endif +} + +LOWFUNC(NONE,NONE,1,compemu_raw_bswap_32,(RW4 r)) +{ +#if defined(ARMV6_ASSEMBLY) + REV_rr(r,r); // rev %[r],%[r] +#else + EOR_rrrRORi(REG_WORK1, r, r, 16); // eor r2, r6, r6, ror #16 + BIC_rri(REG_WORK1, REG_WORK1, 0xff0000); // bic r2, r2, #0xff0000 + ROR_rri(r, r, 8); // ror r6, r6, #8 + EOR_rrrLSRi(r, r, REG_WORK1, 8); // eor r6, r6, r2, lsr #8 +#endif +} + +LOWFUNC(WRITE,NONE,2,compemu_raw_bt_l_ri,(RR4 r, IMM i)) +{ + int imm = (1 << (i & 0x1f)); + + MRS_CPSR(REG_WORK2); // mrs r3, CPSR + TST_ri(r, imm); // tst r6, #0x1000000 + CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG); // bic r3, r3, #0x20000000 + CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG); // orr r3, r3, #0x20000000 + MSR_CPSR_r(REG_WORK2); // msr CPSR_fc, r3 +} + +LOWFUNC(NONE,READ,5,compemu_raw_cmov_l_rm_indexed,(W4 d, IMM base, RR4 index, IMM factor, IMM cond)) +{ + int shft; + switch(factor) { + case 1: shft=0; break; + case 2: shft=1; break; + case 4: shft=2; break; + case 8: 
shft=3; break; + default: abort(); + } + + switch (cond) { + case 9: // LS + jit_unimplemented("cmov LS not implemented"); + abort(); + case 8: // HI + jit_unimplemented("cmov HI not implemented"); + abort(); + default: +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(base); + CC_LDR_rRI(cond, REG_WORK1, RPC_INDEX, offs); // ldrcc r2, [pc, #offs] ; + CC_LDR_rRR_LSLi(cond, d, REG_WORK1, index, shft); // ldrcc %[d], [r2, %[index], lsl #[shift]] +#else + CC_LDR_rRI(cond, REG_WORK1, RPC_INDEX, 4); // ldrcc r2, [pc, #4] ; + CC_LDR_rRR_LSLi(cond, d, REG_WORK1, index, shft); // ldrcc %[d], [r2, %[index], lsl #[shift]] + B_i(0); // b +#endif + break; + } +#if !defined(USE_DATA_BUFFER) + emit_long(base); // : + //: +#endif +} + +LOWFUNC(WRITE,READ,2,compemu_raw_cmp_l_mi,(MEMR d, IMM s)) +{ +#if defined(USE_DATA_BUFFER) + data_check_end(8, 16); + long offs = data_long_offs(d); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; d + LDR_rR(REG_WORK1, REG_WORK1); // ldr r2, [r2] + + offs = data_long_offs(s); + LDR_rRI(REG_WORK2, RPC_INDEX, offs); // ldr r3, [pc, #offs] ; s + + CMP_rr(REG_WORK1, REG_WORK2); // cmp r2, r3 + +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 12); // ldr r2, [pc, #24] ; + LDR_rR(REG_WORK1, REG_WORK1); // ldr r2, [r2] + + LDR_rRI(REG_WORK2, RPC_INDEX, 8); // ldr r3, [pc, #20] ; + + CMP_rr(REG_WORK1, REG_WORK2); // cmp r2, r3 + + B_i(1); // b + + //: + emit_long(d); + //: + emit_long(s); + //: +#endif +} + +LOWFUNC(WRITE,READ,2,compemu_raw_cmp_l_mi8,(MEMR d, IMM s)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(d); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 8); // ldr r2, [pc, #8] ; +#endif + LDR_rR(REG_WORK1, REG_WORK1); // ldr r2, [r2] + + CMP_ri(REG_WORK1, s); // cmp r2, r3 + +#if !defined(USE_DATA_BUFFER) + B_i(0); // b + + //: + emit_long(d); + //: +#endif +} + +LOWFUNC(NONE,NONE,3,compemu_raw_lea_l_brr,(W4 d, RR4 s, IMM offset)) +{ +#if 
defined(USE_DATA_BUFFER) + long offs = data_long_offs(offset); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; + ADD_rrr(d, s, REG_WORK1); // add r7, r6, r2 +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #4] ; + ADD_rrr(d, s, REG_WORK1); // add r7, r6, r2 + B_i(0); // b + + //: + emit_long(offset); + //: +#endif +} + +LOWFUNC(NONE,NONE,4,compemu_raw_lea_l_rr_indexed,(W4 d, RR4 s, RR4 index, IMM factor)) +{ + int shft; + switch(factor) { + case 1: shft=0; break; + case 2: shft=1; break; + case 4: shft=2; break; + case 8: shft=3; break; + default: abort(); + } + + ADD_rrrLSLi(d, s, index, shft); // ADD R7,R6,R5,LSL #2 +} + +LOWFUNC(NONE,WRITE,2,compemu_raw_mov_b_mr,(IMM d, RR1 s)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(d); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; + STRB_rR(s, REG_WORK1); // strb r6, [r2] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #4] ; + STRB_rR(s, REG_WORK1); // strb r6, [r2] + B_i(0); // b + + //: + emit_long(d); + //: +#endif +} + +LOWFUNC(NONE,WRITE,2,compemu_raw_mov_l_mi,(MEMW d, IMM s)) +{ + // TODO: optimize imm + +#if defined(USE_DATA_BUFFER) + data_check_end(8, 12); + long offs = data_long_offs(d); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; d + offs = data_long_offs(s); + LDR_rRI(REG_WORK2, RPC_INDEX, offs); // ldr r3, [pc, #offs] ; s + STR_rR(REG_WORK2, REG_WORK1); // str r3, [r2] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 8); // ldr r2, [pc, #8] ; + LDR_rRI(REG_WORK2, RPC_INDEX, 8); // ldr r3, [pc, #8] ; + STR_rR(REG_WORK2, REG_WORK1); // str r3, [r2] + B_i(1); // b + + emit_long(d); //: + emit_long(s); //: + + //: +#endif +} + +LOWFUNC(NONE,WRITE,2,compemu_raw_mov_l_mr,(IMM d, RR4 s)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(d); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; + STR_rR(s, REG_WORK1); // str r3, [r2] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #4] ; + STR_rR(s, 
REG_WORK1); // str r3, [r2] + B_i(0); // b + + //: + emit_long(d); + //: +#endif +} + +LOWFUNC(NONE,NONE,2,compemu_raw_mov_l_ri,(W4 d, IMM s)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(s); + LDR_rRI(d, RPC_INDEX, offs); // ldr %[d], [pc, #offs] ; +#else + LDR_rR(d, RPC_INDEX); // ldr %[d], [pc] ; + B_i(0); // b + + //: + emit_long(s); + //: +#endif +} + +LOWFUNC(NONE,READ,2,compemu_raw_mov_l_rm,(W4 d, MEMR s)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(s); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; + LDR_rR(d, REG_WORK1); // ldr r7, [r2] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #4] ; + LDR_rR(d, REG_WORK1); // ldr r7, [r2] + B_i(0); // b + + emit_long(s); //: + //: +#endif +} + +LOWFUNC(NONE,NONE,2,compemu_raw_mov_l_rr,(W4 d, RR4 s)) +{ + MOV_rr(d, s); // mov %[d], %[s] +} + +LOWFUNC(NONE,WRITE,2,compemu_raw_mov_w_mr,(IMM d, RR2 s)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(d); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; + STRH_rR(s, REG_WORK1); // strh r3, [r2] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #4] ; + STRH_rR(s, REG_WORK1); // strh r3, [r2] + B_i(0); // b + + //: + emit_long(d); + //: +#endif +} + +LOWFUNC(WRITE,RMW,2,compemu_raw_sub_l_mi,(MEMRW d, IMM s)) +{ +#if defined(USE_DATA_BUFFER) + data_check_end(8, 24); + long target = data_long(d, 24); + long offs = get_data_offset(target); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; d + LDR_rR(REG_WORK2, REG_WORK1); // ldr r3, [r2] + + offs = data_long_offs(s); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; s + + SUBS_rrr(REG_WORK2, REG_WORK2, REG_WORK1); // subs r3, r3, r2 + + offs = get_data_offset(target); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; d + STR_rR(REG_WORK2, REG_WORK1); // str r3, [r2] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 20); // ldr r2, [pc, #32] ; + LDR_rR(REG_WORK2, REG_WORK1); // ldr r3, [r2] + + 
LDR_rRI(REG_WORK1, RPC_INDEX, 16); // ldr r2, [pc, #28] ; + + SUBS_rrr(REG_WORK2, REG_WORK2, REG_WORK1); // subs r3, r3, r2 + + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #16] ; + STR_rR(REG_WORK2, REG_WORK1); // str r3, [r2] + + B_i(1); // b + + //: + emit_long(d); + //: + emit_long(s); + //: +#endif +} + +LOWFUNC(WRITE,NONE,2,compemu_raw_test_l_rr,(RR4 d, RR4 s)) +{ + TST_rr(d, s); // tst r7, r6 +} + +LOWFUNC(NONE,NONE,2,compemu_raw_zero_extend_16_rr,(W4 d, RR2 s)) +{ +#if defined(ARMV6_ASSEMBLY) + UXTH_rr(d, s); // UXTH %[d], %[s] +#else + BIC_rri(d, s, 0xff000000); // bic %[d], %[s], #0xff000000 + BIC_rri(d, d, 0x00ff0000); // bic %[d], %[d], #0x00ff0000 +#endif +} + +static inline void compemu_raw_call(uae_u32 t) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(t); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 12); // ldr r2, [pc, #12] ; +#endif + PUSH(RLR_INDEX); // push {lr} + BLX_r(REG_WORK1); // blx r2 + POP(RLR_INDEX); // pop {lr} +#if !defined(USE_DATA_BUFFER) + B_i(0); // b + + //: + emit_long(t); + //: +#endif +} + +#if defined(UAE) +static inline void compemu_raw_call_r(RR4 r) +{ + PUSH(RLR_INDEX); // push {lr} + BLX_r(r); // blx r0 + POP(RLR_INDEX); // pop {lr} +} +#endif + +static inline void compemu_raw_jcc_l_oponly(int cc) +{ + switch (cc) { + case 9: // LS + BEQ_i(0); // beq + BCC_i(2); // bcc + + //: + LDR_rR(REG_WORK1, RPC_INDEX); // ldr r2, [pc] ; + BX_r(REG_WORK1); // bx r2 + break; + + case 8: // HI + BEQ_i(3); // beq + BCS_i(2); // bcs + + //: + LDR_rR(REG_WORK1, RPC_INDEX); // ldr r2, [pc] ; + BX_r(REG_WORK1); // bx r2 + break; + + default: + CC_LDR_rRI(cc, REG_WORK1, RPC_INDEX, 4); // ldrlt r2, [pc, #4] ; + CC_BX_r(cc, REG_WORK1); // bxlt r2 + B_i(0); // b + break; + } + // emit of target will be done by caller +} + +static inline void compemu_raw_jl(uae_u32 t) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(t); + CC_LDR_rRI(NATIVE_CC_LT, RPC_INDEX, 
RPC_INDEX, offs); // ldrlt pc, [pc, offs] +#else + CC_LDR_rR(NATIVE_CC_LT, RPC_INDEX, RPC_INDEX); // ldrlt pc, [pc] + B_i(0); // b + + //: + emit_long(t); + //: +#endif +} + +static inline void compemu_raw_jmp(uae_u32 t) +{ + LDR_rR(REG_WORK1, RPC_INDEX); // ldr r2, [pc] + BX_r(REG_WORK1); // bx r2 + emit_long(t); +} + +static inline void compemu_raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m) +{ + int shft; + switch(m) { + case 1: shft=0; break; + case 2: shft=1; break; + case 4: shft=2; break; + case 8: shft=3; break; + default: abort(); + } + + LDR_rR(REG_WORK1, RPC_INDEX); // ldr r2, [pc] ; + LDR_rRR_LSLi(RPC_INDEX, REG_WORK1, r, shft); // ldr pc, [r2, r6, lsl #3] + emit_long(base); +} + +static inline void compemu_raw_jmp_r(RR4 r) +{ + BX_r(r); +} + +static inline void compemu_raw_jnz(uae_u32 t) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(t); + CC_LDR_rRI(NATIVE_CC_NE, RPC_INDEX, RPC_INDEX, offs); // ldrne pc, [pc, offs] +#else + CC_LDR_rR(NATIVE_CC_NE, RPC_INDEX, RPC_INDEX); // ldrne pc, [pc] + B_i(0); // b + + emit_long(t); + //: +#endif +} + +static inline void compemu_raw_jz_b_oponly(void) +{ + BNE_i(2); // bne jp + LDRSB_rRI(REG_WORK1, RPC_INDEX, 3); // ldrsb r2,[pc,#3] + ADD_rrr(RPC_INDEX, RPC_INDEX, REG_WORK1); // add pc,pc,r2 + + skip_n_bytes(3); /* additionally 1 byte skipped by generic code */ + + // +} + +static inline void compemu_raw_jnz_b_oponly(void) +{ + BEQ_i(2); // beq jp + LDRSB_rRI(REG_WORK1, RPC_INDEX, 3); // ldrsb r2,[pc,#3] + ADD_rrr(RPC_INDEX, RPC_INDEX, REG_WORK1); // add pc,pc,r2 + + skip_n_bytes(3); /* additionally 1 byte skipped by generic code */ + + // +} + +static inline void compemu_raw_branch(IMM d) +{ + B_i((d >> 2) - 1); +} diff --git a/BasiliskII/src/uae_cpu/compiler/codegen_arm.h b/BasiliskII/src/uae_cpu/compiler/codegen_arm.h new file mode 100644 index 00000000..e04ab9b8 --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/codegen_arm.h @@ -0,0 +1,1292 @@ +/* + * compiler/codegen_arm.h - IA-32 and 
AMD64 code generator + * + * Copyright (c) 2013 Jens Heitmann of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * JIT compiler m68k -> ARM + * + * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * This file is derived from CCG, copyright 1999-2003 Ian Piumarta + * Adaptation for Basilisk II and improvements, copyright 2000-2004 Gwenole Beauchesne + * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef ARM_RTASM_H +#define ARM_RTASM_H + +/* NOTES + * + */ + +/* --- Configuration ------------------------------------------------------- */ + +/* CPSR flags */ + +#define ARM_N_FLAG 0x80000000 +#define ARM_Z_FLAG 0x40000000 +#define ARM_C_FLAG 0x20000000 +#define ARM_V_FLAG 0x10000000 +#define ARM_Q_FLAG 0x08000000 +#define ARM_CV_FLAGS (ARM_C_FLAG|ARM_V_FLAG) + +#define ARM_GE3 0x00080000 +#define ARM_GE2 0x00040000 +#define ARM_GE1 0x00020000 +#define ARM_GE0 0x00010000 + +/* --- Macros -------------------------------------------------------------- */ + +/* ========================================================================= */ +/* --- UTILITY ------------------------------------------------------------- */ +/* ========================================================================= */ + +#define _W(c) emit_long(c) +#define _LS2_ADDR(a) (((a) & 0x01f0000f) | (((a) & 0xf0) << 4)) + +/* ========================================================================= */ +/* --- ENCODINGS ----------------------------------------------------------- */ +/* ========================================================================= */ + +#define IMM32(c) (((c) & 0xffffff00) == 0 ? (c) : \ + ((c) & 0x3fffffc0) == 0 ? (0x100 | (((c) >> 30) & 0x3) | ((((c) & 0x0000003f) << 2))) : \ + ((c) & 0x0ffffff0) == 0 ? (0x200 | (((c) >> 28) & 0xf) | ((((c) & 0x0000000f) << 4))) : \ + ((c) & 0x03fffffc) == 0 ? (0x300 | (((c) >> 26) & 0x3f) | ((((c) & 0x00000003) << 6)) ) : \ + ((c) & 0x00ffffff) == 0 ? (0x400 | (((c) >> 24) & 0xff)) : \ + ((c) & 0xc03fffff) == 0 ? (0x500 | ((c) >> 22)) : \ + ((c) & 0xf00fffff) == 0 ? (0x600 | ((c) >> 20)) : \ + ((c) & 0xfc03ffff) == 0 ? (0x700 | ((c) >> 18)) : \ + ((c) & 0xff00ffff) == 0 ? 
(0x800 | ((c) >> 16)) : \ + ((c) & 0xffc03fff) == 0 ? (0x900 | ((c) >> 14)) : \ + ((c) & 0xfff00fff) == 0 ? (0xa00 | ((c) >> 12)) : \ + ((c) & 0xfffc03ff) == 0 ? (0xb00 | ((c) >> 10)) : \ + ((c) & 0xffff00ff) == 0 ? (0xc00 | ((c) >> 8)) : \ + ((c) & 0xffffc03f) == 0 ? (0xd00 | ((c) >> 6)) : \ + ((c) & 0xfffff00f) == 0 ? (0xe00 | ((c) >> 4)) : \ + ((c) & 0xfffffc03) == 0 ? (0xf00 | ((c) >> 2)) : \ + 0\ + ) + +#define SHIFT_IMM(c) (0x02000000 | (IMM32((c)))) + +#define UNSHIFTED_IMM8(c) (0x02000000 | (c)) +#define SHIFT_IMM8_ROR(c,r) (0x02000000 | (c) | ((r >> 1) << 8)) + +#define SHIFT_REG(Rm) (Rm) +#define SHIFT_LSL_i(Rm,s) ((Rm) | ((s) << 7)) +#define SHIFT_LSL_r(Rm,Rs) ((Rm) | ((Rs) << 8) | 0x10) +#define SHIFT_LSR_i(Rm,s) ((Rm) | ((s) << 7) | 0x20) +#define SHIFT_LSR_r(Rm,Rs) ((Rm) | ((Rs) << 8) | 0x30) +#define SHIFT_ASR_i(Rm,s) ((Rm) | ((s) << 7) | 0x40) +#define SHIFT_ASR_r(Rm,Rs) ((Rm) | ((Rs) << 8) | 0x50) +#define SHIFT_ROR_i(Rm,s) ((Rm) | ((s) << 7) | 0x60) +#define SHIFT_ROR_r(Rm,Rs) ((Rm) | ((Rs) << 8) | 0x70) +#define SHIFT_RRX(Rm) ((Rm) | 0x60) +#define SHIFT_PK(Rm,s) ((Rm) | ((s) << 7)) + +/* Load/Store addressings */ +#define ADR_ADD(v) ((1 << 23) | (v)) +#define ADR_SUB(v) (v) + +#define ADR_IMM(v) ((v) | (1 << 24)) +#define ADR_IMMPOST(v) (v) +#define ADR_REG(Rm) ((1 << 25) | (1 << 24) | (Rm)) +#define ADR_REGPOST(Rm) ((1 << 25) | (Rm)) + +#define ADD_IMM(i) ADR_ADD(ADR_IMM(i)) +#define SUB_IMM(i) ADR_SUB(ADR_IMM(i)) + +#define ADD_REG(Rm) ADR_ADD(ADR_REG(Rm)) +#define SUB_REG(Rm) ADR_SUB(ADR_REG(Rm)) + +#define ADD_LSL(Rm,i) ADR_ADD(ADR_REG(Rm) | ((i) << 7)) +#define SUB_LSL(Rm,i) ADR_SUB(ADR_REG(Rm) | ((i) << 7)) + +#define ADD_LSR(Rm,i) ADR_ADD(ADR_REG(Rm) | (((i) & 0x1f) << 7) | (1 << 5)) +#define SUB_LSR(Rm,i) ADR_SUB(ADR_REG(Rm) | (((i) & 0x1f) << 7) | (1 << 5)) + +#define ADD_ASR(Rm,i) ADR_ADD(ADR_REG(Rm) | (((i) & 0x1f) << 7) | (2 << 5)) +#define SUB_ASR(Rm,i) ADR_SUB(ADR_REG(Rm) | (((i) & 0x1f) << 7) | (2 << 5)) + +#define ADD_ROR(Rm,i) 
ADR_ADD(ADR_REG(Rm) | (((i) & 0x1f) << 7) | (3 << 5)) +#define SUB_ROR(Rm,i) ADR_SUB(ADR_REG(Rm) | (((i) & 0x1f) << 7) | (3 << 5)) + +#define ADD_RRX(Rm) ADR_ADD(ADR_REG(Rm) | (3 << 5)) +#define SUB_RRX(Rm) ADR_SUB(ADR_REG(Rm) | (3 << 5)) + +#define ADD2_IMM(i) ADR_ADD(i | (1 << 22)) +#define SUB2_IMM(i) ADR_SUB(i | (1 << 22)) + +#define ADD2_REG(Rm) ADR_ADD(Rm) +#define SUB2_REG(Rm) ADR_SUB(Rm) + +/* MOV, MVN */ +#define _OP1(cc,op,s,Rd,shift) _W(((cc) << 28) | ((op) << 21) | ((s) << 20) | ((Rd) << 12) | (shift)) + +/* CMP, CMN, TST, TEQ */ +#define _OP2(cc,op,Rn,shift) _W(((cc) << 28) | ((op) << 21) | (1 << 20) | ((Rn) << 16) | (shift)) + +/* ADD, SUB, RSB, ADC, SBC, RSC, AND, BIC, EOR, ORR */ +#define _OP3(cc,op,s,Rd,Rn,shift) _W(((cc) << 28) | ((op) << 21) | ((s) << 20) | ((Rn) << 16) | ((Rd) << 12) | (shift)) + +/* LDR, STR */ +#define _LS1(cc,l,b,Rd,Rn,a) _W(((cc) << 28) | (0x01 << 26) | ((l) << 20) | ((b) << 22) | ((Rn) << 16) | ((Rd) << 12) | (a)) +#define _LS2(cc,p,l,s,h,Rd,Rn,a) _W(((cc) << 28) | ((p) << 24) | ((l) << 20) | ((Rn) << 16) | ((Rd) << 12) | ((s) << 6) | ((h) << 5) | 0x90 | _LS2_ADDR((a))) + +/* ========================================================================= */ +/* --- OPCODES ------------------------------------------------------------- */ +/* ========================================================================= */ + +/* Branch instructions */ +#ifndef __ANDROID__ +enum { + _B, _BL, _BLX, _BX, _BXJ +}; +#endif + +/* Data processing instructions */ +enum { + _AND = 0, + _EOR, + _SUB, + _RSB, + _ADD, + _ADC, + _SBC, + _RSC, + _TST, + _TEQ, + _CMP, + _CMN, + _ORR, + _MOV, + _BIC, + _MVN +}; + +/* Single instruction Multiple Data (SIMD) instructions */ + +/* Multiply instructions */ + +/* Parallel instructions */ + +/* Extend instructions */ + +/* Miscellaneous arithmetic instrations */ + +/* Status register transfer instructions */ + +/* Load and Store instructions */ + +/* Coprocessor instructions */ + +/* Exception generation 
instructions */ + +/* ========================================================================= */ +/* --- ASSEMBLER ----------------------------------------------------------- */ +/* ========================================================================= */ + +#define NOP() _W(0xe1a00000) +#define SETEND_BE() _W(0xf1010200) +#define SETEND_LE() _W(0xf1010000) + +/* Data processing instructions */ + +/* Opcodes Type 1 */ +/* MOVcc rd,#i */ +#define CC_MOV_ri8(cc,Rd,i) _OP1(cc,_MOV,0,Rd,UNSHIFTED_IMM8(i)) +/* MOVcc Rd,#i ROR #s */ +#define CC_MOV_ri8RORi(cc,Rd,i,s) _OP1(cc,_MOV,0,Rd,SHIFT_IMM8_ROR(i,s)) +#define CC_MOV_ri(cc,Rd,i) _OP1(cc,_MOV,0,Rd,SHIFT_IMM(i)) +#define CC_MOV_rr(cc,Rd,Rm) _OP1(cc,_MOV,0,Rd,SHIFT_REG(Rm)) +#define CC_MOV_rrLSLi(cc,Rd,Rm,i) _OP1(cc,_MOV,0,Rd,SHIFT_LSL_i(Rm,i)) +#define CC_MOV_rrLSLr(cc,Rd,Rm,Rs) _OP1(cc,_MOV,0,Rd,SHIFT_LSL_r(Rm,Rs)) +#define CC_MOV_rrLSRi(cc,Rd,Rm,i) _OP1(cc,_MOV,0,Rd,SHIFT_LSR_i(Rm,i)) +#define CC_MOV_rrLSRr(cc,Rd,Rm,Rs) _OP1(cc,_MOV,0,Rd,SHIFT_LSR_r(Rm,Rs)) +#define CC_MOV_rrASRi(cc,Rd,Rm,i) _OP1(cc,_MOV,0,Rd,SHIFT_ASR_i(Rm,i)) +#define CC_MOV_rrASRr(cc,Rd,Rm,Rs) _OP1(cc,_MOV,0,Rd,SHIFT_ASR_r(Rm,Rs)) +#define CC_MOV_rrRORi(cc,Rd,Rm,i) _OP1(cc,_MOV,0,Rd,SHIFT_ROR_i(Rm,i)) +#define CC_MOV_rrRORr(cc,Rd,Rm,Rs) _OP1(cc,_MOV,0,Rd,SHIFT_ROR_r(Rm,Rs)) +#define CC_MOV_rrRRX(cc,Rd,Rm) _OP1(cc,_MOV,0,Rd,SHIFT_RRX(Rm)) + +/* MOV rd,#i */ +#define MOV_ri8(Rd,i) CC_MOV_ri8(NATIVE_CC_AL,Rd,i) +/* MOV Rd,#i ROR #s */ +#define MOV_ri8RORi(Rd,i,s) CC_MOV_ri8RORi(NATIVE_CC_AL,Rd,i,s) +#define MOV_ri(Rd,i) CC_MOV_ri(NATIVE_CC_AL,Rd,i) +#define MOV_rr(Rd,Rm) CC_MOV_rr(NATIVE_CC_AL,Rd,Rm) +#define MOV_rrLSLi(Rd,Rm,i) CC_MOV_rrLSLi(NATIVE_CC_AL,Rd,Rm,i) +#define MOV_rrLSLr(Rd,Rm,Rs) CC_MOV_rrLSLr(NATIVE_CC_AL,Rd,Rm,Rs) +#define MOV_rrLSRi(Rd,Rm,i) CC_MOV_rrLSRi(NATIVE_CC_AL,Rd,Rm,i) +#define MOV_rrLSRr(Rd,Rm,Rs) CC_MOV_rrLSRr(NATIVE_CC_AL,Rd,Rm,Rs) +#define MOV_rrASRi(Rd,Rm,i) CC_MOV_rrASRi(NATIVE_CC_AL,Rd,Rm,i) +#define 
MOV_rrASRr(Rd,Rm,Rs) CC_MOV_rrASRr(NATIVE_CC_AL,Rd,Rm,Rs) +#define MOV_rrRORi(Rd,Rm,i) CC_MOV_rrRORi(NATIVE_CC_AL,Rd,Rm,i) +#define MOV_rrRORr(Rd,Rm,Rs) CC_MOV_rrRORr(NATIVE_CC_AL,Rd,Rm,Rs) +#define MOV_rrRRX(Rd,Rm) CC_MOV_rrRRX(NATIVE_CC_AL,Rd,Rm) + +#define CC_MOVS_ri(cc,Rd,i) _OP1(cc,_MOV,1,Rd,SHIFT_IMM(i)) +#define CC_MOVS_rr(cc,Rd,Rm) _OP1(cc,_MOV,1,Rd,SHIFT_REG(Rm)) +#define CC_MOVS_rrLSLi(cc,Rd,Rm,i) _OP1(cc,_MOV,1,Rd,SHIFT_LSL_i(Rm,i)) +#define CC_MOVS_rrLSLr(cc,Rd,Rm,Rs) _OP1(cc,_MOV,1,Rd,SHIFT_LSL_r(Rm,Rs)) +#define CC_MOVS_rrLSRi(cc,Rd,Rm,i) _OP1(cc,_MOV,1,Rd,SHIFT_LSR_i(Rm,i)) +#define CC_MOVS_rrLSRr(cc,Rd,Rm,Rs) _OP1(cc,_MOV,1,Rd,SHIFT_LSR_r(Rm,Rs)) +#define CC_MOVS_rrASRi(cc,Rd,Rm,i) _OP1(cc,_MOV,1,Rd,SHIFT_ASR_i(Rm,i)) +#define CC_MOVS_rrASRr(cc,Rd,Rm,Rs) _OP1(cc,_MOV,1,Rd,SHIFT_ASR_r(Rm,Rs)) +#define CC_MOVS_rrRORi(cc,Rd,Rm,i) _OP1(cc,_MOV,1,Rd,SHIFT_ROR_i(Rm,i)) +#define CC_MOVS_rrRORr(cc,Rd,Rm,Rs) _OP1(cc,_MOV,1,Rd,SHIFT_ROR_r(Rm,Rs)) +#define CC_MOVS_rrRRX(cc,Rd,Rm) _OP1(cc,_MOV,1,Rd,SHIFT_RRX(Rm)) + +#define MOVS_ri(Rd,i) CC_MOVS_ri(NATIVE_CC_AL,Rd,i) +#define MOVS_rr(Rd,Rm) CC_MOVS_rr(NATIVE_CC_AL,Rd,Rm) +#define MOVS_rrLSLi(Rd,Rm,i) CC_MOVS_rrLSLi(NATIVE_CC_AL,Rd,Rm,i) +#define MOVS_rrLSLr(Rd,Rm,Rs) CC_MOVS_rrLSLr(NATIVE_CC_AL,Rd,Rm,Rs) +#define MOVS_rrLSRi(Rd,Rm,i) CC_MOVS_rrLSRi(NATIVE_CC_AL,Rd,Rm,i) +#define MOVS_rrLSRr(Rd,Rm,Rs) CC_MOVS_rrLSRr(NATIVE_CC_AL,Rd,Rm,Rs) +#define MOVS_rrASRi(Rd,Rm,i) CC_MOVS_rrASRi(NATIVE_CC_AL,Rd,Rm,i) +#define MOVS_rrASRr(Rd,Rm,Rs) CC_MOVS_rrASRr(NATIVE_CC_AL,Rd,Rm,Rs) +#define MOVS_rrRORi(Rd,Rm,i) CC_MOVS_rrRORi(NATIVE_CC_AL,Rd,Rm,i) +#define MOVS_rrRORr(Rd,Rm,Rs) CC_MOVS_rrRORr(NATIVE_CC_AL,Rd,Rm,Rs) +#define MOVS_rrRRX(Rd,Rm) CC_MOVS_rrRRX(NATIVE_CC_AL,Rd,Rm) + +/* MVNcc rd,#i */ +#define CC_MVN_ri8(cc,Rd,i) _OP1(cc,_MVN,0,Rd,UNSHIFTED_IMM8(i)) +/* MVNcc Rd,#i ROR #s */ +#define CC_MVN_ri8RORi(cc,Rd,i,s) _OP1(cc,_MVN,0,Rd,SHIFT_IMM8_ROR(i,s)) +#define CC_MVN_ri(cc,Rd,i) _OP1(cc,_MVN,0,Rd,SHIFT_IMM(i)) 
/*
 * MVN register-operand forms, built by _OP1(cc, op, s, Rd, operand).
 * Suffix rr -> SHIFT_REG(Rm); rr<op>i -> SHIFT_<op>_i(Rm,i);
 * rr<op>r -> SHIFT_<op>_r(Rm,Rs); rrRRX -> SHIFT_RRX(Rm).
 * Unprefixed macros forward to the CC_ forms with NATIVE_CC_AL.
 */
#define CC_MVN_rr(cc,Rd,Rm) _OP1(cc,_MVN,0,Rd,SHIFT_REG(Rm))
#define CC_MVN_rrLSLi(cc,Rd,Rm,i) _OP1(cc,_MVN,0,Rd,SHIFT_LSL_i(Rm,i))
#define CC_MVN_rrLSLr(cc,Rd,Rm,Rs) _OP1(cc,_MVN,0,Rd,SHIFT_LSL_r(Rm,Rs))
#define CC_MVN_rrLSRi(cc,Rd,Rm,i) _OP1(cc,_MVN,0,Rd,SHIFT_LSR_i(Rm,i))
#define CC_MVN_rrLSRr(cc,Rd,Rm,Rs) _OP1(cc,_MVN,0,Rd,SHIFT_LSR_r(Rm,Rs))
#define CC_MVN_rrASRi(cc,Rd,Rm,i) _OP1(cc,_MVN,0,Rd,SHIFT_ASR_i(Rm,i))
#define CC_MVN_rrASRr(cc,Rd,Rm,Rs) _OP1(cc,_MVN,0,Rd,SHIFT_ASR_r(Rm,Rs))
#define CC_MVN_rrRORi(cc,Rd,Rm,i) _OP1(cc,_MVN,0,Rd,SHIFT_ROR_i(Rm,i))
#define CC_MVN_rrRORr(cc,Rd,Rm,Rs) _OP1(cc,_MVN,0,Rd,SHIFT_ROR_r(Rm,Rs))
#define CC_MVN_rrRRX(cc,Rd,Rm) _OP1(cc,_MVN,0,Rd,SHIFT_RRX(Rm))

/* MVN rd,#i */
#define MVN_ri8(Rd,i) CC_MVN_ri8(NATIVE_CC_AL,Rd,i)
/* MVN Rd,#i ROR #s */
#define MVN_ri8RORi(Rd,i,s) CC_MVN_ri8RORi(NATIVE_CC_AL,Rd,i,s)
#define MVN_ri(Rd,i) CC_MVN_ri(NATIVE_CC_AL,Rd,i)
#define MVN_rr(Rd,Rm) CC_MVN_rr(NATIVE_CC_AL,Rd,Rm)
#define MVN_rrLSLi(Rd,Rm,i) CC_MVN_rrLSLi(NATIVE_CC_AL,Rd,Rm,i)
#define MVN_rrLSLr(Rd,Rm,Rs) CC_MVN_rrLSLr(NATIVE_CC_AL,Rd,Rm,Rs)
#define MVN_rrLSRi(Rd,Rm,i) CC_MVN_rrLSRi(NATIVE_CC_AL,Rd,Rm,i)
#define MVN_rrLSRr(Rd,Rm,Rs) CC_MVN_rrLSRr(NATIVE_CC_AL,Rd,Rm,Rs)
#define MVN_rrASRi(Rd,Rm,i) CC_MVN_rrASRi(NATIVE_CC_AL,Rd,Rm,i)
#define MVN_rrASRr(Rd,Rm,Rs) CC_MVN_rrASRr(NATIVE_CC_AL,Rd,Rm,Rs)
#define MVN_rrRORi(Rd,Rm,i) CC_MVN_rrRORi(NATIVE_CC_AL,Rd,Rm,i)
#define MVN_rrRORr(Rd,Rm,Rs) CC_MVN_rrRORr(NATIVE_CC_AL,Rd,Rm,Rs)
#define MVN_rrRRX(Rd,Rm) CC_MVN_rrRRX(NATIVE_CC_AL,Rd,Rm)

/* MVNS: same operand forms, third _OP1() argument is 1 instead of 0 */
/* (presumably the flag-setting S bit; confirm against _OP1). */
#define CC_MVNS_ri(cc,Rd,i) _OP1(cc,_MVN,1,Rd,SHIFT_IMM(i))
#define CC_MVNS_rr(cc,Rd,Rm) _OP1(cc,_MVN,1,Rd,SHIFT_REG(Rm))
#define CC_MVNS_rrLSLi(cc,Rd,Rm,i) _OP1(cc,_MVN,1,Rd,SHIFT_LSL_i(Rm,i))
#define CC_MVNS_rrLSLr(cc,Rd,Rm,Rs) _OP1(cc,_MVN,1,Rd,SHIFT_LSL_r(Rm,Rs))
#define CC_MVNS_rrLSRi(cc,Rd,Rm,i) _OP1(cc,_MVN,1,Rd,SHIFT_LSR_i(Rm,i))
#define CC_MVNS_rrLSRr(cc,Rd,Rm,Rs) _OP1(cc,_MVN,1,Rd,SHIFT_LSR_r(Rm,Rs))
#define CC_MVNS_rrASRi(cc,Rd,Rm,i) _OP1(cc,_MVN,1,Rd,SHIFT_ASR_i(Rm,i))
#define CC_MVNS_rrASRr(cc,Rd,Rm,Rs) _OP1(cc,_MVN,1,Rd,SHIFT_ASR_r(Rm,Rs))
#define CC_MVNS_rrRORi(cc,Rd,Rm,i) _OP1(cc,_MVN,1,Rd,SHIFT_ROR_i(Rm,i))
#define CC_MVNS_rrRORr(cc,Rd,Rm,Rs) _OP1(cc,_MVN,1,Rd,SHIFT_ROR_r(Rm,Rs))
#define CC_MVNS_rrRRX(cc,Rd,Rm) _OP1(cc,_MVN,1,Rd,SHIFT_RRX(Rm))

#define MVNS_ri(Rd,i) CC_MVNS_ri(NATIVE_CC_AL,Rd,i)
#define MVNS_rr(Rd,Rm) CC_MVNS_rr(NATIVE_CC_AL,Rd,Rm)
#define MVNS_rrLSLi(Rd,Rm,i) CC_MVNS_rrLSLi(NATIVE_CC_AL,Rd,Rm,i)
#define MVNS_rrLSLr(Rd,Rm,Rs) CC_MVNS_rrLSLr(NATIVE_CC_AL,Rd,Rm,Rs)
#define MVNS_rrLSRi(Rd,Rm,i) CC_MVNS_rrLSRi(NATIVE_CC_AL,Rd,Rm,i)
#define MVNS_rrLSRr(Rd,Rm,Rs) CC_MVNS_rrLSRr(NATIVE_CC_AL,Rd,Rm,Rs)
#define MVNS_rrASRi(Rd,Rm,i) CC_MVNS_rrASRi(NATIVE_CC_AL,Rd,Rm,i)
#define MVNS_rrASRr(Rd,Rm,Rs) CC_MVNS_rrASRr(NATIVE_CC_AL,Rd,Rm,Rs)
#define MVNS_rrRORi(Rd,Rm,i) CC_MVNS_rrRORi(NATIVE_CC_AL,Rd,Rm,i)
#define MVNS_rrRORr(Rd,Rm,Rs) CC_MVNS_rrRORr(NATIVE_CC_AL,Rd,Rm,Rs)
#define MVNS_rrRRX(Rd,Rm) CC_MVNS_rrRRX(NATIVE_CC_AL,Rd,Rm)

/*
 * Opcodes Type 2: built by _OP2(cc, op, Rn, operand); there is no
 * destination register and no s argument.
 */
#define CC_CMP_ri(cc,Rn,i) _OP2(cc,_CMP,Rn,SHIFT_IMM(i))
#define CC_CMP_rr(cc,Rn,Rm) _OP2(cc,_CMP,Rn,SHIFT_REG(Rm))
#define CC_CMP_rrLSLi(cc,Rn,Rm,i) _OP2(cc,_CMP,Rn,SHIFT_LSL_i(Rm,i))
#define CC_CMP_rrLSLr(cc,Rn,Rm,Rs) _OP2(cc,_CMP,Rn,SHIFT_LSL_r(Rm,Rs))
#define CC_CMP_rrLSRi(cc,Rn,Rm,i) _OP2(cc,_CMP,Rn,SHIFT_LSR_i(Rm,i))
#define CC_CMP_rrLSRr(cc,Rn,Rm,Rs) _OP2(cc,_CMP,Rn,SHIFT_LSR_r(Rm,Rs))
#define CC_CMP_rrASRi(cc,Rn,Rm,i) _OP2(cc,_CMP,Rn,SHIFT_ASR_i(Rm,i))
#define CC_CMP_rrASRr(cc,Rn,Rm,Rs) _OP2(cc,_CMP,Rn,SHIFT_ASR_r(Rm,Rs))
#define CC_CMP_rrRORi(cc,Rn,Rm,i) _OP2(cc,_CMP,Rn,SHIFT_ROR_i(Rm,i))
#define CC_CMP_rrRORr(cc,Rn,Rm,Rs) _OP2(cc,_CMP,Rn,SHIFT_ROR_r(Rm,Rs))
#define CC_CMP_rrRRX(cc,Rn,Rm) _OP2(cc,_CMP,Rn,SHIFT_RRX(Rm))

#define CMP_ri(Rn,i) CC_CMP_ri(NATIVE_CC_AL,Rn,i)
#define CMP_rr(Rn,Rm) CC_CMP_rr(NATIVE_CC_AL,Rn,Rm)
#define CMP_rrLSLi(Rn,Rm,i) CC_CMP_rrLSLi(NATIVE_CC_AL,Rn,Rm,i)
#define CMP_rrLSLr(Rn,Rm,Rs) CC_CMP_rrLSLr(NATIVE_CC_AL,Rn,Rm,Rs)
#define CMP_rrLSRi(Rn,Rm,i) CC_CMP_rrLSRi(NATIVE_CC_AL,Rn,Rm,i)
#define CMP_rrLSRr(Rn,Rm,Rs) CC_CMP_rrLSRr(NATIVE_CC_AL,Rn,Rm,Rs)
#define CMP_rrASRi(Rn,Rm,i) CC_CMP_rrASRi(NATIVE_CC_AL,Rn,Rm,i)
#define CMP_rrASRr(Rn,Rm,Rs) CC_CMP_rrASRr(NATIVE_CC_AL,Rn,Rm,Rs)
#define CMP_rrRORi(Rn,Rm,i) CC_CMP_rrRORi(NATIVE_CC_AL,Rn,Rm,i)
#define CMP_rrRORr(Rn,Rm,Rs) CC_CMP_rrRORr(NATIVE_CC_AL,Rn,Rm,Rs)
#define CMP_rrRRX(Rn,Rm) CC_CMP_rrRRX(NATIVE_CC_AL,Rn,Rm)

/* CMN */
#define CC_CMN_ri(cc,Rn,i) _OP2(cc,_CMN,Rn,SHIFT_IMM(i))
#define CC_CMN_rr(cc,Rn,Rm) _OP2(cc,_CMN,Rn,SHIFT_REG(Rm))
#define CC_CMN_rrLSLi(cc,Rn,Rm,i) _OP2(cc,_CMN,Rn,SHIFT_LSL_i(Rm,i))
#define CC_CMN_rrLSLr(cc,Rn,Rm,Rs) _OP2(cc,_CMN,Rn,SHIFT_LSL_r(Rm,Rs))
#define CC_CMN_rrLSRi(cc,Rn,Rm,i) _OP2(cc,_CMN,Rn,SHIFT_LSR_i(Rm,i))
#define CC_CMN_rrLSRr(cc,Rn,Rm,Rs) _OP2(cc,_CMN,Rn,SHIFT_LSR_r(Rm,Rs))
#define CC_CMN_rrASRi(cc,Rn,Rm,i) _OP2(cc,_CMN,Rn,SHIFT_ASR_i(Rm,i))
#define CC_CMN_rrASRr(cc,Rn,Rm,Rs) _OP2(cc,_CMN,Rn,SHIFT_ASR_r(Rm,Rs))
#define CC_CMN_rrRORi(cc,Rn,Rm,i) _OP2(cc,_CMN,Rn,SHIFT_ROR_i(Rm,i))
#define CC_CMN_rrRORr(cc,Rn,Rm,Rs) _OP2(cc,_CMN,Rn,SHIFT_ROR_r(Rm,Rs))
#define CC_CMN_rrRRX(cc,Rn,Rm) _OP2(cc,_CMN,Rn,SHIFT_RRX(Rm))

#define CMN_ri(Rn,i) CC_CMN_ri(NATIVE_CC_AL,Rn,i)
#define CMN_rr(Rn,Rm) CC_CMN_rr(NATIVE_CC_AL,Rn,Rm)
#define CMN_rrLSLi(Rn,Rm,i) CC_CMN_rrLSLi(NATIVE_CC_AL,Rn,Rm,i)
#define CMN_rrLSLr(Rn,Rm,Rs) CC_CMN_rrLSLr(NATIVE_CC_AL,Rn,Rm,Rs)
#define CMN_rrLSRi(Rn,Rm,i) CC_CMN_rrLSRi(NATIVE_CC_AL,Rn,Rm,i)
#define CMN_rrLSRr(Rn,Rm,Rs) CC_CMN_rrLSRr(NATIVE_CC_AL,Rn,Rm,Rs)
#define CMN_rrASRi(Rn,Rm,i) CC_CMN_rrASRi(NATIVE_CC_AL,Rn,Rm,i)
#define CMN_rrASRr(Rn,Rm,Rs) CC_CMN_rrASRr(NATIVE_CC_AL,Rn,Rm,Rs)
#define CMN_rrRORi(Rn,Rm,i) CC_CMN_rrRORi(NATIVE_CC_AL,Rn,Rm,i)
#define CMN_rrRORr(Rn,Rm,Rs) CC_CMN_rrRORr(NATIVE_CC_AL,Rn,Rm,Rs)
#define CMN_rrRRX(Rn,Rm) CC_CMN_rrRRX(NATIVE_CC_AL,Rn,Rm)

/*
 * TST and TEQ are built by _OP2(cc, op, Rn, operand).
 * Suffix ri -> SHIFT_IMM(i); rr -> SHIFT_REG(Rm);
 * rr<op>i -> SHIFT_<op>_i(Rm,i); rr<op>r -> SHIFT_<op>_r(Rm,Rs);
 * rrRRX -> SHIFT_RRX(Rm).
 * Unprefixed macros forward to the CC_ forms with NATIVE_CC_AL.
 */
#define CC_TST_ri(cc,Rn,i) _OP2(cc,_TST,Rn,SHIFT_IMM(i))
#define CC_TST_rr(cc,Rn,Rm) _OP2(cc,_TST,Rn,SHIFT_REG(Rm))
#define CC_TST_rrLSLi(cc,Rn,Rm,i) _OP2(cc,_TST,Rn,SHIFT_LSL_i(Rm,i))
#define CC_TST_rrLSLr(cc,Rn,Rm,Rs) _OP2(cc,_TST,Rn,SHIFT_LSL_r(Rm,Rs))
#define CC_TST_rrLSRi(cc,Rn,Rm,i) _OP2(cc,_TST,Rn,SHIFT_LSR_i(Rm,i))
#define CC_TST_rrLSRr(cc,Rn,Rm,Rs) _OP2(cc,_TST,Rn,SHIFT_LSR_r(Rm,Rs))
#define CC_TST_rrASRi(cc,Rn,Rm,i) _OP2(cc,_TST,Rn,SHIFT_ASR_i(Rm,i))
#define CC_TST_rrASRr(cc,Rn,Rm,Rs) _OP2(cc,_TST,Rn,SHIFT_ASR_r(Rm,Rs))
#define CC_TST_rrRORi(cc,Rn,Rm,i) _OP2(cc,_TST,Rn,SHIFT_ROR_i(Rm,i))
#define CC_TST_rrRORr(cc,Rn,Rm,Rs) _OP2(cc,_TST,Rn,SHIFT_ROR_r(Rm,Rs))
#define CC_TST_rrRRX(cc,Rn,Rm) _OP2(cc,_TST,Rn,SHIFT_RRX(Rm))

#define TST_ri(Rn,i) CC_TST_ri(NATIVE_CC_AL,Rn,i)
#define TST_rr(Rn,Rm) CC_TST_rr(NATIVE_CC_AL,Rn,Rm)
#define TST_rrLSLi(Rn,Rm,i) CC_TST_rrLSLi(NATIVE_CC_AL,Rn,Rm,i)
#define TST_rrLSLr(Rn,Rm,Rs) CC_TST_rrLSLr(NATIVE_CC_AL,Rn,Rm,Rs)
#define TST_rrLSRi(Rn,Rm,i) CC_TST_rrLSRi(NATIVE_CC_AL,Rn,Rm,i)
#define TST_rrLSRr(Rn,Rm,Rs) CC_TST_rrLSRr(NATIVE_CC_AL,Rn,Rm,Rs)
#define TST_rrASRi(Rn,Rm,i) CC_TST_rrASRi(NATIVE_CC_AL,Rn,Rm,i)
#define TST_rrASRr(Rn,Rm,Rs) CC_TST_rrASRr(NATIVE_CC_AL,Rn,Rm,Rs)
#define TST_rrRORi(Rn,Rm,i) CC_TST_rrRORi(NATIVE_CC_AL,Rn,Rm,i)
#define TST_rrRORr(Rn,Rm,Rs) CC_TST_rrRORr(NATIVE_CC_AL,Rn,Rm,Rs)
#define TST_rrRRX(Rn,Rm) CC_TST_rrRRX(NATIVE_CC_AL,Rn,Rm)

/* TEQ */
#define CC_TEQ_ri(cc,Rn,i) _OP2(cc,_TEQ,Rn,SHIFT_IMM(i))
#define CC_TEQ_rr(cc,Rn,Rm) _OP2(cc,_TEQ,Rn,SHIFT_REG(Rm))
#define CC_TEQ_rrLSLi(cc,Rn,Rm,i) _OP2(cc,_TEQ,Rn,SHIFT_LSL_i(Rm,i))
#define CC_TEQ_rrLSLr(cc,Rn,Rm,Rs) _OP2(cc,_TEQ,Rn,SHIFT_LSL_r(Rm,Rs))
#define CC_TEQ_rrLSRi(cc,Rn,Rm,i) _OP2(cc,_TEQ,Rn,SHIFT_LSR_i(Rm,i))
#define CC_TEQ_rrLSRr(cc,Rn,Rm,Rs) _OP2(cc,_TEQ,Rn,SHIFT_LSR_r(Rm,Rs))
#define CC_TEQ_rrASRi(cc,Rn,Rm,i) _OP2(cc,_TEQ,Rn,SHIFT_ASR_i(Rm,i))
#define CC_TEQ_rrASRr(cc,Rn,Rm,Rs) _OP2(cc,_TEQ,Rn,SHIFT_ASR_r(Rm,Rs))
#define CC_TEQ_rrRORi(cc,Rn,Rm,i) _OP2(cc,_TEQ,Rn,SHIFT_ROR_i(Rm,i))
#define CC_TEQ_rrRORr(cc,Rn,Rm,Rs) _OP2(cc,_TEQ,Rn,SHIFT_ROR_r(Rm,Rs))
#define CC_TEQ_rrRRX(cc,Rn,Rm) _OP2(cc,_TEQ,Rn,SHIFT_RRX(Rm))

#define TEQ_ri(Rn,i) CC_TEQ_ri(NATIVE_CC_AL,Rn,i)
#define TEQ_rr(Rn,Rm) CC_TEQ_rr(NATIVE_CC_AL,Rn,Rm)
#define TEQ_rrLSLi(Rn,Rm,i) CC_TEQ_rrLSLi(NATIVE_CC_AL,Rn,Rm,i)
#define TEQ_rrLSLr(Rn,Rm,Rs) CC_TEQ_rrLSLr(NATIVE_CC_AL,Rn,Rm,Rs)
#define TEQ_rrLSRi(Rn,Rm,i) CC_TEQ_rrLSRi(NATIVE_CC_AL,Rn,Rm,i)
#define TEQ_rrLSRr(Rn,Rm,Rs) CC_TEQ_rrLSRr(NATIVE_CC_AL,Rn,Rm,Rs)
#define TEQ_rrASRi(Rn,Rm,i) CC_TEQ_rrASRi(NATIVE_CC_AL,Rn,Rm,i)
#define TEQ_rrASRr(Rn,Rm,Rs) CC_TEQ_rrASRr(NATIVE_CC_AL,Rn,Rm,Rs)
#define TEQ_rrRORi(Rn,Rm,i) CC_TEQ_rrRORi(NATIVE_CC_AL,Rn,Rm,i)
#define TEQ_rrRORr(Rn,Rm,Rs) CC_TEQ_rrRORr(NATIVE_CC_AL,Rn,Rm,Rs)
#define TEQ_rrRRX(Rn,Rm) CC_TEQ_rrRRX(NATIVE_CC_AL,Rn,Rm)

/*
 * Opcodes Type 3: built by _OP3(cc, op, s, Rd, Rn, operand).
 * The s argument is 0 for AND and 1 for ANDS (presumably the flag-setting
 * S bit; confirm against the _OP3 definition).
 */
#define CC_AND_rri(cc,Rd,Rn,i) _OP3(cc,_AND,0,Rd,Rn,SHIFT_IMM(i))
#define CC_AND_rrr(cc,Rd,Rn,Rm) _OP3(cc,_AND,0,Rd,Rn,SHIFT_REG(Rm))
#define CC_AND_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_AND,0,Rd,Rn,SHIFT_LSL_i(Rm,i))
#define CC_AND_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_AND,0,Rd,Rn,SHIFT_LSL_r(Rm,Rs))
#define CC_AND_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_AND,0,Rd,Rn,SHIFT_LSR_i(Rm,i))
#define CC_AND_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_AND,0,Rd,Rn,SHIFT_LSR_r(Rm,Rs))
#define CC_AND_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_AND,0,Rd,Rn,SHIFT_ASR_i(Rm,i))
#define CC_AND_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_AND,0,Rd,Rn,SHIFT_ASR_r(Rm,Rs))
#define CC_AND_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_AND,0,Rd,Rn,SHIFT_ROR_i(Rm,i))
#define CC_AND_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_AND,0,Rd,Rn,SHIFT_ROR_r(Rm,Rs))
#define CC_AND_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_AND,0,Rd,Rn,SHIFT_RRX(Rm))

#define AND_rri(Rd,Rn,i) CC_AND_rri(NATIVE_CC_AL,Rd,Rn,i)
#define AND_rrr(Rd,Rn,Rm) CC_AND_rrr(NATIVE_CC_AL,Rd,Rn,Rm)
#define AND_rrrLSLi(Rd,Rn,Rm,i) CC_AND_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define AND_rrrLSLr(Rd,Rn,Rm,Rs) CC_AND_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define AND_rrrLSRi(Rd,Rn,Rm,i) CC_AND_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define AND_rrrLSRr(Rd,Rn,Rm,Rs) CC_AND_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define AND_rrrASRi(Rd,Rn,Rm,i) CC_AND_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define AND_rrrASRr(Rd,Rn,Rm,Rs) CC_AND_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define AND_rrrRORi(Rd,Rn,Rm,i) CC_AND_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define AND_rrrRORr(Rd,Rn,Rm,Rs) CC_AND_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define AND_rrrRRX(Rd,Rn,Rm) CC_AND_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm)

/* ANDS */
#define CC_ANDS_rri(cc,Rd,Rn,i) _OP3(cc,_AND,1,Rd,Rn,SHIFT_IMM(i))
#define CC_ANDS_rrr(cc,Rd,Rn,Rm) _OP3(cc,_AND,1,Rd,Rn,SHIFT_REG(Rm))
#define CC_ANDS_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_AND,1,Rd,Rn,SHIFT_LSL_i(Rm,i))
#define CC_ANDS_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_AND,1,Rd,Rn,SHIFT_LSL_r(Rm,Rs))
#define CC_ANDS_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_AND,1,Rd,Rn,SHIFT_LSR_i(Rm,i))
#define CC_ANDS_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_AND,1,Rd,Rn,SHIFT_LSR_r(Rm,Rs))
#define CC_ANDS_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_AND,1,Rd,Rn,SHIFT_ASR_i(Rm,i))
#define CC_ANDS_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_AND,1,Rd,Rn,SHIFT_ASR_r(Rm,Rs))
#define CC_ANDS_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_AND,1,Rd,Rn,SHIFT_ROR_i(Rm,i))
#define CC_ANDS_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_AND,1,Rd,Rn,SHIFT_ROR_r(Rm,Rs))
#define CC_ANDS_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_AND,1,Rd,Rn,SHIFT_RRX(Rm))

#define ANDS_rri(Rd,Rn,i) CC_ANDS_rri(NATIVE_CC_AL,Rd,Rn,i)
#define ANDS_rrr(Rd,Rn,Rm) CC_ANDS_rrr(NATIVE_CC_AL,Rd,Rn,Rm)
#define ANDS_rrrLSLi(Rd,Rn,Rm,i) CC_ANDS_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define ANDS_rrrLSLr(Rd,Rn,Rm,Rs) CC_ANDS_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define ANDS_rrrLSRi(Rd,Rn,Rm,i) CC_ANDS_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define ANDS_rrrLSRr(Rd,Rn,Rm,Rs) CC_ANDS_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define ANDS_rrrASRi(Rd,Rn,Rm,i) CC_ANDS_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define ANDS_rrrASRr(Rd,Rn,Rm,Rs) CC_ANDS_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
/* ANDS, unconditional forms (continued): forward with NATIVE_CC_AL. */
#define ANDS_rrrRORi(Rd,Rn,Rm,i) CC_ANDS_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define ANDS_rrrRORr(Rd,Rn,Rm,Rs) CC_ANDS_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define ANDS_rrrRRX(Rd,Rn,Rm) CC_ANDS_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm)

/*
 * EOR, built by _OP3(cc, op, s, Rd, Rn, operand) with s = 0.
 * Suffix rri -> SHIFT_IMM(i); rrr -> SHIFT_REG(Rm);
 * rrr<op>i -> SHIFT_<op>_i(Rm,i); rrr<op>r -> SHIFT_<op>_r(Rm,Rs);
 * rrrRRX -> SHIFT_RRX(Rm).
 */
#define CC_EOR_rri(cc,Rd,Rn,i) _OP3(cc,_EOR,0,Rd,Rn,SHIFT_IMM(i))
#define CC_EOR_rrr(cc,Rd,Rn,Rm) _OP3(cc,_EOR,0,Rd,Rn,SHIFT_REG(Rm))
#define CC_EOR_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_EOR,0,Rd,Rn,SHIFT_LSL_i(Rm,i))
#define CC_EOR_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_EOR,0,Rd,Rn,SHIFT_LSL_r(Rm,Rs))
#define CC_EOR_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_EOR,0,Rd,Rn,SHIFT_LSR_i(Rm,i))
#define CC_EOR_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_EOR,0,Rd,Rn,SHIFT_LSR_r(Rm,Rs))
#define CC_EOR_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_EOR,0,Rd,Rn,SHIFT_ASR_i(Rm,i))
#define CC_EOR_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_EOR,0,Rd,Rn,SHIFT_ASR_r(Rm,Rs))
#define CC_EOR_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_EOR,0,Rd,Rn,SHIFT_ROR_i(Rm,i))
#define CC_EOR_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_EOR,0,Rd,Rn,SHIFT_ROR_r(Rm,Rs))
#define CC_EOR_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_EOR,0,Rd,Rn,SHIFT_RRX(Rm))

#define EOR_rri(Rd,Rn,i) CC_EOR_rri(NATIVE_CC_AL,Rd,Rn,i)
#define EOR_rrr(Rd,Rn,Rm) CC_EOR_rrr(NATIVE_CC_AL,Rd,Rn,Rm)
#define EOR_rrrLSLi(Rd,Rn,Rm,i) CC_EOR_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define EOR_rrrLSLr(Rd,Rn,Rm,Rs) CC_EOR_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define EOR_rrrLSRi(Rd,Rn,Rm,i) CC_EOR_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define EOR_rrrLSRr(Rd,Rn,Rm,Rs) CC_EOR_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define EOR_rrrASRi(Rd,Rn,Rm,i) CC_EOR_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define EOR_rrrASRr(Rd,Rn,Rm,Rs) CC_EOR_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define EOR_rrrRORi(Rd,Rn,Rm,i) CC_EOR_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define EOR_rrrRORr(Rd,Rn,Rm,Rs) CC_EOR_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define EOR_rrrRRX(Rd,Rn,Rm) CC_EOR_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm)

/* EORS: same as EOR with s = 1. */
#define CC_EORS_rri(cc,Rd,Rn,i) _OP3(cc,_EOR,1,Rd,Rn,SHIFT_IMM(i))
#define CC_EORS_rrr(cc,Rd,Rn,Rm) _OP3(cc,_EOR,1,Rd,Rn,SHIFT_REG(Rm))
/*
 * All macros here are built by _OP3(cc, op, s, Rd, Rn, operand); s is 0 for
 * the plain mnemonic and 1 for the ...S variant (presumably the flag-setting
 * S bit; confirm against the _OP3 definition).
 * Suffix rri -> SHIFT_IMM(i); rrr -> SHIFT_REG(Rm);
 * rrr<op>i -> SHIFT_<op>_i(Rm,i); rrr<op>r -> SHIFT_<op>_r(Rm,Rs);
 * rrrRRX -> SHIFT_RRX(Rm).
 * Unprefixed macros forward to the CC_ forms with NATIVE_CC_AL.
 */

/* EORS, shifted-register forms */
#define CC_EORS_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_EOR,1,Rd,Rn,SHIFT_LSL_i(Rm,i))
#define CC_EORS_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_EOR,1,Rd,Rn,SHIFT_LSL_r(Rm,Rs))
#define CC_EORS_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_EOR,1,Rd,Rn,SHIFT_LSR_i(Rm,i))
#define CC_EORS_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_EOR,1,Rd,Rn,SHIFT_LSR_r(Rm,Rs))
#define CC_EORS_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_EOR,1,Rd,Rn,SHIFT_ASR_i(Rm,i))
#define CC_EORS_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_EOR,1,Rd,Rn,SHIFT_ASR_r(Rm,Rs))
#define CC_EORS_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_EOR,1,Rd,Rn,SHIFT_ROR_i(Rm,i))
#define CC_EORS_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_EOR,1,Rd,Rn,SHIFT_ROR_r(Rm,Rs))
#define CC_EORS_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_EOR,1,Rd,Rn,SHIFT_RRX(Rm))

#define EORS_rri(Rd,Rn,i) CC_EORS_rri(NATIVE_CC_AL,Rd,Rn,i)
#define EORS_rrr(Rd,Rn,Rm) CC_EORS_rrr(NATIVE_CC_AL,Rd,Rn,Rm)
#define EORS_rrrLSLi(Rd,Rn,Rm,i) CC_EORS_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define EORS_rrrLSLr(Rd,Rn,Rm,Rs) CC_EORS_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define EORS_rrrLSRi(Rd,Rn,Rm,i) CC_EORS_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define EORS_rrrLSRr(Rd,Rn,Rm,Rs) CC_EORS_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define EORS_rrrASRi(Rd,Rn,Rm,i) CC_EORS_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define EORS_rrrASRr(Rd,Rn,Rm,Rs) CC_EORS_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define EORS_rrrRORi(Rd,Rn,Rm,i) CC_EORS_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define EORS_rrrRORr(Rd,Rn,Rm,Rs) CC_EORS_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define EORS_rrrRRX(Rd,Rn,Rm) CC_EORS_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm)

/* SUB */
#define CC_SUB_rri(cc,Rd,Rn,i) _OP3(cc,_SUB,0,Rd,Rn,SHIFT_IMM(i))
#define CC_SUB_rrr(cc,Rd,Rn,Rm) _OP3(cc,_SUB,0,Rd,Rn,SHIFT_REG(Rm))
#define CC_SUB_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_SUB,0,Rd,Rn,SHIFT_LSL_i(Rm,i))
#define CC_SUB_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_SUB,0,Rd,Rn,SHIFT_LSL_r(Rm,Rs))
#define CC_SUB_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_SUB,0,Rd,Rn,SHIFT_LSR_i(Rm,i))
#define CC_SUB_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_SUB,0,Rd,Rn,SHIFT_LSR_r(Rm,Rs))
#define CC_SUB_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_SUB,0,Rd,Rn,SHIFT_ASR_i(Rm,i))
#define CC_SUB_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_SUB,0,Rd,Rn,SHIFT_ASR_r(Rm,Rs))
#define CC_SUB_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_SUB,0,Rd,Rn,SHIFT_ROR_i(Rm,i))
#define CC_SUB_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_SUB,0,Rd,Rn,SHIFT_ROR_r(Rm,Rs))
#define CC_SUB_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_SUB,0,Rd,Rn,SHIFT_RRX(Rm))

#define SUB_rri(Rd,Rn,i) CC_SUB_rri(NATIVE_CC_AL,Rd,Rn,i)
#define SUB_rrr(Rd,Rn,Rm) CC_SUB_rrr(NATIVE_CC_AL,Rd,Rn,Rm)
#define SUB_rrrLSLi(Rd,Rn,Rm,i) CC_SUB_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define SUB_rrrLSLr(Rd,Rn,Rm,Rs) CC_SUB_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define SUB_rrrLSRi(Rd,Rn,Rm,i) CC_SUB_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define SUB_rrrLSRr(Rd,Rn,Rm,Rs) CC_SUB_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define SUB_rrrASRi(Rd,Rn,Rm,i) CC_SUB_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define SUB_rrrASRr(Rd,Rn,Rm,Rs) CC_SUB_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define SUB_rrrRORi(Rd,Rn,Rm,i) CC_SUB_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define SUB_rrrRORr(Rd,Rn,Rm,Rs) CC_SUB_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define SUB_rrrRRX(Rd,Rn,Rm) CC_SUB_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm)

/* SUBS */
#define CC_SUBS_rri(cc,Rd,Rn,i) _OP3(cc,_SUB,1,Rd,Rn,SHIFT_IMM(i))
#define CC_SUBS_rrr(cc,Rd,Rn,Rm) _OP3(cc,_SUB,1,Rd,Rn,SHIFT_REG(Rm))
#define CC_SUBS_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_SUB,1,Rd,Rn,SHIFT_LSL_i(Rm,i))
#define CC_SUBS_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_SUB,1,Rd,Rn,SHIFT_LSL_r(Rm,Rs))
#define CC_SUBS_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_SUB,1,Rd,Rn,SHIFT_LSR_i(Rm,i))
#define CC_SUBS_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_SUB,1,Rd,Rn,SHIFT_LSR_r(Rm,Rs))
#define CC_SUBS_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_SUB,1,Rd,Rn,SHIFT_ASR_i(Rm,i))
#define CC_SUBS_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_SUB,1,Rd,Rn,SHIFT_ASR_r(Rm,Rs))
#define CC_SUBS_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_SUB,1,Rd,Rn,SHIFT_ROR_i(Rm,i))
#define CC_SUBS_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_SUB,1,Rd,Rn,SHIFT_ROR_r(Rm,Rs))
#define CC_SUBS_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_SUB,1,Rd,Rn,SHIFT_RRX(Rm))

#define SUBS_rri(Rd,Rn,i) CC_SUBS_rri(NATIVE_CC_AL,Rd,Rn,i)
#define SUBS_rrr(Rd,Rn,Rm) CC_SUBS_rrr(NATIVE_CC_AL,Rd,Rn,Rm)
#define SUBS_rrrLSLi(Rd,Rn,Rm,i) CC_SUBS_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define SUBS_rrrLSLr(Rd,Rn,Rm,Rs) CC_SUBS_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define SUBS_rrrLSRi(Rd,Rn,Rm,i) CC_SUBS_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define SUBS_rrrLSRr(Rd,Rn,Rm,Rs) CC_SUBS_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define SUBS_rrrASRi(Rd,Rn,Rm,i) CC_SUBS_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define SUBS_rrrASRr(Rd,Rn,Rm,Rs) CC_SUBS_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define SUBS_rrrRORi(Rd,Rn,Rm,i) CC_SUBS_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define SUBS_rrrRORr(Rd,Rn,Rm,Rs) CC_SUBS_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define SUBS_rrrRRX(Rd,Rn,Rm) CC_SUBS_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm)

/* RSB */
#define CC_RSB_rri(cc,Rd,Rn,i) _OP3(cc,_RSB,0,Rd,Rn,SHIFT_IMM(i))
#define CC_RSB_rrr(cc,Rd,Rn,Rm) _OP3(cc,_RSB,0,Rd,Rn,SHIFT_REG(Rm))
#define CC_RSB_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_RSB,0,Rd,Rn,SHIFT_LSL_i(Rm,i))
#define CC_RSB_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_RSB,0,Rd,Rn,SHIFT_LSL_r(Rm,Rs))
#define CC_RSB_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_RSB,0,Rd,Rn,SHIFT_LSR_i(Rm,i))
#define CC_RSB_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_RSB,0,Rd,Rn,SHIFT_LSR_r(Rm,Rs))
#define CC_RSB_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_RSB,0,Rd,Rn,SHIFT_ASR_i(Rm,i))
#define CC_RSB_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_RSB,0,Rd,Rn,SHIFT_ASR_r(Rm,Rs))
#define CC_RSB_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_RSB,0,Rd,Rn,SHIFT_ROR_i(Rm,i))
#define CC_RSB_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_RSB,0,Rd,Rn,SHIFT_ROR_r(Rm,Rs))
#define CC_RSB_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_RSB,0,Rd,Rn,SHIFT_RRX(Rm))

#define RSB_rri(Rd,Rn,i) CC_RSB_rri(NATIVE_CC_AL,Rd,Rn,i)
#define RSB_rrr(Rd,Rn,Rm) CC_RSB_rrr(NATIVE_CC_AL,Rd,Rn,Rm)
#define RSB_rrrLSLi(Rd,Rn,Rm,i) CC_RSB_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define RSB_rrrLSLr(Rd,Rn,Rm,Rs) CC_RSB_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define RSB_rrrLSRi(Rd,Rn,Rm,i) CC_RSB_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define RSB_rrrLSRr(Rd,Rn,Rm,Rs) CC_RSB_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define RSB_rrrASRi(Rd,Rn,Rm,i) CC_RSB_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define RSB_rrrASRr(Rd,Rn,Rm,Rs) CC_RSB_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define RSB_rrrRORi(Rd,Rn,Rm,i) CC_RSB_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define RSB_rrrRORr(Rd,Rn,Rm,Rs) CC_RSB_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define RSB_rrrRRX(Rd,Rn,Rm) CC_RSB_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm)

/* RSBS */
#define CC_RSBS_rri(cc,Rd,Rn,i) _OP3(cc,_RSB,1,Rd,Rn,SHIFT_IMM(i))
#define CC_RSBS_rrr(cc,Rd,Rn,Rm) _OP3(cc,_RSB,1,Rd,Rn,SHIFT_REG(Rm))
#define CC_RSBS_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_RSB,1,Rd,Rn,SHIFT_LSL_i(Rm,i))
#define CC_RSBS_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_RSB,1,Rd,Rn,SHIFT_LSL_r(Rm,Rs))
#define CC_RSBS_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_RSB,1,Rd,Rn,SHIFT_LSR_i(Rm,i))
#define CC_RSBS_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_RSB,1,Rd,Rn,SHIFT_LSR_r(Rm,Rs))
#define CC_RSBS_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_RSB,1,Rd,Rn,SHIFT_ASR_i(Rm,i))
#define CC_RSBS_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_RSB,1,Rd,Rn,SHIFT_ASR_r(Rm,Rs))
#define CC_RSBS_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_RSB,1,Rd,Rn,SHIFT_ROR_i(Rm,i))
#define CC_RSBS_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_RSB,1,Rd,Rn,SHIFT_ROR_r(Rm,Rs))
#define CC_RSBS_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_RSB,1,Rd,Rn,SHIFT_RRX(Rm))

#define RSBS_rri(Rd,Rn,i) CC_RSBS_rri(NATIVE_CC_AL,Rd,Rn,i)
#define RSBS_rrr(Rd,Rn,Rm) CC_RSBS_rrr(NATIVE_CC_AL,Rd,Rn,Rm)
#define RSBS_rrrLSLi(Rd,Rn,Rm,i) CC_RSBS_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define RSBS_rrrLSLr(Rd,Rn,Rm,Rs) CC_RSBS_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define RSBS_rrrLSRi(Rd,Rn,Rm,i) CC_RSBS_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define RSBS_rrrLSRr(Rd,Rn,Rm,Rs) CC_RSBS_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define RSBS_rrrASRi(Rd,Rn,Rm,i) CC_RSBS_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define RSBS_rrrASRr(Rd,Rn,Rm,Rs) CC_RSBS_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define RSBS_rrrRORi(Rd,Rn,Rm,i) CC_RSBS_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i)
/* RSBS, unconditional forms (continued): forward with NATIVE_CC_AL. */
#define RSBS_rrrRORr(Rd,Rn,Rm,Rs) CC_RSBS_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define RSBS_rrrRRX(Rd,Rn,Rm) CC_RSBS_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm)

/*
 * Three-operand families built by _OP3(cc, op, s, Rd, Rn, operand).
 *
 * Name suffix -> operand builder:
 *   rri8       UNSHIFTED_IMM8(i)
 *   rri8RORi   SHIFT_IMM8_ROR(i,s)
 *   rri        SHIFT_IMM(i)
 *   rrr        SHIFT_REG(Rm)
 *   rrr<op>i   SHIFT_<op>_i(Rm,i)     op = LSL, LSR, ASR, ROR
 *   rrr<op>r   SHIFT_<op>_r(Rm,Rs)
 *   rrrRRX     SHIFT_RRX(Rm)
 *
 * CC_ macros take the condition code first; the unprefixed macros forward to
 * them with NATIVE_CC_AL.  The third _OP3() argument is 0 for the plain
 * mnemonic and 1 for the ...S variant (presumably the flag-setting S bit;
 * confirm against the _OP3 definition).
 */

/* ADDcc Rd,Rn,#i */
/* Uses UNSHIFTED_IMM8 like the other 8-bit-immediate macros in this file */
/* (the previous spelling UNSHIFT_IMM8 is not used anywhere else). */
#define CC_ADD_rri8(cc,Rd,Rn,i) _OP3(cc,_ADD,0,Rd,Rn,UNSHIFTED_IMM8(i))
/* ADDcc Rd,Rn,#i ROR #s */
#define CC_ADD_rri8RORi(cc,Rd,Rn,i,s) _OP3(cc,_ADD,0,Rd,Rn,SHIFT_IMM8_ROR(i,s))

#define CC_ADD_rri(cc,Rd,Rn,i) _OP3(cc,_ADD,0,Rd,Rn,SHIFT_IMM(i))
#define CC_ADD_rrr(cc,Rd,Rn,Rm) _OP3(cc,_ADD,0,Rd,Rn,SHIFT_REG(Rm))
#define CC_ADD_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_ADD,0,Rd,Rn,SHIFT_LSL_i(Rm,i))
#define CC_ADD_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADD,0,Rd,Rn,SHIFT_LSL_r(Rm,Rs))
#define CC_ADD_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_ADD,0,Rd,Rn,SHIFT_LSR_i(Rm,i))
#define CC_ADD_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADD,0,Rd,Rn,SHIFT_LSR_r(Rm,Rs))
#define CC_ADD_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_ADD,0,Rd,Rn,SHIFT_ASR_i(Rm,i))
#define CC_ADD_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADD,0,Rd,Rn,SHIFT_ASR_r(Rm,Rs))
#define CC_ADD_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_ADD,0,Rd,Rn,SHIFT_ROR_i(Rm,i))
#define CC_ADD_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADD,0,Rd,Rn,SHIFT_ROR_r(Rm,Rs))
#define CC_ADD_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_ADD,0,Rd,Rn,SHIFT_RRX(Rm))

/*
 * NOTE(review): unlike every other unconditional wrapper in this file, these
 * two take a leading cc argument and then ignore it (NATIVE_CC_AL is always
 * used).  The arity is kept so existing callers keep compiling; consider
 * dropping the parameter once callers have been checked.
 */
#define ADD_rri8(cc,Rd,Rn,i) CC_ADD_rri8(NATIVE_CC_AL,Rd,Rn,i)
#define ADD_rri8RORi(cc,Rd,Rn,i,s) CC_ADD_rri8RORi(NATIVE_CC_AL,Rd,Rn,i,s)

#define ADD_rri(Rd,Rn,i) CC_ADD_rri(NATIVE_CC_AL,Rd,Rn,i)
#define ADD_rrr(Rd,Rn,Rm) CC_ADD_rrr(NATIVE_CC_AL,Rd,Rn,Rm)
#define ADD_rrrLSLi(Rd,Rn,Rm,i) CC_ADD_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define ADD_rrrLSLr(Rd,Rn,Rm,Rs) CC_ADD_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define ADD_rrrLSRi(Rd,Rn,Rm,i) CC_ADD_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define ADD_rrrLSRr(Rd,Rn,Rm,Rs) CC_ADD_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define ADD_rrrASRi(Rd,Rn,Rm,i) CC_ADD_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define ADD_rrrASRr(Rd,Rn,Rm,Rs) CC_ADD_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define ADD_rrrRORi(Rd,Rn,Rm,i) CC_ADD_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define ADD_rrrRORr(Rd,Rn,Rm,Rs) CC_ADD_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define ADD_rrrRRX(Rd,Rn,Rm) CC_ADD_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm)

/* ADDS: same operand forms as ADD, third _OP3() argument is 1. */
#define CC_ADDS_rri(cc,Rd,Rn,i) _OP3(cc,_ADD,1,Rd,Rn,SHIFT_IMM(i))
#define CC_ADDS_rrr(cc,Rd,Rn,Rm) _OP3(cc,_ADD,1,Rd,Rn,SHIFT_REG(Rm))
#define CC_ADDS_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_ADD,1,Rd,Rn,SHIFT_LSL_i(Rm,i))
#define CC_ADDS_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADD,1,Rd,Rn,SHIFT_LSL_r(Rm,Rs))
#define CC_ADDS_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_ADD,1,Rd,Rn,SHIFT_LSR_i(Rm,i))
#define CC_ADDS_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADD,1,Rd,Rn,SHIFT_LSR_r(Rm,Rs))
#define CC_ADDS_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_ADD,1,Rd,Rn,SHIFT_ASR_i(Rm,i))
#define CC_ADDS_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADD,1,Rd,Rn,SHIFT_ASR_r(Rm,Rs))
#define CC_ADDS_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_ADD,1,Rd,Rn,SHIFT_ROR_i(Rm,i))
#define CC_ADDS_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADD,1,Rd,Rn,SHIFT_ROR_r(Rm,Rs))
#define CC_ADDS_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_ADD,1,Rd,Rn,SHIFT_RRX(Rm))

#define ADDS_rri(Rd,Rn,i) CC_ADDS_rri(NATIVE_CC_AL,Rd,Rn,i)
#define ADDS_rrr(Rd,Rn,Rm) CC_ADDS_rrr(NATIVE_CC_AL,Rd,Rn,Rm)
#define ADDS_rrrLSLi(Rd,Rn,Rm,i) CC_ADDS_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define ADDS_rrrLSLr(Rd,Rn,Rm,Rs) CC_ADDS_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define ADDS_rrrLSRi(Rd,Rn,Rm,i) CC_ADDS_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define ADDS_rrrLSRr(Rd,Rn,Rm,Rs) CC_ADDS_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define ADDS_rrrASRi(Rd,Rn,Rm,i) CC_ADDS_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define ADDS_rrrASRr(Rd,Rn,Rm,Rs) CC_ADDS_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define ADDS_rrrRORi(Rd,Rn,Rm,i) CC_ADDS_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define ADDS_rrrRORr(Rd,Rn,Rm,Rs) CC_ADDS_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define ADDS_rrrRRX(Rd,Rn,Rm) CC_ADDS_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm)

/* ADC */
#define CC_ADC_rri(cc,Rd,Rn,i) _OP3(cc,_ADC,0,Rd,Rn,SHIFT_IMM(i))
#define CC_ADC_rrr(cc,Rd,Rn,Rm) _OP3(cc,_ADC,0,Rd,Rn,SHIFT_REG(Rm))
#define CC_ADC_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_ADC,0,Rd,Rn,SHIFT_LSL_i(Rm,i))
#define CC_ADC_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADC,0,Rd,Rn,SHIFT_LSL_r(Rm,Rs))
#define CC_ADC_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_ADC,0,Rd,Rn,SHIFT_LSR_i(Rm,i))
#define CC_ADC_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADC,0,Rd,Rn,SHIFT_LSR_r(Rm,Rs))
#define CC_ADC_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_ADC,0,Rd,Rn,SHIFT_ASR_i(Rm,i))
#define CC_ADC_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADC,0,Rd,Rn,SHIFT_ASR_r(Rm,Rs))
#define CC_ADC_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_ADC,0,Rd,Rn,SHIFT_ROR_i(Rm,i))
#define CC_ADC_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADC,0,Rd,Rn,SHIFT_ROR_r(Rm,Rs))
#define CC_ADC_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_ADC,0,Rd,Rn,SHIFT_RRX(Rm))

#define ADC_rri(Rd,Rn,i) CC_ADC_rri(NATIVE_CC_AL,Rd,Rn,i)
#define ADC_rrr(Rd,Rn,Rm) CC_ADC_rrr(NATIVE_CC_AL,Rd,Rn,Rm)
#define ADC_rrrLSLi(Rd,Rn,Rm,i) CC_ADC_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define ADC_rrrLSLr(Rd,Rn,Rm,Rs) CC_ADC_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define ADC_rrrLSRi(Rd,Rn,Rm,i) CC_ADC_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define ADC_rrrLSRr(Rd,Rn,Rm,Rs) CC_ADC_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define ADC_rrrASRi(Rd,Rn,Rm,i) CC_ADC_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define ADC_rrrASRr(Rd,Rn,Rm,Rs) CC_ADC_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define ADC_rrrRORi(Rd,Rn,Rm,i) CC_ADC_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define ADC_rrrRORr(Rd,Rn,Rm,Rs) CC_ADC_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define ADC_rrrRRX(Rd,Rn,Rm) CC_ADC_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm)

/* ADCS */
#define CC_ADCS_rri(cc,Rd,Rn,i) _OP3(cc,_ADC,1,Rd,Rn,SHIFT_IMM(i))
#define CC_ADCS_rrr(cc,Rd,Rn,Rm) _OP3(cc,_ADC,1,Rd,Rn,SHIFT_REG(Rm))
#define CC_ADCS_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_ADC,1,Rd,Rn,SHIFT_LSL_i(Rm,i))
#define CC_ADCS_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADC,1,Rd,Rn,SHIFT_LSL_r(Rm,Rs))
#define CC_ADCS_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_ADC,1,Rd,Rn,SHIFT_LSR_i(Rm,i))
#define CC_ADCS_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADC,1,Rd,Rn,SHIFT_LSR_r(Rm,Rs))
#define CC_ADCS_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_ADC,1,Rd,Rn,SHIFT_ASR_i(Rm,i))
#define CC_ADCS_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADC,1,Rd,Rn,SHIFT_ASR_r(Rm,Rs))
#define CC_ADCS_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_ADC,1,Rd,Rn,SHIFT_ROR_i(Rm,i))
#define CC_ADCS_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADC,1,Rd,Rn,SHIFT_ROR_r(Rm,Rs))
#define CC_ADCS_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_ADC,1,Rd,Rn,SHIFT_RRX(Rm))

#define ADCS_rri(Rd,Rn,i) CC_ADCS_rri(NATIVE_CC_AL,Rd,Rn,i)
#define ADCS_rrr(Rd,Rn,Rm) CC_ADCS_rrr(NATIVE_CC_AL,Rd,Rn,Rm)
#define ADCS_rrrLSLi(Rd,Rn,Rm,i) CC_ADCS_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define ADCS_rrrLSLr(Rd,Rn,Rm,Rs) CC_ADCS_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define ADCS_rrrLSRi(Rd,Rn,Rm,i) CC_ADCS_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define ADCS_rrrLSRr(Rd,Rn,Rm,Rs) CC_ADCS_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define ADCS_rrrASRi(Rd,Rn,Rm,i) CC_ADCS_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define ADCS_rrrASRr(Rd,Rn,Rm,Rs) CC_ADCS_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define ADCS_rrrRORi(Rd,Rn,Rm,i) CC_ADCS_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define ADCS_rrrRORr(Rd,Rn,Rm,Rs) CC_ADCS_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define ADCS_rrrRRX(Rd,Rn,Rm) CC_ADCS_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm)

/* SBC */
#define CC_SBC_rri(cc,Rd,Rn,i) _OP3(cc,_SBC,0,Rd,Rn,SHIFT_IMM(i))
#define CC_SBC_rrr(cc,Rd,Rn,Rm) _OP3(cc,_SBC,0,Rd,Rn,SHIFT_REG(Rm))
#define CC_SBC_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_SBC,0,Rd,Rn,SHIFT_LSL_i(Rm,i))
#define CC_SBC_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_SBC,0,Rd,Rn,SHIFT_LSL_r(Rm,Rs))
#define CC_SBC_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_SBC,0,Rd,Rn,SHIFT_LSR_i(Rm,i))
#define CC_SBC_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_SBC,0,Rd,Rn,SHIFT_LSR_r(Rm,Rs))
#define CC_SBC_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_SBC,0,Rd,Rn,SHIFT_ASR_i(Rm,i))
#define CC_SBC_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_SBC,0,Rd,Rn,SHIFT_ASR_r(Rm,Rs))
#define CC_SBC_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_SBC,0,Rd,Rn,SHIFT_ROR_i(Rm,i))
#define CC_SBC_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_SBC,0,Rd,Rn,SHIFT_ROR_r(Rm,Rs))
#define CC_SBC_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_SBC,0,Rd,Rn,SHIFT_RRX(Rm))

#define SBC_rri(Rd,Rn,i) CC_SBC_rri(NATIVE_CC_AL,Rd,Rn,i)
#define SBC_rrr(Rd,Rn,Rm) CC_SBC_rrr(NATIVE_CC_AL,Rd,Rn,Rm)
#define SBC_rrrLSLi(Rd,Rn,Rm,i) CC_SBC_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define SBC_rrrLSLr(Rd,Rn,Rm,Rs) CC_SBC_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define SBC_rrrLSRi(Rd,Rn,Rm,i) CC_SBC_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define SBC_rrrLSRr(Rd,Rn,Rm,Rs) CC_SBC_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define SBC_rrrASRi(Rd,Rn,Rm,i) CC_SBC_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define SBC_rrrASRr(Rd,Rn,Rm,Rs) CC_SBC_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define SBC_rrrRORi(Rd,Rn,Rm,i) CC_SBC_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define SBC_rrrRORr(Rd,Rn,Rm,Rs) CC_SBC_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define SBC_rrrRRX(Rd,Rn,Rm) CC_SBC_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm)

/* SBCS */
#define CC_SBCS_rri(cc,Rd,Rn,i) _OP3(cc,_SBC,1,Rd,Rn,SHIFT_IMM(i))
#define CC_SBCS_rrr(cc,Rd,Rn,Rm) _OP3(cc,_SBC,1,Rd,Rn,SHIFT_REG(Rm))
#define CC_SBCS_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_SBC,1,Rd,Rn,SHIFT_LSL_i(Rm,i))
#define CC_SBCS_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_SBC,1,Rd,Rn,SHIFT_LSL_r(Rm,Rs))
#define CC_SBCS_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_SBC,1,Rd,Rn,SHIFT_LSR_i(Rm,i))
#define CC_SBCS_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_SBC,1,Rd,Rn,SHIFT_LSR_r(Rm,Rs))
#define CC_SBCS_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_SBC,1,Rd,Rn,SHIFT_ASR_i(Rm,i))
#define CC_SBCS_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_SBC,1,Rd,Rn,SHIFT_ASR_r(Rm,Rs))
#define CC_SBCS_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_SBC,1,Rd,Rn,SHIFT_ROR_i(Rm,i))
#define CC_SBCS_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_SBC,1,Rd,Rn,SHIFT_ROR_r(Rm,Rs))
#define CC_SBCS_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_SBC,1,Rd,Rn,SHIFT_RRX(Rm))

#define SBCS_rri(Rd,Rn,i) CC_SBCS_rri(NATIVE_CC_AL,Rd,Rn,i)
#define SBCS_rrr(Rd,Rn,Rm) CC_SBCS_rrr(NATIVE_CC_AL,Rd,Rn,Rm)
#define SBCS_rrrLSLi(Rd,Rn,Rm,i) CC_SBCS_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define SBCS_rrrLSLr(Rd,Rn,Rm,Rs) CC_SBCS_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define SBCS_rrrLSRi(Rd,Rn,Rm,i) CC_SBCS_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define SBCS_rrrLSRr(Rd,Rn,Rm,Rs) CC_SBCS_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define SBCS_rrrASRi(Rd,Rn,Rm,i) CC_SBCS_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define SBCS_rrrASRr(Rd,Rn,Rm,Rs) CC_SBCS_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define SBCS_rrrRORi(Rd,Rn,Rm,i) CC_SBCS_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define SBCS_rrrRORr(Rd,Rn,Rm,Rs) CC_SBCS_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define SBCS_rrrRRX(Rd,Rn,Rm) CC_SBCS_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm)

/* RSC */
#define CC_RSC_rri(cc,Rd,Rn,i) _OP3(cc,_RSC,0,Rd,Rn,SHIFT_IMM(i))
#define CC_RSC_rrr(cc,Rd,Rn,Rm) _OP3(cc,_RSC,0,Rd,Rn,SHIFT_REG(Rm))
#define CC_RSC_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_RSC,0,Rd,Rn,SHIFT_LSL_i(Rm,i))
#define CC_RSC_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_RSC,0,Rd,Rn,SHIFT_LSL_r(Rm,Rs))
#define CC_RSC_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_RSC,0,Rd,Rn,SHIFT_LSR_i(Rm,i))
#define CC_RSC_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_RSC,0,Rd,Rn,SHIFT_LSR_r(Rm,Rs))
#define CC_RSC_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_RSC,0,Rd,Rn,SHIFT_ASR_i(Rm,i))
#define CC_RSC_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_RSC,0,Rd,Rn,SHIFT_ASR_r(Rm,Rs))
#define CC_RSC_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_RSC,0,Rd,Rn,SHIFT_ROR_i(Rm,i))
#define CC_RSC_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_RSC,0,Rd,Rn,SHIFT_ROR_r(Rm,Rs))
#define CC_RSC_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_RSC,0,Rd,Rn,SHIFT_RRX(Rm))

#define RSC_rri(Rd,Rn,i) CC_RSC_rri(NATIVE_CC_AL,Rd,Rn,i)
#define RSC_rrr(Rd,Rn,Rm) CC_RSC_rrr(NATIVE_CC_AL,Rd,Rn,Rm)
#define RSC_rrrLSLi(Rd,Rn,Rm,i) CC_RSC_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define RSC_rrrLSLr(Rd,Rn,Rm,Rs) CC_RSC_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define RSC_rrrLSRi(Rd,Rn,Rm,i) CC_RSC_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define RSC_rrrLSRr(Rd,Rn,Rm,Rs) CC_RSC_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define RSC_rrrASRi(Rd,Rn,Rm,i) CC_RSC_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define RSC_rrrASRr(Rd,Rn,Rm,Rs) CC_RSC_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define RSC_rrrRORi(Rd,Rn,Rm,i) CC_RSC_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define RSC_rrrRORr(Rd,Rn,Rm,Rs) CC_RSC_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define RSC_rrrRRX(Rd,Rn,Rm) CC_RSC_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm)

/* RSCS */
#define CC_RSCS_rri(cc,Rd,Rn,i) _OP3(cc,_RSC,1,Rd,Rn,SHIFT_IMM(i))
#define CC_RSCS_rrr(cc,Rd,Rn,Rm) _OP3(cc,_RSC,1,Rd,Rn,SHIFT_REG(Rm))
#define CC_RSCS_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_RSC,1,Rd,Rn,SHIFT_LSL_i(Rm,i))
#define CC_RSCS_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_RSC,1,Rd,Rn,SHIFT_LSL_r(Rm,Rs))
#define CC_RSCS_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_RSC,1,Rd,Rn,SHIFT_LSR_i(Rm,i))
#define CC_RSCS_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_RSC,1,Rd,Rn,SHIFT_LSR_r(Rm,Rs))
#define CC_RSCS_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_RSC,1,Rd,Rn,SHIFT_ASR_i(Rm,i))
#define CC_RSCS_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_RSC,1,Rd,Rn,SHIFT_ASR_r(Rm,Rs))
#define CC_RSCS_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_RSC,1,Rd,Rn,SHIFT_ROR_i(Rm,i))
#define CC_RSCS_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_RSC,1,Rd,Rn,SHIFT_ROR_r(Rm,Rs))
#define CC_RSCS_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_RSC,1,Rd,Rn,SHIFT_RRX(Rm))

#define RSCS_rri(Rd,Rn,i) CC_RSCS_rri(NATIVE_CC_AL,Rd,Rn,i)
#define RSCS_rrr(Rd,Rn,Rm) CC_RSCS_rrr(NATIVE_CC_AL,Rd,Rn,Rm)
#define RSCS_rrrLSLi(Rd,Rn,Rm,i) CC_RSCS_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define RSCS_rrrLSLr(Rd,Rn,Rm,Rs) CC_RSCS_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define RSCS_rrrLSRi(Rd,Rn,Rm,i) CC_RSCS_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define RSCS_rrrLSRr(Rd,Rn,Rm,Rs) CC_RSCS_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define RSCS_rrrASRi(Rd,Rn,Rm,i) CC_RSCS_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define RSCS_rrrASRr(Rd,Rn,Rm,Rs) CC_RSCS_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define RSCS_rrrRORi(Rd,Rn,Rm,i) CC_RSCS_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define RSCS_rrrRORr(Rd,Rn,Rm,Rs) CC_RSCS_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
#define RSCS_rrrRRX(Rd,Rn,Rm) CC_RSCS_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm)

/* ORRcc Rd,Rn,#i */
#define CC_ORR_rri8(cc,Rd,Rn,i) _OP3(cc,_ORR,0,Rd,Rn,UNSHIFTED_IMM8(i))
/* ORRcc Rd,Rn,#i ROR #s */
#define CC_ORR_rri8RORi(cc,Rd,Rn,i,s) _OP3(cc,_ORR,0,Rd,Rn,SHIFT_IMM8_ROR(i,s))

#define CC_ORR_rri(cc,Rd,Rn,i) _OP3(cc,_ORR,0,Rd,Rn,SHIFT_IMM(i))
+#define CC_ORR_rrr(cc,Rd,Rn,Rm) _OP3(cc,_ORR,0,Rd,Rn,SHIFT_REG(Rm))
+#define CC_ORR_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_ORR,0,Rd,Rn,SHIFT_LSL_i(Rm,i))
+#define CC_ORR_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ORR,0,Rd,Rn,SHIFT_LSL_r(Rm,Rs))
+#define CC_ORR_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_ORR,0,Rd,Rn,SHIFT_LSR_i(Rm,i))
+#define CC_ORR_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ORR,0,Rd,Rn,SHIFT_LSR_r(Rm,Rs))
+#define CC_ORR_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_ORR,0,Rd,Rn,SHIFT_ASR_i(Rm,i))
+#define CC_ORR_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ORR,0,Rd,Rn,SHIFT_ASR_r(Rm,Rs))
+#define CC_ORR_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_ORR,0,Rd,Rn,SHIFT_ROR_i(Rm,i))
+#define CC_ORR_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ORR,0,Rd,Rn,SHIFT_ROR_r(Rm,Rs))
+#define CC_ORR_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_ORR,0,Rd,Rn,SHIFT_RRX(Rm))
+
+/* ORR Rd,Rn,#i */
+#define ORR_rri8(Rd,Rn,i) CC_ORR_rri8(NATIVE_CC_AL,Rd,Rn,i)
+/* ORR Rd,Rn,#i ROR #s */
+#define ORR_rri8RORi(Rd,Rn,i,s) CC_ORR_rri8RORi(NATIVE_CC_AL,Rd,Rn,i,s)
+/* ORR, unconditional forms: forward to CC_ORR_* with NATIVE_CC_AL as the condition argument. */
+#define ORR_rri(Rd,Rn,i) CC_ORR_rri(NATIVE_CC_AL,Rd,Rn,i)
+#define ORR_rrr(Rd,Rn,Rm) CC_ORR_rrr(NATIVE_CC_AL,Rd,Rn,Rm)
+#define ORR_rrrLSLi(Rd,Rn,Rm,i) CC_ORR_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define ORR_rrrLSLr(Rd,Rn,Rm,Rs) CC_ORR_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
+#define ORR_rrrLSRi(Rd,Rn,Rm,i) CC_ORR_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define ORR_rrrLSRr(Rd,Rn,Rm,Rs) CC_ORR_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
+#define ORR_rrrASRi(Rd,Rn,Rm,i) CC_ORR_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define ORR_rrrASRr(Rd,Rn,Rm,Rs) CC_ORR_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
+#define ORR_rrrRORi(Rd,Rn,Rm,i) CC_ORR_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define ORR_rrrRORr(Rd,Rn,Rm,Rs) CC_ORR_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
+#define ORR_rrrRRX(Rd,Rn,Rm) CC_ORR_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm)
+/* ORRS, conditional forms: _OP3 with opcode _ORR and third argument 1. NOTE(review): the 1 is presumably the ARM S (flag-update) bit -- confirm against _OP3. */
+#define CC_ORRS_rri(cc,Rd,Rn,i) _OP3(cc,_ORR,1,Rd,Rn,SHIFT_IMM(i))
+#define CC_ORRS_rrr(cc,Rd,Rn,Rm) _OP3(cc,_ORR,1,Rd,Rn,SHIFT_REG(Rm))
+#define CC_ORRS_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_ORR,1,Rd,Rn,SHIFT_LSL_i(Rm,i))
+#define CC_ORRS_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ORR,1,Rd,Rn,SHIFT_LSL_r(Rm,Rs))
+#define CC_ORRS_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_ORR,1,Rd,Rn,SHIFT_LSR_i(Rm,i))
+#define CC_ORRS_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ORR,1,Rd,Rn,SHIFT_LSR_r(Rm,Rs))
+#define CC_ORRS_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_ORR,1,Rd,Rn,SHIFT_ASR_i(Rm,i))
+#define CC_ORRS_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ORR,1,Rd,Rn,SHIFT_ASR_r(Rm,Rs))
+#define CC_ORRS_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_ORR,1,Rd,Rn,SHIFT_ROR_i(Rm,i))
+#define CC_ORRS_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ORR,1,Rd,Rn,SHIFT_ROR_r(Rm,Rs))
+#define CC_ORRS_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_ORR,1,Rd,Rn,SHIFT_RRX(Rm))
+/* ORRS, unconditional forms: forward to CC_ORRS_* with NATIVE_CC_AL. */
+#define ORRS_rri(Rd,Rn,i) CC_ORRS_rri(NATIVE_CC_AL,Rd,Rn,i)
+#define ORRS_rrr(Rd,Rn,Rm) CC_ORRS_rrr(NATIVE_CC_AL,Rd,Rn,Rm)
+#define ORRS_rrrLSLi(Rd,Rn,Rm,i) CC_ORRS_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define ORRS_rrrLSLr(Rd,Rn,Rm,Rs) CC_ORRS_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
+#define ORRS_rrrLSRi(Rd,Rn,Rm,i) CC_ORRS_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define ORRS_rrrLSRr(Rd,Rn,Rm,Rs) CC_ORRS_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
+#define ORRS_rrrASRi(Rd,Rn,Rm,i) CC_ORRS_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define ORRS_rrrASRr(Rd,Rn,Rm,Rs) CC_ORRS_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
+#define ORRS_rrrRORi(Rd,Rn,Rm,i) CC_ORRS_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define ORRS_rrrRORr(Rd,Rn,Rm,Rs) CC_ORRS_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
+#define ORRS_rrrRRX(Rd,Rn,Rm) CC_ORRS_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm)
+/* BIC, conditional forms: _OP3 with opcode _BIC and third argument 0. */
+#define CC_BIC_rri(cc,Rd,Rn,i) _OP3(cc,_BIC,0,Rd,Rn,SHIFT_IMM(i))
+#define CC_BIC_rrr(cc,Rd,Rn,Rm) _OP3(cc,_BIC,0,Rd,Rn,SHIFT_REG(Rm))
+#define CC_BIC_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_BIC,0,Rd,Rn,SHIFT_LSL_i(Rm,i))
+#define CC_BIC_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_BIC,0,Rd,Rn,SHIFT_LSL_r(Rm,Rs))
+#define CC_BIC_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_BIC,0,Rd,Rn,SHIFT_LSR_i(Rm,i))
+#define CC_BIC_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_BIC,0,Rd,Rn,SHIFT_LSR_r(Rm,Rs))
+#define CC_BIC_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_BIC,0,Rd,Rn,SHIFT_ASR_i(Rm,i))
+#define CC_BIC_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_BIC,0,Rd,Rn,SHIFT_ASR_r(Rm,Rs))
+#define CC_BIC_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_BIC,0,Rd,Rn,SHIFT_ROR_i(Rm,i))
+#define CC_BIC_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_BIC,0,Rd,Rn,SHIFT_ROR_r(Rm,Rs))
+#define CC_BIC_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_BIC,0,Rd,Rn,SHIFT_RRX(Rm))
+/* BIC, unconditional forms: forward to CC_BIC_* with NATIVE_CC_AL. */
+#define BIC_rri(Rd,Rn,i) CC_BIC_rri(NATIVE_CC_AL,Rd,Rn,i)
+#define BIC_rrr(Rd,Rn,Rm) CC_BIC_rrr(NATIVE_CC_AL,Rd,Rn,Rm)
+#define BIC_rrrLSLi(Rd,Rn,Rm,i) CC_BIC_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define BIC_rrrLSLr(Rd,Rn,Rm,Rs) CC_BIC_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
+#define BIC_rrrLSRi(Rd,Rn,Rm,i) CC_BIC_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define BIC_rrrLSRr(Rd,Rn,Rm,Rs) CC_BIC_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
+#define BIC_rrrASRi(Rd,Rn,Rm,i) CC_BIC_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define BIC_rrrASRr(Rd,Rn,Rm,Rs) CC_BIC_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
+#define BIC_rrrRORi(Rd,Rn,Rm,i) CC_BIC_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define BIC_rrrRORr(Rd,Rn,Rm,Rs) CC_BIC_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
+#define BIC_rrrRRX(Rd,Rn,Rm) CC_BIC_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm)
+/* BICS, conditional forms: same as CC_BIC_* except that the third _OP3 argument is 1. */
+#define CC_BICS_rri(cc,Rd,Rn,i) _OP3(cc,_BIC,1,Rd,Rn,SHIFT_IMM(i))
+#define CC_BICS_rrr(cc,Rd,Rn,Rm) _OP3(cc,_BIC,1,Rd,Rn,SHIFT_REG(Rm))
+#define CC_BICS_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_BIC,1,Rd,Rn,SHIFT_LSL_i(Rm,i))
+#define CC_BICS_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_BIC,1,Rd,Rn,SHIFT_LSL_r(Rm,Rs))
+#define CC_BICS_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_BIC,1,Rd,Rn,SHIFT_LSR_i(Rm,i))
+#define CC_BICS_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_BIC,1,Rd,Rn,SHIFT_LSR_r(Rm,Rs))
+#define CC_BICS_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_BIC,1,Rd,Rn,SHIFT_ASR_i(Rm,i))
+#define CC_BICS_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_BIC,1,Rd,Rn,SHIFT_ASR_r(Rm,Rs))
+#define CC_BICS_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_BIC,1,Rd,Rn,SHIFT_ROR_i(Rm,i))
+#define CC_BICS_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_BIC,1,Rd,Rn,SHIFT_ROR_r(Rm,Rs))
+#define CC_BICS_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_BIC,1,Rd,Rn,SHIFT_RRX(Rm))
+/* BICS, unconditional forms: forward to CC_BICS_* with NATIVE_CC_AL. */
+#define BICS_rri(Rd,Rn,i)
CC_BICS_rri(NATIVE_CC_AL,Rd,Rn,i)
+#define BICS_rrr(Rd,Rn,Rm) CC_BICS_rrr(NATIVE_CC_AL,Rd,Rn,Rm)
+#define BICS_rrrLSLi(Rd,Rn,Rm,i) CC_BICS_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define BICS_rrrLSLr(Rd,Rn,Rm,Rs) CC_BICS_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
+#define BICS_rrrLSRi(Rd,Rn,Rm,i) CC_BICS_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define BICS_rrrLSRr(Rd,Rn,Rm,Rs) CC_BICS_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
+#define BICS_rrrASRi(Rd,Rn,Rm,i) CC_BICS_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define BICS_rrrASRr(Rd,Rn,Rm,Rs) CC_BICS_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
+#define BICS_rrrRORi(Rd,Rn,Rm,i) CC_BICS_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define BICS_rrrRORr(Rd,Rn,Rm,Rs) CC_BICS_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
+#define BICS_rrrRRX(Rd,Rn,Rm) CC_BICS_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm)
+
+/* Branch instructions */
+/* CC_B_i / CC_BL_i: condition in bits 31:28, 10 resp. 11 at bit 24, and the
+ * offset field i OR-ed in unmasked.  i must therefore already fit below
+ * bit 24, otherwise it overwrites the opcode/condition bits. */
+#define CC_B_i(cc,i) _W(((cc) << 28) | (10 << 24) | (i))
+#define CC_BL_i(cc,i) _W(((cc) << 28) | (11 << 24) | (i))
+/* Register branches: the three forms differ only in the value placed at
+ * bit 4 (3 = BLX, 1 = BX, 2 = BXJ); r is the register number in bits 3:0. */
+#define CC_BLX_r(cc,r) _W(((cc) << 28) | (0x12 << 20) | (3 << 4) | (0xfff << 8) | (r))
+#define CC_BX_r(cc,r) _W(((cc) << 28) | (0x12 << 20) | (1 << 4) | (0xfff << 8) | (r))
+#define CC_BXJ_r(cc,r) _W(((cc) << 28) | (0x12 << 20) | (2 << 4) | (0xfff << 8) | (r))
+
+/* Conditional branch shorthands: one per NATIVE_CC_* condition code. */
+#define BEQ_i(i) CC_B_i(NATIVE_CC_EQ,i)
+#define BNE_i(i) CC_B_i(NATIVE_CC_NE,i)
+#define BCS_i(i) CC_B_i(NATIVE_CC_CS,i)
+#define BCC_i(i) CC_B_i(NATIVE_CC_CC,i)
+#define BMI_i(i) CC_B_i(NATIVE_CC_MI,i)
+#define BPL_i(i) CC_B_i(NATIVE_CC_PL,i)
+#define BVS_i(i) CC_B_i(NATIVE_CC_VS,i)
+#define BVC_i(i) CC_B_i(NATIVE_CC_VC,i)
+#define BHI_i(i) CC_B_i(NATIVE_CC_HI,i)
+#define BLS_i(i) CC_B_i(NATIVE_CC_LS,i)
+#define BGE_i(i) CC_B_i(NATIVE_CC_GE,i)
+#define BLT_i(i) CC_B_i(NATIVE_CC_LT,i)
+#define BGT_i(i) CC_B_i(NATIVE_CC_GT,i)
+#define BLE_i(i) CC_B_i(NATIVE_CC_LE,i)
+#define B_i(i) CC_B_i(NATIVE_CC_AL,i)
+
+#define BL_i(i) CC_BL_i(NATIVE_CC_AL,i)
+/* BLX <imm> has no condition field: the ARM encoding requires 0b1111 in
+ * bits 31:28.  Using NATIVE_CC_AL here produced exactly the same word as
+ * B_i(i), i.e. a plain branch.  The H bit (bit 24) is left at 0; as with
+ * CC_B_i the caller supplies an already-masked offset field.
+ * NOTE(review): BLX <imm> switches to Thumb state -- confirm any caller
+ * really wants that rather than BL_i. */
+#define BLX_i(i) _W((0xfu << 28) | (10 << 24) | (i))
+#define BLX_r(r) CC_BLX_r(NATIVE_CC_AL,r)
+#define BX_r(r) CC_BX_r(NATIVE_CC_AL,r)
+#define
BXJ_r(r) CC_BXJ_r(NATIVE_CC_AL,r)
+
+/* Status register instructions */
+/* MRS: Rd goes into bits 15:12; the CPSR and SPSR forms differ only in the
+ * opcode byte at bit 20 (0x10 vs 0x14). */
+#define CC_MRS_CPSR(cc,Rd) _W(((cc) << 28) | (0x10 << 20) | ((Rd) << 12) | (0xf << 16))
+#define MRS_CPSR(Rd) CC_MRS_CPSR(NATIVE_CC_AL,Rd)
+#define CC_MRS_SPSR(cc,Rd) _W(((cc) << 28) | (0x14 << 20) | ((Rd) << 12) | (0xf << 16))
+#define MRS_SPSR(Rd) CC_MRS_SPSR(NATIVE_CC_AL,Rd)
+
+/* MSR: the _i forms use opcode byte 0x32 with a SHIFT_IMM operand, the _r
+ * forms 0x12 with a register.  The CPSR / CPSRf / CPSRc families differ only
+ * in the mask value at bit 16 (0x9, 0x8 and 0x1 respectively). */
+#define CC_MSR_CPSR_i(cc,i) _W(((cc) << 28) | (0x32 << 20) | (0x9 << 16) | (0xf << 12) | SHIFT_IMM(i))
+#define CC_MSR_CPSR_r(cc,Rm) _W(((cc) << 28) | (0x12 << 20) | (0x9 << 16) | (0xf << 12) | (Rm))
+
+#define MSR_CPSR_i(i) CC_MSR_CPSR_i(NATIVE_CC_AL,(i))
+#define MSR_CPSR_r(Rm) CC_MSR_CPSR_r(NATIVE_CC_AL,(Rm))
+
+#define CC_MSR_CPSRf_i(cc,i) _W(((cc) << 28) | (0x32 << 20) | (0x8 << 16) | (0xf << 12) | SHIFT_IMM(i))
+#define CC_MSR_CPSRf_r(cc,Rm) _W(((cc) << 28) | (0x12 << 20) | (0x8 << 16) | (0xf << 12) | (Rm))
+
+#define MSR_CPSRf_i(i) CC_MSR_CPSRf_i(NATIVE_CC_AL,(i))
+#define MSR_CPSRf_r(Rm) CC_MSR_CPSRf_r(NATIVE_CC_AL,(Rm))
+
+#define CC_MSR_CPSRc_i(cc,i) _W(((cc) << 28) | (0x32 << 20) | (0x1 << 16) | (0xf << 12) | SHIFT_IMM(i))
+#define CC_MSR_CPSRc_r(cc,Rm) _W(((cc) << 28) | (0x12 << 20) | (0x1 << 16) | (0xf << 12) | (Rm))
+
+#define MSR_CPSRc_i(i) CC_MSR_CPSRc_i(NATIVE_CC_AL,(i))
+#define MSR_CPSRc_r(Rm) CC_MSR_CPSRc_r(NATIVE_CC_AL,(Rm))
+
+/* Load Store instructions */
+
+/* PUSH/POP take a single register NUMBER and build the one-bit register
+ * list themselves (1 << r); PUSH_REGS/POP_REGS take a ready-made register
+ * list bitmask and OR it in as-is. */
+#define CC_PUSH(cc,r) _W(((cc) << 28) | (0x92d << 16) | (1 << (r)))
+#define PUSH(r) CC_PUSH(NATIVE_CC_AL, r)
+
+#define CC_PUSH_REGS(cc,r) _W(((cc) << 28) | (0x92d << 16) | (r))
+#define PUSH_REGS(r) CC_PUSH_REGS(NATIVE_CC_AL, r)
+
+#define CC_POP(cc,r) _W(((cc) << 28) | (0x8bd << 16) | (1 << (r)))
+#define POP(r) CC_POP(NATIVE_CC_AL, r)
+
+#define CC_POP_REGS(cc,r) _W(((cc) << 28) | (0x8bd << 16) | (r))
+#define POP_REGS(r) CC_POP_REGS(NATIVE_CC_AL, r)
+
+/* LDR addressing forms.  Suffix convention used throughout: an upper-case
+ * I/R adds the offset (ADD_*), a lower-case i/r subtracts it (SUB_*). */
+#define CC_LDR_rR(cc,Rd,Rn) _LS1(cc,1,0,Rd,Rn,ADD_IMM(0))
+/* rRI accepts a signed offset: negative values are encoded as SUB_IMM(-i).
+ * The conditional expression is parenthesised so that it stays one operand
+ * even if _LS1 splices its last argument into a larger `|` expression
+ * without adding parentheses of its own (`?:` binds more loosely than `|`). */
+#define CC_LDR_rRI(cc,Rd,Rn,i) _LS1(cc,1,0,Rd,Rn,((i) >= 0 ? ADD_IMM(i) : SUB_IMM(-(i))))
+#define CC_LDR_rRi(cc,Rd,Rn,i) _LS1(cc,1,0,Rd,Rn,SUB_IMM(i))
+#define CC_LDR_rRR(cc,Rd,Rn,Rm) _LS1(cc,1,0,Rd,Rn,ADD_REG(Rm))
+#define CC_LDR_rRr(cc,Rd,Rn,Rm) _LS1(cc,1,0,Rd,Rn,SUB_REG(Rm))
+#define CC_LDR_rRR_LSLi(cc,Rd,Rn,Rm,i) _LS1(cc,1,0,Rd,Rn,ADD_LSL(Rm,i))
+#define CC_LDR_rRr_LSLi(cc,Rd,Rn,Rm,i) _LS1(cc,1,0,Rd,Rn,SUB_LSL(Rm,i))
+#define CC_LDR_rRR_LSRi(cc,Rd,Rn,Rm,i) _LS1(cc,1,0,Rd,Rn,ADD_LSR(Rm,i))
+#define CC_LDR_rRr_LSRi(cc,Rd,Rn,Rm,i) _LS1(cc,1,0,Rd,Rn,SUB_LSR(Rm,i))
+#define CC_LDR_rRR_ASRi(cc,Rd,Rn,Rm,i) _LS1(cc,1,0,Rd,Rn,ADD_ASR(Rm,i))
+#define CC_LDR_rRr_ASRi(cc,Rd,Rn,Rm,i) _LS1(cc,1,0,Rd,Rn,SUB_ASR(Rm,i))
+#define CC_LDR_rRR_RORi(cc,Rd,Rn,Rm,i) _LS1(cc,1,0,Rd,Rn,ADD_ROR(Rm,i))
+#define CC_LDR_rRr_RORi(cc,Rd,Rn,Rm,i) _LS1(cc,1,0,Rd,Rn,SUB_ROR(Rm,i))
+#define CC_LDR_rRR_RRX(cc,Rd,Rn,Rm) _LS1(cc,1,0,Rd,Rn,ADD_RRX(Rm))
+#define CC_LDR_rRr_RRX(cc,Rd,Rn,Rm) _LS1(cc,1,0,Rd,Rn,SUB_RRX(Rm))
+
+/* Unconditional LDR forms: forward to CC_LDR_* with NATIVE_CC_AL. */
+#define LDR_rR(Rd,Rn) CC_LDR_rR(NATIVE_CC_AL,Rd,Rn)
+#define LDR_rRI(Rd,Rn,i) CC_LDR_rRI(NATIVE_CC_AL,Rd,Rn,i)
+#define LDR_rRi(Rd,Rn,i) CC_LDR_rRi(NATIVE_CC_AL,Rd,Rn,i)
+#define LDR_rRR(Rd,Rn,Rm) CC_LDR_rRR(NATIVE_CC_AL,Rd,Rn,Rm)
+#define LDR_rRr(Rd,Rn,Rm) CC_LDR_rRr(NATIVE_CC_AL,Rd,Rn,Rm)
+#define LDR_rRR_LSLi(Rd,Rn,Rm,i) CC_LDR_rRR_LSLi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define LDR_rRr_LSLi(Rd,Rn,Rm,i) CC_LDR_rRr_LSLi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define LDR_rRR_LSRi(Rd,Rn,Rm,i) CC_LDR_rRR_LSRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define LDR_rRr_LSRi(Rd,Rn,Rm,i) CC_LDR_rRr_LSRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define LDR_rRR_ASRi(Rd,Rn,Rm,i) CC_LDR_rRR_ASRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define LDR_rRr_ASRi(Rd,Rn,Rm,i) CC_LDR_rRr_ASRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define LDR_rRR_RORi(Rd,Rn,Rm,i) CC_LDR_rRR_RORi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define LDR_rRr_RORi(Rd,Rn,Rm,i) CC_LDR_rRr_RORi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define LDR_rRR_RRX(Rd,Rn,Rm) CC_LDR_rRR_RRX(NATIVE_CC_AL,Rd,Rn,Rm)
+#define LDR_rRr_RRX(Rd,Rn,Rm) CC_LDR_rRr_RRX(NATIVE_CC_AL,Rd,Rn,Rm)
+
+#define
CC_STR_rR(cc,Rd,Rn) _LS1(cc,0,0,Rd,Rn,ADD_IMM(0))
+#define CC_STR_rRI(cc,Rd,Rn,i) _LS1(cc,0,0,Rd,Rn,ADD_IMM(i))
+#define CC_STR_rRi(cc,Rd,Rn,i) _LS1(cc,0,0,Rd,Rn,SUB_IMM(i))
+#define CC_STR_rRR(cc,Rd,Rn,Rm) _LS1(cc,0,0,Rd,Rn,ADD_REG(Rm))
+#define CC_STR_rRr(cc,Rd,Rn,Rm) _LS1(cc,0,0,Rd,Rn,SUB_REG(Rm))
+#define CC_STR_rRR_LSLi(cc,Rd,Rn,Rm,i) _LS1(cc,0,0,Rd,Rn,ADD_LSL(Rm,i))
+#define CC_STR_rRr_LSLi(cc,Rd,Rn,Rm,i) _LS1(cc,0,0,Rd,Rn,SUB_LSL(Rm,i))
+#define CC_STR_rRR_LSRi(cc,Rd,Rn,Rm,i) _LS1(cc,0,0,Rd,Rn,ADD_LSR(Rm,i))
+#define CC_STR_rRr_LSRi(cc,Rd,Rn,Rm,i) _LS1(cc,0,0,Rd,Rn,SUB_LSR(Rm,i))
+#define CC_STR_rRR_ASRi(cc,Rd,Rn,Rm,i) _LS1(cc,0,0,Rd,Rn,ADD_ASR(Rm,i))
+#define CC_STR_rRr_ASRi(cc,Rd,Rn,Rm,i) _LS1(cc,0,0,Rd,Rn,SUB_ASR(Rm,i))
+#define CC_STR_rRR_RORi(cc,Rd,Rn,Rm,i) _LS1(cc,0,0,Rd,Rn,ADD_ROR(Rm,i))
+#define CC_STR_rRr_RORi(cc,Rd,Rn,Rm,i) _LS1(cc,0,0,Rd,Rn,SUB_ROR(Rm,i))
+#define CC_STR_rRR_RRX(cc,Rd,Rn,Rm) _LS1(cc,0,0,Rd,Rn,ADD_RRX(Rm))
+#define CC_STR_rRr_RRX(cc,Rd,Rn,Rm) _LS1(cc,0,0,Rd,Rn,SUB_RRX(Rm))
+/* STR, unconditional forms: forward to CC_STR_* with NATIVE_CC_AL. Upper-case I/R suffixes use the ADD_* offset helpers, lower-case i/r the SUB_* ones. NOTE(review): CC_STR_rRI hands i straight to ADD_IMM, so it presumably expects a non-negative offset -- confirm against ADD_IMM. */
+#define STR_rR(Rd,Rn) CC_STR_rR(NATIVE_CC_AL,Rd,Rn)
+#define STR_rRI(Rd,Rn,i) CC_STR_rRI(NATIVE_CC_AL,Rd,Rn,i)
+#define STR_rRi(Rd,Rn,i) CC_STR_rRi(NATIVE_CC_AL,Rd,Rn,i)
+#define STR_rRR(Rd,Rn,Rm) CC_STR_rRR(NATIVE_CC_AL,Rd,Rn,Rm)
+#define STR_rRr(Rd,Rn,Rm) CC_STR_rRr(NATIVE_CC_AL,Rd,Rn,Rm)
+#define STR_rRR_LSLi(Rd,Rn,Rm,i) CC_STR_rRR_LSLi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define STR_rRr_LSLi(Rd,Rn,Rm,i) CC_STR_rRr_LSLi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define STR_rRR_LSRi(Rd,Rn,Rm,i) CC_STR_rRR_LSRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define STR_rRr_LSRi(Rd,Rn,Rm,i) CC_STR_rRr_LSRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define STR_rRR_ASRi(Rd,Rn,Rm,i) CC_STR_rRR_ASRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define STR_rRr_ASRi(Rd,Rn,Rm,i) CC_STR_rRr_ASRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define STR_rRR_RORi(Rd,Rn,Rm,i) CC_STR_rRR_RORi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define STR_rRr_RORi(Rd,Rn,Rm,i) CC_STR_rRr_RORi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define STR_rRR_RRX(Rd,Rn,Rm) CC_STR_rRR_RRX(NATIVE_CC_AL,Rd,Rn,Rm)
+#define STR_rRr_RRX(Rd,Rn,Rm) CC_STR_rRr_RRX(NATIVE_CC_AL,Rd,Rn,Rm)
+/* LDRB, conditional forms: _LS1 with second and third arguments 1,1 (the STR forms above pass 0,0). NOTE(review): presumably load=1 and byte=1 -- confirm against _LS1. */
+#define CC_LDRB_rR(cc,Rd,Rn) _LS1(cc,1,1,Rd,Rn,ADD_IMM(0))
+#define CC_LDRB_rRI(cc,Rd,Rn,i) _LS1(cc,1,1,Rd,Rn,ADD_IMM(i))
+#define CC_LDRB_rRi(cc,Rd,Rn,i) _LS1(cc,1,1,Rd,Rn,SUB_IMM(i))
+#define CC_LDRB_rRR(cc,Rd,Rn,Rm) _LS1(cc,1,1,Rd,Rn,ADD_REG(Rm))
+#define CC_LDRB_rRr(cc,Rd,Rn,Rm) _LS1(cc,1,1,Rd,Rn,SUB_REG(Rm))
+#define CC_LDRB_rRR_LSLi(cc,Rd,Rn,Rm,i) _LS1(cc,1,1,Rd,Rn,ADD_LSL(Rm,i))
+#define CC_LDRB_rRr_LSLi(cc,Rd,Rn,Rm,i) _LS1(cc,1,1,Rd,Rn,SUB_LSL(Rm,i))
+#define CC_LDRB_rRR_LSRi(cc,Rd,Rn,Rm,i) _LS1(cc,1,1,Rd,Rn,ADD_LSR(Rm,i))
+#define CC_LDRB_rRr_LSRi(cc,Rd,Rn,Rm,i) _LS1(cc,1,1,Rd,Rn,SUB_LSR(Rm,i))
+#define CC_LDRB_rRR_ASRi(cc,Rd,Rn,Rm,i) _LS1(cc,1,1,Rd,Rn,ADD_ASR(Rm,i))
+#define CC_LDRB_rRr_ASRi(cc,Rd,Rn,Rm,i) _LS1(cc,1,1,Rd,Rn,SUB_ASR(Rm,i))
+#define CC_LDRB_rRR_RORi(cc,Rd,Rn,Rm,i) _LS1(cc,1,1,Rd,Rn,ADD_ROR(Rm,i))
+#define CC_LDRB_rRr_RORi(cc,Rd,Rn,Rm,i) _LS1(cc,1,1,Rd,Rn,SUB_ROR(Rm,i))
+#define CC_LDRB_rRR_RRX(cc,Rd,Rn,Rm) _LS1(cc,1,1,Rd,Rn,ADD_RRX(Rm))
+#define CC_LDRB_rRr_RRX(cc,Rd,Rn,Rm) _LS1(cc,1,1,Rd,Rn,SUB_RRX(Rm))
+/* LDRB, unconditional forms: forward to CC_LDRB_* with NATIVE_CC_AL. */
+#define LDRB_rR(Rd,Rn) CC_LDRB_rR(NATIVE_CC_AL,Rd,Rn)
+#define LDRB_rRI(Rd,Rn,i) CC_LDRB_rRI(NATIVE_CC_AL,Rd,Rn,i)
+#define LDRB_rRi(Rd,Rn,i) CC_LDRB_rRi(NATIVE_CC_AL,Rd,Rn,i)
+#define LDRB_rRR(Rd,Rn,Rm) CC_LDRB_rRR(NATIVE_CC_AL,Rd,Rn,Rm)
+#define LDRB_rRr(Rd,Rn,Rm) CC_LDRB_rRr(NATIVE_CC_AL,Rd,Rn,Rm)
+#define LDRB_rRR_LSLi(Rd,Rn,Rm,i) CC_LDRB_rRR_LSLi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define LDRB_rRr_LSLi(Rd,Rn,Rm,i) CC_LDRB_rRr_LSLi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define LDRB_rRR_LSRi(Rd,Rn,Rm,i) CC_LDRB_rRR_LSRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define LDRB_rRr_LSRi(Rd,Rn,Rm,i) CC_LDRB_rRr_LSRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define LDRB_rRR_ASRi(Rd,Rn,Rm,i) CC_LDRB_rRR_ASRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define LDRB_rRr_ASRi(Rd,Rn,Rm,i) CC_LDRB_rRr_ASRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define LDRB_rRR_RORi(Rd,Rn,Rm,i) CC_LDRB_rRR_RORi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define LDRB_rRr_RORi(Rd,Rn,Rm,i) CC_LDRB_rRr_RORi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define LDRB_rRR_RRX(Rd,Rn,Rm) CC_LDRB_rRR_RRX(NATIVE_CC_AL,Rd,Rn,Rm)
+#define LDRB_rRr_RRX(Rd,Rn,Rm) CC_LDRB_rRr_RRX(NATIVE_CC_AL,Rd,Rn,Rm)
+/* STRB, conditional forms: _LS1 with second and third arguments 0,1. */
+#define CC_STRB_rR(cc,Rd,Rn) _LS1(cc,0,1,Rd,Rn,ADD_IMM(0))
+#define CC_STRB_rRI(cc,Rd,Rn,i) _LS1(cc,0,1,Rd,Rn,ADD_IMM(i))
+#define CC_STRB_rRi(cc,Rd,Rn,i) _LS1(cc,0,1,Rd,Rn,SUB_IMM(i))
+#define CC_STRB_rRR(cc,Rd,Rn,Rm) _LS1(cc,0,1,Rd,Rn,ADD_REG(Rm))
+#define CC_STRB_rRr(cc,Rd,Rn,Rm) _LS1(cc,0,1,Rd,Rn,SUB_REG(Rm))
+#define CC_STRB_rRR_LSLi(cc,Rd,Rn,Rm,i) _LS1(cc,0,1,Rd,Rn,ADD_LSL(Rm,i))
+#define CC_STRB_rRr_LSLi(cc,Rd,Rn,Rm,i) _LS1(cc,0,1,Rd,Rn,SUB_LSL(Rm,i))
+#define CC_STRB_rRR_LSRi(cc,Rd,Rn,Rm,i) _LS1(cc,0,1,Rd,Rn,ADD_LSR(Rm,i))
+#define CC_STRB_rRr_LSRi(cc,Rd,Rn,Rm,i) _LS1(cc,0,1,Rd,Rn,SUB_LSR(Rm,i))
+#define CC_STRB_rRR_ASRi(cc,Rd,Rn,Rm,i) _LS1(cc,0,1,Rd,Rn,ADD_ASR(Rm,i))
+#define CC_STRB_rRr_ASRi(cc,Rd,Rn,Rm,i) _LS1(cc,0,1,Rd,Rn,SUB_ASR(Rm,i))
+#define CC_STRB_rRR_RORi(cc,Rd,Rn,Rm,i) _LS1(cc,0,1,Rd,Rn,ADD_ROR(Rm,i))
+#define CC_STRB_rRr_RORi(cc,Rd,Rn,Rm,i) _LS1(cc,0,1,Rd,Rn,SUB_ROR(Rm,i))
+#define CC_STRB_rRR_RRX(cc,Rd,Rn,Rm) _LS1(cc,0,1,Rd,Rn,ADD_RRX(Rm))
+#define CC_STRB_rRr_RRX(cc,Rd,Rn,Rm) _LS1(cc,0,1,Rd,Rn,SUB_RRX(Rm))
+/* STRB, unconditional forms: forward to CC_STRB_* with NATIVE_CC_AL. */
+#define STRB_rR(Rd,Rn) CC_STRB_rR(NATIVE_CC_AL,Rd,Rn)
+#define STRB_rRI(Rd,Rn,i) CC_STRB_rRI(NATIVE_CC_AL,Rd,Rn,i)
+#define STRB_rRi(Rd,Rn,i) CC_STRB_rRi(NATIVE_CC_AL,Rd,Rn,i)
+#define STRB_rRR(Rd,Rn,Rm) CC_STRB_rRR(NATIVE_CC_AL,Rd,Rn,Rm)
+#define STRB_rRr(Rd,Rn,Rm) CC_STRB_rRr(NATIVE_CC_AL,Rd,Rn,Rm)
+#define STRB_rRR_LSLi(Rd,Rn,Rm,i) CC_STRB_rRR_LSLi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define STRB_rRr_LSLi(Rd,Rn,Rm,i) CC_STRB_rRr_LSLi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define STRB_rRR_LSRi(Rd,Rn,Rm,i) CC_STRB_rRR_LSRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define STRB_rRr_LSRi(Rd,Rn,Rm,i) CC_STRB_rRr_LSRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define STRB_rRR_ASRi(Rd,Rn,Rm,i) CC_STRB_rRR_ASRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define STRB_rRr_ASRi(Rd,Rn,Rm,i) CC_STRB_rRr_ASRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define STRB_rRR_RORi(Rd,Rn,Rm,i) CC_STRB_rRR_RORi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define STRB_rRr_RORi(Rd,Rn,Rm,i) CC_STRB_rRr_RORi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define STRB_rRR_RRX(Rd,Rn,Rm) CC_STRB_rRR_RRX(NATIVE_CC_AL,Rd,Rn,Rm)
+#define STRB_rRr_RRX(Rd,Rn,Rm) CC_STRB_rRr_RRX(NATIVE_CC_AL,Rd,Rn,Rm)
+
+/* Halfword / signed / doubleword transfers go through _LS2 and the
+ * ADD2_ / SUB2_ offset helpers.  Each family is identified by the four
+ * literal arguments after cc:
+ *   LDRSH 1,1,1,1   LDRH 1,1,0,1   STRD 1,0,1,1   STRH 1,0,0,1
+ * Upper-case I/R suffixes add the offset, lower-case i/r subtract it. */
+#define CC_LDRSH_rR(cc,Rd,Rn) _LS2(cc,1,1,1,1,Rd,Rn,ADD2_IMM(0))
+#define CC_LDRSH_rRI(cc,Rd,Rn,i) _LS2(cc,1,1,1,1,Rd,Rn,ADD2_IMM(i))
+#define CC_LDRSH_rRi(cc,Rd,Rn,i) _LS2(cc,1,1,1,1,Rd,Rn,SUB2_IMM(i))
+#define CC_LDRSH_rRR(cc,Rd,Rn,Rm) _LS2(cc,1,1,1,1,Rd,Rn,ADD2_REG(Rm))
+#define CC_LDRSH_rRr(cc,Rd,Rn,Rm) _LS2(cc,1,1,1,1,Rd,Rn,SUB2_REG(Rm))
+
+#define LDRSH_rR(Rd,Rn) CC_LDRSH_rR(NATIVE_CC_AL,Rd,Rn)
+#define LDRSH_rRI(Rd,Rn,i) CC_LDRSH_rRI(NATIVE_CC_AL,Rd,Rn,i)
+#define LDRSH_rRi(Rd,Rn,i) CC_LDRSH_rRi(NATIVE_CC_AL,Rd,Rn,i)
+#define LDRSH_rRR(Rd,Rn,Rm) CC_LDRSH_rRR(NATIVE_CC_AL,Rd,Rn,Rm)
+#define LDRSH_rRr(Rd,Rn,Rm) CC_LDRSH_rRr(NATIVE_CC_AL,Rd,Rn,Rm)
+
+#define CC_LDRH_rR(cc,Rd,Rn) _LS2(cc,1,1,0,1,Rd,Rn,ADD2_IMM(0))
+/* rRI accepts a signed offset: negative values are encoded as SUB2_IMM(-i).
+ * The conditional expression is parenthesised so that it stays one operand
+ * even if _LS2 splices its last argument into a larger `|` expression
+ * without adding parentheses of its own (`?:` binds more loosely than `|`). */
+#define CC_LDRH_rRI(cc,Rd,Rn,i) _LS2(cc,1,1,0,1,Rd,Rn,((i) >= 0 ? ADD2_IMM(i) : SUB2_IMM(-(i))))
+#define CC_LDRH_rRi(cc,Rd,Rn,i) _LS2(cc,1,1,0,1,Rd,Rn,SUB2_IMM(i))
+#define CC_LDRH_rRR(cc,Rd,Rn,Rm) _LS2(cc,1,1,0,1,Rd,Rn,ADD2_REG(Rm))
+#define CC_LDRH_rRr(cc,Rd,Rn,Rm) _LS2(cc,1,1,0,1,Rd,Rn,SUB2_REG(Rm))
+
+#define LDRH_rR(Rd,Rn) CC_LDRH_rR(NATIVE_CC_AL,Rd,Rn)
+#define LDRH_rRI(Rd,Rn,i) CC_LDRH_rRI(NATIVE_CC_AL,Rd,Rn,i)
+#define LDRH_rRi(Rd,Rn,i) CC_LDRH_rRi(NATIVE_CC_AL,Rd,Rn,i)
+#define LDRH_rRR(Rd,Rn,Rm) CC_LDRH_rRR(NATIVE_CC_AL,Rd,Rn,Rm)
+#define LDRH_rRr(Rd,Rn,Rm) CC_LDRH_rRr(NATIVE_CC_AL,Rd,Rn,Rm)
+
+#define CC_STRD_rR(cc,Rd,Rn) _LS2(cc,1,0,1,1,Rd,Rn,ADD2_IMM(0))
+#define CC_STRD_rRI(cc,Rd,Rn,i) _LS2(cc,1,0,1,1,Rd,Rn,ADD2_IMM(i))
+#define CC_STRD_rRi(cc,Rd,Rn,i) _LS2(cc,1,0,1,1,Rd,Rn,SUB2_IMM(i))
+#define CC_STRD_rRR(cc,Rd,Rn,Rm) _LS2(cc,1,0,1,1,Rd,Rn,ADD2_REG(Rm))
+#define CC_STRD_rRr(cc,Rd,Rn,Rm) _LS2(cc,1,0,1,1,Rd,Rn,SUB2_REG(Rm))
+
+#define STRD_rR(Rd,Rn) CC_STRD_rR(NATIVE_CC_AL,Rd,Rn)
+#define STRD_rRI(Rd,Rn,i) CC_STRD_rRI(NATIVE_CC_AL,Rd,Rn,i)
+#define STRD_rRi(Rd,Rn,i) CC_STRD_rRi(NATIVE_CC_AL,Rd,Rn,i)
+#define STRD_rRR(Rd,Rn,Rm) CC_STRD_rRR(NATIVE_CC_AL,Rd,Rn,Rm)
+#define STRD_rRr(Rd,Rn,Rm) CC_STRD_rRr(NATIVE_CC_AL,Rd,Rn,Rm)
+
+#define CC_STRH_rR(cc,Rd,Rn) _LS2(cc,1,0,0,1,Rd,Rn,ADD2_IMM(0))
+#define CC_STRH_rRI(cc,Rd,Rn,i) _LS2(cc,1,0,0,1,Rd,Rn,ADD2_IMM(i))
+#define CC_STRH_rRi(cc,Rd,Rn,i) _LS2(cc,1,0,0,1,Rd,Rn,SUB2_IMM(i))
+#define CC_STRH_rRR(cc,Rd,Rn,Rm) _LS2(cc,1,0,0,1,Rd,Rn,ADD2_REG(Rm))
+#define CC_STRH_rRr(cc,Rd,Rn,Rm) _LS2(cc,1,0,0,1,Rd,Rn,SUB2_REG(Rm))
+
+#define STRH_rR(Rd,Rn) CC_STRH_rR(NATIVE_CC_AL,Rd,Rn)
+#define STRH_rRI(Rd,Rn,i) CC_STRH_rRI(NATIVE_CC_AL,Rd,Rn,i)
+#define STRH_rRi(Rd,Rn,i) CC_STRH_rRi(NATIVE_CC_AL,Rd,Rn,i)
+#define STRH_rRR(Rd,Rn,Rm) CC_STRH_rRR(NATIVE_CC_AL,Rd,Rn,Rm)
+#define STRH_rRr(Rd,Rn,Rm) CC_STRH_rRr(NATIVE_CC_AL,Rd,Rn,Rm)
+
+#define CC_LDRSB_rR(cc,Rd,Rn) _LS2(cc,1,1,1,0,Rd,Rn,ADD2_IMM(0))
+#define CC_LDRSB_rRI(cc,Rd,Rn,i) _LS2(cc,1,1,1,0,Rd,Rn,ADD2_IMM(i))
+#define
CC_LDRSB_rRi(cc,Rd,Rn,i) _LS2(cc,1,1,1,0,Rd,Rn,SUB2_IMM(i))
+#define CC_LDRSB_rRR(cc,Rd,Rn,Rm) _LS2(cc,1,1,1,0,Rd,Rn,ADD2_REG(Rm))
+#define CC_LDRSB_rRr(cc,Rd,Rn,Rm) _LS2(cc,1,1,1,0,Rd,Rn,SUB2_REG(Rm))
+/* LDRSB, unconditional forms: forward to CC_LDRSB_* with NATIVE_CC_AL. */
+#define LDRSB_rR(Rd,Rn) CC_LDRSB_rR(NATIVE_CC_AL,Rd,Rn)
+#define LDRSB_rRI(Rd,Rn,i) CC_LDRSB_rRI(NATIVE_CC_AL,Rd,Rn,i)
+#define LDRSB_rRi(Rd,Rn,i) CC_LDRSB_rRi(NATIVE_CC_AL,Rd,Rn,i)
+#define LDRSB_rRR(Rd,Rn,Rm) CC_LDRSB_rRR(NATIVE_CC_AL,Rd,Rn,Rm)
+#define LDRSB_rRr(Rd,Rn,Rm) CC_LDRSB_rRr(NATIVE_CC_AL,Rd,Rn,Rm)
+/* LDRD, conditional forms: _LS2 with the literal arguments 1,0,1,0 after cc (the LDRSB forms above pass 1,1,1,0). */
+#define CC_LDRD_rR(cc,Rd,Rn) _LS2(cc,1,0,1,0,Rd,Rn,ADD2_IMM(0))
+#define CC_LDRD_rRI(cc,Rd,Rn,i) _LS2(cc,1,0,1,0,Rd,Rn,ADD2_IMM(i))
+#define CC_LDRD_rRi(cc,Rd,Rn,i) _LS2(cc,1,0,1,0,Rd,Rn,SUB2_IMM(i))
+#define CC_LDRD_rRR(cc,Rd,Rn,Rm) _LS2(cc,1,0,1,0,Rd,Rn,ADD2_REG(Rm))
+#define CC_LDRD_rRr(cc,Rd,Rn,Rm) _LS2(cc,1,0,1,0,Rd,Rn,SUB2_REG(Rm))
+/* LDRD, unconditional forms: forward to CC_LDRD_* with NATIVE_CC_AL. */
+#define LDRD_rR(Rd,Rn) CC_LDRD_rR(NATIVE_CC_AL,Rd,Rn)
+#define LDRD_rRI(Rd,Rn,i) CC_LDRD_rRI(NATIVE_CC_AL,Rd,Rn,i)
+#define LDRD_rRi(Rd,Rn,i) CC_LDRD_rRi(NATIVE_CC_AL,Rd,Rn,i)
+#define LDRD_rRR(Rd,Rn,Rm) CC_LDRD_rRR(NATIVE_CC_AL,Rd,Rn,Rm)
+#define LDRD_rRr(Rd,Rn,Rm) CC_LDRD_rRr(NATIVE_CC_AL,Rd,Rn,Rm)
+
+/* Multiply: encoded directly with _W. The ...S variants differ from their base form only in the opcode byte at bit 20 (0x0D vs 0x0C for SMULL, 0x01 vs 0x00 for MUL). Note the argument order: the long forms take (RdLo, RdHi, Rm, Rs), with RdHi placed at bit 16 and RdLo at bit 12. */
+#define CC_SMULL_rrrr(cc, RdLo, RdHi, Rm, Rs) _W(((cc) << 28) | (0x0C << 20) | ((RdHi) << 16) | ((RdLo) << 12) | ((Rs) << 8) | (0x9 << 4) | (Rm))
+#define SMULL_rrrr(RdLo,RdHi,Rm,Rs) CC_SMULL_rrrr(NATIVE_CC_AL,RdLo,RdHi,Rm,Rs)
+#define CC_SMULLS_rrrr(cc, RdLo, RdHi, Rm, Rs) _W(((cc) << 28) | (0x0D << 20) | ((RdHi) << 16) | ((RdLo) << 12) | ((Rs) << 8) | (0x9 << 4) | (Rm))
+#define SMULLS_rrrr(RdLo,RdHi,Rm,Rs) CC_SMULLS_rrrr(NATIVE_CC_AL,RdLo,RdHi,Rm,Rs)
+#define CC_MUL_rrr(cc, Rd, Rm, Rs) _W(((cc) << 28) | (0x00 << 20) | ((Rd) << 16) | ((Rs) << 8) | (0x9 << 4) | (Rm))
+#define MUL_rrr(Rd, Rm, Rs) CC_MUL_rrr(NATIVE_CC_AL, Rd, Rm, Rs)
+#define CC_MULS_rrr(cc, Rd, Rm, Rs) _W(((cc) << 28) | (0x01 << 20) | ((Rd) << 16) | ((Rs) << 8) | (0x9 << 4) | (Rm))
+#define MULS_rrr(Rd, Rm, Rs) CC_MULS_rrr(NATIVE_CC_AL,
Rd, Rm, Rs)
+
+/* Unsigned long multiply: same layout as SMULL, opcode byte 0x08 (0x09 for
+ * the ...S variant). */
+#define CC_UMULL_rrrr(cc, RdLo, RdHi, Rm, Rs) _W(((cc) << 28) | (0x08 << 20) | ((RdHi) << 16) | ((RdLo) << 12) | ((Rs) << 8) | (0x9 << 4) | (Rm))
+#define UMULL_rrrr(RdLo,RdHi,Rm,Rs) CC_UMULL_rrrr(NATIVE_CC_AL,RdLo,RdHi,Rm,Rs)
+#define CC_UMULLS_rrrr(cc, RdLo, RdHi, Rm, Rs) _W(((cc) << 28) | (0x09 << 20) | ((RdHi) << 16) | ((RdLo) << 12) | ((Rs) << 8) | (0x9 << 4) | (Rm))
+#define UMULLS_rrrr(RdLo,RdHi,Rm,Rs) CC_UMULLS_rrrr(NATIVE_CC_AL,RdLo,RdHi,Rm,Rs)
+
+/* Others */
+#define CC_CLZ_rr(cc,Rd,Rm) _W(((cc) << 28) | (0x16 << 20) | (0xf << 16) | ((Rd) << 12) | (0xf << 8) | (0x1 << 4) | SHIFT_REG(Rm))
+#define CLZ_rr(Rd,Rm) CC_CLZ_rr(NATIVE_CC_AL,Rd,Rm)
+
+/* Alias: shift/rotate mnemonics expressed through the MOV (and, for the
+ * ...S names, MOVS) shifted-operand macros. */
+#define LSL_rri(Rd,Rm,i) MOV_rrLSLi(Rd,Rm,i)
+#define LSL_rrr(Rd,Rm,Rs) MOV_rrLSLr(Rd,Rm,Rs)
+#define LSR_rri(Rd,Rm,i) MOV_rrLSRi(Rd,Rm,i)
+#define LSR_rrr(Rd,Rm,Rs) MOV_rrLSRr(Rd,Rm,Rs)
+#define ASR_rri(Rd,Rm,i) MOV_rrASRi(Rd,Rm,i)
+#define ASR_rrr(Rd,Rm,Rs) MOV_rrASRr(Rd,Rm,Rs)
+#define ROR_rri(Rd,Rm,i) MOV_rrRORi(Rd,Rm,i)
+#define ROR_rrr(Rd,Rm,Rs) MOV_rrRORr(Rd,Rm,Rs)
+#define RRX_rr(Rd,Rm) MOV_rrRRX(Rd,Rm)
+#define LSLS_rri(Rd,Rm,i) MOVS_rrLSLi(Rd,Rm,i)
+#define LSLS_rrr(Rd,Rm,Rs) MOVS_rrLSLr(Rd,Rm,Rs)
+#define LSRS_rri(Rd,Rm,i) MOVS_rrLSRi(Rd,Rm,i)
+#define LSRS_rrr(Rd,Rm,Rs) MOVS_rrLSRr(Rd,Rm,Rs)
+#define ASRS_rri(Rd,Rm,i) MOVS_rrASRi(Rd,Rm,i)
+#define ASRS_rrr(Rd,Rm,Rs) MOVS_rrASRr(Rd,Rm,Rs)
+#define RORS_rri(Rd,Rm,i) MOVS_rrRORi(Rd,Rm,i)
+#define RORS_rrr(Rd,Rm,Rs) MOVS_rrRORr(Rd,Rm,Rs)
+#define RRXS_rr(Rd,Rm) MOVS_rrRRX(Rd,Rm)
+
+/* ARMV6 ops */
+/* Extend instructions: SXTB/SXTH/UXTB/UXTH use opcode bytes 0x6a/0x6b/0x6e/
+ * 0x6f.  The _ROR8/_ROR16/_ROR24 variants differ from the base form only by
+ * the value 1/2/3 OR-ed in at bit 10. */
+#define CC_SXTB_rr(cc,Rd,Rm) _W(((cc) << 28) | (0x6a << 20) | (0xf << 16) | ((Rd) << 12) | (0x7 << 4) | SHIFT_REG(Rm))
+#define SXTB_rr(Rd,Rm) CC_SXTB_rr(NATIVE_CC_AL,Rd,Rm)
+
+#define CC_SXTB_rr_ROR8(cc,Rd,Rm) _W(((cc) << 28) | (0x6a << 20) | (0xf << 16) | ((Rd) << 12) | (1 << 10) | (0x7 << 4) | SHIFT_REG(Rm))
+#define SXTB_rr_ROR8(Rd,Rm) CC_SXTB_rr_ROR8(NATIVE_CC_AL,Rd,Rm)
+
+#define CC_SXTB_rr_ROR16(cc,Rd,Rm) _W(((cc) << 28) | (0x6a << 20) | (0xf << 16) | ((Rd) << 12) | (2 << 10) | (0x7 << 4) | SHIFT_REG(Rm))
+#define SXTB_rr_ROR16(Rd,Rm) CC_SXTB_rr_ROR16(NATIVE_CC_AL,Rd,Rm)
+
+#define CC_SXTB_rr_ROR24(cc,Rd,Rm) _W(((cc) << 28) | (0x6a << 20) | (0xf << 16) | ((Rd) << 12) | (3 << 10) | (0x7 << 4) | SHIFT_REG(Rm))
+#define SXTB_rr_ROR24(Rd,Rm) CC_SXTB_rr_ROR24(NATIVE_CC_AL,Rd,Rm)
+
+#define CC_SXTH_rr(cc,Rd,Rm) _W(((cc) << 28) | (0x6b << 20) | (0xf << 16) | ((Rd) << 12) | (0x7 << 4) | SHIFT_REG(Rm))
+#define SXTH_rr(Rd,Rm) CC_SXTH_rr(NATIVE_CC_AL,Rd,Rm)
+
+#define CC_SXTH_rr_ROR8(cc,Rd,Rm) _W(((cc) << 28) | (0x6b << 20) | (0xf << 16) | ((Rd) << 12) | (1 << 10) | (0x7 << 4) | SHIFT_REG(Rm))
+#define SXTH_rr_ROR8(Rd,Rm) CC_SXTH_rr_ROR8(NATIVE_CC_AL,Rd,Rm)
+
+#define CC_SXTH_rr_ROR16(cc,Rd,Rm) _W(((cc) << 28) | (0x6b << 20) | (0xf << 16) | ((Rd) << 12) | (2 << 10) | (0x7 << 4) | SHIFT_REG(Rm))
+#define SXTH_rr_ROR16(Rd,Rm) CC_SXTH_rr_ROR16(NATIVE_CC_AL,Rd,Rm)
+
+#define CC_SXTH_rr_ROR24(cc,Rd,Rm) _W(((cc) << 28) | (0x6b << 20) | (0xf << 16) | ((Rd) << 12) | (3 << 10) | (0x7 << 4) | SHIFT_REG(Rm))
+#define SXTH_rr_ROR24(Rd,Rm) CC_SXTH_rr_ROR24(NATIVE_CC_AL,Rd,Rm)
+
+#define CC_UXTB_rr(cc,Rd,Rm) _W(((cc) << 28) | (0x6e << 20) | (0xf << 16) | ((Rd) << 12) | (0x7 << 4) | SHIFT_REG(Rm))
+#define UXTB_rr(Rd,Rm) CC_UXTB_rr(NATIVE_CC_AL,Rd,Rm)
+
+#define CC_UXTB_rr_ROR8(cc,Rd,Rm) _W(((cc) << 28) | (0x6e << 20) | (0xf << 16) | ((Rd) << 12) | (1 << 10) | (0x7 << 4) | SHIFT_REG(Rm))
+#define UXTB_rr_ROR8(Rd,Rm) CC_UXTB_rr_ROR8(NATIVE_CC_AL,Rd,Rm)
+
+#define CC_UXTB_rr_ROR16(cc,Rd,Rm) _W(((cc) << 28) | (0x6e << 20) | (0xf << 16) | ((Rd) << 12) | (2 << 10) | (0x7 << 4) | SHIFT_REG(Rm))
+#define UXTB_rr_ROR16(Rd,Rm) CC_UXTB_rr_ROR16(NATIVE_CC_AL,Rd,Rm)
+
+#define CC_UXTB_rr_ROR24(cc,Rd,Rm) _W(((cc) << 28) | (0x6e << 20) | (0xf << 16) | ((Rd) << 12) | (3 << 10) | (0x7 << 4) | SHIFT_REG(Rm))
+#define UXTB_rr_ROR24(Rd,Rm) CC_UXTB_rr_ROR24(NATIVE_CC_AL,Rd,Rm)
+
+#define CC_UXTH_rr(cc,Rd,Rm) _W(((cc) << 28) | (0x6f << 20) | (0xf << 16) | ((Rd) << 12) | (0x7 << 4) | SHIFT_REG(Rm))
+#define UXTH_rr(Rd,Rm) CC_UXTH_rr(NATIVE_CC_AL,Rd,Rm)
+
+#define CC_UXTH_rr_ROR8(cc,Rd,Rm) _W(((cc) << 28) | (0x6f << 20) | (0xf << 16) | ((Rd) << 12) | (1 << 10) | (0x7 << 4) | SHIFT_REG(Rm))
+#define UXTH_rr_ROR8(Rd,Rm) CC_UXTH_rr_ROR8(NATIVE_CC_AL,Rd,Rm)
+
+#define CC_UXTH_rr_ROR16(cc,Rd,Rm) _W(((cc) << 28) | (0x6f << 20) | (0xf << 16) | ((Rd) << 12) | (2 << 10) | (0x7 << 4) | SHIFT_REG(Rm))
+#define UXTH_rr_ROR16(Rd,Rm) CC_UXTH_rr_ROR16(NATIVE_CC_AL,Rd,Rm)
+
+#define CC_UXTH_rr_ROR24(cc,Rd,Rm) _W(((cc) << 28) | (0x6f << 20) | (0xf << 16) | ((Rd) << 12) | (3 << 10) | (0x7 << 4) | SHIFT_REG(Rm))
+#define UXTH_rr_ROR24(Rd,Rm) CC_UXTH_rr_ROR24(NATIVE_CC_AL,Rd,Rm)
+
+/* Byte-reverse instructions: REV and REV16 share opcode byte 0x6b and differ
+ * at bit 4 (0x3 vs 0xB); REVSH uses 0x6f with 0xB. */
+#define CC_REV_rr(cc,Rd,Rm) _W(((cc) << 28) | (0x6b << 20) | (0xf << 16) | (0xf << 8) | ((Rd) << 12) | (0x3 << 4) | SHIFT_REG(Rm))
+#define REV_rr(Rd,Rm) CC_REV_rr(NATIVE_CC_AL,Rd,Rm)
+
+#define CC_REV16_rr(cc,Rd,Rm) _W(((cc) << 28) | (0x6b << 20) | (0xf << 16) | (0xf << 8) | ((Rd) << 12) | (0xB << 4) | SHIFT_REG(Rm))
+#define REV16_rr(Rd,Rm) CC_REV16_rr(NATIVE_CC_AL,Rd,Rm)
+
+#define CC_REVSH_rr(cc,Rd,Rm) _W(((cc) << 28) | (0x6f << 20) | (0xf << 16) | (0xf << 8) | ((Rd) << 12) | (0xB << 4) | SHIFT_REG(Rm))
+#define REVSH_rr(Rd,Rm) CC_REVSH_rr(NATIVE_CC_AL,Rd,Rm)
+
+/* Pack-halfword instructions.  Rn and Rd are parenthesised before shifting,
+ * like every other register argument in this file, so that an expression
+ * argument (e.g. `a | b` or `c ? x : y`) is shifted as a whole instead of
+ * being torn apart by operator precedence. */
+#define CC_PKHBT_rrr(cc,Rd,Rn,Rm) _W(((cc) << 28) | (0x68 << 20) | ((Rn) << 16) | ((Rd) << 12) | (0x1 << 4) | (Rm))
+#define CC_PKHBT_rrrLSLi(cc,Rd,Rn,Rm,s) _W(((cc) << 28) | (0x68 << 20) | ((Rn) << 16) | ((Rd) << 12) | (0x1 << 4) | SHIFT_PK(Rm, s))
+#define PKHBT_rrr(Rd,Rn,Rm) CC_PKHBT_rrr(NATIVE_CC_AL,Rd,Rn,Rm)
+#define PKHBT_rrrLSLi(Rd,Rn,Rm,s) CC_PKHBT_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,s)
+
+#define CC_PKHTB_rrrASRi(cc,Rd,Rn,Rm,s) _W(((cc) << 28) | (0x68 << 20) | ((Rn) << 16) | ((Rd) << 12) | (0x5 << 4) | SHIFT_PK(Rm, s))
+#define PKHTB_rrrASRi(Rd,Rn,Rm,s) CC_PKHTB_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,s)
+
+#endif /* ARM_RTASM_H */
diff --git a/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
b/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp index f03c4f3c..22c01080 100644 --- a/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp +++ b/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp @@ -1,28 +1,32 @@ /* - * compiler/codegen_x86.cpp - IA-32 code generator + * compiler/codegen_x86.cpp - IA-32 and AMD64 code generator * - * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II * - * Adaptation for Basilisk II and improvements, copyright 2000-2005 - * Gwenole Beauchesne + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. * - * Basilisk II (C) 1997-2008 Christian Bauer + * JIT compiler m68k -> IA-32 and AMD64 * - * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * Adaptation for Basilisk II and improvements, copyright 2000-2004 Gwenole Beauchesne + * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
* - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* This should eventually end up in machdep/, but for now, x86 is the @@ -34,6 +38,10 @@ * Some basic information about the the target CPU * *************************************************************************/ +#define R1 RR1 +#define R2 RR2 +#define R4 RR4 + #define EAX_INDEX 0 #define ECX_INDEX 1 #define EDX_INDEX 2 @@ -42,7 +50,7 @@ #define EBP_INDEX 5 #define ESI_INDEX 6 #define EDI_INDEX 7 -#if defined(__x86_64__) +#if defined(CPU_x86_64) #define R8_INDEX 8 #define R9_INDEX 9 #define R10_INDEX 10 @@ -62,11 +70,11 @@ #define REG_RESULT EAX_INDEX /* The registers subroutines take their first and second argument in */ -#if defined( _MSC_VER ) && !USE_NORMAL_CALLING_CONVENTION +#ifdef _WIN32 /* Handle the _fastcall parameters of ECX and EDX */ #define REG_PAR1 ECX_INDEX #define REG_PAR2 EDX_INDEX -#elif defined(__x86_64__) +#elif defined(CPU_x86_64) #define REG_PAR1 EDI_INDEX #define REG_PAR2 ESI_INDEX #else @@ -75,8 +83,8 @@ #endif #define REG_PC_PRE EAX_INDEX /* The register we use for preloading regs.pc_p */ -#if defined( _MSC_VER ) && !USE_NORMAL_CALLING_CONVENTION -#define REG_PC_TMP EAX_INDEX +#ifdef _WIN32 +#define REG_PC_TMP ECX_INDEX #else #define REG_PC_TMP ECX_INDEX /* Another register that is not the above */ #endif @@ -88,15 +96,33 @@ #define STACK_ALIGN 16 #define STACK_OFFSET 
sizeof(void *) +#ifdef _WIN64 +/* In the Microsoft x64 calling convention, it's the caller's responsibility + * to allocate 32 bytes of "shadow space" on the stack right before calling + * the function (regardless of the actual number of parameters used). */ +#define STACK_SHADOW_SPACE 32 +#else +#define STACK_SHADOW_SPACE 0 +#endif -uae_s8 always_used[]={4,-1}; -#if defined(__x86_64__) +#if defined(CPU_x86_64) +#ifdef UAE +/* Register R12 (and ESP) cannot be used with simple [r/m + disp32] addressing, + * since r/m bits 100 implies SIB byte. Simplest fix is to not use these + * registers. Also note that these registers are listed in the freescratch + * function as well. */ +uae_s8 always_used[] = { ESP_INDEX, R12_INDEX, -1 }; +#else +uae_s8 always_used[] = { ESP_INDEX, -1 }; +#endif uae_s8 can_byte[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1}; uae_s8 can_word[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1}; #else +uae_s8 always_used[] = { ESP_INDEX, -1 }; uae_s8 can_byte[]={0,1,2,3,-1}; uae_s8 can_word[]={0,1,2,3,5,6,7,-1}; #endif +static bool have_lahf_lm = true; // target has LAHF supported in long mode ? #if USE_OPTIMIZED_CALLS /* Make sure interpretive core does not use cpuopti */ @@ -115,10 +141,20 @@ uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0}; - Special registers (such like the stack pointer) should not be "preserved" by pushing, even though they are "saved" across function calls */ -#if defined(__x86_64__) +#if defined(CPU_x86_64) +#ifdef _WIN64 +/* https://msdn.microsoft.com/en-us/library/6t169e9c.aspx: + * "The registers RBX, RBP, RDI, RSI, RSP, R12, R13, R14, and R15 are + * considered nonvolatile and must be saved and restored by a function that + * uses them". Also saving r11 for now (see comment below). 
*/ +static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,1,1,0,0,0,1,1,1,1,1}; +#else /* callee-saved registers as defined by Linux AMD64 ABI: rbx, rbp, rsp, r12 - r15 */ /* preserve r11 because it's generally used to hold pointers to functions */ +/* FIXME: not really sure what the point of saving r11 is (??). If functions + * cannot assume callee preserves it, it will not be used across calls anyway? */ static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1}; +#endif #else /* callee-saved registers as defined by System V IA-32 ABI: edi, esi, ebx, ebp */ static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,1,1}; @@ -145,8 +181,8 @@ static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,1,1}; #define CLOBBER_SHRL clobber_flags() #define CLOBBER_SHRA clobber_flags() #define CLOBBER_TEST clobber_flags() -#define CLOBBER_CL16 -#define CLOBBER_CL8 +#define CLOBBER_CL16 +#define CLOBBER_CL8 #define CLOBBER_SE32 #define CLOBBER_SE16 #define CLOBBER_SE8 @@ -160,19 +196,14 @@ static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,1,1}; #define CLOBBER_BT clobber_flags() #define CLOBBER_BSF clobber_flags() -/* The older code generator is now deprecated. */ -#define USE_NEW_RTASM 1 - -#if USE_NEW_RTASM - -#if defined(__x86_64__) +#if defined(CPU_x86_64) #define X86_TARGET_64BIT 1 /* The address override prefix causes a 5 cycles penalty on Intel Core processors. Another solution would be to decompose the load in an LEA, MOV (to zero-extend), MOV (from memory): is it better? 
*/ #define ADDR32 x86_emit_byte(0x67), #else -#define ADDR32 /**/ +#define ADDR32 #endif #define X86_FLAT_REGISTERS 0 #define X86_OPTIMIZE_ALU 1 @@ -186,3095 +217,1056 @@ static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,1,1}; #define x86_get_target() get_target() #define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__) +static inline void x86_64_addr32(void) +{ +#ifdef CPU_x86_64 + emit_byte(0x67); +#endif +} + +static inline void x86_64_rex(bool /* w */, uae_u32 * /* r */, uae_u32 * /* x */, uae_u32 *b) +{ +#ifdef CPU_x86_64 + int rex_byte = 0x40; + if (*b >= R8_INDEX) { + *b -= R8_INDEX; + rex_byte |= 1; + } + if (rex_byte != 0x40) { + emit_byte(rex_byte); + } +#else + UNUSED(b); +#endif +} + +static inline void x86_64_prefix( + bool addr32, bool w, uae_u32 *r, uae_u32 *x, uae_u32 *b) +{ + if (addr32) { + x86_64_addr32(); + } + x86_64_rex(w, r, x, b); +} + +// Some mappings to mark compemu_support calls as only used by compemu +// These are still mainly x86 minded. 
Should be more CPU independent in the future +#define compemu_raw_add_l_mi(a,b) raw_add_l_mi(a,b) +#define compemu_raw_and_l_ri(a,b) raw_and_l_ri(a,b) +#define compemu_raw_bswap_32(a) raw_bswap_32(a) +#define compemu_raw_bt_l_ri(a,b) raw_bt_l_ri(a,b) +#define compemu_raw_call(a) raw_call(a) +#define compemu_raw_cmov_l_rm_indexed(a,b,c,d,e) raw_cmov_l_rm_indexed(a,b,c,d,e) +#define compemu_raw_cmp_l_mi(a,b) raw_cmp_l_mi(a,b) +#define compemu_raw_cmp_l_mi8(a,b) raw_cmp_l_mi(a,b) +#define compemu_raw_jcc_b_oponly(a) raw_jcc_b_oponly(a) +#define compemu_raw_jcc_l_oponly(a) raw_jcc_l_oponly(a) +#define compemu_raw_jl(a) raw_jl(a) +#define compemu_raw_jmp(a) raw_jmp(a) +#define compemu_raw_jmp_m_indexed(a,b,c) raw_jmp_m_indexed(a,b,c) +#define compemu_raw_jmp_r(a) raw_jmp_r(a) +#define compemu_raw_jnz(a) raw_jnz(a) +#define compemu_raw_jz_b_oponly() raw_jz_b_oponly() +#define compemu_raw_jnz_b_oponly() raw_jnz_b_oponly() +#define compemu_raw_lea_l_brr(a,b,c) raw_lea_l_brr(a,b,c) +#define compemu_raw_lea_l_brr_indexed(a,b,c,d,e) raw_lea_l_brr_indexed(a,b,c,d,e) +#define compemu_raw_mov_b_mr(a,b) raw_mov_b_mr(a,b) +#define compemu_raw_mov_l_mi(a,b) raw_mov_l_mi(a,b) +#define compemu_raw_mov_l_mr(a,b) raw_mov_l_mr(a,b) +#define compemu_raw_mov_l_ri(a,b) raw_mov_l_ri(a,b) +#define compemu_raw_mov_l_rm(a,b) raw_mov_l_rm(a,b) +#define compemu_raw_mov_l_rr(a,b) raw_mov_l_rr(a,b) +#define compemu_raw_mov_w_mr(a,b) raw_mov_w_mr(a,b) +#define compemu_raw_sub_l_mi(a,b) raw_sub_l_mi(a,b) +#define compemu_raw_test_l_rr(a,b) raw_test_l_rr(a,b) +#define compemu_raw_zero_extend_16_rr(a,b) raw_zero_extend_16_rr(a,b) +#define compemu_raw_lea_l_rr_indexed(a,b,c,d) raw_lea_l_rr_indexed(a,b,c,d) + static void jit_fail(const char *msg, const char *file, int line, const char *function) { - fprintf(stderr, "JIT failure in function %s from file %s at line %d: %s\n", + jit_abort("failure in function %s from file %s at line %d: %s", function, file, line, msg); - abort(); } 
LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r)) { -#if defined(__x86_64__) +#if defined(CPU_x86_64) PUSHQr(r); #else PUSHLr(r); #endif } -LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r)) LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r)) { -#if defined(__x86_64__) +#if defined(CPU_x86_64) POPQr(r); #else POPLr(r); #endif } -LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r)) LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d)) { -#if defined(__x86_64__) +#if defined(CPU_x86_64) POPQm(d, X86_NOREG, X86_NOREG, 1); #else POPLm(d, X86_NOREG, X86_NOREG, 1); #endif } -LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d)) LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i)) { BTLir(i, r); } -LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i)) LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b)) { BTLrr(b, r); } -LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b)) LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i)) { BTCLir(i, r); } -LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i)) LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b)) { BTCLrr(b, r); } -LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b)) LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i)) { BTRLir(i, r); } -LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i)) LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b)) { BTRLrr(b, r); } -LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b)) LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i)) { BTSLir(i, r); } -LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i)) LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b)) { BTSLrr(b, r); } -LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b)) LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i)) { SUBWir(i, d); } -LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i)) LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s)) { - MOVLmr(s, X86_NOREG, X86_NOREG, 1, d); + ADDR32 MOVLmr(s, X86_NOREG, X86_NOREG, 1, d); } -LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s)) LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s)) { - MOVLim(s, d, X86_NOREG, X86_NOREG, 1); + ADDR32 MOVLim(s, d, X86_NOREG, X86_NOREG, 1); } 
-LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s)) LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s)) { - MOVWim(s, d, X86_NOREG, X86_NOREG, 1); + ADDR32 MOVWim(s, d, X86_NOREG, X86_NOREG, 1); } -LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s)) LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s)) { - MOVBim(s, d, X86_NOREG, X86_NOREG, 1); + ADDR32 MOVBim(s, d, X86_NOREG, X86_NOREG, 1); } -LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s)) LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i)) { - ROLBim(i, d, X86_NOREG, X86_NOREG, 1); + ADDR32 ROLBim(i, d, X86_NOREG, X86_NOREG, 1); } -LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i)) LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i)) { ROLBir(i, r); } -LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i)) LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i)) { ROLWir(i, r); } -LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i)) LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i)) { ROLLir(i, r); } -LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i)) LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r)) { ROLLrr(r, d); } -LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r)) LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r)) { ROLWrr(r, d); } -LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r)) LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r)) { ROLBrr(r, d); } -LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r)) LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r)) { SHLLrr(r, d); } -LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r)) LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r)) { SHLWrr(r, d); } -LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r)) LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r)) { SHLBrr(r, d); } -LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r)) LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i)) { RORBir(i, r); } -LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i)) LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i)) { RORWir(i, r); } -LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i)) LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, 
MEMR s)) { - ORLmr(s, X86_NOREG, X86_NOREG, 1, d); + ADDR32 ORLmr(s, X86_NOREG, X86_NOREG, 1, d); } -LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s)) LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i)) { RORLir(i, r); } -LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i)) LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r)) { RORLrr(r, d); } -LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r)) LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r)) { RORWrr(r, d); } -LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r)) LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r)) { RORBrr(r, d); } -LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r)) LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r)) { SHRLrr(r, d); } -LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r)) LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r)) { SHRWrr(r, d); } -LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r)) LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r)) { SHRBrr(r, d); } -LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r)) LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r)) { SARLrr(r, d); } -LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r)) LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r)) { SARWrr(r, d); } -LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r)) LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r)) { SARBrr(r, d); } -LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r)) LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i)) { SHLLir(i, r); } -LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i)) LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i)) { SHLWir(i, r); } -LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i)) LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i)) { SHLBir(i, r); } -LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i)) LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i)) { SHRLir(i, r); } -LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i)) LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i)) { SHRWir(i, r); } -LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i)) LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, 
IMM i)) { SHRBir(i, r); } -LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i)) LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i)) { SARLir(i, r); } -LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i)) LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i)) { SARWir(i, r); } -LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i)) LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i)) { SARBir(i, r); } -LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i)) -LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah)) +LOWFUNC(WRITE,NONE,1,raw_sahf,(R2)) { SAHF(); } -LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah)) -LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax)) +LOWFUNC(NONE,NONE,1,raw_cpuid,(R4)) { CPUID(); } -LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax)) -LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah)) +LOWFUNC(READ,NONE,1,raw_lahf,(W2)) { LAHF(); } -LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah)) LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc)) { SETCCir(cc, d); } -LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc)) LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc)) { - SETCCim(cc, d, X86_NOREG, X86_NOREG, 1); + ADDR32 SETCCim(cc, d, X86_NOREG, X86_NOREG, 1); } -LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc)) - -LOWFUNC(READ,NONE,3,raw_cmov_b_rr,(RW1 d, R1 s, IMM cc)) -{ - /* replacement using branch and mov */ - int8 *target_p = (int8 *)x86_get_target() + 1; - JCCSii(cc^1, 0); - MOVBrr(s, d); - *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1); -} -LENDFUNC(READ,NONE,3,raw_cmov_b_rr,(RW1 d, R1 s, IMM cc)) - -LOWFUNC(READ,NONE,3,raw_cmov_w_rr,(RW2 d, R2 s, IMM cc)) -{ - if (have_cmov) - CMOVWrr(cc, s, d); - else { /* replacement using branch and mov */ - int8 *target_p = (int8 *)x86_get_target() + 1; - JCCSii(cc^1, 0); - MOVWrr(s, d); - *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1); - } -} -LENDFUNC(READ,NONE,3,raw_cmov_w_rr,(RW2 d, R2 s, IMM cc)) LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc)) { if (have_cmov) CMOVLrr(cc, s, d); else { /* replacement using branch 
and mov */ - int8 *target_p = (int8 *)x86_get_target() + 1; + uae_s8 *target_p = (uae_s8 *)x86_get_target() + 1; JCCSii(cc^1, 0); MOVLrr(s, d); - *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1); + *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1); } } -LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc)) LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s)) { BSFLrr(s, d); } -LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s)) LOWFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s)) { MOVSLQrr(s, d); } -LENDFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s)) LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s)) { MOVSWLrr(s, d); } -LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s)) LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s)) { MOVSBLrr(s, d); } -LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s)) LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s)) { MOVZWLrr(s, d); } -LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s)) LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s)) { MOVZBLrr(s, d); } -LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s)) LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s)) { IMULLrr(s, d); } -LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s)) LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s)) { if (d!=MUL_NREG1 || s!=MUL_NREG2) { - write_log("Bad register in IMUL: d=%d, s=%d\n",d,s); - abort(); + jit_abort("Bad register in IMUL: d=%d, s=%d",d,s); } IMULLr(s); } -LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s)) LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s)) { if (d!=MUL_NREG1 || s!=MUL_NREG2) { - write_log("Bad register in MUL: d=%d, s=%d\n",d,s); - abort(); + jit_abort("Bad register in MUL: d=%d, s=%d",d,s); } MULLr(s); } -LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s)) -LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s)) +LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4, R4)) { - abort(); /* %^$&%^$%#^ x86! */ + x86_emit_failure("raw_mul_32_32"); /* %^$&%^$%#^ x86! 
*/ } -LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s)) LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s)) { MOVBrr(s, d); } -LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s)) LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s)) { MOVWrr(s, d); } -LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s)) LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor)) { ADDR32 MOVLmr(0, baser, index, factor, d); } -LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor)) LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor)) { ADDR32 MOVWmr(0, baser, index, factor, d); } -LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor)) LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor)) { ADDR32 MOVBmr(0, baser, index, factor, d); } -LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor)) LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s)) { ADDR32 MOVLrm(s, 0, baser, index, factor); } -LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s)) LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s)) { ADDR32 MOVWrm(s, 0, baser, index, factor); } -LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s)) LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s)) { ADDR32 MOVBrm(s, 0, baser, index, factor); } -LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s)) LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s)) { ADDR32 MOVLrm(s, base, baser, index, factor); } -LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s)) LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s)) { ADDR32 MOVWrm(s, base, baser, index, factor); } 
-LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s)) LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s)) { ADDR32 MOVBrm(s, base, baser, index, factor); } -LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s)) LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor)) { ADDR32 MOVLmr(base, baser, index, factor, d); } -LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor)) LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor)) { ADDR32 MOVWmr(base, baser, index, factor, d); } -LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor)) LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor)) { ADDR32 MOVBmr(base, baser, index, factor, d); } -LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor)) LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor)) { ADDR32 MOVLmr(base, X86_NOREG, index, factor, d); } -LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor)) LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond)) { if (have_cmov) ADDR32 CMOVLmr(cond, base, X86_NOREG, index, factor, d); else { /* replacement using branch and mov */ - int8 *target_p = (int8 *)x86_get_target() + 1; + uae_s8 *target_p = (uae_s8 *)x86_get_target() + 1; JCCSii(cond^1, 0); ADDR32 MOVLmr(base, X86_NOREG, index, factor, d); - *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1); + *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1); } } -LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond)) LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond)) { if (have_cmov) CMOVLmr(cond, mem, X86_NOREG, 
X86_NOREG, 1, d); else { /* replacement using branch and mov */ - int8 *target_p = (int8 *)x86_get_target() + 1; + uae_s8 *target_p = (uae_s8 *)x86_get_target() + 1; JCCSii(cond^1, 0); - MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d); - *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1); + ADDR32 MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d); + *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1); } } -LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond)) LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset)) { ADDR32 MOVLmr(offset, s, X86_NOREG, 1, d); } -LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset)) LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset)) { ADDR32 MOVWmr(offset, s, X86_NOREG, 1, d); } -LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset)) LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset)) { ADDR32 MOVBmr(offset, s, X86_NOREG, 1, d); } -LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset)) LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset)) { ADDR32 MOVLmr(offset, s, X86_NOREG, 1, d); } -LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset)) LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset)) { ADDR32 MOVWmr(offset, s, X86_NOREG, 1, d); } -LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset)) LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset)) { ADDR32 MOVBmr(offset, s, X86_NOREG, 1, d); } -LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset)) LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset)) { ADDR32 MOVLim(i, offset, d, X86_NOREG, 1); } -LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset)) LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset)) { ADDR32 MOVWim(i, offset, d, X86_NOREG, 1); } -LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset)) LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset)) { ADDR32 MOVBim(i, offset, d, X86_NOREG, 1); } -LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset)) 
LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset)) { ADDR32 MOVLrm(s, offset, d, X86_NOREG, 1); } -LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset)) LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset)) { ADDR32 MOVWrm(s, offset, d, X86_NOREG, 1); } -LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset)) LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset)) { ADDR32 MOVBrm(s, offset, d, X86_NOREG, 1); } -LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset)) LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset)) { - LEALmr(offset, s, X86_NOREG, 1, d); + ADDR32 LEALmr(offset, s, X86_NOREG, 1, d); } -LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset)) LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset)) { - LEALmr(offset, s, index, factor, d); + ADDR32 LEALmr(offset, s, index, factor, d); } -LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset)) LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor)) { - LEALmr(0, s, index, factor, d); + ADDR32 LEALmr(0, s, index, factor, d); } -LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor)) LOWFUNC(NONE,NONE,4,raw_lea_l_r_scaled,(W4 d, R4 index, IMM factor)) { - LEALmr(0, X86_NOREG, index, factor, d); + ADDR32 LEALmr(0, X86_NOREG, index, factor, d); } -LENDFUNC(NONE,NONE,4,raw_lea_l_r_scaled,(W4 d, R4 index, IMM factor)) LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset)) { ADDR32 MOVLrm(s, offset, d, X86_NOREG, 1); } -LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset)) LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset)) { ADDR32 MOVWrm(s, offset, d, X86_NOREG, 1); } -LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset)) LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset)) { ADDR32 MOVBrm(s, offset, d, X86_NOREG, 1); } -LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset)) 
LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r)) { BSWAPLr(r); } -LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r)) LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r)) { ROLWir(8, r); } -LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r)) LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s)) { MOVLrr(s, d); } -LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s)) LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s)) { - MOVLrm(s, d, X86_NOREG, X86_NOREG, 1); + ADDR32 MOVLrm(s, d, X86_NOREG, X86_NOREG, 1); } -LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s)) LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s)) { - MOVWrm(s, d, X86_NOREG, X86_NOREG, 1); + ADDR32 MOVWrm(s, d, X86_NOREG, X86_NOREG, 1); } -LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s)) LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s)) { - MOVWmr(s, X86_NOREG, X86_NOREG, 1, d); + ADDR32 MOVWmr(s, X86_NOREG, X86_NOREG, 1, d); } -LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s)) LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s)) { - MOVBrm(s, d, X86_NOREG, X86_NOREG, 1); + ADDR32 MOVBrm(s, d, X86_NOREG, X86_NOREG, 1); } -LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s)) LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s)) { - MOVBmr(s, X86_NOREG, X86_NOREG, 1, d); + ADDR32 MOVBmr(s, X86_NOREG, X86_NOREG, 1, d); } -LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s)) LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s)) { MOVLir(s, d); } -LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s)) LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s)) { MOVWir(s, d); } -LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s)) LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s)) { MOVBir(s, d); } -LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s)) LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s)) { - ADCLim(s, d, X86_NOREG, X86_NOREG, 1); + ADDR32 ADCLim(s, d, X86_NOREG, X86_NOREG, 1); } -LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s)) LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s)) { - ADDLim(s, d, X86_NOREG, X86_NOREG, 1); + ADDR32 ADDLim(s, d, X86_NOREG, X86_NOREG, 1); } 
-LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s)) LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s)) { - ADDWim(s, d, X86_NOREG, X86_NOREG, 1); + ADDR32 ADDWim(s, d, X86_NOREG, X86_NOREG, 1); } -LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s)) LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s)) { - ADDBim(s, d, X86_NOREG, X86_NOREG, 1); + ADDR32 ADDBim(s, d, X86_NOREG, X86_NOREG, 1); } -LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s)) LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i)) { TESTLir(i, d); } -LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i)) LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s)) { TESTLrr(s, d); } -LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s)) LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s)) { TESTWrr(s, d); } -LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s)) LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s)) { TESTBrr(s, d); } -LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s)) + +LOWFUNC(WRITE,READ,2,raw_test_b_mi,(IMM d, IMM s)) +{ + ADDR32 TESTBim(s, d, X86_NOREG, X86_NOREG, 1); +} LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i)) { XORLir(i, d); } -LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i)) LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i)) { ANDLir(i, d); } -LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i)) LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i)) { ANDWir(i, d); } -LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i)) LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s)) { ANDLrr(s, d); } -LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s)) LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s)) { ANDWrr(s, d); } -LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s)) LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s)) { ANDBrr(s, d); } -LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s)) LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i)) { ORLir(i, d); } -LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i)) LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s)) { ORLrr(s, d); } -LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s)) LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s)) { 
ORWrr(s, d); } -LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s)) LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s)) { ORBrr(s, d); } -LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s)) LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s)) { ADCLrr(s, d); } -LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s)) LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s)) { ADCWrr(s, d); } -LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s)) LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s)) { ADCBrr(s, d); } -LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s)) LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s)) { ADDLrr(s, d); } -LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s)) LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s)) { ADDWrr(s, d); } -LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s)) LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s)) { ADDBrr(s, d); } -LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s)) LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i)) { SUBLir(i, d); } -LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i)) LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i)) { SUBBir(i, d); } -LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i)) LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i)) { ADDLir(i, d); } -LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i)) LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i)) { ADDWir(i, d); } -LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i)) LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i)) { ADDBir(i, d); } -LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i)) LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s)) { SBBLrr(s, d); } -LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s)) LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s)) { SBBWrr(s, d); } -LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s)) LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s)) { SBBBrr(s, d); } -LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s)) LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s)) { SUBLrr(s, d); } -LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s)) LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s)) { SUBWrr(s, d); } -LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s)) 
LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s)) { SUBBrr(s, d); } -LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s)) LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s)) { CMPLrr(s, d); } -LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s)) LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i)) { CMPLir(i, r); } -LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i)) LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s)) { CMPWrr(s, d); } -LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s)) LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s)) { - CMPBim(s, d, X86_NOREG, X86_NOREG, 1); + ADDR32 CMPBim(s, d, X86_NOREG, X86_NOREG, 1); } -LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s)) LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i)) { CMPBir(i, d); } -LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i)) LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s)) { CMPBrr(s, d); } -LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s)) LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor)) { ADDR32 CMPLmr(offset, X86_NOREG, index, factor, d); } -LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor)) LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s)) { XORLrr(s, d); } -LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s)) LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s)) { XORWrr(s, d); } -LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s)) LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s)) { XORBrr(s, d); } -LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s)) LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s)) { - SUBLim(s, d, X86_NOREG, X86_NOREG, 1); + ADDR32 SUBLim(s, d, X86_NOREG, X86_NOREG, 1); } -LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s)) LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s)) { - CMPLim(s, d, X86_NOREG, X86_NOREG, 1); + ADDR32 CMPLim(s, d, X86_NOREG, X86_NOREG, 1); } -LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s)) LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2)) { XCHGLrr(r2, r1); } -LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2)) LOWFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 
r1, RW4 r2)) { XCHGBrr(r2, r1); } -LENDFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2)) LOWFUNC(READ,WRITE,0,raw_pushfl,(void)) { PUSHF(); } -LENDFUNC(READ,WRITE,0,raw_pushfl,(void)) LOWFUNC(WRITE,READ,0,raw_popfl,(void)) { POPF(); } -LENDFUNC(WRITE,READ,0,raw_popfl,(void)) /* Generate floating-point instructions */ static inline void x86_fadd_m(MEMR s) { - FADDDm(s,X86_NOREG,X86_NOREG,1); + ADDR32 FADDLm(s,X86_NOREG,X86_NOREG,1); } -#else - -const bool optimize_accum = true; -const bool optimize_imm8 = true; -const bool optimize_shift_once = true; - -/************************************************************************* - * Actual encoding of the instructions on the target CPU * - *************************************************************************/ - -static __inline__ int isaccum(int r) -{ - return (r == EAX_INDEX); -} - -static __inline__ int isbyte(uae_s32 x) -{ - return (x>=-128 && x<=127); -} - -static __inline__ int isword(uae_s32 x) -{ - return (x>=-32768 && x<=32767); -} - -LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r)) -{ - emit_byte(0x50+r); -} -LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r)) - -LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r)) -{ - emit_byte(0x58+r); -} -LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r)) - -LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d)) -{ - emit_byte(0x8f); - emit_byte(0x05); - emit_long(d); -} -LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d)) - -LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i)) -{ - emit_byte(0x0f); - emit_byte(0xba); - emit_byte(0xe0+r); - emit_byte(i); -} -LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i)) - -LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b)) -{ - emit_byte(0x0f); - emit_byte(0xa3); - emit_byte(0xc0+8*b+r); -} -LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b)) - -LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i)) -{ - emit_byte(0x0f); - emit_byte(0xba); - emit_byte(0xf8+r); - emit_byte(i); -} -LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i)) - -LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b)) -{ - emit_byte(0x0f); - 
emit_byte(0xbb); - emit_byte(0xc0+8*b+r); -} -LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b)) - - -LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i)) -{ - emit_byte(0x0f); - emit_byte(0xba); - emit_byte(0xf0+r); - emit_byte(i); -} -LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i)) - -LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b)) -{ - emit_byte(0x0f); - emit_byte(0xb3); - emit_byte(0xc0+8*b+r); -} -LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b)) - -LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i)) -{ - emit_byte(0x0f); - emit_byte(0xba); - emit_byte(0xe8+r); - emit_byte(i); -} -LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i)) - -LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b)) -{ - emit_byte(0x0f); - emit_byte(0xab); - emit_byte(0xc0+8*b+r); -} -LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b)) - -LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i)) -{ - emit_byte(0x66); - if (isbyte(i)) { - emit_byte(0x83); - emit_byte(0xe8+d); - emit_byte(i); - } - else { - if (optimize_accum && isaccum(d)) - emit_byte(0x2d); - else { - emit_byte(0x81); - emit_byte(0xe8+d); - } - emit_word(i); - } -} -LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i)) - - -LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s)) -{ - emit_byte(0x8b); - emit_byte(0x05+8*d); - emit_long(s); -} -LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s)) - -LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s)) -{ - emit_byte(0xc7); - emit_byte(0x05); - emit_long(d); - emit_long(s); -} -LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s)) - -LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s)) -{ - emit_byte(0x66); - emit_byte(0xc7); - emit_byte(0x05); - emit_long(d); - emit_word(s); -} -LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s)) - -LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s)) -{ - emit_byte(0xc6); - emit_byte(0x05); - emit_long(d); - emit_byte(s); -} -LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s)) - -LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i)) -{ - if (optimize_shift_once && (i == 1)) { - 
emit_byte(0xd0); - emit_byte(0x05); - emit_long(d); - } - else { - emit_byte(0xc0); - emit_byte(0x05); - emit_long(d); - emit_byte(i); - } -} -LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i)) - -LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i)) -{ - if (optimize_shift_once && (i == 1)) { - emit_byte(0xd0); - emit_byte(0xc0+r); - } - else { - emit_byte(0xc0); - emit_byte(0xc0+r); - emit_byte(i); - } -} -LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i)) - -LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i)) -{ - emit_byte(0x66); - emit_byte(0xc1); - emit_byte(0xc0+r); - emit_byte(i); -} -LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i)) - -LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i)) -{ - if (optimize_shift_once && (i == 1)) { - emit_byte(0xd1); - emit_byte(0xc0+r); - } - else { - emit_byte(0xc1); - emit_byte(0xc0+r); - emit_byte(i); - } -} -LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i)) - -LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r)) -{ - emit_byte(0xd3); - emit_byte(0xc0+d); -} -LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r)) - -LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r)) -{ - emit_byte(0x66); - emit_byte(0xd3); - emit_byte(0xc0+d); -} -LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r)) - -LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r)) -{ - emit_byte(0xd2); - emit_byte(0xc0+d); -} -LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r)) - -LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r)) -{ - emit_byte(0xd3); - emit_byte(0xe0+d); -} -LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r)) - -LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r)) -{ - emit_byte(0x66); - emit_byte(0xd3); - emit_byte(0xe0+d); -} -LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r)) - -LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r)) -{ - emit_byte(0xd2); - emit_byte(0xe0+d); -} -LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r)) - -LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i)) -{ - if (optimize_shift_once && (i == 1)) { - emit_byte(0xd0); - emit_byte(0xc8+r); - } - else { 
- emit_byte(0xc0); - emit_byte(0xc8+r); - emit_byte(i); - } -} -LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i)) - -LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i)) -{ - emit_byte(0x66); - emit_byte(0xc1); - emit_byte(0xc8+r); - emit_byte(i); -} -LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i)) - -// gb-- used for making an fpcr value in compemu_fpp.cpp -LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s)) -{ - emit_byte(0x0b); - emit_byte(0x05+8*d); - emit_long(s); -} -LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s)) - -LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i)) -{ - if (optimize_shift_once && (i == 1)) { - emit_byte(0xd1); - emit_byte(0xc8+r); - } - else { - emit_byte(0xc1); - emit_byte(0xc8+r); - emit_byte(i); - } -} -LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i)) - -LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r)) -{ - emit_byte(0xd3); - emit_byte(0xc8+d); -} -LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r)) - -LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r)) -{ - emit_byte(0x66); - emit_byte(0xd3); - emit_byte(0xc8+d); -} -LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r)) - -LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r)) -{ - emit_byte(0xd2); - emit_byte(0xc8+d); -} -LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r)) - -LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r)) -{ - emit_byte(0xd3); - emit_byte(0xe8+d); -} -LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r)) - -LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r)) -{ - emit_byte(0x66); - emit_byte(0xd3); - emit_byte(0xe8+d); -} -LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r)) - -LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r)) -{ - emit_byte(0xd2); - emit_byte(0xe8+d); -} -LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r)) - -LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r)) -{ - emit_byte(0xd3); - emit_byte(0xf8+d); -} -LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r)) - -LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r)) -{ - emit_byte(0x66); - emit_byte(0xd3); - emit_byte(0xf8+d); -} 
-LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r)) - -LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r)) -{ - emit_byte(0xd2); - emit_byte(0xf8+d); -} -LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r)) - -LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i)) -{ - if (optimize_shift_once && (i == 1)) { - emit_byte(0xd1); - emit_byte(0xe0+r); - } - else { - emit_byte(0xc1); - emit_byte(0xe0+r); - emit_byte(i); - } -} -LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i)) - -LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i)) -{ - emit_byte(0x66); - emit_byte(0xc1); - emit_byte(0xe0+r); - emit_byte(i); -} -LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i)) - -LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i)) -{ - if (optimize_shift_once && (i == 1)) { - emit_byte(0xd0); - emit_byte(0xe0+r); - } - else { - emit_byte(0xc0); - emit_byte(0xe0+r); - emit_byte(i); - } -} -LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i)) - -LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i)) -{ - if (optimize_shift_once && (i == 1)) { - emit_byte(0xd1); - emit_byte(0xe8+r); - } - else { - emit_byte(0xc1); - emit_byte(0xe8+r); - emit_byte(i); - } -} -LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i)) - -LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i)) -{ - emit_byte(0x66); - emit_byte(0xc1); - emit_byte(0xe8+r); - emit_byte(i); -} -LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i)) - -LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i)) -{ - if (optimize_shift_once && (i == 1)) { - emit_byte(0xd0); - emit_byte(0xe8+r); - } - else { - emit_byte(0xc0); - emit_byte(0xe8+r); - emit_byte(i); - } -} -LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i)) - -LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i)) -{ - if (optimize_shift_once && (i == 1)) { - emit_byte(0xd1); - emit_byte(0xf8+r); - } - else { - emit_byte(0xc1); - emit_byte(0xf8+r); - emit_byte(i); - } -} -LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i)) - -LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i)) -{ - emit_byte(0x66); - 
emit_byte(0xc1); - emit_byte(0xf8+r); - emit_byte(i); -} -LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i)) - -LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i)) -{ - if (optimize_shift_once && (i == 1)) { - emit_byte(0xd0); - emit_byte(0xf8+r); - } - else { - emit_byte(0xc0); - emit_byte(0xf8+r); - emit_byte(i); - } -} -LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i)) - -LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah)) -{ - emit_byte(0x9e); -} -LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah)) - -LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax)) -{ - emit_byte(0x0f); - emit_byte(0xa2); -} -LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax)) - -LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah)) -{ - emit_byte(0x9f); -} -LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah)) - -LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc)) -{ - emit_byte(0x0f); - emit_byte(0x90+cc); - emit_byte(0xc0+d); -} -LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc)) - -LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc)) -{ - emit_byte(0x0f); - emit_byte(0x90+cc); - emit_byte(0x05); - emit_long(d); -} -LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc)) - -LOWFUNC(READ,NONE,3,raw_cmov_b_rr,(RW1 d, R1 s, IMM cc)) -{ - /* replacement using branch and mov */ - int uncc=(cc^1); - emit_byte(0x70+uncc); - emit_byte(3); /* skip next 2 bytes if not cc=true */ - emit_byte(0x88); - emit_byte(0xc0+8*s+d); -} -LENDFUNC(READ,NONE,3,raw_cmov_b_rr,(RW1 d, R1 s, IMM cc)) - -LOWFUNC(READ,NONE,3,raw_cmov_w_rr,(RW2 d, R2 s, IMM cc)) -{ - if (have_cmov) { - emit_byte(0x66); - emit_byte(0x0f); - emit_byte(0x40+cc); - emit_byte(0xc0+8*d+s); - } - else { /* replacement using branch and mov */ - int uncc=(cc^1); - emit_byte(0x70+uncc); - emit_byte(3); /* skip next 3 bytes if not cc=true */ - emit_byte(0x66); - emit_byte(0x89); - emit_byte(0xc0+8*s+d); - } -} -LENDFUNC(READ,NONE,3,raw_cmov_w_rr,(RW2 d, R2 s, IMM cc)) - -LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc)) -{ - if (have_cmov) { - emit_byte(0x0f); - emit_byte(0x40+cc); - 
emit_byte(0xc0+8*d+s); - } - else { /* replacement using branch and mov */ - int uncc=(cc^1); - emit_byte(0x70+uncc); - emit_byte(2); /* skip next 2 bytes if not cc=true */ - emit_byte(0x89); - emit_byte(0xc0+8*s+d); - } -} -LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc)) - -LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s)) -{ - emit_byte(0x0f); - emit_byte(0xbc); - emit_byte(0xc0+8*d+s); -} -LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s)) - -LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s)) -{ - emit_byte(0x0f); - emit_byte(0xbf); - emit_byte(0xc0+8*d+s); -} -LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s)) - -LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s)) -{ - emit_byte(0x0f); - emit_byte(0xbe); - emit_byte(0xc0+8*d+s); -} -LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s)) - -LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s)) -{ - emit_byte(0x0f); - emit_byte(0xb7); - emit_byte(0xc0+8*d+s); -} -LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s)) - -LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s)) -{ - emit_byte(0x0f); - emit_byte(0xb6); - emit_byte(0xc0+8*d+s); -} -LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s)) - -LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s)) -{ - emit_byte(0x0f); - emit_byte(0xaf); - emit_byte(0xc0+8*d+s); -} -LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s)) - -LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s)) -{ - if (d!=MUL_NREG1 || s!=MUL_NREG2) - abort(); - emit_byte(0xf7); - emit_byte(0xea); -} -LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s)) - -LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s)) -{ - if (d!=MUL_NREG1 || s!=MUL_NREG2) { - printf("Bad register in MUL: d=%d, s=%d\n",d,s); - abort(); - } - emit_byte(0xf7); - emit_byte(0xe2); -} -LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s)) - -LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s)) -{ - abort(); /* %^$&%^$%#^ x86! 
*/ - emit_byte(0x0f); - emit_byte(0xaf); - emit_byte(0xc0+8*d+s); -} -LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s)) - -LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s)) -{ - emit_byte(0x88); - emit_byte(0xc0+8*s+d); -} -LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s)) - -LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s)) -{ - emit_byte(0x66); - emit_byte(0x89); - emit_byte(0xc0+8*s+d); -} -LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s)) - -LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor)) -{ - int isebp=(baser==5)?0x40:0; - int fi; - - switch(factor) { - case 1: fi=0; break; - case 2: fi=1; break; - case 4: fi=2; break; - case 8: fi=3; break; - default: abort(); - } - - - emit_byte(0x8b); - emit_byte(0x04+8*d+isebp); - emit_byte(baser+8*index+0x40*fi); - if (isebp) - emit_byte(0x00); -} -LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor)) - -LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor)) -{ - int fi; - int isebp; - - switch(factor) { - case 1: fi=0; break; - case 2: fi=1; break; - case 4: fi=2; break; - case 8: fi=3; break; - default: abort(); - } - isebp=(baser==5)?0x40:0; - - emit_byte(0x66); - emit_byte(0x8b); - emit_byte(0x04+8*d+isebp); - emit_byte(baser+8*index+0x40*fi); - if (isebp) - emit_byte(0x00); -} -LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor)) - -LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor)) -{ - int fi; - int isebp; - - switch(factor) { - case 1: fi=0; break; - case 2: fi=1; break; - case 4: fi=2; break; - case 8: fi=3; break; - default: abort(); - } - isebp=(baser==5)?0x40:0; - - emit_byte(0x8a); - emit_byte(0x04+8*d+isebp); - emit_byte(baser+8*index+0x40*fi); - if (isebp) - emit_byte(0x00); -} -LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor)) - -LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s)) -{ - int fi; - int isebp; 
- - switch(factor) { - case 1: fi=0; break; - case 2: fi=1; break; - case 4: fi=2; break; - case 8: fi=3; break; - default: abort(); - } - - - isebp=(baser==5)?0x40:0; - - emit_byte(0x89); - emit_byte(0x04+8*s+isebp); - emit_byte(baser+8*index+0x40*fi); - if (isebp) - emit_byte(0x00); -} -LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s)) - -LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s)) -{ - int fi; - int isebp; - - switch(factor) { - case 1: fi=0; break; - case 2: fi=1; break; - case 4: fi=2; break; - case 8: fi=3; break; - default: abort(); - } - isebp=(baser==5)?0x40:0; - - emit_byte(0x66); - emit_byte(0x89); - emit_byte(0x04+8*s+isebp); - emit_byte(baser+8*index+0x40*fi); - if (isebp) - emit_byte(0x00); -} -LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s)) - -LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s)) -{ - int fi; - int isebp; - - switch(factor) { - case 1: fi=0; break; - case 2: fi=1; break; - case 4: fi=2; break; - case 8: fi=3; break; - default: abort(); - } - isebp=(baser==5)?0x40:0; - - emit_byte(0x88); - emit_byte(0x04+8*s+isebp); - emit_byte(baser+8*index+0x40*fi); - if (isebp) - emit_byte(0x00); -} -LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s)) - -LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s)) -{ - int fi; - - switch(factor) { - case 1: fi=0; break; - case 2: fi=1; break; - case 4: fi=2; break; - case 8: fi=3; break; - default: abort(); - } - - emit_byte(0x89); - emit_byte(0x84+8*s); - emit_byte(baser+8*index+0x40*fi); - emit_long(base); -} -LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s)) - -LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s)) -{ - int fi; - - switch(factor) { - case 1: fi=0; break; - case 2: fi=1; break; - case 4: fi=2; 
break; - case 8: fi=3; break; - default: abort(); - } - - emit_byte(0x66); - emit_byte(0x89); - emit_byte(0x84+8*s); - emit_byte(baser+8*index+0x40*fi); - emit_long(base); -} -LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s)) - -LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s)) -{ - int fi; - - switch(factor) { - case 1: fi=0; break; - case 2: fi=1; break; - case 4: fi=2; break; - case 8: fi=3; break; - default: abort(); - } - - emit_byte(0x88); - emit_byte(0x84+8*s); - emit_byte(baser+8*index+0x40*fi); - emit_long(base); -} -LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s)) - -LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor)) -{ - int fi; - - switch(factor) { - case 1: fi=0; break; - case 2: fi=1; break; - case 4: fi=2; break; - case 8: fi=3; break; - default: abort(); - } - - emit_byte(0x8b); - emit_byte(0x84+8*d); - emit_byte(baser+8*index+0x40*fi); - emit_long(base); -} -LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor)) - -LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor)) -{ - int fi; - - switch(factor) { - case 1: fi=0; break; - case 2: fi=1; break; - case 4: fi=2; break; - case 8: fi=3; break; - default: abort(); - } - - emit_byte(0x66); - emit_byte(0x8b); - emit_byte(0x84+8*d); - emit_byte(baser+8*index+0x40*fi); - emit_long(base); -} -LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor)) - -LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor)) -{ - int fi; - - switch(factor) { - case 1: fi=0; break; - case 2: fi=1; break; - case 4: fi=2; break; - case 8: fi=3; break; - default: abort(); - } - - emit_byte(0x8a); - emit_byte(0x84+8*d); - emit_byte(baser+8*index+0x40*fi); - emit_long(base); -} 
-LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor)) - -LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor)) -{ - int fi; - switch(factor) { - case 1: fi=0; break; - case 2: fi=1; break; - case 4: fi=2; break; - case 8: fi=3; break; - default: - fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor); - abort(); - } - emit_byte(0x8b); - emit_byte(0x04+8*d); - emit_byte(0x05+8*index+64*fi); - emit_long(base); -} -LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor)) - -LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond)) -{ - int fi; - switch(factor) { - case 1: fi=0; break; - case 2: fi=1; break; - case 4: fi=2; break; - case 8: fi=3; break; - default: - fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor); - abort(); - } - if (have_cmov) { - emit_byte(0x0f); - emit_byte(0x40+cond); - emit_byte(0x04+8*d); - emit_byte(0x05+8*index+64*fi); - emit_long(base); - } - else { /* replacement using branch and mov */ - int uncc=(cond^1); - emit_byte(0x70+uncc); - emit_byte(7); /* skip next 7 bytes if not cc=true */ - emit_byte(0x8b); - emit_byte(0x04+8*d); - emit_byte(0x05+8*index+64*fi); - emit_long(base); - } -} -LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond)) - -LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond)) -{ - if (have_cmov) { - emit_byte(0x0f); - emit_byte(0x40+cond); - emit_byte(0x05+8*d); - emit_long(mem); - } - else { /* replacement using branch and mov */ - int uncc=(cond^1); - emit_byte(0x70+uncc); - emit_byte(6); /* skip next 6 bytes if not cc=true */ - emit_byte(0x8b); - emit_byte(0x05+8*d); - emit_long(mem); - } -} -LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond)) - -LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset)) -{ - Dif(!isbyte(offset)) abort(); - emit_byte(0x8b); - emit_byte(0x40+8*d+s); - emit_byte(offset); -} 
-LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset)) - -LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset)) -{ - Dif(!isbyte(offset)) abort(); - emit_byte(0x66); - emit_byte(0x8b); - emit_byte(0x40+8*d+s); - emit_byte(offset); -} -LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset)) - -LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset)) -{ - Dif(!isbyte(offset)) abort(); - emit_byte(0x8a); - emit_byte(0x40+8*d+s); - emit_byte(offset); -} -LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset)) - -LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset)) -{ - emit_byte(0x8b); - emit_byte(0x80+8*d+s); - emit_long(offset); -} -LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset)) - -LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset)) -{ - emit_byte(0x66); - emit_byte(0x8b); - emit_byte(0x80+8*d+s); - emit_long(offset); -} -LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset)) - -LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset)) -{ - emit_byte(0x8a); - emit_byte(0x80+8*d+s); - emit_long(offset); -} -LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset)) - -LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset)) -{ - Dif(!isbyte(offset)) abort(); - emit_byte(0xc7); - emit_byte(0x40+d); - emit_byte(offset); - emit_long(i); -} -LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset)) - -LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset)) -{ - Dif(!isbyte(offset)) abort(); - emit_byte(0x66); - emit_byte(0xc7); - emit_byte(0x40+d); - emit_byte(offset); - emit_word(i); -} -LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset)) - -LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset)) -{ - Dif(!isbyte(offset)) abort(); - emit_byte(0xc6); - emit_byte(0x40+d); - emit_byte(offset); - emit_byte(i); -} -LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset)) - -LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset)) -{ - Dif(!isbyte(offset)) abort(); - 
emit_byte(0x89); - emit_byte(0x40+8*s+d); - emit_byte(offset); -} -LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset)) - -LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset)) -{ - Dif(!isbyte(offset)) abort(); - emit_byte(0x66); - emit_byte(0x89); - emit_byte(0x40+8*s+d); - emit_byte(offset); -} -LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset)) - -LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset)) -{ - Dif(!isbyte(offset)) abort(); - emit_byte(0x88); - emit_byte(0x40+8*s+d); - emit_byte(offset); -} -LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset)) - -LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset)) -{ - if (optimize_imm8 && isbyte(offset)) { - emit_byte(0x8d); - emit_byte(0x40+8*d+s); - emit_byte(offset); - } - else { - emit_byte(0x8d); - emit_byte(0x80+8*d+s); - emit_long(offset); - } -} -LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset)) - -LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset)) -{ - int fi; - - switch(factor) { - case 1: fi=0; break; - case 2: fi=1; break; - case 4: fi=2; break; - case 8: fi=3; break; - default: abort(); - } - - if (optimize_imm8 && isbyte(offset)) { - emit_byte(0x8d); - emit_byte(0x44+8*d); - emit_byte(0x40*fi+8*index+s); - emit_byte(offset); - } - else { - emit_byte(0x8d); - emit_byte(0x84+8*d); - emit_byte(0x40*fi+8*index+s); - emit_long(offset); - } -} -LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset)) - -LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor)) -{ - int isebp=(s==5)?0x40:0; - int fi; - - switch(factor) { - case 1: fi=0; break; - case 2: fi=1; break; - case 4: fi=2; break; - case 8: fi=3; break; - default: abort(); - } - - emit_byte(0x8d); - emit_byte(0x04+8*d+isebp); - emit_byte(0x40*fi+8*index+s); - if (isebp) - emit_byte(0); -} -LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor)) - 
-LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset)) -{ - if (optimize_imm8 && isbyte(offset)) { - emit_byte(0x89); - emit_byte(0x40+8*s+d); - emit_byte(offset); - } - else { - emit_byte(0x89); - emit_byte(0x80+8*s+d); - emit_long(offset); - } -} -LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset)) - -LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset)) -{ - emit_byte(0x66); - emit_byte(0x89); - emit_byte(0x80+8*s+d); - emit_long(offset); -} -LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset)) - -LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset)) -{ - if (optimize_imm8 && isbyte(offset)) { - emit_byte(0x88); - emit_byte(0x40+8*s+d); - emit_byte(offset); - } - else { - emit_byte(0x88); - emit_byte(0x80+8*s+d); - emit_long(offset); - } -} -LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset)) - -LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r)) -{ - emit_byte(0x0f); - emit_byte(0xc8+r); -} -LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r)) - -LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r)) -{ - emit_byte(0x66); - emit_byte(0xc1); - emit_byte(0xc0+r); - emit_byte(0x08); -} -LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r)) - -LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s)) -{ - emit_byte(0x89); - emit_byte(0xc0+8*s+d); -} -LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s)) - -LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s)) -{ - emit_byte(0x89); - emit_byte(0x05+8*s); - emit_long(d); -} -LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s)) - -LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s)) -{ - emit_byte(0x66); - emit_byte(0x89); - emit_byte(0x05+8*s); - emit_long(d); -} -LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s)) - -LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s)) -{ - emit_byte(0x66); - emit_byte(0x8b); - emit_byte(0x05+8*d); - emit_long(s); -} -LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s)) - -LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s)) -{ - emit_byte(0x88); - emit_byte(0x05+8*(s&0xf)); /* XXX this handles %ah case (defined as 
0x10+4) and others */ - emit_long(d); -} -LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s)) - -LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s)) -{ - emit_byte(0x8a); - emit_byte(0x05+8*d); - emit_long(s); -} -LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s)) - -LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s)) -{ - emit_byte(0xb8+d); - emit_long(s); -} -LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s)) - -LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s)) -{ - emit_byte(0x66); - emit_byte(0xb8+d); - emit_word(s); -} -LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s)) - -LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s)) -{ - emit_byte(0xb0+d); - emit_byte(s); -} -LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s)) - -LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s)) -{ - emit_byte(0x81); - emit_byte(0x15); - emit_long(d); - emit_long(s); -} -LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s)) - -LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s)) -{ - if (optimize_imm8 && isbyte(s)) { - emit_byte(0x83); - emit_byte(0x05); - emit_long(d); - emit_byte(s); - } - else { - emit_byte(0x81); - emit_byte(0x05); - emit_long(d); - emit_long(s); - } -} -LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s)) - -LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s)) -{ - emit_byte(0x66); - emit_byte(0x81); - emit_byte(0x05); - emit_long(d); - emit_word(s); -} -LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s)) - -LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s)) -{ - emit_byte(0x80); - emit_byte(0x05); - emit_long(d); - emit_byte(s); -} -LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s)) - -LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i)) -{ - if (optimize_accum && isaccum(d)) - emit_byte(0xa9); - else { - emit_byte(0xf7); - emit_byte(0xc0+d); - } - emit_long(i); -} -LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i)) - -LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s)) -{ - emit_byte(0x85); - emit_byte(0xc0+8*s+d); -} -LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s)) - -LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, 
R2 s)) -{ - emit_byte(0x66); - emit_byte(0x85); - emit_byte(0xc0+8*s+d); -} -LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s)) - -LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s)) -{ - emit_byte(0x84); - emit_byte(0xc0+8*s+d); -} -LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s)) - -LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i)) -{ - emit_byte(0x81); - emit_byte(0xf0+d); - emit_long(i); -} -LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i)) - -LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i)) -{ - if (optimize_imm8 && isbyte(i)) { - emit_byte(0x83); - emit_byte(0xe0+d); - emit_byte(i); - } - else { - if (optimize_accum && isaccum(d)) - emit_byte(0x25); - else { - emit_byte(0x81); - emit_byte(0xe0+d); - } - emit_long(i); - } -} -LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i)) - -LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i)) -{ - emit_byte(0x66); - if (optimize_imm8 && isbyte(i)) { - emit_byte(0x83); - emit_byte(0xe0+d); - emit_byte(i); - } - else { - if (optimize_accum && isaccum(d)) - emit_byte(0x25); - else { - emit_byte(0x81); - emit_byte(0xe0+d); - } - emit_word(i); - } -} -LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i)) - -LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s)) -{ - emit_byte(0x21); - emit_byte(0xc0+8*s+d); -} -LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s)) - -LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s)) -{ - emit_byte(0x66); - emit_byte(0x21); - emit_byte(0xc0+8*s+d); -} -LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s)) - -LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s)) -{ - emit_byte(0x20); - emit_byte(0xc0+8*s+d); -} -LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s)) - -LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i)) -{ - if (optimize_imm8 && isbyte(i)) { - emit_byte(0x83); - emit_byte(0xc8+d); - emit_byte(i); - } - else { - if (optimize_accum && isaccum(d)) - emit_byte(0x0d); - else { - emit_byte(0x81); - emit_byte(0xc8+d); - } - emit_long(i); - } -} -LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i)) - -LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 
s)) -{ - emit_byte(0x09); - emit_byte(0xc0+8*s+d); -} -LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s)) - -LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s)) -{ - emit_byte(0x66); - emit_byte(0x09); - emit_byte(0xc0+8*s+d); -} -LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s)) - -LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s)) -{ - emit_byte(0x08); - emit_byte(0xc0+8*s+d); -} -LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s)) - -LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s)) -{ - emit_byte(0x11); - emit_byte(0xc0+8*s+d); -} -LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s)) - -LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s)) -{ - emit_byte(0x66); - emit_byte(0x11); - emit_byte(0xc0+8*s+d); -} -LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s)) - -LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s)) -{ - emit_byte(0x10); - emit_byte(0xc0+8*s+d); -} -LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s)) - -LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s)) -{ - emit_byte(0x01); - emit_byte(0xc0+8*s+d); -} -LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s)) - -LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s)) -{ - emit_byte(0x66); - emit_byte(0x01); - emit_byte(0xc0+8*s+d); -} -LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s)) - -LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s)) -{ - emit_byte(0x00); - emit_byte(0xc0+8*s+d); -} -LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s)) - -LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i)) -{ - if (isbyte(i)) { - emit_byte(0x83); - emit_byte(0xe8+d); - emit_byte(i); - } - else { - if (optimize_accum && isaccum(d)) - emit_byte(0x2d); - else { - emit_byte(0x81); - emit_byte(0xe8+d); - } - emit_long(i); - } -} -LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i)) - -LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i)) -{ - if (optimize_accum && isaccum(d)) - emit_byte(0x2c); - else { - emit_byte(0x80); - emit_byte(0xe8+d); - } - emit_byte(i); -} -LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i)) - -LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i)) -{ - if (isbyte(i)) { - emit_byte(0x83); - emit_byte(0xc0+d); - 
emit_byte(i); - } - else { - if (optimize_accum && isaccum(d)) - emit_byte(0x05); - else { - emit_byte(0x81); - emit_byte(0xc0+d); - } - emit_long(i); - } -} -LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i)) - -LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i)) -{ - emit_byte(0x66); - if (isbyte(i)) { - emit_byte(0x83); - emit_byte(0xc0+d); - emit_byte(i); - } - else { - if (optimize_accum && isaccum(d)) - emit_byte(0x05); - else { - emit_byte(0x81); - emit_byte(0xc0+d); - } - emit_word(i); - } -} -LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i)) - -LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i)) -{ - if (optimize_accum && isaccum(d)) - emit_byte(0x04); - else { - emit_byte(0x80); - emit_byte(0xc0+d); - } - emit_byte(i); -} -LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i)) - -LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s)) -{ - emit_byte(0x19); - emit_byte(0xc0+8*s+d); -} -LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s)) - -LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s)) -{ - emit_byte(0x66); - emit_byte(0x19); - emit_byte(0xc0+8*s+d); -} -LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s)) - -LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s)) -{ - emit_byte(0x18); - emit_byte(0xc0+8*s+d); -} -LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s)) - -LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s)) -{ - emit_byte(0x29); - emit_byte(0xc0+8*s+d); -} -LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s)) - -LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s)) -{ - emit_byte(0x66); - emit_byte(0x29); - emit_byte(0xc0+8*s+d); -} -LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s)) - -LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s)) -{ - emit_byte(0x28); - emit_byte(0xc0+8*s+d); -} -LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s)) - -LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s)) -{ - emit_byte(0x39); - emit_byte(0xc0+8*s+d); -} -LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s)) - -LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i)) -{ - if (optimize_imm8 && isbyte(i)) { - emit_byte(0x83); - emit_byte(0xf8+r); - emit_byte(i); - } - 
else { - if (optimize_accum && isaccum(r)) - emit_byte(0x3d); - else { - emit_byte(0x81); - emit_byte(0xf8+r); - } - emit_long(i); - } -} -LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i)) - -LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s)) -{ - emit_byte(0x66); - emit_byte(0x39); - emit_byte(0xc0+8*s+d); -} -LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s)) - -LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s)) -{ - emit_byte(0x80); - emit_byte(0x3d); - emit_long(d); - emit_byte(s); -} -LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s)) - -LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i)) -{ - if (optimize_accum && isaccum(d)) - emit_byte(0x3c); - else { - emit_byte(0x80); - emit_byte(0xf8+d); - } - emit_byte(i); -} -LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i)) - -LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s)) -{ - emit_byte(0x38); - emit_byte(0xc0+8*s+d); -} -LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s)) - -LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor)) -{ - int fi; - - switch(factor) { - case 1: fi=0; break; - case 2: fi=1; break; - case 4: fi=2; break; - case 8: fi=3; break; - default: abort(); - } - emit_byte(0x39); - emit_byte(0x04+8*d); - emit_byte(5+8*index+0x40*fi); - emit_long(offset); -} -LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor)) - -LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s)) -{ - emit_byte(0x31); - emit_byte(0xc0+8*s+d); -} -LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s)) - -LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s)) -{ - emit_byte(0x66); - emit_byte(0x31); - emit_byte(0xc0+8*s+d); -} -LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s)) - -LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s)) -{ - emit_byte(0x30); - emit_byte(0xc0+8*s+d); -} -LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s)) - -LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s)) -{ - if (optimize_imm8 && isbyte(s)) { - emit_byte(0x83); - emit_byte(0x2d); - emit_long(d); - emit_byte(s); - } - else { - emit_byte(0x81); - 
emit_byte(0x2d); - emit_long(d); - emit_long(s); - } -} -LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s)) - -LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s)) -{ - if (optimize_imm8 && isbyte(s)) { - emit_byte(0x83); - emit_byte(0x3d); - emit_long(d); - emit_byte(s); - } - else { - emit_byte(0x81); - emit_byte(0x3d); - emit_long(d); - emit_long(s); - } -} -LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s)) - -LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2)) -{ - emit_byte(0x87); - emit_byte(0xc0+8*r1+r2); -} -LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2)) - -LOWFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2)) -{ - emit_byte(0x86); - emit_byte(0xc0+8*(r1&0xf)+(r2&0xf)); /* XXX this handles upper-halves registers (e.g. %ah defined as 0x10+4) */ -} -LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2)) - -/************************************************************************* - * FIXME: mem access modes probably wrong * - *************************************************************************/ - -LOWFUNC(READ,WRITE,0,raw_pushfl,(void)) -{ - emit_byte(0x9c); -} -LENDFUNC(READ,WRITE,0,raw_pushfl,(void)) - -LOWFUNC(WRITE,READ,0,raw_popfl,(void)) -{ - emit_byte(0x9d); -} -LENDFUNC(WRITE,READ,0,raw_popfl,(void)) - -/* Generate floating-point instructions */ -static inline void x86_fadd_m(MEMR s) -{ - emit_byte(0xdc); - emit_byte(0x05); - emit_long(s); -} - -#endif /************************************************************************* * Unoptimizable stuff --- jump * *************************************************************************/ -static __inline__ void raw_call_r(R4 r) +static inline void raw_call_r(R4 r) { -#if USE_NEW_RTASM - CALLsr(r); -#else - emit_byte(0xff); - emit_byte(0xd0+r); -#endif + CALLsr(r); } -static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m) +static inline void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m) { -#if USE_NEW_RTASM - CALLsm(base, X86_NOREG, r, m); -#else - int mu; - switch(m) 
{ - case 1: mu=0; break; - case 2: mu=1; break; - case 4: mu=2; break; - case 8: mu=3; break; - default: abort(); - } - emit_byte(0xff); - emit_byte(0x14); - emit_byte(0x05+8*r+0x40*mu); - emit_long(base); -#endif + ADDR32 CALLsm(base, X86_NOREG, r, m); } -static __inline__ void raw_jmp_r(R4 r) +static inline void raw_jmp_r(R4 r) { -#if USE_NEW_RTASM - JMPsr(r); -#else - emit_byte(0xff); - emit_byte(0xe0+r); -#endif + JMPsr(r); } -static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m) +static inline void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m) { -#if USE_NEW_RTASM - JMPsm(base, X86_NOREG, r, m); -#else - int mu; - switch(m) { - case 1: mu=0; break; - case 2: mu=1; break; - case 4: mu=2; break; - case 8: mu=3; break; - default: abort(); - } - emit_byte(0xff); - emit_byte(0x24); - emit_byte(0x05+8*r+0x40*mu); - emit_long(base); -#endif + ADDR32 JMPsm(base, X86_NOREG, r, m); } -static __inline__ void raw_jmp_m(uae_u32 base) +static inline void raw_jmp_m(uae_u32 base) { - emit_byte(0xff); - emit_byte(0x25); - emit_long(base); + emit_byte(0xff); + emit_byte(0x25); + emit_long(base); } -static __inline__ void raw_call(uae_u32 t) +static inline void raw_call(uae_u32 t) { -#if USE_NEW_RTASM - CALLm(t); -#else - emit_byte(0xe8); - emit_long(t-(uae_u32)target-4); -#endif + ADDR32 CALLm(t); } -static __inline__ void raw_jmp(uae_u32 t) +static inline void raw_jmp(uae_u32 t) { -#if USE_NEW_RTASM - JMPm(t); -#else - emit_byte(0xe9); - emit_long(t-(uae_u32)target-4); -#endif + ADDR32 JMPm(t); } -static __inline__ void raw_jl(uae_u32 t) +static inline void raw_jcc_l_oponly(int cc) { - emit_byte(0x0f); - emit_byte(0x8c); - emit_long(t-(uintptr)target-4); + emit_byte(0x0f); + emit_byte(0x80+cc); } -static __inline__ void raw_jz(uae_u32 t) +static inline void raw_jz_l_oponly(void) { - emit_byte(0x0f); - emit_byte(0x84); - emit_long(t-(uintptr)target-4); + raw_jcc_l_oponly(NATIVE_CC_EQ); } -static __inline__ void raw_jnz(uae_u32 t) +static inline 
void raw_jnz_l_oponly(void) { - emit_byte(0x0f); - emit_byte(0x85); - emit_long(t-(uintptr)target-4); + raw_jcc_l_oponly(NATIVE_CC_NE); } -static __inline__ void raw_jnz_l_oponly(void) +static inline void raw_jl(uae_u32 t) { - emit_byte(0x0f); - emit_byte(0x85); + raw_jcc_l_oponly(NATIVE_CC_LT); + emit_long(t-(uintptr)target-4); } -static __inline__ void raw_jcc_l_oponly(int cc) +static inline void raw_jz(uae_u32 t) { - emit_byte(0x0f); - emit_byte(0x80+cc); + raw_jz_l_oponly(); + emit_long(t-(uintptr)target-4); } -static __inline__ void raw_jnz_b_oponly(void) +static inline void raw_jnz(uae_u32 t) { - emit_byte(0x75); + raw_jnz_l_oponly(); + emit_long(t-(uintptr)target-4); } -static __inline__ void raw_jz_b_oponly(void) -{ - emit_byte(0x74); -} - -static __inline__ void raw_jcc_b_oponly(int cc) +static inline void raw_jcc_b_oponly(int cc) { emit_byte(0x70+cc); } -static __inline__ void raw_jmp_l_oponly(void) +static inline void raw_jnz_b_oponly(void) { - emit_byte(0xe9); + raw_jcc_b_oponly(NATIVE_CC_NE); } -static __inline__ void raw_jmp_b_oponly(void) +static inline void raw_jz_b_oponly(void) { - emit_byte(0xeb); + raw_jcc_b_oponly(NATIVE_CC_EQ); } -static __inline__ void raw_ret(void) +static inline void raw_jmp_l_oponly(void) { - emit_byte(0xc3); + emit_byte(0xe9); } -static __inline__ void raw_nop(void) +static inline void raw_jmp_b_oponly(void) { - emit_byte(0x90); + emit_byte(0xeb); } -static __inline__ void raw_emit_nop_filler(int nbytes) +static inline void raw_ret(void) { - /* Source: GNU Binutils 2.12.90.0.15 */ - /* Various efficient no-op patterns for aligning code labels. - Note: Don't try to assemble the instructions in the comments. - 0L and 0w are not legal. 
*/ - static const uae_u8 f32_1[] = - {0x90}; /* nop */ - static const uae_u8 f32_2[] = - {0x89,0xf6}; /* movl %esi,%esi */ - static const uae_u8 f32_3[] = - {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */ - static const uae_u8 f32_4[] = - {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */ - static const uae_u8 f32_5[] = - {0x90, /* nop */ - 0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */ - static const uae_u8 f32_6[] = - {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */ - static const uae_u8 f32_7[] = - {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */ - static const uae_u8 f32_8[] = - {0x90, /* nop */ - 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */ - static const uae_u8 f32_9[] = - {0x89,0xf6, /* movl %esi,%esi */ - 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ - static const uae_u8 f32_10[] = - {0x8d,0x76,0x00, /* leal 0(%esi),%esi */ - 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ - static const uae_u8 f32_11[] = - {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */ - 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ - static const uae_u8 f32_12[] = - {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */ - 0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */ - static const uae_u8 f32_13[] = - {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */ - 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ - static const uae_u8 f32_14[] = - {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */ - 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ - static const uae_u8 f32_15[] = - {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */ - 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90}; - static const uae_u8 f32_16[] = - {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */ - 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90}; - static const uae_u8 *const f32_patt[] = { - f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8, - f32_9, f32_10, 
f32_11, f32_12, f32_13, f32_14, f32_15 - }; - static const uae_u8 prefixes[4] = { 0x66, 0x66, 0x66, 0x66 }; + emit_byte(0xc3); +} -#if defined(__x86_64__) +static inline void raw_emit_nop(void) +{ + emit_byte(0x90); +} + +static inline void raw_emit_nop_filler(int nbytes) +{ + +#if defined(CPU_x86_64) /* The recommended way to pad 64bit code is to use NOPs preceded by maximally four 0x66 prefixes. Balance the size of nops. */ + static const uae_u8 prefixes[4] = { 0x66, 0x66, 0x66, 0x66 }; if (nbytes == 0) return; @@ -3285,13 +1277,64 @@ static __inline__ void raw_emit_nop_filler(int nbytes) for (i = 0; i < remains; i++) { emit_block(prefixes, len); - raw_nop(); + raw_emit_nop(); } for (; i < nnops; i++) { emit_block(prefixes, len - 1); - raw_nop(); + raw_emit_nop(); } #else + /* Source: GNU Binutils 2.12.90.0.15 */ + /* Various efficient no-op patterns for aligning code labels. + Note: Don't try to assemble the instructions in the comments. + 0L and 0w are not legal. */ + static const uae_u8 f32_1[] = + {0x90}; /* nop */ + static const uae_u8 f32_2[] = + {0x89,0xf6}; /* movl %esi,%esi */ + static const uae_u8 f32_3[] = + {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */ + static const uae_u8 f32_4[] = + {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */ + static const uae_u8 f32_5[] = + {0x90, /* nop */ + 0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */ + static const uae_u8 f32_6[] = + {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */ + static const uae_u8 f32_7[] = + {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */ + static const uae_u8 f32_8[] = + {0x90, /* nop */ + 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */ + static const uae_u8 f32_9[] = + {0x89,0xf6, /* movl %esi,%esi */ + 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ + static const uae_u8 f32_10[] = + {0x8d,0x76,0x00, /* leal 0(%esi),%esi */ + 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ + static const uae_u8 f32_11[] = + 
{0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */ + 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ + static const uae_u8 f32_12[] = + {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */ + 0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */ + static const uae_u8 f32_13[] = + {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */ + 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ + static const uae_u8 f32_14[] = + {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */ + 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ + static const uae_u8 f32_15[] = + {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */ + 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90}; + static const uae_u8 f32_16[] = + {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */ + 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90}; + static const uae_u8 *const f32_patt[] = { + f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8, + f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15 + }; + int nloops = nbytes / 16; while (nloops-- > 0) emit_block(f32_16, sizeof(f32_16)); @@ -3307,56 +1350,58 @@ static __inline__ void raw_emit_nop_filler(int nbytes) * Flag handling, to and fro UAE flag register * *************************************************************************/ -static __inline__ void raw_flags_evicted(int r) +static inline void raw_flags_evicted(int r) { - //live.state[FLAGTMP].status=CLEAN; - live.state[FLAGTMP].status=INMEM; - live.state[FLAGTMP].realreg=-1; - /* We just "evicted" FLAGTMP. */ - if (live.nat[r].nholds!=1) { - /* Huh? */ - abort(); - } - live.nat[r].nholds=0; + //live.state[FLAGTMP].status=CLEAN; + live.state[FLAGTMP].status=INMEM; + live.state[FLAGTMP].realreg=-1; + /* We just "evicted" FLAGTMP. */ + if (live.nat[r].nholds!=1) { + /* Huh? 
*/ + abort(); + } + live.nat[r].nholds=0; } -#define FLAG_NREG1_FLAGREG 0 /* Set to -1 if any register will do */ -static __inline__ void raw_flags_to_reg_FLAGREG(int r) +#define FLAG_NREG1_FLAGREG EAX_INDEX /* Set to -1 if any register will do */ +static inline void raw_flags_to_reg_FLAGREG(int r) { - raw_lahf(0); /* Most flags in AH */ - //raw_setcc(r,0); /* V flag in AL */ - raw_setcc_m((uintptr)live.state[FLAGTMP].mem,0); - + raw_lahf(0); /* Most flags in AH */ + //raw_setcc(r,0); /* V flag in AL */ + raw_setcc_m((uintptr)live.state[FLAGTMP].mem,0); + #if 1 /* Let's avoid those nasty partial register stalls */ - //raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r); - raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,AH_INDEX); - raw_flags_evicted(r); + //raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r); + raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,AH_INDEX); + raw_flags_evicted(r); #endif } -#define FLAG_NREG2_FLAGREG 0 /* Set to -1 if any register will do */ -static __inline__ void raw_reg_to_flags_FLAGREG(int r) +#define FLAG_NREG2_FLAGREG EAX_INDEX /* Set to -1 if any register will do */ +static inline void raw_reg_to_flags_FLAGREG(int r) { - raw_cmp_b_ri(r,-127); /* set V */ - raw_sahf(0); + raw_cmp_b_ri(r,-127); /* set V */ + raw_sahf(0); } -#define FLAG_NREG3_FLAGREG 0 /* Set to -1 if any register will do */ +#define FLAG_NREG3_FLAGREG EAX_INDEX /* Set to -1 if any register will do */ static __inline__ void raw_flags_set_zero_FLAGREG(int s, int tmp) { - raw_mov_l_rr(tmp,s); - raw_lahf(s); /* flags into ah */ - raw_and_l_ri(s,0xffffbfff); - raw_and_l_ri(tmp,0x00004000); - raw_xor_l_ri(tmp,0x00004000); - raw_or_l(s,tmp); - raw_sahf(s); + raw_mov_l_rr(tmp,s); + raw_lahf(s); /* flags into ah */ + SETOr(X86_AL); /* V flag into al */ + raw_and_l_ri(s,0xffffbfff); + raw_and_l_ri(tmp,0x00004000); + raw_xor_l_ri(tmp,0x00004000); + raw_or_l(s,tmp); + raw_cmp_b_ri(X86_AL,-127); /* set V */ + raw_sahf(s); } -static __inline__ void raw_flags_init_FLAGREG(void) { 
} +static inline void raw_flags_init_FLAGREG(void) { } #define FLAG_NREG1_FLAGSTK -1 /* Set to -1 if any register will do */ -static __inline__ void raw_flags_to_reg_FLAGSTK(int r) +static inline void raw_flags_to_reg_FLAGSTK(int r) { raw_pushfl(); raw_pop_l_r(r); @@ -3365,41 +1410,41 @@ static __inline__ void raw_flags_to_reg_FLAGSTK(int r) } #define FLAG_NREG2_FLAGSTK -1 /* Set to -1 if any register will do */ -static __inline__ void raw_reg_to_flags_FLAGSTK(int r) +static inline void raw_reg_to_flags_FLAGSTK(int r) { raw_push_l_r(r); raw_popfl(); } #define FLAG_NREG3_FLAGSTK -1 /* Set to -1 if any register will do */ -static __inline__ void raw_flags_set_zero_FLAGSTK(int s, int tmp) +static inline void raw_flags_set_zero_FLAGSTK(int s, int tmp) { - raw_mov_l_rr(tmp,s); - raw_pushfl(); - raw_pop_l_r(s); - raw_and_l_ri(s,0xffffffbf); - raw_and_l_ri(tmp,0x00000040); - raw_xor_l_ri(tmp,0x00000040); - raw_or_l(s,tmp); - raw_push_l_r(s); - raw_popfl(); + raw_mov_l_rr(tmp,s); + raw_pushfl(); + raw_pop_l_r(s); + raw_and_l_ri(s,0xffffffbf); + raw_and_l_ri(tmp,0x00000040); + raw_xor_l_ri(tmp,0x00000040); + raw_or_l(s,tmp); + raw_push_l_r(s); + raw_popfl(); } -static __inline__ void raw_flags_init_FLAGSTK(void) { } +static inline void raw_flags_init_FLAGSTK(void) { } -#if defined(__x86_64__) +#if defined(CPU_x86_64) /* Try to use the LAHF/SETO method on x86_64 since it is faster. This can't be the default because some older CPUs don't support LAHF/SAHF in long mode. 
*/ -static int FLAG_NREG1_FLAGGEN = 0; -static __inline__ void raw_flags_to_reg_FLAGGEN(int r) +static int FLAG_NREG1_FLAGGEN = EAX_INDEX; +static inline void raw_flags_to_reg_FLAGGEN(int r) { if (have_lahf_lm) { // NOTE: the interpreter uses the normal EFLAGS layout - // pushf/popf CF(0) ZF( 6) SF( 7) OF(11) - // sahf/lahf CF(8) ZF(14) SF(15) OF( 0) + // pushf/popf CF(0) ZF( 6) SF( 7) OF(11) + // sahf/lahf CF(8) ZF(14) SF(15) OF( 0) assert(r == 0); - raw_setcc(r,0); /* V flag in AL */ + raw_setcc(r,0); /* V flag in AL */ raw_lea_l_r_scaled(0,0,8); /* move it to its EFLAGS location */ raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,0); raw_lahf(0); /* most flags in AH */ @@ -3410,8 +1455,8 @@ static __inline__ void raw_flags_to_reg_FLAGGEN(int r) raw_flags_to_reg_FLAGSTK(r); } -static int FLAG_NREG2_FLAGGEN = 0; -static __inline__ void raw_reg_to_flags_FLAGGEN(int r) +static int FLAG_NREG2_FLAGGEN = EAX_INDEX; +static inline void raw_reg_to_flags_FLAGGEN(int r) { if (have_lahf_lm) { raw_xchg_b_rr(0,AH_INDEX); @@ -3422,8 +1467,8 @@ static __inline__ void raw_reg_to_flags_FLAGGEN(int r) raw_reg_to_flags_FLAGSTK(r); } -static int FLAG_NREG3_FLAGGEN = 0; -static __inline__ void raw_flags_set_zero_FLAGGEN(int s, int tmp) +static int FLAG_NREG3_FLAGGEN = EAX_INDEX; +static inline void raw_flags_set_zero_FLAGGEN(int s, int tmp) { if (have_lahf_lm) raw_flags_set_zero_FLAGREG(s, tmp); @@ -3431,24 +1476,24 @@ static __inline__ void raw_flags_set_zero_FLAGGEN(int s, int tmp) raw_flags_set_zero_FLAGSTK(s, tmp); } -static __inline__ void raw_flags_init_FLAGGEN(void) +static inline void raw_flags_init_FLAGGEN(void) { if (have_lahf_lm) { FLAG_NREG1_FLAGGEN = FLAG_NREG1_FLAGREG; FLAG_NREG2_FLAGGEN = FLAG_NREG2_FLAGREG; - FLAG_NREG1_FLAGGEN = FLAG_NREG3_FLAGREG; + FLAG_NREG3_FLAGGEN = FLAG_NREG3_FLAGREG; } else { FLAG_NREG1_FLAGGEN = FLAG_NREG1_FLAGSTK; FLAG_NREG2_FLAGGEN = FLAG_NREG2_FLAGSTK; - FLAG_NREG1_FLAGGEN = FLAG_NREG3_FLAGSTK; + FLAG_NREG3_FLAGGEN = FLAG_NREG3_FLAGSTK; } 
} #endif #ifdef SAHF_SETO_PROFITABLE #define FLAG_SUFFIX FLAGREG -#elif defined __x86_64__ +#elif defined CPU_x86_64 #define FLAG_SUFFIX FLAGGEN #else #define FLAG_SUFFIX FLAGSTK @@ -3468,352 +1513,150 @@ static __inline__ void raw_flags_init_FLAGGEN(void) /* Apparently, there are enough instructions between flag store and flag reload to avoid the partial memory stall */ -static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r) +static inline void raw_load_flagreg(uae_u32 target) { -#if 1 - raw_mov_l_rm(target,(uintptr)live.state[r].mem); -#else - raw_mov_b_rm(target,(uintptr)live.state[r].mem); - raw_mov_b_rm(target+4,((uintptr)live.state[r].mem)+1); + /* attention: in 64bit mode, relies on LITTE_ENDIANESS of regflags.cznv */ + raw_mov_l_rm(target,(uintptr)live.state[FLAGTMP].mem); +} + +static inline void raw_load_flagx(uae_u32 target) +{ +#if FLAGBIT_X < 8 + if (live.nat[target].canbyte) + raw_mov_b_rm(target,(uintptr)live.state[FLAGX].mem); + else #endif + if (live.nat[target].canword) + raw_mov_w_rm(target,(uintptr)live.state[FLAGX].mem); + else + raw_mov_l_rm(target,(uintptr)live.state[FLAGX].mem); } -/* FLAGX is byte sized, and we *do* write it at that size */ -static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r) +static inline void raw_dec_sp(int off) { - if (live.nat[target].canbyte) - raw_mov_b_rm(target,(uintptr)live.state[r].mem); - else if (live.nat[target].canword) - raw_mov_w_rm(target,(uintptr)live.state[r].mem); - else - raw_mov_l_rm(target,(uintptr)live.state[r].mem); + if (off) { +#ifdef CPU_x86_64 + emit_byte(0x48); /* REX prefix */ +#endif + raw_sub_l_ri(ESP_INDEX,off); + } } -static __inline__ void raw_dec_sp(int off) +static inline void raw_inc_sp(int off) { - if (off) raw_sub_l_ri(ESP_INDEX,off); + if (off) { +#ifdef CPU_x86_64 + emit_byte(0x48); /* REX prefix */ +#endif + raw_add_l_ri(ESP_INDEX,off); + } } -static __inline__ void raw_inc_sp(int off) -{ - if (off) raw_add_l_ri(ESP_INDEX,off); +static inline void 
raw_push_regs_to_preserve(void) { + for (int i=N_REGS;i--;) { + if (need_to_preserve[i]) + raw_push_l_r(i); + } +} + +static inline void raw_pop_preserved_regs(void) { + for (int i=0;i -#include - -#define SIG_READ 1 -#define SIG_WRITE 2 - -static int in_handler=0; -static uae_u8 veccode[256]; - -static void vec(int x, struct sigcontext sc) -{ - uae_u8* i=(uae_u8*)sc.eip; - uae_u32 addr=sc.cr2; - int r=-1; - int size=4; - int dir=-1; - int len=0; - int j; - - write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip); - if (!canbang) - write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n"); - if (in_handler) - write_log("Argh --- Am already in a handler. Shouldn't happen!\n"); - - if (canbang && i>=compiled_code && i<=current_compile_p) { - if (*i==0x66) { - i++; - size=2; - len++; - } - - switch(i[0]) { - case 0x8a: - if ((i[1]&0xc0)==0x80) { - r=(i[1]>>3)&7; - dir=SIG_READ; - size=1; - len+=6; - break; - } - break; - case 0x88: - if ((i[1]&0xc0)==0x80) { - r=(i[1]>>3)&7; - dir=SIG_WRITE; - size=1; - len+=6; - break; - } - break; - case 0x8b: - if ((i[1]&0xc0)==0x80) { - r=(i[1]>>3)&7; - dir=SIG_READ; - len+=6; - break; - } - if ((i[1]&0xc0)==0x40) { - r=(i[1]>>3)&7; - dir=SIG_READ; - len+=3; - break; - } - break; - case 0x89: - if ((i[1]&0xc0)==0x80) { - r=(i[1]>>3)&7; - dir=SIG_WRITE; - len+=6; - break; - } - if ((i[1]&0xc0)==0x40) { - r=(i[1]>>3)&7; - dir=SIG_WRITE; - len+=3; - break; - } - break; - } - } - - if (r!=-1) { - void* pr=NULL; - write_log("register was %d, direction was %d, size was %d\n",r,dir,size); - - switch(r) { - case 0: pr=&(sc.eax); break; - case 1: pr=&(sc.ecx); break; - case 2: pr=&(sc.edx); break; - case 3: pr=&(sc.ebx); break; - case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break; - case 5: pr=(size>1)? - (void*)(&(sc.ebp)): - (void*)(((uae_u8*)&(sc.ecx))+1); break; - case 6: pr=(size>1)? - (void*)(&(sc.esi)): - (void*)(((uae_u8*)&(sc.edx))+1); break; - case 7: pr=(size>1)? 
- (void*)(&(sc.edi)): - (void*)(((uae_u8*)&(sc.ebx))+1); break; - default: abort(); - } - if (pr) { - blockinfo* bi; - - if (currprefs.comp_oldsegv) { - addr-=NATMEM_OFFSET; - - if ((addr>=0x10000000 && addr<0x40000000) || - (addr>=0x50000000)) { - write_log("Suspicious address in %x SEGV handler.\n",addr); - } - if (dir==SIG_READ) { - switch(size) { - case 1: *((uae_u8*)pr)=get_byte(addr); break; - case 2: *((uae_u16*)pr)=get_word(addr); break; - case 4: *((uae_u32*)pr)=get_long(addr); break; - default: abort(); - } - } - else { /* write */ - switch(size) { - case 1: put_byte(addr,*((uae_u8*)pr)); break; - case 2: put_word(addr,*((uae_u16*)pr)); break; - case 4: put_long(addr,*((uae_u32*)pr)); break; - default: abort(); - } - } - write_log("Handled one access!\n"); - fflush(stdout); - segvcount++; - sc.eip+=len; - } - else { - void* tmp=target; - int i; - uae_u8 vecbuf[5]; - - addr-=NATMEM_OFFSET; - - if ((addr>=0x10000000 && addr<0x40000000) || - (addr>=0x50000000)) { - write_log("Suspicious address in %x SEGV handler.\n",addr); - } - - target=(uae_u8*)sc.eip; - for (i=0;i<5;i++) - vecbuf[i]=target[i]; - emit_byte(0xe9); - emit_long((uintptr)veccode-(uintptr)target-4); - write_log("Create jump to %p\n",veccode); - - write_log("Handled one access!\n"); - fflush(stdout); - segvcount++; - - target=veccode; - - if (dir==SIG_READ) { - switch(size) { - case 1: raw_mov_b_ri(r,get_byte(addr)); break; - case 2: raw_mov_w_ri(r,get_byte(addr)); break; - case 4: raw_mov_l_ri(r,get_byte(addr)); break; - default: abort(); - } - } - else { /* write */ - switch(size) { - case 1: put_byte(addr,*((uae_u8*)pr)); break; - case 2: put_word(addr,*((uae_u16*)pr)); break; - case 4: put_long(addr,*((uae_u32*)pr)); break; - default: abort(); - } - } - for (i=0;i<5;i++) - raw_mov_b_mi(sc.eip+i,vecbuf[i]); - raw_mov_l_mi((uintptr)&in_handler,0); - emit_byte(0xe9); - emit_long(sc.eip+len-(uintptr)target-4); - in_handler=1; - target=tmp; - } - bi=active; - while (bi) { - if (bi->handler && - 
(uae_u8*)bi->direct_handler<=i && - (uae_u8*)bi->nexthandler>i) { - write_log("deleted trigger (%p<%p<%p) %p\n", - bi->handler, - i, - bi->nexthandler, - bi->pc_p); - invalidate_block(bi); - raise_in_cl_list(bi); - set_special(0); - return; - } - bi=bi->next; - } - /* Not found in the active list. Might be a rom routine that - is in the dormant list */ - bi=dormant; - while (bi) { - if (bi->handler && - (uae_u8*)bi->direct_handler<=i && - (uae_u8*)bi->nexthandler>i) { - write_log("deleted trigger (%p<%p<%p) %p\n", - bi->handler, - i, - bi->nexthandler, - bi->pc_p); - invalidate_block(bi); - raise_in_cl_list(bi); - set_special(0); - return; - } - bi=bi->next; - } - write_log("Huh? Could not find trigger!\n"); - return; - } - } - write_log("Can't handle access!\n"); - for (j=0;j<10;j++) { - write_log("instruction byte %2d is %02x\n",j,i[j]); - } - write_log("Please send the above info (starting at \"fault address\") to\n" - "bmeyer@csse.monash.edu.au\n" - "This shouldn't happen ;-)\n"); - fflush(stdout); - signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */ -} +#ifdef UAE +#include "exception_handler.cpp" #endif +#ifdef UAE +static +#endif +void compiler_status() { + jit_log("compiled code starts at %p, current at %p (size 0x%x)", compiled_code, current_compile_p, (unsigned int)(current_compile_p - compiled_code)); +} /************************************************************************* * Checking for CPU features * *************************************************************************/ struct cpuinfo_x86 { - uae_u8 x86; // CPU family - uae_u8 x86_vendor; // CPU vendor - uae_u8 x86_processor; // CPU canonical processor type - uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise - uae_u32 x86_hwcap; - uae_u8 x86_model; - uae_u8 x86_mask; - int cpuid_level; // Maximum supported CPUID level, -1=no CPUID - char x86_vendor_id[16]; + uae_u8 x86; // CPU family + uae_u8 x86_vendor; // CPU vendor + uae_u8 x86_processor; // CPU 
canonical processor type + uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise + uae_u32 x86_hwcap; + uae_u8 x86_model; + uae_u8 x86_mask; + bool x86_has_xmm2; + int cpuid_level; // Maximum supported CPUID level, -1=no CPUID + char x86_vendor_id[16]; + uintptr x86_clflush_size; }; struct cpuinfo_x86 cpuinfo; enum { - X86_VENDOR_INTEL = 0, - X86_VENDOR_CYRIX = 1, - X86_VENDOR_AMD = 2, - X86_VENDOR_UMC = 3, - X86_VENDOR_NEXGEN = 4, - X86_VENDOR_CENTAUR = 5, - X86_VENDOR_RISE = 6, - X86_VENDOR_TRANSMETA = 7, - X86_VENDOR_NSC = 8, - X86_VENDOR_UNKNOWN = 0xff + X86_VENDOR_INTEL = 0, + X86_VENDOR_CYRIX = 1, + X86_VENDOR_AMD = 2, + X86_VENDOR_UMC = 3, + X86_VENDOR_NEXGEN = 4, + X86_VENDOR_CENTAUR = 5, + X86_VENDOR_RISE = 6, + X86_VENDOR_TRANSMETA = 7, + X86_VENDOR_NSC = 8, + X86_VENDOR_UNKNOWN = 0xff }; enum { - X86_PROCESSOR_I386, /* 80386 */ - X86_PROCESSOR_I486, /* 80486DX, 80486SX, 80486DX[24] */ - X86_PROCESSOR_PENTIUM, - X86_PROCESSOR_PENTIUMPRO, - X86_PROCESSOR_K6, - X86_PROCESSOR_ATHLON, - X86_PROCESSOR_PENTIUM4, - X86_PROCESSOR_X86_64, - X86_PROCESSOR_max + X86_PROCESSOR_I386, /* 80386 */ + X86_PROCESSOR_I486, /* 80486DX, 80486SX, 80486DX[24] */ + X86_PROCESSOR_PENTIUM, + X86_PROCESSOR_PENTIUMPRO, + X86_PROCESSOR_K6, + X86_PROCESSOR_ATHLON, + X86_PROCESSOR_PENTIUM4, + X86_PROCESSOR_X86_64, + X86_PROCESSOR_max }; +// #if defined(UAE) || (defined(DEBUG) && DEBUG) static const char * x86_processor_string_table[X86_PROCESSOR_max] = { - "80386", - "80486", - "Pentium", - "PentiumPro", - "K6", - "Athlon", - "Pentium4", - "x86-64" + "80386", + "80486", + "Pentium", + "PentiumPro", + "K6", + "Athlon", + "Pentium4", + "x86-64" }; +// #endif static struct ptt { - const int align_loop; - const int align_loop_max_skip; - const int align_jump; - const int align_jump_max_skip; - const int align_func; + const int align_loop; + const int align_loop_max_skip; + const int align_jump; + const int align_jump_max_skip; + const int align_func; } 
x86_alignments[X86_PROCESSOR_max] = { - { 4, 3, 4, 3, 4 }, - { 16, 15, 16, 15, 16 }, - { 16, 7, 16, 7, 16 }, - { 16, 15, 16, 7, 16 }, - { 32, 7, 32, 7, 32 }, - { 16, 7, 16, 7, 16 }, - { 0, 0, 0, 0, 0 }, - { 16, 7, 16, 7, 16 } + { 4, 3, 4, 3, 4 }, + { 16, 15, 16, 15, 16 }, + { 16, 7, 16, 7, 16 }, + { 16, 15, 16, 7, 16 }, + { 32, 7, 32, 7, 32 }, + { 16, 7, 16, 7, 16 }, + { 0, 0, 0, 0, 0 }, + { 16, 7, 16, 7, 16 } }; -static void -x86_get_cpu_vendor(struct cpuinfo_x86 *c) +static void x86_get_cpu_vendor(struct cpuinfo_x86 *c) { char *v = c->x86_vendor_id; @@ -3833,917 +1676,1608 @@ x86_get_cpu_vendor(struct cpuinfo_x86 *c) c->x86_vendor = X86_VENDOR_NEXGEN; else if (!strcmp(v, "RiseRiseRise")) c->x86_vendor = X86_VENDOR_RISE; - else if (!strcmp(v, "GenuineTMx86") || - !strcmp(v, "TransmetaCPU")) + else if (!strcmp(v, "GenuineTMx86") || !strcmp(v, "TransmetaCPU")) c->x86_vendor = X86_VENDOR_TRANSMETA; else c->x86_vendor = X86_VENDOR_UNKNOWN; } +/* + * Generic CPUID function + * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx + * resulting in stale register contents being returned. 
+ */ +/* Some CPUID calls want 'count' to be placed in ecx */ +#ifdef __GNUC__ +static void cpuid_count(uae_u32 op, uae_u32 count, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx) +{ + uae_u32 _eax, _ebx, _ecx, _edx; + _eax = op; + _ecx = count; + __asm__ __volatile__( + " movl %0,%%eax \n" + " movl %2,%%ecx \n" + " cpuid \n" + " movl %%eax,%0 \n" + " movl %%ebx,%1 \n" + " movl %%ecx,%2 \n" + " movl %%edx,%3 \n" + : "+m" (_eax), + "=m" (_ebx), + "+m" (_ecx), + "=m" (_edx) + : + : "eax", "ebx", "ecx", "edx"); + *eax = _eax; + *ebx = _ebx; + *ecx = _ecx; + *edx = _edx; +} +#endif + +#ifdef _MSC_VER +#include +static void cpuid_count(uae_u32 op, uae_u32 count, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx) +{ + int cpuinfo[4]; + cpuinfo[0] = op; + cpuinfo[1] = 0; + cpuinfo[2] = count; + cpuinfo[3] = 0; + __cpuidex(cpuinfo, op, count); + *eax = cpuinfo[0]; + *ebx = cpuinfo[1]; + *ecx = cpuinfo[2]; + *edx = cpuinfo[3]; +} +#endif + static void cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx) { - const int CPUID_SPACE = 4096; - uae_u8* cpuid_space = (uae_u8 *)vm_acquire(CPUID_SPACE); - if (cpuid_space == VM_MAP_FAILED) - abort(); - vm_protect(cpuid_space, CPUID_SPACE, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE); - - static uae_u32 s_op, s_eax, s_ebx, s_ecx, s_edx; - uae_u8* tmp=get_target(); - - s_op = op; - set_target(cpuid_space); - raw_push_l_r(0); /* eax */ - raw_push_l_r(1); /* ecx */ - raw_push_l_r(2); /* edx */ - raw_push_l_r(3); /* ebx */ - raw_mov_l_rm(0,(uintptr)&s_op); - raw_cpuid(0); - raw_mov_l_mr((uintptr)&s_eax,0); - raw_mov_l_mr((uintptr)&s_ebx,3); - raw_mov_l_mr((uintptr)&s_ecx,1); - raw_mov_l_mr((uintptr)&s_edx,2); - raw_pop_l_r(3); - raw_pop_l_r(2); - raw_pop_l_r(1); - raw_pop_l_r(0); - raw_ret(); - set_target(tmp); - - ((cpuop_func*)cpuid_space)(0); - if (eax != NULL) *eax = s_eax; - if (ebx != NULL) *ebx = s_ebx; - if (ecx != NULL) *ecx = s_ecx; - if (edx != NULL) *edx = s_edx; - - vm_release(cpuid_space, 
CPUID_SPACE); + cpuid_count(op, 0, eax, ebx, ecx, edx); } -static void -raw_init_cpu(void) +static void raw_init_cpu(void) { - struct cpuinfo_x86 *c = &cpuinfo; + struct cpuinfo_x86 *c = &cpuinfo; + uae_u32 dummy; - /* Defaults */ - c->x86_processor = X86_PROCESSOR_max; - c->x86_vendor = X86_VENDOR_UNKNOWN; - c->cpuid_level = -1; /* CPUID not detected */ - c->x86_model = c->x86_mask = 0; /* So far unknown... */ - c->x86_vendor_id[0] = '\0'; /* Unset */ - c->x86_hwcap = 0; - - /* Get vendor name */ - c->x86_vendor_id[12] = '\0'; - cpuid(0x00000000, + /* Defaults */ + c->x86_processor = X86_PROCESSOR_max; + c->x86_vendor = X86_VENDOR_UNKNOWN; + c->cpuid_level = -1; /* CPUID not detected */ + c->x86_model = c->x86_mask = 0; /* So far unknown... */ + c->x86_vendor_id[0] = '\0'; /* Unset */ + c->x86_hwcap = 0; +#ifdef CPU_x86_64 + c->x86_clflush_size = 64; +#else + c->x86_clflush_size = 32; +#endif + + /* Get vendor name */ + c->x86_vendor_id[12] = '\0'; + cpuid(0x00000000, (uae_u32 *)&c->cpuid_level, (uae_u32 *)&c->x86_vendor_id[0], (uae_u32 *)&c->x86_vendor_id[8], (uae_u32 *)&c->x86_vendor_id[4]); - x86_get_cpu_vendor(c); + x86_get_cpu_vendor(c); - /* Intel-defined flags: level 0x00000001 */ - c->x86_brand_id = 0; - if ( c->cpuid_level >= 0x00000001 ) { - uae_u32 tfms, brand_id; - cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap); - c->x86 = (tfms >> 8) & 15; - if (c->x86 == 0xf) - c->x86 += (tfms >> 20) & 0xff; /* extended family */ - c->x86_model = (tfms >> 4) & 15; - if (c->x86_model == 0xf) - c->x86_model |= (tfms >> 12) & 0xf0; /* extended model */ - c->x86_brand_id = brand_id & 0xff; - c->x86_mask = tfms & 15; - } else { - /* Have CPUID level 0 only - unheard of */ - c->x86 = 4; - } - - /* AMD-defined flags: level 0x80000001 */ - uae_u32 xlvl; - cpuid(0x80000000, &xlvl, NULL, NULL, NULL); - if ( (xlvl & 0xffff0000) == 0x80000000 ) { - if ( xlvl >= 0x80000001 ) { - uae_u32 features, extra_features; - cpuid(0x80000001, NULL, NULL, &extra_features, 
&features); - if (features & (1 << 29)) { - /* Assume x86-64 if long mode is supported */ - c->x86_processor = X86_PROCESSOR_X86_64; - } - if (extra_features & (1 << 0)) - have_lahf_lm = true; + /* Intel-defined flags: level 0x00000001 */ + c->x86_brand_id = 0; + if ( c->cpuid_level >= 0x00000001 ) { + uae_u32 tfms, brand_id; + cpuid(0x00000001, &tfms, &brand_id, &dummy, &c->x86_hwcap); + c->x86 = (tfms >> 8) & 15; + if (c->x86 == 0xf) + c->x86 += (tfms >> 20) & 0xff; /* extended family */ + c->x86_model = (tfms >> 4) & 15; + if (c->x86_model == 0xf) + c->x86_model |= (tfms >> 12) & 0xf0; /* extended model */ + c->x86_brand_id = brand_id & 0xff; + c->x86_mask = tfms & 15; + if (c->x86_hwcap & (1 << 19)) + { + c->x86_clflush_size = ((brand_id >> 8) & 0xff) * 8; + } + } else { + /* Have CPUID level 0 only - unheard of */ + c->x86 = 4; } - } - - /* Canonicalize processor ID */ - switch (c->x86) { - case 3: - c->x86_processor = X86_PROCESSOR_I386; - break; - case 4: - c->x86_processor = X86_PROCESSOR_I486; - break; - case 5: - if (c->x86_vendor == X86_VENDOR_AMD) - c->x86_processor = X86_PROCESSOR_K6; - else - c->x86_processor = X86_PROCESSOR_PENTIUM; - break; - case 6: - if (c->x86_vendor == X86_VENDOR_AMD) - c->x86_processor = X86_PROCESSOR_ATHLON; - else - c->x86_processor = X86_PROCESSOR_PENTIUMPRO; - break; - case 15: - if (c->x86_processor == X86_PROCESSOR_max) { - switch (c->x86_vendor) { - case X86_VENDOR_INTEL: - c->x86_processor = X86_PROCESSOR_PENTIUM4; - break; - case X86_VENDOR_AMD: - /* Assume a 32-bit Athlon processor if not in long mode */ - c->x86_processor = X86_PROCESSOR_ATHLON; - break; - } - } - break; - } - if (c->x86_processor == X86_PROCESSOR_max) { - c->x86_processor = X86_PROCESSOR_I386; - fprintf(stderr, "Error: unknown processor type, assuming i386\n"); - fprintf(stderr, " Family : %d\n", c->x86); - fprintf(stderr, " Model : %d\n", c->x86_model); - fprintf(stderr, " Mask : %d\n", c->x86_mask); - fprintf(stderr, " Vendor : %s [%d]\n", 
c->x86_vendor_id, c->x86_vendor); - if (c->x86_brand_id) - fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id); - } - /* Have CMOV support? */ - have_cmov = (c->x86_hwcap & (1 << 15)) != 0; -#if defined(__x86_64__) - if (!have_cmov) { - write_log("x86-64 implementations are bound to have CMOV!\n"); - abort(); - } + /* AMD-defined flags: level 0x80000001 */ + uae_u32 xlvl; + cpuid(0x80000000, &xlvl, &dummy, &dummy, &dummy); + if ( (xlvl & 0xffff0000) == 0x80000000 ) { + if ( xlvl >= 0x80000001 ) { + uae_u32 features, extra_features; + cpuid(0x80000001, &dummy, &dummy, &extra_features, &features); + if (features & (1 << 29)) { + /* Assume x86-64 if long mode is supported */ + c->x86_processor = X86_PROCESSOR_X86_64; + } + if (extra_features & (1 << 0)) + have_lahf_lm = true; + } + } + + /* Canonicalize processor ID */ + switch (c->x86) { + case 3: + c->x86_processor = X86_PROCESSOR_I386; + break; + case 4: + c->x86_processor = X86_PROCESSOR_I486; + break; + case 5: + if (c->x86_vendor == X86_VENDOR_AMD) + c->x86_processor = X86_PROCESSOR_K6; + else + c->x86_processor = X86_PROCESSOR_PENTIUM; + break; + case 6: + if (c->x86_vendor == X86_VENDOR_AMD) + c->x86_processor = X86_PROCESSOR_ATHLON; + else + c->x86_processor = X86_PROCESSOR_PENTIUMPRO; + break; + case 15: + if (c->x86_processor == X86_PROCESSOR_max) { + switch (c->x86_vendor) { + case X86_VENDOR_INTEL: + c->x86_processor = X86_PROCESSOR_PENTIUM4; + break; + case X86_VENDOR_AMD: + /* Assume a 32-bit Athlon processor if not in long mode */ + c->x86_processor = X86_PROCESSOR_ATHLON; + break; + } + } + break; + } + if (c->x86_processor == X86_PROCESSOR_max) { + c->x86_processor = X86_PROCESSOR_I386; + jit_log("Error: unknown processor type"); + jit_log(" Family : %d", c->x86); + jit_log(" Model : %d", c->x86_model); + jit_log(" Mask : %d", c->x86_mask); + jit_log(" Vendor : %s [%d]", c->x86_vendor_id, c->x86_vendor); + if (c->x86_brand_id) + { + jit_log(" BrandID : %02x", c->x86_brand_id); + } + } + + /* Have 
CMOV support? */ + have_cmov = (c->x86_hwcap & (1 << 15)) != 0; +#if defined(CPU_x86_64) + if (!have_cmov) { + jit_abort("x86-64 implementations are bound to have CMOV!"); + } #endif - /* Can the host CPU suffer from partial register stalls? */ - have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL); -#if 1 - /* It appears that partial register writes are a bad idea even on - AMD K7 cores, even though they are not supposed to have the - dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */ - if (c->x86_processor == X86_PROCESSOR_ATHLON) - have_rat_stall = true; + c->x86_has_xmm2 = (c->x86_hwcap & (1 << 26)) != 0; + + /* Can the host CPU suffer from partial register stalls? */ + // non-RAT_STALL mode is currently broken + have_rat_stall = true; //(c->x86_vendor == X86_VENDOR_INTEL); +#if 0 + /* It appears that partial register writes are a bad idea even on + AMD K7 cores, even though they are not supposed to have the + dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */ + if (c->x86_processor == X86_PROCESSOR_ATHLON) + have_rat_stall = true; #endif - /* Alignments */ - if (tune_alignment) { - align_loops = x86_alignments[c->x86_processor].align_loop; - align_jumps = x86_alignments[c->x86_processor].align_jump; - } + /* Alignments */ + if (tune_alignment) { + align_loops = x86_alignments[c->x86_processor].align_loop; + align_jumps = x86_alignments[c->x86_processor].align_jump; + } - write_log("Max CPUID level=%d Processor is %s [%s]\n", + jit_log(" : Max CPUID level=%d Processor is %s [%s]", c->cpuid_level, c->x86_vendor_id, x86_processor_string_table[c->x86_processor]); - raw_flags_init(); + raw_flags_init(); } +#ifndef UAE +static void __attribute__((noinline)) prevent_redzone_use(void) {} + static bool target_check_bsf(void) { bool mismatch = false; for (int g_ZF = 0; g_ZF <= 1; g_ZF++) { - for (int g_CF = 0; g_CF <= 1; g_CF++) { - for (int g_OF = 0; g_OF <= 1; g_OF++) { - for (int g_SF = 0; g_SF <= 1; g_SF++) { - for (int value = -1; 
value <= 1; value++) { - unsigned long flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF; - unsigned long tmp = value; -#ifdef _MSC_VER - __writeeflags(flags); - _BitScanForward(&tmp, value); - flags = __readeflags(); -#else - __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0" - : "+r" (flags), "+r" (tmp) : : "cc"); -#endif - int OF = (flags >> 11) & 1; - int SF = (flags >> 7) & 1; - int ZF = (flags >> 6) & 1; - int CF = flags & 1; - tmp = (value == 0); - if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF) - mismatch = true; + for (int g_CF = 0; g_CF <= 1; g_CF++) { + for (int g_OF = 0; g_OF <= 1; g_OF++) { + for (int g_SF = 0; g_SF <= 1; g_SF++) { + for (int value = -1; value <= 1; value++) { + uintptr flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF; + intptr tmp = value; + prevent_redzone_use(); + __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0" + : "+r" (flags), "+r" (tmp) : : "cc"); + int OF = (flags >> 11) & 1; + int SF = (flags >> 7) & 1; + int ZF = (flags >> 6) & 1; + int CF = flags & 1; + tmp = (value == 0); + if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF) + mismatch = true; + } + } + } } - }}}} + } if (mismatch) - write_log("Target CPU defines all flags on BSF instruction\n"); + { + jit_log(" : Target CPU defines all flags on BSF instruction"); + } return !mismatch; } - +#endif /************************************************************************* * FPU stuff * *************************************************************************/ -static __inline__ void raw_fp_init(void) +static inline void raw_fp_init(void) { - int i; - - for (i=0;i1) { - emit_byte(0x9b); - emit_byte(0xdb); - emit_byte(0xe3); - live.tos=-1; - } + /* using FINIT instead of popping all the entries. 
+ Seems to have side effects --- there is display corruption in + Quake when this is used */ + if (live.tos>1) { + emit_byte(0x9b); + emit_byte(0xdb); + emit_byte(0xe3); + live.tos=-1; + } #endif - while (live.tos>=1) { - emit_byte(0xde); - emit_byte(0xd9); - live.tos-=2; - } - while (live.tos>=0) { - emit_byte(0xdd); - emit_byte(0xd8); - live.tos--; - } - raw_fp_init(); + while (live.tos>=1) { + emit_byte(0xde); + emit_byte(0xd9); + live.tos-=2; + } + while (live.tos>=0) { + emit_byte(0xdd); + emit_byte(0xd8); + live.tos--; + } + raw_fp_init(); } -static __inline__ void make_tos(int r) +static inline void make_tos(int r) { - int p,q; + int p,q; + + if (live.spos[r]<0) { /* Register not yet on stack */ + emit_byte(0xd9); + emit_byte(0xe8); /* Push '1' on the stack, just to grow it */ + live.tos++; + live.spos[r]=live.tos; + live.onstack[live.tos]=r; + return; + } + /* Register is on stack */ + if (live.tos==live.spos[r]) + return; + p=live.spos[r]; + q=live.onstack[live.tos]; - if (live.spos[r]<0) { /* Register not yet on stack */ emit_byte(0xd9); - emit_byte(0xe8); /* Push '1' on the stack, just to grow it */ - live.tos++; - live.spos[r]=live.tos; + emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */ live.onstack[live.tos]=r; - return; - } - /* Register is on stack */ - if (live.tos==live.spos[r]) - return; - p=live.spos[r]; - q=live.onstack[live.tos]; - - emit_byte(0xd9); - emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */ - live.onstack[live.tos]=r; - live.spos[r]=live.tos; - live.onstack[p]=q; - live.spos[q]=p; + live.spos[r]=live.tos; + live.onstack[p]=q; + live.spos[q]=p; } -static __inline__ void make_tos2(int r, int r2) +static inline void make_tos2(int r, int r2) { - int q; + int q; - make_tos(r2); /* Put the reg that's supposed to end up in position2 - on top */ + make_tos(r2); /* Put the reg that's supposed to end up in position2 on top */ - if (live.spos[r]<0) { /* Register not yet on stack */ - 
make_tos(r); /* This will extend the stack */ - return; - } - /* Register is on stack */ - emit_byte(0xd9); - emit_byte(0xc9); /* Move r2 into position 2 */ + if (live.spos[r]<0) { /* Register not yet on stack */ + make_tos(r); /* This will extend the stack */ + return; + } + /* Register is on stack */ + emit_byte(0xd9); + emit_byte(0xc9); /* Move r2 into position 2 */ - q=live.onstack[live.tos-1]; - live.onstack[live.tos]=q; - live.spos[q]=live.tos; - live.onstack[live.tos-1]=r2; - live.spos[r2]=live.tos-1; + q=live.onstack[live.tos-1]; + live.onstack[live.tos]=q; + live.spos[q]=live.tos; + live.onstack[live.tos-1]=r2; + live.spos[r2]=live.tos-1; - make_tos(r); /* And r into 1 */ + make_tos(r); /* And r into 1 */ } -static __inline__ int stackpos(int r) +static inline int stackpos(int r) { - if (live.spos[r]<0) - abort(); - if (live.tos=0) { - /* source is on top of stack, and we already have the dest */ - int dd=stackpos(d); - emit_byte(0xdd); - emit_byte(0xd0+dd); - } - else { - emit_byte(0xd9); - emit_byte(0xc0+ds); /* duplicate source on tos */ - tos_make(d); /* store to destination, pop if necessary */ - } + usereg(s); + ds=stackpos(s); + if (ds==0 && live.spos[d]>=0) { + /* source is on top of stack, and we already have the dest */ + int dd=stackpos(d); + emit_byte(0xdd); + emit_byte(0xd0+dd); + } + else { + emit_byte(0xd9); + emit_byte(0xc0+ds); /* duplicate source on tos */ + tos_make(d); /* store to destination, pop if necessary */ + } } -LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s)) -LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base)) +LOWFUNC(NONE,READ,2,raw_fldcw_m_indexed,(R4 index, IMM base)) { - emit_byte(0xd9); - emit_byte(0xa8+index); - emit_long(base); + x86_64_prefix(true, false, NULL, NULL, &index); + emit_byte(0xd9); + emit_byte(0xa8 + index); + emit_long(base); } -LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base)) - LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s)) { - int ds; + int ds; - if (d!=s) { - usereg(s); - 
ds=stackpos(s); - emit_byte(0xd9); - emit_byte(0xc0+ds); /* duplicate source */ - emit_byte(0xd9); - emit_byte(0xfa); /* take square root */ - tos_make(d); /* store to destination */ - } - else { - make_tos(d); - emit_byte(0xd9); - emit_byte(0xfa); /* take square root */ - } + if (d!=s) { + usereg(s); + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* duplicate source */ + emit_byte(0xd9); + emit_byte(0xfa); /* take square root */ + tos_make(d); /* store to destination */ + } + else { + make_tos(d); + emit_byte(0xd9); + emit_byte(0xfa); /* take square root */ + } } -LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s)) LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s)) { - int ds; + int ds; - if (d!=s) { - usereg(s); - ds=stackpos(s); - emit_byte(0xd9); - emit_byte(0xc0+ds); /* duplicate source */ - emit_byte(0xd9); - emit_byte(0xe1); /* take fabs */ - tos_make(d); /* store to destination */ - } - else { - make_tos(d); - emit_byte(0xd9); - emit_byte(0xe1); /* take fabs */ - } + if (d!=s) { + usereg(s); + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* duplicate source */ + emit_byte(0xd9); + emit_byte(0xe1); /* take fabs */ + tos_make(d); /* store to destination */ + } + else { + make_tos(d); + emit_byte(0xd9); + emit_byte(0xe1); /* take fabs */ + } } -LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s)) LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s)) { - int ds; + int ds; - if (d!=s) { - usereg(s); - ds=stackpos(s); - emit_byte(0xd9); - emit_byte(0xc0+ds); /* duplicate source */ - emit_byte(0xd9); - emit_byte(0xfc); /* take frndint */ - tos_make(d); /* store to destination */ - } - else { - make_tos(d); - emit_byte(0xd9); - emit_byte(0xfc); /* take frndint */ - } + if (d!=s) { + usereg(s); + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* duplicate source */ + emit_byte(0xd9); + emit_byte(0xfc); /* take frndint */ + tos_make(d); /* store to destination */ + } + else { + make_tos(d); + emit_byte(0xd9); + emit_byte(0xfc); /* take frndint */ + } } 
-LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s)) LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s)) { - int ds; + int ds; - if (d!=s) { - usereg(s); - ds=stackpos(s); - emit_byte(0xd9); - emit_byte(0xc0+ds); /* duplicate source */ - emit_byte(0xd9); - emit_byte(0xff); /* take cos */ - tos_make(d); /* store to destination */ - } - else { - make_tos(d); - emit_byte(0xd9); - emit_byte(0xff); /* take cos */ - } + if (d!=s) { + usereg(s); + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* duplicate source */ + emit_byte(0xd9); + emit_byte(0xff); /* take cos */ + tos_make(d); /* store to destination */ + } + else { + make_tos(d); + emit_byte(0xd9); + emit_byte(0xff); /* take cos */ + } } -LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s)) LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s)) { - int ds; + int ds; + + if (d!=s) { + usereg(s); + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + emit_byte(0xd9); + emit_byte(0xfe); /* fsin sin(x) */ + tos_make(d); /* store to destination */ + } + else { + make_tos(d); + emit_byte(0xd9); + emit_byte(0xfe); /* fsin y=sin(x) */ + } +} + +static const double one = 1; + +LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s)) +{ + int ds; + + usereg(s); + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + emit_byte(0xd9); + emit_byte(0xc0); /* duplicate top of stack. 
Now up to 8 high */ + emit_byte(0xd9); + emit_byte(0xfc); /* frndint int(x) */ + emit_byte(0xd9); + emit_byte(0xc9); /* swap top two elements */ + emit_byte(0xd8); + emit_byte(0xe1); /* fsub frac(x) = x - int(x) */ + emit_byte(0xd9); + emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */ + x86_fadd_m((uintptr) &one); /* Add '1' without using extra stack space */ + emit_byte(0xd9); + emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x) */ + emit_byte(0xdd); + emit_byte(0xd9); /* fstp copy & pop */ + tos_make(d); /* store y=2^x */ +} + +LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s)) +{ + int ds; - if (d!=s) { usereg(s); ds=stackpos(s); emit_byte(0xd9); emit_byte(0xc0+ds); /* duplicate source */ emit_byte(0xd9); - emit_byte(0xfe); /* take sin */ - tos_make(d); /* store to destination */ - } - else { - make_tos(d); + emit_byte(0xea); /* fldl2e log2(e) */ + emit_byte(0xde); + emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */ + emit_byte(0xd9); - emit_byte(0xfe); /* take sin */ - } + emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */ + emit_byte(0xd9); + emit_byte(0xfc); /* rndint */ + emit_byte(0xd9); + emit_byte(0xc9); /* swap top two elements */ + emit_byte(0xd8); + emit_byte(0xe1); /* subtract rounded from original */ + emit_byte(0xd9); + emit_byte(0xf0); /* f2xm1 */ + x86_fadd_m((uintptr)&one); /* Add '1' without using extra stack space */ + emit_byte(0xd9); + emit_byte(0xfd); /* and scale it */ + emit_byte(0xdd); + emit_byte(0xd9); /* take he rounded value off */ + tos_make(d); /* store to destination */ } -LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s)) -static const double one=1; -LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s)) -{ - int ds; - - usereg(s); - ds=stackpos(s); - emit_byte(0xd9); - emit_byte(0xc0+ds); /* duplicate source */ - - emit_byte(0xd9); - emit_byte(0xc0); /* duplicate top of stack. 
Now up to 8 high */ - emit_byte(0xd9); - emit_byte(0xfc); /* rndint */ - emit_byte(0xd9); - emit_byte(0xc9); /* swap top two elements */ - emit_byte(0xd8); - emit_byte(0xe1); /* subtract rounded from original */ - emit_byte(0xd9); - emit_byte(0xf0); /* f2xm1 */ - x86_fadd_m((uintptr)&one); /* Add '1' without using extra stack space */ - emit_byte(0xd9); - emit_byte(0xfd); /* and scale it */ - emit_byte(0xdd); - emit_byte(0xd9); /* take he rounded value off */ - tos_make(d); /* store to destination */ -} -LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s)) - -LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s)) -{ - int ds; - - usereg(s); - ds=stackpos(s); - emit_byte(0xd9); - emit_byte(0xc0+ds); /* duplicate source */ - emit_byte(0xd9); - emit_byte(0xea); /* fldl2e */ - emit_byte(0xde); - emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */ - - emit_byte(0xd9); - emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */ - emit_byte(0xd9); - emit_byte(0xfc); /* rndint */ - emit_byte(0xd9); - emit_byte(0xc9); /* swap top two elements */ - emit_byte(0xd8); - emit_byte(0xe1); /* subtract rounded from original */ - emit_byte(0xd9); - emit_byte(0xf0); /* f2xm1 */ - x86_fadd_m((uintptr)&one); /* Add '1' without using extra stack space */ - emit_byte(0xd9); - emit_byte(0xfd); /* and scale it */ - emit_byte(0xdd); - emit_byte(0xd9); /* take he rounded value off */ - tos_make(d); /* store to destination */ -} -LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s)) - LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s)) { - int ds; + int ds; - usereg(s); - ds=stackpos(s); - emit_byte(0xd9); - emit_byte(0xc0+ds); /* duplicate source */ - emit_byte(0xd9); - emit_byte(0xe8); /* push '1' */ - emit_byte(0xd9); - emit_byte(0xc9); /* swap top two */ - emit_byte(0xd9); - emit_byte(0xf1); /* take 1*log2(x) */ - tos_make(d); /* store to destination */ + usereg(s); + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* duplicate source */ + emit_byte(0xd9); + emit_byte(0xe8); /* push '1' 
*/ + emit_byte(0xd9); + emit_byte(0xc9); /* swap top two */ + emit_byte(0xd9); + emit_byte(0xf1); /* take 1*log2(x) */ + tos_make(d); /* store to destination */ } -LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s)) LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s)) { - int ds; + int ds; - if (d!=s) { - usereg(s); - ds=stackpos(s); - emit_byte(0xd9); - emit_byte(0xc0+ds); /* duplicate source */ - emit_byte(0xd9); - emit_byte(0xe0); /* take fchs */ - tos_make(d); /* store to destination */ - } - else { - make_tos(d); - emit_byte(0xd9); - emit_byte(0xe0); /* take fchs */ - } + if (d!=s) { + usereg(s); + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* duplicate source */ + emit_byte(0xd9); + emit_byte(0xe0); /* take fchs */ + tos_make(d); /* store to destination */ + } + else { + make_tos(d); + emit_byte(0xd9); + emit_byte(0xe0); /* take fchs */ + } } -LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s)) LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s)) { - int ds; + int ds; - usereg(s); - usereg(d); - - if (live.spos[s]==live.tos) { - /* Source is on top of stack */ - ds=stackpos(d); - emit_byte(0xdc); - emit_byte(0xc0+ds); /* add source to dest*/ - } - else { - make_tos(d); - ds=stackpos(s); - - emit_byte(0xd8); - emit_byte(0xc0+ds); /* add source to dest*/ - } + usereg(s); + usereg(d); + + if (live.spos[s]==live.tos) { + /* Source is on top of stack */ + ds=stackpos(d); + emit_byte(0xdc); + emit_byte(0xc0+ds); /* add source to dest*/ + } + else { + make_tos(d); + ds=stackpos(s); + + emit_byte(0xd8); + emit_byte(0xc0+ds); /* add source to dest*/ + } } -LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s)) LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s)) { - int ds; + int ds; - usereg(s); - usereg(d); - - if (live.spos[s]==live.tos) { - /* Source is on top of stack */ - ds=stackpos(d); - emit_byte(0xdc); - emit_byte(0xe8+ds); /* sub source from dest*/ - } - else { - make_tos(d); - ds=stackpos(s); - - emit_byte(0xd8); - emit_byte(0xe0+ds); /* sub src from dest */ - } + usereg(s); + 
usereg(d); + + if (live.spos[s]==live.tos) { + /* Source is on top of stack */ + ds=stackpos(d); + emit_byte(0xdc); + emit_byte(0xe8+ds); /* sub source from dest*/ + } + else { + make_tos(d); + ds=stackpos(s); + + emit_byte(0xd8); + emit_byte(0xe0+ds); /* sub src from dest */ + } } -LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s)) LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s)) { - int ds; + int ds; - usereg(s); - usereg(d); - - make_tos(d); - ds=stackpos(s); + usereg(s); + usereg(d); - emit_byte(0xdd); - emit_byte(0xe0+ds); /* cmp dest with source*/ + make_tos(d); + ds=stackpos(s); + + emit_byte(0xdd); + emit_byte(0xe0+ds); /* cmp dest with source*/ } -LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s)) LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s)) { - int ds; + int ds; - usereg(s); - usereg(d); - - if (live.spos[s]==live.tos) { - /* Source is on top of stack */ - ds=stackpos(d); - emit_byte(0xdc); - emit_byte(0xc8+ds); /* mul dest by source*/ - } - else { - make_tos(d); - ds=stackpos(s); - - emit_byte(0xd8); - emit_byte(0xc8+ds); /* mul dest by source*/ - } + usereg(s); + usereg(d); + + if (live.spos[s]==live.tos) { + /* Source is on top of stack */ + ds=stackpos(d); + emit_byte(0xdc); + emit_byte(0xc8+ds); /* mul dest by source*/ + } + else { + make_tos(d); + ds=stackpos(s); + + emit_byte(0xd8); + emit_byte(0xc8+ds); /* mul dest by source*/ + } } -LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s)) LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s)) { - int ds; + int ds; - usereg(s); - usereg(d); - - if (live.spos[s]==live.tos) { - /* Source is on top of stack */ - ds=stackpos(d); - emit_byte(0xdc); - emit_byte(0xf8+ds); /* div dest by source */ - } - else { - make_tos(d); - ds=stackpos(s); - - emit_byte(0xd8); - emit_byte(0xf0+ds); /* div dest by source*/ - } + usereg(s); + usereg(d); + + if (live.spos[s]==live.tos) { + /* Source is on top of stack */ + ds=stackpos(d); + emit_byte(0xdc); + emit_byte(0xf8+ds); /* div dest by source */ + } + else { + make_tos(d); + 
ds=stackpos(s); + + emit_byte(0xd8); + emit_byte(0xf0+ds); /* div dest by source*/ + } } -LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s)) LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s)) { - int ds; + int ds; - usereg(s); - usereg(d); - - make_tos2(d,s); - ds=stackpos(s); + usereg(s); + usereg(d); - if (ds!=1) { - printf("Failed horribly in raw_frem_rr! ds is %d\n",ds); - abort(); - } - emit_byte(0xd9); - emit_byte(0xf8); /* take rem from dest by source */ + make_tos2(d,s); + ds=stackpos(s); + + if (ds!=1) { + jit_abort("Failed horribly in raw_frem_rr! ds is %d",ds); + } + emit_byte(0xd9); + emit_byte(0xf8); /* take rem from dest by source */ } -LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s)) LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s)) { - int ds; + int ds; - usereg(s); - usereg(d); - - make_tos2(d,s); - ds=stackpos(s); + usereg(s); + usereg(d); - if (ds!=1) { - printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds); - abort(); - } - emit_byte(0xd9); - emit_byte(0xf5); /* take rem1 from dest by source */ + make_tos2(d,s); + ds=stackpos(s); + + if (ds!=1) { + jit_abort("Failed horribly in raw_frem1_rr! 
ds is %d",ds); + } + emit_byte(0xd9); + emit_byte(0xf5); /* take rem1 from dest by source */ } -LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s)) LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r)) { - make_tos(r); - emit_byte(0xd9); /* ftst */ - emit_byte(0xe4); + make_tos(r); + emit_byte(0xd9); /* ftst */ + emit_byte(0xe4); +} + +LOWFUNC(NONE,NONE,2,raw_fetoxM1_rr,(FW d, FR s)) +{ + int ds; + + if (s==d) + make_tos(s); + else { + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + } + emit_byte(0xd9); + emit_byte(0xea); /* fldl2e log2(e) */ + emit_byte(0xd8); + emit_byte(0xc9); /* fmul x*log2(e) */ + emit_byte(0xdd); + emit_byte(0xd1); /* fst copy up */ + emit_byte(0xd9); + emit_byte(0xfc); /* frndint int(x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap top two elements */ + emit_byte(0xd8); + emit_byte(0xe1); /* fsub x*log2(e) - int(x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */ + emit_byte(0xd9); + emit_byte(0xfd); /* fscale ((2^frac(x))-1)*2^int(x*log2(e)) */ + emit_byte(0xdd); + emit_byte(0xd9); /* fstp copy & pop */ + if (s!=d) + tos_make(d); /* store y=(e^x)-1 */ +} + +LOWFUNC(NONE,NONE,2,raw_ftentox_rr,(FW d, FR s)) +{ + int ds; + + if (s==d) + make_tos(s); + else { + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + } + emit_byte(0xd9); + emit_byte(0xe9); /* fldl2t log2(10) */ + emit_byte(0xd8); + emit_byte(0xc9); /* fmul x*log2(10) */ + emit_byte(0xdd); + emit_byte(0xd1); /* fst copy up */ + emit_byte(0xd9); + emit_byte(0xfc); /* frndint int(x*log2(10)) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap top two elements */ + emit_byte(0xd8); + emit_byte(0xe1); /* fsub x*log2(10) - int(x*log2(10)) */ + emit_byte(0xd9); + emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */ + x86_fadd_m((uintptr) &one); + emit_byte(0xd9); + emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x*log2(10)) */ + emit_byte(0xdd); + emit_byte(0xd9); /* fstp copy & pop */ + if (s!=d) + tos_make(d); /* store y=10^x */ +} + 
+LOWFUNC(NONE,NONE,3,raw_fsincos_rr,(FW d, FW c, FR s)) +{ + int ds; + + if (s==d) { + //write_log (_T("FSINCOS src = dest\n")); + make_tos(s); + emit_byte(0xd9); + emit_byte(0xfb); /* fsincos sin(x) push cos(x) */ + tos_make(c); /* store cos(x) to c */ + return; + } + + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + emit_byte(0xd9); + emit_byte(0xfb); /* fsincos sin(x) push cos(x) */ + if (live.spos[c]<0) { + if (live.spos[d]<0) { /* occupy both regs directly */ + live.tos++; + live.spos[d]=live.tos; + live.onstack[live.tos]=d; /* sin(x) comes first */ + live.tos++; + live.spos[c]=live.tos; + live.onstack[live.tos]=c; + } + else { + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap cos(x) with sin(x) */ + emit_byte(0xdd); /* store sin(x) to d & pop */ + emit_byte(0xd8+(live.tos+2)-live.spos[d]); + live.tos++; /* occupy a reg for cos(x) here */ + live.spos[c]=live.tos; + live.onstack[live.tos]=c; + } + } + else { + emit_byte(0xdd); /* store cos(x) to c & pop */ + emit_byte(0xd8+(live.tos+2)-live.spos[c]); + tos_make(d); /* store sin(x) to destination */ + } +} + +LOWFUNC(NONE,NONE,2,raw_fscale_rr,(FRW d, FR s)) +{ + int ds; + + if (live.spos[d]==live.tos && live.spos[s]==live.tos-1) { + //write_log (_T("fscale found x in TOS-1 and y in TOS\n")); + emit_byte(0xd9); + emit_byte(0xfd); /* fscale y*(2^x) */ + } + else { + make_tos(s); /* tos=x */ + ds=stackpos(d); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld y */ + emit_byte(0xd9); + emit_byte(0xfd); /* fscale y*(2^x) */ + tos_make(d); /* store y=y*(2^x) */ + } +} + +LOWFUNC(NONE,NONE,2,raw_ftan_rr,(FW d, FR s)) +{ + int ds; + + if (d!=s) { + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + emit_byte(0xd9); + emit_byte(0xf2); /* fptan tan(x)=y/1.0 */ + emit_byte(0xdd); + emit_byte(0xd8); /* fstp pop 1.0 */ + tos_make(d); /* store to destination */ + } + else { + make_tos(d); + emit_byte(0xd9); + emit_byte(0xf2); /* fptan tan(x)=y/1.0 */ + emit_byte(0xdd); + 
emit_byte(0xd8); /* fstp pop 1.0 */ + } +} + +#ifdef CPU_x86_64 +#define REX64() emit_byte(0x48) +#else +#define REX64() +#endif + +LOWFUNC(NONE,NONE,1,raw_fcuts_r,(FRW r)) +{ + make_tos(r); /* TOS = r */ + REX64(); + emit_byte(0x83); + emit_byte(0xc4); + emit_byte(0xfc); /* add -4 to esp */ + emit_byte(0xd9); + emit_byte(0x1c); + emit_byte(0x24); /* fstp store r as SINGLE to [esp] and pop */ + emit_byte(0xd9); + emit_byte(0x04); + emit_byte(0x24); /* fld load r as SINGLE from [esp] */ + emit_byte(0x9b); /* let the CPU wait on FPU exceptions */ + REX64(); + emit_byte(0x83); + emit_byte(0xc4); + emit_byte(0x04); /* add +4 to esp */ +} + +LOWFUNC(NONE,NONE,1,raw_fcut_r,(FRW r)) +{ + make_tos(r); /* TOS = r */ + REX64(); + emit_byte(0x83); + emit_byte(0xc4); + emit_byte(0xf8); /* add -8 to esp */ + emit_byte(0xdd); + emit_byte(0x1c); + emit_byte(0x24); /* fstp store r as DOUBLE to [esp] and pop */ + emit_byte(0xdd); + emit_byte(0x04); + emit_byte(0x24); /* fld load r as DOUBLE from [esp] */ + emit_byte(0x9b); /* let the CPU wait on FPU exceptions */ + REX64(); + emit_byte(0x83); + emit_byte(0xc4); + emit_byte(0x08); /* add +8 to esp */ +} + +LOWFUNC(NONE,NONE,2,raw_fgetexp_rr,(FW d, FR s)) +{ + int ds; + + if (d!=s) { + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + emit_byte(0xd9); + emit_byte(0xf4); /* fxtract exp push man */ + emit_byte(0xdd); + emit_byte(0xd8); /* fstp just pop man */ + tos_make(d); /* store exp to destination */ + } + else { + make_tos(d); /* tos=x=y */ + emit_byte(0xd9); + emit_byte(0xf4); /* fxtract exp push man */ + emit_byte(0xdd); + emit_byte(0xd8); /* fstp just pop man */ + } +} + +LOWFUNC(NONE,NONE,2,raw_fgetman_rr,(FW d, FR s)) +{ + int ds; + + if (d!=s) { + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + emit_byte(0xd9); + emit_byte(0xf4); /* fxtract exp push man */ + emit_byte(0xdd); + emit_byte(0xd9); /* fstp copy man up & pop */ + tos_make(d); /* store man to destination */ + } + else { + 
make_tos(d); /* tos=x=y */ + emit_byte(0xd9); + emit_byte(0xf4); /* fxtract exp push man */ + emit_byte(0xdd); + emit_byte(0xd9); /* fstp copy man up & pop */ + } +} + +LOWFUNC(NONE,NONE,2,raw_flogN_rr,(FW d, FR s)) +{ + int ds; + + if (s==d) + make_tos(s); + else { + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + } + emit_byte(0xd9); + emit_byte(0xed); /* fldln2 logN(2) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap logN(2) with x */ + emit_byte(0xd9); + emit_byte(0xf1); /* fyl2x logN(2)*log2(x) */ + if (s!=d) + tos_make(d); /* store y=logN(x) */ +} + +LOWFUNC(NONE,NONE,2,raw_flogNP1_rr,(FW d, FR s)) +{ + int ds; + + if (s==d) + make_tos(s); + else { + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + } + emit_byte(0xd9); + emit_byte(0xed); /* fldln2 logN(2) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap logN(2) with x */ + emit_byte(0xd9); + emit_byte(0xf9); /* fyl2xp1 logN(2)*log2(x+1) */ + if (s!=d) + tos_make(d); /* store y=logN(x+1) */ +} + +LOWFUNC(NONE,NONE,2,raw_flog10_rr,(FW d, FR s)) +{ + int ds; + + if (s==d) + make_tos(s); + else { + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + } + emit_byte(0xd9); + emit_byte(0xec); /* fldlg2 log10(2) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap log10(2) with x */ + emit_byte(0xd9); + emit_byte(0xf1); /* fyl2x log10(2)*log2(x) */ + if (s!=d) + tos_make(d); /* store y=log10(x) */ +} + +LOWFUNC(NONE,NONE,2,raw_fasin_rr,(FW d, FR s)) +{ + int ds; + + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + emit_byte(0xd8); + emit_byte(0xc8); /* fmul x*x */ + emit_byte(0xd9); + emit_byte(0xe8); /* fld 1.0 */ + emit_byte(0xde); + emit_byte(0xe1); /* fsubrp 1 - (x^2) */ + emit_byte(0xd9); + emit_byte(0xfa); /* fsqrt sqrt(1-(x^2)) */ + emit_byte(0xd9); + emit_byte(0xc1+ds); /* fld x again */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap x with sqrt(1-(x^2)) */ + emit_byte(0xd9); + emit_byte(0xf3); /* fpatan 
atan(x/sqrt(1-(x^2))) & pop */ + tos_make(d); /* store y=asin(x) */ +} + +static uae_u32 const pihalf[] = {0x2168c234, 0xc90fdaa2, 0x3fff}; // LSB=0 to get acos(1)=0 + +LOWFUNC(NONE,NONE,2,raw_facos_rr,(FW d, FR s)) +{ + int ds; + + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + emit_byte(0xd8); + emit_byte(0xc8); /* fmul x*x */ + emit_byte(0xd9); + emit_byte(0xe8); /* fld 1.0 */ + emit_byte(0xde); + emit_byte(0xe1); /* fsubrp 1 - (x^2) */ + emit_byte(0xd9); + emit_byte(0xfa); /* fsqrt sqrt(1-(x^2)) */ + emit_byte(0xd9); + emit_byte(0xc1+ds); /* fld x again */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap x with sqrt(1-(x^2)) */ + emit_byte(0xd9); + emit_byte(0xf3); /* fpatan atan(x/sqrt(1-(x^2))) & pop */ + raw_fldt((uintptr) &pihalf); /* fld load pi/2 from pihalf */ + emit_byte(0xde); + emit_byte(0xe1); /* fsubrp pi/2 - asin(x) & pop */ + tos_make(d); /* store y=acos(x) */ +} + +LOWFUNC(NONE,NONE,2,raw_fatan_rr,(FW d, FR s)) +{ + int ds; + + if (s==d) + make_tos(s); + else { + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + } + emit_byte(0xd9); + emit_byte(0xe8); /* fld 1.0 */ + emit_byte(0xd9); + emit_byte(0xf3); /* fpatan atan(x)/1 & pop*/ + if (s!=d) + tos_make(d); /* store y=atan(x) */ +} + +LOWFUNC(NONE,NONE,2,raw_fatanh_rr,(FW d, FR s)) +{ + int ds; + + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + emit_byte(0xd9); + emit_byte(0xe8); /* fld 1.0 */ + emit_byte(0xdc); + emit_byte(0xc1); /* fadd 1 + x */ + emit_byte(0xd8); + emit_byte(0xe2+ds); /* fsub 1 - x */ + emit_byte(0xde); + emit_byte(0xf9); /* fdivp (1+x)/(1-x) */ + emit_byte(0xd9); + emit_byte(0xed); /* fldl2e logN(2) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap logN(2) with (1+x)/(1-x) */ + emit_byte(0xd9); + emit_byte(0xf1); /* fyl2x logN(2)*log2((1+x)/(1-x)) pop */ + emit_byte(0xd9); + emit_byte(0xe8); /* fld 1.0 */ + emit_byte(0xd9); + emit_byte(0xe0); /* fchs -1.0 */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch 
swap */ + emit_byte(0xd9); + emit_byte(0xfd); /* fscale logN((1+x)/(1-x)) * 2^(-1) */ + emit_byte(0xdd); + emit_byte(0xd9); /* fstp copy & pop */ + tos_make(d); /* store y=atanh(x) */ +} + +LOWFUNC(NONE,NONE,2,raw_fsinh_rr,(FW d, FR s)) +{ + int ds,tr; + + tr=live.onstack[live.tos+3]; + if (s==d) + make_tos(s); + else { + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + } + emit_byte(0xd9); + emit_byte(0xea); /* fldl2e log2(e) */ + emit_byte(0xd8); + emit_byte(0xc9); /* fmul x*log2(e) */ + emit_byte(0xdd); + emit_byte(0xd1); /* fst copy x*log2(e) */ + if (tr>=0) { + emit_byte(0xd9); + emit_byte(0xca); /* fxch swap with temp-reg */ + REX64(); + emit_byte(0x83); + emit_byte(0xc4); + emit_byte(0xf4); /* add -12 to esp */ + emit_byte(0xdb); + emit_byte(0x3c); + emit_byte(0x24); /* fstp store temp-reg to [esp] & pop */ + } + emit_byte(0xd9); + emit_byte(0xe0); /* fchs -x*log2(e) */ + emit_byte(0xd9); + emit_byte(0xc0); /* fld -x*log2(e) again */ + emit_byte(0xd9); + emit_byte(0xfc); /* frndint int(-x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap */ + emit_byte(0xd8); + emit_byte(0xe1); /* fsub -x*log2(e) - int(-x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */ + x86_fadd_m((uintptr) &one); + emit_byte(0xd9); + emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xca); /* fxch swap e^-x with x*log2(e) in tr */ + emit_byte(0xdd); + emit_byte(0xd1); /* fst copy x*log2(e) */ + emit_byte(0xd9); + emit_byte(0xfc); /* frndint int(x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap */ + emit_byte(0xd8); + emit_byte(0xe1); /* fsub x*log2(e) - int(x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */ + x86_fadd_m((uintptr) &one); + emit_byte(0xd9); + emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x*log2(e)) */ + emit_byte(0xdd); + emit_byte(0xd9); /* fstp copy e^x & pop */ + if (tr>=0) { + emit_byte(0xdb); + emit_byte(0x2c); + 
emit_byte(0x24); /* fld load temp-reg from [esp] */ + emit_byte(0xd9); + emit_byte(0xca); /* fxch swap temp-reg with e^-x in tr */ + emit_byte(0xde); + emit_byte(0xe9); /* fsubp (e^x)-(e^-x) */ + REX64(); + emit_byte(0x83); + emit_byte(0xc4); + emit_byte(0x0c); /* delayed add +12 to esp */ + } + else { + emit_byte(0xde); + emit_byte(0xe1); /* fsubrp (e^x)-(e^-x) */ + } + emit_byte(0xd9); + emit_byte(0xe8); /* fld 1.0 */ + emit_byte(0xd9); + emit_byte(0xe0); /* fchs -1.0 */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap */ + emit_byte(0xd9); + emit_byte(0xfd); /* fscale ((e^x)-(e^-x))/2 */ + emit_byte(0xdd); + emit_byte(0xd9); /* fstp copy & pop */ + if (s!=d) + tos_make(d); /* store y=sinh(x) */ +} + +LOWFUNC(NONE,NONE,2,raw_fcosh_rr,(FW d, FR s)) +{ + int ds,tr; + + tr=live.onstack[live.tos+3]; + if (s==d) + make_tos(s); + else { + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + } + emit_byte(0xd9); + emit_byte(0xea); /* fldl2e log2(e) */ + emit_byte(0xd8); + emit_byte(0xc9); /* fmul x*log2(e) */ + emit_byte(0xdd); + emit_byte(0xd1); /* fst copy x*log2(e) */ + if (tr>=0) { + emit_byte(0xd9); + emit_byte(0xca); /* fxch swap with temp-reg */ + REX64(); + emit_byte(0x83); + emit_byte(0xc4); + emit_byte(0xf4); /* add -12 to esp */ + emit_byte(0xdb); + emit_byte(0x3c); + emit_byte(0x24); /* fstp store temp-reg to [esp] & pop */ + } + emit_byte(0xd9); + emit_byte(0xe0); /* fchs -x*log2(e) */ + emit_byte(0xd9); + emit_byte(0xc0); /* fld -x*log2(e) again */ + emit_byte(0xd9); + emit_byte(0xfc); /* frndint int(-x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap */ + emit_byte(0xd8); + emit_byte(0xe1); /* fsub -x*log2(e) - int(-x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */ + x86_fadd_m((uintptr) &one); + emit_byte(0xd9); + emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xca); /* fxch swap e^-x with x*log2(e) in tr */ + emit_byte(0xdd); + emit_byte(0xd1); /* 
fst copy x*log2(e) */ + emit_byte(0xd9); + emit_byte(0xfc); /* frndint int(x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap */ + emit_byte(0xd8); + emit_byte(0xe1); /* fsub x*log2(e) - int(x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */ + x86_fadd_m((uintptr) &one); + emit_byte(0xd9); + emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x*log2(e)) */ + emit_byte(0xdd); + emit_byte(0xd9); /* fstp copy e^x & pop */ + if (tr>=0) { + emit_byte(0xdb); + emit_byte(0x2c); + emit_byte(0x24); /* fld load temp-reg from [esp] */ + emit_byte(0xd9); + emit_byte(0xca); /* fxch swap temp-reg with e^-x in tr */ + REX64(); + emit_byte(0x83); + emit_byte(0xc4); + emit_byte(0x0c); /* delayed add +12 to esp */ + } + emit_byte(0xde); + emit_byte(0xc1); /* faddp (e^x)+(e^-x) */ + emit_byte(0xd9); + emit_byte(0xe8); /* fld 1.0 */ + emit_byte(0xd9); + emit_byte(0xe0); /* fchs -1.0 */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap */ + emit_byte(0xd9); + emit_byte(0xfd); /* fscale ((e^x)+(e^-x))/2 */ + emit_byte(0xdd); + emit_byte(0xd9); /* fstp copy & pop */ + if (s!=d) + tos_make(d); /* store y=cosh(x) */ +} + +LOWFUNC(NONE,NONE,2,raw_ftanh_rr,(FW d, FR s)) +{ + int ds,tr; + + tr=live.onstack[live.tos+3]; + if (s==d) + make_tos(s); + else { + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + } + emit_byte(0xd9); + emit_byte(0xea); /* fldl2e log2(e) */ + emit_byte(0xd8); + emit_byte(0xc9); /* fmul x*log2(e) */ + emit_byte(0xdd); + emit_byte(0xd1); /* fst copy x*log2(e) */ + if (tr>=0) { + emit_byte(0xd9); + emit_byte(0xca); /* fxch swap with temp-reg */ + REX64(); + emit_byte(0x83); + emit_byte(0xc4); + emit_byte(0xf4); /* add -12 to esp */ + emit_byte(0xdb); + emit_byte(0x3c); + emit_byte(0x24); /* fstp store temp-reg to [esp] & pop */ + } + emit_byte(0xd9); + emit_byte(0xe0); /* fchs -x*log2(e) */ + emit_byte(0xd9); + emit_byte(0xc0); /* fld -x*log2(e) again */ + emit_byte(0xd9); + emit_byte(0xfc); /* frndint 
int(-x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap */ + emit_byte(0xd8); + emit_byte(0xe1); /* fsub -x*log2(e) - int(-x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */ + x86_fadd_m((uintptr) &one); + emit_byte(0xd9); + emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xca); /* fxch swap e^-x with x*log2(e) */ + emit_byte(0xdd); + emit_byte(0xd1); /* fst copy x*log2(e) */ + emit_byte(0xd9); + emit_byte(0xfc); /* frndint int(x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap */ + emit_byte(0xd8); + emit_byte(0xe1); /* fsub x*log2(e) - int(x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */ + x86_fadd_m((uintptr) &one); + emit_byte(0xd9); + emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x*log2(e)) */ + emit_byte(0xdd); + emit_byte(0xd1); /* fst copy e^x */ + emit_byte(0xd8); + emit_byte(0xc2); /* fadd (e^x)+(e^-x) */ + emit_byte(0xd9); + emit_byte(0xca); /* fxch swap with e^-x */ + emit_byte(0xde); + emit_byte(0xe9); /* fsubp (e^x)-(e^-x) */ + if (tr>=0) { + emit_byte(0xdb); + emit_byte(0x2c); + emit_byte(0x24); /* fld load temp-reg from [esp] */ + emit_byte(0xd9); + emit_byte(0xca); /* fxch swap temp-reg with e^-x in tr */ + emit_byte(0xde); + emit_byte(0xf9); /* fdivp ((e^x)-(e^-x))/((e^x)+(e^-x)) */ + REX64(); + emit_byte(0x83); + emit_byte(0xc4); + emit_byte(0x0c); /* delayed add +12 to esp */ + } + else { + emit_byte(0xde); + emit_byte(0xf1); /* fdivrp ((e^x)-(e^-x))/((e^x)+(e^-x)) */ + } + if (s!=d) + tos_make(d); /* store y=tanh(x) */ } -LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r)) /* %eax register is clobbered if target processor doesn't support fucomi */ #define FFLAG_NREG_CLOBBER_CONDITION !have_cmov #define FFLAG_NREG EAX_INDEX -static __inline__ void raw_fflags_into_flags(int r) +static inline void raw_fflags_into_flags(int r) { - int p; + int p; - usereg(r); - p=stackpos(r); + usereg(r); + p=stackpos(r); - emit_byte(0xd9); - 
emit_byte(0xee); /* Push 0 */ - emit_byte(0xd9); - emit_byte(0xc9+p); /* swap top two around */ + emit_byte(0xd9); + emit_byte(0xee); /* Push 0 */ + emit_byte(0xd9); + emit_byte(0xc9+p); /* swap top two around */ if (have_cmov) { // gb-- fucomi is for P6 cores only, not K6-2 then... - emit_byte(0xdb); - emit_byte(0xe9+p); /* fucomi them */ + emit_byte(0xdb); + emit_byte(0xe9+p); /* fucomi them */ } else { emit_byte(0xdd); @@ -4753,6 +3287,6 @@ static __inline__ void raw_fflags_into_flags(int r) emit_byte(0xe0); /* fstsw ax */ raw_sahf(0); /* sahf */ } - emit_byte(0xdd); - emit_byte(0xd9+p); /* store value back, and get rid of 0 */ + emit_byte(0xdd); + emit_byte(0xd9+p); /* store value back, and get rid of 0 */ } diff --git a/BasiliskII/src/uae_cpu/compiler/codegen_x86.h b/BasiliskII/src/uae_cpu/compiler/codegen_x86.h index 08538b7a..0eaef50a 100644 --- a/BasiliskII/src/uae_cpu/compiler/codegen_x86.h +++ b/BasiliskII/src/uae_cpu/compiler/codegen_x86.h @@ -1,36 +1,34 @@ -/******************** -*- mode: C; tab-width: 8 -*- ******************** +/* + * compiler/codegen_x86.h - IA-32 and AMD64 code generator * - * Run-time assembler for IA-32 and AMD64 + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II * - ***********************************************************************/ - - -/*********************************************************************** + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. * - * This file is derived from CCG. 
+ * JIT compiler m68k -> IA-32 and AMD64 * - * Copyright 1999, 2000, 2001, 2002, 2003 Ian Piumarta + * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * This file is derived from CCG, copyright 1999-2003 Ian Piumarta + * Adaptation for Basilisk II and improvements, copyright 2000-2004 Gwenole Beauchesne + * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c * - * Adaptations and enhancements for AMD64 support, Copyright 2003-2008 - * Gwenole Beauchesne + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. * - * Basilisk II (C) 1997-2008 Christian Bauer - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - ***********************************************************************/ + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ #ifndef X86_RTASM_H #define X86_RTASM_H @@ -42,6 +40,7 @@ * TODO * * o Fix FIXMEs + * o i387 FPU instructions * o SSE instructions * o Optimize for cases where register numbers are not integral constants */ @@ -110,7 +109,6 @@ enum { X86_Reg64_Base = 0x50, X86_RegMMX_Base = 0x60, X86_RegXMM_Base = 0x70, - X86_RegFPU_Base = 0x80 #else X86_NOREG = -1, X86_Reg8L_Base = 0, @@ -120,7 +118,6 @@ enum { X86_Reg64_Base = 0, X86_RegMMX_Base = 0, X86_RegXMM_Base = 0, - X86_RegFPU_Base = 0 #endif }; @@ -172,12 +169,6 @@ enum { X86_XMM12, X86_XMM13, X86_XMM14, X86_XMM15 }; -enum { - X86_ST0 = X86_RegFPU_Base, - X86_ST1, X86_ST2, X86_ST3, - X86_ST4, X86_ST5, X86_ST6, X86_ST7 -}; - /* Register control and access * * _r0P(R) Null register? @@ -194,23 +185,21 @@ enum { * _r8(R) 64-bit register ID * _rM(R) MMX register ID * _rX(R) XMM register ID - * _rF(R) FPU register ID * _rA(R) Address register ID used for EA calculation */ -#define _rST0P(R) ((int)(R) == (int)X86_ST0) #define _r0P(R) ((int)(R) == (int)X86_NOREG) -#define _rIP(R) (X86_TARGET_64BIT ? ((int)(R) == (int)X86_RIP) : 0) +#define _rIP(R) ((int)(R) == (int)X86_RIP) #if X86_FLAT_REGISTERS #define _rC(R) ((R) & 0xf0) #define _rR(R) ((R) & 0x0f) #define _rN(R) ((R) & 0x07) -#define _rXP(R) (((R) > 0 && _rR(R) > 7) ? 1 : 0) +#define _rXP(R) ((R) > 0 && _rR(R) > 7) #else #define _rN(R) ((R) & 0x07) #define _rR(R) (int(R)) -#define _rXP(R) ((_rR(R) > 7 && _rR(R) < 16) ? 
1 : 0) +#define _rXP(R) (_rR(R) > 7 && _rR(R) < 16) #endif #if !defined(_ASM_SAFETY) || ! X86_FLAT_REGISTERS @@ -221,7 +210,6 @@ enum { #define _rA(R) _rN(R) #define _rM(R) _rN(R) #define _rX(R) _rN(R) -#define _rF(R) _rN(R) #else #define _r1(R) ( ((_rC(R) & (X86_Reg8L_Base | X86_Reg8H_Base)) != 0) ? _rN(R) : x86_emit_failure0( "8-bit register required")) #define _r2(R) ( (_rC(R) == X86_Reg16_Base) ? _rN(R) : x86_emit_failure0("16-bit register required")) @@ -232,7 +220,6 @@ enum { ( (_rC(R) == X86_Reg32_Base) ? _rN(R) : x86_emit_failure0("not a valid 32-bit base/index expression")) ) #define _rM(R) ( (_rC(R) == X86_RegMMX_Base) ? _rN(R) : x86_emit_failure0("MMX register required")) #define _rX(R) ( (_rC(R) == X86_RegXMM_Base) ? _rN(R) : x86_emit_failure0("SSE register required")) -#define _rF(R) ( (_rC(R) == X86_RegFPU_Base) ? _rN(R) : x86_emit_failure0("FPU register required")) #endif #define _rSP() (X86_TARGET_64BIT ? (int)X86_RSP : (int)X86_ESP) @@ -254,16 +241,21 @@ typedef unsigned short _us; typedef signed int _sl; typedef unsigned int _ul; -#define _UC(X) ((_uc )(unsigned long)(X)) -#define _US(X) ((_us )(unsigned long)(X)) -#define _SL(X) ((_sl )(unsigned long)(X)) -#define _UL(X) ((_ul )(unsigned long)(X)) +#define _UC(X) ((_uc )(uintptr_t)(X)) +#define _US(X) ((_us )(uintptr_t)(X)) +#define _SL(X) ((_sl )(uintptr_t)(X)) +#define _UL(X) ((_ul )(uintptr_t)(X)) #define _PUC(X) ((_uc *)(X)) #define _PUS(X) ((_us *)(X)) #define _PSL(X) ((_sl *)(X)) #define _PUL(X) ((_ul *)(X)) +#undef _B +#undef _W +#undef _L +#undef _Q + #define _B(B) x86_emit_byte((B)) #define _W(W) x86_emit_word((W)) #define _L(L) x86_emit_long((L)) @@ -410,22 +402,54 @@ typedef unsigned int _ul; /* --- Memory subformats - urgh! 
------------------------------------------- */ /* _r_D() is RIP addressing mode if X86_TARGET_64BIT, use _r_DSIB() instead */ -#define _r_D( R, D ) (_Mrm(_b00,_rN(R),_b101 ) ,_L((_sl)(D))) -#define _r_DSIB(R, D ) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(1),_b100 ,_b101 ),_L((_sl)(D))) +#define _r_D( R, D ) (_Mrm(_b00,_rN(R),_b101 ) ,_L((uae_u32)(D))) +#define _r_DSIB(R, D ) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(1),_b100 ,_b101 ),_L((uae_u32)(D))) #define _r_0B( R, B ) (_Mrm(_b00,_rN(R),_rA(B)) ) #define _r_0BIS(R, B,I,S) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_rA(B)) ) -#define _r_1B( R, D,B ) (_Mrm(_b01,_rN(R),_rA(B)) ,_B((_sc)(D))) -#define _r_1BIS(R, D,B,I,S) (_Mrm(_b01,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_rA(B)),_B((_sc)(D))) -#define _r_4B( R, D,B ) (_Mrm(_b10,_rN(R),_rA(B)) ,_L((_sl)(D))) -#define _r_4IS( R, D,I,S) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_b101 ),_L((_sl)(D))) -#define _r_4BIS(R, D,B,I,S) (_Mrm(_b10,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_rA(B)),_L((_sl)(D))) +#define _r_1B( R, D,B ) (_Mrm(_b01,_rN(R),_rA(B)) ,_B((uae_u32)(D))) +#define _r_1BIS(R, D,B,I,S) (_Mrm(_b01,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_rA(B)),_B((uae_u32)(D))) +#define _r_4B( R, D,B ) (_Mrm(_b10,_rN(R),_rA(B)) ,_L((uae_u32)(D))) +#define _r_4IS( R, D,I,S) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_b101 ),_L((uae_u32)(D))) +#define _r_4BIS(R, D,B,I,S) (_Mrm(_b10,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_rA(B)),_L((uae_u32)(D))) #define _r_DB( R, D,B ) ((_s0P(D) && (!_rbp13P(B)) ? _r_0B (R, B ) : (_s8P(D) ? _r_1B( R,D,B ) : _r_4B( R,D,B )))) #define _r_DBIS(R, D,B,I,S) ((_s0P(D) && (!_rbp13P(B)) ? _r_0BIS(R, B,I,S) : (_s8P(D) ? 
_r_1BIS(R,D,B,I,S) : _r_4BIS(R,D,B,I,S)))) /* Use RIP-addressing in 64-bit mode, if possible */ -#define _x86_RIP_addressing_possible(D,O) (X86_RIP_RELATIVE_ADDR && \ - ((uintptr)x86_get_target() + 4 + (O) - (D) <= 0xffffffff)) +#define _x86_RIP_addressing_possible(D,O) (X86_RIP_RELATIVE_ADDR && x86_RIP_addressing_possible(D, O)) + +static inline int x86_RIP_addressing_possible(uintptr addr, uintptr offset) +{ +#if X86_TARGET_64BIT + /* + * address of the next instruction. + * The opcode has already been emmitted, + * so this is the size of an 32bit displacement + + * the size of any immediate value that is part of the instruction (offset), + */ + uintptr dst = (uintptr)get_target() + 4 + offset; + intptr disp = dst - addr; + int ok = disp >= -0x80000000LL && disp <= 0x7fffffffLL; + /* fprintf(stderr, "x86_RIP_addressing_possible: %llx - %llx %16llx = %d\n", (unsigned long long)dst, (unsigned long long)addr, (long long)disp, ok); */ + return ok; +#else + UNUSED(addr); + UNUSED(offset); + return 0; +#endif +} + + +static inline int x86_DISP32_addressing_possible(uintptr addr) +{ +#if X86_TARGET_64BIT + return addr <= 0xFFFFFFFFULL; +#else + UNUSED(addr); + return 1; +#endif +} + #define _r_X( R, D,B,I,S,O) (_r0P(I) ? (_r0P(B) ? (!X86_TARGET_64BIT ? _r_D(R,D) : \ (_x86_RIP_addressing_possible(D, O) ? \ @@ -450,8 +474,8 @@ typedef unsigned int _ul; #define _d16() ( _B(0x66 ) ) #define _O( OP ) ( _B( OP ) ) #define _Or( OP,R ) ( _B( (OP)|_r(R)) ) -#define _OO( OP ) ( _B((OP)>>8), _B(( (OP) )&0xff) ) -#define _OOr( OP,R ) ( _B((OP)>>8), _B(( (OP)|_r(R))&0xff) ) +#define _OO( OP ) ( _B((OP)>>8), _B( (uae_u8)(OP) ) ) +#define _OOr( OP,R ) ( _B((OP)>>8), _B( (OP)|_r(R)) ) #define _Os( OP,B ) ( _s8P(B) ? _B(((OP)|_b10)) : _B(OP) ) #define _sW( W ) ( _s8P(W) ? _B(W):_W(W) ) #define _sL( L ) ( _s8P(L) ? 
_B(L):_L(L) ) @@ -460,7 +484,6 @@ typedef unsigned int _ul; #define _O_B( OP ,B ) ( _O ( OP ) ,_B(B) ) #define _O_W( OP ,W ) ( _O ( OP ) ,_W(W) ) #define _O_L( OP ,L ) ( _O ( OP ) ,_L(L) ) -#define _OO_L( OP ,L ) ( _OO ( OP ) ,_L(L) ) #define _O_D8( OP ,D ) ( _O ( OP ) ,_D8(D) ) #define _O_D32( OP ,D ) ( _O ( OP ) ,_D32(D) ) #define _OO_D32( OP ,D ) ( _OO ( OP ) ,_D32(D) ) @@ -494,8 +517,10 @@ typedef unsigned int _ul; /* --- REX prefixes -------------------------------------------------------- */ +#undef _VOID + #define _VOID() ((void)0) -#define _BIT(X) ((X) ? 1 : 0) +#define _BIT(X) (!!(X)) #define _d64(W,R,X,B) (_B(0x40|(W)<<3|(R)<<2|(X)<<1|(B))) #define __REXwrxb(L,W,R,X,B) ((W|R|X|B) || (L) ? _d64(W,R,X,B) : _VOID()) @@ -555,8 +580,8 @@ enum { /* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ #define _ALUBrr(OP,RS, RD) (_REXBrr(RS, RD), _O_Mrm (((OP) << 3) ,_b11,_r1(RS),_r1(RD) )) -#define _ALUBmr(OP, MD, MB, MI, MS, RD) (_REXBmr(MB, MI, RD), _O_r_X (((OP) << 3) + 2 ,_r1(RD) ,MD,MB,MI,MS )) -#define _ALUBrm(OP, RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _O_r_X (((OP) << 3) ,_r1(RS) ,MD,MB,MI,MS )) +#define _ALUBmr(OP, MD, MB, MI, MS, RD) (_REXBmr(MB, MI, RD), _O_r_X (((OP) << 3) + 2,_r1(RD) ,MD,MB,MI,MS )) +#define _ALUBrm(OP, RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _O_r_X (((OP) << 3) , ,_r1(RS) ,MD,MB,MI,MS )) #define _ALUBir(OP, IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_AL) ? 
\ (_REXBrr(0, RD), _O_B (((OP) << 3) + 4 ,_su8(IM))) : \ (_REXBrr(0, RD), _O_Mrm_B (0x80 ,_b11,OP ,_r1(RD) ,_su8(IM))) ) @@ -1033,7 +1058,7 @@ enum { #define _BTQrm(OP, RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0f83|((OP)<<3) ,_r8(RS) ,MD,MB,MI,MS )) #define BTWir(IM, RD) _BTWir(X86_BT, IM, RD) -#define BTWim(IM, MD, MB, MI, MS) _BTWim(X86_BT, IM, MD, MB, MI, MS) +#define BTWim(IM, MD, MB, MI, MS) _BTWim(X86_BT, IM, MD, MI, MS) #define BTWrr(RS, RD) _BTWrr(X86_BT, RS, RD) #define BTWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BT, RS, MD, MB, MI, MS) @@ -1048,7 +1073,7 @@ enum { #define BTQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BT, RS, MD, MB, MI, MS) #define BTCWir(IM, RD) _BTWir(X86_BTC, IM, RD) -#define BTCWim(IM, MD, MB, MI, MS) _BTWim(X86_BTC, IM, MD, MB, MI, MS) +#define BTCWim(IM, MD, MB, MI, MS) _BTWim(X86_BTC, IM, MD, MI, MS) #define BTCWrr(RS, RD) _BTWrr(X86_BTC, RS, RD) #define BTCWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BTC, RS, MD, MB, MI, MS) @@ -1063,7 +1088,7 @@ enum { #define BTCQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BTC, RS, MD, MB, MI, MS) #define BTRWir(IM, RD) _BTWir(X86_BTR, IM, RD) -#define BTRWim(IM, MD, MB, MI, MS) _BTWim(X86_BTR, IM, MD, MB, MI, MS) +#define BTRWim(IM, MD, MB, MI, MS) _BTWim(X86_BTR, IM, MD, MI, MS) #define BTRWrr(RS, RD) _BTWrr(X86_BTR, RS, RD) #define BTRWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BTR, RS, MD, MB, MI, MS) @@ -1078,7 +1103,7 @@ enum { #define BTRQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BTR, RS, MD, MB, MI, MS) #define BTSWir(IM, RD) _BTWir(X86_BTS, IM, RD) -#define BTSWim(IM, MD, MB, MI, MS) _BTWim(X86_BTS, IM, MD, MB, MI, MS) +#define BTSWim(IM, MD, MB, MI, MS) _BTWim(X86_BTS, IM, MD, MI, MS) #define BTSWrr(RS, RD) _BTWrr(X86_BTS, RS, RD) #define BTSWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BTS, RS, MD, MB, MI, MS) @@ -1261,7 +1286,7 @@ enum { // FIXME: no prefix is availble to encode a 32-bit operand size in 64-bit mode #define CALLm(M) _O_D32 (0xe8 ,(int)(M) ) #define _CALLLsr(R) (_REXLrr(0, R), _O_Mrm (0xff ,_b11,_b010,_r4(R) )) -#define 
_CALLQsr(R) (_REXLrr(0, R), _O_Mrm (0xff ,_b11,_b010,_r8(R) )) +#define _CALLQsr(R) (_REXQrr(0, R), _O_Mrm (0xff ,_b11,_b010,_r8(R) )) #define CALLsr(R) ( X86_TARGET_64BIT ? _CALLQsr(R) : _CALLLsr(R)) #define CALLsm(D,B,I,S) (_REXLrm(0, B, I), _O_r_X (0xff ,_b010 ,(int)(D),B,I,S )) @@ -1269,135 +1294,135 @@ enum { #define JMPSm(M) _O_D8 (0xeb ,(int)(M) ) #define JMPm(M) _O_D32 (0xe9 ,(int)(M) ) #define _JMPLsr(R) (_REXLrr(0, R), _O_Mrm (0xff ,_b11,_b100,_r4(R) )) -#define _JMPQsr(R) (_REXLrr(0, R), _O_Mrm (0xff ,_b11,_b100,_r8(R) )) +#define _JMPQsr(R) (_REXQrr(0, R), _O_Mrm (0xff ,_b11,_b100,_r8(R) )) #define JMPsr(R) ( X86_TARGET_64BIT ? _JMPQsr(R) : _JMPLsr(R)) #define JMPsm(D,B,I,S) (_REXLrm(0, B, I), _O_r_X (0xff ,_b100 ,(int)(D),B,I,S )) /* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ #define JCCSii(CC, D) _O_B (0x70|(CC) ,(_sc)(int)(D) ) #define JCCSim(CC, D) _O_D8 (0x70|(CC) ,(int)(D) ) -#define JOSm(D) JCCSim(X86_CC_O, D) -#define JNOSm(D) JCCSim(X86_CC_NO, D) -#define JBSm(D) JCCSim(X86_CC_B, D) -#define JNAESm(D) JCCSim(X86_CC_NAE, D) -#define JNBSm(D) JCCSim(X86_CC_NB, D) -#define JAESm(D) JCCSim(X86_CC_AE, D) -#define JESm(D) JCCSim(X86_CC_E, D) -#define JZSm(D) JCCSim(X86_CC_Z, D) -#define JNESm(D) JCCSim(X86_CC_NE, D) -#define JNZSm(D) JCCSim(X86_CC_NZ, D) -#define JBESm(D) JCCSim(X86_CC_BE, D) -#define JNASm(D) JCCSim(X86_CC_NA, D) -#define JNBESm(D) JCCSim(X86_CC_NBE, D) -#define JASm(D) JCCSim(X86_CC_A, D) -#define JSSm(D) JCCSim(X86_CC_S, D) -#define JNSSm(D) JCCSim(X86_CC_NS, D) -#define JPSm(D) JCCSim(X86_CC_P, D) -#define JPESm(D) JCCSim(X86_CC_PE, D) -#define JNPSm(D) JCCSim(X86_CC_NP, D) -#define JPOSm(D) JCCSim(X86_CC_PO, D) -#define JLSm(D) JCCSim(X86_CC_L, D) -#define JNGESm(D) JCCSim(X86_CC_NGE, D) -#define JNLSm(D) JCCSim(X86_CC_NL, D) -#define JGESm(D) JCCSim(X86_CC_GE, D) -#define JLESm(D) JCCSim(X86_CC_LE, D) -#define JNGSm(D) JCCSim(X86_CC_NG, D) -#define JNLESm(D) JCCSim(X86_CC_NLE, D) -#define JGSm(D) JCCSim(X86_CC_G, D) 
+#define JOSm(D) JCCSim(0x0, D) +#define JNOSm(D) JCCSim(0x1, D) +#define JBSm(D) JCCSim(0x2, D) +#define JNAESm(D) JCCSim(0x2, D) +#define JNBSm(D) JCCSim(0x3, D) +#define JAESm(D) JCCSim(0x3, D) +#define JESm(D) JCCSim(0x4, D) +#define JZSm(D) JCCSim(0x4, D) +#define JNESm(D) JCCSim(0x5, D) +#define JNZSm(D) JCCSim(0x5, D) +#define JBESm(D) JCCSim(0x6, D) +#define JNASm(D) JCCSim(0x6, D) +#define JNBESm(D) JCCSim(0x7, D) +#define JASm(D) JCCSim(0x7, D) +#define JSSm(D) JCCSim(0x8, D) +#define JNSSm(D) JCCSim(0x9, D) +#define JPSm(D) JCCSim(0xa, D) +#define JPESm(D) JCCSim(0xa, D) +#define JNPSm(D) JCCSim(0xb, D) +#define JPOSm(D) JCCSim(0xb, D) +#define JLSm(D) JCCSim(0xc, D) +#define JNGESm(D) JCCSim(0xc, D) +#define JNLSm(D) JCCSim(0xd, D) +#define JGESm(D) JCCSim(0xd, D) +#define JLESm(D) JCCSim(0xe, D) +#define JNGSm(D) JCCSim(0xe, D) +#define JNLESm(D) JCCSim(0xf, D) +#define JGSm(D) JCCSim(0xf, D) /* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ #define JCCii(CC, D) _OO_L (0x0f80|(CC) ,(int)(D) ) #define JCCim(CC, D) _OO_D32 (0x0f80|(CC) ,(int)(D) ) -#define JOm(D) JCCim(X86_CC_O, D) -#define JNOm(D) JCCim(X86_CC_NO, D) -#define JBm(D) JCCim(X86_CC_B, D) -#define JNAEm(D) JCCim(X86_CC_NAE, D) -#define JNBm(D) JCCim(X86_CC_NB, D) -#define JAEm(D) JCCim(X86_CC_AE, D) -#define JEm(D) JCCim(X86_CC_E, D) -#define JZm(D) JCCim(X86_CC_Z, D) -#define JNEm(D) JCCim(X86_CC_NE, D) -#define JNZm(D) JCCim(X86_CC_NZ, D) -#define JBEm(D) JCCim(X86_CC_BE, D) -#define JNAm(D) JCCim(X86_CC_NA, D) -#define JNBEm(D) JCCim(X86_CC_NBE, D) -#define JAm(D) JCCim(X86_CC_A, D) -#define JSm(D) JCCim(X86_CC_S, D) -#define JNSm(D) JCCim(X86_CC_NS, D) -#define JPm(D) JCCim(X86_CC_P, D) -#define JPEm(D) JCCim(X86_CC_PE, D) -#define JNPm(D) JCCim(X86_CC_NP, D) -#define JPOm(D) JCCim(X86_CC_PO, D) -#define JLm(D) JCCim(X86_CC_L, D) -#define JNGEm(D) JCCim(X86_CC_NGE, D) -#define JNLm(D) JCCim(X86_CC_NL, D) -#define JGEm(D) JCCim(X86_CC_GE, D) -#define JLEm(D) JCCim(X86_CC_LE, D) -#define 
JNGm(D) JCCim(X86_CC_NG, D) -#define JNLEm(D) JCCim(X86_CC_NLE, D) -#define JGm(D) JCCim(X86_CC_G, D) +#define JOm(D) JCCim(0x0, D) +#define JNOm(D) JCCim(0x1, D) +#define JBm(D) JCCim(0x2, D) +#define JNAEm(D) JCCim(0x2, D) +#define JNBm(D) JCCim(0x3, D) +#define JAEm(D) JCCim(0x3, D) +#define JEm(D) JCCim(0x4, D) +#define JZm(D) JCCim(0x4, D) +#define JNEm(D) JCCim(0x5, D) +#define JNZm(D) JCCim(0x5, D) +#define JBEm(D) JCCim(0x6, D) +#define JNAm(D) JCCim(0x6, D) +#define JNBEm(D) JCCim(0x7, D) +#define JAm(D) JCCim(0x7, D) +#define JSm(D) JCCim(0x8, D) +#define JNSm(D) JCCim(0x9, D) +#define JPm(D) JCCim(0xa, D) +#define JPEm(D) JCCim(0xa, D) +#define JNPm(D) JCCim(0xb, D) +#define JPOm(D) JCCim(0xb, D) +#define JLm(D) JCCim(0xc, D) +#define JNGEm(D) JCCim(0xc, D) +#define JNLm(D) JCCim(0xd, D) +#define JGEm(D) JCCim(0xd, D) +#define JLEm(D) JCCim(0xe, D) +#define JNGm(D) JCCim(0xe, D) +#define JNLEm(D) JCCim(0xf, D) +#define JGm(D) JCCim(0xf, D) /* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ #define SETCCir(CC, RD) (_REXBrr(0, RD), _OO_Mrm (0x0f90|(CC) ,_b11,_b000,_r1(RD) )) -#define SETOr(RD) SETCCir(X86_CC_O, RD) -#define SETNOr(RD) SETCCir(X86_CC_NO, RD) -#define SETBr(RD) SETCCir(X86_CC_B, RD) -#define SETNAEr(RD) SETCCir(X86_CC_NAE, RD) -#define SETNBr(RD) SETCCir(X86_CC_NB, RD) -#define SETAEr(RD) SETCCir(X86_CC_AE, RD) -#define SETEr(RD) SETCCir(X86_CC_E, RD) -#define SETZr(RD) SETCCir(X86_CC_Z, RD) -#define SETNEr(RD) SETCCir(X86_CC_NE, RD) -#define SETNZr(RD) SETCCir(X86_CC_NZ, RD) -#define SETBEr(RD) SETCCir(X86_CC_BE, RD) -#define SETNAr(RD) SETCCir(X86_CC_NA, RD) -#define SETNBEr(RD) SETCCir(X86_CC_NBE, RD) -#define SETAr(RD) SETCCir(X86_CC_A, RD) -#define SETSr(RD) SETCCir(X86_CC_S, RD) -#define SETNSr(RD) SETCCir(X86_CC_NS, RD) -#define SETPr(RD) SETCCir(X86_CC_P, RD) -#define SETPEr(RD) SETCCir(X86_CC_PE, RD) -#define SETNPr(RD) SETCCir(X86_CC_NP, RD) -#define SETPOr(RD) SETCCir(X86_CC_PO, RD) -#define SETLr(RD) SETCCir(X86_CC_L, RD) -#define SETNGEr(RD) SETCCir(X86_CC_NGE, RD) -#define SETNLr(RD) SETCCir(X86_CC_NL, RD) -#define SETGEr(RD) SETCCir(X86_CC_GE, RD) -#define SETLEr(RD) SETCCir(X86_CC_LE, RD) -#define SETNGr(RD) SETCCir(X86_CC_NG, RD) -#define SETNLEr(RD) SETCCir(X86_CC_NLE, RD) -#define SETGr(RD) SETCCir(X86_CC_G, RD) +#define SETOr(RD) SETCCir(0x0,RD) +#define SETNOr(RD) SETCCir(0x1,RD) +#define SETBr(RD) SETCCir(0x2,RD) +#define SETNAEr(RD) SETCCir(0x2,RD) +#define SETNBr(RD) SETCCir(0x3,RD) +#define SETAEr(RD) SETCCir(0x3,RD) +#define SETEr(RD) SETCCir(0x4,RD) +#define SETZr(RD) SETCCir(0x4,RD) +#define SETNEr(RD) SETCCir(0x5,RD) +#define SETNZr(RD) SETCCir(0x5,RD) +#define SETBEr(RD) SETCCir(0x6,RD) +#define SETNAr(RD) SETCCir(0x6,RD) +#define SETNBEr(RD) SETCCir(0x7,RD) +#define SETAr(RD) SETCCir(0x7,RD) +#define SETSr(RD) SETCCir(0x8,RD) +#define SETNSr(RD) SETCCir(0x9,RD) +#define SETPr(RD) SETCCir(0xa,RD) +#define SETPEr(RD) SETCCir(0xa,RD) +#define SETNPr(RD) SETCCir(0xb,RD) +#define SETPOr(RD) SETCCir(0xb,RD) 
+#define SETLr(RD) SETCCir(0xc,RD) +#define SETNGEr(RD) SETCCir(0xc,RD) +#define SETNLr(RD) SETCCir(0xd,RD) +#define SETGEr(RD) SETCCir(0xd,RD) +#define SETLEr(RD) SETCCir(0xe,RD) +#define SETNGr(RD) SETCCir(0xe,RD) +#define SETNLEr(RD) SETCCir(0xf,RD) +#define SETGr(RD) SETCCir(0xf,RD) /* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ #define SETCCim(CC,MD,MB,MI,MS) (_REXBrm(0, MB, MI), _OO_r_X (0x0f90|(CC) ,_b000 ,MD,MB,MI,MS )) -#define SETOm(D, B, I, S) SETCCim(X86_CC_O, D, B, I, S) -#define SETNOm(D, B, I, S) SETCCim(X86_CC_NO, D, B, I, S) -#define SETBm(D, B, I, S) SETCCim(X86_CC_B, D, B, I, S) -#define SETNAEm(D, B, I, S) SETCCim(X86_CC_NAE, D, B, I, S) -#define SETNBm(D, B, I, S) SETCCim(X86_CC_NB, D, B, I, S) -#define SETAEm(D, B, I, S) SETCCim(X86_CC_AE, D, B, I, S) -#define SETEm(D, B, I, S) SETCCim(X86_CC_E, D, B, I, S) -#define SETZm(D, B, I, S) SETCCim(X86_CC_Z, D, B, I, S) -#define SETNEm(D, B, I, S) SETCCim(X86_CC_NE, D, B, I, S) -#define SETNZm(D, B, I, S) SETCCim(X86_CC_NZ, D, B, I, S) -#define SETBEm(D, B, I, S) SETCCim(X86_CC_BE, D, B, I, S) -#define SETNAm(D, B, I, S) SETCCim(X86_CC_NA, D, B, I, S) -#define SETNBEm(D, B, I, S) SETCCim(X86_CC_NBE, D, B, I, S) -#define SETAm(D, B, I, S) SETCCim(X86_CC_A, D, B, I, S) -#define SETSm(D, B, I, S) SETCCim(X86_CC_S, D, B, I, S) -#define SETNSm(D, B, I, S) SETCCim(X86_CC_NS, D, B, I, S) -#define SETPm(D, B, I, S) SETCCim(X86_CC_P, D, B, I, S) -#define SETPEm(D, B, I, S) SETCCim(X86_CC_PE, D, B, I, S) -#define SETNPm(D, B, I, S) SETCCim(X86_CC_NP, D, B, I, S) -#define SETPOm(D, B, I, S) SETCCim(X86_CC_PO, D, B, I, S) -#define SETLm(D, B, I, S) SETCCim(X86_CC_L, D, B, I, S) -#define SETNGEm(D, B, I, S) SETCCim(X86_CC_NGE, D, B, I, S) -#define SETNLm(D, B, I, S) SETCCim(X86_CC_NL, D, B, I, S) -#define SETGEm(D, B, I, S) SETCCim(X86_CC_GE, D, B, I, S) -#define SETLEm(D, B, I, S) SETCCim(X86_CC_LE, D, B, I, S) -#define SETNGm(D, B, I, S) SETCCim(X86_CC_NG, D, B, I, S) -#define SETNLEm(D, B, I, S) 
SETCCim(X86_CC_NLE, D, B, I, S) -#define SETGm(D, B, I, S) SETCCim(X86_CC_G, D, B, I, S) +#define SETOm(D, B, I, S) SETCCim(0x0, D, B, I, S) +#define SETNOm(D, B, I, S) SETCCim(0x1, D, B, I, S) +#define SETBm(D, B, I, S) SETCCim(0x2, D, B, I, S) +#define SETNAEm(D, B, I, S) SETCCim(0x2, D, B, I, S) +#define SETNBm(D, B, I, S) SETCCim(0x3, D, B, I, S) +#define SETAEm(D, B, I, S) SETCCim(0x3, D, B, I, S) +#define SETEm(D, B, I, S) SETCCim(0x4, D, B, I, S) +#define SETZm(D, B, I, S) SETCCim(0x4, D, B, I, S) +#define SETNEm(D, B, I, S) SETCCim(0x5, D, B, I, S) +#define SETNZm(D, B, I, S) SETCCim(0x5, D, B, I, S) +#define SETBEm(D, B, I, S) SETCCim(0x6, D, B, I, S) +#define SETNAm(D, B, I, S) SETCCim(0x6, D, B, I, S) +#define SETNBEm(D, B, I, S) SETCCim(0x7, D, B, I, S) +#define SETAm(D, B, I, S) SETCCim(0x7, D, B, I, S) +#define SETSm(D, B, I, S) SETCCim(0x8, D, B, I, S) +#define SETNSm(D, B, I, S) SETCCim(0x9, D, B, I, S) +#define SETPm(D, B, I, S) SETCCim(0xa, D, B, I, S) +#define SETPEm(D, B, I, S) SETCCim(0xa, D, B, I, S) +#define SETNPm(D, B, I, S) SETCCim(0xb, D, B, I, S) +#define SETPOm(D, B, I, S) SETCCim(0xb, D, B, I, S) +#define SETLm(D, B, I, S) SETCCim(0xc, D, B, I, S) +#define SETNGEm(D, B, I, S) SETCCim(0xc, D, B, I, S) +#define SETNLm(D, B, I, S) SETCCim(0xd, D, B, I, S) +#define SETGEm(D, B, I, S) SETCCim(0xd, D, B, I, S) +#define SETLEm(D, B, I, S) SETCCim(0xe, D, B, I, S) +#define SETNGm(D, B, I, S) SETCCim(0xe, D, B, I, S) +#define SETNLEm(D, B, I, S) SETCCim(0xf, D, B, I, S) +#define SETGm(D, B, I, S) SETCCim(0xf, D, B, I, S) /* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ #define CMOVWrr(CC,RS,RD) (_d16(), _REXLrr(RD, RS), _OO_Mrm (0x0f40|(CC) ,_b11,_r2(RD),_r2(RS) )) @@ -1591,10 +1616,10 @@ enum { #define MOVZWLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0fb7 ,_b11,_r4(RD),_r2(RS) )) #define MOVZWLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fb7 ,_r4(RD) ,MD,MB,MI,MS )) -#define MOVSWQrr(RS, RD) _m64only((_REXQrr(RD, RS), _OO_Mrm (0x0fbf ,_b11,_r8(RD),_r2(RS) ))) -#define MOVSWQmr(MD, MB, MI, MS, RD) _m64only((_REXQmr(MB, MI, RD), _OO_r_X (0x0fbf ,_r8(RD) ,MD,MB,MI,MS ))) -#define MOVZWQrr(RS, RD) _m64only((_REXQrr(RD, RS), _OO_Mrm (0x0fb7 ,_b11,_r8(RD),_r2(RS) ))) -#define MOVZWQmr(MD, MB, MI, MS, RD) _m64only((_REXQmr(MB, MI, RD), _OO_r_X (0x0fb7 ,_r8(RD) ,MD,MB,MI,MS ))) +#define MOVSWQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fbf ,_b11,_r8(RD),_r2(RS) )) +#define MOVSWQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fbf ,_r8(RD) ,MD,MB,MI,MS )) +#define MOVZWQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fb7 ,_b11,_r8(RD),_r2(RS) )) +#define MOVZWQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fb7 ,_r8(RD) ,MD,MB,MI,MS )) #define MOVSLQrr(RS, RD) _m64only((_REXQrr(RD, RS), _O_Mrm (0x63 ,_b11,_r8(RD),_r4(RS) ))) #define MOVSLQmr(MD, MB, MI, MS, RD) _m64only((_REXQmr(MB, MI, RD), _O_r_X (0x63 ,_r8(RD) ,MD,MB,MI,MS ))) @@ -1602,15 +1627,14 @@ enum { /* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ #define LEALmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _O_r_X (0x8d ,_r4(RD) ,MD,MB,MI,MS )) -#define LEAQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _O_r_X (0x8d ,_r4(RD) ,MD,MB,MI,MS )) #define BSWAPLr(R) (_REXLrr(0, R), _OOr (0x0fc8,_r4(R) )) #define BSWAPQr(R) (_REXQrr(0, R), _OOr (0x0fc8,_r8(R) )) #define CLC() _O (0xf8 ) #define STC() _O (0xf9 ) -#define CMC() _O (0xf5 ) +#define CMC() _O (0xf5 ) #define CLD() _O (0xfc ) #define STD() _O (0xfd ) @@ -1647,313 +1671,13 @@ enum { #define NOP() _O (0x90 ) -/* --- Media 64-bit instructions ------------------------------------------- */ - -enum { - X86_MMX_PABSB = 0x1c, // 2P - X86_MMX_PABSW = 0x1d, // 2P - X86_MMX_PABSD = 0x1e, // 2P - X86_MMX_PACKSSWB = 0x63, - X86_MMX_PACKSSDW = 0x6b, - X86_MMX_PACKUSWB = 0x67, - X86_MMX_PADDB = 0xfc, - X86_MMX_PADDW = 0xfd, - X86_MMX_PADDD = 0xfe, - X86_MMX_PADDQ = 0xd4, - X86_MMX_PADDSB = 0xec, - X86_MMX_PADDSW = 0xed, - X86_MMX_PADDUSB = 0xdc, - X86_MMX_PADDUSW = 0xdd, - X86_MMX_PAND = 0xdb, - X86_MMX_PANDN = 0xdf, - X86_MMX_PAVGB = 0xe0, - X86_MMX_PAVGW = 0xe3, - X86_MMX_PCMPEQB = 0x74, - X86_MMX_PCMPEQW = 0x75, - X86_MMX_PCMPEQD = 0x76, - X86_MMX_PCMPGTB = 0x64, - X86_MMX_PCMPGTW = 0x65, - X86_MMX_PCMPGTD = 0x66, - X86_MMX_PEXTRW = 0xc5, // 64, /r ib - X86_MMX_PHADDW = 0x01, // 2P - X86_MMX_PHADDD = 0x02, // 2P - X86_MMX_PHADDSW = 0x03, // 2P - X86_MMX_PHSUBW = 0x05, // 2P - X86_MMX_PHSUBD = 0x06, // 2P - X86_MMX_PHSUBSW = 0x07, // 2P - X86_MMX_PINSRW = 0xc4, // 64, /r ib - X86_MMX_PMADDUBSW = 0x04, // 2P - X86_MMX_PMADDWD = 0xf5, - X86_MMX_PMAXSW = 0xee, - X86_MMX_PMAXUB = 0xde, - X86_MMX_PMINSW = 0xea, - X86_MMX_PMINUB = 0xda, - X86_MMX_PMOVMSKB = 0xd7, // 64 - X86_MMX_PMULHRSW = 0x0b, // 2P - X86_MMX_PMULHUW = 0xe4, - X86_MMX_PMULHW = 0xe5, - X86_MMX_PMULLW = 0xd5, - X86_MMX_PMULUDQ = 0xf4, - X86_MMX_POR = 0xeb, - X86_MMX_PSADBW = 0xf6, - X86_MMX_PSHUFB = 0x00, // 2P - X86_MMX_PSHUFW = 0x70, // /r ib - X86_MMX_PSIGNB = 0x08, // 2P - X86_MMX_PSIGNW = 0x09, // 2P - 
X86_MMX_PSIGND = 0x0a, // 2P - X86_MMX_PSLLW = 0xf1, - X86_MMX_PSLLWi = 0x71, // /6 ib - X86_MMX_PSLLD = 0xf2, - X86_MMX_PSLLDi = 0x72, // /6 ib - X86_MMX_PSLLQ = 0xf3, - X86_MMX_PSLLQi = 0x73, // /6 ib - X86_MMX_PSRAW = 0xe1, - X86_MMX_PSRAWi = 0x71, // /4 ib - X86_MMX_PSRAD = 0xe2, - X86_MMX_PSRADi = 0x72, // /4 ib - X86_MMX_PSRLW = 0xd1, - X86_MMX_PSRLWi = 0x71, // /2 ib - X86_MMX_PSRLD = 0xd2, - X86_MMX_PSRLDi = 0x72, // /2 ib - X86_MMX_PSRLQ = 0xd3, - X86_MMX_PSRLQi = 0x73, // /2 ib - X86_MMX_PSUBB = 0xf8, - X86_MMX_PSUBW = 0xf9, - X86_MMX_PSUBD = 0xfa, - X86_MMX_PSUBQ = 0xfb, - X86_MMX_PSUBSB = 0xe8, - X86_MMX_PSUBSW = 0xe9, - X86_MMX_PSUBUSB = 0xd8, - X86_MMX_PSUBUSW = 0xd9, - X86_MMX_PUNPCKHBW = 0x68, - X86_MMX_PUNPCKHWD = 0x69, - X86_MMX_PUNPCKHDQ = 0x6a, - X86_MMX_PUNPCKLBW = 0x60, - X86_MMX_PUNPCKLWD = 0x61, - X86_MMX_PUNPCKLDQ = 0x62, - X86_MMX_PXOR = 0xef, -}; - -#define __MMXLrr(OP,RS,RSA,RD,RDA) (_REXLrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) )) -#define __MMXLmr(OP,MD,MB,MI,MS,RD,RDA) (_REXLmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS )) -#define __MMXLrm(OP,RS,RSA,MD,MB,MI,MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS )) -#define __MMXLirr(OP,IM,RS,RSA,RD,RDA) (_REXLrr(RD, RS), _OO_Mrm_B (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) ,_u8(IM))) -#define __MMXLimr(OP,IM,MD,MB,MI,MS,RD,RDA) (_REXLmr(MB, MI, RS), _OO_r_X_B (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS ,_u8(IM))) -#define __MMXQrr(OP,RS,RSA,RD,RDA) (_REXQrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) )) -#define __MMXQmr(OP,MD,MB,MI,MS,RD,RDA) (_REXQmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS )) -#define __MMXQrm(OP,RS,RSA,MD,MB,MI,MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS )) -#define __MMXQirr(OP,IM,RS,RSA,RD,RDA) (_REXQrr(RD, RS), _OO_Mrm_B (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) ,_u8(IM))) -#define __MMXQimr(OP,IM,MD,MB,MI,MS,RD,RDA) (_REXQmr(MB, MI, RS), _OO_r_X_B (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS ,_u8(IM))) 
-#define __MMX1Lrr(PX,OP,RS,RSA,RD,RDA) (_REXLrr(RD, RS), _B(0x0f),_OO_Mrm(((PX)<<8)|(OP) ,_b11,RDA(RD),RSA(RS) )) -#define __MMX1Lmr(PX,OP,MD,MB,MI,MS,RD,RDA) (_REXLmr(MB, MI, RD), _B(0x0f),_OO_r_X(((PX)<<8)|(OP) ,RDA(RD) ,MD,MB,MI,MS )) -#define __MMX1Lrm(PX,OP,RS,RSA,MD,MB,MI,MS) (_REXLrm(RS, MB, MI), _B(0x0f),_OO_r_X(((PX)<<8)|(OP) ,RSA(RS) ,MD,MB,MI,MS )) - -#define _MMXLrr(OP,RS,RD) __MMXLrr(OP,RS,_rM,RD,_rM) -#define _MMXLmr(OP,MD,MB,MI,MS,RD) __MMXLmr(OP,MD,MB,MI,MS,RD,_rM) -#define _MMXLrm(OP,RS,MD,MB,MI,MS) __MMXLrm(OP,RS,_rM,MD,MB,MI,MS) -#define _MMXQrr(OP,RS,RD) __MMXQrr(OP,RS,_rM,RD,_rM) -#define _MMXQmr(OP,MD,MB,MI,MS,RD) __MMXQmr(OP,MD,MB,MI,MS,RD,_rM) -#define _MMXQrm(OP,RS,MD,MB,MI,MS) __MMXQrm(OP,RS,_rM,MD,MB,MI,MS) -#define _2P_MMXLrr(OP,RS,RD) __MMX1Lrr(0x38, OP,RS,_rM,RD,_rM) -#define _2P_MMXLmr(OP,MD,MB,MI,MS,RD) __MMX1Lmr(0x38, OP,MD,MB,MI,MS,RD,_rM) -#define _2P_MMXLrm(OP,RS,MD,MB,MI,MS) __MMX1Lrm(0x38, OP,RS,_rM,MD,MB,MI,MS) - -#define MMX_MOVDMDrr(RS, RD) __MMXLrr(0x6e, RS,_r4, RD,_rM) -#define MMX_MOVQMDrr(RS, RD) __MMXQrr(0x6e, RS,_r8, RD,_rM) -#define MMX_MOVDMSrr(RS, RD) __MMXLrr(0x7e, RD,_r4, RS,_rM) -#define MMX_MOVQMSrr(RS, RD) __MMXQrr(0x7e, RD,_r8, RS,_rM) - -#define MMX_MOVDmr(MD, MB, MI, MS, RD) _MMXLmr(0x6e, MD, MB, MI, MS, RD) -#define MMX_MOVDrm(RS, MD, MB, MI, MS) _MMXLrm(0x7e, RS, MD, MB, MI, MS) -#define MMX_MOVQrr(RS, RD) _MMXLrr(0x6f, RS, RD) -#define MMX_MOVQmr(MD, MB, MI, MS, RD) _MMXLmr(0x6f, MD, MB, MI, MS, RD) -#define MMX_MOVQrm(RS, MD, MB, MI, MS) _MMXLrm(0x7f, RS, MD, MB, MI, MS) - -// Original MMX instructions -#define MMX_PACKSSWBrr(RS, RD) _MMXLrr(X86_MMX_PACKSSWB,RS,RD) -#define MMX_PACKSSWBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PACKSSWB, MD, MB, MI, MS, RD) -#define MMX_PACKSSDWrr(RS, RD) _MMXLrr(X86_MMX_PACKSSDW,RS,RD) -#define MMX_PACKSSDWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PACKSSDW, MD, MB, MI, MS, RD) -#define MMX_PACKUSWBrr(RS, RD) _MMXLrr(X86_MMX_PACKUSWB,RS,RD) -#define MMX_PACKUSWBmr(MD,MB,MI,MS,RD) 
_MMXLmr(X86_MMX_PACKUSWB, MD, MB, MI, MS, RD) -#define MMX_PADDBrr(RS, RD) _MMXLrr(X86_MMX_PADDB,RS,RD) -#define MMX_PADDBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDB, MD, MB, MI, MS, RD) -#define MMX_PADDWrr(RS, RD) _MMXLrr(X86_MMX_PADDW,RS,RD) -#define MMX_PADDWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDW, MD, MB, MI, MS, RD) -#define MMX_PADDDrr(RS, RD) _MMXLrr(X86_MMX_PADDD,RS,RD) -#define MMX_PADDDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDD, MD, MB, MI, MS, RD) -#define MMX_PADDQrr(RS, RD) _MMXLrr(X86_MMX_PADDQ,RS,RD) -#define MMX_PADDQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDQ, MD, MB, MI, MS, RD) -#define MMX_PADDSBrr(RS, RD) _MMXLrr(X86_MMX_PADDSB,RS,RD) -#define MMX_PADDSBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDSB, MD, MB, MI, MS, RD) -#define MMX_PADDSWrr(RS, RD) _MMXLrr(X86_MMX_PADDSW,RS,RD) -#define MMX_PADDSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDSW, MD, MB, MI, MS, RD) -#define MMX_PADDUSBrr(RS, RD) _MMXLrr(X86_MMX_PADDUSB,RS,RD) -#define MMX_PADDUSBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDUSB, MD, MB, MI, MS, RD) -#define MMX_PADDUSWrr(RS, RD) _MMXLrr(X86_MMX_PADDUSW,RS,RD) -#define MMX_PADDUSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDUSW, MD, MB, MI, MS, RD) -#define MMX_PANDrr(RS, RD) _MMXLrr(X86_MMX_PAND,RS,RD) -#define MMX_PANDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PAND, MD, MB, MI, MS, RD) -#define MMX_PANDNrr(RS, RD) _MMXLrr(X86_MMX_PANDN,RS,RD) -#define MMX_PANDNmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PANDN, MD, MB, MI, MS, RD) -#define MMX_PAVGBrr(RS, RD) _MMXLrr(X86_MMX_PAVGB,RS,RD) -#define MMX_PAVGBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PAVGB, MD, MB, MI, MS, RD) -#define MMX_PAVGWrr(RS, RD) _MMXLrr(X86_MMX_PAVGW,RS,RD) -#define MMX_PAVGWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PAVGW, MD, MB, MI, MS, RD) -#define MMX_PCMPEQBrr(RS, RD) _MMXLrr(X86_MMX_PCMPEQB,RS,RD) -#define MMX_PCMPEQBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPEQB, MD, MB, MI, MS, RD) -#define MMX_PCMPEQWrr(RS, RD) _MMXLrr(X86_MMX_PCMPEQW,RS,RD) -#define MMX_PCMPEQWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPEQW, MD, 
MB, MI, MS, RD) -#define MMX_PCMPEQDrr(RS, RD) _MMXLrr(X86_MMX_PCMPEQD,RS,RD) -#define MMX_PCMPEQDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPEQD, MD, MB, MI, MS, RD) -#define MMX_PCMPGTBrr(RS, RD) _MMXLrr(X86_MMX_PCMPGTB,RS,RD) -#define MMX_PCMPGTBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPGTB, MD, MB, MI, MS, RD) -#define MMX_PCMPGTWrr(RS, RD) _MMXLrr(X86_MMX_PCMPGTW,RS,RD) -#define MMX_PCMPGTWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPGTW, MD, MB, MI, MS, RD) -#define MMX_PCMPGTDrr(RS, RD) _MMXLrr(X86_MMX_PCMPGTD,RS,RD) -#define MMX_PCMPGTDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPGTD, MD, MB, MI, MS, RD) -#define MMX_PMADDWDrr(RS, RD) _MMXLrr(X86_MMX_PMADDWD,RS,RD) -#define MMX_PMADDWDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMADDWD, MD, MB, MI, MS, RD) -#define MMX_PMAXSWrr(RS, RD) _MMXLrr(X86_MMX_PMAXSW,RS,RD) -#define MMX_PMAXSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMAXSW, MD, MB, MI, MS, RD) -#define MMX_PMAXUBrr(RS, RD) _MMXLrr(X86_MMX_PMAXUB,RS,RD) -#define MMX_PMAXUBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMAXUB, MD, MB, MI, MS, RD) -#define MMX_PMINSWrr(RS, RD) _MMXLrr(X86_MMX_PMINSW,RS,RD) -#define MMX_PMINSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMINSW, MD, MB, MI, MS, RD) -#define MMX_PMINUBrr(RS, RD) _MMXLrr(X86_MMX_PMINUB,RS,RD) -#define MMX_PMINUBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMINUB, MD, MB, MI, MS, RD) -#define MMX_PMULHUWrr(RS, RD) _MMXLrr(X86_MMX_PMULHUW,RS,RD) -#define MMX_PMULHUWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMULHUW, MD, MB, MI, MS, RD) -#define MMX_PMULHWrr(RS, RD) _MMXLrr(X86_MMX_PMULHW,RS,RD) -#define MMX_PMULHWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMULHW, MD, MB, MI, MS, RD) -#define MMX_PMULLWrr(RS, RD) _MMXLrr(X86_MMX_PMULLW,RS,RD) -#define MMX_PMULLWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMULLW, MD, MB, MI, MS, RD) -#define MMX_PMULUDQrr(RS, RD) _MMXLrr(X86_MMX_PMULUDQ,RS,RD) -#define MMX_PMULUDQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMULUDQ, MD, MB, MI, MS, RD) -#define MMX_PORrr(RS, RD) _MMXLrr(X86_MMX_POR,RS,RD) -#define MMX_PORmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_POR, 
MD, MB, MI, MS, RD) -#define MMX_PSADBWrr(RS, RD) _MMXLrr(X86_MMX_PSADBW,RS,RD) -#define MMX_PSADBWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSADBW, MD, MB, MI, MS, RD) -#define MMX_PSLLWir(IM, RD) __MMXLirr(X86_MMX_PSLLWi, IM, RD,_rM, _b110,_rN) -#define MMX_PSLLWrr(RS, RD) _MMXLrr(X86_MMX_PSLLW,RS,RD) -#define MMX_PSLLWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSLLW, MD, MB, MI, MS, RD) -#define MMX_PSLLDir(IM, RD) __MMXLirr(X86_MMX_PSLLDi, IM, RD,_rM, _b110,_rN) -#define MMX_PSLLDrr(RS, RD) _MMXLrr(X86_MMX_PSLLD,RS,RD) -#define MMX_PSLLDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSLLD, MD, MB, MI, MS, RD) -#define MMX_PSLLQir(IM, RD) __MMXLirr(X86_MMX_PSLLQi, IM, RD,_rM, _b110,_rN) -#define MMX_PSLLQrr(RS, RD) _MMXLrr(X86_MMX_PSLLQ,RS,RD) -#define MMX_PSLLQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSLLQ, MD, MB, MI, MS, RD) -#define MMX_PSRAWir(IM, RD) __MMXLirr(X86_MMX_PSRAWi, IM, RD,_rM, _b100,_rN) -#define MMX_PSRAWrr(RS, RD) _MMXLrr(X86_MMX_PSRAW,RS,RD) -#define MMX_PSRAWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSRAW, MD, MB, MI, MS, RD) -#define MMX_PSRADir(IM, RD) __MMXLirr(X86_MMX_PSRADi, IM, RD,_rM, _b100,_rN) -#define MMX_PSRADrr(RS, RD) _MMXLrr(X86_MMX_PSRAD,RS,RD) -#define MMX_PSRADmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSRAD, MD, MB, MI, MS, RD) -#define MMX_PSRLWir(IM, RD) __MMXLirr(X86_MMX_PSRLWi, IM, RD,_rM, _b010,_rN) -#define MMX_PSRLWrr(RS, RD) _MMXLrr(X86_MMX_PSRLW,RS,RD) -#define MMX_PSRLWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSRLW, MD, MB, MI, MS, RD) -#define MMX_PSRLDir(IM, RD) __MMXLirr(X86_MMX_PSRLDi, IM, RD,_rM, _b010,_rN) -#define MMX_PSRLDrr(RS, RD) _MMXLrr(X86_MMX_PSRLD,RS,RD) -#define MMX_PSRLDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSRLD, MD, MB, MI, MS, RD) -#define MMX_PSRLQir(IM, RD) __MMXLirr(X86_MMX_PSRLQi, IM, RD,_rM, _b010,_rN) -#define MMX_PSRLQrr(RS, RD) _MMXLrr(X86_MMX_PSRLQ,RS,RD) -#define MMX_PSRLQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSRLQ, MD, MB, MI, MS, RD) -#define MMX_PSUBBrr(RS, RD) _MMXLrr(X86_MMX_PSUBB,RS,RD) -#define MMX_PSUBBmr(MD,MB,MI,MS,RD) 
_MMXLmr(X86_MMX_PSUBB, MD, MB, MI, MS, RD) -#define MMX_PSUBWrr(RS, RD) _MMXLrr(X86_MMX_PSUBW,RS,RD) -#define MMX_PSUBWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBW, MD, MB, MI, MS, RD) -#define MMX_PSUBDrr(RS, RD) _MMXLrr(X86_MMX_PSUBD,RS,RD) -#define MMX_PSUBDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBD, MD, MB, MI, MS, RD) -#define MMX_PSUBQrr(RS, RD) _MMXLrr(X86_MMX_PSUBQ,RS,RD) -#define MMX_PSUBQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBQ, MD, MB, MI, MS, RD) -#define MMX_PSUBSBrr(RS, RD) _MMXLrr(X86_MMX_PSUBSB,RS,RD) -#define MMX_PSUBSBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBSB, MD, MB, MI, MS, RD) -#define MMX_PSUBSWrr(RS, RD) _MMXLrr(X86_MMX_PSUBSW,RS,RD) -#define MMX_PSUBSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBSW, MD, MB, MI, MS, RD) -#define MMX_PSUBUSBrr(RS, RD) _MMXLrr(X86_MMX_PSUBUSB,RS,RD) -#define MMX_PSUBUSBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBUSB, MD, MB, MI, MS, RD) -#define MMX_PSUBUSWrr(RS, RD) _MMXLrr(X86_MMX_PSUBUSW,RS,RD) -#define MMX_PSUBUSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBUSW, MD, MB, MI, MS, RD) -#define MMX_PUNPCKHBWrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKHBW,RS,RD) -#define MMX_PUNPCKHBWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKHBW, MD, MB, MI, MS, RD) -#define MMX_PUNPCKHWDrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKHWD,RS,RD) -#define MMX_PUNPCKHWDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKHWD, MD, MB, MI, MS, RD) -#define MMX_PUNPCKHDQrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKHDQ,RS,RD) -#define MMX_PUNPCKHDQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKHDQ, MD, MB, MI, MS, RD) -#define MMX_PUNPCKLBWrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKLBW,RS,RD) -#define MMX_PUNPCKLBWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKLBW, MD, MB, MI, MS, RD) -#define MMX_PUNPCKLWDrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKLWD,RS,RD) -#define MMX_PUNPCKLWDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKLWD, MD, MB, MI, MS, RD) -#define MMX_PUNPCKLDQrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKLDQ,RS,RD) -#define MMX_PUNPCKLDQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKLDQ, MD, MB, MI, MS, RD) -#define MMX_PXORrr(RS, RD) 
_MMXLrr(X86_MMX_PXOR,RS,RD) -#define MMX_PXORmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PXOR, MD, MB, MI, MS, RD) - -#define MMX_PSHUFWirr(IM, RS, RD) __MMXLirr(X86_MMX_PSHUFW, IM, RS,_rM, RD,_rM) -#define MMX_PSHUFWimr(IM, MD, MB, MI, MS, RD) __MMXLimr(X86_MMX_PSHUFW, IM, MD, MB, MI, MS, RD,_rM) -#define MMX_PEXTRWLirr(IM, RS, RD) __MMXLirr(X86_MMX_PEXTRW, IM, RS,_rM, RD,_r4) -#define MMX_PEXTRWQirr(IM, RS, RD) __MMXQirr(X86_MMX_PEXTRW, IM, RS,_rM, RD,_r8) -#define MMX_PINSRWLirr(IM, RS, RD) __MMXLirr(X86_MMX_PINSRW, IM, RS,_r4, RD,_rM) -#define MMX_PINSRWLimr(IM, MD, MB, MI, MS, RD) __MMXLimr(X86_MMX_PINSRW, IM, MD, MB, MI, MS, RD,_r4) -#define MMX_PINSRWQirr(IM, RS, RD) __MMXQirr(X86_MMX_PINSRW, IM, RS,_r4, RD,_rM) -#define MMX_PINSRWQimr(IM, MD, MB, MI, MS, RD) __MMXQimr(X86_MMX_PINSRW, IM, MD, MB, MI, MS, RD,_r8) - -// Additionnal MMX instructions, brought by SSSE3 ISA -#define MMX_PABSBrr(RS, RD) _2P_MMXLrr(X86_MMX_PABSB,RS,RD) -#define MMX_PABSBmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PABSB, MD, MB, MI, MS, RD) -#define MMX_PABSWrr(RS, RD) _2P_MMXLrr(X86_MMX_PABSW,RS,RD) -#define MMX_PABSWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PABSW, MD, MB, MI, MS, RD) -#define MMX_PABSDrr(RS, RD) _2P_MMXLrr(X86_MMX_PABSD,RS,RD) -#define MMX_PABSDmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PABSD, MD, MB, MI, MS, RD) -#define MMX_PHADDWrr(RS, RD) _2P_MMXLrr(X86_MMX_PHADDW,RS,RD) -#define MMX_PHADDWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PHADDW, MD, MB, MI, MS, RD) -#define MMX_PHADDDrr(RS, RD) _2P_MMXLrr(X86_MMX_PHADDD,RS,RD) -#define MMX_PHADDDmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PHADDD, MD, MB, MI, MS, RD) -#define MMX_PHADDSWrr(RS, RD) _2P_MMXLrr(X86_MMX_PHADDSW,RS,RD) -#define MMX_PHADDSWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PHADDSW, MD, MB, MI, MS, RD) -#define MMX_PHSUBWrr(RS, RD) _2P_MMXLrr(X86_MMX_PHSUBW,RS,RD) -#define MMX_PHSUBWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PHSUBW, MD, MB, MI, MS, RD) -#define MMX_PHSUBDrr(RS, RD) _2P_MMXLrr(X86_MMX_PHSUBD,RS,RD) -#define 
MMX_PHSUBDmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PHSUBD, MD, MB, MI, MS, RD) -#define MMX_PHSUBSWrr(RS, RD) _2P_MMXLrr(X86_MMX_PHSUBSW,RS,RD) -#define MMX_PHSUBSWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PHSUBSW, MD, MB, MI, MS, RD) -#define MMX_PMADDUBSWrr(RS, RD) _2P_MMXLrr(X86_MMX_PMADDUBSW,RS,RD) -#define MMX_PMADDUBSWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PMADDUBSW, MD, MB, MI, MS, RD) -#define MMX_PMULHRSWrr(RS, RD) _2P_MMXLrr(X86_MMX_PMULHRSW,RS,RD) -#define MMX_PMULHRSWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PMULHRSW, MD, MB, MI, MS, RD) -#define MMX_PSHUFBrr(RS, RD) _2P_MMXLrr(X86_MMX_PSHUFB,RS,RD) -#define MMX_PSHUFBmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PSHUFB, MD, MB, MI, MS, RD) -#define MMX_PSIGNBrr(RS, RD) _2P_MMXLrr(X86_MMX_PSIGNB,RS,RD) -#define MMX_PSIGNBmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PSIGNB, MD, MB, MI, MS, RD) -#define MMX_PSIGNWrr(RS, RD) _2P_MMXLrr(X86_MMX_PSIGNW,RS,RD) -#define MMX_PSIGNWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PSIGNW, MD, MB, MI, MS, RD) -#define MMX_PSIGNDrr(RS, RD) _2P_MMXLrr(X86_MMX_PSIGND,RS,RD) -#define MMX_PSIGNDmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PSIGND, MD, MB, MI, MS, RD) - -#define EMMS() _OO (0x0f77 ) - - /* --- Media 128-bit instructions ------------------------------------------ */ enum { - X86_SSE_CC_EQ = 0, - X86_SSE_CC_LT = 1, - X86_SSE_CC_GT = 1, - X86_SSE_CC_LE = 2, - X86_SSE_CC_GE = 2, - X86_SSE_CC_U = 3, - X86_SSE_CC_NEQ = 4, - X86_SSE_CC_NLT = 5, - X86_SSE_CC_NGT = 5, - X86_SSE_CC_NLE = 6, - X86_SSE_CC_NGE = 6, - X86_SSE_CC_O = 7 -}; - -enum { + X86_SSE_CVTIS = 0x2a, + X86_SSE_CVTSI = 0x2d, X86_SSE_UCOMI = 0x2e, X86_SSE_COMI = 0x2f, - X86_SSE_CMP = 0xc2, X86_SSE_SQRT = 0x51, X86_SSE_RSQRT = 0x52, X86_SSE_RCP = 0x53, @@ -1963,108 +1687,19 @@ enum { X86_SSE_XOR = 0x57, X86_SSE_ADD = 0x58, X86_SSE_MUL = 0x59, + X86_SSE_CVTSD = 0x5a, + X86_SSE_CVTDT = 0x5b, X86_SSE_SUB = 0x5c, X86_SSE_MIN = 0x5d, X86_SSE_DIV = 0x5e, X86_SSE_MAX = 0x5f, - X86_SSE_CVTDQ2PD = 0xe6, - X86_SSE_CVTDQ2PS = 0x5b, - X86_SSE_CVTPD2DQ = 
0xe6, - X86_SSE_CVTPD2PI = 0x2d, - X86_SSE_CVTPD2PS = 0x5a, - X86_SSE_CVTPI2PD = 0x2a, - X86_SSE_CVTPI2PS = 0x2a, - X86_SSE_CVTPS2DQ = 0x5b, - X86_SSE_CVTPS2PD = 0x5a, - X86_SSE_CVTPS2PI = 0x2d, - X86_SSE_CVTSD2SI = 0x2d, - X86_SSE_CVTSD2SS = 0x5a, - X86_SSE_CVTSI2SD = 0x2a, - X86_SSE_CVTSI2SS = 0x2a, - X86_SSE_CVTSS2SD = 0x5a, - X86_SSE_CVTSS2SI = 0x2d, - X86_SSE_CVTTPD2PI = 0x2c, - X86_SSE_CVTTPD2DQ = 0xe6, - X86_SSE_CVTTPS2DQ = 0x5b, - X86_SSE_CVTTPS2PI = 0x2c, - X86_SSE_CVTTSD2SI = 0x2c, - X86_SSE_CVTTSS2SI = 0x2c, - X86_SSE_MOVMSK = 0x50, - X86_SSE_PACKSSDW = 0x6b, - X86_SSE_PACKSSWB = 0x63, - X86_SSE_PACKUSWB = 0x67, - X86_SSE_PADDB = 0xfc, - X86_SSE_PADDD = 0xfe, - X86_SSE_PADDQ = 0xd4, - X86_SSE_PADDSB = 0xec, - X86_SSE_PADDSW = 0xed, - X86_SSE_PADDUSB = 0xdc, - X86_SSE_PADDUSW = 0xdd, - X86_SSE_PADDW = 0xfd, - X86_SSE_PAND = 0xdb, - X86_SSE_PANDN = 0xdf, - X86_SSE_PAVGB = 0xe0, - X86_SSE_PAVGW = 0xe3, - X86_SSE_PCMPEQB = 0x74, - X86_SSE_PCMPEQD = 0x76, - X86_SSE_PCMPEQW = 0x75, - X86_SSE_PCMPGTB = 0x64, - X86_SSE_PCMPGTD = 0x66, - X86_SSE_PCMPGTW = 0x65, - X86_SSE_PMADDWD = 0xf5, - X86_SSE_PMAXSW = 0xee, - X86_SSE_PMAXUB = 0xde, - X86_SSE_PMINSW = 0xea, - X86_SSE_PMINUB = 0xda, - X86_SSE_PMOVMSKB = 0xd7, - X86_SSE_PMULHUW = 0xe4, - X86_SSE_PMULHW = 0xe5, - X86_SSE_PMULLW = 0xd5, - X86_SSE_PMULUDQ = 0xf4, - X86_SSE_POR = 0xeb, - X86_SSE_PSADBW = 0xf6, - X86_SSE_PSLLD = 0xf2, - X86_SSE_PSLLQ = 0xf3, - X86_SSE_PSLLW = 0xf1, - X86_SSE_PSRAD = 0xe2, - X86_SSE_PSRAW = 0xe1, - X86_SSE_PSRLD = 0xd2, - X86_SSE_PSRLQ = 0xd3, - X86_SSE_PSRLW = 0xd1, - X86_SSE_PSUBB = 0xf8, - X86_SSE_PSUBD = 0xfa, - X86_SSE_PSUBQ = 0xfb, - X86_SSE_PSUBSB = 0xe8, - X86_SSE_PSUBSW = 0xe9, - X86_SSE_PSUBUSB = 0xd8, - X86_SSE_PSUBUSW = 0xd9, - X86_SSE_PSUBW = 0xf9, - X86_SSE_PUNPCKHBW = 0x68, - X86_SSE_PUNPCKHDQ = 0x6a, - X86_SSE_PUNPCKHQDQ = 0x6d, - X86_SSE_PUNPCKHWD = 0x69, - X86_SSE_PUNPCKLBW = 0x60, - X86_SSE_PUNPCKLDQ = 0x62, - X86_SSE_PUNPCKLQDQ = 0x6c, - X86_SSE_PUNPCKLWD = 0x61, - 
X86_SSE_PXOR = 0xef, - X86_SSSE3_PSHUFB = 0x00, }; /* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ -#define _SSSE3Lrr(OP1,OP2,RS,RSA,RD,RDA) (_B(0x66), _REXLrr(RD,RD), _B(0x0f), _OO_Mrm (((OP1)<<8)|(OP2) ,_b11,RDA(RD),RSA(RS) )) -#define _SSSE3Lmr(OP1,OP2,MD,MB,MI,MS,RD,RDA) (_B(0x66), _REXLmr(MB, MI, RD), _B(0x0f), _OO_r_X (((OP1)<<8)|(OP2) ,RDA(RD) ,MD,MB,MI,MS )) -#define _SSSE3Lirr(OP1,OP2,IM,RS,RD) (_B(0x66), _REXLrr(RD, RS), _B(0x0f), _OO_Mrm_B (((OP1)<<8)|(OP2) ,_b11,_rX(RD),_rX(RS) ,_u8(IM))) -#define _SSSE3Limr(OP1,OP2,IM,MD,MB,MI,MS,RD) (_B(0x66), _REXLmr(MB, MI, RD), _B(0x0f), _OO_r_X_B (((OP1)<<8)|(OP2) ,_rX(RD) ,MD,MB,MI,MS ,_u8(IM))) - -#define __SSELir(OP,MO,IM,RD) (_REXLrr(0, RD), _OO_Mrm_B (0x0f00|(OP) ,_b11,MO ,_rX(RD) ,_u8(IM))) -#define __SSELim(OP,MO,IM,MD,MB,MI,MS) (_REXLrm(0, MB, MI), _OO_r_X_B (0x0f00|(OP) ,MO ,MD,MB,MI,MS ,_u8(IM))) #define __SSELrr(OP,RS,RSA,RD,RDA) (_REXLrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) )) #define __SSELmr(OP,MD,MB,MI,MS,RD,RDA) (_REXLmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS )) #define __SSELrm(OP,RS,RSA,MD,MB,MI,MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS )) -#define __SSELirr(OP,IM,RS,RD) (_REXLrr(RD, RS), _OO_Mrm_B (0x0f00|(OP) ,_b11,_rX(RD),_rX(RS) ,_u8(IM))) -#define __SSELimr(OP,IM,MD,MB,MI,MS,RD) (_REXLmr(MB, MI, RD), _OO_r_X_B (0x0f00|(OP) ,_rX(RD) ,MD,MB,MI,MS ,_u8(IM))) #define __SSEQrr(OP,RS,RSA,RD,RDA) (_REXQrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) )) #define __SSEQmr(OP,MD,MB,MI,MS,RD,RDA) (_REXQmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS )) @@ -2073,10 +1708,6 @@ enum { #define _SSELrr(PX,OP,RS,RSA,RD,RDA) (_B(PX), __SSELrr(OP, RS, RSA, RD, RDA)) #define _SSELmr(PX,OP,MD,MB,MI,MS,RD,RDA) (_B(PX), __SSELmr(OP, MD, MB, MI, MS, RD, RDA)) #define _SSELrm(PX,OP,RS,RSA,MD,MB,MI,MS) (_B(PX), __SSELrm(OP, RS, RSA, MD, MB, MI, MS)) -#define _SSELir(PX,OP,MO,IM,RD) (_B(PX), __SSELir(OP, MO, IM, RD)) -#define 
_SSELim(PX,OP,MO,IM,MD,MB,MI,MS) (_B(PX), __SSELim(OP, MO, IM, MD, MB, MI, MS)) -#define _SSELirr(PX,OP,IM,RS,RD) (_B(PX), __SSELirr(OP, IM, RS, RD)) -#define _SSELimr(PX,OP,IM,MD,MB,MI,MS,RD) (_B(PX), __SSELimr(OP, IM, MD, MB, MI, MS, RD)) #define _SSEQrr(PX,OP,RS,RSA,RD,RDA) (_B(PX), __SSEQrr(OP, RS, RSA, RD, RDA)) #define _SSEQmr(PX,OP,MD,MB,MI,MS,RD,RDA) (_B(PX), __SSEQmr(OP, MD, MB, MI, MS, RD, RDA)) @@ -2085,26 +1716,18 @@ enum { #define _SSEPSrr(OP,RS,RD) __SSELrr( OP, RS,_rX, RD,_rX) #define _SSEPSmr(OP,MD,MB,MI,MS,RD) __SSELmr( OP, MD, MB, MI, MS, RD,_rX) #define _SSEPSrm(OP,RS,MD,MB,MI,MS) __SSELrm( OP, RS,_rX, MD, MB, MI, MS) -#define _SSEPSirr(OP,IM,RS,RD) __SSELirr( OP, IM, RS, RD) -#define _SSEPSimr(OP,IM,MD,MB,MI,MS,RD) __SSELimr( OP, IM, MD, MB, MI, MS, RD) #define _SSEPDrr(OP,RS,RD) _SSELrr(0x66, OP, RS,_rX, RD,_rX) #define _SSEPDmr(OP,MD,MB,MI,MS,RD) _SSELmr(0x66, OP, MD, MB, MI, MS, RD,_rX) #define _SSEPDrm(OP,RS,MD,MB,MI,MS) _SSELrm(0x66, OP, RS,_rX, MD, MB, MI, MS) -#define _SSEPDirr(OP,IM,RS,RD) _SSELirr(0x66, OP, IM, RS, RD) -#define _SSEPDimr(OP,IM,MD,MB,MI,MS,RD) _SSELimr(0x66, OP, IM, MD, MB, MI, MS, RD) #define _SSESSrr(OP,RS,RD) _SSELrr(0xf3, OP, RS,_rX, RD,_rX) #define _SSESSmr(OP,MD,MB,MI,MS,RD) _SSELmr(0xf3, OP, MD, MB, MI, MS, RD,_rX) #define _SSESSrm(OP,RS,MD,MB,MI,MS) _SSELrm(0xf3, OP, RS,_rX, MD, MB, MI, MS) -#define _SSESSirr(OP,IM,RS,RD) _SSELirr(0xf3, OP, IM, RS, RD) -#define _SSESSimr(OP,IM,MD,MB,MI,MS,RD) _SSELimr(0xf3, OP, IM, MD, MB, MI, MS, RD) #define _SSESDrr(OP,RS,RD) _SSELrr(0xf2, OP, RS,_rX, RD,_rX) #define _SSESDmr(OP,MD,MB,MI,MS,RD) _SSELmr(0xf2, OP, MD, MB, MI, MS, RD,_rX) #define _SSESDrm(OP,RS,MD,MB,MI,MS) _SSELrm(0xf2, OP, RS,_rX, MD, MB, MI, MS) -#define _SSESDirr(OP,IM,RS,RD) _SSELirr(0xf2, OP, IM, RS, RD) -#define _SSESDimr(OP,IM,MD,MB,MI,MS,RD) _SSELimr(0xf2, OP, IM, MD, MB, MI, MS, RD) #define ADDPSrr(RS, RD) _SSEPSrr(X86_SSE_ADD, RS, RD) #define ADDPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_ADD, MD, MB, MI, 
MS, RD) @@ -2126,16 +1749,6 @@ enum { #define ANDPDrr(RS, RD) _SSEPDrr(X86_SSE_AND, RS, RD) #define ANDPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_AND, MD, MB, MI, MS, RD) -#define CMPPSrr(IM, RS, RD) _SSEPSirr(X86_SSE_CMP, IM, RS, RD) -#define CMPPSmr(IM, MD, MB, MI, MS, RD) _SSEPSimr(X86_SSE_CMP, IM, MD, MB, MI, MS, RD) -#define CMPPDrr(IM, RS, RD) _SSEPDirr(X86_SSE_CMP, IM, RS, RD) -#define CMPPDmr(IM, MD, MB, MI, MS, RD) _SSEPDimr(X86_SSE_CMP, IM, MD, MB, MI, MS, RD) - -#define CMPSSrr(IM, RS, RD) _SSESSirr(X86_SSE_CMP, IM, RS, RD) -#define CMPSSmr(IM, MD, MB, MI, MS, RD) _SSESSimr(X86_SSE_CMP, IM, MD, MB, MI, MS, RD) -#define CMPSDrr(IM, RS, RD) _SSESDirr(X86_SSE_CMP, IM, RS, RD) -#define CMPSDmr(IM, MD, MB, MI, MS, RD) _SSESDimr(X86_SSE_CMP, IM, MD, MB, MI, MS, RD) - #define DIVPSrr(RS, RD) _SSEPSrr(X86_SSE_DIV, RS, RD) #define DIVPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_DIV, MD, MB, MI, MS, RD) #define DIVPDrr(RS, RD) _SSEPDrr(X86_SSE_DIV, RS, RD) @@ -2216,15 +1829,15 @@ enum { #define XORPDrr(RS, RD) _SSEPDrr(X86_SSE_XOR, RS, RD) #define XORPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_XOR, MD, MB, MI, MS, RD) -#define COMISSrr(RS, RD) _SSEPSrr(X86_SSE_COMI, RS, RD) -#define COMISSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_COMI, MD, MB, MI, MS, RD) -#define COMISDrr(RS, RD) _SSEPDrr(X86_SSE_COMI, RS, RD) -#define COMISDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_COMI, MD, MB, MI, MS, RD) +#define COMISSrr(RS, RD) _SSESSrr(X86_SSE_COMI, RS, RD) +#define COMISSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_COMI, MD, MB, MI, MS, RD) +#define COMISDrr(RS, RD) _SSESDrr(X86_SSE_COMI, RS, RD) +#define COMISDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_COMI, MD, MB, MI, MS, RD) -#define UCOMISSrr(RS, RD) _SSEPSrr(X86_SSE_UCOMI, RS, RD) -#define UCOMISSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD) -#define UCOMISDrr(RS, RD) _SSEPDrr(X86_SSE_UCOMI, RS, RD) -#define UCOMISDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD) +#define UCOMISSrr(RS, RD) 
_SSESSrr(X86_SSE_UCOMI, RS, RD) +#define UCOMISSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD) +#define UCOMISDrr(RS, RD) _SSESDrr(X86_SSE_UCOMI, RS, RD) +#define UCOMISDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD) #define MOVAPSrr(RS, RD) _SSEPSrr(0x28, RS, RD) #define MOVAPSmr(MD, MB, MI, MS, RD) _SSEPSmr(0x28, MD, MB, MI, MS, RD) @@ -2234,72 +1847,55 @@ enum { #define MOVAPDmr(MD, MB, MI, MS, RD) _SSEPDmr(0x28, MD, MB, MI, MS, RD) #define MOVAPDrm(RS, MD, MB, MI, MS) _SSEPDrm(0x29, RS, MD, MB, MI, MS) -#define CVTDQ2PDrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTDQ2PD, RS,_rX, RD,_rX) -#define CVTDQ2PDmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTDQ2PD, MD, MB, MI, MS, RD,_rX) -#define CVTDQ2PSrr(RS, RD) __SSELrr( X86_SSE_CVTDQ2PS, RS,_rX, RD,_rX) -#define CVTDQ2PSmr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTDQ2PS, MD, MB, MI, MS, RD,_rX) -#define CVTPD2DQrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTPD2DQ, RS,_rX, RD,_rX) -#define CVTPD2DQmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTPD2DQ, MD, MB, MI, MS, RD,_rX) -#define CVTPD2PIrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTPD2PI, RS,_rX, RD,_rM) -#define CVTPD2PImr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTPD2PI, MD, MB, MI, MS, RD,_rM) -#define CVTPD2PSrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTPD2PS, RS,_rX, RD,_rX) -#define CVTPD2PSmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTPD2PS, MD, MB, MI, MS, RD,_rX) -#define CVTPI2PDrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTPI2PD, RS,_rM, RD,_rX) -#define CVTPI2PDmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTPI2PD, MD, MB, MI, MS, RD,_rX) -#define CVTPI2PSrr(RS, RD) __SSELrr( X86_SSE_CVTPI2PS, RS,_rM, RD,_rX) -#define CVTPI2PSmr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTPI2PS, MD, MB, MI, MS, RD,_rX) -#define CVTPS2DQrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTPS2DQ, RS,_rX, RD,_rX) -#define CVTPS2DQmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTPS2DQ, MD, MB, MI, MS, RD,_rX) -#define CVTPS2PDrr(RS, RD) __SSELrr( X86_SSE_CVTPS2PD, RS,_rX, RD,_rX) -#define 
CVTPS2PDmr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTPS2PD, MD, MB, MI, MS, RD,_rX) -#define CVTPS2PIrr(RS, RD) __SSELrr( X86_SSE_CVTPS2PI, RS,_rX, RD,_rM) -#define CVTPS2PImr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTPS2PI, MD, MB, MI, MS, RD,_rM) -#define CVTSD2SILrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTSD2SI, RS,_rX, RD,_r4) -#define CVTSD2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTSD2SI, MD, MB, MI, MS, RD,_r4) -#define CVTSD2SIQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTSD2SI, RS,_rX, RD,_r8) -#define CVTSD2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTSD2SI, MD, MB, MI, MS, RD,_r8) -#define CVTSD2SSrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTSD2SS, RS,_rX, RD,_rX) -#define CVTSD2SSmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTSD2SS, MD, MB, MI, MS, RD,_rX) -#define CVTSI2SDLrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTSI2SD, RS,_r4, RD,_rX) -#define CVTSI2SDLmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTSI2SD, MD, MB, MI, MS, RD,_rX) -#define CVTSI2SDQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTSI2SD, RS,_r8, RD,_rX) -#define CVTSI2SDQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTSI2SD, MD, MB, MI, MS, RD,_rX) -#define CVTSI2SSLrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTSI2SS, RS,_r4, RD,_rX) -#define CVTSI2SSLmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTSI2SS, MD, MB, MI, MS, RD,_rX) -#define CVTSI2SSQrr(RS, RD) _SSEQrr(0xf3, X86_SSE_CVTSI2SS, RS,_r8, RD,_rX) -#define CVTSI2SSQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTSI2SS, MD, MB, MI, MS, RD,_rX) -#define CVTSS2SDrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTSS2SD, RS,_rX, RD,_rX) -#define CVTSS2SDmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTSS2SD, MD, MB, MI, MS, RD,_rX) -#define CVTSS2SILrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTSS2SI, RS,_rX, RD,_r4) -#define CVTSS2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTSS2SI, MD, MB, MI, MS, RD,_r4) -#define CVTSS2SIQrr(RS, RD) _SSEQrr(0xf3, X86_SSE_CVTSS2SI, RS,_rX, RD,_r8) -#define CVTSS2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTSS2SI, MD, MB, MI, MS, RD,_r8) -#define 
CVTTPD2PIrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTTPD2PI, RS,_rX, RD,_rM) -#define CVTTPD2PImr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTTPD2PI, MD, MB, MI, MS, RD,_rM) -#define CVTTPD2DQrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTTPD2DQ, RS,_rX, RD,_rX) -#define CVTTPD2DQmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTTPD2DQ, MD, MB, MI, MS, RD,_rX) -#define CVTTPS2DQrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTTPS2DQ, RS,_rX, RD,_rX) -#define CVTTPS2DQmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTTPS2DQ, MD, MB, MI, MS, RD,_rX) -#define CVTTPS2PIrr(RS, RD) __SSELrr( X86_SSE_CVTTPS2PI, RS,_rX, RD,_rM) -#define CVTTPS2PImr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTTPS2PI, MD, MB, MI, MS, RD,_rM) -#define CVTTSD2SILrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTTSD2SI, RS,_rX, RD,_r4) -#define CVTTSD2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTTSD2SI, MD, MB, MI, MS, RD,_r4) -#define CVTTSD2SIQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTTSD2SI, RS,_rX, RD,_r8) -#define CVTTSD2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTTSD2SI, MD, MB, MI, MS, RD,_r8) -#define CVTTSS2SILrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTTSS2SI, RS,_rX, RD,_r4) -#define CVTTSS2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTTSS2SI, MD, MB, MI, MS, RD,_r4) -#define CVTTSS2SIQrr(RS, RD) _SSEQrr(0xf3, X86_SSE_CVTTSS2SI, RS,_rX, RD,_r8) -#define CVTTSS2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTTSS2SI, MD, MB, MI, MS, RD,_r8) +#define CVTPS2PIrr(RS, RD) __SSELrr( X86_SSE_CVTSI, RS,_rX, RD,_rM) +#define CVTPS2PImr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTSI, MD, MB, MI, MS, RD,_rM) +#define CVTPD2PIrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTSI, RS,_rX, RD,_rM) +#define CVTPD2PImr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_rM) -#define MOVDXDrr(RS, RD) _SSELrr(0x66, 0x6e, RS,_r4, RD,_rX) -#define MOVDXDmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x6e, MD, MB, MI, MS, RD,_rX) -#define MOVQXDrr(RS, RD) _SSEQrr(0x66, 0x6e, RS,_r8, RD,_rX) -#define MOVQXDmr(MD, MB, MI, MS, RD) _SSEQmr(0x66, 0x6e, MD, MB, MI, 
MS, RD,_rX) +#define CVTPI2PSrr(RS, RD) __SSELrr( X86_SSE_CVTIS, RS,_rM, RD,_rX) +#define CVTPI2PSmr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX) +#define CVTPI2PDrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTIS, RS,_rM, RD,_rX) +#define CVTPI2PDmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX) -#define MOVDXSrr(RS, RD) _SSELrr(0x66, 0x7e, RD,_r4, RS,_rX) -#define MOVDXSrm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x7e, RS,_rX, MD, MB, MI, MS) -#define MOVQXSrr(RS, RD) _SSEQrr(0x66, 0x7e, RD,_r8, RS,_rX) -#define MOVQXSrm(RS, MD, MB, MI, MS) _SSEQrm(0x66, 0x7e, RS,_rX, MD, MB, MI, MS) +#define CVTPS2PDrr(RS, RD) __SSELrr( X86_SSE_CVTSD, RS,_rX, RD,_rX) +#define CVTPS2PDmr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTSD, MD, MB, MI, MS, RD,_rX) +#define CVTPD2PSrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTSD, RS,_rX, RD,_rX) +#define CVTPD2PSmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTSD, MD, MB, MI, MS, RD,_rX) + +#define CVTSS2SDrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTSD, RS,_rX, RD,_rX) +#define CVTSS2SDmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTSD, MD, MB, MI, MS, RD,_rX) +#define CVTSD2SSrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTSD, RS,_rX, RD,_rX) +#define CVTSD2SSmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTSD, MD, MB, MI, MS, RD,_rX) + +#define CVTSS2SILrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTSI, RS,_rX, RD,_r4) +#define CVTSS2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_r4) +#define CVTSD2SILrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTSI, RS,_rX, RD,_r4) +#define CVTSD2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_r4) + +#define CVTSI2SSLrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTIS, RS,_r4, RD,_rX) +#define CVTSI2SSLmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX) +#define CVTSI2SDLrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTIS, RS,_r4, RD,_rX) +#define CVTSI2SDLmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX) + +#define CVTSS2SIQrr(RS, RD) 
_SSEQrr(0xf3, X86_SSE_CVTSI, RS,_rX, RD,_r8) +#define CVTSS2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_r8) +#define CVTSD2SIQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTSI, RS,_rX, RD,_r8) +#define CVTSD2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_r8) + +#define CVTSI2SSQrr(RS, RD) _SSEQrr(0xf3, X86_SSE_CVTIS, RS,_r8, RD,_rX) +#define CVTSI2SSQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX) +#define CVTSI2SDQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTIS, RS,_r8, RD,_rX) +#define CVTSI2SDQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX) + +#define MOVDLXrr(RS, RD) _SSELrr(0x66, 0x6e, RS,_r4, RD,_rX) +#define MOVDLXmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x6e, MD, MB, MI, MS, RD,_rX) +#define MOVDQXrr(RS, RD) _SSEQrr(0x66, 0x6e, RS,_r8, RD,_rX) +#define MOVDQXmr(MD, MB, MI, MS, RD) _SSEQmr(0x66, 0x6e, MD, MB, MI, MS, RD,_rX) + +#define MOVDXLrr(RS, RD) _SSELrr(0x66, 0x7e, RS,_rX, RD,_r4) +#define MOVDXLrm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x7e, RS,_rX, MD, MB, MI, MS) +#define MOVDXQrr(RS, RD) _SSEQrr(0x66, 0x7e, RS,_rX, RD,_r8) +#define MOVDXQrm(RS, MD, MB, MI, MS) _SSEQrm(0x66, 0x7e, RS,_rX, MD, MB, MI, MS) #define MOVDLMrr(RS, RD) __SSELrr( 0x6e, RS,_r4, RD,_rM) #define MOVDLMmr(MD, MB, MI, MS, RD) __SSELmr( 0x6e, MD, MB, MI, MS, RD,_rM) @@ -2312,9 +1908,6 @@ enum { #define MOVDMQrm(RS, MD, MB, MI, MS) __SSEQrm( 0x7e, RS,_rM, MD, MB, MI, MS) #define MOVDQ2Qrr(RS, RD) _SSELrr(0xf2, 0xd6, RS,_rX, RD,_rM) -#define MOVMSKPSrr(RS, RD) __SSELrr( 0x50, RS,_rX, RD,_r4) -#define MOVMSKPDrr(RS, RD) _SSELrr(0x66, 0x50, RS,_rX, RD,_r4) - #define MOVHLPSrr(RS, RD) __SSELrr( 0x12, RS,_rX, RD,_rX) #define MOVLHPSrr(RS, RD) __SSELrr( 0x16, RS,_rX, RD,_rX) @@ -2337,229 +1930,99 @@ enum { #define MOVLPSrm(RS, MD, MB, MI, MS) __SSELrm( 0x13, RS,_rX, MD, MB, MI, MS) -/* --- Floating-Point instructions ----------------------------------------- */ +/* --- FLoating-Point instructions 
----------------------------------------- */ -enum { - X86_F2XM1 = 0xd9f0, - X86_FABS = 0xd9e1, - X86_FADD = 0xd8c0, // m32fp, m64fp, sti0, st0i, pst0i - X86_FIADD = 0xda00, // m32int, m16int - X86_FBLD = 0xdf04, // mem - X86_FBSTP = 0xdf06, // mem - X86_FCHS = 0xd9e0, - X86_FCMOVB = 0xdac0, // sti0 - X86_FCMOVE = 0xdac8, // sti0 - X86_FCMOVBE = 0xdad0, // sti0 - X86_FCMOVU = 0xdad8, // sti0 - X86_FCMOVNB = 0xdbc0, // sti0 - X86_FCMOVNE = 0xdbc8, // sti0 - X86_FCMOVNBE = 0xdbd0, // sti0 - X86_FCMOVNU = 0xdbd8, // sti0 - X86_FCOM = 0xd8d2, // m32fp, m64fp, sti - X86_FCOMP = 0xd8db, // m32fp, m64fp, sti - X86_FCOMPP = 0xded9, - X86_FCOMI = 0xdbf0, // sti0 - X86_FCOMIP = 0xdff0, // sti0 - X86_FUCOMI = 0xdbe8, // sti0 - X86_FUCOMIP = 0xdfe8, // sti0 - X86_FCOS = 0xd9ff, - X86_FDECSTP = 0xd9f6, - X86_FDIV = 0xd8f6, // m32fp, m64fp, sti0, st0i, pst0i - X86_FIDIV = 0xda06, // m32int, m16int - X86_FDIVR = 0xd8ff, // m32fp, m64fp, sti0, st0i, pst0i - X86_FIDIVR = 0xda07, // m32int, m16int - X86_FFREE = 0xddc0, // sti - X86_FICOM = 0xda02, // m32int, m16int - X86_FICOMP = 0xda03, // m32int, m16int - X86_FILD = 0xdb00, // m32int, m16int - X86_FILDQ = 0xdf05, // mem - X86_FINCSTP = 0xd9f7, - X86_FIST = 0xdb02, // m32int, m16int - X86_FISTP = 0xdb03, // m32int, m16int - X86_FISTPQ = 0xdf07, // mem - X86_FISTTP = 0xdb01, // m32int, m16int - X86_FISTTPQ = 0xdd01, // mem - X86_FLD = 0xd900, // m32fp, m64fp - X86_FLDT = 0xdb05, // mem - X86_FLD1 = 0xd9e8, - X86_FLDL2T = 0xd9e9, - X86_FLDL2E = 0xd9ea, - X86_FLDPI = 0xd9eb, - X86_FLDLG2 = 0xd9ec, - X86_FLDLN2 = 0xd9ed, - X86_FLDZ = 0xd9ee, - X86_FMUL = 0xd8c9, // m32fp, m64fp, sti0, st0i, pst0i - X86_FIMUL = 0xda01, // m32int, m16int - X86_FNOP = 0xd9d0, - X86_FPATAN = 0xd9f3, - X86_FPREM = 0xd9f8, - X86_FPREM1 = 0xd9f5, - X86_FPTAN = 0xd9f2, - X86_FRNDINT = 0xd9fc, - X86_FSCALE = 0xd9fd, - X86_FSIN = 0xd9fe, - X86_FSINCOS = 0xd9fb, - X86_FSQRT = 0xd9fa, - X86_FSTS = 0xd902, // mem - X86_FSTD = 0xdd02, // mem - X86_FST = 0xddd0, // 
sti - X86_FSTPS = 0xd903, // mem - X86_FSTPD = 0xdd03, // mem - X86_FSTPT = 0xdb07, // mem - X86_FSTP = 0xddd8, // sti - X86_FSUB = 0xd8e4, // m32fp, m64fp, sti0, st0i, pst0i - X86_FISUB = 0xda04, // m32int, m16int - X86_FSUBR = 0xd8ed, // m32fp, m64fp, sti0, st0i, pst0i - X86_FISUBR = 0xda05, // m32int, m16int - X86_FTST = 0xd9e4, - X86_FUCOM = 0xdde0, // sti - X86_FUCOMP = 0xdde8, // sti - X86_FUCOMPP = 0xdae9, - X86_FXAM = 0xd9e5, - X86_FXCH = 0xd9c8, // sti - X86_FXTRACT = 0xd9f4, - X86_FYL2X = 0xd9f1, - X86_FYL2XP1 = 0xd9f9, -}; +#define _ESCmi(D,B,I,S,OP) (_REXLrm(0,B,I), _O_r_X(0xd8|(OP & 7), (OP >> 3), D,B,I,S)) -#define _FPU(OP) _OO(OP) -#define _FPUm(OP, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_r_X((OP)>>8, (OP)&7, MD, MB, MI, MS)) -#define _FPUSm(OP, MD, MB, MI, MS) _FPUm(OP, MD, MB, MI, MS) -#define _FPUDm(OP, MD, MB, MI, MS) _FPUm((OP)|0x400, MD, MB, MI, MS) -#define _FPULm(OP, MD, MB, MI, MS) _FPUm(OP, MD, MB, MI, MS) -#define _FPUWm(OP, MD, MB, MI, MS) _FPUm((OP)|0x400, MD, MB, MI, MS) -#define _FPUr(OP, RR) _OOr((OP)&0xfff8, _rF(RR)) -#define _FPU0r(OP, RD) _FPUr((OP)|0x400, RD) -#define _FPUr0(OP, RS) _FPUr((OP) , RS) -#define _FPUrr(OP, RS, RD) (_rST0P(RS) ? _FPU0r(OP, RD) : (_rST0P(RD) ? 
_FPUr0(OP, RS) : x86_emit_failure("FPU instruction without st0"))) -#define _FPUP0r(OP, RD) _FPU0r((OP)|0x200, RD) +#define FLDr(R) _OOr(0xd9c0,_rN(R)) +#define FLDLm(D,B,I,S) _ESCmi(D,B,I,S,005) +#define FLDSm(D,B,I,S) _ESCmi(D,B,I,S,001) +#define FLDTm(D,B,I,S) _ESCmi(D,B,I,S,053) -#define F2XM1() _FPU(X86_F2XM1) -#define FABS() _FPU(X86_FABS) -#define FADDSm(MD, MB, MI, MS) _FPUSm(X86_FADD, MD, MB, MI, MS) -#define FADDDm(MD, MB, MI, MS) _FPUDm(X86_FADD, MD, MB, MI, MS) -#define FADDP0r(RD) _FPUP0r(X86_FADD, RD) -#define FADDrr(RS, RD) _FPUrr(X86_FADD, RS, RD) -#define FADD0r(RD) _FPU0r(X86_FADD, RD) -#define FADDr0(RS) _FPUr0(X86_FADD, RS) -#define FIADDWm(MD, MB, MI, MS) _FPUWm(X86_FIADD, MD, MB, MI, MS) -#define FIADDLm(MD, MB, MI, MS) _FPULm(X86_FIADD, MD, MB, MI, MS) -#define FBLDm(MD, MB, MI, MS) _FPUm(X86_FBLD, MD, MB, MI, MS) -#define FBSTPm(MD, MB, MI, MS) _FPUm(X86_FBSTP, MD, MB, MI, MS) -#define FCHS() _FPU(X86_FCHS) -#define FCMOVBr0(RS) _FPUr0(X86_FCMOVB, RS) -#define FCMOVEr0(RS) _FPUr0(X86_FCMOVE, RS) -#define FCMOVBEr0(RS) _FPUr0(X86_FCMOVBE, RS) -#define FCMOVUr0(RS) _FPUr0(X86_FCMOVU, RS) -#define FCMOVNBr0(RS) _FPUr0(X86_FCMOVNB, RS) -#define FCMOVNEr0(RS) _FPUr0(X86_FCMOVNE, RS) -#define FCMOVNBEr0(RS) _FPUr0(X86_FCMOVNBE, RS) -#define FCMOVNUr0(RS) _FPUr0(X86_FCMOVNU, RS) -#define FCOMSm(MD, MB, MI, MS) _FPUSm(X86_FCOM, MD, MB, MI, MS) -#define FCOMDm(MD, MB, MI, MS) _FPUDm(X86_FCOM, MD, MB, MI, MS) -#define FCOMr(RD) _FPUr(X86_FCOM, RD) -#define FCOMPSm(MD, MB, MI, MS) _FPUSm(X86_FCOMP, MD, MB, MI, MS) -#define FCOMPDm(MD, MB, MI, MS) _FPUDm(X86_FCOMP, MD, MB, MI, MS) -#define FCOMPr(RD) _FPUr(X86_FCOMP, RD) -#define FCOMPP() _FPU(X86_FCOMPP) -#define FCOMIr0(RS) _FPUr0(X86_FCOMI, RS) -#define FCOMIPr0(RS) _FPUr0(X86_FCOMIP, RS) -#define FUCOMIr0(RS) _FPUr0(X86_FUCOMI, RS) -#define FUCOMIPr0(RS) _FPUr0(X86_FUCOMIP, RS) -#define FCOS() _FPU(X86_FCOS) -#define FDECSTP() _FPU(X86_FDECSTP) -#define FDIVSm(MD, MB, MI, MS) _FPUSm(X86_FDIV, MD, 
MB, MI, MS) -#define FDIVDm(MD, MB, MI, MS) _FPUDm(X86_FDIV, MD, MB, MI, MS) -#define FDIVP0r(RD) _FPUP0r(X86_FDIV, RD) -#define FDIVrr(RS, RD) _FPUrr(X86_FDIV, RS, RD) -#define FDIV0r(RD) _FPU0r(X86_FDIV, RD) -#define FDIVr0(RS) _FPUr0(X86_FDIV, RS) -#define FIDIVWm(MD, MB, MI, MS) _FPUWm(X86_FIDIV, MD, MB, MI, MS) -#define FIDIVLm(MD, MB, MI, MS) _FPULm(X86_FIDIV, MD, MB, MI, MS) -#define FDIVRSm(MD, MB, MI, MS) _FPUSm(X86_FDIVR, MD, MB, MI, MS) -#define FDIVRDm(MD, MB, MI, MS) _FPUDm(X86_FDIVR, MD, MB, MI, MS) -#define FDIVRP0r(RD) _FPUP0r(X86_FDIVR, RD) -#define FDIVRrr(RS, RD) _FPUrr(X86_FDIVR, RS, RD) -#define FDIVR0r(RD) _FPU0r(X86_FDIVR, RD) -#define FDIVRr0(RS) _FPUr0(X86_FDIVR, RS) -#define FIDIVRWm(MD, MB, MI, MS) _FPUWm(X86_FIDIVR, MD, MB, MI, MS) -#define FIDIVRLm(MD, MB, MI, MS) _FPULm(X86_FIDIVR, MD, MB, MI, MS) -#define FFREEr(RD) _FPUr(X86_FFREE, RD) -#define FICOMWm(MD, MB, MI, MS) _FPUWm(X86_FICOM, MD, MB, MI, MS) -#define FICOMLm(MD, MB, MI, MS) _FPULm(X86_FICOM, MD, MB, MI, MS) -#define FICOMPWm(MD, MB, MI, MS) _FPUWm(X86_FICOMP, MD, MB, MI, MS) -#define FICOMPLm(MD, MB, MI, MS) _FPULm(X86_FICOMP, MD, MB, MI, MS) -#define FILDWm(MD, MB, MI, MS) _FPUWm(X86_FILD, MD, MB, MI, MS) -#define FILDLm(MD, MB, MI, MS) _FPULm(X86_FILD, MD, MB, MI, MS) -#define FILDQm(MD, MB, MI, MS) _FPUm(X86_FILDQ, MD, MB, MI, MS) -#define FINCSTP() _FPU(X86_FINCSTP) -#define FISTWm(MD, MB, MI, MS) _FPUWm(X86_FIST, MD, MB, MI, MS) -#define FISTLm(MD, MB, MI, MS) _FPULm(X86_FIST, MD, MB, MI, MS) -#define FISTPWm(MD, MB, MI, MS) _FPUWm(X86_FISTP, MD, MB, MI, MS) -#define FISTPLm(MD, MB, MI, MS) _FPULm(X86_FISTP, MD, MB, MI, MS) -#define FISTPQm(MD, MB, MI, MS) _FPUm(X86_FISTPQ, MD, MB, MI, MS) -#define FISTTPWm(MD, MB, MI, MS) _FPUWm(X86_FISTTP, MD, MB, MI, MS) -#define FISTTPLm(MD, MB, MI, MS) _FPULm(X86_FISTTP, MD, MB, MI, MS) -#define FISTTPQm(MD, MB, MI, MS) _FPUm(X86_FISTTPQ, MD, MB, MI, MS) -#define FLDSm(MD, MB, MI, MS) _FPUSm(X86_FLD, MD, MB, MI, MS) -#define 
FLDDm(MD, MB, MI, MS) _FPUDm(X86_FLD, MD, MB, MI, MS) -#define FLDTm(MD, MB, MI, MS) _FPUm(X86_FLDT, MD, MB, MI, MS) -#define FLD1() _FPU(X86_FLD1) -#define FLDL2T() _FPU(X86_FLDL2T) -#define FLDL2E() _FPU(X86_FLDL2E) -#define FLDPI() _FPU(X86_FLDPI) -#define FLDLG2() _FPU(X86_FLDLG2) -#define FLDLN2() _FPU(X86_FLDLN2) -#define FLDZ() _FPU(X86_FLDZ) -#define FMULSm(MD, MB, MI, MS) _FPUSm(X86_FMUL, MD, MB, MI, MS) -#define FMULDm(MD, MB, MI, MS) _FPUDm(X86_FMUL, MD, MB, MI, MS) -#define FMULP0r(RD) _FPUP0r(X86_FMUL, RD) -#define FMULrr(RS, RD) _FPUrr(X86_FMUL, RS, RD) -#define FMUL0r(RD) _FPU0r(X86_FMUL, RD) -#define FMULr0(RS) _FPUr0(X86_FMUL, RS) -#define FIMULWm(MD, MB, MI, MS) _FPUWm(X86_FIMUL, MD, MB, MI, MS) -#define FIMULLm(MD, MB, MI, MS) _FPULm(X86_FIMUL, MD, MB, MI, MS) -#define FNOP() _FPU(X86_FNOP) -#define FPATAN() _FPU(X86_FPATAN) -#define FPREM() _FPU(X86_FPREM) -#define FPREM1() _FPU(X86_FPREM1) -#define FPTAN() _FPU(X86_FPTAN) -#define FRNDINT() _FPU(X86_FRNDINT) -#define FSCALE() _FPU(X86_FSCALE) -#define FSIN() _FPU(X86_FSIN) -#define FSINCOS() _FPU(X86_FSINCOS) -#define FSQRT() _FPU(X86_FSQRT) -#define FSTSm(MD, MB, MI, MS) _FPUm(X86_FSTS, MD, MB, MI, MS) -#define FSTDm(MD, MB, MI, MS) _FPUm(X86_FSTD, MD, MB, MI, MS) -#define FSTr(RD) _FPUr(X86_FST, RD) -#define FSTPSm(MD, MB, MI, MS) _FPUm(X86_FSTPS, MD, MB, MI, MS) -#define FSTPDm(MD, MB, MI, MS) _FPUm(X86_FSTPD, MD, MB, MI, MS) -#define FSTPTm(MD, MB, MI, MS) _FPUm(X86_FSTPT, MD, MB, MI, MS) -#define FSTPr(RD) _FPUr(X86_FSTP, RD) -#define FSUBSm(MD, MB, MI, MS) _FPUSm(X86_FSUB, MD, MB, MI, MS) -#define FSUBDm(MD, MB, MI, MS) _FPUDm(X86_FSUB, MD, MB, MI, MS) -#define FSUBP0r(RD) _FPUP0r(X86_FSUB, RD) -#define FSUBrr(RS, RD) _FPUrr(X86_FSUB, RS, RD) -#define FSUB0r(RD) _FPU0r(X86_FSUB, RD) -#define FSUBr0(RS) _FPUr0(X86_FSUB, RS) -#define FISUBWm(MD, MB, MI, MS) _FPUWm(X86_FISUB, MD, MB, MI, MS) -#define FISUBLm(MD, MB, MI, MS) _FPULm(X86_FISUB, MD, MB, MI, MS) -#define FSUBRSm(MD, MB, MI, MS) 
_FPUSm(X86_FSUBR, MD, MB, MI, MS) -#define FSUBRDm(MD, MB, MI, MS) _FPUDm(X86_FSUBR, MD, MB, MI, MS) -#define FSUBRP0r(RD) _FPUP0r(X86_FSUBR, RD) -#define FSUBRrr(RS, RD) _FPUrr(X86_FSUBR, RS, RD) -#define FSUBR0r(RD) _FPU0r(X86_FSUBR, RD) -#define FSUBRr0(RS) _FPUr0(X86_FSUBR, RS) -#define FISUBRWm(MD, MB, MI, MS) _FPUWm(X86_FISUBR, MD, MB, MI, MS) -#define FISUBRLm(MD, MB, MI, MS) _FPULm(X86_FISUBR, MD, MB, MI, MS) -#define FTST() _FPU(X86_FTST) -#define FUCOMr(RD) _FPUr(X86_FUCOM, RD) -#define FUCOMPr(RD) _FPUr(X86_FUCOMP, RD) -#define FUCOMPP() _FPU(X86_FUCOMPP) -#define FXAM() _FPU(X86_FXAM) -#define FXCHr(RD) _FPUr(X86_FXCH, RD) -#define FXTRACT() _FPU(X86_FXTRACT) -#define FYL2X() _FPU(X86_FYL2X) -#define FYL2XP1() _FPU(X86_FYL2XP1) +#define FSTr(R) _OOr(0xddd0,_rN(R)) +#define FSTSm(D,B,I,S) _ESCmi(D,B,I,S,021) +#define FSTLm(D,B,I,S) _ESCmi(D,B,I,S,025) + +#define FSTPr(R) _OOr(0xddd8,_rN(R)) +#define FSTPSm(D,B,I,S) _ESCmi(D,B,I,S,031) +#define FSTPLm(D,B,I,S) _ESCmi(D,B,I,S,035) +#define FSTPTm(D,B,I,S) _ESCmi(D,B,I,S,073) + +#define FADDr0(R) _OOr(0xd8c0,_rN(R)) +#define FADD0r(R) _OOr(0xdcc0,_rN(R)) +#define FADDP0r(R) _OOr(0xdec0,_rN(R)) +#define FADDSm(D,B,I,S) _ESCmi(D,B,I,S,000) +#define FADDLm(D,B,I,S) _ESCmi(D,B,I,S,004) + +#define FSUBSm(D,B,I,S) _ESCmi(D,B,I,S,040) +#define FSUBLm(D,B,I,S) _ESCmi(D,B,I,S,044) +#define FSUBr0(R) _OOr(0xd8e0,_rN(R)) +#define FSUB0r(R) _OOr(0xdce8,_rN(R)) +#define FSUBP0r(R) _OOr(0xdee8,_rN(R)) + +#define FSUBRr0(R) _OOr(0xd8e8,_rN(R)) +#define FSUBR0r(R) _OOr(0xdce0,_rN(R)) +#define FSUBRP0r(R) _OOr(0xdee0,_rN(R)) +#define FSUBRSm(D,B,I,S) _ESCmi(D,B,I,S,050) +#define FSUBRLm(D,B,I,S) _ESCmi(D,B,I,S,054) + +#define FMULr0(R) _OOr(0xd8c8,_rN(R)) +#define FMUL0r(R) _OOr(0xdcc8,_rN(R)) +#define FMULP0r(R) _OOr(0xdec8,_rN(R)) +#define FMULSm(D,B,I,S) _ESCmi(D,B,I,S,010) +#define FMULLm(D,B,I,S) _ESCmi(D,B,I,S,014) + +#define FDIVr0(R) _OOr(0xd8f0,_rN(R)) +#define FDIV0r(R) _OOr(0xdcf8,_rN(R)) +#define FDIVP0r(R) 
_OOr(0xdef8,_rN(R)) +#define FDIVSm(D,B,I,S) _ESCmi(D,B,I,S,060) +#define FDIVLm(D,B,I,S) _ESCmi(D,B,I,S,064) + +#define FDIVRr0(R) _OOr(0xd8f8,_rN(R)) +#define FDIVR0r(R) _OOr(0xdcf0,_rN(R)) +#define FDIVRP0r(R) _OOr(0xdef0,_rN(R)) +#define FDIVRSm(D,B,I,S) _ESCmi(D,B,I,S,070) +#define FDIVRLm(D,B,I,S) _ESCmi(D,B,I,S,074) + +#define FCMOVBr0(R) _OOr(0xdac0,_rN(R)) +#define FCMOVBEr0(R) _OOr(0xdad0,_rN(R)) +#define FCMOVEr0(R) _OOr(0xdac8,_rN(R)) +#define FCMOVNBr0(R) _OOr(0xdbc0,_rN(R)) +#define FCMOVNBEr0(R) _OOr(0xdbd0,_rN(R)) +#define FCMOVNEr0(R) _OOr(0xdbc8,_rN(R)) +#define FCMOVNUr0(R) _OOr(0xdbd8,_rN(R)) +#define FCMOVUr0(R) _OOr(0xdad8,_rN(R)) +#define FCOMIr0(R) _OOr(0xdbf0,_rN(R)) +#define FCOMIPr0(R) _OOr(0xdff0,_rN(R)) + +#define FCOMr(R) _OOr(0xd8d0,_rN(R)) +#define FCOMSm(D,B,I,S) _ESCmi(D,B,I,S,020) +#define FCOMLm(D,B,I,S) _ESCmi(D,B,I,S,024) + +#define FCOMPr(R) _OOr(0xd8d8,_rN(R)) +#define FCOMPSm(D,B,I,S) _ESCmi(D,B,I,S,030) +#define FCOMPLm(D,B,I,S) _ESCmi(D,B,I,S,034) + +#define FUCOMIr0(R) _OOr(0xdbe8,_rN(R)) +#define FUCOMIPr0(R) _OOr(0xdfe8,_rN(R)) +#define FUCOMPr(R) _OOr(0xdde8,_rN(R)) +#define FUCOMr(R) _OOr(0xdde0,_rN(R)) + +#define FIADDLm(D,B,I,S) _ESCmi(D,B,I,S,002) +#define FICOMLm(D,B,I,S) _ESCmi(D,B,I,S,022) +#define FICOMPLm(D,B,I,S) _ESCmi(D,B,I,S,032) +#define FIDIVLm(D,B,I,S) _ESCmi(D,B,I,S,062) +#define FIDIVRLm(D,B,I,S) _ESCmi(D,B,I,S,072) +#define FILDLm(D,B,I,S) _ESCmi(D,B,I,S,003) +#define FILDQm(D,B,I,S) _ESCmi(D,B,I,S,057) +#define FIMULLm(D,B,I,S) _ESCmi(D,B,I,S,012) +#define FISTLm(D,B,I,S) _ESCmi(D,B,I,S,023) +#define FISTPLm(D,B,I,S) _ESCmi(D,B,I,S,033) +#define FISTPQm(D,B,I,S) _ESCmi(D,B,I,S,077) +#define FISUBLm(D,B,I,S) _ESCmi(D,B,I,S,042) +#define FISUBRLm(D,B,I,S) _ESCmi(D,B,I,S,052) + +#define FREEr(R) _OOr(0xddc0,_rN(R)) +#define FXCHr(R) _OOr(0xd9c8,_rN(R)) #endif /* X86_RTASM_H */ diff --git a/BasiliskII/src/uae_cpu/compiler/compemu.h b/BasiliskII/src/uae_cpu/compiler/compemu.h index 9a612fb2..62ee94e4 
100644 --- a/BasiliskII/src/uae_cpu/compiler/compemu.h +++ b/BasiliskII/src/uae_cpu/compiler/compemu.h @@ -1,42 +1,53 @@ /* - * compiler/compemu.h - Public interface and definitions + * compiler/compemu.h - Public interface and definitions * - * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) * - * Adaptation for Basilisk II and improvements, copyright 2000-2005 - * Gwenole Beauchesne + * Inspired by Christian Bauer's Basilisk II * - * Basilisk II (C) 1997-2008 Christian Bauer - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * JIT compiler m68k -> IA-32 and AMD64 * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * Adaptation for Basilisk II and improvements, copyright 2000-2004 Gwenole Beauchesne + * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef COMPEMU_H #define COMPEMU_H +// #include "sysconfig.h" #include "newcpu.h" -#if USE_JIT - -#if defined __i386__ || defined __x86_64__ -#include "flags_x86.h" +#ifdef UAE +#ifdef CPU_64_BIT +typedef uae_u64 uintptr; #else -#error "Unsupported JIT compiler for this architecture" +typedef uae_u32 uintptr; +#endif +/* FIXME: cpummu.cpp also checks for USE_JIT, possibly others */ +#define USE_JIT #endif -#if JIT_DEBUG +#ifdef USE_JIT + +#ifdef JIT_DEBUG /* dump some information (m68k block, x86 block addresses) about the compiler state */ extern void compiler_dumpstate(void); #endif @@ -55,11 +66,14 @@ extern uae_u32 start_pc; struct blockinfo_t; struct cpu_history { - uae_u16 * location; + uae_u16* location; +#ifdef UAE + uae_u8 specmem; +#endif }; union cacheline { - cpuop_func * handler; + cpuop_func* handler; blockinfo_t * bi; }; @@ -102,8 +116,13 @@ union cacheline { #define SCALE 2 #define BYTES_PER_INST 10240 /* paranoid ;-) */ +#if defined(CPU_arm) +#define LONGEST_68K_INST 256 /* The number of bytes the longest possible + 68k instruction takes */ +#else #define LONGEST_68K_INST 16 /* The number of bytes the longest possible 68k instruction takes */ +#endif #define MAX_CHECKSUM_LEN 2048 /* The maximum size we calculate checksums for. 
Anything larger will be flushed unconditionally even with SOFT_FLUSH */ @@ -111,8 +130,7 @@ union cacheline { for jump targets */ #define INDIVIDUAL_INST 0 -#if 1 -// gb-- my format from readcpu.cpp is not the same +#ifdef WINUAE_ARANYM #define FLAG_X 0x0010 #define FLAG_N 0x0008 #define FLAG_Z 0x0004 @@ -126,43 +144,108 @@ union cacheline { #define FLAG_X 0x0001 #endif #define FLAG_CZNV (FLAG_C | FLAG_Z | FLAG_N | FLAG_V) +#define FLAG_ALL (FLAG_C | FLAG_Z | FLAG_N | FLAG_V | FLAG_X) #define FLAG_ZNV (FLAG_Z | FLAG_N | FLAG_V) #define KILLTHERAT 1 /* Set to 1 to avoid some partial_rat_stalls */ -#if defined(__x86_64__) +#if defined(CPU_arm) +#define USE_DATA_BUFFER +#define N_REGS 13 /* really 16, but 13 to 15 are SP, LR, PC */ +#else +#if defined(CPU_x86_64) #define N_REGS 16 /* really only 15, but they are numbered 0-3,5-15 */ #else #define N_REGS 8 /* really only 7, but they are numbered 0,1,2,3,5,6,7 */ #endif +#endif #define N_FREGS 6 /* That leaves us two positions on the stack to play with */ /* Functions exposed to newcpu, or to what was moved from newcpu.c to * compemu_support.c */ +#ifdef WINUAE_ARANYM extern void compiler_init(void); extern void compiler_exit(void); extern bool compiler_use_jit(void); -extern void init_comp(void); +#endif extern void flush(int save_regs); -extern void small_flush(int save_regs); +void flush_reg(int reg); extern void set_target(uae_u8* t); extern uae_u8* get_target(void); -extern void freescratch(void); +#ifdef UAE extern void build_comp(void); +#endif extern void set_cache_state(int enabled); extern int get_cache_state(void); extern uae_u32 get_jitted_size(void); -extern void (*flush_icache)(int n); +#ifdef JIT +extern void (*flush_icache)(void); +#endif extern void alloc_cache(void); extern int check_for_cache_miss(void); /* JIT FPU compilation */ +struct jit_disable_opcodes { + bool fbcc; + bool fdbcc; + bool fscc; + bool ftrapcc; + bool fsave; + bool frestore; + bool fmove; + bool fmovem; + bool fmovec; /* for move 
control register */ + bool fmovecr; /* for move from constant rom */ + bool fint; + bool fsinh; + bool fintrz; + bool fsqrt; + bool flognp1; + bool fetoxm1; + bool ftanh; + bool fatan; + bool fasin; + bool fatanh; + bool fsin; + bool ftan; + bool fetox; + bool ftwotox; + bool ftentox; + bool flogn; + bool flog10; + bool flog2; + bool fabs; + bool fcosh; + bool fneg; + bool facos; + bool fcos; + bool fgetexp; + bool fgetman; + bool fdiv; + bool fmod; + bool fadd; + bool fmul; + bool fsgldiv; + bool frem; + bool fscale; + bool fsglmul; + bool fsub; + bool fsincos; + bool fcmp; + bool ftst; +}; +extern struct jit_disable_opcodes jit_disable; + + extern void comp_fpp_opp (uae_u32 opcode, uae_u16 extra); extern void comp_fbcc_opp (uae_u32 opcode); extern void comp_fscc_opp (uae_u32 opcode, uae_u16 extra); +void comp_fdbcc_opp (uae_u32 opcode, uae_u16 extra); +void comp_ftrapcc_opp (uae_u32 opcode, uaecptr oldpc); +void comp_fsave_opp (uae_u32 opcode); +void comp_frestore_opp (uae_u32 opcode); extern uae_u32 needed_flags; -extern cacheline cache_tags[]; extern uae_u8* comp_pc_p; extern void* pushall_call_handler; @@ -193,10 +276,11 @@ typedef struct { double val; uae_u8 status; uae_s8 realreg; /* gb-- realreg can hold -1 */ - uae_u8 realind; + uae_u8 realind; uae_u8 needflush; } freg_status; +#define SP_REG 15 #define PC_P 16 #define FLAGX 17 #define FLAGTMP 18 @@ -263,19 +347,23 @@ typedef struct { } bigstate; typedef struct { - /* Integer part */ - char virt[VREGS]; - char nat[N_REGS]; + /* Integer part */ + uae_s8 virt[VREGS]; + uae_s8 nat[N_REGS]; } smallstate; -extern bigstate live; extern int touchcnt; - -#define IMM uae_s32 +#define IMM uae_s32 +#define RR1 uae_u32 +#define RR2 uae_u32 +#define RR4 uae_u32 +/* + R1, R2, R4 collides with ARM registers defined in ucontext #define R1 uae_u32 #define R2 uae_u32 #define R4 uae_u32 +*/ #define W1 uae_u32 #define W2 uae_u32 #define W4 uae_u32 @@ -284,220 +372,37 @@ extern int touchcnt; #define RW4 uae_u32 #define MEMR 
uae_u32 #define MEMW uae_u32 -#define MEMRW uae_u32 +#define MEMRW uae_u32 +#define MEMPTR uintptr +#define MEMPTRR MEMPTR +#define MEMPTRW MEMPTR +#define MEMPTRRW MEMPTR #define FW uae_u32 #define FR uae_u32 #define FRW uae_u32 #define MIDFUNC(nargs,func,args) void func args -#define MENDFUNC(nargs,func,args) #define COMPCALL(func) func -#define LOWFUNC(flags,mem,nargs,func,args) static __inline__ void func args -#define LENDFUNC(flags,mem,nargs,func,args) +#define LOWFUNC(flags,mem,nargs,func,args) static inline void func args /* What we expose to the outside */ #define DECLARE_MIDFUNC(func) extern void func -DECLARE_MIDFUNC(bt_l_ri(R4 r, IMM i)); -DECLARE_MIDFUNC(bt_l_rr(R4 r, R4 b)); -DECLARE_MIDFUNC(btc_l_ri(RW4 r, IMM i)); -DECLARE_MIDFUNC(btc_l_rr(RW4 r, R4 b)); -DECLARE_MIDFUNC(bts_l_ri(RW4 r, IMM i)); -DECLARE_MIDFUNC(bts_l_rr(RW4 r, R4 b)); -DECLARE_MIDFUNC(btr_l_ri(RW4 r, IMM i)); -DECLARE_MIDFUNC(btr_l_rr(RW4 r, R4 b)); -DECLARE_MIDFUNC(mov_l_rm(W4 d, IMM s)); -DECLARE_MIDFUNC(call_r(R4 r)); -DECLARE_MIDFUNC(sub_l_mi(IMM d, IMM s)); -DECLARE_MIDFUNC(mov_l_mi(IMM d, IMM s)); -DECLARE_MIDFUNC(mov_w_mi(IMM d, IMM s)); -DECLARE_MIDFUNC(mov_b_mi(IMM d, IMM s)); -DECLARE_MIDFUNC(rol_b_ri(RW1 r, IMM i)); -DECLARE_MIDFUNC(rol_w_ri(RW2 r, IMM i)); -DECLARE_MIDFUNC(rol_l_ri(RW4 r, IMM i)); -DECLARE_MIDFUNC(rol_l_rr(RW4 d, R1 r)); -DECLARE_MIDFUNC(rol_w_rr(RW2 d, R1 r)); -DECLARE_MIDFUNC(rol_b_rr(RW1 d, R1 r)); -DECLARE_MIDFUNC(shll_l_rr(RW4 d, R1 r)); -DECLARE_MIDFUNC(shll_w_rr(RW2 d, R1 r)); -DECLARE_MIDFUNC(shll_b_rr(RW1 d, R1 r)); -DECLARE_MIDFUNC(ror_b_ri(R1 r, IMM i)); -DECLARE_MIDFUNC(ror_w_ri(R2 r, IMM i)); -DECLARE_MIDFUNC(ror_l_ri(R4 r, IMM i)); -DECLARE_MIDFUNC(ror_l_rr(R4 d, R1 r)); -DECLARE_MIDFUNC(ror_w_rr(R2 d, R1 r)); -DECLARE_MIDFUNC(ror_b_rr(R1 d, R1 r)); -DECLARE_MIDFUNC(shrl_l_rr(RW4 d, R1 r)); -DECLARE_MIDFUNC(shrl_w_rr(RW2 d, R1 r)); -DECLARE_MIDFUNC(shrl_b_rr(RW1 d, R1 r)); -DECLARE_MIDFUNC(shra_l_rr(RW4 d, R1 r)); 
-DECLARE_MIDFUNC(shra_w_rr(RW2 d, R1 r)); -DECLARE_MIDFUNC(shra_b_rr(RW1 d, R1 r)); -DECLARE_MIDFUNC(shll_l_ri(RW4 r, IMM i)); -DECLARE_MIDFUNC(shll_w_ri(RW2 r, IMM i)); -DECLARE_MIDFUNC(shll_b_ri(RW1 r, IMM i)); -DECLARE_MIDFUNC(shrl_l_ri(RW4 r, IMM i)); -DECLARE_MIDFUNC(shrl_w_ri(RW2 r, IMM i)); -DECLARE_MIDFUNC(shrl_b_ri(RW1 r, IMM i)); -DECLARE_MIDFUNC(shra_l_ri(RW4 r, IMM i)); -DECLARE_MIDFUNC(shra_w_ri(RW2 r, IMM i)); -DECLARE_MIDFUNC(shra_b_ri(RW1 r, IMM i)); -DECLARE_MIDFUNC(setcc(W1 d, IMM cc)); -DECLARE_MIDFUNC(setcc_m(IMM d, IMM cc)); -DECLARE_MIDFUNC(cmov_b_rr(RW1 d, R1 s, IMM cc)); -DECLARE_MIDFUNC(cmov_w_rr(RW2 d, R2 s, IMM cc)); -DECLARE_MIDFUNC(cmov_l_rr(RW4 d, R4 s, IMM cc)); -DECLARE_MIDFUNC(cmov_l_rm(RW4 d, IMM s, IMM cc)); -DECLARE_MIDFUNC(bsf_l_rr(W4 d, R4 s)); -DECLARE_MIDFUNC(pop_m(IMM d)); -DECLARE_MIDFUNC(push_m(IMM d)); -DECLARE_MIDFUNC(pop_l(W4 d)); -DECLARE_MIDFUNC(push_l_i(IMM i)); -DECLARE_MIDFUNC(push_l(R4 s)); -DECLARE_MIDFUNC(clear_16(RW4 r)); -DECLARE_MIDFUNC(clear_8(RW4 r)); -DECLARE_MIDFUNC(sign_extend_16_rr(W4 d, R2 s)); -DECLARE_MIDFUNC(sign_extend_8_rr(W4 d, R1 s)); -DECLARE_MIDFUNC(zero_extend_16_rr(W4 d, R2 s)); -DECLARE_MIDFUNC(zero_extend_8_rr(W4 d, R1 s)); -DECLARE_MIDFUNC(imul_64_32(RW4 d, RW4 s)); -DECLARE_MIDFUNC(mul_64_32(RW4 d, RW4 s)); -DECLARE_MIDFUNC(imul_32_32(RW4 d, R4 s)); -DECLARE_MIDFUNC(mul_32_32(RW4 d, R4 s)); -DECLARE_MIDFUNC(mov_b_rr(W1 d, R1 s)); -DECLARE_MIDFUNC(mov_w_rr(W2 d, R2 s)); -DECLARE_MIDFUNC(mov_l_rrm_indexed(W4 d,R4 baser, R4 index, IMM factor)); -DECLARE_MIDFUNC(mov_w_rrm_indexed(W2 d, R4 baser, R4 index, IMM factor)); -DECLARE_MIDFUNC(mov_b_rrm_indexed(W1 d, R4 baser, R4 index, IMM factor)); -DECLARE_MIDFUNC(mov_l_mrr_indexed(R4 baser, R4 index, IMM factor, R4 s)); -DECLARE_MIDFUNC(mov_w_mrr_indexed(R4 baser, R4 index, IMM factor, R2 s)); -DECLARE_MIDFUNC(mov_b_mrr_indexed(R4 baser, R4 index, IMM factor, R1 s)); -DECLARE_MIDFUNC(mov_l_bmrr_indexed(IMM base, R4 baser, R4 index, IMM factor, 
R4 s)); -DECLARE_MIDFUNC(mov_w_bmrr_indexed(IMM base, R4 baser, R4 index, IMM factor, R2 s)); -DECLARE_MIDFUNC(mov_b_bmrr_indexed(IMM base, R4 baser, R4 index, IMM factor, R1 s)); -DECLARE_MIDFUNC(mov_l_brrm_indexed(W4 d, IMM base, R4 baser, R4 index, IMM factor)); -DECLARE_MIDFUNC(mov_w_brrm_indexed(W2 d, IMM base, R4 baser, R4 index, IMM factor)); -DECLARE_MIDFUNC(mov_b_brrm_indexed(W1 d, IMM base, R4 baser, R4 index, IMM factor)); -DECLARE_MIDFUNC(mov_l_rm_indexed(W4 d, IMM base, R4 index, IMM factor)); -DECLARE_MIDFUNC(mov_l_rR(W4 d, R4 s, IMM offset)); -DECLARE_MIDFUNC(mov_w_rR(W2 d, R4 s, IMM offset)); -DECLARE_MIDFUNC(mov_b_rR(W1 d, R4 s, IMM offset)); -DECLARE_MIDFUNC(mov_l_brR(W4 d, R4 s, IMM offset)); -DECLARE_MIDFUNC(mov_w_brR(W2 d, R4 s, IMM offset)); -DECLARE_MIDFUNC(mov_b_brR(W1 d, R4 s, IMM offset)); -DECLARE_MIDFUNC(mov_l_Ri(R4 d, IMM i, IMM offset)); -DECLARE_MIDFUNC(mov_w_Ri(R4 d, IMM i, IMM offset)); -DECLARE_MIDFUNC(mov_b_Ri(R4 d, IMM i, IMM offset)); -DECLARE_MIDFUNC(mov_l_Rr(R4 d, R4 s, IMM offset)); -DECLARE_MIDFUNC(mov_w_Rr(R4 d, R2 s, IMM offset)); -DECLARE_MIDFUNC(mov_b_Rr(R4 d, R1 s, IMM offset)); -DECLARE_MIDFUNC(lea_l_brr(W4 d, R4 s, IMM offset)); -DECLARE_MIDFUNC(lea_l_brr_indexed(W4 d, R4 s, R4 index, IMM factor, IMM offset)); -DECLARE_MIDFUNC(lea_l_rr_indexed(W4 d, R4 s, R4 index, IMM factor)); -DECLARE_MIDFUNC(mov_l_bRr(R4 d, R4 s, IMM offset)); -DECLARE_MIDFUNC(mov_w_bRr(R4 d, R2 s, IMM offset)); -DECLARE_MIDFUNC(mov_b_bRr(R4 d, R1 s, IMM offset)); -DECLARE_MIDFUNC(bswap_32(RW4 r)); -DECLARE_MIDFUNC(bswap_16(RW2 r)); -DECLARE_MIDFUNC(mov_l_rr(W4 d, R4 s)); -DECLARE_MIDFUNC(mov_l_mr(IMM d, R4 s)); -DECLARE_MIDFUNC(mov_w_mr(IMM d, R2 s)); -DECLARE_MIDFUNC(mov_w_rm(W2 d, IMM s)); -DECLARE_MIDFUNC(mov_b_mr(IMM d, R1 s)); -DECLARE_MIDFUNC(mov_b_rm(W1 d, IMM s)); -DECLARE_MIDFUNC(mov_l_ri(W4 d, IMM s)); -DECLARE_MIDFUNC(mov_w_ri(W2 d, IMM s)); -DECLARE_MIDFUNC(mov_b_ri(W1 d, IMM s)); -DECLARE_MIDFUNC(add_l_mi(IMM d, IMM s) ); 
-DECLARE_MIDFUNC(add_w_mi(IMM d, IMM s) ); -DECLARE_MIDFUNC(add_b_mi(IMM d, IMM s) ); -DECLARE_MIDFUNC(test_l_ri(R4 d, IMM i)); -DECLARE_MIDFUNC(test_l_rr(R4 d, R4 s)); -DECLARE_MIDFUNC(test_w_rr(R2 d, R2 s)); -DECLARE_MIDFUNC(test_b_rr(R1 d, R1 s)); -DECLARE_MIDFUNC(and_l_ri(RW4 d, IMM i)); -DECLARE_MIDFUNC(and_l(RW4 d, R4 s)); -DECLARE_MIDFUNC(and_w(RW2 d, R2 s)); -DECLARE_MIDFUNC(and_b(RW1 d, R1 s)); -DECLARE_MIDFUNC(or_l_rm(RW4 d, IMM s)); -DECLARE_MIDFUNC(or_l_ri(RW4 d, IMM i)); -DECLARE_MIDFUNC(or_l(RW4 d, R4 s)); -DECLARE_MIDFUNC(or_w(RW2 d, R2 s)); -DECLARE_MIDFUNC(or_b(RW1 d, R1 s)); -DECLARE_MIDFUNC(adc_l(RW4 d, R4 s)); -DECLARE_MIDFUNC(adc_w(RW2 d, R2 s)); -DECLARE_MIDFUNC(adc_b(RW1 d, R1 s)); -DECLARE_MIDFUNC(add_l(RW4 d, R4 s)); -DECLARE_MIDFUNC(add_w(RW2 d, R2 s)); -DECLARE_MIDFUNC(add_b(RW1 d, R1 s)); -DECLARE_MIDFUNC(sub_l_ri(RW4 d, IMM i)); -DECLARE_MIDFUNC(sub_w_ri(RW2 d, IMM i)); -DECLARE_MIDFUNC(sub_b_ri(RW1 d, IMM i)); -DECLARE_MIDFUNC(add_l_ri(RW4 d, IMM i)); -DECLARE_MIDFUNC(add_w_ri(RW2 d, IMM i)); -DECLARE_MIDFUNC(add_b_ri(RW1 d, IMM i)); -DECLARE_MIDFUNC(sbb_l(RW4 d, R4 s)); -DECLARE_MIDFUNC(sbb_w(RW2 d, R2 s)); -DECLARE_MIDFUNC(sbb_b(RW1 d, R1 s)); -DECLARE_MIDFUNC(sub_l(RW4 d, R4 s)); -DECLARE_MIDFUNC(sub_w(RW2 d, R2 s)); -DECLARE_MIDFUNC(sub_b(RW1 d, R1 s)); -DECLARE_MIDFUNC(cmp_l(R4 d, R4 s)); -DECLARE_MIDFUNC(cmp_l_ri(R4 r, IMM i)); -DECLARE_MIDFUNC(cmp_w(R2 d, R2 s)); -DECLARE_MIDFUNC(cmp_b(R1 d, R1 s)); -DECLARE_MIDFUNC(xor_l(RW4 d, R4 s)); -DECLARE_MIDFUNC(xor_w(RW2 d, R2 s)); -DECLARE_MIDFUNC(xor_b(RW1 d, R1 s)); -DECLARE_MIDFUNC(live_flags(void)); -DECLARE_MIDFUNC(dont_care_flags(void)); -DECLARE_MIDFUNC(duplicate_carry(void)); -DECLARE_MIDFUNC(restore_carry(void)); -DECLARE_MIDFUNC(start_needflags(void)); -DECLARE_MIDFUNC(end_needflags(void)); -DECLARE_MIDFUNC(make_flags_live(void)); -DECLARE_MIDFUNC(call_r_11(R4 r, W4 out1, R4 in1, IMM osize, IMM isize)); -DECLARE_MIDFUNC(call_r_02(R4 r, R4 in1, R4 in2, IMM isize1, IMM 
isize2)); -DECLARE_MIDFUNC(forget_about(W4 r)); -DECLARE_MIDFUNC(nop(void)); -DECLARE_MIDFUNC(f_forget_about(FW r)); -DECLARE_MIDFUNC(fmov_pi(FW r)); -DECLARE_MIDFUNC(fmov_log10_2(FW r)); -DECLARE_MIDFUNC(fmov_log2_e(FW r)); -DECLARE_MIDFUNC(fmov_loge_2(FW r)); -DECLARE_MIDFUNC(fmov_1(FW r)); -DECLARE_MIDFUNC(fmov_0(FW r)); -DECLARE_MIDFUNC(fmov_rm(FW r, MEMR m)); -DECLARE_MIDFUNC(fmovi_rm(FW r, MEMR m)); -DECLARE_MIDFUNC(fmovi_mr(MEMW m, FR r)); -DECLARE_MIDFUNC(fmovs_rm(FW r, MEMR m)); -DECLARE_MIDFUNC(fmovs_mr(MEMW m, FR r)); -DECLARE_MIDFUNC(fmov_mr(MEMW m, FR r)); -DECLARE_MIDFUNC(fmov_ext_mr(MEMW m, FR r)); -DECLARE_MIDFUNC(fmov_ext_rm(FW r, MEMR m)); -DECLARE_MIDFUNC(fmov_rr(FW d, FR s)); -DECLARE_MIDFUNC(fldcw_m_indexed(R4 index, IMM base)); -DECLARE_MIDFUNC(ftst_r(FR r)); -DECLARE_MIDFUNC(dont_care_fflags(void)); -DECLARE_MIDFUNC(fsqrt_rr(FW d, FR s)); -DECLARE_MIDFUNC(fabs_rr(FW d, FR s)); -DECLARE_MIDFUNC(frndint_rr(FW d, FR s)); -DECLARE_MIDFUNC(fsin_rr(FW d, FR s)); -DECLARE_MIDFUNC(fcos_rr(FW d, FR s)); -DECLARE_MIDFUNC(ftwotox_rr(FW d, FR s)); -DECLARE_MIDFUNC(fetox_rr(FW d, FR s)); -DECLARE_MIDFUNC(flog2_rr(FW d, FR s)); -DECLARE_MIDFUNC(fneg_rr(FW d, FR s)); -DECLARE_MIDFUNC(fadd_rr(FRW d, FR s)); -DECLARE_MIDFUNC(fsub_rr(FRW d, FR s)); -DECLARE_MIDFUNC(fmul_rr(FRW d, FR s)); -DECLARE_MIDFUNC(frem_rr(FRW d, FR s)); -DECLARE_MIDFUNC(frem1_rr(FRW d, FR s)); -DECLARE_MIDFUNC(fdiv_rr(FRW d, FR s)); -DECLARE_MIDFUNC(fcmp_rr(FR d, FR s)); -DECLARE_MIDFUNC(fflags_into_flags(W2 tmp)); +#if defined(CPU_arm) + +#include "compemu_midfunc_arm.h" + +#if defined(USE_JIT2) +#include "compemu_midfunc_arm2.h" +#endif +#endif + +#if defined(CPU_i386) || defined(CPU_x86_64) +#include "compemu_midfunc_x86.h" +#endif + #undef DECLARE_MIDFUNC extern int failure; @@ -519,10 +424,16 @@ extern void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp); /* Set native Z flag only if register is zero */ extern void set_zero(int r, int tmp); extern int kill_rodent(int 
r); +#define SYNC_PC_OFFSET 100 extern void sync_m68k_pc(void); extern uae_u32 get_const(int r); extern int is_const(int r); extern void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond); +void compemu_make_sr(int sr, int tmp); +void compemu_enter_super(int sr); +void compemu_exc_make_frame(int format, int sr, int currpc, int nr, int tmp); +void compemu_bkpt(void); +extern bool disasm_this_inst; #define comp_get_ibyte(o) do_get_mem_byte((uae_u8 *)(comp_pc_p + (o) + 1)) #define comp_get_iword(o) do_get_mem_word((uae_u16 *)(comp_pc_p + (o))) @@ -550,40 +461,43 @@ typedef struct blockinfo_t { cpuop_func* handler_to_use; /* The direct handler does not check for the correct address */ - cpuop_func* handler; + cpuop_func* handler; cpuop_func* direct_handler; cpuop_func* direct_pen; cpuop_func* direct_pcc; +#ifdef UAE + uae_u8* nexthandler; +#endif uae_u8* pc_p; - - uae_u32 c1; + + uae_u32 c1; uae_u32 c2; #if USE_CHECKSUM_INFO checksum_info *csi; #else uae_u32 len; - uae_u32 min_pcp; + uae_u32 min_pcp; #endif struct blockinfo_t* next_same_cl; - struct blockinfo_t** prev_same_cl_p; + struct blockinfo_t** prev_same_cl_p; struct blockinfo_t* next; - struct blockinfo_t** prev_p; + struct blockinfo_t** prev_p; - uae_u8 optlevel; - uae_u8 needed_flags; - uae_u8 status; + uae_u8 optlevel; + uae_u8 needed_flags; + uae_u8 status; uae_u8 havestate; - + dependency dep[2]; /* Holds things we depend on */ dependency* deplist; /* List of things that depend on this */ smallstate env; - -#if JIT_DEBUG - /* (gb) size of the compiled block (direct handler) */ - uae_u32 direct_handler_size; + +#ifdef JIT_DEBUG + /* (gb) size of the compiled block (direct handler) */ + uae_u32 direct_handler_size; #endif } blockinfo; @@ -601,9 +515,92 @@ void do_nothing(void); #else -static __inline__ void flush_icache(int) { } -static __inline__ void build_comp() { } +static inline void flush_icache(void) { } #endif /* !USE_JIT */ +#ifdef UAE + +typedef struct { + uae_u8 type; + uae_u8 reg; + 
uae_u32 next; +} regacc; + +#define JIT_EXCEPTION_HANDLER +// #define JIT_ALWAYS_DISTRUST + +/* ARAnyM uses fpu_register name, used in scratch_t */ +/* FIXME: check that no ARAnyM code assumes different floating point type */ +typedef fptype fpu_register; + +extern void compile_block(cpu_history* pc_hist, int blocklen, int totcyles); + +#define MAXCYCLES (1000 * CYCLE_UNIT) +#define scaled_cycles(x) (currprefs.m68k_speed<0?(((x)/SCALE)?(((x)/SCALE (uintptr_t) 0xffffffff) { + jit_abort("JIT: 64-bit pointer (0x%llx) at %s:%d (fatal)", + (unsigned long long)address, file, line); + } + return (uae_u32) address; +} +#define uae_p32(x) (check_uae_p32((uintptr)(x), __FILE__, __LINE__)) +#else +#define uae_p32(x) ((uae_u32)(x)) +#endif + #endif /* COMPEMU_H */ diff --git a/BasiliskII/src/uae_cpu/compiler/compemu1.cpp b/BasiliskII/src/uae_cpu/compiler/compemu1.cpp new file mode 100644 index 00000000..297c6250 --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/compemu1.cpp @@ -0,0 +1,2 @@ +#define PART_1 +#include "compemu.cpp" diff --git a/BasiliskII/src/uae_cpu/compiler/compemu2.cpp b/BasiliskII/src/uae_cpu/compiler/compemu2.cpp new file mode 100644 index 00000000..8c0ddeac --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/compemu2.cpp @@ -0,0 +1,2 @@ +#define PART_2 +#include "compemu.cpp" diff --git a/BasiliskII/src/uae_cpu/compiler/compemu3.cpp b/BasiliskII/src/uae_cpu/compiler/compemu3.cpp new file mode 100644 index 00000000..975e0669 --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/compemu3.cpp @@ -0,0 +1,2 @@ +#define PART_3 +#include "compemu.cpp" diff --git a/BasiliskII/src/uae_cpu/compiler/compemu4.cpp b/BasiliskII/src/uae_cpu/compiler/compemu4.cpp new file mode 100644 index 00000000..a49b5444 --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/compemu4.cpp @@ -0,0 +1,2 @@ +#define PART_4 +#include "compemu.cpp" diff --git a/BasiliskII/src/uae_cpu/compiler/compemu5.cpp b/BasiliskII/src/uae_cpu/compiler/compemu5.cpp new file mode 100644 index 
00000000..41e872f6 --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/compemu5.cpp @@ -0,0 +1,2 @@ +#define PART_5 +#include "compemu.cpp" diff --git a/BasiliskII/src/uae_cpu/compiler/compemu6.cpp b/BasiliskII/src/uae_cpu/compiler/compemu6.cpp new file mode 100644 index 00000000..9156e597 --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/compemu6.cpp @@ -0,0 +1,2 @@ +#define PART_6 +#include "compemu.cpp" diff --git a/BasiliskII/src/uae_cpu/compiler/compemu7.cpp b/BasiliskII/src/uae_cpu/compiler/compemu7.cpp new file mode 100644 index 00000000..63108e04 --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/compemu7.cpp @@ -0,0 +1,2 @@ +#define PART_7 +#include "compemu.cpp" diff --git a/BasiliskII/src/uae_cpu/compiler/compemu8.cpp b/BasiliskII/src/uae_cpu/compiler/compemu8.cpp new file mode 100644 index 00000000..543f9dfd --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/compemu8.cpp @@ -0,0 +1,2 @@ +#define PART_8 +#include "compemu.cpp" diff --git a/BasiliskII/src/uae_cpu/compiler/compemu_fpp.cpp b/BasiliskII/src/uae_cpu/compiler/compemu_fpp.cpp index cadecf98..4ffcca0d 100644 --- a/BasiliskII/src/uae_cpu/compiler/compemu_fpp.cpp +++ b/BasiliskII/src/uae_cpu/compiler/compemu_fpp.cpp @@ -1,43 +1,47 @@ /* - * compiler/compemu_fpp.cpp - Dynamic translation of FPU instructions + * compiler/compemu_fpp.cpp - Dynamic translation of FPU instructions * - * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II * - * Adaptation for Basilisk II and improvements, copyright 2000-2005 - * Gwenole Beauchesne + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. 
* - * Basilisk II (C) 1997-2008 Christian Bauer - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * JIT compiler m68k -> IA-32 and AMD64 * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * Adaptation for Basilisk II and improvements, copyright 2000-2004 Gwenole Beauchesne + * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - /* - * UAE - The Un*x Amiga Emulator - * - * MC68881 emulation - * - * Copyright 1996 Herman ten Brugge - * Adapted for JIT compilation (c) Bernd Meyer, 2000 - */ + * UAE - The Un*x Amiga Emulator + * + * MC68881 emulation + * + * Copyright 1996 Herman ten Brugge + * Adapted for JIT compilation (c) Bernd Meyer, 2000 + */ #include "sysdeps.h" -#if USE_JIT - -#include -#include +#include +#include +#include #include "memory.h" #include "readcpu.h" @@ -52,6 +56,14 @@ #define DEBUG 0 #include "debug.h" +struct jit_disable_opcodes jit_disable; + +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) +#define LD(x) x ## L +#else +#define LD(x) x +#endif + // gb-- WARNING: get_fpcr() and set_fpcr() support is experimental #define HANDLE_FPCR 0 @@ -83,7 +95,8 @@ #define delay2 nop() ;nop() #define UNKNOWN_EXTRA 0xFFFFFFFF -static void fpuop_illg(uae_u32 opcode, uae_u32 extra) +#if 0 +static void fpuop_illg(uae_u32 opcode, uae_u32 /* extra */) { /* if (extra == UNKNOWN_EXTRA) @@ -93,628 +106,717 @@ static void fpuop_illg(uae_u32 opcode, uae_u32 extra) */ op_illg(opcode); } +#endif -static uae_s32 temp_fp[4]; /* To convert between FP/integer */ +uae_s32 temp_fp[4]; /* To convert between FP/integer */ /* return register number, or -1 for failure */ -STATIC_INLINE int get_fp_value (uae_u32 opcode, uae_u16 extra) +STATIC_INLINE int get_fp_value(uae_u32 opcode, uae_u16 extra) { - uaecptr tmppc; - uae_u16 tmp; - int size; - int mode; - int reg; - double* src; - uae_u32 ad = 0; - static int sz1[8] = { 4, 4, 12, 12, 2, 8, 1, 0 }; - static int sz2[8] = { 4, 4, 12, 12, 2, 8, 2, 0 }; + int size; + int mode; + int reg; + uae_u32 ad = 0; + static int const sz1[8] = { 4, 4, 12, 12, 2, 8, 1, 0 }; + static int const sz2[8] = { 4, 4, 12, 12, 2, 8, 2, 0 }; - if ((extra & 0x4000) 
== 0) { - return ((extra >> 10) & 7); - } - - mode = (opcode >> 3) & 7; - reg = opcode & 7; - size = (extra >> 10) & 7; - switch (mode) { - case 0: - switch (size) { - case 6: - sign_extend_8_rr(S1,reg); - mov_l_mr((uintptr)temp_fp,S1); - delay2; - fmovi_rm(FS1,(uintptr)temp_fp); - return FS1; - case 4: - sign_extend_16_rr(S1,reg); - mov_l_mr((uintptr)temp_fp,S1); - delay2; - fmovi_rm(FS1,(uintptr)temp_fp); - return FS1; - case 0: - mov_l_mr((uintptr)temp_fp,reg); - delay2; - fmovi_rm(FS1,(uintptr)temp_fp); - return FS1; - case 1: - mov_l_mr((uintptr)temp_fp,reg); - delay2; - fmovs_rm(FS1,(uintptr)temp_fp); - return FS1; - default: - return -1; + if ((extra & 0x4000) == 0) + { + return ((extra >> 10) & 7); } - return -1; /* Should be unreachable */ - case 1: - return -1; /* Genuine invalid instruction */ - default: - break; - } - /* OK, we *will* have to load something from an address. Let's make - sure we know how to handle that, or quit early --- i.e. *before* - we do any postincrement/predecrement that we may regret */ - switch (size) { - case 3: - return -1; - case 0: - case 1: - case 2: - case 4: - case 5: - case 6: - break; - default: - return -1; - } - - switch (mode) { - case 2: - ad=S1; /* We will change it, anyway ;-) */ - mov_l_rr(ad,reg+8); - break; - case 3: - ad=S1; - mov_l_rr(ad,reg+8); - lea_l_brr(reg+8,reg+8,(reg == 7?sz2[size]:sz1[size])); - break; - case 4: - ad=S1; - - lea_l_brr(reg+8,reg+8,-(reg == 7?sz2[size]:sz1[size])); - mov_l_rr(ad,reg+8); - break; - case 5: - { - uae_u32 off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); - ad=S1; - mov_l_rr(ad,reg+8); - lea_l_brr(ad,ad,off); - break; - } - case 6: - { - uae_u32 dp=comp_get_iword((m68k_pc_offset+=2)-2); - ad=S1; - calc_disp_ea_020(reg+8,dp,ad,S2); - break; - } - case 7: - switch (reg) { - case 0: - { - uae_u32 off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); - ad=S1; - mov_l_ri(ad,off); - break; - } - case 1: - { - uae_u32 off=comp_get_ilong((m68k_pc_offset+=4)-4); - 
ad=S1; - mov_l_ri(ad,off); - break; - } - case 2: - { - uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+ - m68k_pc_offset; - uae_s32 PC16off =(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2) --2); - ad=S1; - mov_l_ri(ad,address+PC16off); - break; - } - case 3: - return -1; - tmppc = m68k_getpc (); - tmp = next_iword (); - ad = get_disp_ea_020 (tmppc, tmp); - break; - case 4: - { - uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+ m68k_pc_offset; - ad=S1; - // Immediate addressing mode && Operation Length == Byte -> - // Use the low-order byte of the extension word. - if (size == 6) address++; - mov_l_ri(ad,address); - m68k_pc_offset+=sz2[size]; - break; - } - default: - return -1; + mode = (opcode >> 3) & 7; + reg = opcode & 7; + size = (extra >> 10) & 7; + switch (mode) + { + case 0: /* Dn */ + switch (size) + { + case 6: /* byte */ + sign_extend_8_rr(S1, reg); + mov_l_mr((uintptr) temp_fp, S1); + delay2; + fmovi_rm(FS1, (uintptr) temp_fp); + return FS1; + case 4: /* word */ + sign_extend_16_rr(S1, reg); + mov_l_mr((uintptr) temp_fp, S1); + delay2; + fmovi_rm(FS1, (uintptr) temp_fp); + return FS1; + case 0: /* long */ + mov_l_mr((uintptr) temp_fp, reg); + delay2; + fmovi_rm(FS1, (uintptr) temp_fp); + return FS1; + case 1: /* single precision */ + mov_l_mr((uintptr) temp_fp, reg); + delay2; + fmovs_rm(FS1, (uintptr) temp_fp); + return FS1; + default: + return -1; + } + return -1; /* Should be unreachable */ + case 1: /* An */ + return -1; /* Genuine invalid instruction */ + default: + break; } - } - switch (size) { - case 0: - readlong(ad,S2,S3); - mov_l_mr((uintptr)temp_fp,S2); - delay2; - fmovi_rm(FS1,(uintptr)temp_fp); - break; - case 1: - readlong(ad,S2,S3); - mov_l_mr((uintptr)temp_fp,S2); - delay2; - fmovs_rm(FS1,(uintptr)temp_fp); - break; - case 2: - readword(ad,S2,S3); - mov_w_mr(((uintptr)temp_fp)+8,S2); - add_l_ri(ad,4); - readlong(ad,S2,S3); - mov_l_mr((uintptr)(temp_fp)+4,S2); - add_l_ri(ad,4); - readlong(ad,S2,S3); - 
mov_l_mr((uintptr)(temp_fp),S2); - delay2; - fmov_ext_rm(FS1,(uintptr)(temp_fp)); - break; - case 3: - return -1; /* Some silly "packed" stuff */ - case 4: - readword(ad,S2,S3); - sign_extend_16_rr(S2,S2); - mov_l_mr((uintptr)temp_fp,S2); - delay2; - fmovi_rm(FS1,(uintptr)temp_fp); - break; - case 5: - readlong(ad,S2,S3); - mov_l_mr(((uintptr)temp_fp)+4,S2); - add_l_ri(ad,4); - readlong(ad,S2,S3); - mov_l_mr((uintptr)(temp_fp),S2); - delay2; - fmov_rm(FS1,(uintptr)(temp_fp)); - break; - case 6: - readbyte(ad,S2,S3); - sign_extend_8_rr(S2,S2); - mov_l_mr((uintptr)temp_fp,S2); - delay2; - fmovi_rm(FS1,(uintptr)temp_fp); - break; - default: - return -1; - } - return FS1; + /* OK, we *will* have to load something from an address. Let's make + sure we know how to handle that, or quit early --- i.e. *before* + we do any postincrement/predecrement that we may regret */ + switch (size) + { + case 0: /* long */ + case 1: /* single precision */ + case 2: /* extended precision */ + case 4: /* word */ + case 5: /* double precision */ + case 6: /* byte */ + break; + case 3: /* packed decimal static */ + default: + return -1; + } + + switch (mode) + { + case 2: /* (An) */ + ad = S1; /* We will change it, anyway ;-) */ + mov_l_rr(ad, reg + 8); + break; + case 3: /* (An)+ */ + ad = S1; + mov_l_rr(ad, reg + 8); + lea_l_brr(reg + 8, reg + 8, (reg == 7 ? sz2[size] : sz1[size])); + break; + case 4: /* -(An) */ + ad = S1; + lea_l_brr(reg + 8, reg + 8, -(reg == 7 ? 
sz2[size] : sz1[size])); + mov_l_rr(ad, reg + 8); + break; + case 5: /* d16(An) */ + { + uae_u32 off = (uae_s32) (uae_s16) comp_get_iword((m68k_pc_offset += 2) - 2); + + ad = S1; + mov_l_rr(ad, reg + 8); + lea_l_brr(ad, ad, off); + } + break; + case 6: /* d8(An,Xn) */ + { + uae_u32 dp = comp_get_iword((m68k_pc_offset += 2) - 2); + + ad = S1; + calc_disp_ea_020(reg + 8, dp, ad, S2); + } + break; + case 7: + switch (reg) + { + case 0: /* abs.w */ + { + uae_u32 off = (uae_s32) (uae_s16) comp_get_iword((m68k_pc_offset += 2) - 2); + + ad = S1; + mov_l_ri(ad, off); + } + break; + case 1: /* abs.l */ + { + uae_u32 off = comp_get_ilong((m68k_pc_offset += 4) - 4); + + ad = S1; + mov_l_ri(ad, off); + } + break; + case 2: /* d16(pc) */ + { + uae_u32 address = start_pc + ((char *) comp_pc_p - (char *) start_pc_p) + m68k_pc_offset; + uae_s32 PC16off = (uae_s32) (uae_s16) comp_get_iword((m68k_pc_offset += 2) - 2); + + ad = S1; + mov_l_ri(ad, address + PC16off); + } + break; + case 3: /* d8(pc,Xn) */ + return -1; + case 4: /* #imm */ + { + uae_u32 address = start_pc + ((char *) comp_pc_p - (char *) start_pc_p) + m68k_pc_offset; + + ad = S1; + // Immediate addressing mode && Operation Length == Byte -> + // Use the low-order byte of the extension word. + if (size == 6) + address++; + mov_l_ri(ad, address); + m68k_pc_offset += sz2[size]; + } + break; + default: + return -1; + } + } + + switch (size) + { + case 0: /* long */ + readlong(ad, S2, S3); + mov_l_mr((uintptr) temp_fp, S2); + delay2; + fmovi_rm(FS1, (uintptr) temp_fp); + break; + case 1: /* single precision */ + readlong(ad, S2, S3); + mov_l_mr((uintptr) temp_fp, S2); + delay2; + fmovs_rm(FS1, (uintptr) temp_fp); + break; + case 2: /* extended precision */ + readword(ad, S2, S3); + mov_w_mr(((uintptr) temp_fp) + 8, S2); + add_l_ri(ad, 4); + readlong(ad, S2, S3); + // always set the explicit integer bit. 
+ or_l_ri(S2, 0x80000000); + mov_l_mr((uintptr) (temp_fp) + 4, S2); + add_l_ri(ad, 4); + readlong(ad, S2, S3); + mov_l_mr((uintptr) (temp_fp), S2); + delay2; + fmov_ext_rm(FS1, (uintptr) (temp_fp)); + break; + case 3: /* packed decimal static */ + return -1; /* Some silly "packed" stuff */ + case 4: /* word */ + readword(ad, S2, S3); + sign_extend_16_rr(S2, S2); + mov_l_mr((uintptr) temp_fp, S2); + delay2; + fmovi_rm(FS1, (uintptr) temp_fp); + break; + case 5: /* double precision */ + readlong(ad, S2, S3); + mov_l_mr(((uintptr) temp_fp) + 4, S2); + add_l_ri(ad, 4); + readlong(ad, S2, S3); + mov_l_mr((uintptr) (temp_fp), S2); + delay2; + fmov_rm(FS1, (uintptr) (temp_fp)); + break; + case 6: /* byte */ + readbyte(ad, S2, S3); + sign_extend_8_rr(S2, S2); + mov_l_mr((uintptr) temp_fp, S2); + delay2; + fmovi_rm(FS1, (uintptr) temp_fp); + break; + default: + return -1; + } + return FS1; } -/* return of -1 means failure, >=0 means OK */ -STATIC_INLINE int put_fp_value (int val, uae_u32 opcode, uae_u16 extra) -{ - uae_u16 tmp; - uaecptr tmppc; - int size; - int mode; - int reg; - uae_u32 ad; - static int sz1[8] = { 4, 4, 12, 12, 2, 8, 1, 0 }; - static int sz2[8] = { 4, 4, 12, 12, 2, 8, 2, 0 }; - if ((extra & 0x4000) == 0) { +/* return of -1 means failure, >=0 means OK */ +STATIC_INLINE int put_fp_value(int val, uae_u32 opcode, uae_u16 extra) +{ + int size; + int mode; + int reg; + uae_u32 ad; + static int const sz1[8] = { 4, 4, 12, 12, 2, 8, 1, 0 }; + static int const sz2[8] = { 4, 4, 12, 12, 2, 8, 2, 0 }; + + if ((extra & 0x4000) == 0) + { const int dest_reg = (extra >> 10) & 7; + fmov_rr(dest_reg, val); // gb-- status register is affected MAKE_FPSR(dest_reg); return 0; - } - - mode = (opcode >> 3) & 7; - reg = opcode & 7; - size = (extra >> 10) & 7; - ad = (uae_u32)-1; - switch (mode) { - case 0: - switch (size) { - case 6: - fmovi_mr((uintptr)temp_fp,val); - delay; - mov_b_rm(reg,(uintptr)temp_fp); - return 0; - case 4: - fmovi_mr((uintptr)temp_fp,val); - delay; - 
mov_w_rm(reg,(uintptr)temp_fp); - return 0; - case 0: - fmovi_mr((uintptr)temp_fp,val); - delay; - mov_l_rm(reg,(uintptr)temp_fp); - return 0; - case 1: - fmovs_mr((uintptr)temp_fp,val); - delay; - mov_l_rm(reg,(uintptr)temp_fp); - return 0; - default: - return -1; } - case 1: - return -1; /* genuine invalid instruction */ - default: break; - } - /* Let's make sure we get out *before* doing something silly if - we can't handle the size */ - switch (size) { - case 0: - case 4: - case 5: - case 6: - case 2: - case 1: - break; - case 3: - default: - return -1; - } - - switch (mode) { - case 2: - ad=S1; - mov_l_rr(ad,reg+8); - break; - case 3: - ad=S1; - mov_l_rr(ad,reg+8); - lea_l_brr(reg+8,reg+8,(reg == 7?sz2[size]:sz1[size])); - break; - case 4: - ad=S1; - lea_l_brr(reg+8,reg+8,-(reg == 7?sz2[size]:sz1[size])); - mov_l_rr(ad,reg+8); - break; - case 5: - { - uae_u32 off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); - ad=S1; - mov_l_rr(ad,reg+8); - add_l_ri(ad,off); - break; - } - case 6: - { - uae_u32 dp=comp_get_iword((m68k_pc_offset+=2)-2); - ad=S1; - calc_disp_ea_020(reg+8,dp,ad,S2); - break; - } - case 7: - switch (reg) { - case 0: - { - uae_u32 off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); - ad=S1; - mov_l_ri(ad,off); - break; - } - case 1: - { - uae_u32 off=comp_get_ilong((m68k_pc_offset+=4)-4); - ad=S1; - mov_l_ri(ad,off); - break; - } - case 2: - { - uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+ - m68k_pc_offset; - uae_s32 PC16off =(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); - ad=S1; - mov_l_ri(ad,address+PC16off); - break; - } - case 3: - return -1; - tmppc = m68k_getpc (); - tmp = next_iword (); - ad = get_disp_ea_020 (tmppc, tmp); - break; - case 4: - { - uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+ - m68k_pc_offset; - ad=S1; - mov_l_ri(ad,address); - m68k_pc_offset+=sz2[size]; - break; - } - default: - return -1; + mode = (opcode >> 3) & 7; + reg = opcode & 7; + size = (extra >> 
10) & 7; + ad = (uae_u32) -1; + switch (mode) + { + case 0: /* Dn */ + switch (size) + { + case 6: /* byte */ + fmovi_mr((uintptr) temp_fp, val); + delay; + mov_b_rm(reg, (uintptr) temp_fp); + return 0; + case 4: /* word */ + fmovi_mr((uintptr) temp_fp, val); + delay; + mov_w_rm(reg, (uintptr) temp_fp); + return 0; + case 0: /* long */ + fmovi_mr((uintptr) temp_fp, val); + delay; + mov_l_rm(reg, (uintptr) temp_fp); + return 0; + case 1: /* single precision */ + fmovs_mr((uintptr) temp_fp, val); + delay; + mov_l_rm(reg, (uintptr) temp_fp); + return 0; + default: + return -1; + } + case 1: /* An */ + return -1; /* genuine invalid instruction */ + default: + break; } - } - switch (size) { - case 0: - fmovi_mr((uintptr)temp_fp,val); - delay; - mov_l_rm(S2,(uintptr)temp_fp); - writelong_clobber(ad,S2,S3); - break; - case 1: - fmovs_mr((uintptr)temp_fp,val); - delay; - mov_l_rm(S2,(uintptr)temp_fp); - writelong_clobber(ad,S2,S3); - break; - case 2: - fmov_ext_mr((uintptr)temp_fp,val); - delay; - mov_w_rm(S2,(uintptr)temp_fp+8); - writeword_clobber(ad,S2,S3); - add_l_ri(ad,4); - mov_l_rm(S2,(uintptr)temp_fp+4); - writelong_clobber(ad,S2,S3); - add_l_ri(ad,4); - mov_l_rm(S2,(uintptr)temp_fp); - writelong_clobber(ad,S2,S3); - break; - case 3: return -1; /* Packed */ - case 4: - fmovi_mr((uintptr)temp_fp,val); - delay; - mov_l_rm(S2,(uintptr)temp_fp); - writeword_clobber(ad,S2,S3); - break; - case 5: - fmov_mr((uintptr)temp_fp,val); - delay; - mov_l_rm(S2,(uintptr)temp_fp+4); - writelong_clobber(ad,S2,S3); - add_l_ri(ad,4); - mov_l_rm(S2,(uintptr)temp_fp); - writelong_clobber(ad,S2,S3); - break; - case 6: - fmovi_mr((uintptr)temp_fp,val); - delay; - mov_l_rm(S2,(uintptr)temp_fp); - writebyte(ad,S2,S3); - break; - default: - return -1; - } - return 0; + /* Let's make sure we get out *before* doing something silly if + we can't handle the size */ + switch (size) + { + case 0: /* long */ + case 1: /* single precision */ + case 2: /* extended precision */ + case 4: /* word */ + 
case 5: /* double precision */ + case 6: /* byte */ + break; + case 3: /* packed decimal static */ + default: + return -1; + } + + switch (mode) + { + case 2: /* (An) */ + ad = S1; + mov_l_rr(ad, reg + 8); + break; + case 3: /* (An)+ */ + ad = S1; + mov_l_rr(ad, reg + 8); + lea_l_brr(reg + 8, reg + 8, (reg == 7 ? sz2[size] : sz1[size])); + break; + case 4: /* -(An) */ + ad = S1; + lea_l_brr(reg + 8, reg + 8, -(reg == 7 ? sz2[size] : sz1[size])); + mov_l_rr(ad, reg + 8); + break; + case 5: /* d16(An) */ + { + uae_u32 off = (uae_s32) (uae_s16) comp_get_iword((m68k_pc_offset += 2) - 2); + + ad = S1; + mov_l_rr(ad, reg + 8); + add_l_ri(ad, off); + } + break; + case 6: /* d8(An,Xn) */ + { + uae_u32 dp = comp_get_iword((m68k_pc_offset += 2) - 2); + + ad = S1; + calc_disp_ea_020(reg + 8, dp, ad, S2); + } + break; + case 7: + switch (reg) + { + case 0: /* abs.w */ + { + uae_u32 off = (uae_s32) (uae_s16) comp_get_iword((m68k_pc_offset += 2) - 2); + + ad = S1; + mov_l_ri(ad, off); + } + break; + case 1: /* abs.l */ + { + uae_u32 off = comp_get_ilong((m68k_pc_offset += 4) - 4); + + ad = S1; + mov_l_ri(ad, off); + } + break; + case 2: /* d16(pc) */ + { + uae_u32 address = start_pc + ((char *) comp_pc_p - (char *) start_pc_p) + m68k_pc_offset; + uae_s32 PC16off = (uae_s32) (uae_s16) comp_get_iword((m68k_pc_offset += 2) - 2); + + ad = S1; + mov_l_ri(ad, address + PC16off); + } + break; + case 3: /* d8(pc,Xn) */ + return -1; + case 4: /* #imm */ + { + uae_u32 address = start_pc + ((char *) comp_pc_p - (char *) start_pc_p) + m68k_pc_offset; + + ad = S1; + mov_l_ri(ad, address); + m68k_pc_offset += sz2[size]; + } + break; + default: + return -1; + } + } + + switch (size) + { + case 0: /* long */ + fmovi_mr((uintptr) temp_fp, val); + delay; + mov_l_rm(S2, (uintptr) temp_fp); + writelong_clobber(ad, S2, S3); + break; + case 1: /* single precision */ + fmovs_mr((uintptr) temp_fp, val); + delay; + mov_l_rm(S2, (uintptr) temp_fp); + writelong_clobber(ad, S2, S3); + break; + case 2: /* 
extended precision */ + fmov_ext_mr((uintptr) temp_fp, val); + delay; + mov_w_rm(S2, (uintptr) temp_fp + 8); + writeword_clobber(ad, S2, S3); + add_l_ri(ad, 4); + mov_l_rm(S2, (uintptr) temp_fp + 4); + writelong_clobber(ad, S2, S3); + add_l_ri(ad, 4); + mov_l_rm(S2, (uintptr) temp_fp); + writelong_clobber(ad, S2, S3); + break; + case 3: /* packed decimal static */ + return -1; /* Packed */ + case 4: /* word */ + fmovi_mr((uintptr) temp_fp, val); + delay; + mov_l_rm(S2, (uintptr) temp_fp); + writeword_clobber(ad, S2, S3); + break; + case 5: /* double precision */ + fmov_mr((uintptr) temp_fp, val); + delay; + mov_l_rm(S2, (uintptr) temp_fp + 4); + writelong_clobber(ad, S2, S3); + add_l_ri(ad, 4); + mov_l_rm(S2, (uintptr) temp_fp); + writelong_clobber(ad, S2, S3); + break; + case 6: /* byte */ + fmovi_mr((uintptr) temp_fp, val); + delay; + mov_l_rm(S2, (uintptr) temp_fp); + writebyte(ad, S2, S3); + break; + default: + return -1; + } + return 0; } + /* return -1 for failure, or register number for success */ -STATIC_INLINE int get_fp_ad (uae_u32 opcode, uae_u32 * ad) +STATIC_INLINE int get_fp_ad(uae_u32 opcode) { - uae_u16 tmp; - uaecptr tmppc; - int mode; - int reg; - uae_s32 off; + int mode; + int reg; + uae_s32 off; - mode = (opcode >> 3) & 7; - reg = opcode & 7; - switch (mode) { - case 0: - case 1: - return -1; - case 2: - case 3: - case 4: - mov_l_rr(S1,8+reg); - return S1; - *ad = m68k_areg (regs, reg); - break; - case 5: - off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); - - mov_l_rr(S1,8+reg); - add_l_ri(S1,off); - return S1; - case 6: - return -1; - break; - case 7: - switch (reg) { - case 0: - off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); - mov_l_ri(S1,off); - return S1; - case 1: - off=comp_get_ilong((m68k_pc_offset+=4)-4); - mov_l_ri(S1,off); - return S1; - case 2: - return -1; -// *ad = m68k_getpc (); -// *ad += (uae_s32) (uae_s16) next_iword (); - off=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset; - 
off+=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); - mov_l_ri(S1,off); - return S1; - case 3: - return -1; - tmppc = m68k_getpc (); - tmp = next_iword (); - *ad = get_disp_ea_020 (tmppc, tmp); - break; - default: - return -1; + mode = (opcode >> 3) & 7; + reg = opcode & 7; + switch (mode) + { + case 0: /* Dn */ + case 1: /* An */ + return -1; + case 2: /* (An) */ + case 3: /* (An)+ */ + case 4: /* -(An) */ + mov_l_rr(S1, 8 + reg); + return S1; + case 5: /* d16(An) */ + off = (uae_s32) (uae_s16) comp_get_iword((m68k_pc_offset += 2) - 2); + mov_l_rr(S1, 8 + reg); + add_l_ri(S1, off); + return S1; + case 6: /* d8(An,Xn) */ + return -1; + break; + case 7: + switch (reg) + { + case 0: /* abs.w */ + off = (uae_s32) (uae_s16) comp_get_iword((m68k_pc_offset += 2) - 2); + mov_l_ri(S1, off); + return S1; + case 1: /* abs.l */ + off = comp_get_ilong((m68k_pc_offset += 4) - 4); + mov_l_ri(S1, off); + return S1; + case 2: /* d16(pc) */ + off = start_pc + ((char *) comp_pc_p - (char *) start_pc_p) + m68k_pc_offset; + off += (uae_s32) (uae_s16) comp_get_iword((m68k_pc_offset += 2) - 2); + mov_l_ri(S1, off); + return S1; + case 3: /* d8(pc,Xn) */ + return -1; + default: + return -1; + } } - } - abort(); + abort(); } -void comp_fdbcc_opp (uae_u32 opcode, uae_u16 extra) + +/* return -1 for failure, or register number for success */ +void comp_fdbcc_opp (uae_u32 /* opcode */, uae_u16 /* extra */) { + if (jit_disable.fdbcc) + { + FAIL(1); + return; + } FAIL(1); return; } -void comp_fscc_opp (uae_u32 opcode, uae_u16 extra) + +void comp_fscc_opp(uae_u32 opcode, uae_u16 extra) { - uae_u32 ad; - int cc; - int reg; + int reg; -#if DEBUG_FPP - printf ("fscc_opp at %08lx\n", m68k_getpc ()); - fflush (stdout); + if (jit_disable.fscc) + { + FAIL(1); + return; + } + + if (extra & 0x20) + { /* only cc from 00 to 1f are defined */ + FAIL(1); + return; + } + if ((opcode & 0x38) != 0) + { /* We can only do to integer register */ + FAIL(1); + return; + } + + fflags_into_flags(S2); + reg = 
(opcode & 7); + + mov_l_ri(S1, 255); + mov_l_ri(S4, 0); + switch (extra & 0x0f) + { /* according to fpp.c, the 0x10 bit is ignored + */ + case 0: + break; /* set never */ + case 1: + mov_l_rr(S2, S4); + cmov_l_rr(S4, S1, 4); + cmov_l_rr(S4, S2, 10); + break; + case 2: + cmov_l_rr(S4, S1, 7); + break; + case 3: + cmov_l_rr(S4, S1, 3); + break; + case 4: + mov_l_rr(S2, S4); + cmov_l_rr(S4, S1, 2); + cmov_l_rr(S4, S2, 10); + break; + case 5: + mov_l_rr(S2, S4); + cmov_l_rr(S4, S1, 6); + cmov_l_rr(S4, S2, 10); + break; + case 6: + cmov_l_rr(S4, S1, 5); + break; + case 7: + cmov_l_rr(S4, S1, 11); + break; + case 8: + cmov_l_rr(S4, S1, 10); + break; + case 9: + cmov_l_rr(S4, S1, 4); + break; + case 10: + cmov_l_rr(S4, S1, 10); + cmov_l_rr(S4, S1, 7); + break; + case 11: + cmov_l_rr(S4, S1, 4); + cmov_l_rr(S4, S1, 3); + break; + case 12: + cmov_l_rr(S4, S1, 2); + break; + case 13: + cmov_l_rr(S4, S1, 6); + break; + case 14: + cmov_l_rr(S4, S1, 5); + cmov_l_rr(S4, S1, 10); + break; + case 15: + mov_l_rr(S4, S1); + break; + } + + if ((opcode & 0x38) == 0) + { + mov_b_rr(reg, S4); + } else + { + abort(); +#if 0 + int cc; + + if (get_fp_ad(opcode) < 0) + { + FAIL(1); + } else + { + put_byte(ad, cc ? 
0xff : 0x00); + } #endif + } +} - if (extra&0x20) { /* only cc from 00 to 1f are defined */ +void comp_ftrapcc_opp (uae_u32 /* opcode */, uaecptr /* oldpc */) +{ FAIL(1); return; - } - if ((opcode & 0x38) != 0) { /* We can only do to integer register */ - FAIL(1); - return; - } - - fflags_into_flags(S2); - reg=(opcode&7); - - mov_l_ri(S1,255); - mov_l_ri(S4,0); - switch(extra&0x0f) { /* according to fpp.c, the 0x10 bit is ignored - */ - case 0: break; /* set never */ - case 1: mov_l_rr(S2,S4); - cmov_l_rr(S4,S1,4); - cmov_l_rr(S4,S2,10); break; - case 2: cmov_l_rr(S4,S1,7); break; - case 3: cmov_l_rr(S4,S1,3); break; - case 4: mov_l_rr(S2,S4); - cmov_l_rr(S4,S1,2); - cmov_l_rr(S4,S2,10); break; - case 5: mov_l_rr(S2,S4); - cmov_l_rr(S4,S1,6); - cmov_l_rr(S4,S2,10); break; - case 6: cmov_l_rr(S4,S1,5); break; - case 7: cmov_l_rr(S4,S1,11); break; - case 8: cmov_l_rr(S4,S1,10); break; - case 9: cmov_l_rr(S4,S1,4); break; - case 10: cmov_l_rr(S4,S1,10); cmov_l_rr(S4,S1,7); break; - case 11: cmov_l_rr(S4,S1,4); cmov_l_rr(S4,S1,3); break; - case 12: cmov_l_rr(S4,S1,2); break; - case 13: cmov_l_rr(S4,S1,6); break; - case 14: cmov_l_rr(S4,S1,5); cmov_l_rr(S4,S1,10); break; - case 15: mov_l_rr(S4,S1); break; - } - - if ((opcode & 0x38) == 0) { - mov_b_rr(reg,S4); - } else { - abort(); - if (get_fp_ad (opcode, &ad) == 0) { - m68k_setpc (m68k_getpc () - 4); - fpuop_illg (opcode,extra); - } else - put_byte (ad, cc ? 
0xff : 0x00); - } } -void comp_ftrapcc_opp (uae_u32 opcode, uaecptr oldpc) -{ - int cc; - FAIL(1); - return; -} - -void comp_fbcc_opp (uae_u32 opcode) +void comp_fbcc_opp(uae_u32 opcode) { - uae_u32 start_68k_offset=m68k_pc_offset; - uae_u32 off; - uae_u32 v1; - uae_u32 v2; - uae_u32 nh; - int cc; + uae_u32 start_68k_offset = m68k_pc_offset; + uae_u32 off; + uae_u32 v1; + uae_u32 v2; + int cc; // comp_pc_p is expected to be bound to 32-bit addresses - assert((uintptr)comp_pc_p <= 0xffffffffUL); + assert((uintptr) comp_pc_p <= 0xffffffffUL); - if (opcode&0x20) { /* only cc from 00 to 1f are defined */ - FAIL(1); - return; - } - if ((opcode&0x40)==0) { - off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); - } - else { - off=comp_get_ilong((m68k_pc_offset+=4)-4); - } - mov_l_ri(S1,(uintptr) - (comp_pc_p+off-(m68k_pc_offset-start_68k_offset))); - mov_l_ri(PC_P,(uintptr)comp_pc_p); + if (jit_disable.fbcc) + { + FAIL(1); + return; + } + if (opcode & 0x20) + { /* only cc from 00 to 1f are defined */ + FAIL(1); + return; + } + if ((opcode & 0x40) == 0) + { + off = (uae_s32) (uae_s16) comp_get_iword((m68k_pc_offset += 2) - 2); + } else + { + off = comp_get_ilong((m68k_pc_offset += 4) - 4); + } + mov_l_ri(S1, (uintptr) (comp_pc_p + off - (m68k_pc_offset - start_68k_offset))); + mov_l_ri(PC_P, (uintptr) comp_pc_p); - /* Now they are both constant. Might as well fold in m68k_pc_offset */ - add_l_ri(S1,m68k_pc_offset); - add_l_ri(PC_P,m68k_pc_offset); - m68k_pc_offset=0; + /* Now they are both constant. 
Might as well fold in m68k_pc_offset */ + add_l_ri(S1, m68k_pc_offset); + add_l_ri(PC_P, m68k_pc_offset); + m68k_pc_offset = 0; - /* according to fpp.c, the 0x10 bit is ignored - (it handles exception handling, which we don't - do, anyway ;-) */ - cc=opcode&0x0f; - v1=get_const(PC_P); - v2=get_const(S1); - fflags_into_flags(S2); + /* according to fpp.c, the 0x10 bit is ignored + (it handles exception handling, which we don't + do, anyway ;-) */ + cc = opcode & 0x0f; + v1 = get_const(PC_P); + v2 = get_const(S1); + fflags_into_flags(S2); - switch(cc) { - case 0: break; /* jump never */ - case 1: - mov_l_rr(S2,PC_P); - cmov_l_rr(PC_P,S1,4); - cmov_l_rr(PC_P,S2,10); break; - case 2: register_branch(v1,v2,7); break; - case 3: register_branch(v1,v2,3); break; - case 4: - mov_l_rr(S2,PC_P); - cmov_l_rr(PC_P,S1,2); - cmov_l_rr(PC_P,S2,10); break; - case 5: - mov_l_rr(S2,PC_P); - cmov_l_rr(PC_P,S1,6); - cmov_l_rr(PC_P,S2,10); break; - case 6: register_branch(v1,v2,5); break; - case 7: register_branch(v1,v2,11); break; - case 8: register_branch(v1,v2,10); break; - case 9: register_branch(v1,v2,4); break; - case 10: - cmov_l_rr(PC_P,S1,10); - cmov_l_rr(PC_P,S1,7); break; - case 11: - cmov_l_rr(PC_P,S1,4); - cmov_l_rr(PC_P,S1,3); break; - case 12: register_branch(v1,v2,2); break; - case 13: register_branch(v1,v2,6); break; - case 14: - cmov_l_rr(PC_P,S1,5); - cmov_l_rr(PC_P,S1,10); break; - case 15: mov_l_rr(PC_P,S1); break; - } + switch (cc) + { + case 0: + break; /* jump never */ + case 1: + mov_l_rr(S2, PC_P); + cmov_l_rr(PC_P, S1, 4); + cmov_l_rr(PC_P, S2, 10); + break; + case 2: + register_branch(v1, v2, 7); + break; + case 3: + register_branch(v1, v2, 3); + break; + case 4: + mov_l_rr(S2, PC_P); + cmov_l_rr(PC_P, S1, 2); + cmov_l_rr(PC_P, S2, 10); + break; + case 5: + mov_l_rr(S2, PC_P); + cmov_l_rr(PC_P, S1, 6); + cmov_l_rr(PC_P, S2, 10); + break; + case 6: + register_branch(v1, v2, 5); + break; + case 7: + register_branch(v1, v2, 11); + break; + case 8: + 
register_branch(v1, v2, 10); + break; + case 9: + register_branch(v1, v2, 4); + break; + case 10: + cmov_l_rr(PC_P, S1, 10); + cmov_l_rr(PC_P, S1, 7); + break; + case 11: + cmov_l_rr(PC_P, S1, 4); + cmov_l_rr(PC_P, S1, 3); + break; + case 12: + register_branch(v1, v2, 2); + break; + case 13: + register_branch(v1, v2, 6); + break; + case 14: + cmov_l_rr(PC_P, S1, 5); + cmov_l_rr(PC_P, S1, 10); + break; + case 15: + mov_l_rr(PC_P, S1); + break; + } } + /* Floating point conditions The "NotANumber" part could be problematic; Howver, when NaN is encountered, the ftst instruction sets bot N and Z to 1 on the x87, @@ -785,154 +887,185 @@ x86 conditions 0001 : 10 1110 : 11 */ -void comp_fsave_opp (uae_u32 opcode) + +void comp_fsave_opp(uae_u32 opcode) { - uae_u32 ad; - int incr = (opcode & 0x38) == 0x20 ? -1 : 1; - int i; + int incr = (opcode & 0x38) == 0x20 ? -1 : 1; + int i; + int ad; - FAIL(1); - return; - -#if DEBUG_FPP - printf ("fsave_opp at %08lx\n", m68k_getpc ()); - fflush (stdout); -#endif - if (get_fp_ad (opcode, &ad) == 0) { - m68k_setpc (m68k_getpc () - 2); - fpuop_illg (opcode,UNKNOWN_EXTRA); + if (jit_disable.fsave) + { + FAIL(1); + return; + } + FAIL(1); return; - } - if (CPUType == 4) { - /* 4 byte 68040 IDLE frame. */ - if (incr < 0) { - ad -= 4; - put_long (ad, 0x41000000); - } else { - put_long (ad, 0x41000000); - ad += 4; + if ((ad = get_fp_ad(opcode)) < 0) + { + FAIL(1); + return; } - } else { - if (incr < 0) { - ad -= 4; - put_long (ad, 0x70000000); - for (i = 0; i < 5; i++) { - ad -= 4; - put_long (ad, 0x00000000); - } - ad -= 4; - put_long (ad, 0x1f180000); - } else { - put_long (ad, 0x1f180000); - ad += 4; - for (i = 0; i < 5; i++) { - put_long (ad, 0x00000000); - ad += 4; - } - put_long (ad, 0x70000000); - ad += 4; + + if (CPUType == 4) + { + /* 4 byte 68040 IDLE frame. 
*/ + if (incr < 0) + { + ad -= 4; + put_long(ad, 0x41000000); + } else + { + put_long(ad, 0x41000000); + ad += 4; + } + } else + { + if (incr < 0) + { + ad -= 4; + put_long(ad, 0x70000000); + for (i = 0; i < 5; i++) + { + ad -= 4; + put_long(ad, 0x00000000); + } + ad -= 4; + put_long(ad, 0x1f180000); + } else + { + put_long(ad, 0x1f180000); + ad += 4; + for (i = 0; i < 5; i++) + { + put_long(ad, 0x00000000); + ad += 4; + } + put_long(ad, 0x70000000); + ad += 4; + } } - } - if ((opcode & 0x38) == 0x18) - m68k_areg (regs, opcode & 7) = ad; - if ((opcode & 0x38) == 0x20) - m68k_areg (regs, opcode & 7) = ad; + if ((opcode & 0x38) == 0x18) + m68k_areg(regs, opcode & 7) = ad; + if ((opcode & 0x38) == 0x20) + m68k_areg(regs, opcode & 7) = ad; } -void comp_frestore_opp (uae_u32 opcode) + +void comp_frestore_opp(uae_u32 opcode) { - uae_u32 ad; - uae_u32 d; - int incr = (opcode & 0x38) == 0x20 ? -1 : 1; + uae_u32 d; + int incr = (opcode & 0x38) == 0x20 ? -1 : 1; + int ad; - FAIL(1); - return; - -#if DEBUG_FPP - printf ("frestore_opp at %08lx\n", m68k_getpc ()); - fflush (stdout); -#endif - if (get_fp_ad (opcode, &ad) == 0) { - m68k_setpc (m68k_getpc () - 2); - fpuop_illg (opcode,UNKNOWN_EXTRA); + if (jit_disable.frestore) + { + FAIL(1); + return; + } + FAIL(1); return; - } - if (CPUType == 4) { - /* 68040 */ - if (incr < 0) { - /* @@@ This may be wrong. */ - ad -= 4; - d = get_long (ad); - if ((d & 0xff000000) != 0) { /* Not a NULL frame? */ - if ((d & 0x00ff0000) == 0) { /* IDLE */ - } else if ((d & 0x00ff0000) == 0x00300000) { /* UNIMP */ - ad -= 44; - } else if ((d & 0x00ff0000) == 0x00600000) { /* BUSY */ - ad -= 92; - } - } - } else { - d = get_long (ad); - ad += 4; - if ((d & 0xff000000) != 0) { /* Not a NULL frame? 
*/ - if ((d & 0x00ff0000) == 0) { /* IDLE */ - } else if ((d & 0x00ff0000) == 0x00300000) { /* UNIMP */ - ad += 44; - } else if ((d & 0x00ff0000) == 0x00600000) { /* BUSY */ - ad += 92; - } - } + + if ((ad = get_fp_ad(opcode)) < 0) + { + FAIL(1); + return; } - } else { - if (incr < 0) { - ad -= 4; - d = get_long (ad); - if ((d & 0xff000000) != 0) { - if ((d & 0x00ff0000) == 0x00180000) - ad -= 6 * 4; - else if ((d & 0x00ff0000) == 0x00380000) - ad -= 14 * 4; - else if ((d & 0x00ff0000) == 0x00b40000) - ad -= 45 * 4; - } - } else { - d = get_long (ad); - ad += 4; - if ((d & 0xff000000) != 0) { - if ((d & 0x00ff0000) == 0x00180000) - ad += 6 * 4; - else if ((d & 0x00ff0000) == 0x00380000) - ad += 14 * 4; - else if ((d & 0x00ff0000) == 0x00b40000) - ad += 45 * 4; - } + if (CPUType == 4) + { + /* 68040 */ + if (incr < 0) + { + /* @@@ This may be wrong. */ + ad -= 4; + d = get_long(ad); + if ((d & 0xff000000) != 0) + { /* Not a NULL frame? */ + if ((d & 0x00ff0000) == 0) + { /* IDLE */ + } else if ((d & 0x00ff0000) == 0x00300000) + { /* UNIMP */ + ad -= 44; + } else if ((d & 0x00ff0000) == 0x00600000) + { /* BUSY */ + ad -= 92; + } + } + } else + { + d = get_long(ad); + ad += 4; + if ((d & 0xff000000) != 0) + { /* Not a NULL frame? 
*/ + if ((d & 0x00ff0000) == 0) + { /* IDLE */ + } else if ((d & 0x00ff0000) == 0x00300000) + { /* UNIMP */ + ad += 44; + } else if ((d & 0x00ff0000) == 0x00600000) + { /* BUSY */ + ad += 92; + } + } + } + } else + { + if (incr < 0) + { + ad -= 4; + d = get_long(ad); + if ((d & 0xff000000) != 0) + { + if ((d & 0x00ff0000) == 0x00180000) + ad -= 6 * 4; + else if ((d & 0x00ff0000) == 0x00380000) + ad -= 14 * 4; + else if ((d & 0x00ff0000) == 0x00b40000) + ad -= 45 * 4; + } + } else + { + d = get_long(ad); + ad += 4; + if ((d & 0xff000000) != 0) + { + if ((d & 0x00ff0000) == 0x00180000) + ad += 6 * 4; + else if ((d & 0x00ff0000) == 0x00380000) + ad += 14 * 4; + else if ((d & 0x00ff0000) == 0x00b40000) + ad += 45 * 4; + } + } } - } - if ((opcode & 0x38) == 0x18) - m68k_areg (regs, opcode & 7) = ad; - if ((opcode & 0x38) == 0x20) - m68k_areg (regs, opcode & 7) = ad; + if ((opcode & 0x38) == 0x18) + m68k_areg(regs, opcode & 7) = ad; + if ((opcode & 0x38) == 0x20) + m68k_areg(regs, opcode & 7) = ad; } -#if USE_LONG_DOUBLE -static const fpu_register const_e = 2.7182818284590452353602874713526625L; -static const fpu_register const_log10_e = 0.4342944819032518276511289189166051L; -static const fpu_register const_loge_10 = 2.3025850929940456840179914546843642L; + +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) +static const fpu_register const_e = LD(2.7182818284590452353); // LD(2.7182818284590452353602874713526625); +static const fpu_register const_log10_e = LD(0.4342944819032518276511289189166051); +static const fpu_register const_loge_10 = LD(2.3025850929940456840179914546843642); #else -static const fpu_register const_e = 2.7182818284590452354; +static const fpu_register const_e = 2.7182818284590452354; static const fpu_register const_log10_e = 0.43429448190325182765; static const fpu_register const_loge_10 = 2.30258509299404568402; #endif static const fpu_register power10[] = { - 1e0, 1e1, 1e2, 1e4, 1e8, 1e16, 1e32, 1e64, 1e128, 1e256 -#if USE_LONG_DOUBLE -, 
1e512, 1e1024, 1e2048, 1e4096 + LD(1e0), LD(1e1), LD(1e2), LD(1e4), LD(1e8), LD(1e16), LD(1e32), LD(1e64), LD(1e128), LD(1e256) +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) +, LD(1e512), LD(1e1024), LD(1e2048), LD(1e4096) #endif }; /* 128 words, indexed through the low byte of the 68k fpu control word */ +#if 0 +/* unused*/ static uae_u16 x86_fpucw[]={ 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, /* p0r0 */ 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, /* p0r1 */ @@ -954,283 +1087,349 @@ static uae_u16 x86_fpucw[]={ 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, /* p3r2 */ 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f /* p3r3 */ }; +#endif -void comp_fpp_opp (uae_u32 opcode, uae_u16 extra) + +void comp_fpp_opp(uae_u32 opcode, uae_u16 extra) { - int reg; - int src; - - switch ((extra >> 13) & 0x7) { - case 3: /* 2nd most common */ - if (put_fp_value ((extra >> 7)&7 , opcode, extra) < 0) { - FAIL(1); - return; + int reg; + int src; - } - return; - case 6: - case 7: + switch ((extra >> 13) & 0x7) { - uae_u32 ad, list = 0; - int incr = 0; - if (extra & 0x2000) { - uae_u32 ad; + case 1: /* illegal */ + break; - /* FMOVEM FPP->memory */ - switch ((extra >> 11) & 3) { /* Get out early if failure */ - case 0: - case 2: - break; - case 1: - case 3: - default: - FAIL(1); return; - } - ad=get_fp_ad (opcode, &ad); - if (ad<0) { - abort(); - m68k_setpc (m68k_getpc () - 4); - fpuop_illg (opcode,extra); - return; - } - switch ((extra >> 11) & 3) { - case 0: /* static pred */ - list = extra & 0xff; - incr = -1; - break; - case 2: /* static postinc */ - list = extra & 0xff; - incr = 1; - break; - case 1: /* dynamic pred */ - case 3: /* dynamic postinc */ - abort(); - } - if (incr < 0) { /* Predecrement */ - for (reg = 7; reg >= 0; reg--) { - if (list & 0x80) { - fmov_ext_mr((uintptr)temp_fp,reg); - delay; - sub_l_ri(ad,4); - mov_l_rm(S2,(uintptr)temp_fp); - writelong_clobber(ad,S2,S3); - 
sub_l_ri(ad,4); - mov_l_rm(S2,(uintptr)temp_fp+4); - writelong_clobber(ad,S2,S3); - sub_l_ri(ad,4); - mov_w_rm(S2,(uintptr)temp_fp+8); - writeword_clobber(ad,S2,S3); - } - list <<= 1; - } - } - else { /* Postincrement */ - for (reg = 0; reg < 8; reg++) { - if (list & 0x80) { - fmov_ext_mr((uintptr)temp_fp,reg); - delay; - mov_w_rm(S2,(uintptr)temp_fp+8); - writeword_clobber(ad,S2,S3); - add_l_ri(ad,4); - mov_l_rm(S2,(uintptr)temp_fp+4); - writelong_clobber(ad,S2,S3); - add_l_ri(ad,4); - mov_l_rm(S2,(uintptr)temp_fp); - writelong_clobber(ad,S2,S3); - add_l_ri(ad,4); - } - list <<= 1; - } - } - if ((opcode & 0x38) == 0x18) - mov_l_rr((opcode & 7)+8,ad); - if ((opcode & 0x38) == 0x20) - mov_l_rr((opcode & 7)+8,ad); - } else { - /* FMOVEM memory->FPP */ - - uae_u32 ad; - switch ((extra >> 11) & 3) { /* Get out early if failure */ - case 0: - case 2: - break; - case 1: - case 3: - default: - FAIL(1); return; - } - ad=get_fp_ad (opcode, &ad); - if (ad<0) { - abort(); - m68k_setpc (m68k_getpc () - 4); - write_log("no ad\n"); - fpuop_illg (opcode,extra); - return; - } - switch ((extra >> 11) & 3) { - case 0: /* static pred */ - list = extra & 0xff; - incr = -1; - break; - case 2: /* static postinc */ - list = extra & 0xff; - incr = 1; - break; - case 1: /* dynamic pred */ - case 3: /* dynamic postinc */ - abort(); - } - - if (incr < 0) { - // not reached - for (reg = 7; reg >= 0; reg--) { - uae_u32 wrd1, wrd2, wrd3; - if (list & 0x80) { - sub_l_ri(ad,4); - readlong(ad,S2,S3); - mov_l_mr((uintptr)(temp_fp),S2); - sub_l_ri(ad,4); - readlong(ad,S2,S3); - mov_l_mr((uintptr)(temp_fp)+4,S2); - sub_l_ri(ad,4); - readword(ad,S2,S3); - mov_w_mr(((uintptr)temp_fp)+8,S2); - delay2; - fmov_ext_rm(reg,(uintptr)(temp_fp)); - } - list <<= 1; - } - } - else { - for (reg = 0; reg < 8; reg++) { - uae_u32 wrd1, wrd2, wrd3; - if (list & 0x80) { - readword(ad,S2,S3); - mov_w_mr(((uintptr)temp_fp)+8,S2); - add_l_ri(ad,4); - readlong(ad,S2,S3); - mov_l_mr((uintptr)(temp_fp)+4,S2); - 
add_l_ri(ad,4); - readlong(ad,S2,S3); - mov_l_mr((uintptr)(temp_fp),S2); - add_l_ri(ad,4); - delay2; - fmov_ext_rm(reg,(uintptr)(temp_fp)); - } - list <<= 1; - } - } - if ((opcode & 0x38) == 0x18) - mov_l_rr((opcode & 7)+8,ad); - if ((opcode & 0x38) == 0x20) - mov_l_rr((opcode & 7)+8,ad); - } - } - return; - - case 4: - case 5: /* rare */ - if ((opcode & 0x30) == 0) { - if (extra & 0x2000) { - if (extra & 0x1000) { -#if HANDLE_FPCR - mov_l_rm(opcode & 15, (uintptr)&fpu.fpcr.rounding_mode); - or_l_rm(opcode & 15, (uintptr)&fpu.fpcr.rounding_precision); -#else + case 3: /* FMOVE Fpn, */ + /* 2nd most common */ + if (jit_disable.fmove) + { FAIL(1); return; -#endif } - if (extra & 0x0800) { - FAIL(1); - return; - } - if (extra & 0x0400) { - mov_l_rm(opcode & 15,(uintptr)&fpu.instruction_address); - return; - } - } else { - // gb-- moved here so that we may FAIL() without generating any code - if (extra & 0x0800) { - // set_fpsr(m68k_dreg (regs, opcode & 15)); - FAIL(1); - return; - } - if (extra & 0x1000) { -#if HANDLE_FPCR -#if defined(FPU_USE_X86_ROUNDING_MODE) && defined(FPU_USE_X86_ROUNDING_PRECISION) + + if (put_fp_value((extra >> 7) & 7, opcode, extra) < 0) + { FAIL(1); return; -#endif - mov_l_rr(S1,opcode & 15); - mov_l_rr(S2,opcode & 15); - and_l_ri(S1,FPCR_ROUNDING_PRECISION); - and_l_ri(S2,FPCR_ROUNDING_MODE); - mov_l_mr((uintptr)&fpu.fpcr.rounding_precision,S1); - mov_l_mr((uintptr)&fpu.fpcr.rounding_mode,S2); -#else - FAIL(1); - return; -#endif -// return; gb-- FMOVEM could also operate on fpiar - } - if (extra & 0x0400) { - mov_l_mr((uintptr)&fpu.instruction_address,opcode & 15); -// return; gb-- we have to process all FMOVEM bits before returning } return; - } - } else if ((opcode & 0x3f) == 0x3c) { - if ((extra & 0x2000) == 0) { - // gb-- moved here so that we may FAIL() without generating any code - if (extra & 0x0800) { - FAIL(1); - return; - } - if (extra & 0x1000) { - uae_u32 val=comp_get_ilong((m68k_pc_offset+=4)-4); -#if HANDLE_FPCR -#if 
defined(FPU_USE_X86_ROUNDING_MODE) && defined(FPU_USE_X86_ROUNDING_PRECISION) + + case 6: /* FMOVEM , */ + case 7: /* FMOVEM , */ + if (jit_disable.fmovem) + { FAIL(1); return; -#endif -// mov_l_mi((uintptr)®s.fpcr,val); - mov_l_ri(S1,val); - mov_l_ri(S2,val); - and_l_ri(S1,FPCR_ROUNDING_PRECISION); - and_l_ri(S2,FPCR_ROUNDING_MODE); - mov_l_mr((uintptr)&fpu.fpcr.rounding_precision,S1); - mov_l_mr((uintptr)&fpu.fpcr.rounding_mode,S2); -#else - FAIL(1); - return; -#endif -// return; gb-- FMOVEM could also operate on fpiar } - if (extra & 0x0400) { - uae_u32 val=comp_get_ilong((m68k_pc_offset+=4)-4); - mov_l_mi((uintptr)&fpu.instruction_address,val); -// return; gb-- we have to process all FMOVEM bits before returning + + { + int ad; + uae_u32 list = 0; + int incr = 0; + + if (extra & 0x2000) + { + /* FMOVEM FPP->memory */ + switch ((extra >> 11) & 3) + { /* Get out early if failure */ + case 0: /* static pred */ + case 2: /* static postinc */ + break; + case 1: /* dynamic pred */ + case 3: /* dynamic postinc */ + default: + FAIL(1); + return; + } + if ((ad = get_fp_ad(opcode)) < 0) + { + FAIL(1); + return; + } + switch ((extra >> 11) & 3) + { + case 0: /* static pred */ + list = extra & 0xff; + incr = -1; + break; + case 2: /* static postinc */ + list = extra & 0xff; + incr = 1; + break; + case 1: /* dynamic pred */ + case 3: /* dynamic postinc */ + abort(); + } + if (incr < 0) + { /* Predecrement */ + for (reg = 7; reg >= 0; reg--) + { + if (list & 0x80) + { + fmov_ext_mr((uintptr) temp_fp, reg); + delay; + sub_l_ri(ad, 4); + mov_l_rm(S2, (uintptr) temp_fp); + writelong_clobber(ad, S2, S3); + sub_l_ri(ad, 4); + mov_l_rm(S2, (uintptr) temp_fp + 4); + writelong_clobber(ad, S2, S3); + sub_l_ri(ad, 4); + mov_w_rm(S2, (uintptr) temp_fp + 8); + writeword_clobber(ad, S2, S3); + } + list <<= 1; + } + } else + { /* Postincrement */ + for (reg = 0; reg < 8; reg++) + { + if (list & 0x80) + { + fmov_ext_mr((uintptr) temp_fp, reg); + delay; + mov_w_rm(S2, (uintptr) temp_fp + 
8); + writeword_clobber(ad, S2, S3); + add_l_ri(ad, 4); + mov_l_rm(S2, (uintptr) temp_fp + 4); + writelong_clobber(ad, S2, S3); + add_l_ri(ad, 4); + mov_l_rm(S2, (uintptr) temp_fp); + writelong_clobber(ad, S2, S3); + add_l_ri(ad, 4); + } + list <<= 1; + } + } + if ((opcode & 0x38) == 0x18) + mov_l_rr((opcode & 7) + 8, ad); + if ((opcode & 0x38) == 0x20) + mov_l_rr((opcode & 7) + 8, ad); + } else + { + /* FMOVEM memory->FPP */ + + int ad; + + switch ((extra >> 11) & 3) + { /* Get out early if failure */ + case 0: /* static pred */ + case 2: /* static postinc */ + break; + case 1: /* dynamic pred */ + case 3: /* dynamic postinc */ + default: + FAIL(1); + return; + } + ad = get_fp_ad(opcode); + if (ad < 0) + { + D(bug("no ad\n")); + FAIL(1); + return; + } + switch ((extra >> 11) & 3) + { + case 0: /* static pred */ + list = extra & 0xff; + incr = -1; + break; + case 2: /* static postinc */ + list = extra & 0xff; + incr = 1; + break; + case 1: /* dynamic pred */ + case 3: /* dynamic postinc */ + abort(); + } + + if (incr < 0) + { + // not reached + for (reg = 7; reg >= 0; reg--) + { + if (list & 0x80) + { + sub_l_ri(ad, 4); + readlong(ad, S2, S3); + mov_l_mr((uintptr) (temp_fp), S2); + sub_l_ri(ad, 4); + readlong(ad, S2, S3); + mov_l_mr((uintptr) (temp_fp) + 4, S2); + sub_l_ri(ad, 4); + readword(ad, S2, S3); + mov_w_mr(((uintptr) temp_fp) + 8, S2); + delay2; + fmov_ext_rm(reg, (uintptr) (temp_fp)); + } + list <<= 1; + } + } else + { + for (reg = 0; reg < 8; reg++) + { + if (list & 0x80) + { + readword(ad, S2, S3); + mov_w_mr(((uintptr) temp_fp) + 8, S2); + add_l_ri(ad, 4); + readlong(ad, S2, S3); + mov_l_mr((uintptr) (temp_fp) + 4, S2); + add_l_ri(ad, 4); + readlong(ad, S2, S3); + mov_l_mr((uintptr) (temp_fp), S2); + add_l_ri(ad, 4); + delay2; + fmov_ext_rm(reg, (uintptr) (temp_fp)); + } + list <<= 1; + } + } + if ((opcode & 0x38) == 0x18) + mov_l_rr((opcode & 7) + 8, ad); + if ((opcode & 0x38) == 0x20) + mov_l_rr((opcode & 7) + 8, ad); + } } return; - } - FAIL(1); - 
return; - } else if (extra & 0x2000) { - FAIL(1); - return; - } else { - FAIL(1); - return; - } - FAIL(1); - return; + + case 4: /* FMOVEM , */ + case 5: /* FMOVEM , */ + if (jit_disable.fmovec) + { + FAIL(1); + return; + } + + /* rare */ + if ((opcode & 0x30) == 0) + { + /* = Dn or An */ + if (extra & 0x2000) + { + if (extra & 0x1000) + { +#if HANDLE_FPCR + mov_l_rm(opcode & 15, (uintptr) & fpu.fpcr.rounding_mode); + or_l_rm(opcode & 15, (uintptr) & fpu.fpcr.rounding_precision); +#else + FAIL(1); + return; +#endif + } + if (extra & 0x0800) + { + FAIL(1); + return; + } + if (extra & 0x0400) + { + /* FPIAR: fixme; we cannot correctly return the address from compiled code */ + mov_l_rm(opcode & 15, (uintptr) & fpu.instruction_address); + return; + } + } else + { + // gb-- moved here so that we may FAIL() without generating any code + if (extra & 0x0800) + { + // set_fpsr(m68k_dreg (regs, opcode & 15)); + FAIL(1); + return; + } + if (extra & 0x1000) + { +#if HANDLE_FPCR +#if defined(FPU_USE_X86_ROUNDING_MODE) && defined(FPU_USE_X86_ROUNDING_PRECISION) + FAIL(1); + return; +#endif + mov_l_rr(S1, opcode & 15); + mov_l_rr(S2, opcode & 15); + and_l_ri(S1, FPCR_ROUNDING_PRECISION); + and_l_ri(S2, FPCR_ROUNDING_MODE); + mov_l_mr((uintptr) & fpu.fpcr.rounding_precision, S1); + mov_l_mr((uintptr) & fpu.fpcr.rounding_mode, S2); +#else + FAIL(1); + return; +#endif + } + if (extra & 0x0400) + { + /* FPIAR: does that make sense at all? 
*/ + mov_l_mr((uintptr) & fpu.instruction_address, opcode & 15); + } + return; + } + } else if ((opcode & 0x3f) == 0x3c) + { + /* = #imm */ + if ((extra & 0x2000) == 0) + { + // gb-- moved here so that we may FAIL() without generating any code + if (extra & 0x0800) + { + FAIL(1); + return; + } + if (extra & 0x1000) + { + comp_get_ilong((m68k_pc_offset += 4) - 4); +#if HANDLE_FPCR +#if defined(FPU_USE_X86_ROUNDING_MODE) && defined(FPU_USE_X86_ROUNDING_PRECISION) + FAIL(1); + return; +#endif + // mov_l_mi((uintptr)®s.fpcr,val); + mov_l_ri(S1, val); + mov_l_ri(S2, val); + and_l_ri(S1, FPCR_ROUNDING_PRECISION); + and_l_ri(S2, FPCR_ROUNDING_MODE); + mov_l_mr((uintptr) & fpu.fpcr.rounding_precision, S1); + mov_l_mr((uintptr) & fpu.fpcr.rounding_mode, S2); +#else + FAIL(1); + return; +#endif + } + if (extra & 0x0400) + { + uae_u32 val = comp_get_ilong((m68k_pc_offset += 4) - 4); + + mov_l_mi((uintptr) & fpu.instruction_address, val); + } + return; + } + FAIL(1); + return; + } else if (extra & 0x2000) + { + FAIL(1); + return; + } else + { + FAIL(1); + return; + } + FAIL(1); + return; case 0: - case 2: /* Extremely common */ + case 2: /* Extremely common */ reg = (extra >> 7) & 7; - if ((extra & 0xfc00) == 0x5c00) { - switch (extra & 0x7f) { + if ((extra & 0xfc00) == 0x5c00) + { + if (jit_disable.fmovecr) + { + FAIL(1); + return; + } + + switch (extra & 0x7f) + { case 0x00: fmov_pi(reg); break; @@ -1238,20 +1437,20 @@ void comp_fpp_opp (uae_u32 opcode, uae_u16 extra) fmov_log10_2(reg); break; case 0x0c: -#if USE_LONG_DOUBLE - fmov_ext_rm(reg,(uintptr)&const_e); +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) + fmov_ext_rm(reg, (uintptr) & const_e); #else - fmov_rm(reg,(uintptr)&const_e); + fmov_rm(reg, (uintptr) & const_e); #endif break; case 0x0d: fmov_log2_e(reg); break; case 0x0e: -#if USE_LONG_DOUBLE - fmov_ext_rm(reg,(uintptr)&const_log10_e); +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) + fmov_ext_rm(reg, (uintptr) & const_log10_e); #else - 
fmov_rm(reg,(uintptr)&const_log10_e); + fmov_rm(reg, (uintptr) & const_log10_e); #endif break; case 0x0f: @@ -1261,10 +1460,10 @@ void comp_fpp_opp (uae_u32 opcode, uae_u16 extra) fmov_loge_2(reg); break; case 0x31: -#if USE_LONG_DOUBLE - fmov_ext_rm(reg,(uintptr)&const_loge_10); +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) + fmov_ext_rm(reg, (uintptr) & const_loge_10); #else - fmov_rm(reg,(uintptr)&const_loge_10); + fmov_rm(reg, (uintptr) & const_loge_10); #endif break; case 0x32: @@ -1279,14 +1478,14 @@ void comp_fpp_opp (uae_u32 opcode, uae_u16 extra) case 0x39: case 0x3a: case 0x3b: -#if USE_LONG_DOUBLE +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) case 0x3c: case 0x3d: case 0x3e: case 0x3f: - fmov_ext_rm(reg,(uintptr)(power10+(extra & 0x7f)-0x32)); + fmov_ext_rm(reg, (uintptr) (power10 + (extra & 0x7f) - 0x32)); #else - fmov_rm(reg,(uintptr)(power10+(extra & 0x7f)-0x32)); + fmov_rm(reg, (uintptr) (power10 + (extra & 0x7f) - 0x32)); #endif break; default: @@ -1296,309 +1495,537 @@ void comp_fpp_opp (uae_u32 opcode, uae_u16 extra) } return; } - - switch (extra & 0x7f) { - case 0x00: /* FMOVE */ - case 0x40: /* Explicit rounding. This is just a quick fix. Same - * for all other cases that have three choices */ - case 0x44: - dont_care_fflags(); - src=get_fp_value (opcode, extra); - if (src < 0) { - FAIL(1); /* Illegal instruction */ + + switch (extra & 0x7f) + { + case 0x00: /* FMOVE */ + case 0x40: /* FSMOVE: Explicit rounding. This is just a quick fix. 
Same + * for all other cases that have three choices */ + case 0x44: /* FDMOVE */ + if (jit_disable.fmove) + { + FAIL(1); return; } - fmov_rr(reg,src); - MAKE_FPSR (src); - break; - case 0x01: /* FINT */ - FAIL(1); - return; + dont_care_fflags(); - case 0x02: /* FSINH */ - FAIL(1); + src = get_fp_value(opcode, extra); + if (src < 0) + { + FAIL(1); /* Illegal instruction */ + return; + } + fmov_rr(reg, src); + MAKE_FPSR(src); + break; + case 0x01: /* FINT */ + if (jit_disable.fint) + { + FAIL(1); + return; + } + + FAIL(1); return; dont_care_fflags(); break; - case 0x03: /* FINTRZ */ -#if USE_X86_FPUCW + case 0x02: /* FSINH */ + if (jit_disable.fsinh) + { + FAIL(1); + return; + } + + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x03: /* FINTRZ */ + if (jit_disable.fintrz) + { + FAIL(1); + return; + } +#ifdef USE_X86_FPUCW /* If we have control over the CW, we can do this */ dont_care_fflags(); - src=get_fp_value (opcode, extra); - if (src < 0) { - FAIL(1); /* Illegal instruction */ + src = get_fp_value(opcode, extra); + if (src < 0) + { + FAIL(1); /* Illegal instruction */ return; } - mov_l_ri(S1,16); /* Switch to "round to zero" mode */ - fldcw_m_indexed(S1,(uae_u32)x86_fpucw); - - frndint_rr(reg,src); + mov_l_ri(S1, 16); /* Switch to "round to zero" mode */ + fldcw_m_indexed(S1, (uintptr) x86_fpucw); + + frndint_rr(reg, src); /* restore control word */ - mov_l_rm(S1,(uintptr)®s.fpcr); - and_l_ri(S1,0x000000f0); - fldcw_m_indexed(S1,(uintptr)x86_fpucw); + mov_l_rm(S1, (uintptr) & regs.fpcr); + and_l_ri(S1, 0x000000f0); + fldcw_m_indexed(S1, (uintptr) x86_fpucw); - MAKE_FPSR (reg); + MAKE_FPSR(reg); break; -#endif - FAIL(1); +#endif + FAIL(1); return; break; - case 0x04: /* FSQRT */ - case 0x41: - case 0x45: - dont_care_fflags(); - src=get_fp_value (opcode, extra); - if (src < 0) { - FAIL(1); /* Illegal instruction */ + case 0x04: /* FSQRT */ + case 0x41: /* FSSQRT */ + case 0x45: /* FDSQRT */ + if (jit_disable.fsqrt) + { + FAIL(1); return; } - 
fsqrt_rr(reg,src); - MAKE_FPSR (reg); - break; - case 0x06: /* FLOGNP1 */ - FAIL(1); - return; + dont_care_fflags(); - break; - case 0x08: /* FETOXM1 */ - FAIL(1); - return; - dont_care_fflags(); - break; - case 0x09: /* FTANH */ - FAIL(1); - return; - dont_care_fflags(); - break; - case 0x0a: /* FATAN */ - FAIL(1); - return; - dont_care_fflags(); - break; - case 0x0c: /* FASIN */ - FAIL(1); - return; - dont_care_fflags(); - break; - case 0x0d: /* FATANH */ - FAIL(1); - return; - dont_care_fflags(); - break; - case 0x0e: /* FSIN */ - dont_care_fflags(); - src=get_fp_value (opcode, extra); - if (src < 0) { - FAIL(1); /* Illegal instruction */ + src = get_fp_value(opcode, extra); + if (src < 0) + { + FAIL(1); /* Illegal instruction */ return; } - fsin_rr(reg,src); - MAKE_FPSR (reg); + fsqrt_rr(reg, src); + MAKE_FPSR(reg); break; - case 0x0f: /* FTAN */ - FAIL(1); + case 0x06: /* FLOGNP1 */ + if (jit_disable.flognp1) + { + FAIL(1); + return; + } + + FAIL(1); return; dont_care_fflags(); break; - case 0x10: /* FETOX */ - dont_care_fflags(); - src=get_fp_value (opcode, extra); - if (src < 0) { - FAIL(1); /* Illegal instruction */ + case 0x08: /* FETOXM1 */ + if (jit_disable.fetoxm1) + { + FAIL(1); return; } - fetox_rr(reg,src); - MAKE_FPSR (reg); - break; - case 0x11: /* FTWOTOX */ - dont_care_fflags(); - src=get_fp_value (opcode, extra); - if (src < 0) { - FAIL(1); /* Illegal instruction */ - return; - } - ftwotox_rr(reg,src); - MAKE_FPSR (reg); - break; - case 0x12: /* FTENTOX */ - FAIL(1); + + FAIL(1); return; dont_care_fflags(); break; - case 0x14: /* FLOGN */ - FAIL(1); + case 0x09: /* FTANH */ + if (jit_disable.ftanh) + { + FAIL(1); + return; + } + + FAIL(1); return; dont_care_fflags(); break; - case 0x15: /* FLOG10 */ - FAIL(1); + case 0x0a: /* FATAN */ + if (jit_disable.fatan) + { + FAIL(1); + return; + } + + FAIL(1); return; dont_care_fflags(); break; - case 0x16: /* FLOG2 */ - dont_care_fflags(); - src=get_fp_value (opcode, extra); - if (src < 0) { - FAIL(1); 
/* Illegal instruction */ + case 0x0c: /* FASIN */ + if (jit_disable.fasin) + { + FAIL(1); return; } - flog2_rr(reg,src); - MAKE_FPSR (reg); - break; - case 0x18: /* FABS */ - case 0x58: - case 0x5c: - dont_care_fflags(); - src=get_fp_value (opcode, extra); - if (src < 0) { - FAIL(1); /* Illegal instruction */ - return; - } - fabs_rr(reg,src); - MAKE_FPSR (reg); - break; - case 0x19: /* FCOSH */ - FAIL(1); + + FAIL(1); return; dont_care_fflags(); break; - case 0x1a: /* FNEG */ - case 0x5a: - case 0x5e: - dont_care_fflags(); - src=get_fp_value (opcode, extra); - if (src < 0) { - FAIL(1); /* Illegal instruction */ + case 0x0d: /* FATANH */ + if (jit_disable.fatanh) + { + FAIL(1); return; } - fneg_rr(reg,src); - MAKE_FPSR (reg); - break; - case 0x1c: /* FACOS */ - FAIL(1); + + FAIL(1); return; dont_care_fflags(); break; - case 0x1d: /* FCOS */ - dont_care_fflags(); - src=get_fp_value (opcode, extra); - if (src < 0) { - FAIL(1); /* Illegal instruction */ + case 0x0e: /* FSIN */ + if (jit_disable.fsin) + { + FAIL(1); return; } - fcos_rr(reg,src); - MAKE_FPSR (reg); + + dont_care_fflags(); + src = get_fp_value(opcode, extra); + if (src < 0) + { + FAIL(1); /* Illegal instruction */ + return; + } + fsin_rr(reg, src); + MAKE_FPSR(reg); break; - case 0x1e: /* FGETEXP */ - FAIL(1); + case 0x0f: /* FTAN */ + if (jit_disable.ftan) + { + FAIL(1); + return; + } + + FAIL(1); return; dont_care_fflags(); break; - case 0x1f: /* FGETMAN */ - FAIL(1); + case 0x10: /* FETOX */ + if (jit_disable.fetox) + { + FAIL(1); + return; + } + + dont_care_fflags(); + src = get_fp_value(opcode, extra); + if (src < 0) + { + FAIL(1); /* Illegal instruction */ + return; + } + fetox_rr(reg, src); + MAKE_FPSR(reg); + break; + case 0x11: /* FTWOTOX */ + if (jit_disable.ftwotox) + { + FAIL(1); + return; + } + + dont_care_fflags(); + src = get_fp_value(opcode, extra); + if (src < 0) + { + FAIL(1); /* Illegal instruction */ + return; + } + ftwotox_rr(reg, src); + MAKE_FPSR(reg); + break; + case 0x12: /* 
FTENTOX */ + if (jit_disable.ftentox) + { + FAIL(1); + return; + } + + FAIL(1); return; dont_care_fflags(); break; - case 0x20: /* FDIV */ - case 0x60: - case 0x64: - dont_care_fflags(); - src=get_fp_value (opcode, extra); - if (src < 0) { - FAIL(1); /* Illegal instruction */ + case 0x14: /* FLOGN */ + if (jit_disable.flogn) + { + FAIL(1); return; } - fdiv_rr(reg,src); - MAKE_FPSR (reg); - break; - case 0x21: /* FMOD */ + + FAIL(1); + return; dont_care_fflags(); - src=get_fp_value (opcode, extra); - if (src < 0) { - FAIL(1); /* Illegal instruction */ + break; + case 0x15: /* FLOG10 */ + if (jit_disable.flog10) + { + FAIL(1); return; } - frem_rr(reg,src); - MAKE_FPSR (reg); - break; - case 0x22: /* FADD */ - case 0x62: - case 0x66: + + FAIL(1); + return; dont_care_fflags(); - src=get_fp_value (opcode, extra); - if (src < 0) { - FAIL(1); /* Illegal instruction */ + break; + case 0x16: /* FLOG2 */ + if (jit_disable.flog2) + { + FAIL(1); return; } - fadd_rr(reg,src); - MAKE_FPSR (reg); - break; - case 0x23: /* FMUL */ - case 0x63: - case 0x67: + dont_care_fflags(); - src=get_fp_value (opcode, extra); - if (src < 0) { - FAIL(1); /* Illegal instruction */ + src = get_fp_value(opcode, extra); + if (src < 0) + { + FAIL(1); /* Illegal instruction */ return; } - fmul_rr(reg,src); - MAKE_FPSR (reg); + flog2_rr(reg, src); + MAKE_FPSR(reg); break; - case 0x24: /* FSGLDIV */ - dont_care_fflags(); - src=get_fp_value (opcode, extra); - if (src < 0) { - FAIL(1); /* Illegal instruction */ + case 0x18: /* FABS */ + case 0x58: /* FSABS */ + case 0x5c: /* FDABS */ + if (jit_disable.fabs) + { + FAIL(1); return; } - fdiv_rr(reg,src); - MAKE_FPSR (reg); + + dont_care_fflags(); + src = get_fp_value(opcode, extra); + if (src < 0) + { + FAIL(1); /* Illegal instruction */ + return; + } + fabs_rr(reg, src); + MAKE_FPSR(reg); break; - case 0x25: /* FREM */ + case 0x19: /* FCOSH */ + if (jit_disable.fcosh) + { + FAIL(1); + return; + } + + FAIL(1); + return; + dont_care_fflags(); + break; + case 
0x1a: /* FNEG */ + case 0x5a: /* FSNEG */ + case 0x5e: /* FDNEG */ + if (jit_disable.fneg) + { + FAIL(1); + return; + } + + dont_care_fflags(); + src = get_fp_value(opcode, extra); + if (src < 0) + { + FAIL(1); /* Illegal instruction */ + return; + } + fneg_rr(reg, src); + MAKE_FPSR(reg); + break; + case 0x1c: /* FACOS */ + if (jit_disable.facos) + { + FAIL(1); + return; + } + + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x1d: /* FCOS */ + if (jit_disable.fcos) + { + FAIL(1); + return; + } + + dont_care_fflags(); + src = get_fp_value(opcode, extra); + if (src < 0) + { + FAIL(1); /* Illegal instruction */ + return; + } + fcos_rr(reg, src); + MAKE_FPSR(reg); + break; + case 0x1e: /* FGETEXP */ + if (jit_disable.fgetexp) + { + FAIL(1); + return; + } + + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x1f: /* FGETMAN */ + if (jit_disable.fgetman) + { + FAIL(1); + return; + } + + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x20: /* FDIV */ + case 0x60: /* FSDIV */ + case 0x64: /* FDDIV */ + if (jit_disable.fdiv) + { + FAIL(1); + return; + } + + dont_care_fflags(); + src = get_fp_value(opcode, extra); + if (src < 0) + { + FAIL(1); /* Illegal instruction */ + return; + } + fdiv_rr(reg, src); + MAKE_FPSR(reg); + break; + case 0x21: /* FMOD */ + if (jit_disable.fmod) + { + FAIL(1); + return; + } + + // FIXME: the quotient byte must be computed + dont_care_fflags(); + src = get_fp_value(opcode, extra); + if (src < 0) + { + FAIL(1); /* Illegal instruction */ + return; + } + frem_rr(reg, src); + MAKE_FPSR(reg); + break; + case 0x22: /* FADD */ + case 0x62: /* FSADD */ + case 0x66: /* FDADD */ + if (jit_disable.fadd) + { + FAIL(1); + return; + } + + dont_care_fflags(); + src = get_fp_value(opcode, extra); + if (src < 0) + { + FAIL(1); /* Illegal instruction */ + return; + } + fadd_rr(reg, src); + MAKE_FPSR(reg); + break; + case 0x23: /* FMUL */ + case 0x63: /* FSMUL */ + case 0x67: /* FDMUL */ + if (jit_disable.fmul) + { + FAIL(1); + return; 
+ } + + dont_care_fflags(); + src = get_fp_value(opcode, extra); + if (src < 0) + { + FAIL(1); /* Illegal instruction */ + return; + } + fmul_rr(reg, src); + MAKE_FPSR(reg); + break; + case 0x24: /* FSGLDIV */ + if (jit_disable.fsgldiv) + { + FAIL(1); + return; + } + + dont_care_fflags(); + src = get_fp_value(opcode, extra); + if (src < 0) + { + FAIL(1); /* Illegal instruction */ + return; + } + fdiv_rr(reg, src); + MAKE_FPSR(reg); + break; + case 0x25: /* FREM */ + if (jit_disable.frem) + { + FAIL(1); + return; + } // gb-- disabled because the quotient byte must be computed // otherwise, free rotation in ClarisWorks doesn't work. FAIL(1); return; dont_care_fflags(); - src=get_fp_value (opcode, extra); - if (src < 0) { - FAIL(1); /* Illegal instruction */ + src = get_fp_value(opcode, extra); + if (src < 0) + { + FAIL(1); /* Illegal instruction */ return; } - frem1_rr(reg,src); - MAKE_FPSR (reg); + frem1_rr(reg, src); + MAKE_FPSR(reg); break; - case 0x26: /* FSCALE */ - dont_care_fflags(); - FAIL(1); + case 0x26: /* FSCALE */ + if (jit_disable.fscale) + { + FAIL(1); + return; + } + + FAIL(1); return; break; - case 0x27: /* FSGLMUL */ - dont_care_fflags(); - src=get_fp_value (opcode, extra); - if (src < 0) { - FAIL(1); /* Illegal instruction */ + case 0x27: /* FSGLMUL */ + if (jit_disable.fsglmul) + { + FAIL(1); return; } - fmul_rr(reg,src); - MAKE_FPSR (reg); - break; - case 0x28: /* FSUB */ - case 0x68: - case 0x6c: + dont_care_fflags(); - src=get_fp_value (opcode, extra); - if (src < 0) { - FAIL(1); /* Illegal instruction */ + src = get_fp_value(opcode, extra); + if (src < 0) + { + FAIL(1); /* Illegal instruction */ return; } - fsub_rr(reg,src); - MAKE_FPSR (reg); + fmul_rr(reg, src); + MAKE_FPSR(reg); break; - case 0x30: /* FSINCOS */ + case 0x28: /* FSUB */ + case 0x68: /* FSSUB */ + case 0x6c: /* FDSUB */ + if (jit_disable.fsub) + { + FAIL(1); + return; + } + + dont_care_fflags(); + src = get_fp_value(opcode, extra); + if (src < 0) + { + FAIL(1); /* Illegal 
instruction */ + return; + } + fsub_rr(reg, src); + MAKE_FPSR(reg); + break; + case 0x30: /* FSINCOS */ case 0x31: case 0x32: case 0x33: @@ -1606,36 +2033,53 @@ void comp_fpp_opp (uae_u32 opcode, uae_u16 extra) case 0x35: case 0x36: case 0x37: - FAIL(1); + if (jit_disable.fsincos) + { + FAIL(1); + return; + } + + FAIL(1); return; dont_care_fflags(); break; - case 0x38: /* FCMP */ - src=get_fp_value (opcode, extra); - if (src < 0) { - FAIL(1); /* Illegal instruction */ + case 0x38: /* FCMP */ + if (jit_disable.fcmp) + { + FAIL(1); return; } - fmov_rr(FP_RESULT,reg); - fsub_rr(FP_RESULT,src); /* Right way? */ + + src = get_fp_value(opcode, extra); + if (src < 0) + { + FAIL(1); /* Illegal instruction */ + return; + } + fmov_rr(FP_RESULT, reg); + fsub_rr(FP_RESULT, src); /* Right way? */ break; - case 0x3a: /* FTST */ - src=get_fp_value (opcode, extra); - if (src < 0) { - FAIL(1); /* Illegal instruction */ + case 0x3a: /* FTST */ + if (jit_disable.ftst) + { + FAIL(1); return; } - fmov_rr(FP_RESULT,src); + + src = get_fp_value(opcode, extra); + if (src < 0) + { + FAIL(1); /* Illegal instruction */ + return; + } + fmov_rr(FP_RESULT, src); break; default: - FAIL(1); + FAIL(1); return; break; } return; - } - m68k_setpc (m68k_getpc () - 4); - fpuop_illg (opcode,extra); + } + FAIL(1); } - -#endif //USE_JIT diff --git a/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_arm.cpp b/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_arm.cpp new file mode 100644 index 00000000..aa9a7181 --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_arm.cpp @@ -0,0 +1,1967 @@ +/* + * compiler/compemu_midfunc_arm.cpp - Native MIDFUNCS for ARM + * + * Copyright (c) 2014 Jens Heitmann of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * + * Adaptation for Basilisk II and improvements, copyright 2000-2002 + * Gwenole Beauchesne + * + * Basilisk II (C) 1997-2002 Christian Bauer 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Note: + * File is included by compemu_support.cpp + * + */ + +/******************************************************************** + * CPU functions exposed to gencomp. Both CREATE and EMIT time * + ********************************************************************/ + +/* + * RULES FOR HANDLING REGISTERS: + * + * * In the function headers, order the parameters + * - 1st registers written to + * - 2nd read/modify/write registers + * - 3rd registers read from + * * Before calling raw_*, you must call readreg, writereg or rmw for + * each register + * * The order for this is + * - 1st call remove_offset for all registers written to with size<4 + * - 2nd call readreg for all registers read without offset + * - 3rd call rmw for all rmw registers + * - 4th call readreg_offset for all registers that can handle offsets + * - 5th call get_offset for all the registers from the previous step + * - 6th call writereg for all written-to registers + * - 7th call raw_* + * - 8th unlock2 all registers that were locked + */ + +MIDFUNC(0,live_flags,(void)) +{ + live.flags_on_stack=TRASH; + live.flags_in_flags=VALID; + live.flags_are_important=1; +} + +MIDFUNC(0,dont_care_flags,(void)) +{ + live.flags_are_important=0; +} + +MIDFUNC(0,duplicate_carry,(void)) +{ 
+ evict(FLAGX); + make_flags_live_internal(); + COMPCALL(setcc_m)((uintptr)live.state[FLAGX].mem,NATIVE_CC_CS); + log_vwrite(FLAGX); +} + +MIDFUNC(0,restore_carry,(void)) +{ +#if defined(USE_JIT2) + RR4 r=readreg(FLAGX,4); + MRS_CPSR(REG_WORK1); + TEQ_ri(r,1); + CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_C_FLAG); + CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_C_FLAG); + MSR_CPSRf_r(REG_WORK1); + unlock2(r); +#else + if (!have_rat_stall) { /* Not a P6 core, i.e. no partial stalls */ + bt_l_ri_noclobber(FLAGX,0); + } + else { /* Avoid the stall the above creates. + This is slow on non-P6, though. + */ + COMPCALL(rol_b_ri(FLAGX,8)); + isclean(FLAGX); + } +#endif +} + +MIDFUNC(0,start_needflags,(void)) +{ + needflags=1; +} + +MIDFUNC(0,end_needflags,(void)) +{ + needflags=0; +} + +MIDFUNC(0,make_flags_live,(void)) +{ + make_flags_live_internal(); +} + +MIDFUNC(2,bt_l_ri,(RR4 r, IMM i)) /* This is defined as only affecting C */ +{ + int size=4; + if (i<16) + size=2; + CLOBBER_BT; + r=readreg(r,size); + raw_bt_l_ri(r,i); + unlock2(r); +} + +MIDFUNC(2,bt_l_rr,(RR4 r, RR4 b)) /* This is defined as only affecting C */ +{ + CLOBBER_BT; + r=readreg(r,4); + b=readreg(b,4); + raw_bt_l_rr(r,b); + unlock2(r); + unlock2(b); +} + +MIDFUNC(2,btc_l_rr,(RW4 r, RR4 b)) +{ + CLOBBER_BT; + b=readreg(b,4); + r=rmw(r,4,4); + raw_btc_l_rr(r,b); + unlock2(r); + unlock2(b); +} + +MIDFUNC(2,btr_l_rr,(RW4 r, RR4 b)) +{ + CLOBBER_BT; + b=readreg(b,4); + r=rmw(r,4,4); + raw_btr_l_rr(r,b); + unlock2(r); + unlock2(b); +} + +MIDFUNC(2,bts_l_rr,(RW4 r, RR4 b)) +{ + CLOBBER_BT; + b=readreg(b,4); + r=rmw(r,4,4); + raw_bts_l_rr(r,b); + unlock2(r); + unlock2(b); +} + +MIDFUNC(2,mov_l_rm,(W4 d, IMM s)) +{ + CLOBBER_MOV; + d=writereg(d,4); + raw_mov_l_rm(d,s); + unlock2(d); +} + +MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, RR4 index, IMM factor)) +{ + CLOBBER_MOV; + index=readreg(index,4); + d=writereg(d,4); + raw_mov_l_rm_indexed(d,base,index,factor); + unlock2(index); + unlock2(d); +} + 
+MIDFUNC(2,mov_l_mi,(IMM d, IMM s)) +{ + CLOBBER_MOV; + raw_mov_l_mi(d,s); +} + +MIDFUNC(2,mov_w_mi,(IMM d, IMM s)) +{ + CLOBBER_MOV; + raw_mov_w_mi(d,s); +} + +MIDFUNC(2,mov_b_mi,(IMM d, IMM s)) +{ + CLOBBER_MOV; + raw_mov_b_mi(d,s); +} + +MIDFUNC(2,rol_b_ri,(RW1 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_ROL; + r=rmw(r,1,1); + raw_rol_b_ri(r,i); + unlock2(r); +} + +MIDFUNC(2,rol_w_ri,(RW2 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_ROL; + r=rmw(r,2,2); + raw_rol_w_ri(r,i); + unlock2(r); +} + +MIDFUNC(2,rol_l_ri,(RW4 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_ROL; + r=rmw(r,4,4); + raw_rol_l_ri(r,i); + unlock2(r); +} + +MIDFUNC(2,rol_l_rr,(RW4 d, RR1 r)) +{ + if (isconst(r)) { + COMPCALL(rol_l_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_ROL; + r=readreg(r,1); + d=rmw(d,4,4); + raw_rol_l_rr(d,r); + unlock2(r); + unlock2(d); +} + +MIDFUNC(2,rol_w_rr,(RW2 d, RR1 r)) +{ /* Can only do this with r==1, i.e. cl */ + + if (isconst(r)) { + COMPCALL(rol_w_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_ROL; + r=readreg(r,1); + d=rmw(d,2,2); + raw_rol_w_rr(d,r); + unlock2(r); + unlock2(d); +} + +MIDFUNC(2,rol_b_rr,(RW1 d, RR1 r)) +{ /* Can only do this with r==1, i.e. cl */ + + if (isconst(r)) { + COMPCALL(rol_b_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_ROL; + r=readreg(r,1); + d=rmw(d,1,1); + raw_rol_b_rr(d,r); + unlock2(r); + unlock2(d); +} + +MIDFUNC(2,shll_l_rr,(RW4 d, RR1 r)) +{ + if (isconst(r)) { + COMPCALL(shll_l_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_SHLL; + r=readreg(r,1); + d=rmw(d,4,4); + raw_shll_l_rr(d,r); + unlock2(r); + unlock2(d); +} + +MIDFUNC(2,shll_w_rr,(RW2 d, RR1 r)) +{ /* Can only do this with r==1, i.e. 
cl */ + + if (isconst(r)) { + COMPCALL(shll_w_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_SHLL; + r=readreg(r,1); + d=rmw(d,2,2); + raw_shll_w_rr(d,r); + unlock2(r); + unlock2(d); +} + +MIDFUNC(2,shll_b_rr,(RW1 d, RR1 r)) +{ /* Can only do this with r==1, i.e. cl */ + + if (isconst(r)) { + COMPCALL(shll_b_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_SHLL; + r=readreg(r,1); + d=rmw(d,1,1); + raw_shll_b_rr(d,r); + unlock2(r); + unlock2(d); +} + +MIDFUNC(2,ror_b_ri,(RR1 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_ROR; + r=rmw(r,1,1); + raw_ror_b_ri(r,i); + unlock2(r); +} + +MIDFUNC(2,ror_w_ri,(RR2 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_ROR; + r=rmw(r,2,2); + raw_ror_w_ri(r,i); + unlock2(r); +} + +MIDFUNC(2,ror_l_ri,(RR4 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_ROR; + r=rmw(r,4,4); + raw_ror_l_ri(r,i); + unlock2(r); +} + +MIDFUNC(2,ror_l_rr,(RR4 d, RR1 r)) +{ + if (isconst(r)) { + COMPCALL(ror_l_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_ROR; + r=readreg(r,1); + d=rmw(d,4,4); + raw_ror_l_rr(d,r); + unlock2(r); + unlock2(d); +} + +MIDFUNC(2,ror_w_rr,(RR2 d, RR1 r)) +{ + if (isconst(r)) { + COMPCALL(ror_w_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_ROR; + r=readreg(r,1); + d=rmw(d,2,2); + raw_ror_w_rr(d,r); + unlock2(r); + unlock2(d); +} + +MIDFUNC(2,ror_b_rr,(RR1 d, RR1 r)) +{ + if (isconst(r)) { + COMPCALL(ror_b_ri)(d,(uae_u8)live.state[r].val); + return; + } + + CLOBBER_ROR; + r=readreg(r,1); + d=rmw(d,1,1); + raw_ror_b_rr(d,r); + unlock2(r); + unlock2(d); +} + +MIDFUNC(2,shrl_l_rr,(RW4 d, RR1 r)) +{ + if (isconst(r)) { + COMPCALL(shrl_l_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_SHRL; + r=readreg(r,1); + d=rmw(d,4,4); + raw_shrl_l_rr(d,r); + unlock2(r); + unlock2(d); +} + +MIDFUNC(2,shrl_w_rr,(RW2 d, RR1 r)) +{ /* Can only do this with r==1, i.e. 
cl */ + + if (isconst(r)) { + COMPCALL(shrl_w_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_SHRL; + r=readreg(r,1); + d=rmw(d,2,2); + raw_shrl_w_rr(d,r); + unlock2(r); + unlock2(d); +} + +MIDFUNC(2,shrl_b_rr,(RW1 d, RR1 r)) +{ /* Can only do this with r==1, i.e. cl */ + + if (isconst(r)) { + COMPCALL(shrl_b_ri)(d,(uae_u8)live.state[r].val); + return; + } + + CLOBBER_SHRL; + r=readreg(r,1); + d=rmw(d,1,1); + raw_shrl_b_rr(d,r); + unlock2(r); + unlock2(d); +} + +MIDFUNC(2,shll_l_ri,(RW4 r, IMM i)) +{ + if (!i && !needflags) + return; + if (isconst(r) && !needflags) { + live.state[r].val<<=i; + return; + } + CLOBBER_SHLL; + r=rmw(r,4,4); + raw_shll_l_ri(r,i); + unlock2(r); +} + +MIDFUNC(2,shll_w_ri,(RW2 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHLL; + r=rmw(r,2,2); + raw_shll_w_ri(r,i); + unlock2(r); +} + +MIDFUNC(2,shll_b_ri,(RW1 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHLL; + r=rmw(r,1,1); + raw_shll_b_ri(r,i); + unlock2(r); +} + +MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i)) +{ + if (!i && !needflags) + return; + if (isconst(r) && !needflags) { + live.state[r].val>>=i; + return; + } + CLOBBER_SHRL; + r=rmw(r,4,4); + raw_shrl_l_ri(r,i); + unlock2(r); +} + +MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHRL; + r=rmw(r,2,2); + raw_shrl_w_ri(r,i); + unlock2(r); +} + +MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHRL; + r=rmw(r,1,1); + raw_shrl_b_ri(r,i); + unlock2(r); +} + +MIDFUNC(2,shra_l_ri,(RW4 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHRA; + r=rmw(r,4,4); + raw_shra_l_ri(r,i); + unlock2(r); +} + +MIDFUNC(2,shra_w_ri,(RW2 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHRA; + r=rmw(r,2,2); + raw_shra_w_ri(r,i); + unlock2(r); +} + +MIDFUNC(2,shra_b_ri,(RW1 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHRA; + r=rmw(r,1,1); + raw_shra_b_ri(r,i); + unlock2(r); +} + +MIDFUNC(2,shra_l_rr,(RW4 d, RR1 r)) +{ + if 
(isconst(r)) { + COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_SHRA; + r=readreg(r,1); + d=rmw(d,4,4); + raw_shra_l_rr(d,r); + unlock2(r); + unlock2(d); +} + +MIDFUNC(2,shra_w_rr,(RW2 d, RR1 r)) +{ /* Can only do this with r==1, i.e. cl */ + + if (isconst(r)) { + COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_SHRA; + r=readreg(r,1); + d=rmw(d,2,2); + raw_shra_w_rr(d,r); + unlock2(r); + unlock2(d); +} + +MIDFUNC(2,shra_b_rr,(RW1 d, RR1 r)) +{ /* Can only do this with r==1, i.e. cl */ + + if (isconst(r)) { + COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val); + return; + } + + CLOBBER_SHRA; + r=readreg(r,1); + d=rmw(d,1,1); + raw_shra_b_rr(d,r); + unlock2(r); + unlock2(d); +} + +MIDFUNC(2,setcc,(W1 d, IMM cc)) +{ + CLOBBER_SETCC; + d=writereg(d,1); + raw_setcc(d,cc); + unlock2(d); +} + +MIDFUNC(2,setcc_m,(IMM d, IMM cc)) +{ + CLOBBER_SETCC; + raw_setcc_m(d,cc); +} + +MIDFUNC(3,cmov_l_rr,(RW4 d, RR4 s, IMM cc)) +{ + if (d==s) + return; + CLOBBER_CMOV; + s=readreg(s,4); + d=rmw(d,4,4); + raw_cmov_l_rr(d,s,cc); + unlock2(s); + unlock2(d); +} + +MIDFUNC(2,bsf_l_rr,(W4 d, W4 s)) +{ + CLOBBER_BSF; + s = readreg(s, 4); + d = writereg(d, 4); + raw_bsf_l_rr(d, s); + unlock2(s); + unlock2(d); +} + +/* Set the Z flag depending on the value in s. Note that the + value has to be 0 or -1 (or, more precisely, for non-zero + values, bit 14 must be set)! 
*/ +MIDFUNC(2,simulate_bsf,(W4 tmp, RW4 s)) +{ + CLOBBER_BSF; + s=rmw_specific(s,4,4,FLAG_NREG3); + tmp=writereg(tmp,4); + raw_flags_set_zero(s, tmp); + unlock2(tmp); + unlock2(s); +} + +MIDFUNC(2,imul_32_32,(RW4 d, RR4 s)) +{ + CLOBBER_MUL; + s=readreg(s,4); + d=rmw(d,4,4); + raw_imul_32_32(d,s); + unlock2(s); + unlock2(d); +} + +MIDFUNC(2,imul_64_32,(RW4 d, RW4 s)) +{ + CLOBBER_MUL; + s=rmw_specific(s,4,4,MUL_NREG2); + d=rmw_specific(d,4,4,MUL_NREG1); + raw_imul_64_32(d,s); + unlock2(s); + unlock2(d); +} + +MIDFUNC(2,mul_64_32,(RW4 d, RW4 s)) +{ + CLOBBER_MUL; + s=rmw_specific(s,4,4,MUL_NREG2); + d=rmw_specific(d,4,4,MUL_NREG1); + raw_mul_64_32(d,s); + unlock2(s); + unlock2(d); +} + +MIDFUNC(2,sign_extend_16_rr,(W4 d, RR2 s)) +{ + int isrmw; + + if (isconst(s)) { + set_const(d,(uae_s32)(uae_s16)live.state[s].val); + return; + } + + CLOBBER_SE16; + isrmw=(s==d); + if (!isrmw) { + s=readreg(s,2); + d=writereg(d,4); + } + else { /* If we try to lock this twice, with different sizes, we + are in trouble! */ + s=d=rmw(s,4,2); + } + raw_sign_extend_16_rr(d,s); + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } +} + +MIDFUNC(2,sign_extend_8_rr,(W4 d, RR1 s)) +{ + int isrmw; + + if (isconst(s)) { + set_const(d,(uae_s32)(uae_s8)live.state[s].val); + return; + } + + isrmw=(s==d); + CLOBBER_SE8; + if (!isrmw) { + s=readreg(s,1); + d=writereg(d,4); + } + else { /* If we try to lock this twice, with different sizes, we + are in trouble! */ + s=d=rmw(s,4,1); + } + + raw_sign_extend_8_rr(d,s); + + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } +} + +MIDFUNC(2,zero_extend_16_rr,(W4 d, RR2 s)) +{ + int isrmw; + + if (isconst(s)) { + set_const(d,(uae_u32)(uae_u16)live.state[s].val); + return; + } + + isrmw=(s==d); + CLOBBER_ZE16; + if (!isrmw) { + s=readreg(s,2); + d=writereg(d,4); + } + else { /* If we try to lock this twice, with different sizes, we + are in trouble! 
*/ + s=d=rmw(s,4,2); + } + raw_zero_extend_16_rr(d,s); + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } +} + +MIDFUNC(2,zero_extend_8_rr,(W4 d, RR1 s)) +{ + int isrmw; + if (isconst(s)) { + set_const(d,(uae_u32)(uae_u8)live.state[s].val); + return; + } + + isrmw=(s==d); + CLOBBER_ZE8; + if (!isrmw) { + s=readreg(s,1); + d=writereg(d,4); + } + else { /* If we try to lock this twice, with different sizes, we + are in trouble! */ + s=d=rmw(s,4,1); + } + + raw_zero_extend_8_rr(d,s); + + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } +} + +MIDFUNC(2,mov_b_rr,(W1 d, RR1 s)) +{ + if (d==s) + return; + if (isconst(s)) { + COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val); + return; + } + + CLOBBER_MOV; + s=readreg(s,1); + d=writereg(d,1); + raw_mov_b_rr(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,mov_w_rr,(W2 d, RR2 s)) +{ + if (d==s) + return; + if (isconst(s)) { + COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val); + return; + } + + CLOBBER_MOV; + s=readreg(s,2); + d=writereg(d,2); + raw_mov_w_rr(d,s); + unlock2(d); + unlock2(s); +} + +/* read the long at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_l_rR,(W4 d, RR4 s, IMM offset)) +{ + if (isconst(s)) { + COMPCALL(mov_l_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + s=readreg(s,4); + d=writereg(d,4); + + raw_mov_l_rR(d,s,offset); + unlock2(d); + unlock2(s); +} + +/* read the word at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_w_rR,(W2 d, RR4 s, IMM offset)) +{ + if (isconst(s)) { + COMPCALL(mov_w_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + s=readreg(s,4); + d=writereg(d,2); + + raw_mov_w_rR(d,s,offset); + unlock2(d); + unlock2(s); +} + +/* read the long at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_l_brR,(W4 d, RR4 s, IMM offset)) +{ + int sreg=s; + if (isconst(s)) { + COMPCALL(mov_l_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + 
s=readreg_offset(s,4); + offset+=get_offset(sreg); + d=writereg(d,4); + + raw_mov_l_brR(d,s,offset); + unlock2(d); + unlock2(s); +} + +/* read the word at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_w_brR,(W2 d, RR4 s, IMM offset)) +{ + int sreg=s; + if (isconst(s)) { + COMPCALL(mov_w_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + remove_offset(d,-1); + s=readreg_offset(s,4); + offset+=get_offset(sreg); + d=writereg(d,2); + + raw_mov_w_brR(d,s,offset); + unlock2(d); + unlock2(s); +} + +/* read the byte at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_b_brR,(W1 d, RR4 s, IMM offset)) +{ + int sreg=s; + if (isconst(s)) { + COMPCALL(mov_b_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + remove_offset(d,-1); + s=readreg_offset(s,4); + offset+=get_offset(sreg); + d=writereg(d,1); + + raw_mov_b_brR(d,s,offset); + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,mov_l_Ri,(RR4 d, IMM i, IMM offset)) +{ + int dreg=d; + if (isconst(d)) { + COMPCALL(mov_l_mi)(live.state[d].val+offset,i); + return; + } + + CLOBBER_MOV; + d=readreg_offset(d,4); + offset+=get_offset(dreg); + raw_mov_l_Ri(d,i,offset); + unlock2(d); +} + +MIDFUNC(3,mov_w_Ri,(RR4 d, IMM i, IMM offset)) +{ + int dreg=d; + if (isconst(d)) { + COMPCALL(mov_w_mi)(live.state[d].val+offset,i); + return; + } + + CLOBBER_MOV; + d=readreg_offset(d,4); + offset+=get_offset(dreg); + raw_mov_w_Ri(d,i,offset); + unlock2(d); +} + +/* Warning! OFFSET is byte sized only! 
*/ +MIDFUNC(3,mov_l_Rr,(RR4 d, RR4 s, IMM offset)) +{ + if (isconst(d)) { + COMPCALL(mov_l_mr)(live.state[d].val+offset,s); + return; + } + if (isconst(s)) { + COMPCALL(mov_l_Ri)(d,live.state[s].val,offset); + return; + } + + CLOBBER_MOV; + s=readreg(s,4); + d=readreg(d,4); + + raw_mov_l_Rr(d,s,offset); + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,mov_w_Rr,(RR4 d, RR2 s, IMM offset)) +{ + if (isconst(d)) { + COMPCALL(mov_w_mr)(live.state[d].val+offset,s); + return; + } + if (isconst(s)) { + COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset); + return; + } + + CLOBBER_MOV; + s=readreg(s,2); + d=readreg(d,4); + raw_mov_w_Rr(d,s,offset); + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,lea_l_brr,(W4 d, RR4 s, IMM offset)) +{ + if (isconst(s)) { + COMPCALL(mov_l_ri)(d,live.state[s].val+offset); + return; + } +#if USE_OFFSET + if (d==s) { + add_offset(d,offset); + return; + } +#endif + CLOBBER_LEA; + s=readreg(s,4); + d=writereg(d,4); + raw_lea_l_brr(d,s,offset); + unlock2(d); + unlock2(s); +} + +MIDFUNC(5,lea_l_brr_indexed,(W4 d, RR4 s, RR4 index, IMM factor, IMM offset)) +{ + if (!offset) { + COMPCALL(lea_l_rr_indexed)(d,s,index,factor); + return; + } + CLOBBER_LEA; + s=readreg(s,4); + index=readreg(index,4); + d=writereg(d,4); + + raw_lea_l_brr_indexed(d,s,index,factor,offset); + unlock2(d); + unlock2(index); + unlock2(s); +} + +MIDFUNC(4,lea_l_rr_indexed,(W4 d, RR4 s, RR4 index, IMM factor)) +{ + CLOBBER_LEA; + s=readreg(s,4); + index=readreg(index,4); + d=writereg(d,4); + + raw_lea_l_rr_indexed(d,s,index,factor); + unlock2(d); + unlock2(index); + unlock2(s); +} + +/* write s to the long at the address contained in d+offset */ +MIDFUNC(3,mov_l_bRr,(RR4 d, RR4 s, IMM offset)) +{ + int dreg=d; + if (isconst(d)) { + COMPCALL(mov_l_mr)(live.state[d].val+offset,s); + return; + } + + CLOBBER_MOV; + s=readreg(s,4); + d=readreg_offset(d,4); + offset+=get_offset(dreg); + + raw_mov_l_bRr(d,s,offset); + unlock2(d); + unlock2(s); +} + +/* write the word at the address contained 
in d+offset, taking the value from s */ +MIDFUNC(3,mov_w_bRr,(RR4 d, RR2 s, IMM offset)) +{ + int dreg=d; + + if (isconst(d)) { + COMPCALL(mov_w_mr)(live.state[d].val+offset,s); + return; + } + + CLOBBER_MOV; + s=readreg(s,2); + d=readreg_offset(d,4); + offset+=get_offset(dreg); + raw_mov_w_bRr(d,s,offset); + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,mov_b_bRr,(RR4 d, RR1 s, IMM offset)) +{ + int dreg=d; + if (isconst(d)) { + COMPCALL(mov_b_mr)(live.state[d].val+offset,s); + return; + } + + CLOBBER_MOV; + s=readreg(s,1); + d=readreg_offset(d,4); + offset+=get_offset(dreg); + raw_mov_b_bRr(d,s,offset); + unlock2(d); + unlock2(s); +} + +MIDFUNC(1,mid_bswap_32,(RW4 r)) +{ + + if (isconst(r)) { + uae_u32 oldv=live.state[r].val; + live.state[r].val=reverse32(oldv); + return; + } + + CLOBBER_SW32; + r=rmw(r,4,4); + raw_bswap_32(r); + unlock2(r); +} + +MIDFUNC(1,mid_bswap_16,(RW2 r)) +{ + if (isconst(r)) { + uae_u32 oldv=live.state[r].val; + live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) | + (oldv&0xffff0000); + return; + } + + CLOBBER_SW16; + r=rmw(r,2,2); + + raw_bswap_16(r); + unlock2(r); +} + +MIDFUNC(2,mov_l_rr,(W4 d, RR4 s)) +{ + int olds; + + if (d==s) { /* How pointless! 
*/ + return; + } + if (isconst(s)) { + COMPCALL(mov_l_ri)(d,live.state[s].val); + return; + } + olds=s; + disassociate(d); + s=readreg_offset(s,4); + live.state[d].realreg=s; + live.state[d].realind=live.nat[s].nholds; + live.state[d].val=live.state[olds].val; + live.state[d].validsize=4; + live.state[d].dirtysize=4; + set_status(d,DIRTY); + + live.nat[s].holds[live.nat[s].nholds]=d; + live.nat[s].nholds++; + log_clobberreg(d); + D2(panicbug("Added %d to nreg %d(%d), now holds %d regs", d,s,live.state[d].realind,live.nat[s].nholds)); + unlock2(s); +} + +MIDFUNC(2,mov_l_mr,(IMM d, RR4 s)) +{ + if (isconst(s)) { + COMPCALL(mov_l_mi)(d,live.state[s].val); + return; + } + CLOBBER_MOV; + s=readreg(s,4); + + raw_mov_l_mr(d,s); + unlock2(s); +} + +MIDFUNC(2,mov_w_mr,(IMM d, RR2 s)) +{ + if (isconst(s)) { + COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val); + return; + } + CLOBBER_MOV; + s=readreg(s,2); + + raw_mov_w_mr(d,s); + unlock2(s); +} + +MIDFUNC(2,mov_w_rm,(W2 d, IMM s)) +{ + CLOBBER_MOV; + d=writereg(d,2); + + raw_mov_w_rm(d,s); + unlock2(d); +} + +MIDFUNC(2,mov_b_mr,(IMM d, RR1 s)) +{ + if (isconst(s)) { + COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val); + return; + } + + CLOBBER_MOV; + s=readreg(s,1); + + raw_mov_b_mr(d,s); + unlock2(s); +} + +MIDFUNC(2,mov_b_rm,(W1 d, IMM s)) +{ + CLOBBER_MOV; + d=writereg(d,1); + + raw_mov_b_rm(d,s); + unlock2(d); +} + +MIDFUNC(2,mov_l_ri,(W4 d, IMM s)) +{ + set_const(d,s); + return; +} + +MIDFUNC(2,mov_w_ri,(W2 d, IMM s)) +{ + CLOBBER_MOV; + d=writereg(d,2); + + raw_mov_w_ri(d,s); + unlock2(d); +} + +MIDFUNC(2,mov_b_ri,(W1 d, IMM s)) +{ + CLOBBER_MOV; + d=writereg(d,1); + + raw_mov_b_ri(d,s); + unlock2(d); +} + +MIDFUNC(2,test_l_ri,(RR4 d, IMM i)) +{ + CLOBBER_TEST; + d=readreg(d,4); + + raw_test_l_ri(d,i); + unlock2(d); +} + +MIDFUNC(2,test_l_rr,(RR4 d, RR4 s)) +{ + CLOBBER_TEST; + d=readreg(d,4); + s=readreg(s,4); + + raw_test_l_rr(d,s);; + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,test_w_rr,(RR2 d, RR2 s)) +{ + 
CLOBBER_TEST; + d=readreg(d,2); + s=readreg(s,2); + + raw_test_w_rr(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,test_b_rr,(RR1 d, RR1 s)) +{ + CLOBBER_TEST; + d=readreg(d,1); + s=readreg(s,1); + + raw_test_b_rr(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,and_l_ri,(RW4 d, IMM i)) +{ + if (isconst(d) && !needflags) { + live.state[d].val &= i; + return; + } + + CLOBBER_AND; + d=rmw(d,4,4); + + raw_and_l_ri(d,i); + unlock2(d); +} + +MIDFUNC(2,and_l,(RW4 d, RR4 s)) +{ + CLOBBER_AND; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_and_l(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,and_w,(RW2 d, RR2 s)) +{ + CLOBBER_AND; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_and_w(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,and_b,(RW1 d, RR1 s)) +{ + CLOBBER_AND; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_and_b(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,or_l_ri,(RW4 d, IMM i)) +{ + if (isconst(d) && !needflags) { + live.state[d].val|=i; + return; + } + CLOBBER_OR; + d=rmw(d,4,4); + + raw_or_l_ri(d,i); + unlock2(d); +} + +MIDFUNC(2,or_l,(RW4 d, RR4 s)) +{ + if (isconst(d) && isconst(s) && !needflags) { + live.state[d].val|=live.state[s].val; + return; + } + CLOBBER_OR; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_or_l(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,or_w,(RW2 d, RR2 s)) +{ + CLOBBER_OR; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_or_w(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,or_b,(RW1 d, RR1 s)) +{ + CLOBBER_OR; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_or_b(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,adc_l,(RW4 d, RR4 s)) +{ + CLOBBER_ADC; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_adc_l(d,s); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,adc_w,(RW2 d, RR2 s)) +{ + CLOBBER_ADC; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_adc_w(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,adc_b,(RW1 d, RR1 s)) +{ + CLOBBER_ADC; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_adc_b(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,add_l,(RW4 d, RR4 s)) +{ + 
if (isconst(s)) { + COMPCALL(add_l_ri)(d,live.state[s].val); + return; + } + + CLOBBER_ADD; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_add_l(d,s); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,add_w,(RW2 d, RR2 s)) +{ + if (isconst(s)) { + COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val); + return; + } + + CLOBBER_ADD; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_add_w(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,add_b,(RW1 d, RR1 s)) +{ + if (isconst(s)) { + COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val); + return; + } + + CLOBBER_ADD; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_add_b(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,sub_l_ri,(RW4 d, IMM i)) +{ + if (!i && !needflags) + return; + if (isconst(d) && !needflags) { + live.state[d].val-=i; + return; + } +#if USE_OFFSET + if (!needflags) { + add_offset(d,-i); + return; + } +#endif + + CLOBBER_SUB; + d=rmw(d,4,4); + + raw_sub_l_ri(d,i); + unlock2(d); +} + +MIDFUNC(2,sub_w_ri,(RW2 d, IMM i)) +{ + if (!i && !needflags) + return; + + CLOBBER_SUB; + d=rmw(d,2,2); + + raw_sub_w_ri(d,i); + unlock2(d); +} + +MIDFUNC(2,sub_b_ri,(RW1 d, IMM i)) +{ + if (!i && !needflags) + return; + + CLOBBER_SUB; + d=rmw(d,1,1); + + raw_sub_b_ri(d,i); + + unlock2(d); +} + +MIDFUNC(2,add_l_ri,(RW4 d, IMM i)) +{ + if (!i && !needflags) + return; + if (isconst(d) && !needflags) { + live.state[d].val+=i; + return; + } +#if USE_OFFSET + if (!needflags) { + add_offset(d,i); + return; + } +#endif + CLOBBER_ADD; + d=rmw(d,4,4); + raw_add_l_ri(d,i); + unlock2(d); +} + +MIDFUNC(2,add_w_ri,(RW2 d, IMM i)) +{ + if (!i && !needflags) + return; + + CLOBBER_ADD; + d=rmw(d,2,2); + + raw_add_w_ri(d,i); + unlock2(d); +} + +MIDFUNC(2,add_b_ri,(RW1 d, IMM i)) +{ + if (!i && !needflags) + return; + + CLOBBER_ADD; + d=rmw(d,1,1); + + raw_add_b_ri(d,i); + + unlock2(d); +} + +MIDFUNC(2,sbb_l,(RW4 d, RR4 s)) +{ + CLOBBER_SBB; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_sbb_l(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,sbb_w,(RW2 d, RR2 s)) +{ + 
CLOBBER_SBB; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_sbb_w(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,sbb_b,(RW1 d, RR1 s)) +{ + CLOBBER_SBB; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_sbb_b(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,sub_l,(RW4 d, RR4 s)) +{ + if (isconst(s)) { + COMPCALL(sub_l_ri)(d,live.state[s].val); + return; + } + + CLOBBER_SUB; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_sub_l(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,sub_w,(RW2 d, RR2 s)) +{ + if (isconst(s)) { + COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val); + return; + } + + CLOBBER_SUB; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_sub_w(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,sub_b,(RW1 d, RR1 s)) +{ + if (isconst(s)) { + COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val); + return; + } + + CLOBBER_SUB; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_sub_b(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,cmp_l,(RR4 d, RR4 s)) +{ + CLOBBER_CMP; + s=readreg(s,4); + d=readreg(d,4); + + raw_cmp_l(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,cmp_w,(RR2 d, RR2 s)) +{ + CLOBBER_CMP; + s=readreg(s,2); + d=readreg(d,2); + + raw_cmp_w(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,cmp_b,(RR1 d, RR1 s)) +{ + CLOBBER_CMP; + s=readreg(s,1); + d=readreg(d,1); + + raw_cmp_b(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,xor_l,(RW4 d, RR4 s)) +{ + CLOBBER_XOR; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_xor_l(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,xor_w,(RW2 d, RR2 s)) +{ + CLOBBER_XOR; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_xor_w(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,xor_b,(RW1 d, RR1 s)) +{ + CLOBBER_XOR; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_xor_b(d,s); + unlock2(d); + unlock2(s); +} + +#if defined(UAE) +MIDFUNC(5,call_r_02,(RR4 r, RR4 in1, RR4 in2, IMM isize1, IMM isize2)) +{ + clobber_flags(); + in1=readreg_specific(in1,isize1,REG_PAR1); + in2=readreg_specific(in2,isize2,REG_PAR2); + r=readreg(r,4); + prepare_for_call_1(); + unlock2(r); + 
unlock2(in1); + unlock2(in2); + prepare_for_call_2(); + compemu_raw_call_r(r); +} +#endif + +#if defined(UAE) +MIDFUNC(5,call_r_11,(W4 out1, RR4 r, RR4 in1, IMM osize, IMM isize)) +{ + clobber_flags(); + + if (osize==4) { + if (out1!=in1 && out1!=r) { + COMPCALL(forget_about)(out1); + } + } + else { + tomem_c(out1); + } + + in1=readreg_specific(in1,isize,REG_PAR1); + r=readreg(r,4); + + prepare_for_call_1(); + unlock2(in1); + unlock2(r); + + prepare_for_call_2(); + + compemu_raw_call_r(r); + + live.nat[REG_RESULT].holds[0]=out1; + live.nat[REG_RESULT].nholds=1; + live.nat[REG_RESULT].touched=touchcnt++; + + live.state[out1].realreg=REG_RESULT; + live.state[out1].realind=0; + live.state[out1].val=0; + live.state[out1].validsize=osize; + live.state[out1].dirtysize=osize; + set_status(out1,DIRTY); +} +#endif + +MIDFUNC(0,nop,(void)) +{ + raw_emit_nop(); +} + +/* forget_about() takes a mid-layer register */ +MIDFUNC(1,forget_about,(W4 r)) +{ + if (isinreg(r)) + disassociate(r); + live.state[r].val=0; + set_status(r,UNDEF); +} + +MIDFUNC(1,f_forget_about,(FW r)) +{ + if (f_isinreg(r)) + f_disassociate(r); + live.fate[r].status=UNDEF; +} + +// ARM optimized functions + +MIDFUNC(2,arm_ADD_l,(RW4 d, RR4 s)) +{ + if (isconst(s)) { + COMPCALL(arm_ADD_l_ri)(d,live.state[s].val); + return; + } + + s=readreg(s,4); + d=rmw(d,4,4); + + raw_ADD_l_rr(d,s); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,arm_ADD_l_ri,(RW4 d, IMM i)) +{ + if (!i) return; + if (isconst(d)) { + live.state[d].val+=i; + return; + } +#if USE_OFFSET + add_offset(d,i); + return; +#endif + d=rmw(d,4,4); + + raw_LDR_l_ri(REG_WORK1, i); + raw_ADD_l_rr(d,REG_WORK1); + unlock2(d); +} + +MIDFUNC(2,arm_ADD_l_ri8,(RW4 d, IMM i)) +{ + if (!i) return; + if (isconst(d)) { + live.state[d].val+=i; + return; + } +#if USE_OFFSET + add_offset(d,i); + return; +#endif + d=rmw(d,4,4); + + raw_ADD_l_rri(d,d,i); + unlock2(d); +} + +MIDFUNC(2,arm_SUB_l_ri8,(RW4 d, IMM i)) +{ + if (!i) return; + if (isconst(d)) { + 
live.state[d].val-=i; + return; + } +#if USE_OFFSET + add_offset(d,-i); + return; +#endif + d=rmw(d,4,4); + + raw_SUB_l_rri(d,d,i); + unlock2(d); +} + +MIDFUNC(2,arm_AND_l,(RW4 d, RR4 s)) +{ + s=readreg(s,4); + d=rmw(d,4,4); + + raw_AND_l_rr(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,arm_AND_w,(RW2 d, RR2 s)) +{ + s=readreg(s,2); + d=rmw(d,2,2); + + raw_AND_w_rr(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,arm_AND_b,(RW1 d, RR1 s)) +{ + s=readreg(s,1); + d=rmw(d,1,1); + + raw_AND_b_rr(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,arm_AND_l_ri8,(RW4 d, IMM i)) +{ + if (isconst(d)) { + live.state[d].val &= i; + return; + } + + d=rmw(d,4,4); + + raw_AND_l_ri(d,i); + unlock2(d); +} + +MIDFUNC(2,arm_EOR_b,(RW1 d, RR1 s)) +{ + s=readreg(s,1); + d=rmw(d,1,1); + + raw_EOR_b_rr(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,arm_EOR_l,(RW4 d, RR4 s)) +{ + s=readreg(s,4); + d=rmw(d,4,4); + + raw_EOR_l_rr(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,arm_EOR_w,(RW2 d, RR2 s)) +{ + s=readreg(s,2); + d=rmw(d,2,2); + + raw_EOR_w_rr(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,arm_ORR_b,(RW1 d, RR1 s)) +{ + s=readreg(s,1); + d=rmw(d,1,1); + + raw_ORR_b_rr(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,arm_ORR_l,(RW4 d, RR4 s)) +{ + if (isconst(d) && isconst(s)) { + live.state[d].val|=live.state[s].val; + return; + } + s=readreg(s,4); + d=rmw(d,4,4); + + raw_ORR_l_rr(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,arm_ORR_w,(RW2 d, RR2 s)) +{ + s=readreg(s,2); + d=rmw(d,2,2); + + raw_ORR_w_rr(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,arm_ROR_l_ri8,(RW4 r, IMM i)) +{ + if (!i) + return; + + r=rmw(r,4,4); + raw_ROR_l_ri(r,i); + unlock2(r); +} + +// Other +static inline void flush_cpu_icache(void *start, void *stop) +{ + + register void *_beg __asm ("a1") = start; + register void *_end __asm ("a2") = stop; + register void *_flg __asm ("a3") = 0; +#ifdef __ARM_EABI__ + register unsigned long _scno __asm ("r7") = 0xf0002; + __asm __volatile ("swi 
0x0 @ sys_cacheflush" + : "=r" (_beg) + : "0" (_beg), "r" (_end), "r" (_flg), "r" (_scno)); +#else + __asm __volatile ("swi 0x9f0002 @ sys_cacheflush" + : "=r" (_beg) + : "0" (_beg), "r" (_end), "r" (_flg)); +#endif +} + +static inline void write_jmp_target(uae_u32* jmpaddr, cpuop_func* a) { + *(jmpaddr) = (uae_u32) a; + flush_cpu_icache((void *) jmpaddr, (void *) &jmpaddr[1]); +} + +static inline void emit_jmp_target(uae_u32 a) { + emit_long((uae_u32) a); +} diff --git a/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_arm.h b/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_arm.h new file mode 100644 index 00000000..baedb153 --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_arm.h @@ -0,0 +1,186 @@ +/* + * compiler/compemu_midfunc_arm.h - Native MIDFUNCS for ARM + * + * Copyright (c) 2014 Jens Heitmann of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * + * Adaptation for Basilisk II and improvements, copyright 2000-2002 + * Gwenole Beauchesne + * + * Basilisk II (C) 1997-2002 Christian Bauer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Note: + * File is included by compemu.h + * + */ + +// Arm optimized midfunc +DECLARE_MIDFUNC(arm_ADD_l(RW4 d, RR4 s)); +DECLARE_MIDFUNC(arm_ADD_l_ri(RW4 d, IMM i)); +DECLARE_MIDFUNC(arm_ADD_l_ri8(RW4 d, IMM i)); +DECLARE_MIDFUNC(arm_SUB_l_ri8(RW4 d, IMM i)); +DECLARE_MIDFUNC(arm_AND_b(RW1 d, RR1 s)); +DECLARE_MIDFUNC(arm_AND_w(RW2 d, RR2 s)); +DECLARE_MIDFUNC(arm_AND_l(RW4 d, RR4 s)); +DECLARE_MIDFUNC(arm_AND_l_ri8(RW4 d, IMM i)); +DECLARE_MIDFUNC(arm_EOR_b(RW1 d, RR1 s)); +DECLARE_MIDFUNC(arm_EOR_l(RW4 d, RR4 s)); +DECLARE_MIDFUNC(arm_EOR_w(RW2 d, RR2 s)); +DECLARE_MIDFUNC(arm_ORR_b(RW1 d, RR1 s)); +DECLARE_MIDFUNC(arm_ORR_l(RW4 d, RR4 s)); +DECLARE_MIDFUNC(arm_ORR_w(RW2 d, RR2 s)); +DECLARE_MIDFUNC(arm_ROR_l_ri8(RW4 r, IMM i)); + +// Emulated midfunc +DECLARE_MIDFUNC(bt_l_ri(RR4 r, IMM i)); +DECLARE_MIDFUNC(bt_l_rr(RR4 r, RR4 b)); +DECLARE_MIDFUNC(btc_l_rr(RW4 r, RR4 b)); +DECLARE_MIDFUNC(bts_l_rr(RW4 r, RR4 b)); +DECLARE_MIDFUNC(btr_l_rr(RW4 r, RR4 b)); +DECLARE_MIDFUNC(mov_l_rm(W4 d, IMM s)); +DECLARE_MIDFUNC(mov_l_rm_indexed(W4 d, IMM base, RR4 index, IMM factor)); +DECLARE_MIDFUNC(mov_l_mi(IMM d, IMM s)); +DECLARE_MIDFUNC(mov_w_mi(IMM d, IMM s)); +DECLARE_MIDFUNC(mov_b_mi(IMM d, IMM s)); +DECLARE_MIDFUNC(rol_b_ri(RW1 r, IMM i)); +DECLARE_MIDFUNC(rol_w_ri(RW2 r, IMM i)); +DECLARE_MIDFUNC(rol_l_rr(RW4 d, RR1 r)); +DECLARE_MIDFUNC(rol_w_rr(RW2 d, RR1 r)); +DECLARE_MIDFUNC(rol_b_rr(RW1 d, RR1 r)); +DECLARE_MIDFUNC(rol_l_ri(RW4 r, IMM i)); +DECLARE_MIDFUNC(shll_l_rr(RW4 d, RR1 r)); +DECLARE_MIDFUNC(shll_w_rr(RW2 d, RR1 r)); +DECLARE_MIDFUNC(shll_b_rr(RW1 d, RR1 r)); +DECLARE_MIDFUNC(ror_b_ri(RR1 r, IMM i)); +DECLARE_MIDFUNC(ror_w_ri(RR2 r, IMM i)); +DECLARE_MIDFUNC(ror_l_ri(RR4 r, IMM i)); +DECLARE_MIDFUNC(ror_l_rr(RR4 d, RR1 r)); 
+DECLARE_MIDFUNC(ror_w_rr(RR2 d, RR1 r)); +DECLARE_MIDFUNC(ror_b_rr(RR1 d, RR1 r)); +DECLARE_MIDFUNC(shrl_l_rr(RW4 d, RR1 r)); +DECLARE_MIDFUNC(shrl_w_rr(RW2 d, RR1 r)); +DECLARE_MIDFUNC(shrl_b_rr(RW1 d, RR1 r)); +DECLARE_MIDFUNC(shra_l_rr(RW4 d, RR1 r)); +DECLARE_MIDFUNC(shra_w_rr(RW2 d, RR1 r)); +DECLARE_MIDFUNC(shra_b_rr(RW1 d, RR1 r)); +DECLARE_MIDFUNC(shll_l_ri(RW4 r, IMM i)); +DECLARE_MIDFUNC(shll_w_ri(RW2 r, IMM i)); +DECLARE_MIDFUNC(shll_b_ri(RW1 r, IMM i)); +DECLARE_MIDFUNC(shrl_l_ri(RW4 r, IMM i)); +DECLARE_MIDFUNC(shrl_w_ri(RW2 r, IMM i)); +DECLARE_MIDFUNC(shrl_b_ri(RW1 r, IMM i)); +DECLARE_MIDFUNC(shra_l_ri(RW4 r, IMM i)); +DECLARE_MIDFUNC(shra_w_ri(RW2 r, IMM i)); +DECLARE_MIDFUNC(shra_b_ri(RW1 r, IMM i)); +DECLARE_MIDFUNC(setcc(W1 d, IMM cc)); +DECLARE_MIDFUNC(setcc_m(IMM d, IMM cc)); +DECLARE_MIDFUNC(cmov_l_rr(RW4 d, RR4 s, IMM cc)); +DECLARE_MIDFUNC(bsf_l_rr(W4 d, RR4 s)); +DECLARE_MIDFUNC(pop_l(W4 d)); +DECLARE_MIDFUNC(push_l(RR4 s)); +DECLARE_MIDFUNC(sign_extend_16_rr(W4 d, RR2 s)); +DECLARE_MIDFUNC(sign_extend_8_rr(W4 d, RR1 s)); +DECLARE_MIDFUNC(zero_extend_16_rr(W4 d, RR2 s)); +DECLARE_MIDFUNC(zero_extend_8_rr(W4 d, RR1 s)); +DECLARE_MIDFUNC(simulate_bsf(W4 tmp, RW4 s)); +DECLARE_MIDFUNC(imul_64_32(RW4 d, RW4 s)); +DECLARE_MIDFUNC(mul_64_32(RW4 d, RW4 s)); +DECLARE_MIDFUNC(imul_32_32(RW4 d, RR4 s)); +DECLARE_MIDFUNC(mov_b_rr(W1 d, RR1 s)); +DECLARE_MIDFUNC(mov_w_rr(W2 d, RR2 s)); +DECLARE_MIDFUNC(mov_l_rR(W4 d, RR4 s, IMM offset)); +DECLARE_MIDFUNC(mov_w_rR(W2 d, RR4 s, IMM offset)); +DECLARE_MIDFUNC(mov_l_brR(W4 d, RR4 s, IMM offset)); +DECLARE_MIDFUNC(mov_w_brR(W2 d, RR4 s, IMM offset)); +DECLARE_MIDFUNC(mov_b_brR(W1 d, RR4 s, IMM offset)); +DECLARE_MIDFUNC(mov_l_Ri(RR4 d, IMM i, IMM offset)); +DECLARE_MIDFUNC(mov_w_Ri(RR4 d, IMM i, IMM offset)); +DECLARE_MIDFUNC(mov_l_Rr(RR4 d, RR4 s, IMM offset)); +DECLARE_MIDFUNC(mov_w_Rr(RR4 d, RR2 s, IMM offset)); +DECLARE_MIDFUNC(lea_l_brr(W4 d, RR4 s, IMM offset)); +DECLARE_MIDFUNC(lea_l_brr_indexed(W4 
d, RR4 s, RR4 index, IMM factor, IMM offset)); +DECLARE_MIDFUNC(lea_l_rr_indexed(W4 d, RR4 s, RR4 index, IMM factor)); +DECLARE_MIDFUNC(mov_l_bRr(RR4 d, RR4 s, IMM offset)); +DECLARE_MIDFUNC(mov_w_bRr(RR4 d, RR2 s, IMM offset)); +DECLARE_MIDFUNC(mov_b_bRr(RR4 d, RR1 s, IMM offset)); +DECLARE_MIDFUNC(mid_bswap_32(RW4 r)); +DECLARE_MIDFUNC(mid_bswap_16(RW2 r)); +DECLARE_MIDFUNC(mov_l_rr(W4 d, RR4 s)); +DECLARE_MIDFUNC(mov_l_mr(IMM d, RR4 s)); +DECLARE_MIDFUNC(mov_w_mr(IMM d, RR2 s)); +DECLARE_MIDFUNC(mov_w_rm(W2 d, IMM s)); +DECLARE_MIDFUNC(mov_b_mr(IMM d, RR1 s)); +DECLARE_MIDFUNC(mov_b_rm(W1 d, IMM s)); +DECLARE_MIDFUNC(mov_l_ri(W4 d, IMM s)); +DECLARE_MIDFUNC(mov_w_ri(W2 d, IMM s)); +DECLARE_MIDFUNC(mov_b_ri(W1 d, IMM s)); +DECLARE_MIDFUNC(test_l_ri(RR4 d, IMM i)); +DECLARE_MIDFUNC(test_l_rr(RR4 d, RR4 s)); +DECLARE_MIDFUNC(test_w_rr(RR2 d, RR2 s)); +DECLARE_MIDFUNC(test_b_rr(RR1 d, RR1 s)); +DECLARE_MIDFUNC(and_l_ri(RW4 d, IMM i)); +DECLARE_MIDFUNC(and_l(RW4 d, RR4 s)); +DECLARE_MIDFUNC(and_w(RW2 d, RR2 s)); +DECLARE_MIDFUNC(and_b(RW1 d, RR1 s)); +DECLARE_MIDFUNC(or_l_ri(RW4 d, IMM i)); +DECLARE_MIDFUNC(or_l(RW4 d, RR4 s)); +DECLARE_MIDFUNC(or_w(RW2 d, RR2 s)); +DECLARE_MIDFUNC(or_b(RW1 d, RR1 s)); +DECLARE_MIDFUNC(adc_l(RW4 d, RR4 s)); +DECLARE_MIDFUNC(adc_w(RW2 d, RR2 s)); +DECLARE_MIDFUNC(adc_b(RW1 d, RR1 s)); +DECLARE_MIDFUNC(add_l(RW4 d, RR4 s)); +DECLARE_MIDFUNC(add_w(RW2 d, RR2 s)); +DECLARE_MIDFUNC(add_b(RW1 d, RR1 s)); +DECLARE_MIDFUNC(sub_l_ri(RW4 d, IMM i)); +DECLARE_MIDFUNC(sub_w_ri(RW2 d, IMM i)); +DECLARE_MIDFUNC(sub_b_ri(RW1 d, IMM i)); +DECLARE_MIDFUNC(add_l_ri(RW4 d, IMM i)); +DECLARE_MIDFUNC(add_w_ri(RW2 d, IMM i)); +DECLARE_MIDFUNC(add_b_ri(RW1 d, IMM i)); +DECLARE_MIDFUNC(sbb_l(RW4 d, RR4 s)); +DECLARE_MIDFUNC(sbb_w(RW2 d, RR2 s)); +DECLARE_MIDFUNC(sbb_b(RW1 d, RR1 s)); +DECLARE_MIDFUNC(sub_l(RW4 d, RR4 s)); +DECLARE_MIDFUNC(sub_w(RW2 d, RR2 s)); +DECLARE_MIDFUNC(sub_b(RW1 d, RR1 s)); +DECLARE_MIDFUNC(cmp_l(RR4 d, RR4 s)); 
+DECLARE_MIDFUNC(cmp_w(RR2 d, RR2 s)); +DECLARE_MIDFUNC(cmp_b(RR1 d, RR1 s)); +DECLARE_MIDFUNC(xor_l(RW4 d, RR4 s)); +DECLARE_MIDFUNC(xor_w(RW2 d, RR2 s)); +DECLARE_MIDFUNC(xor_b(RW1 d, RR1 s)); +DECLARE_MIDFUNC(call_r_02(RR4 r, RR4 in1, RR4 in2, IMM isize1, IMM isize2)); +DECLARE_MIDFUNC(call_r_11(W4 out1, RR4 r, RR4 in1, IMM osize, IMM isize)); +DECLARE_MIDFUNC(live_flags(void)); +DECLARE_MIDFUNC(dont_care_flags(void)); +DECLARE_MIDFUNC(duplicate_carry(void)); +DECLARE_MIDFUNC(restore_carry(void)); +DECLARE_MIDFUNC(start_needflags(void)); +DECLARE_MIDFUNC(end_needflags(void)); +DECLARE_MIDFUNC(make_flags_live(void)); +DECLARE_MIDFUNC(forget_about(W4 r)); +DECLARE_MIDFUNC(nop(void)); + +DECLARE_MIDFUNC(f_forget_about(FW r)); + + + + diff --git a/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_arm2.cpp b/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_arm2.cpp new file mode 100644 index 00000000..5f55d1bf --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_arm2.cpp @@ -0,0 +1,5195 @@ +/* + * compiler/compemu_midfunc_arm2.cpp - Native MIDFUNCS for ARM (JIT v2) + * + * Copyright (c) 2014 Jens Heitmann of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * + * Adaptation for Basilisk II and improvements, copyright 2000-2002 + * Gwenole Beauchesne + * + * Basilisk II (C) 1997-2002 Christian Bauer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Note: + * File is included by compemu_support.cpp + * + */ + +const uae_u32 ARM_CCR_MAP[] = { 0, ARM_C_FLAG, // 1 C + ARM_V_FLAG, // 2 V + ARM_C_FLAG | ARM_V_FLAG, // 3 VC + ARM_Z_FLAG, // 4 Z + ARM_Z_FLAG | ARM_C_FLAG, // 5 ZC + ARM_Z_FLAG | ARM_V_FLAG, // 6 ZV + ARM_Z_FLAG | ARM_C_FLAG | ARM_V_FLAG, // 7 ZVC + ARM_N_FLAG, // 8 N + ARM_N_FLAG | ARM_C_FLAG, // 9 NC + ARM_N_FLAG | ARM_V_FLAG, // 10 NV + ARM_N_FLAG | ARM_C_FLAG | ARM_V_FLAG, // 11 NVC + ARM_N_FLAG | ARM_Z_FLAG, // 12 NZ + ARM_N_FLAG | ARM_Z_FLAG | ARM_C_FLAG, // 13 NZC + ARM_N_FLAG | ARM_Z_FLAG | ARM_V_FLAG, // 14 NZV + ARM_N_FLAG | ARM_Z_FLAG | ARM_C_FLAG | ARM_V_FLAG, // 15 NZVC + }; + +// First we start with some helper functions (may be moved to codegen_arm) +static inline void UNSIGNED8_IMM_2_REG(W4 r, IMM v) { + MOV_ri8(r, (uint8) v); +} + +static inline void SIGNED8_IMM_2_REG(W4 r, IMM v) { + if (v & 0x80) { + MVN_ri8(r, (uint8) ~v); + } else { + MOV_ri8(r, (uint8) v); + } +} + +static inline void UNSIGNED16_IMM_2_REG(W4 r, IMM v) { + MOV_ri8(r, (uint8) v); + ORR_rri8RORi(r, r, (uint8)(v >> 8), 24); +} + +/* Emit code that loads the low 16 bits of v into r, sign-extended to 32 bits. */ +static inline void SIGNED16_IMM_2_REG(W4 r, IMM v) { +#if defined(ARMV6_ASSEMBLY) + MOV_ri8(r, (uint8) v); + ORR_rri8RORi(r, r, (uint8)(v >> 8), 24); + SXTH_rr(r, r); +#else + /* Build the value in the upper halfword, then shift it down arithmetically. The previous (uint8)(v << 16) always truncated to 0 and dropped the low byte. */ + MOV_ri8(r, (uint8) v); + LSL_rri(r, r, 16); /* low byte -> bits 16..23 */ + ORR_rri8RORi(r, r, (uint8)(v >> 8), 8); /* high byte -> bits 24..31 */ + ASR_rri(r, r, 16); /* move down, replicating the sign bit */ +#endif +} + +static inline void UNSIGNED8_REG_2_REG(W4 d, RR4 s) { +#if defined(ARMV6_ASSEMBLY) + UXTB_rr(d, s); +#else + ROR_rri(d, s, 8); + LSR_rri(d, d, 24); +#endif +} + +static inline void SIGNED8_REG_2_REG(W4 d, RR4 s) { +#if defined(ARMV6_ASSEMBLY) + SXTB_rr(d, s); +#else + ROR_rri(d, s, 8); + ASR_rri(d, d, 24); +#endif +} + +static inline void UNSIGNED16_REG_2_REG(W4 d, RR4 s) { +#if 
defined(ARMV6_ASSEMBLY) + UXTH_rr(d, s); +#else + LSL_rri(d, s, 16); + LSR_rri(d, d, 16); +#endif +} + +static inline void SIGNED16_REG_2_REG(W4 d, RR4 s) { +#if defined(ARMV6_ASSEMBLY) + SXTH_rr(d, s); +#else + LSL_rri(d, s, 16); + ASR_rri(d, d, 16); +#endif +} + +#define ZERO_EXTEND_8_REG_2_REG(d,s) UNSIGNED8_REG_2_REG(d,s) +#define ZERO_EXTEND_16_REG_2_REG(d,s) UNSIGNED16_REG_2_REG(d,s) +#define SIGN_EXTEND_8_REG_2_REG(d,s) SIGNED8_REG_2_REG(d,s) +#define SIGN_EXTEND_16_REG_2_REG(d,s) SIGNED16_REG_2_REG(d,s) + +MIDFUNC(0,restore_inverted_carry,(void)) +{ + RR4 r=readreg(FLAGX,4); + MRS_CPSR(REG_WORK1); + TEQ_ri(r,1); + CC_BIC_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_C_FLAG); + CC_ORR_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_C_FLAG); + MSR_CPSRf_r(REG_WORK1); + unlock2(r); +} + +/* + * ADD + * Operand Syntax: , Dn + * Dn, + * + * Operand Size: 8,16,32 + * + * X Set the same as the carry bit. + * N Set if the result is negative. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Set if an overflow is generated. Cleared otherwise. + * C Set if a carry is generated. Cleared otherwise. 
+ * + */ +MIDFUNC(3,jnf_ADD_imm,(W4 d, RR4 s, IMM v)) +{ + if (isconst(s)) { + set_const(d,live.state[s].val+v); + return; + } + + s=readreg(s,4); + d=writereg(d,4); + + compemu_raw_mov_l_ri(REG_WORK1, v); + ADD_rrr(d,s,REG_WORK1); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jnf_ADD,(W4 d, RR4 s, RR4 v)) +{ + if (isconst(v)) { + COMPCALL(jnf_ADD_imm)(d,s,live.state[v].val); + return; + } + + v=readreg(v,4); + s=readreg(s,4); + d=writereg(d,4); + + ADD_rrr(d,s,v); + + unlock2(d); + unlock2(s); + unlock2(v); +} + +MIDFUNC(3,jff_ADD_b_imm,(W4 d, RR1 s, IMM v)) +{ + s=readreg(s,4); + d=writereg(d,4); + + SIGNED8_IMM_2_REG(REG_WORK2, (uint8)v); + SIGNED8_REG_2_REG(REG_WORK1, s); + ADDS_rrr(d,REG_WORK1,REG_WORK2); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jff_ADD_b,(W4 d, RR1 s, RR1 v)) +{ + if (isconst(v)) { + COMPCALL(jff_ADD_b_imm)(d,s,live.state[v].val); + return; + } + + v=readreg(v,4); + s=readreg(s,4); + d=writereg(d,4); + + SIGNED8_REG_2_REG(REG_WORK1, s); + SIGNED8_REG_2_REG(REG_WORK2, v); + ADDS_rrr(d,REG_WORK1,REG_WORK2); + + unlock2(d); + unlock2(s); + unlock2(v); +} + +MIDFUNC(3,jff_ADD_w_imm,(W4 d, RR2 s, IMM v)) +{ + s=readreg(s,4); + d=writereg(d,4); + + SIGNED16_IMM_2_REG(REG_WORK2, (uint16)v); + SIGNED16_REG_2_REG(REG_WORK1, s); + ADDS_rrr(d,REG_WORK1,REG_WORK2); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jff_ADD_w,(W4 d, RR2 s, RR2 v)) +{ + if (isconst(v)) { + COMPCALL(jff_ADD_w_imm)(d,s,live.state[v].val); + return; + } + v=readreg(v,4); + s=readreg(s,4); + d=writereg(d,4); + + SIGNED16_REG_2_REG(REG_WORK1, s); + SIGNED16_REG_2_REG(REG_WORK2, v); + ADDS_rrr(d,REG_WORK1,REG_WORK2); + + unlock2(d); + unlock2(s); + unlock2(v); +} + +MIDFUNC(3,jff_ADD_l_imm,(W4 d, RR4 s, IMM v)) +{ + s=readreg(s,4); + d=writereg(d,4); + + compemu_raw_mov_l_ri(REG_WORK2, v); + ADDS_rrr(d,s,REG_WORK2); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jff_ADD_l,(W4 d, RR4 s, RR4 v)) +{ + if (isconst(v)) { + COMPCALL(jff_ADD_l_imm)(d,s,live.state[v].val); + return; + } 
+ + v=readreg(v,4); + s=readreg(s,4); + d=writereg(d,4); + + ADDS_rrr(d,s,v); + + unlock2(d); + unlock2(s); + unlock2(v); +} + +/* + * ADDA + * Operand Syntax: , An + * + * Operand Size: 16,32 + * + * Flags: Not affected. + * + */ +MIDFUNC(2,jnf_ADDA_b,(W4 d, RR1 s)) +{ + s=readreg(s,4); + d=rmw(d,4,4); + + SIGNED8_REG_2_REG(REG_WORK1,s); + ADD_rrr(d,d,REG_WORK1); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,jnf_ADDA_w,(W4 d, RR2 s)) +{ + s=readreg(s,4); + d=rmw(d,4,4); + + SIGNED16_REG_2_REG(REG_WORK1,s); + ADD_rrr(d,d,REG_WORK1); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,jnf_ADDA_l,(W4 d, RR4 s)) +{ + s=readreg(s,4); + d=rmw(d,4,4); + + ADD_rrr(d,d,s); + + unlock2(d); + unlock2(s); +} + +/* + * ADDX + * Operand Syntax: Dy, Dx + * -(Ay), -(Ax) + * + * Operand Size: 8,16,32 + * + * X Set the same as the carry bit. + * N Set if the result is negative. Cleared otherwise. + * Z Cleared if the result is nonzero; unchanged otherwise. + * V Set if an overflow is generated. Cleared otherwise. + * C Set if a carry is generated. Cleared otherwise. + * + * Attention: Z is cleared only if the result is nonzero. 
Unchanged otherwise + * + */ +MIDFUNC(3,jnf_ADDX,(W4 d, RR4 s, RR4 v)) +{ + s=readreg(s,4); + v=readreg(v,4); + d=writereg(d,4); + + ADC_rrr(d,s,v); + + unlock2(d); + unlock2(s); + unlock2(v); +} + +MIDFUNC(3,jff_ADDX_b,(W4 d, RR1 s, RR1 v)) +{ + s=readreg(s,4); + v=readreg(v,4); + d=writereg(d,4); + + CC_MVN_ri(NATIVE_CC_EQ, REG_WORK2, 0); + CC_MVN_ri(NATIVE_CC_NE, REG_WORK2, ARM_Z_FLAG); + PUSH(REG_WORK2); + + SIGNED8_REG_2_REG(REG_WORK1, s); + SIGNED8_REG_2_REG(REG_WORK2, v); + ADCS_rrr(d,REG_WORK1,REG_WORK2); + + POP(REG_WORK2); + MRS_CPSR(REG_WORK1); + AND_rrr(REG_WORK1, REG_WORK1, REG_WORK2); + MSR_CPSR_r(REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(v); +} + +MIDFUNC(3,jff_ADDX_w,(W4 d, RR2 s, RR2 v)) +{ + s=readreg(s,4); + v=readreg(v,4); + d=writereg(d,4); + + CC_MVN_ri(NATIVE_CC_EQ, REG_WORK2, 0); + CC_MVN_ri(NATIVE_CC_NE, REG_WORK2, ARM_Z_FLAG); + PUSH(REG_WORK2); + + SIGNED16_REG_2_REG(REG_WORK1, s); + SIGNED16_REG_2_REG(REG_WORK2, v); + ADCS_rrr(d,REG_WORK1,REG_WORK2); + + POP(REG_WORK2); + MRS_CPSR(REG_WORK1); + AND_rrr(REG_WORK1, REG_WORK1, REG_WORK2); + MSR_CPSR_r(REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(v); +} + +MIDFUNC(3,jff_ADDX_l,(W4 d, RR4 s, RR4 v)) +{ + s=readreg(s,4); + v=readreg(v,4); + d=writereg(d,4); + + CC_MVN_ri(NATIVE_CC_EQ, REG_WORK2, 0); + CC_MVN_ri(NATIVE_CC_NE, REG_WORK2, ARM_Z_FLAG); + PUSH(REG_WORK2); + + ADCS_rrr(d,s,v); + + POP(REG_WORK2); + MRS_CPSR(REG_WORK1); + AND_rrr(REG_WORK1, REG_WORK1, REG_WORK2); + MSR_CPSR_r(REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(v); +} + +/* + * ANDI + * Operand Syntax: #, CCR + * + * Operand Size: 8 + * + * X Cleared if bit 4 of immediate operand is zero. Unchanged otherwise. + * N Cleared if bit 3 of immediate operand is zero. Unchanged otherwise. + * Z Cleared if bit 2 of immediate operand is zero. Unchanged otherwise. + * V Cleared if bit 1 of immediate operand is zero. Unchanged otherwise. + * C Cleared if bit 0 of immediate operand is zero. Unchanged otherwise. 
+ * + */ +/* ANDs the native flags with mask s; when x is zero the stored FLAGX byte is cleared as well. */ +MIDFUNC(2,jff_ANDSR,(IMM s, IMM x)) +{ + MRS_CPSR(REG_WORK1); + AND_rri(REG_WORK1, REG_WORK1, s); + MSR_CPSRf_r(REG_WORK1); + + if (!x) { + compemu_raw_mov_l_ri(REG_WORK1, (uintptr)live.state[FLAGX].mem); + MOV_ri(REG_WORK2, 0); + STRB_rR(REG_WORK2, REG_WORK1); + } +} + +/* + * AND + * Operand Syntax: <ea>, Dn + * Dn, <ea> + * + * Operand Size: 8,16,32 + * + * X Not affected. + * N Set if the most significant bit of the result is set. + * Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Always cleared. + * C Always cleared. + * + */ +MIDFUNC(3,jnf_AND,(W4 d, RR4 s, RR4 v)) +{ + if (isconst(s) && isconst(v)) { + set_const(d, + live.state[s].val&live.state[v].val); + return; + } + + v=readreg(v,4); + s=readreg(s,4); + d=writereg(d,4); + + AND_rrr(d, s, v); + + unlock2(v); + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jff_AND_b,(W4 d, RR1 s, RR1 v)) +{ + v=readreg(v,4); + s=readreg(s,4); + d=writereg(d,4); + + SIGNED8_REG_2_REG(REG_WORK1, s); + SIGNED8_REG_2_REG(REG_WORK2, v); + MSR_CPSRf_i(0); + ANDS_rrr(d, REG_WORK1, REG_WORK2); + + unlock2(v); + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jff_AND_w,(W4 d, RR2 s, RR2 v)) +{ + v=readreg(v,4); + s=readreg(s,4); + d=writereg(d,4); + + SIGNED16_REG_2_REG(REG_WORK1, s); + SIGNED16_REG_2_REG(REG_WORK2, v); + MSR_CPSRf_i(0); + ANDS_rrr(d, REG_WORK1, REG_WORK2); + + unlock2(v); + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jff_AND_l,(W4 d, RR4 s, RR4 v)) +{ + v=readreg(v,4); + s=readreg(s,4); + d=writereg(d,4); + + MSR_CPSRf_i(0); + ANDS_rrr(d, s,v); + + unlock2(v); + unlock2(d); + unlock2(s); +} + +/* + * ASL + * Operand Syntax: Dx, Dy + * #<data>, Dy + * <ea> + * + * Operand Size: 8,16,32 + * + * X Set according to the last bit shifted out of the operand. Unaffected for a shift count of zero. + * N Set if the most significant bit of the result is set. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Set if the most significant bit is changed at any time during the shift operation. 
Cleared otherwise. + * C Set according to the last bit shifted out of the operand. Unaffected for a shift count of zero. + * + */ +/* Byte ASL by immediate with flags: the operand is moved to the top byte of d (LSL 24) before shifting. V is set when the top (i+1) bits of that value are neither all clear nor all set. */ +MIDFUNC(3,jff_ASL_b_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + LSL_rri(d, s, 24); + if (i) { + MRS_CPSR(REG_WORK1); // store flags + BIC_rri(REG_WORK1, REG_WORK1, ARM_N_FLAG|ARM_Z_FLAG|ARM_V_FLAG);// Clear N, Z and V (C is kept) + PUSH(REG_WORK1); + + // Calculate V Flag + MVN_ri(REG_WORK2, 0); + LSR_rri(REG_WORK2, REG_WORK2, (i+1)); + MVN_rr(REG_WORK2, REG_WORK2); + AND_rrr(REG_WORK1, d, REG_WORK2); + TST_rr(REG_WORK1, REG_WORK1); + CC_TEQ_rr(NATIVE_CC_NE, REG_WORK1, REG_WORK2); + POP(REG_WORK1); + CC_ORR_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_V_FLAG); + + MSR_CPSRf_r(REG_WORK1);// restore flags + + LSLS_rri(d,d,i); + } else { + MSR_CPSRf_i(0); + TST_rr(d,d); + } + REV_rr(d,d); + + unlock2(d); + unlock2(s); +} + +/* Word ASL by immediate with flags: same scheme as the byte variant, using the top halfword (LSL 16) and an arithmetic shift back down by 16. */ +MIDFUNC(3,jff_ASL_w_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + LSL_rri(d, s, 16); + if (i) { + MRS_CPSR(REG_WORK1); // store flags + BIC_rri(REG_WORK1, REG_WORK1, ARM_N_FLAG|ARM_Z_FLAG|ARM_V_FLAG);// Clear N, Z and V (C is kept) + PUSH(REG_WORK1); + + // Calculate V Flag + MVN_ri(REG_WORK2, 0); + LSR_rri(REG_WORK2, REG_WORK2, (i+1)); + MVN_rr(REG_WORK2, REG_WORK2); + AND_rrr(REG_WORK1, d, REG_WORK2); + TST_rr(REG_WORK1, REG_WORK1); + CC_TEQ_rr(NATIVE_CC_NE, REG_WORK1, REG_WORK2); + POP(REG_WORK1); + CC_ORR_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_V_FLAG); + + MSR_CPSRf_r(REG_WORK1);// restore flags + + LSLS_rri(d,d,i); + } else { + MSR_CPSRf_i(0); + TST_rr(d,d); + } + ASR_rri(d,d, 16); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jff_ASL_l_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + if (i) { + MRS_CPSR(REG_WORK1); // store flags + BIC_rri(REG_WORK1, REG_WORK1, ARM_N_FLAG|ARM_Z_FLAG|ARM_V_FLAG);// Clear everything except C + PUSH(REG_WORK1); + + // Calculate V Flag + MVN_ri(REG_WORK2, 0); + LSR_rri(REG_WORK2, REG_WORK2, (i+1)); + 
MVN_rr(REG_WORK2, REG_WORK2); + AND_rrr(REG_WORK1, s, REG_WORK2); + TST_rr(REG_WORK1, REG_WORK1); + CC_TEQ_rr(NATIVE_CC_NE, REG_WORK1, REG_WORK2); + POP(REG_WORK1); + CC_ORR_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_V_FLAG); + + MSR_CPSRf_r(REG_WORK1);// retore flags + + LSLS_rri(d,s,i); + } else { + MSR_CPSRf_i(0); + MOVS_rr(d, s); + } + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jff_ASL_b_reg,(W4 d, RR4 s, RR4 i)) +{ + i=readreg(i,4); + s=readreg(s,4); + d=writereg(d,4); + + // Calculate V Flag + MRS_CPSR(REG_WORK1);// store flags + BIC_rri(REG_WORK1, REG_WORK1, ARM_N_FLAG|ARM_Z_FLAG|ARM_V_FLAG);// Clear everything except C + PUSH(REG_WORK1); + + LSL_rri(d, s, 24); + // Calculate V Flag + MVN_ri(REG_WORK2, 0); + LSR_rrr(REG_WORK2, REG_WORK2, i); + LSR_rri(REG_WORK2, REG_WORK2, 1); + MVN_rr(REG_WORK2, REG_WORK2); + AND_rrr(REG_WORK1, d, REG_WORK2); + TST_rr(REG_WORK1, REG_WORK1); + CC_TEQ_rr(NATIVE_CC_NE, REG_WORK1, REG_WORK2); + POP(REG_WORK1); + CC_ORR_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_V_FLAG); + + MSR_CPSRf_r(REG_WORK1);// retore flags + + AND_rri(REG_WORK2, i, 63); + LSLS_rrr(d,d,REG_WORK2); + ASR_rri(d,d, 24); + + unlock2(d); + unlock2(s); + unlock2(i); +} + +MIDFUNC(3,jff_ASL_w_reg,(W4 d, RR4 s, RR4 i)) +{ + i=readreg(i,4); + s=readreg(s,4); + d=writereg(d,4); + + // Calculate V Flag + MRS_CPSR(REG_WORK1);// store flags + BIC_rri(REG_WORK1, REG_WORK1, ARM_N_FLAG|ARM_Z_FLAG|ARM_V_FLAG);// Clear everything except c + PUSH(REG_WORK1); + + LSL_rri(d, s, 16); + // Calculate V Flag + MVN_ri(REG_WORK2, 0); + LSR_rrr(REG_WORK2, REG_WORK2, i); + LSR_rri(REG_WORK2, REG_WORK2, 1); + MVN_rr(REG_WORK2, REG_WORK2); + AND_rrr(REG_WORK1, d, REG_WORK2); + TST_rr(REG_WORK1, REG_WORK1); + CC_TEQ_rr(NATIVE_CC_NE, REG_WORK1, REG_WORK2); + POP(REG_WORK1); + CC_ORR_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_V_FLAG); + + MSR_CPSRf_r(REG_WORK1);// retore flags + + AND_rri(REG_WORK2, i, 63); + LSLS_rrr(d,d,REG_WORK2); + ASR_rri(d,d, 16); + + unlock2(d); + unlock2(s); + 
unlock2(i); +} + +MIDFUNC(3,jff_ASL_l_reg,(W4 d, RR4 s, RR4 i)) +{ + i=readreg(i,4); + s=readreg(s,4); + d=writereg(d,4); + + // Calculate V Flag + MRS_CPSR(REG_WORK1);// store flags + BIC_rri(REG_WORK1, REG_WORK1, ARM_N_FLAG|ARM_Z_FLAG|ARM_V_FLAG);// Clear everything except C + PUSH(REG_WORK1); + + // Calculate V Flag + MVN_ri(REG_WORK2, 0); + LSR_rrr(REG_WORK2, REG_WORK2, i); + LSR_rri(REG_WORK2, REG_WORK2, 1); + MVN_rr(REG_WORK2, REG_WORK2); + AND_rrr(REG_WORK1, s, REG_WORK2); + TST_rr(REG_WORK1, REG_WORK1); + CC_TEQ_rr(NATIVE_CC_NE, REG_WORK1, REG_WORK2); + POP(REG_WORK1); + CC_ORR_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_V_FLAG); + + MSR_CPSRf_r(REG_WORK1);// retore flags + + AND_rri(REG_WORK2, i, 63); + LSLS_rrr(d,s,REG_WORK2); + + unlock2(d); + unlock2(s); + unlock2(i); +} + +/* + * ASLW + * Operand Syntax: + * + * Operand Size: 16 + * + * X Set according to the last bit shifted out of the operand. + * N Set if the most significant bit of the result is set. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Set if the most significant bit is changed at any time during the shift operation. Cleared otherwise. + * C Set according to the last bit shifted out of the operand. + * + */ +MIDFUNC(2,jnf_ASLW,(W4 d, RR4 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + LSL_rri(d,s,1); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,jff_ASLW,(W4 d, RR4 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + MSR_CPSRf_i(0); + LSLS_rri(d,s,17); + + MRS_CPSR(REG_WORK1); + CC_ORR_rri(NATIVE_CC_MI, REG_WORK1, REG_WORK1, ARM_V_FLAG); + CC_EOR_rri(NATIVE_CC_CS, REG_WORK1, REG_WORK1, ARM_V_FLAG); + MSR_CPSRf_r(REG_WORK1); + + unlock2(d); + unlock2(s); +} + +/* + * ASR + * Operand Syntax: Dx, Dy + * #, Dy + * + * + * Operand Size: 8,16,32 + * + * X Set according to the last bit shifted out of the operand. Unaffected for a shift count of zero. + * N Set if the most significant bit of the result is set. Cleared otherwise. + * Z Set if the result is zero. 
Cleared otherwise. + * V Set if the most significant bit is changed at any time during the shift operation. Cleared otherwise. + * C Set according to the last bit shifted out of the operand. Unaffected for a shift count of zero. + * + */ +MIDFUNC(3,jnf_ASR_b_imm,(W4 d, RR4 s, IMM i)) +{ + if (!i) return; + + s=readreg(s,4); + d=writereg(d,4); + + SIGNED8_REG_2_REG(d, s); + ASR_rri(d,d,i); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jnf_ASR_w_imm,(W4 d, RR4 s, IMM i)) +{ + if (!i) return; + + s=readreg(s,4); + d=writereg(d,4); + + SIGNED16_REG_2_REG(d, s); + ASR_rri(d,d,i); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jnf_ASR_l_imm,(W4 d, RR4 s, IMM i)) +{ + if (!i) return; + + s=readreg(s,4); + d=writereg(d,4); + + ASR_rri(d,s,i); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jff_ASR_b_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + SIGNED8_REG_2_REG(d, s); + if (i) { + MSR_CPSRf_i(0); + ASRS_rri(d,d,i); + } else { + CC_MSR_CPSRf_r(NATIVE_CC_CC, 0); // Clear everything except C + CC_MSR_CPSRf_r(NATIVE_CC_CS, ARM_C_FLAG);// Clear everything except C + TST_rr(d,d); + } + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jff_ASR_w_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + SIGNED16_REG_2_REG(d, s); + if (i) { + MSR_CPSRf_i(0); + ASRS_rri(d,d,i); + } else { + CC_MSR_CPSRf_r(NATIVE_CC_CC, 0); // Clear everything except C + CC_MSR_CPSRf_r(NATIVE_CC_CS, ARM_C_FLAG);// Clear everything except C + TST_rr(d,d); + } + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jff_ASR_l_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + if (i) { + MSR_CPSRf_i(0); + ASRS_rri(d,s,i); + } else { + CC_MSR_CPSRf_r(NATIVE_CC_CC, 0); // Clear everything except C + CC_MSR_CPSRf_r(NATIVE_CC_CS, ARM_C_FLAG);// Clear everything except C + TST_rr(s,s); + } + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jnf_ASR_b_reg,(W4 d, RR4 s, RR4 i)) +{ + i=readreg(i,4); + s=readreg(s,4); + d=writereg(d,4); + + SIGNED8_REG_2_REG(d, s); + 
AND_rri(REG_WORK1, i, 63); + ASR_rrr(d,d,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} + +MIDFUNC(3,jnf_ASR_w_reg,(W4 d, RR4 s, RR4 i)) +{ + i=readreg(i,4); + s=readreg(s,4); + d=writereg(d,4); + + SIGNED16_REG_2_REG(d, s); + AND_rri(REG_WORK1, i, 63); + ASR_rrr(d,d,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} + +MIDFUNC(3,jnf_ASR_l_reg,(W4 d, RR4 s, RR4 i)) +{ + i=readreg(i,4); + s=readreg(s,4); + d=writereg(d,4); + + AND_rri(REG_WORK1, i, 63); + ASR_rrr(d,s,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} + +MIDFUNC(3,jff_ASR_b_reg,(W4 d, RR4 s, RR4 i)) +{ + i=readreg(i,4); + s=readreg(s,4); + d=writereg(d,4); + + SIGNED8_REG_2_REG(d, s); + CC_MSR_CPSRf_r(NATIVE_CC_CC, 0); // Clear everything except C + CC_MSR_CPSRf_r(NATIVE_CC_CS, ARM_C_FLAG);// Clear everything except C + AND_rri(REG_WORK1, i, 63); + ASRS_rrr(d,d,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} + +MIDFUNC(3,jff_ASR_w_reg,(W4 d, RR4 s, RR4 i)) +{ + i=readreg(i,4); + s=readreg(s,4); + d=writereg(d,4); + + SIGNED16_REG_2_REG(d, s); + CC_MSR_CPSRf_r(NATIVE_CC_CC, 0); // Clear everything except C + CC_MSR_CPSRf_r(NATIVE_CC_CS, ARM_C_FLAG);// Clear everything except C + AND_rri(REG_WORK1, i, 63); + ASRS_rrr(d,d,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} + +MIDFUNC(3,jff_ASR_l_reg,(W4 d, RR4 s, RR4 i)) +{ + i=readreg(i,4); + s=readreg(s,4); + d=writereg(d,4); + + CC_MSR_CPSRf_r(NATIVE_CC_CC, 0); // Clear everything except C + CC_MSR_CPSRf_r(NATIVE_CC_CS, ARM_C_FLAG);// Clear everything except C + AND_rri(REG_WORK1, i, 63); + ASRS_rrr(d,s,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} + +/* + * ASRW + * Operand Syntax: + * + * Operand Size: 16 + * + * X Set according to the last bit shifted out of the operand. + * N Set if the most significant bit of the result is set. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Set if the most significant bit is changed at any time during the shift operation. 
Cleared otherwise.
+ * C Set according to the last bit shifted out of the operand.
+ *
+ */
+/* ASRW, no flags: sign-extend the low word of s into d and shift right by one. */
+MIDFUNC(2,jnf_ASRW,(W4 d, RR4 s))
+{
+	s=readreg(s,4);
+	d=writereg(d,4);
+
+	SIGNED16_REG_2_REG(d, s);
+	ASR_rri(d,d,1);
+
+	unlock2(d);
+	unlock2(s);
+}
+
+/*
+ * ASRW, with flags: all flags are cleared first, then the flag-setting
+ * shift produces N, Z and C. V stays cleared because an arithmetic right
+ * shift never changes the most significant bit.
+ */
+MIDFUNC(2,jff_ASRW,(W4 d, RR4 s))
+{
+	s=readreg(s,4);
+	d=writereg(d,4);
+
+	SIGNED16_REG_2_REG(d, s);
+	MSR_CPSRf_i(0);
+	ASRS_rri(d,d,1);	// flag-setting form; plain ASR would leave N/Z/C cleared
+
+	unlock2(d);
+	unlock2(s);
+}
+
+/*
+ * BCHG
+ * Operand Syntax: Dn, <ea>
+ *                 #<data>, <ea>
+ *
+ * Operand Size: 8,32
+ *
+ * X Not affected.
+ * N Not affected.
+ * Z Set if the bit tested is zero. Cleared otherwise.
+ * V Not affected.
+ * C Not affected.
+ *
+ */
+/* BCHG.B, immediate bit number, no flags: toggle bit s of d. */
+MIDFUNC(2,jnf_BCHG_b_imm,(RW4 d, IMM s))
+{
+	d=rmw(d,4,4);
+	EOR_rri(d,d,(1 << s));
+	unlock2(d);
+}
+
+/* BCHG.L, immediate bit number, no flags: toggle bit s of d. */
+MIDFUNC(2,jnf_BCHG_l_imm,(RW4 d, IMM s))
+{
+	d=rmw(d,4,4);
+	EOR_rri(d,d,(1 << s));
+	unlock2(d);
+}
+
+/* BCHG.B, bit number in a register, no flags. A constant s is folded into the _imm variant (modulo 8). */
+MIDFUNC(2,jnf_BCHG_b,(RW4 d, RR4 s))
+{
+	if (isconst(s)) {
+		COMPCALL(jnf_BCHG_b_imm)(d,live.state[s].val&7);
+		return;
+	}
+	s=readreg(s,4);
+	d=rmw(d,4,4);
+
+	// REG_WORK2 = 1 << (s & 7)
+	AND_rri(REG_WORK1, s, 7);
+	MOV_ri(REG_WORK2, 1);
+	LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1);
+
+	EOR_rrr(d,d,REG_WORK2);
+
+	unlock2(d);
+	unlock2(s);
+}
+
+/* BCHG.L, bit number in a register, no flags. A constant s is folded into the _imm variant (modulo 32). */
+MIDFUNC(2,jnf_BCHG_l,(RW4 d, RR4 s))
+{
+	if (isconst(s)) {
+		COMPCALL(jnf_BCHG_l_imm)(d,live.state[s].val&31);
+		return;
+	}
+
+	s=readreg(s,4);
+	d=rmw(d,4,4);
+
+	// REG_WORK2 = 1 << (s & 31)
+	AND_rri(REG_WORK1, s, 31);
+	MOV_ri(REG_WORK2, 1);
+	LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1);
+
+	EOR_rrr(d,d,REG_WORK2);
+
+	unlock2(d);
+	unlock2(s);
+}
+
+/*
+ * BCHG.B, immediate bit number, with flags: CPSR is saved in REG_WORK1,
+ * the bit is tested, only ARM_Z_FLAG is updated in the saved copy, and the
+ * copy is written back before the bit is toggled.
+ */
+MIDFUNC(2,jff_BCHG_b_imm,(RW4 d, IMM s))
+{
+	d=rmw(d,4,4);
+
+	uae_u32 v = (1 << s);
+	MRS_CPSR(REG_WORK1);
+	TST_ri(d,v);
+	CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_Z_FLAG);
+	CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_Z_FLAG);
+	MSR_CPSR_r(REG_WORK1);
+	EOR_rri(d,d,v);
+
+	unlock2(d);
+}
+
+/* BCHG.L, immediate bit number, with flags: same scheme as jff_BCHG_b_imm. */
+MIDFUNC(2,jff_BCHG_l_imm,(RW4 d, IMM s))
+{
+	d=rmw(d,4,4);
+
+	uae_u32 v = (1 << s);
+	MRS_CPSR(REG_WORK1);
+	TST_ri(d,v);
+	CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_Z_FLAG);
+	CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_Z_FLAG);
+	MSR_CPSR_r(REG_WORK1);
+	EOR_rri(d,d,v);
+
+	unlock2(d);
+}
+
+/* BCHG.B, bit number in a register, with flags. A constant s is folded into the _imm variant (modulo 8). */
+MIDFUNC(2,jff_BCHG_b,(RW4 d, RR4 s))
+{
+	if (isconst(s)) {
+		COMPCALL(jff_BCHG_b_imm)(d,live.state[s].val&7);
+		return;
+	}
+	s=readreg(s,4);
+	d=rmw(d,4,4);
+
+	// REG_WORK2 = 1 << (s & 7)
+	AND_rri(REG_WORK1, s, 7);
+	MOV_ri(REG_WORK2, 1);
+	LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1);
+
+	// Update only Z in a saved copy of CPSR, then toggle the bit.
+	MRS_CPSR(REG_WORK1);
+	TST_rr(d,REG_WORK2);
+	CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_Z_FLAG);
+	CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_Z_FLAG);
+	MSR_CPSR_r(REG_WORK1);
+	EOR_rrr(d,d,REG_WORK2);
+
+	unlock2(d);
+	unlock2(s);
+}
+
+/* BCHG.L, bit number in a register, with flags. A constant s is folded into the _imm variant (modulo 32). */
+MIDFUNC(2,jff_BCHG_l,(RW4 d, RR4 s))
+{
+	if (isconst(s)) {
+		COMPCALL(jff_BCHG_l_imm)(d,live.state[s].val&31);
+		return;
+	}
+
+	s=readreg(s,4);
+	d=rmw(d,4,4);
+
+	// REG_WORK2 = 1 << (s & 31)
+	AND_rri(REG_WORK1, s, 31);
+	MOV_ri(REG_WORK2, 1);
+	LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1);
+
+	// Update only Z in a saved copy of CPSR, then toggle the bit.
+	MRS_CPSR(REG_WORK1);
+	TST_rr(d,REG_WORK2);
+	CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_Z_FLAG);
+	CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_Z_FLAG);
+	MSR_CPSR_r(REG_WORK1);
+	EOR_rrr(d,d,REG_WORK2);
+
+	unlock2(d);
+	unlock2(s);
+}
+
+/*
+ * BCLR
+ * Operand Syntax: Dn, <ea>
+ *                 #<data>, <ea>
+ *
+ * Operand Size: 8,32
+ *
+ * X Not affected.
+ * N Not affected.
+ * Z Set if the bit tested is zero. Cleared otherwise.
+ * V Not affected.
+ * C Not affected.
+ *
+ */
+/* BCLR.B, immediate bit number, no flags: clear bit s of d. */
+MIDFUNC(2,jnf_BCLR_b_imm,(RW4 d, IMM s))
+{
+	d=rmw(d,4,4);
+	BIC_rri(d,d,(1 << s));
+	unlock2(d);
+}
+
+/* BCLR.L, immediate bit number, no flags: clear bit s of d. */
+MIDFUNC(2,jnf_BCLR_l_imm,(RW4 d, IMM s))
+{
+	d=rmw(d,4,4);
+	BIC_rri(d,d,(1 << s));
+	unlock2(d);
+}
+
+/* BCLR.B, bit number in a register, no flags. A constant s is folded into the _imm variant (modulo 8). */
+MIDFUNC(2,jnf_BCLR_b,(RW4 d, RR4 s))
+{
+	if (isconst(s)) {
+		COMPCALL(jnf_BCLR_b_imm)(d,live.state[s].val&7);
+		return;
+	}
+	s=readreg(s,4);
+	d=rmw(d,4,4);
+
+	// REG_WORK2 = 1 << (s & 7)
+	AND_rri(REG_WORK1, s, 7);
+	MOV_ri(REG_WORK2, 1);
+	LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1);
+
+	BIC_rrr(d,d,REG_WORK2);
+
+	unlock2(d);
+	unlock2(s);
+}
+
+/* BCLR.L, bit number in a register, no flags. A constant s is folded into the _imm variant (modulo 32). */
+MIDFUNC(2,jnf_BCLR_l,(RW4 d, RR4 s))
+{
+	if (isconst(s)) {
+		COMPCALL(jnf_BCLR_l_imm)(d,live.state[s].val&31);
+		return;
+	}
+
+	s=readreg(s,4);
+	d=rmw(d,4,4);
+
+	// REG_WORK2 = 1 << (s & 31)
+	AND_rri(REG_WORK1, s, 31);
+	MOV_ri(REG_WORK2, 1);
+	LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1);
+
+	BIC_rrr(d,d,REG_WORK2);
+
+	unlock2(d);
+	unlock2(s);
+}
+
+/*
+ * BCLR.B, immediate bit number, with flags: CPSR is saved in REG_WORK1,
+ * the bit is tested, only ARM_Z_FLAG is updated in the saved copy, and the
+ * copy is written back before the bit is cleared.
+ */
+MIDFUNC(2,jff_BCLR_b_imm,(RW4 d, IMM s))
+{
+	d=rmw(d,4,4);
+
+	uae_u32 v = (1 << s);
+	MRS_CPSR(REG_WORK1);
+	TST_ri(d,v);
+	CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_Z_FLAG);
+	CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_Z_FLAG);
+	MSR_CPSR_r(REG_WORK1);
+	BIC_rri(d,d,v);
+
+	unlock2(d);
+}
+
+/* BCLR.L, immediate bit number, with flags: same scheme as jff_BCLR_b_imm. */
+MIDFUNC(2,jff_BCLR_l_imm,(RW4 d, IMM s))
+{
+	d=rmw(d,4,4);
+
+	uae_u32 v = (1 << s);
+	MRS_CPSR(REG_WORK1);
+	TST_ri(d,v);
+	CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_Z_FLAG);
+	CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_Z_FLAG);
+	MSR_CPSR_r(REG_WORK1);
+	BIC_rri(d,d,v);
+
+	unlock2(d);
+}
+
+/* BCLR.B, bit number in a register, with flags. A constant s is folded into the _imm variant (modulo 8). */
+MIDFUNC(2,jff_BCLR_b,(RW4 d, RR4 s))
+{
+	if (isconst(s)) {
+		COMPCALL(jff_BCLR_b_imm)(d,live.state[s].val&7);
+		return;
+	}
+	s=readreg(s,4);
+	d=rmw(d,4,4);
+
+	// REG_WORK2 = 1 << (s & 7)
+	AND_rri(REG_WORK1, s, 7);
+	MOV_ri(REG_WORK2, 1);
+	LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1);
+
+	// Update only Z in a saved copy of CPSR, then clear the bit.
+	MRS_CPSR(REG_WORK1);
+	TST_rr(d,REG_WORK2);
+	CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_Z_FLAG);
+	CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_Z_FLAG);
+	MSR_CPSR_r(REG_WORK1);
+	BIC_rrr(d,d,REG_WORK2);
+
+	unlock2(d);
+	unlock2(s);
+}
+
+/* BCLR.L, bit number in a register, with flags. A constant s is folded into the _imm variant (modulo 32). */
+MIDFUNC(2,jff_BCLR_l,(RW4 d, RR4 s))
+{
+	if (isconst(s)) {
+		COMPCALL(jff_BCLR_l_imm)(d,live.state[s].val&31);
+		return;
+	}
+
+	s=readreg(s,4);
+	d=rmw(d,4,4);
+
+	// REG_WORK2 = 1 << (s & 31)
+	AND_rri(REG_WORK1, s, 31);
+	MOV_ri(REG_WORK2, 1);
+	LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1);
+
+	// Update only Z in a saved copy of CPSR, then clear the bit.
+	MRS_CPSR(REG_WORK1);
+	TST_rr(d,REG_WORK2);
+	CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_Z_FLAG);
+	CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_Z_FLAG);
+	MSR_CPSR_r(REG_WORK1);
+	BIC_rrr(d,d,REG_WORK2);
+
+	unlock2(d);
+	unlock2(s);
+}
+
+/*
+ * BSET
+ * Operand Syntax: Dn, <ea>
+ *                 #<data>, <ea>
+ *
+ * Operand Size: 8,32
+ *
+ * X Not affected.
+ * N Not affected.
+ * Z Set if the bit tested is zero. Cleared otherwise.
+ * V Not affected.
+ * C Not affected.
+ *
+ */
+/* BSET.B, immediate bit number, no flags: set bit s of d. */
+MIDFUNC(2,jnf_BSET_b_imm,(RW4 d, IMM s))
+{
+	d=rmw(d,4,4);
+	ORR_rri(d,d,(1 << s));
+	unlock2(d);
+}
+
+/* BSET.L, immediate bit number, no flags: set bit s of d. */
+MIDFUNC(2,jnf_BSET_l_imm,(RW4 d, IMM s))
+{
+	d=rmw(d,4,4);
+	ORR_rri(d,d,(1 << s));
+	unlock2(d);
+}
+
+/* BSET.B, bit number in a register, no flags. A constant s is folded into the _imm variant (modulo 8). */
+MIDFUNC(2,jnf_BSET_b,(RW4 d, RR4 s))
+{
+	if (isconst(s)) {
+		COMPCALL(jnf_BSET_b_imm)(d,live.state[s].val&7);
+		return;
+	}
+	s=readreg(s,4);
+	d=rmw(d,4,4);
+
+	// REG_WORK2 = 1 << (s & 7)
+	AND_rri(REG_WORK1, s, 7);
+	MOV_ri(REG_WORK2, 1);
+	LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1);
+
+	ORR_rrr(d,d,REG_WORK2);
+
+	unlock2(d);
+	unlock2(s);
+}
+
+/* BSET.L, bit number in a register, no flags. A constant s is folded into the _imm variant (modulo 32). */
+MIDFUNC(2,jnf_BSET_l,(RW4 d, RR4 s))
+{
+	if (isconst(s)) {
+		COMPCALL(jnf_BSET_l_imm)(d,live.state[s].val&31);
+		return;
+	}
+
+	s=readreg(s,4);
+	d=rmw(d,4,4);
+
+	// REG_WORK2 = 1 << (s & 31)
+	AND_rri(REG_WORK1, s, 31);
+	MOV_ri(REG_WORK2, 1);
+	LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1);
+
+	ORR_rrr(d,d,REG_WORK2);
+
+	unlock2(d);
+	unlock2(s);
+}
+
+/*
+ * BSET.B, immediate bit number, with flags: CPSR is saved in REG_WORK1,
+ * the bit is tested, only ARM_Z_FLAG is updated in the saved copy, and the
+ * copy is written back before the bit is set.
+ */
+MIDFUNC(2,jff_BSET_b_imm,(RW4 d, IMM s))
+{
+	d=rmw(d,4,4);
+
+	uae_u32 v = (1 << s);
+	MRS_CPSR(REG_WORK1);
+	TST_ri(d,v);
+	CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_Z_FLAG);
+	CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_Z_FLAG);
+	MSR_CPSR_r(REG_WORK1);
+	ORR_rri(d,d,v);
+
+	unlock2(d);
+}
+
+/* BSET.L, immediate bit number, with flags: same scheme as jff_BSET_b_imm. */
+MIDFUNC(2,jff_BSET_l_imm,(RW4 d, IMM s))
+{
+	d=rmw(d,4,4);
+
+	uae_u32 v = (1 << s);
+	MRS_CPSR(REG_WORK1);
+	TST_ri(d,v);
+	CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_Z_FLAG);
+	CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_Z_FLAG);
+	MSR_CPSR_r(REG_WORK1);
+	ORR_rri(d,d,v);
+
+	unlock2(d);
+}
+
+/* BSET.B, bit number in a register, with flags. A constant s is folded into the _imm variant (modulo 8). */
+MIDFUNC(2,jff_BSET_b,(RW4 d, RR4 s))
+{
+	if (isconst(s)) {
+		COMPCALL(jff_BSET_b_imm)(d,live.state[s].val&7);
+		return;
+	}
+	s=readreg(s,4);
+	d=rmw(d,4,4);
+
+	// REG_WORK2 = 1 << (s & 7)
+	AND_rri(REG_WORK1, s, 7);
+	MOV_ri(REG_WORK2, 1);
+	LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1);
+
+	// Update only Z in a saved copy of CPSR, then set the bit.
+	MRS_CPSR(REG_WORK1);
+	TST_rr(d,REG_WORK2);
+	CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_Z_FLAG);
+	CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_Z_FLAG);
+	MSR_CPSR_r(REG_WORK1);
+	ORR_rrr(d,d,REG_WORK2);
+
+	unlock2(d);
+	unlock2(s);
+}
+
+/* BSET.L, bit number in a register, with flags. A constant s is folded into the _imm variant (modulo 32). */
+MIDFUNC(2,jff_BSET_l,(RW4 d, RR4 s))
+{
+	if (isconst(s)) {
+		COMPCALL(jff_BSET_l_imm)(d,live.state[s].val&31);
+		return;
+	}
+
+	s=readreg(s,4);
+	d=rmw(d,4,4);
+
+	// REG_WORK2 = 1 << (s & 31)
+	AND_rri(REG_WORK1, s, 31);
+	MOV_ri(REG_WORK2, 1);
+	LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1);
+
+	// Update only Z in a saved copy of CPSR, then set the bit.
+	MRS_CPSR(REG_WORK1);
+	TST_rr(d,REG_WORK2);
+	CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_Z_FLAG);
+	CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_Z_FLAG);
+	MSR_CPSR_r(REG_WORK1);
+	ORR_rrr(d,d,REG_WORK2);
+
+	unlock2(d);
+	unlock2(s);
+}
+
+/*
+ * BTST
+ * Operand Syntax: Dn, <ea>
+ *                 #<data>, <ea>
+ *
+ * Operand Size: 8,32
+ *
+ * X Not affected
+ * N Not affected
+ * Z Set if the bit tested is zero.
Cleared otherwise
+ * V Not affected
+ * C Not affected
+ *
+ */
+/*
+ * BTST.B, immediate bit number: CPSR is saved in REG_WORK1, bit s of d is
+ * tested, only ARM_Z_FLAG is updated in the saved copy, and the copy is
+ * written back. d itself is only read.
+ */
+MIDFUNC(2,jff_BTST_b_imm,(RR4 d, IMM s))
+{
+	d=readreg(d,4);
+
+	MRS_CPSR(REG_WORK1);
+	TST_ri(d,(1 << s));
+	CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_Z_FLAG);
+	CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_Z_FLAG);
+	MSR_CPSR_r(REG_WORK1);
+
+	unlock2(d);
+}
+
+/* BTST.L, immediate bit number: same scheme as jff_BTST_b_imm. */
+MIDFUNC(2,jff_BTST_l_imm,(RR4 d, IMM s))
+{
+	d=readreg(d,4);
+
+	MRS_CPSR(REG_WORK1);
+	TST_ri(d,(1 << s));
+	CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_Z_FLAG);
+	CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_Z_FLAG);
+	MSR_CPSR_r(REG_WORK1);
+
+	unlock2(d);
+}
+
+/* BTST.B, bit number in a register. A constant s is folded into the _imm variant (modulo 8). */
+MIDFUNC(2,jff_BTST_b,(RR4 d, RR4 s))
+{
+	if (isconst(s)) {
+		COMPCALL(jff_BTST_b_imm)(d,live.state[s].val&7);
+		return;
+	}
+	s=readreg(s,4);
+	d=readreg(d,4);
+
+	// REG_WORK2 = 1 << (s & 7)
+	AND_rri(REG_WORK1, s, 7);
+	MOV_ri(REG_WORK2, 1);
+	LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1);
+
+	// Update only Z in a saved copy of CPSR.
+	MRS_CPSR(REG_WORK1);
+	TST_rr(d,REG_WORK2);
+	CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_Z_FLAG);
+	CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_Z_FLAG);
+	MSR_CPSR_r(REG_WORK1);
+
+	unlock2(d);
+	unlock2(s);
+}
+
+/* BTST.L, bit number in a register. A constant s is folded into the _imm variant (modulo 32). */
+MIDFUNC(2,jff_BTST_l,(RR4 d, RR4 s))
+{
+	if (isconst(s)) {
+		COMPCALL(jff_BTST_l_imm)(d,live.state[s].val&31);
+		return;
+	}
+
+	s=readreg(s,4);
+	d=readreg(d,4);
+
+	// REG_WORK2 = 1 << (s & 31)
+	AND_rri(REG_WORK1, s, 31);
+	MOV_ri(REG_WORK2, 1);
+	LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1);
+
+	// Update only Z in a saved copy of CPSR.
+	MRS_CPSR(REG_WORK1);
+	TST_rr(d,REG_WORK2);
+	CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_Z_FLAG);
+	CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_Z_FLAG);
+	MSR_CPSR_r(REG_WORK1);
+
+	unlock2(d);
+	unlock2(s);
+}
+
+/*
+ * CLR
+ * Operand Syntax: <ea>
+ *
+ * Operand Size: 8,16,32
+ *
+ * X Not affected.
+ * N Always cleared.
+ * Z Always set.
+ * V Always cleared.
+ * C Always cleared.
+ *
+ */
+/* CLR, no flags: load zero into d. */
+MIDFUNC(1,jnf_CLR,(W4 d))
+{
+	d=writereg(d,4);
+	MOV_ri(d,0);
+	unlock2(d);
+}
+
+/* CLR, with flags: load zero into d and write ARM_Z_FLAG as the new flag state. */
+/* NOTE(review): uses MSR_CPSR_i while sibling emitters use MSR_CPSRf_i - confirm this is intended. */
+MIDFUNC(1,jff_CLR,(W4 d))
+{
+	d=writereg(d,4);
+	MOV_ri(d,0);
+	MSR_CPSR_i(ARM_Z_FLAG);
+	unlock2(d);
+}
+
+/*
+ * CMP
+ * Operand Syntax: <ea>, Dn
+ *
+ * Operand Size: 8,16,32
+ *
+ * X Not affected.
+ * N Set if the result is negative. Cleared otherwise.
+ * Z Set if the result is zero. Cleared otherwise.
+ * V Set if an overflow occurs. Cleared otherwise.
+ * C Set if a borrow occurs. Cleared otherwise.
+ *
+ */
+/*
+ * CMP.B: both operands are sign-extended from 8 bits into the work
+ * registers and compared. ARM_C_FLAG is then toggled in CPSR; the header
+ * above defines C as "set if a borrow occurs".
+ */
+MIDFUNC(2,jff_CMP_b,(RR1 d, RR1 s))
+{
+	d=readreg(d,4);
+	s=readreg(s,4);
+
+	SIGNED8_REG_2_REG(REG_WORK1, d);
+	SIGNED8_REG_2_REG(REG_WORK2, s);
+	CMP_rr(REG_WORK1,REG_WORK2);
+
+	MRS_CPSR(REG_WORK1);
+	EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);
+	MSR_CPSR_r(REG_WORK1);
+	// inverted_carry = true;
+
+	unlock2(s);
+	unlock2(d);
+}
+
+/* CMP.W: same scheme as jff_CMP_b with 16-bit sign extension. */
+MIDFUNC(2,jff_CMP_w,(RR2 d, RR2 s))
+{
+	d=readreg(d,4);
+	s=readreg(s,4);
+
+	SIGNED16_REG_2_REG(REG_WORK1, d);
+	SIGNED16_REG_2_REG(REG_WORK2, s);
+	CMP_rr(REG_WORK1,REG_WORK2);
+
+	MRS_CPSR(REG_WORK1);
+	EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);
+	MSR_CPSR_r(REG_WORK1);
+	// inverted_carry = true;
+
+	unlock2(s);
+	unlock2(d);
+}
+
+/* CMP.L: compares d with s directly, then toggles ARM_C_FLAG as in jff_CMP_b. */
+MIDFUNC(2,jff_CMP_l,(RR4 d, RR4 s))
+{
+	d=readreg(d,4);
+	s=readreg(s,4);
+
+	CMP_rr(d,s);
+
+	MRS_CPSR(REG_WORK1);
+	EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);
+	MSR_CPSR_r(REG_WORK1);
+	// inverted_carry = true;
+
+	unlock2(s);
+	unlock2(d);
+}
+
+/*
+ * CMPA
+ * Operand Syntax: <ea>, An
+ *
+ * Operand Size: 16,32
+ *
+ * X Not affected.
+ * N Set if the result is negative. Cleared otherwise.
+ * Z Set if the result is zero. Cleared otherwise.
+ * V Set if an overflow occurs. Cleared otherwise.
+ * C Set if a borrow occurs. Cleared otherwise.
+ *
+ */
+/*
+ * CMPA with a byte source: only s is sign-extended (into REG_WORK2); d is
+ * compared at full width. ARM_C_FLAG is toggled afterwards.
+ * NOTE(review): the header above lists operand sizes 16 and 32 only - confirm this variant is used.
+ */
+MIDFUNC(2,jff_CMPA_b,(RR1 d, RR1 s))
+{
+	d=readreg(d,4);
+	s=readreg(s,4);
+
+	SIGNED8_REG_2_REG(REG_WORK2, s);
+	CMP_rr(d,REG_WORK2);
+
+	MRS_CPSR(REG_WORK1);
+	EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);
+	MSR_CPSR_r(REG_WORK1);
+	// invertedcarry = true;
+
+	unlock2(s);
+	unlock2(d);
+}
+
+/* CMPA.W: s is sign-extended from 16 bits and compared against the full d; ARM_C_FLAG is toggled afterwards. */
+MIDFUNC(2,jff_CMPA_w,(RR2 d, RR2 s))
+{
+	d=readreg(d,4);
+	s=readreg(s,4);
+
+	SIGNED16_REG_2_REG(REG_WORK2, s);
+	CMP_rr(d,REG_WORK2);
+
+	MRS_CPSR(REG_WORK1);
+	EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);
+	MSR_CPSR_r(REG_WORK1);
+	// invertedcarry = true;
+
+	unlock2(s);
+	unlock2(d);
+}
+
+/* CMPA.L: compares d with s directly; ARM_C_FLAG is toggled afterwards. */
+MIDFUNC(2,jff_CMPA_l,(RR4 d, RR4 s))
+{
+	d=readreg(d,4);
+	s=readreg(s,4);
+
+	CMP_rr(d,s);
+
+	MRS_CPSR(REG_WORK1);
+	EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);
+	MSR_CPSR_r(REG_WORK1);
+	// invertedcarry = true;
+
+	unlock2(s);
+	unlock2(d);
+}
+
+/*
+ * EOR
+ * Operand Syntax: Dn, <ea>
+ *
+ * Operand Size: 8,16,32
+ *
+ * X Not affected.
+ * N Set if the most significant bit of the result is set.
+ *   Cleared otherwise.
+ * Z Set if the result is zero. Cleared otherwise.
+ * V Always cleared.
+ * C Always cleared.
+ *
+ */
+/* EOR, no flags: d = s ^ v. When both inputs are known constants the result is recorded as a constant and no code is emitted. */
+MIDFUNC(3,jnf_EOR,(W4 d, RR4 s, RR4 v))
+{
+	if (isconst(s) && isconst(v)) {
+		set_const(d,
+				live.state[s].val^live.state[v].val);
+		return;
+	}
+
+	v=readreg(v,4);
+	s=readreg(s,4);
+	d=writereg(d,4);
+
+	EOR_rrr(d, s, v);
+
+	unlock2(v);
+	unlock2(d);
+	unlock2(s);
+}
+
+/* EOR.B, with flags: both operands are sign-extended from 8 bits, all flags are cleared, then the flag-setting EOR runs. */
+MIDFUNC(3,jff_EOR_b,(W4 d, RR1 s, RR1 v))
+{
+	v=readreg(v,4);
+	s=readreg(s,4);
+	d=writereg(d,4);
+
+	SIGNED8_REG_2_REG(REG_WORK1, s);
+	SIGNED8_REG_2_REG(REG_WORK2, v);
+	MSR_CPSRf_i(0);
+	EORS_rrr(d, REG_WORK1, REG_WORK2);
+
+	unlock2(v);
+	unlock2(d);
+	unlock2(s);
+}
+
+/* EOR.W, with flags: same scheme as jff_EOR_b with 16-bit sign extension. */
+MIDFUNC(3,jff_EOR_w,(W4 d, RR2 s, RR2 v))
+{
+	v=readreg(v,4);
+	s=readreg(s,4);
+	d=writereg(d,4);
+
+	SIGNED16_REG_2_REG(REG_WORK1, s);
+	SIGNED16_REG_2_REG(REG_WORK2, v);
+	MSR_CPSRf_i(0);
+	EORS_rrr(d, REG_WORK1, REG_WORK2);
+
+	unlock2(v);
+	unlock2(d);
+	unlock2(s);
+}
+
+/* EOR.L, with flags: all flags are cleared, then the flag-setting EOR runs on the full registers. */
+MIDFUNC(3,jff_EOR_l,(W4 d, RR4 s, RR4 v))
+{
+	v=readreg(v,4);
+	s=readreg(s,4);
+	d=writereg(d,4);
+
+	MSR_CPSRf_i(0);
+	EORS_rrr(d, s,v);
+
+	unlock2(v);
+	unlock2(d);
+	unlock2(s);
+}
+
+/*
+ * EORI
+ * Operand Syntax: #<data>, CCR
+ *
+ * Operand Size: 8
+ *
+ * X — Changed if bit 4 of immediate operand is one; unchanged otherwise.
+ * N — Changed if bit 3 of immediate operand is one; unchanged otherwise.
+ * Z — Changed if bit 2 of immediate operand is one; unchanged otherwise.
+ * V — Changed if bit 1 of immediate operand is one; unchanged otherwise.
+ * C — Changed if bit 0 of immediate operand is one; unchanged otherwise.
+ *
+ */
+/*
+ * EORI to CCR: s is XORed into CPSR as given, so it is presumably already
+ * expressed as ARM flag bits - confirm against the caller. When x is
+ * non-zero, bit 0 of the byte at live.state[FLAGX].mem is toggled too.
+ * NOTE(review): MIDFUNC is given 1 as its first argument although two parameters are declared.
+ */
+MIDFUNC(1,jff_EORSR,(IMM s, IMM x))
+{
+	MRS_CPSR(REG_WORK1);
+	EOR_rri(REG_WORK1, REG_WORK1, s);
+	MSR_CPSRf_r(REG_WORK1);
+
+	if (x) {
+		compemu_raw_mov_l_ri(REG_WORK1, (uintptr)live.state[FLAGX].mem);
+		LDRB_rR(REG_WORK2, REG_WORK1);
+		EOR_rri(REG_WORK2, REG_WORK2, 1);
+		STRB_rR(REG_WORK2, REG_WORK1);
+	}
+}
+
+/*
+ * EXT
+ * Operand Syntax: Dn
+ *
+ * Operand Size: 16,32
+ *
+ * X Not affected.
+ * N Set if the result is negative. Cleared otherwise.
+ * Z Set if the result is zero. Cleared otherwise.
+ * V Always cleared.
+ * C Always cleared.
+ *
+ */
+/* EXT, no flags, 8-bit source: d = low byte of s sign-extended to 32 bits. Constants are folded. */
+MIDFUNC(2,jnf_EXT_b,(W4 d, RR4 s))
+{
+	if (isconst(s)) {
+		set_const(d,(uae_s32)(uae_s8)live.state[s].val);
+		return;
+	}
+
+	s=readreg(s,4);
+	d=writereg(d,4);
+
+	SIGNED8_REG_2_REG(d, s);
+
+	unlock2(s);
+	unlock2(d);
+}
+
+/* EXT, no flags: body is identical to jnf_EXT_b (8-bit source, full 32-bit sign extension). */
+MIDFUNC(2,jnf_EXT_w,(W4 d, RR4 s))
+{
+	if (isconst(s)) {
+		set_const(d,(uae_s32)(uae_s8)live.state[s].val);
+		return;
+	}
+
+	s=readreg(s,4);
+	d=writereg(d,4);
+
+	SIGNED8_REG_2_REG(d, s);
+
+	unlock2(s);
+	unlock2(d);
+}
+
+/* EXT, no flags, 16-bit source: d = low word of s sign-extended to 32 bits. Constants are folded. */
+MIDFUNC(2,jnf_EXT_l,(W4 d, RR4 s))
+{
+	if (isconst(s)) {
+		set_const(d,(uae_s32)(uae_s16)live.state[s].val);
+		return;
+	}
+
+	s=readreg(s,4);
+	d=writereg(d,4);
+
+	SIGNED16_REG_2_REG(d, s);
+
+	unlock2(s);
+	unlock2(d);
+}
+
+/*
+ * EXT, with flags, 8-bit source: d is loaded with the sign-extended value
+ * (from the constant or from s), all flags are cleared, then TST on d sets
+ * the result flags.
+ */
+MIDFUNC(2,jff_EXT_b,(W4 d, RR4 s))
+{
+	if (isconst(s)) {
+		d=writereg(d,4);
+		SIGNED8_IMM_2_REG(d, (uint8)live.state[s].val);
+	} else {
+		s=readreg(s,4);
+		d=writereg(d,4);
+		SIGNED8_REG_2_REG(d, s);
+		unlock2(s);
+	}
+
+	MSR_CPSRf_i(0);
+	TST_rr(d,d);
+
+	unlock2(d);
+}
+
+/* EXT, with flags: body is identical to jff_EXT_b (8-bit source). */
+MIDFUNC(2,jff_EXT_w,(W4 d, RR4 s))
+{
+	if (isconst(s)) {
+		d=writereg(d,4);
+		SIGNED8_IMM_2_REG(d, (uint8)live.state[s].val);
+	} else {
+		s=readreg(s,4);
+		d=writereg(d,4);
+		SIGNED8_REG_2_REG(d, s);
+		unlock2(s);
+	}
+
+	MSR_CPSRf_i(0);
+	TST_rr(d,d);
+
+	unlock2(d);
+}
+
+/* EXT, with flags, 16-bit source: same scheme as jff_EXT_b with 16-bit sign extension. */
+MIDFUNC(2,jff_EXT_l,(W4 d, RR4 s))
+{
+	if (isconst(s)) {
+		d=writereg(d,4);
+		SIGNED16_IMM_2_REG(d, (uint16)live.state[s].val);
+	} else {
+		s=readreg(s,4);
+		d=writereg(d,4);
+		SIGNED16_REG_2_REG(d, s);
+		unlock2(s);
+	}
+	MSR_CPSRf_i(0);
+	TST_rr(d,d);
+
+	unlock2(d);
+}
+
+/*
+ * LSL
+ * Operand Syntax: Dx, Dy
+ *                 #<data>, Dy
+ *                 <ea>
+ *
+ * Operand Size: 8,16,32
+ *
+ * X Set according to the last bit shifted out of the operand. Unaffected for a shift count of zero.
+ * N Set if the result is negative. Cleared otherwise.
+ * Z Set if the result is zero. Cleared otherwise.
+ * V Always cleared.
+ * C Set according to the last bit shifted out of the operand. Cleared for a shift count of zero.
+ * + */ +MIDFUNC(3,jnf_LSL_imm,(W4 d, RR4 s, IMM i)) +{ + if (!i) return; + + s=readreg(s,4); + d=writereg(d,4); + + LSL_rri(d,s,i); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jnf_LSL_reg,(W4 d, RR4 s, RR4 i)) +{ + i=readreg(i,4); + s=readreg(s,4); + d=writereg(d,4); + + AND_rri(REG_WORK1, i, 63); + LSL_rrr(d,s,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} + +MIDFUNC(3,jff_LSL_b_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + UNSIGNED8_REG_2_REG(d, s); + MSR_CPSRf_i(0); + + REV_rr(d,d); + if (i) { + LSLS_rri(d,d,i); + } else { + TST_rr(d,d); + } + REV_rr(d,d); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jff_LSL_w_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + MSR_CPSRf_i(0); + + LSL_rri(d,s,16); + if (i) { + LSLS_rri(d,d,i); + } else { + TST_rr(d,d); + } + LSR_rri(d,d,16); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jff_LSL_l_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + MSR_CPSRf_i(0); + if (i) { + LSLS_rri(d,s,i); + } else { + MOV_rr(d,s); + TST_rr(d,d); + } + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jff_LSL_b_reg,(W4 d, RR4 s, RR4 i)) +{ + i=readreg(i,4); + s=readreg(s,4); + d=writereg(d,4); + + UNSIGNED8_REG_2_REG(d,s); + CC_MSR_CPSRf_r(NATIVE_CC_CC, 0); // Clear everything except C + CC_MSR_CPSRf_r(NATIVE_CC_CS, ARM_C_FLAG);// Clear everything except C + REV_rr(d,d); + AND_rri(REG_WORK1, i, 63); + LSLS_rrr(d,d,REG_WORK1); + REV_rr(d,d); + + unlock2(d); + unlock2(s); + unlock2(i); +} + +MIDFUNC(3,jff_LSL_w_reg,(W4 d, RR4 s, RR4 i)) +{ + i=readreg(i,4); + s=readreg(s,4); + d=writereg(d,4); + + CC_MSR_CPSRf_r(NATIVE_CC_CC, 0); // Clear everything except C + CC_MSR_CPSRf_r(NATIVE_CC_CS, ARM_C_FLAG);// Clear everything except C + LSL_rri(d, s, 16); + AND_rri(REG_WORK1, i, 63); + LSLS_rrr(d,d,REG_WORK1); + LSR_rri(d, d, 16); + + unlock2(d); + unlock2(s); + unlock2(i); +} + +MIDFUNC(3,jff_LSL_l_reg,(W4 d, RR4 s, RR4 i)) +{ + i=readreg(i,4); + s=readreg(s,4); + 
d=writereg(d,4); + + CC_MSR_CPSRf_r(NATIVE_CC_CC, 0); // Clear everything except C + CC_MSR_CPSRf_r(NATIVE_CC_CS, ARM_C_FLAG);// Clear everything except C + AND_rri(REG_WORK1, i, 63); + LSLS_rrr(d,s,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} + +/* + * LSLW + * Operand Syntax: + * + * Operand Size: 16 + * + * X Set according to the last bit shifted out of the operand. Unaffected for a shift count of zero. + * N Set if the result is negative. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Always cleared. + * C Set according to the last bit shifted out of the operand. Cleared for a shift count of zero. + * + */ +MIDFUNC(2,jnf_LSLW,(W4 d, RR4 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + LSL_rri(d,s,1); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,jff_LSLW,(W4 d, RR4 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + MSR_CPSRf_i(0); + LSLS_rri(d,s,17); + LSR_rri(d,d,16); + + unlock2(d); + unlock2(s); +} + +/* + * LSR + * Operand Syntax: Dx, Dy + * #, Dy + * + * + * Operand Size: 8,16,32 + * + * X Set according to the last bit shifted out of the operand. + * Unaffected for a shift count of zero. + * N Set if the result is negative. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Always cleared. + * C Set according to the last bit shifted out of the operand. + * Cleared for a shift count of zero. 
+ *
+ */
+/*
+ * LSR.B, immediate count, no flags: zero-extend the low byte of s into d
+ * and shift right by i. When s and d name the same register it is locked
+ * once with rmw() and unlocked once.
+ * NOTE(review): nothing is emitted for i == 0.
+ */
+MIDFUNC(3,jnf_LSR_b_imm,(W4 d, RR4 s, IMM i))
+{
+	int isrmw;
+
+	if (!i)
+		return;
+
+	isrmw=(s==d);
+	if (!isrmw) {
+		s=readreg(s,4);
+		d=writereg(d,4);
+	}
+	else {
+		s=d=rmw(s,4,4);
+	}
+
+	UNSIGNED8_REG_2_REG(d, s);
+	LSR_rri(d,d,i);
+
+	if (!isrmw) {
+		unlock2(d);
+		unlock2(s);
+	}
+	else {
+		unlock2(s);
+	}
+}
+
+/* LSR.W, immediate count, no flags: as jnf_LSR_b_imm with a 16-bit zero extension. */
+MIDFUNC(3,jnf_LSR_w_imm,(W4 d, RR4 s, IMM i))
+{
+	int isrmw;
+
+	if (!i)
+		return;
+
+	isrmw=(s==d);
+	if (!isrmw) {
+		s=readreg(s,4);
+		d=writereg(d,4);
+	}
+	else {
+		s=d=rmw(s,4,4);
+	}
+
+	UNSIGNED16_REG_2_REG(d, s);
+	LSR_rri(d,d,i);
+
+	if (!isrmw) {
+		unlock2(d);
+		unlock2(s);
+	}
+	else {
+		unlock2(s);
+	}
+}
+
+/* LSR.L, immediate count, no flags: d = s >> i (logical). */
+MIDFUNC(3,jnf_LSR_l_imm,(W4 d, RR4 s, IMM i))
+{
+	int isrmw;
+
+	if (!i)
+		return;
+
+	isrmw=(s==d);
+	if (!isrmw) {
+		s=readreg(s,4);
+		d=writereg(d,4);
+	}
+	else {
+		s=d=rmw(s,4,4);
+	}
+
+	LSR_rri(d,s,i);
+
+	if (!isrmw) {
+		unlock2(d);
+		unlock2(s);
+	}
+	else {
+		unlock2(s);
+	}
+}
+
+/* LSR.B, immediate count, with flags: all flags are cleared, then either the flag-setting shift or (zero count) a TST runs. */
+MIDFUNC(3,jff_LSR_b_imm,(W4 d, RR4 s, IMM i))
+{
+	int isrmw;
+
+	isrmw=(s==d);
+	if (!isrmw) {
+		s=readreg(s,4);
+		d=writereg(d,4);
+	}
+	else {
+		s=d=rmw(s,4,4);
+	}
+
+	UNSIGNED8_REG_2_REG(d, s);
+	MSR_CPSRf_i(0);
+	if (i) {
+		LSRS_rri(d,d,i);
+	} else {
+		TST_rr(d,d);
+	}
+	if (!isrmw) {
+		unlock2(d);
+		unlock2(s);
+	}
+	else {
+		unlock2(s);
+	}
+}
+
+/* LSR.W, immediate count, with flags: as jff_LSR_b_imm with a 16-bit zero extension. */
+MIDFUNC(3,jff_LSR_w_imm,(W4 d, RR4 s, IMM i))
+{
+	int isrmw;
+
+	isrmw=(s==d);
+	if (!isrmw) {
+		s=readreg(s,4);
+		d=writereg(d,4);
+	}
+	else {
+		s=d=rmw(s,4,4);
+	}
+
+	UNSIGNED16_REG_2_REG(d, s);
+	MSR_CPSRf_i(0);
+	if (i) {
+		LSRS_rri(d,d,i);
+	} else {
+		TST_rr(d,d);
+	}
+	if (!isrmw) {
+		unlock2(d);
+		unlock2(s);
+	}
+	else {
+		unlock2(s);
+	}
+}
+
+/*
+ * LSR.L, immediate count, with flags.
+ * For a zero count the operand is copied to d before it is tested, so a
+ * destination distinct from s is always written (same pattern as
+ * jff_LSL_l_imm). In the rmw case s and d are the same register and the
+ * copy changes nothing.
+ */
+MIDFUNC(3,jff_LSR_l_imm,(W4 d, RR4 s, IMM i))
+{
+	int isrmw;
+
+	isrmw=(s==d);
+	if (!isrmw) {
+		s=readreg(s,4);
+		d=writereg(d,4);
+	}
+	else {
+		s=d=rmw(s,4,4);
+	}
+
+	MSR_CPSRf_i(0);
+	if (i) {
+		LSRS_rri(d,s,i);
+	} else {
+		MOV_rr(d,s);
+		TST_rr(d,d);
+	}
+	if (!isrmw) {
+		unlock2(d);
+		unlock2(s);
+	}
+	else {
+		unlock2(s);
+	}
+}
+
+MIDFUNC(3,jnf_LSR_b_reg,(W4 d, RR4 s, RR4 i)) +{ + int isrmw; + + i=readreg(i,4); + isrmw=(s==d); + if (!isrmw) { + s=readreg(s,4); + d=writereg(d,4); + } + else { + s=d=rmw(s,4,4); + } + + UNSIGNED8_REG_2_REG(d, s); + AND_rri(REG_WORK1, i, 63); + LSR_rrr(d,d,REG_WORK1); + + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } + unlock2(i); +} + +MIDFUNC(3,jnf_LSR_w_reg,(W4 d, RR4 s, RR4 i)) +{ + int isrmw; + + i=readreg(i,4); + isrmw=(s==d); + if (!isrmw) { + s=readreg(s,4); + d=writereg(d,4); + } + else { + s=d=rmw(s,4,4); + } + + UNSIGNED16_REG_2_REG(d, s); + AND_rri(REG_WORK1, i, 63); + LSR_rrr(d,d,REG_WORK1); + + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } + unlock2(i); +} + +MIDFUNC(3,jnf_LSR_l_reg,(W4 d, RR4 s, RR4 i)) +{ + int isrmw; + + i=readreg(i,4); + isrmw=(s==d); + if (!isrmw) { + s=readreg(s,4); + d=writereg(d,4); + } + else { + s=d=rmw(s,4,4); + } + + AND_rri(REG_WORK1, i, 63); + LSR_rrr(d,s,REG_WORK1); + + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } + unlock2(i); +} + +MIDFUNC(3,jff_LSR_b_reg,(W4 d, RR4 s, RR4 i)) +{ + int isrmw; + + i=readreg(i,4); + isrmw=(s==d); + if (!isrmw) { + s=readreg(s,4); + d=writereg(d,4); + } + else { + s=d=rmw(s,4,4); + } + + UNSIGNED8_REG_2_REG(d, s); + CC_MSR_CPSRf_r(NATIVE_CC_CC, 0); // Clear everything except C + CC_MSR_CPSRf_r(NATIVE_CC_CS, ARM_C_FLAG);// Clear everything except C + AND_rri(REG_WORK1, i, 63); + LSRS_rrr(d,d,REG_WORK1); + + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } + unlock2(i); +} + +MIDFUNC(3,jff_LSR_w_reg,(W4 d, RR4 s, RR4 i)) +{ + int isrmw; + + i=readreg(i,4); + isrmw=(s==d); + if (!isrmw) { + s=readreg(s,4); + d=writereg(d,4); + } + else { + s=d=rmw(s,4,4); + } + + UNSIGNED16_REG_2_REG(d, s); + CC_MSR_CPSRf_r(NATIVE_CC_CC, 0); // Clear everything except C + CC_MSR_CPSRf_r(NATIVE_CC_CS, ARM_C_FLAG);// Clear everything except C + AND_rri(REG_WORK1, i, 63); + LSRS_rrr(d,d,REG_WORK1); + + 
if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } + unlock2(i); +} + +MIDFUNC(3,jff_LSR_l_reg,(W4 d, RR4 s, RR4 i)) +{ + int isrmw; + + i=readreg(i,4); + isrmw=(s==d); + if (!isrmw) { + s=readreg(s,4); + d=writereg(d,4); + } + else { + s=d=rmw(s,4,4); + } + + CC_MSR_CPSRf_r(NATIVE_CC_CC, 0); // Clear everything except C + CC_MSR_CPSRf_r(NATIVE_CC_CS, ARM_C_FLAG);// Clear everything except C + AND_rri(REG_WORK1, i, 63); + LSRS_rrr(d,s,REG_WORK1); + + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } + unlock2(i); +} + +/* + * LSRW + * Operand Syntax: + * + * Operand Size: 16 + * + * X Set according to the last bit shifted out of the operand. Unaffected for a shift count of zero. + * N Set if the result is negative. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Always cleared. + * C Set according to the last bit shifted out of the operand. Cleared for a shift count of zero. + * + */ +MIDFUNC(2,jnf_LSRW,(W4 d, RR4 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + UNSIGNED16_REG_2_REG(d, s); + LSR_rri(d,d,1); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,jff_LSRW,(W4 d, RR4 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + UNSIGNED16_REG_2_REG(d, s); + MSR_CPSRf_i(0); + LSR_rri(d,d,1); + + unlock2(d); + unlock2(s); +} + +/* + * MOVE + * Operand Syntax: , + * + * Operand Size: 8,16,32 + * + * X Not affected. + * N Set if the result is negative. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Always cleared. + * C Always cleared. 
 *
 */
// MOVE without flags.  A source known to be constant is propagated with
// set_const() and emits no code; otherwise a register-to-register MOV is
// emitted.
MIDFUNC(2,jnf_MOVE,(W4 d, RR4 s))
{
	if (isconst(s)) {
		set_const(d,live.state[s].val);
		return;
	}
	s=readreg(s,4);
	d=writereg(d,4);

	MOV_rr(d, s);

	unlock2(d);
	unlock2(s);
}

// MOVE.B #imm with flags: load the byte immediate, clear the flags, then let
// TST set N and Z from the loaded value.
MIDFUNC(2,jff_MOVE_b_imm,(W4 d, IMM s))
{
	d=writereg(d,4);

	SIGNED8_IMM_2_REG(d, (uint8)s);
	MSR_CPSRf_i(0);
	TST_rr(d,d);

	unlock2(d);
}

// MOVE.W #imm with flags; same scheme as jff_MOVE_b_imm for a 16-bit value.
MIDFUNC(2,jff_MOVE_w_imm,(W4 d, IMM s))
{
	d=writereg(d,4);

	SIGNED16_IMM_2_REG(d, (uint16)s);
	MSR_CPSRf_i(0);
	TST_rr(d,d);

	unlock2(d);
}

// MOVE.L #imm with flags; the 32-bit constant is loaded with
// compemu_raw_mov_l_ri().
MIDFUNC(2,jff_MOVE_l_imm,(W4 d, IMM s))
{
	d=writereg(d,4);

	compemu_raw_mov_l_ri(d, s);
	MSR_CPSRf_i(0);
	TST_rr(d,d);

	unlock2(d);
}

// MOVE.B with flags.  A constant source is forwarded to the immediate variant.
MIDFUNC(2,jff_MOVE_b,(W4 d, RR1 s))
{
	if (isconst(s)) {
		COMPCALL(jff_MOVE_b_imm)(d,live.state[s].val);
		return;
	}

	s=readreg(s,4);
	d=writereg(d,4);

	SIGNED8_REG_2_REG(d, s);
	MSR_CPSRf_i(0);
	TST_rr(d,d);

	unlock2(d);
	unlock2(s);
}

// MOVE.W with flags.  A constant source is forwarded to the immediate variant.
MIDFUNC(2,jff_MOVE_w,(W4 d, RR2 s))
{
	if (isconst(s)) {
		COMPCALL(jff_MOVE_w_imm)(d,live.state[s].val);
		return;
	}

	s=readreg(s,4);
	d=writereg(d,4);

	SIGNED16_REG_2_REG(d, s);
	MSR_CPSRf_i(0);
	TST_rr(d,d);

	unlock2(d);
	unlock2(s);
}

// MOVE.L with flags: the flags are cleared and MOVS both copies the value and
// sets N and Z.
MIDFUNC(2,jff_MOVE_l,(W4 d, RR4 s))
{
	if (isconst(s)) {
		COMPCALL(jff_MOVE_l_imm)(d,live.state[s].val);
		return;
	}

	s=readreg(s,4);
	d=writereg(d,4);

	MSR_CPSRf_i(0);
	MOVS_rr(d,s);

	unlock2(d);
	unlock2(s);
}

/*
 * MOVE16
 *
 * Flags: Not affected.
 *
 */
// NOTE(review): the text of this function is truncated inside the PUSH_REGS()
// argument.  The remainder of jnf_MOVE16 and the opening of the following
// MOVEA header comment are missing from this copy and must be restored from
// the upstream file before this can compile.
// NOTE(review): MOVE16 transfers a 16-byte line, yet the BIC masks below clear
// the low eight address bits (0xFF) -- confirm whether 0x0F was intended.
MIDFUNC(2,jnf_MOVE16,(RR4 d, RR4 s))
{
	s=readreg(s,4);
	d=readreg(d,4);

	BIC_rri(s, s, 0x000000FF);
	BIC_rri(d, d, 0x000000FF);

	compemu_raw_mov_l_ri(REG_WORK1, (IMM)MEMBaseDiff);
	ADD_rrr(s, s, REG_WORK1);
	ADD_rrr(d, d, REG_WORK1);

	LDR_rRI(REG_WORK1, s, 8);
	LDR_rRI(REG_WORK2, s, 12);

	PUSH_REGS((1<, An
 *
 * Operand Size: 16,32
 *
 * Flags: Not affected.
 *
 */
// MOVEA.W: the 16-bit source is widened with SIGNED16_REG_2_REG into d.
// No flags are touched.
MIDFUNC(2,jnf_MOVEA_w,(W4 d, RR2 s))
{
	s=readreg(s,4);
	d=writereg(d,4);

	SIGNED16_REG_2_REG(d,s);

	unlock2(d);
	unlock2(s);
}

// MOVEA.L: plain 32-bit register copy.  No flags are touched.
MIDFUNC(2,jnf_MOVEA_l,(W4 d, RR4 s))
{
	s=readreg(s,4);
	d=writereg(d,4);

	MOV_rr(d,s);

	unlock2(d);
	unlock2(s);
}

/*
 * MULS
 * Operand Syntax: <ea>, Dn
 *
 * Operand Size: 16
 *
 * X Not affected.
 * N Set if the result is negative. Cleared otherwise.
 * Z Set if the result is zero. Cleared otherwise.
 * V Set if overflow. Cleared otherwise. (32 Bit multiply only)
 * C Always cleared.
 *
 */
// MULS.W without flags: both 16-bit operands are sign-extended (the source
// into REG_WORK1) and multiplied into d.
MIDFUNC(2,jnf_MULS,(RW4 d, RR4 s))
{
	s = readreg(s, 4);
	d = rmw(d, 4, 4);

	SIGN_EXTEND_16_REG_2_REG(d,d);
	SIGN_EXTEND_16_REG_2_REG(REG_WORK1,s);
	MUL_rrr(d, d, REG_WORK1);

	unlock2(s);
	unlock2(d);
}

// MULS.W with flags: as jnf_MULS, but the flags are cleared first and MULS
// sets N and Z from the product.
MIDFUNC(2,jff_MULS,(RW4 d, RR4 s))
{
	s = readreg(s, 4);
	d = rmw(d, 4, 4);

	SIGN_EXTEND_16_REG_2_REG(d,d);
	SIGN_EXTEND_16_REG_2_REG(REG_WORK1,s);

	MSR_CPSRf_i(0);
	MULS_rrr(d, d, REG_WORK1);

	unlock2(s);
	unlock2(d);
}

// MULS.L (32-bit result) without flags.
MIDFUNC(2,jnf_MULS32,(RW4 d, RR4 s))
{
	s = readreg(s, 4);
	d = rmw(d, 4, 4);

	MUL_rrr(d, d, s);

	unlock2(s);
	unlock2(d);
}

// MULS.L (32-bit result) with flags.  The 64-bit product is formed with the
// low word in d and the high word in REG_WORK2.  V is ORed into the saved
// flags when the high word differs from the sign extension of the low word
// (TEQ against d ASR 31).
MIDFUNC(2,jff_MULS32,(RW4 d, RR4 s))
{
	s = readreg(s, 4);
	d = rmw(d, 4, 4);

	MSR_CPSRf_i(0);
	// L, H,
	SMULLS_rrrr(d, REG_WORK2, d, s);
	MRS_CPSR(REG_WORK1);
	TEQ_rrASRi(REG_WORK2,d,31);
	CC_ORR_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_V_FLAG);
	MSR_CPSRf_r(REG_WORK1);

	unlock2(s);
	unlock2(d);
}

// MULS.L (64-bit result) without flags: low word is returned in d, high word
// in s (both operands are read-modify-write).
MIDFUNC(2,jnf_MULS64,(RW4 d, RW4 s))
{
	s = rmw(s, 4, 4);
	d = rmw(d, 4, 4);

	// L, H,
	SMULL_rrrr(d, s, d, s);

	unlock2(s);
	unlock2(d);
}

// MULS.L (64-bit result) with flags: same product layout as jnf_MULS64, with
// the same V computation as jff_MULS32 applied to the high word held in s.
MIDFUNC(2,jff_MULS64,(RW4 d, RW4 s))
{
	s = rmw(s, 4, 4);
	d = rmw(d, 4, 4);

	MSR_CPSRf_i(0);
	// L, H,
	SMULLS_rrrr(d, s, d, s);
	MRS_CPSR(REG_WORK1);
	TEQ_rrASRi(s,d,31);
	CC_ORR_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_V_FLAG);
	MSR_CPSRf_r(REG_WORK1);

	unlock2(s);
	unlock2(d);
}

/*
 * MULU
 * Operand Syntax: <ea>, Dn
 *
 * Operand Size: 16
 *
 * X Not affected.
 * N Set if the result is negative. Cleared otherwise.
 * Z Set if the result is zero. Cleared otherwise.
 * V Set if overflow. Cleared otherwise. (32 Bit multiply only)
 * C Always cleared.
 *
 */
// MULU.W without flags: both 16-bit operands are zero-extended (the source
// into REG_WORK1) and multiplied into d.
MIDFUNC(2,jnf_MULU,(RW4 d, RR4 s))
{
	s = readreg(s, 4);
	d = rmw(d, 4, 4);

	ZERO_EXTEND_16_REG_2_REG(d,d);
	ZERO_EXTEND_16_REG_2_REG(REG_WORK1,s);

	MUL_rrr(d, d, REG_WORK1);

	unlock2(s);
	unlock2(d);
}

// MULU.W with flags: as jnf_MULU, but the flags are cleared first and MULS
// sets N and Z from the product.
MIDFUNC(2,jff_MULU,(RW4 d, RR4 s))
{
	s = readreg(s, 4);
	d = rmw(d, 4, 4);

	ZERO_EXTEND_16_REG_2_REG(d,d);
	ZERO_EXTEND_16_REG_2_REG(REG_WORK1, s);

	MSR_CPSRf_i(0);
	MULS_rrr(d, d, REG_WORK1);

	unlock2(s);
	unlock2(d);
}

// MULU.L (32-bit result) without flags.
MIDFUNC(2,jnf_MULU32,(RW4 d, RR4 s))
{
	s = readreg(s, 4);
	d = rmw(d, 4, 4);

	MUL_rrr(d, d, s);

	unlock2(s);
	unlock2(d);
}

// MULU.L (32-bit result) with flags.  The 64-bit product is formed with the
// low word in d and the high word in REG_WORK2; V is ORed into the saved flags
// when the high word is non-zero.
MIDFUNC(2,jff_MULU32,(RW4 d, RR4 s))
{
	s = readreg(s, 4);
	d = rmw(d, 4, 4);

	// L, H,
	MSR_CPSRf_i(0);
	UMULLS_rrrr(d, REG_WORK2, d, s);
	MRS_CPSR(REG_WORK1);
	TST_rr(REG_WORK2,REG_WORK2);
	CC_ORR_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_V_FLAG);
	MSR_CPSRf_r(REG_WORK1);

	unlock2(s);
	unlock2(d);
}

// MULU.L (64-bit result) without flags: low word is returned in d, high word
// in s (both operands are read-modify-write).
MIDFUNC(2,jnf_MULU64,(RW4 d, RW4 s))
{
	s = rmw(s, 4, 4);
	d = rmw(d, 4, 4);

	// L, H,
	UMULL_rrrr(d, s, d, s);

	unlock2(s);
	unlock2(d);
}

// MULU.L (64-bit result) with flags: same product layout as jnf_MULU64, with
// V ORed in when the high word held in s is non-zero.
MIDFUNC(2,jff_MULU64,(RW4 d, RW4 s))
{
	s = rmw(s, 4, 4);
	d = rmw(d, 4, 4);

	// L, H,
	MSR_CPSRf_i(0);
	UMULLS_rrrr(d, s, d, s);
	MRS_CPSR(REG_WORK1);
	TST_rr(s,s);
	CC_ORR_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_V_FLAG);
	MSR_CPSRf_r(REG_WORK1);

	unlock2(s);
	unlock2(d);
}

/*
 * NEG
 * Operand Syntax: <ea>
 *
 * Operand Size: 8,16,32
 *
 * X Set the same as the carry bit.
 * N Set if the result is negative. Cleared otherwise.
 * Z Set if the result is zero. Cleared otherwise.
 * V Set if an overflow occurs. Cleared otherwise.
 * C Cleared if the result is zero. Set otherwise.
 *
 */
// NEG without flags: d = 0 - s, computed with a reverse subtract.
MIDFUNC(2,jnf_NEG,(W4 d, RR4 s))
{
	d=writereg(d,4);
	s=readreg(s,4);

	RSB_rri(d,s,0);

	unlock2(d);
	unlock2(s);
}

// NEG.B with flags: the byte operand is widened into REG_WORK1 and negated
// with RSBS.  The C bit of the resulting CPSR is then toggled, because ARM
// reports a subtraction's carry as NOT-borrow.
MIDFUNC(2,jff_NEG_b,(W4 d, RR1 s))
{
	d=writereg(d,4);
	s=readreg(s,4);

	SIGNED8_REG_2_REG(REG_WORK1, s);
	RSBS_rri(d,REG_WORK1,0);

	// inverted_carry = true;
	MRS_CPSR(REG_WORK1);
	EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);
	MSR_CPSR_r(REG_WORK1);

	unlock2(d);
	unlock2(s);
}

// NEG.W with flags; same scheme as jff_NEG_b for a 16-bit operand.
MIDFUNC(2,jff_NEG_w,(W4 d, RR2 s))
{
	d=writereg(d,4);
	s=readreg(s,4);

	SIGNED16_REG_2_REG(REG_WORK1, s);
	RSBS_rri(d,REG_WORK1,0);

	// inverted_carry = true;
	MRS_CPSR(REG_WORK1);
	EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);
	MSR_CPSR_r(REG_WORK1);

	unlock2(d);
	unlock2(s);
}

// NEG.L with flags; operates on the full register, so no widening is needed.
MIDFUNC(2,jff_NEG_l,(W4 d, RR4 s))
{
	d=writereg(d,4);
	s=readreg(s,4);

	RSBS_rri(d,s,0);

	// inverted_carry = true;
	MRS_CPSR(REG_WORK1);
	EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);
	MSR_CPSR_r(REG_WORK1);

	unlock2(d);
	unlock2(s);
}

/*
 * NEGX
 * Operand Syntax: <ea>
 *
 * Operand Size: 8,16,32
 *
 * X Set the same as the carry bit.
 * N Set if the result is negative. Cleared otherwise.
 * Z Cleared if the result is nonzero; unchanged otherwise.
 * V Set if an overflow occurs. Cleared otherwise.
 * C Cleared if the result is zero. Set otherwise.
 *
 * Attention: Z is cleared only if the result is nonzero.
 * Unchanged otherwise
 *
 */
// NEGX without flags: reverse subtract with carry (RSC) of s from zero; the
// incoming host carry takes part in the subtraction.
MIDFUNC(2,jnf_NEGX,(W4 d, RR4 s))
{
	d=writereg(d,4);
	s=readreg(s,4);

	RSC_rri(d,s,0);

	unlock2(d);
	unlock2(s);
}

// NEGX.B with flags.  REG_WORK2 is turned into a mask that depends on the
// incoming Z flag: all ones when Z was set, all ones except ARM_Z_FLAG when it
// was clear.  After RSCS the new flags have C toggled and are ANDed with that
// mask, so Z can only stay set if it was already set before the operation.
MIDFUNC(2,jff_NEGX_b,(W4 d, RR1 s))
{
	d=writereg(d,4);
	s=readreg(s,4);

	MRS_CPSR(REG_WORK2);
	CC_MVN_ri(NATIVE_CC_EQ, REG_WORK2, 0);
	CC_MVN_ri(NATIVE_CC_NE, REG_WORK2, ARM_Z_FLAG);

	SIGNED8_REG_2_REG(REG_WORK1, s);
	RSCS_rri(d,REG_WORK1,0);

	MRS_CPSR(REG_WORK1);
	EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);
	AND_rrr(REG_WORK1, REG_WORK1, REG_WORK2);
	MSR_CPSR_r(REG_WORK1);

	unlock2(d);
	unlock2(s);
}

// NEGX.W with flags; same scheme as jff_NEGX_b for a 16-bit operand.
MIDFUNC(2,jff_NEGX_w,(W4 d, RR2 s))
{
	d=writereg(d,4);
	s=readreg(s,4);

	MRS_CPSR(REG_WORK2);
	CC_MVN_ri(NATIVE_CC_EQ, REG_WORK2, 0);
	CC_MVN_ri(NATIVE_CC_NE, REG_WORK2, ARM_Z_FLAG);

	SIGNED16_REG_2_REG(REG_WORK1, s);
	RSCS_rri(d,REG_WORK1,0);

	MRS_CPSR(REG_WORK1);
	EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);
	AND_rrr(REG_WORK1, REG_WORK1, REG_WORK2);
	MSR_CPSR_r(REG_WORK1);

	unlock2(d);
	unlock2(s);
}

// NEGX.L with flags; operates on the full register, so no widening is needed.
MIDFUNC(2,jff_NEGX_l,(W4 d, RR4 s))
{
	d=writereg(d,4);
	s=readreg(s,4);

	MRS_CPSR(REG_WORK2);
	CC_MVN_ri(NATIVE_CC_EQ, REG_WORK2, 0);
	CC_MVN_ri(NATIVE_CC_NE, REG_WORK2, ARM_Z_FLAG);

	RSCS_rri(d,s,0);

	MRS_CPSR(REG_WORK1);
	EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);
	AND_rrr(REG_WORK1, REG_WORK1, REG_WORK2);
	MSR_CPSR_r(REG_WORK1);

	unlock2(d);
	unlock2(s);
}

/*
 * NOT
 * Operand Syntax: <ea>
 *
 * Operand Size: 8,16,32
 *
 * X Not affected.
 * N Set if the result is negative. Cleared otherwise.
 * Z Set if the result is zero. Cleared otherwise.
 * V Always cleared.
 * C Always cleared.
+ * + */ +MIDFUNC(2,jnf_NOT,(W4 d, RR4 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + MVN_rr(d,s); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,jff_NOT_b,(W4 d, RR1 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + UNSIGNED8_REG_2_REG(d,s); + MSR_CPSRf_i(0); // Clear flags + MVNS_rr(d,d); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,jff_NOT_w,(W4 d, RR2 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + UNSIGNED16_REG_2_REG(d,s); + MSR_CPSRf_i(0); // Clear flags + MVNS_rr(d,d); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,jff_NOT_l,(W4 d, RR4 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + MSR_CPSRf_i(0); // Clear flags + MVNS_rr(d,s); + + unlock2(d); + unlock2(s); +} + +/* + * OR + * Operand Syntax: , Dn + * Dn, + * + * Operand Size: 8,16,32 + * + * X Not affected. + * N Set if the most significant bit of the result is set. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Always cleared. + * C Always cleared. + * + */ +MIDFUNC(3,jnf_OR,(W4 d, RR4 s, RR4 v)) +{ + if (isconst(s) && isconst(v)) { + set_const(d, + live.state[s].val|live.state[v].val); + return; + } + + v=readreg(v,4); + s=readreg(s,4); + d=writereg(d,4); + + ORR_rrr(d, s, v); + + unlock2(v); + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jff_OR_b,(W4 d, RR1 s, RR1 v)) +{ + v=readreg(v,4); + s=readreg(s,4); + d=writereg(d,4); + + SIGNED8_REG_2_REG(REG_WORK1, s); + SIGNED8_REG_2_REG(REG_WORK2, v); + MSR_CPSRf_i(0); + ORRS_rrr(d, REG_WORK1, REG_WORK2); + + unlock2(v); + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jff_OR_w,(W4 d, RR2 s, RR2 v)) +{ + v=readreg(v,4); + s=readreg(s,4); + d=writereg(d,4); + + SIGNED16_REG_2_REG(REG_WORK1, s); + SIGNED16_REG_2_REG(REG_WORK2, v); + MSR_CPSRf_i(0); + ORRS_rrr(d, REG_WORK1, REG_WORK2); + + unlock2(v); + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jff_OR_l,(W4 d, RR4 s, RR4 v)) +{ + v=readreg(v,4); + s=readreg(s,4); + d=writereg(d,4); + + MSR_CPSRf_i(0); + ORRS_rrr(d, s,v); + + unlock2(v); + unlock2(d); + unlock2(s); +} + +/* + * ORI + * 
 * Operand Syntax: #<data>, CCR
 *
 * Operand Size: 8
 *
 * X — Set if bit 4 of immediate operand is one; unchanged otherwise.
 * N — Set if bit 3 of immediate operand is one; unchanged otherwise.
 * Z — Set if bit 2 of immediate operand is one; unchanged otherwise.
 * V — Set if bit 1 of immediate operand is one; unchanged otherwise.
 * C — Set if bit 0 of immediate operand is one; unchanged otherwise.
 *
 */
// ORI to CCR.  The immediate s is ORed into the host flags (read with MRS,
// written back with MSR); it is presumably already translated to the host flag
// layout by the caller -- TODO confirm.  When x is non-zero the byte at
// live.state[FLAGX].mem is additionally set to 1.
MIDFUNC(1,jff_ORSR,(IMM s, IMM x))
{
	MRS_CPSR(REG_WORK1);
	ORR_rri(REG_WORK1, REG_WORK1, s);
	MSR_CPSRf_r(REG_WORK1);

	if (x) {
		compemu_raw_mov_l_ri(REG_WORK1, (uintptr)live.state[FLAGX].mem);
		MOV_ri(REG_WORK2, 1);
		STRB_rR(REG_WORK2, REG_WORK1);
	}
}

/*
 * ROL
 * Operand Syntax: Dx, Dy
 * #<data>, Dy
 * <ea>
 *
 * Operand Size: 8,16,32
 *
 * X Not affected.
 * N Set if the most significant bit of the result is set. Cleared otherwise.
 * Z Set if the result is zero. Cleared otherwise.
 * V Always cleared.
 * C Set according to the last bit rotated out of the operand. Cleared when the rotate count is zero.
 *
 */
// ROL.B #imm without flags.  The byte is replicated into all four byte lanes
// (LSL 24, then ORed with itself shifted right by 8 and 16) so that a 32-bit
// rotate behaves like an 8-bit one.  The left rotate is expressed as a right
// rotate by 32 - (i & 0x1f).
MIDFUNC(3,jnf_ROL_b_imm,(W4 d, RR4 s, IMM i))
{
	s=readreg(s,4);
	d=writereg(d,4);

	LSL_rri(d,s,24);
	ORR_rrrLSRi(d,d,d,8);
	ORR_rrrLSRi(d,d,d,16);
	ROR_rri(d,d,(32-(i&0x1f)));

	unlock2(d);
	unlock2(s);
}

// ROL.W #imm without flags; the 16-bit value is replicated into both halves.
MIDFUNC(3,jnf_ROL_w_imm,(W4 d, RR4 s, IMM i))
{
	s=readreg(s,4);
	d=writereg(d,4);

	LSL_rri(d,s,16);
	ORR_rrrLSRi(d,d,d,16);
	ROR_rri(d,d,(32-(i&0x1f)));

	unlock2(d);
	unlock2(s);
}

// ROL.L #imm without flags.
MIDFUNC(3,jnf_ROL_l_imm,(W4 d, RR4 s, IMM i))
{
	s=readreg(s,4);
	d=writereg(d,4);

	ROR_rri(d,s,(32-(i&0x1f)));

	unlock2(d);
	unlock2(s);
}

// ROL.B #imm with flags.  For a non-zero count RORS sets N and Z, and C is
// then rewritten from bit 0 of the result (the bit that a left rotate moved
// out last).  For a zero count only N and Z are produced by TST and C stays
// cleared.
MIDFUNC(3,jff_ROL_b_imm,(W4 d, RR4 s, IMM i))
{
	s=readreg(s,4);
	d=writereg(d,4);

	LSL_rri(d,s,24);
	ORR_rrrLSRi(d,d,d,8);
	ORR_rrrLSRi(d,d,d,16);
	MSR_CPSRf_i(0);
	if (i) {
		RORS_rri(d,d,(32-(i&0x1f)));

		MRS_CPSR(REG_WORK2);
		TST_ri(d, 1);
		CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG);
		CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG);
		MSR_CPSR_r(REG_WORK2);

	} else {
		TST_rr(d,d);
	}

	unlock2(d);
	unlock2(s);
}

// ROL.W #imm with flags; same scheme as jff_ROL_b_imm.
MIDFUNC(3,jff_ROL_w_imm,(W4 d, RR4 s, IMM i))
{
	s=readreg(s,4);
	d=writereg(d,4);

	LSL_rri(d,s,16);
	ORR_rrrLSRi(d,d,d,16);
	MSR_CPSRf_i(0);
	if (i) {
		RORS_rri(d,d,(32-(i&0x1f)));

		MRS_CPSR(REG_WORK2);
		TST_ri(d, 1);
		CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG);
		CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG);
		MSR_CPSR_r(REG_WORK2);

	} else {
		TST_rr(d,d);
	}

	unlock2(d);
	unlock2(s);
}

// ROL.L #imm with flags; a zero count degenerates to a flag-setting move.
MIDFUNC(3,jff_ROL_l_imm,(W4 d, RR4 s, IMM i))
{
	s=readreg(s,4);
	d=writereg(d,4);

	MSR_CPSRf_i(0);
	if (i) {
		RORS_rri(d,s,(32-(i&0x1f)));

		MRS_CPSR(REG_WORK2);
		TST_ri(d, 1);
		CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG);
		CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG);
		MSR_CPSR_r(REG_WORK2);

	} else {
		MOVS_rr(d,s);
	}

	unlock2(d);
	unlock2(s);
}

// ROL.B Dx,Dy without flags.  A constant count is forwarded to the immediate
// variant; otherwise REG_WORK1 = 32 - (i & 0x1f) is used as the right-rotate
// amount.
MIDFUNC(3,jnf_ROL_b,(W4 d, RR4 s, RR4 i))
{
	if (isconst(i)) {
		COMPCALL(jnf_ROL_b_imm)(d,s,(uae_u8)live.state[i].val);
		return;
	}
	s=readreg(s,4);
	i=readreg(i,4);
	d=writereg(d,4);

	AND_rri(REG_WORK1, i, 0x1f);
	RSB_rri(REG_WORK1, REG_WORK1, 32);

	LSL_rri(d,s,24);
	ORR_rrrLSRi(d,d,d,8);
	ORR_rrrLSRi(d,d,d,16);
	ROR_rrr(d,d,REG_WORK1);

	unlock2(d);
	unlock2(s);
	unlock2(i);
}

// ROL.W Dx,Dy without flags.
MIDFUNC(3,jnf_ROL_w,(W4 d, RR4 s, RR4 i))
{
	if (isconst(i)) {
		COMPCALL(jnf_ROL_w_imm)(d,s,(uae_u8)live.state[i].val);
		return;
	}
	s=readreg(s,4);
	i=readreg(i,4);
	d=writereg(d,4);

	AND_rri(REG_WORK1, i, 0x1f);
	RSB_rri(REG_WORK1, REG_WORK1, 32);

	LSL_rri(d,s,16);
	ORR_rrrLSRi(d,d,d,16);
	ROR_rrr(d,d,REG_WORK1);

	unlock2(d);
	unlock2(s);
	unlock2(i);
}

// ROL.L Dx,Dy without flags.
MIDFUNC(3,jnf_ROL_l,(W4 d, RR4 s, RR4 i))
{
	if (isconst(i)) {
		COMPCALL(jnf_ROL_l_imm)(d,s,(uae_u8)live.state[i].val);
		return;
	}
	s=readreg(s,4);
	i=readreg(i,4);
	d=writereg(d,4);

	AND_rri(REG_WORK1, i, 0x1f);
	RSB_rri(REG_WORK1, REG_WORK1, 32);

	ROR_rrr(d,s,REG_WORK1);

	unlock2(d);
	unlock2(s);
	unlock2(i);
}

// ROL.B Dx,Dy with flags.  After the rotate, C is rewritten from bit 0 of the
// result, as in the immediate variant.
MIDFUNC(3,jff_ROL_b,(W4 d, RR4 s, RR4 i))
{
	if (isconst(i)) {
		COMPCALL(jff_ROL_b_imm)(d,s,(uae_u8)live.state[i].val);
		return;
	}

	s=readreg(s,4);
	i=readreg(i,4);
	d=writereg(d,4);

	AND_rri(REG_WORK1, i, 0x1f);
	RSB_rri(REG_WORK1, REG_WORK1, 32);

	LSL_rri(d,s,24);
	ORR_rrrLSRi(d,d,d,8);
	ORR_rrrLSRi(d,d,d,16);
	MSR_CPSRf_i(0);
	RORS_rrr(d,d,REG_WORK1);

	MRS_CPSR(REG_WORK2);
	TST_ri(d, 1);
	CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG);
	CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG);
	MSR_CPSR_r(REG_WORK2);

	unlock2(d);
	unlock2(s);
	unlock2(i);
}

// ROL.W Dx,Dy with flags.
MIDFUNC(3,jff_ROL_w,(W4 d, RR4 s, RR4 i))
{
	if (isconst(i)) {
		COMPCALL(jff_ROL_w_imm)(d,s,(uae_u8)live.state[i].val);
		return;
	}

	s=readreg(s,4);
	i=readreg(i,4);
	d=writereg(d,4);

	AND_rri(REG_WORK1, i, 0x1f);
	RSB_rri(REG_WORK1, REG_WORK1, 32);

	LSL_rri(d,s,16);
	ORR_rrrLSRi(d,d,d,16);
	MSR_CPSRf_i(0);
	RORS_rrr(d,d,REG_WORK1);

	MRS_CPSR(REG_WORK2);
	TST_ri(d, 1);
	CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG);
	CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG);
	MSR_CPSR_r(REG_WORK2);

	unlock2(d);
	unlock2(s);
	unlock2(i);
}

// ROL.L Dx,Dy with flags.
MIDFUNC(3,jff_ROL_l,(W4 d, RR4 s, RR4 i))
{
	if (isconst(i)) {
		COMPCALL(jff_ROL_l_imm)(d,s,(uae_u8)live.state[i].val);
		return;
	}

	s=readreg(s,4);
	i=readreg(i,4);
	d=writereg(d,4);

	AND_rri(REG_WORK1, i, 0x1f);
	RSB_rri(REG_WORK1, REG_WORK1, 32);

	MSR_CPSRf_i(0);
	RORS_rrr(d,s,REG_WORK1);

	MRS_CPSR(REG_WORK2);
	TST_ri(d, 1);
	CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG);
	CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG);
	MSR_CPSR_r(REG_WORK2);

	unlock2(d);
	unlock2(s);
	unlock2(i);
}

/*
 * ROLW
 * Operand Syntax: <ea>
 *
 * Operand Size: 16
 *
 * X Not affected.
 * N Set if the most significant bit of the result is set. Cleared otherwise.
 * Z Set if the result is zero. Cleared otherwise.
 * V Always cleared.
 * C Set according to the last bit rotated out of the operand. Cleared when the rotate count is zero.
 *
 */
// Memory form of ROL (fixed count of one) without flags: the word is
// replicated into both halves and rotated right by 31.
MIDFUNC(2,jnf_ROLW,(W4 d, RR4 s))
{
	s=readreg(s,4);
	d=writereg(d,4);

	LSL_rri(d,s,16);
	ORR_rrrLSRi(d,d,d,16);
	ROR_rri(d,d,(32-1));

	unlock2(d);
	unlock2(s);
}

// Memory form of ROL with flags; C is rewritten from bit 0 of the result.
MIDFUNC(2,jff_ROLW,(W4 d, RR4 s))
{
	s=readreg(s,4);
	d=writereg(d,4);

	LSL_rri(d,s,16);
	ORR_rrrLSRi(d,d,d,16);
	MSR_CPSRf_i(0);
	RORS_rri(d,d,(32-1));

	MRS_CPSR(REG_WORK2);
	TST_ri(d, 1);
	CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG);
	CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG);
	MSR_CPSR_r(REG_WORK2);

	unlock2(d);
	unlock2(s);
}

/*
 * RORW
 * Operand Syntax: <ea>
 *
 * Operand Size: 16
 *
 * X Not affected.
 * N Set if the most significant bit of the result is set. Cleared otherwise.
 * Z Set if the result is zero. Cleared otherwise.
 * V Always cleared.
+ * C Set according to the last bit rotated out of the operand. + * + */ +MIDFUNC(2,jnf_RORW,(W4 d, RR4 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + LSL_rri(d,s,16); + ORR_rrrLSRi(d,d,d,16); + ROR_rri(d,d,1); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,jff_RORW,(W4 d, RR4 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + LSL_rri(d,s,16); + ORR_rrrLSRi(d,d,d,16); + MSR_CPSRf_i(0); + RORS_rri(d,d,1); + + unlock2(d); + unlock2(s); +} + +/* + * ROXL + * Operand Syntax: Dx, Dy + * #, Dy + * + * Operand Size: 8,16,32 + * + * X Set according to the last bit rotated out of the operand. Cleared when the rotate count is zero. + * N Set if the most significant bit of the result is set. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Always cleared. + * C Set according to the last bit rotated out of the operand. Cleared when the rotate count is zero. + * + */ +MIDFUNC(3,jnf_ROXL_b_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + if (i > 0) { + UNSIGNED8_REG_2_REG(d,s); + LSL_rri(d,d,i); + CC_ORR_rri(NATIVE_CC_CS, d,d, (1 << (i - 1))); + if (i > 1) ORR_rrrLSRi(d,d,d,9); + } else { + MOV_rr(d,s); + } + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jnf_ROXL_w_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + if (i > 0) { + UNSIGNED16_REG_2_REG(d,s); + LSL_rri(d,d,i); + CC_ORR_rri(NATIVE_CC_CS, d,d, (1 << (i - 1))); + if (i > 1) ORR_rrrLSRi(d,d,d,17); + } else { + MOV_rr(d,s); + } + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jnf_ROXL_l_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + if (i > 0) { + LSL_rri(d,s,i); + CC_ORR_rri(NATIVE_CC_CS, d,d, (1 << (i - 1))); + if (i > 1) ORR_rrrLSRi(d,d,s,(32-i)); + } else { + MOV_rr(d,s); + } + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jff_ROXL_b_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + if (i > 0) { + UNSIGNED8_REG_2_REG(d,s); + LSL_rri(d,d,i); + CC_ORR_rri(NATIVE_CC_CS, d,d, (1 << (i - 1))); + if (i > 1) 
ORR_rrrLSRi(d,d,d,9); + TST_ri(s, (1<<(8-i))); + CC_MSR_CPSRf_i(NATIVE_CC_NE, ARM_C_FLAG); + CC_MSR_CPSRf_i(NATIVE_CC_EQ, 0); + } else { + MOV_rr(d,s); + MSR_CPSRf_i(0); + } + + SIGNED8_REG_2_REG(d,d); + TST_rr(d,d); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jff_ROXL_w_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + if (i > 0) { + UNSIGNED16_REG_2_REG(d,s); + LSL_rri(d,d,i); + CC_ORR_rri(NATIVE_CC_CS, d,d, (1 << (i - 1))); + if (i > 1) ORR_rrrLSRi(d,d,d,17); + TST_ri(s, (1<<(16-i))); + CC_MSR_CPSRf_i(NATIVE_CC_NE, ARM_C_FLAG); + CC_MSR_CPSRf_i(NATIVE_CC_EQ, 0); + } else { + MOV_rr(d,s); + MSR_CPSRf_i(0); + } + + SIGNED16_REG_2_REG(d,d); + TST_rr(d,d); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jff_ROXL_l_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + if (i > 0) { + LSL_rri(d,s,i); + CC_ORR_rri(NATIVE_CC_CS, d,d, (1 << (i - 1))); + if (i > 1) ORR_rrrLSRi(d,d,s,(32-i)); + TST_ri(s, (1<<(32-i))); + CC_MSR_CPSRf_i(NATIVE_CC_NE, ARM_C_FLAG); + CC_MSR_CPSRf_i(NATIVE_CC_EQ, 0); + } else { + MOV_rr(d,s); + MSR_CPSRf_i(0); + } + + TST_rr(d,d); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jnf_ROXL_b,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jnf_ROXL_b_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + MOV_rr(d,s); + MRS_CPSR(REG_WORK2); + + AND_rri(REG_WORK1, i, 0x3f); + CMP_ri(REG_WORK1, 36); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 36); + CMP_ri(REG_WORK1, 18); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 18); + CMP_ri(REG_WORK1, 9); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 9); + CMP_ri(REG_WORK1, 0); +#if defined(ARMV6_ASSEMBLY) + BLE_i(8-1); +#else + BLE_i(9-1); +#endif + + SUB_rri(REG_WORK1, REG_WORK1, 1); + LSL_rri(d, d, 1); + MSR_CPSRf_r(REG_WORK2); + CC_ORR_rri(NATIVE_CC_CS, d,d,1); + LSL_rrr(d, d, REG_WORK1); + RSB_rri(REG_WORK1, REG_WORK1, 8); +#if defined(ARMV6_ASSEMBLY) + UXTB_rr(REG_WORK2, s); +#else + 
ROR_rri(REG_WORK2, s, 8); + LSR_rri(REG_WORK2, REG_WORK2, 24); +#endif + ORR_rrrLSRr(d,d,REG_WORK2,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} + +MIDFUNC(3,jnf_ROXL_w,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jnf_ROXL_w_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + UNSIGNED16_REG_2_REG(d,s); + MRS_CPSR(REG_WORK2); + + CMP_ri(REG_WORK1, 34); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 34); + CMP_ri(REG_WORK1, 17); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 17); + CMP_ri(REG_WORK1, 0); +#if defined(ARMV6_ASSEMBLY) + BLE_i(8-1); +#else + BLE_i(9-1); +#endif + + SUB_rri(REG_WORK1, REG_WORK1, 1); + LSL_rri(d, d, 1); + MSR_CPSRf_r(REG_WORK2); + CC_ORR_rri(NATIVE_CC_CS, d,d,1); + LSL_rrr(d, d, REG_WORK1); + RSB_rri(REG_WORK1, REG_WORK1, 16); +#if defined(ARMV6_ASSEMBLY) + UXTH_rr(REG_WORK2, s); +#else + LSL_rri(REG_WORK2, s, 16); + LSR_rri(REG_WORK2, REG_WORK2, 16); +#endif + ORR_rrrLSRr(d,d,REG_WORK2,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} + +MIDFUNC(3,jnf_ROXL_l,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jnf_ROXL_l_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + MOV_rr(d,s); + MRS_CPSR(REG_WORK2); + + CMP_ri(REG_WORK1, 33); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 33); + CMP_ri(REG_WORK1, 0); + BLE_i(7-1); + + SUB_rri(REG_WORK1, REG_WORK1, 1); + LSL_rri(d, d, 1); + MSR_CPSRf_r(REG_WORK2); + CC_ORR_rri(NATIVE_CC_CS, d,d,1); + LSL_rrr(d, d, REG_WORK1); + RSB_rri(REG_WORK1, REG_WORK1, 32); + ORR_rrrLSRr(d,d,s,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} + +MIDFUNC(3,jff_ROXL_b,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jff_ROXL_b_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + MOV_rr(d,s); + MRS_CPSR(REG_WORK2); + + AND_rri(REG_WORK1, i, 0x3f); + CMP_ri(REG_WORK1, 36); + 
CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 36); + CMP_ri(REG_WORK1, 18); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 18); + CMP_ri(REG_WORK1, 9); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 9); + CMP_ri(REG_WORK1, 0); +#if defined(ARMV6_ASSEMBLY) + BLE_i(16-1); // label +#else + BLE_i(17-1); // label +#endif + + SUB_rri(REG_WORK1, REG_WORK1, 1); + LSL_rri(d, d, 1); + MSR_CPSRf_r(REG_WORK2); + CC_ORR_rri(NATIVE_CC_CS, d,d,1); + LSL_rrr(d, d, REG_WORK1); + + MOV_ri(REG_WORK2, 0x80); + LSR_rrr(REG_WORK2, REG_WORK2, REG_WORK1); + PUSH(REG_WORK2); + + RSB_rri(REG_WORK1, REG_WORK1, 8); +#if defined(ARMV6_ASSEMBLY) + UXTB_rr(REG_WORK2, s); +#else + ROR_rri(REG_WORK2, s, 8); + LSR_rri(REG_WORK2, REG_WORK2, 24); +#endif + ORR_rrrLSRr(d,d,REG_WORK2,REG_WORK1); + + POP(REG_WORK2); + TST_rr(s, REG_WORK2); + CC_MSR_CPSRf_i(NATIVE_CC_NE, ARM_C_FLAG); + CC_MSR_CPSRf_i(NATIVE_CC_EQ, 0); + B_i(0); // label2 + +// label: + MSR_CPSRf_i(0); + +// label2: + raw_sign_extend_8_rr(d,d); + TST_rr(d,d); + + unlock2(d); + unlock2(s); + unlock2(i); +} + +MIDFUNC(3,jff_ROXL_w,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jff_ROXL_w_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + MOV_rr(d,s); + MRS_CPSR(REG_WORK2); + + AND_rri(REG_WORK1, i, 0x3f); + CMP_ri(REG_WORK1, 34); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 34); + CMP_ri(REG_WORK1, 17); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 17); + CMP_ri(REG_WORK1, 0); +#if defined(ARMV6_ASSEMBLY) + BLE_i(16-1); // label +#else + BLE_i(17-1); // label +#endif + + SUB_rri(REG_WORK1, REG_WORK1, 1); + LSL_rri(d, d, 1); + MSR_CPSRf_r(REG_WORK2); + CC_ORR_rri(NATIVE_CC_CS, d,d,1); + LSL_rrr(d, d, REG_WORK1); + + MOV_ri(REG_WORK2, 0x8000); + LSR_rrr(REG_WORK2, REG_WORK2, REG_WORK1); + PUSH(REG_WORK2); + +#if defined(ARMV6_ASSEMBLY) + UXTH_rr(REG_WORK2, s); +#else + LSL_rri(REG_WORK2, s, 16); + LSR_rri(REG_WORK2, REG_WORK2, 16); +#endif + + 
RSB_rri(REG_WORK1, REG_WORK1, 16); + ORR_rrrLSRr(d,d,REG_WORK2,REG_WORK1); + + POP(REG_WORK2); + TST_rr(s, REG_WORK2); + CC_MSR_CPSRf_i(NATIVE_CC_NE, ARM_C_FLAG); + CC_MSR_CPSRf_i(NATIVE_CC_EQ, 0); + B_i(0); // label2 + +// label: + MSR_CPSRf_i(0); + +// label2: + SIGNED16_REG_2_REG(d,d); + TST_rr(d,d); + + unlock2(d); + unlock2(s); + unlock2(i); +} + +MIDFUNC(3,jff_ROXL_l,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jff_ROXL_l_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + MOV_rr(d,s); + MRS_CPSR(REG_WORK2); + + AND_rri(REG_WORK1, i, 0x3f); + CMP_ri(REG_WORK1, 33); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 33); + CMP_ri(REG_WORK1, 0); + BLE_i(13-1); // label + + SUB_rri(REG_WORK1, REG_WORK1, 1); + LSL_rri(d, d, 1); + MSR_CPSRf_r(REG_WORK2); + CC_ORR_rri(NATIVE_CC_CS, d,d,1); + LSL_rrr(d, d, REG_WORK1); + + MOV_ri(REG_WORK2, 0x80000000); + LSR_rrr(REG_WORK2, REG_WORK2, REG_WORK1); + + RSB_rri(REG_WORK1, REG_WORK1, 32); + ORR_rrrLSRr(d,d,s,REG_WORK1); + + TST_rr(s, REG_WORK2); + CC_MSR_CPSRf_i(NATIVE_CC_NE, ARM_C_FLAG); + CC_MSR_CPSRf_i(NATIVE_CC_EQ, 0); + B_i(0);// label2 + +// label: + MSR_CPSRf_i(0); + +// label2: + TST_rr(d,d); + + unlock2(d); + unlock2(s); + unlock2(i); +} + +/* + * ROXLW + * Operand Syntax: + * + * Operand Size: 16 + * + * X Not affected. + * N Set if the most significant bit of the result is set. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Always cleared. + * C Set according to the last bit rotated out of the operand. 
+ * + */ +MIDFUNC(2,jnf_ROXLW,(W4 d, RR4 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + LSL_rri(d,s,1); + ADC_rri(d,d,0); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,jff_ROXLW,(W4 d, RR4 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + LSL_rri(d,s,1); + ADC_rri(d,d,0); + MSR_CPSRf_i(0); + LSLS_rri(d,d,15); + LSR_rri(d,d,16); + + unlock2(d); + unlock2(s); +} + +/* + * ROR + * Operand Syntax: Dx, Dy + * #, Dy + * + * + * Operand Size: 8,16,32 + * + * X Not affected. + * N Set if the most significant bit of the result is set. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Always cleared. + * C Set according to the last bit rotated out of the operand. Cleared when the rotate count is zero. + * + */ +MIDFUNC(3,jnf_ROR_b_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + LSL_rri(d,s,24); + ORR_rrrLSRi(d,d,d,8); + ORR_rrrLSRi(d,d,d,16); + ROR_rri(d,d,i); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jnf_ROR_w_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + LSL_rri(d,s,16); + ORR_rrrLSRi(d,d,d,16); + ROR_rri(d,d,i); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jnf_ROR_l_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + ROR_rri(d,s,i); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jff_ROR_b_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + LSL_rri(d,s,24); + ORR_rrrLSRi(d,d,d,8); + ORR_rrrLSRi(d,d,d,16); + MSR_CPSRf_i(0); + RORS_rri(d,d,i); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jff_ROR_w_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + LSL_rri(d,s,16); + ORR_rrrLSRi(d,d,d,16); + MSR_CPSRf_i(0); + RORS_rrr(d,d,i); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jff_ROR_l_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + MSR_CPSRf_i(0); + RORS_rrr(d,s,i); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jnf_ROR_b,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jnf_ROR_b_imm)(d,s,(uae_u8)live.state[i].val); + 
return; + } + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + LSL_rri(d,s,24); + ORR_rrrLSRi(d,d,d,8); + ORR_rrrLSRi(d,d,d,16); + ROR_rrr(d,d,i); + + unlock2(d); + unlock2(s); + unlock2(i); +} + +MIDFUNC(3,jnf_ROR_w,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jnf_ROR_w_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + LSL_rri(d,s,16); + ORR_rrrLSRi(d,d,d,16); + ROR_rrr(d,d,i); + + unlock2(d); + unlock2(s); + unlock2(i); +} + +MIDFUNC(3,jnf_ROR_l,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jnf_ROR_l_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + ROR_rrr(d,s,i); + + unlock2(d); + unlock2(s); + unlock2(i); +} + +MIDFUNC(3,jff_ROR_b,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jff_ROR_b_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + LSL_rri(d,s,24); + ORR_rrrLSRi(d,d,d,8); + ORR_rrrLSRi(d,d,d,16); + MSR_CPSRf_i(0); + AND_rri(REG_WORK1, i, 63); + RORS_rrr(d,d,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} + +MIDFUNC(3,jff_ROR_w,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jff_ROR_w_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + LSL_rri(d,s,16); + ORR_rrrLSRi(d,d,d,16); + MSR_CPSRf_i(0); + AND_rri(REG_WORK1, i, 63); + RORS_rrr(d,d,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} + +MIDFUNC(3,jff_ROR_l,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jff_ROR_l_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + MSR_CPSRf_i(0); + AND_rri(REG_WORK1, i, 63); + RORS_rrr(d,s,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} + +/* + * ROXR + * Operand Syntax: Dx, Dy + * #, Dy + * + * Operand Size: 8,16,32 + * + * X Set according to the last bit rotated out of the operand. 
Cleared when the rotate count is zero. + * N Set if the most significant bit of the result is set. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Always cleared. + * C Set according to the last bit rotated out of the operand. Cleared when the rotate count is zero. + * + */ +MIDFUNC(3,jnf_ROXR_b_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + if (i > 0) { + LSR_rri(d,s,i); + CC_ORR_rri(NATIVE_CC_CS, d,d, (0x80 >> (i - 1))); + if (i > 1) ORR_rrrLSLi(d,d,s,(9-i)); + } else { + MOV_rr(d,s); + } + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jnf_ROXR_w_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + if (i > 0) { + LSR_rri(d,s,i); + CC_ORR_rri(NATIVE_CC_CS, d,d, (0x8000 >> (i - 1))); + if (i > 1) ORR_rrrLSLi(d,d,s,(17-i)); + } else { + MOV_rr(d,s); + } + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jnf_ROXR_l_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + if (i > 0) { + LSR_rri(d,s,i); + CC_ORR_rri(NATIVE_CC_CS, d,d, (0x80000000 >> (i - 1))); + if (i > 1) ORR_rrrLSLi(d,d,s,(33-i)); + } else { + MOV_rr(d,s); + } + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jff_ROXR_b_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + if (i > 0) { + UNSIGNED8_REG_2_REG(d,s); + LSR_rri(d,d,i); + CC_ORR_rri(NATIVE_CC_CS, d,d, (0x80 >> (i - 1))); + if (i > 1) ORR_rrrLSLi(d,d,s,(9-i)); + TST_ri(s, (1<<(i-1))); + CC_MSR_CPSRf_i(NATIVE_CC_NE, ARM_C_FLAG); + CC_MSR_CPSRf_i(NATIVE_CC_EQ, 0); + } else { + MOV_rr(d,s); + MSR_CPSRf_i(0); + } + + SIGNED8_REG_2_REG(d,d); + TST_rr(d,d); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jff_ROXR_w_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + if (i > 0) { + UNSIGNED16_REG_2_REG(d,s); + LSR_rri(d,d,i); + CC_ORR_rri(NATIVE_CC_CS, d,d, (0x8000 >> (i - 1))); + if (i > 1) ORR_rrrLSLi(d,d,s,(17-i)); + TST_ri(s, (1<<(i-1))); + CC_MSR_CPSRf_i(NATIVE_CC_NE, ARM_C_FLAG); + CC_MSR_CPSRf_i(NATIVE_CC_EQ, 0); + } else { + MOV_rr(d,s); 
+ MSR_CPSRf_i(0); + } + + SIGNED16_REG_2_REG(d,d); + TST_rr(d,d); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jff_ROXR_l_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + if (i > 0) { + LSR_rri(d,s,i); + CC_ORR_rri(NATIVE_CC_CS, d,d, (0x80000000 >> (i - 1))); + if (i > 1) ORR_rrrLSLi(d,d,s,(33-i)); + TST_ri(s, (1<<(i-1))); + CC_MSR_CPSRf_i(NATIVE_CC_NE, ARM_C_FLAG); + CC_MSR_CPSRf_i(NATIVE_CC_EQ, 0); + } else { + MOV_rr(d,s); + MSR_CPSRf_i(0); + } + + TST_rr(d,d); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,jnf_ROXR_b,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jnf_ROXR_b_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + UNSIGNED8_REG_2_REG(d,s); + MRS_CPSR(REG_WORK2); + + AND_rri(REG_WORK1, i, 0x3f); + CMP_ri(REG_WORK1, 36); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 36); + CMP_ri(REG_WORK1, 18); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 18); + CMP_ri(REG_WORK1, 9); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 9); + CMP_ri(REG_WORK1, 0); + BLE_i(7-1); + + SUB_rri(REG_WORK1, REG_WORK1, 1); + LSR_rri(d, d, 1); + MSR_CPSRf_r(REG_WORK2); + CC_ORR_rri(NATIVE_CC_CS, d,d,0x80); + LSR_rrr(d, d, REG_WORK1); + RSB_rri(REG_WORK1, REG_WORK1, 8); + ORR_rrrLSLr(d,d,s,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} + +MIDFUNC(3,jnf_ROXR_w,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jnf_ROXR_w_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + UNSIGNED16_REG_2_REG(d,s); + MRS_CPSR(REG_WORK2); + + CMP_ri(REG_WORK1, 34); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 34); + CMP_ri(REG_WORK1, 17); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 17); + CMP_ri(REG_WORK1, 0); + BLE_i(7-1); + + SUB_rri(REG_WORK1, REG_WORK1, 1); + LSR_rri(d, d, 1); + MSR_CPSRf_r(REG_WORK2); + CC_ORR_rri(NATIVE_CC_CS, d,d,0x8000); + LSR_rrr(d, d, REG_WORK1); + RSB_rri(REG_WORK1, REG_WORK1, 16); + 
ORR_rrrLSLr(d,d,s,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} + +MIDFUNC(3,jnf_ROXR_l,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jnf_ROXR_l_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + MOV_rr(d,s); + MRS_CPSR(REG_WORK2); + + CMP_ri(REG_WORK1, 33); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 33); + CMP_ri(REG_WORK1, 0); + BLE_i(7-1); + + SUB_rri(REG_WORK1, REG_WORK1, 1); + LSR_rri(d, d, 1); + MSR_CPSRf_r(REG_WORK2); + CC_ORR_rri(NATIVE_CC_CS, d,d,0x80000000); + LSR_rrr(d, d, REG_WORK1); + RSB_rri(REG_WORK1, REG_WORK1, 32); + ORR_rrrLSLr(d,d,s,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} + +MIDFUNC(3,jff_ROXR_b,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jff_ROXR_b_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + UNSIGNED8_REG_2_REG(d,s); + MRS_CPSR(REG_WORK2); + + AND_rri(REG_WORK1, i, 0x3f); + CMP_ri(REG_WORK1, 36); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 36); + CMP_ri(REG_WORK1, 18); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 18); + CMP_ri(REG_WORK1, 9); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 9); + CMP_ri(REG_WORK1, 0); + BLE_i(13-1); // label + + SUB_rri(REG_WORK1, REG_WORK1, 1); + LSR_rri(d, d, 1); + MSR_CPSRf_r(REG_WORK2); + CC_ORR_rri(NATIVE_CC_CS, d,d,0x80); + LSR_rrr(d, d, REG_WORK1); + + MOV_ri(REG_WORK2, 1); + LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1); + + RSB_rri(REG_WORK1, REG_WORK1, 8); + ORR_rrrLSLr(d,d,s,REG_WORK1); + + TST_rr(s, REG_WORK2); + CC_MSR_CPSRf_i(NATIVE_CC_NE, ARM_C_FLAG); + CC_MSR_CPSRf_i(NATIVE_CC_EQ, 0); + B_i(0);// label2 + +// label: + MSR_CPSRf_i(0); + +// label2: + SIGNED8_REG_2_REG(d,d); + TST_rr(d,d); + + unlock2(d); + unlock2(s); + unlock2(i); +} + +MIDFUNC(3,jff_ROXR_w,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jff_ROXR_w_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + + s=readreg(s,4); + 
i=readreg(i,4); + d=writereg(d,4); + + UNSIGNED16_REG_2_REG(d,s); + MRS_CPSR(REG_WORK2); + + AND_rri(REG_WORK1, i, 0x3f); + CMP_ri(REG_WORK1, 34); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 34); + CMP_ri(REG_WORK1, 17); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 17); + CMP_ri(REG_WORK1, 0); + BLE_i(13-1); // label + + SUB_rri(REG_WORK1, REG_WORK1, 1); + LSR_rri(d, d, 1); + MSR_CPSRf_r(REG_WORK2); + CC_ORR_rri(NATIVE_CC_CS, d,d,0x8000); + LSR_rrr(d, d, REG_WORK1); + + MOV_ri(REG_WORK2, 1); + LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1); + + RSB_rri(REG_WORK1, REG_WORK1, 16); + ORR_rrrLSLr(d,d,s,REG_WORK1); + + TST_rr(s, REG_WORK2); + CC_MSR_CPSRf_i(NATIVE_CC_NE, ARM_C_FLAG); + CC_MSR_CPSRf_i(NATIVE_CC_EQ, 0); + B_i(0);// label2 + +// label: + MSR_CPSRf_i(0); + +// label2: + SIGNED16_REG_2_REG(d,d); + TST_rr(d,d); + + unlock2(d); + unlock2(s); + unlock2(i); +} + +MIDFUNC(3,jff_ROXR_l,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jff_ROXR_l_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + MOV_rr(d,s); + MRS_CPSR(REG_WORK2); + + AND_rri(REG_WORK1, i, 0x3f); + CMP_ri(REG_WORK1, 33); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 33); + CMP_ri(REG_WORK1, 0); + BLE_i(13-1); // label + + SUB_rri(REG_WORK1, REG_WORK1, 1); + LSR_rri(d, d, 1); + MSR_CPSRf_r(REG_WORK2); + CC_ORR_rri(NATIVE_CC_CS, d,d,0x80000000); + LSR_rrr(d, d, REG_WORK1); + + MOV_ri(REG_WORK2, 1); + LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1); + + RSB_rri(REG_WORK1, REG_WORK1, 32); + ORR_rrrLSLr(d,d,s,REG_WORK1); + + TST_rr(s, REG_WORK2); + CC_MSR_CPSRf_i(NATIVE_CC_NE, ARM_C_FLAG); + CC_MSR_CPSRf_i(NATIVE_CC_EQ, 0); + B_i(0);// label2 + +// label: + MSR_CPSRf_i(0); + +// label2: + TST_rr(d,d); + + unlock2(d); + unlock2(s); + unlock2(i); +} + +/* + * ROXRW + * Operand Syntax: + * + * Operand Size: 16 + * + * X Not affected. + * N Set if the most significant bit of the result is set. Cleared otherwise. 
+ * Z Set if the result is zero. Cleared otherwise.
+ * V Always cleared.
+ * C Set according to the last bit rotated out of the operand.
+ *
+ */
+MIDFUNC(2,jnf_ROXRW,(W4 d, RR4 s)) // one-bit rotate right through carry of the low 16 bits of s; flags untouched
+{
+	s=readreg(s,4);
+	d=writereg(d,4);
+
+	LSL_rri(d,s,16); // move the word up into bits 31..16
+	RRX_rr(d,d); // shift right by one; the carry flag enters at bit 31
+	LSR_rri(d,d,16); // bring the word back down (old carry is now bit 15)
+
+	unlock2(d);
+	unlock2(s);
+}
+
+MIDFUNC(2,jff_ROXRW,(W4 d, RR4 s)) // flag-setting variant of jnf_ROXRW
+{
+	s=readreg(s,4);
+	d=writereg(d,4);
+
+	LSL_rri(d,s,16);
+	MSR_CPSRf_i(0); // NOTE(review): this also clears C right before RRXS, so a 0 is always rotated in -- confirm that X is not expected in C here
+	RRXS_rr(d,d); // NOTE(review): bit 0 is always 0 after the LSL above, so the carry-out looks to be always 0 -- verify the C result
+	LSR_rri(d,d,16);
+
+	unlock2(d);
+	unlock2(s);
+}
+
+/*
+ * SUB
+ * Operand Syntax: , Dn
+ * Dn,
+ *
+ * Operand Size: 8,16,32
+ *
+ * X Set the same as the carry bit.
+ * N Set if the result is negative. Cleared otherwise.
+ * Z Set if the result is zero. Cleared otherwise.
+ * V Set if an overflow is generated. Cleared otherwise.
+ * C Set if a carry is generated. Cleared otherwise.
+ *
+ * Implementation notes:
+ *  - The register forms forward to the matching _imm form when v is a
+ *    known constant (isconst(v)).
+ *  - The jnf_*_imm forms fold the whole subtraction at translation time
+ *    (set_const) when s is itself a known constant.
+ *  - The jff_ byte/word forms sign-extend both operands before SUBS so the
+ *    32-bit ARM flags reflect the narrow operation.
+ *  - After SUBS the jff_ forms toggle ARM_C_FLAG in CPSR: ARM leaves C set
+ *    when no borrow occurred, the opposite of the borrow-style carry
+ *    described above.
+ *
+ */
+MIDFUNC(3,jnf_SUB_b_imm,(W4 d, RR4 s, IMM v))
+{
+	if (isconst(s)) {
+		set_const(d,live.state[s].val-v); // both operands known: no code emitted
+		return;
+	}
+
+	s=readreg(s,4);
+	d=writereg(d,4);
+
+	UNSIGNED8_IMM_2_REG(REG_WORK1, (uint8)v);
+	SUB_rrr(d,s,REG_WORK1);
+
+	unlock2(d);
+	unlock2(s);
+}
+
+MIDFUNC(3,jnf_SUB_b,(W4 d, RR4 s, RR4 v))
+{
+	if (isconst(v)) {
+		COMPCALL(jnf_SUB_b_imm)(d,s,live.state[v].val);
+		return;
+	}
+
+	// d has to be different to s and v
+	v=readreg(v,4);
+	s=readreg(s,4);
+	d=writereg(d,4);
+
+	SUB_rrr(d,s,v);
+
+	unlock2(d);
+	unlock2(s);
+	unlock2(v);
+}
+
+MIDFUNC(3,jnf_SUB_w_imm,(W4 d, RR4 s, IMM v))
+{
+	if (isconst(s)) {
+		set_const(d,live.state[s].val-v); // both operands known: no code emitted
+		return;
+	}
+
+	s=readreg(s,4);
+	d=writereg(d,4);
+
+	UNSIGNED16_IMM_2_REG(REG_WORK1, (uint16)v);
+	SUB_rrr(d,s,REG_WORK1);
+
+	unlock2(d);
+	unlock2(s);
+}
+
+MIDFUNC(3,jnf_SUB_w,(W4 d, RR4 s, RR4 v))
+{
+	if (isconst(v)) {
+		COMPCALL(jnf_SUB_w_imm)(d,s,live.state[v].val);
+		return;
+	}
+
+	// d has to be different to s and v
+	v=readreg(v,4);
+	s=readreg(s,4);
+	d=writereg(d,4);
+
+	SUB_rrr(d,s,v);
+
+	unlock2(d);
+	unlock2(s);
+	unlock2(v);
+}
+
+MIDFUNC(3,jnf_SUB_l_imm,(W4 d, RR4 s, IMM v))
+{
+	if (isconst(s)) {
+		set_const(d,live.state[s].val-v); // both operands known: no code emitted
+		return;
+	}
+
+	s=readreg(s,4);
+	d=writereg(d,4);
+
+	compemu_raw_mov_l_ri(REG_WORK1, v);
+	SUB_rrr(d,s,REG_WORK1);
+
+	unlock2(d);
+	unlock2(s);
+}
+
+MIDFUNC(3,jnf_SUB_l,(W4 d, RR4 s, RR4 v))
+{
+	if (isconst(v)) {
+		COMPCALL(jnf_SUB_l_imm)(d,s,live.state[v].val);
+		return;
+	}
+
+	// d has to be different to s and v
+	v=readreg(v,4);
+	s=readreg(s,4);
+	d=writereg(d,4);
+
+	SUB_rrr(d,s,v);
+
+	unlock2(d);
+	unlock2(s);
+	unlock2(v);
+}
+
+MIDFUNC(3,jff_SUB_b_imm,(W4 d, RR1 s, IMM v))
+{
+	s=readreg(s,4);
+	d=writereg(d,4);
+
+	SIGNED8_IMM_2_REG(REG_WORK2, (uint8)v);
+	SIGNED8_REG_2_REG(REG_WORK1, s);
+	SUBS_rrr(d,REG_WORK1,REG_WORK2);
+
+	// Todo: Handle this with inverted carry
+	MRS_CPSR(REG_WORK1);// read CPSR into REG_WORK1
+	EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);// toggle the carry bit
+	MSR_CPSR_r(REG_WORK1);// write the result back to CPSR
+	// inverted_carry = true;
+
+	unlock2(d);
+	unlock2(s);
+}
+
+MIDFUNC(3,jff_SUB_b,(W4 d, RR1 s, RR1 v))
+{
+	if (isconst(v)) {
+		COMPCALL(jff_SUB_b_imm)(d,s,live.state[v].val);
+		return;
+	}
+
+	v=readreg(v,4);
+	s=readreg(s,4);
+	d=writereg(d,4);
+
+	SIGNED8_REG_2_REG(REG_WORK1, s);
+	SIGNED8_REG_2_REG(REG_WORK2, v);
+	SUBS_rrr(d,REG_WORK1,REG_WORK2);
+
+	// Todo: Handle this with inverted carry
+	MRS_CPSR(REG_WORK1);// read CPSR into REG_WORK1
+	EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);// toggle the carry bit
+	MSR_CPSR_r(REG_WORK1);// write the result back to CPSR
+	// inverted_carry = true;
+
+	unlock2(d);
+	unlock2(s);
+	unlock2(v);
+}
+
+MIDFUNC(3,jff_SUB_w_imm,(W4 d, RR2 s, IMM v))
+{
+	s=readreg(s,4);
+	d=writereg(d,4);
+
+	SIGNED16_IMM_2_REG(REG_WORK2, (uint16)v);
+	SIGNED16_REG_2_REG(REG_WORK1, s);
+	SUBS_rrr(d,REG_WORK1,REG_WORK2);
+
+	// Todo: Handle this with inverted carry
+	MRS_CPSR(REG_WORK1);// read CPSR into REG_WORK1
+	EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);// toggle the carry bit
+	MSR_CPSR_r(REG_WORK1);// write the result back to CPSR
+	// inverted_carry = true;
+
+	unlock2(d);
+	unlock2(s);
+}
+
+MIDFUNC(3,jff_SUB_w,(W4 d, RR2 s, RR2 v))
+{
+	if (isconst(v)) {
+		COMPCALL(jff_SUB_w_imm)(d,s,live.state[v].val);
+		return;
+	}
+
+	v=readreg(v,4);
+	s=readreg(s,4);
+	d=writereg(d,4);
+
+	SIGNED16_REG_2_REG(REG_WORK1, s);
+	SIGNED16_REG_2_REG(REG_WORK2, v);
+	SUBS_rrr(d,REG_WORK1,REG_WORK2);
+
+	// Todo: Handle this with inverted carry
+	MRS_CPSR(REG_WORK1);// read CPSR into REG_WORK1
+	EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);// toggle the carry bit
+	MSR_CPSR_r(REG_WORK1);// write the result back to CPSR
+	// inverted_carry = true;
+
+	unlock2(d);
+	unlock2(s);
+	unlock2(v);
+}
+
+MIDFUNC(3,jff_SUB_l_imm,(W4 d, RR4 s, IMM v))
+{
+	s=readreg(s,4);
+	d=writereg(d,4);
+
+	compemu_raw_mov_l_ri(REG_WORK2, v);
+	SUBS_rrr(d,s,REG_WORK2);
+
+	// Todo: Handle this with inverted carry
+	MRS_CPSR(REG_WORK1);// read CPSR into REG_WORK1
+	EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);// toggle the carry bit
+	MSR_CPSR_r(REG_WORK1);// write the result back to CPSR
+	// inverted_carry = true;
+
+	unlock2(d);
+	unlock2(s);
+}
+
+MIDFUNC(3,jff_SUB_l,(W4 d, RR4 s, RR4 v))
+{
+	if (isconst(v)) {
+		COMPCALL(jff_SUB_l_imm)(d,s,live.state[v].val);
+		return;
+	}
+
+	v=readreg(v,4);
+	s=readreg(s,4);
+	d=writereg(d,4);
+
+	SUBS_rrr(d,s,v);
+
+	// Todo: Handle this with inverted carry
+	MRS_CPSR(REG_WORK1);// read CPSR into REG_WORK1
+	EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);// toggle the carry bit
+	MSR_CPSR_r(REG_WORK1);// write the result back to CPSR
+	// inverted_carry = true;
+
+	unlock2(d);
+	unlock2(s);
+	unlock2(v);
+}
+
+/*
+ * SUBA
+ *
+ * Operand Syntax: , Dn
+ *
+ * Operand Size: 16,32
+ *
+ * Flags: Not affected.
+ *
+ */
+MIDFUNC(2,jnf_SUBA_b,(W4 d, RR1 s)) // d -= sign-extended low byte of s
+{
+	s=readreg(s,4);
+	d=rmw(d,4,4);
+
+	SIGNED8_REG_2_REG(REG_WORK1,s);
+	SUB_rrr(d,d,REG_WORK1);
+
+	unlock2(d);
+	unlock2(s);
+}
+
+MIDFUNC(2,jnf_SUBA_w,(W4 d, RR2 s)) // d -= sign-extended low word of s
+{
+	s=readreg(s,4);
+	d=rmw(d,4,4);
+
+	SIGNED16_REG_2_REG(REG_WORK1,s);
+	SUB_rrr(d,d,REG_WORK1);
+
+	unlock2(d);
+	unlock2(s);
+}
+
+MIDFUNC(2,jnf_SUBA_l,(W4 d, RR4 s)) // d -= s, full 32 bits
+{
+	s=readreg(s,4);
+	d=rmw(d,4,4);
+
+	SUB_rrr(d,d,s);
+
+	unlock2(d);
+	unlock2(s);
+}
+
+/*
+ * SUBX
+ * Operand Syntax: Dy, Dx
+ * -(Ay), -(Ax)
+ *
+ * Operand Size: 8,16,32
+ *
+ * X Set the same as the carry bit.
+ * N Set if the result is negative. Cleared otherwise.
+ * Z Cleared if the result is nonzero. Unchanged otherwise.
+ * V Set if an overflow is generated. Cleared otherwise.
+ * C Set if a carry is generated. Cleared otherwise.
+ *
+ * Attention: Z is cleared only if the result is nonzero. Unchanged otherwise
+ *
+ * Implementation notes:
+ *  - Before SBCS the jff_ forms build a mask from the incoming Z flag: all
+ *    ones when Z is set, all ones except ARM_Z_FLAG when it is clear.
+ *    ANDing the new CPSR value with that mask lets Z survive only if it
+ *    was already set before the subtraction.
+ *  - ARM_C_FLAG is toggled after SBCS (ARM leaves C set when no borrow
+ *    occurred).
+ *  - The byte/word forms PUSH/POP the mask because both work registers are
+ *    needed for the sign-extended operands; the long form keeps it in
+ *    REG_WORK2.
+ *
+ */
+MIDFUNC(3,jnf_SUBX,(W4 d, RR4 s, RR4 v)) // d = s - v with carry-in (SBC); flags not updated
+{
+	s=readreg(s,4);
+	v=readreg(v,4);
+	d=writereg(d,4);
+
+	SBC_rrr(d,s,v);
+
+	unlock2(d);
+	unlock2(s);
+	unlock2(v);
+}
+
+MIDFUNC(3,jff_SUBX_b,(W4 d, RR1 s, RR1 v))
+{
+	s=readreg(s,4);
+	v=readreg(v,4);
+	d=writereg(d,4);
+
+	MRS_CPSR(REG_WORK1); // NOTE(review): this value is replaced by whichever conditional MVN below executes
+	CC_MVN_ri(NATIVE_CC_EQ, REG_WORK1, 0); // Z was set: mask keeps every flag
+	CC_MVN_ri(NATIVE_CC_NE, REG_WORK1, ARM_Z_FLAG); // Z was clear: mask forces Z to stay clear
+	PUSH(REG_WORK1);
+
+	SIGNED8_REG_2_REG(REG_WORK1, s);
+	SIGNED8_REG_2_REG(REG_WORK2, v);
+	SBCS_rrr(d,REG_WORK1,REG_WORK2);
+
+	POP(REG_WORK2);
+	MRS_CPSR(REG_WORK1);
+	EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); // toggle the carry bit
+	AND_rrr(REG_WORK1, REG_WORK1, REG_WORK2); // apply the Z mask
+	MSR_CPSR_r(REG_WORK1);
+
+	unlock2(d);
+	unlock2(s);
+	unlock2(v);
+}
+
+MIDFUNC(3,jff_SUBX_w,(W4 d, RR2 s, RR2 v))
+{
+	s=readreg(s,4);
+	v=readreg(v,4);
+	d=writereg(d,4);
+
+	MRS_CPSR(REG_WORK1); // NOTE(review): this value is replaced by whichever conditional MVN below executes
+	CC_MVN_ri(NATIVE_CC_EQ, REG_WORK1, 0); // Z was set: mask keeps every flag
+	CC_MVN_ri(NATIVE_CC_NE, REG_WORK1, ARM_Z_FLAG); // Z was clear: mask forces Z to stay clear
+	PUSH(REG_WORK1);
+
+	SIGNED16_REG_2_REG(REG_WORK1, s);
+	SIGNED16_REG_2_REG(REG_WORK2, v);
+	SBCS_rrr(d,REG_WORK1,REG_WORK2);
+
+	POP(REG_WORK2);
+	MRS_CPSR(REG_WORK1);
+	EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); // toggle the carry bit
+	AND_rrr(REG_WORK1, REG_WORK1, REG_WORK2); // apply the Z mask
+	MSR_CPSR_r(REG_WORK1);
+
+	unlock2(d);
+	unlock2(s);
+	unlock2(v);
+}
+
+MIDFUNC(3,jff_SUBX_l,(W4 d, RR4 s, RR4 v))
+{
+	s=readreg(s,4);
+	v=readreg(v,4);
+	d=writereg(d,4);
+
+	MRS_CPSR(REG_WORK2); // NOTE(review): this value is replaced by whichever conditional MVN below executes
+	CC_MVN_ri(NATIVE_CC_EQ, REG_WORK2, 0); // Z was set: mask keeps every flag
+	CC_MVN_ri(NATIVE_CC_NE, REG_WORK2, ARM_Z_FLAG); // Z was clear: mask forces Z to stay clear
+
+	SBCS_rrr(d,s,v);
+
+	MRS_CPSR(REG_WORK1);
+	EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); // toggle the carry bit
+	AND_rrr(REG_WORK1, REG_WORK1, REG_WORK2); // apply the Z mask
+	MSR_CPSR_r(REG_WORK1);
+
+	unlock2(d);
+	unlock2(s);
+	unlock2(v);
+}
+
+/*
+ * SWAP
+ * Operand Syntax: Dn
+ *
+ * Operand Size: 16
+ *
+ * X Not affected.
+ * N Set if the most significant bit of the 32-bit result is set. Cleared otherwise.
+ * Z Set if the 32-bit result is zero. Cleared otherwise.
+ * V Always cleared.
+ * C Always cleared.
+ *
+ * Implemented as a 32-bit rotate by 16, which exchanges the two halves.
+ *
+ */
+MIDFUNC(1,jnf_SWAP,(RW4 d))
+{
+	d=rmw(d,4,4);
+
+	ROR_rri(d,d,16);
+
+	unlock2(d);
+}
+
+MIDFUNC(1,jff_SWAP,(RW4 d))
+{
+	d=rmw(d,4,4);
+
+	ROR_rri(d,d,16);
+	MSR_CPSRf_i(0); // start from cleared flags so V and C end up 0
+	TST_rr(d,d); // then derive N and Z from the 32-bit result
+
+	unlock2(d);
+}
+
+/*
+ * TST
+ * Operand Syntax:
+ *
+ * Operand Size: 8,16,32
+ *
+ * X Not affected.
+ * N Set if the operand is negative. Cleared otherwise.
+ * Z Set if the operand is zero. Cleared otherwise.
+ * V Always cleared.
+ * C Always cleared.
+ *
+ */
+MIDFUNC(1,jff_TST_b,(RR1 s)) // set N/Z from the sign-extended low byte of s; V and C end up cleared
+{
+	if (isconst(s)) {
+		SIGNED8_IMM_2_REG(REG_WORK1, (uint8)live.state[s].val); // known constant: load it directly
+	} else {
+		s=readreg(s,4);
+		SIGNED8_REG_2_REG(REG_WORK1, s);
+		unlock2(s);
+	}
+	MSR_CPSRf_i(0); // clear all flags first
+	TST_rr(REG_WORK1,REG_WORK1);
+}
+
+MIDFUNC(1,jff_TST_w,(RR2 s)) // word counterpart of jff_TST_b
+{
+	if (isconst(s)) {
+		SIGNED16_IMM_2_REG(REG_WORK1, (uint16)live.state[s].val); // known constant: load it directly
+	} else {
+		s=readreg(s,4);
+		SIGNED16_REG_2_REG(REG_WORK1, s);
+		unlock2(s);
+	}
+	MSR_CPSRf_i(0); // clear all flags first
+	TST_rr(REG_WORK1,REG_WORK1);
+}
+
+MIDFUNC(1,jff_TST_l,(RR4 s)) // long counterpart; no extension needed
+{
+	MSR_CPSRf_i(0); // clear all flags first
+
+	if (isconst(s)) {
+		compemu_raw_mov_l_ri(REG_WORK1, live.state[s].val);
+		TST_rr(REG_WORK1,REG_WORK1);
+	}
+	else {
+		s=readreg(s,4);
+		TST_rr(s,s);
+		unlock2(s);
+	}
+}
diff --git a/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_arm2.h b/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_arm2.h
new file mode 100644
index 00000000..ecbc2fdf
--- /dev/null
+++ b/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_arm2.h
@@ -0,0 +1,348 @@
+/*
+ * compiler/compemu_midfunc_arm2.h - Native MIDFUNCS for ARM (JIT v2)
+ *
+ * Copyright (c) 2014 Jens Heitmann of ARAnyM dev team (see AUTHORS)
+ *
+ * Inspired by Christian Bauer's Basilisk II
+ *
+ * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
+ *
+ * Adaptation for Basilisk II and improvements, copyright 2000-2002
+ * Gwenole Beauchesne
+ *
+ * Basilisk II (C) 1997-2002 Christian Bauer
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Note:
+ * File is included by compemu.h
+ *
+ */
+
+// Arm optimized midfunc prototypes. Each DECLARE_MIDFUNC line is expected to
+// repeat the name and parameter tags (W4, RW4, RR1/RR2/RR4, IMM) of a MIDFUNC
+// definition -- presumably in compemu_midfunc_arm2.cpp; verify when editing.
+extern const uae_u32 ARM_CCR_MAP[];
+
+DECLARE_MIDFUNC(restore_inverted_carry(void));
+
+// ADD
+DECLARE_MIDFUNC(jnf_ADD(W4 d, RR4 s, RR4 v));
+DECLARE_MIDFUNC(jnf_ADD_imm(W4 d, RR4 s, IMM v));
+DECLARE_MIDFUNC(jff_ADD_b(W4 d, RR1 s, RR1 v));
+DECLARE_MIDFUNC(jff_ADD_w(W4 d, RR2 s, RR2 v));
+DECLARE_MIDFUNC(jff_ADD_l(W4 d, RR4 s, RR4 v));
+DECLARE_MIDFUNC(jff_ADD_b_imm(W4 d, RR1 s, IMM v));
+DECLARE_MIDFUNC(jff_ADD_w_imm(W4 d, RR2 s, IMM v));
+DECLARE_MIDFUNC(jff_ADD_l_imm(W4 d, RR4 s, IMM v));
+
+// ADDA
+DECLARE_MIDFUNC(jnf_ADDA_b(W4 d, RR1 s));
+DECLARE_MIDFUNC(jnf_ADDA_w(W4 d, RR2 s));
+DECLARE_MIDFUNC(jnf_ADDA_l(W4 d, RR4 s));
+
+// ADDX
+DECLARE_MIDFUNC(jnf_ADDX(W4 d, RR4 s, RR4 v));
+DECLARE_MIDFUNC(jff_ADDX_b(W4 d, RR1 s, RR4 v));
+DECLARE_MIDFUNC(jff_ADDX_w(W4 d, RR2 s, RR4 v));
+DECLARE_MIDFUNC(jff_ADDX_l(W4 d, RR4 s, RR4 v));
+
+// AND
+DECLARE_MIDFUNC(jnf_AND(W4 d, RR4 s, RR4 v));
+DECLARE_MIDFUNC(jff_AND_b(W4 d, RR1 s, RR1 v));
+DECLARE_MIDFUNC(jff_AND_w(W4 d, RR2 s, RR2 v));
+DECLARE_MIDFUNC(jff_AND_l(W4 d, RR4 s, RR4 v));
+
+// ANDSR
+DECLARE_MIDFUNC(jff_ANDSR(IMM s, IMM x));
+
+// ASL (_imm: constant shift count; _reg: count taken from a register)
+DECLARE_MIDFUNC(jff_ASL_b_imm(W4 d, RR4 s, IMM i));
+DECLARE_MIDFUNC(jff_ASL_w_imm(W4 d, RR4 s, IMM i));
+DECLARE_MIDFUNC(jff_ASL_l_imm(W4 d, RR4 s, IMM i));
+DECLARE_MIDFUNC(jff_ASL_b_reg(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jff_ASL_w_reg(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jff_ASL_l_reg(W4 d, RR4 s, RR4 i));
+
+// ASLW
+DECLARE_MIDFUNC(jff_ASLW(W4 d, RR4 s));
+DECLARE_MIDFUNC(jnf_ASLW(W4 d, RR4 s));
+
+// ASR
+DECLARE_MIDFUNC(jnf_ASR_b_imm(W4 d, RR4 s, IMM i));
+DECLARE_MIDFUNC(jnf_ASR_w_imm(W4 d, RR4 s, IMM i));
+DECLARE_MIDFUNC(jnf_ASR_l_imm(W4 d, RR4 s, IMM i));
+DECLARE_MIDFUNC(jff_ASR_b_imm(W4 d, RR4 s, IMM i)); // _imm: the count i is a constant (IMM)
+DECLARE_MIDFUNC(jff_ASR_w_imm(W4 d, RR4 s, IMM i));
+DECLARE_MIDFUNC(jff_ASR_l_imm(W4 d, RR4 s, IMM i));
+DECLARE_MIDFUNC(jnf_ASR_b_reg(W4 d, RR4 s, RR4 i)); // _reg: the count i is read from a register (RR4)
+DECLARE_MIDFUNC(jnf_ASR_w_reg(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jnf_ASR_l_reg(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jff_ASR_b_reg(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jff_ASR_w_reg(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jff_ASR_l_reg(W4 d, RR4 s, RR4 i));
+
+// ASRW
+DECLARE_MIDFUNC(jff_ASRW(W4 d, RR4 s));
+DECLARE_MIDFUNC(jnf_ASRW(W4 d, RR4 s));
+
+// BCHG (d is tagged RW4, i.e. read and written; s is the bit number, constant or register)
+DECLARE_MIDFUNC(jnf_BCHG_b_imm(RW4 d, IMM s));
+DECLARE_MIDFUNC(jnf_BCHG_l_imm(RW4 d, IMM s));
+
+DECLARE_MIDFUNC(jff_BCHG_b_imm(RW4 d, IMM s));
+DECLARE_MIDFUNC(jff_BCHG_l_imm(RW4 d, IMM s));
+
+DECLARE_MIDFUNC(jnf_BCHG_b(RW4 d, RR4 s));
+DECLARE_MIDFUNC(jnf_BCHG_l(RW4 d, RR4 s));
+
+DECLARE_MIDFUNC(jff_BCHG_b(RW4 d, RR4 s));
+DECLARE_MIDFUNC(jff_BCHG_l(RW4 d, RR4 s));
+
+// BCLR
+DECLARE_MIDFUNC(jnf_BCLR_b_imm(RW4 d, IMM s));
+DECLARE_MIDFUNC(jnf_BCLR_l_imm(RW4 d, IMM s));
+
+DECLARE_MIDFUNC(jnf_BCLR_b(RW4 d, RR4 s));
+DECLARE_MIDFUNC(jnf_BCLR_l(RW4 d, RR4 s));
+
+DECLARE_MIDFUNC(jff_BCLR_b_imm(RW4 d, IMM s));
+DECLARE_MIDFUNC(jff_BCLR_l_imm(RW4 d, IMM s));
+
+DECLARE_MIDFUNC(jff_BCLR_b(RW4 d, RR4 s));
+DECLARE_MIDFUNC(jff_BCLR_l(RW4 d, RR4 s));
+
+// BSET
+DECLARE_MIDFUNC(jnf_BSET_b_imm(RW4 d, IMM s));
+DECLARE_MIDFUNC(jnf_BSET_l_imm(RW4 d, IMM s));
+
+DECLARE_MIDFUNC(jnf_BSET_b(RW4 d, RR4 s));
+DECLARE_MIDFUNC(jnf_BSET_l(RW4 d, RR4 s));
+
+DECLARE_MIDFUNC(jff_BSET_b_imm(RW4 d, IMM s));
+DECLARE_MIDFUNC(jff_BSET_l_imm(RW4 d, IMM s));
+
+DECLARE_MIDFUNC(jff_BSET_b(RW4 d, RR4 s));
+DECLARE_MIDFUNC(jff_BSET_l(RW4 d, RR4 s));
+
+// BTST (only jff_ forms are declared; d is tagged RR4, i.e. read-only)
+DECLARE_MIDFUNC(jff_BTST_b_imm(RR4 d, IMM s));
+DECLARE_MIDFUNC(jff_BTST_l_imm(RR4 d, IMM s));
+
+DECLARE_MIDFUNC(jff_BTST_b(RR4 d, RR4 s));
+DECLARE_MIDFUNC(jff_BTST_l(RR4 d, RR4 s));
+
+// CLR
+DECLARE_MIDFUNC (jnf_CLR(W4 d));
+DECLARE_MIDFUNC (jff_CLR(W4 d));
+
+// CMP (only jff_ forms are declared; both operands are tagged RR*, i.e. read-only)
+DECLARE_MIDFUNC(jff_CMP_b(RR1 d, RR1 s));
+DECLARE_MIDFUNC(jff_CMP_w(RR2 d, RR2 s));
+DECLARE_MIDFUNC(jff_CMP_l(RR4 d, RR4 s));
+
+// CMPA
+DECLARE_MIDFUNC(jff_CMPA_b(RR1 d, RR1 s));
+DECLARE_MIDFUNC(jff_CMPA_w(RR2 d, RR2 s));
+DECLARE_MIDFUNC(jff_CMPA_l(RR4 d, RR4 s));
+
+// EOR
+DECLARE_MIDFUNC(jnf_EOR(W4 d, RR4 s, RR4 v));
+DECLARE_MIDFUNC(jff_EOR_b(W4 d, RR1 s, RR1 v));
+DECLARE_MIDFUNC(jff_EOR_w(W4 d, RR2 s, RR2 v));
+DECLARE_MIDFUNC(jff_EOR_l(W4 d, RR4 s, RR4 v));
+
+// EORSR
+DECLARE_MIDFUNC(jff_EORSR(IMM s, IMM x));
+
+// EXT
+DECLARE_MIDFUNC(jnf_EXT_b(W4 d, RR4 s));
+DECLARE_MIDFUNC(jnf_EXT_w(W4 d, RR4 s));
+DECLARE_MIDFUNC(jnf_EXT_l(W4 d, RR4 s));
+DECLARE_MIDFUNC(jff_EXT_b(W4 d, RR4 s));
+DECLARE_MIDFUNC(jff_EXT_w(W4 d, RR4 s));
+DECLARE_MIDFUNC(jff_EXT_l(W4 d, RR4 s));
+
+// LSL (_imm: constant shift count; _reg: count taken from a register)
+DECLARE_MIDFUNC(jnf_LSL_imm(W4 d, RR4 s, IMM i));
+DECLARE_MIDFUNC(jnf_LSL_reg(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jff_LSL_b_imm(W4 d, RR4 s, IMM i));
+DECLARE_MIDFUNC(jff_LSL_w_imm(W4 d, RR4 s, IMM i));
+DECLARE_MIDFUNC(jff_LSL_l_imm(W4 d, RR4 s, IMM i));
+DECLARE_MIDFUNC(jff_LSL_b_reg(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jff_LSL_w_reg(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jff_LSL_l_reg(W4 d, RR4 s, RR4 i));
+
+// LSLW
+DECLARE_MIDFUNC(jff_LSLW(W4 d, RR4 s));
+DECLARE_MIDFUNC(jnf_LSLW(W4 d, RR4 s));
+
+// LSR
+DECLARE_MIDFUNC(jnf_LSR_b_imm(W4 d, RR4 s, IMM i));
+DECLARE_MIDFUNC(jnf_LSR_w_imm(W4 d, RR4 s, IMM i));
+DECLARE_MIDFUNC(jnf_LSR_l_imm(W4 d, RR4 s, IMM i));
+DECLARE_MIDFUNC(jff_LSR_b_imm(W4 d, RR4 s, IMM i));
+DECLARE_MIDFUNC(jff_LSR_w_imm(W4 d, RR4 s, IMM i));
+DECLARE_MIDFUNC(jff_LSR_l_imm(W4 d, RR4 s, IMM i));
+DECLARE_MIDFUNC(jnf_LSR_b_reg(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jnf_LSR_w_reg(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jnf_LSR_l_reg(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jff_LSR_b_reg(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jff_LSR_w_reg(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jff_LSR_l_reg(W4 d, RR4 s, RR4 i));
+
+// LSRW
+DECLARE_MIDFUNC(jff_LSRW(W4 d, RR4 s));
+DECLARE_MIDFUNC(jnf_LSRW(W4 d, RR4 s));
+
+// MOVE
+DECLARE_MIDFUNC(jnf_MOVE(W4 d, RR4 s));
+DECLARE_MIDFUNC(jff_MOVE_b_imm(W4 d, IMM i));
+DECLARE_MIDFUNC(jff_MOVE_w_imm(W4 d, IMM i));
+DECLARE_MIDFUNC(jff_MOVE_l_imm(W4 d, IMM i));
+DECLARE_MIDFUNC(jff_MOVE_b(W4 d, RR1 s));
+DECLARE_MIDFUNC(jff_MOVE_w(W4 d, RR2 s));
+DECLARE_MIDFUNC(jff_MOVE_l(W4 d, RR4 s));
+
+// MOVE16
+DECLARE_MIDFUNC(jnf_MOVE16(RR4 d, RR4 s));
+
+// MOVEA
+DECLARE_MIDFUNC(jnf_MOVEA_w(W4 d, RR2 s));
+DECLARE_MIDFUNC(jnf_MOVEA_l(W4 d, RR4 s));
+
+// MULS (the 64-bit forms tag both operands RW4 -- presumably the result is returned in the register pair; TODO confirm)
+DECLARE_MIDFUNC (jnf_MULS(RW4 d, RR4 s));
+DECLARE_MIDFUNC (jff_MULS(RW4 d, RR4 s));
+DECLARE_MIDFUNC (jnf_MULS32(RW4 d, RR4 s));
+DECLARE_MIDFUNC (jff_MULS32(RW4 d, RR4 s));
+DECLARE_MIDFUNC (jnf_MULS64(RW4 d, RW4 s));
+DECLARE_MIDFUNC (jff_MULS64(RW4 d, RW4 s));
+
+// MULU
+DECLARE_MIDFUNC (jnf_MULU(RW4 d, RR4 s));
+DECLARE_MIDFUNC (jff_MULU(RW4 d, RR4 s));
+DECLARE_MIDFUNC (jnf_MULU32(RW4 d, RR4 s));
+DECLARE_MIDFUNC (jff_MULU32(RW4 d, RR4 s));
+DECLARE_MIDFUNC (jnf_MULU64(RW4 d, RW4 s));
+DECLARE_MIDFUNC (jff_MULU64(RW4 d, RW4 s));
+
+// NEG
+DECLARE_MIDFUNC(jnf_NEG(W4 d, RR4 s));
+DECLARE_MIDFUNC(jff_NEG_b(W4 d, RR1 s));
+DECLARE_MIDFUNC(jff_NEG_w(W4 d, RR2 s));
+DECLARE_MIDFUNC(jff_NEG_l(W4 d, RR4 s));
+
+// NEGX
+DECLARE_MIDFUNC(jnf_NEGX(W4 d, RR4 s));
+DECLARE_MIDFUNC(jff_NEGX_b(W4 d, RR1 s));
+DECLARE_MIDFUNC(jff_NEGX_w(W4 d, RR2 s));
+DECLARE_MIDFUNC(jff_NEGX_l(W4 d, RR4 s));
+
+// NOT
+DECLARE_MIDFUNC(jnf_NOT(W4 d, RR4 s));
+DECLARE_MIDFUNC(jff_NOT_b(W4 d, RR1 s));
+DECLARE_MIDFUNC(jff_NOT_w(W4 d, RR2 s));
+DECLARE_MIDFUNC(jff_NOT_l(W4 d, RR4 s));
+
+// OR
+DECLARE_MIDFUNC(jnf_OR(W4 d, RR4 s, RR4 v));
+DECLARE_MIDFUNC(jff_OR_b(W4 d, RR1 s, RR1 v));
+DECLARE_MIDFUNC(jff_OR_w(W4 d, RR2 s, RR2 v));
+DECLARE_MIDFUNC(jff_OR_l(W4 d, RR4 s, RR4 v));
+
+// ORSR
+DECLARE_MIDFUNC(jff_ORSR(IMM s, IMM x));
+
+// ROL
+DECLARE_MIDFUNC(jnf_ROL_b(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jnf_ROL_w(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jnf_ROL_l(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jff_ROL_b(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jff_ROL_w(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jff_ROL_l(W4 d, RR4 s, RR4 i));
+
+// ROLW
+DECLARE_MIDFUNC(jff_ROLW(W4 d, RR4 s));
+DECLARE_MIDFUNC(jnf_ROLW(W4 d, RR4 s));
+
+// RORW
+DECLARE_MIDFUNC(jff_RORW(W4 d, RR4 s));
+DECLARE_MIDFUNC(jnf_RORW(W4 d, RR4 s));
+
+// ROXL
+DECLARE_MIDFUNC(jnf_ROXL_b(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jnf_ROXL_w(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jnf_ROXL_l(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jff_ROXL_b(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jff_ROXL_w(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jff_ROXL_l(W4 d, RR4 s, RR4 i));
+
+// ROXLW
+DECLARE_MIDFUNC(jff_ROXLW(W4 d, RR4 s));
+DECLARE_MIDFUNC(jnf_ROXLW(W4 d, RR4 s));
+
+// ROR
+DECLARE_MIDFUNC(jnf_ROR_b(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jnf_ROR_w(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jnf_ROR_l(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jff_ROR_b(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jff_ROR_w(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jff_ROR_l(W4 d, RR4 s, RR4 i));
+
+// ROXR (count in a register; the definitions forward to *_imm variants, which are not declared here, when the count is a known constant)
+DECLARE_MIDFUNC(jnf_ROXR_b(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jnf_ROXR_w(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jnf_ROXR_l(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jff_ROXR_b(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jff_ROXR_w(W4 d, RR4 s, RR4 i));
+DECLARE_MIDFUNC(jff_ROXR_l(W4 d, RR4 s, RR4 i));
+
+// ROXRW (one-bit rotate right through carry of a 16-bit operand)
+DECLARE_MIDFUNC(jff_ROXRW(W4 d, RR4 s));
+DECLARE_MIDFUNC(jnf_ROXRW(W4 d, RR4 s));
+
+// SUB (the register forms forward to the _imm forms when v is a known constant)
+DECLARE_MIDFUNC(jnf_SUB_b_imm(W4 d, RR4 s, IMM v));
+DECLARE_MIDFUNC(jnf_SUB_b(W4 d, RR4 s, RR4 v));
+DECLARE_MIDFUNC(jnf_SUB_w_imm(W4 d, RR4 s, IMM v));
+DECLARE_MIDFUNC(jnf_SUB_w(W4 d, RR4 s, RR4 v));
+DECLARE_MIDFUNC(jnf_SUB_l_imm(W4 d, RR4 s, IMM v));
+DECLARE_MIDFUNC(jnf_SUB_l(W4 d, RR4 s, RR4 v));
+DECLARE_MIDFUNC(jff_SUB_b(W4 d, RR1 s, RR1 v));
+DECLARE_MIDFUNC(jff_SUB_w(W4 d, RR2 s, RR2 v));
+DECLARE_MIDFUNC(jff_SUB_l(W4 d, RR4 s, RR4 v));
+DECLARE_MIDFUNC(jff_SUB_b_imm(W4 d, RR1 s, IMM v)); +DECLARE_MIDFUNC(jff_SUB_w_imm(W4 d, RR2 s, IMM v)); +DECLARE_MIDFUNC(jff_SUB_l_imm(W4 d, RR4 s, IMM v)); + +// SUBA +DECLARE_MIDFUNC(jnf_SUBA_b(W4 d, RR1 s)); +DECLARE_MIDFUNC(jnf_SUBA_w(W4 d, RR2 s)); +DECLARE_MIDFUNC(jnf_SUBA_l(W4 d, RR4 s)); + +// SUBX +DECLARE_MIDFUNC(jnf_SUBX(W4 d, RR4 s, RR4 v)); +DECLARE_MIDFUNC(jff_SUBX_b(W4 d, RR1 s, RR4 v)); +DECLARE_MIDFUNC(jff_SUBX_w(W4 d, RR2 s, RR4 v)); +DECLARE_MIDFUNC(jff_SUBX_l(W4 d, RR4 s, RR4 v)); + +// SWAP +DECLARE_MIDFUNC (jnf_SWAP(RW4 d)); +DECLARE_MIDFUNC (jff_SWAP(RW4 d)); + +// TST +DECLARE_MIDFUNC (jff_TST_b(RR1 s)); +DECLARE_MIDFUNC (jff_TST_w(RR2 s)); +DECLARE_MIDFUNC (jff_TST_l(RR4 s)); + diff --git a/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_x86.cpp b/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_x86.cpp new file mode 100644 index 00000000..8d35c1b9 --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_x86.cpp @@ -0,0 +1,2838 @@ +/* + * compiler/compemu_midfunc_arm.cpp - Native MIDFUNCS for IA-32 and AMD64 + * + * Copyright (c) 2014 Jens Heitmann of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * + * Adaptation for Basilisk II and improvements, copyright 2000-2002 + * Gwenole Beauchesne + * + * Basilisk II (C) 1997-2002 Christian Bauer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Note:
+ * File is included by compemu_support.cpp
+ *
+ */
+
+static int f_rmw(int r) // lock virtual FP register r for read-modify-write; returns the native register index
+{
+	int n;
+
+	f_make_exclusive(r,0);
+	if (f_isinreg(r)) {
+		n=live.fate[r].realreg; // already held in a native register: reuse it
+	}
+	else
+		n=f_alloc_reg(r,0);
+	live.fate[r].status=DIRTY; // the caller is going to modify it
+	live.fat[n].locked++; // the caller releases this lock when done
+	live.fat[n].touched=touchcnt++;
+	return n;
+}
+
+static void fflags_into_flags_internal(uae_u32 tmp) // emit code moving the FP_RESULT condition into the native flags
+{
+	int r;
+
+	clobber_flags();
+	r=f_readreg(FP_RESULT);
+	if (FFLAG_NREG_CLOBBER_CONDITION) {
+		int tmp2=tmp; // remember the virtual register; tmp becomes a native index below
+		tmp=writereg_specific(tmp,4,FFLAG_NREG); // claim FFLAG_NREG, which the raw sequence overwrites
+		raw_fflags_into_flags(r);
+		unlock2(tmp);
+		forget_about(tmp2); // its content is scratch; do not keep it
+	}
+	else
+		raw_fflags_into_flags(r);
+	f_unlock(r);
+	live_flags();
+}
+
+
+/********************************************************************
+ * CPU functions exposed to gencomp. Both CREATE and EMIT time      *
+ ********************************************************************/
+
+
+/*
+ * RULES FOR HANDLING REGISTERS:
+ *
+ * * In the function headers, order the parameters
+ *     - 1st registers written to
+ *     - 2nd read/modify/write registers
+ *     - 3rd registers read from
+ * * Before calling raw_*, you must call readreg, writereg or rmw for
+ *   each register
+ * * The order for this is
+ *     - 1st call remove_offset for all registers written to with size<4
+ *     - 2nd call readreg for all registers read without offset
+ *     - 3rd call rmw for all rmw registers
+ *     - 4th call readreg_offset for all registers that can handle offsets
+ *     - 5th call get_offset for all the registers from the previous step
+ *     - 6th call writereg for all written-to registers
+ *     - 7th call raw_*
+ *     - 8th unlock2 all registers that were locked
+ */
+
+MIDFUNC(0,live_flags,(void)) // record that the native flags now hold the valid, needed copy
+{
+	live.flags_on_stack=TRASH;
+	live.flags_in_flags=VALID;
+	live.flags_are_important=1;
+}
+
+MIDFUNC(0,dont_care_flags,(void)) // mark the current flag state as not needed
+{
+	live.flags_are_important=0;
+}
+
+/*
+ * store the state of the x86 carry bit into regflags.x,
+ * into the position denoted by FLAGBIT_X
+ */
+MIDFUNC(0,duplicate_carry,(void))
+{
+	evict(FLAGX);
+	make_flags_live_internal();
+#ifdef UAE
+	COMPCALL(setcc_m)((uintptr)live.state[FLAGX].mem + 1, NATIVE_CC_CS); // UAE build: X is stored one byte further on
+#else
+	COMPCALL(setcc_m)((uintptr)live.state[FLAGX].mem, NATIVE_CC_CS);
+#endif
+	log_vwrite(FLAGX);
+}
+
+MIDFUNC(3,setcc_for_cntzero,(RR4 /* cnt */, RR4 data, int size)) // flag fix-up after a shift whose run-time count may be zero
+{
+	uae_u8 *branchadd; // displacement byte of the jz emitted below
+	uae_u8 *branchadd2; // displacement byte of the jmp emitted below
+
+	evict(FLAGX);
+	make_flags_live_internal();
+
+	raw_pushfl(); // save the shift's flags; the test below overwrites them
+	/*
+	 * shift count can only be in CL register; see shrl_b_rr
+	 */
+	raw_test_b_rr(X86_CL, X86_CL);
+	/* if zero, leave X unaffected; carry flag will already be cleared */
+	raw_jz_b_oponly();
+	branchadd = get_target();
+	skip_byte(); // leave room for the displacement, patched further down
+
+	/* shift count was non-zero; update also x-flag */
+	raw_popfl();
+#ifdef UAE
+	COMPCALL(setcc_m)((uintptr)live.state[FLAGX].mem + 1, NATIVE_CC_CS);
+#else
+	COMPCALL(setcc_m)((uintptr)live.state[FLAGX].mem, NATIVE_CC_CS);
+#endif
+	log_vwrite(FLAGX);
+	raw_jmp_b_oponly();
+	branchadd2 = get_target();
+	skip_byte();
+	*branchadd = (uintptr)get_target() - ((uintptr)branchadd + 1); // patch the jz: distance from the byte after its displacement to here
+
+	/* shift count was zero; need to set Z & N flags since the native flags were unaffected */
+	raw_popfl();
+	data = readreg(data, size);
+	switch (size)
+	{
+	case 1: raw_test_b_rr(data, data); break;
+	case 2: raw_test_w_rr(data, data); break;
+	case 4: raw_test_l_rr(data, data); break;
+	}
+	unlock2(data);
+	*branchadd2 = (uintptr)get_target() - ((uintptr)branchadd2 + 1); // patch the jmp so the non-zero path skips the test
+}
+
+/*
+ * Set the x86 carry flag from regflags.x, from the position
+ * denoted by FLAGBIT_X
+ */
+MIDFUNC(0,restore_carry,(void))
+{
+	if (!have_rat_stall) { /* Not a P6 core, i.e. no partial stalls */
+#ifdef UAE
+		bt_l_ri_noclobber(FLAGX, FLAGBIT_X+8);
+#else
+		bt_l_ri_noclobber(FLAGX, FLAGBIT_X);
+#endif
+	}
+	else { /* Avoid the stall the above creates.
+		This is slow on non-P6, though.
+		*/
+#if defined(UAE) || FLAGBIT_X >= 8
+		COMPCALL(rol_w_ri(FLAGX, 16 - FLAGBIT_X));
+#else
+		COMPCALL(rol_b_ri(FLAGX, 8 - FLAGBIT_X));
+#endif
+		isclean(FLAGX);
+	}
+}
+
+MIDFUNC(0,start_needflags,(void))
+{
+	needflags=1;
+}
+
+MIDFUNC(0,end_needflags,(void))
+{
+	needflags=0;
+}
+
+MIDFUNC(0,make_flags_live,(void))
+{
+	make_flags_live_internal();
+}
+
+MIDFUNC(1,fflags_into_flags,(W2 tmp))
+{
+	clobber_flags(); // fflags_into_flags_internal() calls clobber_flags() again itself
+	fflags_into_flags_internal(tmp);
+}
+
+MIDFUNC(2,bt_l_ri,(RR4 r, IMM i)) /* This is defined as only affecting C */
+{
+	int size=4;
+	if (i<16)
+		size=2; // bit lies in the low word: a 16-bit view of r is enough
+	CLOBBER_BT;
+	r=readreg(r,size);
+	raw_bt_l_ri(r,i);
+	unlock2(r);
+}
+
+MIDFUNC(2,bt_l_rr,(RR4 r, RR4 b)) /* This is defined as only affecting C */
+{
+	CLOBBER_BT;
+	r=readreg(r,4);
+	b=readreg(b,4);
+	raw_bt_l_rr(r,b);
+	unlock2(r);
+	unlock2(b);
+}
+
+MIDFUNC(2,btc_l_ri,(RW4 r, IMM i))
+{
+	int size=4;
+	if (i<16)
+		size=2; // bit lies in the low word: a 16-bit view of r is enough
+	CLOBBER_BT;
+	r=rmw(r,size,size);
+	raw_btc_l_ri(r,i);
+	unlock2(r);
+}
+
+MIDFUNC(2,btc_l_rr,(RW4 r, RR4 b))
+{
+	CLOBBER_BT;
+	b=readreg(b,4);
+	r=rmw(r,4,4);
+	raw_btc_l_rr(r,b);
+	unlock2(r);
+	unlock2(b);
+}
+
+MIDFUNC(2,btr_l_ri,(RW4 r, IMM i))
+{
+	int size=4;
+	if (i<16)
+		size=2; // bit lies in the low word: a 16-bit view of r is enough
+	CLOBBER_BT;
+	r=rmw(r,size,size);
+	raw_btr_l_ri(r,i);
+	unlock2(r);
+}
+
+MIDFUNC(2,btr_l_rr,(RW4 r, RR4 b))
+{
+	CLOBBER_BT;
+	b=readreg(b,4);
+	r=rmw(r,4,4);
+	raw_btr_l_rr(r,b);
+	unlock2(r);
+	unlock2(b);
+}
+
+MIDFUNC(2,bts_l_ri,(RW4 r, IMM i))
+{
+	int size=4;
+	if (i<16)
+		size=2; // bit lies in the low word: a 16-bit view of r is enough
+	CLOBBER_BT;
+	r=rmw(r,size,size);
+	raw_bts_l_ri(r,i);
+	unlock2(r);
+}
+
+MIDFUNC(2,bts_l_rr,(RW4 r, RR4 b))
+{
+	CLOBBER_BT;
+	b=readreg(b,4);
+	r=rmw(r,4,4);
+	raw_bts_l_rr(r,b);
+	unlock2(r);
+	unlock2(b);
+}
+
+MIDFUNC(2,mov_l_rm,(W4 d, IMM s))
+{
+	CLOBBER_MOV;
+	d=writereg(d,4);
+	raw_mov_l_rm(d,s);
+	unlock2(d);
+}
+
+MIDFUNC(1,call_r,(RR4 r)) /* Clobbering is implicit */
+{
+	r=readreg(r,4);
+	raw_dec_sp(STACK_SHADOW_SPACE); // reserve STACK_SHADOW_SPACE bytes for the duration of the call
+	raw_call_r(r);
+	raw_inc_sp(STACK_SHADOW_SPACE);
+	unlock2(r);
+}
+
+MIDFUNC(2,sub_l_mi,(IMM d, IMM s))
+{
+	CLOBBER_SUB;
+	raw_sub_l_mi(d,s) ;
+}
+
+MIDFUNC(2,mov_l_mi,(IMM d, IMM s))
+{
+	CLOBBER_MOV;
+	raw_mov_l_mi(d,s) ;
+}
+
+MIDFUNC(2,mov_w_mi,(IMM d, IMM s))
+{
+	CLOBBER_MOV;
+	raw_mov_w_mi(d,s) ;
+}
+
+MIDFUNC(2,mov_b_mi,(IMM d, IMM s))
+{
+	CLOBBER_MOV;
+	raw_mov_b_mi(d,s) ;
+}
+
+MIDFUNC(2,rol_b_ri,(RW1 r, IMM i))
+{
+	if (!i && !needflags)
+		return; // rotate by zero and nobody needs the flags: emit nothing
+	CLOBBER_ROL;
+	r=rmw(r,1,1);
+	raw_rol_b_ri(r,i);
+	unlock2(r);
+}
+
+MIDFUNC(2,rol_w_ri,(RW2 r, IMM i))
+{
+	if (!i && !needflags)
+		return; // rotate by zero and nobody needs the flags: emit nothing
+	CLOBBER_ROL;
+	r=rmw(r,2,2);
+	raw_rol_w_ri(r,i);
+	unlock2(r);
+}
+
+MIDFUNC(2,rol_l_ri,(RW4 r, IMM i))
+{
+	if (!i && !needflags)
+		return; // rotate by zero and nobody needs the flags: emit nothing
+	CLOBBER_ROL;
+	r=rmw(r,4,4);
+	raw_rol_l_ri(r,i);
+	unlock2(r);
+}
+
+MIDFUNC(2,rol_l_rr,(RW4 d, RR1 r))
+{
+	if (isconst(r) && (uae_u8)live.state[r].val != 0) { // known non-zero count: use the immediate form
+		COMPCALL(rol_l_ri)(d,(uae_u8)live.state[r].val);
+		return;
+	}
+	CLOBBER_ROL;
+	r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+	d=rmw(d,4,4);
+	Dif (r!=X86_CL) {
+		jit_abort("Illegal register %d in rol_l_rr",r);
+	}
+	raw_rol_l_rr(d,r) ;
+	unlock2(r);
+	unlock2(d);
+}
+
+MIDFUNC(2,rol_w_rr,(RW2 d, RR1 r))
+{ /* Can only do this with r==1, i.e. cl */
+
+	if (isconst(r) && (uae_u8)live.state[r].val != 0) { // known non-zero count: use the immediate form
+		COMPCALL(rol_w_ri)(d,(uae_u8)live.state[r].val);
+		return;
+	}
+	CLOBBER_ROL;
+	r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+	d=rmw(d,2,2);
+	Dif (r!=X86_CL) {
+		jit_abort("Illegal register %d in rol_w_rr",r);
+	}
+	raw_rol_w_rr(d,r) ;
+	unlock2(r);
+	unlock2(d);
+}
+
+MIDFUNC(2,rol_b_rr,(RW1 d, RR1 r))
+{ /* Can only do this with r==1, i.e.
cl */ + + if (isconst(r) && (uae_u8)live.state[r].val != 0) { + COMPCALL(rol_b_ri)(d,(uae_u8)live.state[r].val); + return; + } + + CLOBBER_ROL; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,1,1); + Dif (r!=X86_CL) { + jit_abort("Illegal register %d in rol_b_rr",r); + } + raw_rol_b_rr(d,r) ; + unlock2(r); + unlock2(d); +} + + +MIDFUNC(2,shll_l_rr,(RW4 d, RR1 r)) +{ + if (isconst(r) && (uae_u8)live.state[r].val != 0) { + COMPCALL(shll_l_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_SHLL; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,4,4); + Dif (r!=X86_CL) { + jit_abort("Illegal register %d in shll_l_rr",r); + } + raw_shll_l_rr(d,r) ; + unlock2(r); + unlock2(d); +} + +MIDFUNC(2,shll_w_rr,(RW2 d, RR1 r)) +{ /* Can only do this with r==1, i.e. cl */ + + if (isconst(r) && (uae_u8)live.state[r].val != 0) { + COMPCALL(shll_w_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_SHLL; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,2,2); + Dif (r!=X86_CL) { + jit_abort("Illegal register %d in shll_w_rr",r); + } + raw_shll_w_rr(d,r) ; + unlock2(r); + unlock2(d); +} + +MIDFUNC(2,shll_b_rr,(RW1 d, RR1 r)) +{ /* Can only do this with r==1, i.e. 
cl */ + + if (isconst(r) && (uae_u8)live.state[r].val != 0) { + COMPCALL(shll_b_ri)(d,(uae_u8)live.state[r].val); + return; + } + + CLOBBER_SHLL; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,1,1); + Dif (r!=X86_CL) { + jit_abort("Illegal register %d in shll_b_rr",r); + } + raw_shll_b_rr(d,r) ; + unlock2(r); + unlock2(d); +} + + +MIDFUNC(2,ror_b_ri,(RR1 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_ROR; + r=rmw(r,1,1); + raw_ror_b_ri(r,i); + unlock2(r); +} + +MIDFUNC(2,ror_w_ri,(RR2 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_ROR; + r=rmw(r,2,2); + raw_ror_w_ri(r,i); + unlock2(r); +} + +MIDFUNC(2,ror_l_ri,(RR4 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_ROR; + r=rmw(r,4,4); + raw_ror_l_ri(r,i); + unlock2(r); +} + +MIDFUNC(2,ror_l_rr,(RR4 d, RR1 r)) +{ + if (isconst(r) && (uae_u8)live.state[r].val != 0) { + COMPCALL(ror_l_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_ROR; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,4,4); + raw_ror_l_rr(d,r) ; + unlock2(r); + unlock2(d); +} + +MIDFUNC(2,ror_w_rr,(RR2 d, RR1 r)) +{ + if (isconst(r) && (uae_u8)live.state[r].val != 0) { + COMPCALL(ror_w_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_ROR; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,2,2); + raw_ror_w_rr(d,r) ; + unlock2(r); + unlock2(d); +} + +MIDFUNC(2,ror_b_rr,(RR1 d, RR1 r)) +{ + if (isconst(r) && (uae_u8)live.state[r].val != 0) { + COMPCALL(ror_b_ri)(d,(uae_u8)live.state[r].val); + return; + } + + CLOBBER_ROR; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,1,1); + raw_ror_b_rr(d,r) ; + unlock2(r); + unlock2(d); +} + +MIDFUNC(2,shrl_l_rr,(RW4 d, RR1 r)) +{ + if (isconst(r) && (uae_u8)live.state[r].val != 0) { + COMPCALL(shrl_l_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_SHRL; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,4,4); + Dif (r!=X86_CL) { + jit_abort("Illegal register %d in shrl_l_rr",r); + } + raw_shrl_l_rr(d,r) ; + unlock2(r); + unlock2(d); +} + 
+MIDFUNC(2,shrl_w_rr,(RW2 d, RR1 r)) +{ /* Can only do this with r==1, i.e. cl */ + + if (isconst(r) && (uae_u8)live.state[r].val != 0) { + COMPCALL(shrl_w_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_SHRL; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,2,2); + Dif (r!=X86_CL) { + jit_abort("Illegal register %d in shrl_w_rr",r); + } + raw_shrl_w_rr(d,r) ; + unlock2(r); + unlock2(d); +} + +MIDFUNC(2,shrl_b_rr,(RW1 d, RR1 r)) +{ /* Can only do this with r==1, i.e. cl */ + + if (isconst(r) && (uae_u8)live.state[r].val != 0) { + COMPCALL(shrl_b_ri)(d,(uae_u8)live.state[r].val); + return; + } + + CLOBBER_SHRL; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,1,1); + Dif (r!=X86_CL) { + jit_abort("Illegal register %d in shrl_b_rr",r); + } + raw_shrl_b_rr(d,r) ; + unlock2(r); + unlock2(d); +} + + + +MIDFUNC(2,shll_l_ri,(RW4 r, IMM i)) +{ + if (!i && !needflags) + return; + if (isconst(r) && !needflags) { + live.state[r].val<<=i; + return; + } + CLOBBER_SHLL; + r=rmw(r,4,4); + raw_shll_l_ri(r,i); + unlock2(r); +} + +MIDFUNC(2,shll_w_ri,(RW2 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHLL; + r=rmw(r,2,2); + raw_shll_w_ri(r,i); + unlock2(r); +} + +MIDFUNC(2,shll_b_ri,(RW1 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHLL; + r=rmw(r,1,1); + raw_shll_b_ri(r,i); + unlock2(r); +} + +MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i)) +{ + if (!i && !needflags) + return; + if (isconst(r) && !needflags) { + live.state[r].val>>=i; + return; + } + CLOBBER_SHRL; + r=rmw(r,4,4); + raw_shrl_l_ri(r,i); + unlock2(r); +} + +MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHRL; + r=rmw(r,2,2); + raw_shrl_w_ri(r,i); + unlock2(r); +} + +MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHRL; + r=rmw(r,1,1); + raw_shrl_b_ri(r,i); + unlock2(r); +} + +MIDFUNC(2,shra_l_ri,(RW4 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHRA; + r=rmw(r,4,4); + raw_shra_l_ri(r,i); + unlock2(r); +} 
+ +MIDFUNC(2,shra_w_ri,(RW2 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHRA; + r=rmw(r,2,2); + raw_shra_w_ri(r,i); + unlock2(r); +} + +MIDFUNC(2,shra_b_ri,(RW1 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHRA; + r=rmw(r,1,1); + raw_shra_b_ri(r,i); + unlock2(r); +} + +MIDFUNC(2,shra_l_rr,(RW4 d, RR1 r)) +{ + if (isconst(r) && (uae_u8)live.state[r].val != 0) { + COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_SHRA; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,4,4); + Dif (r!=X86_CL) { + jit_abort("Illegal register %d in shra_l_rr",r); + } + raw_shra_l_rr(d,r) ; + unlock2(r); + unlock2(d); +} + +MIDFUNC(2,shra_w_rr,(RW2 d, RR1 r)) +{ /* Can only do this with r==1, i.e. cl */ + + if (isconst(r) && (uae_u8)live.state[r].val != 0) { + COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_SHRA; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,2,2); + Dif (r!=X86_CL) { + jit_abort("Illegal register %d in shra_w_rr",r); + } + raw_shra_w_rr(d,r) ; + unlock2(r); + unlock2(d); +} + +MIDFUNC(2,shra_b_rr,(RW1 d, RR1 r)) +{ /* Can only do this with r==1, i.e. 
cl */ + + if (isconst(r) && (uae_u8)live.state[r].val != 0) { + COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val); + return; + } + + CLOBBER_SHRA; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,1,1); + Dif (r!=X86_CL) { + jit_abort("Illegal register %d in shra_b_rr",r); + } + raw_shra_b_rr(d,r) ; + unlock2(r); + unlock2(d); +} + + +MIDFUNC(2,setcc,(W1 d, IMM cc)) +{ + CLOBBER_SETCC; + d=writereg(d,1); + raw_setcc(d,cc); + unlock2(d); +} + +MIDFUNC(2,setcc_m,(IMM d, IMM cc)) +{ + CLOBBER_SETCC; + raw_setcc_m(d,cc); +} + +MIDFUNC(3,cmov_l_rr,(RW4 d, RR4 s, IMM cc)) +{ + if (d==s) + return; + CLOBBER_CMOV; + s=readreg(s,4); + d=rmw(d,4,4); + raw_cmov_l_rr(d,s,cc); + unlock2(s); + unlock2(d); +} + +MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc)) +{ + CLOBBER_CMOV; + d=rmw(d,4,4); + raw_cmov_l_rm(d,s,cc); + unlock2(d); +} + +MIDFUNC(2,bsf_l_rr,(W4 d, RR4 s)) +{ + CLOBBER_BSF; + s = readreg(s, 4); + d = writereg(d, 4); + raw_bsf_l_rr(d, s); + unlock2(s); + unlock2(d); +} + +/* Set the Z flag depending on the value in s. Note that the + value has to be 0 or -1 (or, more precisely, for non-zero + values, bit 14 must be set)! 
*/ +MIDFUNC(2,simulate_bsf,(W4 tmp, RW4 s)) +{ + CLOBBER_BSF; + s=rmw_specific(s,4,4,FLAG_NREG3); + tmp=writereg(tmp,4); + raw_flags_set_zero(s, tmp); + unlock2(tmp); + unlock2(s); +} + +MIDFUNC(2,imul_32_32,(RW4 d, RR4 s)) +{ + CLOBBER_MUL; + s=readreg(s,4); + d=rmw(d,4,4); + raw_imul_32_32(d,s); + unlock2(s); + unlock2(d); +} + +MIDFUNC(2,imul_64_32,(RW4 d, RW4 s)) +{ + CLOBBER_MUL; + s=rmw_specific(s,4,4,MUL_NREG2); + d=rmw_specific(d,4,4,MUL_NREG1); + raw_imul_64_32(d,s); + unlock2(s); + unlock2(d); +} + +MIDFUNC(2,mul_64_32,(RW4 d, RW4 s)) +{ + CLOBBER_MUL; + s=rmw_specific(s,4,4,MUL_NREG2); + d=rmw_specific(d,4,4,MUL_NREG1); + raw_mul_64_32(d,s); + unlock2(s); + unlock2(d); +} + +MIDFUNC(2,mul_32_32,(RW4 d, RR4 s)) +{ + CLOBBER_MUL; + s=readreg(s,4); + d=rmw(d,4,4); + raw_mul_32_32(d,s); + unlock2(s); + unlock2(d); +} + +#if SIZEOF_VOID_P == 8 +MIDFUNC(2,sign_extend_32_rr,(W4 d, RR2 s)) +{ + int isrmw; + + if (isconst(s)) { + set_const(d,(uae_s32)live.state[s].val); + return; + } + + CLOBBER_SE32; + isrmw=(s==d); + if (!isrmw) { + s=readreg(s,4); + d=writereg(d,4); + } + else { /* If we try to lock this twice, with different sizes, we + are int trouble! */ + s=d=rmw(s,4,4); + } + raw_sign_extend_32_rr(d,s); + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } +} +#endif + +MIDFUNC(2,sign_extend_16_rr,(W4 d, RR2 s)) +{ + int isrmw; + + if (isconst(s)) { + set_const(d,(uae_s32)(uae_s16)live.state[s].val); + return; + } + + CLOBBER_SE16; + isrmw=(s==d); + if (!isrmw) { + s=readreg(s,2); + d=writereg(d,4); + } + else { /* If we try to lock this twice, with different sizes, we + are int trouble! 
*/ + s=d=rmw(s,4,2); + } + raw_sign_extend_16_rr(d,s); + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } +} + +MIDFUNC(2,sign_extend_8_rr,(W4 d, RR1 s)) +{ + int isrmw; + + if (isconst(s)) { + set_const(d,(uae_s32)(uae_s8)live.state[s].val); + return; + } + + isrmw=(s==d); + CLOBBER_SE8; + if (!isrmw) { + s=readreg(s,1); + d=writereg(d,4); + } + else { /* If we try to lock this twice, with different sizes, we + are int trouble! */ + s=d=rmw(s,4,1); + } + + raw_sign_extend_8_rr(d,s); + + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } +} + + +MIDFUNC(2,zero_extend_16_rr,(W4 d, RR2 s)) +{ + int isrmw; + + if (isconst(s)) { + set_const(d,(uae_u32)(uae_u16)live.state[s].val); + return; + } + + isrmw=(s==d); + CLOBBER_ZE16; + if (!isrmw) { + s=readreg(s,2); + d=writereg(d,4); + } + else { /* If we try to lock this twice, with different sizes, we + are int trouble! */ + s=d=rmw(s,4,2); + } + raw_zero_extend_16_rr(d,s); + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } +} + +MIDFUNC(2,zero_extend_8_rr,(W4 d, RR1 s)) +{ + int isrmw; + if (isconst(s)) { + set_const(d,(uae_u32)(uae_u8)live.state[s].val); + return; + } + + isrmw=(s==d); + CLOBBER_ZE8; + if (!isrmw) { + s=readreg(s,1); + d=writereg(d,4); + } + else { /* If we try to lock this twice, with different sizes, we + are int trouble! 
*/ + s=d=rmw(s,4,1); + } + + raw_zero_extend_8_rr(d,s); + + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } +} + +MIDFUNC(2,mov_b_rr,(W1 d, RR1 s)) +{ + if (d==s) + return; + if (isconst(s)) { + COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val); + return; + } + + CLOBBER_MOV; + s=readreg(s,1); + d=writereg(d,1); + raw_mov_b_rr(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,mov_w_rr,(W2 d, RR2 s)) +{ + if (d==s) + return; + if (isconst(s)) { + COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val); + return; + } + + CLOBBER_MOV; + s=readreg(s,2); + d=writereg(d,2); + raw_mov_w_rr(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(4,mov_l_rrm_indexed,(W4 d,RR4 baser, RR4 index, IMM factor)) +{ + CLOBBER_MOV; + baser=readreg(baser,4); + index=readreg(index,4); + d=writereg(d,4); + + raw_mov_l_rrm_indexed(d,baser,index,factor); + unlock2(d); + unlock2(baser); + unlock2(index); +} + +MIDFUNC(4,mov_w_rrm_indexed,(W2 d, RR4 baser, RR4 index, IMM factor)) +{ + CLOBBER_MOV; + baser=readreg(baser,4); + index=readreg(index,4); + d=writereg(d,2); + + raw_mov_w_rrm_indexed(d,baser,index,factor); + unlock2(d); + unlock2(baser); + unlock2(index); +} + +MIDFUNC(4,mov_b_rrm_indexed,(W1 d, RR4 baser, RR4 index, IMM factor)) +{ + CLOBBER_MOV; + baser=readreg(baser,4); + index=readreg(index,4); + d=writereg(d,1); + + raw_mov_b_rrm_indexed(d,baser,index,factor); + + unlock2(d); + unlock2(baser); + unlock2(index); +} + + +MIDFUNC(4,mov_l_mrr_indexed,(RR4 baser, RR4 index, IMM factor, RR4 s)) +{ + CLOBBER_MOV; + baser=readreg(baser,4); + index=readreg(index,4); + s=readreg(s,4); + + Dif (baser==s || index==s) + jit_abort("mov_l_mrr_indexed"); + + + raw_mov_l_mrr_indexed(baser,index,factor,s); + unlock2(s); + unlock2(baser); + unlock2(index); +} + +MIDFUNC(4,mov_w_mrr_indexed,(RR4 baser, RR4 index, IMM factor, RR2 s)) +{ + CLOBBER_MOV; + baser=readreg(baser,4); + index=readreg(index,4); + s=readreg(s,2); + + raw_mov_w_mrr_indexed(baser,index,factor,s); + unlock2(s); + 
unlock2(baser); + unlock2(index); +} + +MIDFUNC(4,mov_b_mrr_indexed,(RR4 baser, RR4 index, IMM factor, RR1 s)) +{ + CLOBBER_MOV; + s=readreg(s,1); + baser=readreg(baser,4); + index=readreg(index,4); + + raw_mov_b_mrr_indexed(baser,index,factor,s); + unlock2(s); + unlock2(baser); + unlock2(index); +} + + +MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, RR4 baser, RR4 index, IMM factor, RR4 s)) +{ + int basereg=baser; + int indexreg=index; + + CLOBBER_MOV; + s=readreg(s,4); + baser=readreg_offset(baser,4); + index=readreg_offset(index,4); + + base+=get_offset(basereg); + base+=factor*get_offset(indexreg); + + raw_mov_l_bmrr_indexed(base,baser,index,factor,s); + unlock2(s); + unlock2(baser); + unlock2(index); +} + +MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, RR4 baser, RR4 index, IMM factor, RR2 s)) +{ + int basereg=baser; + int indexreg=index; + + CLOBBER_MOV; + s=readreg(s,2); + baser=readreg_offset(baser,4); + index=readreg_offset(index,4); + + base+=get_offset(basereg); + base+=factor*get_offset(indexreg); + + raw_mov_w_bmrr_indexed(base,baser,index,factor,s); + unlock2(s); + unlock2(baser); + unlock2(index); +} + +MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, RR4 baser, RR4 index, IMM factor, RR1 s)) +{ + int basereg=baser; + int indexreg=index; + + CLOBBER_MOV; + s=readreg(s,1); + baser=readreg_offset(baser,4); + index=readreg_offset(index,4); + + base+=get_offset(basereg); + base+=factor*get_offset(indexreg); + + raw_mov_b_bmrr_indexed(base,baser,index,factor,s); + unlock2(s); + unlock2(baser); + unlock2(index); +} + + + +/* Read a long from base+baser+factor*index */ +MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, RR4 baser, RR4 index, IMM factor)) +{ + int basereg=baser; + int indexreg=index; + + CLOBBER_MOV; + baser=readreg_offset(baser,4); + index=readreg_offset(index,4); + base+=get_offset(basereg); + base+=factor*get_offset(indexreg); + d=writereg(d,4); + raw_mov_l_brrm_indexed(d,base,baser,index,factor); + unlock2(d); + unlock2(baser); + unlock2(index); +} + + 
+MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, RR4 baser, RR4 index, IMM factor)) +{ + int basereg=baser; + int indexreg=index; + + CLOBBER_MOV; + remove_offset(d,-1); + baser=readreg_offset(baser,4); + index=readreg_offset(index,4); + base+=get_offset(basereg); + base+=factor*get_offset(indexreg); + d=writereg(d,2); + raw_mov_w_brrm_indexed(d,base,baser,index,factor); + unlock2(d); + unlock2(baser); + unlock2(index); +} + + +MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, RR4 baser, RR4 index, IMM factor)) +{ + int basereg=baser; + int indexreg=index; + + CLOBBER_MOV; + remove_offset(d,-1); + baser=readreg_offset(baser,4); + index=readreg_offset(index,4); + base+=get_offset(basereg); + base+=factor*get_offset(indexreg); + d=writereg(d,1); + raw_mov_b_brrm_indexed(d,base,baser,index,factor); + unlock2(d); + unlock2(baser); + unlock2(index); +} + +/* Read a long from base+factor*index */ +MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, RR4 index, IMM factor)) +{ + int indexreg=index; + + if (isconst(index)) { + COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val); + return; + } + + CLOBBER_MOV; + index=readreg_offset(index,4); + base+=get_offset(indexreg)*factor; + d=writereg(d,4); + + raw_mov_l_rm_indexed(d,base,index,factor); + unlock2(index); + unlock2(d); +} + +/* read the long at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_l_rR,(W4 d, RR4 s, IMM offset)) +{ + if (isconst(s)) { + COMPCALL(mov_l_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + s=readreg(s,4); + d=writereg(d,4); + + raw_mov_l_rR(d,s,offset); + unlock2(d); + unlock2(s); +} + +/* read the word at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_w_rR,(W2 d, RR4 s, IMM offset)) +{ + if (isconst(s)) { + COMPCALL(mov_w_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + s=readreg(s,4); + d=writereg(d,2); + + raw_mov_w_rR(d,s,offset); + unlock2(d); + unlock2(s); +} + +/* read the word at the address contained in s+offset and store in d */ 
+MIDFUNC(3,mov_b_rR,(W1 d, RR4 s, IMM offset)) +{ + if (isconst(s)) { + COMPCALL(mov_b_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + s=readreg(s,4); + d=writereg(d,1); + + raw_mov_b_rR(d,s,offset); + unlock2(d); + unlock2(s); +} + +/* read the long at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_l_brR,(W4 d, RR4 s, IMM offset)) +{ + int sreg=s; + if (isconst(s)) { + COMPCALL(mov_l_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + s=readreg_offset(s,4); + offset+=get_offset(sreg); + d=writereg(d,4); + + raw_mov_l_brR(d,s,offset); + unlock2(d); + unlock2(s); +} + +/* read the word at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_w_brR,(W2 d, RR4 s, IMM offset)) +{ + int sreg=s; + if (isconst(s)) { + COMPCALL(mov_w_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + remove_offset(d,-1); + s=readreg_offset(s,4); + offset+=get_offset(sreg); + d=writereg(d,2); + + raw_mov_w_brR(d,s,offset); + unlock2(d); + unlock2(s); +} + +/* read the word at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_b_brR,(W1 d, RR4 s, IMM offset)) +{ + int sreg=s; + if (isconst(s)) { + COMPCALL(mov_b_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + remove_offset(d,-1); + s=readreg_offset(s,4); + offset+=get_offset(sreg); + d=writereg(d,1); + + raw_mov_b_brR(d,s,offset); + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,mov_l_Ri,(RR4 d, IMM i, IMM offset)) +{ + int dreg=d; + if (isconst(d)) { + COMPCALL(mov_l_mi)(live.state[d].val+offset,i); + return; + } + + CLOBBER_MOV; + d=readreg_offset(d,4); + offset+=get_offset(dreg); + raw_mov_l_Ri(d,i,offset); + unlock2(d); +} + +MIDFUNC(3,mov_w_Ri,(RR4 d, IMM i, IMM offset)) +{ + int dreg=d; + if (isconst(d)) { + COMPCALL(mov_w_mi)(live.state[d].val+offset,i); + return; + } + + CLOBBER_MOV; + d=readreg_offset(d,4); + offset+=get_offset(dreg); + raw_mov_w_Ri(d,i,offset); + unlock2(d); +} + +MIDFUNC(3,mov_b_Ri,(RR4 d, IMM i, IMM offset)) +{ + 
int dreg=d; + if (isconst(d)) { + COMPCALL(mov_b_mi)(live.state[d].val+offset,i); + return; + } + + CLOBBER_MOV; + d=readreg_offset(d,4); + offset+=get_offset(dreg); + raw_mov_b_Ri(d,i,offset); + unlock2(d); +} + +/* Warning! OFFSET is byte sized only! */ +MIDFUNC(3,mov_l_Rr,(RR4 d, RR4 s, IMM offset)) +{ + if (isconst(d)) { + COMPCALL(mov_l_mr)(live.state[d].val+offset,s); + return; + } + if (isconst(s)) { + COMPCALL(mov_l_Ri)(d,live.state[s].val,offset); + return; + } + + CLOBBER_MOV; + s=readreg(s,4); + d=readreg(d,4); + + raw_mov_l_Rr(d,s,offset); + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,mov_w_Rr,(RR4 d, RR2 s, IMM offset)) +{ + if (isconst(d)) { + COMPCALL(mov_w_mr)(live.state[d].val+offset,s); + return; + } + if (isconst(s)) { + COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset); + return; + } + + CLOBBER_MOV; + s=readreg(s,2); + d=readreg(d,4); + raw_mov_w_Rr(d,s,offset); + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,mov_b_Rr,(RR4 d, RR1 s, IMM offset)) +{ + if (isconst(d)) { + COMPCALL(mov_b_mr)(live.state[d].val+offset,s); + return; + } + if (isconst(s)) { + COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset); + return; + } + + CLOBBER_MOV; + s=readreg(s,1); + d=readreg(d,4); + raw_mov_b_Rr(d,s,offset); + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,lea_l_brr,(W4 d, RR4 s, IMM offset)) +{ + if (isconst(s)) { + COMPCALL(mov_l_ri)(d,live.state[s].val+offset); + return; + } +#if USE_OFFSET + if (d==s) { + add_offset(d,offset); + return; + } +#endif + CLOBBER_LEA; + s=readreg(s,4); + d=writereg(d,4); + raw_lea_l_brr(d,s,offset); + unlock2(d); + unlock2(s); +} + +MIDFUNC(5,lea_l_brr_indexed,(W4 d, RR4 s, RR4 index, IMM factor, IMM offset)) +{ + if (!offset) { + COMPCALL(lea_l_rr_indexed)(d,s,index,factor); + return; + } + CLOBBER_LEA; + s=readreg(s,4); + index=readreg(index,4); + d=writereg(d,4); + + raw_lea_l_brr_indexed(d,s,index,factor,offset); + unlock2(d); + unlock2(index); + unlock2(s); +} + +MIDFUNC(4,lea_l_rr_indexed,(W4 d, RR4 s, RR4 index, IMM 
factor)) +{ + CLOBBER_LEA; + s=readreg(s,4); + index=readreg(index,4); + d=writereg(d,4); + + raw_lea_l_rr_indexed(d,s,index,factor); + unlock2(d); + unlock2(index); + unlock2(s); +} + +/* write d to the long at the address contained in s+offset */ +MIDFUNC(3,mov_l_bRr,(RR4 d, RR4 s, IMM offset)) +{ + int dreg=d; + if (isconst(d)) { + COMPCALL(mov_l_mr)(live.state[d].val+offset,s); + return; + } + + CLOBBER_MOV; + s=readreg(s,4); + d=readreg_offset(d,4); + offset+=get_offset(dreg); + + raw_mov_l_bRr(d,s,offset); + unlock2(d); + unlock2(s); +} + +/* write the word at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_w_bRr,(RR4 d, RR2 s, IMM offset)) +{ + int dreg=d; + + if (isconst(d)) { + COMPCALL(mov_w_mr)(live.state[d].val+offset,s); + return; + } + + CLOBBER_MOV; + s=readreg(s,2); + d=readreg_offset(d,4); + offset+=get_offset(dreg); + raw_mov_w_bRr(d,s,offset); + unlock2(d); + unlock2(s); +} + +MIDFUNC(3,mov_b_bRr,(RR4 d, RR1 s, IMM offset)) +{ + int dreg=d; + if (isconst(d)) { + COMPCALL(mov_b_mr)(live.state[d].val+offset,s); + return; + } + + CLOBBER_MOV; + s=readreg(s,1); + d=readreg_offset(d,4); + offset+=get_offset(dreg); + raw_mov_b_bRr(d,s,offset); + unlock2(d); + unlock2(s); +} + +MIDFUNC(1,mid_bswap_32,(RW4 r)) +{ + + if (isconst(r)) { + uae_u32 oldv=live.state[r].val; + live.state[r].val=reverse32(oldv); + return; + } + + CLOBBER_SW32; + r=rmw(r,4,4); + raw_bswap_32(r); + unlock2(r); +} + +MIDFUNC(1,mid_bswap_16,(RW2 r)) +{ + if (isconst(r)) { + uae_u32 oldv=live.state[r].val; + live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) | (oldv&0xffff0000); + return; + } + + CLOBBER_SW16; + r=rmw(r,2,2); + + raw_bswap_16(r); + unlock2(r); +} + + + +MIDFUNC(2,mov_l_rr,(W4 d, RR4 s)) +{ + int olds; + + if (d==s) { /* How pointless! 
*/ + return; + } + if (isconst(s)) { + COMPCALL(mov_l_ri)(d,live.state[s].val); + return; + } + olds=s; + disassociate(d); + s=readreg_offset(s,4); + live.state[d].realreg=s; + live.state[d].realind=live.nat[s].nholds; + live.state[d].val=live.state[olds].val; + live.state[d].validsize=4; + live.state[d].dirtysize=4; + set_status(d,DIRTY); + + live.nat[s].holds[live.nat[s].nholds]=d; + live.nat[s].nholds++; + log_clobberreg(d); + jit_log2("Added %d to nreg %d(%d), now holds %d regs", d,s,live.state[d].realind,live.nat[s].nholds); + unlock2(s); +} + +MIDFUNC(2,mov_l_mr,(IMM d, RR4 s)) +{ + if (isconst(s)) { + COMPCALL(mov_l_mi)(d,live.state[s].val); + return; + } + CLOBBER_MOV; + s=readreg(s,4); + + raw_mov_l_mr(d,s); + unlock2(s); +} + + +MIDFUNC(2,mov_w_mr,(IMM d, RR2 s)) +{ + if (isconst(s)) { + COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val); + return; + } + CLOBBER_MOV; + s=readreg(s,2); + + raw_mov_w_mr(d,s); + unlock2(s); +} + +MIDFUNC(2,mov_w_rm,(W2 d, IMM s)) +{ + CLOBBER_MOV; + d=writereg(d,2); + + raw_mov_w_rm(d,s); + unlock2(d); +} + +MIDFUNC(2,mov_b_mr,(IMM d, RR1 s)) +{ + if (isconst(s)) { + COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val); + return; + } + + CLOBBER_MOV; + s=readreg(s,1); + + raw_mov_b_mr(d,s); + unlock2(s); +} + +MIDFUNC(2,mov_b_rm,(W1 d, IMM s)) +{ + CLOBBER_MOV; + d=writereg(d,1); + + raw_mov_b_rm(d,s); + unlock2(d); +} + +MIDFUNC(2,mov_l_ri,(W4 d, IMM s)) +{ + set_const(d,s); + return; +} + +MIDFUNC(2,mov_w_ri,(W2 d, IMM s)) +{ + CLOBBER_MOV; + d=writereg(d,2); + + raw_mov_w_ri(d,s); + unlock2(d); +} + +MIDFUNC(2,mov_b_ri,(W1 d, IMM s)) +{ + CLOBBER_MOV; + d=writereg(d,1); + + raw_mov_b_ri(d,s); + unlock2(d); +} + +MIDFUNC(2,add_l_mi,(IMM d, IMM s)) +{ + CLOBBER_ADD; + raw_add_l_mi(d,s); +} + +MIDFUNC(2,add_w_mi,(IMM d, IMM s)) +{ + CLOBBER_ADD; + raw_add_w_mi(d,s); +} + +MIDFUNC(2,add_b_mi,(IMM d, IMM s)) +{ + CLOBBER_ADD; + raw_add_b_mi(d,s); +} + +MIDFUNC(2,test_l_ri,(RR4 d, IMM i)) +{ + CLOBBER_TEST; + d=readreg(d,4); + + 
raw_test_l_ri(d,i); + unlock2(d); +} + +MIDFUNC(2,test_l_rr,(RR4 d, RR4 s)) +{ + CLOBBER_TEST; + d=readreg(d,4); + s=readreg(s,4); + + raw_test_l_rr(d,s);; + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,test_w_rr,(RR2 d, RR2 s)) +{ + CLOBBER_TEST; + d=readreg(d,2); + s=readreg(s,2); + + raw_test_w_rr(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,test_b_rr,(RR1 d, RR1 s)) +{ + CLOBBER_TEST; + d=readreg(d,1); + s=readreg(s,1); + + raw_test_b_rr(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,test_b_mi,(IMM d, IMM s)) +{ + CLOBBER_TEST; + raw_test_b_mi(d,s); +} + +MIDFUNC(2,and_l_ri,(RW4 d, IMM i)) +{ + if (isconst(d) && !needflags) { + live.state[d].val &= i; + return; + } + + CLOBBER_AND; + d=rmw(d,4,4); + + raw_and_l_ri(d,i); + unlock2(d); +} + +MIDFUNC(2,and_l,(RW4 d, RR4 s)) +{ + CLOBBER_AND; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_and_l(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,and_w,(RW2 d, RR2 s)) +{ + CLOBBER_AND; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_and_w(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,and_b,(RW1 d, RR1 s)) +{ + CLOBBER_AND; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_and_b(d,s); + unlock2(d); + unlock2(s); +} + +// gb-- used for making an fpcr value in compemu_fpp.cpp +MIDFUNC(2,or_l_rm,(RW4 d, IMM s)) +{ + CLOBBER_OR; + d=rmw(d,4,4); + + raw_or_l_rm(d,s); + unlock2(d); +} + +MIDFUNC(2,or_l_ri,(RW4 d, IMM i)) +{ + if (isconst(d) && !needflags) { + live.state[d].val|=i; + return; + } + CLOBBER_OR; + d=rmw(d,4,4); + + raw_or_l_ri(d,i); + unlock2(d); +} + +MIDFUNC(2,or_l,(RW4 d, RR4 s)) +{ + if (isconst(d) && isconst(s) && !needflags) { + live.state[d].val|=live.state[s].val; + return; + } + CLOBBER_OR; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_or_l(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,or_w,(RW2 d, RR2 s)) +{ + CLOBBER_OR; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_or_w(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,or_b,(RW1 d, RR1 s)) +{ + CLOBBER_OR; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_or_b(d,s); + 
unlock2(d); + unlock2(s); +} + +MIDFUNC(2,adc_l,(RW4 d, RR4 s)) +{ + CLOBBER_ADC; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_adc_l(d,s); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,adc_w,(RW2 d, RR2 s)) +{ + CLOBBER_ADC; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_adc_w(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,adc_b,(RW1 d, RR1 s)) +{ + CLOBBER_ADC; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_adc_b(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,add_l,(RW4 d, RR4 s)) +{ + if (isconst(s)) { + COMPCALL(add_l_ri)(d,live.state[s].val); + return; + } + + CLOBBER_ADD; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_add_l(d,s); + + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,add_w,(RW2 d, RR2 s)) +{ + if (isconst(s)) { + COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val); + return; + } + + CLOBBER_ADD; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_add_w(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,add_b,(RW1 d, RR1 s)) +{ + if (isconst(s)) { + COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val); + return; + } + + CLOBBER_ADD; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_add_b(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,sub_l_ri,(RW4 d, IMM i)) +{ + if (!i && !needflags) + return; + if (isconst(d) && !needflags) { + live.state[d].val-=i; + return; + } +#if USE_OFFSET + if (!needflags) { + add_offset(d,-i); + return; + } +#endif + + CLOBBER_SUB; + d=rmw(d,4,4); + + raw_sub_l_ri(d,i); + unlock2(d); +} + +MIDFUNC(2,sub_w_ri,(RW2 d, IMM i)) +{ + if (!i && !needflags) + return; + + CLOBBER_SUB; + d=rmw(d,2,2); + + raw_sub_w_ri(d,i); + unlock2(d); +} + +MIDFUNC(2,sub_b_ri,(RW1 d, IMM i)) +{ + if (!i && !needflags) + return; + + CLOBBER_SUB; + d=rmw(d,1,1); + + raw_sub_b_ri(d,i); + + unlock2(d); +} + +MIDFUNC(2,add_l_ri,(RW4 d, IMM i)) +{ + if (!i && !needflags) + return; + if (isconst(d) && !needflags) { + live.state[d].val+=i; + return; + } +#if USE_OFFSET + if (!needflags) { + add_offset(d,i); + return; + } +#endif + CLOBBER_ADD; + d=rmw(d,4,4); + raw_add_l_ri(d,i); + 
unlock2(d); +} + +MIDFUNC(2,add_w_ri,(RW2 d, IMM i)) +{ + if (!i && !needflags) + return; + + CLOBBER_ADD; + d=rmw(d,2,2); + + raw_add_w_ri(d,i); + unlock2(d); +} + +MIDFUNC(2,add_b_ri,(RW1 d, IMM i)) +{ + if (!i && !needflags) + return; + + CLOBBER_ADD; + d=rmw(d,1,1); + + raw_add_b_ri(d,i); + + unlock2(d); +} + +MIDFUNC(2,sbb_l,(RW4 d, RR4 s)) +{ + CLOBBER_SBB; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_sbb_l(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,sbb_w,(RW2 d, RR2 s)) +{ + CLOBBER_SBB; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_sbb_w(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,sbb_b,(RW1 d, RR1 s)) +{ + CLOBBER_SBB; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_sbb_b(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,sub_l,(RW4 d, RR4 s)) +{ + if (isconst(s)) { + COMPCALL(sub_l_ri)(d,live.state[s].val); + return; + } + + CLOBBER_SUB; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_sub_l(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,sub_w,(RW2 d, RR2 s)) +{ + if (isconst(s)) { + COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val); + return; + } + + CLOBBER_SUB; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_sub_w(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,sub_b,(RW1 d, RR1 s)) +{ + if (isconst(s)) { + COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val); + return; + } + + CLOBBER_SUB; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_sub_b(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,cmp_l,(RR4 d, RR4 s)) +{ + CLOBBER_CMP; + s=readreg(s,4); + d=readreg(d,4); + + raw_cmp_l(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,cmp_l_ri,(RR4 r, IMM i)) +{ + CLOBBER_CMP; + r=readreg(r,4); + + raw_cmp_l_ri(r,i); + unlock2(r); +} + +MIDFUNC(2,cmp_w,(RR2 d, RR2 s)) +{ + CLOBBER_CMP; + s=readreg(s,2); + d=readreg(d,2); + + raw_cmp_w(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,cmp_b,(RR1 d, RR1 s)) +{ + CLOBBER_CMP; + s=readreg(s,1); + d=readreg(d,1); + + raw_cmp_b(d,s); + unlock2(d); + unlock2(s); +} + + +MIDFUNC(2,xor_l,(RW4 d, RR4 s)) +{ + CLOBBER_XOR; + 
s=readreg(s,4); + d=rmw(d,4,4); + + raw_xor_l(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,xor_w,(RW2 d, RR2 s)) +{ + CLOBBER_XOR; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_xor_w(d,s); + unlock2(d); + unlock2(s); +} + +MIDFUNC(2,xor_b,(RW1 d, RR1 s)) +{ + CLOBBER_XOR; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_xor_b(d,s); + unlock2(d); + unlock2(s); +} + +#ifdef UAE +MIDFUNC(5,call_r_11,(W4 out1, RR4 r, RR4 in1, IMM osize, IMM isize)) +{ + clobber_flags(); + remove_all_offsets(); + if (osize==4) { + if (out1!=in1 && out1!=r) { + COMPCALL(forget_about)(out1); + } + } + else { + tomem_c(out1); + } + + in1=readreg_specific(in1,isize,REG_PAR1); + r=readreg(r,4); + prepare_for_call_1(); /* This should ensure that there won't be + any need for swapping nregs in prepare_for_call_2 + */ +#if USE_NORMAL_CALLING_CONVENTION + raw_push_l_r(in1); +#endif + unlock2(in1); + unlock2(r); + + prepare_for_call_2(); + raw_dec_sp(STACK_SHADOW_SPACE); + raw_call_r(r); + raw_inc_sp(STACK_SHADOW_SPACE); + +#if USE_NORMAL_CALLING_CONVENTION + raw_inc_sp(4); +#endif + + + live.nat[REG_RESULT].holds[0]=out1; + live.nat[REG_RESULT].nholds=1; + live.nat[REG_RESULT].touched=touchcnt++; + + live.state[out1].realreg=REG_RESULT; + live.state[out1].realind=0; + live.state[out1].val=0; + live.state[out1].validsize=osize; + live.state[out1].dirtysize=osize; + set_status(out1,DIRTY); +} +#endif + +#if defined(UAE) +MIDFUNC(5,call_r_02,(RR4 r, RR4 in1, RR4 in2, IMM isize1, IMM isize2)) +{ + clobber_flags(); + remove_all_offsets(); + in1=readreg_specific(in1,isize1,REG_PAR1); + in2=readreg_specific(in2,isize2,REG_PAR2); + r=readreg(r,4); + prepare_for_call_1(); /* This should ensure that there won't be + any need for swapping nregs in prepare_for_call_2 + */ +#if USE_NORMAL_CALLING_CONVENTION + raw_push_l_r(in2); + raw_push_l_r(in1); +#endif + unlock2(r); + unlock2(in1); + unlock2(in2); + prepare_for_call_2(); + raw_dec_sp(STACK_SHADOW_SPACE); + raw_call_r(r); + raw_inc_sp(STACK_SHADOW_SPACE); +#if 
USE_NORMAL_CALLING_CONVENTION + raw_inc_sp(8); +#endif +} +#endif + +/* forget_about() takes a mid-layer register */ +MIDFUNC(1,forget_about,(W4 r)) +{ + if (isinreg(r)) + disassociate(r); + live.state[r].val=0; + set_status(r,UNDEF); +} + +MIDFUNC(0,nop,(void)) +{ + raw_emit_nop(); +} + +MIDFUNC(1,f_forget_about,(FW r)) +{ + if (f_isinreg(r)) + f_disassociate(r); + live.fate[r].status=UNDEF; +} + +MIDFUNC(1,fmov_pi,(FW r)) +{ + r=f_writereg(r); + raw_fmov_pi(r); + f_unlock(r); +} + +MIDFUNC(1,fmov_log10_2,(FW r)) +{ + r=f_writereg(r); + raw_fmov_log10_2(r); + f_unlock(r); +} + +MIDFUNC(1,fmov_log2_e,(FW r)) +{ + r=f_writereg(r); + raw_fmov_log2_e(r); + f_unlock(r); +} + +MIDFUNC(1,fmov_loge_2,(FW r)) +{ + r=f_writereg(r); + raw_fmov_loge_2(r); + f_unlock(r); +} + +MIDFUNC(1,fmov_1,(FW r)) +{ + r=f_writereg(r); + raw_fmov_1(r); + f_unlock(r); +} + +MIDFUNC(1,fmov_0,(FW r)) +{ + r=f_writereg(r); + raw_fmov_0(r); + f_unlock(r); +} + +MIDFUNC(2,fmov_rm,(FW r, MEMPTRR m)) +{ + r=f_writereg(r); + raw_fmov_rm(r,m); + f_unlock(r); +} + +MIDFUNC(2,fmovi_rm,(FW r, MEMPTRR m)) +{ + r=f_writereg(r); + raw_fmovi_rm(r,m); + f_unlock(r); +} + +MIDFUNC(2,fmovi_mr,(MEMPTRW m, FR r)) +{ + r=f_readreg(r); + raw_fmovi_mr(m,r); + f_unlock(r); +} + +MIDFUNC(3,fmovi_mrb,(MEMPTRW m, FR r, double *bounds)) +{ + r=f_readreg(r); + raw_fmovi_mrb(m,r,bounds); + f_unlock(r); +} + +MIDFUNC(2,fmovs_rm,(FW r, MEMPTRR m)) +{ + r=f_writereg(r); + raw_fmovs_rm(r,m); + f_unlock(r); +} + +MIDFUNC(2,fmovs_mr,(MEMPTRW m, FR r)) +{ + r=f_readreg(r); + raw_fmovs_mr(m,r); + f_unlock(r); +} + +MIDFUNC(1,fcuts_r,(FRW r)) +{ + r=f_rmw(r); + raw_fcuts_r(r); + f_unlock(r); +} + +MIDFUNC(1,fcut_r,(FRW r)) +{ + r=f_rmw(r); + raw_fcut_r(r); + f_unlock(r); +} + +MIDFUNC(2,fmov_ext_mr,(MEMPTRW m, FR r)) +{ + r=f_readreg(r); + raw_fmov_ext_mr(m,r); + f_unlock(r); +} + +MIDFUNC(2,fmov_mr,(MEMPTRW m, FR r)) +{ + r=f_readreg(r); + raw_fmov_mr(m,r); + f_unlock(r); +} + +MIDFUNC(2,fmov_ext_rm,(FW r, MEMPTRR m)) +{ + 
r=f_writereg(r); + raw_fmov_ext_rm(r,m); + f_unlock(r); +} + +MIDFUNC(2,fmov_rr,(FW d, FR s)) +{ + if (d==s) { /* How pointless! */ + return; + } +#if USE_F_ALIAS + f_disassociate(d); + s=f_readreg(s); + live.fate[d].realreg=s; + live.fate[d].realind=live.fat[s].nholds; + live.fate[d].status=DIRTY; + live.fat[s].holds[live.fat[s].nholds]=d; + live.fat[s].nholds++; + f_unlock(s); +#else + s=f_readreg(s); + d=f_writereg(d); + raw_fmov_rr(d,s); + f_unlock(s); + f_unlock(d); +#endif +} + +MIDFUNC(2,fldcw_m_indexed,(RR4 index, IMM base)) +{ + index=readreg(index,4); + + raw_fldcw_m_indexed(index,base); + unlock2(index); +} + +MIDFUNC(1,ftst_r,(FR r)) +{ + r=f_readreg(r); + raw_ftst_r(r); + f_unlock(r); +} + +MIDFUNC(0,dont_care_fflags,(void)) +{ + f_disassociate(FP_RESULT); +} + +MIDFUNC(2,fsqrt_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fsqrt_rr(d,s); + f_unlock(s); + f_unlock(d); +} + +MIDFUNC(2,fabs_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fabs_rr(d,s); + f_unlock(s); + f_unlock(d); +} + +MIDFUNC(2,fgetexp_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fgetexp_rr(d,s); + f_unlock(s); + f_unlock(d); +} + +MIDFUNC(2,fgetman_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fgetman_rr(d,s); + f_unlock(s); + f_unlock(d); +} + +MIDFUNC(2,fsin_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fsin_rr(d,s); + f_unlock(s); + f_unlock(d); +} + +MIDFUNC(2,fcos_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fcos_rr(d,s); + f_unlock(s); + f_unlock(d); +} + +MIDFUNC(2,ftan_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_ftan_rr(d,s); + f_unlock(s); + f_unlock(d); +} + +MIDFUNC(3,fsincos_rr,(FW d, FW c, FR s)) +{ + s=f_readreg(s); /* s for source */ + d=f_writereg(d); /* d for sine */ + c=f_writereg(c); /* c for cosine */ + raw_fsincos_rr(d,c,s); + f_unlock(s); + f_unlock(d); + f_unlock(c); +} + +MIDFUNC(2,fscale_rr,(FRW d, FR s)) +{ + s=f_readreg(s); + d=f_rmw(d); + 
raw_fscale_rr(d,s); + f_unlock(s); + f_unlock(d); +} + +MIDFUNC(2,ftwotox_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_ftwotox_rr(d,s); + f_unlock(s); + f_unlock(d); +} + +MIDFUNC(2,fetox_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fetox_rr(d,s); + f_unlock(s); + f_unlock(d); +} + +MIDFUNC(2,frndint_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_frndint_rr(d,s); + f_unlock(s); + f_unlock(d); +} + +MIDFUNC(2,fetoxM1_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fetoxM1_rr(d,s); + f_unlock(s); + f_unlock(d); +} + +MIDFUNC(2,ftentox_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_ftentox_rr(d,s); + f_unlock(s); + f_unlock(d); +} + +MIDFUNC(2,flog2_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_flog2_rr(d,s); + f_unlock(s); + f_unlock(d); +} + +MIDFUNC(2,flogN_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_flogN_rr(d,s); + f_unlock(s); + f_unlock(d); +} + +MIDFUNC(2,flogNP1_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_flogNP1_rr(d,s); + f_unlock(s); + f_unlock(d); +} + +MIDFUNC(2,flog10_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_flog10_rr(d,s); + f_unlock(s); + f_unlock(d); +} + +MIDFUNC(2,fasin_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fasin_rr(d,s); + f_unlock(s); + f_unlock(d); +} + +MIDFUNC(2,facos_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_facos_rr(d,s); + f_unlock(s); + f_unlock(d); +} + +MIDFUNC(2,fatan_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fatan_rr(d,s); + f_unlock(s); + f_unlock(d); +} + +MIDFUNC(2,fatanh_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fatanh_rr(d,s); + f_unlock(s); + f_unlock(d); +} + +MIDFUNC(2,fsinh_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fsinh_rr(d,s); + f_unlock(s); + f_unlock(d); +} + +MIDFUNC(2,fcosh_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fcosh_rr(d,s); + 
f_unlock(s); + f_unlock(d); +} + +MIDFUNC(2,ftanh_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_ftanh_rr(d,s); + f_unlock(s); + f_unlock(d); +} + +MIDFUNC(2,fneg_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fneg_rr(d,s); + f_unlock(s); + f_unlock(d); +} + +MIDFUNC(2,fadd_rr,(FRW d, FR s)) +{ + s=f_readreg(s); + d=f_rmw(d); + raw_fadd_rr(d,s); + f_unlock(s); + f_unlock(d); +} + +MIDFUNC(2,fsub_rr,(FRW d, FR s)) +{ + s=f_readreg(s); + d=f_rmw(d); + raw_fsub_rr(d,s); + f_unlock(s); + f_unlock(d); +} + +MIDFUNC(2,fcmp_rr,(FR d, FR s)) +{ + d=f_readreg(d); + s=f_readreg(s); + raw_fcmp_rr(d,s); + f_unlock(s); + f_unlock(d); +} + +MIDFUNC(2,fdiv_rr,(FRW d, FR s)) +{ + s=f_readreg(s); + d=f_rmw(d); + raw_fdiv_rr(d,s); + f_unlock(s); + f_unlock(d); +} + +MIDFUNC(2,frem_rr,(FRW d, FR s)) +{ + s=f_readreg(s); + d=f_rmw(d); + raw_frem_rr(d,s); + f_unlock(s); + f_unlock(d); +} + +MIDFUNC(2,frem1_rr,(FRW d, FR s)) +{ + s=f_readreg(s); + d=f_rmw(d); + raw_frem1_rr(d,s); + f_unlock(s); + f_unlock(d); +} + +MIDFUNC(2,fmul_rr,(FRW d, FR s)) +{ + s=f_readreg(s); + d=f_rmw(d); + raw_fmul_rr(d,s); + f_unlock(s); + f_unlock(d); +} + +#ifdef __GNUC__ + +static inline void mfence(void) +{ +#ifdef CPU_i386 + if (!cpuinfo.x86_has_xmm2) + __asm__ __volatile__("lock; addl $0,0(%%esp)":::"memory"); + else +#endif + __asm__ __volatile__("mfence":::"memory"); +} + +static inline void clflush(volatile void *__p) +{ + __asm__ __volatile__("clflush %0" : "+m" (*(volatile char *)__p)); +} + +static inline void flush_cpu_icache(void *start, void *stop) +{ + mfence(); + if (cpuinfo.x86_clflush_size != 0) + { + volatile char *vaddr = (volatile char *)(((uintptr)start / cpuinfo.x86_clflush_size) * cpuinfo.x86_clflush_size); + volatile char *vend = (volatile char *)((((uintptr)stop + cpuinfo.x86_clflush_size - 1) / cpuinfo.x86_clflush_size) * cpuinfo.x86_clflush_size); + while (vaddr < vend) + { + clflush(vaddr); + vaddr += cpuinfo.x86_clflush_size; + } + } + mfence(); 
+} + +#else + +static inline void flush_cpu_icache(void *start, void *stop) +{ + UNUSED(start); + UNUSED(stop); +} + +#endif + +static inline void write_jmp_target(uae_u32 *jmpaddr, cpuop_func* a) { + uintptr rel = (uintptr) a - ((uintptr) jmpaddr + 4); + *(jmpaddr) = (uae_u32) rel; + flush_cpu_icache((void *) jmpaddr, (void *) &jmpaddr[1]); +} + +static inline void emit_jmp_target(uae_u32 a) { + emit_long(a-((uintptr)target+4)); +} + + +void compemu_bkpt(void) +{ + emit_byte(0xcc); +} diff --git a/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_x86.h b/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_x86.h new file mode 100644 index 00000000..82b75415 --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_x86.h @@ -0,0 +1,254 @@ +/* + * compiler/compemu_midfunc_x86.h - Native MIDFUNCS for IA-32 and AMD64 + * + * Copyright (c) 2014 Jens Heitmann of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * + * Adaptation for Basilisk II and improvements, copyright 2000-2002 + * Gwenole Beauchesne + * + * Basilisk II (C) 1997-2002 Christian Bauer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Note: + * File is included by compemu.h + * + */ + +DECLARE_MIDFUNC(bt_l_ri(RR4 r, IMM i)); +DECLARE_MIDFUNC(bt_l_rr(RR4 r, RR4 b)); +DECLARE_MIDFUNC(btc_l_ri(RW4 r, IMM i)); +DECLARE_MIDFUNC(btc_l_rr(RW4 r, RR4 b)); +DECLARE_MIDFUNC(bts_l_ri(RW4 r, IMM i)); +DECLARE_MIDFUNC(bts_l_rr(RW4 r, RR4 b)); +DECLARE_MIDFUNC(btr_l_ri(RW4 r, IMM i)); +DECLARE_MIDFUNC(btr_l_rr(RW4 r, RR4 b)); +DECLARE_MIDFUNC(mov_l_rm(W4 d, IMM s)); +DECLARE_MIDFUNC(call_r(RR4 r)); +DECLARE_MIDFUNC(sub_l_mi(IMM d, IMM s)); +DECLARE_MIDFUNC(mov_l_mi(IMM d, IMM s)); +DECLARE_MIDFUNC(mov_w_mi(IMM d, IMM s)); +DECLARE_MIDFUNC(mov_b_mi(IMM d, IMM s)); +DECLARE_MIDFUNC(rol_b_ri(RW1 r, IMM i)); +DECLARE_MIDFUNC(rol_w_ri(RW2 r, IMM i)); +DECLARE_MIDFUNC(rol_l_ri(RW4 r, IMM i)); +DECLARE_MIDFUNC(rol_l_rr(RW4 d, RR1 r)); +DECLARE_MIDFUNC(rol_w_rr(RW2 d, RR1 r)); +DECLARE_MIDFUNC(rol_b_rr(RW1 d, RR1 r)); +DECLARE_MIDFUNC(shll_l_rr(RW4 d, RR1 r)); +DECLARE_MIDFUNC(shll_w_rr(RW2 d, RR1 r)); +DECLARE_MIDFUNC(shll_b_rr(RW1 d, RR1 r)); +DECLARE_MIDFUNC(ror_b_ri(RR1 r, IMM i)); +DECLARE_MIDFUNC(ror_w_ri(RR2 r, IMM i)); +DECLARE_MIDFUNC(ror_l_ri(RR4 r, IMM i)); +DECLARE_MIDFUNC(ror_l_rr(RR4 d, RR1 r)); +DECLARE_MIDFUNC(ror_w_rr(RR2 d, RR1 r)); +DECLARE_MIDFUNC(ror_b_rr(RR1 d, RR1 r)); +DECLARE_MIDFUNC(shrl_l_rr(RW4 d, RR1 r)); +DECLARE_MIDFUNC(shrl_w_rr(RW2 d, RR1 r)); +DECLARE_MIDFUNC(shrl_b_rr(RW1 d, RR1 r)); +DECLARE_MIDFUNC(shra_l_rr(RW4 d, RR1 r)); +DECLARE_MIDFUNC(shra_w_rr(RW2 d, RR1 r)); +DECLARE_MIDFUNC(shra_b_rr(RW1 d, RR1 r)); +DECLARE_MIDFUNC(shll_l_ri(RW4 r, IMM i)); +DECLARE_MIDFUNC(shll_w_ri(RW2 r, IMM i)); +DECLARE_MIDFUNC(shll_b_ri(RW1 r, IMM i)); +DECLARE_MIDFUNC(shrl_l_ri(RW4 r, IMM i)); +DECLARE_MIDFUNC(shrl_w_ri(RW2 r, IMM i)); +DECLARE_MIDFUNC(shrl_b_ri(RW1 r, IMM i)); 
+DECLARE_MIDFUNC(shra_l_ri(RW4 r, IMM i));
+DECLARE_MIDFUNC(shra_w_ri(RW2 r, IMM i));
+DECLARE_MIDFUNC(shra_b_ri(RW1 r, IMM i));
+DECLARE_MIDFUNC(setcc(W1 d, IMM cc));
+DECLARE_MIDFUNC(setcc_m(IMM d, IMM cc));
+DECLARE_MIDFUNC(cmov_l_rr(RW4 d, RR4 s, IMM cc));
+DECLARE_MIDFUNC(cmov_l_rm(RW4 d, IMM s, IMM cc));
+DECLARE_MIDFUNC(bsf_l_rr(W4 d, RR4 s));
+DECLARE_MIDFUNC(pop_m(IMM d));
+DECLARE_MIDFUNC(push_m(IMM d));
+DECLARE_MIDFUNC(pop_l(W4 d));
+DECLARE_MIDFUNC(push_l_i(IMM i));
+DECLARE_MIDFUNC(push_l(RR4 s));
+DECLARE_MIDFUNC(clear_16(RW4 r));
+DECLARE_MIDFUNC(clear_8(RW4 r));
+DECLARE_MIDFUNC(sign_extend_32_rr(W4 d, RR2 s));
+DECLARE_MIDFUNC(sign_extend_16_rr(W4 d, RR2 s));
+DECLARE_MIDFUNC(sign_extend_8_rr(W4 d, RR1 s));
+DECLARE_MIDFUNC(zero_extend_16_rr(W4 d, RR2 s));
+DECLARE_MIDFUNC(zero_extend_8_rr(W4 d, RR1 s));
+DECLARE_MIDFUNC(imul_64_32(RW4 d, RW4 s));
+DECLARE_MIDFUNC(mul_64_32(RW4 d, RW4 s));
+DECLARE_MIDFUNC(simulate_bsf(W4 tmp, RW4 s));
+DECLARE_MIDFUNC(imul_32_32(RW4 d, RR4 s));
+DECLARE_MIDFUNC(mul_32_32(RW4 d, RR4 s));
+DECLARE_MIDFUNC(mov_b_rr(W1 d, RR1 s));
+DECLARE_MIDFUNC(mov_w_rr(W2 d, RR2 s));
+DECLARE_MIDFUNC(mov_l_rrm_indexed(W4 d,RR4 baser, RR4 index, IMM factor));
+DECLARE_MIDFUNC(mov_w_rrm_indexed(W2 d, RR4 baser, RR4 index, IMM factor));
+DECLARE_MIDFUNC(mov_b_rrm_indexed(W1 d, RR4 baser, RR4 index, IMM factor));
+DECLARE_MIDFUNC(mov_l_mrr_indexed(RR4 baser, RR4 index, IMM factor, RR4 s));
+DECLARE_MIDFUNC(mov_w_mrr_indexed(RR4 baser, RR4 index, IMM factor, RR2 s));
+DECLARE_MIDFUNC(mov_b_mrr_indexed(RR4 baser, RR4 index, IMM factor, RR1 s));
+DECLARE_MIDFUNC(mov_l_bmrr_indexed(IMM base, RR4 baser, RR4 index, IMM factor, RR4 s));
+DECLARE_MIDFUNC(mov_w_bmrr_indexed(IMM base, RR4 baser, RR4 index, IMM factor, RR2 s));
+DECLARE_MIDFUNC(mov_b_bmrr_indexed(IMM base, RR4 baser, RR4 index, IMM factor, RR1 s));
+DECLARE_MIDFUNC(mov_l_brrm_indexed(W4 d, IMM base, RR4 baser, RR4 index, IMM factor));
+DECLARE_MIDFUNC(mov_w_brrm_indexed(W2 d, IMM base, RR4 baser, RR4 index, IMM factor));
+DECLARE_MIDFUNC(mov_b_brrm_indexed(W1 d, IMM base, RR4 baser, RR4 index, IMM factor));
+DECLARE_MIDFUNC(mov_l_rm_indexed(W4 d, IMM base, RR4 index, IMM factor));
+DECLARE_MIDFUNC(mov_l_rR(W4 d, RR4 s, IMM offset));
+DECLARE_MIDFUNC(mov_w_rR(W2 d, RR4 s, IMM offset));
+DECLARE_MIDFUNC(mov_b_rR(W1 d, RR4 s, IMM offset));
+DECLARE_MIDFUNC(mov_l_brR(W4 d, RR4 s, IMM offset));
+DECLARE_MIDFUNC(mov_w_brR(W2 d, RR4 s, IMM offset));
+DECLARE_MIDFUNC(mov_b_brR(W1 d, RR4 s, IMM offset));
+DECLARE_MIDFUNC(mov_l_Ri(RR4 d, IMM i, IMM offset));
+DECLARE_MIDFUNC(mov_w_Ri(RR4 d, IMM i, IMM offset));
+DECLARE_MIDFUNC(mov_b_Ri(RR4 d, IMM i, IMM offset));
+DECLARE_MIDFUNC(mov_l_Rr(RR4 d, RR4 s, IMM offset));
+DECLARE_MIDFUNC(mov_w_Rr(RR4 d, RR2 s, IMM offset));
+DECLARE_MIDFUNC(mov_b_Rr(RR4 d, RR1 s, IMM offset));
+DECLARE_MIDFUNC(lea_l_brr(W4 d, RR4 s, IMM offset));
+DECLARE_MIDFUNC(lea_l_brr_indexed(W4 d, RR4 s, RR4 index, IMM factor, IMM offset));
+DECLARE_MIDFUNC(lea_l_rr_indexed(W4 d, RR4 s, RR4 index, IMM factor));
+DECLARE_MIDFUNC(mov_l_bRr(RR4 d, RR4 s, IMM offset));
+DECLARE_MIDFUNC(mov_w_bRr(RR4 d, RR2 s, IMM offset));
+DECLARE_MIDFUNC(mov_b_bRr(RR4 d, RR1 s, IMM offset));
+DECLARE_MIDFUNC(mid_bswap_32(RW4 r));
+DECLARE_MIDFUNC(mid_bswap_16(RW2 r));
+DECLARE_MIDFUNC(mov_l_rr(W4 d, RR4 s));
+DECLARE_MIDFUNC(mov_l_mr(IMM d, RR4 s));
+DECLARE_MIDFUNC(mov_w_mr(IMM d, RR2 s));
+DECLARE_MIDFUNC(mov_w_rm(W2 d, IMM s));
+DECLARE_MIDFUNC(mov_b_mr(IMM d, RR1 s));
+DECLARE_MIDFUNC(mov_b_rm(W1 d, IMM s));
+DECLARE_MIDFUNC(mov_l_ri(W4 d, IMM s));
+DECLARE_MIDFUNC(mov_w_ri(W2 d, IMM s));
+DECLARE_MIDFUNC(mov_b_ri(W1 d, IMM s));
+DECLARE_MIDFUNC(add_l_mi(IMM d, IMM s));
+DECLARE_MIDFUNC(add_w_mi(IMM d, IMM s));
+DECLARE_MIDFUNC(add_b_mi(IMM d, IMM s));
+DECLARE_MIDFUNC(test_l_ri(RR4 d, IMM i));
+DECLARE_MIDFUNC(test_l_rr(RR4 d, RR4 s));
+DECLARE_MIDFUNC(test_w_rr(RR2 d, RR2 s));
+DECLARE_MIDFUNC(test_b_rr(RR1 d, RR1 s));
+DECLARE_MIDFUNC(test_b_mi(IMM d, IMM s));
+DECLARE_MIDFUNC(and_l_ri(RW4 d, IMM i));
+DECLARE_MIDFUNC(and_l(RW4 d, RR4 s));
+DECLARE_MIDFUNC(and_w(RW2 d, RR2 s));
+DECLARE_MIDFUNC(and_b(RW1 d, RR1 s));
+DECLARE_MIDFUNC(or_l_rm(RW4 d, IMM s));
+DECLARE_MIDFUNC(or_l_ri(RW4 d, IMM i));
+DECLARE_MIDFUNC(or_l(RW4 d, RR4 s));
+DECLARE_MIDFUNC(or_w(RW2 d, RR2 s));
+DECLARE_MIDFUNC(or_b(RW1 d, RR1 s));
+DECLARE_MIDFUNC(adc_l(RW4 d, RR4 s));
+DECLARE_MIDFUNC(adc_w(RW2 d, RR2 s));
+DECLARE_MIDFUNC(adc_b(RW1 d, RR1 s));
+DECLARE_MIDFUNC(add_l(RW4 d, RR4 s));
+DECLARE_MIDFUNC(add_w(RW2 d, RR2 s));
+DECLARE_MIDFUNC(add_b(RW1 d, RR1 s));
+DECLARE_MIDFUNC(sub_l_ri(RW4 d, IMM i));
+DECLARE_MIDFUNC(sub_w_ri(RW2 d, IMM i));
+DECLARE_MIDFUNC(sub_b_ri(RW1 d, IMM i));
+DECLARE_MIDFUNC(add_l_ri(RW4 d, IMM i));
+DECLARE_MIDFUNC(add_w_ri(RW2 d, IMM i));
+DECLARE_MIDFUNC(add_b_ri(RW1 d, IMM i));
+DECLARE_MIDFUNC(sbb_l(RW4 d, RR4 s));
+DECLARE_MIDFUNC(sbb_w(RW2 d, RR2 s));
+DECLARE_MIDFUNC(sbb_b(RW1 d, RR1 s));
+DECLARE_MIDFUNC(sub_l(RW4 d, RR4 s));
+DECLARE_MIDFUNC(sub_w(RW2 d, RR2 s));
+DECLARE_MIDFUNC(sub_b(RW1 d, RR1 s));
+DECLARE_MIDFUNC(cmp_l(RR4 d, RR4 s));
+DECLARE_MIDFUNC(cmp_l_ri(RR4 r, IMM i));
+DECLARE_MIDFUNC(cmp_w(RR2 d, RR2 s));
+DECLARE_MIDFUNC(cmp_b(RR1 d, RR1 s));
+DECLARE_MIDFUNC(xor_l(RW4 d, RR4 s));
+DECLARE_MIDFUNC(xor_w(RW2 d, RR2 s));
+DECLARE_MIDFUNC(xor_b(RW1 d, RR1 s));
+DECLARE_MIDFUNC(live_flags(void));
+DECLARE_MIDFUNC(dont_care_flags(void));
+DECLARE_MIDFUNC(duplicate_carry(void));
+DECLARE_MIDFUNC(setcc_for_cntzero(RR4 d, RR4 data, int size));
+DECLARE_MIDFUNC(restore_carry(void));
+DECLARE_MIDFUNC(start_needflags(void));
+DECLARE_MIDFUNC(end_needflags(void));
+DECLARE_MIDFUNC(make_flags_live(void));
+DECLARE_MIDFUNC(call_r_11(W4 out1, RR4 r, RR4 in1, IMM osize, IMM isize)); /* order matches the MIDFUNC definition: out1 first, then the call target r */
+DECLARE_MIDFUNC(call_r_02(RR4 r, RR4 in1, RR4 in2, IMM isize1, IMM isize2));
+DECLARE_MIDFUNC(forget_about(W4 r));
+DECLARE_MIDFUNC(nop(void));
+/* Mid-layer functions taking FW / FR / FRW register operands */
+DECLARE_MIDFUNC(f_forget_about(FW r));
+DECLARE_MIDFUNC(fmov_pi(FW r));
+DECLARE_MIDFUNC(fmov_log10_2(FW r));
+DECLARE_MIDFUNC(fmov_log2_e(FW r));
+DECLARE_MIDFUNC(fmov_loge_2(FW r));
+DECLARE_MIDFUNC(fmov_1(FW r));
+DECLARE_MIDFUNC(fmov_0(FW r));
+DECLARE_MIDFUNC(fmov_rm(FW r, MEMPTRR m));
+DECLARE_MIDFUNC(fmov_mr(MEMPTRW m, FR r));
+DECLARE_MIDFUNC(fmovi_rm(FW r, MEMPTRR m));
+DECLARE_MIDFUNC(fmovi_mr(MEMPTRW m, FR r));
+DECLARE_MIDFUNC(fmovi_mrb(MEMPTRW m, FR r, double *bounds));
+DECLARE_MIDFUNC(fmovs_rm(FW r, MEMPTRR m));
+DECLARE_MIDFUNC(fmovs_mr(MEMPTRW m, FR r));
+DECLARE_MIDFUNC(fcuts_r(FRW r));
+DECLARE_MIDFUNC(fcut_r(FRW r));
+DECLARE_MIDFUNC(fmov_ext_mr(MEMPTRW m, FR r));
+DECLARE_MIDFUNC(fmov_ext_rm(FW r, MEMPTRR m));
+DECLARE_MIDFUNC(fmov_rr(FW d, FR s));
+DECLARE_MIDFUNC(fldcw_m_indexed(RR4 index, IMM base));
+DECLARE_MIDFUNC(ftst_r(FR r));
+DECLARE_MIDFUNC(dont_care_fflags(void));
+DECLARE_MIDFUNC(fsqrt_rr(FW d, FR s));
+DECLARE_MIDFUNC(fabs_rr(FW d, FR s));
+DECLARE_MIDFUNC(frndint_rr(FW d, FR s));
+DECLARE_MIDFUNC(fgetexp_rr(FW d, FR s));
+DECLARE_MIDFUNC(fgetman_rr(FW d, FR s));
+DECLARE_MIDFUNC(fsin_rr(FW d, FR s));
+DECLARE_MIDFUNC(fcos_rr(FW d, FR s));
+DECLARE_MIDFUNC(ftan_rr(FW d, FR s));
+DECLARE_MIDFUNC(fsincos_rr(FW d, FW c, FR s));
+DECLARE_MIDFUNC(fscale_rr(FRW d, FR s));
+DECLARE_MIDFUNC(ftwotox_rr(FW d, FR s));
+DECLARE_MIDFUNC(fetox_rr(FW d, FR s));
+DECLARE_MIDFUNC(fetoxM1_rr(FW d, FR s));
+DECLARE_MIDFUNC(ftentox_rr(FW d, FR s));
+DECLARE_MIDFUNC(flog2_rr(FW d, FR s));
+DECLARE_MIDFUNC(flogN_rr(FW d, FR s));
+DECLARE_MIDFUNC(flogNP1_rr(FW d, FR s));
+DECLARE_MIDFUNC(flog10_rr(FW d, FR s));
+DECLARE_MIDFUNC(fasin_rr(FW d, FR s));
+DECLARE_MIDFUNC(facos_rr(FW d, FR s));
+DECLARE_MIDFUNC(fatan_rr(FW d, FR s));
+DECLARE_MIDFUNC(fatanh_rr(FW d, FR s));
+DECLARE_MIDFUNC(fsinh_rr(FW d, FR s));
+DECLARE_MIDFUNC(fcosh_rr(FW d, FR s));
+DECLARE_MIDFUNC(ftanh_rr(FW d, FR s));
+DECLARE_MIDFUNC(fneg_rr(FW d, FR s));
+DECLARE_MIDFUNC(fadd_rr(FRW d, FR s));
+DECLARE_MIDFUNC(fsub_rr(FRW d, FR s)); +DECLARE_MIDFUNC(fmul_rr(FRW d, FR s)); +DECLARE_MIDFUNC(frem_rr(FRW d, FR s)); +DECLARE_MIDFUNC(frem1_rr(FRW d, FR s)); +DECLARE_MIDFUNC(fdiv_rr(FRW d, FR s)); +DECLARE_MIDFUNC(fcmp_rr(FR d, FR s)); +DECLARE_MIDFUNC(fflags_into_flags(W2 tmp)); diff --git a/BasiliskII/src/uae_cpu/compiler/compemu_support.cpp b/BasiliskII/src/uae_cpu/compiler/compemu_support.cpp index 608f4e55..d21f1c36 100644 --- a/BasiliskII/src/uae_cpu/compiler/compemu_support.cpp +++ b/BasiliskII/src/uae_cpu/compiler/compemu_support.cpp @@ -1,37 +1,45 @@ /* - * compiler/compemu_support.cpp - Core dynamic translation engine + * compiler/compemu_support.cpp - Core dynamic translation engine * - * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * Copyright (c) 2001-2009 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II * - * Adaptation for Basilisk II and improvements, copyright 2000-2005 - * Gwenole Beauchesne + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. * - * Basilisk II (C) 1997-2008 Christian Bauer - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * JIT compiler m68k -> IA-32 and AMD64 / ARM * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
+ * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * Adaptation for Basilisk II and improvements, copyright 2000-2004 Gwenole Beauchesne + * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include "sysdeps.h" +#ifdef UAE -#if USE_JIT +#define writemem_special writemem +#define readmem_special readmem -#if !REAL_ADDRESSING && !DIRECT_ADDRESSING -#error "Only Real or Direct Addressing is supported with the JIT Compiler" +#else +#if !FIXED_ADDRESSING +#error "Only Fixed Addressing is supported with the JIT Compiler" #endif -#if X86_ASSEMBLY && !SAHF_SETO_PROFITABLE +#if defined(X86_ASSEMBLY) && !SAHF_SETO_PROFITABLE #error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler" #endif @@ -40,71 +48,190 @@ * code is not 64-bit clean and (ii) it's faster to resolve branches * that way. 
*/ -#if !defined(__i386__) && !defined(__x86_64__) -#error "Only IA-32 and X86-64 targets are supported with the JIT Compiler" +#if !defined(CPU_i386) && !defined(CPU_x86_64) && !defined(CPU_arm) +#error "Only IA-32, X86-64 and ARM v6 targets are supported with the JIT Compiler" +#endif #endif #define USE_MATCH 0 /* kludge for Brian, so he can compile under MSVC++ */ -#define USE_NORMAL_CALLING_CONVENTION 1 && defined(_MSC_VER) +#define USE_NORMAL_CALLING_CONVENTION 0 -#ifndef WIN32 -#include -#include -#include -#endif +// #include "sysconfig.h" +#include "sysdeps.h" -#include -#include -#include +#ifdef JIT +#ifdef UAE +#include "options.h" +#include "events.h" +#include "uae/memory.h" +#include "custom.h" +#else #include "cpu_emulation.h" #include "main.h" #include "prefs.h" -#include "user_strings.h" #include "vm_alloc.h" #include "m68k.h" #include "memory.h" #include "readcpu.h" +#endif #include "newcpu.h" #include "comptbl.h" +#ifdef UAE +#include "compemu.h" +#ifdef FSUAE +#include "codegen_udis86.h" +#endif +#else #include "compiler/compemu.h" #include "fpu/fpu.h" #include "fpu/flags.h" +// #include "parameters.h" +static void build_comp(void); +#endif +// #include "verify.h" +// #define jit_log(format, ...) 
\ +// uae_log("JIT: " format "\n", ##__VA_ARGS__); +#define D2 D + +#ifdef UAE +#ifdef FSUAE +#include "uae/fs.h" +#endif +#include "uae/log.h" + +#if defined(__pie__) || defined (__PIE__) +#error Position-independent code (PIE) cannot be used with JIT +#endif + +#include "uae/vm.h" +#define VM_PAGE_READ UAE_VM_READ +#define VM_PAGE_WRITE UAE_VM_WRITE +#define VM_PAGE_EXECUTE UAE_VM_EXECUTE +#define VM_MAP_FAILED UAE_VM_ALLOC_FAILED +#define VM_MAP_DEFAULT 1 +#define VM_MAP_32BIT 1 +#define vm_protect(address, size, protect) uae_vm_protect(address, size, protect) +#define vm_release(address, size) uae_vm_free(address, size) + +static inline void *vm_acquire(size_t size, int options = VM_MAP_DEFAULT) +{ + assert(options == (VM_MAP_DEFAULT | VM_MAP_32BIT)); + return uae_vm_alloc(size, UAE_VM_32BIT, UAE_VM_READ_WRITE); +} + +#define UNUSED(x) +#include "uae.h" +#include "uae/log.h" +#define jit_log(format, ...) \ + uae_log("JIT: " format "\n", ##__VA_ARGS__); +#define jit_log2(format, ...) + +#define MEMBaseDiff uae_p32(NATMEM_OFFSET) + +#ifdef NATMEM_OFFSET +#define FIXED_ADDRESSING 1 +#endif + +#define SAHF_SETO_PROFITABLE + +// %%% BRIAN KING WAS HERE %%% +extern bool canbang; + +#include "compemu_prefs.cpp" + +#define uint32 uae_u32 +#define uint8 uae_u8 + +static inline int distrust_check(int value) +{ +#ifdef JIT_ALWAYS_DISTRUST + return 1; +#else + int distrust = value; +#ifdef FSUAE + switch (value) { + case 0: distrust = 0; break; + case 1: distrust = 1; break; + case 2: distrust = ((start_pc & 0xF80000) == 0xF80000); break; + case 3: distrust = !have_done_picasso; break; + default: abort(); + } +#endif + return distrust; +#endif +} + +static inline int distrust_byte(void) +{ + return distrust_check(currprefs.comptrustbyte); +} + +static inline int distrust_word(void) +{ + return distrust_check(currprefs.comptrustword); +} + +static inline int distrust_long(void) +{ + return distrust_check(currprefs.comptrustlong); +} + +static inline int distrust_addr(void) 
+{ + return distrust_check(currprefs.comptrustnaddr); +} + +#else #define DEBUG 0 #include "debug.h" -#ifdef ENABLE_MON -#include "mon.h" +#define NATMEM_OFFSET MEMBaseDiff +#define canbang 1 +#define op_illg op_illg_1 + +#ifdef WINUAE_ARANYM +void jit_abort(const char *format, ...) +{ + va_list args; + va_start(args, format); + vprintf(format, args); + va_end(args); + abort(); +} #endif -#define PROFILE_COMPILE_TIME 0 -#define PROFILE_UNTRANSLATED_INSNS 0 +#if DEBUG +#define PROFILE_COMPILE_TIME 1 +#define PROFILE_UNTRANSLATED_INSNS 1 +#endif +#endif -#if defined(__x86_64__) && 0 +# include +# include +# include +# include + +#if defined(CPU_x86_64) && 0 #define RECORD_REGISTER_USAGE 1 #endif -//#ifdef WIN32 -#undef write_log -#define write_log dummy_write_log -static void dummy_write_log(const char *, ...) { } -//#endif - -#if JIT_DEBUG +#ifdef JIT_DEBUG #undef abort #define abort() do { \ fprintf(stderr, "Abort in file %s at line %d\n", __FILE__, __LINE__); \ + compiler_dumpstate(); \ exit(EXIT_FAILURE); \ } while (0) #endif -#if RECORD_REGISTER_USAGE +#ifdef RECORD_REGISTER_USAGE static uint64 reg_count[16]; -static int reg_count_local[16]; +static uint64 reg_count_local[16]; static int reg_count_compare(const void *ap, const void *bp) { @@ -114,7 +241,7 @@ static int reg_count_compare(const void *ap, const void *bp) } #endif -#if PROFILE_COMPILE_TIME +#ifdef PROFILE_COMPILE_TIME #include static uae_u32 compile_count = 0; static clock_t compile_time = 0; @@ -122,11 +249,12 @@ static clock_t emul_start_time = 0; static clock_t emul_end_time = 0; #endif -#if PROFILE_UNTRANSLATED_INSNS -const int untranslated_top_ten = 20; +#ifdef PROFILE_UNTRANSLATED_INSNS +static const int untranslated_top_ten = 50; static uae_u32 raw_cputbl_count[65536] = { 0, }; static uae_u16 opcode_nums[65536]; + static int untranslated_compfn(const void *e1, const void *e2) { return raw_cputbl_count[*(const uae_u16 *)e1] < raw_cputbl_count[*(const uae_u16 *)e2]; @@ -135,44 +263,84 @@ static 
int untranslated_compfn(const void *e1, const void *e2) static compop_func *compfunctbl[65536]; static compop_func *nfcompfunctbl[65536]; +#ifdef NOFLAGS_SUPPORT static cpuop_func *nfcpufunctbl[65536]; +#endif uae_u8* comp_pc_p; -// From newcpu.cpp -extern bool quit_program; +#ifdef UAE +/* defined in uae.h */ +#else +// External variables +// newcpu.cpp +extern int quit_program; +#endif // gb-- Extra data for Basilisk II/JIT -#if JIT_DEBUG +#ifdef JIT_DEBUG static bool JITDebug = false; // Enable runtime disassemblers through mon? +// #define JITDebug bx_options.jit.jitdebug // Enable runtime disassemblers through mon? #else -const bool JITDebug = false; // Don't use JIT debug mode at all +const bool JITDebug = false; +// #define JITDebug false // Don't use JIT debug mode at all #endif #if USE_INLINING -static bool follow_const_jumps = true; // Flag: translation through constant jumps +#ifdef UAE +#define follow_const_jumps (currprefs.comp_constjump != 0) #else -const bool follow_const_jumps = false; +static bool follow_const_jumps = true; // Flag: translation through constant jumps +#endif +#else +const bool follow_const_jumps = false; #endif -const uae_u32 MIN_CACHE_SIZE = 1024; // Minimal translation cache size (1 MB) -static uae_u32 cache_size = 0; // Size of total cache allocated for compiled blocks -static uae_u32 current_cache_size = 0; // Cache grows upwards: how much has been consumed already -static bool lazy_flush = true; // Flag: lazy translation cache invalidation -static bool avoid_fpu = true; // Flag: compile FPU instructions ? -static bool have_cmov = false; // target has CMOV instructions ? -static bool have_lahf_lm = true; // target has LAHF supported in long mode ? -static bool have_rat_stall = true; // target has partial register stalls ? -const bool tune_alignment = true; // Tune code alignments for running CPU ? 
-const bool tune_nop_fillers = true; // Tune no-op fillers for architecture +const uae_u32 MIN_CACHE_SIZE = 1024; // Minimal translation cache size (1 MB) +static uae_u32 cache_size = 0; // Size of total cache allocated for compiled blocks +static uae_u32 current_cache_size = 0; // Cache grows upwards: how much has been consumed already +static bool lazy_flush = true; // Flag: lazy translation cache invalidation +// Flag: compile FPU instructions ? +#ifdef UAE +#ifdef USE_JIT_FPU +#define avoid_fpu (!currprefs.compfpu) +#else +#define avoid_fpu (true) +#endif +#else +static bool avoid_fpu = true; // Flag: compile FPU instructions ? +// #ifdef USE_JIT_FPU +// #define avoid_fpu (!bx_options.jit.jitfpu) +// #else +// #define avoid_fpu (true) +// #endif +#endif +static bool have_cmov = false; // target has CMOV instructions ? +static bool have_rat_stall = true; // target has partial register stalls ? +const bool tune_alignment = true; // Tune code alignments for running CPU ? +const bool tune_nop_fillers = true; // Tune no-op fillers for architecture static bool setzflg_uses_bsf = false; // setzflg virtual instruction can use native BSF instruction correctly? 
-static int align_loops = 32; // Align the start of loops -static int align_jumps = 32; // Align the start of jumps +static int align_loops = 32; // Align the start of loops +static int align_jumps = 32; // Align the start of jumps static int optcount[10] = { +#ifdef UAE + 4, // How often a block has to be executed before it is translated +#else 10, // How often a block has to be executed before it is translated +#endif 0, // How often to use naive translation 0, 0, 0, 0, -1, -1, -1, -1 }; +#ifdef UAE +/* FIXME: op_properties is currently in compemu.h */ + +op_properties prop[65536]; + +static inline bool is_const_jump(uae_u32 opcode) +{ + return prop[opcode].is_const_jump != 0; +} +#else struct op_properties { uae_u8 use_flags; uae_u8 set_flags; @@ -191,17 +359,25 @@ static inline bool is_const_jump(uae_u32 opcode) return (prop[opcode].cflow == fl_const_jump); } +#if 0 static inline bool may_trap(uae_u32 opcode) { - return (prop[opcode].cflow & fl_trap) != 0; + return (prop[opcode].cflow & fl_trap); } +#endif + +#endif static inline unsigned int cft_map (unsigned int f) { -#ifndef HAVE_GET_WORD_UNSWAPPED - return f; +#ifdef UAE + return f; #else - return ((f >> 8) & 255) | ((f & 255) << 8); +#if !defined(HAVE_GET_WORD_UNSWAPPED) || defined(FULLMMU) + return f; +#else + return ((f >> 8) & 255) | ((f & 255) << 8); +#endif #endif } @@ -212,19 +388,18 @@ static uintptr current_block_start_target; uae_u32 needed_flags; static uintptr next_pc_p; static uintptr taken_pc_p; -static int branch_cc; +static int branch_cc; static int redo_current_block; +#ifdef UAE int segvcount=0; -int soft_flush_count=0; -int hard_flush_count=0; -int checksum_count=0; +#endif static uae_u8* current_compile_p=NULL; static uae_u8* max_compile_start; static uae_u8* compiled_code=NULL; static uae_s32 reg_alloc_run; -const int POPALLSPACE_SIZE = 1024; /* That should be enough space */ -static uae_u8* popallspace=NULL; +const int POPALLSPACE_SIZE = 2048; /* That should be enough space */ +static 
uae_u8 *popallspace=NULL; void* pushall_call_handler=NULL; static void* popall_do_nothing=NULL; @@ -239,42 +414,40 @@ static void* popall_check_checksum=NULL; * UPDATE: We now use those entries to store the start of the linked * lists that we maintain for each hash result. */ -cacheline cache_tags[TAGSIZE]; -int letit=0; -blockinfo* hold_bi[MAX_HOLD_BI]; -blockinfo* active; -blockinfo* dormant; +static cacheline cache_tags[TAGSIZE]; +static int cache_enabled=0; +static blockinfo* hold_bi[MAX_HOLD_BI]; +static blockinfo* active; +static blockinfo* dormant; +#ifdef NOFLAGS_SUPPORT /* 68040 */ -extern struct cputbl op_smalltbl_0_nf[]; -extern struct comptbl op_smalltbl_0_comp_nf[]; -extern struct comptbl op_smalltbl_0_comp_ff[]; +extern const struct cputbl op_smalltbl_0_nf[]; +#endif +extern const struct comptbl op_smalltbl_0_comp_nf[]; +extern const struct comptbl op_smalltbl_0_comp_ff[]; +#ifdef NOFLAGS_SUPPORT /* 68020 + 68881 */ -extern struct cputbl op_smalltbl_1_nf[]; - +extern const struct cputbl op_smalltbl_1_nf[]; /* 68020 */ -extern struct cputbl op_smalltbl_2_nf[]; - +extern const struct cputbl op_smalltbl_2_nf[]; /* 68010 */ -extern struct cputbl op_smalltbl_3_nf[]; - +extern const struct cputbl op_smalltbl_3_nf[]; /* 68000 */ -extern struct cputbl op_smalltbl_4_nf[]; - +extern const struct cputbl op_smalltbl_4_nf[]; /* 68000 slow but compatible. 
*/ -extern struct cputbl op_smalltbl_5_nf[]; +extern const struct cputbl op_smalltbl_5_nf[]; +#endif -static void flush_icache_hard(int n); -static void flush_icache_lazy(int n); -static void flush_icache_none(int n); -void (*flush_icache)(int n) = flush_icache_none; +static void flush_icache_hard(void); +static void flush_icache_lazy(void); +static void flush_icache_none(void); +void (*flush_icache)(void) = flush_icache_none; - - -bigstate live; -smallstate empty_ss; -smallstate default_ss; +static bigstate live; +static smallstate empty_ss; +static smallstate default_ss; static int optlev; static int writereg(int r, int size); @@ -282,20 +455,17 @@ static void unlock2(int r); static void setlock(int r); static int readreg_specific(int r, int size, int spec); static int writereg_specific(int r, int size, int spec); -static void prepare_for_call_1(void); -static void prepare_for_call_2(void); -static void align_target(uae_u32 a); -static uae_s32 nextused[VREGS]; +static void inline write_jmp_target(uae_u32 *jmpaddr, cpuop_func* a); uae_u32 m68k_pc_offset; -/* Some arithmetic ooperations can be optimized away if the operands +/* Some arithmetic operations can be optimized away if the operands * are known to be constant. But that's only a good idea when the * side effects they would have on the flags are not important. This * variable indicates whether we need the side effects or not */ -uae_u32 needflags=0; +static uae_u32 needflags=0; /* Flag handling is complicated. * @@ -322,199 +492,297 @@ uae_u32 needflags=0; * is in the register and/or the native flags is seen as valid. 
*/ -static __inline__ blockinfo* get_blockinfo(uae_u32 cl) +static inline blockinfo* get_blockinfo(uae_u32 cl) { - return cache_tags[cl+1].bi; + return cache_tags[cl+1].bi; } -static __inline__ blockinfo* get_blockinfo_addr(void* addr) +static inline blockinfo* get_blockinfo_addr(void* addr) { - blockinfo* bi=get_blockinfo(cacheline(addr)); + blockinfo* bi=get_blockinfo(cacheline(addr)); - while (bi) { - if (bi->pc_p==addr) - return bi; - bi=bi->next_same_cl; - } - return NULL; + while (bi) { + if (bi->pc_p==addr) + return bi; + bi=bi->next_same_cl; + } + return NULL; } - +#ifdef WINUAE_ARANYM +/******************************************************************* + * Disassembler support * + *******************************************************************/ + +#define TARGET_M68K 0 +#define TARGET_POWERPC 1 +#define TARGET_X86 2 +#define TARGET_X86_64 3 +#define TARGET_ARM 4 +#if defined(CPU_i386) +#define TARGET_NATIVE TARGET_X86 +#endif +#if defined(CPU_powerpc) +#define TARGET_NATIVE TARGET_POWERPC +#endif +#if defined(CPU_x86_64) +#define TARGET_NATIVE TARGET_X86_64 +#endif +#if defined(CPU_arm) +#define TARGET_NATIVE TARGET_ARM +#endif +// #include "disasm-glue.h" + +bool disasm_this_inst; + +#if defined(JIT_DEBUG) || (defined(HAVE_DISASM_NATIVE) && defined(HAVE_DISASM_M68K)) +static void disasm_block(int disasm_target, const uint8 *start, size_t length) +{ + UNUSED(start); + UNUSED(length); + switch (disasm_target) + { + case TARGET_M68K: +#if defined(HAVE_DISASM_M68K) + { + char buf[256]; + + disasm_info.memory_vma = ((memptr)((uintptr_t)(start) - MEMBaseDiff)); + while (length > 0) + { + int isize = m68k_disasm_to_buf(&disasm_info, buf, 1); + bug("%s", buf); + if (isize < 0) + break; + if ((uintptr)isize > length) + break; + length -= isize; + } + } +#endif + break; + case TARGET_X86: + case TARGET_X86_64: +#if defined(HAVE_DISASM_X86) + { + const uint8 *end = start + length; + char buf[256]; + + while (start < end) + { + start = x86_disasm(start, buf, 1); 
+ bug("%s", buf); + } + } +#endif + break; + case TARGET_ARM: +#if defined(HAVE_DISASM_ARM) + { + const uint8 *end = start + length; + char buf[256]; + + while (start < end) + { + start = arm_disasm(start, buf, 1); + bug("%s", buf); + } + } +#endif + break; + } +} + +static inline void disasm_native_block(const uint8 *start, size_t length) +{ + disasm_block(TARGET_NATIVE, start, length); +} + +static inline void disasm_m68k_block(const uint8 *start, size_t length) +{ + disasm_block(TARGET_M68K, start, length); +} +#endif +#endif /* WINUAE_ARANYM */ + + /******************************************************************* * All sorts of list related functions for all of the lists * *******************************************************************/ -static __inline__ void remove_from_cl_list(blockinfo* bi) +static inline void remove_from_cl_list(blockinfo* bi) { - uae_u32 cl=cacheline(bi->pc_p); + uae_u32 cl=cacheline(bi->pc_p); - if (bi->prev_same_cl_p) - *(bi->prev_same_cl_p)=bi->next_same_cl; - if (bi->next_same_cl) - bi->next_same_cl->prev_same_cl_p=bi->prev_same_cl_p; - if (cache_tags[cl+1].bi) - cache_tags[cl].handler=cache_tags[cl+1].bi->handler_to_use; - else - cache_tags[cl].handler=(cpuop_func *)popall_execute_normal; + if (bi->prev_same_cl_p) + *(bi->prev_same_cl_p)=bi->next_same_cl; + if (bi->next_same_cl) + bi->next_same_cl->prev_same_cl_p=bi->prev_same_cl_p; + if (cache_tags[cl+1].bi) + cache_tags[cl].handler=cache_tags[cl+1].bi->handler_to_use; + else + cache_tags[cl].handler=(cpuop_func*)popall_execute_normal; } -static __inline__ void remove_from_list(blockinfo* bi) +static inline void remove_from_list(blockinfo* bi) { - if (bi->prev_p) - *(bi->prev_p)=bi->next; - if (bi->next) - bi->next->prev_p=bi->prev_p; + if (bi->prev_p) + *(bi->prev_p)=bi->next; + if (bi->next) + bi->next->prev_p=bi->prev_p; } -static __inline__ void remove_from_lists(blockinfo* bi) +#if 0 +static inline void remove_from_lists(blockinfo* bi) { - remove_from_list(bi); - 
remove_from_cl_list(bi); + remove_from_list(bi); + remove_from_cl_list(bi); +} +#endif + +static inline void add_to_cl_list(blockinfo* bi) +{ + uae_u32 cl=cacheline(bi->pc_p); + + if (cache_tags[cl+1].bi) + cache_tags[cl+1].bi->prev_same_cl_p=&(bi->next_same_cl); + bi->next_same_cl=cache_tags[cl+1].bi; + + cache_tags[cl+1].bi=bi; + bi->prev_same_cl_p=&(cache_tags[cl+1].bi); + + cache_tags[cl].handler=bi->handler_to_use; } -static __inline__ void add_to_cl_list(blockinfo* bi) +static inline void raise_in_cl_list(blockinfo* bi) { - uae_u32 cl=cacheline(bi->pc_p); - - if (cache_tags[cl+1].bi) - cache_tags[cl+1].bi->prev_same_cl_p=&(bi->next_same_cl); - bi->next_same_cl=cache_tags[cl+1].bi; - - cache_tags[cl+1].bi=bi; - bi->prev_same_cl_p=&(cache_tags[cl+1].bi); - - cache_tags[cl].handler=bi->handler_to_use; + remove_from_cl_list(bi); + add_to_cl_list(bi); } -static __inline__ void raise_in_cl_list(blockinfo* bi) +static inline void add_to_active(blockinfo* bi) { - remove_from_cl_list(bi); - add_to_cl_list(bi); + if (active) + active->prev_p=&(bi->next); + bi->next=active; + + active=bi; + bi->prev_p=&active; } -static __inline__ void add_to_active(blockinfo* bi) +static inline void add_to_dormant(blockinfo* bi) { - if (active) - active->prev_p=&(bi->next); - bi->next=active; + if (dormant) + dormant->prev_p=&(bi->next); + bi->next=dormant; - active=bi; - bi->prev_p=&active; + dormant=bi; + bi->prev_p=&dormant; } -static __inline__ void add_to_dormant(blockinfo* bi) +static inline void remove_dep(dependency* d) { - if (dormant) - dormant->prev_p=&(bi->next); - bi->next=dormant; - - dormant=bi; - bi->prev_p=&dormant; -} - -static __inline__ void remove_dep(dependency* d) -{ - if (d->prev_p) - *(d->prev_p)=d->next; - if (d->next) - d->next->prev_p=d->prev_p; - d->prev_p=NULL; - d->next=NULL; + if (d->prev_p) + *(d->prev_p)=d->next; + if (d->next) + d->next->prev_p=d->prev_p; + d->prev_p=NULL; + d->next=NULL; } /* This block's code is about to be thrown away, so it no 
longer depends on anything else */ -static __inline__ void remove_deps(blockinfo* bi) +static inline void remove_deps(blockinfo* bi) { - remove_dep(&(bi->dep[0])); - remove_dep(&(bi->dep[1])); + remove_dep(&(bi->dep[0])); + remove_dep(&(bi->dep[1])); } -static __inline__ void adjust_jmpdep(dependency* d, cpuop_func* a) +static inline void adjust_jmpdep(dependency* d, cpuop_func* a) { - *(d->jmp_off)=(uintptr)a-((uintptr)d->jmp_off+4); + write_jmp_target(d->jmp_off, a); } /******************************************************************** * Soft flush handling support functions * ********************************************************************/ -static __inline__ void set_dhtu(blockinfo* bi, cpuop_func* dh) +static inline void set_dhtu(blockinfo* bi, cpuop_func *dh) { - //write_log("bi is %p\n",bi); - if (dh!=bi->direct_handler_to_use) { - dependency* x=bi->deplist; - //write_log("bi->deplist=%p\n",bi->deplist); - while (x) { - //write_log("x is %p\n",x); - //write_log("x->next is %p\n",x->next); - //write_log("x->prev_p is %p\n",x->prev_p); - - if (x->jmp_off) { - adjust_jmpdep(x,dh); - } - x=x->next; + jit_log2("bi is %p",bi); + if (dh!=bi->direct_handler_to_use) { + dependency* x=bi->deplist; + jit_log2("bi->deplist=%p",bi->deplist); + while (x) { + jit_log2("x is %p",x); + jit_log2("x->next is %p",x->next); + jit_log2("x->prev_p is %p",x->prev_p); + + if (x->jmp_off) { + adjust_jmpdep(x,dh); + } + x=x->next; + } + bi->direct_handler_to_use=dh; } - bi->direct_handler_to_use=dh; - } } -static __inline__ void invalidate_block(blockinfo* bi) +static inline void invalidate_block(blockinfo* bi) { - int i; + int i; - bi->optlevel=0; - bi->count=optcount[0]-1; - bi->handler=NULL; - bi->handler_to_use=(cpuop_func *)popall_execute_normal; - bi->direct_handler=NULL; - set_dhtu(bi,bi->direct_pen); - bi->needed_flags=0xff; + bi->optlevel=0; + bi->count=optcount[0]-1; + bi->handler=NULL; + bi->handler_to_use=(cpuop_func*)popall_execute_normal; + bi->direct_handler=NULL; 
+ set_dhtu(bi,bi->direct_pen); + bi->needed_flags=0xff; bi->status=BI_INVALID; - for (i=0;i<2;i++) { - bi->dep[i].jmp_off=NULL; - bi->dep[i].target=NULL; - } - remove_deps(bi); + for (i=0;i<2;i++) { + bi->dep[i].jmp_off=NULL; + bi->dep[i].target=NULL; + } + remove_deps(bi); } -static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target) +static inline void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target) { - blockinfo* tbi=get_blockinfo_addr((void*)(uintptr)target); - - Dif(!tbi) { - write_log("Could not create jmpdep!\n"); - abort(); - } - bi->dep[i].jmp_off=jmpaddr; + blockinfo* tbi=get_blockinfo_addr((void*)(uintptr)target); + + Dif(!tbi) { + jit_abort("Could not create jmpdep!"); + } + bi->dep[i].jmp_off=jmpaddr; bi->dep[i].source=bi; - bi->dep[i].target=tbi; - bi->dep[i].next=tbi->deplist; - if (bi->dep[i].next) - bi->dep[i].next->prev_p=&(bi->dep[i].next); - bi->dep[i].prev_p=&(tbi->deplist); - tbi->deplist=&(bi->dep[i]); + bi->dep[i].target=tbi; + bi->dep[i].next=tbi->deplist; + if (bi->dep[i].next) + bi->dep[i].next->prev_p=&(bi->dep[i].next); + bi->dep[i].prev_p=&(tbi->deplist); + tbi->deplist=&(bi->dep[i]); } -static __inline__ void block_need_recompile(blockinfo * bi) +static inline void block_need_recompile(blockinfo * bi) { - uae_u32 cl = cacheline(bi->pc_p); - - set_dhtu(bi, bi->direct_pen); - bi->direct_handler = bi->direct_pen; - - bi->handler_to_use = (cpuop_func *)popall_execute_normal; - bi->handler = (cpuop_func *)popall_execute_normal; - if (bi == cache_tags[cl + 1].bi) - cache_tags[cl].handler = (cpuop_func *)popall_execute_normal; - bi->status = BI_NEED_RECOMP; + uae_u32 cl = cacheline(bi->pc_p); + + set_dhtu(bi, bi->direct_pen); + bi->direct_handler = bi->direct_pen; + + bi->handler_to_use = (cpuop_func *)popall_execute_normal; + bi->handler = (cpuop_func *)popall_execute_normal; + if (bi == cache_tags[cl + 1].bi) + cache_tags[cl].handler = (cpuop_func *)popall_execute_normal; + bi->status 
= BI_NEED_RECOMP; } -static __inline__ void mark_callers_recompile(blockinfo * bi) +#if USE_MATCH +static inline void mark_callers_recompile(blockinfo * bi) { dependency *x = bi->deplist; - while (x) { + while (x) { dependency *next = x->next; /* This disappears when we mark for * recompilation and thus remove the * blocks from the lists */ @@ -522,7 +790,7 @@ static __inline__ void mark_callers_recompile(blockinfo * bi) blockinfo *cbi = x->source; Dif(cbi->status == BI_INVALID) { - // write_log("invalid block in dependency list\n"); // FIXME? + jit_log("invalid block in dependency list"); // FIXME? // abort(); } if (cbi->status == BI_ACTIVE || cbi->status == BI_NEED_CHECK) { @@ -536,38 +804,38 @@ static __inline__ void mark_callers_recompile(blockinfo * bi) /* nothing */ } else { - //write_log("Status %d in mark_callers\n",cbi->status); // FIXME? + jit_log2("Status %d in mark_callers",cbi->status); // FIXME? } } x = next; } } +#endif -static __inline__ blockinfo* get_blockinfo_addr_new(void* addr, int setstate) +static inline blockinfo* get_blockinfo_addr_new(void* addr, int /* setstate */) { - blockinfo* bi=get_blockinfo_addr(addr); - int i; + blockinfo* bi=get_blockinfo_addr(addr); + int i; - if (!bi) { - for (i=0;ipc_p=(uae_u8 *)addr; - invalidate_block(bi); - add_to_active(bi); - add_to_cl_list(bi); - - } + if (!bi) { + for (i=0;ipc_p=(uae_u8*)addr; + invalidate_block(bi); + add_to_active(bi); + add_to_cl_list(bi); + + } + } } - } - if (!bi) { - write_log("Looking for blockinfo, can't find free one\n"); - abort(); - } - return bi; + if (!bi) { + jit_abort("Looking for blockinfo, can't find free one"); + } + return bi; } static void prepare_block(blockinfo* bi); @@ -578,7 +846,7 @@ static void prepare_block(blockinfo* bi); compiled. If the list of free blockinfos is empty, we allocate a new pool of blockinfos and link the newly created blockinfos altogether into the list of free blockinfos. Otherwise, we simply pop a structure - off the free list. 
+ of the free list. Blockinfo are lazily deallocated, i.e. chained altogether in the list of free blockinfos whenvever a translation cache flush (hard or @@ -589,7 +857,7 @@ template< class T > class LazyBlockAllocator { enum { - kPoolSize = 1 + 4096 / sizeof(T) + kPoolSize = 1 + (16384 - sizeof(T) - sizeof(void *)) / sizeof(T) }; struct Pool { T chunk[kPoolSize]; @@ -599,11 +867,20 @@ class LazyBlockAllocator T * mChunks; public: LazyBlockAllocator() : mPools(0), mChunks(0) { } +#ifdef UAE +#else ~LazyBlockAllocator(); +#endif T * acquire(); void release(T * const); }; +#ifdef UAE +/* uae_vm_release may do logging, which isn't safe to do when the application + * is shutting down. Better to release memory manually with a function call + * to a release_all method on shutdown, or even simpler, just let the OS + * handle it (we're shutting down anyway). */ +#else template< class T > LazyBlockAllocator::~LazyBlockAllocator() { @@ -611,9 +888,10 @@ LazyBlockAllocator::~LazyBlockAllocator() while (currentPool) { Pool * deadPool = currentPool; currentPool = currentPool->next; - free(deadPool); + vm_release(deadPool, sizeof(Pool)); } } +#endif template< class T > T * LazyBlockAllocator::acquire() @@ -621,7 +899,10 @@ T * LazyBlockAllocator::acquire() if (!mChunks) { // There is no chunk left, allocate a new pool and link the // chunks into the free list - Pool * newPool = (Pool *)malloc(sizeof(Pool)); + Pool * newPool = (Pool *)vm_acquire(sizeof(Pool), VM_MAP_DEFAULT | VM_MAP_32BIT); + if (newPool == VM_MAP_FAILED) { + jit_abort("Could not allocate block pool!"); + } for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) { chunk->next = mChunks; mChunks = chunk; @@ -651,7 +932,7 @@ public: return data; } - void release(T * const chunk) { + void release(T * const ) { // Deallocated on invalidation } }; @@ -664,20 +945,20 @@ static HardBlockAllocator BlockInfoAllocator; static HardBlockAllocator ChecksumInfoAllocator; #endif -static __inline__ 
checksum_info *alloc_checksum_info(void) +static inline checksum_info *alloc_checksum_info(void) { checksum_info *csi = ChecksumInfoAllocator.acquire(); csi->next = NULL; return csi; } -static __inline__ void free_checksum_info(checksum_info *csi) +static inline void free_checksum_info(checksum_info *csi) { csi->next = NULL; ChecksumInfoAllocator.release(csi); } -static __inline__ void free_checksum_info_chain(checksum_info *csi) +static inline void free_checksum_info_chain(checksum_info *csi) { while (csi != NULL) { checksum_info *csi2 = csi->next; @@ -686,7 +967,7 @@ static __inline__ void free_checksum_info_chain(checksum_info *csi) } } -static __inline__ blockinfo *alloc_blockinfo(void) +static inline blockinfo *alloc_blockinfo(void) { blockinfo *bi = BlockInfoAllocator.acquire(); #if USE_CHECKSUM_INFO @@ -695,7 +976,7 @@ static __inline__ blockinfo *alloc_blockinfo(void) return bi; } -static __inline__ void free_blockinfo(blockinfo *bi) +static inline void free_blockinfo(blockinfo *bi) { #if USE_CHECKSUM_INFO free_checksum_info_chain(bi->csi); @@ -704,17 +985,17 @@ static __inline__ void free_blockinfo(blockinfo *bi) BlockInfoAllocator.release(bi); } -static __inline__ void alloc_blockinfos(void) +static inline void alloc_blockinfos(void) { - int i; - blockinfo* bi; + int i; + blockinfo* bi; - for (i=0;i data_endpos || get_target_noopt() + codesize - data_writepos > DATA_BUFFER_MAXOFFSET) + { + // Start new buffer +#if DEBUG + if(data_writepos < data_endpos) + data_wasted += data_endpos - data_writepos; +#endif + compemu_raw_branch(DATA_BUFFER_SIZE); + data_writepos = get_target_noopt(); + data_endpos = data_writepos + DATA_BUFFER_SIZE; + set_target(get_target_noopt() + DATA_BUFFER_SIZE); + } +} + +static inline long data_word_offs(uae_u16 x) +{ + data_check_end(4, 4); +#ifdef WORDS_BIGENDIAN + *((uae_u16*)data_writepos)=x; + data_writepos += 2; + *((uae_u16*)data_writepos)=0; + data_writepos += 2; +#else + *((uae_u32*)data_writepos)=x; + data_writepos += 4; 
+#endif + return (long)data_writepos - (long)get_target_noopt() - 12; +} + +static inline long data_long(uae_u32 x, long codesize) +{ + data_check_end(4, codesize); + *((uae_u32*)data_writepos)=x; + data_writepos += 4; + return (long)data_writepos - 4; +} + +static inline long data_long_offs(uae_u32 x) +{ + data_check_end(4, 4); + *((uae_u32*)data_writepos)=x; + data_writepos += 4; + return (long)data_writepos - (long)get_target_noopt() - 12; +} + +static inline long get_data_offset(long t) +{ + return t - (long)get_target_noopt() - 8; +} + +static inline void reset_data_buffer(void) +{ + data_writepos = 0; + data_endpos = 0; +} + +#endif /******************************************************************** * Getting the information about the target CPU * ********************************************************************/ +#if defined(CPU_arm) +#include "codegen_arm.cpp" +#endif +#if defined(CPU_i386) || defined(CPU_x86_64) #include "codegen_x86.cpp" - -void set_target(uae_u8* t) -{ - target=t; -} - -static __inline__ uae_u8* get_target_noopt(void) -{ - return target; -} - -__inline__ uae_u8* get_target(void) -{ - return get_target_noopt(); -} +#endif /******************************************************************** * Flags status handling. EMIT TIME! 
* ********************************************************************/ -static void bt_l_ri_noclobber(R4 r, IMM i); +static void bt_l_ri_noclobber(RR4 r, IMM i); static void make_flags_live_internal(void) { - if (live.flags_in_flags==VALID) - return; - Dif (live.flags_on_stack==TRASH) { - write_log("Want flags, got something on stack, but it is TRASH\n"); - abort(); - } - if (live.flags_on_stack==VALID) { - int tmp; - tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2); - raw_reg_to_flags(tmp); - unlock2(tmp); + if (live.flags_in_flags==VALID) + return; + Dif (live.flags_on_stack==TRASH) { + jit_abort("Want flags, got something on stack, but it is TRASH"); + } + if (live.flags_on_stack==VALID) { + int tmp; + tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2); + raw_reg_to_flags(tmp); + unlock2(tmp); - live.flags_in_flags=VALID; - return; - } - write_log("Huh? live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live\n", - live.flags_in_flags,live.flags_on_stack); - abort(); + live.flags_in_flags=VALID; + return; + } + jit_abort("Huh? 
live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live", + live.flags_in_flags,live.flags_on_stack); } static void flags_to_stack(void) { - if (live.flags_on_stack==VALID) - return; - if (!live.flags_are_important) { + if (live.flags_on_stack==VALID) + return; + if (!live.flags_are_important) { + live.flags_on_stack=VALID; + return; + } + Dif (live.flags_in_flags!=VALID) + jit_abort("flags_to_stack != VALID"); + else { + int tmp; + tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1); + raw_flags_to_reg(tmp); + unlock2(tmp); + } live.flags_on_stack=VALID; - return; - } - Dif (live.flags_in_flags!=VALID) - abort(); - else { - int tmp; - tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1); - raw_flags_to_reg(tmp); - unlock2(tmp); - } - live.flags_on_stack=VALID; } -static __inline__ void clobber_flags(void) +static inline void clobber_flags(void) { - if (live.flags_in_flags==VALID && live.flags_on_stack!=VALID) - flags_to_stack(); - live.flags_in_flags=TRASH; + if (live.flags_in_flags==VALID && live.flags_on_stack!=VALID) + flags_to_stack(); + live.flags_in_flags=TRASH; } /* Prepare for leaving the compiled stuff */ -static __inline__ void flush_flags(void) +static inline void flush_flags(void) { - flags_to_stack(); - return; + flags_to_stack(); + return; } int touchcnt; @@ -860,10 +1240,14 @@ struct regusage { uae_u16 wmask; }; +#if 0 static inline void ru_set(uae_u16 *mask, int reg) { #if USE_OPTIMIZED_CALLS *mask |= 1 << reg; +#else + UNUSED(mask); + UNUSED(reg); #endif } @@ -872,6 +1256,8 @@ static inline bool ru_get(const uae_u16 *mask, int reg) #if USE_OPTIMIZED_CALLS return (*mask & (1 << reg)); #else + UNUSED(mask); + UNUSED(reg); /* Default: instruction reads & write to register */ return true; #endif @@ -944,6 +1330,7 @@ static void ru_fill_ea(regusage *ru, int reg, amodes mode, /* TODO: split into a static initialization part and a dynamic one (instructions depending on extension words) */ + static void ru_fill(regusage *ru, uae_u32 opcode) { m68k_pc_offset 
+= 2; @@ -1080,11 +1467,11 @@ static void ru_fill(regusage *ru, uae_u32 opcode) } if (!handled) { - write_log("ru_fill: %04x = { %04x, %04x }\n", + jit_abort("ru_fill: %04x = { %04x, %04x }", real_opcode, ru->rmask, ru->wmask); - abort(); } } +#endif /******************************************************************** * register allocation per block logging * @@ -1099,7 +1486,8 @@ static uae_s8 nstate[N_REGS]; #define L_NEEDED -2 #define L_UNNEEDED -3 -static __inline__ void big_to_small_state(bigstate * b, smallstate * s) +#if USE_MATCH +static inline void big_to_small_state(bigstate * /* b */, smallstate * s) { int i; @@ -1109,7 +1497,7 @@ static __inline__ void big_to_small_state(bigstate * b, smallstate * s) s->nat[i] = nstate[i]; } -static __inline__ int callers_need_recompile(bigstate * b, smallstate * s) +static inline int callers_need_recompile(bigstate * /* b */, smallstate * s) { int i; int reverse = 0; @@ -1131,642 +1519,640 @@ static __inline__ int callers_need_recompile(bigstate * b, smallstate * s) * callers */ return 0; } +#endif -static __inline__ void log_startblock(void) +static inline void log_startblock(void) { - int i; + int i; - for (i = 0; i < VREGS; i++) { - vstate[i] = L_UNKNOWN; - vwritten[i] = 0; - } - for (i = 0; i < N_REGS; i++) - nstate[i] = L_UNKNOWN; + for (i = 0; i < VREGS; i++) { + vstate[i] = L_UNKNOWN; + vwritten[i] = 0; + } + for (i = 0; i < N_REGS; i++) + nstate[i] = L_UNKNOWN; } /* Using an n-reg for a temp variable */ -static __inline__ void log_isused(int n) +static inline void log_isused(int n) { - if (nstate[n] == L_UNKNOWN) - nstate[n] = L_UNAVAIL; + if (nstate[n] == L_UNKNOWN) + nstate[n] = L_UNAVAIL; } -static __inline__ void log_visused(int r) +static inline void log_visused(int r) { - if (vstate[r] == L_UNKNOWN) - vstate[r] = L_NEEDED; + if (vstate[r] == L_UNKNOWN) + vstate[r] = L_NEEDED; } -static __inline__ void do_load_reg(int n, int r) +static inline void do_load_reg(int n, int r) { - if (r == FLAGTMP) - 
raw_load_flagreg(n, r); - else if (r == FLAGX) - raw_load_flagx(n, r); - else - raw_mov_l_rm(n, (uintptr) live.state[r].mem); + if (r == FLAGTMP) + raw_load_flagreg(n); + else if (r == FLAGX) + raw_load_flagx(n); + else + compemu_raw_mov_l_rm(n, (uintptr) live.state[r].mem); } -static __inline__ void check_load_reg(int n, int r) +#if 0 +static inline void check_load_reg(int n, int r) { - raw_mov_l_rm(n, (uintptr) live.state[r].mem); + compemu_raw_mov_l_rm(n, (uintptr) live.state[r].mem); } +#endif -static __inline__ void log_vwrite(int r) +static inline void log_vwrite(int r) { - vwritten[r] = 1; + vwritten[r] = 1; } /* Using an n-reg to hold a v-reg */ -static __inline__ void log_isreg(int n, int r) +static inline void log_isreg(int n, int r) { - static int count = 0; - - if (nstate[n] == L_UNKNOWN && r < 16 && !vwritten[r] && USE_MATCH) - nstate[n] = r; - else { - do_load_reg(n, r); - if (nstate[n] == L_UNKNOWN) - nstate[n] = L_UNAVAIL; - } - if (vstate[r] == L_UNKNOWN) - vstate[r] = L_NEEDED; + if (nstate[n] == L_UNKNOWN && r < 16 && !vwritten[r] && USE_MATCH) + nstate[n] = r; + else { + do_load_reg(n, r); + if (nstate[n] == L_UNKNOWN) + nstate[n] = L_UNAVAIL; + } + if (vstate[r] == L_UNKNOWN) + vstate[r] = L_NEEDED; } -static __inline__ void log_clobberreg(int r) +static inline void log_clobberreg(int r) { - if (vstate[r] == L_UNKNOWN) - vstate[r] = L_UNNEEDED; + if (vstate[r] == L_UNKNOWN) + vstate[r] = L_UNNEEDED; } /* This ends all possibility of clever register allocation */ -static __inline__ void log_flush(void) +static inline void log_flush(void) { - int i; - - for (i = 0; i < VREGS; i++) - if (vstate[i] == L_UNKNOWN) - vstate[i] = L_NEEDED; - for (i = 0; i < N_REGS; i++) - if (nstate[i] == L_UNKNOWN) - nstate[i] = L_UNAVAIL; + int i; + + for (i = 0; i < VREGS; i++) + if (vstate[i] == L_UNKNOWN) + vstate[i] = L_NEEDED; + for (i = 0; i < N_REGS; i++) + if (nstate[i] == L_UNKNOWN) + nstate[i] = L_UNAVAIL; } -static __inline__ void log_dump(void) +static 
inline void log_dump(void) { - int i; - - return; - - write_log("----------------------\n"); - for (i = 0; i < N_REGS; i++) { - switch (nstate[i]) { - case L_UNKNOWN: - write_log("Nat %d : UNKNOWN\n", i); - break; - case L_UNAVAIL: - write_log("Nat %d : UNAVAIL\n", i); - break; - default: - write_log("Nat %d : %d\n", i, nstate[i]); - break; + int i; + + return; + + jit_log("----------------------"); + for (i = 0; i < N_REGS; i++) { + switch (nstate[i]) { + case L_UNKNOWN: + jit_log("Nat %d : UNKNOWN", i); + break; + case L_UNAVAIL: + jit_log("Nat %d : UNAVAIL", i); + break; + default: + jit_log("Nat %d : %d", i, nstate[i]); + break; + } + } + for (i = 0; i < VREGS; i++) { + if (vstate[i] == L_UNNEEDED) { + jit_log("Virt %d: UNNEEDED", i); + } } - } - for (i = 0; i < VREGS; i++) { - if (vstate[i] == L_UNNEEDED) - write_log("Virt %d: UNNEEDED\n", i); - } } /******************************************************************** * register status handling. EMIT TIME! * ********************************************************************/ -static __inline__ void set_status(int r, int status) +static inline void set_status(int r, int status) { if (status == ISCONST) log_clobberreg(r); - live.state[r].status=status; + live.state[r].status=status; } -static __inline__ int isinreg(int r) +static inline int isinreg(int r) { - return live.state[r].status==CLEAN || live.state[r].status==DIRTY; + return live.state[r].status==CLEAN || live.state[r].status==DIRTY; } -static __inline__ void adjust_nreg(int r, uae_u32 val) +static inline void adjust_nreg(int r, uae_u32 val) { - if (!val) - return; - raw_lea_l_brr(r,r,val); + if (!val) + return; + compemu_raw_lea_l_brr(r,r,val); } -static void tomem(int r) +static void tomem(int r) { - int rr=live.state[r].realreg; + int rr=live.state[r].realreg; - if (isinreg(r)) { - if (live.state[r].val && live.nat[rr].nholds==1 - && !live.nat[rr].locked) { - // write_log("RemovingA offset %x from reg %d (%d) at %p\n", - // 
live.state[r].val,r,rr,target); - adjust_nreg(rr,live.state[r].val); - live.state[r].val=0; - live.state[r].dirtysize=4; - set_status(r,DIRTY); + if (isinreg(r)) { + if (live.state[r].val && live.nat[rr].nholds==1 + && !live.nat[rr].locked) { + jit_log2("RemovingA offset %x from reg %d (%d) at %p", live.state[r].val,r,rr,target); + adjust_nreg(rr,live.state[r].val); + live.state[r].val=0; + live.state[r].dirtysize=4; + set_status(r,DIRTY); + } } - } - if (live.state[r].status==DIRTY) { - switch (live.state[r].dirtysize) { - case 1: raw_mov_b_mr((uintptr)live.state[r].mem,rr); break; - case 2: raw_mov_w_mr((uintptr)live.state[r].mem,rr); break; - case 4: raw_mov_l_mr((uintptr)live.state[r].mem,rr); break; - default: abort(); + if (live.state[r].status==DIRTY) { + switch (live.state[r].dirtysize) { + case 1: compemu_raw_mov_b_mr((uintptr)live.state[r].mem,rr); break; + case 2: compemu_raw_mov_w_mr((uintptr)live.state[r].mem,rr); break; + case 4: compemu_raw_mov_l_mr((uintptr)live.state[r].mem,rr); break; + default: abort(); + } + log_vwrite(r); + set_status(r,CLEAN); + live.state[r].dirtysize=0; } - log_vwrite(r); - set_status(r,CLEAN); - live.state[r].dirtysize=0; - } } -static __inline__ int isconst(int r) +static inline int isconst(int r) { - return live.state[r].status==ISCONST; + return live.state[r].status==ISCONST; } int is_const(int r) { - return isconst(r); + return isconst(r); } -static __inline__ void writeback_const(int r) +static inline void writeback_const(int r) { - if (!isconst(r)) - return; - Dif (live.state[r].needflush==NF_HANDLER) { - write_log("Trying to write back constant NF_HANDLER!\n"); - abort(); - } + if (!isconst(r)) + return; + Dif (live.state[r].needflush==NF_HANDLER) { + jit_abort("Trying to write back constant NF_HANDLER!"); + } - raw_mov_l_mi((uintptr)live.state[r].mem,live.state[r].val); + compemu_raw_mov_l_mi((uintptr)live.state[r].mem,live.state[r].val); log_vwrite(r); - live.state[r].val=0; - set_status(r,INMEM); + 
live.state[r].val=0; + set_status(r,INMEM); } -static __inline__ void tomem_c(int r) +static inline void tomem_c(int r) { - if (isconst(r)) { - writeback_const(r); - } - else + if (isconst(r)) { + writeback_const(r); + } + else + tomem(r); +} + +static void evict(int r) +{ + int rr; + + if (!isinreg(r)) + return; tomem(r); + rr=live.state[r].realreg; + + Dif (live.nat[rr].locked && + live.nat[rr].nholds==1) { + jit_abort("register %d in nreg %d is locked!",r,live.state[r].realreg); + } + + live.nat[rr].nholds--; + if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */ + int topreg=live.nat[rr].holds[live.nat[rr].nholds]; + int thisind=live.state[r].realind; + + live.nat[rr].holds[thisind]=topreg; + live.state[topreg].realind=thisind; + } + live.state[r].realreg=-1; + set_status(r,INMEM); } -static void evict(int r) +static inline void free_nreg(int r) { - int rr; + int i=live.nat[r].nholds; - if (!isinreg(r)) - return; - tomem(r); - rr=live.state[r].realreg; + while (i) { + int vr; - Dif (live.nat[rr].locked && - live.nat[rr].nholds==1) { - write_log("register %d in nreg %d is locked!\n",r,live.state[r].realreg); - abort(); - } - - live.nat[rr].nholds--; - if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */ - int topreg=live.nat[rr].holds[live.nat[rr].nholds]; - int thisind=live.state[r].realind; - - live.nat[rr].holds[thisind]=topreg; - live.state[topreg].realind=thisind; - } - live.state[r].realreg=-1; - set_status(r,INMEM); -} - -static __inline__ void free_nreg(int r) -{ - int i=live.nat[r].nholds; - - while (i) { - int vr; - - --i; - vr=live.nat[r].holds[i]; - evict(vr); - } - Dif (live.nat[r].nholds!=0) { - write_log("Failed to free nreg %d, nholds is %d\n",r,live.nat[r].nholds); - abort(); - } + --i; + vr=live.nat[r].holds[i]; + evict(vr); + } + Dif (live.nat[r].nholds!=0) { + jit_abort("Failed to free nreg %d, nholds is %d",r,live.nat[r].nholds); + } } /* Use with care! 
*/ -static __inline__ void isclean(int r) +static inline void isclean(int r) { - if (!isinreg(r)) - return; - live.state[r].validsize=4; - live.state[r].dirtysize=0; - live.state[r].val=0; - set_status(r,CLEAN); -} - -static __inline__ void disassociate(int r) -{ - isclean(r); - evict(r); -} - -static __inline__ void set_const(int r, uae_u32 val) -{ - disassociate(r); - live.state[r].val=val; - set_status(r,ISCONST); -} - -static __inline__ uae_u32 get_offset(int r) -{ - return live.state[r].val; -} - -static int alloc_reg_hinted(int r, int size, int willclobber, int hint) -{ - int bestreg; - uae_s32 when; - int i; - uae_s32 badness=0; /* to shut up gcc */ - bestreg=-1; - when=2000000000; - - /* XXX use a regalloc_order table? */ - for (i=0;i0) { - free_nreg(bestreg); - } - if (isinreg(r)) { - int rr=live.state[r].realreg; - /* This will happen if we read a partially dirty register at a - bigger size */ - Dif (willclobber || live.state[r].validsize>=size) - abort(); - Dif (live.nat[rr].nholds!=1) - abort(); - if (size==4 && live.state[r].validsize==2) { - log_isused(bestreg); - log_visused(r); - raw_mov_l_rm(bestreg,(uintptr)live.state[r].mem); - raw_bswap_32(bestreg); - raw_zero_extend_16_rr(rr,rr); - raw_zero_extend_16_rr(bestreg,bestreg); - raw_bswap_32(bestreg); - raw_lea_l_brr_indexed(rr,rr,bestreg,1,0); - live.state[r].validsize=4; - live.nat[rr].touched=touchcnt++; - return rr; - } - if (live.state[r].validsize==1) { - /* Nothing yet */ - } - evict(r); - } - - if (!willclobber) { - if (live.state[r].status!=UNDEF) { - if (isconst(r)) { - raw_mov_l_ri(bestreg,live.state[r].val); - live.state[r].val=0; - live.state[r].dirtysize=4; - set_status(r,DIRTY); - log_isused(bestreg); - } - else { - log_isreg(bestreg, r); /* This will also load it! 
*/ - live.state[r].dirtysize=0; - set_status(r,CLEAN); - } - } - else { - live.state[r].val=0; - live.state[r].dirtysize=0; - set_status(r,CLEAN); - log_isused(bestreg); - } + if (!isinreg(r)) + return; live.state[r].validsize=4; - } - else { /* this is the easiest way, but not optimal. FIXME! */ - /* Now it's trickier, but hopefully still OK */ - if (!isconst(r) || size==4) { - live.state[r].validsize=size; - live.state[r].dirtysize=size; - live.state[r].val=0; - set_status(r,DIRTY); - if (size == 4) { - log_clobberreg(r); + live.state[r].dirtysize=0; + live.state[r].val=0; + set_status(r,CLEAN); +} + +static inline void disassociate(int r) +{ + isclean(r); + evict(r); +} + +/* XXFIXME: val may be 64bit address for PC_P */ +static inline void set_const(int r, uae_u32 val) +{ + disassociate(r); + live.state[r].val=val; + set_status(r,ISCONST); +} + +static inline uae_u32 get_offset(int r) +{ + return live.state[r].val; +} + +static int alloc_reg_hinted(int r, int size, int willclobber, int hint) +{ + int bestreg; + uae_s32 when; + int i; + uae_s32 badness=0; /* to shut up gcc */ + bestreg=-1; + when=2000000000; + + /* XXX use a regalloc_order table? 
*/ + for (i=0;i0) { + free_nreg(bestreg); + } + if (isinreg(r)) { + int rr=live.state[r].realreg; + /* This will happen if we read a partially dirty register at a + bigger size */ + Dif (willclobber || live.state[r].validsize>=size) + jit_abort("willclobber || live.state[r].validsize>=size"); + Dif (live.nat[rr].nholds!=1) + jit_abort("live.nat[rr].nholds!=1"); + if (size==4 && live.state[r].validsize==2) { log_isused(bestreg); + log_visused(r); + compemu_raw_mov_l_rm(bestreg,(uintptr)live.state[r].mem); + compemu_raw_bswap_32(bestreg); + compemu_raw_zero_extend_16_rr(rr,rr); + compemu_raw_zero_extend_16_rr(bestreg,bestreg); + compemu_raw_bswap_32(bestreg); + compemu_raw_lea_l_rr_indexed(rr, rr, bestreg, 1); + live.state[r].validsize=4; + live.nat[rr].touched=touchcnt++; + return rr; + } + if (live.state[r].validsize==1) { + /* Nothing yet */ + } + evict(r); + } + + if (!willclobber) { + if (live.state[r].status!=UNDEF) { + if (isconst(r)) { + compemu_raw_mov_l_ri(bestreg,live.state[r].val); + live.state[r].val=0; + live.state[r].dirtysize=4; + set_status(r,DIRTY); + log_isused(bestreg); + } + else { + log_isreg(bestreg, r); /* This will also load it! */ + live.state[r].dirtysize=0; + set_status(r,CLEAN); + } } else { - log_visused(r); + live.state[r].val=0; + live.state[r].dirtysize=0; + set_status(r,CLEAN); + log_isused(bestreg); + } + live.state[r].validsize=4; + } + else { /* this is the easiest way, but not optimal. FIXME! 
*/ + /* Now it's trickier, but hopefully still OK */ + if (!isconst(r) || size==4) { + live.state[r].validsize=size; + live.state[r].dirtysize=size; + live.state[r].val=0; + set_status(r,DIRTY); + if (size == 4) { + log_clobberreg(r); + log_isused(bestreg); + } + else { + log_visused(r); + log_isused(bestreg); + } + } + else { + if (live.state[r].status!=UNDEF) + compemu_raw_mov_l_ri(bestreg,live.state[r].val); + live.state[r].val=0; + live.state[r].validsize=4; + live.state[r].dirtysize=4; + set_status(r,DIRTY); log_isused(bestreg); } } - else { - if (live.state[r].status!=UNDEF) - raw_mov_l_ri(bestreg,live.state[r].val); - live.state[r].val=0; - live.state[r].validsize=4; - live.state[r].dirtysize=4; - set_status(r,DIRTY); - log_isused(bestreg); - } - } - live.state[r].realreg=bestreg; - live.state[r].realind=live.nat[bestreg].nholds; - live.nat[bestreg].touched=touchcnt++; - live.nat[bestreg].holds[live.nat[bestreg].nholds]=r; - live.nat[bestreg].nholds++; + live.state[r].realreg=bestreg; + live.state[r].realind=live.nat[bestreg].nholds; + live.nat[bestreg].touched=touchcnt++; + live.nat[bestreg].holds[live.nat[bestreg].nholds]=r; + live.nat[bestreg].nholds++; - return bestreg; + return bestreg; } -static int alloc_reg(int r, int size, int willclobber) +/* +static int alloc_reg(int r, int size, int willclobber) { - return alloc_reg_hinted(r,size,willclobber,-1); + return alloc_reg_hinted(r,size,willclobber,-1); +} +*/ + +static void unlock2(int r) +{ + Dif (!live.nat[r].locked) + jit_abort("unlock2 %d not locked", r); + live.nat[r].locked--; } -static void unlock2(int r) +static void setlock(int r) { - Dif (!live.nat[r].locked) - abort(); - live.nat[r].locked--; -} - -static void setlock(int r) -{ - live.nat[r].locked++; + live.nat[r].locked++; } static void mov_nregs(int d, int s) { - int ns=live.nat[s].nholds; - int nd=live.nat[d].nholds; - int i; + int nd=live.nat[d].nholds; + int i; - if (s==d) - return; + if (s==d) + return; - if (nd>0) - free_nreg(d); + if 
(nd>0) + free_nreg(d); log_isused(d); - raw_mov_l_rr(d,s); + compemu_raw_mov_l_rr(d,s); - for (i=0;i=size) { - n=live.state[r].realreg; - switch(size) { - case 1: - if (live.nat[n].canbyte || spec>=0) { - answer=n; - } - break; - case 2: - if (live.nat[n].canword || spec>=0) { - answer=n; - } - break; - case 4: - answer=n; - break; - default: abort(); - } - if (answer<0) - evict(r); - } - /* either the value was in memory to start with, or it was evicted and - is in memory now */ - if (answer<0) { - answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec); - } + int n; + int answer=-1; - if (spec>=0 && spec!=answer) { - /* Too bad */ - mov_nregs(spec,answer); - answer=spec; - } - live.nat[answer].locked++; - live.nat[answer].touched=touchcnt++; - return answer; + record_register(r); + if (live.state[r].status==UNDEF) { + jit_log("WARNING: Unexpected read of undefined register %d",r); + } + if (!can_offset) + remove_offset(r,spec); + + if (isinreg(r) && live.state[r].validsize>=size) { + n=live.state[r].realreg; + switch(size) { + case 1: + if (live.nat[n].canbyte || spec>=0) { + answer=n; + } + break; + case 2: + if (live.nat[n].canword || spec>=0) { + answer=n; + } + break; + case 4: + answer=n; + break; + default: abort(); + } + if (answer<0) + evict(r); + } + /* either the value was in memory to start with, or it was evicted and + is in memory now */ + if (answer<0) { + answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec); + } + + if (spec>=0 && spec!=answer) { + /* Too bad */ + mov_nregs(spec,answer); + answer=spec; + } + live.nat[answer].locked++; + live.nat[answer].touched=touchcnt++; + return answer; } static int readreg(int r, int size) { - return readreg_general(r,size,-1,0); + return readreg_general(r,size,-1,0); } static int readreg_specific(int r, int size, int spec) { - return readreg_general(r,size,spec,0); + return readreg_general(r,size,spec,0); } static int readreg_offset(int r, int size) { - return readreg_general(r,size,-1,1); + return 
readreg_general(r,size,-1,1); } /* writereg_general(r, size, spec) @@ -1780,3168 +2166,515 @@ static int readreg_offset(int r, int size) * OUTPUT * - hard (physical, x86 here) register allocated to virtual register r */ -static __inline__ int writereg_general(int r, int size, int spec) +static inline int writereg_general(int r, int size, int spec) { - int n; - int answer=-1; + int n; + int answer=-1; - record_register(r); - if (size<4) { - remove_offset(r,spec); - } - - make_exclusive(r,size,spec); - if (isinreg(r)) { - int nvsize=size>live.state[r].validsize?size:live.state[r].validsize; - int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize; - n=live.state[r].realreg; - - Dif (live.nat[n].nholds!=1) - abort(); - switch(size) { - case 1: - if (live.nat[n].canbyte || spec>=0) { - live.state[r].dirtysize=ndsize; - live.state[r].validsize=nvsize; - answer=n; - } - break; - case 2: - if (live.nat[n].canword || spec>=0) { - live.state[r].dirtysize=ndsize; - live.state[r].validsize=nvsize; - answer=n; - } - break; - case 4: - live.state[r].dirtysize=ndsize; - live.state[r].validsize=nvsize; - answer=n; - break; - default: abort(); + record_register(r); + if (size<4) { + remove_offset(r,spec); } - if (answer<0) - evict(r); - } - /* either the value was in memory to start with, or it was evicted and - is in memory now */ - if (answer<0) { - answer=alloc_reg_hinted(r,size,1,spec); - } - if (spec>=0 && spec!=answer) { - mov_nregs(spec,answer); - answer=spec; - } - if (live.state[r].status==UNDEF) - live.state[r].validsize=4; - live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize; - live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize; - - live.nat[answer].locked++; - live.nat[answer].touched=touchcnt++; - if (size==4) { - live.state[r].val=0; - } - else { - Dif (live.state[r].val) { - write_log("Problem with val\n"); - abort(); + + make_exclusive(r,size,spec); + if (isinreg(r)) { + int 
nvsize=size>live.state[r].validsize?size:live.state[r].validsize; + int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize; + n=live.state[r].realreg; + + Dif (live.nat[n].nholds!=1) + jit_abort("live.nat[%d].nholds!=1", n); + switch(size) { + case 1: + if (live.nat[n].canbyte || spec>=0) { + live.state[r].dirtysize=ndsize; + live.state[r].validsize=nvsize; + answer=n; + } + break; + case 2: + if (live.nat[n].canword || spec>=0) { + live.state[r].dirtysize=ndsize; + live.state[r].validsize=nvsize; + answer=n; + } + break; + case 4: + live.state[r].dirtysize=ndsize; + live.state[r].validsize=nvsize; + answer=n; + break; + default: abort(); + } + if (answer<0) + evict(r); } - } - set_status(r,DIRTY); - return answer; + /* either the value was in memory to start with, or it was evicted and + is in memory now */ + if (answer<0) { + answer=alloc_reg_hinted(r,size,1,spec); + } + if (spec>=0 && spec!=answer) { + mov_nregs(spec,answer); + answer=spec; + } + if (live.state[r].status==UNDEF) + live.state[r].validsize=4; + live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize; + live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize; + + live.nat[answer].locked++; + live.nat[answer].touched=touchcnt++; + if (size==4) { + live.state[r].val=0; + } + else { + Dif (live.state[r].val) { + jit_abort("Problem with val"); + } + } + set_status(r,DIRTY); + return answer; } static int writereg(int r, int size) { - return writereg_general(r,size,-1); + return writereg_general(r,size,-1); } static int writereg_specific(int r, int size, int spec) { - return writereg_general(r,size,spec); + return writereg_general(r,size,spec); } -static __inline__ int rmw_general(int r, int wsize, int rsize, int spec) +static inline int rmw_general(int r, int wsize, int rsize, int spec) { - int n; - int answer=-1; - - record_register(r); + int n; + int answer=-1; + + record_register(r); if (live.state[r].status==UNDEF) { - 
write_log("WARNING: Unexpected read of undefined register %d\n",r); + jit_log("WARNING: Unexpected read of undefined register %d",r); } - remove_offset(r,spec); - make_exclusive(r,0,spec); + remove_offset(r,spec); + make_exclusive(r,0,spec); - Dif (wsize=rsize) { - n=live.state[r].realreg; - Dif (live.nat[n].nholds!=1) - abort(); - - switch(rsize) { - case 1: - if (live.nat[n].canbyte || spec>=0) { - answer=n; - } - break; - case 2: - if (live.nat[n].canword || spec>=0) { - answer=n; - } - break; - case 4: - answer=n; - break; - default: abort(); + Dif (wsize=0?4:rsize,0,spec); - } + if (isinreg(r) && live.state[r].validsize>=rsize) { + n=live.state[r].realreg; + Dif (live.nat[n].nholds!=1) + jit_abort("live.nat[%d].nholds!=1", n); - if (spec>=0 && spec!=answer) { - /* Too bad */ - mov_nregs(spec,answer); - answer=spec; - } - if (wsize>live.state[r].dirtysize) - live.state[r].dirtysize=wsize; - if (wsize>live.state[r].validsize) - live.state[r].validsize=wsize; - set_status(r,DIRTY); + switch(rsize) { + case 1: + if (live.nat[n].canbyte || spec>=0) { + answer=n; + } + break; + case 2: + if (live.nat[n].canword || spec>=0) { + answer=n; + } + break; + case 4: + answer=n; + break; + default: abort(); + } + if (answer<0) + evict(r); + } + /* either the value was in memory to start with, or it was evicted and + is in memory now */ + if (answer<0) { + answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec); + } - live.nat[answer].locked++; - live.nat[answer].touched=touchcnt++; + if (spec>=0 && spec!=answer) { + /* Too bad */ + mov_nregs(spec,answer); + answer=spec; + } + if (wsize>live.state[r].dirtysize) + live.state[r].dirtysize=wsize; + if (wsize>live.state[r].validsize) + live.state[r].validsize=wsize; + set_status(r,DIRTY); - Dif (live.state[r].val) { - write_log("Problem with val(rmw)\n"); - abort(); - } - return answer; + live.nat[answer].locked++; + live.nat[answer].touched=touchcnt++; + + Dif (live.state[r].val) { + jit_abort("Problem with val(rmw)"); + } + return 
answer; } -static int rmw(int r, int wsize, int rsize) +static int rmw(int r, int wsize, int rsize) { - return rmw_general(r,wsize,rsize,-1); + return rmw_general(r,wsize,rsize,-1); } -static int rmw_specific(int r, int wsize, int rsize, int spec) +static int rmw_specific(int r, int wsize, int rsize, int spec) { - return rmw_general(r,wsize,rsize,spec); + return rmw_general(r,wsize,rsize,spec); } /* needed for restoring the carry flag on non-P6 cores */ -static void bt_l_ri_noclobber(R4 r, IMM i) +static void bt_l_ri_noclobber(RR4 r, IMM i) { - int size=4; - if (i<16) - size=2; - r=readreg(r,size); - raw_bt_l_ri(r,i); - unlock2(r); + int size=4; + if (i<16) + size=2; + r=readreg(r,size); + compemu_raw_bt_l_ri(r,i); + unlock2(r); } /******************************************************************** * FPU register status handling. EMIT TIME! * ********************************************************************/ -static void f_tomem(int r) +static void f_tomem(int r) { - if (live.fate[r].status==DIRTY) { -#if USE_LONG_DOUBLE - raw_fmov_ext_mr((uintptr)live.fate[r].mem,live.fate[r].realreg); + if (live.fate[r].status==DIRTY) { +#if defined(USE_LONG_DOUBLE) + raw_fmov_ext_mr((uintptr)live.fate[r].mem,live.fate[r].realreg); #else - raw_fmov_mr((uintptr)live.fate[r].mem,live.fate[r].realreg); + raw_fmov_mr((uintptr)live.fate[r].mem,live.fate[r].realreg); #endif - live.fate[r].status=CLEAN; - } + live.fate[r].status=CLEAN; + } } -static void f_tomem_drop(int r) +static void f_tomem_drop(int r) { - if (live.fate[r].status==DIRTY) { -#if USE_LONG_DOUBLE - raw_fmov_ext_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg); + if (live.fate[r].status==DIRTY) { +#if defined(USE_LONG_DOUBLE) + raw_fmov_ext_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg); #else - raw_fmov_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg); + raw_fmov_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg); #endif - live.fate[r].status=INMEM; - } + live.fate[r].status=INMEM; + } } 
-static __inline__ int f_isinreg(int r) +static inline int f_isinreg(int r) { - return live.fate[r].status==CLEAN || live.fate[r].status==DIRTY; + return live.fate[r].status==CLEAN || live.fate[r].status==DIRTY; } static void f_evict(int r) { - int rr; + int rr; - if (!f_isinreg(r)) - return; - rr=live.fate[r].realreg; - if (live.fat[rr].nholds==1) - f_tomem_drop(r); - else - f_tomem(r); + if (!f_isinreg(r)) + return; + rr=live.fate[r].realreg; + if (live.fat[rr].nholds==1) + f_tomem_drop(r); + else + f_tomem(r); - Dif (live.fat[rr].locked && - live.fat[rr].nholds==1) { - write_log("FPU register %d in nreg %d is locked!\n",r,live.fate[r].realreg); - abort(); - } + Dif (live.fat[rr].locked && + live.fat[rr].nholds==1) { + jit_abort("FPU register %d in nreg %d is locked!",r,live.fate[r].realreg); + } - live.fat[rr].nholds--; - if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */ - int topreg=live.fat[rr].holds[live.fat[rr].nholds]; - int thisind=live.fate[r].realind; - live.fat[rr].holds[thisind]=topreg; - live.fate[topreg].realind=thisind; - } - live.fate[r].status=INMEM; - live.fate[r].realreg=-1; + live.fat[rr].nholds--; + if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */ + int topreg=live.fat[rr].holds[live.fat[rr].nholds]; + int thisind=live.fate[r].realind; + live.fat[rr].holds[thisind]=topreg; + live.fate[topreg].realind=thisind; + } + live.fate[r].status=INMEM; + live.fate[r].realreg=-1; } -static __inline__ void f_free_nreg(int r) +static inline void f_free_nreg(int r) { - int i=live.fat[r].nholds; + int i=live.fat[r].nholds; - while (i) { - int vr; + while (i) { + int vr; - --i; - vr=live.fat[r].holds[i]; - f_evict(vr); - } - Dif (live.fat[r].nholds!=0) { - write_log("Failed to free nreg %d, nholds is %d\n",r,live.fat[r].nholds); - abort(); - } + --i; + vr=live.fat[r].holds[i]; + f_evict(vr); + } + Dif (live.fat[r].nholds!=0) { + jit_abort("Failed to free nreg %d, nholds is %d",r,live.fat[r].nholds); + } } /* Use with care! 
*/ -static __inline__ void f_isclean(int r) +static inline void f_isclean(int r) { - if (!f_isinreg(r)) - return; - live.fate[r].status=CLEAN; -} - -static __inline__ void f_disassociate(int r) -{ - f_isclean(r); - f_evict(r); -} - - - -static int f_alloc_reg(int r, int willclobber) -{ - int bestreg; - uae_s32 when; - int i; - uae_s32 badness; - bestreg=-1; - when=2000000000; - for (i=N_FREGS;i--;) { - badness=live.fat[i].touched; - if (live.fat[i].nholds==0) - badness=0; - - if (!live.fat[i].locked && badness0) { - f_free_nreg(bestreg); - } - if (f_isinreg(r)) { - f_evict(r); - } - - if (!willclobber) { - if (live.fate[r].status!=UNDEF) { -#if USE_LONG_DOUBLE - raw_fmov_ext_rm(bestreg,(uintptr)live.fate[r].mem); -#else - raw_fmov_rm(bestreg,(uintptr)live.fate[r].mem); -#endif - } + if (!f_isinreg(r)) + return; live.fate[r].status=CLEAN; - } - else { - live.fate[r].status=DIRTY; - } - live.fate[r].realreg=bestreg; - live.fate[r].realind=live.fat[bestreg].nholds; - live.fat[bestreg].touched=touchcnt++; - live.fat[bestreg].holds[live.fat[bestreg].nholds]=r; - live.fat[bestreg].nholds++; - - return bestreg; } -static void f_unlock(int r) +static inline void f_disassociate(int r) { - Dif (!live.fat[r].locked) - abort(); - live.fat[r].locked--; + f_isclean(r); + f_evict(r); } -static void f_setlock(int r) + + +static int f_alloc_reg(int r, int willclobber) { - live.fat[r].locked++; + int bestreg; + uae_s32 when; + int i; + uae_s32 badness; + bestreg=-1; + when=2000000000; + for (i=N_FREGS;i--;) { + badness=live.fat[i].touched; + if (live.fat[i].nholds==0) + badness=0; + + if (!live.fat[i].locked && badness0) { + f_free_nreg(bestreg); + } + if (f_isinreg(r)) { + f_evict(r); + } + + if (!willclobber) { + if (live.fate[r].status!=UNDEF) { +#if defined(USE_LONG_DOUBLE) + raw_fmov_ext_rm(bestreg,(uintptr)live.fate[r].mem); +#else + raw_fmov_rm(bestreg,(uintptr)live.fate[r].mem); +#endif + } + live.fate[r].status=CLEAN; + } + else { + live.fate[r].status=DIRTY; + } + 
live.fate[r].realreg=bestreg; + live.fate[r].realind=live.fat[bestreg].nholds; + live.fat[bestreg].touched=touchcnt++; + live.fat[bestreg].holds[live.fat[bestreg].nholds]=r; + live.fat[bestreg].nholds++; + + return bestreg; } -static __inline__ int f_readreg(int r) +static void f_unlock(int r) { - int n; - int answer=-1; - - if (f_isinreg(r)) { - n=live.fate[r].realreg; - answer=n; - } - /* either the value was in memory to start with, or it was evicted and - is in memory now */ - if (answer<0) - answer=f_alloc_reg(r,0); - - live.fat[answer].locked++; - live.fat[answer].touched=touchcnt++; - return answer; + Dif (!live.fat[r].locked) + jit_abort ("unlock %d", r); + live.fat[r].locked--; } -static __inline__ void f_make_exclusive(int r, int clobber) +static void f_setlock(int r) { - freg_status oldstate; - int rr=live.fate[r].realreg; - int nr; - int nind; - int ndirt=0; - int i; + live.fat[r].locked++; +} - if (!f_isinreg(r)) - return; - if (live.fat[rr].nholds==1) - return; - for (i=0;i>=i; - return; - } - CLOBBER_SHRL; - r=rmw(r,4,4); - raw_shrl_l_ri(r,i); - unlock2(r); -} -MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i)) - -MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i)) -{ - if (!i && !needflags) - return; - CLOBBER_SHRL; - r=rmw(r,2,2); - raw_shrl_w_ri(r,i); - unlock2(r); -} -MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i)) - -MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i)) -{ - if (!i && !needflags) - return; - CLOBBER_SHRL; - r=rmw(r,1,1); - raw_shrl_b_ri(r,i); - unlock2(r); -} -MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i)) - -MIDFUNC(2,shra_l_ri,(RW4 r, IMM i)) -{ - if (!i && !needflags) - return; - CLOBBER_SHRA; - r=rmw(r,4,4); - raw_shra_l_ri(r,i); - unlock2(r); -} -MENDFUNC(2,shra_l_ri,(RW4 r, IMM i)) - -MIDFUNC(2,shra_w_ri,(RW2 r, IMM i)) -{ - if (!i && !needflags) - return; - CLOBBER_SHRA; - r=rmw(r,2,2); - raw_shra_w_ri(r,i); - unlock2(r); -} -MENDFUNC(2,shra_w_ri,(RW2 r, IMM i)) - -MIDFUNC(2,shra_b_ri,(RW1 r, IMM i)) -{ - if (!i && !needflags) - return; - CLOBBER_SHRA; - r=rmw(r,1,1); - raw_shra_b_ri(r,i); 
- unlock2(r); -} -MENDFUNC(2,shra_b_ri,(RW1 r, IMM i)) - -MIDFUNC(2,shra_l_rr,(RW4 d, R1 r)) -{ - if (isconst(r)) { - COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val); - return; - } - CLOBBER_SHRA; - r=readreg_specific(r,1,SHIFTCOUNT_NREG); - d=rmw(d,4,4); - Dif (r!=1) { - write_log("Illegal register %d in raw_rol_b\n",r); - abort(); - } - raw_shra_l_rr(d,r) ; - unlock2(r); - unlock2(d); -} -MENDFUNC(2,shra_l_rr,(RW4 d, R1 r)) - -MIDFUNC(2,shra_w_rr,(RW2 d, R1 r)) -{ /* Can only do this with r==1, i.e. cl */ - - if (isconst(r)) { - COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val); - return; - } - CLOBBER_SHRA; - r=readreg_specific(r,1,SHIFTCOUNT_NREG); - d=rmw(d,2,2); - Dif (r!=1) { - write_log("Illegal register %d in raw_shra_b\n",r); - abort(); - } - raw_shra_w_rr(d,r) ; - unlock2(r); - unlock2(d); -} -MENDFUNC(2,shra_w_rr,(RW2 d, R1 r)) - -MIDFUNC(2,shra_b_rr,(RW1 d, R1 r)) -{ /* Can only do this with r==1, i.e. cl */ - - if (isconst(r)) { - COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val); - return; - } - - CLOBBER_SHRA; - r=readreg_specific(r,1,SHIFTCOUNT_NREG); - d=rmw(d,1,1); - Dif (r!=1) { - write_log("Illegal register %d in raw_shra_b\n",r); - abort(); - } - raw_shra_b_rr(d,r) ; - unlock2(r); - unlock2(d); -} -MENDFUNC(2,shra_b_rr,(RW1 d, R1 r)) - - -MIDFUNC(2,setcc,(W1 d, IMM cc)) -{ - CLOBBER_SETCC; - d=writereg(d,1); - raw_setcc(d,cc); - unlock2(d); -} -MENDFUNC(2,setcc,(W1 d, IMM cc)) - -MIDFUNC(2,setcc_m,(IMM d, IMM cc)) -{ - CLOBBER_SETCC; - raw_setcc_m(d,cc); -} -MENDFUNC(2,setcc_m,(IMM d, IMM cc)) - -MIDFUNC(3,cmov_b_rr,(RW1 d, R1 s, IMM cc)) -{ - if (d==s) - return; - CLOBBER_CMOV; - s=readreg(s,1); - d=rmw(d,1,1); - raw_cmov_b_rr(d,s,cc); - unlock2(s); - unlock2(d); -} -MENDFUNC(3,cmov_b_rr,(RW1 d, R1 s, IMM cc)) - -MIDFUNC(3,cmov_w_rr,(RW2 d, R2 s, IMM cc)) -{ - if (d==s) - return; - CLOBBER_CMOV; - s=readreg(s,2); - d=rmw(d,2,2); - raw_cmov_w_rr(d,s,cc); - unlock2(s); - unlock2(d); -} -MENDFUNC(3,cmov_w_rr,(RW2 d, R2 s, IMM cc)) - 
-MIDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc)) -{ - if (d==s) - return; - CLOBBER_CMOV; - s=readreg(s,4); - d=rmw(d,4,4); - raw_cmov_l_rr(d,s,cc); - unlock2(s); - unlock2(d); -} -MENDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc)) - -MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc)) -{ - CLOBBER_CMOV; - d=rmw(d,4,4); - raw_cmov_l_rm(d,s,cc); - unlock2(d); -} -MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc)) - -MIDFUNC(2,bsf_l_rr,(W4 d, W4 s)) -{ - CLOBBER_BSF; - s = readreg(s, 4); - d = writereg(d, 4); - raw_bsf_l_rr(d, s); - unlock2(s); - unlock2(d); -} -MENDFUNC(2,bsf_l_rr,(W4 d, W4 s)) - -/* Set the Z flag depending on the value in s. Note that the - value has to be 0 or -1 (or, more precisely, for non-zero - values, bit 14 must be set)! */ -MIDFUNC(2,simulate_bsf,(W4 tmp, RW4 s)) -{ - CLOBBER_BSF; - s=rmw_specific(s,4,4,FLAG_NREG3); - tmp=writereg(tmp,4); - raw_flags_set_zero(s, tmp); - unlock2(tmp); - unlock2(s); -} -MENDFUNC(2,simulate_bsf,(W4 tmp, RW4 s)) - -MIDFUNC(2,imul_32_32,(RW4 d, R4 s)) -{ - CLOBBER_MUL; - s=readreg(s,4); - d=rmw(d,4,4); - raw_imul_32_32(d,s); - unlock2(s); - unlock2(d); -} -MENDFUNC(2,imul_32_32,(RW4 d, R4 s)) - -MIDFUNC(2,imul_64_32,(RW4 d, RW4 s)) -{ - CLOBBER_MUL; - s=rmw_specific(s,4,4,MUL_NREG2); - d=rmw_specific(d,4,4,MUL_NREG1); - raw_imul_64_32(d,s); - unlock2(s); - unlock2(d); -} -MENDFUNC(2,imul_64_32,(RW4 d, RW4 s)) - -MIDFUNC(2,mul_64_32,(RW4 d, RW4 s)) -{ - CLOBBER_MUL; - s=rmw_specific(s,4,4,MUL_NREG2); - d=rmw_specific(d,4,4,MUL_NREG1); - raw_mul_64_32(d,s); - unlock2(s); - unlock2(d); -} -MENDFUNC(2,mul_64_32,(RW4 d, RW4 s)) - -MIDFUNC(2,mul_32_32,(RW4 d, R4 s)) -{ - CLOBBER_MUL; - s=readreg(s,4); - d=rmw(d,4,4); - raw_mul_32_32(d,s); - unlock2(s); - unlock2(d); -} -MENDFUNC(2,mul_32_32,(RW4 d, R4 s)) - -#if SIZEOF_VOID_P == 8 -MIDFUNC(2,sign_extend_32_rr,(W4 d, R2 s)) -{ - int isrmw; - - if (isconst(s)) { - set_const(d,(uae_s32)live.state[s].val); - return; - } - - CLOBBER_SE32; - isrmw=(s==d); - if (!isrmw) { - s=readreg(s,4); - 
d=writereg(d,4); - } - else { /* If we try to lock this twice, with different sizes, we - are int trouble! */ - s=d=rmw(s,4,4); - } - raw_sign_extend_32_rr(d,s); - if (!isrmw) { - unlock2(d); - unlock2(s); - } - else { - unlock2(s); - } -} -MENDFUNC(2,sign_extend_32_rr,(W4 d, R2 s)) -#endif - -MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s)) -{ - int isrmw; - - if (isconst(s)) { - set_const(d,(uae_s32)(uae_s16)live.state[s].val); - return; - } - - CLOBBER_SE16; - isrmw=(s==d); - if (!isrmw) { - s=readreg(s,2); - d=writereg(d,4); - } - else { /* If we try to lock this twice, with different sizes, we - are int trouble! */ - s=d=rmw(s,4,2); - } - raw_sign_extend_16_rr(d,s); - if (!isrmw) { - unlock2(d); - unlock2(s); - } - else { - unlock2(s); - } -} -MENDFUNC(2,sign_extend_16_rr,(W4 d, R2 s)) - -MIDFUNC(2,sign_extend_8_rr,(W4 d, R1 s)) -{ - int isrmw; - - if (isconst(s)) { - set_const(d,(uae_s32)(uae_s8)live.state[s].val); - return; - } - - isrmw=(s==d); - CLOBBER_SE8; - if (!isrmw) { - s=readreg(s,1); - d=writereg(d,4); - } - else { /* If we try to lock this twice, with different sizes, we - are int trouble! */ - s=d=rmw(s,4,1); - } - - raw_sign_extend_8_rr(d,s); - - if (!isrmw) { - unlock2(d); - unlock2(s); - } - else { - unlock2(s); - } -} -MENDFUNC(2,sign_extend_8_rr,(W4 d, R1 s)) - - -MIDFUNC(2,zero_extend_16_rr,(W4 d, R2 s)) -{ - int isrmw; - - if (isconst(s)) { - set_const(d,(uae_u32)(uae_u16)live.state[s].val); - return; - } - - isrmw=(s==d); - CLOBBER_ZE16; - if (!isrmw) { - s=readreg(s,2); - d=writereg(d,4); - } - else { /* If we try to lock this twice, with different sizes, we - are int trouble! 
*/ - s=d=rmw(s,4,2); - } - raw_zero_extend_16_rr(d,s); - if (!isrmw) { - unlock2(d); - unlock2(s); - } - else { - unlock2(s); - } -} -MENDFUNC(2,zero_extend_16_rr,(W4 d, R2 s)) - -MIDFUNC(2,zero_extend_8_rr,(W4 d, R1 s)) -{ - int isrmw; - if (isconst(s)) { - set_const(d,(uae_u32)(uae_u8)live.state[s].val); - return; - } - - isrmw=(s==d); - CLOBBER_ZE8; - if (!isrmw) { - s=readreg(s,1); - d=writereg(d,4); - } - else { /* If we try to lock this twice, with different sizes, we - are int trouble! */ - s=d=rmw(s,4,1); - } - - raw_zero_extend_8_rr(d,s); - - if (!isrmw) { - unlock2(d); - unlock2(s); - } - else { - unlock2(s); - } -} -MENDFUNC(2,zero_extend_8_rr,(W4 d, R1 s)) - -MIDFUNC(2,mov_b_rr,(W1 d, R1 s)) -{ - if (d==s) - return; - if (isconst(s)) { - COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val); - return; - } - - CLOBBER_MOV; - s=readreg(s,1); - d=writereg(d,1); - raw_mov_b_rr(d,s); - unlock2(d); - unlock2(s); -} -MENDFUNC(2,mov_b_rr,(W1 d, R1 s)) - -MIDFUNC(2,mov_w_rr,(W2 d, R2 s)) -{ - if (d==s) - return; - if (isconst(s)) { - COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val); - return; - } - - CLOBBER_MOV; - s=readreg(s,2); - d=writereg(d,2); - raw_mov_w_rr(d,s); - unlock2(d); - unlock2(s); -} -MENDFUNC(2,mov_w_rr,(W2 d, R2 s)) - - -MIDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor)) -{ - CLOBBER_MOV; - baser=readreg(baser,4); - index=readreg(index,4); - d=writereg(d,4); - - raw_mov_l_rrm_indexed(d,baser,index,factor); - unlock2(d); - unlock2(baser); - unlock2(index); -} -MENDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor)) - -MIDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor)) -{ - CLOBBER_MOV; - baser=readreg(baser,4); - index=readreg(index,4); - d=writereg(d,2); - - raw_mov_w_rrm_indexed(d,baser,index,factor); - unlock2(d); - unlock2(baser); - unlock2(index); -} -MENDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor)) - -MIDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor)) -{ - 
CLOBBER_MOV; - baser=readreg(baser,4); - index=readreg(index,4); - d=writereg(d,1); - - raw_mov_b_rrm_indexed(d,baser,index,factor); - - unlock2(d); - unlock2(baser); - unlock2(index); -} -MENDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor)) - - -MIDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s)) -{ - CLOBBER_MOV; - baser=readreg(baser,4); - index=readreg(index,4); - s=readreg(s,4); - - Dif (baser==s || index==s) - abort(); - - - raw_mov_l_mrr_indexed(baser,index,factor,s); - unlock2(s); - unlock2(baser); - unlock2(index); -} -MENDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s)) - -MIDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s)) -{ - CLOBBER_MOV; - baser=readreg(baser,4); - index=readreg(index,4); - s=readreg(s,2); - - raw_mov_w_mrr_indexed(baser,index,factor,s); - unlock2(s); - unlock2(baser); - unlock2(index); -} -MENDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s)) - -MIDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s)) -{ - CLOBBER_MOV; - s=readreg(s,1); - baser=readreg(baser,4); - index=readreg(index,4); - - raw_mov_b_mrr_indexed(baser,index,factor,s); - unlock2(s); - unlock2(baser); - unlock2(index); -} -MENDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s)) - - -MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s)) -{ - int basereg=baser; - int indexreg=index; - - CLOBBER_MOV; - s=readreg(s,4); - baser=readreg_offset(baser,4); - index=readreg_offset(index,4); - - base+=get_offset(basereg); - base+=factor*get_offset(indexreg); - - raw_mov_l_bmrr_indexed(base,baser,index,factor,s); - unlock2(s); - unlock2(baser); - unlock2(index); -} -MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s)) - -MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s)) -{ - int basereg=baser; - int indexreg=index; - - CLOBBER_MOV; - s=readreg(s,2); - baser=readreg_offset(baser,4); - 
index=readreg_offset(index,4); - - base+=get_offset(basereg); - base+=factor*get_offset(indexreg); - - raw_mov_w_bmrr_indexed(base,baser,index,factor,s); - unlock2(s); - unlock2(baser); - unlock2(index); -} -MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s)) - -MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s)) -{ - int basereg=baser; - int indexreg=index; - - CLOBBER_MOV; - s=readreg(s,1); - baser=readreg_offset(baser,4); - index=readreg_offset(index,4); - - base+=get_offset(basereg); - base+=factor*get_offset(indexreg); - - raw_mov_b_bmrr_indexed(base,baser,index,factor,s); - unlock2(s); - unlock2(baser); - unlock2(index); -} -MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s)) - - - -/* Read a long from base+baser+factor*index */ -MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor)) -{ - int basereg=baser; - int indexreg=index; - - CLOBBER_MOV; - baser=readreg_offset(baser,4); - index=readreg_offset(index,4); - base+=get_offset(basereg); - base+=factor*get_offset(indexreg); - d=writereg(d,4); - raw_mov_l_brrm_indexed(d,base,baser,index,factor); - unlock2(d); - unlock2(baser); - unlock2(index); -} -MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor)) - - -MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor)) -{ - int basereg=baser; - int indexreg=index; - - CLOBBER_MOV; - remove_offset(d,-1); - baser=readreg_offset(baser,4); - index=readreg_offset(index,4); - base+=get_offset(basereg); - base+=factor*get_offset(indexreg); - d=writereg(d,2); - raw_mov_w_brrm_indexed(d,base,baser,index,factor); - unlock2(d); - unlock2(baser); - unlock2(index); -} -MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor)) - - -MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor)) -{ - int basereg=baser; - int indexreg=index; - - CLOBBER_MOV; - remove_offset(d,-1); - 
baser=readreg_offset(baser,4); - index=readreg_offset(index,4); - base+=get_offset(basereg); - base+=factor*get_offset(indexreg); - d=writereg(d,1); - raw_mov_b_brrm_indexed(d,base,baser,index,factor); - unlock2(d); - unlock2(baser); - unlock2(index); -} -MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor)) - -/* Read a long from base+factor*index */ -MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor)) -{ - int indexreg=index; - - if (isconst(index)) { - COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val); - return; - } - - CLOBBER_MOV; - index=readreg_offset(index,4); - base+=get_offset(indexreg)*factor; - d=writereg(d,4); - - raw_mov_l_rm_indexed(d,base,index,factor); - unlock2(index); - unlock2(d); -} -MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor)) - - -/* read the long at the address contained in s+offset and store in d */ -MIDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset)) -{ - if (isconst(s)) { - COMPCALL(mov_l_rm)(d,live.state[s].val+offset); - return; - } - CLOBBER_MOV; - s=readreg(s,4); - d=writereg(d,4); - - raw_mov_l_rR(d,s,offset); - unlock2(d); - unlock2(s); -} -MENDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset)) - -/* read the word at the address contained in s+offset and store in d */ -MIDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset)) -{ - if (isconst(s)) { - COMPCALL(mov_w_rm)(d,live.state[s].val+offset); - return; - } - CLOBBER_MOV; - s=readreg(s,4); - d=writereg(d,2); - - raw_mov_w_rR(d,s,offset); - unlock2(d); - unlock2(s); -} -MENDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset)) - -/* read the word at the address contained in s+offset and store in d */ -MIDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset)) -{ - if (isconst(s)) { - COMPCALL(mov_b_rm)(d,live.state[s].val+offset); - return; - } - CLOBBER_MOV; - s=readreg(s,4); - d=writereg(d,1); - - raw_mov_b_rR(d,s,offset); - unlock2(d); - unlock2(s); -} -MENDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset)) - -/* read the long at the address contained in s+offset and 
store in d */ -MIDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset)) -{ - int sreg=s; - if (isconst(s)) { - COMPCALL(mov_l_rm)(d,live.state[s].val+offset); - return; - } - CLOBBER_MOV; - s=readreg_offset(s,4); - offset+=get_offset(sreg); - d=writereg(d,4); - - raw_mov_l_brR(d,s,offset); - unlock2(d); - unlock2(s); -} -MENDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset)) - -/* read the word at the address contained in s+offset and store in d */ -MIDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset)) -{ - int sreg=s; - if (isconst(s)) { - COMPCALL(mov_w_rm)(d,live.state[s].val+offset); - return; - } - CLOBBER_MOV; - remove_offset(d,-1); - s=readreg_offset(s,4); - offset+=get_offset(sreg); - d=writereg(d,2); - - raw_mov_w_brR(d,s,offset); - unlock2(d); - unlock2(s); -} -MENDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset)) - -/* read the word at the address contained in s+offset and store in d */ -MIDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset)) -{ - int sreg=s; - if (isconst(s)) { - COMPCALL(mov_b_rm)(d,live.state[s].val+offset); - return; - } - CLOBBER_MOV; - remove_offset(d,-1); - s=readreg_offset(s,4); - offset+=get_offset(sreg); - d=writereg(d,1); - - raw_mov_b_brR(d,s,offset); - unlock2(d); - unlock2(s); -} -MENDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset)) - -MIDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset)) -{ - int dreg=d; - if (isconst(d)) { - COMPCALL(mov_l_mi)(live.state[d].val+offset,i); - return; - } - - CLOBBER_MOV; - d=readreg_offset(d,4); - offset+=get_offset(dreg); - raw_mov_l_Ri(d,i,offset); - unlock2(d); -} -MENDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset)) - -MIDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset)) -{ - int dreg=d; - if (isconst(d)) { - COMPCALL(mov_w_mi)(live.state[d].val+offset,i); - return; - } - - CLOBBER_MOV; - d=readreg_offset(d,4); - offset+=get_offset(dreg); - raw_mov_w_Ri(d,i,offset); - unlock2(d); -} -MENDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset)) - -MIDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset)) -{ - int dreg=d; - if (isconst(d)) { - 
COMPCALL(mov_b_mi)(live.state[d].val+offset,i); - return; - } - - CLOBBER_MOV; - d=readreg_offset(d,4); - offset+=get_offset(dreg); - raw_mov_b_Ri(d,i,offset); - unlock2(d); -} -MENDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset)) - - /* Warning! OFFSET is byte sized only! */ -MIDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset)) -{ - if (isconst(d)) { - COMPCALL(mov_l_mr)(live.state[d].val+offset,s); - return; - } - if (isconst(s)) { - COMPCALL(mov_l_Ri)(d,live.state[s].val,offset); - return; - } - - CLOBBER_MOV; - s=readreg(s,4); - d=readreg(d,4); - - raw_mov_l_Rr(d,s,offset); - unlock2(d); - unlock2(s); -} -MENDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset)) - -MIDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset)) -{ - if (isconst(d)) { - COMPCALL(mov_w_mr)(live.state[d].val+offset,s); - return; - } - if (isconst(s)) { - COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset); - return; - } - - CLOBBER_MOV; - s=readreg(s,2); - d=readreg(d,4); - raw_mov_w_Rr(d,s,offset); - unlock2(d); - unlock2(s); -} -MENDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset)) - -MIDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset)) -{ - if (isconst(d)) { - COMPCALL(mov_b_mr)(live.state[d].val+offset,s); - return; - } - if (isconst(s)) { - COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset); - return; - } - - CLOBBER_MOV; - s=readreg(s,1); - d=readreg(d,4); - raw_mov_b_Rr(d,s,offset); - unlock2(d); - unlock2(s); -} -MENDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset)) - -MIDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset)) -{ - if (isconst(s)) { - COMPCALL(mov_l_ri)(d,live.state[s].val+offset); - return; - } -#if USE_OFFSET - if (d==s) { - add_offset(d,offset); - return; - } -#endif - CLOBBER_LEA; - s=readreg(s,4); - d=writereg(d,4); - raw_lea_l_brr(d,s,offset); - unlock2(d); - unlock2(s); -} -MENDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset)) - -MIDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset)) -{ - if (!offset) { - COMPCALL(lea_l_rr_indexed)(d,s,index,factor); - return; - } - CLOBBER_LEA; - s=readreg(s,4); - 
index=readreg(index,4); - d=writereg(d,4); - - raw_lea_l_brr_indexed(d,s,index,factor,offset); - unlock2(d); - unlock2(index); - unlock2(s); -} -MENDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset)) - -MIDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor)) -{ - CLOBBER_LEA; - s=readreg(s,4); - index=readreg(index,4); - d=writereg(d,4); - - raw_lea_l_rr_indexed(d,s,index,factor); - unlock2(d); - unlock2(index); - unlock2(s); -} -MENDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor)) - -/* write d to the long at the address contained in s+offset */ -MIDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset)) -{ - int dreg=d; - if (isconst(d)) { - COMPCALL(mov_l_mr)(live.state[d].val+offset,s); - return; - } - - CLOBBER_MOV; - s=readreg(s,4); - d=readreg_offset(d,4); - offset+=get_offset(dreg); - - raw_mov_l_bRr(d,s,offset); - unlock2(d); - unlock2(s); -} -MENDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset)) - -/* write the word at the address contained in s+offset and store in d */ -MIDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset)) -{ - int dreg=d; - - if (isconst(d)) { - COMPCALL(mov_w_mr)(live.state[d].val+offset,s); - return; - } - - CLOBBER_MOV; - s=readreg(s,2); - d=readreg_offset(d,4); - offset+=get_offset(dreg); - raw_mov_w_bRr(d,s,offset); - unlock2(d); - unlock2(s); -} -MENDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset)) - -MIDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset)) -{ - int dreg=d; - if (isconst(d)) { - COMPCALL(mov_b_mr)(live.state[d].val+offset,s); - return; - } - - CLOBBER_MOV; - s=readreg(s,1); - d=readreg_offset(d,4); - offset+=get_offset(dreg); - raw_mov_b_bRr(d,s,offset); - unlock2(d); - unlock2(s); -} -MENDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset)) - -MIDFUNC(1,bswap_32,(RW4 r)) -{ - int reg=r; - - if (isconst(r)) { - uae_u32 oldv=live.state[r].val; - live.state[r].val=reverse32(oldv); - return; - } - - CLOBBER_SW32; - r=rmw(r,4,4); - raw_bswap_32(r); - unlock2(r); -} -MENDFUNC(1,bswap_32,(RW4 r)) - -MIDFUNC(1,bswap_16,(RW2 r)) -{ - if 
(isconst(r)) { - uae_u32 oldv=live.state[r].val; - live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) | - (oldv&0xffff0000); - return; - } - - CLOBBER_SW16; - r=rmw(r,2,2); - - raw_bswap_16(r); - unlock2(r); -} -MENDFUNC(1,bswap_16,(RW2 r)) - - - -MIDFUNC(2,mov_l_rr,(W4 d, R4 s)) -{ - int olds; - - if (d==s) { /* How pointless! */ - return; - } - if (isconst(s)) { - COMPCALL(mov_l_ri)(d,live.state[s].val); - return; - } - olds=s; - disassociate(d); - s=readreg_offset(s,4); - live.state[d].realreg=s; - live.state[d].realind=live.nat[s].nholds; - live.state[d].val=live.state[olds].val; - live.state[d].validsize=4; - live.state[d].dirtysize=4; - set_status(d,DIRTY); - - live.nat[s].holds[live.nat[s].nholds]=d; - live.nat[s].nholds++; - log_clobberreg(d); - /* write_log("Added %d to nreg %d(%d), now holds %d regs\n", - d,s,live.state[d].realind,live.nat[s].nholds); */ - unlock2(s); -} -MENDFUNC(2,mov_l_rr,(W4 d, R4 s)) - -MIDFUNC(2,mov_l_mr,(IMM d, R4 s)) -{ - if (isconst(s)) { - COMPCALL(mov_l_mi)(d,live.state[s].val); - return; - } - CLOBBER_MOV; - s=readreg(s,4); - - raw_mov_l_mr(d,s); - unlock2(s); -} -MENDFUNC(2,mov_l_mr,(IMM d, R4 s)) - - -MIDFUNC(2,mov_w_mr,(IMM d, R2 s)) -{ - if (isconst(s)) { - COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val); - return; - } - CLOBBER_MOV; - s=readreg(s,2); - - raw_mov_w_mr(d,s); - unlock2(s); -} -MENDFUNC(2,mov_w_mr,(IMM d, R2 s)) - -MIDFUNC(2,mov_w_rm,(W2 d, IMM s)) -{ - CLOBBER_MOV; - d=writereg(d,2); - - raw_mov_w_rm(d,s); - unlock2(d); -} -MENDFUNC(2,mov_w_rm,(W2 d, IMM s)) - -MIDFUNC(2,mov_b_mr,(IMM d, R1 s)) -{ - if (isconst(s)) { - COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val); - return; - } - - CLOBBER_MOV; - s=readreg(s,1); - - raw_mov_b_mr(d,s); - unlock2(s); -} -MENDFUNC(2,mov_b_mr,(IMM d, R1 s)) - -MIDFUNC(2,mov_b_rm,(W1 d, IMM s)) -{ - CLOBBER_MOV; - d=writereg(d,1); - - raw_mov_b_rm(d,s); - unlock2(d); -} -MENDFUNC(2,mov_b_rm,(W1 d, IMM s)) - -MIDFUNC(2,mov_l_ri,(W4 d, IMM s)) -{ - set_const(d,s); - 
return; -} -MENDFUNC(2,mov_l_ri,(W4 d, IMM s)) - -MIDFUNC(2,mov_w_ri,(W2 d, IMM s)) -{ - CLOBBER_MOV; - d=writereg(d,2); - - raw_mov_w_ri(d,s); - unlock2(d); -} -MENDFUNC(2,mov_w_ri,(W2 d, IMM s)) - -MIDFUNC(2,mov_b_ri,(W1 d, IMM s)) -{ - CLOBBER_MOV; - d=writereg(d,1); - - raw_mov_b_ri(d,s); - unlock2(d); -} -MENDFUNC(2,mov_b_ri,(W1 d, IMM s)) - - -MIDFUNC(2,add_l_mi,(IMM d, IMM s)) -{ - CLOBBER_ADD; - raw_add_l_mi(d,s) ; -} -MENDFUNC(2,add_l_mi,(IMM d, IMM s)) - -MIDFUNC(2,add_w_mi,(IMM d, IMM s)) -{ - CLOBBER_ADD; - raw_add_w_mi(d,s) ; -} -MENDFUNC(2,add_w_mi,(IMM d, IMM s)) - -MIDFUNC(2,add_b_mi,(IMM d, IMM s)) -{ - CLOBBER_ADD; - raw_add_b_mi(d,s) ; -} -MENDFUNC(2,add_b_mi,(IMM d, IMM s)) - - -MIDFUNC(2,test_l_ri,(R4 d, IMM i)) -{ - CLOBBER_TEST; - d=readreg(d,4); - - raw_test_l_ri(d,i); - unlock2(d); -} -MENDFUNC(2,test_l_ri,(R4 d, IMM i)) - -MIDFUNC(2,test_l_rr,(R4 d, R4 s)) -{ - CLOBBER_TEST; - d=readreg(d,4); - s=readreg(s,4); - - raw_test_l_rr(d,s);; - unlock2(d); - unlock2(s); -} -MENDFUNC(2,test_l_rr,(R4 d, R4 s)) - -MIDFUNC(2,test_w_rr,(R2 d, R2 s)) -{ - CLOBBER_TEST; - d=readreg(d,2); - s=readreg(s,2); - - raw_test_w_rr(d,s); - unlock2(d); - unlock2(s); -} -MENDFUNC(2,test_w_rr,(R2 d, R2 s)) - -MIDFUNC(2,test_b_rr,(R1 d, R1 s)) -{ - CLOBBER_TEST; - d=readreg(d,1); - s=readreg(s,1); - - raw_test_b_rr(d,s); - unlock2(d); - unlock2(s); -} -MENDFUNC(2,test_b_rr,(R1 d, R1 s)) - - -MIDFUNC(2,and_l_ri,(RW4 d, IMM i)) -{ - if (isconst(d) && !needflags) { - live.state[d].val &= i; - return; + if (tune_nop_fillers) + raw_emit_nop_filler(a - (((uintptr)target) & (a - 1))); + else { + /* Fill with NOPs --- makes debugging with gdb easier */ + while ((uintptr)target&(a-1)) + emit_byte(0x90); // Attention x86 specific code } - - CLOBBER_AND; - d=rmw(d,4,4); - - raw_and_l_ri(d,i); - unlock2(d); } -MENDFUNC(2,and_l_ri,(RW4 d, IMM i)) -MIDFUNC(2,and_l,(RW4 d, R4 s)) +static inline int isinrom(uintptr addr) { - CLOBBER_AND; - s=readreg(s,4); - d=rmw(d,4,4); - - 
raw_and_l(d,s); - unlock2(d); - unlock2(s); -} -MENDFUNC(2,and_l,(RW4 d, R4 s)) - -MIDFUNC(2,and_w,(RW2 d, R2 s)) -{ - CLOBBER_AND; - s=readreg(s,2); - d=rmw(d,2,2); - - raw_and_w(d,s); - unlock2(d); - unlock2(s); -} -MENDFUNC(2,and_w,(RW2 d, R2 s)) - -MIDFUNC(2,and_b,(RW1 d, R1 s)) -{ - CLOBBER_AND; - s=readreg(s,1); - d=rmw(d,1,1); - - raw_and_b(d,s); - unlock2(d); - unlock2(s); -} -MENDFUNC(2,and_b,(RW1 d, R1 s)) - -// gb-- used for making an fpcr value in compemu_fpp.cpp -MIDFUNC(2,or_l_rm,(RW4 d, IMM s)) -{ - CLOBBER_OR; - d=rmw(d,4,4); - - raw_or_l_rm(d,s); - unlock2(d); -} -MENDFUNC(2,or_l_rm,(RW4 d, IMM s)) - -MIDFUNC(2,or_l_ri,(RW4 d, IMM i)) -{ - if (isconst(d) && !needflags) { - live.state[d].val|=i; - return; - } - CLOBBER_OR; - d=rmw(d,4,4); - - raw_or_l_ri(d,i); - unlock2(d); -} -MENDFUNC(2,or_l_ri,(RW4 d, IMM i)) - -MIDFUNC(2,or_l,(RW4 d, R4 s)) -{ - if (isconst(d) && isconst(s) && !needflags) { - live.state[d].val|=live.state[s].val; - return; - } - CLOBBER_OR; - s=readreg(s,4); - d=rmw(d,4,4); - - raw_or_l(d,s); - unlock2(d); - unlock2(s); -} -MENDFUNC(2,or_l,(RW4 d, R4 s)) - -MIDFUNC(2,or_w,(RW2 d, R2 s)) -{ - CLOBBER_OR; - s=readreg(s,2); - d=rmw(d,2,2); - - raw_or_w(d,s); - unlock2(d); - unlock2(s); -} -MENDFUNC(2,or_w,(RW2 d, R2 s)) - -MIDFUNC(2,or_b,(RW1 d, R1 s)) -{ - CLOBBER_OR; - s=readreg(s,1); - d=rmw(d,1,1); - - raw_or_b(d,s); - unlock2(d); - unlock2(s); -} -MENDFUNC(2,or_b,(RW1 d, R1 s)) - -MIDFUNC(2,adc_l,(RW4 d, R4 s)) -{ - CLOBBER_ADC; - s=readreg(s,4); - d=rmw(d,4,4); - - raw_adc_l(d,s); - - unlock2(d); - unlock2(s); -} -MENDFUNC(2,adc_l,(RW4 d, R4 s)) - -MIDFUNC(2,adc_w,(RW2 d, R2 s)) -{ - CLOBBER_ADC; - s=readreg(s,2); - d=rmw(d,2,2); - - raw_adc_w(d,s); - unlock2(d); - unlock2(s); -} -MENDFUNC(2,adc_w,(RW2 d, R2 s)) - -MIDFUNC(2,adc_b,(RW1 d, R1 s)) -{ - CLOBBER_ADC; - s=readreg(s,1); - d=rmw(d,1,1); - - raw_adc_b(d,s); - unlock2(d); - unlock2(s); -} -MENDFUNC(2,adc_b,(RW1 d, R1 s)) - -MIDFUNC(2,add_l,(RW4 d, R4 s)) -{ - if 
(isconst(s)) { - COMPCALL(add_l_ri)(d,live.state[s].val); - return; - } - - CLOBBER_ADD; - s=readreg(s,4); - d=rmw(d,4,4); - - raw_add_l(d,s); - - unlock2(d); - unlock2(s); -} -MENDFUNC(2,add_l,(RW4 d, R4 s)) - -MIDFUNC(2,add_w,(RW2 d, R2 s)) -{ - if (isconst(s)) { - COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val); - return; - } - - CLOBBER_ADD; - s=readreg(s,2); - d=rmw(d,2,2); - - raw_add_w(d,s); - unlock2(d); - unlock2(s); -} -MENDFUNC(2,add_w,(RW2 d, R2 s)) - -MIDFUNC(2,add_b,(RW1 d, R1 s)) -{ - if (isconst(s)) { - COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val); - return; - } - - CLOBBER_ADD; - s=readreg(s,1); - d=rmw(d,1,1); - - raw_add_b(d,s); - unlock2(d); - unlock2(s); -} -MENDFUNC(2,add_b,(RW1 d, R1 s)) - -MIDFUNC(2,sub_l_ri,(RW4 d, IMM i)) -{ - if (!i && !needflags) - return; - if (isconst(d) && !needflags) { - live.state[d].val-=i; - return; - } -#if USE_OFFSET - if (!needflags) { - add_offset(d,-i); - return; - } -#endif - - CLOBBER_SUB; - d=rmw(d,4,4); - - raw_sub_l_ri(d,i); - unlock2(d); -} -MENDFUNC(2,sub_l_ri,(RW4 d, IMM i)) - -MIDFUNC(2,sub_w_ri,(RW2 d, IMM i)) -{ - if (!i && !needflags) - return; - - CLOBBER_SUB; - d=rmw(d,2,2); - - raw_sub_w_ri(d,i); - unlock2(d); -} -MENDFUNC(2,sub_w_ri,(RW2 d, IMM i)) - -MIDFUNC(2,sub_b_ri,(RW1 d, IMM i)) -{ - if (!i && !needflags) - return; - - CLOBBER_SUB; - d=rmw(d,1,1); - - raw_sub_b_ri(d,i); - - unlock2(d); -} -MENDFUNC(2,sub_b_ri,(RW1 d, IMM i)) - -MIDFUNC(2,add_l_ri,(RW4 d, IMM i)) -{ - if (!i && !needflags) - return; - if (isconst(d) && !needflags) { - live.state[d].val+=i; - return; - } -#if USE_OFFSET - if (!needflags) { - add_offset(d,i); - return; - } -#endif - CLOBBER_ADD; - d=rmw(d,4,4); - raw_add_l_ri(d,i); - unlock2(d); -} -MENDFUNC(2,add_l_ri,(RW4 d, IMM i)) - -MIDFUNC(2,add_w_ri,(RW2 d, IMM i)) -{ - if (!i && !needflags) - return; - - CLOBBER_ADD; - d=rmw(d,2,2); - - raw_add_w_ri(d,i); - unlock2(d); -} -MENDFUNC(2,add_w_ri,(RW2 d, IMM i)) - -MIDFUNC(2,add_b_ri,(RW1 d, IMM i)) -{ - if (!i && 
!needflags) - return; - - CLOBBER_ADD; - d=rmw(d,1,1); - - raw_add_b_ri(d,i); - - unlock2(d); -} -MENDFUNC(2,add_b_ri,(RW1 d, IMM i)) - -MIDFUNC(2,sbb_l,(RW4 d, R4 s)) -{ - CLOBBER_SBB; - s=readreg(s,4); - d=rmw(d,4,4); - - raw_sbb_l(d,s); - unlock2(d); - unlock2(s); -} -MENDFUNC(2,sbb_l,(RW4 d, R4 s)) - -MIDFUNC(2,sbb_w,(RW2 d, R2 s)) -{ - CLOBBER_SBB; - s=readreg(s,2); - d=rmw(d,2,2); - - raw_sbb_w(d,s); - unlock2(d); - unlock2(s); -} -MENDFUNC(2,sbb_w,(RW2 d, R2 s)) - -MIDFUNC(2,sbb_b,(RW1 d, R1 s)) -{ - CLOBBER_SBB; - s=readreg(s,1); - d=rmw(d,1,1); - - raw_sbb_b(d,s); - unlock2(d); - unlock2(s); -} -MENDFUNC(2,sbb_b,(RW1 d, R1 s)) - -MIDFUNC(2,sub_l,(RW4 d, R4 s)) -{ - if (isconst(s)) { - COMPCALL(sub_l_ri)(d,live.state[s].val); - return; - } - - CLOBBER_SUB; - s=readreg(s,4); - d=rmw(d,4,4); - - raw_sub_l(d,s); - unlock2(d); - unlock2(s); -} -MENDFUNC(2,sub_l,(RW4 d, R4 s)) - -MIDFUNC(2,sub_w,(RW2 d, R2 s)) -{ - if (isconst(s)) { - COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val); - return; - } - - CLOBBER_SUB; - s=readreg(s,2); - d=rmw(d,2,2); - - raw_sub_w(d,s); - unlock2(d); - unlock2(s); -} -MENDFUNC(2,sub_w,(RW2 d, R2 s)) - -MIDFUNC(2,sub_b,(RW1 d, R1 s)) -{ - if (isconst(s)) { - COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val); - return; - } - - CLOBBER_SUB; - s=readreg(s,1); - d=rmw(d,1,1); - - raw_sub_b(d,s); - unlock2(d); - unlock2(s); -} -MENDFUNC(2,sub_b,(RW1 d, R1 s)) - -MIDFUNC(2,cmp_l,(R4 d, R4 s)) -{ - CLOBBER_CMP; - s=readreg(s,4); - d=readreg(d,4); - - raw_cmp_l(d,s); - unlock2(d); - unlock2(s); -} -MENDFUNC(2,cmp_l,(R4 d, R4 s)) - -MIDFUNC(2,cmp_l_ri,(R4 r, IMM i)) -{ - CLOBBER_CMP; - r=readreg(r,4); - - raw_cmp_l_ri(r,i); - unlock2(r); -} -MENDFUNC(2,cmp_l_ri,(R4 r, IMM i)) - -MIDFUNC(2,cmp_w,(R2 d, R2 s)) -{ - CLOBBER_CMP; - s=readreg(s,2); - d=readreg(d,2); - - raw_cmp_w(d,s); - unlock2(d); - unlock2(s); -} -MENDFUNC(2,cmp_w,(R2 d, R2 s)) - -MIDFUNC(2,cmp_b,(R1 d, R1 s)) -{ - CLOBBER_CMP; - s=readreg(s,1); - d=readreg(d,1); - - 
raw_cmp_b(d,s); - unlock2(d); - unlock2(s); -} -MENDFUNC(2,cmp_b,(R1 d, R1 s)) - - -MIDFUNC(2,xor_l,(RW4 d, R4 s)) -{ - CLOBBER_XOR; - s=readreg(s,4); - d=rmw(d,4,4); - - raw_xor_l(d,s); - unlock2(d); - unlock2(s); -} -MENDFUNC(2,xor_l,(RW4 d, R4 s)) - -MIDFUNC(2,xor_w,(RW2 d, R2 s)) -{ - CLOBBER_XOR; - s=readreg(s,2); - d=rmw(d,2,2); - - raw_xor_w(d,s); - unlock2(d); - unlock2(s); -} -MENDFUNC(2,xor_w,(RW2 d, R2 s)) - -MIDFUNC(2,xor_b,(RW1 d, R1 s)) -{ - CLOBBER_XOR; - s=readreg(s,1); - d=rmw(d,1,1); - - raw_xor_b(d,s); - unlock2(d); - unlock2(s); -} -MENDFUNC(2,xor_b,(RW1 d, R1 s)) - -MIDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize)) -{ - clobber_flags(); - remove_all_offsets(); - if (osize==4) { - if (out1!=in1 && out1!=r) { - COMPCALL(forget_about)(out1); - } - } - else { - tomem_c(out1); - } - - in1=readreg_specific(in1,isize,REG_PAR1); - r=readreg(r,4); - prepare_for_call_1(); /* This should ensure that there won't be - any need for swapping nregs in prepare_for_call_2 - */ -#if USE_NORMAL_CALLING_CONVENTION - raw_push_l_r(in1); -#endif - unlock2(in1); - unlock2(r); - - prepare_for_call_2(); - raw_call_r(r); - -#if USE_NORMAL_CALLING_CONVENTION - raw_inc_sp(4); -#endif - - - live.nat[REG_RESULT].holds[0]=out1; - live.nat[REG_RESULT].nholds=1; - live.nat[REG_RESULT].touched=touchcnt++; - - live.state[out1].realreg=REG_RESULT; - live.state[out1].realind=0; - live.state[out1].val=0; - live.state[out1].validsize=osize; - live.state[out1].dirtysize=osize; - set_status(out1,DIRTY); -} -MENDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize)) - -MIDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2)) -{ - clobber_flags(); - remove_all_offsets(); - in1=readreg_specific(in1,isize1,REG_PAR1); - in2=readreg_specific(in2,isize2,REG_PAR2); - r=readreg(r,4); - prepare_for_call_1(); /* This should ensure that there won't be - any need for swapping nregs in prepare_for_call_2 - */ -#if USE_NORMAL_CALLING_CONVENTION - raw_push_l_r(in2); 
- raw_push_l_r(in1); -#endif - unlock2(r); - unlock2(in1); - unlock2(in2); - prepare_for_call_2(); - raw_call_r(r); -#if USE_NORMAL_CALLING_CONVENTION - raw_inc_sp(8); -#endif -} -MENDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2)) - -/* forget_about() takes a mid-layer register */ -MIDFUNC(1,forget_about,(W4 r)) -{ - if (isinreg(r)) - disassociate(r); - live.state[r].val=0; - set_status(r,UNDEF); -} -MENDFUNC(1,forget_about,(W4 r)) - -MIDFUNC(0,nop,(void)) -{ - raw_nop(); -} -MENDFUNC(0,nop,(void)) - - -MIDFUNC(1,f_forget_about,(FW r)) -{ - if (f_isinreg(r)) - f_disassociate(r); - live.fate[r].status=UNDEF; -} -MENDFUNC(1,f_forget_about,(FW r)) - -MIDFUNC(1,fmov_pi,(FW r)) -{ - r=f_writereg(r); - raw_fmov_pi(r); - f_unlock(r); -} -MENDFUNC(1,fmov_pi,(FW r)) - -MIDFUNC(1,fmov_log10_2,(FW r)) -{ - r=f_writereg(r); - raw_fmov_log10_2(r); - f_unlock(r); -} -MENDFUNC(1,fmov_log10_2,(FW r)) - -MIDFUNC(1,fmov_log2_e,(FW r)) -{ - r=f_writereg(r); - raw_fmov_log2_e(r); - f_unlock(r); -} -MENDFUNC(1,fmov_log2_e,(FW r)) - -MIDFUNC(1,fmov_loge_2,(FW r)) -{ - r=f_writereg(r); - raw_fmov_loge_2(r); - f_unlock(r); -} -MENDFUNC(1,fmov_loge_2,(FW r)) - -MIDFUNC(1,fmov_1,(FW r)) -{ - r=f_writereg(r); - raw_fmov_1(r); - f_unlock(r); -} -MENDFUNC(1,fmov_1,(FW r)) - -MIDFUNC(1,fmov_0,(FW r)) -{ - r=f_writereg(r); - raw_fmov_0(r); - f_unlock(r); -} -MENDFUNC(1,fmov_0,(FW r)) - -MIDFUNC(2,fmov_rm,(FW r, MEMR m)) -{ - r=f_writereg(r); - raw_fmov_rm(r,m); - f_unlock(r); -} -MENDFUNC(2,fmov_rm,(FW r, MEMR m)) - -MIDFUNC(2,fmovi_rm,(FW r, MEMR m)) -{ - r=f_writereg(r); - raw_fmovi_rm(r,m); - f_unlock(r); -} -MENDFUNC(2,fmovi_rm,(FW r, MEMR m)) - -MIDFUNC(2,fmovi_mr,(MEMW m, FR r)) -{ - r=f_readreg(r); - raw_fmovi_mr(m,r); - f_unlock(r); -} -MENDFUNC(2,fmovi_mr,(MEMW m, FR r)) - -MIDFUNC(2,fmovs_rm,(FW r, MEMR m)) -{ - r=f_writereg(r); - raw_fmovs_rm(r,m); - f_unlock(r); -} -MENDFUNC(2,fmovs_rm,(FW r, MEMR m)) - -MIDFUNC(2,fmovs_mr,(MEMW m, FR r)) -{ - r=f_readreg(r); - 
raw_fmovs_mr(m,r); - f_unlock(r); -} -MENDFUNC(2,fmovs_mr,(MEMW m, FR r)) - -MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r)) -{ - r=f_readreg(r); - raw_fmov_ext_mr(m,r); - f_unlock(r); -} -MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r)) - -MIDFUNC(2,fmov_mr,(MEMW m, FR r)) -{ - r=f_readreg(r); - raw_fmov_mr(m,r); - f_unlock(r); -} -MENDFUNC(2,fmov_mr,(MEMW m, FR r)) - -MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m)) -{ - r=f_writereg(r); - raw_fmov_ext_rm(r,m); - f_unlock(r); -} -MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m)) - -MIDFUNC(2,fmov_rr,(FW d, FR s)) -{ - if (d==s) { /* How pointless! */ - return; - } -#if USE_F_ALIAS - f_disassociate(d); - s=f_readreg(s); - live.fate[d].realreg=s; - live.fate[d].realind=live.fat[s].nholds; - live.fate[d].status=DIRTY; - live.fat[s].holds[live.fat[s].nholds]=d; - live.fat[s].nholds++; - f_unlock(s); +#ifdef UAE + return (addr >= uae_p32(kickmem_bank.baseaddr) && + addr < uae_p32(kickmem_bank.baseaddr + 8 * 65536)); #else - s=f_readreg(s); - d=f_writereg(d); - raw_fmov_rr(d,s); - f_unlock(s); - f_unlock(d); + return ((addr >= (uintptr)ROMBaseHost) && (addr < (uintptr)ROMBaseHost + ROMSize)); #endif } -MENDFUNC(2,fmov_rr,(FW d, FR s)) -MIDFUNC(2,fldcw_m_indexed,(R4 index, IMM base)) +#if defined(UAE) || defined(FLIGHT_RECORDER) +static void flush_all(void) { - index=readreg(index,4); + int i; - raw_fldcw_m_indexed(index,base); - unlock2(index); + log_flush(); + for (i=0;i0) + free_nreg(i); -MIDFUNC(2,fsqrt_rr,(FW d, FR s)) -{ - s=f_readreg(s); - d=f_writereg(d); - raw_fsqrt_rr(d,s); - f_unlock(s); - f_unlock(d); -} -MENDFUNC(2,fsqrt_rr,(FW d, FR s)) + for (i=0;i0) + f_free_nreg(i); -MIDFUNC(2,fabs_rr,(FW d, FR s)) -{ - s=f_readreg(s); - d=f_writereg(d); - raw_fabs_rr(d,s); - f_unlock(s); - f_unlock(d); + live.flags_in_flags=TRASH; /* Note: We assume we already rescued the + flags at the very start of the call_r + functions! 
*/ } -MENDFUNC(2,fabs_rr,(FW d, FR s)) +#endif -MIDFUNC(2,fsin_rr,(FW d, FR s)) -{ - s=f_readreg(s); - d=f_writereg(d); - raw_fsin_rr(d,s); - f_unlock(s); - f_unlock(d); -} -MENDFUNC(2,fsin_rr,(FW d, FR s)) +#if defined(CPU_arm) +#include "compemu_midfunc_arm.cpp" -MIDFUNC(2,fcos_rr,(FW d, FR s)) -{ - s=f_readreg(s); - d=f_writereg(d); - raw_fcos_rr(d,s); - f_unlock(s); - f_unlock(d); -} -MENDFUNC(2,fcos_rr,(FW d, FR s)) +#if defined(USE_JIT2) +#include "compemu_midfunc_arm2.cpp" +#endif +#endif -MIDFUNC(2,ftwotox_rr,(FW d, FR s)) -{ - s=f_readreg(s); - d=f_writereg(d); - raw_ftwotox_rr(d,s); - f_unlock(s); - f_unlock(d); -} -MENDFUNC(2,ftwotox_rr,(FW d, FR s)) +#if defined(CPU_i386) || defined(CPU_x86_64) +#include "compemu_midfunc_x86.cpp" +#endif -MIDFUNC(2,fetox_rr,(FW d, FR s)) -{ - s=f_readreg(s); - d=f_writereg(d); - raw_fetox_rr(d,s); - f_unlock(s); - f_unlock(d); -} -MENDFUNC(2,fetox_rr,(FW d, FR s)) - -MIDFUNC(2,frndint_rr,(FW d, FR s)) -{ - s=f_readreg(s); - d=f_writereg(d); - raw_frndint_rr(d,s); - f_unlock(s); - f_unlock(d); -} -MENDFUNC(2,frndint_rr,(FW d, FR s)) - -MIDFUNC(2,flog2_rr,(FW d, FR s)) -{ - s=f_readreg(s); - d=f_writereg(d); - raw_flog2_rr(d,s); - f_unlock(s); - f_unlock(d); -} -MENDFUNC(2,flog2_rr,(FW d, FR s)) - -MIDFUNC(2,fneg_rr,(FW d, FR s)) -{ - s=f_readreg(s); - d=f_writereg(d); - raw_fneg_rr(d,s); - f_unlock(s); - f_unlock(d); -} -MENDFUNC(2,fneg_rr,(FW d, FR s)) - -MIDFUNC(2,fadd_rr,(FRW d, FR s)) -{ - s=f_readreg(s); - d=f_rmw(d); - raw_fadd_rr(d,s); - f_unlock(s); - f_unlock(d); -} -MENDFUNC(2,fadd_rr,(FRW d, FR s)) - -MIDFUNC(2,fsub_rr,(FRW d, FR s)) -{ - s=f_readreg(s); - d=f_rmw(d); - raw_fsub_rr(d,s); - f_unlock(s); - f_unlock(d); -} -MENDFUNC(2,fsub_rr,(FRW d, FR s)) - -MIDFUNC(2,fcmp_rr,(FR d, FR s)) -{ - d=f_readreg(d); - s=f_readreg(s); - raw_fcmp_rr(d,s); - f_unlock(s); - f_unlock(d); -} -MENDFUNC(2,fcmp_rr,(FR d, FR s)) - -MIDFUNC(2,fdiv_rr,(FRW d, FR s)) -{ - s=f_readreg(s); - d=f_rmw(d); - raw_fdiv_rr(d,s); - 
f_unlock(s); - f_unlock(d); -} -MENDFUNC(2,fdiv_rr,(FRW d, FR s)) - -MIDFUNC(2,frem_rr,(FRW d, FR s)) -{ - s=f_readreg(s); - d=f_rmw(d); - raw_frem_rr(d,s); - f_unlock(s); - f_unlock(d); -} -MENDFUNC(2,frem_rr,(FRW d, FR s)) - -MIDFUNC(2,frem1_rr,(FRW d, FR s)) -{ - s=f_readreg(s); - d=f_rmw(d); - raw_frem1_rr(d,s); - f_unlock(s); - f_unlock(d); -} -MENDFUNC(2,frem1_rr,(FRW d, FR s)) - -MIDFUNC(2,fmul_rr,(FRW d, FR s)) -{ - s=f_readreg(s); - d=f_rmw(d); - raw_fmul_rr(d,s); - f_unlock(s); - f_unlock(d); -} -MENDFUNC(2,fmul_rr,(FRW d, FR s)) /******************************************************************** * Support functions exposed to gencomp. CREATE time * @@ -4949,40 +2682,198 @@ MENDFUNC(2,fmul_rr,(FRW d, FR s)) void set_zero(int r, int tmp) { - if (setzflg_uses_bsf) - bsf_l_rr(r,r); - else - simulate_bsf(tmp,r); + if (setzflg_uses_bsf) + bsf_l_rr(r,r); + else + simulate_bsf(tmp,r); } int kill_rodent(int r) { - return KILLTHERAT && + return KILLTHERAT && have_rat_stall && - (live.state[r].status==INMEM || - live.state[r].status==CLEAN || - live.state[r].status==ISCONST || - live.state[r].dirtysize==4); + (live.state[r].status==INMEM || + live.state[r].status==CLEAN || + live.state[r].status==ISCONST || + live.state[r].dirtysize==4); } uae_u32 get_const(int r) { Dif (!isconst(r)) { - write_log("Register %d should be constant, but isn't\n",r); - abort(); + jit_abort("Register %d should be constant, but isn't",r); } - return live.state[r].val; + return live.state[r].val; } void sync_m68k_pc(void) { - if (m68k_pc_offset) { - add_l_ri(PC_P,m68k_pc_offset); - comp_pc_p+=m68k_pc_offset; - m68k_pc_offset=0; - } + if (m68k_pc_offset) { + add_l_ri(PC_P,m68k_pc_offset); + comp_pc_p+=m68k_pc_offset; + m68k_pc_offset=0; + } } - + +/* for building exception frames */ +void compemu_exc_make_frame(int format, int sr, int ret, int nr, int tmp) +{ + lea_l_brr(SP_REG, SP_REG, -2); + mov_l_ri(tmp, (format << 12) + (nr * 4)); /* format | vector */ + writeword(SP_REG, tmp, tmp); 
+ + lea_l_brr(SP_REG, SP_REG, -4); + writelong(SP_REG, ret, tmp); + + lea_l_brr(SP_REG, SP_REG, -2); + writeword_clobber(SP_REG, sr, tmp); + remove_offset(SP_REG, -1); + if (isinreg(SP_REG)) + evict(SP_REG); + else + flush_reg(SP_REG); +} + +void compemu_make_sr(int sr, int tmp) +{ + flush_flags(); /* low level */ + flush_reg(FLAGX); + +#ifdef OPTIMIZED_FLAGS + + /* + * x86 EFLAGS: (!SAHF_SETO_PROFITABLE) + * FEDCBA98 76543210 + * ----V--- NZ-----C + * + * <--AH--> <--AL--> (SAHF_SETO_PROFITABLE) + * FEDCBA98 76543210 + * NZxxxxxC xxxxxxxV + * + * arm RFLAGS: + * FEDCBA98 76543210 FEDCBA98 76543210 + * NZCV---- -------- -------- -------- + * + * -> m68k SR: + * --S--III ---XNZVC + * + * Master-Bit and traceflags are ignored here, + * since they are not emulated in JIT code + */ + mov_l_rm(sr, uae_p32(live.state[FLAGTMP].mem)); + mov_l_ri(tmp, FLAGVAL_N|FLAGVAL_Z|FLAGVAL_V|FLAGVAL_C); + and_l(sr, tmp); + mov_l_rr(tmp, sr); + +#if (defined(CPU_i386) && defined(X86_ASSEMBLY)) || (defined(CPU_x86_64) && defined(X86_64_ASSEMBLY)) +#ifndef SAHF_SETO_PROFITABLE + ror_b_ri(sr, FLAGBIT_N - 3); /* move NZ into position; C->4 */ + shrl_w_ri(tmp, FLAGBIT_V - 1); /* move V into position in tmp */ + or_l(sr, tmp); /* or V flag to SR */ + mov_l_rr(tmp, sr); + shrl_b_ri(tmp, (8 - (FLAGBIT_N - 3)) - FLAGBIT_C); /* move C into position in tmp */ + or_l(sr, tmp); /* or C flag to SR */ +#else + ror_w_ri(sr, FLAGBIT_N - 3); /* move NZ in position; V->4, C->12 */ + shrl_w_ri(tmp, (16 - (FLAGBIT_N - 3)) - FLAGBIT_V - 1); /* move V into position in tmp; C->9 */ + or_l(sr, tmp); /* or V flag to SR */ + shrl_w_ri(tmp, FLAGBIT_C + FLAGBIT_V - 1); /* move C into position in tmp */ + or_l(sr, tmp); /* or C flag to SR */ +#endif + mov_l_ri(tmp, 0x0f); + and_l(sr, tmp); + + mov_b_rm(tmp, uae_p32(®flags.x)); + and_l_ri(tmp, FLAGVAL_X); + shll_l_ri(tmp, 4); + or_l(sr, tmp); + +#elif defined(CPU_arm) && defined(ARM_ASSEMBLY) + shrl_l_ri(sr, FLAGBIT_N - 3); /* move NZ into position */ + 
ror_l_ri(tmp, FLAGBIT_C - 1); /* move C into position in tmp; V->31 */ + and_l_ri(sr, 0xc); + or_l(sr, tmp); /* or C flag to SR */ + shrl_l_ri(tmp, 31); /* move V into position in tmp */ + or_l(sr, tmp); /* or V flag to SR */ + + mov_b_rm(tmp, uae_p32(®flags.x)); + and_l_ri(tmp, FLAGVAL_X); + shrl_l_ri(tmp, FLAGBIT_X - 4); + or_l(sr, tmp); + +#else +#error "unknown CPU" +#endif + +#else + + xor_l(sr, sr); + xor_l(tmp, tmp); + mov_b_rm(tmp, uae_p32(®s.c)); + shll_l_ri(tmp, 0); + or_l(sr, tmp); + mov_b_rm(tmp, uae_p32(®s.v)); + shll_l_ri(tmp, 1); + or_l(sr, tmp); + mov_b_rm(tmp, uae_p32(®s.z)); + shll_l_ri(tmp, 2); + or_l(sr, tmp); + mov_b_rm(tmp, uae_p32(®s.n)); + shll_l_ri(tmp, 3); + or_l(sr, tmp); + +#endif /* OPTIMIZED_FLAGS */ + + mov_b_rm(tmp, uae_p32(®s.s)); + shll_l_ri(tmp, 13); + or_l(sr, tmp); + mov_l_rm(tmp, uae_p32(®s.intmask)); + shll_l_ri(tmp, 8); + or_l(sr, tmp); + and_l_ri(sr, 0x271f); + mov_w_mr(uae_p32(®s.sr), sr); +} + +void compemu_enter_super(int sr) +{ +#if 0 + fprintf(stderr, "enter_super: isinreg=%d rr=%d nholds=%d\n", isinreg(SP_REG), live.state[SP_REG].realreg, isinreg(SP_REG) ? 
live.nat[live.state[SP_REG].realreg].nholds : -1); +#endif + remove_offset(SP_REG, -1); + if (isinreg(SP_REG)) + evict(SP_REG); + else + flush_reg(SP_REG); + /* + * equivalent to: + * if (!regs.s) + * { + * regs.usp = m68k_areg(regs, 7); + * m68k_areg(regs, 7) = regs.isp; + * regs.s = 1; + * mmu_set_super(1); + * } + */ + test_l_ri(sr, 0x2000); +#if defined(CPU_i386) || defined(CPU_x86_64) + compemu_raw_jnz_b_oponly(); + uae_u8 *branchadd = get_target(); + skip_byte(); +#elif defined(CPU_arm) + compemu_raw_jnz_b_oponly(); + uae_u8 *branchadd = get_target(); + skip_byte(); +#endif + mov_l_mr((uintptr)®s.usp, SP_REG); + mov_l_rm(SP_REG, uae_p32(®s.isp)); + mov_b_mi(uae_p32(®s.s), 1); +#if defined(CPU_i386) || defined(CPU_x86_64) + *branchadd = get_target() - (branchadd + 1); +#elif defined(CPU_arm) + *((uae_u32 *)branchadd - 3) = get_target() - (branchadd + 1); +#endif +} + /******************************************************************** * Scratch registers management * ********************************************************************/ @@ -5003,18 +2894,23 @@ static inline const char *str_on_off(bool b) return b ? "on" : "off"; } +#ifdef UAE +static +#endif void compiler_init(void) { static bool initialized = false; if (initialized) return; -#if JIT_DEBUG +#ifdef UAE +#else +#ifdef JIT_DEBUG // JIT debug mode ? JITDebug = PrefsFindBool("jitdebug"); #endif - write_log(" : enable runtime disassemblers : %s\n", JITDebug ? "yes" : "no"); - + jit_log(" : enable runtime disassemblers : %s", JITDebug ? "yes" : "no"); + #ifdef USE_JIT_FPU // Use JIT compiler for FPU instructions ? avoid_fpu = !PrefsFindBool("jitfpu"); @@ -5022,55 +2918,70 @@ void compiler_init(void) // JIT FPU is always disabled avoid_fpu = true; #endif - write_log(" : compile FPU instructions : %s\n", !avoid_fpu ? "yes" : "no"); - + jit_log(" : compile FPU instructions : %s", !avoid_fpu ? 
"yes" : "no"); + // Get size of the translation cache (in KB) cache_size = PrefsFindInt32("jitcachesize"); - write_log(" : requested translation cache size : %d KB\n", cache_size); - - // Initialize target CPU (check for features, e.g. CMOV, rat stalls) - raw_init_cpu(); + jit_log(" : requested translation cache size : %d KB", cache_size); + setzflg_uses_bsf = target_check_bsf(); - write_log(" : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no"); - write_log(" : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no"); - write_log(" : alignment for loops, jumps are %d, %d\n", align_loops, align_jumps); - + jit_log(" : target processor has CMOV instructions : %s", have_cmov ? "yes" : "no"); + jit_log(" : target processor can suffer from partial register stalls : %s", have_rat_stall ? "yes" : "no"); + jit_log(" : alignment for loops, jumps are %d, %d", align_loops, align_jumps); +#if defined(CPU_i386) || defined(CPU_x86_64) + jit_log(" : target processor has SSE2 instructions : %s", cpuinfo.x86_has_xmm2 ? "yes" : "no"); + jit_log(" : cache linesize is %lu", (unsigned long)cpuinfo.x86_clflush_size); +#endif + // Translation cache flush mechanism lazy_flush = PrefsFindBool("jitlazyflush"); - write_log(" : lazy translation cache invalidation : %s\n", str_on_off(lazy_flush)); + jit_log(" : lazy translation cache invalidation : %s", str_on_off(lazy_flush)); flush_icache = lazy_flush ? 
flush_icache_lazy : flush_icache_hard; - + // Compiler features - write_log(" : register aliasing : %s\n", str_on_off(1)); - write_log(" : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS)); - write_log(" : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET)); + jit_log(" : register aliasing : %s", str_on_off(1)); + jit_log(" : FP register aliasing : %s", str_on_off(USE_F_ALIAS)); + jit_log(" : lazy constant offsetting : %s", str_on_off(USE_OFFSET)); #if USE_INLINING follow_const_jumps = PrefsFindBool("jitinline"); #endif - write_log(" : translate through constant jumps : %s\n", str_on_off(follow_const_jumps)); - write_log(" : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA)); - + jit_log(" : block inlining : %s", str_on_off(follow_const_jumps)); + jit_log(" : separate blockinfo allocation : %s", str_on_off(USE_SEPARATE_BIA)); + // Build compiler tables + init_table68k (); build_comp(); - - initialized = true; - -#if PROFILE_UNTRANSLATED_INSNS - write_log(" : gather statistics on untranslated insns count\n"); #endif -#if PROFILE_COMPILE_TIME - write_log(" : gather statistics on translation time\n"); + initialized = true; + +#ifdef PROFILE_UNTRANSLATED_INSNS + jit_log(" : gather statistics on untranslated insns count"); +#endif + +#ifdef PROFILE_COMPILE_TIME + jit_log(" : gather statistics on translation time"); emul_start_time = clock(); #endif } +#ifdef UAE +static +#endif void compiler_exit(void) { -#if PROFILE_COMPILE_TIME +#ifdef PROFILE_COMPILE_TIME emul_end_time = clock(); #endif - + +#ifdef UAE +#else +#if DEBUG +#if defined(USE_DATA_BUFFER) + jit_log("data_wasted = %ld bytes", data_wasted); +#endif +#endif + // Deallocate translation cache if (compiled_code) { vm_release(compiled_code, cache_size * 1024); @@ -5082,26 +2993,25 @@ void compiler_exit(void) vm_release(popallspace, POPALLSPACE_SIZE); popallspace = 0; } - -#if PROFILE_COMPILE_TIME - write_log("### Compile Block statistics\n"); - write_log("Number of calls to 
compile_block : %d\n", compile_count); - uae_u32 emul_time = emul_end_time - emul_start_time; - write_log("Total emulation time : %.1f sec\n", double(emul_time)/double(CLOCKS_PER_SEC)); - write_log("Total compilation time : %.1f sec (%.1f%%)\n", double(compile_time)/double(CLOCKS_PER_SEC), - 100.0*double(compile_time)/double(emul_time)); - write_log("\n"); #endif -#if PROFILE_UNTRANSLATED_INSNS +#ifdef PROFILE_COMPILE_TIME + jit_log("### Compile Block statistics"); + jit_log("Number of calls to compile_block : %d", compile_count); + uae_u32 emul_time = emul_end_time - emul_start_time; + jit_log("Total emulation time : %.1f sec", double(emul_time)/double(CLOCKS_PER_SEC)); + jit_log("Total compilation time : %.1f sec (%.1f%%)", double(compile_time)/double(CLOCKS_PER_SEC), 100.0*double(compile_time)/double(emul_time)); +#endif + +#ifdef PROFILE_UNTRANSLATED_INSNS uae_u64 untranslated_count = 0; for (int i = 0; i < 65536; i++) { opcode_nums[i] = i; untranslated_count += raw_cputbl_count[i]; } - write_log("Sorting out untranslated instructions count...\n"); + bug("Sorting out untranslated instructions count..."); qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn); - write_log("\nRank Opc Count Name\n"); + jit_log("Rank Opc Count Name"); for (int i = 0; i < untranslated_top_ten; i++) { uae_u32 count = raw_cputbl_count[opcode_nums[i]]; struct instr *dp; @@ -5109,32 +3019,36 @@ void compiler_exit(void) if (!count) break; dp = table68k + opcode_nums[i]; - for (lookup = lookuptab; lookup->mnemo != dp->mnemo; lookup++) + for (lookup = lookuptab; lookup->mnemo != (instrmnem)dp->mnemo; lookup++) ; - write_log("%03d: %04x %10lu %s\n", i, opcode_nums[i], count, lookup->name); + bug("%03d: %04x %10u %s", i, opcode_nums[i], count, lookup->name); } #endif -#if RECORD_REGISTER_USAGE +#ifdef RECORD_REGISTER_USAGE int reg_count_ids[16]; uint64 tot_reg_count = 0; for (int i = 0; i < 16; i++) { - reg_count_ids[i] = i; - tot_reg_count += reg_count[i]; + reg_count_ids[i] = i; 
+ tot_reg_count += reg_count[i]; } qsort(reg_count_ids, 16, sizeof(int), reg_count_compare); uint64 cum_reg_count = 0; for (int i = 0; i < 16; i++) { - int r = reg_count_ids[i]; - cum_reg_count += reg_count[r]; - printf("%c%d : %16ld %2.1f%% [%2.1f]\n", r < 8 ? 'D' : 'A', r % 8, + int r = reg_count_ids[i]; + cum_reg_count += reg_count[r]; + jit_log("%c%d : %16ld %2.1f%% [%2.1f]", r < 8 ? 'D' : 'A', r % 8, reg_count[r], 100.0*double(reg_count[r])/double(tot_reg_count), 100.0*double(cum_reg_count)/double(tot_reg_count)); } #endif + + // exit_table68k(); } +#ifdef UAE +#else bool compiler_use_jit(void) { // Check for the "jit" prefs item @@ -5147,283 +3061,248 @@ bool compiler_use_jit(void) return false; } - // Enable JIT for 68020+ emulation only - if (CPUType < 2) { - write_log(" : JIT is not supported in 680%d0 emulation mode, disabling.\n", CPUType); - return false; - } - return true; } - -void init_comp(void) -{ - int i; - uae_s8* cb=can_byte; - uae_s8* cw=can_word; - uae_s8* au=always_used; - -#if RECORD_REGISTER_USAGE - for (i=0;i<16;i++) - reg_count_local[i] = 0; #endif - for (i=0;i= (uintptr)ROMBaseHost) && (addr < (uintptr)ROMBaseHost + ROMSize)); -} - -static void flush_all(void) -{ - int i; - - log_flush(); - for (i=0;i0) - free_nreg(i); - - for (i=0;i0) - f_free_nreg(i); - - live.flags_in_flags=TRASH; /* Note: We assume we already rescued the - flags at the very start of the call_r - functions! */ + for (i=0;idirect_handler_to_use); -} - -static uae_u32 get_handler(uae_u32 addr) -{ - uae_u32 cl=cacheline(addr); - blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0); - return (uintptr)bi->direct_handler_to_use; -} - -static void load_handler(int reg, uae_u32 addr) -{ - mov_l_rm(reg,get_handler_address(addr)); + blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0); + return (uintptr)bi->direct_handler_to_use; } /* This version assumes that it is writing *real* memory, and *will* fail - * if that assumption is wrong! 
No branches, no second chances, just - * straight go-for-it attitude */ + * if that assumption is wrong! No branches, no second chances, just + * straight go-for-it attitude */ static void writemem_real(int address, int source, int size, int tmp, int clobber) { - int f=tmp; + int f=tmp; - if (clobber) - f=source; +#ifdef NATMEM_OFFSET + if (canbang) { /* Woohoo! go directly at the memory! */ + if (clobber) + f=source; - switch(size) { - case 1: mov_b_bRr(address,source,MEMBaseDiff); break; - case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,MEMBaseDiff); break; - case 4: mov_l_rr(f,source); bswap_32(f); mov_l_bRr(address,f,MEMBaseDiff); break; + switch(size) { + case 1: mov_b_bRr(address,source,MEMBaseDiff); break; + case 2: mov_w_rr(f,source); mid_bswap_16(f); mov_w_bRr(address,f,MEMBaseDiff); break; + case 4: mov_l_rr(f,source); mid_bswap_32(f); mov_l_bRr(address,f,MEMBaseDiff); break; + } + forget_about(tmp); + forget_about(f); + return; } - forget_about(tmp); - forget_about(f); +#endif + +#ifdef UAE + mov_l_rr(f,address); + shrl_l_ri(f,16); /* The index into the baseaddr table */ + mov_l_rm_indexed(f,uae_p32(baseaddr),f,SIZEOF_VOID_P); /* FIXME: is SIZEOF_VOID_P correct? */ + + if (address==source) { /* IBrowse does this! 
*/ + if (size > 1) { + add_l(f,address); /* f now holds the final address */ + switch (size) { + case 2: mid_bswap_16(source); mov_w_Rr(f,source,0); + mid_bswap_16(source); return; + case 4: mid_bswap_32(source); mov_l_Rr(f,source,0); + mid_bswap_32(source); return; + } + } + } + switch (size) { /* f now holds the offset */ + case 1: mov_b_mrr_indexed(address,f,1,source); break; + case 2: mid_bswap_16(source); mov_w_mrr_indexed(address,f,1,source); + mid_bswap_16(source); break; /* base, index, source */ + case 4: mid_bswap_32(source); mov_l_mrr_indexed(address,f,1,source); + mid_bswap_32(source); break; + } +#endif } +#ifdef UAE +static inline void writemem(int address, int source, int offset, int size, int tmp) +{ + int f=tmp; + + mov_l_rr(f,address); + shrl_l_ri(f,16); /* The index into the mem bank table */ + mov_l_rm_indexed(f,uae_p32(mem_banks),f,SIZEOF_VOID_P); /* FIXME: is SIZEOF_VOID_P correct? */ + /* Now f holds a pointer to the actual membank */ + mov_l_rR(f,f,offset); + /* Now f holds the address of the b/w/lput function */ + call_r_02(f,address,source,4,size); + forget_about(tmp); +} +#endif + void writebyte(int address, int source, int tmp) { - writemem_real(address,source,1,tmp,0); +#ifdef UAE + if ((special_mem & S_WRITE) || distrust_byte()) + writemem_special(address, source, 5 * SIZEOF_VOID_P, 1, tmp); + else +#endif + writemem_real(address,source,1,tmp,0); } -static __inline__ void writeword_general(int address, int source, int tmp, - int clobber) +static inline void writeword_general(int address, int source, int tmp, + int clobber) { - writemem_real(address,source,2,tmp,clobber); +#ifdef UAE + if ((special_mem & S_WRITE) || distrust_word()) + writemem_special(address, source, 4 * SIZEOF_VOID_P, 2, tmp); + else +#endif + writemem_real(address,source,2,tmp,clobber); } void writeword_clobber(int address, int source, int tmp) { - writeword_general(address,source,tmp,1); + writeword_general(address,source,tmp,1); } void writeword(int address, int 
source, int tmp) { - writeword_general(address,source,tmp,0); + writeword_general(address,source,tmp,0); } -static __inline__ void writelong_general(int address, int source, int tmp, - int clobber) +static inline void writelong_general(int address, int source, int tmp, + int clobber) { - writemem_real(address,source,4,tmp,clobber); +#ifdef UAE + if ((special_mem & S_WRITE) || distrust_long()) + writemem_special(address, source, 3 * SIZEOF_VOID_P, 4, tmp); + else +#endif + writemem_real(address,source,4,tmp,clobber); } void writelong_clobber(int address, int source, int tmp) { - writelong_general(address,source,tmp,1); + writelong_general(address,source,tmp,1); } void writelong(int address, int source, int tmp) { - writelong_general(address,source,tmp,0); + writelong_general(address,source,tmp,0); } /* This version assumes that it is reading *real* memory, and *will* fail - * if that assumption is wrong! No branches, no second chances, just - * straight go-for-it attitude */ + * if that assumption is wrong! No branches, no second chances, just + * straight go-for-it attitude */ static void readmem_real(int address, int dest, int size, int tmp) { - int f=tmp; + int f=tmp; - if (size==4 && address!=dest) - f=dest; + if (size==4 && address!=dest) + f=dest; + +#ifdef NATMEM_OFFSET + if (canbang) { /* Woohoo! go directly at the memory! */ + switch(size) { + case 1: mov_b_brR(dest,address,MEMBaseDiff); break; + case 2: mov_w_brR(dest,address,MEMBaseDiff); mid_bswap_16(dest); break; + case 4: mov_l_brR(dest,address,MEMBaseDiff); mid_bswap_32(dest); break; + } + forget_about(tmp); + (void) f; + return; + } +#endif + +#ifdef UAE + mov_l_rr(f,address); + shrl_l_ri(f,16); /* The index into the baseaddr table */ + mov_l_rm_indexed(f,uae_p32(baseaddr),f,SIZEOF_VOID_P); /* FIXME: is SIZEOF_VOID_P correct? 
*/ + /* f now holds the offset */ switch(size) { - case 1: mov_b_brR(dest,address,MEMBaseDiff); break; - case 2: mov_w_brR(dest,address,MEMBaseDiff); bswap_16(dest); break; - case 4: mov_l_brR(dest,address,MEMBaseDiff); bswap_32(dest); break; + case 1: mov_b_rrm_indexed(dest,address,f,1); break; + case 2: mov_w_rrm_indexed(dest,address,f,1); mid_bswap_16(dest); break; + case 4: mov_l_rrm_indexed(dest,address,f,1); mid_bswap_32(dest); break; } forget_about(tmp); +#endif } + + +#ifdef UAE +static inline void readmem(int address, int dest, int offset, int size, int tmp) +{ + int f=tmp; + + mov_l_rr(f,address); + shrl_l_ri(f,16); /* The index into the mem bank table */ + mov_l_rm_indexed(f,uae_p32(mem_banks),f,SIZEOF_VOID_P); /* FIXME: is SIZEOF_VOID_P correct? */ + /* Now f holds a pointer to the actual membank */ + mov_l_rR(f,f,offset); + /* Now f holds the address of the b/w/lget function */ + call_r_11(dest,f,address,size,4); + forget_about(tmp); +} +#endif + void readbyte(int address, int dest, int tmp) { - readmem_real(address,dest,1,tmp); +#ifdef UAE + if ((special_mem & S_READ) || distrust_byte()) + readmem_special(address, dest, 2 * SIZEOF_VOID_P, 1, tmp); + else +#endif + readmem_real(address,dest,1,tmp); } void readword(int address, int dest, int tmp) { - readmem_real(address,dest,2,tmp); +#ifdef UAE + if ((special_mem & S_READ) || distrust_word()) + readmem_special(address, dest, 1 * SIZEOF_VOID_P, 2, tmp); + else +#endif + readmem_real(address,dest,2,tmp); } void readlong(int address, int dest, int tmp) { - readmem_real(address,dest,4,tmp); +#ifdef UAE + if ((special_mem & S_READ) || distrust_long()) + readmem_special(address, dest, 0 * SIZEOF_VOID_P, 4, tmp); + else +#endif + readmem_real(address,dest,4,tmp); } void get_n_addr(int address, int dest, int tmp) { +#ifdef UAE + if (special_mem || distrust_addr()) { + /* This one might appear a bit odd... 
*/ + readmem(address, dest, 6 * SIZEOF_VOID_P, 4, tmp); + return; + } +#endif + // a is the register containing the virtual address // after the offset had been fetched int a=tmp; - + // f is the register that will contain the offset int f=tmp; - + // a == f == tmp if (address == dest) if (address!=dest) { - a=address; - f=dest; + a=address; + f=dest; } -#if REAL_ADDRESSING - mov_l_rr(dest, address); -#elif DIRECT_ADDRESSING - lea_l_brr(dest,address,MEMBaseDiff); +#ifdef NATMEM_OFFSET + if (canbang) { +#if FIXED_ADDRESSING + lea_l_brr(dest,address,MEMBaseDiff); +#else +# error "Only fixed adressing mode supported" #endif + forget_about(tmp); + (void) f; + (void) a; + return; + } +#endif + +#ifdef UAE + mov_l_rr(f,address); + mov_l_rr(dest,address); // gb-- nop if dest==address + shrl_l_ri(f,16); + mov_l_rm_indexed(f,uae_p32(baseaddr),f,SIZEOF_VOID_P); /* FIXME: is SIZEOF_VOID_P correct? */ + add_l(dest,f); forget_about(tmp); +#endif } void get_n_addr_jmp(int address, int dest, int tmp) { +#ifdef WINUAE_ARANYM /* For this, we need to get the same address as the rest of UAE would --- otherwise we end up translating everything twice */ - get_n_addr(address,dest,tmp); + get_n_addr(address,dest,tmp); +#else + int f=tmp; + if (address!=dest) + f=dest; + mov_l_rr(f,address); + shrl_l_ri(f,16); /* The index into the baseaddr bank table */ + mov_l_rm_indexed(dest,uae_p32(baseaddr),f,SIZEOF_VOID_P); /* FIXME: is SIZEOF_VOID_P correct? 
*/ + add_l(dest,address); + and_l_ri (dest, ~1); + forget_about(tmp); +#endif } @@ -5585,68 +3603,68 @@ void get_n_addr_jmp(int address, int dest, int tmp) target is a register, as is tmp */ void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp) { - int reg = (dp >> 12) & 15; - int regd_shift=(dp >> 9) & 3; + int reg = (dp >> 12) & 15; + int regd_shift=(dp >> 9) & 3; - if (dp & 0x100) { - int ignorebase=(dp&0x80); - int ignorereg=(dp&0x40); - int addbase=0; - int outer=0; - - if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); - if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4); + if (dp & 0x100) { + int ignorebase=(dp&0x80); + int ignorereg=(dp&0x40); + int addbase=0; + int outer=0; - if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); - if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4); + if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4); - if ((dp & 0x4) == 0) { /* add regd *before* the get_long */ - if (!ignorereg) { - if ((dp & 0x800) == 0) - sign_extend_16_rr(target,reg); - else - mov_l_rr(target,reg); - shll_l_ri(target,regd_shift); - } - else - mov_l_ri(target,0); + if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4); - /* target is now regd */ - if (!ignorebase) - add_l(target,base); - add_l_ri(target,addbase); - if (dp&0x03) readlong(target,target,tmp); - } else { /* do the getlong first, then add regd */ - if (!ignorebase) { - mov_l_rr(target,base); - add_l_ri(target,addbase); - } - else - mov_l_ri(target,addbase); - if (dp&0x03) readlong(target,target,tmp); + if ((dp & 0x4) == 0) { /* add regd *before* the get_long */ + if (!ignorereg) { + if ((dp & 0x800) == 0) + sign_extend_16_rr(target,reg); + else + 
mov_l_rr(target,reg); + shll_l_ri(target,regd_shift); + } + else + mov_l_ri(target,0); - if (!ignorereg) { - if ((dp & 0x800) == 0) - sign_extend_16_rr(tmp,reg); - else - mov_l_rr(tmp,reg); - shll_l_ri(tmp,regd_shift); - /* tmp is now regd */ - add_l(target,tmp); - } + /* target is now regd */ + if (!ignorebase) + add_l(target,base); + add_l_ri(target,addbase); + if (dp&0x03) readlong(target,target,tmp); + } else { /* do the getlong first, then add regd */ + if (!ignorebase) { + mov_l_rr(target,base); + add_l_ri(target,addbase); + } + else + mov_l_ri(target,addbase); + if (dp&0x03) readlong(target,target,tmp); + + if (!ignorereg) { + if ((dp & 0x800) == 0) + sign_extend_16_rr(tmp,reg); + else + mov_l_rr(tmp,reg); + shll_l_ri(tmp,regd_shift); + /* tmp is now regd */ + add_l(target,tmp); + } + } + add_l_ri(target,outer); } - add_l_ri(target,outer); - } - else { /* 68000 version */ - if ((dp & 0x800) == 0) { /* Sign extend */ - sign_extend_16_rr(target,reg); - lea_l_brr_indexed(target,base,target,1<= CODE_ALLOC_MAX_ATTEMPTS) - return NULL; - - return do_alloc_code(size, depth + 1); -#else - uint8 *code = (uint8 *)vm_acquire(size); + UNUSED(depth); + uint8 *code = (uint8 *)vm_acquire(size, VM_MAP_DEFAULT | VM_MAP_32BIT); return code == VM_MAP_FAILED ? 
NULL : code; -#endif } static inline uint8 *alloc_code(uint32 size) @@ -5734,14 +3708,17 @@ static inline uint8 *alloc_code(uint32 size) void alloc_cache(void) { if (compiled_code) { - flush_icache_hard(6); + flush_icache_hard(); vm_release(compiled_code, cache_size * 1024); compiled_code = 0; } - + +#ifdef UAE + cache_size = currprefs.cachesize; +#endif if (cache_size == 0) return; - + while (!compiled_code && cache_size) { if ((compiled_code = alloc_code(cache_size * 1024)) == NULL) { compiled_code = 0; @@ -5751,24 +3728,29 @@ void alloc_cache(void) vm_protect(compiled_code, cache_size * 1024, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE); if (compiled_code) { - write_log(" : actual translation cache size : %d KB at 0x%08X\n", cache_size, compiled_code); + jit_log(" : actual translation cache size : %d KB at %p-%p", cache_size, compiled_code, compiled_code + cache_size*1024); +#ifdef USE_DATA_BUFFER + max_compile_start = compiled_code + cache_size*1024 - BYTES_PER_INST - DATA_BUFFER_SIZE; +#else max_compile_start = compiled_code + cache_size*1024 - BYTES_PER_INST; +#endif current_compile_p = compiled_code; current_cache_size = 0; +#if defined(USE_DATA_BUFFER) + reset_data_buffer(); +#endif } } - - extern void op_illg_1 (uae_u32 opcode) REGPARAM; static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2) { - uae_u32 k1 = 0; - uae_u32 k2 = 0; + uae_u32 k1 = 0; + uae_u32 k2 = 0; #if USE_CHECKSUM_INFO - checksum_info *csi = bi->csi; + checksum_info *csi = bi->csi; Dif(!csi) abort(); while (csi) { uae_s32 len = csi->length; @@ -5777,7 +3759,7 @@ static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2) uae_s32 len = bi->len; uintptr tmp = (uintptr)bi->min_pcp; #endif - uae_u32*pos; + uae_u32* pos; len += (tmp & 3); tmp &= ~((uintptr)3); @@ -5804,343 +3786,361 @@ static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2) #if 0 static void show_checksum(CSI_TYPE* csi) { - uae_u32 k1=0; - uae_u32 k2=0; - uae_s32 len=CSI_LENGTH(csi); - uae_u32 
tmp=(uintptr)CSI_STARTcsi - uae_u32* pos; + uae_u32 k1=0; + uae_u32 k2=0; + uae_s32 len=CSI_LENGTH(csi); + uae_u32 tmp=(uintptr)CSI_START_P(csi); + uae_u32* pos; - len+=(tmp&3); - tmp&=(~3); - pos=(uae_u32*)tmp; + len+=(tmp&3); + tmp&=(~3); + pos=(uae_u32*)tmp; - if (len<0 || len>MAX_CHECKSUM_LEN) { - return; - } - else { - while (len>0) { - write_log("%08x ",*pos); - pos++; - len-=4; + if (len<0 || len>MAX_CHECKSUM_LEN) { + return; + } + else { + while (len>0) { + jit_log("%08x ",*pos); + pos++; + len-=4; + } + jit_log(" bla"); } - write_log(" bla\n"); - } } #endif int check_for_cache_miss(void) { - blockinfo* bi=get_blockinfo_addr(regs.pc_p); - - if (bi) { - int cl=cacheline(regs.pc_p); - if (bi!=cache_tags[cl+1].bi) { - raise_in_cl_list(bi); - return 1; + blockinfo* bi=get_blockinfo_addr(regs.pc_p); + + if (bi) { + int cl=cacheline(regs.pc_p); + if (bi!=cache_tags[cl+1].bi) { + raise_in_cl_list(bi); + return 1; + } } - } - return 0; + return 0; } - + static void recompile_block(void) { - /* An existing block's countdown code has expired. We need to make - sure that execute_normal doesn't refuse to recompile due to a - perceived cache miss... */ - blockinfo* bi=get_blockinfo_addr(regs.pc_p); + /* An existing block's countdown code has expired. We need to make + sure that execute_normal doesn't refuse to recompile due to a + perceived cache miss... 
*/ + blockinfo* bi=get_blockinfo_addr(regs.pc_p); - Dif (!bi) - abort(); - raise_in_cl_list(bi); - execute_normal(); - return; + Dif (!bi) + jit_abort("recompile_block"); + raise_in_cl_list(bi); + execute_normal(); + return; } static void cache_miss(void) { - blockinfo* bi=get_blockinfo_addr(regs.pc_p); - uae_u32 cl=cacheline(regs.pc_p); - blockinfo* bi2=get_blockinfo(cl); + blockinfo* bi=get_blockinfo_addr(regs.pc_p); +#if COMP_DEBUG + uae_u32 cl=cacheline(regs.pc_p); + blockinfo* bi2=get_blockinfo(cl); +#endif - if (!bi) { - execute_normal(); /* Compile this block now */ + if (!bi) { + execute_normal(); /* Compile this block now */ + return; + } + Dif (!bi2 || bi==bi2) { + jit_abort("Unexplained cache miss %p %p",bi,bi2); + } + raise_in_cl_list(bi); return; - } - Dif (!bi2 || bi==bi2) { - write_log("Unexplained cache miss %p %p\n",bi,bi2); - abort(); - } - raise_in_cl_list(bi); - return; } static int called_check_checksum(blockinfo* bi); -static inline int block_check_checksum(blockinfo* bi) +static inline int block_check_checksum(blockinfo* bi) { - uae_u32 c1,c2; - bool isgood; - - if (bi->status!=BI_NEED_CHECK) - return 1; /* This block is in a checked state */ - - checksum_count++; + uae_u32 c1,c2; + bool isgood; - if (bi->c1 || bi->c2) - calc_checksum(bi,&c1,&c2); - else { - c1=c2=1; /* Make sure it doesn't match */ + if (bi->status!=BI_NEED_CHECK) + return 1; /* This block is in a checked state */ + + if (bi->c1 || bi->c2) + calc_checksum(bi,&c1,&c2); + else { + c1=c2=1; /* Make sure it doesn't match */ } - - isgood=(c1==bi->c1 && c2==bi->c2); - if (isgood) { - /* This block is still OK. So we reactivate. 
Of course, that - means we have to move it into the needs-to-be-flushed list */ - bi->handler_to_use=bi->handler; - set_dhtu(bi,bi->direct_handler); - bi->status=BI_CHECKING; - isgood=called_check_checksum(bi) != 0; - } - if (isgood) { - /* write_log("reactivate %p/%p (%x %x/%x %x)\n",bi,bi->pc_p, - c1,c2,bi->c1,bi->c2);*/ - remove_from_list(bi); - add_to_active(bi); - raise_in_cl_list(bi); - bi->status=BI_ACTIVE; - } - else { - /* This block actually changed. We need to invalidate it, - and set it up to be recompiled */ - /* write_log("discard %p/%p (%x %x/%x %x)\n",bi,bi->pc_p, - c1,c2,bi->c1,bi->c2); */ - invalidate_block(bi); - raise_in_cl_list(bi); - } - return isgood; + isgood=(c1==bi->c1 && c2==bi->c2); + + if (isgood) { + /* This block is still OK. So we reactivate. Of course, that + means we have to move it into the needs-to-be-flushed list */ + bi->handler_to_use=bi->handler; + set_dhtu(bi,bi->direct_handler); + bi->status=BI_CHECKING; + isgood=called_check_checksum(bi) != 0; + } + if (isgood) { + jit_log2("reactivate %p/%p (%x %x/%x %x)",bi,bi->pc_p, c1,c2,bi->c1,bi->c2); + remove_from_list(bi); + add_to_active(bi); + raise_in_cl_list(bi); + bi->status=BI_ACTIVE; + } + else { + /* This block actually changed. 
We need to invalidate it, + and set it up to be recompiled */ + jit_log2("discard %p/%p (%x %x/%x %x)",bi,bi->pc_p, c1,c2,bi->c1,bi->c2); + invalidate_block(bi); + raise_in_cl_list(bi); + } + return isgood; } -static int called_check_checksum(blockinfo* bi) +static int called_check_checksum(blockinfo* bi) { - dependency* x=bi->deplist; - int isgood=1; - int i; - - for (i=0;i<2 && isgood;i++) { - if (bi->dep[i].jmp_off) { - isgood=block_check_checksum(bi->dep[i].target); + int isgood=1; + int i; + + for (i=0;i<2 && isgood;i++) { + if (bi->dep[i].jmp_off) { + isgood=block_check_checksum(bi->dep[i].target); + } } - } - return isgood; + return isgood; } -static void check_checksum(void) +static void check_checksum(void) { - blockinfo* bi=get_blockinfo_addr(regs.pc_p); - uae_u32 cl=cacheline(regs.pc_p); - blockinfo* bi2=get_blockinfo(cl); + blockinfo* bi=get_blockinfo_addr(regs.pc_p); + uae_u32 cl=cacheline(regs.pc_p); + blockinfo* bi2=get_blockinfo(cl); - /* These are not the droids you are looking for... */ - if (!bi) { - /* Whoever is the primary target is in a dormant state, but - calling it was accidental, and we should just compile this - new block */ - execute_normal(); - return; - } - if (bi!=bi2) { - /* The block was hit accidentally, but it does exist. Cache miss */ - cache_miss(); - return; - } + /* These are not the droids you are looking for... */ + if (!bi) { + /* Whoever is the primary target is in a dormant state, but + calling it was accidental, and we should just compile this + new block */ + execute_normal(); + return; + } + if (bi!=bi2) { + /* The block was hit accidentally, but it does exist. 
Cache miss */ + cache_miss(); + return; + } - if (!block_check_checksum(bi)) - execute_normal(); + if (!block_check_checksum(bi)) + execute_normal(); } -static __inline__ void match_states(blockinfo* bi) +static inline void match_states(blockinfo* bi) { - int i; - smallstate* s=&(bi->env); - - if (bi->status==BI_NEED_CHECK) { - block_check_checksum(bi); - } - if (bi->status==BI_ACTIVE || - bi->status==BI_FINALIZING) { /* Deal with the *promises* the - block makes (about not using - certain vregs) */ - for (i=0;i<16;i++) { - if (s->virt[i]==L_UNNEEDED) { - // write_log("unneeded reg %d at %p\n",i,target); - COMPCALL(forget_about)(i); // FIXME - } - } - } - flush(1); + int i; + smallstate* s=&(bi->env); - /* And now deal with the *demands* the block makes */ - for (i=0;inat[i]; - if (v>=0) { - // printf("Loading reg %d into %d at %p\n",v,i,target); - readreg_specific(v,4,i); - // do_load_reg(i,v); - // setlock(i); + if (bi->status==BI_NEED_CHECK) { + block_check_checksum(bi); } - } - for (i=0;inat[i]; - if (v>=0) { - unlock2(i); + if (bi->status==BI_ACTIVE || + bi->status==BI_FINALIZING) { /* Deal with the *promises* the + block makes (about not using + certain vregs) */ + for (i=0;i<16;i++) { + if (s->virt[i]==L_UNNEEDED) { + jit_log2("unneeded reg %d at %p",i,target); + COMPCALL(forget_about)(i); // FIXME + } + } + } + flush(1); + + /* And now deal with the *demands* the block makes */ + for (i=0;inat[i]; + if (v>=0) { + // printf("Loading reg %d into %d at %p\n",v,i,target); + readreg_specific(v,4,i); + // do_load_reg(i,v); + // setlock(i); + } + } + for (i=0;inat[i]; + if (v>=0) { + unlock2(i); + } } - } } -static __inline__ void create_popalls(void) +static inline void create_popalls(void) { - int i,r; + int i,r; - if ((popallspace = alloc_code(POPALLSPACE_SIZE)) == NULL) { - write_log("FATAL: Could not allocate popallspace!\n"); - abort(); - } - vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_WRITE); + if (popallspace == NULL) { + if 
((popallspace = alloc_code(POPALLSPACE_SIZE)) == NULL) { + jit_log("WARNING: Could not allocate popallspace!"); +#ifdef UAE + if (currprefs.cachesize > 0) +#endif + { + jit_abort("Could not allocate popallspace!"); + } +#ifdef UAE + /* This is not fatal if JIT is not used. If JIT is + * turned on, it will crash, but it would have crashed + * anyway. */ + return; +#endif + } + } + vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_WRITE); - int stack_space = STACK_OFFSET; - for (i=0;idirect_pen=(cpuop_func *)get_target(); - raw_mov_l_rm(0,(uintptr)&(bi->pc_p)); - raw_mov_l_mr((uintptr)®s.pc_p,0); - raw_jmp((uintptr)popall_execute_normal); + set_target(current_compile_p); + align_target(align_jumps); + bi->direct_pen=(cpuop_func*)get_target(); + compemu_raw_mov_l_rm(0,(uintptr)&(bi->pc_p)); + compemu_raw_mov_l_mr((uintptr)®s.pc_p,0); + compemu_raw_jmp((uintptr)popall_execute_normal); - align_target(align_jumps); - bi->direct_pcc=(cpuop_func *)get_target(); - raw_mov_l_rm(0,(uintptr)&(bi->pc_p)); - raw_mov_l_mr((uintptr)®s.pc_p,0); - raw_jmp((uintptr)popall_check_checksum); - current_compile_p=get_target(); + align_target(align_jumps); + bi->direct_pcc=(cpuop_func*)get_target(); + compemu_raw_mov_l_rm(0,(uintptr)&(bi->pc_p)); + compemu_raw_mov_l_mr((uintptr)®s.pc_p,0); + compemu_raw_jmp((uintptr)popall_check_checksum); + flush_cpu_icache((void *)current_compile_p, (void *)target); + current_compile_p=get_target(); - bi->deplist=NULL; - for (i=0;i<2;i++) { - bi->dep[i].prev_p=NULL; - bi->dep[i].next=NULL; - } - bi->env=default_ss; - bi->status=BI_INVALID; - bi->havestate=0; - //bi->env=empty_ss; + bi->deplist=NULL; + for (i=0;i<2;i++) { + bi->dep[i].prev_p=NULL; + bi->dep[i].next=NULL; + } + bi->env=default_ss; + bi->status=BI_INVALID; + bi->havestate=0; + //bi->env=empty_ss; } +#ifdef UAE +void compemu_reset(void) +{ + set_cache_state(0); +} +#endif + +#ifdef UAE +#else // OPCODE is in big endian format, use cft_map() beforehand, if needed. 
+#endif static inline void reset_compop(int opcode) { compfunctbl[opcode] = NULL; @@ -6170,10 +4170,100 @@ static int read_opcode(const char *p) return opcode; } + +#ifdef USE_JIT_FPU +static struct { + const char *name; + bool *const disabled; +} const jit_opcodes[] = { + { "fbcc", &jit_disable.fbcc }, + { "fdbcc", &jit_disable.fdbcc }, + { "fscc", &jit_disable.fscc }, + { "ftrapcc", &jit_disable.ftrapcc }, + { "fsave", &jit_disable.fsave }, + { "frestore", &jit_disable.frestore }, + { "fmove", &jit_disable.fmove }, + { "fmovec", &jit_disable.fmovec }, + { "fmovem", &jit_disable.fmovem }, + { "fmovecr", &jit_disable.fmovecr }, + { "fint", &jit_disable.fint }, + { "fsinh", &jit_disable.fsinh }, + { "fintrz", &jit_disable.fintrz }, + { "fsqrt", &jit_disable.fsqrt }, + { "flognp1", &jit_disable.flognp1 }, + { "fetoxm1", &jit_disable.fetoxm1 }, + { "ftanh", &jit_disable.ftanh }, + { "fatan", &jit_disable.fatan }, + { "fasin", &jit_disable.fasin }, + { "fatanh", &jit_disable.fatanh }, + { "fsin", &jit_disable.fsin }, + { "ftan", &jit_disable.ftan }, + { "fetox", &jit_disable.fetox }, + { "ftwotox", &jit_disable.ftwotox }, + { "ftentox", &jit_disable.ftentox }, + { "flogn", &jit_disable.flogn }, + { "flog10", &jit_disable.flog10 }, + { "flog2", &jit_disable.flog2 }, + { "fabs", &jit_disable.fabs }, + { "fcosh", &jit_disable.fcosh }, + { "fneg", &jit_disable.fneg }, + { "facos", &jit_disable.facos }, + { "fcos", &jit_disable.fcos }, + { "fgetexp", &jit_disable.fgetexp }, + { "fgetman", &jit_disable.fgetman }, + { "fdiv", &jit_disable.fdiv }, + { "fmod", &jit_disable.fmod }, + { "fadd", &jit_disable.fadd }, + { "fmul", &jit_disable.fmul }, + { "fsgldiv", &jit_disable.fsgldiv }, + { "frem", &jit_disable.frem }, + { "fscale", &jit_disable.fscale }, + { "fsglmul", &jit_disable.fsglmul }, + { "fsub", &jit_disable.fsub }, + { "fsincos", &jit_disable.fsincos }, + { "fcmp", &jit_disable.fcmp }, + { "ftst", &jit_disable.ftst }, +}; + +static bool read_fpu_opcode(const char **pp) 
+{ + const char *p = *pp; + const char *end; + size_t len; + unsigned int i; + + end = p; + while (*end != '\0' && *end != ',') + end++; + len = end - p; + if (*end != '\0') + end++; + for (i = 0; i < (sizeof(jit_opcodes) / sizeof(jit_opcodes[0])); i++) + { + if (len == strlen(jit_opcodes[i].name) && strncasecmp(jit_opcodes[i].name, p, len) == 0) + { + *jit_opcodes[i].disabled = true; + jit_log(" : disabled %s", jit_opcodes[i].name); + *pp = end; + return true; + } + } + return false; +} +#endif + static bool merge_blacklist() { +#ifdef UAE + const char *blacklist = ""; +#else const char *blacklist = PrefsFindString("jitblacklist"); - if (blacklist) { +#endif +#ifdef USE_JIT_FPU + for (unsigned int i = 0; i < (sizeof(jit_opcodes) / sizeof(jit_opcodes[0])); i++) + *jit_opcodes[i].disabled = false; +#endif + if (blacklist[0] != '\0') { const char *p = blacklist; for (;;) { if (*p == 0) @@ -6181,7 +4271,14 @@ static bool merge_blacklist() int opcode1 = read_opcode(p); if (opcode1 < 0) + { +#ifdef USE_JIT_FPU + if (read_fpu_opcode(&p)) + continue; +#endif + bug(" : invalid opcode %s", p); return false; + } p += 4; int opcode2 = opcode1; @@ -6189,16 +4286,19 @@ static bool merge_blacklist() p++; opcode2 = read_opcode(p); if (opcode2 < 0) + { + bug(" : invalid opcode %s", p); return false; + } p += 4; } - if (*p == 0 || *p == ',' || *p == ';') { - write_log(" : blacklist opcodes : %04x-%04x\n", opcode1, opcode2); + if (*p == 0 || *p == ',') { + jit_log(" : blacklist opcodes : %04x-%04x", opcode1, opcode2); for (int opcode = opcode1; opcode <= opcode2; opcode++) reset_compop(cft_map(opcode)); - if (*p == ',' || *p++ == ';') + if (*(p++) == ',') continue; return true; @@ -6210,354 +4310,355 @@ static bool merge_blacklist() return true; } -void build_comp(void) +void build_comp(void) { - int i; - int jumpcount=0; - unsigned long opcode; - struct comptbl* tbl=op_smalltbl_0_comp_ff; - struct comptbl* nftbl=op_smalltbl_0_comp_nf; - int count; - unsigned int cpu_level = 0; // 
68000 (default) - if (CPUType == 4) - cpu_level = 4; // 68040 with FPU - else { - if (FPUType) - cpu_level = 3; // 68020 with FPU - else if (CPUType >= 2) - cpu_level = 2; // 68020 - else if (CPUType == 1) - cpu_level = 1; +#ifdef FSUAE + if (!g_fs_uae_jit_compiler) { + jit_log("JIT: JIT compiler is not enabled"); + return; } - struct cputbl *nfctbl = ( - cpu_level == 4 ? op_smalltbl_0_nf - : cpu_level == 3 ? op_smalltbl_1_nf - : cpu_level == 2 ? op_smalltbl_2_nf - : cpu_level == 1 ? op_smalltbl_3_nf - : op_smalltbl_4_nf); +#endif + int i; + unsigned long opcode; + const struct comptbl* tbl=op_smalltbl_0_comp_ff; + const struct comptbl* nftbl=op_smalltbl_0_comp_nf; + int count; +#ifdef WINUAE_ARANYM + unsigned int cpu_level = 4; // 68040 +#if 0 + const struct cputbl *nfctbl = op_smalltbl_0_nf; +#endif +#else +#ifdef NOFLAGS_SUPPORT + struct comptbl *nfctbl = (currprefs.cpu_level >= 5 ? op_smalltbl_0_nf + : currprefs.cpu_level == 4 ? op_smalltbl_1_nf + : (currprefs.cpu_level == 2 || currprefs.cpu_level == 3) ? op_smalltbl_2_nf + : currprefs.cpu_level == 1 ? op_smalltbl_3_nf + : ! currprefs.cpu_compatible ? op_smalltbl_4_nf + : op_smalltbl_5_nf); +#endif +#endif + // Initialize target CPU (check for features, e.g. 
CMOV, rat stalls) + raw_init_cpu(); - write_log (" : building compiler function tables\n"); +#ifdef NATMEM_OFFSET +#ifdef UAE +#ifdef JIT_EXCEPTION_HANDLER + install_exception_handler(); +#endif +#endif +#endif + + jit_log(" : building compiler function tables"); for (opcode = 0; opcode < 65536; opcode++) { reset_compop(opcode); - nfcpufunctbl[opcode] = op_illg_1; - prop[opcode].use_flags = 0x1f; - prop[opcode].set_flags = 0x1f; +#ifdef NOFLAGS_SUPPORT + nfcpufunctbl[opcode] = op_illg; +#endif + prop[opcode].use_flags = FLAG_ALL; + prop[opcode].set_flags = FLAG_ALL; +#ifdef UAE + prop[opcode].is_jump=1; +#else prop[opcode].cflow = fl_trap; // ILLEGAL instructions do trap +#endif } - + for (i = 0; tbl[i].opcode < 65536; i++) { +#ifdef UAE + int isjmp = (tbl[i].specific & COMP_OPCODE_ISJUMP); + int isaddx = (tbl[i].specific & COMP_OPCODE_ISADDX); + int iscjmp = (tbl[i].specific & COMP_OPCODE_ISCJUMP); + + prop[cft_map(tbl[i].opcode)].is_jump = isjmp; + prop[cft_map(tbl[i].opcode)].is_const_jump = iscjmp; + prop[cft_map(tbl[i].opcode)].is_addx = isaddx; +#else int cflow = table68k[tbl[i].opcode].cflow; - if (follow_const_jumps && (tbl[i].specific & 16)) + if (follow_const_jumps && (tbl[i].specific & COMP_OPCODE_ISCJUMP)) cflow = fl_const_jump; else cflow &= ~fl_const_jump; prop[cft_map(tbl[i].opcode)].cflow = cflow; +#endif - int uses_fpu = tbl[i].specific & 32; + bool uses_fpu = (tbl[i].specific & COMP_OPCODE_USES_FPU) != 0; if (uses_fpu && avoid_fpu) compfunctbl[cft_map(tbl[i].opcode)] = NULL; else compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler; } - for (i = 0; nftbl[i].opcode < 65536; i++) { - int uses_fpu = tbl[i].specific & 32; + for (i = 0; nftbl[i].opcode < 65536; i++) { + bool uses_fpu = (tbl[i].specific & COMP_OPCODE_USES_FPU) != 0; if (uses_fpu && avoid_fpu) nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL; else nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler; - +#ifdef NOFLAGS_SUPPORT nfcpufunctbl[cft_map(nftbl[i].opcode)] = 
nfctbl[i].handler; - } +#endif + } +#ifdef NOFLAGS_SUPPORT for (i = 0; nfctbl[i].handler; i++) { nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler; } +#endif - for (opcode = 0; opcode < 65536; opcode++) { + for (opcode = 0; opcode < 65536; opcode++) { compop_func *f; compop_func *nff; +#ifdef NOFLAGS_SUPPORT cpuop_func *nfcf; - int isaddx,cflow; +#endif + int isaddx; +#ifdef UAE + int isjmp,iscjmp; +#else + int cflow; +#endif - if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level) +#ifdef UAE + int cpu_level = (currprefs.cpu_model - 68000) / 10; + if (cpu_level > 4) + cpu_level--; +#endif + if ((instrmnem)table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level) continue; if (table68k[opcode].handler != -1) { f = compfunctbl[cft_map(table68k[opcode].handler)]; nff = nfcompfunctbl[cft_map(table68k[opcode].handler)]; +#ifdef NOFLAGS_SUPPORT nfcf = nfcpufunctbl[cft_map(table68k[opcode].handler)]; - cflow = prop[cft_map(table68k[opcode].handler)].cflow; +#endif isaddx = prop[cft_map(table68k[opcode].handler)].is_addx; - prop[cft_map(opcode)].cflow = cflow; prop[cft_map(opcode)].is_addx = isaddx; +#ifdef UAE + isjmp = prop[cft_map(table68k[opcode].handler)].is_jump; + iscjmp = prop[cft_map(table68k[opcode].handler)].is_const_jump; + prop[cft_map(opcode)].is_jump = isjmp; + prop[cft_map(opcode)].is_const_jump = iscjmp; +#else + cflow = prop[cft_map(table68k[opcode].handler)].cflow; + prop[cft_map(opcode)].cflow = cflow; +#endif compfunctbl[cft_map(opcode)] = f; nfcompfunctbl[cft_map(opcode)] = nff; - Dif (nfcf == op_illg_1) - abort(); +#ifdef NOFLAGS_SUPPORT + Dif (nfcf == op_illg) + abort(); nfcpufunctbl[cft_map(opcode)] = nfcf; +#endif } prop[cft_map(opcode)].set_flags = table68k[opcode].flagdead; prop[cft_map(opcode)].use_flags = table68k[opcode].flaglive; /* Unconditional jumps don't evaluate condition codes, so they * don't actually use any flags themselves */ +#ifdef UAE + if (prop[cft_map(opcode)].is_const_jump) +#else 
if (prop[cft_map(opcode)].cflow & fl_const_jump) +#endif prop[cft_map(opcode)].use_flags = 0; - } + } +#ifdef NOFLAGS_SUPPORT for (i = 0; nfctbl[i].handler != NULL; i++) { if (nfctbl[i].specific) nfcpufunctbl[cft_map(tbl[i].opcode)] = nfctbl[i].handler; } +#endif /* Merge in blacklist */ if (!merge_blacklist()) - write_log(" : blacklist merge failure!\n"); + { + jit_log(" : blacklist merge failure!"); + } - count=0; - for (opcode = 0; opcode < 65536; opcode++) { - if (compfunctbl[cft_map(opcode)]) - count++; - } - write_log(" : supposedly %d compileable opcodes!\n",count); + count=0; + for (opcode = 0; opcode < 65536; opcode++) { + if (compfunctbl[cft_map(opcode)]) + count++; + } + jit_log(" : supposedly %d compileable opcodes!",count); - /* Initialise state */ - create_popalls(); - alloc_cache(); - reset_lists(); + /* Initialise state */ + create_popalls(); + alloc_cache(); + reset_lists(); - for (i=0;ipc_p)].handler=(cpuop_func *)popall_execute_normal; - cache_tags[cacheline(bi->pc_p)+1].bi=NULL; - dbi=bi; bi=bi->next; - free_blockinfo(dbi); - } - bi=dormant; - while(bi) { - cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal; - cache_tags[cacheline(bi->pc_p)+1].bi=NULL; - dbi=bi; bi=bi->next; - free_blockinfo(dbi); - } + bi=active; + while(bi) { + cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func*)popall_execute_normal; + cache_tags[cacheline(bi->pc_p)+1].bi=NULL; + dbi=bi; bi=bi->next; + free_blockinfo(dbi); + } + bi=dormant; + while(bi) { + cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func*)popall_execute_normal; + cache_tags[cacheline(bi->pc_p)+1].bi=NULL; + dbi=bi; bi=bi->next; + free_blockinfo(dbi); + } - reset_lists(); - if (!compiled_code) - return; - current_compile_p=compiled_code; + reset_lists(); + if (!compiled_code) + return; + +#if defined(USE_DATA_BUFFER) + reset_data_buffer(); +#endif + + current_compile_p=compiled_code; +#ifdef UAE + set_special(0); /* To get out of compiled code */ +#else SPCFLAGS_SET( 
SPCFLAG_JIT_EXEC_RETURN ); /* To get out of compiled code */ +#endif } /* "Soft flushing" --- instead of actually throwing everything away, - we simply mark everything as "needs to be checked". + we simply mark everything as "needs to be checked". */ -static inline void flush_icache_lazy(int n) +static inline void flush_icache_lazy(void) { - blockinfo* bi; - blockinfo* bi2; + blockinfo* bi; + blockinfo* bi2; - soft_flush_count++; if (!active) - return; + return; bi=active; while (bi) { - uae_u32 cl=cacheline(bi->pc_p); + uae_u32 cl=cacheline(bi->pc_p); if (bi->status==BI_INVALID || bi->status==BI_NEED_RECOMP) { - if (bi==cache_tags[cl+1].bi) - cache_tags[cl].handler=(cpuop_func *)popall_execute_normal; - bi->handler_to_use=(cpuop_func *)popall_execute_normal; - set_dhtu(bi,bi->direct_pen); - bi->status=BI_INVALID; - } - else { - if (bi==cache_tags[cl+1].bi) - cache_tags[cl].handler=(cpuop_func *)popall_check_checksum; - bi->handler_to_use=(cpuop_func *)popall_check_checksum; - set_dhtu(bi,bi->direct_pcc); - bi->status=BI_NEED_CHECK; - } - bi2=bi; - bi=bi->next; + if (bi==cache_tags[cl+1].bi) + cache_tags[cl].handler=(cpuop_func*)popall_execute_normal; + bi->handler_to_use=(cpuop_func*)popall_execute_normal; + set_dhtu(bi,bi->direct_pen); + bi->status=BI_INVALID; + } + else { + if (bi==cache_tags[cl+1].bi) + cache_tags[cl].handler=(cpuop_func*)popall_check_checksum; + bi->handler_to_use=(cpuop_func*)popall_check_checksum; + set_dhtu(bi,bi->direct_pcc); + bi->status=BI_NEED_CHECK; + } + bi2=bi; + bi=bi->next; } /* bi2 is now the last entry in the active list */ bi2->next=dormant; if (dormant) - dormant->prev_p=&(bi2->next); - + dormant->prev_p=&(bi2->next); + dormant=active; active->prev_p=&dormant; active=NULL; } -void flush_icache_range(uae_u8 *start_p, uae_u32 length) + +#if 0 +static void flush_icache_range(uae_u32 start, uae_u32 length) { if (!active) return; #if LAZY_FLUSH_ICACHE_RANGE + uae_u8 *start_p = get_real_address(start); blockinfo *bi = active; while 
(bi) { #if USE_CHECKSUM_INFO - bool candidate = false; - for (checksum_info *csi = bi->csi; csi; csi = csi->next) { - if (((start_p - csi->start_p) < csi->length) || - ((csi->start_p - start_p) < length)) { - candidate = true; - break; - } - } + bool invalidate = false; + for (checksum_info *csi = bi->csi; csi && !invalidate; csi = csi->next) + invalidate = (((start_p - csi->start_p) < csi->length) || + ((csi->start_p - start_p) < length)); #else // Assume system is consistent and would invalidate the right range - const bool candidate = (bi->pc_p - start_p) < length; + const bool invalidate = (bi->pc_p - start_p) < length; #endif - blockinfo *dbi = bi; - bi = bi->next; - if (candidate) { - uae_u32 cl = cacheline(dbi->pc_p); - if (dbi->status == BI_INVALID || dbi->status == BI_NEED_RECOMP) { - if (dbi == cache_tags[cl+1].bi) + if (invalidate) { + uae_u32 cl = cacheline(bi->pc_p); + if (bi == cache_tags[cl + 1].bi) cache_tags[cl].handler = (cpuop_func *)popall_execute_normal; - dbi->handler_to_use = (cpuop_func *)popall_execute_normal; - set_dhtu(dbi, dbi->direct_pen); - dbi->status = BI_INVALID; - } - else { - if (dbi == cache_tags[cl+1].bi) - cache_tags[cl].handler = (cpuop_func *)popall_check_checksum; - dbi->handler_to_use = (cpuop_func *)popall_check_checksum; - set_dhtu(dbi, dbi->direct_pcc); - dbi->status = BI_NEED_CHECK; - } - remove_from_list(dbi); - add_to_dormant(dbi); + bi->handler_to_use = (cpuop_func *)popall_execute_normal; + set_dhtu(bi, bi->direct_pen); + bi->status = BI_NEED_RECOMP; } + bi = bi->next; } return; +#else + UNUSED(start); + UNUSED(length); #endif - flush_icache(-1); + flush_icache(); } +#endif -static void catastrophe(void) -{ - abort(); -} int failure; -#define TARGET_M68K 0 -#define TARGET_POWERPC 1 -#define TARGET_X86 2 -#define TARGET_X86_64 3 -#if defined(i386) || defined(__i386__) -#define TARGET_NATIVE TARGET_X86 -#endif -#if defined(powerpc) || defined(__powerpc__) -#define TARGET_NATIVE TARGET_POWERPC -#endif -#if 
defined(x86_64) || defined(__x86_64__) -#define TARGET_NATIVE TARGET_X86_64 -#endif - -#ifdef ENABLE_MON -static uae_u32 mon_read_byte_jit(uintptr addr) +#ifdef UAE +static inline unsigned int get_opcode_cft_map(unsigned int f) { - uae_u8 *m = (uae_u8 *)addr; - return (uintptr)(*m); + return ((f >> 8) & 255) | ((f & 255) << 8); } - -static void mon_write_byte_jit(uintptr addr, uae_u32 b) -{ - uae_u8 *m = (uae_u8 *)addr; - *m = b; -} -#endif - -void disasm_block(int target, uint8 * start, size_t length) -{ - if (!JITDebug) - return; - -#if defined(JIT_DEBUG) && defined(ENABLE_MON) - char disasm_str[200]; - sprintf(disasm_str, "%s $%x $%x", - target == TARGET_M68K ? "d68" : - target == TARGET_X86 ? "d86" : - target == TARGET_X86_64 ? "d8664" : - target == TARGET_POWERPC ? "d" : "x", - start, start + length - 1); - - uae_u32 (*old_mon_read_byte)(uintptr) = mon_read_byte; - void (*old_mon_write_byte)(uintptr, uae_u32) = mon_write_byte; - - mon_read_byte = mon_read_byte_jit; - mon_write_byte = mon_write_byte_jit; - - const char *arg[5] = {"mon", "-m", "-r", disasm_str, NULL}; - mon(4, arg); - - mon_read_byte = old_mon_read_byte; - mon_write_byte = old_mon_write_byte; -#endif -} - -static void disasm_native_block(uint8 *start, size_t length) -{ - disasm_block(TARGET_NATIVE, start, length); -} - -static void disasm_m68k_block(uint8 *start, size_t length) -{ - disasm_block(TARGET_M68K, start, length); -} - -#ifdef HAVE_GET_WORD_UNSWAPPED +#define DO_GET_OPCODE(a) (get_opcode_cft_map((uae_u16)*(a))) +#else +#if defined(HAVE_GET_WORD_UNSWAPPED) && !defined(FULLMMU) # define DO_GET_OPCODE(a) (do_get_mem_word_unswapped((uae_u16 *)(a))) #else # define DO_GET_OPCODE(a) (do_get_mem_word((uae_u16 *)(a))) #endif +#endif -#if JIT_DEBUG +#ifdef JIT_DEBUG static uae_u8 *last_regs_pc_p = 0; static uae_u8 *last_compiled_block_addr = 0; @@ -6566,506 +4667,726 @@ void compiler_dumpstate(void) if (!JITDebug) return; - write_log("### Host addresses\n"); - write_log("MEM_BASE : %x\n", 
MEMBaseDiff); - write_log("PC_P : %p\n", ®s.pc_p); - write_log("SPCFLAGS : %p\n", ®s.spcflags); - write_log("D0-D7 : %p-%p\n", ®s.regs[0], ®s.regs[7]); - write_log("A0-A7 : %p-%p\n", ®s.regs[8], ®s.regs[15]); - write_log("\n"); + jit_log("### Host addresses"); + jit_log("MEM_BASE : %lx", (unsigned long)MEMBaseDiff); + jit_log("PC_P : %p", ®s.pc_p); + jit_log("SPCFLAGS : %p", ®s.spcflags); + jit_log("D0-D7 : %p-%p", ®s.regs[0], ®s.regs[7]); + jit_log("A0-A7 : %p-%p", ®s.regs[8], ®s.regs[15]); + jit_log(" "); - write_log("### M68k processor state\n"); - m68k_dumpstate(0); - write_log("\n"); + jit_log("### M68k processor state"); + m68k_dumpstate(stderr, 0); + jit_log(" "); - write_log("### Block in Mac address space\n"); - write_log("M68K block : %p\n", - (void *)(uintptr)get_virtual_address(last_regs_pc_p)); - write_log("Native block : %p (%d bytes)\n", - (void *)(uintptr)get_virtual_address(last_compiled_block_addr), + jit_log("### Block in Atari address space"); + jit_log("M68K block : %p", + (void *)(uintptr)last_regs_pc_p); + if (last_regs_pc_p != 0) { + jit_log("Native block : %p (%d bytes)", + (void *)last_compiled_block_addr, get_blockinfo_addr(last_regs_pc_p)->direct_handler_size); - write_log("\n"); + } + jit_log(" "); } #endif + +#if 0 /* debugging helpers; activate as needed */ +static void print_exc_frame(uae_u32 opcode) +{ + int nr = (opcode & 0x0f) + 32; + if (nr != 0x45 && /* Timer-C */ + nr != 0x1c && /* VBL */ + nr != 0x46) /* ACIA */ + { + memptr sp = m68k_areg(regs, 7); + uae_u16 sr = get_word(sp); + fprintf(stderr, "Exc:%02x SP: %08x USP: %08x SR: %04x PC: %08x Format: %04x", nr, sp, regs.usp, sr, get_long(sp + 2), get_word(sp + 6)); + if (nr >= 32 && nr < 48) + { + fprintf(stderr, " Opcode: $%04x", sr & 0x2000 ? 
get_word(sp + 8) : get_word(regs.usp)); + } + fprintf(stderr, "\n"); + } +} + +static void push_all_nat(void) +{ + raw_pushfl(); + raw_push_l_r(EAX_INDEX); + raw_push_l_r(ECX_INDEX); + raw_push_l_r(EDX_INDEX); + raw_push_l_r(EBX_INDEX); + raw_push_l_r(EBP_INDEX); + raw_push_l_r(EDI_INDEX); + raw_push_l_r(ESI_INDEX); + raw_push_l_r(R8_INDEX); + raw_push_l_r(R9_INDEX); + raw_push_l_r(R10_INDEX); + raw_push_l_r(R11_INDEX); + raw_push_l_r(R12_INDEX); + raw_push_l_r(R13_INDEX); + raw_push_l_r(R14_INDEX); + raw_push_l_r(R15_INDEX); +} + +static void pop_all_nat(void) +{ + raw_pop_l_r(R15_INDEX); + raw_pop_l_r(R14_INDEX); + raw_pop_l_r(R13_INDEX); + raw_pop_l_r(R12_INDEX); + raw_pop_l_r(R11_INDEX); + raw_pop_l_r(R10_INDEX); + raw_pop_l_r(R9_INDEX); + raw_pop_l_r(R8_INDEX); + raw_pop_l_r(ESI_INDEX); + raw_pop_l_r(EDI_INDEX); + raw_pop_l_r(EBP_INDEX); + raw_pop_l_r(EBX_INDEX); + raw_pop_l_r(EDX_INDEX); + raw_pop_l_r(ECX_INDEX); + raw_pop_l_r(EAX_INDEX); + raw_popfl(); +} +#endif + +#if 0 +static void print_inst(void) +{ + disasm_m68k_block(regs.fault_pc + (uint8 *)MEMBaseDiff, 1); +} +#endif + + +#ifdef UAE +void compile_block(cpu_history *pc_hist, int blocklen, int totcycles) +{ + if (cache_enabled && compiled_code && currprefs.cpu_model >= 68020) { +#else static void compile_block(cpu_history* pc_hist, int blocklen) { - if (letit && compiled_code) { -#if PROFILE_COMPILE_TIME - compile_count++; - clock_t start_time = clock(); + if (cache_enabled && compiled_code) { #endif -#if JIT_DEBUG - bool disasm_block = false; +#ifdef PROFILE_COMPILE_TIME + compile_count++; + clock_t start_time = clock(); #endif - - /* OK, here we need to 'compile' a block */ - int i; - int r; - int was_comp=0; - uae_u8 liveflags[MAXRUN+1]; +#ifdef JIT_DEBUG + bool disasm_block = false; +#endif + + /* OK, here we need to 'compile' a block */ + int i; + int r; + int was_comp=0; + uae_u8 liveflags[MAXRUN+1]; #if USE_CHECKSUM_INFO - bool trace_in_rom = isinrom((uintptr)pc_hist[0].location); - uintptr 
max_pcp=(uintptr)pc_hist[blocklen - 1].location; - uintptr min_pcp=max_pcp; + bool trace_in_rom = isinrom((uintptr)pc_hist[0].location) != 0; + uintptr max_pcp=(uintptr)pc_hist[blocklen - 1].location; + uintptr min_pcp=max_pcp; #else - uintptr max_pcp=(uintptr)pc_hist[0].location; - uintptr min_pcp=max_pcp; + uintptr max_pcp=(uintptr)pc_hist[0].location; + uintptr min_pcp=max_pcp; #endif - uae_u32 cl=cacheline(pc_hist[0].location); - void* specflags=(void*)®s.spcflags; - blockinfo* bi=NULL; - blockinfo* bi2; - int extra_len=0; + uae_u32 cl=cacheline(pc_hist[0].location); + void* specflags=(void*)®s.spcflags; + blockinfo* bi=NULL; + blockinfo* bi2; + int extra_len=0; - redo_current_block=0; - if (current_compile_p>=max_compile_start) - flush_icache_hard(7); + redo_current_block=0; + if (current_compile_p >= MAX_COMPILE_PTR) + flush_icache_hard(); - alloc_blockinfos(); + alloc_blockinfos(); - bi=get_blockinfo_addr_new(pc_hist[0].location,0); - bi2=get_blockinfo(cl); + bi=get_blockinfo_addr_new(pc_hist[0].location,0); + bi2=get_blockinfo(cl); - optlev=bi->optlevel; - if (bi->status!=BI_INVALID) { - Dif (bi!=bi2) { - /* I don't think it can happen anymore. Shouldn't, in - any case. So let's make sure... */ - write_log("WOOOWOO count=%d, ol=%d %p %p\n", - bi->count,bi->optlevel,bi->handler_to_use, - cache_tags[cl].handler); - abort(); - } + optlev=bi->optlevel; + if (bi->status!=BI_INVALID) { + Dif (bi!=bi2) { + /* I don't think it can happen anymore. Shouldn't, in + any case. So let's make sure... */ + jit_abort("WOOOWOO count=%d, ol=%d %p %p", bi->count,bi->optlevel,bi->handler_to_use, cache_tags[cl].handler); + } - Dif (bi->count!=-1 && bi->status!=BI_NEED_RECOMP) { - write_log("bi->count=%d, bi->status=%d\n",bi->count,bi->status); - /* What the heck? We are not supposed to be here! 
*/ - abort(); - } - } - if (bi->count==-1) { - optlev++; - while (!optcount[optlev]) - optlev++; - bi->count=optcount[optlev]-1; - } - current_block_pc_p=(uintptr)pc_hist[0].location; - - remove_deps(bi); /* We are about to create new code */ - bi->optlevel=optlev; - bi->pc_p=(uae_u8*)pc_hist[0].location; + Dif (bi->count!=-1 && bi->status!=BI_NEED_RECOMP) { + jit_abort("bi->count=%d, bi->status=%d,bi->optlevel=%d",bi->count,bi->status,bi->optlevel); + /* What the heck? We are not supposed to be here! */ + } + } + if (bi->count==-1) { + optlev++; + while (!optcount[optlev]) + optlev++; + bi->count=optcount[optlev]-1; + } + current_block_pc_p=(uintptr)pc_hist[0].location; + + remove_deps(bi); /* We are about to create new code */ + bi->optlevel=optlev; + bi->pc_p=(uae_u8*)pc_hist[0].location; #if USE_CHECKSUM_INFO - free_checksum_info_chain(bi->csi); - bi->csi = NULL; -#endif - - liveflags[blocklen]=0x1f; /* All flags needed afterwards */ - i=blocklen; - while (i--) { - uae_u16* currpcp=pc_hist[i].location; - uae_u32 op=DO_GET_OPCODE(currpcp); - -#if USE_CHECKSUM_INFO - trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp); - if (follow_const_jumps && is_const_jump(op)) { - checksum_info *csi = alloc_checksum_info(); - csi->start_p = (uae_u8 *)min_pcp; - csi->length = max_pcp - min_pcp + LONGEST_68K_INST; - csi->next = bi->csi; - bi->csi = csi; - max_pcp = (uintptr)currpcp; - } - min_pcp = (uintptr)currpcp; -#else - if ((uintptr)currpcpmax_pcp) - max_pcp=(uintptr)currpcp; -#endif - - liveflags[i]=((liveflags[i+1]& - (~prop[op].set_flags))| - prop[op].use_flags); - if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0) - liveflags[i]&= ~FLAG_Z; - } - -#if USE_CHECKSUM_INFO - checksum_info *csi = alloc_checksum_info(); - csi->start_p = (uae_u8 *)min_pcp; - csi->length = max_pcp - min_pcp + LONGEST_68K_INST; - csi->next = bi->csi; - bi->csi = csi; -#endif - - bi->needed_flags=liveflags[0]; - - align_target(align_loops); - was_comp=0; - - bi->direct_handler=(cpuop_func 
*)get_target(); - set_dhtu(bi,bi->direct_handler); - bi->status=BI_COMPILING; - current_block_start_target=(uintptr)get_target(); - - log_startblock(); - - if (bi->count>=0) { /* Need to generate countdown code */ - raw_mov_l_mi((uintptr)®s.pc_p,(uintptr)pc_hist[0].location); - raw_sub_l_mi((uintptr)&(bi->count),1); - raw_jl((uintptr)popall_recompile_block); - } - if (optlev==0) { /* No need to actually translate */ - /* Execute normally without keeping stats */ - raw_mov_l_mi((uintptr)®s.pc_p,(uintptr)pc_hist[0].location); - raw_jmp((uintptr)popall_exec_nostats); - } - else { - reg_alloc_run=0; - next_pc_p=0; - taken_pc_p=0; - branch_cc=0; - - comp_pc_p=(uae_u8*)pc_hist[0].location; - init_comp(); - was_comp=1; - -#ifdef USE_CPU_EMUL_SERVICES - raw_sub_l_mi((uintptr)&emulated_ticks,blocklen); - raw_jcc_b_oponly(NATIVE_CC_GT); - uae_s8 *branchadd=(uae_s8*)get_target(); - emit_byte(0); - raw_call((uintptr)cpu_do_check_ticks); - *branchadd=(uintptr)get_target()-((uintptr)branchadd+1); -#endif - -#if JIT_DEBUG - if (JITDebug) { - raw_mov_l_mi((uintptr)&last_regs_pc_p,(uintptr)pc_hist[0].location); - raw_mov_l_mi((uintptr)&last_compiled_block_addr,current_block_start_target); - } -#endif - - for (i=0;i1) { - failure=0; - if (!was_comp) { - comp_pc_p=(uae_u8*)pc_hist[i].location; - init_comp(); - } - was_comp=1; - - comptbl[opcode](opcode); - freescratch(); - if (!(liveflags[i+1] & FLAG_CZNV)) { - /* We can forget about flags */ - dont_care_flags(); - } -#if INDIVIDUAL_INST - flush(1); - nop(); - flush(1); - was_comp=0; -#endif - } - - if (failure) { - if (was_comp) { - flush(1); - was_comp=0; - } - raw_mov_l_ri(REG_PAR1,(uae_u32)opcode); -#if USE_NORMAL_CALLING_CONVENTION - raw_push_l_r(REG_PAR1); -#endif - raw_mov_l_mi((uintptr)®s.pc_p, - (uintptr)pc_hist[i].location); - raw_call((uintptr)cputbl[opcode]); -#if PROFILE_UNTRANSLATED_INSNS - // raw_cputbl_count[] is indexed with plain opcode (in m68k order) - raw_add_l_mi((uintptr)&raw_cputbl_count[cft_map(opcode)],1); 
-#endif -#if USE_NORMAL_CALLING_CONVENTION - raw_inc_sp(4); -#endif - - if (i < blocklen - 1) { - uae_s8* branchadd; - - raw_mov_l_rm(0,(uintptr)specflags); - raw_test_l_rr(0,0); - raw_jz_b_oponly(); - branchadd=(uae_s8 *)get_target(); - emit_byte(0); - raw_jmp((uintptr)popall_do_nothing); - *branchadd=(uintptr)get_target()-(uintptr)branchadd-1; - } - } - } -#if 1 /* This isn't completely kosher yet; It really needs to be - be integrated into a general inter-block-dependency scheme */ - if (next_pc_p && taken_pc_p && - was_comp && taken_pc_p==current_block_pc_p) { - blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0); - blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0); - uae_u8 x=bi1->needed_flags; - - if (x==0xff || 1) { /* To be on the safe side */ - uae_u16* next=(uae_u16*)next_pc_p; - uae_u32 op=DO_GET_OPCODE(next); - - x=0x1f; - x&=(~prop[op].set_flags); - x|=prop[op].use_flags; - } - - x|=bi2->needed_flags; - if (!(x & FLAG_CZNV)) { - /* We can forget about flags */ - dont_care_flags(); - extra_len+=2; /* The next instruction now is part of this - block */ - } - - } -#endif - log_flush(); - - if (next_pc_p) { /* A branch was registered */ - uintptr t1=next_pc_p; - uintptr t2=taken_pc_p; - int cc=branch_cc; - - uae_u32* branchadd; - uae_u32* tba; - bigstate tmp; - blockinfo* tbi; - - if (taken_pc_penv))) { - mark_callers_recompile(bi); - } - - big_to_small_state(&live,&(bi->env)); -#endif - -#if USE_CHECKSUM_INFO - remove_from_list(bi); - if (trace_in_rom) { - // No need to checksum that block trace on cache invalidation free_checksum_info_chain(bi->csi); bi->csi = NULL; - add_to_dormant(bi); - } - else { - calc_checksum(bi,&(bi->c1),&(bi->c2)); - add_to_active(bi); - } +#endif + + liveflags[blocklen]=FLAG_ALL; /* All flags needed afterwards */ + i=blocklen; + while (i--) { + uae_u16* currpcp=pc_hist[i].location; + uae_u32 op=DO_GET_OPCODE(currpcp); + +#if USE_CHECKSUM_INFO + trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp); + if 
(follow_const_jumps && is_const_jump(op)) { + checksum_info *csi = alloc_checksum_info(); + csi->start_p = (uae_u8 *)min_pcp; + csi->length = max_pcp - min_pcp + LONGEST_68K_INST; + csi->next = bi->csi; + bi->csi = csi; + max_pcp = (uintptr)currpcp; + } + min_pcp = (uintptr)currpcp; #else - if (next_pc_p+extra_len>=max_pcp && - next_pc_p+extra_lenlen=max_pcp-min_pcp; - bi->min_pcp=min_pcp; - - remove_from_list(bi); - if (isinrom(min_pcp) && isinrom(max_pcp)) { - add_to_dormant(bi); /* No need to checksum it on cache flush. - Please don't start changing ROMs in - flight! */ - } - else { - calc_checksum(bi,&(bi->c1),&(bi->c2)); - add_to_active(bi); - } + if ((uintptr)currpcpmax_pcp) + max_pcp=(uintptr)currpcp; #endif + +#ifdef UAE + if (!currprefs.compnf) { + liveflags[i]=FLAG_ALL; + } + else +#endif + { + liveflags[i] = ((liveflags[i+1] & (~prop[op].set_flags))|prop[op].use_flags); + if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0) + liveflags[i]&= ~FLAG_Z; + } + } + +#if USE_CHECKSUM_INFO + checksum_info *csi = alloc_checksum_info(); + csi->start_p = (uae_u8 *)min_pcp; + csi->length = max_pcp - min_pcp + LONGEST_68K_INST; + csi->next = bi->csi; + bi->csi = csi; +#endif + + bi->needed_flags=liveflags[0]; + + align_target(align_loops); + was_comp=0; + + bi->direct_handler=(cpuop_func*)get_target(); + set_dhtu(bi,bi->direct_handler); + bi->status=BI_COMPILING; + current_block_start_target=(uintptr)get_target(); - current_cache_size += get_target() - (uae_u8 *)current_compile_p; - -#if JIT_DEBUG - if (JITDebug) + log_startblock(); + + if (bi->count>=0) { /* Need to generate countdown code */ + compemu_raw_mov_l_mi((uintptr)®s.pc_p,(uintptr)pc_hist[0].location); + compemu_raw_sub_l_mi((uintptr)&(bi->count),1); + compemu_raw_jl((uintptr)popall_recompile_block); + } + if (optlev==0) { /* No need to actually translate */ + /* Execute normally without keeping stats */ + compemu_raw_mov_l_mi((uintptr)®s.pc_p,(uintptr)pc_hist[0].location); + 
compemu_raw_jmp((uintptr)popall_exec_nostats); + } + else { + reg_alloc_run=0; + next_pc_p=0; + taken_pc_p=0; + branch_cc=0; // Only to be initialized. Will be set together with next_pc_p + + comp_pc_p=(uae_u8*)pc_hist[0].location; + init_comp(); + was_comp=1; + +#ifdef USE_CPU_EMUL_SERVICES + compemu_raw_sub_l_mi((uintptr)&emulated_ticks,blocklen); + compemu_raw_jcc_b_oponly(NATIVE_CC_GT); + uae_u8 *branchadd=get_target(); + skip_byte(); + raw_dec_sp(STACK_SHADOW_SPACE); + compemu_raw_call((uintptr)cpu_do_check_ticks); + raw_inc_sp(STACK_SHADOW_SPACE); + *branchadd=get_target()-(branchadd+1); +#endif + +#ifdef JIT_DEBUG + if (JITDebug) { + compemu_raw_mov_l_mi((uintptr)&last_regs_pc_p,(uintptr)pc_hist[0].location); + compemu_raw_mov_l_mi((uintptr)&last_compiled_block_addr,current_block_start_target); + } +#endif + + for (i=0;i1) { + failure=0; + if (!was_comp) { + comp_pc_p=(uae_u8*)pc_hist[i].location; + init_comp(); + } + was_comp=1; + +#if defined(HAVE_DISASM_NATIVE) && defined(HAVE_DISASM_M68K) +/* debugging helpers; activate as needed */ +#if 1 + disasm_this_inst = false; + const uae_u8 *start_m68k_thisinst = (const uae_u8 *)pc_hist[i].location; + uae_u8 *start_native_thisinst = get_target(); +#endif +#endif + +#ifdef WINUAE_ARANYM + bool isnop = do_get_mem_word(pc_hist[i].location) == 0x4e71 || + ((i + 1) < blocklen && do_get_mem_word(pc_hist[i+1].location) == 0x4e71); + + if (isnop) + compemu_raw_mov_l_mi((uintptr)®s.fault_pc, ((uintptr)(pc_hist[i].location)) - MEMBaseDiff); +#endif + + comptbl[opcode](opcode); + freescratch(); + if (!(liveflags[i+1] & FLAG_CZNV)) { + /* We can forget about flags */ + dont_care_flags(); + } +#if INDIVIDUAL_INST + flush(1); + nop(); + flush(1); + was_comp=0; +#endif +#ifdef WINUAE_ARANYM + /* + * workaround for buserror handling: on a "nop", write registers back + */ + if (isnop) + { + flush(1); + nop(); + was_comp=0; + } +#endif +#if defined(HAVE_DISASM_NATIVE) && defined(HAVE_DISASM_M68K) + +/* debugging helpers; activate 
as needed */ +#if 0 + disasm_m68k_block(start_m68k_thisinst, 1); + push_all_nat(); + compemu_raw_mov_l_mi(uae_p32(®s.fault_pc), (uintptr)start_m68k_thisinst - MEMBaseDiff); + raw_dec_sp(STACK_SHADOW_SPACE); + compemu_raw_call(uae_p32(print_instn)); + raw_inc_sp(STACK_SHADOW_SPACE); + pop_all_nat(); +#endif + + if (disasm_this_inst) + { + disasm_m68k_block(start_m68k_thisinst, 1); +#if 1 + disasm_native_block(start_native_thisinst, get_target() - start_native_thisinst); +#endif + +#if 0 + push_all_nat(); + + raw_dec_sp(STACK_SHADOW_SPACE); + compemu_raw_mov_l_ri(REG_PAR1, (uae_u32)cft_map(opcode)); + compemu_raw_call((uintptr)print_exc_frame); + raw_inc_sp(STACK_SHADOW_SPACE); + + pop_all_nat(); +#endif + + if (failure) + { + bug("(discarded)"); + target = start_native_thisinst; + } + } +#endif + } + + if (failure) { + if (was_comp) { + flush(1); + was_comp=0; + } + compemu_raw_mov_l_ri(REG_PAR1,(uae_u32)opcode); +#if USE_NORMAL_CALLING_CONVENTION + raw_push_l_r(REG_PAR1); +#endif + compemu_raw_mov_l_mi((uintptr)®s.pc_p, + (uintptr)pc_hist[i].location); + raw_dec_sp(STACK_SHADOW_SPACE); + compemu_raw_call((uintptr)cputbl[opcode]); + raw_inc_sp(STACK_SHADOW_SPACE); +#ifdef PROFILE_UNTRANSLATED_INSNS + // raw_cputbl_count[] is indexed with plain opcode (in m68k order) + compemu_raw_add_l_mi((uintptr)&raw_cputbl_count[cft_map(opcode)],1); +#endif +#if USE_NORMAL_CALLING_CONVENTION + raw_inc_sp(4); +#endif + + if (i < blocklen - 1) { + uae_u8* branchadd; + + /* if (SPCFLAGS_TEST(SPCFLAG_ALL)) popall_do_nothing() */ + compemu_raw_mov_l_rm(0, (uintptr)specflags); + compemu_raw_test_l_rr(0,0); +#if defined(USE_DATA_BUFFER) + data_check_end(8, 64); // just a pessimistic guess... 
+#endif + compemu_raw_jz_b_oponly(); + branchadd=get_target(); + skip_byte(); +#ifdef UAE + raw_sub_l_mi(uae_p32(&countdown),scaled_cycles(totcycles)); +#endif + compemu_raw_jmp((uintptr)popall_do_nothing); + *branchadd = get_target() - (branchadd + 1); + } + } + } +#if 1 /* This isn't completely kosher yet; It really needs to be + be integrated into a general inter-block-dependency scheme */ + if (next_pc_p && taken_pc_p && + was_comp && taken_pc_p==current_block_pc_p) + { + blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0); + blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0); + uae_u8 x=bi1->needed_flags; + + if (x==0xff || 1) { /* To be on the safe side */ + uae_u16* next=(uae_u16*)next_pc_p; + uae_u32 op=DO_GET_OPCODE(next); + + x=FLAG_ALL; + x&=(~prop[op].set_flags); + x|=prop[op].use_flags; + } + + x|=bi2->needed_flags; + if (!(x & FLAG_CZNV)) { + /* We can forget about flags */ + dont_care_flags(); + extra_len+=2; /* The next instruction now is part of this block */ + } + } +#endif + log_flush(); + + if (next_pc_p) { /* A branch was registered */ + uintptr t1=next_pc_p; + uintptr t2=taken_pc_p; + int cc=branch_cc; + + uae_u32* branchadd; + uae_u32* tba; + bigstate tmp; + blockinfo* tbi; + + if (taken_pc_penv))) { + mark_callers_recompile(bi); + } + + big_to_small_state(&live,&(bi->env)); +#endif + +#if USE_CHECKSUM_INFO + remove_from_list(bi); + if (trace_in_rom) { + // No need to checksum that block trace on cache invalidation + free_checksum_info_chain(bi->csi); + bi->csi = NULL; + add_to_dormant(bi); + } + else { + calc_checksum(bi,&(bi->c1),&(bi->c2)); + add_to_active(bi); + } +#else + if (next_pc_p+extra_len>=max_pcp && + next_pc_p+extra_lenlen=max_pcp-min_pcp; + bi->min_pcp=min_pcp; + + remove_from_list(bi); + if (isinrom(min_pcp) && isinrom(max_pcp)) { + add_to_dormant(bi); /* No need to checksum it on cache flush. + Please don't start changing ROMs in + flight! 
*/ + } + else { + calc_checksum(bi,&(bi->c1),&(bi->c2)); + add_to_active(bi); + } +#endif + + current_cache_size += get_target() - current_compile_p; + +#ifdef JIT_DEBUG bi->direct_handler_size = get_target() - (uae_u8 *)current_block_start_target; - - if (JITDebug && disasm_block) { - uaecptr block_addr = start_pc + ((char *)pc_hist[0].location - (char *)start_pc_p); - D(bug("M68K block @ 0x%08x (%d insns)\n", block_addr, blocklen)); - uae_u32 block_size = ((uae_u8 *)pc_hist[blocklen - 1].location - (uae_u8 *)pc_hist[0].location) + 1; - disasm_m68k_block((uae_u8 *)pc_hist[0].location, block_size); - D(bug("Compiled block @ 0x%08x\n", pc_hist[0].location)); - disasm_native_block((uae_u8 *)current_block_start_target, bi->direct_handler_size); - getchar(); + + if (JITDebug && disasm_block) { + uaecptr block_addr = start_pc + ((char *)pc_hist[0].location - (char *)start_pc_p); + jit_log("M68K block @ 0x%08x (%d insns)", block_addr, blocklen); + uae_u32 block_size = ((uae_u8 *)pc_hist[blocklen - 1].location - (uae_u8 *)pc_hist[0].location) + 1; +#ifdef WINUAE_ARANYM + disasm_m68k_block((const uae_u8 *)pc_hist[0].location, block_size); +#endif + jit_log("Compiled block @ %p", pc_hist[0].location); +#ifdef WINUAE_ARANYM + disasm_native_block((const uae_u8 *)current_block_start_target, bi->direct_handler_size); +#endif + UNUSED(block_addr); + } +#endif + + log_dump(); + align_target(align_jumps); + +#ifdef UAE +#ifdef USE_UDIS86 + UDISFN(current_block_start_target, target) +#endif +#endif + + /* This is the non-direct handler */ + bi->handler= + bi->handler_to_use=(cpuop_func *)get_target(); + compemu_raw_cmp_l_mi((uintptr)®s.pc_p,(uintptr)pc_hist[0].location); + compemu_raw_jnz((uintptr)popall_cache_miss); + comp_pc_p=(uae_u8*)pc_hist[0].location; + + bi->status=BI_FINALIZING; + init_comp(); + match_states(bi); + flush(1); + + compemu_raw_jmp((uintptr)bi->direct_handler); + + flush_cpu_icache((void *)current_block_start_target, (void *)target); + 
current_compile_p=get_target(); + raise_in_cl_list(bi); +#ifdef UAE + bi->nexthandler=current_compile_p; +#endif + + /* We will flush soon, anyway, so let's do it now */ + if (current_compile_p >= MAX_COMPILE_PTR) + flush_icache_hard(); + + bi->status=BI_ACTIVE; + if (redo_current_block) + block_need_recompile(bi); + +#ifdef PROFILE_COMPILE_TIME + compile_time += (clock() - start_time); +#endif +#ifdef UAE + /* Account for compilation time */ + do_extra_cycles(totcycles); +#endif } + +#ifdef USE_CPU_EMUL_SERVICES + /* Account for compilation time */ + cpu_do_check_ticks(); #endif - - log_dump(); - align_target(align_jumps); - - /* This is the non-direct handler */ - bi->handler= - bi->handler_to_use=(cpuop_func *)get_target(); - raw_cmp_l_mi((uintptr)®s.pc_p,(uintptr)pc_hist[0].location); - raw_jnz((uintptr)popall_cache_miss); - comp_pc_p=(uae_u8*)pc_hist[0].location; - - bi->status=BI_FINALIZING; - init_comp(); - match_states(bi); - flush(1); - - raw_jmp((uintptr)bi->direct_handler); - - current_compile_p=get_target(); - raise_in_cl_list(bi); - - /* We will flush soon, anyway, so let's do it now */ - if (current_compile_p>=max_compile_start) - flush_icache_hard(7); - - bi->status=BI_ACTIVE; - if (redo_current_block) - block_need_recompile(bi); - -#if PROFILE_COMPILE_TIME - compile_time += (clock() - start_time); -#endif - } - - /* Account for compilation time */ - cpu_do_check_ticks(); } +#ifdef UAE + /* Slightly different function defined in newcpu.cpp */ +#else void do_nothing(void) { - /* What did you expect this to do? */ + /* What did you expect this to do? 
*/ } +#endif +#ifdef UAE + /* Different implementation in newcpu.cpp */ +#else void exec_nostats(void) { for (;;) { uae_u32 opcode = GET_OPCODE; #if FLIGHT_RECORDER - m68k_record_step(m68k_getpc()); + m68k_record_step(m68k_getpc(), cft_map(opcode)); #endif (*cpufunctbl[opcode])(opcode); cpu_check_ticks(); @@ -7074,24 +5395,28 @@ void exec_nostats(void) } } } +#endif +#ifdef UAE +/* FIXME: check differences against UAE execute_normal (newcpu.cpp) */ +#else void execute_normal(void) { if (!check_for_cache_miss()) { cpu_history pc_hist[MAXRUN]; int blocklen = 0; -#if REAL_ADDRESSING || DIRECT_ADDRESSING +#if 0 && FIXED_ADDRESSING start_pc_p = regs.pc_p; start_pc = get_virtual_address(regs.pc_p); #else - start_pc_p = regs.pc_oldp; + start_pc_p = regs.pc_oldp; start_pc = regs.pc; #endif for (;;) { /* Take note: This is the do-it-normal loop */ pc_hist[blocklen++].location = (uae_u16 *)regs.pc_p; uae_u32 opcode = GET_OPCODE; #if FLIGHT_RECORDER - m68k_record_step(m68k_getpc()); + m68k_record_step(m68k_getpc(), cft_map(opcode)); #endif (*cpufunctbl[opcode])(opcode); cpu_check_ticks(); @@ -7104,10 +5429,14 @@ void execute_normal(void) } } } +#endif typedef void (*compiled_handler)(void); -static void m68k_do_compile_execute(void) +#ifdef UAE +/* FIXME: check differences against UAE m68k_do_compile_execute */ +#else +void m68k_do_compile_execute(void) { for (;;) { ((compiled_handler)(pushall_call_handler))(); @@ -7118,14 +5447,42 @@ static void m68k_do_compile_execute(void) } } } +#endif +#ifdef UAE +/* FIXME: check differences against UAE m68k_compile_execute */ +#else void m68k_compile_execute (void) { - for (;;) { - if (quit_program) - break; - m68k_do_compile_execute(); - } +setjmpagain: + TRY(prb) { + for (;;) { + if (quit_program > 0) { + if (quit_program == 1) { +#if FLIGHT_RECORDER + dump_flight_recorder(); +#endif + break; + } + quit_program = 0; + m68k_reset (); + } + m68k_do_compile_execute(); + } + } + CATCH(prb) { + jit_log("m68k_compile_execute: exception %d 
pc=%08x (%08x+%p-%p) fault_pc=%08x addr=%08x -> %08x sp=%08x", + int(prb), + m68k_getpc(), + regs.pc, regs.pc_p, regs.pc_oldp, + regs.fault_pc, + regs.mmu_fault_addr, get_long (regs.vbr + 4*prb), + regs.regs[15]); + flush_icache(); + Exception(prb, 0); + goto setjmpagain; + } } +#endif -#endif //USE_JIT +#endif /* JIT */ diff --git a/BasiliskII/src/uae_cpu/compiler/compstbla.cpp b/BasiliskII/src/uae_cpu/compiler/compstbla.cpp new file mode 100644 index 00000000..e2f36d1e --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/compstbla.cpp @@ -0,0 +1,5 @@ +/* + * compstbl.cpp must be compiled twice, once for the generator program + * and once for the actual executable + */ +#include "compstbl.cpp" diff --git a/BasiliskII/src/uae_cpu/compiler/flags_arm.h b/BasiliskII/src/uae_cpu/compiler/flags_arm.h new file mode 100644 index 00000000..c9a60490 --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/flags_arm.h @@ -0,0 +1,52 @@ +/* + * compiler/flags_arm.h - Native flags definitions for ARM + * + * Copyright (c) 2013 Jens Heitmann of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * + * Adaptation for Basilisk II and improvements, copyright 2000-2002 + * Gwenole Beauchesne + * + * Basilisk II (C) 1997-2002 Christian Bauer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef NATIVE_FLAGS_ARM_H +#define NATIVE_FLAGS_ARM_H + +/* Native integer code conditions */ +enum { + NATIVE_CC_EQ = 0, + NATIVE_CC_NE = 1, + NATIVE_CC_CS = 2, + NATIVE_CC_CC = 3, + NATIVE_CC_MI = 4, + NATIVE_CC_PL = 5, + NATIVE_CC_VS = 6, + NATIVE_CC_VC = 7, + NATIVE_CC_HI = 8, + NATIVE_CC_LS = 9, + NATIVE_CC_GE = 10, + NATIVE_CC_LT = 11, + NATIVE_CC_GT = 12, + NATIVE_CC_LE = 13, + NATIVE_CC_AL = 14 +}; + +#endif /* NATIVE_FLAGS_ARM_H */ diff --git a/BasiliskII/src/uae_cpu/compiler/flags_x86.h b/BasiliskII/src/uae_cpu/compiler/flags_x86.h index 4247f10a..310dbcc3 100644 --- a/BasiliskII/src/uae_cpu/compiler/flags_x86.h +++ b/BasiliskII/src/uae_cpu/compiler/flags_x86.h @@ -3,11 +3,11 @@ * * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer * - * Adaptation for Basilisk II and improvements, copyright 2000-2005 + * Adaptation for Basilisk II and improvements, copyright 2000-2002 * Gwenole Beauchesne * - * Basilisk II (C) 1997-2008 Christian Bauer - * + * Basilisk II (C) 1997-2002 Christian Bauer + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -34,8 +34,8 @@ enum { NATIVE_CC_CS = 2, NATIVE_CC_NE = 5, NATIVE_CC_EQ = 4, - NATIVE_CC_VC = 11, - NATIVE_CC_VS = 10, + NATIVE_CC_VC = 1, + NATIVE_CC_VS = 0, NATIVE_CC_PL = 9, NATIVE_CC_MI = 8, NATIVE_CC_GE = 13, @@ -44,4 +44,9 @@ enum { NATIVE_CC_LE = 14 }; +/* FIXME: include/flags_x86.h in UAE had the following values: + NATIVE_CC_VC = 11, + NATIVE_CC_VS = 10, +*/ + #endif /* NATIVE_FLAGS_X86_H */ diff --git a/BasiliskII/src/uae_cpu/compiler/gencomp.c b/BasiliskII/src/uae_cpu/compiler/gencomp.c index 7055e581..d301ced7 
100644 --- a/BasiliskII/src/uae_cpu/compiler/gencomp.c +++ b/BasiliskII/src/uae_cpu/compiler/gencomp.c @@ -4,11 +4,14 @@ * Based on work Copyright 1995, 1996 Bernd Schmidt * Changes for UAE-JIT Copyright 2000 Bernd Meyer * + * Adaptation for ARAnyM/ARM, copyright 2001-2014 + * Milan Jurik, Jens Heitmann + * * Adaptation for Basilisk II and improvements, copyright 2000-2005 * Gwenole Beauchesne * * Basilisk II (C) 1997-2005 Christian Bauer - * + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -24,19 +27,98 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#define CC_FOR_BUILD 1 +// #include "sysconfig.h" +#define WINUAE_ARANYM + +#include "sysdeps.h" +#include "readcpu.h" + +#undef NDEBUG +#include #include #include #include #include #include -#include "sysdeps.h" -#include "../readcpu.h" +#undef abort + +#ifdef UAE +/* +#define DISABLE_I_OR_AND_EOR +#define DISABLE_I_SUB +#define DISABLE_I_SUBA +#define DISABLE_I_SUBX +#define DISABLE_I_ADD +#define DISABLE_I_ADDA +#define DISABLE_I_ADDX +#define DISABLE_I_NEG +#define DISABLE_I_NEGX +#define DISABLE_I_CLR +#define DISABLE_I_NOT +#define DISABLE_I_TST +#define DISABLE_I_BCHG_BCLR_BSET_BTST +#define DISABLE_I_CMPM_CMP +#define DISABLE_I_CMPA +#define DISABLE_I_MOVE +#define DISABLE_I_MOVEA +#define DISABLE_I_SWAP +#define DISABLE_I_EXG +#define DISABLE_I_EXT +#define DISABLE_I_MVEL +#define DISABLE_I_MVMLE +#define DISABLE_I_RTD +#define DISABLE_I_LINK +#define DISABLE_I_UNLK +#define DISABLE_I_RTS +#define DISABLE_I_JSR +#define DISABLE_I_JMP +#define DISABLE_I_BSR +#define DISABLE_I_BCC +#define DISABLE_I_LEA +#define DISABLE_I_PEA +#define DISABLE_I_DBCC +#define DISABLE_I_SCC +#define DISABLE_I_MULU +#define DISABLE_I_MULS +#define DISABLE_I_ASR +#define DISABLE_I_ASL +#define DISABLE_I_LSR +#define DISABLE_I_LSL 
+#define DISABLE_I_ROL +#define DISABLE_I_ROR +#define DISABLE_I_MULL +#define DISABLE_I_FPP +#define DISABLE_I_FBCC +#define DISABLE_I_FSCC +#define DISABLE_I_MOVE16 +*/ + +#endif /* UAE */ + +#ifdef UAE +#define JIT_PATH "jit/" +#ifdef FSUAE +#define GEN_PATH "gen/" +#else +#define GEN_PATH "jit/" +#endif +#define RETURN "return 0;" +#define RETTYPE "uae_u32" +#define NEXT_CPU_LEVEL 5 +#else +#define JIT_PATH "compiler/" +#define GEN_PATH "" +#define RETURN "return;" +#define RETTYPE "void" +#define NEXT_CPU_LEVEL 4 +#endif #define BOOL_TYPE "int" #define failure global_failure=1 #define FAILURE global_failure=1 #define isjump global_isjump=1 -#define is_const_jump global_iscjump=1; +#define is_const_jump global_iscjump=1 #define isaddx global_isaddx=1 #define uses_cmov global_cmov=1 #define mayfail global_mayfail=1 @@ -57,27 +139,53 @@ static char endstr[1000]; static char lines[100000]; static int comp_index=0; -static int cond_codes_x86[]={-1,-1,7,6,3,2,5,4,-1,-1,9,8,13,12,15,14}; +#include "flags_x86.h" -static void comprintf(const char* format, ...) +#ifndef __attribute__ +# ifndef __GNUC__ +# define __attribute__(x) +# endif +#endif + +#define GENA_GETV_NO_FETCH 0 +#define GENA_GETV_FETCH 1 +#define GENA_GETV_FETCH_ALIGN 2 +#define GENA_MOVEM_DO_INC 0 +#define GENA_MOVEM_NO_INC 1 +#define GENA_MOVEM_MOVE16 2 + + +static int cond_codes[]={-1,-1, + NATIVE_CC_HI,NATIVE_CC_LS, + NATIVE_CC_CC,NATIVE_CC_CS, + NATIVE_CC_NE,NATIVE_CC_EQ, + -1,-1, + NATIVE_CC_PL,NATIVE_CC_MI, + NATIVE_CC_GE,NATIVE_CC_LT, + NATIVE_CC_GT,NATIVE_CC_LE + }; + +__attribute__((format(printf, 1, 2))) +static void comprintf(const char *format, ...) 
{ - va_list args; + va_list args; - va_start(args,format); - comp_index+=vsprintf(lines+comp_index,format,args); + va_start(args, format); + comp_index += vsprintf(lines + comp_index, format, args); + va_end(args); } static void com_discard(void) { - comp_index=0; + comp_index = 0; } static void com_flush(void) { - int i; - for (i=0;i 0); @@ -156,87 +264,47 @@ close_brace (void) comprintf ("}"); } -static void +static void finish_braces (void) { while (n_braces > 0) close_brace (); + comprintf ("\n"); } -static void -pop_braces (int to) -{ - while (n_braces > to) - close_brace (); -} - -static int -bit_size (int size) -{ - switch (size) - { - case sz_byte: - return 8; - case sz_word: - return 16; - case sz_long: - return 32; - default: - abort (); - } - return 0; -} - -static const char * -bit_mask (int size) -{ - switch (size) - { - case sz_byte: - return "0xff"; - case sz_word: - return "0xffff"; - case sz_long: - return "0xffffffff"; - default: - abort (); - } - return 0; -} - -static __inline__ void gen_update_next_handler(void) +static inline void gen_update_next_handler(void) { return; /* Can anything clever be done here? 
*/ } -static void gen_writebyte(char* address, char* source) +static void gen_writebyte(const char *address, const char *source) { - comprintf("\twritebyte(%s,%s,scratchie);\n",address,source); + comprintf("\twritebyte(%s, %s, scratchie);\n", address, source); } -static void gen_writeword(char* address, char* source) +static void gen_writeword(const char *address, const char *source) { - comprintf("\twriteword(%s,%s,scratchie);\n",address,source); + comprintf("\twriteword(%s, %s, scratchie);\n", address, source); } -static void gen_writelong(char* address, char* source) +static void gen_writelong(const char *address, const char *source) { - comprintf("\twritelong(%s,%s,scratchie);\n",address,source); + comprintf("\twritelong(%s, %s, scratchie);\n", address, source); } -static void gen_readbyte(char* address, char* dest) +static void gen_readbyte(const char *address, const char* dest) { - comprintf("\treadbyte(%s,%s,scratchie);\n",address,dest); + comprintf("\treadbyte(%s, %s, scratchie);\n", address, dest); } -static void gen_readword(char* address, char* dest) +static void gen_readword(const char *address, const char *dest) { - comprintf("\treadword(%s,%s,scratchie);\n",address,dest); + comprintf("\treadword(%s,%s,scratchie);\n", address, dest); } -static void gen_readlong(char* address, char* dest) +static void gen_readlong(const char *address, const char *dest) { - comprintf("\treadlong(%s,%s,scratchie);\n",address,dest); + comprintf("\treadlong(%s, %s, scratchie);\n", address, dest); } @@ -248,7 +316,7 @@ gen_nextilong (void) sprintf (buffer, "comp_get_ilong((m68k_pc_offset+=4)-4)"); insn_n_cycles += 4; - + long_opcode=1; return buffer; } @@ -277,352 +345,391 @@ gen_nextibyte (void) return buffer; } + static void swap_opcode (void) { - comprintf("#ifdef HAVE_GET_WORD_UNSWAPPED\n"); +#ifdef UAE + /* no-op */ +#else + comprintf("#ifdef USE_JIT_FPU\n"); + comprintf("#if defined(HAVE_GET_WORD_UNSWAPPED) && !defined(FULLMMU)\n"); comprintf("\topcode = 
do_byteswap_16(opcode);\n"); comprintf("#endif\n"); + comprintf("#endif\n"); +#endif } -static void +static void sync_m68k_pc (void) { - comprintf("\t if (m68k_pc_offset>100) sync_m68k_pc();\n"); + comprintf(" if (m68k_pc_offset > SYNC_PC_OFFSET)\n sync_m68k_pc();\n"); +} + + +static void gen_set_fault_pc(void) +{ + start_brace(); + comprintf("\tsync_m68k_pc();\n"); + comprintf("\tuae_u32 retadd=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n"); + comprintf("\tint ret=scratchie++;\n" + "\tmov_l_ri(ret,retadd);\n" + "\tmov_l_mr((uintptr)®s.fault_pc,ret);\n"); +} + + +static void make_sr(void) +{ + start_brace(); + comprintf("\tint sr = scratchie++;\n"); + comprintf("\tint tmp = scratchie++;\n"); + comprintf("\tcompemu_make_sr(sr, tmp);\n"); +} + + +static void disasm_this_inst(void) +{ + comprintf("\tdisasm_this_inst = true;\n"); } /* getv == 1: fetch data; getv != 0: check for odd address. If movem != 0, * the calling routine handles Apdi and Aipi modes. * gb-- movem == 2 means the same thing but for a MOVE16 instruction */ -static void -genamode (amodes mode, char *reg, wordsizes size, char *name, int getv, int movem) +static void genamode(amodes mode, const char *reg, wordsizes size, const char *name, int getv, int movem) { - start_brace (); - switch (mode) - { - case Dreg: /* Do we need to check dodgy here? 
*/ - if (movem) - abort (); - if (getv == 1 || getv==2) { - /* We generate the variable even for getv==2, so we can use - it as a destination for MOVE */ - comprintf ("\tint %s=%s;\n",name,reg); - } - return; - - case Areg: - if (movem) - abort (); - if (getv == 1 || getv==2) { - /* see above */ - comprintf ("\tint %s=dodgy?scratchie++:%s+8;\n",name,reg); - if (getv==1) { - comprintf ("\tif (dodgy) \n"); - comprintf ("\t\tmov_l_rr(%s,%s+8);\n",name, reg); - } - } - return; - - case Aind: - comprintf ("\tint %sa=dodgy?scratchie++:%s+8;\n",name,reg); - comprintf ("\tif (dodgy) \n"); - comprintf ("\t\tmov_l_rr(%sa,%s+8);\n",name, reg); - break; - case Aipi: - comprintf ("\tint %sa=scratchie++;\n",name,reg); - comprintf ("\tmov_l_rr(%sa,%s+8);\n",name, reg); - break; - case Apdi: - switch (size) - { - case sz_byte: - if (movem) { - comprintf ("\tint %sa=dodgy?scratchie++:%s+8;\n",name,reg); - comprintf ("\tif (dodgy) \n"); - comprintf("\tmov_l_rr(%sa,8+%s);\n",name,reg); - } - else { - start_brace(); - comprintf ("\tint %sa=dodgy?scratchie++:%s+8;\n",name,reg); - comprintf("\tlea_l_brr(%s+8,%s+8,(uae_s32)-areg_byteinc[%s]);\n",reg,reg,reg); - comprintf ("\tif (dodgy) \n"); - comprintf("\tmov_l_rr(%sa,8+%s);\n",name,reg); - } - break; - case sz_word: - if (movem) { - comprintf ("\tint %sa=dodgy?scratchie++:%s+8;\n",name,reg); - comprintf ("\tif (dodgy) \n"); - comprintf("\tmov_l_rr(%sa,8+%s);\n",name,reg); - } - else { - start_brace(); - comprintf ("\tint %sa=dodgy?scratchie++:%s+8;\n",name,reg); - comprintf("\tlea_l_brr(%s+8,%s+8,-2);\n",reg,reg); - comprintf ("\tif (dodgy) \n"); - comprintf("\tmov_l_rr(%sa,8+%s);\n",name,reg); - } - break; - case sz_long: - if (movem) { - comprintf ("\tint %sa=dodgy?scratchie++:%s+8;\n",name,reg); - comprintf ("\tif (dodgy) \n"); - comprintf("\tmov_l_rr(%sa,8+%s);\n",name,reg); - } - else { - start_brace(); - comprintf ("\tint %sa=dodgy?scratchie++:%s+8;\n",name,reg); - comprintf("\tlea_l_brr(%s+8,%s+8,-4);\n",reg,reg); - comprintf 
("\tif (dodgy) \n"); - comprintf("\tmov_l_rr(%sa,8+%s);\n",name,reg); - } - break; - default: - abort (); - } - break; - case Ad16: - comprintf("\tint %sa=scratchie++;\n",name); - comprintf("\tmov_l_rr(%sa,8+%s);\n",name,reg); - comprintf("\tlea_l_brr(%sa,%sa,(uae_s32)(uae_s16)%s);\n",name,name,gen_nextiword()); - break; - case Ad8r: - comprintf("\tint %sa=scratchie++;\n",name); - comprintf("\tcalc_disp_ea_020(%s+8,%s,%sa,scratchie);\n", - reg,gen_nextiword(),name); - break; - - case PC16: - comprintf("\tint %sa=scratchie++;\n",name); - comprintf("\tuae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n"); - comprintf ("\tuae_s32 PC16off = (uae_s32)(uae_s16)%s;\n", gen_nextiword ()); - comprintf("\tmov_l_ri(%sa,address+PC16off);\n",name); - break; - - case PC8r: - comprintf("\tint pctmp=scratchie++;\n"); - comprintf("\tint %sa=scratchie++;\n",name); - comprintf("\tuae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n"); start_brace(); - comprintf("\tmov_l_ri(pctmp,address);\n"); - - comprintf("\tcalc_disp_ea_020(pctmp,%s,%sa,scratchie);\n", - gen_nextiword(),name); - break; - case absw: - comprintf ("\tint %sa = scratchie++;\n",name); - comprintf ("\tmov_l_ri(%sa,(uae_s32)(uae_s16)%s);\n", name, gen_nextiword ()); - break; - case absl: - comprintf ("\tint %sa = scratchie++;\n",name); - comprintf ("\tmov_l_ri(%sa,%s); /* absl */\n", name, gen_nextilong ()); - break; - case imm: - if (getv != 1) - abort (); - switch (size) - { - case sz_byte: - comprintf ("\tint %s = scratchie++;\n",name); - comprintf ("\tmov_l_ri(%s,(uae_s32)(uae_s8)%s);\n", name, gen_nextibyte ()); - break; - case sz_word: - comprintf ("\tint %s = scratchie++;\n",name); - comprintf ("\tmov_l_ri(%s,(uae_s32)(uae_s16)%s);\n", name, gen_nextiword ()); - break; - case sz_long: - comprintf ("\tint %s = scratchie++;\n",name); - comprintf ("\tmov_l_ri(%s,%s);\n", name, gen_nextilong ()); - break; - default: - abort (); - } - return; - case imm0: - if 
(getv != 1) - abort (); - comprintf ("\tint %s = scratchie++;\n",name); - comprintf ("\tmov_l_ri(%s,(uae_s32)(uae_s8)%s);\n", name, gen_nextibyte ()); - return; - case imm1: - if (getv != 1) - abort (); - comprintf ("\tint %s = scratchie++;\n",name); - comprintf ("\tmov_l_ri(%s,(uae_s32)(uae_s16)%s);\n", name, gen_nextiword ()); - return; - case imm2: - if (getv != 1) - abort (); - comprintf ("\tint %s = scratchie++;\n",name); - comprintf ("\tmov_l_ri(%s,%s);\n", name, gen_nextilong ()); - return; - case immi: - if (getv != 1) - abort (); - comprintf ("\tint %s = scratchie++;\n",name); - comprintf ("\tmov_l_ri(%s,%s);\n", name, reg); - return; - default: - abort (); - } - - /* We get here for all non-reg non-immediate addressing modes to - * actually fetch the value. */ - if (getv == 1) - { - char astring[80]; - sprintf(astring,"%sa",name); - switch (size) - { - case sz_byte: - insn_n_cycles += 2; - break; - case sz_word: - insn_n_cycles += 2; - break; - case sz_long: - insn_n_cycles += 4; - break; - default: - abort (); - } - start_brace (); - comprintf("\tint %s=scratchie++;\n",name); - switch (size) - { - case sz_byte: - gen_readbyte(astring,name); - break; - case sz_word: - gen_readword(astring,name); - break; - case sz_long: - gen_readlong(astring,name); - break; - default: - abort (); - } - } - - /* We now might have to fix up the register for pre-dec or post-inc - * addressing modes. */ - if (!movem) { switch (mode) { - case Aipi: - switch (size) - { - case sz_byte: - comprintf("\tlea_l_brr(%s+8,%s+8,areg_byteinc[%s]);\n",reg,reg,reg); + case Dreg: /* Do we need to check dodgy here? 
*/ + assert (movem == GENA_MOVEM_DO_INC); + if (getv == GENA_GETV_FETCH || getv == GENA_GETV_FETCH_ALIGN) + { + /* We generate the variable even for getv==2, so we can use + it as a destination for MOVE */ + comprintf("\tint %s = %s;\n", name, reg); + } + return; + + case Areg: + assert (movem == GENA_MOVEM_DO_INC); + if (getv == GENA_GETV_FETCH || getv == GENA_GETV_FETCH_ALIGN) + { + /* see above */ + comprintf("\tint %s = dodgy ? scratchie++ : %s + 8;\n", name, reg); + if (getv == GENA_GETV_FETCH) + { + comprintf("\tif (dodgy) \n"); + comprintf("\t\tmov_l_rr(%s, %s + 8);\n", name, reg); + } + } + return; + + case Aind: + comprintf("\tint %sa = dodgy ? scratchie++ : %s + 8;\n", name, reg); + comprintf("\tif (dodgy)\n"); + comprintf("\t\tmov_l_rr(%sa, %s + 8);\n", name, reg); break; - case sz_word: - comprintf("\tlea_l_brr(%s+8,%s+8,2);\n",reg,reg,reg); + case Aipi: + comprintf("\tint %sa = scratchie++;\n", name); + comprintf("\tmov_l_rr(%sa, %s + 8);\n", name, reg); break; - case sz_long: - comprintf("\tlea_l_brr(%s+8,%s+8,4);\n",reg,reg); + case Apdi: + switch (size) + { + case sz_byte: + if (movem != GENA_MOVEM_DO_INC) + { + comprintf("\tint %sa = dodgy ? scratchie++ : %s + 8;\n", name, reg); + comprintf("\tif (dodgy)\n"); + comprintf("\t\tmov_l_rr(%sa, 8 + %s);\n", name, reg); + } else + { + start_brace(); + comprintf("\tint %sa = dodgy ? scratchie++ : %s + 8;\n", name, reg); + comprintf("\tlea_l_brr(%s + 8, %s + 8, (uae_s32)-areg_byteinc[%s]);\n", reg, reg, reg); + comprintf("\tif (dodgy)\n"); + comprintf("\t\tmov_l_rr(%sa, 8 + %s);\n", name, reg); + } + break; + case sz_word: + if (movem != GENA_MOVEM_DO_INC) + { + comprintf("\tint %sa=dodgy?scratchie++:%s+8;\n", name, reg); + comprintf("\tif (dodgy) \n"); + comprintf("\tmov_l_rr(%sa,8+%s);\n", name, reg); + } else + { + start_brace(); + comprintf("\tint %sa = dodgy ? 
scratchie++ : %s + 8;\n", name, reg); + comprintf("\tlea_l_brr(%s + 8, %s + 8, -2);\n", reg, reg); + comprintf("\tif (dodgy)\n"); + comprintf("\t\tmov_l_rr(%sa, 8 + %s);\n", name, reg); + } + break; + case sz_long: + if (movem != GENA_MOVEM_DO_INC) + { + comprintf("\tint %sa = dodgy ? scratchie++ : %s + 8;\n", name, reg); + comprintf("\tif (dodgy)\n"); + comprintf("\t\tmov_l_rr(%sa, 8 + %s);\n", name, reg); + } else + { + start_brace(); + comprintf("\tint %sa = dodgy ? scratchie++ : %s + 8;\n", name, reg); + comprintf("\tlea_l_brr(%s + 8, %s + 8, -4);\n", reg, reg); + comprintf("\tif (dodgy)\n"); + comprintf("\t\tmov_l_rr(%sa, 8 + %s);\n", name, reg); + } + break; + default: + assert(0); + break; + } + break; + case Ad16: + comprintf("\tint %sa = scratchie++;\n", name); + comprintf("\tmov_l_rr(%sa, 8 + %s);\n", name, reg); + comprintf("\tlea_l_brr(%sa, %sa, (uae_s32)(uae_s16)%s);\n", name, name, gen_nextiword()); + break; + case Ad8r: + comprintf("\tint %sa = scratchie++;\n", name); + comprintf("\tcalc_disp_ea_020(%s + 8, %s, %sa, scratchie);\n", reg, gen_nextiword(), name); + break; + + case PC16: + comprintf("\tint %sa = scratchie++;\n", name); + comprintf("\tuae_u32 address = start_pc + ((char *)comp_pc_p - (char *)start_pc_p) + m68k_pc_offset;\n"); + comprintf("\tuae_s32 PC16off = (uae_s32)(uae_s16)%s;\n", gen_nextiword()); + comprintf("\tmov_l_ri(%sa, address + PC16off);\n", name); + break; + + case PC8r: + comprintf("\tint pctmp = scratchie++;\n"); + comprintf("\tint %sa = scratchie++;\n", name); + comprintf("\tuae_u32 address = start_pc + ((char *)comp_pc_p - (char *)start_pc_p) + m68k_pc_offset;\n"); + start_brace(); + comprintf("\tmov_l_ri(pctmp,address);\n"); + + comprintf("\tcalc_disp_ea_020(pctmp, %s, %sa, scratchie);\n", gen_nextiword(), name); + break; + case absw: + comprintf("\tint %sa = scratchie++;\n", name); + comprintf("\tmov_l_ri(%sa, (uae_s32)(uae_s16)%s);\n", name, gen_nextiword()); + break; + case absl: + comprintf("\tint %sa = 
scratchie++;\n", name); + comprintf("\tmov_l_ri(%sa, %s); /* absl */\n", name, gen_nextilong()); + break; + case imm: + assert (getv == GENA_GETV_FETCH); + switch (size) + { + case sz_byte: + comprintf("\tint %s = scratchie++;\n", name); + comprintf("\tmov_l_ri(%s, (uae_s32)(uae_s8)%s);\n", name, gen_nextibyte()); + break; + case sz_word: + comprintf("\tint %s = scratchie++;\n", name); + comprintf("\tmov_l_ri(%s, (uae_s32)(uae_s16)%s);\n", name, gen_nextiword()); + break; + case sz_long: + comprintf("\tint %s = scratchie++;\n", name); + comprintf("\tmov_l_ri(%s, %s);\n", name, gen_nextilong()); + break; + default: + assert(0); + break; + } + return; + case imm0: + assert (getv == GENA_GETV_FETCH); + comprintf("\tint %s = scratchie++;\n", name); + comprintf("\tmov_l_ri(%s, (uae_s32)(uae_s8)%s);\n", name, gen_nextibyte()); + return; + case imm1: + assert (getv == GENA_GETV_FETCH); + comprintf("\tint %s = scratchie++;\n", name); + comprintf("\tmov_l_ri(%s, (uae_s32)(uae_s16)%s);\n", name, gen_nextiword()); + return; + case imm2: + assert (getv == GENA_GETV_FETCH); + comprintf("\tint %s = scratchie++;\n", name); + comprintf("\tmov_l_ri(%s, %s);\n", name, gen_nextilong()); + return; + case immi: + assert (getv == GENA_GETV_FETCH); + comprintf("\tint %s = scratchie++;\n", name); + comprintf("\tmov_l_ri(%s, %s);\n", name, reg); + return; + default: + assert(0); break; - default: - abort (); - } - break; - case Apdi: - break; - default: - break; } - } + + /* We get here for all non-reg non-immediate addressing modes to + * actually fetch the value. 
*/ + if (getv == GENA_GETV_FETCH) + { + char astring[80]; + sprintf(astring, "%sa", name); + switch (size) + { + case sz_byte: + insn_n_cycles += 2; + break; + case sz_word: + insn_n_cycles += 2; + break; + case sz_long: + insn_n_cycles += 4; + break; + default: + assert(0); + break; + } + start_brace(); + comprintf("\tint %s = scratchie++;\n", name); + switch (size) + { + case sz_byte: + gen_readbyte(astring, name); + break; + case sz_word: + gen_readword(astring, name); + break; + case sz_long: + gen_readlong(astring, name); + break; + default: + assert(0); + break; + } + } + + /* We now might have to fix up the register for pre-dec or post-inc + * addressing modes. */ + if (movem == GENA_MOVEM_DO_INC) + { + switch (mode) + { + case Aipi: + switch (size) + { + case sz_byte: + comprintf("\tlea_l_brr(%s + 8,%s + 8, areg_byteinc[%s]);\n", reg, reg, reg); + break; + case sz_word: + comprintf("\tlea_l_brr(%s + 8, %s + 8, 2);\n", reg, reg); + break; + case sz_long: + comprintf("\tlea_l_brr(%s + 8, %s + 8, 4);\n", reg, reg); + break; + default: + assert(0); + break; + } + break; + case Apdi: + break; + default: + break; + } + } } -static void -genastore (char *from, amodes mode, char *reg, wordsizes size, char *to) +static void genastore(const char *from, amodes mode, const char *reg, wordsizes size, const char *to) { - switch (mode) - { - case Dreg: - switch (size) + switch (mode) { - case sz_byte: - comprintf("\tif(%s!=%s)\n",reg,from); - comprintf ("\t\tmov_b_rr(%s,%s);\n", reg, from); - break; - case sz_word: - comprintf("\tif(%s!=%s)\n",reg,from); - comprintf ("\t\tmov_w_rr(%s,%s);\n", reg, from); - break; - case sz_long: - comprintf("\tif(%s!=%s)\n",reg,from); - comprintf ("\t\tmov_l_rr(%s,%s);\n", reg, from); - break; - default: - abort (); - } - break; - case Areg: - switch (size) - { - case sz_word: - comprintf("\tif(%s+8!=%s)\n",reg,from); - comprintf ("\t\tmov_w_rr(%s+8,%s);\n", reg, from); - break; - case sz_long: - comprintf("\tif(%s+8!=%s)\n",reg,from); - 
comprintf ("\t\tmov_l_rr(%s+8,%s);\n", reg, from); - break; - default: - abort (); - } - break; + case Dreg: + switch (size) + { + case sz_byte: + comprintf("\tif(%s != %s)\n", reg, from); + comprintf("\t\tmov_b_rr(%s, %s);\n", reg, from); + break; + case sz_word: + comprintf("\tif(%s != %s)\n", reg, from); + comprintf("\t\tmov_w_rr(%s, %s);\n", reg, from); + break; + case sz_long: + comprintf("\tif(%s != %s)\n", reg, from); + comprintf("\t\tmov_l_rr(%s, %s);\n", reg, from); + break; + default: + assert(0); + break; + } + break; + case Areg: + switch (size) + { + case sz_word: + comprintf("\tif(%s + 8 != %s)\n", reg, from); + comprintf("\t\tmov_w_rr(%s + 8, %s);\n", reg, from); + break; + case sz_long: + comprintf("\tif(%s + 8 != %s)\n", reg, from); + comprintf("\t\tmov_l_rr(%s + 8, %s);\n", reg, from); + break; + default: + assert(0); + break; + } + break; - case Apdi: - case absw: - case PC16: - case PC8r: - case Ad16: - case Ad8r: - case Aipi: - case Aind: - case absl: - { - char astring[80]; - sprintf(astring,"%sa",to); - - switch (size) - { - case sz_byte: - insn_n_cycles += 2; - gen_writebyte(astring,from); - break; - case sz_word: - insn_n_cycles += 2; - gen_writeword(astring,from); - break; - case sz_long: - insn_n_cycles += 4; - gen_writelong(astring,from); - break; - default: - abort (); - } - } - break; - case imm: - case imm0: - case imm1: - case imm2: - case immi: - abort (); - break; - default: - abort (); - } + case Apdi: + case absw: + case PC16: + case PC8r: + case Ad16: + case Ad8r: + case Aipi: + case Aind: + case absl: + { + char astring[80]; + sprintf(astring, "%sa", to); + + switch (size) + { + case sz_byte: + insn_n_cycles += 2; + gen_writebyte(astring, from); + break; + case sz_word: + insn_n_cycles += 2; + gen_writeword(astring, from); + break; + case sz_long: + insn_n_cycles += 4; + gen_writelong(astring, from); + break; + default: + assert(0); + break; + } + } + break; + case imm: + case imm0: + case imm1: + case imm2: + case immi: + 
assert(0); + break; + default: + assert(0); + break; + } } static void genmov16(uae_u32 opcode, struct instr *curi) { comprintf("\tint src=scratchie++;\n"); comprintf("\tint dst=scratchie++;\n"); - + if ((opcode & 0xfff8) == 0xf620) { /* MOVE16 (Ax)+,(Ay)+ */ comprintf("\tuae_u16 dstreg=((%s)>>12)&0x07;\n", gen_nextiword()); @@ -631,16 +738,16 @@ static void genmov16(uae_u32 opcode, struct instr *curi) } else { /* Other variants */ - genamode (curi->smode, "srcreg", curi->size, "src", 0, 2); - genamode (curi->dmode, "dstreg", curi->size, "dst", 0, 2); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_NO_FETCH, GENA_MOVEM_MOVE16); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_NO_FETCH, GENA_MOVEM_MOVE16); comprintf("\tmov_l_rr(src,srca);\n"); comprintf("\tmov_l_rr(dst,dsta);\n"); } - + /* Align on 16-byte boundaries */ comprintf("\tand_l_ri(src,~15);\n"); comprintf("\tand_l_ri(dst,~15);\n"); - + if ((opcode & 0xfff8) == 0xf620) { comprintf("\tif (srcreg != dstreg)\n"); comprintf("\tadd_l_ri(srcreg+8,16);\n"); @@ -651,10 +758,30 @@ static void genmov16(uae_u32 opcode, struct instr *curi) else if ((opcode & 0xfff8) == 0xf608) comprintf("\tadd_l_ri(dstreg+8,16);\n"); +#ifdef UAE + comprintf("\tif (special_mem) {\n"); + comprintf("\t\tint tmp=scratchie;\n"); + comprintf("\tscratchie+=4;\n" + "\treadlong(src,tmp,scratchie);\n" + "\twritelong_clobber(dst,tmp,scratchie);\n" + "\tadd_l_ri(src,4);\n" + "\tadd_l_ri(dst,4);\n" + "\treadlong(src,tmp,scratchie);\n" + "\twritelong_clobber(dst,tmp,scratchie);\n" + "\tadd_l_ri(src,4);\n" + "\tadd_l_ri(dst,4);\n" + "\treadlong(src,tmp,scratchie);\n" + "\twritelong_clobber(dst,tmp,scratchie);\n" + "\tadd_l_ri(src,4);\n" + "\tadd_l_ri(dst,4);\n" + "\treadlong(src,tmp,scratchie);\n" + "\twritelong_clobber(dst,tmp,scratchie);\n"); + comprintf("\t} else\n"); +#endif + start_brace(); comprintf("\tint tmp=scratchie;\n"); - comprintf("\tscratchie+=4;\n"); - - comprintf("\tget_n_addr(src,src,scratchie);\n" + 
comprintf("\tscratchie+=4;\n" + "\tget_n_addr(src,src,scratchie);\n" "\tget_n_addr(dst,dst,scratchie);\n" "\tmov_l_rR(tmp+0,src,0);\n" "\tmov_l_rR(tmp+1,src,4);\n" @@ -667,43 +794,82 @@ static void genmov16(uae_u32 opcode, struct instr *curi) "\tmov_l_Rr(dst,tmp+2,8);\n" "\tforget_about(tmp+2);\n" "\tmov_l_Rr(dst,tmp+3,12);\n"); + close_brace(); } -static void +static void genmovemel (uae_u16 opcode) { comprintf ("\tuae_u16 mask = %s;\n", gen_nextiword ()); comprintf ("\tint native=scratchie++;\n"); comprintf ("\tint i;\n"); comprintf ("\tsigned char offset=0;\n"); - genamode (table68k[opcode].dmode, "dstreg", table68k[opcode].size, "src", 2, 1); + genamode (table68k[opcode].dmode, "dstreg", table68k[opcode].size, "src", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_NO_INC); +#ifdef UAE + if (table68k[opcode].size == sz_long) + comprintf("\tif (1 && !special_mem) {\n"); + else + comprintf("\tif (1 && !special_mem) {\n"); +#endif + + /* Fast but unsafe... */ comprintf("\tget_n_addr(srca,native,scratchie);\n"); comprintf("\tfor (i=0;i<16;i++) {\n" "\t\tif ((mask>>i)&1) {\n"); switch(table68k[opcode].size) { - case sz_long: + case sz_long: comprintf("\t\t\tmov_l_rR(i,native,offset);\n" - "\t\t\tbswap_32(i);\n" + "\t\t\tmid_bswap_32(i);\n" "\t\t\toffset+=4;\n"); break; - case sz_word: + case sz_word: comprintf("\t\t\tmov_w_rR(i,native,offset);\n" - "\t\t\tbswap_16(i);\n" + "\t\t\tmid_bswap_16(i);\n" "\t\t\tsign_extend_16_rr(i,i);\n" "\t\t\toffset+=2;\n"); break; - default: abort(); + default: assert(0); } comprintf("\t\t}\n" "\t}"); if (table68k[opcode].dmode == Aipi) { - comprintf("\t\t\tlea_l_brr(8+dstreg,srca,offset);\n"); + comprintf("\t\t\tlea_l_brr(8+dstreg,srca,offset);\n"); } + /* End fast but unsafe. 
*/ + +#ifdef UAE + comprintf("\t} else {\n"); + + comprintf ("\t\tint tmp=scratchie++;\n"); + + comprintf("\t\tmov_l_rr(tmp,srca);\n"); + comprintf("\t\tfor (i=0;i<16;i++) {\n" + "\t\t\tif ((mask>>i)&1) {\n"); + switch(table68k[opcode].size) { + case sz_long: + comprintf("\t\t\t\treadlong(tmp,i,scratchie);\n" + "\t\t\t\tadd_l_ri(tmp,4);\n"); + break; + case sz_word: + comprintf("\t\t\t\treadword(tmp,i,scratchie);\n" + "\t\t\t\tadd_l_ri(tmp,2);\n"); + break; + default: assert(0); + } + + comprintf("\t\t\t}\n" + "\t\t}\n"); + if (table68k[opcode].dmode == Aipi) { + comprintf("\t\tmov_l_rr(8+dstreg,tmp);\n"); + } + comprintf("\t}\n"); +#endif + } -static void +static void genmovemle (uae_u16 opcode) { comprintf ("\tuae_u16 mask = %s;\n", gen_nextiword ()); @@ -711,61 +877,115 @@ genmovemle (uae_u16 opcode) comprintf ("\tint i;\n"); comprintf ("\tint tmp=scratchie++;\n"); comprintf ("\tsigned char offset=0;\n"); - genamode (table68k[opcode].dmode, "dstreg", table68k[opcode].size, "src", 2, 1); + genamode (table68k[opcode].dmode, "dstreg", table68k[opcode].size, "src", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_NO_INC); +#ifdef UAE + /* *Sigh* Some clever geek realized that the fastest way to copy a + buffer from main memory to the gfx card is by using movmle. 
Good + on her, but unfortunately, gfx mem isn't "real" mem, and thus that + act of cleverness means that movmle must pay attention to special_mem, + or Genetic Species is a rather boring-looking game ;-) */ + if (table68k[opcode].size == sz_long) + comprintf("\tif (1 && !special_mem) {\n"); + else + comprintf("\tif (1 && !special_mem) {\n"); +#endif comprintf("\tget_n_addr(srca,native,scratchie);\n"); if (table68k[opcode].dmode!=Apdi) { comprintf("\tfor (i=0;i<16;i++) {\n" "\t\tif ((mask>>i)&1) {\n"); switch(table68k[opcode].size) { - case sz_long: + case sz_long: comprintf("\t\t\tmov_l_rr(tmp,i);\n" - "\t\t\tbswap_32(tmp);\n" + "\t\t\tmid_bswap_32(tmp);\n" "\t\t\tmov_l_Rr(native,tmp,offset);\n" "\t\t\toffset+=4;\n"); break; - case sz_word: + case sz_word: comprintf("\t\t\tmov_l_rr(tmp,i);\n" - "\t\t\tbswap_16(tmp);\n" + "\t\t\tmid_bswap_16(tmp);\n" "\t\t\tmov_w_Rr(native,tmp,offset);\n" "\t\t\toffset+=2;\n"); break; - default: abort(); + default: assert(0); } } else { /* Pre-decrement */ comprintf("\tfor (i=0;i<16;i++) {\n" "\t\tif ((mask>>i)&1) {\n"); switch(table68k[opcode].size) { - case sz_long: + case sz_long: comprintf("\t\t\toffset-=4;\n" "\t\t\tmov_l_rr(tmp,15-i);\n" - "\t\t\tbswap_32(tmp);\n" + "\t\t\tmid_bswap_32(tmp);\n" "\t\t\tmov_l_Rr(native,tmp,offset);\n" ); break; - case sz_word: + case sz_word: comprintf("\t\t\toffset-=2;\n" "\t\t\tmov_l_rr(tmp,15-i);\n" - "\t\t\tbswap_16(tmp);\n" + "\t\t\tmid_bswap_16(tmp);\n" "\t\t\tmov_w_Rr(native,tmp,offset);\n" ); break; - default: abort(); + default: assert(0); } } - + comprintf("\t\t}\n" "\t}"); if (table68k[opcode].dmode == Apdi) { comprintf("\t\t\tlea_l_brr(8+dstreg,srca,(uae_s32)offset);\n"); } +#ifdef UAE + comprintf("\t} else {\n"); + + if (table68k[opcode].dmode!=Apdi) { + comprintf("\tmov_l_rr(tmp,srca);\n"); + comprintf("\tfor (i=0;i<16;i++) {\n" + "\t\tif ((mask>>i)&1) {\n"); + switch(table68k[opcode].size) { + case sz_long: + comprintf("\t\t\twritelong(tmp,i,scratchie);\n" + 
"\t\t\tadd_l_ri(tmp,4);\n"); + break; + case sz_word: + comprintf("\t\t\twriteword(tmp,i,scratchie);\n" + "\t\t\tadd_l_ri(tmp,2);\n"); + break; + default: assert(0); + } + } + else { /* Pre-decrement */ + comprintf("\tfor (i=0;i<16;i++) {\n" + "\t\tif ((mask>>i)&1) {\n"); + switch(table68k[opcode].size) { + case sz_long: + comprintf("\t\t\tsub_l_ri(srca,4);\n" + "\t\t\twritelong(srca,15-i,scratchie);\n"); + break; + case sz_word: + comprintf("\t\t\tsub_l_ri(srca,2);\n" + "\t\t\twriteword(srca,15-i,scratchie);\n"); + break; + default: assert(0); + } + } + + + comprintf("\t\t}\n" + "\t}"); + if (table68k[opcode].dmode == Apdi) { + comprintf("\t\t\tmov_l_rr(8+dstreg,srca);\n"); + } + comprintf("\t}\n"); +#endif } -static void +static void duplicate_carry (void) { comprintf ("\tif (needed_flags&FLAG_X) duplicate_carry();\n"); @@ -773,15 +993,15 @@ duplicate_carry (void) typedef enum { - flag_logical_noclobber, flag_logical, flag_add, flag_sub, flag_cmp, + flag_logical_noclobber, flag_logical, flag_add, flag_sub, flag_cmp, flag_addx, flag_subx, flag_zn, flag_av, flag_sv, flag_and, flag_or, flag_eor, flag_mov } flagtypes; -static void -genflags (flagtypes type, wordsizes size, char *value, char *src, char *dst) +static void +genflags (flagtypes type, wordsizes size, const char *value, const char *src, const char *dst) { if (noflags) { switch(type) { @@ -793,11 +1013,11 @@ genflags (flagtypes type, wordsizes size, char *value, char *src, char *dst) case flag_sub: comprintf("\tdont_care_flags();\n"); { - char* op; + const char* op; switch(type) { case flag_add: op="add"; break; case flag_sub: op="sub"; break; - default: abort(); + default: assert(0); } switch (size) { @@ -876,11 +1096,11 @@ genflags (flagtypes type, wordsizes size, char *value, char *src, char *dst) comprintf("\tdont_care_flags();\n"); start_brace(); { - char* op; + const char* op; switch(type) { case flag_or: op="or"; break; case flag_eor: op="xor"; break; - default: abort(); + default: assert(0); } 
switch (size) { @@ -907,17 +1127,17 @@ genflags (flagtypes type, wordsizes size, char *value, char *src, char *dst) close_brace(); return; } - + case flag_addx: case flag_subx: comprintf("\tdont_care_flags();\n"); { - char* op; + const char* op; switch(type) { case flag_addx: op="adc"; break; case flag_subx: op="sbb"; break; - default: abort(); + default: assert(0); } comprintf("\trestore_carry();\n"); /* Reload the X flag into C */ switch (size) @@ -938,12 +1158,13 @@ genflags (flagtypes type, wordsizes size, char *value, char *src, char *dst) default: return; } } - + /* Need the flags, but possibly not all of them */ switch (type) { case flag_logical_noclobber: failure; + /* fall through */ case flag_and: case flag_or: @@ -951,12 +1172,12 @@ genflags (flagtypes type, wordsizes size, char *value, char *src, char *dst) comprintf("\tdont_care_flags();\n"); start_brace(); { - char* op; + const char* op; switch(type) { case flag_and: op="and"; break; case flag_or: op="or"; break; case flag_eor: op="xor"; break; - default: abort(); + default: assert(0); } switch (size) { @@ -1051,12 +1272,12 @@ genflags (flagtypes type, wordsizes size, char *value, char *src, char *dst) case flag_cmp: comprintf("\tdont_care_flags();\n"); { - char* op; + const char* op; switch(type) { case flag_add: op="add"; break; case flag_sub: op="sub"; break; case flag_cmp: op="cmp"; break; - default: abort(); + default: assert(0); } switch (size) { @@ -1079,20 +1300,20 @@ genflags (flagtypes type, wordsizes size, char *value, char *src, char *dst) duplicate_carry(); } comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); - + return; } - + case flag_addx: case flag_subx: - uses_cmov; + uses_cmov; comprintf("\tdont_care_flags();\n"); { - char* op; + const char* op; switch(type) { case flag_addx: op="adc"; break; case flag_subx: op="sbb"; break; - default: abort(); + default: assert(0); } start_brace(); comprintf("\tint zero=scratchie++;\n" @@ -1101,8 +1322,8 @@ genflags (flagtypes 
type, wordsizes size, char *value, char *src, char *dst) "\tmov_l_ri(zero,0);\n" "\tmov_l_ri(one,-1);\n" "\tmake_flags_live();\n" - "\tcmov_l_rr(zero,one,5);\n" - "\t}\n"); + "\tcmov_l_rr(zero,one,%d);\n" + "\t}\n",NATIVE_CC_NE); comprintf("\trestore_carry();\n"); /* Reload the X flag into C */ switch (size) { @@ -1120,11 +1341,11 @@ genflags (flagtypes type, wordsizes size, char *value, char *src, char *dst) break; } comprintf("\tlive_flags();\n"); - comprintf("\tif (needed_flags&FLAG_Z) {\n" - "\tcmov_l_rr(zero,one,5);\n" - "\tset_zero(zero, one);\n" /* No longer need one */ - "\tlive_flags();\n" - "\t}\n"); + comprintf("\tif (needed_flags&FLAG_Z) {\n"); + comprintf("\tcmov_l_rr(zero,one,%d);\n", NATIVE_CC_NE); + comprintf("\tset_zero(zero, one);\n"); /* No longer need one */ + comprintf("\tlive_flags();\n"); + comprintf("\t}\n"); comprintf("\tend_needflags();\n"); duplicate_carry(); comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); @@ -1136,54 +1357,11 @@ genflags (flagtypes type, wordsizes size, char *value, char *src, char *dst) } } -static void -force_range_for_rox (const char *var, wordsizes size) -{ - /* Could do a modulo operation here... which one is faster? 
*/ - switch (size) - { - case sz_long: - comprintf ("\tif (%s >= 33) %s -= 33;\n", var, var); - break; - case sz_word: - comprintf ("\tif (%s >= 34) %s -= 34;\n", var, var); - comprintf ("\tif (%s >= 17) %s -= 17;\n", var, var); - break; - case sz_byte: - comprintf ("\tif (%s >= 36) %s -= 36;\n", var, var); - comprintf ("\tif (%s >= 18) %s -= 18;\n", var, var); - comprintf ("\tif (%s >= 9) %s -= 9;\n", var, var); - break; - } -} - -static const char * -cmask (wordsizes size) -{ - switch (size) - { - case sz_byte: - return "0x80"; - case sz_word: - return "0x8000"; - case sz_long: - return "0x80000000"; - default: - abort (); - } -} - -static int -source_is_imm1_8 (struct instr *i) -{ - return i->stype == 3; -} - static int /* returns zero for success, non-zero for failure */ -gen_opcode (unsigned long int opcode) +gen_opcode (unsigned int opcode) { struct instr *curi = table68k + opcode; - char* ssize=NULL; + const char* ssize=NULL; insn_n_cycles = 2; global_failure=0; @@ -1223,16 +1401,20 @@ gen_opcode (unsigned long int opcode) case sz_byte: ssize="b"; break; case sz_word: ssize="w"; break; case sz_long: ssize="l"; break; - default: abort(); + default: assert(0); } + (void)ssize; switch (curi->mnemo) { case i_OR: case i_AND: case i_EOR: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); +#ifdef DISABLE_I_OR_AND_EOR + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); switch(curi->mnemo) { case i_OR: genflags (flag_or, curi->size, "", "src", "dst"); break; case i_AND: genflags (flag_and, curi->size, "", "src", "dst"); break; @@ -1244,86 +1426,121 @@ gen_opcode (unsigned long int opcode) case i_ORSR: case i_EORSR: failure; - isjump; + isjump; break; + case i_ANDSR: failure; - isjump; + isjump; break; + case i_SUB: - genamode (curi->smode, "srcreg", 
curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); +#ifdef DISABLE_I_SUB + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); genflags (flag_sub, curi->size, "", "src", "dst"); genastore ("dst", curi->dmode, "dstreg", curi->size, "dst"); break; + case i_SUBA: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", sz_long, "dst", 1, 0); +#ifdef DISABLE_I_SUBA + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); + genamode (curi->dmode, "dstreg", sz_long, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); start_brace(); comprintf("\tint tmp=scratchie++;\n"); switch(curi->size) { case sz_byte: comprintf("\tsign_extend_8_rr(tmp,src);\n"); break; case sz_word: comprintf("\tsign_extend_16_rr(tmp,src);\n"); break; case sz_long: comprintf("\ttmp=src;\n"); break; - default: abort(); + default: assert(0); } comprintf("\tsub_l(dst,tmp);\n"); genastore ("dst", curi->dmode, "dstreg", sz_long, "dst"); break; + case i_SUBX: +#ifdef DISABLE_I_SUBX + failure; +#endif isaddx; - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); genflags (flag_subx, curi->size, "", "src", "dst"); genastore ("dst", curi->dmode, "dstreg", curi->size, "dst"); break; + case i_SBCD: failure; /* I don't think so! 
*/ break; + case i_ADD: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); +#ifdef DISABLE_I_ADD + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); genflags (flag_add, curi->size, "", "src", "dst"); genastore ("dst", curi->dmode, "dstreg", curi->size, "dst"); break; + case i_ADDA: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", sz_long, "dst", 1, 0); +#ifdef DISABLE_I_ADDA + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); + genamode (curi->dmode, "dstreg", sz_long, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); start_brace(); comprintf("\tint tmp=scratchie++;\n"); switch(curi->size) { case sz_byte: comprintf("\tsign_extend_8_rr(tmp,src);\n"); break; case sz_word: comprintf("\tsign_extend_16_rr(tmp,src);\n"); break; case sz_long: comprintf("\ttmp=src;\n"); break; - default: abort(); + default: assert(0); } comprintf("\tadd_l(dst,tmp);\n"); genastore ("dst", curi->dmode, "dstreg", sz_long, "dst"); break; + case i_ADDX: +#ifdef DISABLE_I_ADDX + failure; +#endif isaddx; - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); start_brace(); genflags (flag_addx, curi->size, "", "src", "dst"); genastore ("dst", curi->dmode, "dstreg", curi->size, "dst"); break; + case i_ABCD: failure; /* No BCD maths for me.... 
*/ break; + case i_NEG: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); +#ifdef DISABLE_I_NEG + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); start_brace (); comprintf("\tint dst=scratchie++;\n"); comprintf("\tmov_l_ri(dst,0);\n"); genflags (flag_sub, curi->size, "", "src", "dst"); genastore ("dst", curi->smode, "srcreg", curi->size, "src"); break; + case i_NEGX: - isaddx; - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); +#ifdef DISABLE_I_NEGX + failure; +#endif + isaddx; + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); start_brace (); comprintf("\tint dst=scratchie++;\n"); comprintf("\tmov_l_ri(dst,0);\n"); @@ -1335,33 +1552,47 @@ gen_opcode (unsigned long int opcode) failure; /* Nope! */ break; + case i_CLR: - genamode (curi->smode, "srcreg", curi->size, "src", 2, 0); +#ifdef DISABLE_I_CLR + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC); start_brace(); comprintf("\tint dst=scratchie++;\n"); comprintf("\tmov_l_ri(dst,0);\n"); genflags (flag_logical, curi->size, "dst", "", ""); genastore ("dst", curi->smode, "srcreg", curi->size, "src"); break; + case i_NOT: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); +#ifdef DISABLE_I_NOT + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); start_brace (); comprintf("\tint dst=scratchie++;\n"); comprintf("\tmov_l_ri(dst,0xffffffff);\n"); genflags (flag_eor, curi->size, "", "src", "dst"); genastore ("dst", curi->smode, "srcreg", curi->size, "src"); break; + case i_TST: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); +#ifdef DISABLE_I_TST + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); genflags (flag_logical, curi->size, "src", "", ""); break; case i_BCHG: case i_BCLR: case i_BSET: case i_BTST: 
-/* failure; NEW: from "Ipswitch Town" release */ - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); +#ifdef DISABLE_I_BCHG_BCLR_BSET_BTST + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); start_brace(); comprintf("\tint s=scratchie++;\n" "\tint tmp=scratchie++;\n" @@ -1372,7 +1603,7 @@ gen_opcode (unsigned long int opcode) comprintf("\tand_l_ri(s,31);\n"); { - char* op; + const char* op; int need_write=1; switch(curi->mnemo) { @@ -1380,7 +1611,7 @@ gen_opcode (unsigned long int opcode) case i_BCLR: op="btr"; break; case i_BSET: op="bts"; break; case i_BTST: op="bt"; need_write=0; break; - default: abort(); + default: op=""; assert(0); } comprintf("\t%s_l_rr(dst,s);\n" /* Answer now in C */ "\tsbb_l(s,s);\n" /* s is 0 if bit was 0, -1 otherwise */ @@ -1392,98 +1623,128 @@ gen_opcode (unsigned long int opcode) "\tlive_flags();\n" "\tend_needflags();\n"); } - if (need_write) + if (need_write) genastore ("dst", curi->dmode, "dstreg", curi->size, "dst"); } break; case i_CMPM: case i_CMP: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); +#ifdef DISABLE_I_CMPM_CMP + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); start_brace (); genflags (flag_cmp, curi->size, "", "src", "dst"); break; + case i_CMPA: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", sz_long, "dst", 1, 0); +#ifdef DISABLE_I_CMPA + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); + genamode (curi->dmode, "dstreg", sz_long, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); 
start_brace(); comprintf("\tint tmps=scratchie++;\n"); switch(curi->size) { case sz_byte: comprintf("\tsign_extend_8_rr(tmps,src);\n"); break; case sz_word: comprintf("\tsign_extend_16_rr(tmps,src);\n"); break; case sz_long: comprintf("tmps=src;\n"); break; - default: abort(); + default: assert(0); } genflags (flag_cmp, sz_long, "", "tmps", "dst"); break; /* The next two are coded a little unconventional, but they are doing * weird things... */ + case i_MVPRM: - isjump; + isjump; failure; break; + case i_MVPMR: - isjump; + isjump; failure; break; + case i_MOVE: +#ifdef DISABLE_I_MOVE + failure; +#endif switch(curi->dmode) { case Dreg: case Areg: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC); genflags (flag_mov, curi->size, "", "src", "dst"); genastore ("dst", curi->dmode, "dstreg", curi->size, "dst"); break; default: /* It goes to memory, not a register */ - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC); genflags (flag_logical, curi->size, "src", "", ""); genastore ("src", curi->dmode, "dstreg", curi->size, "dst"); break; } break; + case i_MOVEA: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0); +#ifdef DISABLE_I_MOVEA + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC); start_brace(); comprintf("\tint tmps=scratchie++;\n"); 
switch(curi->size) { case sz_word: comprintf("\tsign_extend_16_rr(dst,src);\n"); break; case sz_long: comprintf("\tmov_l_rr(dst,src);\n"); break; - default: abort(); + default: assert(0); } genastore ("dst", curi->dmode, "dstreg", sz_long, "dst"); break; case i_MVSR2: - isjump; + isjump; failure; break; + case i_MV2SR: - isjump; + isjump; failure; break; + case i_SWAP: - genamode (curi->smode, "srcreg", sz_long, "src", 1, 0); +#ifdef DISABLE_I_SWAP + failure; +#endif + genamode (curi->smode, "srcreg", sz_long, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); comprintf("\tdont_care_flags();\n"); comprintf("\trol_l_ri(src,16);\n"); genflags (flag_logical, sz_long, "src", "", ""); genastore ("src", curi->smode, "srcreg", sz_long, "src"); break; + case i_EXG: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); +#ifdef DISABLE_I_EXG + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); start_brace(); comprintf("\tint tmp=scratchie++;\n" "\tmov_l_rr(tmp,src);\n"); genastore ("dst", curi->smode, "srcreg", curi->size, "src"); genastore ("tmp", curi->dmode, "dstreg", curi->size, "dst"); break; - case i_EXT: - genamode (curi->smode, "srcreg", sz_long, "src", 1, 0); + + case i_EXT: +#ifdef DISABLE_I_EXT + failure; +#endif + genamode (curi->smode, "srcreg", sz_long, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); comprintf("\tdont_care_flags();\n"); start_brace (); switch (curi->size) @@ -1501,141 +1762,213 @@ gen_opcode (unsigned long int opcode) "\tsign_extend_16_rr(src,src);\n"); break; default: - abort (); + assert(0); } genflags (flag_logical, curi->size == sz_word ? sz_word : sz_long, "dst", "", ""); genastore ("dst", curi->smode, "srcreg", curi->size == sz_word ? 
sz_word : sz_long, "src"); break; - case i_MVMEL: - genmovemel ((uae_u16)opcode); + + case i_MVMEL: +#ifdef DISABLE_I_MVEL + failure; +#endif + genmovemel (opcode); break; + case i_MVMLE: - genmovemle ((uae_u16)opcode); +#ifdef DISABLE_I_MVMLE + failure; +#endif + genmovemle (opcode); break; - case i_TRAP: - isjump; + + case i_TRAP: +#ifdef DISABLE_I_TRAP + failure; +#endif + isjump; + mayfail; + start_brace(); + comprintf(" int trapno = srcreg + 32;\n"); + gen_set_fault_pc(); + make_sr(); + comprintf(" compemu_enter_super(sr);\n"); + comprintf(" compemu_exc_make_frame(0, sr, ret, trapno, scratchie);\n"); + comprintf(" forget_about(ret);\n"); + /* m68k_setpc (get_long (regs.vbr + 4*nr)); */ + start_brace(); + comprintf(" int srca = scratchie++;\n"); + comprintf(" mov_l_rm(srca, (uintptr)®s.vbr);\n"); + comprintf(" mov_l_brR(srca, srca, MEMBaseDiff + trapno * 4); mid_bswap_32(srca);\n"); + comprintf(" mov_l_mr((uintptr)®s.pc, srca);\n"); + comprintf(" get_n_addr_jmp(srca, PC_P, scratchie);\n"); + comprintf(" mov_l_mr((uintptr)®s.pc_oldp, PC_P);\n"); + gen_update_next_handler(); + disasm_this_inst(); /* for debugging only */ + /* + * this currently deactivates this feature, since it does not work yet + */ failure; break; + case i_MVR2USP: - isjump; + isjump; failure; break; + case i_MVUSP2R: - isjump; + isjump; failure; break; + case i_RESET: - isjump; + isjump; failure; break; + case i_NOP: break; + case i_STOP: - isjump; + isjump; failure; break; + case i_RTE: - isjump; + isjump; failure; break; + case i_RTD: -/* failure; NEW: from "Ipswitch Town" release */ - genamode (curi->smode, "srcreg", curi->size, "offs", 1, 0); +#ifdef DISABLE_I_RTD + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "offs", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); /* offs is constant */ comprintf("\tadd_l_ri(offs,4);\n"); start_brace(); comprintf("\tint newad=scratchie++;\n" - "\treadlong(15,newad,scratchie);\n" + "\treadlong(SP_REG,newad,scratchie);\n" 
"\tmov_l_mr((uintptr)®s.pc,newad);\n" "\tget_n_addr_jmp(newad,PC_P,scratchie);\n" "\tmov_l_mr((uintptr)®s.pc_oldp,PC_P);\n" "\tm68k_pc_offset=0;\n" - "\tadd_l(15,offs);\n"); + "\tadd_l(SP_REG,offs);\n"); gen_update_next_handler(); - isjump; + isjump; break; + case i_LINK: -/* failure; NEW: from "Ipswitch Town" release */ - genamode (curi->smode, "srcreg", sz_long, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "offs", 1, 0); - comprintf("\tsub_l_ri(15,4);\n" - "\twritelong_clobber(15,src,scratchie);\n" - "\tmov_l_rr(src,15);\n"); +#ifdef DISABLE_I_LINK + failure; +#endif + genamode (curi->smode, "srcreg", sz_long, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); + genamode (curi->dmode, "dstreg", curi->size, "offs", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); + comprintf("\tsub_l_ri(SP_REG,4);\n" + "\twritelong_clobber(SP_REG,src,scratchie);\n" + "\tmov_l_rr(src,SP_REG);\n"); if (curi->size==sz_word) comprintf("\tsign_extend_16_rr(offs,offs);\n"); - comprintf("\tadd_l(15,offs);\n"); + comprintf("\tadd_l(SP_REG,offs);\n"); genastore ("src", curi->smode, "srcreg", sz_long, "src"); break; + case i_UNLK: -/* failure; NEW: from "Ipswitch Town" release */ - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - comprintf("\tmov_l_rr(15,src);\n" - "\treadlong(15,src,scratchie);\n" - "\tadd_l_ri(15,4);\n"); +#ifdef DISABLE_I_UNLK + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); + comprintf("\tmov_l_rr(SP_REG,src);\n" + "\treadlong(SP_REG,src,scratchie);\n" + "\tadd_l_ri(SP_REG,4);\n"); genastore ("src", curi->smode, "srcreg", curi->size, "src"); break; - case i_RTS: + + case i_RTS: +#ifdef DISABLE_I_RTS + failure; +#endif comprintf("\tint newad=scratchie++;\n" - "\treadlong(15,newad,scratchie);\n" + "\treadlong(SP_REG,newad,scratchie);\n" "\tmov_l_mr((uintptr)®s.pc,newad);\n" "\tget_n_addr_jmp(newad,PC_P,scratchie);\n" "\tmov_l_mr((uintptr)®s.pc_oldp,PC_P);\n" "\tm68k_pc_offset=0;\n" - 
"\tlea_l_brr(15,15,4);\n"); + "\tlea_l_brr(SP_REG,SP_REG,4);\n"); gen_update_next_handler(); - isjump; - break; - case i_TRAPV: - isjump; - failure; - break; - case i_RTR: - isjump; - failure; - break; - case i_JSR: - isjump; - genamode (curi->smode, "srcreg", curi->size, "src", 0, 0); - start_brace(); - comprintf("\tuae_u32 retadd=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n"); - comprintf("\tint ret=scratchie++;\n" - "\tmov_l_ri(ret,retadd);\n" - "\tsub_l_ri(15,4);\n" - "\twritelong_clobber(15,ret,scratchie);\n"); - comprintf("\tmov_l_mr((uintptr)®s.pc,srca);\n" - "\tget_n_addr_jmp(srca,PC_P,scratchie);\n" - "\tmov_l_mr((uintptr)®s.pc_oldp,PC_P);\n" - "\tm68k_pc_offset=0;\n"); - gen_update_next_handler(); - break; - case i_JMP: isjump; - genamode (curi->smode, "srcreg", curi->size, "src", 0, 0); + break; + + case i_TRAPV: + isjump; + failure; + break; + + case i_RTR: + isjump; + failure; + break; + + case i_JSR: +#ifdef DISABLE_I_JSR + failure; +#endif + isjump; + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_NO_FETCH, GENA_MOVEM_DO_INC); + start_brace(); + comprintf("\tuae_u32 retadd=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n"); + comprintf("\tint ret=scratchie++;\n" + "\tmov_l_ri(ret,retadd);\n" + "\tsub_l_ri(SP_REG,4);\n" + "\twritelong_clobber(SP_REG,ret,scratchie);\n"); comprintf("\tmov_l_mr((uintptr)®s.pc,srca);\n" "\tget_n_addr_jmp(srca,PC_P,scratchie);\n" "\tmov_l_mr((uintptr)®s.pc_oldp,PC_P);\n" "\tm68k_pc_offset=0;\n"); gen_update_next_handler(); break; + + case i_JMP: +#ifdef DISABLE_I_JMP + failure; +#endif + isjump; + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_NO_FETCH, GENA_MOVEM_DO_INC); + comprintf("\tmov_l_mr((uintptr)®s.pc,srca);\n" + "\tget_n_addr_jmp(srca,PC_P,scratchie);\n" + "\tmov_l_mr((uintptr)®s.pc_oldp,PC_P);\n" + "\tm68k_pc_offset=0;\n"); + gen_update_next_handler(); + break; + case i_BSR: +#ifdef DISABLE_I_BSR + failure; +#endif is_const_jump; - genamode 
(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); start_brace(); comprintf("\tuae_u32 retadd=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n"); comprintf("\tint ret=scratchie++;\n" "\tmov_l_ri(ret,retadd);\n" - "\tsub_l_ri(15,4);\n" - "\twritelong_clobber(15,ret,scratchie);\n"); + "\tsub_l_ri(SP_REG,4);\n" + "\twritelong_clobber(SP_REG,ret,scratchie);\n"); comprintf("\tadd_l_ri(src,m68k_pc_offset_thisinst+2);\n"); comprintf("\tm68k_pc_offset=0;\n"); comprintf("\tadd_l(PC_P,src);\n"); - comprintf("\tcomp_pc_p=(uae_u8*)get_const(PC_P);\n"); + comprintf("\tcomp_pc_p=(uae_u8*)(uintptr)get_const(PC_P);\n"); + gen_update_next_handler(); break; + case i_Bcc: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); +#ifdef DISABLE_I_BCC + failure; +#endif + comprintf("\tuae_u32 v,v1,v2;\n"); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); /* That source is an immediate, so we can clobber it with abandon */ switch(curi->size) { case sz_byte: comprintf("\tsign_extend_8_rr(src,src);\n"); break; @@ -1645,7 +1978,7 @@ gen_opcode (unsigned long int opcode) comprintf("\tsub_l_ri(src,m68k_pc_offset-m68k_pc_offset_thisinst-2);\n"); /* Leave the following as "add" --- it will allow it to be optimized away due to src being a constant ;-) */ - comprintf("\tadd_l_ri(src,(uintptr)comp_pc_p);\n"); + comprintf("\tadd_l_ri(src,(uintptr)comp_pc_p);\n"); comprintf("\tmov_l_ri(PC_P,(uintptr)comp_pc_p);\n"); /* Now they are both constant. 
Might as well fold in m68k_pc_offset */ comprintf("\tadd_l_ri(src,m68k_pc_offset);\n"); @@ -1653,21 +1986,21 @@ gen_opcode (unsigned long int opcode) comprintf("\tm68k_pc_offset=0;\n"); if (curi->cc>=2) { - comprintf("\tuae_u32 v1=get_const(PC_P);\n" - "\tuae_u32 v2=get_const(src);\n" + comprintf("\tv1=get_const(PC_P);\n" + "\tv2=get_const(src);\n" "\tregister_branch(v1,v2,%d);\n", - cond_codes_x86[curi->cc]); + cond_codes[curi->cc]); comprintf("\tmake_flags_live();\n"); /* Load the flags */ - isjump; + isjump; } else { - is_const_jump; + is_const_jump; } switch(curi->cc) { case 0: /* Unconditional jump */ - comprintf("\tmov_l_rr(PC_P,src);\n"); - comprintf("\tcomp_pc_p=(uae_u8*)get_const(PC_P);\n"); + comprintf("\tmov_l_rr(PC_P,src);\n"); + comprintf("\tcomp_pc_p=(uae_u8*)(uintptr)get_const(PC_P);\n"); break; case 1: break; /* This is silly! */ case 8: failure; break; /* Work out details! FIXME */ @@ -1684,43 +2017,55 @@ gen_opcode (unsigned long int opcode) case 12: case 13: case 14: - case 15: + case 15: break; - default: abort(); + default: assert(0); } break; + case i_LEA: - genamode (curi->smode, "srcreg", curi->size, "src", 0, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0); +#ifdef DISABLE_I_LEA + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_NO_FETCH, GENA_MOVEM_DO_INC); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC); genastore ("srca", curi->dmode, "dstreg", curi->size, "dst"); break; + case i_PEA: - if (table68k[opcode].smode==Areg || - table68k[opcode].smode==Aind || - table68k[opcode].smode==Aipi || - table68k[opcode].smode==Apdi || - table68k[opcode].smode==Ad16 || - table68k[opcode].smode==Ad8r) +#ifdef DISABLE_I_PEA + failure; +#endif + if (table68k[opcode].smode==Areg || + table68k[opcode].smode==Aind || + table68k[opcode].smode==Aipi || + table68k[opcode].smode==Apdi || + table68k[opcode].smode==Ad16 || + table68k[opcode].smode==Ad8r) comprintf("if 
(srcreg==7) dodgy=1;\n"); - genamode (curi->smode, "srcreg", curi->size, "src", 0, 0); - genamode (Apdi, "7", sz_long, "dst", 2, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_NO_FETCH, GENA_MOVEM_DO_INC); + genamode (Apdi, "7", sz_long, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC); genastore ("srca", Apdi, "7", sz_long, "dst"); break; + case i_DBcc: - isjump; +#ifdef DISABLE_I_DBCC + failure; +#endif + isjump; uses_cmov; - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "offs", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); + genamode (curi->dmode, "dstreg", curi->size, "offs", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); /* That offs is an immediate, so we can clobber it with abandon */ switch(curi->size) { case sz_word: comprintf("\tsign_extend_16_rr(offs,offs);\n"); break; - default: abort(); /* Seems this only comes in word flavour */ + default: assert(0); /* Seems this only comes in word flavour */ } - comprintf("\tsub_l_ri(offs,m68k_pc_offset-m68k_pc_offset_thisinst-2);\n"); - comprintf("\tadd_l_ri(offs,(uintptr)comp_pc_p);\n"); /* New PC, - once the + comprintf("\tsub_l_ri(offs,m68k_pc_offset-m68k_pc_offset_thisinst-2);\n"); + comprintf("\tadd_l_ri(offs,(uintptr)comp_pc_p);\n"); /* New PC, + once the offset_68k is * also added */ /* Let's fold in the m68k_pc_offset at this point */ @@ -1735,21 +2080,20 @@ gen_opcode (unsigned long int opcode) comprintf("\tmake_flags_live();\n"); /* Load the flags */ } - if (curi->size!=sz_word) - abort(); - + assert (curi->size==sz_word); switch(curi->cc) { case 0: /* This is an elaborate nop? 
*/ break; - case 1: + case 1: comprintf("\tstart_needflags();\n"); comprintf("\tsub_w_ri(src,1);\n"); - comprintf("\t end_needflags();\n"); + comprintf("\tend_needflags();\n"); start_brace(); - comprintf("\tuae_u32 v1=get_const(PC_P);\n"); - comprintf("\tuae_u32 v2=get_const(offs);\n" - "\tregister_branch(v1,v2,3);\n"); + comprintf("\tuae_u32 v2,v;\n" + "\tuae_u32 v1=get_const(PC_P);\n"); + comprintf("\tv2=get_const(offs);\n" + "\tregister_branch(v1,v2,%d);\n", NATIVE_CC_CC); break; case 8: failure; break; /* Work out details! FIXME */ @@ -1771,38 +2115,40 @@ gen_opcode (unsigned long int opcode) comprintf("\tlea_l_brr(scratchie,src,(uae_s32)-1);\n" "\tmov_w_rr(src,scratchie);\n"); comprintf("\tcmov_l_rr(offs,PC_P,%d);\n", - cond_codes_x86[curi->cc]); + cond_codes[curi->cc]); comprintf("\tcmov_l_rr(src,nsrc,%d);\n", - cond_codes_x86[curi->cc]); - /* OK, now for cc=true, we have src==nsrc and offs==PC_P, + cond_codes[curi->cc]); + /* OK, now for cc=true, we have src==nsrc and offs==PC_P, so whether we move them around doesn't matter. 
However, if cc=false, we have offs==jump_pc, and src==nsrc-1 */ - comprintf("\t start_needflags();\n"); - comprintf("\ttest_w_rr(nsrc,nsrc);\n"); - comprintf("\t end_needflags();\n"); - comprintf("\tcmov_l_rr(PC_P,offs,5);\n"); + comprintf("\tstart_needflags();\n"); + comprintf("\ttest_w_rr(nsrc,nsrc);\n"); + comprintf("\tend_needflags();\n"); + comprintf("\tcmov_l_rr(PC_P,offs,%d);\n", NATIVE_CC_NE); break; - default: abort(); + default: assert(0); } genastore ("src", curi->smode, "srcreg", curi->size, "src"); gen_update_next_handler(); break; case i_Scc: -/* failure; NEW: from "Ipswitch Town" release */ - genamode (curi->smode, "srcreg", curi->size, "src", 2, 0); +#ifdef DISABLE_I_SCC + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC); start_brace (); comprintf ("\tint val = scratchie++;\n"); /* We set val to 0 if we really should use 255, and to 1 for real 0 */ switch(curi->cc) { case 0: /* Unconditional set */ - comprintf("\tmov_l_ri(val,0);\n"); + comprintf("\tmov_l_ri(val,0);\n"); break; - case 1: + case 1: /* Unconditional not-set */ - comprintf("\tmov_l_ri(val,1);\n"); + comprintf("\tmov_l_ri(val,1);\n"); break; case 8: failure; break; /* Work out details! FIXME */ case 9: failure; break; /* Not critical, though! 
*/ @@ -1822,26 +2168,31 @@ gen_opcode (unsigned long int opcode) comprintf("\tmake_flags_live();\n"); /* Load the flags */ /* All condition codes can be inverted by changing the LSB */ comprintf("\tsetcc(val,%d);\n", - cond_codes_x86[curi->cc]^1); break; - default: abort(); + cond_codes[curi->cc]^1); break; + default: assert(0); } comprintf("\tsub_b_ri(val,1);\n"); genastore ("val", curi->smode, "srcreg", curi->size, "src"); break; - case i_DIVU: - isjump; + + case i_DIVU: + isjump; failure; break; + case i_DIVS: - isjump; + isjump; failure; break; - case i_MULU: -/* failure; NEW: from "Ipswitch Town" release */ + + case i_MULU: +#ifdef DISABLE_I_MULU + failure; +#endif comprintf("\tdont_care_flags();\n"); - genamode (curi->smode, "srcreg", sz_word, "src", 1, 0); - genamode (curi->dmode, "dstreg", sz_word, "dst", 1, 0); - /* To do 16x16 unsigned multiplication, we actually use + genamode (curi->smode, "srcreg", sz_word, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); + genamode (curi->dmode, "dstreg", sz_word, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); + /* To do 16x16 unsigned multiplication, we actually use 32x32 signed, and zero-extend the registers first. 
That solves the problem of MUL needing dedicated registers on the x86 */ @@ -1851,691 +2202,428 @@ gen_opcode (unsigned long int opcode) genflags (flag_logical, sz_long, "dst", "", ""); genastore ("dst", curi->dmode, "dstreg", sz_long, "dst"); break; + case i_MULS: -/* failure; NEW: from "Ipswitch Town" release */ +#ifdef DISABLE_I_MULS + failure; +#endif comprintf("\tdont_care_flags();\n"); - genamode (curi->smode, "srcreg", sz_word, "src", 1, 0); - genamode (curi->dmode, "dstreg", sz_word, "dst", 1, 0); + genamode (curi->smode, "srcreg", sz_word, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); + genamode (curi->dmode, "dstreg", sz_word, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); comprintf("\tsign_extend_16_rr(scratchie,src);\n" "\tsign_extend_16_rr(dst,dst);\n" "\timul_32_32(dst,scratchie);\n"); genflags (flag_logical, sz_long, "dst", "", ""); genastore ("dst", curi->dmode, "dstreg", sz_long, "dst"); break; - case i_CHK: - isjump; + + case i_CHK: + isjump; failure; break; case i_CHK2: - isjump; + isjump; failure; break; case i_ASR: - mayfail; +#ifdef DISABLE_I_ASR + failure; +#endif + mayfail; if (curi->smode==Dreg) { - comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" - " FAIL(1);\n" - " return;\n" - "} \n"); - start_brace(); + comprintf( + " if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " " RETURN "\n" + " }\n"); + start_brace(); } comprintf("\tdont_care_flags();\n"); - genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0); - if (curi->smode!=immi) { -/* failure; UNTESTED: NEW: from "Ipswitch Town" release */ - if (!noflags) { - uses_cmov; - start_brace(); - comprintf("\tint highmask;\n" - "\tint cdata=scratchie++;\n" - "\tint sdata=scratchie++;\n" - "\tint tmpcnt=scratchie++;\n"); - comprintf("\tmov_l_rr(sdata,data);\n" - "\tmov_l_rr(cdata,data);\n" - "\tmov_l_rr(tmpcnt,cnt);\n"); - switch (curi->size) { - case sz_byte: comprintf("\tshra_b_ri(sdata,7);\n"); break; - case sz_word: 
comprintf("\tshra_w_ri(sdata,15);\n"); break; - case sz_long: comprintf("\tshra_l_ri(sdata,31);\n"); break; - default: abort(); - } - /* sdata is now the MSB propagated to all bits for the - register of specified size */ - comprintf("\tand_l_ri(tmpcnt,63);\n"); - switch(curi->size) { - case sz_byte: comprintf("\tshra_b_rr(data,tmpcnt);\n" - "\thighmask=0x38;\n"); - break; - case sz_word: comprintf("\tshra_w_rr(data,tmpcnt);\n" - "\thighmask=0x30;\n"); - break; - case sz_long: comprintf("\tshra_l_rr(data,tmpcnt);\n" - "\thighmask=0x20;\n"); - break; - } - comprintf("\ttest_l_ri(tmpcnt,highmask);\n"); - switch (curi->size) { - case sz_byte: comprintf("\tcmov_b_rr(data,sdata,NATIVE_CC_NE);\n"); break; - case sz_word: comprintf("\tcmov_w_rr(data,sdata,NATIVE_CC_NE);\n"); break; - case sz_long: comprintf("\tcmov_l_rr(data,sdata,NATIVE_CC_NE);\n"); break; - } - - /* Result of shift is now in data. Now we need to determine - the carry by shifting cdata one less */ - /* NOTE: carry bit is cleared if shift count is zero */ - comprintf("\tmov_l_ri(scratchie,0);\n" - "\ttest_l_rr(tmpcnt,tmpcnt);\n" - "\tcmov_l_rr(sdata,scratchie,NATIVE_CC_EQ);\n" - "\tforget_about(scratchie);\n"); - comprintf("\tsub_l_ri(tmpcnt,1);\n"); - switch(curi->size) { - case sz_byte: comprintf("\tshra_b_rr(cdata,tmpcnt);\n");break; - case sz_word: comprintf("\tshra_w_rr(cdata,tmpcnt);\n");break; - case sz_long: comprintf("\tshra_l_rr(cdata,tmpcnt);\n");break; - default: abort(); - } - /* If the shift count was higher than the width, we need - to pick up the sign from original data (sdata) */ - /* NOTE: for shift count of zero, the following holds - true and cdata contains 0 so that carry bit is cleared */ - comprintf("\ttest_l_ri(tmpcnt,highmask);\n" - "\tforget_about(tmpcnt);\n" - "\tcmov_l_rr(cdata,sdata,NATIVE_CC_NE);\n"); + genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); + genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, 
GENA_MOVEM_DO_INC); - /* And create the flags (preserve X flag if shift count is zero) */ - comprintf("\ttest_l_ri(cnt,63);\n" - "\tcmov_l_rr(FLAGX,cdata,NATIVE_CC_NE);\n"); + start_brace(); + if (!noflags) comprintf("\tstart_needflags();\n"); - comprintf("\tif (needed_flags & FLAG_ZNV)\n"); - switch(curi->size) { - case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break; - case sz_word: comprintf("\t test_w_rr(data,data);\n"); break; - case sz_long: comprintf("\t test_l_rr(data,data);\n"); break; - } - comprintf("\t bt_l_ri(cdata,0);\n"); /* Set C */ - comprintf("\t live_flags();\n"); - comprintf("\t end_needflags();\n"); - comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); - genastore ("data", curi->dmode, "dstreg", curi->size, "data"); - } - else { + if (curi->smode!=immi) { uses_cmov; start_brace(); - comprintf("\tint highmask;\n" - "\tint width;\n" - "\tint highshift=scratchie++;\n"); + comprintf("\tint zero = scratchie++;\n"); + comprintf("\tint tmpcnt = scratchie++;\n"); + comprintf("\tint minus1 = scratchie++;\n"); + comprintf("\tint cdata = minus1;\n"); + comprintf("\tmov_l_rr(tmpcnt,cnt);\n"); + comprintf("\tand_l_ri(tmpcnt,63);\n"); + comprintf("\tmov_l_ri(zero, 0);\n"); + comprintf("\tmov_l_ri(minus1, -1);\n"); switch(curi->size) { - case sz_byte: comprintf("\tshra_b_rr(data,cnt);\n" - "\thighmask=0x38;\n" - "\twidth=8;\n"); - break; - case sz_word: comprintf("\tshra_w_rr(data,cnt);\n" - "\thighmask=0x30;\n" - "\twidth=16;\n"); - break; - case sz_long: comprintf("\tshra_l_rr(data,cnt);\n" - "\thighmask=0x20;\n" - "\twidth=32;\n"); - break; - default: abort(); + case sz_byte: + comprintf("\ttest_b_rr(data,data);\n"); + comprintf("\tcmov_l_rr(zero, minus1, NATIVE_CC_MI);\n"); + comprintf("\ttest_l_ri(tmpcnt, 0x38);\n"); + comprintf("\tmov_l_rr(cdata,data);\n"); + comprintf("\tcmov_l_rr(cdata, zero, NATIVE_CC_NE);\n"); + comprintf("\tshra_b_rr(cdata,tmpcnt);\n"); + comprintf("\tmov_b_rr(data,cdata);\n"); + break; + case sz_word: + 
comprintf("\ttest_w_rr(data,data);\n"); + comprintf("\tcmov_l_rr(zero, minus1, NATIVE_CC_MI);\n"); + comprintf("\ttest_l_ri(tmpcnt, 0x30);\n"); + comprintf("\tmov_l_rr(cdata,data);\n"); + comprintf("\tcmov_l_rr(cdata, zero, NATIVE_CC_NE);\n"); + comprintf("\tshra_w_rr(cdata,tmpcnt);\n"); + comprintf("\tmov_w_rr(data,cdata);\n"); + break; + case sz_long: + comprintf("\ttest_l_rr(data,data);\n"); + comprintf("\tcmov_l_rr(zero, minus1, NATIVE_CC_MI);\n"); + comprintf("\ttest_l_ri(tmpcnt, 0x20);\n"); + comprintf("\tmov_l_rr(cdata,data);\n"); + comprintf("\tcmov_l_rr(cdata, zero, NATIVE_CC_NE);\n"); + comprintf("\tshra_l_rr(cdata,tmpcnt);\n"); + comprintf("\tmov_l_rr(data,cdata);\n"); + break; + default: assert(0); } - comprintf("test_l_ri(cnt,highmask);\n" - "mov_l_ri(highshift,0);\n" - "mov_l_ri(scratchie,width/2);\n" - "cmov_l_rr(highshift,scratchie,5);\n"); - /* The x86 masks out bits, so we now make sure that things - really get shifted as much as planned */ - switch(curi->size) { - case sz_byte: comprintf("\tshra_b_rr(data,highshift);\n");break; - case sz_word: comprintf("\tshra_w_rr(data,highshift);\n");break; - case sz_long: comprintf("\tshra_l_rr(data,highshift);\n");break; - default: abort(); - } - /* And again */ - switch(curi->size) { - case sz_byte: comprintf("\tshra_b_rr(data,highshift);\n");break; - case sz_word: comprintf("\tshra_w_rr(data,highshift);\n");break; - case sz_long: comprintf("\tshra_l_rr(data,highshift);\n");break; - default: abort(); - } - genastore ("data", curi->dmode, "dstreg", curi->size, "data"); - } + /* Result of shift is now in data. 
*/ } else { - start_brace(); - comprintf("\tint tmp=scratchie++;\n" - "\tint bp;\n" - "\tmov_l_rr(tmp,data);\n"); switch(curi->size) { - case sz_byte: comprintf("\tshra_b_ri(data,srcreg);\n" - "\tbp=srcreg-1;\n"); break; - case sz_word: comprintf("\tshra_w_ri(data,srcreg);\n" - "\tbp=srcreg-1;\n"); break; - case sz_long: comprintf("\tshra_l_ri(data,srcreg);\n" - "\tbp=srcreg-1;\n"); break; - default: abort(); + case sz_byte: comprintf("\tshra_b_ri(data,srcreg);\n"); break; + case sz_word: comprintf("\tshra_w_ri(data,srcreg);\n"); break; + case sz_long: comprintf("\tshra_l_ri(data,srcreg);\n"); break; + default: assert(0); } - - if (!noflags) { - comprintf("\tstart_needflags();\n"); - comprintf("\tif (needed_flags & FLAG_ZNV)\n"); - switch(curi->size) { - case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break; - case sz_word: comprintf("\t test_w_rr(data,data);\n"); break; - case sz_long: comprintf("\t test_l_rr(data,data);\n"); break; - } - comprintf("\t bt_l_ri(tmp,bp);\n"); /* Set C */ - comprintf("\t live_flags();\n"); - comprintf("\t end_needflags();\n"); - comprintf("\t duplicate_carry();\n"); - comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); - } - genastore ("data", curi->dmode, "dstreg", curi->size, "data"); } + /* And create the flags */ + if (!noflags) { + comprintf("\tlive_flags();\n"); + comprintf("\tend_needflags();\n"); + if (curi->smode!=immi) + comprintf("\tsetcc_for_cntzero(tmpcnt, data, %d);\n", curi->size == sz_byte ? 1 : curi->size == sz_word ? 
2 : 4); + else + comprintf("\tduplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } + genastore ("data", curi->dmode, "dstreg", curi->size, "data"); break; case i_ASL: -/* failure; NEW: from "Ipswitch Town" release */ - mayfail; +#ifdef DISABLE_I_ASL + failure; +#endif + mayfail; if (curi->smode==Dreg) { - comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" - " FAIL(1);\n" - " return;\n" - "} \n"); - start_brace(); + comprintf( + " if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " " RETURN "\n" + " }\n"); + start_brace(); } comprintf("\tdont_care_flags();\n"); /* Except for the handling of the V flag, this is identical to LSL. The handling of V is, uhm, unpleasant, so if it's needed, let the normal emulation handle it. Shoulders of giants kinda thing ;-) */ - comprintf("if (needed_flags & FLAG_V) {\n" - " FAIL(1);\n" - " return;\n" - "} \n"); - - genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0); - if (curi->smode!=immi) { - if (!noflags) { - uses_cmov; - start_brace(); - comprintf("\tint highmask;\n" - "\tint cdata=scratchie++;\n" - "\tint tmpcnt=scratchie++;\n"); - comprintf("\tmov_l_rr(tmpcnt,cnt);\n" - "\tand_l_ri(tmpcnt,63);\n" - "\tmov_l_ri(cdata,0);\n" - "\tcmov_l_rr(cdata,data,5);\n"); - /* cdata is now either data (for shift count!=0) or - 0 (for shift count==0) */ - switch(curi->size) { - case sz_byte: comprintf("\tshll_b_rr(data,cnt);\n" - "\thighmask=0x38;\n"); - break; - case sz_word: comprintf("\tshll_w_rr(data,cnt);\n" - "\thighmask=0x30;\n"); - break; - case sz_long: comprintf("\tshll_l_rr(data,cnt);\n" - "\thighmask=0x20;\n"); - break; - default: abort(); - } - comprintf("test_l_ri(cnt,highmask);\n" - "mov_l_ri(scratchie,0);\n" - "cmov_l_rr(scratchie,data,4);\n"); - switch(curi->size) { - case sz_byte: comprintf("\tmov_b_rr(data,scratchie);\n");break; - case sz_word: comprintf("\tmov_w_rr(data,scratchie);\n");break; - 
case sz_long: comprintf("\tmov_l_rr(data,scratchie);\n");break; - default: abort(); - } - /* Result of shift is now in data. Now we need to determine - the carry by shifting cdata one less */ - comprintf("\tsub_l_ri(tmpcnt,1);\n"); - switch(curi->size) { - case sz_byte: comprintf("\tshll_b_rr(cdata,tmpcnt);\n");break; - case sz_word: comprintf("\tshll_w_rr(cdata,tmpcnt);\n");break; - case sz_long: comprintf("\tshll_l_rr(cdata,tmpcnt);\n");break; - default: abort(); - } - comprintf("test_l_ri(tmpcnt,highmask);\n" - "mov_l_ri(scratchie,0);\n" - "cmov_l_rr(cdata,scratchie,5);\n"); - /* And create the flags */ - comprintf("\tstart_needflags();\n"); + comprintf( + " if (needed_flags & FLAG_V) {\n" + " FAIL(1);\n" + " " RETURN "\n" + " }\n"); - comprintf("\tif (needed_flags & FLAG_ZNV)\n"); - switch(curi->size) { - case sz_byte: comprintf("\t test_b_rr(data,data);\n"); - comprintf("\t bt_l_ri(cdata,7);\n"); break; - case sz_word: comprintf("\t test_w_rr(data,data);\n"); - comprintf("\t bt_l_ri(cdata,15);\n"); break; - case sz_long: comprintf("\t test_l_rr(data,data);\n"); - comprintf("\t bt_l_ri(cdata,31);\n"); break; - } - comprintf("\t live_flags();\n"); - comprintf("\t end_needflags();\n"); - comprintf("\t duplicate_carry();\n"); - comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); - genastore ("data", curi->dmode, "dstreg", curi->size, "data"); - } - else { + genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); + genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); + + start_brace(); + if (!noflags) + comprintf("\tstart_needflags();\n"); + if (curi->smode!=immi) { uses_cmov; start_brace(); - comprintf("\tint highmask;\n"); + comprintf("\tint cdata = scratchie++;\n"); + comprintf("\tint tmpcnt=scratchie++;\n"); + comprintf("\tmov_l_rr(tmpcnt,cnt);\n"); + comprintf("\tand_l_ri(tmpcnt,63);\n"); + comprintf("\tmov_l_ri(cdata, 0);\n"); switch(curi->size) { - case sz_byte: 
comprintf("\tshll_b_rr(data,cnt);\n" - "\thighmask=0x38;\n"); - break; - case sz_word: comprintf("\tshll_w_rr(data,cnt);\n" - "\thighmask=0x30;\n"); - break; - case sz_long: comprintf("\tshll_l_rr(data,cnt);\n" - "\thighmask=0x20;\n"); - break; - default: abort(); + case sz_byte: + comprintf("\ttest_l_ri(tmpcnt, 0x38);\n"); + comprintf("\tcmov_l_rr(cdata, data, NATIVE_CC_EQ);\n"); + comprintf("\tshll_b_rr(cdata,tmpcnt);\n"); + comprintf("\tmov_b_rr(data, cdata);\n"); + break; + case sz_word: + comprintf("\ttest_l_ri(tmpcnt, 0x30);\n"); + comprintf("\tcmov_l_rr(cdata, data, NATIVE_CC_EQ);\n"); + comprintf("\tshll_w_rr(cdata,tmpcnt);\n"); + comprintf("\tmov_w_rr(data, cdata);\n"); + break; + case sz_long: + comprintf("\ttest_l_ri(tmpcnt, 0x20);\n"); + comprintf("\tcmov_l_rr(cdata, data, NATIVE_CC_EQ);\n"); + comprintf("\tshll_l_rr(cdata,tmpcnt);\n"); + comprintf("\tmov_l_rr(data, cdata);\n"); + break; + default: assert(0); } - comprintf("test_l_ri(cnt,highmask);\n" - "mov_l_ri(scratchie,0);\n" - "cmov_l_rr(scratchie,data,4);\n"); - switch(curi->size) { - case sz_byte: comprintf("\tmov_b_rr(data,scratchie);\n");break; - case sz_word: comprintf("\tmov_w_rr(data,scratchie);\n");break; - case sz_long: comprintf("\tmov_l_rr(data,scratchie);\n");break; - default: abort(); - } - genastore ("data", curi->dmode, "dstreg", curi->size, "data"); - } + /* Result of shift is now in data. 
*/ } else { - start_brace(); - comprintf("\tint tmp=scratchie++;\n" - "\tint bp;\n" - "\tmov_l_rr(tmp,data);\n"); switch(curi->size) { - case sz_byte: comprintf("\tshll_b_ri(data,srcreg);\n" - "\tbp=8-srcreg;\n"); break; - case sz_word: comprintf("\tshll_w_ri(data,srcreg);\n" - "\tbp=16-srcreg;\n"); break; - case sz_long: comprintf("\tshll_l_ri(data,srcreg);\n" - "\tbp=32-srcreg;\n"); break; - default: abort(); + case sz_byte: comprintf("\tshll_b_ri(data,srcreg);\n"); break; + case sz_word: comprintf("\tshll_w_ri(data,srcreg);\n"); break; + case sz_long: comprintf("\tshll_l_ri(data,srcreg);\n"); break; + default: assert(0); } - - if (!noflags) { - comprintf("\tstart_needflags();\n"); - comprintf("\tif (needed_flags & FLAG_ZNV)\n"); - switch(curi->size) { - case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break; - case sz_word: comprintf("\t test_w_rr(data,data);\n"); break; - case sz_long: comprintf("\t test_l_rr(data,data);\n"); break; - } - comprintf("\t bt_l_ri(tmp,bp);\n"); /* Set C */ - comprintf("\t live_flags();\n"); - comprintf("\t end_needflags();\n"); - comprintf("\t duplicate_carry();\n"); - comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); - } - genastore ("data", curi->dmode, "dstreg", curi->size, "data"); } + /* And create the flags */ + if (!noflags) { + comprintf("\tlive_flags();\n"); + comprintf("\tend_needflags();\n"); + if (curi->smode!=immi) + comprintf("\tsetcc_for_cntzero(tmpcnt, data, %d);\n", curi->size == sz_byte ? 1 : curi->size == sz_word ? 
2 : 4); + else + comprintf("\tduplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } + genastore ("data", curi->dmode, "dstreg", curi->size, "data"); break; - - case i_LSR: -/* failure; NEW: from "Ipswitch Town" release */ - mayfail; + + case i_LSR: +#ifdef DISABLE_I_LSR + failure; +#endif + mayfail; if (curi->smode==Dreg) { - comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" - " FAIL(1);\n" - " return;\n" - "} \n"); - start_brace(); + comprintf( + " if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " " RETURN "\n" + " }\n"); + start_brace(); } comprintf("\tdont_care_flags();\n"); - genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0); - if (curi->smode!=immi) { - if (!noflags) { - uses_cmov; - start_brace(); - comprintf("\tint highmask;\n" - "\tint cdata=scratchie++;\n" - "\tint tmpcnt=scratchie++;\n"); - comprintf("\tmov_l_rr(tmpcnt,cnt);\n" - "\tand_l_ri(tmpcnt,63);\n" - "\tmov_l_ri(cdata,0);\n" - "\tcmov_l_rr(cdata,data,NATIVE_CC_NE);\n"); - /* cdata is now either data (for shift count!=0) or - 0 (for shift count==0) */ - switch(curi->size) { - case sz_byte: comprintf("\tshrl_b_rr(data,tmpcnt);\n" - "\thighmask=0x38;\n"); - break; - case sz_word: comprintf("\tshrl_w_rr(data,tmpcnt);\n" - "\thighmask=0x30;\n"); - break; - case sz_long: comprintf("\tshrl_l_rr(data,tmpcnt);\n" - "\thighmask=0x20;\n"); - break; - default: abort(); - } - comprintf("\ttest_l_ri(tmpcnt,highmask);\n" - "\rmov_l_ri(scratchie,0);\n"); - if (curi->size == sz_long) - comprintf("\tcmov_l_rr(data,scratchie,NATIVE_CC_NE);\n"); - else { - comprintf("\tcmov_l_rr(scratchie,data,NATIVE_CC_EQ);\n"); - switch(curi->size) { - case sz_byte: comprintf("\tmov_b_rr(data,scratchie);\n");break; - case sz_word: comprintf("\tmov_w_rr(data,scratchie);\n");break; - default: abort(); - } - } - /* Result of shift is now in data. 
Now we need to determine - the carry by shifting cdata one less */ - comprintf("\tsub_l_ri(tmpcnt,1);\n"); - comprintf("\tshrl_l_rr(cdata,tmpcnt);\n"); - comprintf("\ttest_l_ri(tmpcnt,highmask);\n"); - comprintf("\tforget_about(tmpcnt);\n"); - if (curi->size != sz_long) /* scratchie is still live for LSR.L */ - comprintf("\tmov_l_ri(scratchie,0);\n"); - comprintf("\tcmov_l_rr(cdata,scratchie,NATIVE_CC_NE);\n"); - comprintf("\tforget_about(scratchie);\n"); - /* And create the flags (preserve X flag if shift count is zero) */ - comprintf("\ttest_l_ri(cnt,63);\n" - "\tcmov_l_rr(FLAGX,cdata,NATIVE_CC_NE);\n"); + genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); + genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); + + start_brace(); + if (!noflags) comprintf("\tstart_needflags();\n"); - comprintf("\tif (needed_flags & FLAG_ZNV)\n"); - switch(curi->size) { - case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break; - case sz_word: comprintf("\t test_w_rr(data,data);\n"); break; - case sz_long: comprintf("\t test_l_rr(data,data);\n"); break; - } - comprintf("\t bt_l_ri(cdata,0);\n"); /* Set C */ - comprintf("\t live_flags();\n"); - comprintf("\t end_needflags();\n"); - comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); - genastore ("data", curi->dmode, "dstreg", curi->size, "data"); - } - else { + if (curi->smode!=immi) { uses_cmov; start_brace(); - comprintf("\tint highmask;\n"); + comprintf("\tint cdata = scratchie++;\n"); + comprintf("\tint tmpcnt=scratchie++;\n"); + comprintf("\tmov_l_rr(tmpcnt,cnt);\n"); + comprintf("\tand_l_ri(tmpcnt,63);\n"); + comprintf("\tmov_l_ri(cdata, 0);\n"); switch(curi->size) { - case sz_byte: comprintf("\tshrl_b_rr(data,cnt);\n" - "\thighmask=0x38;\n"); - break; - case sz_word: comprintf("\tshrl_w_rr(data,cnt);\n" - "\thighmask=0x30;\n"); - break; - case sz_long: comprintf("\tshrl_l_rr(data,cnt);\n" - "\thighmask=0x20;\n"); - break; - 
default: abort(); + case sz_byte: + comprintf("\ttest_l_ri(tmpcnt, 0x38);\n"); + comprintf("\tcmov_l_rr(cdata, data, NATIVE_CC_EQ);\n"); + comprintf("\tshrl_b_rr(cdata,tmpcnt);\n"); + comprintf("\tmov_b_rr(data, cdata);\n"); + break; + case sz_word: + comprintf("\ttest_l_ri(tmpcnt, 0x30);\n"); + comprintf("\tcmov_l_rr(cdata, data, NATIVE_CC_EQ);\n"); + comprintf("\tshrl_w_rr(cdata,tmpcnt);\n"); + comprintf("\tmov_w_rr(data, cdata);\n"); + break; + case sz_long: + comprintf("\ttest_l_ri(tmpcnt, 0x20);\n"); + comprintf("\tcmov_l_rr(cdata, data, NATIVE_CC_EQ);\n"); + comprintf("\tshrl_l_rr(cdata, tmpcnt);\n"); + comprintf("\tmov_l_rr(data, cdata);\n"); + break; + default: assert(0); } - comprintf("test_l_ri(cnt,highmask);\n" - "mov_l_ri(scratchie,0);\n" - "cmov_l_rr(scratchie,data,4);\n"); - switch(curi->size) { - case sz_byte: comprintf("\tmov_b_rr(data,scratchie);\n");break; - case sz_word: comprintf("\tmov_w_rr(data,scratchie);\n");break; - case sz_long: comprintf("\tmov_l_rr(data,scratchie);\n");break; - default: abort(); - } - genastore ("data", curi->dmode, "dstreg", curi->size, "data"); - } + /* Result of shift is now in data. 
*/ } else { - start_brace(); - comprintf("\tint tmp=scratchie++;\n" - "\tint bp;\n" - "\tmov_l_rr(tmp,data);\n"); switch(curi->size) { - case sz_byte: comprintf("\tshrl_b_ri(data,srcreg);\n" - "\tbp=srcreg-1;\n"); break; - case sz_word: comprintf("\tshrl_w_ri(data,srcreg);\n" - "\tbp=srcreg-1;\n"); break; - case sz_long: comprintf("\tshrl_l_ri(data,srcreg);\n" - "\tbp=srcreg-1;\n"); break; - default: abort(); + case sz_byte: comprintf("\tshrl_b_ri(data,srcreg);\n"); break; + case sz_word: comprintf("\tshrl_w_ri(data,srcreg);\n"); break; + case sz_long: comprintf("\tshrl_l_ri(data,srcreg);\n"); break; + default: assert(0); } - - if (!noflags) { - comprintf("\tstart_needflags();\n"); - comprintf("\tif (needed_flags & FLAG_ZNV)\n"); - switch(curi->size) { - case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break; - case sz_word: comprintf("\t test_w_rr(data,data);\n"); break; - case sz_long: comprintf("\t test_l_rr(data,data);\n"); break; - } - comprintf("\t bt_l_ri(tmp,bp);\n"); /* Set C */ - comprintf("\t live_flags();\n"); - comprintf("\t end_needflags();\n"); - comprintf("\t duplicate_carry();\n"); - comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); - } - genastore ("data", curi->dmode, "dstreg", curi->size, "data"); } + /* And create the flags */ + if (!noflags) { + comprintf("\tlive_flags();\n"); + comprintf("\tend_needflags();\n"); + if (curi->smode!=immi) + comprintf("\tsetcc_for_cntzero(tmpcnt, data, %d);\n", curi->size == sz_byte ? 1 : curi->size == sz_word ? 
2 : 4); + else + comprintf("\tduplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } + genastore ("data", curi->dmode, "dstreg", curi->size, "data"); break; - case i_LSL: - mayfail; + case i_LSL: +#ifdef DISABLE_I_LSL + failure; +#endif + mayfail; if (curi->smode==Dreg) { - comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" - " FAIL(1);\n" - " return;\n" - "} \n"); - start_brace(); + comprintf( + " if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " " RETURN "\n" + " }\n"); + start_brace(); } comprintf("\tdont_care_flags();\n"); - genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0); - if (curi->smode!=immi) { -/* failure; UNTESTED: NEW: from "Ipswitch Town" release */ - if (!noflags) { - uses_cmov; - start_brace(); - comprintf("\tint highmask;\n" - "\tint cdata=scratchie++;\n" - "\tint tmpcnt=scratchie++;\n"); - comprintf("\tmov_l_rr(tmpcnt,cnt);\n" - "\tand_l_ri(tmpcnt,63);\n" - "\tmov_l_ri(cdata,0);\n" - "\tcmov_l_rr(cdata,data,NATIVE_CC_NE);\n"); - /* cdata is now either data (for shift count!=0) or - 0 (for shift count==0) */ - switch(curi->size) { - case sz_byte: comprintf("\tshll_b_rr(data,tmpcnt);\n" - "\thighmask=0x38;\n"); - break; - case sz_word: comprintf("\tshll_w_rr(data,tmpcnt);\n" - "\thighmask=0x30;\n"); - break; - case sz_long: comprintf("\tshll_l_rr(data,tmpcnt);\n" - "\thighmask=0x20;\n"); - break; - default: abort(); - } - comprintf("\ttest_l_ri(tmpcnt,highmask);\n" - "\tmov_l_ri(scratchie,0);\n"); - if (curi->size == sz_long) - comprintf("\tcmov_l_rr(data,scratchie,NATIVE_CC_NE);\n"); - else { - comprintf("\tcmov_l_rr(scratchie,data,NATIVE_CC_EQ);\n"); - switch(curi->size) { - case sz_byte: comprintf("\tmov_b_rr(data,scratchie);\n");break; - case sz_word: comprintf("\tmov_w_rr(data,scratchie);\n");break; - default: abort(); - } - } - /* Result of shift is now in data. 
Now we need to determine - the carry by shifting cdata one less */ - comprintf("\tsub_l_ri(tmpcnt,1);\n"); - comprintf("\tshll_l_rr(cdata,tmpcnt);\n"); - comprintf("\ttest_l_ri(tmpcnt,highmask);\n"); - comprintf("\tforget_about(tmpcnt);\n"); - if (curi->size != sz_long) /* scratchie is still live for LSL.L */ - comprintf("\tmov_l_ri(scratchie,0);\n"); - comprintf("\tcmov_l_rr(cdata,scratchie,NATIVE_CC_NE);\n"); - comprintf("\tforget_about(scratchie);\n"); - /* And create the flags (preserve X flag if shift count is zero) */ - switch (curi->size) { - case sz_byte: comprintf("\tshrl_l_ri(cdata,7);\n"); break; - case sz_word: comprintf("\tshrl_l_ri(cdata,15);\n"); break; - case sz_long: comprintf("\tshrl_l_ri(cdata,31);\n"); break; - } - comprintf("\ttest_l_ri(cnt,63);\n" - "\tcmov_l_rr(FLAGX,cdata,NATIVE_CC_NE);\n"); + genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); + genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); + + start_brace(); + if (!noflags) comprintf("\tstart_needflags();\n"); - comprintf("\tif (needed_flags & FLAG_ZNV)\n"); - switch(curi->size) { - case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break; - case sz_word: comprintf("\t test_w_rr(data,data);\n"); break; - case sz_long: comprintf("\t test_l_rr(data,data);\n"); break; - } - comprintf("\t bt_l_ri(cdata,0);\n"); - comprintf("\t live_flags();\n"); - comprintf("\t end_needflags();\n"); - comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); - genastore ("data", curi->dmode, "dstreg", curi->size, "data"); - } - else { + if (curi->smode!=immi) { uses_cmov; start_brace(); - comprintf("\tint highmask;\n"); + comprintf("\tint cdata = scratchie++;\n"); + comprintf("\tint tmpcnt = scratchie++;\n"); + comprintf("\tmov_l_rr(tmpcnt,cnt);\n"); + comprintf("\tand_l_ri(tmpcnt,63);\n"); + comprintf("\tmov_l_ri(cdata, 0);\n"); switch(curi->size) { - case sz_byte: comprintf("\tshll_b_rr(data,cnt);\n" - 
"\thighmask=0x38;\n"); - break; - case sz_word: comprintf("\tshll_w_rr(data,cnt);\n" - "\thighmask=0x30;\n"); - break; - case sz_long: comprintf("\tshll_l_rr(data,cnt);\n" - "\thighmask=0x20;\n"); - break; - default: abort(); + case sz_byte: + comprintf("\ttest_l_ri(tmpcnt, 0x38);\n"); + comprintf("\tcmov_l_rr(cdata, data, NATIVE_CC_EQ);\n"); + comprintf("\tshll_b_rr(cdata,tmpcnt);\n"); + comprintf("\tmov_b_rr(data, cdata);\n"); + break; + case sz_word: + comprintf("\ttest_l_ri(tmpcnt, 0x30);\n"); + comprintf("\tcmov_l_rr(cdata, data, NATIVE_CC_EQ);\n"); + comprintf("\tshll_w_rr(cdata,tmpcnt);\n"); + comprintf("\tmov_w_rr(data, cdata);\n"); + break; + case sz_long: + comprintf("\ttest_l_ri(tmpcnt, 0x20);\n"); + comprintf("\tcmov_l_rr(cdata, data, NATIVE_CC_EQ);\n"); + comprintf("\tshll_l_rr(cdata,tmpcnt);\n"); + comprintf("\tmov_l_rr(data, cdata);\n"); + break; + default: assert(0); } - comprintf("test_l_ri(cnt,highmask);\n" - "mov_l_ri(scratchie,0);\n" - "cmov_l_rr(scratchie,data,4);\n"); - switch(curi->size) { - case sz_byte: comprintf("\tmov_b_rr(data,scratchie);\n");break; - case sz_word: comprintf("\tmov_w_rr(data,scratchie);\n");break; - case sz_long: comprintf("\tmov_l_rr(data,scratchie);\n");break; - default: abort(); - } - genastore ("data", curi->dmode, "dstreg", curi->size, "data"); - } + /* Result of shift is now in data. 
*/ } else { - start_brace(); - comprintf("\tint tmp=scratchie++;\n" - "\tint bp;\n" - "\tmov_l_rr(tmp,data);\n"); switch(curi->size) { - case sz_byte: comprintf("\tshll_b_ri(data,srcreg);\n" - "\tbp=8-srcreg;\n"); break; - case sz_word: comprintf("\tshll_w_ri(data,srcreg);\n" - "\tbp=16-srcreg;\n"); break; - case sz_long: comprintf("\tshll_l_ri(data,srcreg);\n" - "\tbp=32-srcreg;\n"); break; - default: abort(); + case sz_byte: comprintf("\tshll_b_ri(data,srcreg);\n"); break; + case sz_word: comprintf("\tshll_w_ri(data,srcreg);\n"); break; + case sz_long: comprintf("\tshll_l_ri(data,srcreg);\n"); break; + default: assert(0); } - - if (!noflags) { - comprintf("\tstart_needflags();\n"); - comprintf("\tif (needed_flags & FLAG_ZNV)\n"); - switch(curi->size) { - case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break; - case sz_word: comprintf("\t test_w_rr(data,data);\n"); break; - case sz_long: comprintf("\t test_l_rr(data,data);\n"); break; - } - comprintf("\t bt_l_ri(tmp,bp);\n"); /* Set C */ - comprintf("\t live_flags();\n"); - comprintf("\t end_needflags();\n"); - comprintf("\t duplicate_carry();\n"); - comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); - } - genastore ("data", curi->dmode, "dstreg", curi->size, "data"); } + /* And create the flags */ + if (!noflags) { + comprintf("\tlive_flags();\n"); + comprintf("\tend_needflags();\n"); + if (curi->smode!=immi) + comprintf("\tsetcc_for_cntzero(tmpcnt, data, %d);\n", curi->size == sz_byte ? 1 : curi->size == sz_word ? 
2 : 4); + else + comprintf("\tduplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } + genastore ("data", curi->dmode, "dstreg", curi->size, "data"); break; - case i_ROL: - mayfail; + case i_ROL: +#ifdef DISABLE_I_ROL + failure; +#endif + mayfail; if (curi->smode==Dreg) { - comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" - " FAIL(1);\n" - " return;\n" - "} \n"); - start_brace(); + comprintf( + " if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " " RETURN "\n" + " }\n"); + start_brace(); } comprintf("\tdont_care_flags();\n"); - genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); + genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); start_brace (); switch(curi->size) { - case sz_long: comprintf("\t rol_l_rr(data,cnt);\n"); break; - case sz_word: comprintf("\t rol_w_rr(data,cnt);\n"); break; - case sz_byte: comprintf("\t rol_b_rr(data,cnt);\n"); break; + case sz_long: comprintf("\trol_l_rr(data,cnt);\n"); break; + case sz_word: comprintf("\trol_w_rr(data,cnt);\n"); break; + case sz_byte: comprintf("\trol_b_rr(data,cnt);\n"); break; } - + if (!noflags) { comprintf("\tstart_needflags();\n"); + /* + * x86 ROL instruction does not set ZF/SF, so we need extra checks here + */ comprintf("\tif (needed_flags & FLAG_ZNV)\n"); switch(curi->size) { - case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break; - case sz_word: comprintf("\t test_w_rr(data,data);\n"); break; - case sz_long: comprintf("\t test_l_rr(data,data);\n"); break; + case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break; + case sz_word: comprintf("\t test_w_rr(data,data);\n"); break; + case sz_long: comprintf("\t test_l_rr(data,data);\n"); break; } - comprintf("\t bt_l_ri(data,0x00);\n"); /* Set C */ - comprintf("\t live_flags();\n"); - 
comprintf("\t end_needflags();\n"); + comprintf("\tbt_l_ri(data,0x00);\n"); /* Set C */ + comprintf("\tlive_flags();\n"); + comprintf("\tend_needflags();\n"); } genastore ("data", curi->dmode, "dstreg", curi->size, "data"); break; case i_ROR: - mayfail; +#ifdef DISABLE_I_ROR + failure; +#endif + mayfail; if (curi->smode==Dreg) { - comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" - " FAIL(1);\n" - " return;\n" - "} \n"); - start_brace(); + comprintf( + " if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " " RETURN "\n" + " }\n"); + start_brace(); } comprintf("\tdont_care_flags();\n"); - genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); + genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); start_brace (); switch(curi->size) { - case sz_long: comprintf("\t ror_l_rr(data,cnt);\n"); break; - case sz_word: comprintf("\t ror_w_rr(data,cnt);\n"); break; - case sz_byte: comprintf("\t ror_b_rr(data,cnt);\n"); break; + case sz_long: comprintf("\tror_l_rr(data,cnt);\n"); break; + case sz_word: comprintf("\tror_w_rr(data,cnt);\n"); break; + case sz_byte: comprintf("\tror_b_rr(data,cnt);\n"); break; } - + if (!noflags) { comprintf("\tstart_needflags();\n"); + /* + * x86 ROR instruction does not set ZF/SF, so we need extra checks here + */ comprintf("\tif (needed_flags & FLAG_ZNV)\n"); switch(curi->size) { - case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break; - case sz_word: comprintf("\t test_w_rr(data,data);\n"); break; - case sz_long: comprintf("\t test_l_rr(data,data);\n"); break; + case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break; + case sz_word: comprintf("\t test_w_rr(data,data);\n"); break; + case sz_long: comprintf("\t test_l_rr(data,data);\n"); break; } switch(curi->size) { - case sz_byte: comprintf("\t bt_l_ri(data,0x07);\n"); break; - 
case sz_word: comprintf("\t bt_l_ri(data,0x0f);\n"); break; - case sz_long: comprintf("\t bt_l_ri(data,0x1f);\n"); break; + case sz_byte: comprintf("\tbt_l_ri(data,0x07);\n"); break; + case sz_word: comprintf("\tbt_l_ri(data,0x0f);\n"); break; + case sz_long: comprintf("\tbt_l_ri(data,0x1f);\n"); break; } - comprintf("\t live_flags();\n"); - comprintf("\t end_needflags();\n"); + comprintf("\tlive_flags();\n"); + comprintf("\tend_needflags();\n"); } genastore ("data", curi->dmode, "dstreg", curi->size, "data"); break; @@ -2543,73 +2631,95 @@ gen_opcode (unsigned long int opcode) case i_ROXL: failure; break; + case i_ROXR: failure; break; + case i_ASRW: failure; break; + case i_ASLW: failure; break; + case i_LSRW: failure; break; + case i_LSLW: failure; break; + case i_ROLW: failure; break; + case i_RORW: failure; break; + case i_ROXLW: failure; break; + case i_ROXRW: failure; break; + case i_MOVEC2: - isjump; + isjump; failure; break; + case i_MOVE2C: - isjump; + isjump; failure; break; + case i_CAS: failure; break; + case i_CAS2: failure; break; + case i_MOVES: /* ignore DFC and SFC because we have no MMU */ - isjump; + isjump; failure; break; + case i_BKPT: /* only needed for hardware emulators */ - isjump; + isjump; failure; break; + case i_CALLM: /* not present in 68030 */ - isjump; + isjump; failure; break; + case i_RTM: /* not present in 68030 */ - isjump; + isjump; failure; break; + case i_TRAPcc: - isjump; + isjump; failure; break; + case i_DIVL: - isjump; + isjump; failure; break; + case i_MULL: -/* failure; NEW: from "Ipswitch Town" release */ +#ifdef DISABLE_I_MULL + failure; +#endif if (!noflags) { failure; break; @@ -2617,8 +2727,8 @@ gen_opcode (unsigned long int opcode) comprintf("\tuae_u16 extra=%s;\n",gen_nextiword()); comprintf("\tint r2=(extra>>12)&7;\n" "\tint tmp=scratchie++;\n"); - - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC); /* The two 
operands are in dst and r2 */ comprintf("\tif (extra&0x0400) {\n" /* Need full 64 bit result */ "\tint r3=(extra&7);\n" @@ -2646,65 +2756,83 @@ gen_opcode (unsigned long int opcode) case i_BFINS: failure; break; + case i_PACK: failure; break; + case i_UNPK: failure; break; - case i_TAS: + + case i_TAS: failure; break; + case i_FPP: +#ifdef DISABLE_I_FPP + failure; +#endif uses_fpu; -#ifdef USE_JIT_FPU mayfail; + comprintf("#ifdef USE_JIT_FPU\n"); comprintf("\tuae_u16 extra=%s;\n",gen_nextiword()); swap_opcode(); comprintf("\tcomp_fpp_opp(opcode,extra);\n"); -#else - failure; -#endif + comprintf("#else\n"); + comprintf("\tfailure = 1;\n"); + comprintf("#endif\n"); break; + case i_FBcc: +#ifdef DISABLE_I_FBCC + failure; +#endif uses_fpu; -#ifdef USE_JIT_FPU isjump; uses_cmov; mayfail; + comprintf("#ifdef USE_JIT_FPU\n"); swap_opcode(); comprintf("\tcomp_fbcc_opp(opcode);\n"); -#else - isjump; - failure; -#endif + comprintf("#else\n"); + comprintf("\tfailure = 1;\n"); + comprintf("#endif\n"); break; + case i_FDBcc: uses_fpu; - isjump; + isjump; failure; break; + case i_FScc: +#ifdef DISABLE_I_FSCC + failure; +#endif uses_fpu; -#ifdef USE_JIT_FPU mayfail; uses_cmov; + comprintf("#ifdef USE_JIT_FPU\n"); comprintf("\tuae_u16 extra=%s;\n",gen_nextiword()); swap_opcode(); comprintf("\tcomp_fscc_opp(opcode,extra);\n"); -#else - failure; -#endif + comprintf("#else\n"); + comprintf("\tfailure = 1;\n"); + comprintf("#endif\n"); break; + case i_FTRAPcc: uses_fpu; - isjump; + isjump; failure; break; + case i_FSAVE: uses_fpu; failure; break; + case i_FRESTORE: uses_fpu; failure; @@ -2713,105 +2841,252 @@ gen_opcode (unsigned long int opcode) case i_CINVL: case i_CINVP: case i_CINVA: - isjump; /* Not really, but it's probably a good idea to stop + isjump; /* Not really, but it's probably a good idea to stop translating at this point */ failure; comprintf ("\tflush_icache();\n"); /* Differentiate a bit more? 
*/ break; + case i_CPUSHL: case i_CPUSHP: case i_CPUSHA: - isjump; /* Not really, but it's probably a good idea to stop + isjump; /* Not really, but it's probably a good idea to stop translating at this point */ failure; break; + case i_MOVE16: - genmov16(opcode, curi); +#ifdef DISABLE_I_MOVE16 + failure; +#endif + genmov16(opcode,curi); break; - case i_EMULOP_RETURN: +#ifdef UAE + case i_MMUOP030: + case i_PFLUSHN: + case i_PFLUSH: + case i_PFLUSHAN: + case i_PFLUSHA: + case i_PLPAR: + case i_PLPAW: + case i_PTESTR: + case i_PTESTW: + case i_LPSTOP: + isjump; + failure; + break; +#endif + +#ifdef WINUAE_ARANYM + case i_EMULOP_RETURN: isjump; failure; break; - case i_EMULOP: + case i_EMULOP: failure; break; - + + // case i_NATFEAT_ID: + // case i_NATFEAT_CALL: + // failure; + // break; + case i_MMUOP: isjump; failure; break; - default: - abort (); +#endif + + default: + assert(0); break; } comprintf("%s",endstr); finish_braces (); sync_m68k_pc (); if (global_mayfail) - comprintf("\tif (failure) m68k_pc_offset=m68k_pc_offset_thisinst;\n"); + comprintf(" if (failure)\n m68k_pc_offset = m68k_pc_offset_thisinst;\n"); return global_failure; } -static void +static void generate_includes (FILE * f) { - fprintf (f, "#include \"sysdeps.h\"\n"); + // fprintf (f, "#include \"sysconfig.h\"\n"); + fprintf (f, "#if defined(JIT)\n"); + fprintf (f, "#include \"sysdeps.h\"\n"); +#ifdef UAE + fprintf (f, "#include \"options.h\"\n"); + fprintf (f, "#include \"uae/memory.h\"\n"); +#else fprintf (f, "#include \"m68k.h\"\n"); - fprintf (f, "#include \"memory.h\"\n"); - fprintf (f, "#include \"readcpu.h\"\n"); - fprintf (f, "#include \"newcpu.h\"\n"); - fprintf (f, "#include \"comptbl.h\"\n"); + fprintf (f, "#include \"memory.h\"\n"); +#endif + fprintf (f, "#include \"readcpu.h\"\n"); + fprintf (f, "#include \"newcpu.h\"\n"); + fprintf (f, "#include \"comptbl.h\"\n"); + fprintf (f, "#include \"debug.h\"\n"); } static int postfix; -static void + +static char *decodeEA (amodes mode, 
wordsizes size) +{ + static char buffer[80]; + + buffer[0] = 0; + switch (mode){ + case Dreg: + strcpy (buffer,"Dn"); + break; + case Areg: + strcpy (buffer,"An"); + break; + case Aind: + strcpy (buffer,"(An)"); + break; + case Aipi: + strcpy (buffer,"(An)+"); + break; + case Apdi: + strcpy (buffer,"-(An)"); + break; + case Ad16: + strcpy (buffer,"(d16,An)"); + break; + case Ad8r: + strcpy (buffer,"(d8,An,Xn)"); + break; + case PC16: + strcpy (buffer,"(d16,PC)"); + break; + case PC8r: + strcpy (buffer,"(d8,PC,Xn)"); + break; + case absw: + strcpy (buffer,"(xxx).W"); + break; + case absl: + strcpy (buffer,"(xxx).L"); + break; + case imm: + switch (size){ + case sz_byte: + strcpy (buffer,"#.B"); + break; + case sz_word: + strcpy (buffer,"#.W"); + break; + case sz_long: + strcpy (buffer,"#.L"); + break; + default: + break; + } + break; + case imm0: + strcpy (buffer,"#.B"); + break; + case imm1: + strcpy (buffer,"#.W"); + break; + case imm2: + strcpy (buffer,"#.L"); + break; + case immi: + strcpy (buffer,"#"); + break; + + default: + break; + } + return buffer; +} + +static char *outopcode (const char *name, int opcode) +{ + static char out[100]; + struct instr *ins; + + ins = &table68k[opcode]; + strcpy (out, name); + if (ins->smode == immi) + strcat (out, "Q"); + if (ins->size == sz_byte) + strcat (out,".B"); + if (ins->size == sz_word) + strcat (out,".W"); + if (ins->size == sz_long) + strcat (out,".L"); + strcat (out," "); + if (ins->suse) + strcat (out, decodeEA (ins->smode, ins->size)); + if (ins->duse) { + if (ins->suse) strcat (out,","); + strcat (out, decodeEA (ins->dmode, ins->size)); + } + return out; +} + + +static void generate_one_opcode (int rp, int noflags) { + int i; uae_u16 smsk, dmsk; - const long int opcode = opcode_map[rp]; - const char *opcode_str; + unsigned int opcode = opcode_map[rp]; int aborted=0; int have_srcreg=0; int have_dstreg=0; + const char *name; if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level) return; + for 
(i = 0; lookuptab[i].name[0]; i++) + { + if (table68k[opcode].mnemo == lookuptab[i].mnemo) + break; + } + if (table68k[opcode].handler != -1) return; switch (table68k[opcode].stype) { - case 0: + case 0: smsk = 7; break; - case 1: + case 1: smsk = 255; break; - case 2: + case 2: smsk = 15; break; - case 3: + case 3: smsk = 7; break; - case 4: + case 4: smsk = 7; break; - case 5: + case 5: smsk = 63; break; - case 6: +#ifndef UAE + case 6: smsk = 255; break; - case 7: +#endif + case 7: smsk = 3; break; - default: - abort (); + default: + smsk = 0; + assert(0); } dmsk = 7; @@ -2834,8 +3109,9 @@ generate_one_opcode (int rp, int noflags) { char source[100]; int pos = table68k[opcode].spos; - - comprintf ("#ifdef HAVE_GET_WORD_UNSWAPPED\n"); + +#ifndef UAE + comprintf ("#if defined(HAVE_GET_WORD_UNSWAPPED) && !defined(FULLMMU)\n"); if (pos < 8 && (smsk >> (8 - pos)) != 0) sprintf (source, "(((opcode >> %d) | (opcode << %d)) & %d)", @@ -2853,6 +3129,7 @@ generate_one_opcode (int rp, int noflags) comprintf ("\tuae_u32 srcreg = %s;\n", source); comprintf ("#else\n"); +#endif if (pos) sprintf (source, "((opcode >> %d) & %d)", pos, smsk); @@ -2865,8 +3142,10 @@ generate_one_opcode (int rp, int noflags) comprintf ("\tuae_s32 srcreg = (uae_s32)(uae_s8)%s;\n", source); else comprintf ("\tuae_s32 srcreg = %s;\n", source); - + +#ifndef UAE comprintf ("#endif\n"); +#endif } } if (table68k[opcode].duse @@ -2886,8 +3165,9 @@ generate_one_opcode (int rp, int noflags) else { int pos = table68k[opcode].dpos; - - comprintf ("#ifdef HAVE_GET_WORD_UNSWAPPED\n"); + +#ifndef UAE + comprintf ("#if defined(HAVE_GET_WORD_UNSWAPPED) && !defined(FULLMMU)\n"); if (pos < 8 && (dmsk >> (8 - pos)) != 0) comprintf ("\tuae_u32 dstreg = ((opcode >> %d) | (opcode << %d)) & %d;\n", @@ -2896,33 +3176,36 @@ generate_one_opcode (int rp, int noflags) comprintf ("\tuae_u32 dstreg = (opcode >> %d) & %d;\n", pos ^ 8, dmsk); else - comprintf ("\tuae_u32 dstreg = opcode & %d;\n", dmsk); + comprintf ("\tuae_u32 
dstreg = opcode & %d;\n", dmsk); comprintf ("#else\n"); - +#endif + if (pos) comprintf ("\tuae_u32 dstreg = (opcode >> %d) & %d;\n", pos, dmsk); else - comprintf ("\tuae_u32 dstreg = opcode & %d;\n", dmsk); - + comprintf ("\tuae_u32 dstreg = opcode & %d;\n", dmsk); + +#ifndef UAE comprintf ("#endif\n"); +#endif } } if (have_srcreg && have_dstreg && - (table68k[opcode].dmode==Areg || - table68k[opcode].dmode==Aind || - table68k[opcode].dmode==Aipi || - table68k[opcode].dmode==Apdi || - table68k[opcode].dmode==Ad16 || + (table68k[opcode].dmode==Areg || + table68k[opcode].dmode==Aind || + table68k[opcode].dmode==Aipi || + table68k[opcode].dmode==Apdi || + table68k[opcode].dmode==Ad16 || table68k[opcode].dmode==Ad8r) && - (table68k[opcode].smode==Areg || - table68k[opcode].smode==Aind || - table68k[opcode].smode==Aipi || - table68k[opcode].smode==Apdi || - table68k[opcode].smode==Ad16 || - table68k[opcode].smode==Ad8r) + (table68k[opcode].smode==Areg || + table68k[opcode].smode==Aind || + table68k[opcode].smode==Aipi || + table68k[opcode].smode==Apdi || + table68k[opcode].smode==Ad16 || + table68k[opcode].smode==Ad8r) ) { comprintf("\tuae_u32 dodgy=(srcreg==(uae_s32)dstreg);\n"); } @@ -2932,35 +3215,36 @@ generate_one_opcode (int rp, int noflags) comprintf("\tuae_u32 m68k_pc_offset_thisinst=m68k_pc_offset;\n"); comprintf("\tm68k_pc_offset+=2;\n"); - opcode_str = get_instruction_string (opcode); - aborted=gen_opcode (opcode); { - int flags=0; - if (global_isjump) flags|=1; - if (long_opcode) flags|=2; - if (global_cmov) flags|=4; - if (global_isaddx) flags|=8; - if (global_iscjump) flags|=16; - if (global_fpu) flags|=32; - + char flags[64 * 6]; + *flags = '\0'; + if (global_isjump) strcat(flags, "COMP_OPCODE_ISJUMP|"); + if (long_opcode) strcat(flags, "COMP_OPCODE_LONG_OPCODE|"); + if (global_cmov) strcat(flags, "COMP_OPCODE_CMOV|"); + if (global_isaddx) strcat(flags, "COMP_OPCODE_ISADDX|"); + if (global_iscjump) strcat(flags, "COMP_OPCODE_ISCJUMP|"); + if (global_fpu) 
strcat(flags, "COMP_OPCODE_USES_FPU|"); + if (*flags) + flags[strlen(flags) - 1] = '\0'; + else + strcpy(flags, "0"); + +#ifdef UAE /* RETTYPE != void */ + comprintf ("return 0;\n"); +#endif comprintf ("}\n"); - + + name = lookuptab[i].name; if (aborted) { - fprintf (stblfile, "{ NULL, 0x%08x, %ld }, /* %s */\n", flags, opcode, opcode_str); + fprintf (stblfile, "{ NULL, %u, %s }, /* %s */\n", opcode, flags, name); com_discard(); - } - else { - if (noflags) { - fprintf (stblfile, "{ op_%lx_%d_comp_nf, 0x%08x, %ld }, /* %s */\n", opcode, postfix, flags, opcode, opcode_str); - fprintf (headerfile, "extern compop_func op_%lx_%d_comp_nf;\n", opcode, postfix); - printf ("void REGPARAM2 op_%lx_%d_comp_nf(uae_u32 opcode) /* %s */\n{\n", opcode, postfix, opcode_str); - } - else { - fprintf (stblfile, "{ op_%lx_%d_comp_ff, 0x%08x, %ld }, /* %s */\n", opcode, postfix, flags, opcode, opcode_str); - fprintf (headerfile, "extern compop_func op_%lx_%d_comp_ff;\n", opcode, postfix); - printf ("void REGPARAM2 op_%lx_%d_comp_ff(uae_u32 opcode) /* %s */\n{\n", opcode, postfix, opcode_str); - } + } else { + const char *tbl = noflags ? "nf" : "ff"; + fprintf (stblfile, "{ op_%x_%d_comp_%s, %u, %s }, /* %s */\n", opcode, postfix, tbl, opcode, flags, name); + fprintf (headerfile, "extern compop_func op_%x_%d_comp_%s;\n", opcode, postfix, tbl); + printf ("/* %s */\n", outopcode (name, opcode)); + printf (RETTYPE " REGPARAM2 op_%x_%d_comp_%s(uae_u32 opcode) /* %s */\n{\n", opcode, postfix, tbl, name); com_flush(); } } @@ -2968,23 +3252,20 @@ generate_one_opcode (int rp, int noflags) opcode_last_postfix[rp] = postfix; } -static void +static void generate_func (int noflags) { int i, j, rp; + const char *tbl = noflags ? "nf" : "ff"; using_prefetch = 0; using_exception_3 = 0; for (i = 0; i < 1; i++) /* We only do one level! 
*/ { - cpu_level = 4 - i; + cpu_level = NEXT_CPU_LEVEL - i; postfix = i; - if (noflags) - fprintf (stblfile, "struct comptbl op_smalltbl_%d_comp_nf[] = {\n", postfix); - else - fprintf (stblfile, "struct comptbl op_smalltbl_%d_comp_ff[] = {\n", postfix); - + fprintf (stblfile, "const struct comptbl op_smalltbl_%d_comp_%s[] = {\n", postfix, tbl); /* sam: this is for people with low memory (eg. me :)) */ printf ("\n" @@ -3002,6 +3283,11 @@ generate_func (int noflags) "#define PART_7 1\n" "#define PART_8 1\n" "#endif\n\n"); +#ifdef UAE + printf ("extern void comp_fpp_opp();\n" + "extern void comp_fscc_opp();\n" + "extern void comp_fbcc_opp();\n\n"); +#endif rp = 0; for (j = 1; j <= 8; ++j) @@ -3013,16 +3299,27 @@ generate_func (int noflags) printf ("#endif\n\n"); } - fprintf (stblfile, "{ 0, 0,65536 }};\n"); + fprintf (stblfile, "{ 0, 65536, 0 }};\n"); } } -int -main (int argc, char **argv) +#if (defined(OS_cygwin) || defined(OS_mingw)) && defined(EXTENDED_SIGSEGV) +void cygwin_mingw_abort() { - read_table68k (); - do_merges (); +#undef abort + abort(); +} +#endif + +#if defined(FSUAE) && defined (WINDOWS) +#include "windows.h" +int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine, int nCmdShow) +#else +int main(void) +#endif +{ + init_table68k (); opcode_map = (int *) malloc (sizeof (int) * nr_cpuop_funcs); opcode_last_postfix = (int *) malloc (sizeof (int) * nr_cpuop_funcs); @@ -3034,25 +3331,30 @@ main (int argc, char **argv) * cputbl.h that way), but cpuopti can't cope. That could be fixed, but * I don't dare to touch the 68k version. 
*/ - headerfile = fopen ("comptbl.h", "wb"); - stblfile = fopen ("compstbl.cpp", "wb"); - freopen ("compemu.cpp", "wb", stdout); + headerfile = fopen (GEN_PATH "comptbl.h", "wb"); + fprintf (headerfile, "" + "extern const struct comptbl op_smalltbl_0_comp_nf[];\n" + "extern const struct comptbl op_smalltbl_0_comp_ff[];\n" + ""); - fprintf(stblfile, "#if USE_JIT\n"); - printf("#if USE_JIT\n"); + stblfile = fopen (GEN_PATH "compstbl.cpp", "wb"); + if (freopen (GEN_PATH "compemu.cpp", "wb", stdout) == NULL) { + abort(); + } generate_includes (stdout); generate_includes (stblfile); - printf("#include \"compiler/compemu.h\"\n"); + printf("#include \"" JIT_PATH "compemu.h\"\n"); + printf("#include \"" JIT_PATH "flags_x86.h\"\n"); noflags=0; generate_func (noflags); - - free(opcode_map); - free(opcode_last_postfix); - free(opcode_next_clev); - free(counts); + + free(opcode_map); + free(opcode_last_postfix); + free(opcode_next_clev); + free(counts); opcode_map = (int *) malloc (sizeof (int) * nr_cpuop_funcs); opcode_last_postfix = (int *) malloc (sizeof (int) * nr_cpuop_funcs); @@ -3062,17 +3364,23 @@ main (int argc, char **argv) noflags=1; generate_func (noflags); - fprintf(stblfile, "#endif //USE_JIT\n"); - printf("#endif //USE_JIT\n"); + printf ("#endif\n"); + fprintf (stblfile, "#endif\n"); - free(opcode_map); - free(opcode_last_postfix); - free(opcode_next_clev); - free(counts); + free(opcode_map); + free(opcode_last_postfix); + free(opcode_next_clev); + free(counts); free (table68k); - fclose (stblfile); - fclose (headerfile); - fflush (stdout); + fclose (stblfile); + fclose (headerfile); + (void)disasm_this_inst; return 0; } + +#ifdef UAE +void write_log (const TCHAR *format,...) 
+{ +} +#endif diff --git a/BasiliskII/src/uae_cpu/compiler/gencomp_arm.c b/BasiliskII/src/uae_cpu/compiler/gencomp_arm.c new file mode 100644 index 00000000..913361ab --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/gencomp_arm.c @@ -0,0 +1,5082 @@ +/* + * compiler/gencomp_arm2.c - MC680x0 compilation generator (ARM Adaption JIT v1 & JIT v2) + * + * Based on work Copyright 1995, 1996 Bernd Schmidt + * Changes for UAE-JIT Copyright 2000 Bernd Meyer + * + * Adaptation for ARAnyM/ARM, copyright 2001-2015 + * Milan Jurik, Jens Heitmann + * + * Basilisk II (C) 1997-2005 Christian Bauer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Notes + * ===== + * + * Advantages of JIT v2 + * - Processor independent style + * - Reduced overhead + * - Easier to understand / read + * - Easier to optimize + * - More precise flag handling + * - Better optimization for different CPU version ARM, ARMv6 etc.. 
+ * + * Disadvantages of JIT v2 + * - Less generated + * - Requires more code implementation by hand (MidFunc) + * - MIDFUNCS are more CPU minded (closer to raw) + * - Separate code for each instruction (but this could be also an advantage, because you can concentrate on it) + * + * Additional note: + * - current using jnf_xxx calls for non-flag operations and + * jff_xxx for flag operations + * + * Still todo: + * - Optimize genamode, genastore, gen_writeXXX, gen_readXXX, genmovemXXX + * + */ + +#define CC_FOR_BUILD 1 +// #include "sysconfig.h" + +#include "sysdeps.h" +#include "readcpu.h" + +#include +#include +#include +#include +#include +#include +#undef abort + +#define BOOL_TYPE "int" +#define failure global_failure=1 +#define FAILURE global_failure=1 +#define isjump global_isjump=1 +#define is_const_jump global_iscjump=1 +#define isaddx global_isaddx=1 +#define uses_cmov global_cmov=1 +#define mayfail global_mayfail=1 +#define uses_fpu global_fpu=1 + +int hack_opcode; + +static int global_failure; +static int global_isjump; +static int global_iscjump; +static int global_isaddx; +static int global_cmov; +static int long_opcode; +static int global_mayfail; +static int global_fpu; + +static char endstr[1000]; +static char lines[100000]; +static int comp_index = 0; + +#include "flags_arm.h" + +#ifndef __attribute__ +# ifndef __GNUC__ +# define __attribute__(x) +# endif +#endif + + +static int cond_codes[] = { // + NATIVE_CC_AL, -1, // + NATIVE_CC_HI, NATIVE_CC_LS, // + NATIVE_CC_CC, NATIVE_CC_CS, // + NATIVE_CC_NE, NATIVE_CC_EQ, // + NATIVE_CC_VC, NATIVE_CC_VS, // + NATIVE_CC_PL, NATIVE_CC_MI, // + NATIVE_CC_GE, NATIVE_CC_LT, // + NATIVE_CC_GT, NATIVE_CC_LE // + }; + +__attribute__((format(printf, 1, 2))) +static void comprintf(const char *format, ...) 
+{
+	va_list args;
+
+	va_start(args, format);
+	comp_index += vsprintf(lines + comp_index, format, args);
+	va_end(args);
+}
+
+static void com_discard(void)
+{
+	comp_index = 0;
+}
+
+static void com_flush(void)
+{
+	int i;
+	for (i = 0; i < comp_index; i++)
+		putchar(lines[i]);
+	com_discard();
+}
+
+
+static FILE *headerfile;
+static FILE *stblfile;
+
+static int using_prefetch;
+static int using_exception_3;
+static int cpu_level;
+static int noflags;
+
+/* For the current opcode, the next lower level that will have different code.
+ * Initialized to -1 for each opcode. If it remains unchanged, indicates we
+ * are done with that opcode. */
+static int next_cpu_level;
+
+static int *opcode_map;
+static int *opcode_next_clev;
+static int *opcode_last_postfix;
+static unsigned long *counts;
+
+/*
+ * Fill opcode_map[] with the order in which opcodes are processed.
+ *
+ * Entries listed in the optional "frequent.68k" file come first, in file
+ * order, and their counts are stored in counts[]. Afterwards every opcode
+ * whose table68k entry has handler == -1 and mnemo != i_ILLG, and whose
+ * count is still 0, is appended. For each entry opcode_next_clev[] is set
+ * to 4 and opcode_last_postfix[] to -1.
+ *
+ * Asserts that exactly nr_cpuop_funcs entries were produced.
+ */
+static void read_counts(void)
+{
+	FILE *file;
+	/* count starts at 0 so that the fallback loop below never stores an
+	 * indeterminate value when frequent.68k is missing or has no entries. */
+	unsigned long opcode, count = 0, total;
+	char name[20];
+	int nr = 0;
+	memset(counts, 0, 65536 * sizeof *counts);
+
+	file = fopen("frequent.68k", "r");
+	if (file) {
+		if (fscanf(file, "Total: %lu\n", &total) != 1)
+		{
+			assert(0);
+		}
+		/* %19s bounds the mnemonic to name[20] (19 chars + NUL). */
+		while (fscanf(file, "%lx: %lu %19s\n", &opcode, &count, name) == 3) {
+			/* Stop on malformed input: opcode must index the 65536
+			 * counts[] slots cleared above, and nr must not run past
+			 * the nr_cpuop_funcs entries this function is expected
+			 * to produce. */
+			if (opcode >= 65536 || nr >= nr_cpuop_funcs)
+				break;
+			opcode_next_clev[nr] = 4;
+			opcode_last_postfix[nr] = -1;
+			opcode_map[nr++] = opcode;
+			counts[opcode] = count;
+		}
+		fclose(file);
+	}
+	if (nr == nr_cpuop_funcs)
+		return;
+	for (opcode = 0; opcode < 0x10000; opcode++) {
+		if (table68k[opcode].handler == -1 && table68k[opcode].mnemo != i_ILLG
+			&& counts[opcode] == 0) {
+			opcode_next_clev[nr] = 4;
+			opcode_last_postfix[nr] = -1;
+			opcode_map[nr++] = opcode;
+			/* count is 0 or the last value read from the file; each
+			 * opcode is tested only once, so this does not affect
+			 * which opcodes are appended. */
+			counts[opcode] = count;
+		}
+	}
+	assert (nr == nr_cpuop_funcs);
+}
+
+static int n_braces = 0;
+static int insn_n_cycles;
+
+static void start_brace(void) {
+	n_braces++;
+	comprintf("{");
+}
+
+static void close_brace(void) {
+	assert(n_braces > 0);
+	n_braces--;
+	comprintf("}");
+}
+
+static void finish_braces(void) {
+	while (n_braces > 0)
+		close_brace();
+}
+
+static inline void gen_update_next_handler(void) 
{ + return; /* Can anything clever be done here? */ +} + +static void gen_writebyte(const char *address, const char *source) +{ + comprintf("\twritebyte(%s, %s, scratchie);\n", address, source); +} + +static void gen_writeword(const char *address, const char *source) +{ + comprintf("\twriteword(%s, %s, scratchie);\n", address, source); +} + +static void gen_writelong(const char *address, const char *source) +{ + comprintf("\twritelong(%s, %s, scratchie);\n", address, source); +} + +static void gen_readbyte(const char *address, const char* dest) +{ + comprintf("\treadbyte(%s, %s, scratchie);\n", address, dest); +} + +static void gen_readword(const char *address, const char *dest) +{ + comprintf("\treadword(%s,%s,scratchie);\n", address, dest); +} + +static void gen_readlong(const char *address, const char *dest) +{ + comprintf("\treadlong(%s, %s, scratchie);\n", address, dest); +} + +static const char * +gen_nextilong(void) { + static char buffer[80]; + + sprintf(buffer, "comp_get_ilong((m68k_pc_offset+=4)-4)"); + insn_n_cycles += 4; + + long_opcode = 1; + return buffer; +} + +static const char * +gen_nextiword(void) { + static char buffer[80]; + + sprintf(buffer, "comp_get_iword((m68k_pc_offset+=2)-2)"); + insn_n_cycles += 2; + + long_opcode = 1; + return buffer; +} + +static const char * +gen_nextibyte(void) { + static char buffer[80]; + + sprintf(buffer, "comp_get_ibyte((m68k_pc_offset+=2)-2)"); + insn_n_cycles += 2; + + long_opcode = 1; + return buffer; +} + +#if defined(USE_JIT_FPU) +// Only used by FPU (future), get rid of unused warning +static void +swap_opcode (void) +{ + comprintf("#if defined(HAVE_GET_WORD_UNSWAPPED) && !defined(FULLMMU)\n"); + comprintf("\topcode = do_byteswap_16(opcode);\n"); + comprintf("#endif\n"); +} +#endif + +static void sync_m68k_pc(void) { + comprintf("\t if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc();\n"); +} + +/* getv == 1: fetch data; getv != 0: check for odd address. 
If movem != 0, + * the calling routine handles Apdi and Aipi modes. + * gb-- movem == 2 means the same thing but for a MOVE16 instruction */ +static void genamode(amodes mode, const char *reg, wordsizes size, const char *name, int getv, int movem) +{ + start_brace(); + switch (mode) + { + case Dreg: /* Do we need to check dodgy here? */ + assert (!movem); + if (getv == 1 || getv == 2) + { + /* We generate the variable even for getv==2, so we can use + it as a destination for MOVE */ + comprintf("\tint %s = %s;\n", name, reg); + } + return; + + case Areg: + assert (!movem); + if (getv == 1 || getv == 2) + { + /* see above */ + comprintf("\tint %s = dodgy ? scratchie++ : %s + 8;\n", name, reg); + if (getv == 1) + { + comprintf("\tif (dodgy) \n"); + comprintf("\t\tmov_l_rr(%s, %s + 8);\n", name, reg); + } + } + return; + + case Aind: + comprintf("\tint %sa = dodgy ? scratchie++ : %s + 8;\n", name, reg); + comprintf("\tif (dodgy)\n"); + comprintf("\t\tmov_l_rr(%sa, %s + 8);\n", name, reg); + break; + case Aipi: + comprintf("\tint %sa = scratchie++;\n", name); + comprintf("\tmov_l_rr(%sa, %s + 8);\n", name, reg); + break; + case Apdi: + switch (size) + { + case sz_byte: + if (movem) + { + comprintf("\tint %sa = dodgy ? scratchie++ : %s + 8;\n", name, reg); + comprintf("\tif (dodgy)\n"); + comprintf("\t\tmov_l_rr(%sa, 8 + %s);\n", name, reg); + } else + { + start_brace(); + comprintf("\tint %sa = dodgy ? scratchie++ : %s + 8;\n", name, reg); + comprintf("\tlea_l_brr(%s + 8, %s + 8, (uae_s32)-areg_byteinc[%s]);\n", reg, reg, reg); + comprintf("\tif (dodgy)\n"); + comprintf("\t\tmov_l_rr(%sa, 8 + %s);\n", name, reg); + } + break; + case sz_word: + if (movem) + { + comprintf("\tint %sa=dodgy?scratchie++:%s+8;\n", name, reg); + comprintf("\tif (dodgy) \n"); + comprintf("\tmov_l_rr(%sa,8+%s);\n", name, reg); + } else + { + start_brace(); + comprintf("\tint %sa = dodgy ? 
scratchie++ : %s + 8;\n", name, reg); + comprintf("\tlea_l_brr(%s + 8, %s + 8, -2);\n", reg, reg); + comprintf("\tif (dodgy)\n"); + comprintf("\t\tmov_l_rr(%sa, 8 + %s);\n", name, reg); + } + break; + case sz_long: + if (movem) + { + comprintf("\tint %sa = dodgy ? scratchie++ : %s + 8;\n", name, reg); + comprintf("\tif (dodgy)\n"); + comprintf("\t\tmov_l_rr(%sa, 8 + %s);\n", name, reg); + } else + { + start_brace(); + comprintf("\tint %sa = dodgy ? scratchie++ : %s + 8;\n", name, reg); + comprintf("\tlea_l_brr(%s + 8, %s + 8, -4);\n", reg, reg); + comprintf("\tif (dodgy)\n"); + comprintf("\t\tmov_l_rr(%sa, 8 + %s);\n", name, reg); + } + break; + default: + assert(0); + break; + } + break; + case Ad16: + comprintf("\tint %sa = scratchie++;\n", name); + comprintf("\tmov_l_rr(%sa, 8 + %s);\n", name, reg); + comprintf("\tlea_l_brr(%sa, %sa, (uae_s32)(uae_s16)%s);\n", name, name, gen_nextiword()); + break; + case Ad8r: + comprintf("\tint %sa = scratchie++;\n", name); + comprintf("\tcalc_disp_ea_020(%s + 8, %s, %sa, scratchie);\n", reg, gen_nextiword(), name); + break; + + case PC16: + comprintf("\tint %sa = scratchie++;\n", name); + comprintf("\tuae_u32 address = start_pc + ((char *)comp_pc_p - (char *)start_pc_p) + m68k_pc_offset;\n"); + comprintf("\tuae_s32 PC16off = (uae_s32)(uae_s16)%s;\n", gen_nextiword()); + comprintf("\tmov_l_ri(%sa, address + PC16off);\n", name); + break; + + case PC8r: + comprintf("\tint pctmp = scratchie++;\n"); + comprintf("\tint %sa = scratchie++;\n", name); + comprintf("\tuae_u32 address = start_pc + ((char *)comp_pc_p - (char *)start_pc_p) + m68k_pc_offset;\n"); + start_brace(); + comprintf("\tmov_l_ri(pctmp,address);\n"); + + comprintf("\tcalc_disp_ea_020(pctmp, %s, %sa, scratchie);\n", gen_nextiword(), name); + break; + case absw: + comprintf("\tint %sa = scratchie++;\n", name); + comprintf("\tmov_l_ri(%sa, (uae_s32)(uae_s16)%s);\n", name, gen_nextiword()); + break; + case absl: + comprintf("\tint %sa = scratchie++;\n", name); + 
comprintf("\tmov_l_ri(%sa, %s); /* absl */\n", name, gen_nextilong()); + break; + case imm: + assert (getv == 1); + switch (size) + { + case sz_byte: + comprintf("\tint %s = scratchie++;\n", name); + comprintf("\tmov_l_ri(%s, (uae_s32)(uae_s8)%s);\n", name, gen_nextibyte()); + break; + case sz_word: + comprintf("\tint %s = scratchie++;\n", name); + comprintf("\tmov_l_ri(%s, (uae_s32)(uae_s16)%s);\n", name, gen_nextiword()); + break; + case sz_long: + comprintf("\tint %s = scratchie++;\n", name); + comprintf("\tmov_l_ri(%s, %s);\n", name, gen_nextilong()); + break; + default: + assert(0); + break; + } + return; + case imm0: + assert (getv == 1); + comprintf("\tint %s = scratchie++;\n", name); + comprintf("\tmov_l_ri(%s, (uae_s32)(uae_s8)%s);\n", name, gen_nextibyte()); + return; + case imm1: + assert (getv == 1); + comprintf("\tint %s = scratchie++;\n", name); + comprintf("\tmov_l_ri(%s, (uae_s32)(uae_s16)%s);\n", name, gen_nextiword()); + return; + case imm2: + assert (getv == 1); + comprintf("\tint %s = scratchie++;\n", name); + comprintf("\tmov_l_ri(%s, %s);\n", name, gen_nextilong()); + return; + case immi: + assert (getv == 1); + comprintf("\tint %s = scratchie++;\n", name); + comprintf("\tmov_l_ri(%s, %s);\n", name, reg); + return; + default: + assert(0); + break; + } + + /* We get here for all non-reg non-immediate addressing modes to + * actually fetch the value. 
*/ + if (getv == 1) + { + char astring[80]; + sprintf(astring, "%sa", name); + switch (size) + { + case sz_byte: + insn_n_cycles += 2; + break; + case sz_word: + insn_n_cycles += 2; + break; + case sz_long: + insn_n_cycles += 4; + break; + default: + assert(0); + break; + } + start_brace(); + comprintf("\tint %s = scratchie++;\n", name); + switch (size) + { + case sz_byte: + gen_readbyte(astring, name); + break; + case sz_word: + gen_readword(astring, name); + break; + case sz_long: + gen_readlong(astring, name); + break; + default: + assert(0); + break; + } + } + + /* We now might have to fix up the register for pre-dec or post-inc + * addressing modes. */ + if (!movem) + { + switch (mode) + { + case Aipi: + switch (size) + { + case sz_byte: + comprintf("\tlea_l_brr(%s + 8,%s + 8, areg_byteinc[%s]);\n", reg, reg, reg); + break; + case sz_word: + comprintf("\tlea_l_brr(%s + 8, %s + 8, 2);\n", reg, reg); + break; + case sz_long: + comprintf("\tlea_l_brr(%s + 8, %s + 8, 4);\n", reg, reg); + break; + default: + assert(0); + break; + } + break; + case Apdi: + break; + default: + break; + } + } +} + +static void genastore(const char *from, amodes mode, const char *reg, wordsizes size, const char *to) +{ + switch (mode) + { + case Dreg: + switch (size) + { + case sz_byte: + comprintf("\tif(%s != %s)\n", reg, from); + comprintf("\t\tmov_b_rr(%s, %s);\n", reg, from); + break; + case sz_word: + comprintf("\tif(%s != %s)\n", reg, from); + comprintf("\t\tmov_w_rr(%s, %s);\n", reg, from); + break; + case sz_long: + comprintf("\tif(%s != %s)\n", reg, from); + comprintf("\t\tmov_l_rr(%s, %s);\n", reg, from); + break; + default: + assert(0); + break; + } + break; + case Areg: + switch (size) + { + case sz_word: + comprintf("\tif(%s + 8 != %s)\n", reg, from); + comprintf("\t\tmov_w_rr(%s + 8, %s);\n", reg, from); + break; + case sz_long: + comprintf("\tif(%s + 8 != %s)\n", reg, from); + comprintf("\t\tmov_l_rr(%s + 8, %s);\n", reg, from); + break; + default: + assert(0); + break; 
+ } + break; + + case Apdi: + case absw: + case PC16: + case PC8r: + case Ad16: + case Ad8r: + case Aipi: + case Aind: + case absl: + { + char astring[80]; + sprintf(astring, "%sa", to); + + switch (size) + { + case sz_byte: + insn_n_cycles += 2; + gen_writebyte(astring, from); + break; + case sz_word: + insn_n_cycles += 2; + gen_writeword(astring, from); + break; + case sz_long: + insn_n_cycles += 4; + gen_writelong(astring, from); + break; + default: + assert(0); + break; + } + } + break; + case imm: + case imm0: + case imm1: + case imm2: + case immi: + assert(0); + break; + default: + assert(0); + break; + } +} + +static void gen_move16(uae_u32 opcode, struct instr *curi) { +#if defined(USE_JIT2) + comprintf("\tint src=scratchie++;\n"); + comprintf("\tint dst=scratchie++;\n"); + + uae_u32 masked_op = (opcode & 0xfff8); + if (masked_op == 0xf620) { + // POSTINCREMENT SOURCE AND DESTINATION version + comprintf("\t uae_u16 dstreg = ((%s)>>12) & 0x07;\n", gen_nextiword()); + comprintf("\t jnf_MOVE(src, srcreg + 8);"); + comprintf("\t jnf_MOVE(dst, dstreg + 8);"); + comprintf("\t if (srcreg != dstreg)\n"); + comprintf("\t jnf_ADD_imm(srcreg + 8, srcreg + 8, 16);"); + comprintf("\t jnf_ADD_imm(dstreg + 8, dstreg + 8, 16);"); + } else { + /* Other variants */ + genamode(curi->smode, "srcreg", curi->size, "src", 0, 2); + genamode(curi->dmode, "dstreg", curi->size, "dst", 0, 2); + switch (masked_op) { + case 0xf600: + comprintf("\t jnf_ADD_imm(srcreg + 8, srcreg + 8, 16);"); + break; + case 0xf608: + comprintf("\t jnf_ADD_imm(dstreg + 8, dstreg + 8, 16);"); + break; + } + } + comprintf("\t jnf_MOVE16(dst, src);"); +#else + comprintf("\tint src=scratchie++;\n"); + comprintf("\tint dst=scratchie++;\n"); + + if ((opcode & 0xfff8) == 0xf620) { + /* MOVE16 (Ax)+,(Ay)+ */ + comprintf("\tuae_u16 dstreg=((%s)>>12)&0x07;\n", gen_nextiword()); + comprintf("\tmov_l_rr(src,8+srcreg);\n"); + comprintf("\tmov_l_rr(dst,8+dstreg);\n"); + } else { + /* Other variants */ + 
genamode(curi->smode, "srcreg", curi->size, "src", 0, 2); + genamode(curi->dmode, "dstreg", curi->size, "dst", 0, 2); + comprintf("\tmov_l_rr(src,srca);\n"); + comprintf("\tmov_l_rr(dst,dsta);\n"); + } + + /* Align on 16-byte boundaries */ + comprintf("\tand_l_ri(src,~15);\n"); + comprintf("\tand_l_ri(dst,~15);\n"); + + if ((opcode & 0xfff8) == 0xf620) { + comprintf("\tif (srcreg != dstreg)\n"); + comprintf("\tarm_ADD_l_ri8(srcreg+8,16);\n"); + comprintf("\tarm_ADD_l_ri8(dstreg+8,16);\n"); + } else if ((opcode & 0xfff8) == 0xf600) + comprintf("\tarm_ADD_l_ri8(srcreg+8,16);\n"); + else if ((opcode & 0xfff8) == 0xf608) + comprintf("\tarm_ADD_l_ri8(dstreg+8,16);\n"); + + start_brace(); + comprintf("\tint tmp=scratchie;\n"); + comprintf("\tscratchie+=4;\n"); + + comprintf("\tget_n_addr(src,src,scratchie);\n" + "\tget_n_addr(dst,dst,scratchie);\n" + "\tmov_l_rR(tmp+0,src,0);\n" + "\tmov_l_rR(tmp+1,src,4);\n" + "\tmov_l_rR(tmp+2,src,8);\n" + "\tmov_l_rR(tmp+3,src,12);\n" + "\tmov_l_Rr(dst,tmp+0,0);\n" + "\tforget_about(tmp+0);\n" + "\tmov_l_Rr(dst,tmp+1,4);\n" + "\tforget_about(tmp+1);\n" + "\tmov_l_Rr(dst,tmp+2,8);\n" + "\tforget_about(tmp+2);\n" + "\tmov_l_Rr(dst,tmp+3,12);\n"); + close_brace(); +#endif +} + +static void genmovemel(uae_u16 opcode) { + comprintf("\tuae_u16 mask = %s;\n", gen_nextiword()); + comprintf("\tint native=scratchie++;\n"); + comprintf("\tint i;\n"); + comprintf("\tsigned char offset=0;\n"); + genamode(table68k[opcode].dmode, "dstreg", table68k[opcode].size, "src", 2, + 1); + comprintf("\tget_n_addr(srca,native,scratchie);\n"); + + comprintf("\tfor (i=0;i<16;i++) {\n" + "\t\tif ((mask>>i)&1) {\n"); + switch (table68k[opcode].size) { + case sz_long: + comprintf("\t\t\tmov_l_rR(i,native,offset);\n" + "\t\t\tmid_bswap_32(i);\n" + "\t\t\toffset+=4;\n"); + break; + case sz_word: + comprintf("\t\t\tmov_w_rR(i,native,offset);\n" + "\t\t\tmid_bswap_16(i);\n" + "\t\t\tsign_extend_16_rr(i,i);\n" + "\t\t\toffset+=2;\n"); + break; + default: + assert(0); + 
break; + } + comprintf("\t\t}\n" + "\t}"); + if (table68k[opcode].dmode == Aipi) { + comprintf("\t\t\tlea_l_brr(8+dstreg,srca,offset);\n"); + } +} + +static void genmovemle(uae_u16 opcode) { + comprintf("\tuae_u16 mask = %s;\n", gen_nextiword()); + comprintf("\tint native=scratchie++;\n"); + comprintf("\tint i;\n"); + comprintf("\tint tmp=scratchie++;\n"); + comprintf("\tsigned char offset=0;\n"); + genamode(table68k[opcode].dmode, "dstreg", table68k[opcode].size, "src", 2, + 1); + + comprintf("\tget_n_addr(srca,native,scratchie);\n"); + + if (table68k[opcode].dmode != Apdi) { + comprintf("\tfor (i=0;i<16;i++) {\n" + "\t\tif ((mask>>i)&1) {\n"); + switch (table68k[opcode].size) { + case sz_long: + comprintf("\t\t\tmov_l_rr(tmp,i);\n" + "\t\t\tmid_bswap_32(tmp);\n" + "\t\t\tmov_l_Rr(native,tmp,offset);\n" + "\t\t\toffset+=4;\n"); + break; + case sz_word: + comprintf("\t\t\tmov_l_rr(tmp,i);\n" + "\t\t\tmid_bswap_16(tmp);\n" + "\t\t\tmov_w_Rr(native,tmp,offset);\n" + "\t\t\toffset+=2;\n"); + break; + default: + assert(0); + break; + } + } else { /* Pre-decrement */ + comprintf("\tfor (i=0;i<16;i++) {\n" + "\t\tif ((mask>>i)&1) {\n"); + switch (table68k[opcode].size) { + case sz_long: + comprintf("\t\t\toffset-=4;\n" + "\t\t\tmov_l_rr(tmp,15-i);\n" + "\t\t\tmid_bswap_32(tmp);\n" + "\t\t\tmov_l_Rr(native,tmp,offset);\n"); + break; + case sz_word: + comprintf("\t\t\toffset-=2;\n" + "\t\t\tmov_l_rr(tmp,15-i);\n" + "\t\t\tmid_bswap_16(tmp);\n" + "\t\t\tmov_w_Rr(native,tmp,offset);\n"); + break; + default: + assert(0); + break; + } + } + + comprintf("\t\t}\n" + "\t}"); + if (table68k[opcode].dmode == Apdi) { + comprintf("\t\t\tlea_l_brr(8+dstreg,srca,(uae_s32)offset);\n"); + } +} + +static void duplicate_carry(void) { + comprintf("\tif (needed_flags&FLAG_X) duplicate_carry();\n"); +} + +typedef enum { + flag_logical_noclobber, + flag_logical, + flag_add, + flag_sub, + flag_cmp, + flag_addx, + flag_subx, + flag_zn, + flag_av, + flag_sv, + flag_and, + flag_or, + flag_eor, + 
flag_mov +} flagtypes; + +#if !defined(USE_JIT2) +static void genflags(flagtypes type, wordsizes size, const char *value, const char *src, const char *dst) +{ + if (noflags) { + switch (type) { + case flag_cmp: + comprintf("\tdont_care_flags();\n"); + comprintf("/* Weird --- CMP with noflags ;-) */\n"); + return; + case flag_add: + case flag_sub: + comprintf("\tdont_care_flags();\n"); + { + const char* op; + switch (type) { + case flag_add: + op = "add"; + break; // nf + case flag_sub: + op = "sub"; + break; // nf + default: + assert(0); + break; + } + switch (size) { + case sz_byte: + comprintf("\t%s_b(%s,%s);\n", op, dst, src); + break; + case sz_word: + comprintf("\t%s_w(%s,%s);\n", op, dst, src); + break; + case sz_long: + comprintf("\t%s_l(%s,%s);\n", op, dst, src); + break; + } + return; + } + break; + + case flag_and: + comprintf("\tdont_care_flags();\n"); + switch (size) { + case sz_byte: + comprintf("if (kill_rodent(dst)) {\n"); + comprintf("\tzero_extend_8_rr(scratchie,%s);\n", src); + comprintf("\tor_l_ri(scratchie,0xffffff00);\n"); // nf + comprintf("\tarm_AND_l(%s,scratchie);\n", dst); + comprintf("\tforget_about(scratchie);\n"); + comprintf("\t} else \n" + "\tarm_AND_b(%s,%s);\n", dst, src); + break; + case sz_word: + comprintf("if (kill_rodent(dst)) {\n"); + comprintf("\tzero_extend_16_rr(scratchie,%s);\n", src); + comprintf("\tor_l_ri(scratchie,0xffff0000);\n"); // nf + comprintf("\tarm_AND_l(%s,scratchie);\n", dst); + comprintf("\tforget_about(scratchie);\n"); + comprintf("\t} else \n" + "\tarm_AND_w(%s,%s);\n", dst, src); + break; + case sz_long: + comprintf("\tarm_AND_l(%s,%s);\n", dst, src); + break; + } + return; + + case flag_mov: + comprintf("\tdont_care_flags();\n"); + switch (size) { + case sz_byte: + comprintf("if (kill_rodent(dst)) {\n"); + comprintf("\tzero_extend_8_rr(scratchie,%s);\n", src); + comprintf("\tand_l_ri(%s,0xffffff00);\n", dst); // nf + comprintf("\tarm_ORR_l(%s,scratchie);\n", dst); + 
comprintf("\tforget_about(scratchie);\n"); + comprintf("\t} else \n" + "\tmov_b_rr(%s,%s);\n", dst, src); + break; + case sz_word: + comprintf("if (kill_rodent(dst)) {\n"); + comprintf("\tzero_extend_16_rr(scratchie,%s);\n", src); + comprintf("\tand_l_ri(%s,0xffff0000);\n", dst); // nf + comprintf("\tarm_ORR_l(%s,scratchie);\n", dst); + comprintf("\tforget_about(scratchie);\n"); + comprintf("\t} else \n" + "\tmov_w_rr(%s,%s);\n", dst, src); + break; + case sz_long: + comprintf("\tmov_l_rr(%s,%s);\n", dst, src); + break; + } + return; + + case flag_or: + case flag_eor: + comprintf("\tdont_care_flags();\n"); + start_brace(); + { + const char* op; + switch (type) { + case flag_or: + op = "ORR"; + break; // nf + case flag_eor: + op = "EOR"; + break; // nf + default: + assert(0); + break; + } + switch (size) { + case sz_byte: + comprintf("if (kill_rodent(dst)) {\n"); + comprintf("\tzero_extend_8_rr(scratchie,%s);\n", src); + comprintf("\tarm_%s_l(%s,scratchie);\n", op, dst); + comprintf("\tforget_about(scratchie);\n"); + comprintf("\t} else \n" + "\tarm_%s_b(%s,%s);\n", op, dst, src); + break; + case sz_word: + comprintf("if (kill_rodent(dst)) {\n"); + comprintf("\tzero_extend_16_rr(scratchie,%s);\n", src); + comprintf("\tarm_%s_l(%s,scratchie);\n", op, dst); + comprintf("\tforget_about(scratchie);\n"); + comprintf("\t} else \n" + "\tarm_%s_w(%s,%s);\n", op, dst, src); + break; + case sz_long: + comprintf("\tarm_%s_l(%s,%s);\n", op, dst, src); + break; + } + close_brace(); + return; + } + + case flag_addx: + case flag_subx: + comprintf("\tdont_care_flags();\n"); + { + const char* op; + switch (type) { + case flag_addx: + op = "adc"; + break; + case flag_subx: + op = "sbb"; + break; + default: + assert(0); + break; + } + comprintf("\trestore_carry();\n"); /* Reload the X flag into C */ + switch (size) { + case sz_byte: + comprintf("\t%s_b(%s,%s);\n", op, dst, src); + break; + case sz_word: + comprintf("\t%s_w(%s,%s);\n", op, dst, src); + break; + case sz_long: + 
comprintf("\t%s_l(%s,%s);\n", op, dst, src); + break; + } + return; + } + break; + default: + return; + } + } + + /* Need the flags, but possibly not all of them */ + switch (type) { + case flag_logical_noclobber: + failure; + /* fall through */ + + case flag_and: + case flag_or: + case flag_eor: + comprintf("\tdont_care_flags();\n"); + start_brace(); + { + const char* op; + switch (type) { + case flag_and: + op = "and"; + break; + case flag_or: + op = "or"; + break; + case flag_eor: + op = "xor"; + break; + default: + assert(0); + break; + } + switch (size) { + case sz_byte: + comprintf("\tstart_needflags();\n" + "\t%s_b(%s,%s);\n", op, dst, src); + break; + case sz_word: + comprintf("\tstart_needflags();\n" + "\t%s_w(%s,%s);\n", op, dst, src); + break; + case sz_long: + comprintf("\tstart_needflags();\n" + "\t%s_l(%s,%s);\n", op, dst, src); + break; + } + comprintf("\tlive_flags();\n"); + comprintf("\tend_needflags();\n"); + close_brace(); + return; + } + + case flag_mov: + comprintf("\tdont_care_flags();\n"); + start_brace(); + { + switch (size) { + case sz_byte: + comprintf("\tif (%s!=%s) {\n", src, dst); + comprintf("\tmov_b_ri(%s,0);\n" + "\tstart_needflags();\n", dst); + comprintf("\tor_b(%s,%s);\n", dst, src); + comprintf("\t} else {\n"); + comprintf("\tmov_b_rr(%s,%s);\n", dst, src); + comprintf("\ttest_b_rr(%s,%s);\n", dst, dst); + comprintf("\t}\n"); + break; + case sz_word: + comprintf("\tif (%s!=%s) {\n", src, dst); + comprintf("\tmov_w_ri(%s,0);\n" + "\tstart_needflags();\n", dst); + comprintf("\tor_w(%s,%s);\n", dst, src); + comprintf("\t} else {\n"); + comprintf("\tmov_w_rr(%s,%s);\n", dst, src); + comprintf("\ttest_w_rr(%s,%s);\n", dst, dst); + comprintf("\t}\n"); + break; + case sz_long: + comprintf("\tif (%s!=%s) {\n", src, dst); + comprintf("\tmov_l_ri(%s,0);\n" + "\tstart_needflags();\n", dst); + comprintf("\tor_l(%s,%s);\n", dst, src); + comprintf("\t} else {\n"); + comprintf("\tmov_l_rr(%s,%s);\n", dst, src); + 
comprintf("\ttest_l_rr(%s,%s);\n", dst, dst); + comprintf("\t}\n"); + break; + } + comprintf("\tlive_flags();\n"); + comprintf("\tend_needflags();\n"); + close_brace(); + return; + } + + case flag_logical: + comprintf("\tdont_care_flags();\n"); + start_brace(); + switch (size) { + case sz_byte: + comprintf("\tstart_needflags();\n" + "\ttest_b_rr(%s,%s);\n", value, value); + break; + case sz_word: + comprintf("\tstart_needflags();\n" + "\ttest_w_rr(%s,%s);\n", value, value); + break; + case sz_long: + comprintf("\tstart_needflags();\n" + "\ttest_l_rr(%s,%s);\n", value, value); + break; + } + comprintf("\tlive_flags();\n"); + comprintf("\tend_needflags();\n"); + close_brace(); + return; + + case flag_add: + case flag_sub: + case flag_cmp: + comprintf("\tdont_care_flags();\n"); + { + const char* op; + switch (type) { + case flag_add: + op = "add"; + break; + case flag_sub: + op = "sub"; + break; + case flag_cmp: + op = "cmp"; + break; + default: + assert(0); + break; + } + switch (size) { + case sz_byte: + comprintf("\tstart_needflags();\n" + "\t%s_b(%s,%s);\n", op, dst, src); + break; + case sz_word: + comprintf("\tstart_needflags();\n" + "\t%s_w(%s,%s);\n", op, dst, src); + break; + case sz_long: + comprintf("\tstart_needflags();\n" + "\t%s_l(%s,%s);\n", op, dst, src); + break; + } + comprintf("\tlive_flags();\n"); + comprintf("\tend_needflags();\n"); + if (type != flag_cmp) { + duplicate_carry(); + } + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + + return; + } + + case flag_addx: + case flag_subx: + uses_cmov; + comprintf("\tdont_care_flags();\n"); + { + const char* op; + switch (type) { + case flag_addx: + op = "adc"; + break; + case flag_subx: + op = "sbb"; + break; + default: + assert(0); + break; + } + start_brace(); + comprintf("\tint zero=scratchie++;\n" + "\tint one=scratchie++;\n" + "\tif (needed_flags&FLAG_Z) {\n" + "\tmov_l_ri(zero,0);\n" + "\tmov_l_ri(one,-1);\n" + "\tmake_flags_live();\n" + "\tcmov_l_rr(zero,one,%d);\n" + 
"\t}\n", NATIVE_CC_NE); + comprintf("\trestore_carry();\n"); /* Reload the X flag into C */ + switch (size) { + case sz_byte: + comprintf("\tstart_needflags();\n" + "\t%s_b(%s,%s);\n", op, dst, src); + break; + case sz_word: + comprintf("\tstart_needflags();\n" + "\t%s_w(%s,%s);\n", op, dst, src); + break; + case sz_long: + comprintf("\tstart_needflags();\n" + "\t%s_l(%s,%s);\n", op, dst, src); + break; + } + comprintf("\tlive_flags();\n"); + comprintf("\tif (needed_flags&FLAG_Z) {\n" + "\tcmov_l_rr(zero,one,%d);\n" + "\tset_zero(zero, one);\n" /* No longer need one */ + "\tlive_flags();\n" + "\t}\n", NATIVE_CC_NE); + comprintf("\tend_needflags();\n"); + duplicate_carry(); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + return; + } + default: + failure; + break; + } +} +#endif + +static void gen_abcd(uae_u32 opcode, struct instr *curi, const char* ssize) { +#if 0 +#else + (void) opcode; + (void) curi; + (void) ssize; + failure; + /* No BCD maths for me.... */ +#endif +} + +static void gen_add(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + + comprintf("\t dont_care_flags();\n"); + start_brace(); + comprintf("\t int tmp=scratchie++;\n"); + // Use tmp register to avoid destroying upper part in .B., .W cases + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_ADD_%s(tmp,dst,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + duplicate_carry(); + comprintf( + "\t if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } else { + comprintf("\t jnf_ADD(tmp,dst,src);\n"); + } + genastore("tmp", curi->dmode, "dstreg", curi->size, "dst"); +#else + (void) ssize; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genflags(flag_add, curi->size, 
"", "src", "dst"); + genastore("dst", curi->dmode, "dstreg", curi->size, "dst"); +#endif +} + +static void gen_adda(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", sz_long, "dst", 1, 0); + start_brace(); + comprintf("\t jnf_ADDA_%s(dst, src);\n", ssize); + genastore("dst", curi->dmode, "dstreg", sz_long, "dst"); +#else + (void) ssize; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", sz_long, "dst", 1, 0); + start_brace(); + comprintf("\tint tmp=scratchie++;\n"); + switch (curi->size) { + case sz_byte: + comprintf("\tsign_extend_8_rr(tmp,src);\n"); + break; + case sz_word: + comprintf("\tsign_extend_16_rr(tmp,src);\n"); + break; + case sz_long: + comprintf("\ttmp=src;\n"); + break; + default: + assert(0); + break; + } + comprintf("\tarm_ADD_l(dst,tmp);\n"); + genastore("dst", curi->dmode, "dstreg", sz_long, "dst"); +#endif +} + +static void gen_addx(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + isaddx; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + start_brace(); + + // Use tmp register to avoid destroying upper part in .B., .W cases + comprintf("\t dont_care_flags();\n"); + comprintf("\t int tmp=scratchie++;\n"); + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t restore_carry();\n"); /* Reload the X flag into C */ + comprintf("\t start_needflags();\n"); + comprintf("\t jff_ADDX_%s(tmp,dst,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + duplicate_carry(); + comprintf("\t if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } else { + comprintf("\t restore_carry();\n"); /* Reload the X flag into C */ + comprintf("\t jnf_ADDX(tmp,dst,src);\n"); + } + genastore("tmp", curi->dmode, 
"dstreg", curi->size, "dst"); +#else + (void) ssize; + isaddx; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + start_brace(); + genflags(flag_addx, curi->size, "", "src", "dst"); + genastore("dst", curi->dmode, "dstreg", curi->size, "dst"); +#endif +} + +static void gen_and(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + + comprintf("\t dont_care_flags();\n"); + comprintf("\t int tmp=scratchie++;\n"); + start_brace(); + if (!noflags) { + comprintf("\t jff_AND_%s(tmp,dst,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_AND(tmp,dst,src);\n"); + } + genastore("tmp", curi->dmode, "dstreg", curi->size, "dst"); +#else + (void) ssize; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genflags(flag_and, curi->size, "", "src", "dst"); + genastore("dst", curi->dmode, "dstreg", curi->size, "dst"); +#endif +} + +static void gen_andsr(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t start_needflags();\n"); + comprintf("\t jff_ANDSR(ARM_CCR_MAP[src & 0xF], (src & 0x10));\n"); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } +#else + (void) curi; + failure; + isjump; +#endif +} + +static void gen_asl(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + mayfail; + if (curi->smode == Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + comprintf("\t 
dont_care_flags();\n"); + comprintf("\t int tmp=scratchie++;\n"); + + genamode(curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "data", 1, 0); + + if (curi->smode != immi) { + if (!noflags) { + start_brace(); + comprintf("\t make_flags_live();\n"); + comprintf("\t start_needflags();\n"); + comprintf("\t jff_ASL_%s_reg(tmp,data,cnt);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf( + "\t if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } else { + start_brace(); + comprintf("\t jnf_LSL_reg(tmp,data,cnt);\n"); + } + } else { + start_brace(); + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t start_needflags();\n"); + comprintf("\t jff_ASL_%s_imm(tmp,data,srcreg);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf( + "\t if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } else { + comprintf("\t jnf_LSL_imm(tmp,data,srcreg);\n"); + } + } + genastore("tmp", curi->dmode, "dstreg", curi->size, "data"); +#else + (void) ssize; + + mayfail; + if (curi->smode == Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + comprintf("\tdont_care_flags();\n"); + /* Except for the handling of the V flag, this is identical to + LSL. The handling of V is, uhm, unpleasant, so if it's needed, + let the normal emulation handle it. 
Shoulders of giants kinda + thing ;-) */ + comprintf("if (needed_flags & FLAG_V) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + + genamode(curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "data", 1, 0); + if (curi->smode != immi) { + if (!noflags) { + uses_cmov; + start_brace(); + comprintf("\tint highmask;\n" + "\tint cdata=scratchie++;\n" + "\tint tmpcnt=scratchie++;\n"); + comprintf("\tmov_l_rr(tmpcnt,cnt);\n" + "\tand_l_ri(tmpcnt,63);\n" + "\tmov_l_ri(cdata,0);\n" + "\tcmov_l_rr(cdata,data,%d);\n", NATIVE_CC_NE); + /* cdata is now either data (for shift count!=0) or + 0 (for shift count==0) */ + switch (curi->size) { + case sz_byte: + comprintf("\tshll_b_rr(data,cnt);\n" + "\thighmask=0x38;\n"); + break; + case sz_word: + comprintf("\tshll_w_rr(data,cnt);\n" + "\thighmask=0x30;\n"); + break; + case sz_long: + comprintf("\tshll_l_rr(data,cnt);\n" + "\thighmask=0x20;\n"); + break; + default: + assert(0); + break; + } + comprintf("test_l_ri(cnt,highmask);\n" + "mov_l_ri(scratchie,0);\n" + "cmov_l_rr(scratchie,data,%d);\n", NATIVE_CC_EQ); + switch (curi->size) { + case sz_byte: + comprintf("\tmov_b_rr(data,scratchie);\n"); + break; + case sz_word: + comprintf("\tmov_w_rr(data,scratchie);\n"); + break; + case sz_long: + comprintf("\tmov_l_rr(data,scratchie);\n"); + break; + default: + assert(0); + break; + } + /* Result of shift is now in data. 
Now we need to determine + the carry by shifting cdata one less */ + comprintf("\tsub_l_ri(tmpcnt,1);\n"); + switch (curi->size) { + case sz_byte: + comprintf("\tshll_b_rr(cdata,tmpcnt);\n"); + break; + case sz_word: + comprintf("\tshll_w_rr(cdata,tmpcnt);\n"); + break; + case sz_long: + comprintf("\tshll_l_rr(cdata,tmpcnt);\n"); + break; + default: + assert(0); + break; + } + comprintf("test_l_ri(tmpcnt,highmask);\n" + "mov_l_ri(scratchie,0);\n" + "cmov_l_rr(cdata,scratchie,%d);\n", NATIVE_CC_NE); + /* And create the flags */ + comprintf("\tstart_needflags();\n"); + + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch (curi->size) { + case sz_byte: + comprintf("\t test_b_rr(data,data);\n"); + comprintf("\t bt_l_ri(cdata,7);\n"); + break; + case sz_word: + comprintf("\t test_w_rr(data,data);\n"); + comprintf("\t bt_l_ri(cdata,15);\n"); + break; + case sz_long: + comprintf("\t test_l_rr(data,data);\n"); + comprintf("\t bt_l_ri(cdata,31);\n"); + break; + } + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + genastore("data", curi->dmode, "dstreg", curi->size, "data"); + } else { + uses_cmov; + start_brace(); + comprintf("\tint highmask;\n"); + switch (curi->size) { + case sz_byte: + comprintf("\tshll_b_rr(data,cnt);\n" + "\thighmask=0x38;\n"); + break; + case sz_word: + comprintf("\tshll_w_rr(data,cnt);\n" + "\thighmask=0x30;\n"); + break; + case sz_long: + comprintf("\tshll_l_rr(data,cnt);\n" + "\thighmask=0x20;\n"); + break; + default: + assert(0); + break; + } + comprintf("test_l_ri(cnt,highmask);\n" + "mov_l_ri(scratchie,0);\n" + "cmov_l_rr(scratchie,data,%d);\n", NATIVE_CC_EQ); + switch (curi->size) { + case sz_byte: + comprintf("\tmov_b_rr(data,scratchie);\n"); + break; + case sz_word: + comprintf("\tmov_w_rr(data,scratchie);\n"); + break; + case sz_long: + comprintf("\tmov_l_rr(data,scratchie);\n"); + break; + default: + 
assert(0); + break; + } + genastore("data", curi->dmode, "dstreg", curi->size, "data"); + } + } else { + start_brace(); + comprintf("\tint tmp=scratchie++;\n" + "\tint bp;\n" + "\tmov_l_rr(tmp,data);\n"); + switch (curi->size) { + case sz_byte: + comprintf("\tshll_b_ri(data,srcreg);\n" + "\tbp=8-srcreg;\n"); + break; + case sz_word: + comprintf("\tshll_w_ri(data,srcreg);\n" + "\tbp=16-srcreg;\n"); + break; + case sz_long: + comprintf("\tshll_l_ri(data,srcreg);\n" + "\tbp=32-srcreg;\n"); + break; + default: + assert(0); + break; + } + + if (!noflags) { + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch (curi->size) { + case sz_byte: + comprintf("\t test_b_rr(data,data);\n"); + break; + case sz_word: + comprintf("\t test_w_rr(data,data);\n"); + break; + case sz_long: + comprintf("\t test_l_rr(data,data);\n"); + break; + } + comprintf("\t bt_l_ri(tmp,bp);\n"); /* Set C */ + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } + genastore("data", curi->dmode, "dstreg", curi->size, "data"); + } +#endif +} + +static void gen_aslw(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + comprintf("\t dont_care_flags();\n"); + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace(); + comprintf("\t int tmp=scratchie++;\n"); + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_ASLW(tmp,src);\n"); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_ASLW(tmp,src);\n"); + } + genastore("tmp", curi->smode, "srcreg", curi->size, "src"); +#else + (void) curi; + failure; +#endif +} + +static void gen_asr(uae_u32 opcode, struct instr *curi, const char* ssize) { +#if defined(USE_JIT2) + (void)opcode; + + mayfail; + if (curi->smode == Dreg) { + comprintf("if 
((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + comprintf("\t dont_care_flags();\n"); + + genamode(curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "data", 1, 0); + + start_brace(); + comprintf("\t int tmp=scratchie++;\n"); + if (curi->smode != immi) { + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t start_needflags();\n"); + comprintf("\t jff_ASR_%s_reg(tmp,data,cnt);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf( + "if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } else { + comprintf("\t jnf_ASR_%s_reg(tmp,data,cnt);\n", ssize); + } + } else { + char *op; + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t start_needflags();\n"); + op = "ff"; + } else + op = "nf"; + + comprintf("\t j%s_ASR_%s_imm(tmp,data,srcreg);\n", op, ssize); + if (!noflags) { + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf( + "\t if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } + } + genastore("tmp", curi->dmode, "dstreg", curi->size, "data"); +#else + (void) opcode; + (void) ssize; + + mayfail; + if (curi->smode == Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + comprintf("\tdont_care_flags();\n"); + + genamode(curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "data", 1, 0); + if (curi->smode != immi) { + if (!noflags) { + uses_cmov; + start_brace(); + comprintf("\tint highmask;\n" + "\tint width;\n" + "\tint cdata=scratchie++;\n" + "\tint tmpcnt=scratchie++;\n" + "\tint highshift=scratchie++;\n"); + comprintf("\tmov_l_rr(tmpcnt,cnt);\n" + "\tand_l_ri(tmpcnt,63);\n" + "\tmov_l_ri(cdata,0);\n" + "\tcmov_l_rr(cdata,data,%d);\n", 
NATIVE_CC_NE); + /* cdata is now either data (for shift count!=0) or + 0 (for shift count==0) */ + switch (curi->size) { + case sz_byte: + comprintf("\tshra_b_rr(data,cnt);\n" + "\thighmask=0x38;\n" + "\twidth=8;\n"); + break; + case sz_word: + comprintf("\tshra_w_rr(data,cnt);\n" + "\thighmask=0x30;\n" + "\twidth=16;\n"); + break; + case sz_long: + comprintf("\tshra_l_rr(data,cnt);\n" + "\thighmask=0x20;\n" + "\twidth=32;\n"); + break; + default: + assert(0); + break; + } + comprintf("test_l_ri(cnt,highmask);\n" + "mov_l_ri(highshift,0);\n" + "mov_l_ri(scratchie,width/2);\n" + "cmov_l_rr(highshift,scratchie,%d);\n", NATIVE_CC_NE); + /* The x86 masks out bits, so we now make sure that things + really get shifted as much as planned */ + switch (curi->size) { + case sz_byte: + comprintf("\tshra_b_rr(data,highshift);\n"); + break; + case sz_word: + comprintf("\tshra_w_rr(data,highshift);\n"); + break; + case sz_long: + comprintf("\tshra_l_rr(data,highshift);\n"); + break; + default: + assert(0); + break; + } + /* And again */ + switch (curi->size) { + case sz_byte: + comprintf("\tshra_b_rr(data,highshift);\n"); + break; + case sz_word: + comprintf("\tshra_w_rr(data,highshift);\n"); + break; + case sz_long: + comprintf("\tshra_l_rr(data,highshift);\n"); + break; + default: + assert(0); + break; + } + + /* Result of shift is now in data. 
Now we need to determine + the carry by shifting cdata one less */ + comprintf("\tsub_l_ri(tmpcnt,1);\n"); + switch (curi->size) { + case sz_byte: + comprintf("\tshra_b_rr(cdata,tmpcnt);\n"); + break; + case sz_word: + comprintf("\tshra_w_rr(cdata,tmpcnt);\n"); + break; + case sz_long: + comprintf("\tshra_l_rr(cdata,tmpcnt);\n"); + break; + default: + assert(0); + break; + } + /* If the shift count was higher than the width, we need + to pick up the sign from data */ + comprintf("test_l_ri(tmpcnt,highmask);\n" + "cmov_l_rr(cdata,data,%d);\n", NATIVE_CC_NE); + /* And create the flags */ + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch (curi->size) { + case sz_byte: + comprintf("\t test_b_rr(data,data);\n"); + break; + case sz_word: + comprintf("\t test_w_rr(data,data);\n"); + break; + case sz_long: + comprintf("\t test_l_rr(data,data);\n"); + break; + } + comprintf("\t bt_l_ri(cdata,0);\n"); /* Set C */ + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + genastore("data", curi->dmode, "dstreg", curi->size, "data"); + } else { + uses_cmov; + start_brace(); + comprintf("\tint highmask;\n" + "\tint width;\n" + "\tint highshift=scratchie++;\n"); + switch (curi->size) { + case sz_byte: + comprintf("\tshra_b_rr(data,cnt);\n" + "\thighmask=0x38;\n" + "\twidth=8;\n"); + break; + case sz_word: + comprintf("\tshra_w_rr(data,cnt);\n" + "\thighmask=0x30;\n" + "\twidth=16;\n"); + break; + case sz_long: + comprintf("\tshra_l_rr(data,cnt);\n" + "\thighmask=0x20;\n" + "\twidth=32;\n"); + break; + default: + assert(0); + break; + } + comprintf("test_l_ri(cnt,highmask);\n" + "mov_l_ri(highshift,0);\n" + "mov_l_ri(scratchie,width/2);\n" + "cmov_l_rr(highshift,scratchie,%d);\n", NATIVE_CC_NE); + /* The x86 masks out bits, so we now make sure that things + really get shifted as much as planned */ + switch 
(curi->size) { + case sz_byte: + comprintf("\tshra_b_rr(data,highshift);\n"); + break; + case sz_word: + comprintf("\tshra_w_rr(data,highshift);\n"); + break; + case sz_long: + comprintf("\tshra_l_rr(data,highshift);\n"); + break; + default: + assert(0); + break; + } + /* And again */ + switch (curi->size) { + case sz_byte: + comprintf("\tshra_b_rr(data,highshift);\n"); + break; + case sz_word: + comprintf("\tshra_w_rr(data,highshift);\n"); + break; + case sz_long: + comprintf("\tshra_l_rr(data,highshift);\n"); + break; + default: + assert(0); + break; + } + genastore("data", curi->dmode, "dstreg", curi->size, "data"); + } + } else { + start_brace(); + comprintf("\tint tmp=scratchie++;\n" + "\tint bp;\n" + "\tmov_l_rr(tmp,data);\n"); + switch (curi->size) { + case sz_byte: + comprintf("\tshra_b_ri(data,srcreg);\n" + "\tbp=srcreg-1;\n"); + break; + case sz_word: + comprintf("\tshra_w_ri(data,srcreg);\n" + "\tbp=srcreg-1;\n"); + break; + case sz_long: + comprintf("\tshra_l_ri(data,srcreg);\n" + "\tbp=srcreg-1;\n"); + break; + default: + assert(0); + break; + } + + if (!noflags) { + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch (curi->size) { + case sz_byte: + comprintf("\t test_b_rr(data,data);\n"); + break; + case sz_word: + comprintf("\t test_w_rr(data,data);\n"); + break; + case sz_long: + comprintf("\t test_l_rr(data,data);\n"); + break; + } + comprintf("\t bt_l_ri(tmp,bp);\n"); /* Set C */ + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } + genastore("data", curi->dmode, "dstreg", curi->size, "data"); + } +#endif +} + +static void gen_asrw(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + comprintf("\t dont_care_flags();\n"); + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace(); + 
comprintf("\t int tmp = scratchie++;\n"); + + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_ASRW(tmp,src);\n"); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_ASRW(tmp,src);\n"); + } + genastore("tmp", curi->smode, "srcreg", curi->size, "src"); +#else + (void) curi; + failure; +#endif +} + +static void gen_bchg(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + start_brace(); + + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t start_needflags();\n"); + comprintf("\t jff_BCHG_%s(dst,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_BCHG_%s(dst,src);\n", ssize); + comprintf("\t dont_care_flags();\n"); + } + genastore("dst", curi->dmode, "dstreg", curi->size, "dst"); +#else + (void) ssize; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + start_brace(); + comprintf("\tint s=scratchie++;\n" + "\tint tmp=scratchie++;\n" + "\tmov_l_rr(s,src);\n"); + if (curi->size == sz_byte) + comprintf("\tand_l_ri(s,7);\n"); + else + comprintf("\tand_l_ri(s,31);\n"); + + comprintf("\tbtc_l_rr(dst,s);\n" /* Answer now in C */ + "\tsbb_l(s,s);\n" /* s is 0 if bit was 0, -1 otherwise */ + "\tmake_flags_live();\n" /* Get the flags back */ + "\tdont_care_flags();\n"); + if (!noflags) { + comprintf("\tstart_needflags();\n" + "\tset_zero(s,tmp);\n" + "\tlive_flags();\n" + "\tend_needflags();\n"); + } + genastore("dst", curi->dmode, "dstreg", curi->size, "dst"); +#endif +} + +static void gen_bclr(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, 
"dstreg", curi->size, "dst", 1, 0); + start_brace(); + + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t start_needflags();\n"); + comprintf("\t jff_BCLR_%s(dst,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_BCLR_%s(dst,src);\n", ssize); + comprintf("\t dont_care_flags();\n"); + } + genastore("dst", curi->dmode, "dstreg", curi->size, "dst"); +#else + (void) ssize; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + start_brace(); + comprintf("\tint s=scratchie++;\n" + "\tint tmp=scratchie++;\n" + "\tmov_l_rr(s,src);\n"); + if (curi->size == sz_byte) + comprintf("\tand_l_ri(s,7);\n"); + else + comprintf("\tand_l_ri(s,31);\n"); + + comprintf("\tbtr_l_rr(dst,s);\n" /* Answer now in C */ + "\tsbb_l(s,s);\n" /* s is 0 if bit was 0, -1 otherwise */ + "\tmake_flags_live();\n" /* Get the flags back */ + "\tdont_care_flags();\n"); + if (!noflags) { + comprintf("\tstart_needflags();\n" + "\tset_zero(s,tmp);\n" + "\tlive_flags();\n" + "\tend_needflags();\n"); + } + genastore("dst", curi->dmode, "dstreg", curi->size, "dst"); +#endif +} + +static void gen_bset(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + start_brace(); + + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t start_needflags();\n"); + comprintf("\t jff_BSET_%s(dst,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_BSET_%s(dst,src);\n", ssize); + comprintf("\t dont_care_flags();\n"); + } + genastore("dst", curi->dmode, "dstreg", curi->size, "dst"); +#else + (void) ssize; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); 
+ start_brace(); + comprintf("\tint s=scratchie++;\n" + "\tint tmp=scratchie++;\n" + "\tmov_l_rr(s,src);\n"); + if (curi->size == sz_byte) + comprintf("\tand_l_ri(s,7);\n"); + else + comprintf("\tand_l_ri(s,31);\n"); + + comprintf("\tbts_l_rr(dst,s);\n" /* Answer now in C */ + "\tsbb_l(s,s);\n" /* s is 0 if bit was 0, -1 otherwise */ + "\tmake_flags_live();\n" /* Get the flags back */ + "\tdont_care_flags();\n"); + if (!noflags) { + comprintf("\tstart_needflags();\n" + "\tset_zero(s,tmp);\n" + "\tlive_flags();\n" + "\tend_needflags();\n"); + } + genastore("dst", curi->dmode, "dstreg", curi->size, "dst"); +#endif +} + +static void gen_btst(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + start_brace(); + + // If we are not interested in flags it is not necessary to do + // anything with the data + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t start_needflags();\n"); + comprintf("\t jff_BTST_%s(dst,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t dont_care_flags();\n"); + } +#else + (void) ssize; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + start_brace(); + comprintf("\tint s=scratchie++;\n" + "\tint tmp=scratchie++;\n" + "\tmov_l_rr(s,src);\n"); + if (curi->size == sz_byte) + comprintf("\tand_l_ri(s,7);\n"); + else + comprintf("\tand_l_ri(s,31);\n"); + + comprintf("\tbt_l_rr(dst,s);\n" /* Answer now in C */ + "\tsbb_l(s,s);\n" /* s is 0 if bit was 0, -1 otherwise */ + "\tmake_flags_live();\n" /* Get the flags back */ + "\tdont_care_flags();\n"); + if (!noflags) { + comprintf("\tstart_needflags();\n" + "\tset_zero(s,tmp);\n" + "\tlive_flags();\n" + "\tend_needflags();\n"); + } +#endif +} + +static void gen_clr(uae_u32 opcode, 
struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 2, 0); + comprintf("\t dont_care_flags();\n"); + start_brace(); + comprintf("\t int tmp=scratchie++;\n"); + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_CLR(tmp);\n"); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_CLR(tmp);\n"); + } + genastore("tmp", curi->smode, "srcreg", curi->size, "src"); +#else + genamode(curi->smode, "srcreg", curi->size, "src", 2, 0); + start_brace(); + comprintf("\tint dst=scratchie++;\n"); + comprintf("\tmov_l_ri(dst,0);\n"); + genflags(flag_logical, curi->size, "dst", "", ""); + genastore("dst", curi->smode, "srcreg", curi->size, "src"); +#endif +} + +static void gen_cmp(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + start_brace(); + comprintf("\t dont_care_flags();\n"); + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_CMP_%s(dst,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } else { + comprintf("/* Weird --- CMP with noflags ;-) */\n"); + } +#else + (void) ssize; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + start_brace(); + genflags(flag_cmp, curi->size, "", "src", "dst"); +#endif +} + +static void gen_cmpa(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", sz_long, "dst", 1, 0); + start_brace(); + if (!noflags) { + comprintf("\t dont_care_flags();\n"); + comprintf("\t 
start_needflags();\n"); + comprintf("\t jff_CMPA_%s(dst,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } else { + comprintf("\tdont_care_flags();\n"); + comprintf("/* Weird --- CMP with noflags ;-) */\n"); + } +#else + (void) ssize; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", sz_long, "dst", 1, 0); + start_brace(); + comprintf("\tint tmps=scratchie++;\n"); + switch (curi->size) { + case sz_byte: + comprintf("\tsign_extend_8_rr(tmps,src);\n"); + break; + case sz_word: + comprintf("\tsign_extend_16_rr(tmps,src);\n"); + break; + case sz_long: + comprintf("tmps=src;\n"); + break; + default: + assert(0); + break; + } + genflags(flag_cmp, sz_long, "", "tmps", "dst"); +#endif +} + +static void gen_dbcc(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if 0 + isjump; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "offs", 1, 0); + + comprintf("uae_u32 voffs;\n"); + comprintf("voffs = get_const(offs);\n"); + /* That offs is an immediate, so we can clobber it with abandon */ + switch (curi->size) { + case sz_word: + comprintf("\t voffs = (uae_s32)((uae_s16)voffs);\n"); + break; + default: + assert(0); /* Seems this only comes in word flavour */ + break; + } + comprintf("\t voffs -= m68k_pc_offset - m68k_pc_offset_thisinst - 2;\n"); + comprintf("\t voffs += (uintptr)comp_pc_p + m68k_pc_offset;\n"); + + comprintf("\t add_const_v(PC_P, m68k_pc_offset);\n"); + comprintf("\t m68k_pc_offset = 0;\n"); + + start_brace(); + + if (curi->cc >= 2) { + comprintf("\t make_flags_live();\n"); /* Load the flags */ + } + + assert(curi->size == sz_word); + + switch (curi->cc) { + case 0: /* This is an elaborate nop? 
*/ + break; + case 1: + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + case 8: + case 9: + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + comprintf("\t start_needflags();\n"); + comprintf("\t jnf_DBcc(src,voffs,%d);\n", curi->cc); + comprintf("\t end_needflags();\n"); + break; + default: + assert(0); + break; + } + genastore("src", curi->smode, "srcreg", curi->size, "src"); + gen_update_next_handler(); +#else + isjump; + uses_cmov; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "offs", 1, 0); + + /* That offs is an immediate, so we can clobber it with abandon */ + switch (curi->size) { + case sz_word: + comprintf("\tsign_extend_16_rr(offs,offs);\n"); + break; + default: + assert(0); /* Seems this only comes in word flavour */ + break; + } + comprintf("\tsub_l_ri(offs,m68k_pc_offset-m68k_pc_offset_thisinst-2);\n"); + comprintf("\tarm_ADD_l_ri(offs,(uintptr)comp_pc_p);\n"); + /* New PC, + once the + offset_68k is + * also added */ + /* Let's fold in the m68k_pc_offset at this point */ + comprintf("\tarm_ADD_l_ri(offs,m68k_pc_offset);\n"); + comprintf("\tarm_ADD_l_ri(PC_P,m68k_pc_offset);\n"); + comprintf("\tm68k_pc_offset=0;\n"); + + start_brace(); + comprintf("\tint nsrc=scratchie++;\n"); + + if (curi->cc >= 2) { + comprintf("\tmake_flags_live();\n"); /* Load the flags */ + } + + assert (curi->size == sz_word); + + switch (curi->cc) { + case 0: /* This is an elaborate nop? */ + break; + case 1: + comprintf("\tstart_needflags();\n"); + comprintf("\tsub_w_ri(src,1);\n"); + comprintf("\t end_needflags();\n"); + start_brace(); + comprintf("\tuae_u32 v2,v;\n" + "\tuae_u32 v1=get_const(PC_P);\n"); + comprintf("\tv2=get_const(offs);\n" + "\tregister_branch(v1,v2,%d);\n", NATIVE_CC_CC); + break; + + case 8: + failure; + break; /* Work out details! FIXME */ + case 9: + failure; + break; /* Not critical, though! 
*/ + + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + comprintf("\tmov_l_rr(nsrc,src);\n"); + comprintf("\tlea_l_brr(scratchie,src,(uae_s32)-1);\n" + "\tmov_w_rr(src,scratchie);\n"); + comprintf("\tcmov_l_rr(offs,PC_P,%d);\n", cond_codes[curi->cc]); + comprintf("\tcmov_l_rr(src,nsrc,%d);\n", cond_codes[curi->cc]); + /* OK, now for cc=true, we have src==nsrc and offs==PC_P, + so whether we move them around doesn't matter. However, + if cc=false, we have offs==jump_pc, and src==nsrc-1 */ + + comprintf("\t start_needflags();\n"); + comprintf("\ttest_w_rr(nsrc,nsrc);\n"); + comprintf("\t end_needflags();\n"); + comprintf("\tcmov_l_rr(PC_P,offs,%d);\n", NATIVE_CC_NE); + break; + default: + assert(0); + break; + } + genastore("src", curi->smode, "srcreg", curi->size, "src"); + gen_update_next_handler(); +#endif +} + +static void gen_eor(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + + comprintf("\t dont_care_flags();\n"); + start_brace(); + comprintf("\t int tmp=scratchie++;\n"); + if (!noflags) { + comprintf("\t jff_EOR_%s(tmp,dst,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_EOR(tmp,dst,src);\n"); + } + genastore("tmp", curi->dmode, "dstreg", curi->size, "dst"); +#else + (void) ssize; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genflags(flag_eor, curi->size, "", "src", "dst"); + genastore("dst", curi->dmode, "dstreg", curi->size, "dst"); +#endif +} + +static void gen_eorsr(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + if (!noflags) { + 
comprintf("\t make_flags_live();\n"); + comprintf("\t start_needflags();\n"); + comprintf("\t jff_EORSR(ARM_CCR_MAP[src & 0xF], ((src & 0x10) >> 4));\n"); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } +#else + (void) curi; + failure; + isjump; +#endif +} + +static void gen_exg(uae_u32 opcode, struct instr *curi, const char* ssize) { +#if 0 +#else + (void) opcode; + (void) ssize; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + start_brace(); + comprintf("\tint tmp=scratchie++;\n" + "\tmov_l_rr(tmp,src);\n"); + genastore("dst", curi->smode, "srcreg", curi->size, "src"); + genastore("tmp", curi->dmode, "dstreg", curi->size, "dst"); +#endif +} + +static void gen_ext(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", sz_long, "src", 1, 0); + comprintf("\t dont_care_flags();\n"); + start_brace(); + comprintf("\t int tmp=scratchie++;\n"); + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_EXT_%s(tmp,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_EXT_%s(tmp,src);\n", ssize); + } + genastore("tmp", curi->smode, "srcreg", + curi->size == sz_word ? sz_word : sz_long, "src"); +#else + (void) ssize; + genamode(curi->smode, "srcreg", sz_long, "src", 1, 0); + comprintf("\tdont_care_flags();\n"); + start_brace(); + switch (curi->size) { + case sz_byte: + comprintf("\tint dst = src;\n" + "\tsign_extend_8_rr(src,src);\n"); + break; + case sz_word: + comprintf("\tint dst = scratchie++;\n" + "\tsign_extend_8_rr(dst,src);\n"); + break; + case sz_long: + comprintf("\tint dst = src;\n" + "\tsign_extend_16_rr(src,src);\n"); + break; + default: + assert(0); + break; + } + genflags(flag_logical, curi->size == sz_word ? 
sz_word : sz_long, "dst", "", + ""); + genastore("dst", curi->smode, "srcreg", + curi->size == sz_word ? sz_word : sz_long, "src"); +#endif +} + +static void gen_lsl(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + mayfail; + if (curi->smode == Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + comprintf("\tdont_care_flags();\n"); + + genamode(curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "data", 1, 0); + comprintf("\t int tmp=scratchie++;\n"); + if (curi->smode != immi) { + if (!noflags) { + start_brace(); + comprintf("\t make_flags_live();\n"); + comprintf("\t start_needflags();\n"); + comprintf("\t jff_LSL_%s_reg(tmp,data,cnt);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf( + "\t if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } else { + start_brace(); + comprintf("\t jnf_LSL_reg(tmp,data,cnt);\n"); + } + } else { + start_brace(); + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t start_needflags();\n"); + comprintf("\t jff_LSL_%s_imm(tmp,data,srcreg);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf( + "\t if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } else { + comprintf("\t jnf_LSL_imm(tmp,data,srcreg);\n"); + } + } + genastore("tmp", curi->dmode, "dstreg", curi->size, "data"); +#else + mayfail; + if (curi->smode == Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + comprintf("\tdont_care_flags();\n"); + + genamode(curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "data", 1, 0); + if (curi->smode != immi) { + if (!noflags) { + 
uses_cmov; + start_brace(); + comprintf("\tint highmask;\n" + "\tint cdata=scratchie++;\n" + "\tint tmpcnt=scratchie++;\n"); + comprintf("\tmov_l_rr(tmpcnt,cnt);\n" + "\tand_l_ri(tmpcnt,63);\n" + "\tmov_l_ri(cdata,0);\n" + "\tcmov_l_rr(cdata,data,%d);\n", NATIVE_CC_NE); + /* cdata is now either data (for shift count!=0) or + 0 (for shift count==0) */ + switch (curi->size) { + case sz_byte: + comprintf("\tshll_b_rr(data,cnt);\n" + "\thighmask=0x38;\n"); + break; + case sz_word: + comprintf("\tshll_w_rr(data,cnt);\n" + "\thighmask=0x30;\n"); + break; + case sz_long: + comprintf("\tshll_l_rr(data,cnt);\n" + "\thighmask=0x20;\n"); + break; + default: + assert(0); + break; + } + comprintf("test_l_ri(cnt,highmask);\n" + "mov_l_ri(scratchie,0);\n" + "cmov_l_rr(scratchie,data,%d);\n", NATIVE_CC_EQ); + switch (curi->size) { + case sz_byte: + comprintf("\tmov_b_rr(data,scratchie);\n"); + break; + case sz_word: + comprintf("\tmov_w_rr(data,scratchie);\n"); + break; + case sz_long: + comprintf("\tmov_l_rr(data,scratchie);\n"); + break; + default: + assert(0); + break; + } + /* Result of shift is now in data. 
Now we need to determine + the carry by shifting cdata one less */ + comprintf("\tsub_l_ri(tmpcnt,1);\n"); + switch (curi->size) { + case sz_byte: + comprintf("\tshll_b_rr(cdata,tmpcnt);\n"); + break; + case sz_word: + comprintf("\tshll_w_rr(cdata,tmpcnt);\n"); + break; + case sz_long: + comprintf("\tshll_l_rr(cdata,tmpcnt);\n"); + break; + default: + assert(0); + break; + } + comprintf("test_l_ri(tmpcnt,highmask);\n" + "mov_l_ri(scratchie,0);\n" + "cmov_l_rr(cdata,scratchie,%d);\n", NATIVE_CC_NE); + /* And create the flags */ + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch (curi->size) { + case sz_byte: + comprintf("\t test_b_rr(data,data);\n"); + comprintf("\t bt_l_ri(cdata,7);\n"); + break; + case sz_word: + comprintf("\t test_w_rr(data,data);\n"); + comprintf("\t bt_l_ri(cdata,15);\n"); + break; + case sz_long: + comprintf("\t test_l_rr(data,data);\n"); + comprintf("\t bt_l_ri(cdata,31);\n"); + break; + } + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + genastore("data", curi->dmode, "dstreg", curi->size, "data"); + } else { + uses_cmov; + start_brace(); + comprintf("\tint highmask;\n"); + switch (curi->size) { + case sz_byte: + comprintf("\tshll_b_rr(data,cnt);\n" + "\thighmask=0x38;\n"); + break; + case sz_word: + comprintf("\tshll_w_rr(data,cnt);\n" + "\thighmask=0x30;\n"); + break; + case sz_long: + comprintf("\tshll_l_rr(data,cnt);\n" + "\thighmask=0x20;\n"); + break; + default: + assert(0); + break; + } + comprintf("test_l_ri(cnt,highmask);\n" + "mov_l_ri(scratchie,0);\n" + "cmov_l_rr(scratchie,data,%d);\n", NATIVE_CC_EQ); + switch (curi->size) { + case sz_byte: + comprintf("\tmov_b_rr(data,scratchie);\n"); + break; + case sz_word: + comprintf("\tmov_w_rr(data,scratchie);\n"); + break; + case sz_long: + comprintf("\tmov_l_rr(data,scratchie);\n"); + break; + default: + 
assert(0); + break; + } + genastore("data", curi->dmode, "dstreg", curi->size, "data"); + } + } else { + start_brace(); + comprintf("\tint tmp=scratchie++;\n" + "\tint bp;\n" + "\tmov_l_rr(tmp,data);\n"); + switch (curi->size) { + case sz_byte: + comprintf("\tshll_b_ri(data,srcreg);\n" + "\tbp=8-srcreg;\n"); + break; + case sz_word: + comprintf("\tshll_w_ri(data,srcreg);\n" + "\tbp=16-srcreg;\n"); + break; + case sz_long: + comprintf("\tshll_l_ri(data,srcreg);\n" + "\tbp=32-srcreg;\n"); + break; + default: + assert(0); + break; + } + + if (!noflags) { + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch (curi->size) { + case sz_byte: + comprintf("\t test_b_rr(data,data);\n"); + break; + case sz_word: + comprintf("\t test_w_rr(data,data);\n"); + break; + case sz_long: + comprintf("\t test_l_rr(data,data);\n"); + break; + } + comprintf("\t bt_l_ri(tmp,bp);\n"); /* Set C */ + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } + genastore("data", curi->dmode, "dstreg", curi->size, "data"); + } +#endif +} + +static void gen_lslw(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + comprintf("\t dont_care_flags();\n"); + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace(); + comprintf("\t int tmp=scratchie++;\n"); + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_LSLW(tmp,src);\n"); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_LSLW(tmp,src);\n"); + } + genastore("tmp", curi->smode, "srcreg", curi->size, "src"); +#else + (void) curi; + failure; +#endif +} + +static void gen_lsr(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + mayfail; + if (curi->smode == Dreg) { + comprintf("if 
((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + comprintf("\t dont_care_flags();\n"); + + genamode(curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "data", 1, 0); + comprintf("\t int tmp=scratchie++;\n"); + if (curi->smode != immi) { + if (!noflags) { + start_brace(); + comprintf("\t make_flags_live();\n"); + comprintf("\t start_needflags();\n"); + comprintf("\t jff_LSR_%s_reg(tmp,data,cnt);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } else { + start_brace(); + comprintf("\t jnf_LSR_%s_reg(tmp,data,cnt);\n", ssize); + } + } else { + start_brace(); + char *op; + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t start_needflags();\n"); + op = "ff"; + } else + op = "nf"; + + comprintf("\t j%s_LSR_%s_imm(tmp,data,srcreg);\n", op, ssize); + + if (!noflags) { + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf( + "\t if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } + } + genastore("tmp", curi->dmode, "dstreg", curi->size, "data"); +#else + (void) ssize; + mayfail; + if (curi->smode == Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + comprintf("\tdont_care_flags();\n"); + + genamode(curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "data", 1, 0); + if (curi->smode != immi) { + if (!noflags) { + uses_cmov; + start_brace(); + comprintf("\tint highmask;\n" + "\tint cdata=scratchie++;\n" + "\tint tmpcnt=scratchie++;\n"); + comprintf("\tmov_l_rr(tmpcnt,cnt);\n" + "\tand_l_ri(tmpcnt,63);\n" + "\tmov_l_ri(cdata,0);\n" + "\tcmov_l_rr(cdata,data,%d);\n", NATIVE_CC_NE); + /* cdata is now either data 
(for shift count!=0) or + 0 (for shift count==0) */ + switch (curi->size) { + case sz_byte: + comprintf("\tshrl_b_rr(data,cnt);\n" + "\thighmask=0x38;\n"); + break; + case sz_word: + comprintf("\tshrl_w_rr(data,cnt);\n" + "\thighmask=0x30;\n"); + break; + case sz_long: + comprintf("\tshrl_l_rr(data,cnt);\n" + "\thighmask=0x20;\n"); + break; + default: + assert(0); + break; + } + comprintf("test_l_ri(cnt,highmask);\n" + "mov_l_ri(scratchie,0);\n" + "cmov_l_rr(scratchie,data,%d);\n", NATIVE_CC_EQ); + switch (curi->size) { + case sz_byte: + comprintf("\tmov_b_rr(data,scratchie);\n"); + break; + case sz_word: + comprintf("\tmov_w_rr(data,scratchie);\n"); + break; + case sz_long: + comprintf("\tmov_l_rr(data,scratchie);\n"); + break; + default: + assert(0); + break; + } + /* Result of shift is now in data. Now we need to determine + the carry by shifting cdata one less */ + comprintf("\tsub_l_ri(tmpcnt,1);\n"); + switch (curi->size) { + case sz_byte: + comprintf("\tshrl_b_rr(cdata,tmpcnt);\n"); + break; + case sz_word: + comprintf("\tshrl_w_rr(cdata,tmpcnt);\n"); + break; + case sz_long: + comprintf("\tshrl_l_rr(cdata,tmpcnt);\n"); + break; + default: + assert(0); + break; + } + comprintf("test_l_ri(tmpcnt,highmask);\n" + "mov_l_ri(scratchie,0);\n" + "cmov_l_rr(cdata,scratchie,%d);\n", NATIVE_CC_NE); + /* And create the flags */ + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch (curi->size) { + case sz_byte: + comprintf("\t test_b_rr(data,data);\n"); + break; + case sz_word: + comprintf("\t test_w_rr(data,data);\n"); + break; + case sz_long: + comprintf("\t test_l_rr(data,data);\n"); + break; + } + comprintf("\t bt_l_ri(cdata,0);\n"); /* Set C */ + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + genastore("data", curi->dmode, "dstreg", curi->size, "data"); + } else { + uses_cmov; + 
start_brace(); + comprintf("\tint highmask;\n"); + switch (curi->size) { + case sz_byte: + comprintf("\tshrl_b_rr(data,cnt);\n" + "\thighmask=0x38;\n"); + break; + case sz_word: + comprintf("\tshrl_w_rr(data,cnt);\n" + "\thighmask=0x30;\n"); + break; + case sz_long: + comprintf("\tshrl_l_rr(data,cnt);\n" + "\thighmask=0x20;\n"); + break; + default: + assert(0); + break; + } + comprintf("test_l_ri(cnt,highmask);\n" + "mov_l_ri(scratchie,0);\n" + "cmov_l_rr(scratchie,data,%d);\n", NATIVE_CC_EQ); + switch (curi->size) { + case sz_byte: + comprintf("\tmov_b_rr(data,scratchie);\n"); + break; + case sz_word: + comprintf("\tmov_w_rr(data,scratchie);\n"); + break; + case sz_long: + comprintf("\tmov_l_rr(data,scratchie);\n"); + break; + default: + assert(0); + break; + } + genastore("data", curi->dmode, "dstreg", curi->size, "data"); + } + } else { + start_brace(); + comprintf("\tint tmp=scratchie++;\n" + "\tint bp;\n" + "\tmov_l_rr(tmp,data);\n"); + switch (curi->size) { + case sz_byte: + comprintf("\tshrl_b_ri(data,srcreg);\n" + "\tbp=srcreg-1;\n"); + break; + case sz_word: + comprintf("\tshrl_w_ri(data,srcreg);\n" + "\tbp=srcreg-1;\n"); + break; + case sz_long: + comprintf("\tshrl_l_ri(data,srcreg);\n" + "\tbp=srcreg-1;\n"); + break; + default: + assert(0); + break; + } + + if (!noflags) { + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch (curi->size) { + case sz_byte: + comprintf("\t test_b_rr(data,data);\n"); + break; + case sz_word: + comprintf("\t test_w_rr(data,data);\n"); + break; + case sz_long: + comprintf("\t test_l_rr(data,data);\n"); + break; + } + comprintf("\t bt_l_ri(tmp,bp);\n"); /* Set C */ + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } + genastore("data", curi->dmode, "dstreg", curi->size, "data"); + } +#endif +} + +static void gen_lsrw(uae_u32 opcode, struct instr 
*curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + comprintf("\t dont_care_flags();\n"); + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace(); + comprintf("\t int tmp = scratchie++;\n"); + + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_LSRW(tmp,src);\n"); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_LSRW(tmp,src);\n"); + } + genastore("tmp", curi->smode, "srcreg", curi->size, "src"); +#else + (void) curi; + failure; +#endif +} + +static void gen_move(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + switch (curi->dmode) { + case Dreg: + case Areg: + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 2, 0); + comprintf("\t dont_care_flags();\n"); + start_brace(); + comprintf("\t int tmp=scratchie++;\n"); + if (!noflags && curi->dmode == Dreg) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_MOVE_%s(tmp, src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t tmp = src;\n"); + } + genastore("tmp", curi->dmode, "dstreg", curi->size, "dst"); + break; + + default: /* It goes to memory, not a register */ + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 2, 0); + comprintf("\t dont_care_flags();\n"); + start_brace(); + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_TST_%s(src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } + genastore("src", curi->dmode, "dstreg", curi->size, "dst"); + break; + } +#else + (void) ssize; + + switch (curi->dmode) { + case Dreg: + case Areg: + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 2, 0); + genflags(flag_mov, 
curi->size, "", "src", "dst"); + genastore("dst", curi->dmode, "dstreg", curi->size, "dst"); + break; + default: /* It goes to memory, not a register */ + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 2, 0); + genflags(flag_logical, curi->size, "src", "", ""); + genastore("src", curi->dmode, "dstreg", curi->size, "dst"); + break; + } +#endif +} + +static void gen_movea(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 2, 0); + + start_brace(); + comprintf("\t jnf_MOVEA_%s(dst, src);\n", ssize); + genastore("dst", curi->dmode, "dstreg", sz_long, "dst"); +#else + (void) ssize; + + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 2, 0); + + start_brace(); + comprintf("\tint tmps=scratchie++;\n"); + switch (curi->size) { + case sz_word: + comprintf("\tsign_extend_16_rr(dst,src);\n"); + break; + case sz_long: + comprintf("\tmov_l_rr(dst,src);\n"); + break; + default: + assert(0); + break; + } + genastore("dst", curi->dmode, "dstreg", sz_long, "dst"); +#endif +} + +static void gen_mull(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + comprintf("\t uae_u16 extra=%s;\n", gen_nextiword()); + comprintf("\t int r2=(extra>>12)&7;\n" + "\t int tmp=scratchie++;\n"); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + /* The two operands are in dst and r2 */ + if (!noflags) { + comprintf("\t if (extra & 0x0400) {\n"); /* Need full 64 bit result */ + comprintf("\t int r3=(extra & 7);\n"); + comprintf("\t mov_l_rr(r3,dst);\n"); /* operands now in r3 and r2 */ + comprintf("\t if (extra & 0x0800) { \n"); /* signed */ + comprintf("\t\t jff_MULS64(r2,r3);\n"); + comprintf("\t } else { \n"); + comprintf("\t\t 
jff_MULU64(r2,r3);\n"); + comprintf("\t } \n"); /* The result is in r2/r3, with r2 holding the lower 32 bits */ + comprintf("\t } else {\n"); /* Only want 32 bit result */ + /* operands in dst and r2, result goes into r2 */ + /* shouldn't matter whether it's signed or unsigned?!? */ + comprintf("\t if (extra & 0x0800) { \n"); /* signed */ + comprintf("\t jff_MULS32(r2,dst);\n"); + comprintf("\t } else { \n"); + comprintf("\t\t jff_MULU32(r2,dst);\n"); + comprintf("\t } \n"); /* The result is in r2, with r2 holding the lower 32 bits */ + comprintf("\t }\n"); + } else { + comprintf("\t if (extra & 0x0400) {\n"); /* Need full 64 bit result */ + comprintf("\t int r3=(extra & 7);\n"); + comprintf("\t mov_l_rr(r3,dst);\n"); /* operands now in r3 and r2 */ + comprintf("\t if (extra & 0x0800) { \n"); /* signed */ + comprintf("\t\t jnf_MULS64(r2,r3);\n"); + comprintf("\t } else { \n"); + comprintf("\t\t jnf_MULU64(r2,r3);\n"); + comprintf("\t } \n"); /* The result is in r2/r3, with r2 holding the lower 32 bits */ + comprintf("\t } else {\n"); /* Only want 32 bit result */ + /* operands in dst and r2, result foes into r2 */ + /* shouldn't matter whether it's signed or unsigned?!? 
*/ + comprintf("\t if (extra & 0x0800) { \n"); /* signed */ + comprintf("\t jnf_MULS32(r2,dst);\n"); + comprintf("\t } else { \n"); + comprintf("\t\t jnf_MULU32(r2,dst);\n"); + comprintf("\t } \n"); /* The result is in r2, with r2 holding the lower 32 bits */ + comprintf("\t }\n"); + } +#else + if (!noflags) { + failure; + return; + } + comprintf("\tuae_u16 extra=%s;\n", gen_nextiword()); + comprintf("\tint r2=(extra>>12)&7;\n" + "\tint tmp=scratchie++;\n"); + + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + /* The two operands are in dst and r2 */ + comprintf("\tif (extra&0x0400) {\n" /* Need full 64 bit result */ + "\tint r3=(extra&7);\n" + "\tmov_l_rr(r3,dst);\n"); /* operands now in r3 and r2 */ + comprintf("\tif (extra&0x0800) { \n" /* signed */ + "\t\timul_64_32(r2,r3);\n" + "\t} else { \n" + "\t\tmul_64_32(r2,r3);\n" + "\t} \n"); + /* The result is in r2/tmp, with r2 holding the lower 32 bits */ + comprintf("\t} else {\n"); /* Only want 32 bit result */ + /* operands in dst and r2, result foes into r2 */ + /* shouldn't matter whether it's signed or unsigned?!? 
*/ + comprintf("\timul_32_32(r2,dst);\n" + "\t}\n"); +#endif +} + +static void gen_muls(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + comprintf("\t dont_care_flags();\n"); + genamode(curi->smode, "srcreg", sz_word, "src", 1, 0); + genamode(curi->dmode, "dstreg", sz_word, "dst", 1, 0); + start_brace(); + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_MULS(dst,src);\n"); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_MULS(dst,src);\n"); + } + genastore("dst", curi->dmode, "dstreg", sz_long, "dst"); +#else + comprintf("\tdont_care_flags();\n"); + genamode(curi->smode, "srcreg", sz_word, "src", 1, 0); + genamode(curi->dmode, "dstreg", sz_word, "dst", 1, 0); + comprintf("\tsign_extend_16_rr(scratchie,src);\n" + "\tsign_extend_16_rr(dst,dst);\n" + "\timul_32_32(dst,scratchie);\n"); + genflags(flag_logical, sz_long, "dst", "", ""); + genastore("dst", curi->dmode, "dstreg", sz_long, "dst"); +#endif +} + +static void gen_mulu(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + comprintf("\t dont_care_flags();\n"); + genamode(curi->smode, "srcreg", sz_word, "src", 1, 0); + genamode(curi->dmode, "dstreg", sz_word, "dst", 1, 0); + start_brace(); + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_MULU(dst,src);\n"); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_MULU(dst,src);\n"); + } + genastore("dst", curi->dmode, "dstreg", sz_long, "dst"); +#else + comprintf("\tdont_care_flags();\n"); + genamode(curi->smode, "srcreg", sz_word, "src", 1, 0); + genamode(curi->dmode, "dstreg", sz_word, "dst", 1, 0); + /* To do 16x16 unsigned multiplication, we actually use + 32x32 signed, and zero-extend the registers first. 
+ That solves the problem of MUL needing dedicated registers + on the x86 */ + comprintf("\tzero_extend_16_rr(scratchie,src);\n" + "\tzero_extend_16_rr(dst,dst);\n" + "\timul_32_32(dst,scratchie);\n"); + genflags(flag_logical, sz_long, "dst", "", ""); + genastore("dst", curi->dmode, "dstreg", sz_long, "dst"); + +#endif +} + +static void gen_nbcd(uae_u32 opcode, struct instr *curi, const char* ssize) { +#if 0 +#else + (void) opcode; + (void) curi; + (void) ssize; + failure; + /* Nope! */ +#endif +} + +static void gen_neg(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace(); + comprintf("\t int tmp=scratchie++;\n"); + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_NEG_%s(tmp,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + duplicate_carry(); + comprintf("\t if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } else { + comprintf("\t jnf_NEG(tmp,src);\n"); + } + + genastore("tmp", curi->smode, "srcreg", curi->size, "src"); +#else + (void) ssize; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace(); + comprintf("\tint dst=scratchie++;\n"); + comprintf("\tmov_l_ri(dst,0);\n"); + genflags(flag_sub, curi->size, "", "src", "dst"); + genastore("dst", curi->smode, "srcreg", curi->size, "src"); +#endif +} + +static void gen_negx(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + isaddx; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace(); + comprintf("\t int dst=scratchie++;\n"); + + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t restore_inverted_carry();\n"); /* Reload the X flag into C */ + comprintf("\t start_needflags();\n"); + comprintf("\t jff_NEGX_%s(dst,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + 
duplicate_carry(); + comprintf("\t if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } else { + comprintf("\t restore_inverted_carry();\n"); /* Reload the X flag into C */ + comprintf("\t jnf_NEGX(dst,src);\n"); + } + + genastore("dst", curi->smode, "srcreg", curi->size, "src"); +#else + (void) ssize; + isaddx; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace(); + comprintf("\tint dst=scratchie++;\n"); + comprintf("\tmov_l_ri(dst,0);\n"); + genflags(flag_subx, curi->size, "", "src", "dst"); + genastore("dst", curi->smode, "srcreg", curi->size, "src"); +#endif +} + +static void gen_not(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + comprintf("\t dont_care_flags();\n"); + start_brace(); + comprintf("\t int tmp=scratchie++;\n"); + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_NOT_%s(tmp,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_NOT(tmp,src);\n", ssize); + } + genastore("tmp", curi->smode, "srcreg", curi->size, "src"); +#else + (void) ssize; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace(); + comprintf("\tint dst=scratchie++;\n"); + comprintf("\tmov_l_ri(dst,0xffffffff);\n"); + genflags(flag_eor, curi->size, "", "src", "dst"); + genastore("dst", curi->smode, "srcreg", curi->size, "src"); +#endif +} + +static void gen_or(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + + comprintf("\t dont_care_flags();\n"); + start_brace(); + comprintf("\t int tmp=scratchie++;\n"); + if (!noflags) { + comprintf("\t jff_OR_%s(tmp, dst,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + 
comprintf("\t jnf_OR(tmp, dst,src);\n"); + } + genastore("tmp", curi->dmode, "dstreg", curi->size, "dst"); +#else + (void) ssize; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genflags(flag_or, curi->size, "", "src", "dst"); + genastore("dst", curi->dmode, "dstreg", curi->size, "dst"); +#endif +} + +static void gen_orsr(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t start_needflags();\n"); + comprintf("\t jff_ORSR(ARM_CCR_MAP[src & 0xF], ((src & 0x10) >> 4));\n"); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } +#else + (void) curi; + failure; + isjump; +#endif +} + +static void gen_rol(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + mayfail; + if (curi->smode == Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + comprintf("\t dont_care_flags();\n"); + genamode(curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "data", 1, 0); + start_brace(); + comprintf("\t int tmp=scratchie++;\n"); + + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_ROL_%s(tmp,data,cnt);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_ROL_%s(tmp,data,cnt);\n", ssize); + } + genastore("tmp", curi->dmode, "dstreg", curi->size, "data"); +#else + (void) ssize; + + mayfail; + if (curi->smode == Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + comprintf("\tdont_care_flags();\n"); + genamode(curi->smode, "srcreg", curi->size, "cnt", 1, 0); + 
genamode(curi->dmode, "dstreg", curi->size, "data", 1, 0); + start_brace(); + + switch (curi->size) { + case sz_long: + comprintf("\t rol_l_rr(data,cnt);\n"); + break; + case sz_word: + comprintf("\t rol_w_rr(data,cnt);\n"); + break; + case sz_byte: + comprintf("\t rol_b_rr(data,cnt);\n"); + break; + } + + if (!noflags) { + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch (curi->size) { + case sz_byte: + comprintf("\t test_b_rr(data,data);\n"); + break; + case sz_word: + comprintf("\t test_w_rr(data,data);\n"); + break; + case sz_long: + comprintf("\t test_l_rr(data,data);\n"); + break; + } + comprintf("\t bt_l_ri(data,0x00);\n"); /* Set C */ + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } + genastore("data", curi->dmode, "dstreg", curi->size, "data"); +#endif +} + +static void gen_rolw(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + comprintf("\t dont_care_flags();\n"); + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace(); + comprintf("\t int tmp = scratchie++;\n"); + + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_ROLW(tmp,src);\n"); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_ROLW(tmp,src);\n"); + } + genastore("tmp", curi->smode, "srcreg", curi->size, "src"); +#else + (void) curi; + failure; +#endif +} + +static void gen_ror(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + mayfail; + if (curi->smode == Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + comprintf("\t dont_care_flags();\n"); + genamode(curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "data", 1, 0); + start_brace(); + comprintf("\t int tmp=scratchie++;\n"); + + if (!noflags) { 
+ comprintf("\t start_needflags();\n"); + comprintf("\t jff_ROR_%s(tmp,data,cnt);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_ROR_%s(tmp,data,cnt);\n", ssize); + } + genastore("tmp", curi->dmode, "dstreg", curi->size, "data"); +#else + (void) ssize; + mayfail; + if (curi->smode == Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + comprintf("\tdont_care_flags();\n"); + genamode(curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "data", 1, 0); + start_brace(); + + switch (curi->size) { + case sz_long: + comprintf("\t ror_l_rr(data,cnt);\n"); + break; + case sz_word: + comprintf("\t ror_w_rr(data,cnt);\n"); + break; + case sz_byte: + comprintf("\t ror_b_rr(data,cnt);\n"); + break; + } + + if (!noflags) { + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch (curi->size) { + case sz_byte: + comprintf("\t test_b_rr(data,data);\n"); + break; + case sz_word: + comprintf("\t test_w_rr(data,data);\n"); + break; + case sz_long: + comprintf("\t test_l_rr(data,data);\n"); + break; + } + switch (curi->size) { + case sz_byte: + comprintf("\t bt_l_ri(data,0x07);\n"); + break; + case sz_word: + comprintf("\t bt_l_ri(data,0x0f);\n"); + break; + case sz_long: + comprintf("\t bt_l_ri(data,0x1f);\n"); + break; + } + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } + genastore("data", curi->dmode, "dstreg", curi->size, "data"); +#endif +} + +static void gen_rorw(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + comprintf("\t dont_care_flags();\n"); + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace(); + comprintf("\t int tmp = scratchie++;\n"); + + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t 
jff_RORW(tmp,src);\n"); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_RORW(tmp,src);\n"); + } + genastore("tmp", curi->smode, "srcreg", curi->size, "src"); +#else + (void) curi; + failure; +#endif +} + +static void gen_roxl(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + mayfail; + if (curi->smode == Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + isaddx; + comprintf("\t dont_care_flags();\n"); + genamode(curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "data", 1, 0); + start_brace(); + comprintf("\t int tmp=scratchie++;\n"); + + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t restore_carry();\n"); /* Reload the X flag into C */ + comprintf("\t start_needflags();\n"); + comprintf("\t jff_ROXL_%s(tmp,data,cnt);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + duplicate_carry(); + } else { + comprintf("\t restore_carry();\n"); /* Reload the X flag into C */ + comprintf("\t jnf_ROXL_%s(tmp,data,cnt);\n", ssize); + } + genastore("tmp", curi->dmode, "dstreg", curi->size, "data"); +#else + (void) curi; + (void) ssize; + failure; +#endif +} + +static void gen_roxlw(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + isaddx; + comprintf("\t dont_care_flags();\n"); + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace(); + comprintf("\t int tmp = scratchie++;\n"); + + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t restore_carry();\n"); /* Reload the X flag into C */ + comprintf("\t start_needflags();\n"); + comprintf("\t jff_ROXLW(tmp,src);\n"); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + duplicate_carry(); + } else { + comprintf("\t 
restore_carry();\n"); /* Reload the X flag into C */ + comprintf("\t jnf_ROXLW(tmp,src);\n"); + } + genastore("tmp", curi->smode, "srcreg", curi->size, "src"); +#else + (void) curi; + failure; +#endif +} + +static void gen_roxr(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + mayfail; + if (curi->smode == Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + isaddx; + comprintf("\t dont_care_flags();\n"); + genamode(curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "data", 1, 0); + start_brace(); + comprintf("\t int tmp=scratchie++;\n"); + + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t restore_carry();\n"); /* Reload the X flag into C */ + comprintf("\t start_needflags();\n"); + comprintf("\t jff_ROXR_%s(tmp,data,cnt);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + duplicate_carry(); + } else { + comprintf("\t restore_carry();\n"); /* Reload the X flag into C */ + comprintf("\t jnf_ROXR_%s(tmp,data,cnt);\n", ssize); + } + genastore("tmp", curi->dmode, "dstreg", curi->size, "data"); +#else + (void) curi; + failure; +#endif +} + +static void gen_roxrw(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + isaddx; + comprintf("\t dont_care_flags();\n"); + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace(); + comprintf("\t int tmp = scratchie++;\n"); + + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t restore_carry();\n"); /* Reload the X flag into C */ + comprintf("\t start_needflags();\n"); + comprintf("\t jff_ROXRW(tmp,src);\n"); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + duplicate_carry(); + } else { + comprintf("\t restore_carry();\n"); /* Reload the X flag into C */ + 
comprintf("\t jnf_ROXRW(tmp,src);\n"); + } + genastore("tmp", curi->smode, "srcreg", curi->size, "src"); +#else + (void) curi; + failure; +#endif +} + +static void gen_sbcd(uae_u32 opcode, struct instr *curi, const char* ssize) { +#if 0 +#else + (void) opcode; + (void) curi; + (void) ssize; + failure; + /* I don't think so! */ +#endif +} + +static void gen_scc(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if 0 + genamode(curi->smode, "srcreg", curi->size, "src", 2, 0); + start_brace(); + comprintf("\t int val = scratchie++;\n"); + switch (curi->cc) { + case 0: /* Unconditional set */ + case 1: + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + case 8: + case 9: + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + comprintf("\t make_flags_live();\n"); /* Load the flags */ + comprintf("\t jnf_Scc_ri(val,%d);\n", curi->cc); + break; + default: + assert(0); + break; + } + genastore("val", curi->smode, "srcreg", curi->size, "src"); +#else + genamode(curi->smode, "srcreg", curi->size, "src", 2, 0); + start_brace(); + comprintf("\tint val = scratchie++;\n"); + + /* We set val to 0 if we really should use 255, and to 1 for real 0 */ + switch (curi->cc) { + case 0: /* Unconditional set */ + comprintf("\tmov_l_ri(val,0);\n"); + break; + case 1: + /* Unconditional not-set */ + comprintf("\tmov_l_ri(val,1);\n"); + break; + case 8: + failure; + break; /* Work out details! FIXME */ + case 9: + failure; + break; /* Not critical, though! 
*/ + + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + comprintf("\tmake_flags_live();\n"); /* Load the flags */ + /* All condition codes can be inverted by changing the LSB */ + comprintf("\tsetcc(val,%d);\n", cond_codes[curi->cc] ^ 1); + break; + default: + assert(0); + break; + } + comprintf("\tsub_b_ri(val,1);\n"); + genastore("val", curi->smode, "srcreg", curi->size, "src"); +#endif +} + +static void gen_sub(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + + comprintf("\t dont_care_flags();\n"); + start_brace(); + // Use tmp register to avoid destroying upper part in .B., .W cases + comprintf("\t int tmp=scratchie++;\n"); + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_SUB_%s(tmp,dst,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + duplicate_carry(); + comprintf( + "\t if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } else { + comprintf("\t jnf_SUB_%s(tmp,dst,src);\n", ssize); + } + genastore("tmp", curi->dmode, "dstreg", curi->size, "dst"); +#else + (void) ssize; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genflags(flag_sub, curi->size, "", "src", "dst"); + genastore("dst", curi->dmode, "dstreg", curi->size, "dst"); +#endif +} + +static void gen_suba(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", sz_long, "dst", 1, 0); + start_brace(); + comprintf("\t jnf_SUBA_%s(dst, src);\n", ssize); + genastore("dst", curi->dmode, "dstreg", sz_long, "dst"); +#else + (void) ssize; + genamode(curi->smode, "srcreg", curi->size, 
"src", 1, 0); + genamode(curi->dmode, "dstreg", sz_long, "dst", 1, 0); + start_brace(); + comprintf("\tint tmp=scratchie++;\n"); + switch (curi->size) { + case sz_byte: + comprintf("\tsign_extend_8_rr(tmp,src);\n"); + break; + case sz_word: + comprintf("\tsign_extend_16_rr(tmp,src);\n"); + break; + case sz_long: + comprintf("\ttmp=src;\n"); + break; + default: + assert(0); + break; + } + comprintf("\tsub_l(dst,tmp);\n"); + genastore("dst", curi->dmode, "dstreg", sz_long, "dst"); +#endif +} + +static void gen_subx(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + isaddx; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + start_brace(); + comprintf("\tint tmp=scratchie++;\n"); + comprintf("\tdont_care_flags();\n"); + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t restore_inverted_carry();\n"); /* Reload the X flag into C */ + comprintf("\t start_needflags();\n"); + comprintf("\t jff_SUBX_%s(tmp,dst,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + duplicate_carry(); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } else { + comprintf("\t restore_inverted_carry();\n"); /* Reload the X flag into C */ + comprintf("\t jnf_SUBX(tmp,dst,src);\n"); + } + genastore("tmp", curi->dmode, "dstreg", curi->size, "dst"); +#else + (void) ssize; + isaddx; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genflags(flag_subx, curi->size, "", "src", "dst"); + genastore("dst", curi->dmode, "dstreg", curi->size, "dst"); +#endif +} + +static void gen_swap(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", sz_long, "src", 1, 0); + comprintf("\t dont_care_flags();\n"); + start_brace(); + + if (!noflags) { + 
comprintf("\t start_needflags();\n"); + comprintf("\t jff_SWAP(src);\n"); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } else { + comprintf("\t jnf_SWAP(src);\n"); + } + genastore("src", curi->smode, "srcreg", sz_long, "src"); +#else + genamode(curi->smode, "srcreg", sz_long, "src", 1, 0); + comprintf("\tdont_care_flags();\n"); + comprintf("\tarm_ROR_l_ri8(src,16);\n"); + genflags(flag_logical, sz_long, "src", "", ""); + genastore("src", curi->smode, "srcreg", sz_long, "src"); +#endif +} + +static void gen_tst(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + comprintf("\t dont_care_flags();\n"); + if (!noflags) { + start_brace(); + comprintf("\t start_needflags();\n"); + comprintf("\t jff_TST_%s(src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } +#else + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genflags(flag_logical, curi->size, "src", "", ""); +#endif +} + +static int /* returns zero for success, non-zero for failure */ +gen_opcode(unsigned long int opcode) { + struct instr *curi = table68k + opcode; + const char* ssize = NULL; + + insn_n_cycles = 2; + global_failure = 0; + long_opcode = 0; + global_isjump = 0; + global_iscjump = 0; + global_isaddx = 0; + global_cmov = 0; + global_fpu = 0; + global_mayfail = 0; + hack_opcode = opcode; + endstr[0] = 0; + + start_brace(); + comprintf("\tuae_u8 scratchie=S1;\n"); + switch (curi->plev) { + case 0: /* not privileged */ + break; + case 1: /* unprivileged only on 68000 */ + if (cpu_level == 0) + break; + if (next_cpu_level < 0) + next_cpu_level = 0; + + /* fall through */ + case 2: /* priviledged */ + failure; /* Easy ones first */ + break; + case 3: /* privileged if size == word */ + if (curi->size == sz_byte) + break; + failure; + 
break; + } + switch (curi->size) { + case sz_byte: + ssize = "b"; + break; + case sz_word: + ssize = "w"; + break; + case sz_long: + ssize = "l"; + break; + default: + assert(0); + break; + } + (void) ssize; + + switch (curi->mnemo) { + case i_AND: + gen_and(opcode, curi, ssize); + break; + + case i_OR: + gen_or(opcode, curi, ssize); + break; + + case i_EOR: + gen_eor(opcode, curi, ssize); + break; + + case i_ORSR: + gen_orsr(opcode, curi, ssize); + break; + + case i_EORSR: + gen_eorsr(opcode, curi, ssize); + break; + + case i_ANDSR: + gen_andsr(opcode, curi, ssize); + break; + + case i_SUB: + gen_sub(opcode, curi, ssize); + break; + + case i_SUBA: + gen_suba(opcode, curi, ssize); + break; + + case i_SUBX: + gen_subx(opcode, curi, ssize); + break; + + case i_SBCD: + gen_sbcd(opcode, curi, ssize); + break; + + case i_ADD: + gen_add(opcode, curi, ssize); + break; + + case i_ADDA: + gen_adda(opcode, curi, ssize); + break; + + case i_ADDX: + gen_addx(opcode, curi, ssize); + break; + + case i_ABCD: + gen_abcd(opcode, curi, ssize); + break; + + case i_NEG: + gen_neg(opcode, curi, ssize); + break; + + case i_NEGX: + gen_negx(opcode, curi, ssize); + break; + + case i_NBCD: + gen_nbcd(opcode, curi, ssize); + break; + + case i_CLR: + gen_clr(opcode, curi, ssize); + break; + + case i_NOT: + gen_not(opcode, curi, ssize); + break; + + case i_TST: + gen_tst(opcode, curi, ssize); + break; + + case i_BCHG: + gen_bchg(opcode, curi, ssize); + break; + + case i_BCLR: + gen_bclr(opcode, curi, ssize); + break; + + case i_BSET: + gen_bset(opcode, curi, ssize); + break; + + case i_BTST: + gen_btst(opcode, curi, ssize); + break; + + case i_CMPM: + case i_CMP: + gen_cmp(opcode, curi, ssize); + break; + + case i_CMPA: + gen_cmpa(opcode, curi, ssize); + break; + + /* The next two are coded a little unconventional, but they are doing + * weird things... 
*/ + case i_MVPRM: + isjump; + failure; + break; + + case i_MVPMR: + isjump; + failure; + break; + + case i_MOVE: + gen_move(opcode, curi, ssize); + break; + + case i_MOVEA: + gen_movea(opcode, curi, ssize); + break; + + case i_MVSR2: + isjump; + failure; + break; + + case i_MV2SR: + isjump; + failure; + break; + + case i_SWAP: + gen_swap(opcode, curi, ssize); + break; + + case i_EXG: + gen_exg(opcode, curi, ssize); + break; + + case i_EXT: + gen_ext(opcode, curi, ssize); + break; + + case i_MVMEL: + genmovemel(opcode); + break; + + case i_MVMLE: + genmovemle(opcode); + break; + + case i_TRAP: + isjump; + failure; + break; + + case i_MVR2USP: + isjump; + failure; + break; + + case i_MVUSP2R: + isjump; + failure; + break; + + case i_RESET: + isjump; + failure; + break; + + case i_NOP: + break; + + case i_STOP: + isjump; + failure; + break; + + case i_RTE: + isjump; + failure; + break; + + case i_RTD: + genamode(curi->smode, "srcreg", curi->size, "offs", 1, 0); + /* offs is constant */ + comprintf("\tarm_ADD_l_ri8(offs,4);\n"); + start_brace(); + comprintf("\tint newad=scratchie++;\n" + "\treadlong(15,newad,scratchie);\n" + "\tmov_l_mr((uintptr)&regs.pc,newad);\n" + "\tget_n_addr_jmp(newad,PC_P,scratchie);\n" + "\tmov_l_mr((uintptr)&regs.pc_oldp,PC_P);\n" + "\tm68k_pc_offset=0;\n" + "\tarm_ADD_l(15,offs);\n"); + gen_update_next_handler(); + isjump; + break; + + case i_LINK: + genamode(curi->smode, "srcreg", sz_long, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "offs", 1, 0); + comprintf("\tsub_l_ri(15,4);\n" + "\twritelong_clobber(15,src,scratchie);\n" + "\tmov_l_rr(src,15);\n"); + if (curi->size == sz_word) + comprintf("\tsign_extend_16_rr(offs,offs);\n"); + comprintf("\tarm_ADD_l(15,offs);\n"); + genastore("src", curi->smode, "srcreg", sz_long, "src"); + break; + + case i_UNLK: + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + comprintf("\tmov_l_rr(15,src);\n" + "\treadlong(15,src,scratchie);\n" + "\tarm_ADD_l_ri8(15,4);\n"); + genastore("src", 
curi->smode, "srcreg", curi->size, "src"); + break; + + case i_RTS: + comprintf("\tint newad=scratchie++;\n" + "\treadlong(15,newad,scratchie);\n" + "\tmov_l_mr((uintptr)&regs.pc,newad);\n" + "\tget_n_addr_jmp(newad,PC_P,scratchie);\n" + "\tmov_l_mr((uintptr)&regs.pc_oldp,PC_P);\n" + "\tm68k_pc_offset=0;\n" + "\tlea_l_brr(15,15,4);\n"); + gen_update_next_handler(); + isjump; + break; + + case i_TRAPV: + isjump; + failure; + break; + + case i_RTR: + isjump; + failure; + break; + + case i_JSR: + isjump; + genamode(curi->smode, "srcreg", curi->size, "src", 0, 0); + start_brace(); + comprintf( + "\tuae_u32 retadd=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n"); + comprintf("\tint ret=scratchie++;\n" + "\tmov_l_ri(ret,retadd);\n" + "\tsub_l_ri(15,4);\n" + "\twritelong_clobber(15,ret,scratchie);\n"); + comprintf("\tmov_l_mr((uintptr)&regs.pc,srca);\n" + "\tget_n_addr_jmp(srca,PC_P,scratchie);\n" + "\tmov_l_mr((uintptr)&regs.pc_oldp,PC_P);\n" + "\tm68k_pc_offset=0;\n"); + gen_update_next_handler(); + break; + + case i_JMP: + isjump; + genamode(curi->smode, "srcreg", curi->size, "src", 0, 0); + comprintf("\tmov_l_mr((uintptr)&regs.pc,srca);\n" + "\tget_n_addr_jmp(srca,PC_P,scratchie);\n" + "\tmov_l_mr((uintptr)&regs.pc_oldp,PC_P);\n" + "\tm68k_pc_offset=0;\n"); + gen_update_next_handler(); + break; + + case i_BSR: + is_const_jump; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace(); + comprintf( + "\tuae_u32 retadd=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n"); + comprintf("\tint ret=scratchie++;\n" + "\tmov_l_ri(ret,retadd);\n" + "\tsub_l_ri(15,4);\n" + "\twritelong_clobber(15,ret,scratchie);\n"); + comprintf("\tarm_ADD_l_ri(src,m68k_pc_offset_thisinst+2);\n"); + comprintf("\tm68k_pc_offset=0;\n"); + comprintf("\tarm_ADD_l(PC_P,src);\n"); + comprintf("\tcomp_pc_p=(uae_u8*)get_const(PC_P);\n"); + break; + + case i_Bcc: + comprintf("\tuae_u32 v,v1,v2;\n"); + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + /* That 
source is an immediate, so we can clobber it with abandon */ + switch (curi->size) { + case sz_byte: + comprintf("\tsign_extend_8_rr(src,src);\n"); + break; + case sz_word: + comprintf("\tsign_extend_16_rr(src,src);\n"); + break; + case sz_long: + break; + } + comprintf( + "\tsub_l_ri(src,m68k_pc_offset-m68k_pc_offset_thisinst-2);\n"); + /* Leave the following as "add" --- it will allow it to be optimized + away due to src being a constant ;-) */ + comprintf("\tarm_ADD_l_ri(src,(uintptr)comp_pc_p);\n"); + comprintf("\tmov_l_ri(PC_P,(uintptr)comp_pc_p);\n"); + /* Now they are both constant. Might as well fold in m68k_pc_offset */ + comprintf("\tarm_ADD_l_ri(src,m68k_pc_offset);\n"); + comprintf("\tarm_ADD_l_ri(PC_P,m68k_pc_offset);\n"); + comprintf("\tm68k_pc_offset=0;\n"); + + if (curi->cc >= 2) { + comprintf("\tv1=get_const(PC_P);\n" + "\tv2=get_const(src);\n" + "\tregister_branch(v1,v2,%d);\n", cond_codes[curi->cc]); + comprintf("\tmake_flags_live();\n"); /* Load the flags */ + isjump; + } else { + is_const_jump; + } + + switch (curi->cc) { + case 0: /* Unconditional jump */ + comprintf("\tmov_l_rr(PC_P,src);\n"); + comprintf("\tcomp_pc_p=(uae_u8*)get_const(PC_P);\n"); + break; + case 1: + break; /* This is silly! */ + case 8: + failure; + break; /* Work out details! FIXME */ + case 9: + failure; + break; /* Not critical, though! 
*/ + + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + break; + default: + assert(0); + break; + } + break; + + case i_LEA: + genamode(curi->smode, "srcreg", curi->size, "src", 0, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 2, 0); + genastore("srca", curi->dmode, "dstreg", curi->size, "dst"); + break; + + case i_PEA: + if (table68k[opcode].smode == Areg || table68k[opcode].smode == Aind + || table68k[opcode].smode == Aipi + || table68k[opcode].smode == Apdi + || table68k[opcode].smode == Ad16 + || table68k[opcode].smode == Ad8r) + comprintf("if (srcreg==7) dodgy=1;\n"); + + genamode(curi->smode, "srcreg", curi->size, "src", 0, 0); + genamode(Apdi, "7", sz_long, "dst", 2, 0); + genastore("srca", Apdi, "7", sz_long, "dst"); + break; + + case i_DBcc: + gen_dbcc(opcode, curi, ssize); + break; + + case i_Scc: + gen_scc(opcode, curi, ssize); + break; + + case i_DIVU: + isjump; + failure; + break; + + case i_DIVS: + isjump; + failure; + break; + + case i_MULU: + gen_mulu(opcode, curi, ssize); + break; + + case i_MULS: + gen_muls(opcode, curi, ssize); + break; + + case i_CHK: + isjump; + failure; + break; + + case i_CHK2: + isjump; + failure; + break; + + case i_ASR: + gen_asr(opcode, curi, ssize); + break; + + case i_ASL: + gen_asl(opcode, curi, ssize); + break; + + case i_LSR: + gen_lsr(opcode, curi, ssize); + break; + + case i_LSL: + gen_lsl(opcode, curi, ssize); + break; + + case i_ROL: + gen_rol(opcode, curi, ssize); + break; + + case i_ROR: + gen_ror(opcode, curi, ssize); + break; + + case i_ROXL: + gen_roxl(opcode, curi, ssize); + break; + + case i_ROXR: + gen_roxr(opcode, curi, ssize); + break; + + case i_ASRW: + gen_asrw(opcode, curi, ssize); + break; + + case i_ASLW: + gen_aslw(opcode, curi, ssize); + break; + + case i_LSRW: + gen_lsrw(opcode, curi, ssize); + break; + + case i_LSLW: + gen_lslw(opcode, curi, ssize); + break; + + case i_ROLW: + gen_rolw(opcode, curi, ssize); 
+ break; + + case i_RORW: + gen_rorw(opcode, curi, ssize); + break; + + case i_ROXLW: + gen_roxlw(opcode, curi, ssize); + break; + + case i_ROXRW: + gen_roxrw(opcode, curi, ssize); + break; + + case i_MOVEC2: + isjump; + failure; + break; + + case i_MOVE2C: + isjump; + failure; + break; + + case i_CAS: + failure; + break; + + case i_CAS2: + failure; + break; + + case i_MOVES: + /* ignore DFC and SFC because we have no MMU */ + isjump; + failure; + break; + + case i_BKPT: + /* only needed for hardware emulators */ + isjump; + failure; + break; + + case i_CALLM: + /* not present in 68030 */ + isjump; + failure; + break; + + case i_RTM: + /* not present in 68030 */ + isjump; + failure; + break; + + case i_TRAPcc: + isjump; + failure; + break; + + case i_DIVL: + isjump; + failure; + break; + + case i_MULL: + gen_mull(opcode, curi, ssize); + break; + + case i_BFTST: + case i_BFEXTU: + case i_BFCHG: + case i_BFEXTS: + case i_BFCLR: + case i_BFFFO: + case i_BFSET: + case i_BFINS: + failure; + break; + case i_PACK: + failure; + break; + case i_UNPK: + failure; + break; + case i_TAS: + failure; + break; + case i_FPP: + uses_fpu; +#ifdef USE_JIT_FPU + mayfail; + comprintf("\tuae_u16 extra=%s;\n",gen_nextiword()); + swap_opcode(); + comprintf("\tcomp_fpp_opp(opcode,extra);\n"); +#else + failure; +#endif + break; + case i_FBcc: + uses_fpu; +#ifdef USE_JIT_FPU + isjump; + uses_cmov; + mayfail; + swap_opcode(); + comprintf("\tcomp_fbcc_opp(opcode);\n"); +#else + isjump; + failure; +#endif + break; + case i_FDBcc: + uses_fpu; + isjump; + failure; + break; + case i_FScc: + uses_fpu; +#ifdef USE_JIT_FPU + mayfail; + uses_cmov; + comprintf("\tuae_u16 extra=%s;\n",gen_nextiword()); + swap_opcode(); + comprintf("\tcomp_fscc_opp(opcode,extra);\n"); +#else + failure; +#endif + break; + case i_FTRAPcc: + uses_fpu; + isjump; + failure; + break; + case i_FSAVE: + uses_fpu; + failure; + break; + case i_FRESTORE: + uses_fpu; + failure; + break; + + case i_CINVL: + case i_CINVP: + case 
i_CINVA: + isjump; /* Not really, but it's probably a good idea to stop + translating at this point */ + failure; + comprintf("\tflush_icache();\n"); /* Differentiate a bit more? */ + break; + case i_CPUSHL: + case i_CPUSHP: + case i_CPUSHA: + isjump; /* Not really, but it's probably a good idea to stop + translating at this point */ + failure; + break; + + case i_MOVE16: + gen_move16(opcode, curi); + break; + + case i_EMULOP_RETURN: + isjump; + failure; + break; + + case i_EMULOP: + failure; + break; + + // case i_NATFEAT_ID: + // case i_NATFEAT_CALL: + // failure; + // break; + + case i_MMUOP: + isjump; + failure; + break; + default: + assert(0); + break; + } + comprintf("%s", endstr); + finish_braces(); + sync_m68k_pc(); + if (global_mayfail) + comprintf("\tif (failure) m68k_pc_offset=m68k_pc_offset_thisinst;\n"); + return global_failure; +} + +static void generate_includes(FILE * f) { + fprintf(f, "#include \"sysdeps.h\"\n"); + fprintf(f, "#include \"m68k.h\"\n"); + fprintf(f, "#include \"memory.h\"\n"); + fprintf(f, "#include \"readcpu.h\"\n"); + fprintf(f, "#include \"newcpu.h\"\n"); + fprintf(f, "#include \"comptbl.h\"\n"); + fprintf(f, "#include \"debug.h\"\n"); +} + +static int postfix; + +static char *decodeEA (amodes mode, wordsizes size) +{ + static char buffer[80]; + + buffer[0] = 0; + switch (mode){ + case Dreg: + strcpy (buffer,"Dn"); + break; + case Areg: + strcpy (buffer,"An"); + break; + case Aind: + strcpy (buffer,"(An)"); + break; + case Aipi: + strcpy (buffer,"(An)+"); + break; + case Apdi: + strcpy (buffer,"-(An)"); + break; + case Ad16: + strcpy (buffer,"(d16,An)"); + break; + case Ad8r: + strcpy (buffer,"(d8,An,Xn)"); + break; + case PC16: + strcpy (buffer,"(d16,PC)"); + break; + case PC8r: + strcpy (buffer,"(d8,PC,Xn)"); + break; + case absw: + strcpy (buffer,"(xxx).W"); + break; + case absl: + strcpy (buffer,"(xxx).L"); + break; + case imm: + switch (size){ + case sz_byte: + strcpy (buffer,"#.B"); + break; + case sz_word: + strcpy 
(buffer,"#.W"); + break; + case sz_long: + strcpy (buffer,"#.L"); + break; + default: + break; + } + break; + case imm0: + strcpy (buffer,"#.B"); + break; + case imm1: + strcpy (buffer,"#.W"); + break; + case imm2: + strcpy (buffer,"#.L"); + break; + case immi: + strcpy (buffer,"#"); + break; + + default: + break; + } + return buffer; +} + +static char *outopcode (const char *name, int opcode) +{ + static char out[100]; + struct instr *ins; + + ins = &table68k[opcode]; + strcpy (out, name); + if (ins->smode == immi) + strcat (out, "Q"); + if (ins->size == sz_byte) + strcat (out,".B"); + if (ins->size == sz_word) + strcat (out,".W"); + if (ins->size == sz_long) + strcat (out,".L"); + strcat (out," "); + if (ins->suse) + strcat (out, decodeEA (ins->smode, ins->size)); + if (ins->duse) { + if (ins->suse) strcat (out,","); + strcat (out, decodeEA (ins->dmode, ins->size)); + } + return out; +} + + +static void generate_one_opcode(int rp, int noflags) { + int i; + uae_u16 smsk, dmsk; + int opcode = opcode_map[rp]; + int aborted = 0; + int have_srcreg = 0; + int have_dstreg = 0; + const char *name; + + if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level) + return; + + for (i = 0; lookuptab[i].name[0]; i++) { + if (table68k[opcode].mnemo == lookuptab[i].mnemo) + break; + } + + if (table68k[opcode].handler != -1) + return; + + switch (table68k[opcode].stype) { + case 0: + smsk = 7; + break; + case 1: + smsk = 255; + break; + case 2: + smsk = 15; + break; + case 3: + smsk = 7; + break; + case 4: + smsk = 7; + break; + case 5: + smsk = 63; + break; + case 6: + smsk = 255; + break; + case 7: + smsk = 3; + break; + default: + assert(0); + break; + } + dmsk = 7; + + next_cpu_level = -1; + if (table68k[opcode].suse && table68k[opcode].smode != imm + && table68k[opcode].smode != imm0 && table68k[opcode].smode != imm1 + && table68k[opcode].smode != imm2 && table68k[opcode].smode != absw + && table68k[opcode].smode != absl && table68k[opcode].smode != PC8r + && 
table68k[opcode].smode != PC16) { + have_srcreg = 1; + if (table68k[opcode].spos == -1) { + if (((int) table68k[opcode].sreg) >= 128) + comprintf("\tuae_s32 srcreg = (uae_s32)(uae_s8)%d;\n", + (int) table68k[opcode].sreg); + else + comprintf("\tuae_s32 srcreg = %d;\n", + (int) table68k[opcode].sreg); + } else { + char source[100]; + int pos = table68k[opcode].spos; + + comprintf( + "#if defined(HAVE_GET_WORD_UNSWAPPED) && !defined(FULLMMU)\n"); + + if (pos < 8 && (smsk >> (8 - pos)) != 0) + sprintf(source, "(((opcode >> %d) | (opcode << %d)) & %d)", + pos ^ 8, 8 - pos, dmsk); + else if (pos != 8) + sprintf(source, "((opcode >> %d) & %d)", pos ^ 8, smsk); + else + sprintf(source, "(opcode & %d)", smsk); + + if (table68k[opcode].stype == 3) + comprintf("\tuae_u32 srcreg = imm8_table[%s];\n", source); + else if (table68k[opcode].stype == 1) + comprintf("\tuae_u32 srcreg = (uae_s32)(uae_s8)%s;\n", source); + else + comprintf("\tuae_u32 srcreg = %s;\n", source); + + comprintf("#else\n"); + + if (pos) + sprintf(source, "((opcode >> %d) & %d)", pos, smsk); + else + sprintf(source, "(opcode & %d)", smsk); + + if (table68k[opcode].stype == 3) + comprintf("\tuae_s32 srcreg = imm8_table[%s];\n", source); + else if (table68k[opcode].stype == 1) + comprintf("\tuae_s32 srcreg = (uae_s32)(uae_s8)%s;\n", source); + else + comprintf("\tuae_s32 srcreg = %s;\n", source); + + comprintf("#endif\n"); + } + } + if (table68k[opcode].duse + /* Yes, the dmode can be imm, in case of LINK or DBcc */ + && table68k[opcode].dmode != imm && table68k[opcode].dmode != imm0 + && table68k[opcode].dmode != imm1 && table68k[opcode].dmode != imm2 + && table68k[opcode].dmode != absw + && table68k[opcode].dmode != absl) { + have_dstreg = 1; + if (table68k[opcode].dpos == -1) { + if (((int) table68k[opcode].dreg) >= 128) + comprintf("\tuae_s32 dstreg = (uae_s32)(uae_s8)%d;\n", + (int) table68k[opcode].dreg); + else + comprintf("\tuae_s32 dstreg = %d;\n", + (int) table68k[opcode].dreg); + } else { + int pos 
= table68k[opcode].dpos; + + comprintf( + "#if defined(HAVE_GET_WORD_UNSWAPPED) && !defined(FULLMMU)\n"); + + if (pos < 8 && (dmsk >> (8 - pos)) != 0) + comprintf( + "\tuae_u32 dstreg = ((opcode >> %d) | (opcode << %d)) & %d;\n", + pos ^ 8, 8 - pos, dmsk); + else if (pos != 8) + comprintf("\tuae_u32 dstreg = (opcode >> %d) & %d;\n", pos ^ 8, + dmsk); + else + comprintf("\tuae_u32 dstreg = opcode & %d;\n", dmsk); + + comprintf("#else\n"); + + if (pos) + comprintf("\tuae_u32 dstreg = (opcode >> %d) & %d;\n", pos, + dmsk); + else + comprintf("\tuae_u32 dstreg = opcode & %d;\n", dmsk); + + comprintf("#endif\n"); + } + } + + if (have_srcreg && have_dstreg + && (table68k[opcode].dmode == Areg || table68k[opcode].dmode == Aind + || table68k[opcode].dmode == Aipi + || table68k[opcode].dmode == Apdi + || table68k[opcode].dmode == Ad16 + || table68k[opcode].dmode == Ad8r) + && (table68k[opcode].smode == Areg || table68k[opcode].smode == Aind + || table68k[opcode].smode == Aipi + || table68k[opcode].smode == Apdi + || table68k[opcode].smode == Ad16 + || table68k[opcode].smode == Ad8r)) { + comprintf("\tuae_u32 dodgy=(srcreg==(uae_s32)dstreg);\n"); + } else { + comprintf("\tuae_u32 dodgy=0;\n"); + } + comprintf("\tuae_u32 m68k_pc_offset_thisinst=m68k_pc_offset;\n"); + comprintf("\tm68k_pc_offset+=2;\n"); + + aborted = gen_opcode(opcode); + { + int flags = 0; + if (global_isjump) + flags |= 1; + if (long_opcode) + flags |= 2; + if (global_cmov) + flags |= 4; + if (global_isaddx) + flags |= 8; + if (global_iscjump) + flags |= 16; + if (global_fpu) + flags |= 32; + + comprintf("}\n"); + + name = lookuptab[i].name; + if (aborted) { + fprintf(stblfile, "{ NULL, 0x%08x, %d }, /* %s */\n", opcode, flags, name); + com_discard(); + } else { + const char *tbl = noflags ? 
"nf" : "ff"; + fprintf(stblfile, + "{ op_%x_%d_comp_%s, %d, 0x%08x }, /* %s */\n", + opcode, postfix, tbl, opcode, flags, name); + fprintf(headerfile, "extern compop_func op_%x_%d_comp_%s;\n", + opcode, postfix, tbl); + printf ("/* %s */\n", outopcode (name, opcode)); + printf( + "void REGPARAM2 op_%x_%d_comp_%s(uae_u32 opcode) /* %s */\n{\n", + opcode, postfix, tbl, name); + com_flush(); + } + } + opcode_next_clev[rp] = next_cpu_level; + opcode_last_postfix[rp] = postfix; +}
+
+/*
+ * Emit one full set of compile handlers.
+ *
+ * noflags selects the table/handler suffix: "nf" when non-zero, "ff"
+ * otherwise. The opening and closing lines of the op_smalltbl_<n>_comp_<suffix>
+ * table go to stblfile; the PART_1..PART_8 preprocessor guards go to stdout.
+ * generate_one_opcode() is called for every index below nr_cpuop_funcs,
+ * split into eight slices of roughly equal size.
+ */
+static void generate_func(int noflags) {
+	int i, j, rp;
+	const char *tbl = noflags ? "nf" : "ff";
+
+	using_prefetch = 0;
+	using_exception_3 = 0;
+	for (i = 0; i < 1; i++) /* We only do one level! */
+	{
+		cpu_level = 4 - i;
+		postfix = i;
+
+		fprintf(stblfile, "const struct comptbl op_smalltbl_%d_comp_%s[] = {\n",
+				postfix, tbl);
+
+		/* sam: this is for people with low memory (eg. me :)) */
+		printf("\n"
+				"#if !defined(PART_1) && !defined(PART_2) && "
+				"!defined(PART_3) && !defined(PART_4) && "
+				"!defined(PART_5) && !defined(PART_6) && "
+				"!defined(PART_7) && !defined(PART_8)"
+				"\n"
+				"#define PART_1 1\n"
+				"#define PART_2 1\n"
+				"#define PART_3 1\n"
+				"#define PART_4 1\n"
+				"#define PART_5 1\n"
+				"#define PART_6 1\n"
+				"#define PART_7 1\n"
+				"#define PART_8 1\n"
+				"#endif\n\n");
+
+		rp = 0;
+		for (j = 1; j <= 8; ++j) {
+			/* Upper bound (exclusive) of the j-th eighth of the opcode list. */
+			int k = (j * nr_cpuop_funcs) / 8;
+			printf("#ifdef PART_%d\n", j);
+			for (; rp < k; rp++)
+				generate_one_opcode(rp, noflags);
+			printf("#endif\n\n");
+		}
+
+		fprintf(stblfile, "{ 0, 65536, 0 }};\n");
+	}
+
+}
+
+#if (defined(OS_cygwin) || defined(OS_mingw)) && defined(EXTENDED_SIGSEGV)
+void cygwin_mingw_abort()
+{
+#undef abort
+	abort();
+}
+#endif
+
+/*
+ * Allocate the four work tables used by one generation pass
+ * (opcode_map, opcode_last_postfix, opcode_next_clev, counts).
+ *
+ * Returns 0 on success and -1 if an allocation failed. Nothing is freed on
+ * failure; the only caller exits straight away.
+ */
+static int alloc_pass_tables(void)
+{
+	opcode_map = (int *) malloc(sizeof(int) * nr_cpuop_funcs);
+	opcode_last_postfix = (int *) malloc(sizeof(int) * nr_cpuop_funcs);
+	opcode_next_clev = (int *) malloc(sizeof(int) * nr_cpuop_funcs);
+	counts = (unsigned long *) malloc(65536 * sizeof(unsigned long));
+
+	/* malloc(0) may legitimately return NULL, so the three opcode tables
+	 * are only checked when they have a non-zero size. */
+	if (counts == NULL
+			|| (nr_cpuop_funcs != 0
+				&& (opcode_map == NULL || opcode_last_postfix == NULL
+					|| opcode_next_clev == NULL))) {
+		fprintf(stderr, "gencomp: out of memory\n");
+		return -1;
+	}
+	return 0;
+}
+
+/* Release the tables obtained from alloc_pass_tables(). */
+static void free_pass_tables(void)
+{
+	free(opcode_map);
+	free(opcode_last_postfix);
+	free(opcode_next_clev);
+	free(counts);
+}
+
+/*
+ * Generator entry point: writes comptbl.h, compstbl.cpp and compemu.cpp
+ * into the current directory, first the flag-generating ("ff") handlers
+ * and then the "nf" ones.
+ *
+ * Returns 0 on success and 1 if an output file could not be opened or
+ * written, or if memory ran out. The non-zero status matters because the
+ * outputs are consumed by the build: a silently truncated file would
+ * otherwise look like a finished, up-to-date target.
+ */
+int main(void)
+{
+	int status = 0;
+
+	init_table68k ();
+
+	if (alloc_pass_tables() != 0)
+		return 1;
+	read_counts();
+
+	/* It would be a lot nicer to put all in one file (we'd also get rid of
+	 * cputbl.h that way), but cpuopti can't cope. That could be fixed, but
+	 * I don't dare to touch the 68k version. */
+
+	headerfile = fopen("comptbl.h", "wb");
+	if (headerfile == NULL) {
+		perror("gencomp: comptbl.h");
+		return 1;
+	}
+	fprintf (headerfile, ""
+		"extern const struct comptbl op_smalltbl_0_comp_nf[];\n"
+		"extern const struct comptbl op_smalltbl_0_comp_ff[];\n"
+		"");
+
+	stblfile = fopen("compstbl.cpp", "wb");
+	if (stblfile == NULL) {
+		perror("gencomp: compstbl.cpp");
+		return 1;
+	}
+
+	/* stdout becomes compemu.cpp from here on, so every diagnostic has to
+	 * go to stderr. An explicit check replaces assert(0), which compiles
+	 * away under NDEBUG. */
+	if (freopen("compemu.cpp", "wb", stdout) == NULL)
+	{
+		perror("gencomp: compemu.cpp");
+		return 1;
+	}
+
+	generate_includes(stdout);
+	generate_includes(stblfile);
+
+	printf("#include \"compiler/compemu.h\"\n");
+
+	/* Pass 1: handlers that compute flags. */
+	noflags = 0;
+	generate_func(noflags);
+	free_pass_tables();
+
+	/* Pass 2: handlers that skip flags. The tables are rebuilt from
+	 * scratch by read_counts(), exactly as before. */
+	if (alloc_pass_tables() != 0)
+		return 1;
+	read_counts();
+	noflags = 1;
+	generate_func(noflags);
+	free_pass_tables();
+
+	free(table68k);
+
+	/* Report buffered write errors (disk full, ...) instead of dropping them. */
+	if (fflush(stdout) != 0 || ferror(stdout)) {
+		fprintf(stderr, "gencomp: error writing compemu.cpp\n");
+		status = 1;
+	}
+	if (fclose(stblfile) != 0) {
+		fprintf(stderr, "gencomp: error writing compstbl.cpp\n");
+		status = 1;
+	}
+	if (fclose(headerfile) != 0) {
+		fprintf(stderr, "gencomp: error writing comptbl.h\n");
+		status = 1;
+	}
+	return status;
+}
diff --git a/BasiliskII/src/uae_cpu/compiler/test_codegen_arm.c b/BasiliskII/src/uae_cpu/compiler/test_codegen_arm.c new file mode 100644 index 00000000..227a99d3 --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/test_codegen_arm.c @@ -0,0 +1,264 @@ +/* Example of using sigaction() to setup a signal handler with 3 arguments + * including siginfo_t. 
+ */ +#include +#include +#include +#include + +#include "flags_arm.h" +#include "codegen_arm.h" + +#define TEST(c,ex,s) { c; if (opcode != ex) printf("(%s) Invalid opcode %x expected %x\n", s, opcode, ex); } + +int opcode; + +void emit_long(v) { + opcode = v; +} + +int main (int argc, char *argv[]) +{ +TEST(MOV_ri(8, 15), 0xe3a0800f, "mov r8,#15"); +TEST(MOV_rr(8,9), 0xe1a08009, "mov r8, r9"); +TEST(MOV_rrLSLi(8,9,5), 0xe1a08289, "lsl r8, r9, #5"); +TEST(MOV_rrLSLr(8,9,7), 0xe1a08719, "lsl r8, r9, r7"); +TEST(MOV_rrLSRi(8,9,5), 0xe1a082a9, "lsr r8, r9, #5"); +TEST(MOV_rrLSRr(8,9,7), 0xe1a08739, "lsr r8, r9, r7"); +TEST(MOV_rrASRi(8,9,5), 0xe1a082c9, "asr r8, r9, #5"); +TEST(MOV_rrASRr(8,9,7), 0xe1a08759, "asr r8, r9, r7"); +TEST(MOV_rrRORi(8,9,5), 0xe1a082e9, "ror r8, r9, #5"); +TEST(MOV_rrRORr(8,9,7), 0xe1a08779, "ror r8, r9, r7"); +TEST(MOV_rrRRX(8,9), 0xe1a08069, "rrx r8, r9"); + +TEST(MOVS_ri(8, 15), 0xe3b0800f, "movs r8,#15"); +TEST(MOVS_rr(8,9), 0xe1b08009, "movs r8, r9"); +TEST(MOVS_rrLSLi(8,9,5), 0xe1b08289, "lsls r8, r9, #5"); +TEST(MOVS_rrLSLr(8,9,7), 0xe1b08719, "lsls r8, r9, r7"); +TEST(MOVS_rrLSRi(8,9,5), 0xe1b082a9, "lsrs r8, r9, #5"); +TEST(MOVS_rrLSRr(8,9,7), 0xe1b08739, "lsrs r8, r9, r7"); +TEST(MOVS_rrASRi(8,9,5), 0xe1b082c9, "asrs r8, r9, #5"); +TEST(MOVS_rrASRr(8,9,7), 0xe1b08759, "asrs r8, r9, r7"); +TEST(MOVS_rrRORi(8,9,5), 0xe1b082e9, "rors r8, r9, #5"); +TEST(MOVS_rrRORr(8,9,7), 0xe1b08779, "rors r8, r9, r7"); +TEST(MOVS_rrRRX(8,9), 0xe1b08069, "rrxs r8, r9"); + +TEST(MVN_ri(8, 15), 0xe3e0800f, "mvn r8,#15"); +TEST(MVN_rr(8,9), 0xe1e08009, "mvn r8, r9"); +TEST(MVN_rrLSLi(8,9,5), 0xe1e08289, "mvn r8, r9, lsl #5"); +TEST(MVN_rrLSLr(8,9,7), 0xe1e08719, "mvn r8, r9, lsl r7"); +TEST(MVN_rrLSRi(8,9,5), 0xe1e082a9, "mvn r8, r9, lsr #5"); +TEST(MVN_rrLSRr(8,9,7), 0xe1e08739, "mvn r8, r9, lsr r7"); +TEST(MVN_rrASRi(8,9,5), 0xe1e082c9, "mvn r8, r9, asr #5"); +TEST(MVN_rrASRr(8,9,7), 0xe1e08759, "mvn r8, r9, asr r7"); +TEST(MVN_rrRORi(8,9,5), 
0xe1e082e9, "mvn r8, r9, ror #5"); +TEST(MVN_rrRORr(8,9,7), 0xe1e08779, "mvn r8, r9, ror r7"); +TEST(MVN_rrRRX(8,9), 0xe1e08069, "mvn r8, r9, rrx"); + +TEST(CMP_ri(8, 15), 0xe358000f, "cmp r8,#15"); +TEST(CMP_rr(8,9), 0xe1580009, "cmp r8, r9"); +TEST(CMP_rrLSLi(8,9,5), 0xe1580289, "cmp r8, r9, #5"); +TEST(CMP_rrLSLr(8,9,7), 0xe1580719, "cmp r8, r9, r7"); +TEST(CMP_rrLSRi(8,9,5), 0xe15802a9, "cmp r8, r9, #5"); +TEST(CMP_rrLSRr(8,9,7), 0xe1580739, "cmp r8, r9, r7"); +TEST(CMP_rrASRi(8,9,5), 0xe15802c9, "cmp r8, r9, #5"); +TEST(CMP_rrASRr(8,9,7), 0xe1580759, "cmp r8, r9, r7"); +TEST(CMP_rrRORi(8,9,5), 0xe15802e9, "cmp r8, r9, #5"); +TEST(CMP_rrRORr(8,9,7), 0xe1580779, "cmp r8, r9, r7"); +TEST(CMP_rrRRX(8,9), 0xe1580069, "cmp r8, r9"); + +TEST(CMP_ri(8, 0x81), 0xe3580081, "cmp r8,#0x81"); +TEST(CMP_ri(8, 0x204), 0xe3580f81, "cmp r8,#0x204"); +TEST(CMP_ri(8, 0x810), 0xe3580e81, "cmp r8,#0x8100"); +TEST(CMP_ri(8, 0x2040), 0xe3580d81, "cmp r8,#0x2040"); +TEST(CMP_ri(8, 0x8100), 0xe3580c81, "cmp r8,#0x8100"); +TEST(CMP_ri(8, 0x20400), 0xe3580b81, "cmp r8,#0x20400"); +TEST(CMP_ri(8, 0x81000), 0xe3580a81, "cmp r8,#0x81000"); +TEST(CMP_ri(8, 0x204000), 0xe3580981, "cmp r8,#0x204000"); +TEST(CMP_ri(8, 0x810000), 0xe3580881, "cmp r8,#0x810000"); +TEST(CMP_ri(8, 0x2040000), 0xe3580781, "cmp r8,#0x2040000"); +TEST(CMP_ri(8, 0x8100000), 0xe3580681, "cmp r8,#0x8100000"); +TEST(CMP_ri(8, 0x20400000), 0xe3580581, "cmp r8,#0x20400000"); +TEST(CMP_ri(8, 0x81000000), 0xe3580481, "cmp r8,#0x81000000"); +TEST(CMP_ri(8, 0x04000002), 0xe3580381, "cmp r8,#0x04000002"); +TEST(CMP_ri(8, 0x10000008), 0xe3580281, "cmp r8,#0x10000008"); +TEST(CMP_ri(8, 0x40000020), 0xe3580181, "cmp r8,#0x40000020"); + +TEST(CMP_ri(8, 0x1200), 0xe3580c12, "cmp r8,#0x1200"); +TEST(CMP_ri(8, 0x120000), 0xe3580812, "cmp r8,#0x120000"); +TEST(CMP_ri(8, 0x12000000), 0xe3580412, "cmp r8,#0x12000000"); + +TEST(BEQ_i(5), 0x0a000005, "beq #5"); +TEST(BNE_i(5), 0x1a000005, "bne #5"); +TEST(BCS_i(5), 0x2a000005, "bcs #5"); 
+TEST(BCC_i(5), 0x3a000005, "bcc #5"); +TEST(BMI_i(5), 0x4a000005, "bmi #5"); +TEST(BPL_i(5), 0x5a000005, "bpl #5"); +TEST(BVS_i(5), 0x6a000005, "bvs #5"); +TEST(BVC_i(5), 0x7a000005, "bvc #5"); +TEST(BHI_i(5), 0x8a000005, "bhi #5"); +TEST(BLS_i(5), 0x9a000005, "bls #5"); +TEST(BGE_i(5), 0xaa000005, "bge #5"); +TEST(BLT_i(5), 0xba000005, "blt #5"); +TEST(BGT_i(5), 0xca000005, "bgt #5"); +TEST(BLE_i(5), 0xda000005, "ble #5"); +TEST(B_i(5), 0xea000005, "b #5"); + +TEST(BL_i(5), 0xeb000005, "bl #5"); +TEST(BLX_r(8), 0xe12fff38, "blx r8"); +TEST(BX_r(8), 0xe12fff18, "bx r8"); + +TEST(EOR_rri(6, 8, 15), 0xe228600f, "eor r6, r8,#15"); +TEST(EOR_rrr(6, 8,9), 0xe0286009, "eor r6, r8, r9"); +TEST(EOR_rrrLSLi(6,8,9,5), 0xe0286289, "eor r6, r8, r9, lsl #5"); +TEST(EOR_rrrLSLr(6,8,9,7), 0xe0286719, "eor r6, r8, r9, lsl r7"); +TEST(EOR_rrrLSRi(6,8,9,5), 0xe02862a9, "eor r6, r8, r9, lsr #5"); +TEST(EOR_rrrLSRr(6,8,9,7), 0xe0286739, "eor r6, r8, r9, lsr r7"); +TEST(EOR_rrrASRi(6,8,9,5), 0xe02862c9, "eor r6, r8, r9, asr #5"); +TEST(EOR_rrrASRr(6,8,9,7), 0xe0286759, "eor r6, r8, r9, asr r7"); +TEST(EOR_rrrRORi(6,8,9,5), 0xe02862e9, "eor r6, r8, r9, ror #5"); +TEST(EOR_rrrRORr(6,8,9,7), 0xe0286779, "eor r6, r8, r9, ror r7"); +TEST(EOR_rrrRRX(6,8,9), 0xe0286069, "eor r6, r8, r9, rrx"); + +TEST(EORS_rri(6, 8, 15), 0xe238600f, "eors r6, r8,#15"); +TEST(EORS_rrr(6, 8,9), 0xe0386009, "eors r6, r8, r9"); +TEST(EORS_rrrLSLi(6,8,9,5), 0xe0386289, "eors r6, r8, r9, lsl #5"); +TEST(EORS_rrrLSLr(6,8,9,7), 0xe0386719, "eors r6, r8, r9, lsr r7"); +TEST(EORS_rrrLSRi(6,8,9,5), 0xe03862a9, "eors r6, r8, r9, lsr #5"); +TEST(EORS_rrrLSRr(6,8,9,7), 0xe0386739, "eors r6, r8, r9, lsr r7"); +TEST(EORS_rrrASRi(6,8,9,5), 0xe03862c9, "eors r6, r8, r9, asr #5"); +TEST(EORS_rrrASRr(6,8,9,7), 0xe0386759, "eors r6, r8, r9, asr r7"); +TEST(EORS_rrrRORi(6,8,9,5), 0xe03862e9, "eors r6, r8, r9, ror #5"); +TEST(EORS_rrrRORr(6,8,9,7), 0xe0386779, "eors r6, r8, r9, ror r7"); +TEST(EORS_rrrRRX(6,8,9), 0xe0386069, "eors 
r6, r8, r9, rrx"); + +TEST(MRS_CPSR(6), 0xe10f6000, "mrs r6, CPSR"); +TEST(MRS_SPSR(6), 0xe14f6000, "mrs r6, SPSR"); + +TEST(MSR_CPSR_i(5), 0xe329f005, "msr CPSR_fc, #5"); +TEST(MSR_CPSR_r(5), 0xe129f005, "msr CPSR_fc, r5"); + +TEST(MSR_CPSRf_i(5), 0xe328f005, "msr CPSR_f, #5"); +TEST(MSR_CPSRf_r(5), 0xe128f005, "msr CPSR_f, r5"); + +TEST(MSR_CPSRc_i(5), 0xe321f005, "msr CPSR_c, #5"); +TEST(MSR_CPSRc_r(5), 0xe121f005, "msr CPSR_c, r5"); + +TEST(PUSH(6), 0xe92d0040, "push {r6}"); +TEST(POP(6), 0xe8bd0040, "pop {r6}"); + +TEST(BIC_rri(0, 0, 0x9f000000), 0xe3c0049f, "bic r0, r0, #0x9f000000"); +TEST(BIC_rri(2, 3, 0xff00), 0xe3c32cff, "bic r2, r3, #0xff00"); +TEST(BIC_rri(3, 4, 0xff), 0xe3c430ff, "bic r3, r4, #0xff"); + +TEST(ORR_rrrLSRi(0, 1, 2, 16), 0xe1810822, "orr r0, r1, r2, lsr #16"); +TEST(ORR_rrrLSRi(0, 1, 2, 24), 0xe1810c22, "orr r0, r1, r2, lsr #24"); + +TEST(LDR_rR(8, 9), 0xe5998000, "ldr r8, [r9]"); +TEST(LDR_rRI(8, 9, 4), 0xe5998004, "ldr r8, [r9, #4]"); +TEST(LDR_rRi(8, 9, 4), 0xe5198004, "ldr r8, [r9, #-4]"); +TEST(LDR_rRR(8, 9, 7), 0xe7998007, "ldr r8, [r9, r7]"); +TEST(LDR_rRr(8, 9, 7), 0xe7198007, "ldr r8, [r9, -r7]"); +TEST(LDR_rRR_LSLi(8, 9, 7, 5), 0xe7998287, "ldr r8, [r9, r7, lsl #5]"); +TEST(LDR_rRr_LSLi(8, 9, 7, 5), 0xe7198287, "ldr r8, [r9, -r7, lsl #5]"); +TEST(LDR_rRR_LSRi(8, 9, 7, 5), 0xe79982a7, "ldr r8, [r9, r7, lsr #5]"); +TEST(LDR_rRr_LSRi(8, 9, 7, 5), 0xe71982a7, "ldr r8, [r9, -r7, lsr #5]"); +TEST(LDR_rRR_ASRi(8, 9, 7, 5), 0xe79982c7, "ldr r8, [r9, r7, asr #5]"); +TEST(LDR_rRr_ASRi(8, 9, 7, 5), 0xe71982c7, "ldr r8, [r9, -r7, asr #5]"); +TEST(LDR_rRR_RORi(8, 9, 7, 5), 0xe79982e7, "ldr r8, [r9, r7, ror #5]"); +TEST(LDR_rRr_RORi(8, 9, 7, 5), 0xe71982e7, "ldr r8, [r9, -r7, ror #5]"); +TEST(LDR_rRR_RRX(8, 9, 7), 0xe7998067, "ldr r8, [r9, r7, rrx]"); +TEST(LDR_rRr_RRX(8, 9, 7), 0xe7198067, "ldr r8, [r9, -r7, rrx]"); + +TEST(LDRB_rR(8, 9), 0xe5d98000, "ldrb r8, [r9]"); +TEST(LDRB_rRI(8, 9, 4), 0xe5d98004, "ldrb r8, [r9, #4]"); 
+TEST(LDRB_rRi(8, 9, 4), 0xe5598004, "ldrb r8, [r9, #-4]"); +TEST(LDRB_rRR(8, 9, 7), 0xe7d98007, "ldrb r8, [r9, r7]"); +TEST(LDRB_rRr(8, 9, 7), 0xe7598007, "ldrb r8, [r9, -r7]"); +TEST(LDRB_rRR_LSLi(8, 9, 7, 5), 0xe7d98287, "ldrb r8, [r9, r7, lsl #5]"); +TEST(LDRB_rRr_LSLi(8, 9, 7, 5), 0xe7598287, "ldrb r8, [r9, -r7, lsl #5]"); +TEST(LDRB_rRR_LSRi(8, 9, 7, 5), 0xe7d982a7, "ldrb r8, [r9, r7, lsr #5]"); +TEST(LDRB_rRr_LSRi(8, 9, 7, 5), 0xe75982a7, "ldrb r8, [r9, -r7, lsr #5]"); +TEST(LDRB_rRR_ASRi(8, 9, 7, 5), 0xe7d982c7, "ldrb r8, [r9, r7, asr #5]"); +TEST(LDRB_rRr_ASRi(8, 9, 7, 5), 0xe75982c7, "ldrb r8, [r9, -r7, asr #5]"); +TEST(LDRB_rRR_RORi(8, 9, 7, 5), 0xe7d982e7, "ldrb r8, [r9, r7, ror #5]"); +TEST(LDRB_rRr_RORi(8, 9, 7, 5), 0xe75982e7, "ldrb r8, [r9, -r7, ror #5]"); +TEST(LDRB_rRR_RRX(8, 9, 7), 0xe7d98067, "ldrb r8, [r9, r7, rrx]"); +TEST(LDRB_rRr_RRX(8, 9, 7), 0xe7598067, "ldrb r8, [r9, -r7, rrx]"); + +TEST(LDRSB_rR(8, 9), 0xe1d980d0, "ldrsb r8, [r9]"); +TEST(LDRSB_rRI(8, 9, 4), 0xe1d980d4, "ldrsb r8, [r9, #4]"); +TEST(LDRSB_rRi(8, 9, 4), 0xe15980d4, "ldrsb r8, [r9, #-4]"); +TEST(LDRSB_rRR(8, 9, 7), 0xe19980d7, "ldrsb r8, [r9, r7]"); +TEST(LDRSB_rRr(8, 9, 7), 0xe11980d7, "ldrsb r8, [r9, -r7]"); + +TEST(LDRSH_rR(8, 9), 0xe1d980f0, "ldrsh r8, [r9]"); +TEST(LDRSH_rRI(8, 9, 4), 0xe1d980f4, "ldrsh r8, [r9, #4]"); +TEST(LDRSH_rRi(8, 9, 4), 0xe15980f4, "ldrsh r8, [r9, #-4]"); +TEST(LDRSH_rRR(8, 9, 7), 0xe19980f7, "ldrsh r8, [r9, r7]"); +TEST(LDRSH_rRr(8, 9, 7), 0xe11980f7, "ldrsh r8, [r9, -r7]"); + +TEST(LDRH_rR(8, 9), 0xe1d980b0, "ldrh r8, [r9]"); +TEST(LDRH_rRI(8, 9, 4), 0xe1d980b4, "ldrh r8, [r9, #4]"); +TEST(LDRH_rRi(8, 9, 4), 0xe15980b4, "ldrh r8, [r9, #-4]"); +TEST(LDRH_rRR(8, 9, 7), 0xe19980b7, "ldrh r8, [r9, r7]"); +TEST(LDRH_rRr(8, 9, 7), 0xe11980b7, "ldrh r8, [r9, -r7]"); + +TEST(STR_rRR(8,9,7), 0xe7898007, "str r8, [r9, r7]"); +TEST(STR_rRr(8,9,7), 0xe7098007, "str r8, [r9, -r7]"); + +TEST(STRB_rR(5, 6), 0xe5c65000, "strb r5,[r6]"); + +TEST(STRH_rR(8, 
9), 0xe1c980b0, "strh r8, [r9]"); +TEST(STRH_rRI(8, 9, 4), 0xe1c980b4, "strh r8, [r9, #4]"); +TEST(STRH_rRi(8, 9, 4), 0xe14980b4, "strh r8, [r9, #-4]"); +TEST(STRH_rRR(8, 9, 7), 0xe18980b7, "strh r8, [r9, r7]"); +TEST(STRH_rRr(8, 9, 7), 0xe10980b7, "strh r8, [r9, -r7]"); + +TEST(CLZ_rr(2, 3), 0xe16f2f13, "clz r2,r3"); +TEST(REV_rr(2, 3), 0xe6bf2f33, "rev r2, r3"); +TEST(REV16_rr(2, 3), 0xe6bf2fb3, "rev16 r2, r3"); +TEST(REVSH_rr(2, 3), 0xe6ff2fb3, "revsh r2, r3"); + +TEST(SXTB_rr(2,3), 0xe6af2073, "sxtb r2,r3"); +TEST(SXTB_rr(3,4), 0xe6af3074, "sxtb r3,r4"); + +TEST(SXTB_rr_ROR8(2,3), 0xe6af2473, "sxtb r2, r3, ror #8"); +TEST(SXTB_rr_ROR16(2,3), 0xe6af2873, "sxtb r2, r3, ror #16"); +TEST(SXTB_rr_ROR24(2,3), 0xe6af2c73, "sxtb r2, r3, ror #24"); +TEST(SXTH_rr(2,3), 0xe6bf2073, "sxth r2, r3"); +TEST(SXTH_rr_ROR8(2,3), 0xe6bf2473, "sxth r2, r3, ror #8"); +TEST(SXTH_rr_ROR16(2,3), 0xe6bf2873, "sxth r2, r3, ror #16"); +TEST(SXTH_rr_ROR24(2,3), 0xe6bf2c73, "sxth r2, r3, ror #24"); +TEST(UXTB_rr(2,3), 0xe6ef2073, "uxtb r2, r3"); +TEST(UXTB_rr_ROR8(2,3), 0xe6ef2473, "uxtb r2, r3, ror #8"); +TEST(UXTB_rr_ROR16(2,3), 0xe6ef2873, "uxtb r2, r3, ror #16"); +TEST(UXTB_rr_ROR24(2,3), 0xe6ef2c73, "uxtb r2, r3, ror #24"); +TEST(UXTH_rr(2,3), 0xe6ff2073, "uxth r2, r3"); +TEST(UXTH_rr_ROR8(2,3), 0xe6ff2473, "uxth r2, r3, ror #8"); +TEST(UXTH_rr_ROR16(2,3), 0xe6ff2873, "uxth r2, r3, ror #16"); +TEST(UXTH_rr_ROR24(2,3), 0xe6ff2c73, "uxth r2, r3, ror #24"); + +TEST(REV_rr(2,3), 0xe6bf2f33, "rev r2, r3"); +TEST(REV16_rr(2,3), 0xe6bf2fb3, "rev16 r2, r3"); +TEST(REVSH_rr(2,3), 0xe6ff2fb3, "revsh r2, r3"); + +TEST(CC_MOV_ri(NATIVE_CC_CS, 4,1), 0x23a04001, "movcs r4, #1"); +TEST(CC_MOV_ri(NATIVE_CC_CC, 4,1), 0x33a04001, "movcc r4, #1"); + +int imm = 0x9f; +TEST(ADDS_rri(0, 0, imm << 24), 0xe290049f, "adds r0, r0, 0x9f000000"); + +TEST(PKHBT_rrr(1, 2, 3), 0xe6821013, "pkhbt r1,r2,r3"); +TEST(MVN_ri8(1,2), 0xe3e01002, "mvn r1,#2"); + +TEST(ORR_rri8RORi(1,2,0x12,24), 0xe3821c12, "orr r1, r2, 
#0x1200"); +TEST(PKHTB_rrrASRi(1, 2, 3, 4), 0xe6821253, "pkhtb r1,r2,r3,ASR #4"); +TEST(PKHBT_rrrLSLi(1, 2, 3, 4), 0xe6821213, "pkhbt r1,r2,r3,LSL #4"); + +TEST(MUL_rrr(1,2,3), 0xe0010392, "mul r1, r2, r3"); +TEST(MULS_rrr(1,2,3), 0xe0110392, "muls r1, r2, r3"); + + +} + diff --git a/BasiliskII/src/uae_cpu/compiler/test_codegen_x86.cpp b/BasiliskII/src/uae_cpu/compiler/test_codegen_x86.cpp index 236a2d5e..216effe5 100644 --- a/BasiliskII/src/uae_cpu/compiler/test_codegen_x86.cpp +++ b/BasiliskII/src/uae_cpu/compiler/test_codegen_x86.cpp @@ -7,7 +7,7 @@ /*********************************************************************** * - * Copyright 2004-2008 Gwenole Beauchesne + * Copyright 2004 Gwenole Beauchesne * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -26,7 +26,7 @@ ***********************************************************************/ /* - * STATUS: 26M variations covering unary register based operations, + * STATUS: 5.5M variations covering unary register based operations, * reg/reg operations, imm/reg operations. 
* * TODO: @@ -44,35 +44,6 @@ #include "sysdeps.h" -static int verbose = 2; - -#define TEST_INST_ALU 1 -#define TEST_INST_FPU 1 -#define TEST_INST_MMX 1 -#define TEST_INST_SSE 1 -#if TEST_INST_ALU -#define TEST_INST_ALU_REG 1 -#define TEST_INST_ALU_REG_REG 1 -#define TEST_INST_ALU_CNT_REG 1 -#define TEST_INST_ALU_IMM_REG 1 -#define TEST_INST_ALU_MEM_REG 1 -#endif -#if TEST_INST_FPU -#define TEST_INST_FPU_UNARY 1 -#define TEST_INST_FPU_REG 1 -#define TEST_INST_FPU_MEM 1 -#endif -#if TEST_INST_MMX -#define TEST_INST_MMX_REG_REG 1 -#define TEST_INST_MMX_IMM_REG 1 -#define TEST_INST_MMX_MEM_REG 1 -#endif -#if TEST_INST_SSE -#define TEST_INST_SSE_REG 1 -#define TEST_INST_SSE_REG_REG 1 -#define TEST_INST_SSE_MEM_REG 1 -#endif - #undef abort #define abort() do { \ fprintf(stderr, "ABORT: %s, line %d\n", __FILE__, __LINE__); \ @@ -83,30 +54,8 @@ static int verbose = 2; #define X86_FLAT_REGISTERS 0 #define X86_OPTIMIZE_ALU 1 #define X86_OPTIMIZE_ROTSHI 1 -#define X86_RIP_RELATIVE_ADDR 0 #include "compiler/codegen_x86.h" -#if X86_TARGET_64BIT -#define X86_MAX_ALU_REGS 16 -#define X86_MAX_SSE_REGS 16 -#else -#define X86_MAX_ALU_REGS 8 -#define X86_MAX_SSE_REGS 8 -#endif -#define X86_MAX_FPU_REGS 8 -#define X86_MAX_MMX_REGS 8 - -#define VALID_REG(r, b, n) (((unsigned)((r) - X86_##b)) < (n)) -#if X86_TARGET_64BIT -#define VALID_REG8(r) (VALID_REG(r, AL, 16) || VALID_REG(r, AH, 4)) -#define VALID_REG64(r) VALID_REG(r, RAX, X86_MAX_ALU_REGS) -#else -#define VALID_REG8(r) (VALID_REG(r, AL, 4) || VALID_REG(r, AH, 4)) -#define VALID_REG64(r) (0) -#endif -#define VALID_REG16(r) VALID_REG(r, AX, X86_MAX_ALU_REGS) -#define VALID_REG32(r) VALID_REG(r, EAX, X86_MAX_ALU_REGS) - #define x86_emit_byte(B) emit_byte(B) #define x86_emit_word(W) emit_word(W) #define x86_emit_long(L) emit_long(L) @@ -214,7 +163,7 @@ static int disass_x86(char *buf, uintptr adr) sfile.buffer = buf; sfile.current = buf; INIT_DISASSEMBLE_INFO(info, (FILE *)&sfile, (fprintf_ftype)mon_sprintf); - info.mach = 
X86_TARGET_64BIT ? bfd_mach_x86_64 : bfd_mach_i386_i386; + info.mach = bfd_mach_x86_64; info.disassembler_options = "suffix"; return print_insn_i386(adr, &info); } @@ -254,19 +203,16 @@ struct operand_t { } }; -#define MAX_INSNS 1024 -#define MAX_INSN_LENGTH 16 -#define MAX_INSN_OPERANDS 3 - struct insn_t { char name[16]; int n_operands; - operand_t operands[MAX_INSN_OPERANDS]; +#define MAX_OPERANDS 3 + operand_t operands[MAX_OPERANDS]; void clear() { memset(name, 0, sizeof(name)); n_operands = 0; - for (int i = 0; i < MAX_INSN_OPERANDS; i++) + for (int i = 0; i < MAX_OPERANDS; i++) operands[i].clear(); } @@ -292,226 +238,40 @@ struct insn_t { } }; -static inline char *find_blanks(char *p) -{ - while (*p && !isspace(*p)) - ++p; - return p; +static const struct { + const char *name; + int reg; } +regnames[] = { +#define _(REG) { #REG, X86_##REG } -static inline char *skip_blanks(char *p) -{ - while (*p && isspace(*p)) - ++p; - return p; -} + _(AL), _(CL), _(DL), _(BL), + _(AH), _(CH), _(DH), _(BH), + _(SPL), _(BPL), _(SIL), _(DIL), + _(R8B), _(R9B), _(R10B), _(R11B), _(R12B), _(R13B), _(R14B), _(R15B), + + _(AX), _(CX), _(DX), _(BX), _(SP), _(BP), _(SI), _(DI), + _(R8W), _(R9W), _(R10W), _(R11W), _(R12W), _(R13W), _(R14W), _(R15W), + + _(EAX), _(ECX), _(EDX), _(EBX), _(ESP), _(EBP), _(ESI), _(EDI), + _(R8D), _(R9D), _(R10D), _(R11D), _(R12D), _(R13D), _(R14D), _(R15D), + + _(RAX), _(RCX), _(RDX), _(RBX), _(RSP), _(RBP), _(RSI), _(RDI), + _(R8), _(R9), _(R10), _(R11), _(R12), _(R13), _(R14), _(R15), + + { NULL, -1 } +#undef _ +}; static int parse_reg(operand_t *op, int optype, char *buf) { - int reg = X86_NOREG; - int len = 0; - char *p = buf; - switch (p[0]) { - case 'a': case 'A': - len = 2; - switch (p[1]) { - case 'l': case 'L': reg = X86_AL; break; - case 'h': case 'H': reg = X86_AH; break; - case 'x': case 'X': reg = X86_AX; break; + for (int i = 0; regnames[i].name; i++) { + int len = strlen(regnames[i].name); + if (strncasecmp(regnames[i].name, buf, len) == 
0) { + op->fill(optype, regnames[i].reg); + return len; } - break; - case 'b': case 'B': - len = 2; - switch (p[1]) { - case 'l': case 'L': reg = X86_BL; break; - case 'h': case 'H': reg = X86_BH; break; - case 'x': case 'X': reg = X86_BX; break; - case 'p': case 'P': - switch (p[2]) { -#if X86_TARGET_64BIT - case 'l': case 'L': reg = X86_BPL, ++len; break; -#endif - default: reg = X86_BP; break; - } - break; - } - break; - case 'c': case 'C': - len = 2; - switch (p[1]) { - case 'l': case 'L': reg = X86_CL; break; - case 'h': case 'H': reg = X86_CH; break; - case 'x': case 'X': reg = X86_CX; break; - } - break; - case 'd': case 'D': - len = 2; - switch (p[1]) { - case 'l': case 'L': reg = X86_DL; break; - case 'h': case 'H': reg = X86_DH; break; - case 'x': case 'X': reg = X86_DX; break; - case 'i': case 'I': - switch (p[2]) { -#if X86_TARGET_64BIT - case 'l': case 'L': reg = X86_DIL; ++len; break; -#endif - default: reg = X86_DI; break; - } - break; - } - break; - case 's': case 'S': - len = 2; - switch (p[2]) { -#if X86_TARGET_64BIT - case 'l': case 'L': - ++len; - switch (p[1]) { - case 'p': case 'P': reg = X86_SPL; break; - case 'i': case 'I': reg = X86_SIL; break; - } - break; -#endif - case '(': - if ((p[1] == 't' || p[1] == 'T') && isdigit(p[3]) && p[4] == ')') - len += 3, reg = X86_ST0 + (p[3] - '0'); - break; - default: - switch (p[1]) { - case 't': case 'T': reg = X86_ST0; break; - case 'p': case 'P': reg = X86_SP; break; - case 'i': case 'I': reg = X86_SI; break; - } - break; - } - break; - case 'e': case 'E': - len = 3; - switch (p[2]) { - case 'x': case 'X': - switch (p[1]) { - case 'a': case 'A': reg = X86_EAX; break; - case 'b': case 'B': reg = X86_EBX; break; - case 'c': case 'C': reg = X86_ECX; break; - case 'd': case 'D': reg = X86_EDX; break; - } - break; - case 'i': case 'I': - switch (p[1]) { - case 's': case 'S': reg = X86_ESI; break; - case 'd': case 'D': reg = X86_EDI; break; - } - break; - case 'p': case 'P': - switch (p[1]) { - case 'b': 
case 'B': reg = X86_EBP; break; - case 's': case 'S': reg = X86_ESP; break; - } - break; - } - break; -#if X86_TARGET_64BIT - case 'r': case 'R': - len = 3; - switch (p[2]) { - case 'x': case 'X': - switch (p[1]) { - case 'a': case 'A': reg = X86_RAX; break; - case 'b': case 'B': reg = X86_RBX; break; - case 'c': case 'C': reg = X86_RCX; break; - case 'd': case 'D': reg = X86_RDX; break; - } - break; - case 'i': case 'I': - switch (p[1]) { - case 's': case 'S': reg = X86_RSI; break; - case 'd': case 'D': reg = X86_RDI; break; - } - break; - case 'p': case 'P': - switch (p[1]) { - case 'b': case 'B': reg = X86_RBP; break; - case 's': case 'S': reg = X86_RSP; break; - } - break; - case 'b': case 'B': - switch (p[1]) { - case '8': reg = X86_R8B; break; - case '9': reg = X86_R9B; break; - } - break; - case 'w': case 'W': - switch (p[1]) { - case '8': reg = X86_R8W; break; - case '9': reg = X86_R9W; break; - } - break; - case 'd': case 'D': - switch (p[1]) { - case '8': reg = X86_R8D; break; - case '9': reg = X86_R9D; break; - } - break; - case '0': case '1': case '2': case '3': case '4': case '5': - if (p[1] == '1') { - const int r = p[2] - '0'; - switch (p[3]) { - case 'b': case 'B': reg = X86_R10B + r, ++len; break; - case 'w': case 'W': reg = X86_R10W + r, ++len; break; - case 'd': case 'D': reg = X86_R10D + r, ++len; break; - default: reg = X86_R10 + r; break; - } - } - break; - default: - switch (p[1]) { - case '8': reg = X86_R8, len = 2; break; - case '9': reg = X86_R9, len = 2; break; - } - break; - } - break; -#endif - case 'm': case 'M': - if ((p[1] == 'm' || p[1] == 'M') && isdigit(p[2])) - reg = X86_MM0 + (p[2] - '0'), len = 3; - break; - case 'x': case 'X': - if ((p[1] == 'm' || p[1] == 'M') && (p[2] == 'm' || p[2] == 'M')) { -#if X86_TARGET_64BIT - if (p[3] == '1' && isdigit(p[4])) - reg = X86_XMM10 + (p[4] - '0'), len = 5; - else -#endif - if (isdigit(p[3])) - reg = X86_XMM0 + (p[3] - '0'), len = 4; - } - break; } - - if (len > 0 && reg != X86_NOREG) { - 
op->fill(optype, reg); - return len; - } - - return X86_NOREG; -} - -static unsigned long parse_imm(char *nptr, char **endptr, int base = 0) -{ - errno = 0; -#if X86_TARGET_64BIT - if (sizeof(unsigned long) != 8) { - unsigned long long val = strtoull(nptr, endptr, 0); - if (errno == 0) - return val; - abort(); - } -#endif - unsigned long val = strtoul(nptr, endptr, 0); - if (errno == 0) - return val; - abort(); return 0; } @@ -519,8 +279,12 @@ static int parse_mem(operand_t *op, char *buf) { char *p = buf; - if (strncmp(buf, "0x", 2) == 0) - op->disp = parse_imm(buf, &p, 16); + if (strncmp(buf, "0x", 2) == 0) { + unsigned long val = strtoul(buf, &p, 16); + if (val == 0 && errno == EINVAL) + abort(); + op->disp = val; + } if (*p == '(') { p++; @@ -571,32 +335,6 @@ static void parse_insn(insn_t *ii, char *buf) char *p = buf; ii->clear(); -#if 0 - printf("BUF: %s\n", buf); -#endif - - if (strncmp(p, "rex64", 5) == 0) { - char *q = find_blanks(p); - if (verbose > 1) { - char prefix[16]; - memset(prefix, 0, sizeof(prefix)); - memcpy(prefix, p, q - p); - fprintf(stderr, "Instruction '%s', skip REX prefix '%s'\n", buf, prefix); - } - p = skip_blanks(q); - } - - if (strncmp(p, "rep", 3) == 0) { - char *q = find_blanks(p); - if (verbose > 1) { - char prefix[16]; - memset(prefix, 0, sizeof(prefix)); - memcpy(prefix, p, q - p); - fprintf(stderr, "Instruction '%s', skip REP prefix '%s'\n", buf, prefix); - } - p = skip_blanks(q); - } - for (int i = 0; !isspace(*p); i++) ii->name[i] = *p++; @@ -628,7 +366,10 @@ static void parse_insn(insn_t *ii, char *buf) p += n; break; case '$': { - ii->operands[n_operands].imm = parse_imm(++p, &p, 0); + unsigned long val = strtoul(++p, &p, 16); + if (val == 0 && errno == EINVAL) + abort(); + ii->operands[n_operands].imm = val; break; } case '*': @@ -652,23 +393,8 @@ static void parse_insn(insn_t *ii, char *buf) ii->n_operands = n_operands + 1; } -static unsigned long n_tests, n_failures; -static unsigned long n_all_tests, n_all_failures; - 
-static bool check_unary(insn_t *ii, const char *name) -{ - if (strcasecmp(ii->name, name) != 0) { - fprintf(stderr, "ERROR: instruction mismatch, expected %s, got %s\n", name, ii->name); - return false; - } - - if (ii->n_operands != 0) { - fprintf(stderr, "ERROR: instruction expected 0 operand, got %d\n", ii->n_operands); - return false; - } - - return true; -} +static long n_tests, n_failures; +static long n_all_tests, n_all_failures; static bool check_reg(insn_t *ii, const char *name, int r) { @@ -689,7 +415,7 @@ static bool check_reg(insn_t *ii, const char *name, int r) if (reg == -1) fprintf(stderr, "nothing\n"); else - fprintf(stderr, "r%d\n", reg); + fprintf(stderr, "%d\n", reg); return false; } @@ -716,7 +442,7 @@ static bool check_reg_reg(insn_t *ii, const char *name, int s, int d) if (srcreg == -1) fprintf(stderr, "nothing\n"); else - fprintf(stderr, "r%d\n", srcreg); + fprintf(stderr, "%d\n", srcreg); return false; } @@ -725,7 +451,7 @@ static bool check_reg_reg(insn_t *ii, const char *name, int s, int d) if (dstreg == -1) fprintf(stderr, "nothing\n"); else - fprintf(stderr, "r%d\n", dstreg); + fprintf(stderr, "%d\n", dstreg); return false; } @@ -782,13 +508,26 @@ static bool check_imm_reg(insn_t *ii, const char *name, uint32 v, int d, int mod return true; } -static bool do_check_mem(insn_t *ii, uint32 D, int B, int I, int S, int Mpos) +static bool check_mem_reg(insn_t *ii, const char *name, uint32 D, int B, int I, int S, int R) { - operand_t *mem = &ii->operands[Mpos]; + if (strcasecmp(ii->name, name) != 0) { + fprintf(stderr, "ERROR: instruction mismatch, expected %s, got %s\n", name, ii->name); + return false; + } + + if (ii->n_operands != 2) { + fprintf(stderr, "ERROR: instruction expected 2 operands, got %d\n", ii->n_operands); + return false; + } + + operand_t *mem = &ii->operands[0]; + operand_t *reg = &ii->operands[1]; + uint32 d = mem->disp; int b = mem->base; int i = mem->index; int s = mem->scale; + int r = reg->reg; if (d != D) { 
fprintf(stderr, "ERROR: instruction expected 0x%08x as displacement, got 0x%08x\n", D, d); @@ -810,41 +549,6 @@ static bool do_check_mem(insn_t *ii, uint32 D, int B, int I, int S, int Mpos) return false; } - return true; -} - -static bool check_mem(insn_t *ii, const char *name, uint32 D, int B, int I, int S) -{ - if (strcasecmp(ii->name, name) != 0) { - fprintf(stderr, "ERROR: instruction mismatch, expected %s, got %s\n", name, ii->name); - return false; - } - - if (ii->n_operands != 1) { - fprintf(stderr, "ERROR: instruction expected 1 operand, got %d\n", ii->n_operands); - return false; - } - - return do_check_mem(ii, D, B, I, S, 0); -} - -static bool check_mem_reg(insn_t *ii, const char *name, uint32 D, int B, int I, int S, int R, int Rpos = 1) -{ - if (strcasecmp(ii->name, name) != 0) { - fprintf(stderr, "ERROR: instruction mismatch, expected %s, got %s\n", name, ii->name); - return false; - } - - if (ii->n_operands != 2) { - fprintf(stderr, "ERROR: instruction expected 2 operands, got %d\n", ii->n_operands); - return false; - } - - if (!do_check_mem(ii, D, B, I, S, Rpos ^ 1)) - return false; - - int r = ii->operands[Rpos].reg; - if (r != R) { fprintf(stderr, "ERROR: instruction expected r%d as reg operand, got r%d\n", R, r); return false; @@ -853,51 +557,21 @@ static bool check_mem_reg(insn_t *ii, const char *name, uint32 D, int B, int I, return true; } -static inline bool check_reg_mem(insn_t *ii, const char *name, uint32 D, int B, int I, int S, int R) -{ - return check_mem_reg(ii, name, D, B, I, S, R, 0); -} - -static void show_instruction(const char *buffer, const uint8 *bytes) -{ - if (verbose > 1) { - if (1) { - for (int j = 0; j < MAX_INSN_LENGTH; j++) - fprintf(stderr, "%02x ", bytes[j]); - fprintf(stderr, "| "); - } - fprintf(stderr, "%s\n", buffer); - } -} - -static void show_status(unsigned long n_tests) -{ -#if 1 - const unsigned long N_STEPS = 100000; - static const char cursors[] = { '-', '\\', '|', '/' }; - if ((n_tests % N_STEPS) == 0) { - 
printf(" %c (%d)\r", cursors[(n_tests/N_STEPS)%sizeof(cursors)], n_tests); - fflush(stdout); - } -#else - const unsigned long N_STEPS = 1000000; - if ((n_tests % N_STEPS) == 0) - printf(" ... %d\n", n_tests); -#endif -} +static int verbose = 2; int main(void) { static char buffer[1024]; +#define MAX_INSN_LENGTH 16 +#define MAX_INSNS 1024 static uint8 block[MAX_INSNS * MAX_INSN_LENGTH]; static char *insns[MAX_INSNS]; static int modes[MAX_INSNS]; n_all_tests = n_all_failures = 0; -#if TEST_INST_ALU_REG printf("Testing reg forms\n"); n_tests = n_failures = 0; - for (int r = 0; r < X86_MAX_ALU_REGS; r++) { + for (int r = 0; r < 16; r++) { set_target(block); uint8 *b = get_target(); int i = 0; @@ -905,16 +579,11 @@ int main(void) insns[i++] = INSN; \ GENOP##r(r); \ } while (0) -#define GEN64(INSN, GENOP) do { \ - if (X86_TARGET_64BIT) \ - GEN(INSN, GENOP); \ -} while (0) #define GENA(INSN, GENOP) do { \ - if (VALID_REG8(r)) \ - GEN(INSN "b", GENOP##B); \ + GEN(INSN "b", GENOP##B); \ GEN(INSN "w", GENOP##W); \ GEN(INSN "l", GENOP##L); \ - GEN64(INSN "q", GENOP##Q); \ + GEN(INSN "q", GENOP##Q); \ } while (0) GENA("not", NOT); GENA("neg", NEG); @@ -924,40 +593,29 @@ int main(void) GENA("idiv", IDIV); GENA("dec", DEC); GENA("inc", INC); - if (X86_TARGET_64BIT) { - GEN("callq", CALLs); - GEN("jmpq", JMPs); - GEN("pushq", PUSHQ); - GEN("popq", POPQ); - } - else { - GEN("calll", CALLs); - GEN("jmpl", JMPs); - GEN("pushl", PUSHL); - GEN("popl", POPL); - } + GEN("callq", CALLs); + GEN("jmpq", JMPs); + GEN("pushl", PUSHQ); // FIXME: disass bug? wrong suffix + GEN("popl", POPQ); // FIXME: disass bug? wrong suffix GEN("bswap", BSWAPL); // FIXME: disass bug? no suffix - GEN64("bswap", BSWAPQ); // FIXME: disass bug? 
no suffix - if (VALID_REG8(r)) { - GEN("seto", SETO); - GEN("setno", SETNO); - GEN("setb", SETB); - GEN("setae", SETAE); - GEN("sete", SETE); - GEN("setne", SETNE); - GEN("setbe", SETBE); - GEN("seta", SETA); - GEN("sets", SETS); - GEN("setns", SETNS); - GEN("setp", SETP); - GEN("setnp", SETNP); - GEN("setl", SETL); - GEN("setge", SETGE); - GEN("setle", SETLE); - GEN("setg", SETG); - } + GEN("bswap", BSWAPQ); // FIXME: disass bug? no suffix + GEN("seto", SETO); + GEN("setno", SETNO); + GEN("setb", SETB); + GEN("setae", SETAE); + GEN("sete", SETE); + GEN("setne", SETNE); + GEN("setbe", SETBE); + GEN("seta", SETA); + GEN("sets", SETS); + GEN("setns", SETNS); + GEN("setp", SETP); + GEN("setnp", SETNP); + GEN("setl", SETL); + GEN("setge", SETGE); + GEN("setle", SETLE); + GEN("setg", SETG); #undef GENA -#undef GEN64 #undef GEN int last_insn = i; uint8 *e = get_target(); @@ -970,7 +628,8 @@ int main(void) parse_insn(&ii, buffer); if (!check_reg(&ii, insns[i], r)) { - show_instruction(buffer, p); + if (verbose > 1) + fprintf(stderr, "%s\n", buffer); n_failures++; } @@ -984,13 +643,11 @@ int main(void) printf(" done %ld/%ld\n", n_tests - n_failures, n_tests); n_all_tests += n_tests; n_all_failures += n_failures; -#endif -#if TEST_INST_ALU_REG_REG printf("Testing reg,reg forms\n"); n_tests = n_failures = 0; - for (int s = 0; s < X86_MAX_ALU_REGS; s++) { - for (int d = 0; d < X86_MAX_ALU_REGS; d++) { + for (int s = 0; s < 16; s++) { + for (int d = 0; d < 16; d++) { set_target(block); uint8 *b = get_target(); int i = 0; @@ -998,20 +655,15 @@ int main(void) insns[i++] = INSN; \ GENOP##rr(s, d); \ } while (0) -#define GEN64(INSN, GENOP) do { \ - if (X86_TARGET_64BIT) \ - GEN(INSN, GENOP); \ -} while (0) #define GEN1(INSN, GENOP, OP) do { \ insns[i++] = INSN; \ GENOP##rr(OP, s, d); \ } while (0) #define GENA(INSN, GENOP) do { \ - if (VALID_REG8(s) && VALID_REG8(d)) \ - GEN(INSN "b", GENOP##B); \ + GEN(INSN "b", GENOP##B); \ GEN(INSN "w", GENOP##W); \ GEN(INSN "l", GENOP##L); \ - 
GEN64(INSN "q", GENOP##Q); \ + GEN(INSN "q", GENOP##Q); \ } while (0) GENA("adc", ADC); GENA("add", ADD); @@ -1024,49 +676,45 @@ int main(void) GENA("mov", MOV); GEN("btw", BTW); GEN("btl", BTL); - GEN64("btq", BTQ); + GEN("btq", BTQ); GEN("btcw", BTCW); GEN("btcl", BTCL); - GEN64("btcq", BTCQ); + GEN("btcq", BTCQ); GEN("btrw", BTRW); GEN("btrl", BTRL); - GEN64("btrq", BTRQ); + GEN("btrq", BTRQ); GEN("btsw", BTSW); GEN("btsl", BTSL); - GEN64("btsq", BTSQ); + GEN("btsq", BTSQ); GEN("imulw", IMULW); GEN("imull", IMULL); - GEN64("imulq", IMULQ); + GEN("imulq", IMULQ); GEN1("cmove", CMOVW, X86_CC_Z); GEN1("cmove", CMOVL, X86_CC_Z); - if (X86_TARGET_64BIT) - GEN1("cmove", CMOVQ, X86_CC_Z); + GEN1("cmove", CMOVQ, X86_CC_Z); GENA("test", TEST); GENA("cmpxchg", CMPXCHG); GENA("xadd", XADD); GENA("xchg", XCHG); GEN("bsfw", BSFW); GEN("bsfl", BSFL); - GEN64("bsfq", BSFQ); + GEN("bsfq", BSFQ); GEN("bsrw", BSRW); GEN("bsrl", BSRL); - GEN64("bsrq", BSRQ); - if (VALID_REG8(s)) { - GEN("movsbw", MOVSBW); - GEN("movsbl", MOVSBL); - GEN64("movsbq", MOVSBQ); - GEN("movzbw", MOVZBW); - GEN("movzbl", MOVZBL); - GEN64("movzbq", MOVZBQ); - } + GEN("bsrq", BSRQ); + GEN("movsbw", MOVSBW); + GEN("movsbl", MOVSBL); + GEN("movsbq", MOVSBQ); + GEN("movzbw", MOVZBW); + GEN("movzbl", MOVZBL); + GEN("movzbq", MOVZBQ); GEN("movswl", MOVSWL); - GEN64("movswq", MOVSWQ); + GEN("movswq", MOVSWQ); GEN("movzwl", MOVZWL); - GEN64("movzwq", MOVZWQ); - GEN64("movslq", MOVSLQ); + GEN("movzwq", MOVZWQ); + GEN("movslq", MOVSLQ); #undef GENA #undef GEN1 -#undef GEN64 #undef GEN int last_insn = i; uint8 *e = get_target(); @@ -1079,7 +727,8 @@ int main(void) parse_insn(&ii, buffer); if (!check_reg_reg(&ii, insns[i], s, d)) { - show_instruction(buffer, p); + if (verbose > 1) + fprintf(stderr, "%s\n", buffer); n_failures++; } @@ -1094,12 +743,10 @@ int main(void) printf(" done %ld/%ld\n", n_tests - n_failures, n_tests); n_all_tests += n_tests; n_all_failures += n_failures; -#endif -#if TEST_INST_ALU_CNT_REG 
printf("Testing cl,reg forms\n"); n_tests = n_failures = 0; - for (int d = 0; d < X86_MAX_ALU_REGS; d++) { + for (int d = 0; d < 16; d++) { set_target(block); uint8 *b = get_target(); int i = 0; @@ -1107,16 +754,11 @@ int main(void) insns[i++] = INSN; \ GENOP##rr(X86_CL, d); \ } while (0) -#define GEN64(INSN, GENOP) do { \ - if (X86_TARGET_64BIT) \ - GEN(INSN, GENOP); \ -} while (0) #define GENA(INSN, GENOP) do { \ - if (VALID_REG8(d)) \ - GEN(INSN "b", GENOP##B); \ + GEN(INSN "b", GENOP##B); \ GEN(INSN "w", GENOP##W); \ GEN(INSN "l", GENOP##L); \ - GEN64(INSN "q", GENOP##Q); \ + GEN(INSN "q", GENOP##Q); \ } while (0) GENA("rol", ROL); GENA("ror", ROR); @@ -1126,7 +768,6 @@ int main(void) GENA("shr", SHR); GENA("sar", SAR); #undef GENA -#undef GEN64 #undef GEN int last_insn = i; uint8 *e = get_target(); @@ -1139,7 +780,8 @@ int main(void) parse_insn(&ii, buffer); if (!check_reg_reg(&ii, insns[i], X86_CL, d)) { - show_instruction(buffer, p); + if (verbose > 1) + fprintf(stderr, "%s\n", buffer); n_failures++; } @@ -1153,8 +795,8 @@ int main(void) printf(" done %ld/%ld\n", n_tests - n_failures, n_tests); n_all_tests += n_tests; n_all_failures += n_failures; -#endif + printf("Testing imm,reg forms\n"); static const uint32 imm_table[] = { 0x00000000, 0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010, 0x00000020, 0x00000040, @@ -1171,13 +813,10 @@ int main(void) 0xbbbbbbbb, 0xcccccccc, 0xdddddddd, 0xeeeeeeee, }; const int n_imm_tab_count = sizeof(imm_table)/sizeof(imm_table[0]); - -#if TEST_INST_ALU_IMM_REG - printf("Testing imm,reg forms\n"); n_tests = n_failures = 0; for (int j = 0; j < n_imm_tab_count; j++) { const uint32 value = imm_table[j]; - for (int d = 0; d < X86_MAX_ALU_REGS; d++) { + for (int d = 0; d < 16; d++) { set_target(block); uint8 *b = get_target(); int i = 0; @@ -1186,32 +825,22 @@ int main(void) modes[i] = -1; \ i++; GENOP##ir(value, d); \ } while (0) -#define GEN64(INSN, GENOP) do { \ - if (X86_TARGET_64BIT) \ - GEN(INSN, GENOP); \ - } 
while (0) #define GENM(INSN, GENOP, MODE) do { \ insns[i] = INSN; \ modes[i] = MODE; \ i++; GENOP##ir(value, d); \ } while (0) -#define GENM64(INSN, GENOP, MODE) do { \ - if (X86_TARGET_64BIT) \ - GENM(INSN, GENOP, MODE); \ - } while (0) #define GENA(INSN, GENOP) do { \ - if (VALID_REG8(d)) \ GEN(INSN "b", GENOP##B); \ GEN(INSN "w", GENOP##W); \ GEN(INSN "l", GENOP##L); \ - GEN64(INSN "q", GENOP##Q); \ + GEN(INSN "q", GENOP##Q); \ } while (0) #define GENAM(INSN, GENOP, MODE) do { \ - if (VALID_REG8(d)) \ GENM(INSN "b", GENOP##B, MODE); \ GENM(INSN "w", GENOP##W, MODE); \ GENM(INSN "l", GENOP##L, MODE); \ - GENM64(INSN "q", GENOP##Q, MODE); \ + GENM(INSN "q", GENOP##Q, MODE); \ } while (0) GENA("adc", ADC); GENA("add", ADD); @@ -1224,16 +853,16 @@ int main(void) GENA("mov", MOV); GENM("btw", BTW, 1); GENM("btl", BTL, 1); - GENM64("btq", BTQ, 1); + GENM("btq", BTQ, 1); GENM("btcw", BTCW, 1); GENM("btcl", BTCL, 1); - GENM64("btcq", BTCQ, 1); + GENM("btcq", BTCQ, 1); GENM("btrw", BTRW, 1); GENM("btrl", BTRL, 1); - GENM64("btrq", BTRQ, 1); + GENM("btrq", BTRQ, 1); GENM("btsw", BTSW, 1); GENM("btsl", BTSL, 1); - GENM64("btsq", BTSQ, 1); + GENM("btsq", BTSQ, 1); if (value != 1) { GENAM("rol", ROL, 1); GENAM("ror", ROR, 1); @@ -1246,9 +875,7 @@ int main(void) GENA("test", TEST); #undef GENAM #undef GENA -#undef GENM64 #undef GENM -#undef GEN64 #undef GEN int last_insn = i; uint8 *e = get_target(); @@ -1261,7 +888,8 @@ int main(void) parse_insn(&ii, buffer); if (!check_imm_reg(&ii, insns[i], value, d, modes[i])) { - show_instruction(buffer, p); + if (verbose > 1) + fprintf(stderr, "%s\n", buffer); n_failures++; } @@ -1276,8 +904,9 @@ int main(void) printf(" done %ld/%ld\n", n_tests - n_failures, n_tests); n_all_tests += n_tests; n_all_failures += n_failures; -#endif + printf("Testing mem,reg forms\n"); + n_tests = n_failures = 0; static const uint32 off_table[] = { 0x00000000, 0x00000001, @@ -1289,20 +918,16 @@ int main(void) 0xffffffff, }; const int off_table_count = 
sizeof(off_table) / sizeof(off_table[0]); - -#if TEST_INST_ALU_MEM_REG - printf("Testing mem,reg forms\n"); - n_tests = n_failures = 0; for (int d = 0; d < off_table_count; d++) { const uint32 D = off_table[d]; - for (int B = -1; B < X86_MAX_ALU_REGS; B++) { - for (int I = -1; I < X86_MAX_ALU_REGS; I++) { + for (int B = -1; B < 16; B++) { + for (int I = -1; I < 16; I++) { if (I == X86_RSP) continue; - for (int S = 1; S < 16; S *= 2) { - if (I == -1 && S > 1) + for (int S = 1; S < 8; S *= 2) { + if (I == -1) continue; - for (int r = 0; r < X86_MAX_ALU_REGS; r++) { + for (int r = 0; r < 16; r++) { set_target(block); uint8 *b = get_target(); int i = 0; @@ -1310,16 +935,11 @@ int main(void) insns[i++] = INSN; \ GENOP##mr(D, B, I, S, r); \ } while (0) -#define GEN64(INSN, GENOP) do { \ - if (X86_TARGET_64BIT) \ - GEN(INSN, GENOP); \ - } while (0) #define GENA(INSN, GENOP) do { \ - if (VALID_REG8(r)) \ - GEN(INSN "b", GENOP##B); \ + GEN(INSN "b", GENOP##B); \ GEN(INSN "w", GENOP##W); \ GEN(INSN "l", GENOP##L); \ - GEN64(INSN "q", GENOP##Q); \ + GEN(INSN "q", GENOP##Q); \ } while (0) GENA("adc", ADC); GENA("add", ADD); @@ -1332,26 +952,25 @@ int main(void) GENA("mov", MOV); GEN("imulw", IMULW); GEN("imull", IMULL); - GEN64("imulq", IMULQ); + GEN("imulq", IMULQ); GEN("bsfw", BSFW); GEN("bsfl", BSFL); - GEN64("bsfq", BSFQ); + GEN("bsfq", BSFQ); GEN("bsrw", BSRW); GEN("bsrl", BSRL); - GEN64("bsrq", BSRQ); + GEN("bsrq", BSRQ); GEN("movsbw", MOVSBW); GEN("movsbl", MOVSBL); - GEN64("movsbq", MOVSBQ); + GEN("movsbq", MOVSBQ); GEN("movzbw", MOVZBW); GEN("movzbl", MOVZBL); - GEN64("movzbq", MOVZBQ); + GEN("movzbq", MOVZBQ); GEN("movswl", MOVSWL); - GEN64("movswq", MOVSWQ); + GEN("movswq", MOVSWQ); GEN("movzwl", MOVZWL); - GEN64("movzwq", MOVZWQ); - GEN64("movslq", MOVSLQ); + GEN("movzwq", MOVZWQ); + GEN("movslq", MOVSLQ); #undef GENA -#undef GEN64 #undef GEN int last_insn = i; uint8 *e = get_target(); @@ -1364,14 +983,14 @@ int main(void) parse_insn(&ii, buffer); if 
(!check_mem_reg(&ii, insns[i], D, B, I, S, r)) { - show_instruction(buffer, p); + if (verbose > 1) + fprintf(stderr, "%s\n", buffer); n_failures++; } p += n; i += 1; n_tests++; - show_status(n_tests); } if (i != last_insn) abort(); @@ -1383,871 +1002,6 @@ int main(void) printf(" done %ld/%ld\n", n_tests - n_failures, n_tests); n_all_tests += n_tests; n_all_failures += n_failures; -#endif - -#if TEST_INST_FPU_UNARY - printf("Testing FPU unary forms\n"); - n_tests = n_failures = 0; - { - set_target(block); - uint8 *b = get_target(); - int i = 0; -#define GEN(INSN, GENOP) do { \ - insns[i++] = INSN; \ - GENOP(); \ -} while (0) - GEN("f2xm1", F2XM1); - GEN("fabs", FABS); - GEN("fchs", FCHS); - GEN("fcompp", FCOMPP); - GEN("fcos", FCOS); - GEN("fdecstp", FDECSTP); - GEN("fincstp", FINCSTP); - GEN("fld1", FLD1); - GEN("fldl2t", FLDL2T); - GEN("fldl2e", FLDL2E); - GEN("fldpi", FLDPI); - GEN("fldlg2", FLDLG2); - GEN("fldln2", FLDLN2); - GEN("fldz", FLDZ); - GEN("fnop", FNOP); - GEN("fpatan", FPATAN); - GEN("fprem", FPREM); - GEN("fprem1", FPREM1); - GEN("fptan", FPTAN); - GEN("frndint", FRNDINT); - GEN("fscale", FSCALE); - GEN("fsin", FSIN); - GEN("fsincos", FSINCOS); - GEN("fsqrt", FSQRT); - GEN("ftst", FTST); - GEN("fucompp", FUCOMPP); - GEN("fxam", FXAM); - GEN("fxtract", FXTRACT); - GEN("fyl2x", FYL2X); - GEN("fyl2xp1", FYL2XP1); -#undef GEN - int last_insn = i; - uint8 *e = get_target(); - - uint8 *p = b; - i = 0; - while (p < e) { - int n = disass_x86(buffer, (uintptr)p); - insn_t ii; - parse_insn(&ii, buffer); - - if (!check_unary(&ii, insns[i])) { - show_instruction(buffer, p); - n_failures++; - } - - p += n; - i += 1; - n_tests++; - } - if (i != last_insn) - abort(); - } - printf(" done %ld/%ld\n", n_tests - n_failures, n_tests); - n_all_tests += n_tests; - n_all_failures += n_failures; -#endif - -#if TEST_INST_FPU_REG - printf("Testing FPU reg forms\n"); - n_tests = n_failures = 0; - for (int r = 0; r < X86_MAX_FPU_REGS; r++) { - set_target(block); - uint8 *b = 
get_target(); - int i = 0; -#define GENr(INSN, GENOP) do { \ - insns[i] = INSN; \ - modes[i] = 0; \ - i++, GENOP##r(r); \ -} while (0) -#define GENr0(INSN, GENOP) do { \ - insns[i] = INSN; \ - modes[i] = 1; \ - i++, GENOP##r0(r); \ -} while (0) -#define GEN0r(INSN, GENOP) do { \ - insns[i] = INSN; \ - modes[i] = 2; \ - i++, GENOP##0r(r); \ -} while (0) - GENr("fcom", FCOM); - GENr("fcomp", FCOMP); - GENr("ffree", FFREE); - GENr("fxch", FXCH); - GENr("fst", FST); - GENr("fstp", FSTP); - GENr("fucom", FUCOM); - GENr("fucomp", FUCOMP); - GENr0("fadd", FADD); - GENr0("fcmovb", FCMOVB); - GENr0("fcmove", FCMOVE); - GENr0("fcmovbe", FCMOVBE); - GENr0("fcmovu", FCMOVU); - GENr0("fcmovnb", FCMOVNB); - GENr0("fcmovne", FCMOVNE); - GENr0("fcmovnbe", FCMOVNBE); - GENr0("fcmovnu", FCMOVNU); - GENr0("fcomi", FCOMI); - GENr0("fcomip", FCOMIP); - GENr0("fucomi", FUCOMI); - GENr0("fucomip", FUCOMIP); - GENr0("fdiv", FDIV); - GENr0("fdivr", FDIVR); - GENr0("fmul", FMUL); - GENr0("fsub", FSUB); - GENr0("fsubr", FSUBR); -#undef GEN0r -#undef GENr0 -#undef GENr - int last_insn = i; - uint8 *e = get_target(); - - uint8 *p = b; - i = 0; - while (p < e) { - int n = disass_x86(buffer, (uintptr)p); - insn_t ii; - parse_insn(&ii, buffer); - - switch (modes[i]) { - case 0: - if (!check_reg(&ii, insns[i], r)) { - show_instruction(buffer, p); - n_failures++; - } - break; - case 1: - if (!check_reg_reg(&ii, insns[i], r, 0)) { - show_instruction(buffer, p); - n_failures++; - } - break; - case 2: - if (!check_reg_reg(&ii, insns[i], 0, r)) { - show_instruction(buffer, p); - n_failures++; - } - break; - } - - p += n; - i += 1; - n_tests++; - } - if (i != last_insn) - abort(); - } - printf(" done %ld/%ld\n", n_tests - n_failures, n_tests); - n_all_tests += n_tests; - n_all_failures += n_failures; -#endif - -#if TEST_INST_FPU_MEM - printf("Testing FPU mem forms\n"); - n_tests = n_failures = 0; - for (int d = 0; d < off_table_count; d++) { - const uint32 D = off_table[d]; - for (int B = -1; B < 
X86_MAX_ALU_REGS; B++) { - for (int I = -1; I < X86_MAX_ALU_REGS; I++) { - if (I == X86_RSP) - continue; - for (int S = 1; S < 16; S *= 2) { - if (I == -1 && S > 1) - continue; - set_target(block); - uint8 *b = get_target(); - int i = 0; -#define GEN(INSN, GENOP) do { \ - insns[i++] = INSN; \ - GENOP##m(D, B, I, S); \ -} while (0) - GEN("fadds", FADDS); - GEN("faddl", FADDD); - GEN("fiadd", FIADDW); - GEN("fiaddl", FIADDL); - GEN("fbld", FBLD); - GEN("fbstp", FBSTP); - GEN("fcoms", FCOMS); - GEN("fcoml", FCOMD); - GEN("fcomps", FCOMPS); - GEN("fcompl", FCOMPD); - GEN("fdivs", FDIVS); - GEN("fdivl", FDIVD); - GEN("fidiv", FIDIVW); - GEN("fidivl", FIDIVL); - GEN("fdivrs", FDIVRS); - GEN("fdivrl", FDIVRD); - GEN("fidivr", FIDIVRW); - GEN("fidivrl", FIDIVRL); - GEN("ficom", FICOMW); - GEN("ficoml", FICOML); - GEN("ficomp", FICOMPW); - GEN("ficompl", FICOMPL); - GEN("fild", FILDW); - GEN("fildl", FILDL); - GEN("fildll", FILDQ); - GEN("fist", FISTW); - GEN("fistl", FISTL); - GEN("fistp", FISTPW); - GEN("fistpl", FISTPL); - GEN("fistpll", FISTPQ); - GEN("fisttp", FISTTPW); - GEN("fisttpl", FISTTPL); - GEN("fisttpll", FISTTPQ); - GEN("flds", FLDS); - GEN("fldl", FLDD); - GEN("fldt", FLDT); - GEN("fmuls", FMULS); - GEN("fmull", FMULD); - GEN("fimul", FIMULW); - GEN("fimull", FIMULL); - GEN("fsts", FSTS); - GEN("fstl", FSTD); - GEN("fstps", FSTPS); - GEN("fstpl", FSTPD); - GEN("fstpt", FSTPT); - GEN("fsubs", FSUBS); - GEN("fsubl", FSUBD); - GEN("fisub", FISUBW); - GEN("fisubl", FISUBL); - GEN("fsubrs", FSUBRS); - GEN("fsubrl", FSUBRD); - GEN("fisubr", FISUBRW); - GEN("fisubrl", FISUBRL); -#undef GEN - int last_insn = i; - uint8 *e = get_target(); - - uint8 *p = b; - i = 0; - while (p < e) { - int n = disass_x86(buffer, (uintptr)p); - insn_t ii; - parse_insn(&ii, buffer); - - if (!check_mem(&ii, insns[i], D, B, I, S)) { - show_instruction(buffer, p); - n_failures++; - } - - p += n; - i += 1; - n_tests++; - show_status(n_tests); - } - if (i != last_insn) - abort(); - } - } - } 
- } - printf(" done %ld/%ld\n", n_tests - n_failures, n_tests); - n_all_tests += n_tests; - n_all_failures += n_failures; -#endif - -#if TEST_INST_MMX_REG_REG - printf("Testing MMX reg,reg forms\n"); - n_tests = n_failures = 0; - for (int s = 0; s < X86_MAX_MMX_REGS; s++) { - for (int d = 0; d < X86_MAX_MMX_REGS; d++) { - set_target(block); - uint8 *b = get_target(); - int i = 0; -#define GEN(INSN, GENOP) do { \ - insns[i++] = INSN; \ - MMX_##GENOP##rr(s, d); \ -} while (0) -#define GEN64(INSN, GENOP) do { \ - if (X86_TARGET_64BIT) \ - GEN(INSN, GENOP); \ -} while (0) - GEN("movq", MOVQ); - GEN("packsswb", PACKSSWB); - GEN("packssdw", PACKSSDW); - GEN("packuswb", PACKUSWB); - GEN("paddb", PADDB); - GEN("paddw", PADDW); - GEN("paddd", PADDD); - GEN("paddq", PADDQ); - GEN("paddsb", PADDSB); - GEN("paddsw", PADDSW); - GEN("paddusb", PADDUSB); - GEN("paddusw", PADDUSW); - GEN("pand", PAND); - GEN("pandn", PANDN); - GEN("pavgb", PAVGB); - GEN("pavgw", PAVGW); - GEN("pcmpeqb", PCMPEQB); - GEN("pcmpeqw", PCMPEQW); - GEN("pcmpeqd", PCMPEQD); - GEN("pcmpgtb", PCMPGTB); - GEN("pcmpgtw", PCMPGTW); - GEN("pcmpgtd", PCMPGTD); - GEN("pmaddwd", PMADDWD); - GEN("pmaxsw", PMAXSW); - GEN("pmaxub", PMAXUB); - GEN("pminsw", PMINSW); - GEN("pminub", PMINUB); - GEN("pmulhuw", PMULHUW); - GEN("pmulhw", PMULHW); - GEN("pmullw", PMULLW); - GEN("pmuludq", PMULUDQ); - GEN("por", POR); - GEN("psadbw", PSADBW); - GEN("psllw", PSLLW); - GEN("pslld", PSLLD); - GEN("psllq", PSLLQ); - GEN("psraw", PSRAW); - GEN("psrad", PSRAD); - GEN("psrlw", PSRLW); - GEN("psrld", PSRLD); - GEN("psrlq", PSRLQ); - GEN("psubb", PSUBB); - GEN("psubw", PSUBW); - GEN("psubd", PSUBD); - GEN("psubq", PSUBQ); - GEN("psubsb", PSUBSB); - GEN("psubsw", PSUBSW); - GEN("psubusb", PSUBUSB); - GEN("psubusw", PSUBUSW); - GEN("punpckhbw", PUNPCKHBW); - GEN("punpckhwd", PUNPCKHWD); - GEN("punpckhdq", PUNPCKHDQ); - GEN("punpcklbw", PUNPCKLBW); - GEN("punpcklwd", PUNPCKLWD); - GEN("punpckldq", PUNPCKLDQ); - GEN("pxor", PXOR); - 
GEN("pabsb", PABSB); - GEN("pabsw", PABSW); - GEN("pabsd", PABSD); - GEN("phaddw", PHADDW); - GEN("phaddd", PHADDD); - GEN("phaddsw", PHADDSW); - GEN("phsubw", PHSUBW); - GEN("phsubd", PHSUBD); - GEN("phsubsw", PHSUBSW); - GEN("pmaddubsw", PMADDUBSW); - GEN("pmulhrsw", PMULHRSW); - GEN("pshufb", PSHUFB); - GEN("psignb", PSIGNB); - GEN("psignw", PSIGNW); - GEN("psignd", PSIGND); -#undef GEN64 -#undef GEN - int last_insn = i; - uint8 *e = get_target(); - - uint8 *p = b; - i = 0; - while (p < e) { - int n = disass_x86(buffer, (uintptr)p); - insn_t ii; - parse_insn(&ii, buffer); - - if (!check_reg_reg(&ii, insns[i], s, d)) { - show_instruction(buffer, p); - n_failures++; - } - - p += n; - i += 1; - n_tests++; - } - if (i != last_insn) - abort(); - } - } - printf(" done %ld/%ld\n", n_tests - n_failures, n_tests); - n_all_tests += n_tests; - n_all_failures += n_failures; -#endif - - static const uint8 imm8_table[] = { - 0x00, 0x01, 0x02, 0x03, - 0x06, 0x07, 0x08, 0x09, - 0x0e, 0x0f, 0x10, 0x11, - 0x1e, 0x1f, 0x20, 0x21, - 0xfc, 0xfd, 0xfe, 0xff, - }; - const int n_imm8_tab_count = sizeof(imm8_table)/sizeof(imm8_table[0]); - -#if TEST_INST_MMX_IMM_REG - printf("Testing imm,reg forms\n"); - n_tests = n_failures = 0; - for (int j = 0; j < n_imm8_tab_count; j++) { - const uint8 value = imm8_table[j]; - for (int d = 0; d < X86_MAX_MMX_REGS; d++) { - set_target(block); - uint8 *b = get_target(); - int i = 0; -#define GEN(INSN, GENOP) do { \ - insns[i] = INSN; \ - modes[i] = 1; \ - i++; MMX_##GENOP##ir(value, d); \ -} while (0) - GEN("psllw", PSLLW); - GEN("pslld", PSLLD); - GEN("psllq", PSLLQ); - GEN("psraw", PSRAW); - GEN("psrad", PSRAD); - GEN("psrlw", PSRLW); - GEN("psrld", PSRLD); - GEN("psrlq", PSRLQ); -#undef GEN - int last_insn = i; - uint8 *e = get_target(); - - uint8 *p = b; - i = 0; - while (p < e) { - int n = disass_x86(buffer, (uintptr)p); - insn_t ii; - parse_insn(&ii, buffer); - - if (!check_imm_reg(&ii, insns[i], value, d, modes[i])) { - show_instruction(buffer, 
p); - n_failures++; - } - - p += n; - i += 1; - n_tests++; - } - if (i != last_insn) - abort(); - } - } - printf(" done %ld/%ld\n", n_tests - n_failures, n_tests); - n_all_tests += n_tests; - n_all_failures += n_failures; -#endif - -#if TEST_INST_MMX_MEM_REG - printf("Testing MMX mem,reg forms\n"); - n_tests = n_failures = 0; - for (int d = 0; d < off_table_count; d++) { - const uint32 D = off_table[d]; - for (int B = -1; B < X86_MAX_ALU_REGS; B++) { - for (int I = -1; I < X86_MAX_ALU_REGS; I++) { - if (I == X86_RSP) - continue; - for (int S = 1; S < 16; S *= 2) { - if (I == -1 && S > 1) - continue; - for (int r = 0; r < X86_MAX_MMX_REGS; r++) { - set_target(block); - uint8 *b = get_target(); - int i = 0; -#define _GENrm(INSN, GENOP) do { \ - insns[i] = INSN; \ - modes[i] = 0; \ - i++; MMX_##GENOP##rm(r, D, B, I, S); \ -} while (0) -#define _GENmr(INSN, GENOP) do { \ - insns[i] = INSN; \ - modes[i] = 1; \ - i++; MMX_##GENOP##mr(D, B, I, S, r); \ -} while (0) -#define GEN(INSN, GENOP) do { \ - _GENmr(INSN, GENOP); \ -} while (0) - _GENmr("movd", MOVD); - _GENrm("movd", MOVD); - _GENmr("movq", MOVQ); - _GENrm("movq", MOVQ); - GEN("packsswb", PACKSSWB); - GEN("packssdw", PACKSSDW); - GEN("packuswb", PACKUSWB); - GEN("paddb", PADDB); - GEN("paddw", PADDW); - GEN("paddd", PADDD); - GEN("paddq", PADDQ); - GEN("paddsb", PADDSB); - GEN("paddsw", PADDSW); - GEN("paddusb", PADDUSB); - GEN("paddusw", PADDUSW); - GEN("pand", PAND); - GEN("pandn", PANDN); - GEN("pavgb", PAVGB); - GEN("pavgw", PAVGW); - GEN("pcmpeqb", PCMPEQB); - GEN("pcmpeqw", PCMPEQW); - GEN("pcmpeqd", PCMPEQD); - GEN("pcmpgtb", PCMPGTB); - GEN("pcmpgtw", PCMPGTW); - GEN("pcmpgtd", PCMPGTD); - GEN("pmaddwd", PMADDWD); - GEN("pmaxsw", PMAXSW); - GEN("pmaxub", PMAXUB); - GEN("pminsw", PMINSW); - GEN("pminub", PMINUB); - GEN("pmulhuw", PMULHUW); - GEN("pmulhw", PMULHW); - GEN("pmullw", PMULLW); - GEN("pmuludq", PMULUDQ); - GEN("por", POR); - GEN("psadbw", PSADBW); - GEN("psllw", PSLLW); - GEN("pslld", PSLLD); - 
GEN("psllq", PSLLQ); - GEN("psraw", PSRAW); - GEN("psrad", PSRAD); - GEN("psrlw", PSRLW); - GEN("psrld", PSRLD); - GEN("psrlq", PSRLQ); - GEN("psubb", PSUBB); - GEN("psubw", PSUBW); - GEN("psubd", PSUBD); - GEN("psubq", PSUBQ); - GEN("psubsb", PSUBSB); - GEN("psubsw", PSUBSW); - GEN("psubusb", PSUBUSB); - GEN("psubusw", PSUBUSW); - GEN("punpckhbw", PUNPCKHBW); - GEN("punpckhwd", PUNPCKHWD); - GEN("punpckhdq", PUNPCKHDQ); - GEN("punpcklbw", PUNPCKLBW); - GEN("punpcklwd", PUNPCKLWD); - GEN("punpckldq", PUNPCKLDQ); - GEN("pxor", PXOR); - GEN("pabsb", PABSB); - GEN("pabsw", PABSW); - GEN("pabsd", PABSD); - GEN("phaddw", PHADDW); - GEN("phaddd", PHADDD); - GEN("phaddsw", PHADDSW); - GEN("phsubw", PHSUBW); - GEN("phsubd", PHSUBD); - GEN("phsubsw", PHSUBSW); - GEN("pmaddubsw", PMADDUBSW); - GEN("pmulhrsw", PMULHRSW); - GEN("pshufb", PSHUFB); - GEN("psignb", PSIGNB); - GEN("psignw", PSIGNW); - GEN("psignd", PSIGND); -#undef GEN -#undef _GENmr -#undef _GENrm - int last_insn = i; - uint8 *e = get_target(); - - uint8 *p = b; - i = 0; - while (p < e) { - int n = disass_x86(buffer, (uintptr)p); - insn_t ii; - parse_insn(&ii, buffer); - - if (!check_mem_reg(&ii, insns[i], D, B, I, S, r, modes[i])) { - show_instruction(buffer, p); - n_failures++; - } - - p += n; - i += 1; - n_tests++; - show_status(n_tests); - } - if (i != last_insn) - abort(); - } - } - } - } - } - printf(" done %ld/%ld\n", n_tests - n_failures, n_tests); - n_all_tests += n_tests; - n_all_failures += n_failures; -#endif - -#if TEST_INST_SSE_REG_REG - printf("Testing SSE reg,reg forms\n"); - n_tests = n_failures = 0; - for (int s = 0; s < X86_MAX_SSE_REGS; s++) { - for (int d = 0; d < X86_MAX_SSE_REGS; d++) { - set_target(block); - uint8 *b = get_target(); - int i = 0; -#define GEN(INSN, GENOP) do { \ - insns[i++] = INSN; \ - GENOP##rr(s, d); \ -} while (0) -#define GEN64(INSN, GENOP) do { \ - if (X86_TARGET_64BIT) \ - GEN(INSN, GENOP); \ -} while (0) -#define GEN1(INSN, GENOP) do { \ - GEN(INSN "s", GENOP##S); \ 
- GEN(INSN "d", GENOP##D); \ -} while (0) -#define GENA(INSN, GENOP) do { \ - GEN1(INSN "s", GENOP##S); \ - GEN1(INSN "p", GENOP##P); \ -} while (0) -#define GENI(INSN, GENOP, IMM) do { \ - insns[i++] = INSN; \ - GENOP##rr(IMM, s, d); \ -} while (0) -#define GENI1(INSN, GENOP, IMM) do { \ - GENI(INSN "s", GENOP##S, IMM); \ - GENI(INSN "d", GENOP##D, IMM); \ -} while (0) -#define GENIA(INSN, GENOP, IMM) do { \ - GENI1(INSN "s", GENOP##S, IMM); \ - GENI1(INSN "p", GENOP##P, IMM); \ -} while (0) - GEN1("andp", ANDP); - GEN1("andnp", ANDNP); - GEN1("orp", ORP); - GEN1("xorp", XORP); - GENA("add", ADD); - GENA("sub", SUB); - GENA("mul", MUL); - GENA("div", DIV); - GEN1("comis", COMIS); - GEN1("ucomis", UCOMIS); - GENA("min", MIN); - GENA("max", MAX); - GEN("rcpss", RCPSS); - GEN("rcpps", RCPPS); - GEN("rsqrtss", RSQRTSS); - GEN("rsqrtps", RSQRTPS); - GENA("sqrt", SQRT); - GENIA("cmpeq", CMP, X86_SSE_CC_EQ); - GENIA("cmplt", CMP, X86_SSE_CC_LT); - GENIA("cmple", CMP, X86_SSE_CC_LE); - GENIA("cmpunord", CMP, X86_SSE_CC_U); - GENIA("cmpneq", CMP, X86_SSE_CC_NEQ); - GENIA("cmpnlt", CMP, X86_SSE_CC_NLT); - GENIA("cmpnle", CMP, X86_SSE_CC_NLE); - GENIA("cmpord", CMP, X86_SSE_CC_O); - GEN1("movap", MOVAP); - GEN("movdqa", MOVDQA); - GEN("movdqu", MOVDQU); - GEN("movd", MOVDXD); - GEN64("movd", MOVQXD); // FIXME: disass bug? "movq" expected - GEN("movd", MOVDXS); - GEN64("movd", MOVQXS); // FIXME: disass bug? 
"movq" expected - GEN("cvtdq2pd", CVTDQ2PD); - GEN("cvtdq2ps", CVTDQ2PS); - GEN("cvtpd2dq", CVTPD2DQ); - GEN("cvtpd2ps", CVTPD2PS); - GEN("cvtps2dq", CVTPS2DQ); - GEN("cvtps2pd", CVTPS2PD); - GEN("cvtsd2si", CVTSD2SIL); - GEN64("cvtsd2siq", CVTSD2SIQ); - GEN("cvtsd2ss", CVTSD2SS); - GEN("cvtsi2sd", CVTSI2SDL); - GEN64("cvtsi2sdq", CVTSI2SDQ); - GEN("cvtsi2ss", CVTSI2SSL); - GEN64("cvtsi2ssq", CVTSI2SSQ); - GEN("cvtss2sd", CVTSS2SD); - GEN("cvtss2si", CVTSS2SIL); - GEN64("cvtss2siq", CVTSS2SIQ); - GEN("cvttpd2dq", CVTTPD2DQ); - GEN("cvttps2dq", CVTTPS2DQ); - GEN("cvttsd2si", CVTTSD2SIL); - GEN64("cvttsd2siq", CVTTSD2SIQ); - GEN("cvttss2si", CVTTSS2SIL); - GEN64("cvttss2siq", CVTTSS2SIQ); - if (s < 8) { - // MMX source register - GEN("cvtpi2pd", CVTPI2PD); - GEN("cvtpi2ps", CVTPI2PS); - } - if (d < 8) { - // MMX dest register - GEN("cvtpd2pi", CVTPD2PI); - GEN("cvtps2pi", CVTPS2PI); - GEN("cvttpd2pi", CVTTPD2PI); - GEN("cvttps2pi", CVTTPS2PI); - } -#undef GENIA -#undef GENI1 -#undef GENI -#undef GENA -#undef GEN1 -#undef GEN64 -#undef GEN - int last_insn = i; - uint8 *e = get_target(); - - uint8 *p = b; - i = 0; - while (p < e) { - int n = disass_x86(buffer, (uintptr)p); - insn_t ii; - parse_insn(&ii, buffer); - - if (!check_reg_reg(&ii, insns[i], s, d)) { - show_instruction(buffer, p); - n_failures++; - } - - p += n; - i += 1; - n_tests++; - } - if (i != last_insn) - abort(); - } - } - printf(" done %ld/%ld\n", n_tests - n_failures, n_tests); - n_all_tests += n_tests; - n_all_failures += n_failures; -#endif - -#if TEST_INST_SSE_MEM_REG - printf("Testing SSE mem,reg forms\n"); - n_tests = n_failures = 0; - for (int d = 0; d < off_table_count; d++) { - const uint32 D = off_table[d]; - for (int B = -1; B < X86_MAX_ALU_REGS; B++) { - for (int I = -1; I < X86_MAX_ALU_REGS; I++) { - if (I == X86_RSP) - continue; - for (int S = 1; S < 16; S *= 2) { - if (I == -1 && S > 1) - continue; - for (int r = 0; r < X86_MAX_SSE_REGS; r++) { - set_target(block); - uint8 *b = 
get_target(); - int i = 0; -#define GEN(INSN, GENOP) do { \ - insns[i++] = INSN; \ - GENOP##mr(D, B, I, S, r); \ -} while (0) -#define GEN64(INSN, GENOP) do { \ - if (X86_TARGET_64BIT) \ - GEN(INSN, GENOP); \ -} while (0) -#define GEN1(INSN, GENOP) do { \ - GEN(INSN "s", GENOP##S); \ - GEN(INSN "d", GENOP##D); \ -} while (0) -#define GENA(INSN, GENOP) do { \ - GEN1(INSN "s", GENOP##S); \ - GEN1(INSN "p", GENOP##P); \ -} while (0) -#define GENI(INSN, GENOP, IMM) do { \ - insns[i++] = INSN; \ - GENOP##mr(IMM, D, B, I, S, r); \ -} while (0) -#define GENI1(INSN, GENOP, IMM) do { \ - GENI(INSN "s", GENOP##S, IMM); \ - GENI(INSN "d", GENOP##D, IMM); \ -} while (0) -#define GENIA(INSN, GENOP, IMM) do { \ - GENI1(INSN "s", GENOP##S, IMM); \ - GENI1(INSN "p", GENOP##P, IMM); \ -} while (0) - GEN1("andp", ANDP); - GEN1("andnp", ANDNP); - GEN1("orp", ORP); - GEN1("xorp", XORP); - GENA("add", ADD); - GENA("sub", SUB); - GENA("mul", MUL); - GENA("div", DIV); - GEN1("comis", COMIS); - GEN1("ucomis", UCOMIS); - GENA("min", MIN); - GENA("max", MAX); - GEN("rcpss", RCPSS); - GEN("rcpps", RCPPS); - GEN("rsqrtss", RSQRTSS); - GEN("rsqrtps", RSQRTPS); - GENA("sqrt", SQRT); - GENIA("cmpeq", CMP, X86_SSE_CC_EQ); - GENIA("cmplt", CMP, X86_SSE_CC_LT); - GENIA("cmple", CMP, X86_SSE_CC_LE); - GENIA("cmpunord", CMP, X86_SSE_CC_U); - GENIA("cmpneq", CMP, X86_SSE_CC_NEQ); - GENIA("cmpnlt", CMP, X86_SSE_CC_NLT); - GENIA("cmpnle", CMP, X86_SSE_CC_NLE); - GENIA("cmpord", CMP, X86_SSE_CC_O); - GEN1("movap", MOVAP); - GEN("movdqa", MOVDQA); - GEN("movdqu", MOVDQU); -#if 0 - // FIXME: extraneous REX bits generated - GEN("movd", MOVDXD); - GEN64("movd", MOVQXD); // FIXME: disass bug? 
"movq" expected -#endif - GEN("cvtdq2pd", CVTDQ2PD); - GEN("cvtdq2ps", CVTDQ2PS); - GEN("cvtpd2dq", CVTPD2DQ); - GEN("cvtpd2ps", CVTPD2PS); - GEN("cvtps2dq", CVTPS2DQ); - GEN("cvtps2pd", CVTPS2PD); - GEN("cvtsd2si", CVTSD2SIL); - GEN64("cvtsd2siq", CVTSD2SIQ); - GEN("cvtsd2ss", CVTSD2SS); - GEN("cvtsi2sd", CVTSI2SDL); - GEN64("cvtsi2sdq", CVTSI2SDQ); - GEN("cvtsi2ss", CVTSI2SSL); - GEN64("cvtsi2ssq", CVTSI2SSQ); - GEN("cvtss2sd", CVTSS2SD); - GEN("cvtss2si", CVTSS2SIL); - GEN64("cvtss2siq", CVTSS2SIQ); - GEN("cvttpd2dq", CVTTPD2DQ); - GEN("cvttps2dq", CVTTPS2DQ); - GEN("cvttsd2si", CVTTSD2SIL); - GEN64("cvttsd2siq", CVTTSD2SIQ); - GEN("cvttss2si", CVTTSS2SIL); - GEN64("cvttss2siq", CVTTSS2SIQ); - if (r < 8) { - // MMX dest register - GEN("cvtpd2pi", CVTPD2PI); - GEN("cvtps2pi", CVTPS2PI); - GEN("cvttpd2pi", CVTTPD2PI); - GEN("cvttps2pi", CVTTPS2PI); - } -#undef GENIA -#undef GENI1 -#undef GENI -#undef GENA -#undef GEN1 -#undef GEN64 -#undef GEN - int last_insn = i; - uint8 *e = get_target(); - - uint8 *p = b; - i = 0; - while (p < e) { - int n = disass_x86(buffer, (uintptr)p); - insn_t ii; - parse_insn(&ii, buffer); - - if (!check_mem_reg(&ii, insns[i], D, B, I, S, r)) { - show_instruction(buffer, p); - n_failures++; - } - - p += n; - i += 1; - n_tests++; - show_status(n_tests); - } - if (i != last_insn) - abort(); - } - } - } - } - } - printf(" done %ld/%ld\n", n_tests - n_failures, n_tests); - n_all_tests += n_tests; - n_all_failures += n_failures; -#endif printf("\n"); printf("All %ld tests run, %ld failures\n", n_all_tests, n_all_failures); diff --git a/BasiliskII/src/uae_cpu/cpu_emulation.h b/BasiliskII/src/uae_cpu/cpu_emulation.h index cd588ec1..b014be79 100644 --- a/BasiliskII/src/uae_cpu/cpu_emulation.h +++ b/BasiliskII/src/uae_cpu/cpu_emulation.h @@ -1,52 +1,170 @@ /* - * cpu_emulation.h - Definitions for Basilisk II CPU emulation module (UAE 0.8.10 version) + * cpu_emulation.h - CPU interface * - * Basilisk II (C) 1997-2008 Christian Bauer + * Copyright 
(c) 2001-2005 Milan Jurik of ARAnyM dev team (see AUTHORS) * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * Inspired by Christian Bauer's Basilisk II * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef CPU_EMULATION_H #define CPU_EMULATION_H -#include - - /* * Memory system */ +#if 0 +#include "sysdeps.h" +#include "memory.h" +#include "tools.h" +#endif + // RAM and ROM pointers (allocated and set by main_*.cpp) +#if 0 +extern memptr RAMBase; // RAM base (Atari address space), does not include Low Mem when != 0 +#else extern uint32 RAMBaseMac; // RAM base (Mac address space), does not include Low Mem when != 0 -extern uint8 *RAMBaseHost; // RAM base (host address space) -extern uint32 RAMSize; // Size of RAM - +#endif +extern uint8 *RAMBaseHost; // RAM base (host address space) +extern uint32 RAMSize; // Size of RAM +#if 0 +extern memptr ROMBase; // ROM base (Atari address space) +#else extern uint32 ROMBaseMac; // ROM base (Mac address space) -extern uint8 *ROMBaseHost; // ROM base (host address space) -extern uint32 ROMSize; // Size of ROM +#endif +extern uint8 *ROMBaseHost; // ROM base (host address space) +extern uint32 ROMSize; // Size of ROM +#if 0 +extern uint32 RealROMSize; // Real size of ROM +extern memptr HWBase; // HW base (Atari address space) +extern uint8 *HWBaseHost; // HW base (host address space) +extern uint32 HWSize; // Size of HW space -#if !REAL_ADDRESSING && !DIRECT_ADDRESSING -// If we are not using real or direct addressing, the Mac frame buffer gets -// mapped to this location. The memory must be allocated by VideoInit(). 
-// If multiple monitors are used, they must share the frame buffer +extern memptr FastRAMBase; // Fast-RAM base (Atari address space) +extern uint8 *FastRAMBaseHost; // Fast-RAM base (host address space) +extern memptr VideoRAMBase; // VideoRAM base (Atari address space) +extern uint8 *VideoRAMBaseHost; // VideoRAM base (host address space) + +#ifdef HW_SIGSEGV +extern uint8 *FakeIOBaseHost; +#endif + +#ifdef RAMENDNEEDED +# define RAMEnd 0x01000000 // Not accessible top of memory +#else +# define RAMEnd 0 +#endif +#endif +#if !REAL_ADDRESSING +// If we are not using real addressing, the Mac frame buffer gets mapped to this location +// The memory must be allocated by VideoInit(). If multiple monitors are used, they must +// share the frame buffer const uint32 MacFrameBaseMac = 0xa0000000; extern uint8 *MacFrameBaseHost; // Frame buffer base (host address space) extern uint32 MacFrameSize; // Size of frame buffer -#endif extern int MacFrameLayout; // Frame buffer layout (see defines below) +#endif +#if 0 +// Atari memory access functions +// Direct access to CPU address space +// For HW operations +// Read/WriteAtariIntXX +// +static inline uint64 ReadAtariInt64(memptr addr) {return phys_get_quad(addr);} +static inline uint32 ReadAtariInt32(memptr addr) {return phys_get_long(addr);} +static inline uint16 ReadAtariInt16(memptr addr) {return phys_get_word(addr);} +static inline uint8 ReadAtariInt8(memptr addr) {return phys_get_byte(addr);} +static inline void WriteAtariInt64(memptr addr, uint64 q) {phys_put_quad(addr, q);} +static inline void WriteAtariInt32(memptr addr, uint32 l) {phys_put_long(addr, l);} +static inline void WriteAtariInt16(memptr addr, uint16 w) {phys_put_word(addr, w);} +static inline void WriteAtariInt8(memptr addr, uint8 b) {phys_put_byte(addr, b);} + +// Direct access to allocated memory +// Ignores HW checks, so that be carefull +// Read/WriteHWMemIntXX +// +static inline uint32 ReadHWMemInt32(memptr addr) {return do_get_mem_long((uae_u32 
*)phys_get_real_address(addr));} +static inline uint16 ReadHWMemInt16(memptr addr) {return do_get_mem_word((uae_u16 *)phys_get_real_address(addr));} +static inline uint8 ReadHWMemInt8(memptr addr) {return do_get_mem_byte((uae_u8 *)phys_get_real_address(addr));} +static inline void WriteHWMemInt32(memptr addr, uint32 l) {do_put_mem_long((uae_u32 *)phys_get_real_address(addr), l);} +static inline void WriteHWMemInt16(memptr addr, uint16 w) {do_put_mem_word((uae_u16 *)phys_get_real_address(addr), w);} +static inline void WriteHWMemInt8(memptr addr, uint8 b) {do_put_mem_byte((uae_u8 *)phys_get_real_address(addr), b);} + +// Indirect access to CPU address space +// Uses MMU if available +// For SW operations +// Only data space +// Read/WriteIntXX +// +static inline uint64 ReadInt64(memptr addr) {return get_quad(addr);} +static inline uint32 ReadInt32(memptr addr) {return get_long(addr);} +static inline uint16 ReadInt16(memptr addr) {return get_word(addr);} +static inline uint8 ReadInt8(memptr addr) {return get_byte(addr);} +static inline void WriteInt64(memptr addr, uint64 q) {put_quad(addr, q);} +static inline void WriteInt32(memptr addr, uint32 l) {put_long(addr, l);} +static inline void WriteInt16(memptr addr, uint16 w) {put_word(addr, w);} +static inline void WriteInt8(memptr addr, uint8 b) {put_byte(addr, b);} + +#ifdef EXTENDED_SIGSEGV +extern int in_handler; +#ifdef NO_NESTED_SIGSEGV +extern JMP_BUF sigsegv_env; +# define BUS_ERROR(a) \ +{ \ + regs.mmu_fault_addr=(a); \ + if (in_handler) \ + { \ + in_handler = 0; \ + LONGJMP(sigsegv_env, 1); \ + } \ + else { \ + breakpt(); \ + THROW(2); \ + } \ +} +#else /* NO_NESTED_SIGSEGV */ +# define BUS_ERROR(a) \ +{ \ + regs.mmu_fault_addr=(a); \ + in_handler = 0; \ + breakpt(); \ + THROW(2); \ +} +#endif /* NO_NESTED_SIGSEGV */ +#else /* EXTENDED_SIGSEGV */ +# define BUS_ERROR(a) \ +{ \ + regs.mmu_fault_addr=(a); \ + breakpt(); \ + THROW(2); \ +} +#endif /* EXTENDED_SIGSEGV */ + +// For address validation +static inline 
bool ValidAtariAddr(memptr addr, bool write, uint32 len) { return phys_valid_address(addr, write, len); } +static inline bool ValidAddr(memptr addr, bool write, uint32 len) { return valid_address(addr, write, len); } + +// Helper functions for usual memory operations +static inline uint8 *Atari2HostAddr(memptr addr) {return phys_get_real_address(addr);} +#endif // Possible frame buffer layouts enum { FLAYOUT_NONE, // No frame buffer @@ -73,30 +191,73 @@ static inline void *Host2Mac_memcpy(uint32 dest, const void *src, size_t n) {ret static inline void *Mac2Mac_memcpy(uint32 dest, uint32 src, size_t n) {return memcpy(Mac2HostAddr(dest), Mac2HostAddr(src), n);} +// From newcpu.cpp +extern int quit_program; +extern int exit_val; + /* * 680x0 emulation */ // Initialization -extern bool Init680x0(void); // This routine may want to look at CPUType/FPUType to set up the apropriate emulation +#if 0 +extern bool InitMEM(); +#endif +extern bool Init680x0(void); +#if 0 +extern void Reset680x0(void); +#endif extern void Exit680x0(void); -extern void InitFrameBufferMapping(void); - -// 680x0 dynamic recompilation activation flag -#if USE_JIT -extern bool UseJIT; -#else -const bool UseJIT = false; +#if 0 +extern void AtariReset(void); #endif // 680x0 emulation functions struct M68kRegisters; -extern void Start680x0(void); // Reset and start 680x0 +extern void Start680x0(void); // Reset and start 680x0 +#if 0 +extern void Restart680x0(void); // Restart running 680x0 +extern void Quit680x0(void); // Quit 680x0 +#endif + extern "C" void Execute68k(uint32 addr, M68kRegisters *r); // Execute 68k code from EMUL_OP routine extern "C" void Execute68kTrap(uint16 trap, M68kRegisters *r); // Execute MacOS 68k trap from EMUL_OP routine // Interrupt functions -extern void TriggerInterrupt(void); // Trigger interrupt level 1 (InterruptFlag must be set first) -extern void TriggerNMI(void); // Trigger interrupt level 7 +#if 0 +extern int MFPdoInterrupt(void); +extern int SCCdoInterrupt(void); 
+extern void TriggerInternalIRQ(void); +extern void TriggerInt3(void); // Trigger interrupt level 3 +extern void TriggerVBL(void); // Trigger interrupt level 4 +extern void TriggerInt5(void); // Trigger interrupt level 5 +extern void TriggerSCC(bool); // Trigger interrupt level 5 +extern void TriggerMFP(bool); // Trigger interrupt level 6 +#endif +extern void TriggerInterrupt(void); // Trigger interrupt level 1 (InterruptFlag must be set first) +extern void TriggerNMI(void); // Trigger interrupt level 7 + +#if 0 +#ifdef FLIGHT_RECORDER +extern void cpu_flight_recorder(int); +extern void dump_flight_recorder(void); +#endif +#endif + +// CPU looping handlers +void check_eps_limit(uaecptr); +void report_double_bus_error(void); + +#if 0 +// This function will be removed +static inline uaecptr showPC(void) { return m68k_getpc(); } // for debugging only +#endif + +extern int intlev(void); +static inline void AtariReset(void) {} #endif + +/* +vim:ts=4:sw=4: +*/ diff --git a/BasiliskII/src/uae_cpu/cpudefsa.cpp b/BasiliskII/src/uae_cpu/cpudefsa.cpp new file mode 100644 index 00000000..ad7d6979 --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpudefsa.cpp @@ -0,0 +1,5 @@ +/* + * cpudefs.cpp must be compiled twice, once for the generator program + * and once for the actual executable + */ +#include "cpudefs.cpp" diff --git a/BasiliskII/src/uae_cpu/cpuemu1.cpp b/BasiliskII/src/uae_cpu/cpuemu1.cpp new file mode 100644 index 00000000..089eefd4 --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpuemu1.cpp @@ -0,0 +1,2 @@ +#define PART_1 +#include "cpuemu.cpp" diff --git a/BasiliskII/src/uae_cpu/cpuemu1_nf.cpp b/BasiliskII/src/uae_cpu/cpuemu1_nf.cpp new file mode 100644 index 00000000..58acf444 --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpuemu1_nf.cpp @@ -0,0 +1,3 @@ +#define NOFLAGS 1 +#define PART_1 +#include "cpuemu.cpp" diff --git a/BasiliskII/src/uae_cpu/cpuemu2.cpp b/BasiliskII/src/uae_cpu/cpuemu2.cpp new file mode 100644 index 00000000..1e18b587 --- /dev/null +++ 
b/BasiliskII/src/uae_cpu/cpuemu2.cpp @@ -0,0 +1,2 @@ +#define PART_2 +#include "cpuemu.cpp" diff --git a/BasiliskII/src/uae_cpu/cpuemu2_nf.cpp b/BasiliskII/src/uae_cpu/cpuemu2_nf.cpp new file mode 100644 index 00000000..8e5136c4 --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpuemu2_nf.cpp @@ -0,0 +1,3 @@ +#define NOFLAGS 1 +#define PART_2 +#include "cpuemu.cpp" diff --git a/BasiliskII/src/uae_cpu/cpuemu3.cpp b/BasiliskII/src/uae_cpu/cpuemu3.cpp new file mode 100644 index 00000000..0385e2f0 --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpuemu3.cpp @@ -0,0 +1,2 @@ +#define PART_3 +#include "cpuemu.cpp" diff --git a/BasiliskII/src/uae_cpu/cpuemu3_nf.cpp b/BasiliskII/src/uae_cpu/cpuemu3_nf.cpp new file mode 100644 index 00000000..6565dc8c --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpuemu3_nf.cpp @@ -0,0 +1,3 @@ +#define NOFLAGS 1 +#define PART_3 +#include "cpuemu.cpp" diff --git a/BasiliskII/src/uae_cpu/cpuemu4.cpp b/BasiliskII/src/uae_cpu/cpuemu4.cpp new file mode 100644 index 00000000..13d27e7a --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpuemu4.cpp @@ -0,0 +1,2 @@ +#define PART_4 +#include "cpuemu.cpp" diff --git a/BasiliskII/src/uae_cpu/cpuemu4_nf.cpp b/BasiliskII/src/uae_cpu/cpuemu4_nf.cpp new file mode 100644 index 00000000..a16c36cb --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpuemu4_nf.cpp @@ -0,0 +1,3 @@ +#define NOFLAGS 1 +#define PART_4 +#include "cpuemu.cpp" diff --git a/BasiliskII/src/uae_cpu/cpuemu5.cpp b/BasiliskII/src/uae_cpu/cpuemu5.cpp new file mode 100644 index 00000000..9b33a654 --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpuemu5.cpp @@ -0,0 +1,2 @@ +#define PART_5 +#include "cpuemu.cpp" diff --git a/BasiliskII/src/uae_cpu/cpuemu5_nf.cpp b/BasiliskII/src/uae_cpu/cpuemu5_nf.cpp new file mode 100644 index 00000000..5bf24360 --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpuemu5_nf.cpp @@ -0,0 +1,4 @@ +#define NOFLAGS 1 +#define PART_5 +#include "cpuemu.cpp" + diff --git a/BasiliskII/src/uae_cpu/cpuemu6.cpp b/BasiliskII/src/uae_cpu/cpuemu6.cpp new file mode 100644 
index 00000000..e4b1efb0 --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpuemu6.cpp @@ -0,0 +1,2 @@ +#define PART_6 +#include "cpuemu.cpp" diff --git a/BasiliskII/src/uae_cpu/cpuemu6_nf.cpp b/BasiliskII/src/uae_cpu/cpuemu6_nf.cpp new file mode 100644 index 00000000..7afe15d4 --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpuemu6_nf.cpp @@ -0,0 +1,3 @@ +#define NOFLAGS 1 +#define PART_6 +#include "cpuemu.cpp" diff --git a/BasiliskII/src/uae_cpu/cpuemu7.cpp b/BasiliskII/src/uae_cpu/cpuemu7.cpp new file mode 100644 index 00000000..faec7ef8 --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpuemu7.cpp @@ -0,0 +1,2 @@ +#define PART_7 +#include "cpuemu.cpp" diff --git a/BasiliskII/src/uae_cpu/cpuemu7_nf.cpp b/BasiliskII/src/uae_cpu/cpuemu7_nf.cpp new file mode 100644 index 00000000..1e404dea --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpuemu7_nf.cpp @@ -0,0 +1,3 @@ +#define NOFLAGS 1 +#define PART_7 +#include "cpuemu.cpp" diff --git a/BasiliskII/src/uae_cpu/cpuemu8.cpp b/BasiliskII/src/uae_cpu/cpuemu8.cpp new file mode 100644 index 00000000..c4efcfa3 --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpuemu8.cpp @@ -0,0 +1,2 @@ +#define PART_8 +#include "cpuemu.cpp" diff --git a/BasiliskII/src/uae_cpu/cpuemu8_nf.cpp b/BasiliskII/src/uae_cpu/cpuemu8_nf.cpp new file mode 100644 index 00000000..7c7f8f6e --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpuemu8_nf.cpp @@ -0,0 +1,3 @@ +#define NOFLAGS 1 +#define PART_8 +#include "cpuemu.cpp" diff --git a/BasiliskII/src/uae_cpu/cpufunctbla.cpp b/BasiliskII/src/uae_cpu/cpufunctbla.cpp new file mode 100644 index 00000000..17dd0d3f --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpufunctbla.cpp @@ -0,0 +1,5 @@ +/* + * cpufunctbl.cpp must be compiled twice, once for the generator program + * and once for the actual executable + */ +#include "cpufunctbl.cpp" diff --git a/BasiliskII/src/uae_cpu/cpummu.cpp b/BasiliskII/src/uae_cpu/cpummu.cpp new file mode 100644 index 00000000..1a3bd91a --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpummu.cpp @@ -0,0 +1,1096 @@ +/* + * 
cpummu.cpp - MMU emulation + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by UAE MMU patch + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#define DEBUG 0 +#include "sysdeps.h" + +#include "cpummu.h" +#include "memory.h" +#include "newcpu.h" +#include "debug.h" +#ifdef USE_JIT +# include "compiler/compemu.h" +#endif + +#define DBG_MMU_VERBOSE 1 +#define DBG_MMU_SANITY 1 + +#ifdef FULLMMU + +mmu_atc_l1_array atc_l1[2]; +mmu_atc_l1_array *current_atc; +struct mmu_atc_line atc_l2[2][ATC_L2_SIZE]; + +# ifdef ATC_STATS +static unsigned int mmu_atc_hits[ATC_L2_SIZE]; +# endif + + +static void mmu_dump_ttr(const char * label, uae_u32 ttr) +{ + DUNUSED(label); +#if DEBUG + uae_u32 from_addr, to_addr; + + from_addr = ttr & MMU_TTR_LOGICAL_BASE; + to_addr = (ttr & MMU_TTR_LOGICAL_MASK) << 8; + + D(bug("%s: [%08x] %08x - %08x enabled=%d supervisor=%d wp=%d cm=%02d", + label, ttr, + from_addr, to_addr, + ttr & MMU_TTR_BIT_ENABLED ? 1 : 0, + (ttr & (MMU_TTR_BIT_SFIELD_ENABLED | MMU_TTR_BIT_SFIELD_SUPER)) >> MMU_TTR_SFIELD_SHIFT, + ttr & MMU_TTR_BIT_WRITE_PROTECT ? 
1 : 0, + (ttr & MMU_TTR_CACHE_MASK) >> MMU_TTR_CACHE_SHIFT + )); +#else + DUNUSED(ttr); +#endif +} + +void mmu_make_transparent_region(uaecptr baseaddr, uae_u32 size, int datamode) +{ + uae_u32 * ttr; + uae_u32 * ttr0 = datamode ? &regs.dtt0 : &regs.itt0; + uae_u32 * ttr1 = datamode ? &regs.dtt1 : &regs.itt1; + + if ((*ttr1 & MMU_TTR_BIT_ENABLED) == 0) + ttr = ttr1; + else if ((*ttr0 & MMU_TTR_BIT_ENABLED) == 0) + ttr = ttr0; + else + return; + + *ttr = baseaddr & MMU_TTR_LOGICAL_BASE; + *ttr |= ((baseaddr + size - 1) & MMU_TTR_LOGICAL_BASE) >> 8; + *ttr |= MMU_TTR_BIT_ENABLED; + + D(bug("MMU: map transparent mapping of %08x", *ttr)); +} + +/* check if an address matches a ttr */ +static int mmu_do_match_ttr(uae_u32 ttr, uaecptr addr, int super) +{ + if (ttr & MMU_TTR_BIT_ENABLED) { /* TTR enabled */ + uae_u8 msb, mask; + + msb = ((addr ^ ttr) & MMU_TTR_LOGICAL_BASE) >> 24; + mask = (ttr & MMU_TTR_LOGICAL_MASK) >> 16; + + if (!(msb & ~mask)) { + + if ((ttr & MMU_TTR_BIT_SFIELD_ENABLED) == 0) { + if (((ttr & MMU_TTR_BIT_SFIELD_SUPER) == 0) != (super == 0)) { + return TTR_NO_MATCH; + } + } + + return (ttr & MMU_TTR_BIT_WRITE_PROTECT) ? TTR_NO_WRITE : TTR_OK_MATCH; + } + } + return TTR_NO_MATCH; +} + +static inline int mmu_match_ttr(uaecptr addr, int super, int data) +{ + int res; + + if (data) { + res = mmu_do_match_ttr(regs.dtt0, addr, super); + if (res == TTR_NO_MATCH) + res = mmu_do_match_ttr(regs.dtt1, addr, super); + } else { + res = mmu_do_match_ttr(regs.itt0, addr, super); + if (res == TTR_NO_MATCH) + res = mmu_do_match_ttr(regs.itt1, addr, super); + } + return res; +} + +#if DEBUG +/* {{{ mmu_dump_table */ +static void mmu_dump_table(const char * label, uaecptr root_ptr) +{ + DUNUSED(label); + const int ROOT_TABLE_SIZE = 128, + PTR_TABLE_SIZE = 128, + PAGE_TABLE_SIZE = regs.mmu_pagesize_8k ? 32 : 64, + ROOT_INDEX_SHIFT = 25, + PTR_INDEX_SHIFT = 18; + const uae_u32 ptr_addr_mask = (regs.mmu_pagesize_8k ?
MMU_PTR_PAGE_ADDR_MASK_8 : MMU_PTR_PAGE_ADDR_MASK_4); + const uae_u32 page_addr_mask = (regs.mmu_pagesize_8k ? MMU_PAGE_ADDR_MASK_8 : MMU_PAGE_ADDR_MASK_4); + const uae_u32 page_ur_mask = (regs.mmu_pagesize_8k ? MMU_PAGE_UR_MASK_8 : MMU_PAGE_UR_MASK_4); + const uae_u32 page_size = (regs.mmu_pagesize_8k ? (1 << 13) : (1 << 12)); + int root_idx, ptr_idx, page_idx; + uae_u32 root_des, ptr_des, page_des; + uaecptr ptr_des_addr, page_addr, + root_log, ptr_log, page_log; + + D(bug("%s: root=%x", label, root_ptr)); + + for (root_idx = 0; root_idx < ROOT_TABLE_SIZE; root_idx++) { + root_des = phys_get_long(root_ptr + (root_idx << 2)); + + if ((root_des & 2) == 0) + continue; /* invalid */ + + D(bug("ROOT: %03d U=%d W=%d UDT=%02d", root_idx, + root_des & 8 ? 1 : 0, + root_des & 4 ? 1 : 0, + root_des & 3 + )); + + root_log = root_idx << ROOT_INDEX_SHIFT; + + ptr_des_addr = root_des & MMU_ROOT_PTR_ADDR_MASK; + + for (ptr_idx = 0; ptr_idx < PTR_TABLE_SIZE; ptr_idx++) { + struct { + uaecptr log, phys; + int start_idx, n_pages; /* number of pages covered by this entry */ + uae_u32 match; + } page_info[PAGE_TABLE_SIZE]; + int n_pages_used; + + ptr_des = phys_get_long(ptr_des_addr + (ptr_idx << 2)); + ptr_log = root_log | (ptr_idx << PTR_INDEX_SHIFT); + + if ((ptr_des & 2) == 0) + continue; /* invalid */ + + page_addr = ptr_des & ptr_addr_mask; + + n_pages_used = -1; + for (page_idx = 0; page_idx < PAGE_TABLE_SIZE; page_idx++) { + + page_des = phys_get_long(page_addr + (page_idx << 2)); + page_log = ptr_log | (page_idx * page_size); + + switch (page_des & 3) { + case 0: /* invalid */ + continue; + case 1: case 3: /* resident */ + case 2: /* indirect */ + if (n_pages_used == -1 || + (page_info[n_pages_used].match & ~page_addr_mask) != (page_des & ~page_addr_mask) || + page_info[n_pages_used].phys + (page_info[n_pages_used].n_pages * page_size) != (page_des & page_addr_mask)) + { + /* use the next entry */ + n_pages_used++; + + page_info[n_pages_used].match = page_des; + 
page_info[n_pages_used].n_pages = 1; + page_info[n_pages_used].start_idx = page_idx; + page_info[n_pages_used].log = page_log; + page_info[n_pages_used].phys = page_des & page_addr_mask; + } else { + page_info[n_pages_used].n_pages++; + } + break; + } + } + + if (n_pages_used == -1) + continue; + + D(bug(" PTR: %03d U=%d W=%d UDT=%02d", ptr_idx, + ptr_des & 8 ? 1 : 0, + ptr_des & 4 ? 1 : 0, + ptr_des & 3 + )); + + + for (page_idx = 0; page_idx <= n_pages_used; page_idx++) { + page_des = page_info[page_idx].match; + + if ((page_des & MMU_PDT_MASK) == 2) { + D(bug(" PAGE: %03d-%03d log=%08x INDIRECT --> addr=%08x", + page_info[page_idx].start_idx, + page_info[page_idx].start_idx + page_info[page_idx].n_pages - 1, + page_info[page_idx].log, + page_des & MMU_PAGE_INDIRECT_MASK + )); + + } else { + D(bug(" PAGE: %03d-%03d log=%08x addr=%08x UR=%02d G=%d U1/0=%d S=%d CM=%d M=%d U=%d W=%d", + page_info[page_idx].start_idx, + page_info[page_idx].start_idx + page_info[page_idx].n_pages - 1, + page_info[page_idx].log, + page_info[page_idx].phys, + (page_des & page_ur_mask) >> MMU_PAGE_UR_SHIFT, + page_des & MMU_DES_GLOBAL ? 1 : 0, + (page_des & MMU_TTR_UX_MASK) >> MMU_TTR_UX_SHIFT, + page_des & MMU_DES_SUPER ? 1 : 0, + (page_des & MMU_TTR_CACHE_MASK) >> MMU_TTR_CACHE_SHIFT, + page_des & MMU_DES_MODIFIED ? 1 : 0, + page_des & MMU_DES_USED ? 1 : 0, + page_des & MMU_DES_WP ? 
1 : 0 + )); + } + } + } + + } +} +/* }}} */ +#endif + +/* {{{ mmu_dump_atc */ +void mmu_dump_atc(void) +{ + int i, j; + for (i = 0; i < 2; i++) { + for (j = 0; j < ATC_L2_SIZE; j++) { + if (atc_l2[i][j].tag == 0x8000) + continue; + D(bug("ATC[%02d] G=%d TT=%d M=%d WP=%d VD=%d VI=%d tag=%08x --> phys=%08x", + j, atc_l2[i][j].global, atc_l2[i][j].tt, atc_l2[i][j].modified, + atc_l2[i][j].write_protect, atc_l2[i][j].valid_data, atc_l2[i][j].valid_inst, + atc_l2[i][j].tag, atc_l2[i][j].phys)); + } + } +} +/* }}} */ + +/* {{{ mmu_dump_tables */ +void mmu_dump_tables(void) +{ + D(bug("URP: %08x SRP: %08x MMUSR: %x TC: %x", regs.urp, regs.srp, regs.mmusr, regs.tc)); + mmu_dump_ttr("DTT0", regs.dtt0); + mmu_dump_ttr("DTT1", regs.dtt1); + mmu_dump_ttr("ITT0", regs.itt0); + mmu_dump_ttr("ITT1", regs.itt1); + mmu_dump_atc(); + //mmu_dump_table("SRP", regs.srp); +} +/* }}} */ + +static uaecptr REGPARAM2 mmu_lookup_pagetable(uaecptr addr, int super, int write); + +static ALWAYS_INLINE int mmu_get_fc(bool super, bool data) +{ + return (super ? 4 : 0) | (data ? 1 : 2); +} + +static void mmu_bus_error(uaecptr addr, int fc, int write, int size) +{ + uae_u16 ssw = 0; + + ssw |= fc & MMU_SSW_TM; /* Copy TM */ + switch (size) { + case sz_byte: + ssw |= MMU_SSW_SIZE_B; + break; + case sz_word: + ssw |= MMU_SSW_SIZE_W; + break; + case sz_long: + ssw |= MMU_SSW_SIZE_L; + break; + } + + regs.wb3_status = write ? 0x80 | ssw : 0; + if (!write) + ssw |= MMU_SSW_RW; + + regs.mmu_fault_addr = addr; + regs.mmu_ssw = ssw | MMU_SSW_ATC; + + D(bug("BUS ERROR: fc=%d w=%d log=%08x ssw=%04x", fc, write, addr, ssw)); + + breakpt(); + THROW(2); +} + +/* + * Update the atc line for a given address by doing a mmu lookup. 
+ */ +static uaecptr mmu_fill_atc_l2(uaecptr addr, int super, int data, int write, + struct mmu_atc_line *l) +{ + int res; + uae_u32 desc; + + l->tag = ATC_TAG(addr); + l->hw = l->bus_fault = 0; + + /* check ttr0 */ + res = mmu_match_ttr(addr, super, data); + if (res != TTR_NO_MATCH) { + l->tt = 1; + if (data) { + l->valid_data = 1; + l->valid_inst = mmu_match_ttr(addr, super, 0) == res; + } else { + l->valid_inst = 1; + l->valid_data = mmu_match_ttr(addr, super, 1) == res; + } + l->global = 1; + l->modified = 1; + l->write_protect = (res == TTR_NO_WRITE); + l->phys = 0; + + return 0; + } + + l->tt = 0; + if (!regs.mmu_enabled) { + l->valid_data = l->valid_inst = 1; + l->global = 1; + l->modified = 1; + l->write_protect = 0; + l->phys = 0; + return 0; + } + + SAVE_EXCEPTION; + TRY(prb) { + desc = mmu_lookup_pagetable(addr, super, write); + D(bug("translate: %x,%u,%u,%u -> %x", addr, super, write, data, desc)); + RESTORE_EXCEPTION; + } + CATCH(prb) { + RESTORE_EXCEPTION; + /* bus error during table search */ + desc = 0; + goto fail; + } + + if ((desc & 1) == 0 || (!super && desc & MMU_MMUSR_S)) { + fail: + l->valid_data = l->valid_inst = 0; + l->global = 0; + } else { + l->valid_data = l->valid_inst = 1; + if (regs.mmu_pagesize_8k) + l->phys = (desc & ~0x1fff) - (addr & ~0x1fff); + else + l->phys = (desc & ~0xfff) - (addr & ~0xfff); + l->global = (desc & MMU_MMUSR_G) != 0; + l->modified = (desc & MMU_MMUSR_M) != 0; + l->write_protect = (desc & MMU_MMUSR_W) != 0; + } + + return desc; +} + +static ALWAYS_INLINE bool +mmu_fill_atc_l1(uaecptr addr, int super, int data, int write, + struct mmu_atc_line *l1) +{ + int idx = ATC_L2_INDEX(addr); + int tag = ATC_TAG(addr); + struct mmu_atc_line *l = &atc_l2[super][idx]; + uaecptr phys_addr; + + if (l->tag != tag) { + restart: + mmu_fill_atc_l2(addr, super, data, write, l); + } + if (!(data ? 
l->valid_data : l->valid_inst)) { + D(bug("MMU: non-resident page (%x,%x,%x)!", addr, regs.pc, regs.fault_pc)); + goto fail; + } + if (write) { + if (l->write_protect) { + D(bug("MMU: write protected (via %s) %x", l->tt ? "ttr" : "atc", addr)); + goto fail; + } + if (!l->modified) + goto restart; + } + *l1 = *l; + + phys_addr = addr + l1->phys; + if ((phys_addr & 0xfff00000) == 0x00f00000) { + l1->hw = 1; + goto fail; + } + if ((phys_addr & 0xfff00000) == 0xfff00000) { + l1->hw = 1; + l1->phys -= 0xff000000; + goto fail; + } + + if (!test_ram_boundary(phys_addr, 1, super, write)) { + l1->bus_fault = 1; + goto fail; + } + + return true; + +fail: + l1->tag = ~l1->tag; + return false; +} + +uaecptr mmu_translate(uaecptr addr, int super, int data, int write) +{ + struct mmu_atc_line *l; + + l = &atc_l2[super][ATC_L2_INDEX(addr)]; + mmu_fill_atc_l2(addr, super, data, write, l); + if (!(data ? l->valid_data : l->valid_inst)) + { + breakpt(); + THROW(2); + } + + return addr + l->phys; +} + +/* + * Lookup the address by walking the page table and updating + * the page descriptors accordingly. Returns the found descriptor + * or produces a bus error. + */ +static uaecptr REGPARAM2 mmu_lookup_pagetable(uaecptr addr, int super, int write) +{ + uae_u32 desc, desc_addr, wp; + int i; + + wp = 0; + desc = super ? 
regs.srp : regs.urp; + + /* fetch root table descriptor */ + i = (addr >> 23) & 0x1fc; + desc_addr = (desc & MMU_ROOT_PTR_ADDR_MASK) | i; + desc = phys_get_long(desc_addr); + if ((desc & 2) == 0) { + D(bug("MMU: invalid root descriptor for %x", addr)); + return 0; + } + + wp |= desc; + if ((desc & MMU_DES_USED) == 0) + phys_put_long(desc_addr, desc | MMU_DES_USED); + + /* fetch pointer table descriptor */ + i = (addr >> 16) & 0x1fc; + desc_addr = (desc & MMU_ROOT_PTR_ADDR_MASK) | i; + desc = phys_get_long(desc_addr); + if ((desc & 2) == 0) { + D(bug("MMU: invalid ptr descriptor for %x", addr)); + return 0; + } + wp |= desc; + if ((desc & MMU_DES_USED) == 0) + phys_put_long(desc_addr, desc | MMU_DES_USED); + + /* fetch page table descriptor */ + if (regs.mmu_pagesize_8k) { + i = (addr >> 11) & 0x7c; + desc_addr = (desc & MMU_PTR_PAGE_ADDR_MASK_8) | i; + } else { + i = (addr >> 10) & 0xfc; + desc_addr = (desc & MMU_PTR_PAGE_ADDR_MASK_4) | i; + } + + desc = phys_get_long(desc_addr); + if ((desc & 3) == 2) { + /* indirect */ + desc_addr = desc & MMU_PAGE_INDIRECT_MASK; + desc = phys_get_long(desc_addr); + } + if ((desc & 1) == 0) { + D(bug("MMU: invalid page descriptor log=%08x desc=%08x @%08x", addr, desc, desc_addr)); + return desc; + } + + desc |= wp & MMU_DES_WP; + if (write) { + if (desc & MMU_DES_WP) { + if ((desc & MMU_DES_USED) == 0) { + desc |= MMU_DES_USED; + phys_put_long(desc_addr, desc); + } + } else if ((desc & (MMU_DES_USED|MMU_DES_MODIFIED)) != + (MMU_DES_USED|MMU_DES_MODIFIED)) { + desc |= MMU_DES_USED|MMU_DES_MODIFIED; + phys_put_long(desc_addr, desc); + } + } else { + if ((desc & MMU_DES_USED) == 0) { + desc |= MMU_DES_USED; + phys_put_long(desc_addr, desc); + } + } + return desc; +} + +uae_u16 mmu_get_word_unaligned(uaecptr addr, int data) +{ + uae_u16 res; + + res = (uae_u16)mmu_get_byte(addr, data, sz_word) << 8; + SAVE_EXCEPTION; + TRY(prb) { + res |= mmu_get_byte(addr + 1, data, sz_word); + RESTORE_EXCEPTION; + } + CATCH(prb) { + 
RESTORE_EXCEPTION; + regs.mmu_fault_addr = addr; + regs.mmu_ssw |= MMU_SSW_MA; + breakpt(); + THROW_AGAIN(prb); + } + return res; +} + +uae_u32 mmu_get_long_unaligned(uaecptr addr, int data) +{ + uae_u32 res; + + if (likely(!(addr & 1))) { + res = (uae_u32)mmu_get_word(addr, data, sz_long) << 16; + SAVE_EXCEPTION; + TRY(prb) { + res |= mmu_get_word(addr + 2, data, sz_long); + RESTORE_EXCEPTION; + } + CATCH(prb) { + RESTORE_EXCEPTION; + regs.mmu_fault_addr = addr; + regs.mmu_ssw |= MMU_SSW_MA; + breakpt(); + THROW_AGAIN(prb); + } + } else { + res = (uae_u32)mmu_get_byte(addr, data, sz_long) << 8; + SAVE_EXCEPTION; + TRY(prb) { + res = (res | mmu_get_byte(addr + 1, data, sz_long)) << 8; + res = (res | mmu_get_byte(addr + 2, data, sz_long)) << 8; + res |= mmu_get_byte(addr + 3, data, sz_long); + RESTORE_EXCEPTION; + } + CATCH(prb) { + RESTORE_EXCEPTION; + regs.mmu_fault_addr = addr; + regs.mmu_ssw |= MMU_SSW_MA; + breakpt(); + THROW_AGAIN(prb); + } + } + return res; +} + +uae_u8 mmu_get_byte_slow(uaecptr addr, int super, int data, + int size, struct mmu_atc_line *cl) +{ + uae_u32 tag = ATC_TAG(addr); + + if (cl->tag == (uae_u16)~tag) { + redo: + if (cl->hw) + return HWget_b(cl->phys + addr); + mmu_bus_error(addr, mmu_get_fc(super, data), 0, size); + return 0; + } + + if (!mmu_fill_atc_l1(addr, super, data, 0, cl)) + goto redo; + + return do_get_mem_byte((uae_u8 *)mmu_get_real_address(addr, cl)); +} + +uae_u16 mmu_get_word_slow(uaecptr addr, int super, int data, + int size, struct mmu_atc_line *cl) +{ + uae_u32 tag = ATC_TAG(addr); + + if (cl->tag == (uae_u16)~tag) { + redo: + if (cl->hw) + return HWget_w(cl->phys + addr); + mmu_bus_error(addr, mmu_get_fc(super, data), 0, size); + return 0; + } + + if (!mmu_fill_atc_l1(addr, super, data, 0, cl)) + goto redo; + + return do_get_mem_word((uae_u16 *)mmu_get_real_address(addr, cl)); +} + +uae_u32 mmu_get_long_slow(uaecptr addr, int super, int data, + int size, struct mmu_atc_line *cl) +{ + uae_u32 tag = ATC_TAG(addr); + + 
if (cl->tag == (uae_u16)~tag) { + redo: + if (cl->hw) + return HWget_l(cl->phys + addr); + mmu_bus_error(addr, mmu_get_fc(super, data), 0, size); + return 0; + } + + if (!mmu_fill_atc_l1(addr, super, data, 0, cl)) + goto redo; + + return do_get_mem_long((uae_u32 *)mmu_get_real_address(addr, cl)); +} + + +uae_u64 mmu_get_quad_slow(uaecptr addr, int super, int data, + struct mmu_atc_line *cl) +{ + uae_u64 h = mmu_get_long_slow(addr, super, data, sz_long, cl); + uae_u64 l = mmu_get_long_slow(addr + 4, super, data, sz_long, cl); + return (h << 32) | l; +} + +REGPARAM2 void mmu_put_long_unaligned(uaecptr addr, uae_u32 val, int data) +{ + SAVE_EXCEPTION; + TRY(prb) { + if (likely(!(addr & 1))) { + mmu_put_word(addr, val >> 16, data, sz_long); + mmu_put_word(addr + 2, val, data, sz_long); + } else { + mmu_put_byte(addr, val >> 24, data, sz_long); + mmu_put_byte(addr + 1, val >> 16, data, sz_long); + mmu_put_byte(addr + 2, val >> 8, data, sz_long); + mmu_put_byte(addr + 3, val, data, sz_long); + } + RESTORE_EXCEPTION; + } + CATCH(prb) { + RESTORE_EXCEPTION; + regs.wb3_data = val; + if (regs.mmu_fault_addr != addr) { + regs.mmu_fault_addr = addr; + regs.mmu_ssw |= MMU_SSW_MA; + } + breakpt(); + THROW_AGAIN(prb); + } +} + +REGPARAM2 void mmu_put_word_unaligned(uaecptr addr, uae_u16 val, int data) +{ + SAVE_EXCEPTION; + TRY(prb) { + mmu_put_byte(addr, val >> 8, data, sz_word); + mmu_put_byte(addr + 1, val, data, sz_word); + RESTORE_EXCEPTION; + } + CATCH(prb) { + RESTORE_EXCEPTION; + regs.wb3_data = val; + if (regs.mmu_fault_addr != addr) { + regs.mmu_fault_addr = addr; + regs.mmu_ssw |= MMU_SSW_MA; + } + breakpt(); + THROW_AGAIN(prb); + } +} + +REGPARAM2 void mmu_put_byte_slow(uaecptr addr, uae_u8 val, int super, int data, + int size, struct mmu_atc_line *cl) +{ + uae_u32 tag = ATC_TAG(addr); + + if (cl->tag == (uae_u16)~tag) { + redo: + if (cl->hw) { + HWput_b(cl->phys + addr, val); + return; + } + regs.wb3_data = val; + mmu_bus_error(addr, mmu_get_fc(super, data), 1, 
size); + return; + } + + if (!mmu_fill_atc_l1(addr, super, data, 1, cl)) + goto redo; + + do_put_mem_byte((uae_u8 *)mmu_get_real_address(addr, cl), val); +} + +REGPARAM2 void mmu_put_word_slow(uaecptr addr, uae_u16 val, int super, int data, + int size, struct mmu_atc_line *cl) +{ + uae_u32 tag = ATC_TAG(addr); + + if (cl->tag == (uae_u16)~tag) { + redo: + if (cl->hw) { + HWput_w(cl->phys + addr, val); + return; + } + regs.wb3_data = val; + mmu_bus_error(addr, mmu_get_fc(super, data), 1, size); + return; + } + + if (!mmu_fill_atc_l1(addr, super, data, 1, cl)) + goto redo; + + do_put_mem_word((uae_u16 *)mmu_get_real_address(addr, cl), val); +} + +REGPARAM2 void mmu_put_long_slow(uaecptr addr, uae_u32 val, int super, int data, + int size, struct mmu_atc_line *cl) +{ + uae_u32 tag = ATC_TAG(addr); + + if (cl->tag == (uae_u16)~tag) { + redo: + if (cl->hw) { + HWput_l(cl->phys + addr, val); + return; + } + regs.wb3_data = val; + mmu_bus_error(addr, mmu_get_fc(super, data), 1, size); + return; + } + + if (!mmu_fill_atc_l1(addr, super, data, 1, cl)) + goto redo; + + do_put_mem_long((uae_u32 *)mmu_get_real_address(addr, cl), val); +} + +REGPARAM2 void mmu_put_quad_slow(uaecptr addr, uae_u64 val, int super, int data, + struct mmu_atc_line *cl) +{ + mmu_put_long_slow(addr, (uae_u32)(val >> 32), super, data, sz_long, cl); + mmu_put_long_slow(addr + 4, (uae_u32)(val), super, data, sz_long, cl); +} + +uae_u32 sfc_get_long(uaecptr addr) +{ + int super = (regs.sfc & 4) != 0; + int data = (regs.sfc & 3) != 2; + uae_u32 res; + + if (likely(!is_unaligned(addr, 4))) + return mmu_get_user_long(addr, super, data, sz_long); + + if (likely(!(addr & 1))) { + res = (uae_u32)mmu_get_user_word(addr, super, data, sz_long) << 16; + SAVE_EXCEPTION; + TRY(prb) { + res |= mmu_get_user_word(addr + 2, super, data, sz_long); + RESTORE_EXCEPTION; + } + CATCH(prb) { + RESTORE_EXCEPTION; + regs.mmu_fault_addr = addr; + regs.mmu_ssw |= MMU_SSW_MA; + breakpt(); + THROW_AGAIN(prb); + } + } else { + res = 
(uae_u32)mmu_get_user_byte(addr, super, data, sz_long) << 8; + SAVE_EXCEPTION; + TRY(prb) { + res = (res | mmu_get_user_byte(addr + 1, super, data, sz_long)) << 8; + res = (res | mmu_get_user_byte(addr + 2, super, data, sz_long)) << 8; + res |= mmu_get_user_byte(addr + 3, super, data, sz_long); + RESTORE_EXCEPTION; + } + CATCH(prb) { + RESTORE_EXCEPTION; + regs.mmu_fault_addr = addr; + regs.mmu_ssw |= MMU_SSW_MA; + breakpt(); + THROW_AGAIN(prb); + } + } + return res; +} + +uae_u16 sfc_get_word(uaecptr addr) +{ /* word read with super/data taken from regs.sfc; an unaligned address is read as two bytes, and a fault on the second byte sets MMU_SSW_MA with the fault address reset to addr before rethrowing */ + int super = (regs.sfc & 4) != 0; + int data = (regs.sfc & 3) != 2; + uae_u16 res; + + if (likely(!is_unaligned(addr, 2))) + return mmu_get_user_word(addr, super, data, sz_word); + + res = (uae_u16)mmu_get_user_byte(addr, super, data, sz_word) << 8; + SAVE_EXCEPTION; + TRY(prb) { + res |= mmu_get_user_byte(addr + 1, super, data, sz_word); + RESTORE_EXCEPTION; + } + CATCH(prb) { + RESTORE_EXCEPTION; + regs.mmu_fault_addr = addr; + regs.mmu_ssw |= MMU_SSW_MA; + breakpt(); + THROW_AGAIN(prb); + } + return res; +} + +uae_u8 sfc_get_byte(uaecptr addr) +{ /* byte read with super/data taken from regs.sfc; a single access, so no split handling is needed */ + int super = (regs.sfc & 4) != 0; + int data = (regs.sfc & 3) != 2; + + return mmu_get_user_byte(addr, super, data, sz_byte); +} + +void dfc_put_long(uaecptr addr, uae_u32 val) +{ /* long store with super/data taken from regs.dfc (bit 2 -> super, (dfc & 3) != 2 -> data); unaligned addresses are written as two words or four bytes; on a fault val is latched in regs.wb3_data and the exception is rethrown */ + int super = (regs.dfc & 4) != 0; + int data = (regs.dfc & 3) != 2; + + SAVE_EXCEPTION; + TRY(prb) { + if (likely(!is_unaligned(addr, 4))) + mmu_put_user_long(addr, val, super, data, sz_long); + else if (likely(!(addr & 1))) { + mmu_put_user_word(addr, val >> 16, super, data, sz_long); + mmu_put_user_word(addr + 2, val, super, data, sz_long); + } else { + mmu_put_user_byte(addr, val >> 24, super, data, sz_long); + mmu_put_user_byte(addr + 1, val >> 16, super, data, sz_long); + mmu_put_user_byte(addr + 2, val >> 8, super, data, sz_long); + mmu_put_user_byte(addr + 3, val, super, data, sz_long); + } + RESTORE_EXCEPTION; + } + CATCH(prb) { + RESTORE_EXCEPTION; + regs.wb3_data = val; + if (regs.mmu_fault_addr != addr) { + 
regs.mmu_fault_addr = addr; + regs.mmu_ssw |= MMU_SSW_MA; + } + breakpt(); + THROW_AGAIN(prb); + } +} + +void dfc_put_word(uaecptr addr, uae_u16 val) +{ /* word store with super/data taken from regs.dfc; an unaligned address is written as two bytes, high byte first; on a fault val is latched in regs.wb3_data, MMU_SSW_MA is set when the recorded fault address differs from addr, and the exception is rethrown */ + int super = (regs.dfc & 4) != 0; + int data = (regs.dfc & 3) != 2; + + SAVE_EXCEPTION; + TRY(prb) { + if (likely(!is_unaligned(addr, 2))) + mmu_put_user_word(addr, val, super, data, sz_word); + else { + mmu_put_user_byte(addr, val >> 8, super, data, sz_word); + mmu_put_user_byte(addr + 1, val, super, data, sz_word); + } + RESTORE_EXCEPTION; + } + CATCH(prb) { + RESTORE_EXCEPTION; + regs.wb3_data = val; + if (regs.mmu_fault_addr != addr) { + regs.mmu_fault_addr = addr; + regs.mmu_ssw |= MMU_SSW_MA; + } + breakpt(); + THROW_AGAIN(prb); + } +} + +void dfc_put_byte(uaecptr addr, uae_u8 val) +{ /* byte store with super/data taken from regs.dfc; on a fault val is latched in regs.wb3_data and the exception is rethrown */ + int super = (regs.dfc & 4) != 0; + int data = (regs.dfc & 3) != 2; + + SAVE_EXCEPTION; + TRY(prb) { + mmu_put_user_byte(addr, val, super, data, sz_byte); + RESTORE_EXCEPTION; + } + CATCH(prb) { + RESTORE_EXCEPTION; + regs.wb3_data = val; + breakpt(); + THROW_AGAIN(prb); + } +} + +void mmu_op(uae_u32 opcode, uae_u16 extra) +{ /* MMU opcode dispatch: (opcode & 0xFE0) == 0x0500 is PFLUSH and flushes ATC lines, (opcode & 0x0FD8) == 0x548 is PTEST and reports a translation in regs.mmusr, anything else is an illegal opcode; extra is unused */ + int super = (regs.dfc & 4) != 0; + DUNUSED(extra); + if ((opcode & 0xFE0) == 0x0500) { + int regno, glob; + //D(didflush = 0); + uae_u32 addr; + /* PFLUSH */ + regno = opcode & 7; + glob = (opcode & 8) != 0; + + if (opcode & 16) { + D(bug("pflusha(%u,%u)", glob, regs.dfc)); + mmu_flush_atc_all(glob); + } else { + addr = m68k_areg(regs, regno); + D(bug("pflush(%u,%u,%x)", glob, regs.dfc, addr)); + mmu_flush_atc(addr, super, glob); + } + flush_internals(); +#ifdef USE_JIT + flush_icache(); +#endif + } else if ((opcode & 0x0FD8) == 0x548) { + int write, regno; + uae_u32 addr; + + regno = opcode & 7; + write = (opcode & 32) == 0; + addr = m68k_areg(regs, regno); + //bug("ptest(%u,%u,%x)", write, regs.dfc, addr); + D(bug("PTEST%c (A%d) %08x DFC=%d", write ? 
'W' : 'R', regno, addr, regs.dfc)); + mmu_flush_atc(addr, super, true); + SAVE_EXCEPTION; + TRY(prb) { + struct mmu_atc_line *l; + uae_u32 desc; + bool data = (regs.dfc & 3) != 2; + + l = &atc_l2[super][ATC_L2_INDEX(addr)]; + desc = mmu_fill_atc_l2(addr, super, data, write, l); + if (!(data ? l->valid_data : l->valid_inst)) + regs.mmusr = MMU_MMUSR_B; + else if (l->tt) + regs.mmusr = MMU_MMUSR_T | MMU_MMUSR_R; + else { + regs.mmusr = desc & (~0xfff|MMU_MMUSR_G|MMU_MMUSR_Ux|MMU_MMUSR_S| + MMU_MMUSR_CM|MMU_MMUSR_M|MMU_MMUSR_W); + regs.mmusr |= MMU_MMUSR_R; + } + } + CATCH(prb) { + regs.mmusr = MMU_MMUSR_B; + } + RESTORE_EXCEPTION; + D(bug("PTEST result: mmusr %08x", regs.mmusr)); + } else + op_illg (opcode); +} + +void mmu_flush_atc(uaecptr addr, bool super, bool global) +{ /* mark the L1 and L2 ATC lines for addr in the given super bank as empty (tag 0x8000), plus the neighbouring index (i ^ 1) in 8k-page mode; lines flagged global are kept unless global is set */ + struct mmu_atc_line *l; + int i, j; + + l = atc_l1[super][0][0]; + i = ATC_L1_INDEX(addr); + for (j = 0; j < 4; j++) { /* 4 = the [data][rw] sub-tables of atc_l1[super] */ + if (global || !l[i].global) + l[i].tag = 0x8000; + l += ATC_L1_SIZE; + } + if (regs.mmu_pagesize_8k) { + i = ATC_L1_INDEX(addr) ^ 1; + for (l = atc_l1[super][0][0], j = 0; j < 4; j++) { /* rewind l: the loop above left it one whole bank past atc_l1[super] */ + if (global || !l[i].global) + l[i].tag = 0x8000; + l += ATC_L1_SIZE; + } + } + l = atc_l2[super]; + i = ATC_L2_INDEX(addr); + if (global || !l[i].global) + l[i].tag = 0x8000; + if (regs.mmu_pagesize_8k) { + i ^= 1; + if (global || !l[i].global) + l[i].tag = 0x8000; + } +} + +void mmu_flush_atc_all(bool global) +{ /* mark every L1 and L2 ATC line as empty (tag 0x8000); lines flagged global are kept unless global is set */ + struct mmu_atc_line *l; + unsigned int i; + + l = atc_l1[0][0][0]; + for (i = 0; i < sizeof(atc_l1) / sizeof(*l); l++, i++) { + if (global || !l->global) + l->tag = 0x8000; + } + + l = atc_l2[0]; + for (i = 0; i < sizeof(atc_l2) / sizeof(*l); l++, i++) { + if (global || !l->global) + l->tag = 0x8000; + } +} + +void mmu_reset(void) +{ /* flush the whole ATC including global lines, then clear urp/srp, the four transparent translation registers and mmusr */ + mmu_flush_atc_all(true); + + regs.urp = regs.srp = 0; + regs.itt0 = regs.itt1 = 0; + regs.dtt0 = regs.dtt1 = 0; + regs.mmusr = 0; +} + + +void mmu_set_tc(uae_u16 tc) +{ /* no-op when tc is unchanged; otherwise store it, derive mmu_enabled (bit 15) and mmu_pagesize_8k (bit 14) and flush the whole ATC */ + if (regs.tc == tc) + return; + + regs.tc = tc; + regs.mmu_enabled = tc & 0x8000 ? 
1 : 0; + regs.mmu_pagesize_8k = tc & 0x4000 ? 1 : 0; + mmu_flush_atc_all(true); + + D(bug("MMU: enabled=%d page8k=%d\n", regs.mmu_enabled, regs.mmu_pagesize_8k)); +} + +void mmu_set_super(bool super) +{ /* point current_atc at the L1 ATC bank for the given super flag */ + current_atc = &atc_l1[super]; +} + +#else + +void mmu_op(uae_u32 opcode, uae_u16 /*extra*/) +{ /* variant for the #else branch: PFLUSH only calls flush_internals(), PTEST does nothing, any other opcode is illegal */ + if ((opcode & 0xFE0) == 0x0500) { + /* PFLUSH instruction */ + flush_internals(); + } else if ((opcode & 0x0FD8) == 0x548) { + /* PTEST instruction */ + } else + op_illg(opcode); +} + +#endif + +/* +vim:ts=4:sw=4: +*/ diff --git a/BasiliskII/src/uae_cpu/cpummu.h b/BasiliskII/src/uae_cpu/cpummu.h new file mode 100644 index 00000000..01359f6f --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpummu.h @@ -0,0 +1,267 @@ +/* + * cpummu.h - MMU emulation + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by UAE MMU patch + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef CPUMMU_H +#define CPUMMU_H + +#include "registers.h" + +# include + +#define MMU_TEST_PTEST 1 +#define MMU_TEST_VERBOSE 2 +#define MMU_TEST_FORCE_TABLE_SEARCH 4 +#define MMU_TEST_NO_BUSERR 8 + +extern void mmu_dump_tables(void); + +#define MMU_TTR_LOGICAL_BASE 0xff000000 +#define MMU_TTR_LOGICAL_MASK 0x00ff0000 +#define MMU_TTR_BIT_ENABLED (1 << 15) +#define MMU_TTR_BIT_SFIELD_ENABLED (1 << 14) +#define MMU_TTR_BIT_SFIELD_SUPER (1 << 13) +#define MMU_TTR_SFIELD_SHIFT 13 +#define MMU_TTR_UX_MASK ((1 << 9) | (1 << 8)) +#define MMU_TTR_UX_SHIFT 8 +#define MMU_TTR_CACHE_MASK ((1 << 6) | (1 << 5)) +#define MMU_TTR_CACHE_SHIFT 5 +#define MMU_TTR_BIT_WRITE_PROTECT (1 << 2) + +#define MMU_UDT_MASK 3 +#define MMU_PDT_MASK 3 + +#define MMU_DES_WP 4 +#define MMU_DES_USED 8 + +/* page descriptors only */ +#define MMU_DES_MODIFIED 16 +#define MMU_DES_SUPER (1 << 7) +#define MMU_DES_GLOBAL (1 << 10) + +#define MMU_ROOT_PTR_ADDR_MASK 0xfffffe00 +#define MMU_PTR_PAGE_ADDR_MASK_8 0xffffff80 +#define MMU_PTR_PAGE_ADDR_MASK_4 0xffffff00 + +#define MMU_PAGE_INDIRECT_MASK 0xfffffffc +#define MMU_PAGE_ADDR_MASK_8 0xffffe000 +#define MMU_PAGE_ADDR_MASK_4 0xfffff000 +#define MMU_PAGE_UR_MASK_8 ((1 << 12) | (1 << 11)) +#define MMU_PAGE_UR_MASK_4 (1 << 11) +#define MMU_PAGE_UR_SHIFT 11 + +#define MMU_MMUSR_ADDR_MASK 0xfffff000 +#define MMU_MMUSR_B (1 << 11) +#define MMU_MMUSR_G (1 << 10) +#define MMU_MMUSR_U1 (1 << 9) +#define MMU_MMUSR_U0 (1 << 8) +#define MMU_MMUSR_Ux (MMU_MMUSR_U1 | MMU_MMUSR_U0) +#define MMU_MMUSR_S (1 << 7) +#define MMU_MMUSR_CM ((1 << 6) | ( 1 << 5)) +#define MMU_MMUSR_M (1 << 4) +#define MMU_MMUSR_W (1 << 2) +#define MMU_MMUSR_T (1 << 1) +#define MMU_MMUSR_R (1 << 0) + +/* special status word (access error stack frame) */ +#define MMU_SSW_TM 
0x0007 +#define MMU_SSW_TT 0x0018 +#define MMU_SSW_SIZE 0x0060 +#define MMU_SSW_SIZE_B 0x0020 +#define MMU_SSW_SIZE_W 0x0040 +#define MMU_SSW_SIZE_L 0x0000 +#define MMU_SSW_RW 0x0100 +#define MMU_SSW_LK 0x0200 +#define MMU_SSW_ATC 0x0400 +#define MMU_SSW_MA 0x0800 + +#define TTR_I0 4 +#define TTR_I1 5 +#define TTR_D0 6 +#define TTR_D1 7 + +#define TTR_NO_MATCH 0 +#define TTR_NO_WRITE 1 +#define TTR_OK_MATCH 2 + +struct mmu_atc_line { + uae_u16 tag; + unsigned tt : 1; + unsigned valid_data : 1; + unsigned valid_inst : 1; + unsigned global : 1; + unsigned modified : 1; + unsigned write_protect : 1; + unsigned hw : 1; + unsigned bus_fault : 1; + uaecptr phys; +}; + +/* + * We don't need to store the whole logical address in the atc cache, as part of + * it is encoded as index into the cache. 14 bits of the address are stored in + * the tag, this means at least 6 bits must go into the index. The upper two + * bits of the tag define the type of data in the atc line: + * - 00: a normal memory address + * - 11: invalid memory address or hardware access + * (generated via ~ATC_TAG(addr) in the slow path) + * - 10: empty atc line + */ + +#define ATC_TAG_SHIFT 18 +#define ATC_TAG(addr) ((uae_u32)(addr) >> ATC_TAG_SHIFT) + + +#define ATC_L1_SIZE_LOG 8 +#define ATC_L1_SIZE (1 << ATC_L1_SIZE_LOG) + +#define ATC_L1_INDEX(addr) (((addr) >> 12) % ATC_L1_SIZE) + +/* + * first level atc cache + * indexed by [super][data][rw][idx] + */ + +typedef struct mmu_atc_line mmu_atc_l1_array[2][2][ATC_L1_SIZE]; +extern mmu_atc_l1_array atc_l1[2]; +extern mmu_atc_l1_array *current_atc; + +#define ATC_L2_SIZE_LOG 12 +#define ATC_L2_SIZE (1 << ATC_L2_SIZE_LOG) + +#define ATC_L2_INDEX(addr) ((((addr) >> 12) ^ ((addr) >> (32 - ATC_L2_SIZE_LOG))) % ATC_L2_SIZE) + +extern struct mmu_atc_line atc_l2[2][ATC_L2_SIZE]; + +/* + * lookup address in the level 1 atc cache, + * the data and write arguments are constant in the common case, + * which allows gcc to generate a constant offset. 
+ */ +static ALWAYS_INLINE int mmu_lookup(uaecptr addr, bool data, bool write, + struct mmu_atc_line **cl) +{ + addr >>= 12; + *cl = &(*current_atc)[data][write][addr % ATC_L1_SIZE]; + return (*cl)->tag == addr >> (ATC_TAG_SHIFT - 12); +} + +/* + * similar to mmu_lookup, but indexes atc_l1[super] explicitly instead of current_atc; for the use of the moves instruction + */ +static ALWAYS_INLINE int mmu_user_lookup(uaecptr addr, bool super, bool data, + bool write, struct mmu_atc_line **cl) +{ + addr >>= 12; + *cl = &atc_l1[super][data][write][addr % ATC_L1_SIZE]; + return (*cl)->tag == addr >> (ATC_TAG_SHIFT - 12); +} + +extern REGPARAM2 uae_u16 mmu_get_word_unaligned(uaecptr addr, int data); +extern REGPARAM2 uae_u32 mmu_get_long_unaligned(uaecptr addr, int data); + +extern REGPARAM2 uae_u8 mmu_get_byte_slow(uaecptr addr, int super, int data, + int size, struct mmu_atc_line *cl); +extern REGPARAM2 uae_u16 mmu_get_word_slow(uaecptr addr, int super, int data, + int size, struct mmu_atc_line *cl); +extern REGPARAM2 uae_u32 mmu_get_long_slow(uaecptr addr, int super, int data, + int size, struct mmu_atc_line *cl); +extern REGPARAM2 uae_u64 mmu_get_quad_slow(uaecptr addr, int super, int data, + struct mmu_atc_line *cl); + +extern REGPARAM2 void mmu_put_word_unaligned(uaecptr addr, uae_u16 val, int data); +extern REGPARAM2 void mmu_put_long_unaligned(uaecptr addr, uae_u32 val, int data); + +extern REGPARAM2 void mmu_put_byte_slow(uaecptr addr, uae_u8 val, int super, int data, + int size, struct mmu_atc_line *cl); +extern REGPARAM2 void mmu_put_word_slow(uaecptr addr, uae_u16 val, int super, int data, + int size, struct mmu_atc_line *cl); +extern REGPARAM2 void mmu_put_long_slow(uaecptr addr, uae_u32 val, int super, int data, + int size, struct mmu_atc_line *cl); +extern REGPARAM2 void mmu_put_quad_slow(uaecptr addr, uae_u64 val, int super, int data, + struct mmu_atc_line *cl); + +extern void mmu_make_transparent_region(uaecptr baseaddr, uae_u32 size, int datamode); + +static inline void mmu_set_ttr(int regno, uae_u32 val) 
+{ + uae_u32 * ttr; + switch(regno) { + case TTR_I0: ttr = ®s.itt0; break; + case TTR_I1: ttr = ®s.itt1; break; + case TTR_D0: ttr = ®s.dtt0; break; + case TTR_D1: ttr = ®s.dtt1; break; + default: abort(); + } + *ttr = val; +} + +static inline void mmu_set_mmusr(uae_u32 val) +{ + regs.mmusr = val; +} + +#define FC_DATA (regs.s ? 5 : 1) +#define FC_INST (regs.s ? 6 : 2) + +extern uaecptr REGPARAM2 mmu_translate(uaecptr addr, int super, int data, int write); + +extern uae_u32 REGPARAM2 sfc_get_long(uaecptr addr); +extern uae_u16 REGPARAM2 sfc_get_word(uaecptr addr); +extern uae_u8 REGPARAM2 sfc_get_byte(uaecptr addr); +extern void REGPARAM2 dfc_put_long(uaecptr addr, uae_u32 val); +extern void REGPARAM2 dfc_put_word(uaecptr addr, uae_u16 val); +extern void REGPARAM2 dfc_put_byte(uaecptr addr, uae_u8 val); + + +extern void REGPARAM2 mmu_flush_atc(uaecptr addr, bool super, bool global); +extern void REGPARAM2 mmu_flush_atc_all(bool global); +extern void REGPARAM2 mmu_op(uae_u32 opcode, uae_u16 extra); + +#ifdef FULLMMU + +extern void REGPARAM2 mmu_reset(void); +extern void REGPARAM2 mmu_set_tc(uae_u16 tc); +extern void REGPARAM2 mmu_set_super(bool super); + +#else + +static inline void mmu_reset(void) +{ +} + +static inline void mmu_set_tc(uae_u16 /*tc*/) +{ +} + +static inline void mmu_set_super(bool /*super*/) +{ +} + +#endif + +#endif /* CPUMMU_H */ +/* +vim:ts=4:sw=4: +*/ diff --git a/BasiliskII/src/uae_cpu/cpustbl_nf.cpp b/BasiliskII/src/uae_cpu/cpustbl_nf.cpp new file mode 100644 index 00000000..0ea66010 --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpustbl_nf.cpp @@ -0,0 +1,2 @@ +#define NOFLAGS 1 +#include "cpustbl.cpp" diff --git a/BasiliskII/src/uae_cpu/cpustbla.cpp b/BasiliskII/src/uae_cpu/cpustbla.cpp new file mode 100644 index 00000000..f3f8e320 --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpustbla.cpp @@ -0,0 +1,5 @@ +/* + * cpustbl.cpp must be compiled twice, once for the generator program + * and once for the actual executable + */ +#include "cpustbl.cpp" 
diff --git a/BasiliskII/src/uae_cpu/debug.cpp b/BasiliskII/src/uae_cpu/debug.cpp new file mode 100644 index 00000000..8b2f14e0 --- /dev/null +++ b/BasiliskII/src/uae_cpu/debug.cpp @@ -0,0 +1,82 @@ +/* + * debug.cpp - CPU debugger + * + * Copyright (c) 2001-2010 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Bernd Schmidt's UAE + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * UAE - The Un*x Amiga Emulator + * + * Debugger + * + * (c) 1995 Bernd Schmidt + * + */ + +#include "sysdeps.h" + +#include "memory.h" +#include "newcpu.h" +#include "debug.h" + +#include "input.h" +#include "cpu_emulation.h" + +#include "main.h" + +static int debugger_active = 0; +int debugging = 0; +int irqindebug = 0; + +int ignore_irq = 0; + + +void activate_debugger (void) +{ /* mark the debugger active, raise SPCFLAG_BRK and set debugging */ +#ifdef DEBUGGER + ndebug::do_skip = false; +#endif + debugger_active = 1; + SPCFLAGS_SET( SPCFLAG_BRK ); + debugging = 1; + /* use_debugger = 1; */ +} + +void deactivate_debugger(void) +{ /* clear the debugging and debugger_active flags */ + debugging = 0; + debugger_active = 0; +} + +void debug (void) +{ /* while ignore_irq is set and regs.s is set with regs.m clear, only re-raise SPCFLAG_BRK and return; otherwise run ndebug when built with DEBUGGER */ + if (ignore_irq && regs.s && !regs.m ) { + SPCFLAGS_SET( SPCFLAG_BRK ); + return; + } +#ifdef DEBUGGER + ndebug::run(); +#endif +} + +/* +vim:ts=4:sw=4: +*/ diff --git a/BasiliskII/src/uae_cpu/fpu/core.h b/BasiliskII/src/uae_cpu/fpu/core.h index 66358a2d..2eccc4d2 100644 --- a/BasiliskII/src/uae_cpu/fpu/core.h +++ b/BasiliskII/src/uae_cpu/fpu/core.h @@ -1,28 +1,33 @@ /* - * fpu/core.h - base fpu context definition + * fpu/core.h - base fpu context definition * - * Basilisk II (C) 1997-2008 Christian Bauer + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II * - * MC68881/68040 fpu emulation - * - * Original UAE FPU, copyright 1996 Herman ten Brugge - * Rewrite for x86, copyright 1999-2000 Lauri Pesonen - * New framework, copyright 2000 Gwenole Beauchesne - * Adapted for JIT compilation (c) Bernd Meyer, 2000 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any 
later version. + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * MC68881/68040 fpu emulation * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef FPU_CORE_H @@ -34,11 +39,15 @@ /* Always use x87 FPU stack on IA-32. */ #if defined(X86_ASSEMBLY) #define USE_X87_ASSEMBLY 1 +#ifndef USE_JIT_FPU +#define ACCURATE_SIN_COS_TAN 1 +#endif #endif /* Only use x87 FPU on x86-64 if long double precision is requested. 
*/ -#if defined(X86_64_ASSEMBLY) && USE_LONG_DOUBLE +#if defined(X86_64_ASSEMBLY) && defined(USE_LONG_DOUBLE) #define USE_X87_ASSEMBLY 1 +#define ACCURATE_SIN_COS_TAN 1 #endif /* ========================================================================== */ @@ -65,10 +74,7 @@ struct fpu_t { /* --- Floating-Point Control Register --- */ /* ---------------------------------------------------------------------- */ - struct { - /* Exception Enable Byte */ - uae_u32 exception_enable; #define FPCR_EXCEPTION_ENABLE 0x0000ff00 #define FPCR_EXCEPTION_BSUN 0x00008000 #define FPCR_EXCEPTION_SNAN 0x00004000 @@ -83,21 +89,19 @@ struct fpu_t { #define FPCR_MODE_CONTROL 0x000000ff /* Rounding precision */ - uae_u32 rounding_precision; #define FPCR_ROUNDING_PRECISION 0x000000c0 #define FPCR_PRECISION_SINGLE 0x00000040 #define FPCR_PRECISION_DOUBLE 0x00000080 #define FPCR_PRECISION_EXTENDED 0x00000000 /* Rounding mode */ - uae_u32 rounding_mode; #define FPCR_ROUNDING_MODE 0x00000030 #define FPCR_ROUND_NEAR 0x00000000 #define FPCR_ROUND_ZERO 0x00000010 #define FPCR_ROUND_MINF 0x00000020 #define FPCR_ROUND_PINF 0x00000030 - } fpcr; + uae_u32 fpcr; /* ---------------------------------------------------------------------- */ /* --- Floating-Point Status Register --- */ @@ -107,7 +111,7 @@ struct fpu_t { /* Floating-Point Condition Code Byte */ uae_u32 condition_codes; - #define FPSR_CCB 0xff000000 + #define FPSR_CCB 0x0f000000 #define FPSR_CCB_NEGATIVE 0x08000000 #define FPSR_CCB_ZERO 0x04000000 #define FPSR_CCB_INFINITY 0x02000000 @@ -133,7 +137,7 @@ struct fpu_t { /* Accrued Exception Byte */ uae_u32 accrued_exception; - #define FPSR_ACCRUED_EXCEPTION 0x000000ff + #define FPSR_ACCRUED_EXCEPTION 0x000000f8 #define FPSR_ACCR_IOP 0x00000080 #define FPSR_ACCR_OVFL 0x00000040 #define FPSR_ACCR_UNFL 0x00000020 @@ -219,7 +223,7 @@ struct fpu_t { extern fpu_t fpu; /* Return the address of a particular register */ -inline fpu_register * const fpu_register_address(int i) +inline fpu_register * 
fpu_register_address(int i) { return &fpu.registers[i]; } /* Dump functions for m68k_dumpstate */ @@ -227,16 +231,16 @@ extern void fpu_dump_registers(void); extern void fpu_dump_flags(void); /* Accessors to FPU Control Register */ -static inline uae_u32 get_fpcr(void); -static inline void set_fpcr(uae_u32 new_fpcr); +//static inline uae_u32 get_fpcr(void); +//static inline void set_fpcr(uae_u32 new_fpcr); /* Accessors to FPU Status Register */ -static inline uae_u32 get_fpsr(void); -static inline void set_fpsr(uae_u32 new_fpsr); +//static inline uae_u32 get_fpsr(void); +//static inline void set_fpsr(uae_u32 new_fpsr); /* Accessors to FPU Instruction Address Register */ -static inline uae_u32 get_fpiar(); -static inline void set_fpiar(uae_u32 new_fpiar); +//static inline uae_u32 get_fpiar(); +//static inline void set_fpiar(uae_u32 new_fpiar); /* Initialization / Finalization */ extern void fpu_init(bool integral_68040); @@ -254,6 +258,6 @@ void fpuop_scc(uae_u32 opcode, uae_u32 extra) REGPARAM; /* Floating-point system control operations */ void fpuop_save(uae_u32 opcode) REGPARAM; void fpuop_restore(uae_u32 opcode) REGPARAM; -void fpuop_trapcc(uae_u32 opcode, uaecptr oldpc) REGPARAM; +void fpuop_trapcc(uae_u32 opcode, uaecptr oldpc, uae_u32 extra) REGPARAM; #endif /* FPU_CORE_H */ diff --git a/BasiliskII/src/uae_cpu/fpu/exceptions.cpp b/BasiliskII/src/uae_cpu/fpu/exceptions.cpp index 6aa6431a..2a597997 100644 --- a/BasiliskII/src/uae_cpu/fpu/exceptions.cpp +++ b/BasiliskII/src/uae_cpu/fpu/exceptions.cpp @@ -1,28 +1,33 @@ /* - * fpu/exceptions.cpp - system-dependant FPU exceptions management + * fpu/exceptions.cpp - system-dependant FPU exceptions management * - * Basilisk II (C) 1997-2008 Christian Bauer + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II * - * MC68881/68040 fpu emulation - * - * Original UAE FPU, copyright 1996 Herman ten Brugge - * Rewrite for x86, copyright 1999-2000 Lauri 
Pesonen - * New framework, copyright 2000 Gwenole Beauchesne - * Adapted for JIT compilation (c) Bernd Meyer, 2000 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * MC68881/68040 fpu emulation * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #undef PRIVATE diff --git a/BasiliskII/src/uae_cpu/fpu/exceptions.h b/BasiliskII/src/uae_cpu/fpu/exceptions.h index 8c69a69d..f943da04 100644 --- a/BasiliskII/src/uae_cpu/fpu/exceptions.h +++ b/BasiliskII/src/uae_cpu/fpu/exceptions.h @@ -1,28 +1,33 @@ /* - * fpu/exceptions.h - system-dependant FPU exceptions management + * fpu/exceptions.h - system-dependant FPU exceptions management * - * Basilisk II (C) 1997-2008 Christian Bauer + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II * - * MC68881/68040 fpu emulation - * - * Original UAE FPU, copyright 1996 Herman ten Brugge - * Rewrite for x86, copyright 1999-2000 Lauri Pesonen - * New framework, copyright 2000 Gwenole Beauchesne - * Adapted for JIT compilation (c) Bernd Meyer, 2000 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
+ * MC68881/68040 fpu emulation * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef FPU_EXCEPTIONS_H diff --git a/BasiliskII/src/uae_cpu/fpu/flags.cpp b/BasiliskII/src/uae_cpu/fpu/flags.cpp index 2eabef85..4b0972df 100644 --- a/BasiliskII/src/uae_cpu/fpu/flags.cpp +++ b/BasiliskII/src/uae_cpu/fpu/flags.cpp @@ -1,28 +1,33 @@ /* - * fpu/flags.cpp - Floating-point flags + * fpu/flags.cpp - Floating-point flags * - * Basilisk II (C) 1997-2008 Christian Bauer + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II * - * MC68881/68040 fpu emulation - * - * Original UAE FPU, copyright 1996 Herman ten Brugge - * Rewrite for x86, copyright 1999-2000 Lauri Pesonen - * New framework, copyright 2000 Gwenole Beauchesne - * Adapted for JIT compilation (c) Bernd Meyer, 2000 - * - * This program is free software; you can 
redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * MC68881/68040 fpu emulation * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* NOTE: this file shall be included only from fpu/fpu_*.cpp */ diff --git a/BasiliskII/src/uae_cpu/fpu/flags.h b/BasiliskII/src/uae_cpu/fpu/flags.h index 7c0c5b74..de25a2b6 100644 --- a/BasiliskII/src/uae_cpu/fpu/flags.h +++ b/BasiliskII/src/uae_cpu/fpu/flags.h @@ -1,28 +1,33 @@ /* - * fpu/flags.h - Floating-point flags + * fpu/flags.h - Floating-point flags * - * Basilisk II (C) 1997-2008 Christian Bauer + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II * - * MC68881/68040 fpu emulation - * - * Original UAE FPU, copyright 1996 Herman ten Brugge - * Rewrite for x86, copyright 1999-2000 Lauri Pesonen - * New framework, copyright 2000 Gwenole Beauchesne - * Adapted for JIT compilation (c) Bernd Meyer, 2000 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
+ * MC68881/68040 fpu emulation * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef FPU_FLAGS_H @@ -112,7 +117,7 @@ PRIVATE inline void FFPU set_fpccr(uae_u32 new_fpcond) /* Make FPSR according to the value passed in argument */ PRIVATE inline void FFPU make_fpsr(fpu_register const & r) - { uae_u16 sw; __asm__ __volatile__ ("fxam\n\tfnstsw %0" : "=r" (sw) : "f" (r)); FPU fpsr.condition_codes = sw; } + { uae_u16 sw; __asm__ __volatile__ ("fxam\n\tfnstsw %0" : "=a" (sw) : "f" (r)); FPU fpsr.condition_codes = sw; } /* Return the corresponding ID of the current floating-point condition codes */ /* NOTE: only valid for evaluation of a condition */ @@ -181,27 +186,27 @@ PRIVATE inline uae_u32 FFPU get_fpccr(void) uae_u32 fpccr = 0; if (isnan(FPU result)) fpccr |= FPSR_CCB_NAN; - else if (FPU result == 0.0) - fpccr |= FPSR_CCB_ZERO; - else if (FPU result < 0.0) - fpccr |= 
FPSR_CCB_NEGATIVE; - if (isinf(FPU result)) + else if (isinf(FPU result)) fpccr |= FPSR_CCB_INFINITY; + else if (iszero(FPU result)) + fpccr |= FPSR_CCB_ZERO; + if (isneg(FPU result)) + fpccr |= FPSR_CCB_NEGATIVE; return fpccr; } /* M68k to native floating-point condition codes - SELF */ PRIVATE inline void FFPU set_fpccr(uae_u32 new_fpcond) { + bool negative = (new_fpcond & FPSR_CCB_NEGATIVE) != 0; if (new_fpcond & FPSR_CCB_NAN) - make_nan(FPU result); + make_nan(FPU result, negative); + else if (new_fpcond & FPSR_CCB_INFINITY) + make_inf(FPU result, negative); else if (new_fpcond & FPSR_CCB_ZERO) - FPU result = 0.0; - else if (new_fpcond & FPSR_CCB_NEGATIVE) - FPU result = -1.0; + make_zero(FPU result, negative); else - FPU result = +1.0; - /* gb-- where is Infinity ? */ + FPU result = negative ? -1.0 : +1.0; } /* Make FPSR according to the value passed in argument */ @@ -217,7 +222,7 @@ PRIVATE inline void FFPU make_fpsr(fpu_register const & r) /* -------------------------------------------------------------------------- */ /* Return the address of the floating-point condition codes register */ -static inline uae_u32 * const FFPU address_of_fpccr(void) +static inline uae_u32 * FFPU address_of_fpccr(void) { return ((uae_u32 *)& FPU fpsr.condition_codes); } #endif /* FPU_FLAGS_H */ diff --git a/BasiliskII/src/uae_cpu/fpu/fpu.h b/BasiliskII/src/uae_cpu/fpu/fpu.h index 3940a75b..d1fe6dd2 100644 --- a/BasiliskII/src/uae_cpu/fpu/fpu.h +++ b/BasiliskII/src/uae_cpu/fpu/fpu.h @@ -1,28 +1,33 @@ /* - * fpu/fpu.h - public header + * fpu/fpu.h - public header * - * Basilisk II (C) 1997-2008 Christian Bauer + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II * - * MC68881/68040 fpu emulation - * - * Original UAE FPU, copyright 1996 Herman ten Brugge - * Rewrite for x86, copyright 1999-2000 Lauri Pesonen - * New framework, copyright 2000 Gwenole Beauchesne - * Adapted for JIT compilation (c) Bernd Meyer, 
2000 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * MC68881/68040 fpu emulation * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef FPU_PUBLIC_HEADER_H @@ -46,4 +51,9 @@ #include "fpu/types.h" #include "fpu/core.h" +void fpu_set_fpsr(uae_u32 new_fpsr); +uae_u32 fpu_get_fpsr(void); +void fpu_set_fpcr(uae_u32 new_fpcr); +uae_u32 fpu_get_fpcr(void); + #endif /* FPU_PUBLIC_HEADER_H */ diff --git a/BasiliskII/src/uae_cpu/fpu/fpu_ieee.cpp b/BasiliskII/src/uae_cpu/fpu/fpu_ieee.cpp index f5a1aeb4..ce18967e 100644 --- a/BasiliskII/src/uae_cpu/fpu/fpu_ieee.cpp +++ b/BasiliskII/src/uae_cpu/fpu/fpu_ieee.cpp @@ -1,31 +1,42 @@ /* - * fpu/fpu_ieee.cpp + * fpu_ieee.cpp - the IEEE FPU * - * Basilisk II (C) 1997-2008 Christian Bauer + * Copyright (c) 2001-2008 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II * - * MC68881/68040 fpu emulation + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. * - * Original UAE FPU, copyright 1996 Herman ten Brugge - * Rewrite for x86, copyright 1999-2000 Lauri Pesonen - * New framework, copyright 2000 Gwenole Beauchesne - * Adapted for JIT compilation (c) Bernd Meyer, 2000 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * MC68881/68040 fpu emulation * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
+ * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - /* + * UAE - The Un*x Amiga Emulator + * + * MC68881/MC68040 emulation + * + * Copyright 1996 Herman ten Brugge + * + * * Following fixes by Lauri Pesonen, July 1999: * * FMOVEM list handling: @@ -87,7 +98,7 @@ */ #include "sysdeps.h" -#include +#include #include "memory.h" #include "readcpu.h" #include "newcpu.h" @@ -130,6 +141,24 @@ fpu_t fpu; #include "fpu/exceptions.cpp" #include "fpu/rounding.cpp" +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) +#define LD(x) x ## L +#ifdef HAVE_POWL +#define POWL(x, y) powl(x, y) +#else +#define POWL(x, y) pow(x, y) +#endif +#ifdef HAVE_LOG10L +#define LOG10L(x) log10l(x) +#else +#define LOG10L(x) log10(x) +#endif +#else +#define LD(x) x +#define POWL(x, y) pow(x, y) +#define LOG10L(x) log10(x) +#endif + /* -------------------------------------------------------------------------- */ /* 
--- Debugging --- */ /* -------------------------------------------------------------------------- */ @@ -152,9 +181,9 @@ PUBLIC void FFPU fpu_dump_flags(void) (get_fpsr() & FPSR_CCB_NAN) != 0); } +#if FPU_DEBUG && FPU_DUMP_REGISTERS PRIVATE void FFPU dump_registers(const char * str) { -#if FPU_DEBUG && FPU_DUMP_REGISTERS char temp_str[512]; sprintf(temp_str, "%s: %.04f, %.04f, %.04f, %.04f, %.04f, %.04f, %.04f, %.04f\n", @@ -164,12 +193,15 @@ PRIVATE void FFPU dump_registers(const char * str) fpu_get_register(6), fpu_get_register(7) ); fpu_debug((temp_str)); +#else +PRIVATE void FFPU dump_registers(const char *) +{ #endif } +#if FPU_DEBUG && FPU_DUMP_FIRST_BYTES PRIVATE void FFPU dump_first_bytes(uae_u8 * buffer, uae_s32 actual) { -#if FPU_DEBUG && FPU_DUMP_FIRST_BYTES char temp_buf1[256], temp_buf2[10]; int bytes = sizeof(temp_buf1)/3-1-3; if (actual < bytes) @@ -183,6 +215,9 @@ PRIVATE void FFPU dump_first_bytes(uae_u8 * buffer, uae_s32 actual) strcat(temp_buf1, "\n"); fpu_debug((temp_buf1)); +#else + PRIVATE void FFPU dump_first_bytes(uae_u8 *, uae_s32) +{ #endif } @@ -200,11 +235,12 @@ PRIVATE inline fpu_register FFPU make_single(uae_u32 value) #if 1 // Use a single, otherwise some checks for NaN, Inf, Zero would have to // be performed - fpu_single result = 0; // = 0 to workaround a compiler bug on SPARC - fp_declare_init_shape(srp, result, single); - srp->ieee.negative = (value >> 31) & 1; - srp->ieee.exponent = (value >> 23) & FP_SINGLE_EXP_MAX; - srp->ieee.mantissa = value & 0x007fffff; + fpu_single result = 0; + fp_declare_init_shape(srp, single); + srp.ieee.negative = (value >> 31) & 1; + srp.ieee.exponent = (value >> 23) & FP_SINGLE_EXP_MAX; + srp.ieee.mantissa = value & 0x007fffff; + result = srp.value; fpu_debug(("make_single (%X) = %.04f\n",value,(double)result)); return result; #elif 0 /* Original code */ @@ -212,13 +248,13 @@ PRIVATE inline fpu_register FFPU make_single(uae_u32 value) return (0.0); fpu_register result; - uae_u32 * p = (uae_u32 
*)&result; + fpu_register_parts *p = (fpu_register_parts *)&result; uae_u32 sign = (value & 0x80000000); uae_u32 exp = ((value & 0x7F800000) >> 23) + 1023 - 127; - p[FLO] = value << 29; - p[FHI] = sign | (exp << 20) | ((value & 0x007FFFFF) >> 3); + p->parts[FLO] = value << 29; + p->parts[FHI] = sign | (exp << 20) | ((value & 0x007FFFFF) >> 3); fpu_debug(("make_single (%X) = %.04f\n",value,(double)result)); @@ -231,10 +267,11 @@ PRIVATE inline uae_u32 FFPU extract_single(fpu_register const & src) { #if 1 fpu_single input = (fpu_single) src; - fp_declare_init_shape(sip, input, single); - uae_u32 result = (sip->ieee.negative << 31) - | (sip->ieee.exponent << 23) - | sip->ieee.mantissa; + fp_declare_init_shape(sip, single); + sip.value = input; + uae_u32 result = (sip.ieee.negative << 31) + | (sip.ieee.exponent << 23) + | sip.ieee.mantissa; fpu_debug(("extract_single (%.04f) = %X\n",(double)src,result)); return result; #elif 0 /* Original code */ @@ -242,10 +279,10 @@ PRIVATE inline uae_u32 FFPU extract_single(fpu_register const & src) return 0; uae_u32 result; - uae_u32 *p = (uae_u32 *)&src; + fpu_register_parts const *p = (fpu_register_parts const *)&src; - uae_u32 sign = (p[FHI] & 0x80000000); - uae_u32 exp = (p[FHI] & 0x7FF00000) >> 20; + uae_u32 sign = (p->parts[FHI] & 0x80000000); + uae_u32 exp = (p->parts[FHI] & 0x7FF00000) >> 20; if(exp + 127 < 1023) { exp = 0; @@ -255,7 +292,7 @@ PRIVATE inline uae_u32 FFPU extract_single(fpu_register const & src) exp = exp + 127 - 1023; } - result = sign | (exp << 23) | ((p[FHI] & 0x000FFFFF) << 3) | (p[FLO] >> 29); + result = sign | (exp << 23) | ((p->parts[FHI] & 0x000FFFFF) << 3) | (p->parts[FLO] >> 29); fpu_debug(("extract_single (%.04f) = %X\n",(double)src,result)); @@ -268,36 +305,34 @@ PRIVATE inline fpu_register FFPU make_extended(uae_u32 wrd1, uae_u32 wrd2, uae_u { // is it zero? if ((wrd1 & 0x7fff0000) == 0 && wrd2 == 0 && wrd3 == 0) - return 0.0; + return (wrd1 & 0x80000000) ? 
-0.0 : 0.0; fpu_register result; -#if USE_QUAD_DOUBLE +#if defined(USE_QUAD_DOUBLE) // is it NaN? - if ((wrd1 & 0x7fff0000) == 0x7fff0000 && wrd2 != 0 && wrd3 != 0) { - make_nan(result); + if ((wrd1 & 0x7fff0000) == 0x7fff0000 && ((wrd2 & 0x7fffffff) != 0 || wrd3 != 0)) { + make_nan(result, (wrd1 & 0x80000000) != 0); return result; } // is it inf? - if ((wrd1 & 0x7ffff000) == 0x7fff0000 && wrd2 == 0 && wrd3 == 0) { - if ((wrd1 & 0x80000000) == 0) - make_inf_positive(result); - else - make_inf_negative(result); + if ((wrd1 & 0x7ffff000) == 0x7fff0000 && (wrd2 & 0x7fffffff) == 0 && wrd3 == 0) { + make_inf(result, (wrd1 & 0x80000000) != 0); return result; } - fp_declare_init_shape(srp, result, extended); - srp->ieee.negative = (wrd1 >> 31) & 1; - srp->ieee.exponent = (wrd1 >> 16) & FP_EXTENDED_EXP_MAX; - srp->ieee.mantissa0 = (wrd2 >> 16) & 0xffff; - srp->ieee.mantissa1 = ((wrd2 & 0xffff) << 16) | ((wrd3 >> 16) & 0xffff); - srp->ieee.mantissa2 = (wrd3 & 0xffff) << 16; - srp->ieee.mantissa3 = 0; -#elif USE_LONG_DOUBLE - fp_declare_init_shape(srp, result, extended); - srp->ieee.negative = (wrd1 >> 31) & 1; - srp->ieee.exponent = (wrd1 >> 16) & FP_EXTENDED_EXP_MAX; - srp->ieee.mantissa0 = wrd2; - srp->ieee.mantissa1 = wrd3; + fp_declare_init_shape(srp, extended); + srp.ieee.negative = (wrd1 >> 31) & 1; + srp.ieee.exponent = (wrd1 >> 16) & FP_EXTENDED_EXP_MAX; + srp.ieee.mantissa0 = (wrd2 >> 16) & 0xffff; + srp.ieee.mantissa1 = ((wrd2 & 0xffff) << 16) | ((wrd3 >> 16) & 0xffff); + srp.ieee.mantissa2 = (wrd3 & 0xffff) << 16; + srp.ieee.mantissa3 = 0; +#elif defined(USE_LONG_DOUBLE) + fp_declare_init_shape(srp, extended); + srp.ieee.negative = (wrd1 >> 31) & 1; + srp.ieee.exponent = (wrd1 >> 16) & FP_EXTENDED_EXP_MAX; + srp.ieee.mantissa0 = wrd2; + srp.ieee.mantissa1 = wrd3; + #else uae_u32 sgn = (wrd1 >> 31) & 1; uae_u32 exp = (wrd1 >> 16) & 0x7fff; @@ -326,13 +361,14 @@ PRIVATE inline fpu_register FFPU make_extended(uae_u32 wrd1, uae_u32 wrd2, uae_u else exp += 
FP_DOUBLE_EXP_BIAS - FP_EXTENDED_EXP_BIAS; - fp_declare_init_shape(srp, result, double); - srp->ieee.negative = sgn; - srp->ieee.exponent = exp; + fp_declare_init_shape(srp, double); + srp.ieee.negative = sgn; + srp.ieee.exponent = exp; // drop the explicit integer bit - srp->ieee.mantissa0 = (wrd2 & 0x7fffffff) >> 11; - srp->ieee.mantissa1 = (wrd2 << 21) | (wrd3 >> 11); + srp.ieee.mantissa0 = (wrd2 & 0x7fffffff) >> 11; + srp.ieee.mantissa1 = (wrd2 << 21) | (wrd3 >> 11); #endif + result = srp.value; fpu_debug(("make_extended (%X,%X,%X) = %.04f\n",wrd1,wrd2,wrd3,(double)result)); return result; } @@ -347,37 +383,34 @@ PRIVATE inline void FFPU make_extended_no_normalize( ) { // is it zero? - if ((wrd1 && 0x7fff0000) == 0 && wrd2 == 0 && wrd3 == 0) { - make_zero_positive(result); + if ((wrd1 & 0x7fff0000) == 0 && wrd2 == 0 && wrd3 == 0) { + make_zero(result, (wrd1 & 0x80000000) != 0); return; } // is it NaN? - if ((wrd1 & 0x7fff0000) == 0x7fff0000 && wrd2 != 0 && wrd3 != 0) { - make_nan(result); + if ((wrd1 & 0x7fff0000) == 0x7fff0000 && ((wrd2 & 0x7fffffff) != 0 || wrd3 != 0)) { + make_nan(result, (wrd1 & 0x80000000) != 0); return; } -#if USE_QUAD_DOUBLE +#if defined(USE_QUAD_DOUBLE) // is it inf? 
- if ((wrd1 & 0x7ffff000) == 0x7fff0000 && wrd2 == 0 && wrd3 == 0) { - if ((wrd1 & 0x80000000) == 0) - make_inf_positive(result); - else - make_inf_negative(result); + if ((wrd1 & 0x7ffff000) == 0x7fff0000 && (wrd2 & 0x7fffffff) == 0 && wrd3 == 0) { + make_inf(result, (wrd1 & 0x80000000) != 0); return; } - fp_declare_init_shape(srp, result, extended); - srp->ieee.negative = (wrd1 >> 31) & 1; - srp->ieee.exponent = (wrd1 >> 16) & FP_EXTENDED_EXP_MAX; - srp->ieee.mantissa0 = (wrd2 >> 16) & 0xffff; - srp->ieee.mantissa1 = ((wrd2 & 0xffff) << 16) | ((wrd3 >> 16) & 0xffff); - srp->ieee.mantissa2 = (wrd3 & 0xffff) << 16; - srp->ieee.mantissa3 = 0; -#elif USE_LONG_DOUBLE - fp_declare_init_shape(srp, result, extended); - srp->ieee.negative = (wrd1 >> 31) & 1; - srp->ieee.exponent = (wrd1 >> 16) & FP_EXTENDED_EXP_MAX; - srp->ieee.mantissa0 = wrd2; - srp->ieee.mantissa1 = wrd3; + fp_declare_init_shape(srp, extended); + srp.ieee.negative = (wrd1 >> 31) & 1; + srp.ieee.exponent = (wrd1 >> 16) & FP_EXTENDED_EXP_MAX; + srp.ieee.mantissa0 = (wrd2 >> 16) & 0xffff; + srp.ieee.mantissa1 = ((wrd2 & 0xffff) << 16) | ((wrd3 >> 16) & 0xffff); + srp.ieee.mantissa2 = (wrd3 & 0xffff) << 16; + srp.ieee.mantissa3 = 0; +#elif defined(USE_LONG_DOUBLE) + fp_declare_init_shape(srp, extended); + srp.ieee.negative = (wrd1 >> 31) & 1; + srp.ieee.exponent = (wrd1 >> 16) & FP_EXTENDED_EXP_MAX; + srp.ieee.mantissa0 = wrd2; + srp.ieee.mantissa1 = wrd3; #else uae_u32 exp = (wrd1 >> 16) & 0x7fff; if (exp < FP_EXTENDED_EXP_BIAS - FP_DOUBLE_EXP_BIAS) @@ -387,13 +420,14 @@ PRIVATE inline void FFPU make_extended_no_normalize( else exp += FP_DOUBLE_EXP_BIAS - FP_EXTENDED_EXP_BIAS; - fp_declare_init_shape(srp, result, double); - srp->ieee.negative = (wrd1 >> 31) & 1; - srp->ieee.exponent = exp; + fp_declare_init_shape(srp, double); + srp.ieee.negative = (wrd1 >> 31) & 1; + srp.ieee.exponent = exp; // drop the explicit integer bit - srp->ieee.mantissa0 = (wrd2 & 0x7fffffff) >> 11; - srp->ieee.mantissa1 = (wrd2 
<< 21) | (wrd3 >> 11); + srp.ieee.mantissa0 = (wrd2 & 0x7fffffff) >> 11; + srp.ieee.mantissa1 = (wrd2 << 21) | (wrd3 >> 11); #endif + result = srp.value; fpu_debug(("make_extended (%X,%X,%X) = %.04f\n",wrd1,wrd2,wrd3,(double)result)); } @@ -406,41 +440,43 @@ PRIVATE inline void FFPU extract_extended(fpu_register const & src, *wrd1 = *wrd2 = *wrd3 = 0; return; } -#if USE_QUAD_DOUBLE +#if defined(USE_QUAD_DOUBLE) // FIXME: deal with denormals? - fp_declare_init_shape(srp, src, extended); - *wrd1 = (srp->ieee.negative << 31) | (srp->ieee.exponent << 16); + fp_declare_init_shape(srp, extended); + srp.value = src; + *wrd1 = (srp.ieee.negative << 31) | (srp.ieee.exponent << 16); // always set the explicit integer bit. - *wrd2 = 0x80000000 | (srp->ieee.mantissa0 << 15) | ((srp->ieee.mantissa1 & 0xfffe0000) >> 17); - *wrd3 = (srp->ieee.mantissa1 << 15) | ((srp->ieee.mantissa2 & 0xfffe0000) >> 17); -#elif USE_LONG_DOUBLE - uae_u32 *p = (uae_u32 *)&src; + *wrd2 = 0x80000000 | (srp.ieee.mantissa0 << 15) | ((srp.ieee.mantissa1 & 0xfffe0000) >> 17); + *wrd3 = (srp.ieee.mantissa1 << 15) | ((srp.ieee.mantissa2 & 0xfffe0000) >> 17); +#elif defined(USE_LONG_DOUBLE) + fpu_register_parts p = { src }; #ifdef WORDS_BIGENDIAN - *wrd1 = p[0]; - *wrd2 = p[1]; - *wrd3 = p[2]; + *wrd1 = p.parts[0]; + *wrd2 = p.parts[1]; + *wrd3 = p.parts[2]; #else - *wrd3 = p[0]; - *wrd2 = p[1]; - *wrd1 = ( (uae_u32)*((uae_u16 *)&p[2]) ) << 16; + *wrd3 = p.parts[0]; + *wrd2 = p.parts[1]; + *wrd1 = (p.parts[2] & 0xffff) << 16; #endif #else - fp_declare_init_shape(srp, src, double); + fp_declare_init_shape(srp, double); + srp.value = src; fpu_debug(("extract_extended (%d,%d,%X,%X)\n", - srp->ieee.negative , srp->ieee.exponent, - srp->ieee.mantissa0, srp->ieee.mantissa1)); + srp.ieee.negative , srp.ieee.exponent, + srp.ieee.mantissa0, srp.ieee.mantissa1)); - uae_u32 exp = srp->ieee.exponent; + uae_u32 exp = srp.ieee.exponent; if (exp == FP_DOUBLE_EXP_MAX) exp = FP_EXTENDED_EXP_MAX; else exp += 
FP_EXTENDED_EXP_BIAS - FP_DOUBLE_EXP_BIAS; - *wrd1 = (srp->ieee.negative << 31) | (exp << 16); + *wrd1 = (srp.ieee.negative << 31) | (exp << 16); // always set the explicit integer bit. - *wrd2 = 0x80000000 | (srp->ieee.mantissa0 << 11) | ((srp->ieee.mantissa1 & 0xffe00000) >> 21); - *wrd3 = srp->ieee.mantissa1 << 11; + *wrd2 = 0x80000000 | (srp.ieee.mantissa0 << 11) | ((srp.ieee.mantissa1 & 0xffe00000) >> 21); + *wrd3 = srp.ieee.mantissa1 << 11; #endif fpu_debug(("extract_extended (%.04f) = %X,%X,%X\n",(double)src,*wrd1,*wrd2,*wrd3)); } @@ -472,7 +508,14 @@ PRIVATE inline void FFPU extract_double(fpu_register const & src, fpu_double value; uae_u32 parts[2]; } dest; +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) + fpu_register_parts p = { src }; + // always set the explicit integer bit. + p.parts[1] |= 0x80000000; + dest.value = (fpu_double)p.val; +#else dest.value = (fpu_double)src; +#endif #ifdef WORDS_BIGENDIAN *wrd1 = dest.parts[0]; *wrd2 = dest.parts[1]; @@ -486,41 +529,88 @@ PRIVATE inline void FFPU extract_double(fpu_register const & src, // to_pack PRIVATE inline fpu_register FFPU make_packed(uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3) { - fpu_double d; - char *cp; - char str[100]; + fpu_register d; + bool sm = (wrd1 & 0x80000000) != 0; + bool se = (wrd1 & 0x40000000) != 0; + int exp = (wrd1 & 0x7fff0000) >> 16; + unsigned int dig; + fpu_register pwr; + + if (exp == 0x7fff) + { + if ((wrd2 & 0x7fffffff) == 0 && wrd3 == 0) + { + make_inf(d, sm); + } else + { + make_nan(d, sm); + } + return d; + } + dig = wrd1 & 0x0000000f; + if (dig == 0 && wrd2 == 0 && wrd3 == 0) + { + make_zero(d, sm); + return d; + } - cp = str; - if (wrd1 & 0x80000000) - *cp++ = '-'; - *cp++ = (char)((wrd1 & 0xf) + '0'); - *cp++ = '.'; - *cp++ = (char)(((wrd2 >> 28) & 0xf) + '0'); - *cp++ = (char)(((wrd2 >> 24) & 0xf) + '0'); - *cp++ = (char)(((wrd2 >> 20) & 0xf) + '0'); - *cp++ = (char)(((wrd2 >> 16) & 0xf) + '0'); - *cp++ = (char)(((wrd2 >> 12) & 0xf) + '0'); - *cp++ = 
(char)(((wrd2 >> 8) & 0xf) + '0'); - *cp++ = (char)(((wrd2 >> 4) & 0xf) + '0'); - *cp++ = (char)(((wrd2 >> 0) & 0xf) + '0'); - *cp++ = (char)(((wrd3 >> 28) & 0xf) + '0'); - *cp++ = (char)(((wrd3 >> 24) & 0xf) + '0'); - *cp++ = (char)(((wrd3 >> 20) & 0xf) + '0'); - *cp++ = (char)(((wrd3 >> 16) & 0xf) + '0'); - *cp++ = (char)(((wrd3 >> 12) & 0xf) + '0'); - *cp++ = (char)(((wrd3 >> 8) & 0xf) + '0'); - *cp++ = (char)(((wrd3 >> 4) & 0xf) + '0'); - *cp++ = (char)(((wrd3 >> 0) & 0xf) + '0'); - *cp++ = 'E'; - if (wrd1 & 0x40000000) - *cp++ = '-'; - *cp++ = (char)(((wrd1 >> 24) & 0xf) + '0'); - *cp++ = (char)(((wrd1 >> 20) & 0xf) + '0'); - *cp++ = (char)(((wrd1 >> 16) & 0xf) + '0'); - *cp = 0; - sscanf(str, "%le", &d); + /* + * Convert the bcd exponent to binary by successive adds and + * muls. Set the sign according to SE. Subtract 16 to compensate + * for the mantissa which is to be interpreted as 17 integer + * digits, rather than 1 integer and 16 fraction digits. + * Note: this operation can never overflow. + */ + exp = ((wrd1 >> 24) & 0xf); + exp = exp * 10 + ((wrd1 >> 20) & 0xf); + exp = exp * 10 + ((wrd1 >> 16) & 0xf); + if (se) + exp = -exp; + /* sub to compensate for shift of mant */ + exp = exp - 16; + + /* + * Convert the bcd mantissa to binary by successive + * adds and muls. Set the sign according to SM. + * The mantissa digits will be converted with the decimal point + * assumed following the least-significant digit. + * Note: this operation can never overflow. 
+ */ + d = wrd1 & 0xf; + d = (d * LD(10.0)) + ((wrd2 >> 28) & 0xf); + d = (d * LD(10.0)) + ((wrd2 >> 24) & 0xf); + d = (d * LD(10.0)) + ((wrd2 >> 20) & 0xf); + d = (d * LD(10.0)) + ((wrd2 >> 16) & 0xf); + d = (d * LD(10.0)) + ((wrd2 >> 12) & 0xf); + d = (d * LD(10.0)) + ((wrd2 >> 8) & 0xf); + d = (d * LD(10.0)) + ((wrd2 >> 4) & 0xf); + d = (d * LD(10.0)) + ((wrd2 ) & 0xf); + d = (d * LD(10.0)) + ((wrd3 >> 28) & 0xf); + d = (d * LD(10.0)) + ((wrd3 >> 24) & 0xf); + d = (d * LD(10.0)) + ((wrd3 >> 20) & 0xf); + d = (d * LD(10.0)) + ((wrd3 >> 16) & 0xf); + d = (d * LD(10.0)) + ((wrd3 >> 12) & 0xf); + d = (d * LD(10.0)) + ((wrd3 >> 8) & 0xf); + d = (d * LD(10.0)) + ((wrd3 >> 4) & 0xf); + d = (d * LD(10.0)) + ((wrd3 ) & 0xf); - fpu_debug(("make_packed str = %s\n",str)); + /* Check the sign of the mant and make the value in fp0 the same sign. */ + if (sm) + d = -d; + + /* + * Calculate power-of-ten factor from exponent. + */ + if (exp < 0) + { + exp = -exp; + pwr = POWL(LD(10.0), exp); + d = d / pwr; + } else + { + pwr = POWL(LD(10.0), exp); + d = d * pwr; + } fpu_debug(("make_packed(%X,%X,%X) = %.04f\n",wrd1,wrd2,wrd3,(double)d)); return d; @@ -529,52 +619,88 @@ PRIVATE inline fpu_register FFPU make_packed(uae_u32 wrd1, uae_u32 wrd2, uae_u32 // from_pack PRIVATE inline void FFPU extract_packed(fpu_register const & src, uae_u32 * wrd1, uae_u32 * wrd2, uae_u32 * wrd3) { - int i; - int t; - char *cp; - char str[100]; - - sprintf(str, "%.16e", src); - - fpu_debug(("extract_packed(%.04f,%s)\n",(double)src,str)); - - cp = str; + fpu_register pwr; + int exp; + fpu_register d; + bool sm, se; + int dig; + *wrd1 = *wrd2 = *wrd3 = 0; - if (*cp == '-') { - cp++; - *wrd1 = 0x80000000; - } - if (*cp == '+') - cp++; - *wrd1 |= (*cp++ - '0'); - if (*cp == '.') - cp++; - for (i = 0; i < 8; i++) { - *wrd2 <<= 4; - if (*cp >= '0' && *cp <= '9') - *wrd2 |= *cp++ - '0'; - } - for (i = 0; i < 8; i++) { - *wrd3 <<= 4; - if (*cp >= '0' && *cp <= '9') - *wrd3 |= *cp++ - '0'; - } - if (*cp == 'e' 
|| *cp == 'E') { - cp++; - if (*cp == '-') { - cp++; - *wrd1 |= 0x40000000; - } - if (*cp == '+') - cp++; - t = 0; - for (i = 0; i < 3; i++) { - if (*cp >= '0' && *cp <= '9') - t = (t << 4) | (*cp++ - '0'); - } - *wrd1 |= t << 16; + + d = src; + sm = false; + if (isneg(src)) + { + d = -d; + sm = true; } + if (isnan(src)) + { + *wrd1 = sm ? 0xffff0000 : 0x7fff0000; + *wrd2 = 0xffffffff; + *wrd3 = 0xffffffff; + return; + } + if (isinf(src)) + { + *wrd1 = sm ? 0xffff0000 : 0x7fff0000; + *wrd2 = *wrd3 = 0; + return; + } + if (iszero(src)) + { + *wrd1 = sm ? 0x80000000 : 0x00000000; + *wrd2 = *wrd3 = 0; + return; + } + sm = false; + if (isneg(src)) + { + d = -d; + sm = true; + } + exp = (int)floor(LOG10L(d)); + se = false; + if (exp < 0) + { + exp = -exp; + se = true; + pwr = POWL(LD(10.0), exp); + d = d * pwr; + } else + { + pwr = POWL(LD(10.0), exp); + d = d / pwr; + } + dig = (int)d; d = LD(10) * (d - dig); *wrd1 |= dig; + dig = (int)d; d = LD(10) * (d - dig); *wrd2 |= dig << 28; + dig = (int)d; d = LD(10) * (d - dig); *wrd2 |= dig << 24; + dig = (int)d; d = LD(10) * (d - dig); *wrd2 |= dig << 20; + dig = (int)d; d = LD(10) * (d - dig); *wrd2 |= dig << 16; + dig = (int)d; d = LD(10) * (d - dig); *wrd2 |= dig << 12; + dig = (int)d; d = LD(10) * (d - dig); *wrd2 |= dig << 8; + dig = (int)d; d = LD(10) * (d - dig); *wrd2 |= dig << 4; + dig = (int)d; d = LD(10) * (d - dig); *wrd2 |= dig; + dig = (int)d; d = LD(10) * (d - dig); *wrd3 |= dig << 28; + dig = (int)d; d = LD(10) * (d - dig); *wrd3 |= dig << 24; + dig = (int)d; d = LD(10) * (d - dig); *wrd3 |= dig << 20; + dig = (int)d; d = LD(10) * (d - dig); *wrd3 |= dig << 16; + dig = (int)d; d = LD(10) * (d - dig); *wrd3 |= dig << 12; + dig = (int)d; d = LD(10) * (d - dig); *wrd3 |= dig << 8; + dig = (int)d; d = LD(10) * (d - dig); *wrd3 |= dig << 4; + dig = (int)d; *wrd3 |= dig; + + dig = (exp / 100) % 10; + *wrd1 |= dig << 24; + dig = (exp / 10) % 10; + *wrd1 |= dig << 20; + dig = (exp) % 10; + *wrd1 |= dig << 16; + if 
(sm) + *wrd1 |= 0x80000000; + if (se) + *wrd1 |= 0x40000000; fpu_debug(("extract_packed(%.04f) = %X,%X,%X\n",(double)src,*wrd1,*wrd2,*wrd3)); } @@ -628,11 +754,9 @@ PRIVATE inline int FFPU get_fp_value (uae_u32 opcode, uae_u16 extra, fpu_registe break; case 3: ad = m68k_areg (regs, reg); - m68k_areg (regs, reg) += reg == 7 ? sz2[size] : sz1[size]; break; case 4: - m68k_areg (regs, reg) -= reg == 7 ? sz2[size] : sz1[size]; - ad = m68k_areg (regs, reg); + ad = m68k_areg (regs, reg) - (reg == 7 ? sz2[size] : sz1[size]); break; case 5: ad = m68k_areg (regs, reg) + (uae_s32) (uae_s16) next_iword(); @@ -673,8 +797,8 @@ PRIVATE inline int FFPU get_fp_value (uae_u32 opcode, uae_u16 extra, fpu_registe fpu_debug(("get_fp_value m68k_getpc()=%X\n",m68k_getpc())); fpu_debug(("get_fp_value ad=%X\n",ad)); fpu_debug(("get_fp_value get_long (ad)=%X\n",get_long (ad))); - dump_first_bytes( get_real_address(ad)-64, 64 ); - dump_first_bytes( get_real_address(ad), 64 ); + //dump_first_bytes( get_real_address(ad, 0, 0)-64, 64 ); + //dump_first_bytes( get_real_address(ad, 0, 0), 64 ); switch (size) { case 0: @@ -721,15 +845,24 @@ PRIVATE inline int FFPU get_fp_value (uae_u32 opcode, uae_u16 extra, fpu_registe return 0; } + switch (mode) { + case 3: + m68k_areg (regs, reg) += reg == 7 ? sz2[size] : sz1[size]; + break; + case 4: + m68k_areg (regs, reg) -= reg == 7 ? 
sz2[size] : sz1[size]; + break; + } + // fpu_debug(("get_fp_value result = %.04f\n",(float)src)); return 1; } /* Convert the FP value to integer according to the current m68k rounding mode */ -PRIVATE inline uae_s32 FFPU toint(fpu_register const & src) +PRIVATE inline fpu_register FFPU fp_doround(fpu_register const & src) { fpu_register result; - switch (get_fpcr() & 0x30) { + switch (get_fpcr() & FPCR_ROUNDING_MODE) { case FPCR_ROUND_ZERO: result = fp_round_to_zero(src); break; @@ -746,7 +879,12 @@ PRIVATE inline uae_s32 FFPU toint(fpu_register const & src) result = src; /* should never be reached */ break; } - return (uae_s32)result; + return result; +} + +PRIVATE inline uae_s32 FFPU toint(fpu_register const & src) +{ + return (uae_s32)fp_doround(src); } PRIVATE inline int FFPU put_fp_value (uae_u32 opcode, uae_u16 extra, fpu_register const & value) @@ -844,37 +982,40 @@ PRIVATE inline int FFPU put_fp_value (uae_u32 opcode, uae_u16 extra, fpu_registe case 1: put_long (ad, extract_single(value)); break; - case 2: { - uae_u32 wrd1, wrd2, wrd3; - extract_extended(value, &wrd1, &wrd2, &wrd3); - put_long (ad, wrd1); - ad += 4; - put_long (ad, wrd2); - ad += 4; - put_long (ad, wrd3); + case 2: + { + uae_u32 wrd1, wrd2, wrd3; + extract_extended(value, &wrd1, &wrd2, &wrd3); + put_long (ad, wrd1); + ad += 4; + put_long (ad, wrd2); + ad += 4; + put_long (ad, wrd3); + } break; - } - case 3: { - uae_u32 wrd1, wrd2, wrd3; - extract_packed(value, &wrd1, &wrd2, &wrd3); - put_long (ad, wrd1); - ad += 4; - put_long (ad, wrd2); - ad += 4; - put_long (ad, wrd3); + case 3: + { + uae_u32 wrd1, wrd2, wrd3; + extract_packed(value, &wrd1, &wrd2, &wrd3); + put_long (ad, wrd1); + ad += 4; + put_long (ad, wrd2); + ad += 4; + put_long (ad, wrd3); + } break; - } case 4: put_word(ad, (uae_s16) toint(value)); break; - case 5: { - uae_u32 wrd1, wrd2; - extract_double(value, &wrd1, &wrd2); - put_long (ad, wrd1); - ad += 4; - put_long (ad, wrd2); + case 5: + { + uae_u32 wrd1, wrd2; + 
extract_double(value, &wrd1, &wrd2); + put_long (ad, wrd1); + ad += 4; + put_long (ad, wrd2); + } break; - } case 6: put_byte(ad, (uae_s8) toint(value)); break; @@ -951,7 +1092,7 @@ PRIVATE inline int FFPU fpp_cond(int condition) if (NaN) N = Z = 0; - switch (condition) { + switch (condition & 0x1f) { case 0x00: CONDRET("False",0); case 0x01: CONDRET("Equal",Z); case 0x02: CONDRET("Ordered Greater Than",!(NaN || Z || N)); @@ -1021,7 +1162,7 @@ void FFPU fpuop_scc(uae_u32 opcode, uae_u32 extra) { fpu_debug(("fscc_opp %X, %X at %08lx\n", (uae_u32)opcode, (uae_u32)extra, m68k_getpc ())); - uae_u32 ad; + uae_u32 ad = 0; int cc = fpp_cond(extra & 0x3f); if (cc == -1) { m68k_setpc (m68k_getpc () - 4); @@ -1039,11 +1180,11 @@ void FFPU fpuop_scc(uae_u32 opcode, uae_u32 extra) put_byte(ad, cc ? 0xff : 0x00); } -void FFPU fpuop_trapcc(uae_u32 opcode, uaecptr oldpc) +void FFPU fpuop_trapcc(uae_u32 opcode, uaecptr oldpc, uae_u32 extra) { - fpu_debug(("ftrapcc_opp %X at %08lx\n", (uae_u32)opcode, m68k_getpc ())); + fpu_debug(("ftrapcc_opp %X, %X at %08lx\n", (uae_u32)opcode, (uae_u32)extra, m68k_getpc ())); - int cc = fpp_cond(opcode & 0x3f); + int cc = fpp_cond(extra & 0x3f); if (cc == -1) { m68k_setpc (oldpc); op_illg (opcode); @@ -1075,7 +1216,7 @@ void FFPU fpuop_save(uae_u32 opcode) { fpu_debug(("fsave_opp at %08lx\n", m68k_getpc ())); - uae_u32 ad; + uae_u32 ad = 0; int incr = (opcode & 0x38) == 0x20 ? -1 : 1; int i; @@ -1136,7 +1277,7 @@ void FFPU fpuop_restore(uae_u32 opcode) { fpu_debug(("frestore_opp at %08lx\n", m68k_getpc ())); - uae_u32 ad; + uae_u32 ad = 0; uae_u32 d; int incr = (opcode & 0x38) == 0x20 ? -1 : 1; @@ -1256,8 +1397,7 @@ void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) if ((opcode & 0x38) == 0) { if (extra & 0x2000) { // dr bit if (extra & 0x1000) { - // according to the manual, the msb bits are always zero. 
- m68k_dreg (regs, opcode & 7) = get_fpcr() & 0xFFFF; + m68k_dreg (regs, opcode & 7) = get_fpcr(); fpu_debug(("FMOVEM FPU fpcr (%X) -> D%d\n", get_fpcr(), opcode & 7)); } if (extra & 0x0800) { @@ -1283,13 +1423,11 @@ void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) fpu_debug(("FMOVEM D%d (%X) -> FPU instruction_address\n", opcode & 7, FPU instruction_address)); } } -// } else if ((opcode & 0x38) == 1) { } else if ((opcode & 0x38) == 8) { if (extra & 0x2000) { // dr bit if (extra & 0x1000) { - // according to the manual, the msb bits are always zero. - m68k_areg (regs, opcode & 7) = get_fpcr() & 0xFFFF; + m68k_areg (regs, opcode & 7) = get_fpcr(); fpu_debug(("FMOVEM FPU fpcr (%X) -> A%d\n", get_fpcr(), opcode & 7)); } if (extra & 0x0800) { @@ -1333,7 +1471,7 @@ void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) } else if (extra & 0x2000) { /* FMOVEM FPP->memory */ - uae_u32 ad; + uae_u32 ad = 0; int incr = 0; if (get_fp_ad(opcode, &ad) == 0) { @@ -1352,8 +1490,7 @@ void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) } ad -= incr; if (extra & 0x1000) { - // according to the manual, the msb bits are always zero. 
- put_long (ad, get_fpcr() & 0xFFFF); + put_long (ad, get_fpcr()); fpu_debug(("FMOVEM FPU fpcr (%X) -> mem %X\n", get_fpcr(), ad )); ad += 4; } @@ -1375,7 +1512,7 @@ void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) } else { /* FMOVEM memory->FPP */ - uae_u32 ad; + uae_u32 ad = 0; if (get_fp_ad(opcode, &ad) == 0) { m68k_setpc (m68k_getpc () - 4); @@ -1421,7 +1558,7 @@ void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) return; case 6: case 7: { - uae_u32 ad, list = 0; + uae_u32 ad = 0, list = 0; int incr = 0; if (extra & 0x2000) { /* FMOVEM FPP->memory */ @@ -1568,27 +1705,27 @@ void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) switch (extra & 0x7f) { case 0x00: // FPU registers[reg] = 4.0 * atan(1.0); - FPU registers[reg] = 3.1415926535897932384626433832795; + FPU registers[reg] = LD(3.1415926535897932384626433832795029); fpu_debug(("FP const: Pi\n")); break; case 0x0b: // FPU registers[reg] = log10 (2.0); - FPU registers[reg] = 0.30102999566398119521373889472449; + FPU registers[reg] = LD(0.30102999566398119521); // 0.3010299956639811952137388947244930L fpu_debug(("FP const: Log 10 (2)\n")); break; case 0x0c: // FPU registers[reg] = exp (1.0); - FPU registers[reg] = 2.7182818284590452353602874713527; + FPU registers[reg] = LD(2.7182818284590452353); // 2.7182818284590452353602874713526625L fpu_debug(("FP const: e\n")); break; case 0x0d: // FPU registers[reg] = log (exp (1.0)) / log (2.0); - FPU registers[reg] = 1.4426950408889634073599246810019; + FPU registers[reg] = LD(1.4426950408889634073599246810019); fpu_debug(("FP const: Log 2 (e)\n")); break; case 0x0e: // FPU registers[reg] = log (exp (1.0)) / log (10.0); - FPU registers[reg] = 0.43429448190325182765112891891661; + FPU registers[reg] = LD(0.4342944819032518276511289189166051); fpu_debug(("FP const: Log 10 (e)\n")); break; case 0x0f: @@ -1597,73 +1734,79 @@ void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) break; case 0x30: // FPU registers[reg] = log (2.0); - FPU 
registers[reg] = 0.69314718055994530941723212145818; + FPU registers[reg] = LD(0.6931471805599453094172321214581766); fpu_debug(("FP const: ln(2)\n")); break; case 0x31: // FPU registers[reg] = log (10.0); - FPU registers[reg] = 2.3025850929940456840179914546844; + FPU registers[reg] = LD(2.3025850929940456840179914546843642); fpu_debug(("FP const: ln(10)\n")); break; case 0x32: - // ?? - FPU registers[reg] = 1.0e0; + FPU registers[reg] = LD(1.0e0); fpu_debug(("FP const: 1.0e0\n")); break; case 0x33: - FPU registers[reg] = 1.0e1; + FPU registers[reg] = LD(1.0e1); fpu_debug(("FP const: 1.0e1\n")); break; case 0x34: - FPU registers[reg] = 1.0e2; + FPU registers[reg] = LD(1.0e2); fpu_debug(("FP const: 1.0e2\n")); break; case 0x35: - FPU registers[reg] = 1.0e4; + FPU registers[reg] = LD(1.0e4); fpu_debug(("FP const: 1.0e4\n")); break; case 0x36: - FPU registers[reg] = 1.0e8; + FPU registers[reg] = LD(1.0e8); fpu_debug(("FP const: 1.0e8\n")); break; case 0x37: - FPU registers[reg] = 1.0e16; + FPU registers[reg] = LD(1.0e16); fpu_debug(("FP const: 1.0e16\n")); break; case 0x38: - FPU registers[reg] = 1.0e32; + FPU registers[reg] = LD(1.0e32); fpu_debug(("FP const: 1.0e32\n")); break; case 0x39: - FPU registers[reg] = 1.0e64; + FPU registers[reg] = LD(1.0e64); fpu_debug(("FP const: 1.0e64\n")); break; case 0x3a: - FPU registers[reg] = 1.0e128; + FPU registers[reg] = LD(1.0e128); fpu_debug(("FP const: 1.0e128\n")); break; case 0x3b: - FPU registers[reg] = 1.0e256; + FPU registers[reg] = LD(1.0e256); fpu_debug(("FP const: 1.0e256\n")); break; -#if USE_LONG_DOUBLE || USE_QUAD_DOUBLE +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) case 0x3c: - FPU registers[reg] = 1.0e512L; + FPU registers[reg] = LD(1.0e512); fpu_debug(("FP const: 1.0e512\n")); break; case 0x3d: - FPU registers[reg] = 1.0e1024L; + FPU registers[reg] = LD(1.0e1024); fpu_debug(("FP const: 1.0e1024\n")); break; case 0x3e: - FPU registers[reg] = 1.0e2048L; + FPU registers[reg] = LD(1.0e2048); 
fpu_debug(("FP const: 1.0e2048\n")); break; case 0x3f: - FPU registers[reg] = 1.0e4096L; + FPU registers[reg] = LD(1.0e4096); fpu_debug(("FP const: 1.0e4096\n")); -#endif break; +#else + case 0x3c: + case 0x3d: + case 0x3e: + case 0x3f: + make_inf(FPU registers[reg], false); + break; +#endif default: m68k_setpc (m68k_getpc () - 4); op_illg (opcode); @@ -1761,34 +1904,22 @@ void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) fpu_debug(("FMUL %.04f\n",(double)src)); get_dest_flags(FPU registers[reg]); get_source_flags(src); - if(fl_dest.in_range && fl_source.in_range) { + if (fl_dest.in_range && fl_source.in_range) { if ((extra & 0x7f) == 0x63) FPU registers[reg] = (float)(FPU registers[reg] * src); else FPU registers[reg] = (double)(FPU registers[reg] * src); } else if (fl_dest.nan || fl_source.nan || - fl_dest.zero && fl_source.infinity || - fl_dest.infinity && fl_source.zero ) { - make_nan( FPU registers[reg] ); + (fl_dest.zero && fl_source.infinity) || + (fl_dest.infinity && fl_source.zero) ) { + make_nan( FPU registers[reg], fl_dest.negative ); } else if (fl_dest.zero || fl_source.zero ) { - if (fl_dest.negative && !fl_source.negative || - !fl_dest.negative && fl_source.negative) { - make_zero_negative(FPU registers[reg]); - } - else { - make_zero_positive(FPU registers[reg]); - } + make_zero(FPU registers[reg], fl_dest.negative != fl_source.negative); } else { - if( fl_dest.negative && !fl_source.negative || - !fl_dest.negative && fl_source.negative) { - make_inf_negative(FPU registers[reg]); - } - else { - make_inf_positive(FPU registers[reg]); - } + make_inf(FPU registers[reg], fl_dest.negative != fl_source.negative); } make_fpsr(FPU registers[reg]); break; @@ -1809,43 +1940,68 @@ void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) make_fpsr(FPU registers[reg]); break; case 0x01: /* FINT */ + /* + * FIXME: in round-to-nearest, x87 + * uses round-to-odd, but m68k round-to-even rule + */ fpu_debug(("FINT %.04f\n",(double)src)); - FPU registers[reg] 
= toint(src); + if (isinf(src)) + FPU registers[reg] = src; + else + FPU registers[reg] = fp_doround(src); make_fpsr(FPU registers[reg]); break; case 0x02: /* FSINH */ fpu_debug(("FSINH %.04f\n",(double)src)); - FPU registers[reg] = fp_sinh (src); + if (isinf(src)) + FPU registers[reg] = src; + else + FPU registers[reg] = fp_sinh (src); make_fpsr(FPU registers[reg]); break; case 0x03: /* FINTRZ */ fpu_debug(("FINTRZ %.04f\n",(double)src)); - FPU registers[reg] = fp_round_to_zero(src); + if (isinf(src)) + FPU registers[reg] = src; + else + FPU registers[reg] = fp_round_to_zero(src); make_fpsr(FPU registers[reg]); break; case 0x04: /* FSQRT */ fpu_debug(("FSQRT %.04f\n",(double)src)); - FPU registers[reg] = fp_sqrt (src); + if (isinf(src) && !isneg(src)) + FPU registers[reg] = src; + else + FPU registers[reg] = fp_sqrt (src); make_fpsr(FPU registers[reg]); break; case 0x06: /* FLOGNP1 */ fpu_debug(("FLOGNP1 %.04f\n",(double)src)); - FPU registers[reg] = fp_log (src + 1.0); + if (isinf(src) && !isneg(src)) + make_inf(FPU registers[reg], false); + else + FPU registers[reg] = fp_log1p (src); make_fpsr(FPU registers[reg]); break; case 0x08: /* FETOXM1 */ fpu_debug(("FETOXM1 %.04f\n",(double)src)); - FPU registers[reg] = fp_exp (src) - 1.0; + FPU registers[reg] = fp_expm1 (src); make_fpsr(FPU registers[reg]); break; case 0x09: /* FTANH */ fpu_debug(("FTANH %.04f\n",(double)src)); - FPU registers[reg] = fp_tanh (src); + if (isinf(src)) + FPU registers[reg] = isneg(src) ? LD(-1.0) : LD(1.0); + else + FPU registers[reg] = fp_tanh (src); make_fpsr(FPU registers[reg]); break; case 0x0a: /* FATAN */ fpu_debug(("FATAN %.04f\n",(double)src)); - FPU registers[reg] = fp_atan (src); + if (isinf(src)) + FPU registers[reg] = isneg (src) ? 
LD(-1.570796326794896619231321691639751442) : LD(1.570796326794896619231321691639751442); + else + FPU registers[reg] = fp_atan (src); make_fpsr(FPU registers[reg]); break; case 0x0c: /* FASIN */ @@ -1870,32 +2026,65 @@ void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) break; case 0x10: /* FETOX */ fpu_debug(("FETOX %.04f\n",(double)src)); - FPU registers[reg] = fp_exp (src); + if (isinf(src)) + { + make_zero(FPU registers[reg], isneg(src)); + } else + { + FPU registers[reg] = fp_exp (src); + } make_fpsr(FPU registers[reg]); break; case 0x11: /* FTWOTOX */ fpu_debug(("FTWOTOX %.04f\n",(double)src)); - FPU registers[reg] = fp_pow(2.0, src); + if (isinf(src)) + { + if (isneg(src)) + make_zero(FPU registers[reg], false); + else + make_inf(FPU registers[reg], true); + } else + { + FPU registers[reg] = fp_pow2(src); + } make_fpsr(FPU registers[reg]); break; case 0x12: /* FTENTOX */ fpu_debug(("FTENTOX %.04f\n",(double)src)); - FPU registers[reg] = fp_pow(10.0, src); + if (isinf(src)) + { + if (isneg(src)) + make_zero(FPU registers[reg], false); + else + make_inf(FPU registers[reg], true); + } else + { + FPU registers[reg] = fp_pow10(src); + } make_fpsr(FPU registers[reg]); break; case 0x14: /* FLOGN */ fpu_debug(("FLOGN %.04f\n",(double)src)); - FPU registers[reg] = fp_log (src); + if (isinf(src) && !isneg(src)) + make_inf(FPU registers[reg], false); + else + FPU registers[reg] = fp_log (src); make_fpsr(FPU registers[reg]); break; case 0x15: /* FLOG10 */ fpu_debug(("FLOG10 %.04f\n",(double)src)); - FPU registers[reg] = fp_log10 (src); + if (isinf(src) && !isneg(src)) + make_inf(FPU registers[reg], false); + else + FPU registers[reg] = fp_log10 (src); make_fpsr(FPU registers[reg]); break; case 0x16: /* FLOG2 */ fpu_debug(("FLOG2 %.04f\n",(double)src)); - FPU registers[reg] = fp_log (src) / fp_log (2.0); + if (isinf(src) && !isneg(src)) + make_inf(FPU registers[reg], false); + else + FPU registers[reg] = fp_log2 (src); make_fpsr(FPU registers[reg]); break; case 
0x18: /* FABS */ @@ -1905,12 +2094,21 @@ void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) break; case 0x19: /* FCOSH */ fpu_debug(("FCOSH %.04f\n",(double)src)); - FPU registers[reg] = fp_cosh(src); + if (isinf(src)) + { + make_inf(FPU registers[reg], false); + } else + { + FPU registers[reg] = fp_cosh(src); + } make_fpsr(FPU registers[reg]); break; case 0x1a: /* FNEG */ fpu_debug(("FNEG %.04f\n",(double)src)); - FPU registers[reg] = -src; + if (iszero(src)) + make_zero(FPU registers[reg], !isneg(src)); + else + FPU registers[reg] = -src; make_fpsr(FPU registers[reg]); break; case 0x1c: /* FACOS */ @@ -1926,20 +2124,24 @@ void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) case 0x1e: /* FGETEXP */ fpu_debug(("FGETEXP %.04f\n",(double)src)); if( isinf(src) ) { - make_nan( FPU registers[reg] ); + make_nan( FPU registers[reg], isneg(src) ); + } + else if( iszero(src) ) { + make_zero(FPU registers[reg], isneg(src)); } else { + /* FIXME: subnormals not supported */ FPU registers[reg] = fast_fgetexp( src ); } make_fpsr(FPU registers[reg]); break; case 0x1f: /* FGETMAN */ fpu_debug(("FGETMAN %.04f\n",(double)src)); - if( src == 0 ) { - FPU registers[reg] = 0; + if( iszero(src)) { + make_zero(FPU registers[reg], isneg(src)); } - else if( isinf(src) ) { - make_nan( FPU registers[reg] ); + else if( isinf(src) || isnan(src) ) { + make_nan( FPU registers[reg], 0 ); } else { FPU registers[reg] = src; @@ -1949,7 +2151,28 @@ void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) break; case 0x20: /* FDIV */ fpu_debug(("FDIV %.04f\n",(double)src)); - FPU registers[reg] /= src; + if (isnan(src) || isnan(FPU registers[reg])) + { + make_nan(FPU registers[reg], false); + } else if (isinf(src)) + { + if (isinf(FPU registers[reg])) + make_nan(FPU registers[reg], false); + else + make_zero(FPU registers[reg], isneg(src) != isneg(FPU registers[reg])); + } else if (isinf(FPU registers[reg])) + { + if (isinf(src)) + make_nan(FPU registers[reg], false); + else + 
make_inf(FPU registers[reg], isneg(src) != isneg(FPU registers[reg])); + } else if (iszero(FPU registers[reg]) && !iszero(src)) + { + make_zero(FPU registers[reg], isneg(FPU registers[reg]) != isneg(src)); + } else + { + FPU registers[reg] /= src; + } make_fpsr(FPU registers[reg]); break; case 0x21: /* FMOD */ @@ -1967,31 +2190,23 @@ void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) fpu_debug(("FMUL %.04f\n",(double)src)); get_dest_flags(FPU registers[reg]); get_source_flags(src); - if(fl_dest.in_range && fl_source.in_range) { + if (fl_dest.in_range && fl_source.in_range) { FPU registers[reg] *= src; + if (unlikely(isinf(FPU registers[reg]))) + { + make_inf(FPU registers[reg], isneg(FPU registers[reg])); + } } else if (fl_dest.nan || fl_source.nan || - fl_dest.zero && fl_source.infinity || - fl_dest.infinity && fl_source.zero ) { - make_nan( FPU registers[reg] ); + (fl_dest.zero && fl_source.infinity) || + (fl_dest.infinity && fl_source.zero) ) { + make_nan( FPU registers[reg], fl_dest.negative ); } else if (fl_dest.zero || fl_source.zero ) { - if (fl_dest.negative && !fl_source.negative || - !fl_dest.negative && fl_source.negative) { - make_zero_negative(FPU registers[reg]); - } - else { - make_zero_positive(FPU registers[reg]); - } + make_zero(FPU registers[reg], fl_dest.negative != fl_source.negative); } else { - if( fl_dest.negative && !fl_source.negative || - !fl_dest.negative && fl_source.negative) { - make_inf_negative(FPU registers[reg]); - } - else { - make_inf_positive(FPU registers[reg]); - } + make_inf(FPU registers[reg], fl_dest.negative != fl_source.negative); } make_fpsr(FPU registers[reg]); break; @@ -2004,7 +2219,7 @@ void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) fpu_debug(("FREM %.04f\n",(double)src)); // FPU registers[reg] = FPU registers[reg] - (double) ((int) (FPU registers[reg] / src + 0.5)) * src; { - fpu_register quot = fp_round_to_nearest(FPU registers[reg] / src); + fpu_register quot = fp_round_to_even(FPU 
registers[reg] / src); uae_u32 sign = get_quotient_sign(FPU registers[reg],src); FPU registers[reg] = FPU registers[reg] - quot * src; make_fpsr(FPU registers[reg]); @@ -2022,24 +2237,49 @@ void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) // an overflow or underflow always results. // Here (int) cast is okay. int scale_factor = (int)fp_round_to_zero(src); -#if USE_LONG_DOUBLE || USE_QUAD_DOUBLE - fp_declare_init_shape(sxp, FPU registers[reg], extended); - sxp->ieee.exponent += scale_factor; +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) + fp_declare_init_shape(sxp, extended); + sxp.value = FPU registers[reg]; + int exp = sxp.ieee.exponent; + exp += scale_factor; + if (scale_factor >= FP_EXTENDED_EXP_MAX || exp >= FP_EXTENDED_EXP_MAX) /* overflow */ + { + make_inf(FPU registers[reg], isneg(FPU registers[reg])); + FPU fpsr.exception_status |= FPSR_EXCEPTION_OVFL; + } else if (scale_factor < -FP_EXTENDED_EXP_MAX || exp <= -64) /* underflow */ + { + make_zero(FPU registers[reg], isneg(FPU registers[reg])); + FPU fpsr.exception_status |= FPSR_EXCEPTION_UNFL; + } else if (exp >= 0) /* normal result */ + { + sxp.ieee.exponent = exp; + FPU registers[reg] = sxp.value; + } else /* subnormal result */ + { + exp += 64; + sxp.ieee.exponent = exp; + sxp.value = sxp.value * 5.421010862427522170037e-20L; /* 2^-64 */ + } #else - fp_declare_init_shape(sxp, FPU registers[reg], double); - uae_u32 exp = sxp->ieee.exponent + scale_factor; + fp_declare_init_shape(sxp, double); + sxp.value = FPU registers[reg]; + uae_u32 exp = sxp.ieee.exponent + scale_factor; if (exp < FP_EXTENDED_EXP_BIAS - FP_DOUBLE_EXP_BIAS) exp = 0; else if (exp > FP_EXTENDED_EXP_BIAS + FP_DOUBLE_EXP_BIAS) exp = FP_DOUBLE_EXP_MAX; else exp += FP_DOUBLE_EXP_BIAS - FP_EXTENDED_EXP_BIAS; - sxp->ieee.exponent = exp; + sxp.ieee.exponent = exp; + FPU registers[reg] = sxp.value; #endif } - else if (fl_source.infinity) { + else if (fl_source.infinity || fl_source.nan) { // Returns NaN for any Infinity 
source - make_nan( FPU registers[reg] ); + make_nan( FPU registers[reg], fl_source.negative ); + } else { + // source was zero, or dest was inf or nan + // in either case, dest is unchanged } make_fpsr(FPU registers[reg]); break; @@ -2050,12 +2290,52 @@ void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) break; case 0x28: /* FSUB */ fpu_debug(("FSUB %.04f\n",(double)src)); - FPU registers[reg] -= src; + if (isnan(src) || isnan(FPU registers[reg])) + { + make_nan(FPU registers[reg], false); + } else if (isinf(src)) + { + if (isinf(FPU registers[reg]) && isneg(src) == isneg(FPU registers[reg])) + make_nan(FPU registers[reg], false); + else + make_inf(FPU registers[reg], isneg(src)); + } else if (isinf(FPU registers[reg])) + { + if (isinf(src) && isneg(src) == isneg(FPU registers[reg])) + make_nan(FPU registers[reg], false); + else + make_inf(FPU registers[reg], isneg(FPU registers[reg])); + } else + { + FPU registers[reg] -= src; + } make_fpsr(FPU registers[reg]); break; case 0x22: /* FADD */ fpu_debug(("FADD %.04f\n",(double)src)); - FPU registers[reg] += src; + /* + * WTF. 
inf + some value generates NaN on x87, + * but we need inf in most cases + */ + if (isnan(src) || isnan(FPU registers[reg])) + { + make_nan(FPU registers[reg], false); + } else if (isinf(src)) + { + if (isinf(FPU registers[reg]) && isneg(src) != isneg(FPU registers[reg])) + make_nan(FPU registers[reg], false); + else + make_inf(FPU registers[reg], isneg(src)); + } else if (isinf(FPU registers[reg])) + { + if (isinf(src) && isneg(src) != isneg(FPU registers[reg])) + make_nan(FPU registers[reg], false); + else + make_inf(FPU registers[reg], isneg(FPU registers[reg])); + } else + { + FPU registers[reg] += src; + } make_fpsr(FPU registers[reg]); break; case 0x30: /* FSINCOS */ @@ -2068,6 +2348,7 @@ void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) case 0x37: fpu_debug(("FSINCOS %.04f\n",(double)src)); // Cosine must be calculated first if same register + // note: no need to use special sincos() function here; compiler will optimize that anyway FPU registers[extra & 7] = fp_cos(src); FPU registers[reg] = fp_sin (src); // Set FPU fpsr according to the sine result @@ -2076,7 +2357,26 @@ void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) case 0x38: /* FCMP */ fpu_debug(("FCMP %.04f\n",(double)src)); set_fpsr(0); - make_fpsr(FPU registers[reg] - src); + if (isnan(src) || isnan(FPU registers[reg])) + { + make_nan(src, false); + make_fpsr(src); + } else if (isinf(FPU registers[reg])) + { + if (isinf(src) && isneg(FPU registers[reg]) == isneg (src)) + { + make_fpsr(0); + } else + { + make_fpsr(FPU registers[reg]); + } + } else if (isinf(src)) + { + make_fpsr(-src); + } else + { + make_fpsr(FPU registers[reg] - src); + } break; case 0x3a: /* FTST */ fpu_debug(("FTST %.04f\n",(double)src)); @@ -2100,6 +2400,27 @@ void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) dump_registers( "END "); } + +void fpu_set_fpsr(uae_u32 new_fpsr) +{ + set_fpsr(new_fpsr); +} + +uae_u32 fpu_get_fpsr(void) +{ + return get_fpsr(); +} + +void fpu_set_fpcr(uae_u32 new_fpcr) +{ + 
set_fpcr(new_fpcr); +} + +uae_u32 fpu_get_fpcr(void) +{ + return get_fpcr(); +} + /* -------------------------- Initialization -------------------------- */ PRIVATE uae_u8 m_fpu_state_original[108]; // 90/94/108 @@ -2136,7 +2457,7 @@ PUBLIC void FFPU fpu_init (bool integral_68040) FPU result = 1; for (int i = 0; i < 8; i++) - make_nan(FPU registers[i]); + make_nan(FPU registers[i], false); } PUBLIC void FFPU fpu_exit (void) diff --git a/BasiliskII/src/uae_cpu/fpu/fpu_ieee.h b/BasiliskII/src/uae_cpu/fpu/fpu_ieee.h index 89501956..5735874c 100644 --- a/BasiliskII/src/uae_cpu/fpu/fpu_ieee.h +++ b/BasiliskII/src/uae_cpu/fpu/fpu_ieee.h @@ -1,28 +1,33 @@ /* - * fpu/fpu_uae.h - Extra Definitions for the old UAE FPU core + * fpu/fpu_ieee.h - Extra Definitions for the IEEE FPU core * - * Basilisk II (C) 1997-2008 Christian Bauer + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II * - * MC68881/68040 fpu emulation - * - * Original UAE FPU, copyright 1996 Herman ten Brugge - * Rewrite for x86, copyright 1999-2000 Lauri Pesonen - * New framework, copyright 2000 Gwenole Beauchesne - * Adapted for JIT compilation (c) Bernd Meyer, 2000 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
+ * MC68881/68040 fpu emulation * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef FPU_IEEE_H @@ -54,13 +59,11 @@ PRIVATE double_flags fl_dest; PRIVATE inline void FFPU get_dest_flags(fpu_register const & r); PRIVATE inline void FFPU get_source_flags(fpu_register const & r); -PRIVATE inline void FFPU make_nan(fpu_register & r); -PRIVATE inline void FFPU make_zero_positive(fpu_register & r); -PRIVATE inline void FFPU make_zero_negative(fpu_register & r); -PRIVATE inline void FFPU make_inf_positive(fpu_register & r); -PRIVATE inline void FFPU make_inf_negative(fpu_register & r); +PRIVATE inline void FFPU make_nan(fpu_register & r, bool negative); +PRIVATE inline void FFPU make_zero(fpu_register & r, bool negative); +PRIVATE inline void FFPU make_inf(fpu_register & r, bool negative); -PRIVATE inline void FFPU fast_scale(fpu_register & r, int add); +// MJ PRIVATE inline void FFPU 
fast_scale(fpu_register & r, int add); PRIVATE inline fpu_register FFPU fast_fgetexp(fpu_register const & r); // May be optimized for particular processors diff --git a/BasiliskII/src/uae_cpu/fpu/fpu_mpfr.cpp b/BasiliskII/src/uae_cpu/fpu/fpu_mpfr.cpp new file mode 100644 index 00000000..4eda14ca --- /dev/null +++ b/BasiliskII/src/uae_cpu/fpu/fpu_mpfr.cpp @@ -0,0 +1,2110 @@ +/* + * fpu_mpfr.cpp - emulate 68881/68040 fpu with mpfr + * + * Copyright (c) 2012, 2013 Andreas Schwab + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "sysdeps.h" +#include +#include "memory.h" +#include "readcpu.h" +#include "newcpu.h" +#include "main.h" +#define FPU_IMPLEMENTATION +#include "fpu/fpu.h" + +#include "fpu/flags.h" +#include "fpu/exceptions.h" +#include "fpu/rounding.h" +#include "fpu/impl.h" + +#define SINGLE_PREC 24 +#define SINGLE_MIN_EXP -126 +#define SINGLE_MAX_EXP 127 +#define SINGLE_BIAS 127 +#define DOUBLE_PREC 53 +#define DOUBLE_MIN_EXP -1022 +#define DOUBLE_MAX_EXP 1023 +#define DOUBLE_BIAS 1023 +#define EXTENDED_PREC 64 +#define EXTENDED_MIN_EXP -16383 +#define EXTENDED_MAX_EXP 16383 +#define EXTENDED_BIAS 16383 + +fpu_t fpu; +// The constant ROM +// Constants 48 to 63 are mapped to index 16 to 31 +const int num_fpu_constants = 32; +static mpfr_t fpu_constant_rom[num_fpu_constants]; +#define 
FPU_CONSTANT_ONE fpu_constant_rom[18] +// Exceptions generated during execution in addition to the ones +// maintained by mpfr +static uae_u32 cur_exceptions; +static uaecptr cur_instruction_address; + +static void +set_format (int prec) +{ + // MPFR represents numbers as 0.m*2^e + switch (prec) + { + case SINGLE_PREC: + mpfr_set_emin (SINGLE_MIN_EXP + 1 - (SINGLE_PREC - 1)); + mpfr_set_emax (SINGLE_MAX_EXP + 1); + break; + case DOUBLE_PREC: + mpfr_set_emin (DOUBLE_MIN_EXP + 1 - (DOUBLE_PREC - 1)); + mpfr_set_emax (DOUBLE_MAX_EXP + 1); + break; + case EXTENDED_PREC: + mpfr_set_emin (EXTENDED_MIN_EXP + 1 - (EXTENDED_PREC - 1)); + mpfr_set_emax (EXTENDED_MAX_EXP + 1); + break; + } +} + +static mpfr_rnd_t +get_cur_rnd () +{ + switch (get_rounding_mode ()) + { + default: + case FPCR_ROUND_NEAR: + return MPFR_RNDN; + case FPCR_ROUND_ZERO: + return MPFR_RNDZ; + case FPCR_ROUND_MINF: + return MPFR_RNDD; + case FPCR_ROUND_PINF: + return MPFR_RNDU; + } +} + +static mpfr_prec_t +get_cur_prec () +{ + switch (get_rounding_precision ()) + { + default: + case FPCR_PRECISION_EXTENDED: + return EXTENDED_PREC; + case FPCR_PRECISION_SINGLE: + return SINGLE_PREC; + case FPCR_PRECISION_DOUBLE: + return DOUBLE_PREC; + } +} + +#define DEFAULT_NAN_BITS 0xffffffffffffffffULL + +static void +set_nan (fpu_register ®, uae_u64 nan_bits, int nan_sign) +{ + mpfr_set_nan (reg.f); + reg.nan_bits = nan_bits; + reg.nan_sign = nan_sign; +} + +static void +set_nan (fpu_register ®) +{ + set_nan (reg, DEFAULT_NAN_BITS, 0); +} + +static bool fpu_inited; + +void +fpu_init (bool integral_68040) +{ + fpu.is_integral = integral_68040; + + mpfr_set_default_prec (EXTENDED_PREC); + mpfr_set_default_rounding_mode (MPFR_RNDN); + set_format (EXTENDED_PREC); + + for (int i = 0; i < 8; i++) + mpfr_init (fpu.registers[i].f); + mpfr_init (fpu.result.f); + + // Initialize constant ROM + for (int i = 0; i < num_fpu_constants; i++) + mpfr_init (fpu_constant_rom[i]); + + // 0: pi + mpfr_const_pi (fpu_constant_rom[0], 
MPFR_RNDN); + // 11: log10 (2) + mpfr_set_ui (fpu_constant_rom[11], 2, MPFR_RNDN); + mpfr_log10 (fpu_constant_rom[11], fpu_constant_rom[11], MPFR_RNDZ); + // 12: e + mpfr_set_ui (fpu_constant_rom[12], 1, MPFR_RNDN); + mpfr_exp (fpu_constant_rom[12], fpu_constant_rom[12], MPFR_RNDZ); + // 13: log2 (e) + mpfr_log2 (fpu_constant_rom[13], fpu_constant_rom[12], MPFR_RNDU); + // 14: log10 (e) + mpfr_log10 (fpu_constant_rom[14], fpu_constant_rom[12], MPFR_RNDU); + // 15: 0 + mpfr_set_zero (fpu_constant_rom[15], 0); + // 48: ln (2) + mpfr_const_log2 (fpu_constant_rom[16], MPFR_RNDN); + // 49: ln (10) + mpfr_set_ui (fpu_constant_rom[17], 10, MPFR_RNDN); + mpfr_log (fpu_constant_rom[17], fpu_constant_rom[17], MPFR_RNDN); + // 50 to 63: powers of 10 + mpfr_set_ui (fpu_constant_rom[18], 1, MPFR_RNDN); + for (int i = 19; i < 32; i++) + { + mpfr_set_ui (fpu_constant_rom[i], 1L << (i - 19) , MPFR_RNDN); + mpfr_exp10 (fpu_constant_rom[i], fpu_constant_rom[i], MPFR_RNDN); + } + + fpu_inited = true; + + fpu_reset (); +} + +void +fpu_exit () +{ + if (!fpu_inited) return; + + for (int i = 0; i < 8; i++) + mpfr_clear (fpu.registers[i].f); + mpfr_clear (fpu.result.f); + for (int i = 0; i < num_fpu_constants; i++) + mpfr_clear (fpu_constant_rom[i]); +} + +void +fpu_reset () +{ + set_fpcr (0); + set_fpsr (0); + fpu.instruction_address = 0; + + for (int i = 0; i < 8; i++) + set_nan (fpu.registers[i]); +} + +fpu_register::operator long double () +{ + return mpfr_get_ld (f, MPFR_RNDN); +} + +fpu_register & +fpu_register::operator= (long double x) +{ + mpfr_set_ld (f, x, MPFR_RNDN); + nan_bits = DEFAULT_NAN_BITS; + nan_sign = 0; + return *this; +} + +static bool +get_fp_addr (uae_u32 opcode, uae_u32 *addr, bool write) +{ + uaecptr pc; + int mode; + int reg; + + mode = (opcode >> 3) & 7; + reg = opcode & 7; + switch (mode) + { + case 0: + case 1: + return false; + case 2: + *addr = m68k_areg (regs, reg); + break; + case 3: + *addr = m68k_areg (regs, reg); + break; + case 4: + *addr = m68k_areg 
(regs, reg); + break; + case 5: + *addr = m68k_areg (regs, reg) + (uae_s16) next_iword(); + break; + case 6: + *addr = get_disp_ea_020 (m68k_areg (regs, reg), next_iword()); + break; + case 7: + switch (reg) + { + case 0: + *addr = (uae_s16) next_iword(); + break; + case 1: + *addr = next_ilong(); + break; + case 2: + if (write) + return false; + pc = m68k_getpc (); + *addr = pc + (uae_s16) next_iword(); + break; + case 3: + if (write) + return false; + pc = m68k_getpc (); + *addr = get_disp_ea_020 (pc, next_iword()); + break; + default: + return false; + } + } + return true; +} + +static void +set_from_single (fpu_register &value, uae_u32 data) +{ + int s = data >> 31; + int e = (data >> 23) & 0xff; + uae_u32 m = data & 0x7fffff; + + if (e == 0xff) + { + if (m != 0) + { + if (!(m & 0x400000)) + cur_exceptions |= FPSR_EXCEPTION_SNAN; + set_nan (value, (uae_u64) (m | 0xc00000) << (32 + 8), s); + } + else + mpfr_set_inf (value.f, 0); + } + else + { + if (e != 0) + // Add integer bit + m |= 0x800000; + else + e++; + // Remove bias + e -= SINGLE_BIAS; + mpfr_set_ui_2exp (value.f, m, e - (SINGLE_PREC - 1), MPFR_RNDN); + } + mpfr_setsign (value.f, value.f, s, MPFR_RNDN); +} + +static void +set_from_double (fpu_register &value, uae_u32 words[2]) +{ + int s = words[0] >> 31; + int e = (words[0] >> 20) & 0x7ff; + uae_u32 m = words[0] & 0xfffff; + + if (e == 0x7ff) + { + if ((m | words[1]) != 0) + { + if (!(m & 0x80000)) + cur_exceptions |= FPSR_EXCEPTION_SNAN; + set_nan (value, (((uae_u64) (m | 0x180000) << (32 + 11)) + | ((uae_u64) words[1] << 11)), s); + } + else + mpfr_set_inf (value.f, 0); + } + else + { + if (e != 0) + // Add integer bit + m |= 0x100000; + else + e++; + // Remove bias + e -= DOUBLE_BIAS; + mpfr_set_uj_2exp (value.f, ((uintmax_t) m << 32) | words[1], + e - (DOUBLE_PREC - 1), MPFR_RNDN); + } + mpfr_setsign (value.f, value.f, s, MPFR_RNDN); +} + +static void +set_from_extended (fpu_register &value, uae_u32 words[3], bool check_snan) +{ + int s = words[0] 
>> 31; + int e = (words[0] >> 16) & 0x7fff; + + if (e == 0x7fff) + { + if (((words[1] & 0x7fffffff) | words[2]) != 0) + { + if (check_snan) + { + if ((words[1] & 0x40000000) == 0) + cur_exceptions |= FPSR_EXCEPTION_SNAN; + words[1] |= 0x40000000; + } + set_nan (value, ((uae_u64) words[1] << 32) | words[2], s); + } + else + mpfr_set_inf (value.f, 0); + } + else + { + // Remove bias + e -= EXTENDED_BIAS; + mpfr_set_uj_2exp (value.f, ((uintmax_t) words[1] << 32) | words[2], + e - (EXTENDED_PREC - 1), MPFR_RNDN); + } + mpfr_setsign (value.f, value.f, s, MPFR_RNDN); +} + +#define from_bcd(d) ((d) < 10 ? (d) : (d) - 10) + +static void +set_from_packed (fpu_register &value, uae_u32 words[3]) +{ + char str[32], *p = str; + int sm = words[0] >> 31; + int se = (words[0] >> 30) & 1; + int i; + + if (((words[0] >> 16) & 0x7fff) == 0x7fff) + { + if ((words[1] | words[2]) != 0) + { + if ((words[1] & 0x40000000) == 0) + cur_exceptions |= FPSR_EXCEPTION_SNAN; + set_nan (value, ((uae_u64) (words[1] | 0x40000000) << 32) | words[2], + sm); + } + else + mpfr_set_inf (value.f, 0); + } + else + { + if (sm) + *p++ = '-'; + *p++ = from_bcd (words[0] & 15) + '0'; + *p++ = '.'; + for (i = 0; i < 8; i++) + { + p[i] = from_bcd ((words[1] >> (28 - i * 4)) & 15) + '0'; + p[i + 8] = from_bcd ((words[2] >> (28 - i * 4)) & 15) + '0'; + } + p += 16; + *p++ = 'e'; + if (se) + *p++ = '-'; + *p++ = from_bcd ((words[0] >> 24) & 15) + '0'; + *p++ = from_bcd ((words[0] >> 20) & 15) + '0'; + *p++ = from_bcd ((words[0] >> 16) & 15) + '0'; + *p = 0; + mpfr_set_str (value.f, str, 10, MPFR_RNDN); + } + mpfr_setsign (value.f, value.f, sm, MPFR_RNDN); +} + +static bool +get_fp_value (uae_u32 opcode, uae_u32 extra, fpu_register &value) +{ + int mode, reg, size; + uaecptr pc; + uae_u32 addr; + uae_u32 words[3]; + static const int sz1[8] = {4, 4, 12, 12, 2, 8, 1, 0}; + static const int sz2[8] = {4, 4, 12, 12, 2, 8, 2, 0}; + + if ((extra & 0x4000) == 0) + { + mpfr_set (value.f, fpu.registers[(extra >> 10) & 7].f, 
MPFR_RNDN); + value.nan_bits = fpu.registers[(extra >> 10) & 7].nan_bits; + value.nan_sign = fpu.registers[(extra >> 10) & 7].nan_sign; + /* Check for SNaN. */ + if (mpfr_nan_p (value.f) && (value.nan_bits & (1ULL << 62)) == 0) + { + value.nan_bits |= 1ULL << 62; + cur_exceptions |= FPSR_EXCEPTION_SNAN; + } + return true; + } + /* Source is not an FP register: decode addressing mode, register number and operand format. */ + mode = (opcode >> 3) & 7; + reg = opcode & 7; + size = (extra >> 10) & 7; + switch (mode) + { + case 0: + switch (size) + { + case 6: + mpfr_set_si (value.f, (uae_s8) m68k_dreg (regs, reg), MPFR_RNDN); + break; + case 4: + mpfr_set_si (value.f, (uae_s16) m68k_dreg (regs, reg), MPFR_RNDN); + break; + case 0: + mpfr_set_si (value.f, (uae_s32) m68k_dreg (regs, reg), MPFR_RNDN); + break; + case 1: + set_from_single (value, m68k_dreg (regs, reg)); + break; + default: + return false; + } + return true; + case 1: + return false; + case 2: + case 3: + addr = m68k_areg (regs, reg); + break; + case 4: + addr = m68k_areg (regs, reg) - (reg == 7 ? sz2[size] : sz1[size]); + break; + case 5: + addr = m68k_areg (regs, reg) + (uae_s16) next_iword (); + break; + case 6: + addr = get_disp_ea_020 (m68k_areg (regs, reg), next_iword ()); + break; + case 7: + switch (reg) + { + case 0: + addr = (uae_s16) next_iword (); + break; + case 1: + addr = next_ilong (); + break; + case 2: + pc = m68k_getpc (); + addr = pc + (uae_s16) next_iword (); + break; + case 3: + pc = m68k_getpc (); + addr = get_disp_ea_020 (pc, next_iword ()); + break; + case 4: + addr = m68k_getpc (); + m68k_incpc (sz2[size]); + if (size == 6) // Immediate byte + addr++; + break; + default: + return false; + } + } + + /* Read the operand at ADDR and convert it according to its format. */ + switch (size) + { + case 0: + mpfr_set_si (value.f, (uae_s32) get_long (addr), MPFR_RNDN); + break; + case 1: + set_from_single (value, get_long (addr)); + break; + case 2: + words[0] = get_long (addr); + words[1] = get_long (addr + 4); + words[2] = get_long (addr + 8); + set_from_extended (value, words, true); + break; + case 3: + words[0] = get_long (addr); + words[1] = get_long 
(addr + 4); + words[2] = get_long (addr + 8); + set_from_packed (value, words); + break; + case 4: + mpfr_set_si (value.f, (uae_s16) get_word (addr), MPFR_RNDN); + break; + case 5: + words[0] = get_long (addr); + words[1] = get_long (addr + 4); + set_from_double (value, words); + break; + case 6: + mpfr_set_si (value.f, (uae_s8) get_byte (addr), MPFR_RNDN); + break; + default: + return false; + } + + /* Apply the postincrement / predecrement side effect to the address register; register 7 uses the sz2 table. */ + switch (mode) + { + case 3: + m68k_areg (regs, reg) += reg == 7 ? sz2[size] : sz1[size]; + break; + case 4: + m68k_areg (regs, reg) -= reg == 7 ? sz2[size] : sz1[size]; + break; + } + + return true; +} + +/* Combine the exceptions recorded in cur_exceptions with MPFR's underflow, overflow and inexact flags, store the result as the exception status and fold it into the accrued exception bits. When an enabled exception is pending only the instruction address is recorded; nothing is raised. */ +static void +update_exceptions () +{ + uae_u32 exc, aexc; + + exc = cur_exceptions; + // Add any mpfr detected exceptions + if (mpfr_underflow_p ()) + exc |= FPSR_EXCEPTION_UNFL; + if (mpfr_overflow_p ()) + exc |= FPSR_EXCEPTION_OVFL; + if (mpfr_inexflag_p ()) + exc |= FPSR_EXCEPTION_INEX2; + set_exception_status (exc); + + aexc = get_accrued_exception (); + if (exc & (FPSR_EXCEPTION_SNAN|FPSR_EXCEPTION_OPERR)) + aexc |= FPSR_ACCR_IOP; + if (exc & FPSR_EXCEPTION_OVFL) + aexc |= FPSR_ACCR_OVFL; + if ((exc & (FPSR_EXCEPTION_UNFL|FPSR_EXCEPTION_INEX2)) + == (FPSR_EXCEPTION_UNFL|FPSR_EXCEPTION_INEX2)) + aexc |= FPSR_ACCR_UNFL; + if (exc & FPSR_EXCEPTION_DZ) + aexc |= FPSR_ACCR_DZ; + if (exc & (FPSR_EXCEPTION_INEX1|FPSR_EXCEPTION_INEX2|FPSR_EXCEPTION_OVFL)) + aexc |= FPSR_ACCR_INEX; + set_accrued_exception (aexc); + + if ((fpu.fpcr & exc) != 0) + { + fpu.instruction_address = cur_instruction_address; + // TODO: raise exceptions + // Problem: FPSP040 depends on proper FPU stack frames, it would suffer + // undefined behaviour with our dummy FSAVE implementation + } +} + +/* Store VALUE, after mpfr_subnormalize with ternary value T, together with the given NaN payload and sign into FP register REG. When DO_FLAGS is set the condition code byte is recomputed from the stored value. */ +static void +set_fp_register (int reg, mpfr_t value, uae_u64 nan_bits, int nan_sign, + int t, mpfr_rnd_t rnd, bool do_flags) +{ + mpfr_subnormalize (value, t, rnd); + mpfr_set (fpu.registers[reg].f, value, rnd); + fpu.registers[reg].nan_bits = nan_bits; + fpu.registers[reg].nan_sign = nan_sign; + if 
(do_flags) + { + uae_u32 flags = 0; + + if (mpfr_zero_p (fpu.registers[reg].f)) + flags |= FPSR_CCB_ZERO; + if (mpfr_signbit (fpu.registers[reg].f)) + flags |= FPSR_CCB_NEGATIVE; + if (mpfr_nan_p (fpu.registers[reg].f)) + flags |= FPSR_CCB_NAN; + if (mpfr_inf_p (fpu.registers[reg].f)) + flags |= FPSR_CCB_INFINITY; + set_fpccr (flags); + } +} + +/* Overload for a plain MPFR value: stores it with DEFAULT_NAN_BITS and a clear NaN sign. */ +static void +set_fp_register (int reg, mpfr_t value, int t, mpfr_rnd_t rnd, bool do_flags) +{ + set_fp_register (reg, value, DEFAULT_NAN_BITS, 0, t, rnd, do_flags); +} + +/* Overload for an fpu_register: stores it together with its own NaN payload and sign. */ +static void +set_fp_register (int reg, fpu_register &value, int t, mpfr_rnd_t rnd, + bool do_flags) +{ + set_fp_register (reg, value.f, value.nan_bits, value.nan_sign, t, rnd, + do_flags); +} + +/* Round VALUE to single precision using the current rounding mode and return its 32-bit encoding. A NaN whose nan_bits lack bit 62 gets that bit set in VALUE and flags SNAN. */ +static uae_u32 +extract_to_single (fpu_register &value) +{ + uae_u32 word; + int t; + mpfr_rnd_t rnd = get_cur_rnd (); + MPFR_DECL_INIT (single, SINGLE_PREC); + + set_format (SINGLE_PREC); + // Round to single + t = mpfr_set (single, value.f, rnd); + t = mpfr_check_range (single, t, rnd); + mpfr_subnormalize (single, t, rnd); + set_format (EXTENDED_PREC); + + if (mpfr_inf_p (single)) + word = 0x7f800000; + else if (mpfr_nan_p (single)) + { + if ((value.nan_bits & (1ULL << 62)) == 0) + { + value.nan_bits |= 1ULL << 62; + cur_exceptions |= FPSR_EXCEPTION_SNAN; + } + word = 0x7f800000 | ((value.nan_bits >> (32 + 8)) & 0x7fffff); + if (value.nan_sign) + word |= 0x80000000; + } + else if (mpfr_zero_p (single)) + word = 0; + else + { + int e; + mpz_t f; + mpz_init (f); + word = 0; + // Get exponent and mantissa + e = mpfr_get_z_2exp (f, single); + // Move binary point + e += SINGLE_PREC - 1; + // Add bias + e += SINGLE_BIAS; + if (e <= 0) + { + // Denormalized number + mpz_tdiv_q_2exp (f, f, -e + 1); + e = 0; + } + mpz_export (&word, 0, 1, 4, 0, 0, f); + // Remove integer bit + word &= 0x7fffff; + word |= e << 23; + mpz_clear (f); + } + if (mpfr_signbit (single)) + word |= 0x80000000; + return word; +} + +static void +extract_to_double (fpu_register &value, uint32_t 
*words) +{ + int t; + mpfr_rnd_t rnd = get_cur_rnd (); + MPFR_DECL_INIT (dbl, DOUBLE_PREC); + + /* Same scheme as extract_to_single, producing a two-word double-precision image in WORDS[0..1]. */ + set_format (DOUBLE_PREC); + // Round to double + t = mpfr_set (dbl, value.f, rnd); + t = mpfr_check_range (dbl, t, rnd); + mpfr_subnormalize (dbl, t, rnd); + set_format (EXTENDED_PREC); + + if (mpfr_inf_p (dbl)) + { + words[0] = 0x7ff00000; + words[1] = 0; + } + else if (mpfr_nan_p (dbl)) + { + if ((value.nan_bits & (1ULL << 62)) == 0) + { + value.nan_bits |= 1ULL << 62; + cur_exceptions |= FPSR_EXCEPTION_SNAN; + } + words[0] = 0x7ff00000 | ((value.nan_bits >> (32 + 11)) & 0xfffff); + words[1] = value.nan_bits >> 11; + if (value.nan_sign) + words[0] |= 0x80000000; + } + else if (mpfr_zero_p (dbl)) + { + words[0] = 0; + words[1] = 0; + } + else + { + int e, off = 0; + mpz_t f; + mpz_init (f); + words[0] = words[1] = 0; + // Get exponent and mantissa + e = mpfr_get_z_2exp (f, dbl); + // Move binary point + e += DOUBLE_PREC - 1; + // Add bias + e += DOUBLE_BIAS; + if (e <= 0) + { + // Denormalized number + mpz_tdiv_q_2exp (f, f, -e + 1); + if (e <= -20) + // No more than 32 bits left + off = 1; + e = 0; + } + mpz_export (&words[off], 0, 1, 4, 0, 0, f); + // Remove integer bit + words[0] &= 0xfffff; + words[0] |= e << 20; + mpz_clear (f); + } + if (mpfr_signbit (dbl)) + words[0] |= 0x80000000; +} + +/* Encode VALUE as a three-word extended-precision image: WORDS[0] carries sign and biased exponent, WORDS[1..2] the mantissa including its integer bit. No rounding is performed here. */ +static void +extract_to_extended (fpu_register &value, uint32_t *words) +{ + if (mpfr_inf_p (value.f)) + { + words[0] = 0x7fff0000; + words[1] = 0; + words[2] = 0; + } + else if (mpfr_nan_p (value.f)) + { + words[0] = 0x7fff0000; + words[1] = value.nan_bits >> 32; + words[2] = value.nan_bits; + if (value.nan_sign) + words[0] |= 0x80000000; + } + else if (mpfr_zero_p (value.f)) + { + words[0] = 0; + words[1] = 0; + words[2] = 0; + } + else + { + int e, off = 0; + mpz_t f; + + mpz_init (f); + words[0] = words[1] = words[2] = 0; + // Get exponent and mantissa + e = mpfr_get_z_2exp (f, value.f); + // Move binary point + e += EXTENDED_PREC - 1; + // Add bias + e += EXTENDED_BIAS; + if 
(e < 0) + { + // Denormalized number + mpz_tdiv_q_2exp (f, f, -e); + if (e <= -32) + // No more than 32 bits left + off = 1; + e = 0; + } + mpz_export (&words[1 + off], 0, 1, 4, 0, 0, f); + words[0] = e << 16; + mpz_clear (f); + } + if (mpfr_signbit (value.f)) + words[0] |= 0x80000000; +} + +/* Encode VALUE as a three-word packed-decimal image in WORDS[0..2]. K is the k-factor: 1..17 requests that many significant digits, 64..127 is taken as K - 128 (negative), and a non-positive value requests -K digits to the right of the decimal point. Values 18..63 flag OPERR and are clamped to 17 digits. */ +static void +extract_to_packed (fpu_register &value, int k, uae_u32 *words) +{ + if (mpfr_inf_p (value.f)) + { + words[0] = 0x7fff0000; + words[1] = 0; + words[2] = 0; + } + else if (mpfr_nan_p (value.f)) + { + words[0] = 0x7fff0000; + words[1] = value.nan_bits >> 32; + words[2] = value.nan_bits; + if (value.nan_sign) + words[0] |= 0x80000000; + } + else if (mpfr_zero_p (value.f)) + { + words[0] = 0; + words[1] = 0; + words[2] = 0; + } + else + { + char str[100], *p = str; + mpfr_exp_t e; + mpfr_rnd_t rnd = get_cur_rnd (); + + words[0] = words[1] = words[2] = 0; + if (k >= 64) + k -= 128; + else if (k >= 18) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + if (k <= 0) + { + /* The digit count is floor(log10(|value|)) + 1 - k. Use the magnitude, because log10 of a negative number is NaN and mpfr_get_si would turn that into 0, and round towards minus infinity so that magnitudes below 1 yield the same decimal exponent mpfr_get_str reports below. */ + MPFR_DECL_INIT (temp, 16); + MPFR_DECL_INIT (magnitude, mpfr_get_prec (value.f)); + + mpfr_abs (magnitude, value.f, MPFR_RNDN); + mpfr_log10 (temp, magnitude, MPFR_RNDD); + k = mpfr_get_si (temp, MPFR_RNDD) - k + 1; + } + if (k <= 0) + k = 1; + else if (k >= 18) + k = 17; + mpfr_get_str (str, &e, 10, k, value.f, rnd); + e--; + if (*p == '-') + p++; + // Pad to 17 digits + while (k < 17) + p[k++] = '0'; + if (e < 0) + { + words[0] |= 0x40000000; + e = -e; + } + words[0] |= (e % 10) << 16; + e /= 10; + words[0] |= (e % 10) << 20; + e /= 10; + words[0] |= (e % 10) << 24; + e /= 10; + if (e) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + words[0] |= e << 12; + words[0] |= *p++ & 15; + for (k = 0; k < 8; k++) + words[1] = (words[1] << 4) | (*p++ & 15); + for (k = 0; k < 8; k++) + words[2] = (words[2] << 4) | (*p++ & 15); + + } + if (mpfr_signbit (value.f)) + words[0] |= 0x80000000; +} + +/* Convert VALUE to an integer with the current rounding mode, saturating to [MIN, MAX] and flagging OPERR whenever the result had to be clamped. */ +static long +extract_to_integer (mpfr_t value, long min, long max) +{ + long result; + mpfr_rnd_t rnd = get_cur_rnd (); + + if (mpfr_fits_slong_p (value, rnd)) + { + result = mpfr_get_si (value, rnd); + if (result > max) + { + result = max; + 
cur_exceptions |= FPSR_EXCEPTION_OPERR; + } + else if (result < min) + { + result = min; + cur_exceptions |= FPSR_EXCEPTION_OPERR; + } + } + else + { + if (!mpfr_signbit (value)) + result = max; + else + result = min; + cur_exceptions |= FPSR_EXCEPTION_OPERR; + } + return result; +} + +/* FMOVE FPn,<ea>: convert FP register (extra >> 7) & 7 to the destination format selected by (extra >> 10) & 7 and store it in a data register or in memory. Returns false for encodings that are not accepted. */ +static bool +fpuop_fmove_memory (uae_u32 opcode, uae_u32 extra) +{ + int mode, reg, size; + uaecptr pc; + uae_u32 addr; + uae_u32 words[3]; + /* Operand sizes in bytes per destination format (sz2 is used for register 7). Format 7, packed decimal with the k-factor taken from a data register, stores three longwords just like format 3, so it must also adjust the address register by 12. */ + static const int sz1[8] = {4, 4, 12, 12, 2, 8, 1, 12}; + static const int sz2[8] = {4, 4, 12, 12, 2, 8, 2, 12}; + + mpfr_clear_flags (); + cur_exceptions = 0; + mode = (opcode >> 3) & 7; + reg = opcode & 7; + size = (extra >> 10) & 7; + fpu_register &value = fpu.registers[(extra >> 7) & 7]; + + switch (mode) + { + case 0: + switch (size) + { + case 0: + m68k_dreg (regs, reg) = extract_to_integer (value.f, -0x7fffffff-1, 0x7fffffff); + break; + case 1: + m68k_dreg (regs, reg) = extract_to_single (value); + break; + case 4: + m68k_dreg (regs, reg) &= ~0xffff; + m68k_dreg (regs, reg) |= extract_to_integer (value.f, -32768, 32767) & 0xffff; + break; + case 6: + m68k_dreg (regs, reg) &= ~0xff; + m68k_dreg (regs, reg) |= extract_to_integer (value.f, -128, 127) & 0xff; + break; + default: + return false; + } + update_exceptions (); + return true; + case 1: + return false; + case 2: + addr = m68k_areg (regs, reg); + break; + case 3: + addr = m68k_areg (regs, reg); + break; + case 4: + addr = m68k_areg (regs, reg) - (reg == 7 ? 
sz2[size] : sz1[size]); + break; + case 5: + addr = m68k_areg (regs, reg) + (uae_s16) next_iword(); + break; + case 6: + addr = get_disp_ea_020 (m68k_areg (regs, reg), next_iword()); + break; + case 7: + switch (reg) + { + case 0: + addr = (uae_s16) next_iword(); + break; + case 1: + addr = next_ilong(); + break; + case 2: + case 3: + case 4: + /* PC-relative and immediate operands are not alterable, so they cannot be the destination of a store; reject them the same way get_fp_addr rejects PC-relative writes. */ + return false; + default: + return false; + } + } + + /* Convert the register to the requested format and write it to ADDR. */ + switch (size) + { + case 0: + put_long (addr, extract_to_integer (value.f, -0x7fffffff-1, 0x7fffffff)); + break; + case 1: + put_long (addr, extract_to_single (value)); + break; + case 2: + extract_to_extended (value, words); + put_long (addr, words[0]); + put_long (addr + 4, words[1]); + put_long (addr + 8, words[2]); + break; + case 3: + extract_to_packed (value, extra & 0x7f, words); + put_long (addr, words[0]); + put_long (addr + 4, words[1]); + put_long (addr + 8, words[2]); + break; + case 4: + put_word (addr, extract_to_integer (value.f, -32768, 32767)); + break; + case 5: + extract_to_double (value, words); + put_long (addr, words[0]); + put_long (addr + 4, words[1]); + break; + case 6: + put_byte (addr, extract_to_integer (value.f, -128, 127)); + break; + case 7: + extract_to_packed (value, m68k_dreg (regs, (extra >> 4) & 7) & 0x7f, words); + put_long (addr, words[0]); + put_long (addr + 4, words[1]); + put_long (addr + 8, words[2]); + break; + } + + switch (mode) + { + case 3: + m68k_areg (regs, reg) += reg == 7 ? sz2[size] : sz1[size]; + break; + case 4: + m68k_areg (regs, reg) -= reg == 7 ? 
sz2[size] : sz1[size]; + break; + } + + update_exceptions (); + return true; +} + +/* FMOVE/FMOVEM of the FPU control registers. LIST (bits 10-12 of EXTRA) selects the registers: bit 2 = FPCR, bit 1 = FPSR, bit 0 = instruction address; bit 13 of EXTRA selects the direction. Returns false for encodings that are not accepted. */ +static bool +fpuop_fmovem_control (uae_u32 opcode, uae_u32 extra) +{ + int list, mode, reg; + uae_u32 addr; + + list = (extra >> 10) & 7; + mode = (opcode >> 3) & 7; + reg = opcode & 7; + + if (list == 0) + return false; + + if (extra & 0x2000) + { + // FMOVEM to + if (mode == 0) + { + switch (list) + { + case 1: + m68k_dreg (regs, reg) = fpu.instruction_address; + break; + case 2: + m68k_dreg (regs, reg) = get_fpsr (); + break; + case 4: + m68k_dreg (regs, reg) = get_fpcr (); + break; + default: + return false; + } + } + else if (mode == 1) + { + if (list != 1) + return false; + m68k_areg (regs, reg) = fpu.instruction_address; + } + else + { + int nwords; + + if (!get_fp_addr (opcode, &addr, true)) + return false; + nwords = (list & 1) + ((list >> 1) & 1) + ((list >> 2) & 1); + if (mode == 4) + addr -= nwords * 4; + if (list & 4) + { + put_long (addr, get_fpcr ()); + addr += 4; + } + if (list & 2) + { + put_long (addr, get_fpsr ()); + addr += 4; + } + if (list & 1) + { + put_long (addr, fpu.instruction_address); + addr += 4; + } + if (mode == 4) + m68k_areg (regs, reg) = addr - nwords * 4; + else if (mode == 3) + m68k_areg (regs, reg) = addr; + } + } + else + { + // FMOVEM from + + if (mode == 0) + { + switch (list) + { + case 1: + fpu.instruction_address = m68k_dreg (regs, reg); + break; + case 2: + set_fpsr (m68k_dreg (regs, reg)); + break; + case 4: + set_fpcr (m68k_dreg (regs, reg)); + break; + default: + return false; + } + } + else if (mode == 1) + { + if (list != 1) + return false; + fpu.instruction_address = m68k_areg (regs, reg); + } + else if ((opcode & 077) == 074) + { + switch (list) + { + case 1: + fpu.instruction_address = next_ilong (); + break; + case 2: + set_fpsr (next_ilong ()); + break; + case 4: + set_fpcr (next_ilong ()); + break; + default: + return false; + } + } + else + { + int nwords; + + if (!get_fp_addr (opcode, &addr, false)) + return false; + nwords = 
(list & 1) + ((list >> 1) & 1) + ((list >> 2) & 1); + if (mode == 4) + addr -= nwords * 4; + if (list & 4) + { + set_fpcr (get_long (addr)); + addr += 4; + } + if (list & 2) + { + set_fpsr (get_long (addr)); + addr += 4; + } + if (list & 1) + { + fpu.instruction_address = get_long (addr); + addr += 4; + } + if (mode == 4) + m68k_areg (regs, reg) = addr - nwords * 4; + else if (mode == 3) + m68k_areg (regs, reg) = addr; + } + } + + return true; +} + +/* FMOVEM of FP data registers in extended format. The register list comes from the low byte of EXTRA, or from a data register when bit 11 of EXTRA is set; bit 13 selects a store to memory, bit 12 the list ordering. Returns false for encodings that are not accepted. */ +static bool +fpuop_fmovem_register (uae_u32 opcode, uae_u32 extra) +{ + uae_u32 addr; + uae_u32 words[3]; + int list; + int i; + + set_format (EXTENDED_PREC); + if (!get_fp_addr (opcode, &addr, extra & 0x2000)) + return false; + if (extra & 0x800) + list = m68k_dreg (regs, (extra >> 4) & 7) & 0xff; + else + list = extra & 0xff; + + if (extra & 0x2000) + { + // FMOVEM to memory + + switch (opcode & 070) + { + case 030: + return false; + case 040: + if (extra & 0x1000) + return false; + for (i = 7; i >= 0; i--) + if (list & (1 << i)) + { + extract_to_extended (fpu.registers[i], words); + addr -= 12; + put_long (addr, words[0]); + put_long (addr + 4, words[1]); + put_long (addr + 8, words[2]); + } + m68k_areg (regs, opcode & 7) = addr; + break; + default: + if ((extra & 0x1000) == 0) + return false; + for (i = 0; i < 8; i++) + if (list & (0x80 >> i)) + { + extract_to_extended (fpu.registers[i], words); + put_long (addr, words[0]); + put_long (addr + 4, words[1]); + put_long (addr + 8, words[2]); + addr += 12; + } + /* NOTE(review): mode 030 already returned false at the top of this switch, so the check below can never be true here. */ + if ((opcode & 070) == 030) + m68k_areg (regs, opcode & 7) = addr; + break; + } + } + else + { + // FMOVEM from memory + + if ((opcode & 070) == 040) + return false; + + if ((extra & 0x1000) == 0) + return false; + for (i = 0; i < 8; i++) + if (list & (0x80 >> i)) + { + words[0] = get_long (addr); + words[1] = get_long (addr + 4); + words[2] = get_long (addr + 8); + addr += 12; + set_from_extended (fpu.registers[i], words, false); + } + if ((opcode & 070) == 030) + m68k_areg (regs, opcode & 7) = addr; + } + 
return true; +} + +static int +do_getexp (mpfr_t value, mpfr_rnd_t rnd) +{ + int t = 0; + + if (mpfr_inf_p (value)) + { + mpfr_set_nan (value); + cur_exceptions |= FPSR_EXCEPTION_OPERR; + } + else if (!mpfr_nan_p (value) && !mpfr_zero_p (value)) + t = mpfr_set_si (value, mpfr_get_exp (value) - 1, rnd); + return t; +} + +static int +do_getman (mpfr_t value) +{ + if (mpfr_inf_p (value)) + { + mpfr_set_nan (value); + cur_exceptions |= FPSR_EXCEPTION_OPERR; + } + else if (!mpfr_nan_p (value) && !mpfr_zero_p (value)) + mpfr_set_exp (value, 1); + return 0; +} + +static int +do_scale (mpfr_t value, mpfr_t reg, mpfr_rnd_t rnd) +{ + long scale; + int t = 0; + + if (mpfr_nan_p (value)) + ; + else if (mpfr_inf_p (value)) + { + mpfr_set_nan (value); + cur_exceptions |= FPSR_EXCEPTION_OPERR; + } + else if (mpfr_fits_slong_p (value, rnd)) + { + scale = mpfr_get_si (value, MPFR_RNDZ); + mpfr_clear_inexflag (); + t = mpfr_mul_2si (value, reg, scale, rnd); + } + else + mpfr_set_inf (value, -mpfr_signbit (value)); + return t; +} + +static int +do_remainder (mpfr_t value, mpfr_t reg, mpfr_rnd_t rnd) +{ + long quo; + int t = 0; + + if (mpfr_nan_p (value) || mpfr_nan_p (reg)) + ; + else if (mpfr_zero_p (value) || mpfr_inf_p (reg)) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_remquo (value, &quo, reg, value, rnd); + if (quo < 0) + quo = (-quo & 0x7f) | 0x80; + else + quo &= 0x7f; + fpu.fpsr.quotient = quo << 16; + return t; +} + +// Unfortunately, mpfr_fmod does not return the quotient bits, so we +// have to reimplement it here +static int +mpfr_rem1 (mpfr_t rem, int *quo, mpfr_t x, mpfr_t y, mpfr_rnd_t rnd) +{ + mpfr_exp_t ex, ey; + int inex, sign, signx = mpfr_signbit (x); + mpz_t mx, my, r; + + mpz_init (mx); + mpz_init (my); + mpz_init (r); + + ex = mpfr_get_z_2exp (mx, x); /* x = mx*2^ex */ + ey = mpfr_get_z_2exp (my, y); /* y = my*2^ey */ + + /* to get rid of sign problems, we compute it separately: + quo(-x,-y) = quo(x,y), rem(-x,-y) = -rem(x,y) + quo(-x,y) = -quo(x,y), 
rem(-x,y) = -rem(x,y) + thus quo = sign(x/y)*quo(|x|,|y|), rem = sign(x)*rem(|x|,|y|) */ + sign = (signx != mpfr_signbit (y)); + mpz_abs (mx, mx); + mpz_abs (my, my); + + /* divide my by 2^k if possible to make operations mod my easier */ + { + unsigned long k = mpz_scan1 (my, 0); + ey += k; + mpz_fdiv_q_2exp (my, my, k); + } + + if (ex <= ey) + { + /* q = x/y = mx/(my*2^(ey-ex)) */ + mpz_mul_2exp (my, my, ey - ex); /* divide mx by my*2^(ey-ex) */ + /* 0 <= |r| <= |my|, r has the same sign as mx */ + mpz_tdiv_qr (mx, r, mx, my); + /* mx is the quotient */ + mpz_tdiv_r_2exp (mx, mx, 7); + *quo = mpz_get_si (mx); + } + else /* ex > ey */ + { + /* to get the low 7 more bits of the quotient, we first compute + R = X mod Y*2^7, where X and Y are defined below. Then the + low 7 of the quotient are floor(R/Y). */ + mpz_mul_2exp (my, my, 7); /* 2^7*Y */ + + mpz_set_ui (r, 2); + mpz_powm_ui (r, r, ex - ey, my); /* 2^(ex-ey) mod my */ + mpz_mul (r, r, mx); + mpz_mod (r, r, my); + + /* now 0 <= r < 2^7*Y */ + mpz_fdiv_q_2exp (my, my, 7); /* back to Y */ + mpz_tdiv_qr (mx, r, r, my); + /* oldr = mx*my + newr */ + *quo = mpz_get_si (mx); + + /* now 0 <= |r| < |my| */ + } + + /* r is the integer remainder scaled by 2^min(ex,ey); convert it back to an MPFR value carrying the sign of x */ + if (mpz_cmp_ui (r, 0) == 0) + { + inex = mpfr_set_ui (rem, 0, MPFR_RNDN); + /* take into account sign of x */ + if (signx) + mpfr_neg (rem, rem, MPFR_RNDN); + } + else + { + /* take into account sign of x */ + if (signx) + mpz_neg (r, r); + inex = mpfr_set_z_2exp (rem, r, ex > ey ? 
ey : ex, rnd); + } + + /* bit 7 of the returned quotient carries the sign of x/y */ + if (sign) + *quo |= 0x80; + + mpz_clear (mx); + mpz_clear (my); + mpz_clear (r); + + return inex; +} + +/* FMOD: VALUE = REG modulo VALUE via mpfr_rem1, storing the quotient bits in the FPSR quotient field. A zero divisor or infinite dividend gives NaN and flags OPERR; a zero dividend or infinite divisor returns the dividend with a zero quotient. */ +static int +do_fmod (mpfr_t value, mpfr_t reg, mpfr_rnd_t rnd) +{ + int t = 0; + + if (mpfr_nan_p (value) || mpfr_nan_p (reg)) + mpfr_set_nan (value); + else if (mpfr_zero_p (value) || mpfr_inf_p (reg)) + { + mpfr_set_nan (value); + cur_exceptions |= FPSR_EXCEPTION_OPERR; + } + else if (mpfr_zero_p (reg) || mpfr_inf_p (value)) + { + fpu.fpsr.quotient = 0; + t = mpfr_set (value, reg, rnd); + } + else + { + int quo; + + t = mpfr_rem1 (value, &quo, reg, value, rnd); + fpu.fpsr.quotient = quo << 16; + } + return t; +} + +/* FCMP: set the condition codes from comparing DEST with SOURCE; neither operand is modified. */ +static void +do_fcmp (mpfr_t source, mpfr_t dest) +{ + uae_u32 flags = 0; + + if (mpfr_nan_p (source) || mpfr_nan_p (dest)) + flags |= FPSR_CCB_NAN; + else + { + int cmp = mpfr_cmp (dest, source); + if (cmp < 0) + flags |= FPSR_CCB_NEGATIVE; + else if (cmp == 0) + { + flags |= FPSR_CCB_ZERO; + if ((mpfr_zero_p (dest) || mpfr_inf_p (dest)) && mpfr_signbit (dest)) + flags |= FPSR_CCB_NEGATIVE; + } + } + set_fpccr (flags); +} + +/* FTST: set the condition codes from the sign and class (NaN, zero, infinity) of VALUE. */ +static void +do_ftst (mpfr_t value) +{ + uae_u32 flags = 0; + + if (mpfr_signbit (value)) + flags |= FPSR_CCB_NEGATIVE; + if (mpfr_nan_p (value)) + flags |= FPSR_CCB_NAN; + else if (mpfr_zero_p (value)) + flags |= FPSR_CCB_ZERO; + else if (mpfr_inf_p (value)) + flags |= FPSR_CCB_INFINITY; + set_fpccr (flags); +} + +/* Execute a general arithmetic FPU instruction (register or memory source, FP register destination). Returns false when the encoding is rejected; the temporary operand is released on every exit path through the out label. */ +static bool +fpuop_general (uae_u32 opcode, uae_u32 extra) +{ + mpfr_prec_t prec = get_cur_prec (); + mpfr_rnd_t rnd = get_cur_rnd (); + int reg = (extra >> 7) & 7; + int t = 0; + fpu_register value; + bool ret; + + mpfr_init2 (value.f, prec); + value.nan_bits = DEFAULT_NAN_BITS; + value.nan_sign = 0; + + mpfr_clear_flags (); + set_format (prec); + cur_exceptions = 0; + cur_instruction_address = m68k_getpc () - 4; + if ((extra & 0xfc00) == 0x5c00) + { + // FMOVECR + int rom_index = extra & 0x7f; + if (rom_index == 0 || (rom_index >= 11 && rom_index <= 15)) + t = mpfr_set (value.f, 
fpu_constant_rom[rom_index], rnd); + else if (rom_index >= 48 && rom_index <= 63) + t = mpfr_set (value.f, fpu_constant_rom[rom_index - 32], rnd); + else + mpfr_set_zero (value.f, 0); + set_fp_register (reg, value, t, rnd, true); + } + else if (extra & 0x40) + { + /* Single/double rounding variants (FSxxx / FDxxx); the table is indexed with bit 2, the FS/FD selector, masked out. */ + static const char valid[64] = + { + 1, 1, 0, 0, 1, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 1, 1, 0, 1, 1, + 1, 0, 0, 0, 1, 0, 0, 0 + }; + + if (extra & 4) + // FD... + prec = DOUBLE_PREC; + else + // FS... + prec = SINGLE_PREC; + set_format (prec); + MPFR_DECL_INIT (value2, prec); + + if (!fpu.is_integral) + { + ret = false; + goto out; + } + if (!valid[extra & 0x3b]) + { + ret = false; + goto out; + } + if (!get_fp_value (opcode, extra, value)) + { + ret = false; + goto out; + } + + switch (extra & 0x3f) + { + case 0: // FSMOVE + case 4: // FDMOVE + /* This copy rounds to the narrower precision, so keep its ternary value: set_fp_register hands it to mpfr_subnormalize, which needs it to round subnormal results correctly. */ + t = mpfr_set (value2, value.f, rnd); + break; + case 1: // FSSQRT + case 5: // FDSQRT + if (mpfr_sgn (value.f) < 0) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_sqrt (value2, value.f, rnd); + break; + case 24: // FSABS + case 28: // FDABS + t = mpfr_abs (value2, value.f, rnd); + break; + case 26: // FSNEG + case 30: // FDNEG + t = mpfr_neg (value2, value.f, rnd); + break; + case 32: // FSDIV + case 36: // FDDIV + if (mpfr_zero_p (value.f)) + { + if (mpfr_regular_p (fpu.registers[reg].f)) + cur_exceptions |= FPSR_EXCEPTION_DZ; + else if (mpfr_zero_p (fpu.registers[reg].f)) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + } + else if (mpfr_inf_p (value.f) && mpfr_inf_p (fpu.registers[reg].f)) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_div (value2, fpu.registers[reg].f, value.f, rnd); + break; + case 34: // FSADD + case 38: // FDADD + if (mpfr_inf_p (fpu.registers[reg].f) && mpfr_inf_p (value.f) + && mpfr_signbit (fpu.registers[reg].f) != mpfr_signbit (value.f)) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_add (value2, fpu.registers[reg].f, value.f, rnd); + break; + case 35: // FSMUL + 
case 39: // FDMUL + if ((mpfr_zero_p (value.f) && mpfr_inf_p (fpu.registers[reg].f)) + || (mpfr_inf_p (value.f) && mpfr_zero_p (fpu.registers[reg].f))) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_mul (value2, fpu.registers[reg].f, value.f, rnd); + break; + case 40: // FSSUB + case 44: // FDSUB + if (mpfr_inf_p (fpu.registers[reg].f) && mpfr_inf_p (value.f) + && mpfr_signbit (fpu.registers[reg].f) == mpfr_signbit (value.f)) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_sub (value2, fpu.registers[reg].f, value.f, rnd); + break; + } + set_fp_register (reg, value2, t, rnd, true); + } + else if ((extra & 0x30) == 0x30) + { + /* Operations 0x30-0x3a: FSINCOS (0x30-0x37), FCMP (0x38) and FTST (0x3a). */ + if ((extra & 15) > 10 || (extra & 15) == 9) + { + ret = false; + goto out; + } + if (!get_fp_value (opcode, extra, value)) + { + ret = false; + goto out; + } + + if ((extra & 15) < 8) + { + // FSINCOS + int reg2 = extra & 7; + MPFR_DECL_INIT (value2, prec); + + if (mpfr_inf_p (value.f)) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + /* the sine ends up in value.f (register reg), the cosine in value2 (register reg2); t is split below into its low two bits for the sine and the remaining bits for the cosine */ + t = mpfr_sin_cos (value.f, value2, value.f, rnd); + if (reg2 != reg) + set_fp_register (reg2, value2, t >> 2, rnd, false); + set_fp_register (reg, value, t & 3, rnd, true); + } + else if ((extra & 15) == 8) + // FCMP + do_fcmp (value.f, fpu.registers[reg].f); + else + // FTST + do_ftst (value.f); + } + else + { + /* Indexed by the low six bits of the extension word; 1 marks an operation handled by the switch below. */ + static const char valid[64] = + { + 1, 1, 1, 1, 1, 0, 1, 0, + 1, 1, 1, 0, 1, 1, 1, 1, + 1, 1, 1, 0, 1, 1, 1, 0, + 1, 1, 1, 0, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1 + }; + if (!valid[extra & 0x3f]) + { + ret = false; + goto out; + } + if (!get_fp_value (opcode, extra, value)) + { + ret = false; + goto out; + } + + switch (extra & 0x3f) + { + case 0: // FMOVE + break; + case 1: // FINT + t = mpfr_rint (value.f, value.f, rnd); + break; + case 2: // FSINH + t = mpfr_sinh (value.f, value.f, rnd); + break; + case 3: // FINTRZ + t = mpfr_rint (value.f, value.f, MPFR_RNDZ); + break; + case 4: // FSQRT + if (mpfr_sgn (value.f) < 0) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_sqrt (value.f, 
value.f, rnd); + break; + case 6: // FLOGNP1 + /* an operand of exactly -1 flags DZ, anything below -1 flags OPERR */ + if (!mpfr_nan_p (value.f)) + { + int cmp = mpfr_cmp_si (value.f, -1); + if (cmp == 0) + cur_exceptions |= FPSR_EXCEPTION_DZ; + else if (cmp < 0) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + } + t = mpfr_log1p (value.f, value.f, rnd); + break; + case 8: // FETOXM1 + t = mpfr_expm1 (value.f, value.f, rnd); + break; + case 9: // FTANH + t = mpfr_tanh (value.f, value.f, rnd); + break; + case 10: // FATAN + t = mpfr_atan (value.f, value.f, rnd); + break; + case 12: // FASIN + if (mpfr_cmpabs (value.f, FPU_CONSTANT_ONE) > 0) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_asin (value.f, value.f, rnd); + break; + case 13: // FATANH + if (mpfr_cmpabs (value.f, FPU_CONSTANT_ONE) > 0) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_atanh (value.f, value.f, rnd); + break; + case 14: // FSIN + if (mpfr_inf_p (value.f)) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_sin (value.f, value.f, rnd); + break; + case 15: // FTAN + if (mpfr_inf_p (value.f)) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_tan (value.f, value.f, rnd); + break; + case 16: // FETOX + t = mpfr_exp (value.f, value.f, rnd); + break; + case 17: // FTWOTOX + t = mpfr_ui_pow (value.f, 2, value.f, rnd); + break; + case 18: // FTENTOX + t = mpfr_ui_pow (value.f, 10, value.f, rnd); + break; + /* the three logarithms share one rule: a zero operand flags DZ, a negative operand flags OPERR */ + case 20: // FLOGN + if (mpfr_zero_p (value.f)) + cur_exceptions |= FPSR_EXCEPTION_DZ; + else if (mpfr_sgn (value.f) < 0) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_log (value.f, value.f, rnd); + break; + case 21: // FLOG10 + if (mpfr_zero_p (value.f)) + cur_exceptions |= FPSR_EXCEPTION_DZ; + else if (mpfr_sgn (value.f) < 0) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_log10 (value.f, value.f, rnd); + break; + case 22: // FLOG2 + if (mpfr_zero_p (value.f)) + cur_exceptions |= FPSR_EXCEPTION_DZ; + else if (mpfr_sgn (value.f) < 0) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_log2 (value.f, value.f, rnd); + break; + case 24: // FABS + t = 
mpfr_abs (value.f, value.f, rnd); + /* keep the separately tracked NaN sign in step with the value */ + value.nan_sign = 0; + break; + case 25: // FCOSH + t = mpfr_cosh (value.f, value.f, rnd); + break; + case 26: // FNEG + t = mpfr_neg (value.f, value.f, rnd); + value.nan_sign = !value.nan_sign; + break; + case 28: // FACOS + if (mpfr_cmpabs (value.f, FPU_CONSTANT_ONE) > 0) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_acos (value.f, value.f, rnd); + break; + case 29: // FCOS + if (mpfr_inf_p (value.f)) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_cos (value.f, value.f, rnd); + break; + case 30: // FGETEXP + t = do_getexp (value.f, rnd); + break; + case 31: // FGETMAN + t = do_getman (value.f); + break; + /* dyadic operations: fpu.registers[reg] is the destination operand, value.f the source operand and the place where the result is built */ + case 32: // FDIV + if (mpfr_zero_p (value.f)) + { + if (mpfr_regular_p (fpu.registers[reg].f)) + cur_exceptions |= FPSR_EXCEPTION_DZ; + else if (mpfr_zero_p (fpu.registers[reg].f)) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + } + else if (mpfr_inf_p (value.f) && mpfr_inf_p (fpu.registers[reg].f)) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_div (value.f, fpu.registers[reg].f, value.f, rnd); + break; + case 33: // FMOD + t = do_fmod (value.f, fpu.registers[reg].f, rnd); + break; + case 34: // FADD + if (mpfr_inf_p (fpu.registers[reg].f) && mpfr_inf_p (value.f) + && mpfr_signbit (fpu.registers[reg].f) != mpfr_signbit (value.f)) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_add (value.f, fpu.registers[reg].f, value.f, rnd); + break; + case 35: // FMUL + if ((mpfr_zero_p (value.f) && mpfr_inf_p (fpu.registers[reg].f)) + || (mpfr_inf_p (value.f) && mpfr_zero_p (fpu.registers[reg].f))) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_mul (value.f, fpu.registers[reg].f, value.f, rnd); + break; + case 36: // FSGLDIV + { + MPFR_DECL_INIT (value2, SINGLE_PREC); + + set_format (SINGLE_PREC); + if (mpfr_zero_p (value.f)) + { + if (mpfr_regular_p (fpu.registers[reg].f)) + cur_exceptions |= FPSR_EXCEPTION_DZ; + else if (mpfr_zero_p (fpu.registers[reg].f)) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + } + else if 
(mpfr_inf_p (value.f) && mpfr_inf_p (fpu.registers[reg].f)) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_div (value2, fpu.registers[reg].f, value.f, rnd); + mpfr_set (value.f, value2, rnd); + } + break; + case 37: // FREM + t = do_remainder (value.f, fpu.registers[reg].f, rnd); + break; + case 38: // FSCALE + t = do_scale (value.f, fpu.registers[reg].f, rnd); + break; + case 39: // FSGLMUL + { + MPFR_DECL_INIT (value2, SINGLE_PREC); + + set_format (SINGLE_PREC); + if ((mpfr_zero_p (value.f) && mpfr_inf_p (fpu.registers[reg].f)) + || (mpfr_inf_p (value.f) && mpfr_zero_p (fpu.registers[reg].f))) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_mul (value2, fpu.registers[reg].f, value.f, rnd); + mpfr_set (value.f, value2, rnd); + } + break; + case 40: // FSUB + if (mpfr_inf_p (fpu.registers[reg].f) && mpfr_inf_p (value.f) + && mpfr_signbit (fpu.registers[reg].f) == mpfr_signbit (value.f)) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_sub (value.f, fpu.registers[reg].f, value.f, rnd); + break; + } + set_fp_register (reg, value, t, rnd, true); + } + update_exceptions (); + ret = true; + out: + mpfr_clear (value.f); + return ret; +} + +/* Entry point for general FPU opcodes: dispatch on bits 13-15 of the extension word. When the selected handler rejects the encoding, the PC is moved back by 4 and op_illg is called. */ +void +fpuop_arithmetic (uae_u32 opcode, uae_u32 extra) +{ + bool valid; + + switch ((extra >> 13) & 7) + { + case 3: + valid = fpuop_fmove_memory (opcode, extra); + break; + case 4: + case 5: + valid = fpuop_fmovem_control (opcode, extra); + break; + case 6: + case 7: + valid = fpuop_fmovem_register (opcode, extra); + break; + case 0: + case 2: + valid = fpuop_general (opcode, extra); + break; + default: + valid = false; + break; + } + + if (!valid) + { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + } +} + +/* Evaluate the FPU conditional predicate PRED (low five bits) against the current condition code byte. Predicates with bit 4 set additionally flag BSUN and the accrued IOP bit when the NAN condition code is set. */ +static bool +check_fp_cond (uae_u32 pred) +{ + uae_u32 fpcc = get_fpccr (); + + if ((pred & 16) != 0 && (fpcc & FPSR_CCB_NAN) != 0) + { + // IEEE non-aware test + set_exception_status (get_exception_status () | FPSR_EXCEPTION_BSUN); + set_accrued_exception (get_accrued_exception () | FPSR_ACCR_IOP); + } + + 
switch (pred & 15) + { + case 0: // F / SF + return false; + case 1: // EQ /SEQ + return (fpcc & FPSR_CCB_ZERO) != 0; + case 2: // OGT / GT + return (fpcc & (FPSR_CCB_NAN | FPSR_CCB_ZERO | FPSR_CCB_NEGATIVE)) == 0; + case 3: // OGE / GE + return (fpcc & FPSR_CCB_ZERO) != 0 || (fpcc & (FPSR_CCB_NAN | FPSR_CCB_NEGATIVE)) == 0; + case 4: // OLT / LT + return (fpcc & (FPSR_CCB_NEGATIVE | FPSR_CCB_NAN | FPSR_CCB_ZERO)) == FPSR_CCB_NEGATIVE; + case 5: // OLE / LE + return (fpcc & FPSR_CCB_ZERO) != 0 || (fpcc & (FPSR_CCB_NEGATIVE | FPSR_CCB_NAN)) == FPSR_CCB_NEGATIVE; + case 6: // OGL / GL + return (fpcc & (FPSR_CCB_NAN | FPSR_CCB_ZERO)) == 0; + case 7: // OR / GLE + return (fpcc & FPSR_CCB_NAN) == 0; + case 8: // UN / NGLE + return (fpcc & FPSR_CCB_NAN) != 0; + case 9: // UEQ / NGL + return (fpcc & (FPSR_CCB_NAN | FPSR_CCB_ZERO)) != 0; + case 10: // UGT / NLE + return (fpcc & FPSR_CCB_NAN) != 0 || (fpcc & (FPSR_CCB_NEGATIVE | FPSR_CCB_ZERO)) == 0; + case 11: // UGE / NLT + return (fpcc & (FPSR_CCB_NEGATIVE | FPSR_CCB_NAN | FPSR_CCB_ZERO)) != FPSR_CCB_NEGATIVE; + case 12: // ULT / NGE + return (fpcc & FPSR_CCB_NAN) != 0 || (fpcc & (FPSR_CCB_NEGATIVE | FPSR_CCB_ZERO)) == FPSR_CCB_NEGATIVE; + case 13: // ULE / NGT + return (fpcc & (FPSR_CCB_NAN | FPSR_CCB_ZERO | FPSR_CCB_NEGATIVE)) != 0; + case 14: // NE / SNE + return (fpcc & FPSR_CCB_ZERO) == 0; + case 15: // T / ST + return true; + default: + return false; + } +} + +void +fpuop_bcc (uae_u32 opcode, uaecptr pc, uae_u32 disp) +{ + if (check_fp_cond (opcode)) + { + if (!(opcode & (1 << 6))) + disp = (uae_s16) disp; + m68k_setpc (pc + disp); + } +} + +void +fpuop_scc (uae_u32 opcode, uae_u32 extra) +{ + uae_u32 addr; + int value = check_fp_cond (extra) ? 
0xff : 0; + if ((opcode & 070) == 0) + { + int reg = opcode & 7; + m68k_dreg (regs, reg) = (m68k_dreg (regs, reg) & ~0xff) | value; + } + else if (!get_fp_addr (opcode, &addr, true)) + { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + } + else + { + switch (opcode & 070) + { + case 030: + m68k_areg (regs, opcode & 7) += (opcode & 7) == 7 ? 2 : 1; + break; + case 040: + addr -= (opcode & 7) == 7 ? 2 : 1; + m68k_areg (regs, opcode & 7) = addr; + } + put_byte (addr, value); + } +} + +void +fpuop_dbcc (uae_u32 opcode, uae_u32 extra) +{ + uaecptr pc = m68k_getpc (); + uae_s16 disp = next_iword (); + + if (!check_fp_cond (extra)) + { + int reg = opcode & 7; + uae_u16 cnt = (m68k_dreg (regs, reg) & 0xffff) - 1; + m68k_dreg (regs, reg) = (m68k_dreg (regs, reg) & ~0xffff) | cnt; + if (cnt != 0xffff) + m68k_setpc (pc + disp); + } +} + +void +fpuop_trapcc (uae_u32, uaecptr oldpc, uae_u32 extra) +{ + if (check_fp_cond (extra)) + Exception (7, oldpc - 2); +} + +void +fpuop_save (uae_u32 opcode) +{ + uae_u32 addr; + + if ((opcode & 070) == 030 + || !get_fp_addr (opcode, &addr, true)) + { + m68k_setpc (m68k_getpc () - 2); + op_illg (opcode); + return; + } + + if (fpu.is_integral) + { + // 4 byte 68040 IDLE frame + // FIXME: generate proper FPU stack frames that does not result + // in undefined behaviour from FPSP040 + if ((opcode & 070) == 040) + { + addr -= 4; + m68k_areg (regs, opcode & 7) = addr; + } + put_long (addr, 0x41000000); + } + else + { + // 28 byte 68881 IDLE frame + if ((opcode & 070) == 040) + { + addr -= 28; + m68k_areg (regs, opcode & 7) = addr; + } + put_long (addr, 0x1f180000); + for (int i = 0; i < 6; i++) + { + addr += 4; + put_long (addr, 0); + } + } +} + +void +fpuop_restore (uae_u32 opcode) +{ + uae_u32 addr; + uae_u32 format; + + if ((opcode & 070) == 040 + || !get_fp_addr (opcode, &addr, false)) + { + m68k_setpc (m68k_getpc () - 2); + op_illg (opcode); + return; + } + + format = get_long (addr); + addr += 4; + if ((format & 0xff000000) == 0) + 
// NULL frame + fpu_reset (); + else + addr += (format & 0xff0000) >> 16; + if ((opcode & 070) == 030) + m68k_areg (regs, opcode & 7) = addr; +} + +void fpu_set_fpsr(uae_u32 new_fpsr) +{ + set_fpsr(new_fpsr); +} + +uae_u32 fpu_get_fpsr(void) +{ + return get_fpsr(); +} + +void fpu_set_fpcr(uae_u32 new_fpcr) +{ + set_fpcr(new_fpcr); +} + +uae_u32 fpu_get_fpcr(void) +{ + return get_fpcr(); +} diff --git a/BasiliskII/src/uae_cpu/fpu/fpu_uae.cpp b/BasiliskII/src/uae_cpu/fpu/fpu_uae.cpp index ffc784a5..ca4b841d 100644 --- a/BasiliskII/src/uae_cpu/fpu/fpu_uae.cpp +++ b/BasiliskII/src/uae_cpu/fpu/fpu_uae.cpp @@ -1,31 +1,42 @@ /* - * fpu/fpu_uae.cpp + * fpu/fpu_uae.cpp - the old UAE FPU * - * Basilisk II (C) 1997-2008 Christian Bauer + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II * - * MC68881/68040 fpu emulation + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. * - * Original UAE FPU, copyright 1996 Herman ten Brugge - * Rewrite for x86, copyright 1999-2000 Lauri Pesonen - * New framework, copyright 2000 Gwenole Beauchesne - * Adapted for JIT compilation (c) Bernd Meyer, 2000 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * MC68881/68040 fpu emulation * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
+ * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - /* + * UAE - The Un*x Amiga Emulator + * + * MC68881 emulation + * + * Copyright 1996 Herman ten Brugge + * + * * Following fixes by Lauri Pesonen, July 1999: * * FMOVEM list handling: @@ -86,9 +97,8 @@ * - Precision rounding single/double */ + #include "sysdeps.h" -#include -#include #include "memory.h" #include "readcpu.h" #include "newcpu.h" @@ -97,6 +107,17 @@ #include "fpu/fpu.h" #include "fpu/fpu_uae.h" +#ifdef HAVE_NEW_HEADERS +#define _GLIBCPP_USE_C99 1 +# include +# include +using namespace __gnu_cxx; +#undef _GLIBCPP_USE_C99 +#else +# include +# include +#endif + /* Global FPU context */ fpu_t fpu; @@ -166,8 +187,8 @@ PUBLIC void FFPU dump_registers(const char * str) sprintf(temp_str, "%s: %.04f, %.04f, %.04f, %.04f, %.04f, %.04f, %.04f, %.04f\n", str, - get_register(0), get_register(1), get_register(2), get_register(3), - 
get_register(4), get_register(5), get_register(6), get_register(7) ); + fpu_get_register(0), fpu_get_register(1), fpu_get_register(2), fpu_get_register(3), + fpu_get_register(4), fpu_get_register(5), fpu_get_register(6), fpu_get_register(7) ); fpu_debug((temp_str)); } @@ -195,9 +216,7 @@ PUBLIC void FFPU dump_registers(const char *) { } -PUBLIC void FFPU dump_first_bytes(uae_u8 *, uae_s32) -{ -} +#define dump_first_bytes(a,b) #endif @@ -219,10 +238,10 @@ PRIVATE inline fpu_register FFPU round_to_nearest(fpu_register const & x) PRIVATE inline bool FFPU do_isnan(fpu_register const & r) { - uae_u32 * p = (uae_u32 *)&r; - if ((p[FHI] & 0x7FF00000) == 0x7FF00000) { + fpu_register_parts const p = { r }; + if ((p.parts[FHI] & 0x7FF00000) == 0x7FF00000) { // logical or is faster here. - if ((p[FHI] & 0x000FFFFF) || p[FLO]) { + if ((p.parts[FHI] & 0x000FFFFF) || p.parts[FLO]) { return true; } } @@ -235,8 +254,8 @@ PRIVATE inline bool FFPU do_isnan(fpu_register const & r) PRIVATE inline bool FFPU do_isinf(fpu_register const & r) { - uae_u32 * p = (uae_u32 *)&r; - if (((p[FHI] & 0x7FF00000) == 0x7FF00000) && p[FLO] == 0) { + fpu_register_parts const p = { r }; + if ((p.parts[FHI] & 0x7FF00000) == 0x7FF00000 && p.parts[FLO] == 0) { return true; } return false; @@ -248,8 +267,8 @@ PRIVATE inline bool FFPU do_isinf(fpu_register const & r) PRIVATE inline bool FFPU do_isneg(fpu_register const & r) { - uae_u32 * p = (uae_u32 *)&r; - return ((p[FHI] & 0x80000000) != 0); + fpu_register_parts const p = { r }; + return ((p.parts[FHI] & 0x80000000) != 0); } #ifndef HAVE_ISZERO @@ -258,8 +277,8 @@ PRIVATE inline bool FFPU do_isneg(fpu_register const & r) PRIVATE inline bool FFPU do_iszero(fpu_register const & r) { - uae_u32 * p = (uae_u32 *)&r; - return (((p[FHI] & 0x7FF00000) == 0) && p[FLO] == 0); + fpu_register_parts const p = { r }; + return (((p.parts[FHI] & 0x7FF00000) == 0) && p.parts[FLO] == 0); } // May be optimized for particular processors @@ -293,77 +312,70 @@ PRIVATE inline 
void FFPU get_source_flags(fpu_register const & r) fl_source.in_range = !fl_source.zero && !fl_source.infinity && !fl_source.nan; } -PRIVATE inline void FFPU make_nan(fpu_register & r) +PRIVATE inline void FFPU make_nan(fpu_register & r, bool negative) { - uae_u32 * const p = (uae_u32 *)&r; - p[FLO] = 0xffffffff; - p[FHI] = 0x7fffffff; + fpu_register_parts p; + p.parts[FLO] = 0xffffffff; + p.parts[FHI] = negative ? 0xffffffff : 0x7fffffff; + r = p.val; } -PRIVATE inline void FFPU make_zero_positive(fpu_register & r) +PRIVATE inline void FFPU make_zero(fpu_register & r, bool negative) { - uae_u32 * const p = (uae_u32 *)&r; - p[FLO] = p[FHI] = 0; + fpu_register_parts p; + p.parts[FLO] = 0; + p.parts[FHI] = negative ? 0x80000000 : 0; + r = p.val; } -PRIVATE inline void FFPU make_zero_negative(fpu_register & r) +PRIVATE inline void FFPU make_inf(fpu_register & r, bool negative) { - uae_u32 * const p = (uae_u32 *)&r; - p[FLO] = 0; - p[FHI] = 0x80000000; -} - -PRIVATE inline void FFPU make_inf_positive(fpu_register & r) -{ - uae_u32 * const p = (uae_u32 *)&r; - p[FLO] = 0; - p[FHI] = 0x7FF00000; -} - -PRIVATE inline void FFPU make_inf_negative(fpu_register & r) -{ - uae_u32 * const p = (uae_u32 *)&r; - p[FLO] = 0; - p[FHI] = 0xFFF00000; + fpu_register_parts p; + p.parts[FLO] = 0; + p.parts[FHI] = negative ? 
0xFFF00000 : 0x7FF00000; + r = p.val; } PRIVATE inline void FFPU fast_scale(fpu_register & r, int add) { - uae_u32 * const p = (uae_u32 *)&r; - int exp = (p[FHI] & 0x7FF00000) >> 20; + fpu_register_parts p = { r }; + int exp = (p.parts[FHI] & 0x7FF00000) >> 20; // TODO: overflow flags exp += add; if(exp >= 2047) { - make_inf_positive(r); + make_inf(r, false); + return; } else if(exp < 0) { // keep sign (+/- 0) - p[FHI] &= 0x80000000; + p.parts[FHI] &= 0x80000000; } else { - p[FHI] = (p[FHI] & 0x800FFFFF) | ((uae_u32)exp << 20); + p.parts[FHI] = (p.parts[FHI] & 0x800FFFFF) | ((uae_u32)exp << 20); } + r = p.val; } PRIVATE inline fpu_register FFPU fast_fgetexp(fpu_register const & r) { - uae_u32 * const p = (uae_u32 *)&r; - int exp = (p[FHI] & 0x7FF00000) >> 20; + fpu_register_parts const p = { r }; + int exp = (p.parts[FHI] & 0x7FF00000) >> 20; return( exp - 1023 ); } // Normalize to range 1..2 PRIVATE inline void FFPU fast_remove_exponent(fpu_register & r) { - uae_u32 * const p = (uae_u32 *)&r; - p[FHI] = (p[FHI] & 0x800FFFFF) | 0x3FF00000; + fpu_register_parts p = { r }; + p.parts[FHI] = (p.parts[FHI] & 0x800FFFFF) | 0x3FF00000; + r = p.val; } // The sign of the quotient is the exclusive-OR of the sign bits // of the source and destination operands. PRIVATE inline uae_u32 FFPU get_quotient_sign(fpu_register const & ra, fpu_register const & rb) { - uae_u32 * const a = (uae_u32 *)&ra; - uae_u32 * const b = (uae_u32 *)&rb; - return (((a[FHI] ^ b[FHI]) & 0x80000000) ? FPSR_QUOTIENT_SIGN : 0); + fpu_register_parts const a = { ra }; + fpu_register_parts const b = { rb }; + return (((a.parts[FHI] ^ b.parts[FHI]) & 0x80000000) ? 
FPSR_QUOTIENT_SIGN : 0); } // Quotient Byte is loaded with the sign and least significant @@ -381,13 +393,15 @@ PRIVATE inline fpu_register FFPU make_single(uae_u32 value) return (0.0); fpu_register result; - uae_u32 * p = (uae_u32 *)&result; + fpu_register_parts p; uae_u32 sign = (value & 0x80000000); uae_u32 exp = ((value & 0x7F800000) >> 23) + 1023 - 127; - p[FLO] = value << 29; - p[FHI] = sign | (exp << 20) | ((value & 0x007FFFFF) >> 3); + p.parts[FLO] = value << 29; + p.parts[FHI] = sign | (exp << 20) | ((value & 0x007FFFFF) >> 3); + + result = p.val; fpu_debug(("make_single (%X) = %.04f\n",value,(double)result)); @@ -401,10 +415,10 @@ PRIVATE inline uae_u32 FFPU extract_single(fpu_register const & src) return 0; uae_u32 result; - uae_u32 *p = (uae_u32 *)&src; + fpu_register_parts const p = { src }; - uae_u32 sign = (p[FHI] & 0x80000000); - uae_u32 exp = (p[FHI] & 0x7FF00000) >> 20; + uae_u32 sign = (p.parts[FHI] & 0x80000000); + uae_u32 exp = (p.parts[FHI] & 0x7FF00000) >> 20; if(exp + 127 < 1023) { exp = 0; @@ -414,7 +428,7 @@ PRIVATE inline uae_u32 FFPU extract_single(fpu_register const & src) exp = exp + 127 - 1023; } - result = sign | (exp << 23) | ((p[FHI] & 0x000FFFFF) << 3) | (p[FLO] >> 29); + result = sign | (exp << 23) | ((p.parts[FHI] & 0x000FFFFF) << 3) | (p.parts[FLO] >> 29); fpu_debug(("extract_single (%.04f) = %X\n",(double)src,result)); @@ -428,8 +442,8 @@ PRIVATE inline fpu_register FFPU make_extended(uae_u32 wrd1, uae_u32 wrd2, uae_u return 0.0; fpu_register result; - uae_u32 *p = (uae_u32 *)&result; - + fpu_register_parts p; + uae_u32 sign = wrd1 & 0x80000000; uae_u32 exp = (wrd1 >> 16) & 0x7fff; @@ -466,8 +480,10 @@ PRIVATE inline fpu_register FFPU make_extended(uae_u32 wrd1, uae_u32 wrd2, uae_u } // drop the explicit integer bit. 
- p[FLO] = (wrd2 << 21) | (wrd3 >> 11); - p[FHI] = sign | (exp << 20) | ((wrd2 & 0x7FFFFFFF) >> 11); + p.parts[FLO] = (wrd2 << 21) | (wrd3 >> 11); + p.parts[FHI] = sign | (exp << 20) | ((wrd2 & 0x7FFFFFFF) >> 11); + + result = p.val; fpu_debug(("make_extended (%X,%X,%X) = %.04f\n",wrd1,wrd2,wrd3,(double)result)); @@ -485,14 +501,14 @@ PRIVATE inline void FFPU make_extended_no_normalize( { // Is it zero? if ((wrd1 & 0x7fff0000) == 0 && wrd2 == 0 && wrd3 == 0) { - make_zero_positive(result); + make_zero(result, false); return; } // Is it NaN? if( (wrd1 & 0x7FFF0000) == 0x7FFF0000 ) { if( (wrd1 & 0x0000FFFF) || wrd2 || wrd3 ) { - make_nan(result); + make_nan(result, (wrd1 & 0x80000000) != 0); return; } } @@ -511,11 +527,13 @@ PRIVATE inline void FFPU make_extended_no_normalize( } // drop the explicit integer bit. - uae_u32 *p = (uae_u32 *)&result; - p[FLO] = (wrd2 << 21) | (wrd3 >> 11); - p[FHI] = sign | (exp << 20) | ((wrd2 & 0x7FFFFFFF) >> 11); + fpu_register_parts p; + p.parts[FLO] = (wrd2 << 21) | (wrd3 >> 11); + p.parts[FHI] = sign | (exp << 20) | ((wrd2 & 0x7FFFFFFF) >> 11); - fpu_debug(("make_extended (%X,%X,%X) = %.04f\n",wrd1,wrd2,wrd3,(float)(*(double *)p))); + result = p.val; + + fpu_debug(("make_extended (%X,%X,%X) = %.04f\n",wrd1,wrd2,wrd3,(double)result)); } // from_exten @@ -527,14 +545,14 @@ PRIVATE inline void FFPU extract_extended(fpu_register const & src, *wrd1 = *wrd2 = *wrd3 = 0; return; } - - uae_u32 *p = (uae_u32 *)&src; - - fpu_debug(("extract_extended (%X,%X)\n",p[FLO],p[FHI])); - uae_u32 sign = p[FHI] & 0x80000000; + fpu_register_parts const p = { src }; + + fpu_debug(("extract_extended (%X,%X)\n",p.parts[FLO],p.parts[FHI])); - uae_u32 exp = ((p[FHI] >> 20) & 0x7ff); + uae_u32 sign = p.parts[FHI] & 0x80000000; + + uae_u32 exp = ((p.parts[FHI] >> 20) & 0x7ff); // Check for maximum if(exp == 0x7FF) { exp = 0x7FFF; @@ -544,8 +562,8 @@ PRIVATE inline void FFPU extract_extended(fpu_register const & src, *wrd1 = sign | (exp << 16); // always set 
the explicit integer bit. - *wrd2 = 0x80000000 | ((p[FHI] & 0x000FFFFF) << 11) | ((p[FLO] & 0xFFE00000) >> 21); - *wrd3 = p[FLO] << 11; + *wrd2 = 0x80000000 | ((p.parts[FHI] & 0x000FFFFF) << 11) | ((p.parts[FLO] & 0xFFE00000) >> 21); + *wrd3 = p.parts[FLO] << 11; fpu_debug(("extract_extended (%.04f) = %X,%X,%X\n",(double)src,*wrd1,*wrd2,*wrd3)); } @@ -557,9 +575,11 @@ PRIVATE inline fpu_register FFPU make_double(uae_u32 wrd1, uae_u32 wrd2) return 0.0; fpu_register result; - uae_u32 *p = (uae_u32 *)&result; - p[FLO] = wrd2; - p[FHI] = wrd1; + fpu_register_parts p; + p.parts[FLO] = wrd2; + p.parts[FHI] = wrd1; + + result = p.val; fpu_debug(("make_double (%X,%X) = %.04f\n",wrd1,wrd2,(double)result)); @@ -577,9 +597,9 @@ PRIVATE inline void FFPU extract_double(fpu_register const & src, return; } */ - uae_u32 *p = (uae_u32 *)&src; - *wrd2 = p[FLO]; - *wrd1 = p[FHI]; + fpu_register_parts const p = { src }; + *wrd2 = p.parts[FLO]; + *wrd1 = p.parts[FHI]; fpu_debug(("extract_double (%.04f) = %X,%X\n",(double)src,*wrd1,*wrd2)); } @@ -590,8 +610,8 @@ PRIVATE inline void FFPU extract_double(fpu_register const & src, PRIVATE inline void FFPU make_fpsr(fpu_register const & r) { FPU fpsr.condition_codes - = ((r == 0.0) ? NATIVE_FFLAG_ZERO : 0) - | ((r < 0.0) ? NATIVE_FFLAG_NEGATIVE : 0) + = (iszero(r) ? NATIVE_FFLAG_ZERO : 0) + | (isneg(r) ? 
NATIVE_FFLAG_NEGATIVE : 0) ; } #endif @@ -622,25 +642,25 @@ PRIVATE inline uae_u32 FFPU extract_single(fpu_register const & src) fpu_register src0 = src; #endif - if (src == 0.0) + if (src == 0.0) return 0; - if (src < 0) { + if (src < 0) { tmp = 0x80000000; src = -src; - } else { + } else { tmp = 0; - } - frac = frexp (src, &expon); - frac += 0.5 / 16777216.0; - if (frac >= 1.0) { + } + frac = frexp (src, &expon); + frac += 0.5 / 16777216.0; + if (frac >= 1.0) { frac /= 2.0; expon++; - } + } result = tmp | (((expon + 127 - 1) & 0xff) << 23) | (((int) (frac * 16777216.0)) & 0x7fffff); // fpu_debug(("extract_single (%.04f) = %X\n",(float)src0,result)); - return (result); + return (result); } // to exten @@ -895,11 +915,9 @@ PRIVATE inline int FFPU get_fp_value (uae_u32 opcode, uae_u16 extra, fpu_registe break; case 3: ad = m68k_areg (regs, reg); - m68k_areg (regs, reg) += reg == 7 ? sz2[size] : sz1[size]; break; case 4: - m68k_areg (regs, reg) -= reg == 7 ? sz2[size] : sz1[size]; - ad = m68k_areg (regs, reg); + ad = m68k_areg (regs, reg) - (reg == 7 ? sz2[size] : sz1[size]); break; case 5: ad = m68k_areg (regs, reg) + (uae_s32) (uae_s16) next_iword(); @@ -940,8 +958,8 @@ PRIVATE inline int FFPU get_fp_value (uae_u32 opcode, uae_u16 extra, fpu_registe fpu_debug(("get_fp_value m68k_getpc()=%X\n",m68k_getpc())); fpu_debug(("get_fp_value ad=%X\n",ad)); fpu_debug(("get_fp_value get_long (ad)=%X\n",get_long (ad))); - dump_first_bytes( get_real_address(ad)-64, 64 ); - dump_first_bytes( get_real_address(ad), 64 ); + dump_first_bytes( get_real_address(ad, 0, 0)-64, 64 ); + dump_first_bytes( get_real_address(ad, 0, 0), 64 ); switch (size) { case 0: @@ -988,6 +1006,15 @@ PRIVATE inline int FFPU get_fp_value (uae_u32 opcode, uae_u16 extra, fpu_registe return 0; } + switch (mode) { + case 3: + m68k_areg (regs, reg) += reg == 7 ? sz2[size] : sz1[size]; + break; + case 4: + m68k_areg (regs, reg) -= reg == 7 ? 
sz2[size] : sz1[size]; + break; + } + // fpu_debug(("get_fp_value result = %.04f\n",(float)src)); return 1; } @@ -1204,7 +1231,7 @@ PRIVATE inline int FFPU fpp_cond(int condition) #if 0 return fpcctrue(condition); #else - switch (condition) { + switch (condition & 0x1f) { case 0x00: CONDRET("False",0); case 0x01: CONDRET("Equal",Z); case 0x02: CONDRET("Ordered Greater Than",!(NaN || Z || N)); @@ -1299,11 +1326,11 @@ void FFPU fpuop_scc(uae_u32 opcode, uae_u32 extra) put_byte(ad, cc ? 0xff : 0x00); } -void FFPU fpuop_trapcc(uae_u32 opcode, uaecptr oldpc) +void FFPU fpuop_trapcc(uae_u32 opcode, uaecptr oldpc, uae_u32 extra) { - fpu_debug(("ftrapcc_opp %X at %08lx\n", (uae_u32)opcode, m68k_getpc ())); + fpu_debug(("ftrapcc_opp %X, %X at %08lx\n", (uae_u32)opcode, (uae_u32)extra, m68k_getpc ())); - int cc = fpp_cond(opcode & 0x3f); + int cc = fpp_cond(extra & 0x3f); if (cc == -1) { m68k_setpc (oldpc); op_illg (opcode); @@ -1516,8 +1543,7 @@ void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) if ((opcode & 0x38) == 0) { if (extra & 0x2000) { // dr bit if (extra & 0x1000) { - // according to the manual, the msb bits are always zero. - m68k_dreg (regs, opcode & 7) = get_fpcr() & 0xFFFF; + m68k_dreg (regs, opcode & 7) = get_fpcr(); fpu_debug(("FMOVEM FPU fpcr (%X) -> D%d\n", get_fpcr(), opcode & 7)); } if (extra & 0x0800) { @@ -1548,8 +1574,7 @@ void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) else if ((opcode & 0x38) == 8) { if (extra & 0x2000) { // dr bit if (extra & 0x1000) { - // according to the manual, the msb bits are always zero. - m68k_areg (regs, opcode & 7) = get_fpcr() & 0xFFFF; + m68k_areg (regs, opcode & 7) = get_fpcr(); fpu_debug(("FMOVEM FPU fpcr (%X) -> A%d\n", get_fpcr(), opcode & 7)); } if (extra & 0x0800) { @@ -1612,8 +1637,7 @@ void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) } ad -= incr; if (extra & 0x1000) { - // according to the manual, the msb bits are always zero. 
- put_long (ad, get_fpcr() & 0xFFFF); + put_long (ad, get_fpcr()); fpu_debug(("FMOVEM FPU fpcr (%X) -> mem %X\n", get_fpcr(), ad )); ad += 4; } @@ -1906,6 +1930,8 @@ void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) FPU registers[reg] = 1.0e256; fpu_debug(("FP const: 1.0e256\n")); break; + + // Valid for 64 bits only (see fpu.cpp) #if 0 case 0x3c: FPU registers[reg] = 1.0e512; @@ -1942,7 +1968,114 @@ void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) return; } fpu_debug(("returned from get_fp_value m68k_getpc()=%X\n",m68k_getpc())); +#if 0 // MJ added, not tested now + if (FPU is_integral) { + // 68040-specific operations + switch (extra & 0x7f) { + case 0x40: /* FSMOVE */ + fpu_debug(("FSMOVE %.04f\n",(double)src)); + FPU registers[reg] = (float)src; + make_fpsr(FPU registers[reg]); + break; + case 0x44: /* FDMOVE */ + fpu_debug(("FDMOVE %.04f\n",(double)src)); + FPU registers[reg] = (double)src; + make_fpsr(FPU registers[reg]); + break; + case 0x41: /* FSSQRT */ + fpu_debug(("FSQRT %.04f\n",(double)src)); + FPU registers[reg] = (float)sqrt (src); + make_fpsr(FPU registers[reg]); + break; + case 0x45: /* FDSQRT */ + fpu_debug(("FSQRT %.04f\n",(double)src)); + FPU registers[reg] = (double)sqrt (src); + make_fpsr(FPU registers[reg]); + break; + case 0x58: /* FSABS */ + fpu_debug(("FSABS %.04f\n",(double)src)); + FPU registers[reg] = (float)fabs(src); + make_fpsr(FPU registers[reg]); + break; + case 0x5c: /* FDABS */ + fpu_debug(("FDABS %.04f\n",(double)src)); + FPU registers[reg] = (double)fabs(src); + make_fpsr(FPU registers[reg]); + break; + case 0x5a: /* FSNEG */ + fpu_debug(("FSNEG %.04f\n",(double)src)); + FPU registers[reg] = (float)-src; + make_fpsr(FPU registers[reg]); + break; + case 0x5e: /* FDNEG */ + fpu_debug(("FDNEG %.04f\n",(double)src)); + FPU registers[reg] = (double)-src; + make_fpsr(FPU registers[reg]); + break; + case 0x60: /* FSDIV */ + fpu_debug(("FSDIV %.04f\n",(double)src)); + FPU registers[reg] = (float)(FPU registers[reg] / 
src); + make_fpsr(FPU registers[reg]); + break; + case 0x64: /* FDDIV */ + fpu_debug(("FDDIV %.04f\n",(double)src)); + FPU registers[reg] = (double)(FPU registers[reg] / src); + make_fpsr(FPU registers[reg]); + break; + case 0x62: /* FSADD */ + fpu_debug(("FSADD %.04f\n",(double)src)); + FPU registers[reg] = (float)(FPU registers[reg] + src); + make_fpsr(FPU registers[reg]); + break; + case 0x66: /* FDADD */ + fpu_debug(("FDADD %.04f\n",(double)src)); + FPU registers[reg] = (double)(FPU registers[reg] + src); + make_fpsr(FPU registers[reg]); + break; + case 0x68: /* FSSUB */ + fpu_debug(("FSSUB %.04f\n",(double)src)); + FPU registers[reg] = (float)(FPU registers[reg] - src); + make_fpsr(FPU registers[reg]); + break; + case 0x6c: /* FDSUB */ + fpu_debug(("FDSUB %.04f\n",(double)src)); + FPU registers[reg] = (double)(FPU registers[reg] - src); + make_fpsr(FPU registers[reg]); + break; + case 0x63: /* FSMUL */ + case 0x67: /* FDMUL */ + get_dest_flags(FPU registers[reg]); + get_source_flags(src); + if(fl_dest.in_range && fl_source.in_range) { + if ((extra & 0x7f) == 0x63) + FPU registers[reg] = (float)(FPU registers[reg] * src); + else + FPU registers[reg] = (double)(FPU registers[reg] * src); + } + else if (fl_dest.nan || fl_source.nan || + fl_dest.zero && fl_source.infinity || + fl_dest.infinity && fl_source.zero ) { + make_nan( FPU registers[reg], fl_dest.negative ); + } + else if (fl_dest.zero || fl_source.zero ) { + make_zero(FPU registers[reg], fl_dest.negative != fl_source.negative); + } + else { + make_inf(FPU registers[reg], fl_dest.negative != fl_source.negative); + } + make_fpsr(FPU registers[reg]); + break; + default: + // Continue decode-execute 6888x instructions below + goto process_6888x_instructions; + } + fpu_debug(("END m68k_getpc()=%X\n",m68k_getpc())); + dump_registers( "END "); + return; + } + process_6888x_instructions: +#endif switch (extra & 0x7f) { case 0x00: /* FMOVE */ fpu_debug(("FMOVE %.04f\n",(double)src)); @@ -1954,7 +2087,7 @@ void 
FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) fpu_debug(("FINT %.04f\n",(double)src)); // FPU registers[reg] = (int) (src + 0.5); // FIXME: use native rounding mode flags - switch (get_fpcr() & 0x30) { + switch (get_fpcr() & FPCR_ROUNDING_MODE) { case FPCR_ROUND_ZERO: FPU registers[reg] = round_to_zero(src); break; @@ -2075,7 +2208,10 @@ void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) break; case 0x1a: /* FNEG */ fpu_debug(("FNEG %.04f\n",(double)src)); - FPU registers[reg] = -src; + if (iszero(src)) + make_zero(FPU registers[reg], !isneg(src)); + else + FPU registers[reg] = -src; make_fpsr(FPU registers[reg]); break; case 0x1c: /* FACOS */ @@ -2092,7 +2228,7 @@ void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) fpu_debug(("FGETEXP %.04f\n",(double)src)); #if FPU_HAVE_IEEE_DOUBLE if( isinf(src) ) { - make_nan( FPU registers[reg] ); + make_nan( FPU registers[reg], isneg(src) ); } else { FPU registers[reg] = fast_fgetexp( src ); @@ -2116,7 +2252,7 @@ void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) FPU registers[reg] = 0; } else if( isinf(src) ) { - make_nan( FPU registers[reg] ); + make_nan( FPU registers[reg], isneg(src) ); } else { FPU registers[reg] = src; @@ -2166,27 +2302,15 @@ void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) FPU registers[reg] *= src; } else if (fl_dest.nan || fl_source.nan || - fl_dest.zero && fl_source.infinity || - fl_dest.infinity && fl_source.zero ) { - make_nan( FPU registers[reg] ); + (fl_dest.zero && fl_source.infinity) || + (fl_dest.infinity && fl_source.zero) ) { + make_nan( FPU registers[reg], fl_dest.negative ); } else if (fl_dest.zero || fl_source.zero ) { - if (fl_dest.negative && !fl_source.negative || - !fl_dest.negative && fl_source.negative) { - make_zero_negative(FPU registers[reg]); - } - else { - make_zero_positive(FPU registers[reg]); - } + make_zero(FPU registers[reg], fl_dest.negative != fl_source.negative); } else { - if( fl_dest.negative && !fl_source.negative || - 
!fl_dest.negative && fl_source.negative) { - make_inf_negative(FPU registers[reg]); - } - else { - make_inf_positive(FPU registers[reg]); - } + make_inf(FPU registers[reg], fl_dest.negative != fl_source.negative); } #else fpu_debug(("FMUL %.04f\n",(double)src)); @@ -2223,8 +2347,8 @@ void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) // Overflow, underflow #if FPU_HAVE_IEEE_DOUBLE - if( isinf(FPU registers[reg]) ) { - make_nan( FPU registers[reg] ); + if( isinf(src) ) { + make_nan( FPU registers[reg], isneg(src) ); } else { // When the absolute value of the source operand is >= 2^14, @@ -2336,6 +2460,27 @@ void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) dump_registers( "END "); } + +void fpu_set_fpsr(uae_u32 new_fpsr) +{ + set_fpsr(new_fpsr); +} + +uae_u32 fpu_get_fpsr(void) +{ + return get_fpsr(); +} + +void fpu_set_fpcr(uae_u32 new_fpcr) +{ + set_fpcr(new_fpcr); +} + +uae_u32 fpu_get_fpcr(void) +{ + return get_fpcr(); +} + /* -------------------------- Initialization -------------------------- */ void FFPU fpu_init (bool integral_68040) diff --git a/BasiliskII/src/uae_cpu/fpu/fpu_uae.h b/BasiliskII/src/uae_cpu/fpu/fpu_uae.h index 7fc4ebbd..822fc220 100644 --- a/BasiliskII/src/uae_cpu/fpu/fpu_uae.h +++ b/BasiliskII/src/uae_cpu/fpu/fpu_uae.h @@ -1,28 +1,33 @@ /* - * fpu/fpu_uae.h - Extra Definitions for the old UAE FPU core + * fpu/fpu_uae.h - Extra Definitions for the old UAE FPU core * - * Basilisk II (C) 1997-2008 Christian Bauer + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II * - * MC68881/68040 fpu emulation - * - * Original UAE FPU, copyright 1996 Herman ten Brugge - * Rewrite for x86, copyright 1999-2000 Lauri Pesonen - * New framework, copyright 2000 Gwenole Beauchesne - * Adapted for JIT compilation (c) Bernd Meyer, 2000 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published 
by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * MC68881/68040 fpu emulation * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef FPU_UAE_H @@ -78,11 +83,9 @@ PRIVATE inline bool FFPU do_isinf(fpu_register const & r); PRIVATE inline bool FFPU do_isneg(fpu_register const & r); PRIVATE inline bool FFPU do_iszero(fpu_register const & r); -PRIVATE inline void FFPU make_nan(fpu_register & r); -PRIVATE inline void FFPU make_zero_positive(fpu_register & r); -PRIVATE inline void FFPU make_zero_negative(fpu_register & r); -PRIVATE inline void FFPU make_inf_positive(fpu_register & r); -PRIVATE inline void FFPU make_inf_negative(fpu_register & r); +PRIVATE inline void FFPU make_nan(fpu_register & r, bool negative); +PRIVATE inline void FFPU make_zero(fpu_register & r, bool negative); +PRIVATE inline void FFPU make_inf(fpu_register & r, bool negative); PRIVATE inline void FFPU fast_scale(fpu_register & r, int add); PRIVATE inline fpu_register FFPU fast_fgetexp(fpu_register const & r); diff --git a/BasiliskII/src/uae_cpu/fpu/fpu_x86.cpp b/BasiliskII/src/uae_cpu/fpu/fpu_x86.cpp index 70e59086..29af7ddb 100644 --- a/BasiliskII/src/uae_cpu/fpu/fpu_x86.cpp +++ b/BasiliskII/src/uae_cpu/fpu/fpu_x86.cpp @@ -1,27 +1,33 @@ /* - * fpu_x86.cpp - 68881/68040 fpu code for x86/Windows an Linux/x86. + * fpu/fpu_x86.cpp - 68881/68040 fpu code for x86/Windows an Linux/x86. * - * Basilisk II (C) 1997-2008 Christian Bauer + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II * - * MC68881/68040 fpu emulation + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. 
* - * Based on UAE FPU, original copyright 1996 Herman ten Brugge, - * rewritten for x86 by Lauri Pesonen 1999-2000, - * accomodated to GCC's Extended Asm syntax by Gwenole Beauchesne 2000. + * MC68881/68040 fpu emulation * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * * Interface @@ -134,10 +140,8 @@ * */ -#include -#include -#include -#include +# include +# include #include "sysdeps.h" #include "memory.h" @@ -238,8 +242,6 @@ PUBLIC void FFPU fpu_dump_flags(void) #include "debug.h" #if FPU_DEBUG -#undef __inline__ -#define __inline__ PRIVATE void FFPU dump_first_bytes_buf(char *b, uae_u8* buf, uae_s32 actual) { @@ -390,7 +392,7 @@ PRIVATE void FFPU FPU_CONSISTENCY_CHECK_STOP(const char *) /* ---------------------------- Status functions ---------------------------- */ -PRIVATE void __inline__ FFPU SET_BSUN_ON_NAN () +PRIVATE void inline FFPU SET_BSUN_ON_NAN () { if( (x86_status_word & (SW_Z_I_NAN_MASK)) == SW_NAN ) { x86_status_word |= SW_FAKE_BSUN; @@ -398,7 +400,7 @@ PRIVATE void __inline__ FFPU SET_BSUN_ON_NAN () } } -PRIVATE void __inline__ FFPU build_ex_status () +PRIVATE void inline FFPU build_ex_status () { if(x86_status_word & SW_EXCEPTION_MASK) { // _asm FNCLEX @@ -415,20 +417,20 @@ When the FPU creates a NAN, the NAN always contains the same bit pattern in the mantissa. All bits of the mantissa are ones for any precision. When the user creates a NAN, any nonzero bit pattern can be stored in the mantissa. */ -PRIVATE __inline__ void FFPU MAKE_NAN (fpu_register & f) +PRIVATE inline void FFPU MAKE_NAN (fpu_register & f, bool negative) { // Make it non-signaling. uae_u8 * p = (uae_u8 *) &f; memset( p, 0xFF, sizeof(fpu_register) - 1 ); - p[9] = 0x7F; + p[9] = negative ? 0xff : 0x7F; } /* For single- and double-precision infinities the fraction is a zero. -For extended-precision infinities, the mantissa’s MSB, the explicit +For extended-precision infinities, the mantissa's MSB, the explicit integer bit, can be either one or zero.
*/ -PRIVATE __inline__ uae_u32 FFPU IS_INFINITY (fpu_register const & f) +PRIVATE inline uae_u32 FFPU IS_INFINITY (fpu_register const & f) { uae_u8 * p = (uae_u8 *) &f; if( ((p[9] & 0x7F) == 0x7F) && p[8] == 0xFF ) { @@ -439,7 +441,7 @@ PRIVATE __inline__ uae_u32 FFPU IS_INFINITY (fpu_register const & f) return(0); } -PRIVATE __inline__ uae_u32 FFPU IS_NAN (fpu_register const & f) +PRIVATE inline uae_u32 FFPU IS_NAN (fpu_register const & f) { uae_u8 * p = (uae_u8 *) &f; if( ((p[9] & 0x7F) == 0x7F) && p[8] == 0xFF ) { @@ -450,7 +452,7 @@ PRIVATE __inline__ uae_u32 FFPU IS_NAN (fpu_register const & f) return(0); } -PRIVATE __inline__ uae_u32 FFPU IS_ZERO (fpu_register const & f) +PRIVATE inline uae_u32 FFPU IS_ZERO (fpu_register const & f) { uae_u8 * p = (uae_u8 *) &f; return *((uae_u32 *)p) == 0 && @@ -458,34 +460,34 @@ PRIVATE __inline__ uae_u32 FFPU IS_ZERO (fpu_register const & f) ( *((uae_u16 *)&p[8]) & 0x7FFF ) == 0; } -PRIVATE __inline__ void FFPU MAKE_INF_POSITIVE (fpu_register & f) +PRIVATE inline void FFPU MAKE_INF_POSITIVE (fpu_register & f) { uae_u8 * p = (uae_u8 *) &f; memset( p, 0, sizeof(fpu_register)-2 ); *((uae_u16 *)&p[8]) = 0x7FFF; } -PRIVATE __inline__ void FFPU MAKE_INF_NEGATIVE (fpu_register & f) +PRIVATE inline void FFPU MAKE_INF_NEGATIVE (fpu_register & f) { uae_u8 * p = (uae_u8 *) &f; memset( p, 0, sizeof(fpu_register)-2 ); *((uae_u16 *)&p[8]) = 0xFFFF; } -PRIVATE __inline__ void FFPU MAKE_ZERO_POSITIVE (fpu_register & f) +PRIVATE inline void FFPU MAKE_ZERO_POSITIVE (fpu_register & f) { uae_u32 * const p = (uae_u32 *) &f; memset( p, 0, sizeof(fpu_register) ); } -PRIVATE __inline__ void FFPU MAKE_ZERO_NEGATIVE (fpu_register & f) +PRIVATE inline void FFPU MAKE_ZERO_NEGATIVE (fpu_register & f) { uae_u32 * const p = (uae_u32 *) &f; memset( p, 0, sizeof(fpu_register) ); *((uae_u32 *)&p[4]) = 0x80000000; } -PRIVATE __inline__ uae_u32 FFPU IS_NEGATIVE (fpu_register const & f) +PRIVATE inline uae_u32 FFPU IS_NEGATIVE (fpu_register const & f) { uae_u8 
* p = (uae_u8 *) &f; return( (p[9] & 0x80) != 0 ); @@ -900,6 +902,34 @@ PRIVATE void FFPU do_fmove ( fpu_register & dest, fpu_register const & src ) FPU_CONSISTENCY_CHECK_STOP("do_fmove"); } +PRIVATE void FFPU do_fsmove ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + __asm__ __volatile__( + "fldt %2\n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "=m" (dest) + : "m" (src) + ); + FPU_CONSISTENCY_CHECK_STOP("do_fsmove"); +} + +PRIVATE void FFPU do_fdmove ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + __asm__ __volatile__( + "fldt %2\n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "=m" (dest) + : "m" (src) + ); + FPU_CONSISTENCY_CHECK_STOP("do_fdmove"); +} + /* PRIVATE void FFPU do_fmove_no_status ( fpu_register & dest, fpu_register const & src ) { @@ -1023,6 +1053,50 @@ PRIVATE void FFPU do_fsqrt ( fpu_register & dest, fpu_register const & src ) FPU_CONSISTENCY_CHECK_STOP("do_fsqrt"); } +PRIVATE void FFPU do_fssqrt ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + __asm__ __volatile__( + "fldt %2\n" + "fsqrt \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "=m" (dest) + : "m" (src) + ); + + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~(SW_EXCEPTION_MASK - SW_IE - SW_PE); + x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_fssqrt"); +} + +PRIVATE void FFPU do_fdsqrt ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + __asm__ __volatile__( + "fldt %2\n" + "fsqrt \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "=m" (dest) + : "m" (src) + ); + + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~(SW_EXCEPTION_MASK - SW_IE - SW_PE); + 
x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_fdsqrt"); +} + PRIVATE void FFPU do_ftst ( fpu_register const & src ) { FPU_CONSISTENCY_CHECK_START(); @@ -1311,6 +1385,48 @@ PRIVATE void FFPU do_fabs ( fpu_register & dest, fpu_register const & src ) FPU_CONSISTENCY_CHECK_STOP("do_fabs"); } +PRIVATE void FFPU do_fsabs ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + __asm__ __volatile__( + "fldt %2\n" + "fabs \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "=m" (dest) + : "m" (src) + ); + // x86 fabs should not rise any exceptions (except stack underflow) + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~SW_EXCEPTION_MASK; + } + FPU_CONSISTENCY_CHECK_STOP("do_fsabs"); +} + +PRIVATE void FFPU do_fdabs ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + __asm__ __volatile__( + "fldt %2\n" + "fabs \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "=m" (dest) + : "m" (src) + ); + // x86 fabs should not rise any exceptions (except stack underflow) + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~SW_EXCEPTION_MASK; + } + FPU_CONSISTENCY_CHECK_STOP("do_fdabs"); +} + PRIVATE void FFPU do_fneg ( fpu_register & dest, fpu_register const & src ) { FPU_CONSISTENCY_CHECK_START(); @@ -1341,6 +1457,48 @@ PRIVATE void FFPU do_fneg ( fpu_register & dest, fpu_register const & src ) FPU_CONSISTENCY_CHECK_STOP("do_fneg"); } +PRIVATE void FFPU do_fsneg ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + __asm__ __volatile__( + "fldt %2\n" + "fchs \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "=m" (dest) + : "m" (src) + ); + // x86 fchs should not rise any exceptions (except stack underflow) + if(x86_status_word & SW_EXCEPTION_MASK) { 
+// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~SW_EXCEPTION_MASK; + } + FPU_CONSISTENCY_CHECK_STOP("do_fsneg"); +} + +PRIVATE void FFPU do_fdneg ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + __asm__ __volatile__( + "fldt %2\n" + "fchs \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "=m" (dest) + : "m" (src) + ); + // x86 fchs should not rise any exceptions (except stack underflow) + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~SW_EXCEPTION_MASK; + } + FPU_CONSISTENCY_CHECK_STOP("do_fdneg"); +} + PRIVATE void FFPU do_fcos ( fpu_register & dest, fpu_register const & src ) { FPU_CONSISTENCY_CHECK_START(); @@ -1466,6 +1624,50 @@ PRIVATE void FFPU do_fdiv ( fpu_register & dest, fpu_register const & src ) FPU_CONSISTENCY_CHECK_STOP("do_fdiv"); } +PRIVATE void FFPU do_fsdiv ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + __asm__ __volatile__( + "fldt %2\n" + "fldt %1\n" + "fdiv %%st(1), %%st(0)\n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + "fstp %%st(0)\n" + : "=m" (x86_status_word), "+m" (dest) + : "m" (src) + ); + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_fsdiv"); +} + +PRIVATE void FFPU do_fddiv ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + __asm__ __volatile__( + "fldt %2\n" + "fldt %1\n" + "fdiv %%st(1), %%st(0)\n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + "fstp %%st(0)\n" + : "=m" (x86_status_word), "+m" (dest) + : "m" (src) + ); + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_fddiv"); +} + // The sign of the quotient is the exclusive-OR of the sign bits // of the 
source and destination operands. // Quotient Byte is loaded with the sign and least significant @@ -1851,6 +2053,48 @@ PRIVATE void FFPU do_fadd ( fpu_register & dest, fpu_register const & src ) FPU_CONSISTENCY_CHECK_STOP("do_fadd"); } +PRIVATE void FFPU do_fsadd ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + __asm__ __volatile__( + "fldt %2\n" + "fldt %1\n" + "fadd \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "+m" (dest) + : "m" (src) + ); + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + x86_status_word &= ~(SW_EXCEPTION_MASK - SW_IE - SW_UE - SW_OE - SW_PE); + x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_fsadd"); +} + +PRIVATE void FFPU do_fdadd ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + __asm__ __volatile__( + "fldt %2\n" + "fldt %1\n" + "fadd \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "+m" (dest) + : "m" (src) + ); + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + x86_status_word &= ~(SW_EXCEPTION_MASK - SW_IE - SW_UE - SW_OE - SW_PE); + x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_fdadd"); +} + PRIVATE void FFPU do_fmul ( fpu_register & dest, fpu_register const & src ) { FPU_CONSISTENCY_CHECK_START(); @@ -1882,6 +2126,48 @@ PRIVATE void FFPU do_fmul ( fpu_register & dest, fpu_register const & src ) FPU_CONSISTENCY_CHECK_STOP("do_fmul"); } +PRIVATE void FFPU do_fsmul ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + __asm__ __volatile__( + "fldt %2\n" + "fldt %1\n" + "fmul \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "+m" (dest) + : "m" (src) + ); + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_fsmul"); +} + +PRIVATE void FFPU 
do_fdmul ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + __asm__ __volatile__( + "fldt %2\n" + "fldt %1\n" + "fmul \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "+m" (dest) + : "m" (src) + ); + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_fdmul"); +} + PRIVATE void FFPU do_fsgldiv ( fpu_register & dest, fpu_register const & src ) { FPU_CONSISTENCY_CHECK_START(); @@ -2040,6 +2326,52 @@ PRIVATE void FFPU do_fsub ( fpu_register & dest, fpu_register const & src ) FPU_CONSISTENCY_CHECK_STOP("do_fsub"); } +PRIVATE void FFPU do_fssub ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + __asm__ __volatile__( + "fldt %2\n" + "fldt %1\n" + "fsub %%st(1), %%st(0)\n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + "fstp %%st(0)\n" + : "=m" (x86_status_word), "+m" (dest) + : "m" (src) + ); + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~(SW_EXCEPTION_MASK - SW_IE - SW_UE - SW_OE - SW_PE); + x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_fssub"); +} + +PRIVATE void FFPU do_fdsub ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + __asm__ __volatile__( + "fldt %2\n" + "fldt %1\n" + "fsub %%st(1), %%st(0)\n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + "fstp %%st(0)\n" + : "=m" (x86_status_word), "+m" (dest) + : "m" (src) + ); + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~(SW_EXCEPTION_MASK - SW_IE - SW_UE - SW_OE - SW_PE); + x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_fdsub"); +} + PRIVATE void FFPU do_fsincos ( fpu_register & dest_sin, fpu_register & dest_cos, fpu_register const & src ) { 
FPU_CONSISTENCY_CHECK_START(); @@ -2284,11 +2616,9 @@ PRIVATE int FFPU get_fp_value (uae_u32 opcode, uae_u32 extra, fpu_register & src break; case 3: ad = m68k_areg (regs, reg); - m68k_areg (regs, reg) += reg == 7 ? sz2[size] : sz1[size]; break; case 4: - m68k_areg (regs, reg) -= reg == 7 ? sz2[size] : sz1[size]; - ad = m68k_areg (regs, reg); + ad = m68k_areg (regs, reg) - (reg == 7 ? sz2[size] : sz1[size]); break; case 5: ad = m68k_areg (regs, reg) + (uae_s32) (uae_s16) next_iword(); @@ -2387,6 +2717,15 @@ PRIVATE int FFPU get_fp_value (uae_u32 opcode, uae_u32 extra, fpu_register & src return 0; } + switch (mode) { + case 3: + m68k_areg (regs, reg) += reg == 7 ? sz2[size] : sz1[size]; + break; + case 4: + m68k_areg (regs, reg) -= reg == 7 ? sz2[size] : sz1[size]; + break; + } + // D(bug("get_fp_value result = %.04f\r\n",(float)src)); return 1; @@ -2612,7 +2951,7 @@ PRIVATE int FFPU fpp_cond(uae_u32 opcode, int condition) #define I ((x86_status_word & (SW_Z_I_NAN_MASK)) == (SW_I)) #define NotANumber ((x86_status_word & (SW_Z_I_NAN_MASK)) == SW_NAN) - switch (condition) { + switch (condition & 0x1f) { // Common Tests, no BSUN case 0x01: CONDRET("Equal",Z); @@ -2757,11 +3096,11 @@ PUBLIC void REGPARAM2 FFPU fpuop_scc(uae_u32 opcode, uae_u32 extra) } } -PUBLIC void REGPARAM2 FFPU fpuop_trapcc(uae_u32 opcode, uaecptr oldpc) +PUBLIC void REGPARAM2 FFPU fpuop_trapcc(uae_u32 opcode, uaecptr oldpc, uae_u32 extra) { int cc; - D(bug("ftrapcc_opp %X at %08lx\r\n", (uae_u32)opcode, m68k_getpc ())); + D(bug("ftrapcc_opp %X, %X at %08lx\r\n", (uae_u32)opcode, (uae_u32)extra, m68k_getpc ())); #if I3_ON_FTRAPCC #error "FIXME: _asm int 3" @@ -2769,7 +3108,7 @@ PUBLIC void REGPARAM2 FFPU fpuop_trapcc(uae_u32 opcode, uaecptr oldpc) #endif // This must be broken. 
- cc = fpp_cond(opcode, opcode & 0x3f); + cc = fpp_cond(opcode, extra & 0x3f); if (cc < 0) { m68k_setpc (oldpc); @@ -2856,7 +3195,7 @@ PRIVATE void FFPU do_null_frestore () { // A null-restore operation sets FP7-FP0 positive, nonsignaling NANs. for( int i=0; i<8; i++ ) { - MAKE_NAN( FPU registers[i] ); + MAKE_NAN( FPU registers[i], false ); } FPU instruction_address = 0; @@ -4646,6 +4985,249 @@ PRIVATE void REGPARAM2 FFPU fpuop_do_load_const_1e4096( uae_u32 opcode, uae_u32 } +/* -------------------------- 040 ALU -------------------------- */ +PRIVATE void REGPARAM2 FFPU fpuop_do_fsmove( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FSMOVE %s\r\n",etos(src))); + do_fsmove( FPU registers[reg], src ); + build_ex_status(); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fdmove( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FDMOVE %s\r\n",etos(src))); + do_fdmove( FPU registers[reg], src ); + build_ex_status(); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fssqrt( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FSSQRT %s\r\n",etos(src))); + do_fssqrt( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fdsqrt( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); 
+ return; + } + D(bug("FDSQRT %s\r\n",etos(src))); + do_fdsqrt( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fsabs( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FSABS %s\r\n",etos(src))); + do_fsabs( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fdabs( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FDABS %s\r\n",etos(src))); + do_fdabs( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fsneg( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FSNEG %s\r\n",etos(src))); + do_fsneg( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fdneg( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FDNEG %s\r\n",etos(src))); + do_fdneg( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fsdiv( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FSDIV %s\r\n",etos(src))); + do_fsdiv( FPU registers[reg], src ); + 
dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fddiv( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FDDIV %s\r\n",etos(src))); + do_fddiv( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fsadd( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FSADD %s\r\n",etos(src))); + do_fsadd( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fdadd( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FDADD %s\r\n",etos(src))); + do_fdadd( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fssub( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FSSUB %s\r\n",etos(src))); + do_fssub( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fdsub( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FDSUB %s\r\n",etos(src))); + do_fdsub( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fsmul( uae_u32 opcode, uae_u32 
extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FSMUL %s\r\n",etos(src))); + do_fsmul( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fdmul( uae_u32 opcode, uae_u32 extra ) /* 68040 FDMUL handler: destination register index is bits 7-9 of extra; fetches the source with get_fp_value and must dispatch to do_fdmul, not the FSMUL worker do_fsmul */ +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { /* operand fetch failed: rewind PC by 4 and raise illegal-instruction */ + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FDMUL %s\r\n",etos(src))); + do_fdmul( FPU registers[reg], src ); + dump_registers( "END "); +} + /* ---------------------------- ALU ---------------------------- */ PRIVATE void REGPARAM2 FFPU fpuop_do_fmove( uae_u32 opcode, uae_u32 extra ) @@ -5037,7 +5619,7 @@ PRIVATE void REGPARAM2 FFPU fpuop_do_fgetexp( uae_u32 opcode, uae_u32 extra ) D(bug("FGETEXP %s\r\n",etos(src))); if( IS_INFINITY(src) ) { - MAKE_NAN( FPU registers[reg] ); + MAKE_NAN( FPU registers[reg], IS_NEGATIVE(src) ); do_ftst( FPU registers[reg] ); x86_status_word |= SW_IE; } else { @@ -5058,7 +5640,7 @@ PRIVATE void REGPARAM2 FFPU fpuop_do_fgetman( uae_u32 opcode, uae_u32 extra ) } D(bug("FGETMAN %s\r\n",etos(src))); if( IS_INFINITY(src) ) { - MAKE_NAN( FPU registers[reg] ); + MAKE_NAN( FPU registers[reg], IS_NEGATIVE(src) ); do_ftst( FPU registers[reg] ); x86_status_word |= SW_IE; } else { @@ -5186,7 +5768,7 @@ PRIVATE void REGPARAM2 FFPU fpuop_do_fscale( uae_u32 opcode, uae_u32 extra ) } D(bug("FSCALE %s, opcode=%X, extra=%X, ta %X\r\n",etos(src),opcode,extra,m68k_getpc())); if( IS_INFINITY(FPU registers[reg]) ) { - MAKE_NAN( FPU registers[reg] ); + MAKE_NAN( FPU registers[reg], IS_NEGATIVE(FPU registers[reg]) ); do_ftst( FPU registers[reg] ); x86_status_word |= SW_IE; } else { @@ -5744,6 +6326,61 @@ PRIVATE void FFPU build_fpp_opp_lookup_table () } break; } + + if (FPU is_integral) { + switch (extra & 0x7f) { +
case 0x40: + fpufunctbl[mask] = & FFPU fpuop_do_fsmove; + break; + case 0x44: + fpufunctbl[mask] = & FFPU fpuop_do_fdmove; + break; + case 0x41: + fpufunctbl[mask] = & FFPU fpuop_do_fssqrt; + break; + case 0x45: + fpufunctbl[mask] = & FFPU fpuop_do_fdsqrt; + break; + case 0x58: + fpufunctbl[mask] = & FFPU fpuop_do_fsabs; + break; + case 0x5c: + fpufunctbl[mask] = & FFPU fpuop_do_fdabs; + break; + case 0x5a: + fpufunctbl[mask] = & FFPU fpuop_do_fsneg; + break; + case 0x5e: + fpufunctbl[mask] = & FFPU fpuop_do_fdneg; + break; + case 0x60: + fpufunctbl[mask] = & FFPU fpuop_do_fsdiv; + break; + case 0x64: + fpufunctbl[mask] = & FFPU fpuop_do_fddiv; + break; + case 0x62: + fpufunctbl[mask] = & FFPU fpuop_do_fsadd; + break; + case 0x66: + fpufunctbl[mask] = & FFPU fpuop_do_fdadd; + break; + case 0x68: + fpufunctbl[mask] = & FFPU fpuop_do_fssub; + break; + case 0x6c: + fpufunctbl[mask] = & FFPU fpuop_do_fdsub; + break; + case 0x63: + fpufunctbl[mask] = & FFPU fpuop_do_fsmul; + break; + case 0x67: + fpufunctbl[mask] = & FFPU fpuop_do_fdmul; + break; + default: + break; + } + } switch (extra & 0x7f) { case 0x00: @@ -6033,6 +6670,26 @@ PRIVATE void FFPU do_fld1 ( fpu_register & dest ) } +void fpu_set_fpsr(uae_u32 new_fpsr) +{ + set_fpsr(new_fpsr); +} + +uae_u32 fpu_get_fpsr(void) +{ + return get_fpsr(); +} + +void fpu_set_fpcr(uae_u32 new_fpcr) +{ + set_fpcr(new_fpcr); +} + +uae_u32 fpu_get_fpcr(void) +{ + return get_fpcr(); +} + /* ---------------------------- MAIN INIT ---------------------------- */ #ifdef HAVE_SIGACTION @@ -6079,11 +6736,15 @@ PUBLIC void FFPU fpu_init( bool integral_68040 ) FPU fpsr.quotient = 0; for( int i=0; i<8; i++ ) { - MAKE_NAN( FPU registers[i] ); + MAKE_NAN( FPU registers[i], false ); } build_fpp_opp_lookup_table(); +/* _asm { + FNINIT + FLDCW x86_control_word + } */ __asm__ __volatile__("fninit\nfldcw %0" : : "m" (x86_control_word)); do_fldpi( const_pi ); @@ -6111,6 +6772,10 @@ PUBLIC void FFPU fpu_init( bool integral_68040 ) set_constant( 
const_1e4096, "1.0e4096", 1.0e256, 10000 ); // Just in case. +/* _asm { + FNINIT + FLDCW x86_control_word + } */ __asm__ __volatile__("fninit\nfldcw %0" : : "m" (x86_control_word)); } diff --git a/BasiliskII/src/uae_cpu/fpu/fpu_x86.h b/BasiliskII/src/uae_cpu/fpu/fpu_x86.h index 96f1d959..c42bfa91 100644 --- a/BasiliskII/src/uae_cpu/fpu/fpu_x86.h +++ b/BasiliskII/src/uae_cpu/fpu/fpu_x86.h @@ -1,28 +1,33 @@ /* - * fpu/fpu_x86.h - Extra Definitions for the X86 assembly FPU core + * fpu/fpu_x86.h - Extra Definitions for the X86 assembly FPU core * - * Basilisk II (C) 1997-2008 Christian Bauer + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II * - * MC68881/68040 fpu emulation - * - * Original UAE FPU, copyright 1996 Herman ten Brugge - * Rewrite for x86, copyright 1999-2000 Lauri Pesonen - * New framework, copyright 2000 Gwenole Beauchesne - * Adapted for JIT compilation (c) Bernd Meyer, 2000 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
+ * MC68881/68040 fpu emulation * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef FPU_X86_H @@ -94,17 +99,17 @@ PRIVATE void FFPU FPU_CONSISTENCY_CHECK_START(void); PRIVATE void FFPU FPU_CONSISTENCY_CHECK_STOP(const char *name); // Get special floating-point value class -PRIVATE __inline__ uae_u32 FFPU IS_INFINITY (fpu_register const & f); -PRIVATE __inline__ uae_u32 FFPU IS_NAN (fpu_register const & f); -PRIVATE __inline__ uae_u32 FFPU IS_ZERO (fpu_register const & f); -PRIVATE __inline__ uae_u32 FFPU IS_NEGATIVE (fpu_register const & f); +PRIVATE inline uae_u32 FFPU IS_INFINITY (fpu_register const & f); +PRIVATE inline uae_u32 FFPU IS_NAN (fpu_register const & f); +PRIVATE inline uae_u32 FFPU IS_ZERO (fpu_register const & f); +PRIVATE inline uae_u32 FFPU IS_NEGATIVE (fpu_register const & f); // Make a special floating-point value -PRIVATE __inline__ void FFPU MAKE_NAN 
(fpu_register & f); -PRIVATE __inline__ void FFPU MAKE_INF_POSITIVE (fpu_register & f); -PRIVATE __inline__ void FFPU MAKE_INF_NEGATIVE (fpu_register & f); -PRIVATE __inline__ void FFPU MAKE_ZERO_POSITIVE (fpu_register & f); -PRIVATE __inline__ void FFPU MAKE_ZERO_NEGATIVE (fpu_register & f); +PRIVATE inline void FFPU MAKE_NAN (fpu_register & f, bool negative); +PRIVATE inline void FFPU MAKE_INF_POSITIVE (fpu_register & f); +PRIVATE inline void FFPU MAKE_INF_NEGATIVE (fpu_register & f); +PRIVATE inline void FFPU MAKE_ZERO_POSITIVE (fpu_register & f); +PRIVATE inline void FFPU MAKE_ZERO_NEGATIVE (fpu_register & f); // Conversion from extended floating-point values PRIVATE uae_s32 FFPU extended_to_signed_32 ( fpu_register const & f ) REGPARAM; @@ -342,6 +347,24 @@ PRIVATE void REGPARAM2 FFPU fpuop_do_fsincos( uae_u32 opcode, uae_u32 extra ); PRIVATE void REGPARAM2 FFPU fpuop_do_fcmp( uae_u32 opcode, uae_u32 extra ); PRIVATE void REGPARAM2 FFPU fpuop_do_ftst( uae_u32 opcode, uae_u32 extra ); +// 040 +PRIVATE void REGPARAM2 FFPU fpuop_do_fsmove( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fdmove( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fssqrt( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fdsqrt( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fsabs( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fdabs( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fsneg( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fdneg( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fsdiv( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fddiv( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fsadd( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fdadd( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fssub( 
uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fdsub( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fsmul( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fdmul( uae_u32 opcode, uae_u32 extra ); + // Get & Put floating-point values PRIVATE int FFPU get_fp_value (uae_u32 opcode, uae_u32 extra, fpu_register & src) REGPARAM; PRIVATE int FFPU put_fp_value (fpu_register const & value, uae_u32 opcode, uae_u32 extra) REGPARAM; @@ -351,9 +374,9 @@ PRIVATE int FFPU get_fp_ad(uae_u32 opcode, uae_u32 * ad) REGPARAM; PRIVATE int FFPU fpp_cond(uae_u32 opcode, int condition) REGPARAM; // Misc functions -PRIVATE void __inline__ FFPU set_host_fpu_control_word (); -PRIVATE void __inline__ FFPU SET_BSUN_ON_NAN (); -PRIVATE void __inline__ FFPU build_ex_status (); +PRIVATE void inline FFPU set_host_fpu_control_word (); +PRIVATE void inline FFPU SET_BSUN_ON_NAN (); +PRIVATE void inline FFPU build_ex_status (); PRIVATE void FFPU do_null_frestore (); PRIVATE void FFPU build_fpp_opp_lookup_table (); PRIVATE void FFPU set_constant ( fpu_register & f, char *name, double value, uae_s32 mult ); diff --git a/BasiliskII/src/uae_cpu/fpu/fpu_x86_asm.h b/BasiliskII/src/uae_cpu/fpu/fpu_x86_asm.h index ecdecfbc..6e5a3766 100644 --- a/BasiliskII/src/uae_cpu/fpu/fpu_x86_asm.h +++ b/BasiliskII/src/uae_cpu/fpu/fpu_x86_asm.h @@ -1,3 +1,35 @@ +/* + * fpu/fpu_x86_asm.h - Extra Definitions for the X86 assembly FPU core + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. 
+ * + * MC68881/68040 fpu emulation + * + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + #define DEFINE_X86_MACRO(name, value) \ asm(".local " #name "\n\t" #name " = " #value) diff --git a/BasiliskII/src/uae_cpu/fpu/impl.h b/BasiliskII/src/uae_cpu/fpu/impl.h index c79d1f3f..ec5648a9 100644 --- a/BasiliskII/src/uae_cpu/fpu/impl.h +++ b/BasiliskII/src/uae_cpu/fpu/impl.h @@ -1,28 +1,38 @@ /* * fpu/impl.h - extra functions and inline implementations * - * Basilisk II (C) 1997-2008 Christian Bauer + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II * - * MC68881/68040 fpu emulation - * - * Original UAE FPU, copyright 1996 Herman ten Brugge - * Rewrite for x86, copyright 1999-2000 Lauri Pesonen - * New framework, copyright 2000 Gwenole Beauchesne - * Adapted for JIT compilation (c) Bernd Meyer, 2000 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at 
your option) any later version. + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * MC68881/68040 fpu emulation * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef FPU_IMPL_H @@ -101,14 +111,16 @@ static inline void FFPU set_fpsr(uae_u32 new_fpsr) /* Return the floating-point control register in m68k format */ static inline uae_u32 FFPU get_fpcr(void) { - uae_u32 rounding_precision = get_rounding_precision(); - uae_u32 rounding_mode = get_rounding_mode(); - return (rounding_precision | rounding_mode); + // according to the manual, the msb bits are always zero. + // According to Toni Wilen, on '040 the least + // significant 4 bits are not masked out + return FPU fpcr & (CPUType == 4 ? 0xffff : 0xfff0); } /* Set the floating-point control register from an m68k format */ static inline void FFPU set_fpcr(uae_u32 new_fpcr) { + FPU fpcr = new_fpcr; set_rounding_precision ( new_fpcr & FPCR_ROUNDING_PRECISION); set_rounding_mode ( new_fpcr & FPCR_ROUNDING_MODE ); set_host_control_word(); @@ -123,9 +135,8 @@ static inline void FFPU set_fpcr(uae_u32 new_fpcr) /* Retrieve a floating-point register value and convert it to double precision */ static inline double FFPU fpu_get_register(int r) { - double f; - __asm__ __volatile__("fldt %1\n\tfstpl %0" : "=m" (f) : "m" (FPU registers[r])); - return f; + /* only used for debug output; no need for any fancy asm here */ + return FPU registers[r]; } #endif diff --git a/BasiliskII/src/uae_cpu/fpu/mathlib.cpp b/BasiliskII/src/uae_cpu/fpu/mathlib.cpp index eabb376e..c9616927 100644 --- a/BasiliskII/src/uae_cpu/fpu/mathlib.cpp +++ b/BasiliskII/src/uae_cpu/fpu/mathlib.cpp @@ -1,28 +1,33 @@ /* * fpu/mathlib.cpp - Floating-point math support library * - * Basilisk II (C) 1997-2008 Christian Bauer + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II * - * MC68881/68040 fpu emulation - * - * Original UAE FPU, copyright 
1996 Herman ten Brugge - * Rewrite for x86, copyright 1999-2001 Lauri Pesonen - * New framework, copyright 2000-2001 Gwenole Beauchesne - * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * MC68881/68040 fpu emulation * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* NOTE: this file shall be included only from fpu/fpu_*.cpp */ @@ -40,6 +45,7 @@ #if defined(FPU_IEEE) && defined(USE_X87_ASSEMBLY) +#if !defined(HAVE_EXP10L) && !defined(HAVE_POW10L) PRIVATE fpu_extended fp_do_pow(fpu_extended x, fpu_extended y) { fpu_extended value, exponent; @@ -82,7 +88,9 @@ PRIVATE fpu_extended fp_do_pow(fpu_extended x, fpu_extended y) __asm__ __volatile__("fscale" : "=t" (value) : "0" (value), "u" (exponent)); return value; } +#endif +#ifndef HAVE_LOG1PL PRIVATE fpu_extended fp_do_log1p(fpu_extended x) { // TODO: handle NaN and +inf/-inf @@ -96,5 +104,6 @@ PRIVATE fpu_extended fp_do_log1p(fpu_extended x) __asm__ __volatile__("fldln2; fxch; fyl2x" : "=t" (value) : "0" (x + 1.0)); return value; } +#endif #endif diff --git a/BasiliskII/src/uae_cpu/fpu/mathlib.h b/BasiliskII/src/uae_cpu/fpu/mathlib.h index 2363af56..8b03dd2a 100644 --- a/BasiliskII/src/uae_cpu/fpu/mathlib.h +++ b/BasiliskII/src/uae_cpu/fpu/mathlib.h @@ -1,28 +1,33 @@ /* - * fpu/mathlib.h - Floating-point math support library + * fpu/mathlib.h - Floating-point math support library * - * Basilisk II (C) 1997-2008 Christian Bauer + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II * - * MC68881/68040 fpu emulation - * - * Original UAE FPU, copyright 1996 Herman ten Brugge - * Rewrite for x86, copyright 1999-2001 Lauri Pesonen - * New framework, copyright 2000-2001 Gwenole Beauchesne - * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
+ * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * MC68881/68040 fpu emulation * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef FPU_MATHLIB_H @@ -49,22 +54,7 @@ // Use ISO C99 extended-precision math functions (glibc 2.1+) #define FPU_USE_ISO_C99 1 -// NOTE: this is irrelevant on Win32 platforms since the MS libraries -// don't support extended-precision floating-point computations -#if defined(WIN32) && USE_LONG_DOUBLE -#undef FPU_USE_ISO_C99 -#endif - -// Use faster implementation of math functions, but this could cause -// some incorrect results (?) -#ifdef _MSC_VER -// MSVC uses intrinsics for all of the math functions, so it should still be fast -#define FPU_FAST_MATH 0 -#else -#define FPU_FAST_MATH 1 -#endif - -#if FPU_USE_ISO_C99 +#if defined(FPU_USE_ISO_C99) // NOTE: no prior shall be included at this point #define __USE_ISOC99 1 // for glibc 2.2.X and newer #define __USE_ISOC9X 1 // for glibc 2.1.X @@ -147,7 +137,7 @@ union fpu_double_shape { unsigned int mantissa0:20; unsigned int mantissa1:32; #else -# if HOST_FLOAT_WORDS_BIG_ENDIAN +# if defined(HOST_FLOAT_WORDS_BIG_ENDIAN) && HOST_FLOAT_WORDS_BIG_ENDIAN unsigned int mantissa0:20; unsigned int exponent:11; unsigned int negative:1; @@ -172,7 +162,7 @@ union fpu_double_shape { unsigned int mantissa0:19; unsigned int mantissa1:32; #else -# if HOST_FLOAT_WORDS_BIG_ENDIAN +# if defined(HOST_FLOAT_WORDS_BIG_ENDIAN) && HOST_FLOAT_WORDS_BIG_ENDIAN unsigned int mantissa0:19; unsigned int quiet_nan:1; unsigned int exponent:11; @@ -191,7 +181,7 @@ union fpu_double_shape { /* This format is used to extract the sign_exponent and mantissa parts only */ struct { -#if HOST_FLOAT_WORDS_BIG_ENDIAN +#if defined(HOST_FLOAT_WORDS_BIG_ENDIAN) && HOST_FLOAT_WORDS_BIG_ENDIAN unsigned int msw:32; unsigned int lsw:32; #else @@ -215,7 +205,7 @@ union fpu_extended_shape { unsigned int mantissa0:32; unsigned int mantissa1:32; #else -# if 
HOST_FLOAT_WORDS_BIG_ENDIAN +# if defined(HOST_FLOAT_WORDS_BIG_ENDIAN) && HOST_FLOAT_WORDS_BIG_ENDIAN unsigned int exponent:15; unsigned int negative:1; unsigned int empty:16; @@ -242,7 +232,7 @@ union fpu_extended_shape { unsigned int mantissa0:30; unsigned int mantissa1:32; #else -# if HOST_FLOAT_WORDS_BIG_ENDIAN +# if defined(HOST_FLOAT_WORDS_BIG_ENDIAN) && HOST_FLOAT_WORDS_BIG_ENDIAN unsigned int exponent:15; unsigned int negative:1; unsigned int empty:16; @@ -264,7 +254,7 @@ union fpu_extended_shape { /* This format is used to extract the sign_exponent and mantissa parts only */ struct { -#if HOST_FLOAT_WORDS_BIG_ENDIAN +#if defined(HOST_FLOAT_WORDS_BIG_ENDIAN) && HOST_FLOAT_WORDS_BIG_ENDIAN unsigned int sign_exponent:16; unsigned int empty:16; unsigned int msw:32; @@ -310,7 +300,7 @@ union fpu_extended_shape { unsigned int exponent:15; unsigned int quiet_nan:1; unsigned int mantissa0:15; - unsigned int mantissa1:30; + unsigned int mantissa1:32; unsigned int mantissa2:32; unsigned int mantissa3:32; #else @@ -325,7 +315,7 @@ union fpu_extended_shape { } ieee_nan; /* This format is used to extract the sign_exponent and mantissa parts only */ -#if HOST_FLOAT_WORDS_BIG_ENDIAN +#if defined(HOST_FLOAT_WORDS_BIG_ENDIAN) && HOST_FLOAT_WORDS_BIG_ENDIAN struct { uae_u64 msw; uae_u64 lsw; @@ -351,9 +341,9 @@ union fpu_extended_shape { }; #endif -// Declare and initialize a pointer to a shape of the requested FP type -#define fp_declare_init_shape(psvar, rfvar, ftype) \ - fpu_ ## ftype ## _shape * psvar = (fpu_ ## ftype ## _shape *)( &rfvar ) +// Declare a shape of the requested FP type +#define fp_declare_init_shape(psvar, ftype) \ + fpu_ ## ftype ## _shape psvar /* -------------------------------------------------------------------------- */ /* --- Extra Math Functions --- */ @@ -370,47 +360,51 @@ union fpu_extended_shape { PRIVATE inline bool FFPU fp_do_isnan(fpu_register const & r) { #ifdef BRANCHES_ARE_EXPENSIVE -#ifndef USE_LONG_DOUBLE - fp_declare_init_shape(sxp, 
r, double); - uae_s32 hx = sxp->parts.msw; - uae_s32 lx = sxp->parts.lsw; +#if !defined(USE_LONG_DOUBLE) + fp_declare_init_shape(sxp, double); + sxp.value = r; + uae_s32 hx = sxp.parts.msw; + uae_s32 lx = sxp.parts.lsw; hx &= 0x7fffffff; hx |= (uae_u32)(lx | (-lx)) >> 31; hx = 0x7ff00000 - hx; - return (((uae_u32)hx) >> 31) != 0; -#elif USE_QUAD_DOUBLE - fp_declare_init_shape(sxp, r, extended); - uae_s64 hx = sxp->parts64.msw; - uae_s64 lx = sxp->parts64.lsw; + return (int)(((uae_u32)hx) >> 31); +#elif defined(USE_QUAD_DOUBLE) + fp_declare_init_shape(sxp, extended); + sxp.value = r; + uae_s64 hx = sxp.parts64.msw; + uae_s64 lx = sxp.parts64.lsw; hx &= 0x7fffffffffffffffLL; hx |= (uae_u64)(lx | (-lx)) >> 63; hx = 0x7fff000000000000LL - hx; - return ((uae_u64)hx >> 63) != 0; + return (int)((uae_u64)hx >> 63); #else - fp_declare_init_shape(sxp, r, extended); - uae_s32 se = sxp->parts.sign_exponent; - uae_s32 hx = sxp->parts.msw; - uae_s32 lx = sxp->parts.lsw; + fp_declare_init_shape(sxp, extended); + sxp.value = r; + uae_s32 se = sxp.parts.sign_exponent; + uae_s32 hx = sxp.parts.msw; + uae_s32 lx = sxp.parts.lsw; se = (se & 0x7fff) << 1; lx |= hx & 0x7fffffff; se |= (uae_u32)(lx | (-lx)) >> 31; se = 0xfffe - se; - // TODO: check whether rshift count is 16 or 31 - return (((uae_u32)(se)) >> 16) != 0; + return (int)(((uae_u32)(se)) >> 31); #endif #else -#if USE_LONG_DOUBLE || USE_QUAD_DOUBLE - fp_declare_init_shape(sxp, r, extended); - return (sxp->ieee_nan.exponent == FP_EXTENDED_EXP_MAX) +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) + fp_declare_init_shape(sxp, extended); + sxp.value = r; + return (sxp.ieee.exponent == FP_EXTENDED_EXP_MAX) #else - fp_declare_init_shape(sxp, r, double); - return (sxp->ieee_nan.exponent == FP_DOUBLE_EXP_MAX) + fp_declare_init_shape(sxp, double); + sxp.value = r; + return (sxp.ieee.exponent == FP_DOUBLE_EXP_MAX) #endif - && (sxp->ieee_nan.mantissa0 != 0) - && (sxp->ieee_nan.mantissa1 != 0) + && (sxp.ieee.mantissa0 & 
0x7fffffff) != 0 + && sxp.ieee.mantissa1 != 0 #ifdef USE_QUAD_DOUBLE - && (sxp->ieee_nan.mantissa2 != 0) - && (sxp->ieee_nan.mantissa3 != 0) + && sxp.ieee.mantissa2 != 0 + && sxp.ieee.mantissa3 != 0 #endif ; #endif @@ -426,50 +420,62 @@ PRIVATE inline bool FFPU fp_do_isnan(fpu_register const & r) PRIVATE inline bool FFPU fp_do_isinf(fpu_register const & r) { #ifdef BRANCHES_ARE_EXPENSIVE -#ifndef USE_LONG_DOUBLE - fp_declare_init_shape(sxp, r, double); - uae_s32 hx = sxp->parts.msw; - uae_s32 lx = sxp->parts.lsw; +#if !defined(USE_LONG_DOUBLE) + fp_declare_init_shape(sxp, double); + sxp.value = r; + uae_s32 hx = sxp.parts.msw; + uae_s32 lx = sxp.parts.lsw; lx |= (hx & 0x7fffffff) ^ 0x7ff00000; lx |= -lx; - return (~(lx >> 31) & (hx >> 30)) != 0; -#elif USE_QUAD_DOUBLE - fp_declare_init_shape(sxp, r, extended); - uae_s64 hx = sxp->parts64.msw; - uae_s64 lx = sxp->parts64.lsw; + return ~(lx >> 31) & (hx >> 30); +#elif defined(USE_QUAD_DOUBLE) + fp_declare_init_shape(sxp, extended); + sxp.value = r; + uae_s64 hx = sxp.parts64.msw; + uae_s64 lx = sxp.parts64.lsw; lx |= (hx & 0x7fffffffffffffffLL) ^ 0x7fff000000000000LL; lx |= -lx; - return (~(lx >> 63) & (hx >> 62)) != 0; + return ~(lx >> 63) & (hx >> 62); #else - fp_declare_init_shape(sxp, r, extended); - uae_s32 se = sxp->parts.sign_exponent; - uae_s32 hx = sxp->parts.msw; - uae_s32 lx = sxp->parts.lsw; + fp_declare_init_shape(sxp, extended); + sxp.value = r; + /* NOTE: This function should work for both m68k and native INFs. */ +#if 0 + uae_s32 se = sxp.parts.sign_exponent; + uae_s32 hx = sxp.parts.msw; + uae_s32 lx = sxp.parts.lsw; /* This additional ^ 0x80000000 is necessary because in Intel's internal representation of the implicit one is explicit. NOTE: anyway, this is equivalent to & 0x7fffffff in that case. 
*/ -#ifdef __i386__ +#ifdef CPU_i386 lx |= (hx ^ 0x80000000) | ((se & 0x7fff) ^ 0x7fff); #else lx |= (hx & 0x7fffffff) | ((se & 0x7fff) ^ 0x7fff); #endif lx |= -lx; se &= 0x8000; - return (~(lx >> 31) & (1 - (se >> 14))) != 0; + return ~(lx >> 31) & (1 - (se >> 14)); +#else + return sxp.ieee.exponent == FP_EXTENDED_EXP_MAX + && (sxp.ieee.mantissa0 & 0x7fffffff) == 0 + && sxp.ieee.mantissa1 == 0; +#endif #endif #else -#if USE_LONG_DOUBLE || USE_QUAD_DOUBLE - fp_declare_init_shape(sxp, r, extended); - return (sxp->ieee_nan.exponent == FP_EXTENDED_EXP_MAX) +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) + fp_declare_init_shape(sxp, extended); + sxp.value = r; + return (sxp.ieee_nan.exponent == FP_EXTENDED_EXP_MAX) #else - fp_declare_init_shape(sxp, r, double); - return (sxp->ieee_nan.exponent == FP_DOUBLE_EXP_MAX) + fp_declare_init_shape(sxp, double); + sxp.value = r; + return (sxp.ieee_nan.exponent == FP_DOUBLE_EXP_MAX) #endif - && (sxp->ieee_nan.mantissa0 == 0) - && (sxp->ieee_nan.mantissa1 == 0) + && (sxp.ieee.mantissa0 & 0x7fffffff) == 0 + && sxp.ieee.mantissa1 == 0 #ifdef USE_QUAD_DOUBLE - && (sxp->ieee_nan.mantissa2 == 0) - && (sxp->ieee_nan.mantissa3 == 0) + && sxp.ieee.mantissa2 == 0 + && sxp.ieee.mantissa3 == 0 #endif ; #endif @@ -480,12 +486,13 @@ PRIVATE inline bool FFPU fp_do_isinf(fpu_register const & r) PRIVATE inline bool FFPU fp_do_isneg(fpu_register const & r) { -#if USE_LONG_DOUBLE || USE_QUAD_DOUBLE - fp_declare_init_shape(sxp, r, extended); +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) + fp_declare_init_shape(sxp, extended); #else - fp_declare_init_shape(sxp, r, double); + fp_declare_init_shape(sxp, double); #endif - return sxp->ieee.negative; + sxp.value = r; + return sxp.ieee.negative; } #undef iszero @@ -494,17 +501,18 @@ PRIVATE inline bool FFPU fp_do_isneg(fpu_register const & r) PRIVATE inline bool FFPU fp_do_iszero(fpu_register const & r) { // TODO: BRANCHES_ARE_EXPENSIVE -#if USE_LONG_DOUBLE || USE_QUAD_DOUBLE - 
fp_declare_init_shape(sxp, r, extended); +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) + fp_declare_init_shape(sxp, extended); #else - fp_declare_init_shape(sxp, r, double); + fp_declare_init_shape(sxp, double); #endif - return (sxp->ieee.exponent == 0) - && (sxp->ieee.mantissa0 == 0) - && (sxp->ieee.mantissa1 == 0) + sxp.value = r; + return (sxp.ieee.exponent == 0) + && (sxp.ieee.mantissa0 == 0) + && (sxp.ieee.mantissa1 == 0) #ifdef USE_QUAD_DOUBLE - && (sxp->ieee.mantissa2 == 0) - && (sxp->ieee.mantissa3 == 0) + && (sxp.ieee.mantissa2 == 0) + && (sxp.ieee.mantissa3 == 0) #endif ; } @@ -527,158 +535,166 @@ PRIVATE inline void FFPU get_source_flags(fpu_register const & r) fl_source.in_range = !fl_source.zero && !fl_source.infinity && !fl_source.nan; } -PRIVATE inline void FFPU make_nan(fpu_register & r) +PRIVATE inline void FFPU make_nan(fpu_register & r, bool negative) { - // FIXME: is that correct ? -#if USE_LONG_DOUBLE || USE_QUAD_DOUBLE - fp_declare_init_shape(sxp, r, extended); - sxp->ieee.exponent = FP_EXTENDED_EXP_MAX; - sxp->ieee.mantissa0 = 0xffffffff; +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) + fp_declare_init_shape(sxp, extended); + sxp.ieee.exponent = FP_EXTENDED_EXP_MAX; + sxp.ieee.empty = 0; + sxp.ieee.mantissa0 = 0xffffffff; #else - fp_declare_init_shape(sxp, r, double); - sxp->ieee.exponent = FP_DOUBLE_EXP_MAX; - sxp->ieee.mantissa0 = 0xfffff; + fp_declare_init_shape(sxp, double); + sxp.ieee.exponent = FP_DOUBLE_EXP_MAX; + sxp.ieee.mantissa0 = 0xfffff; #endif - sxp->ieee.mantissa1 = 0xffffffff; + sxp.ieee.mantissa1 = 0xffffffff; #ifdef USE_QUAD_DOUBLE - sxp->ieee.mantissa2 = 0xffffffff; - sxp->ieee.mantissa3 = 0xffffffff; + sxp.ieee.mantissa2 = 0xffffffff; + sxp.ieee.mantissa3 = 0xffffffff; #endif + sxp.ieee.negative = negative; + r = sxp.value; } -PRIVATE inline void FFPU make_zero_positive(fpu_register & r) +PRIVATE inline void FFPU make_zero(fpu_register & r, bool negative) { #if 1 - r = +0.0; + r = negative ? 
-0.0 : +0.0; #else -#if USE_LONG_DOUBLE || USE_QUAD_DOUBLE - fp_declare_init_shape(sxp, r, extended); +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) + fp_declare_init_shape(sxp, extended); + sxp.ieee.empty = 0; #else - fp_declare_init_shape(sxp, r, double); + fp_declare_init_shape(sxp, double); #endif - sxp->ieee.negative = 0; - sxp->ieee.exponent = 0; - sxp->ieee.mantissa0 = 0; - sxp->ieee.mantissa1 = 0; + sxp.ieee.negative = negative; + sxp.ieee.exponent = 0; + sxp.ieee.mantissa0 = 0; + sxp.ieee.mantissa1 = 0; #ifdef USE_QUAD_DOUBLE - sxp->ieee.mantissa2 = 0; - sxp->ieee.mantissa3 = 0; + sxp.ieee.mantissa2 = 0; + sxp.ieee.mantissa3 = 0; #endif + r = sxp.value; #endif } -PRIVATE inline void FFPU make_zero_negative(fpu_register & r) +PRIVATE inline void FFPU make_inf(fpu_register & r, bool negative) { -#if 1 - r = -0.0; +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) + fp_declare_init_shape(sxp, extended); + sxp.ieee.exponent = FP_EXTENDED_EXP_MAX; + sxp.ieee.mantissa0 = 0x80000000; + sxp.ieee.empty = 0; #else -#if USE_LONG_DOUBLE || USE_QUAD_DOUBLE - fp_declare_init_shape(sxp, r, extended); -#else - fp_declare_init_shape(sxp, r, double); + fp_declare_init_shape(sxp, double); + sxp.ieee.exponent = FP_DOUBLE_EXP_MAX; + sxp.ieee.mantissa0 = 0; #endif - sxp->ieee.negative = 1; - sxp->ieee.exponent = 0; - sxp->ieee.mantissa0 = 0; - sxp->ieee.mantissa1 = 0; + sxp.ieee.negative = negative; + sxp.ieee.mantissa1 = 0; #ifdef USE_QUAD_DOUBLE - sxp->ieee.mantissa2 = 0; - sxp->ieee.mantissa3 = 0; -#endif -#endif -} - -PRIVATE inline void FFPU make_inf_positive(fpu_register & r) -{ -#if USE_LONG_DOUBLE || USE_QUAD_DOUBLE - fp_declare_init_shape(sxp, r, extended); - sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX; -#else - fp_declare_init_shape(sxp, r, double); - sxp->ieee_nan.exponent = FP_DOUBLE_EXP_MAX; -#endif - sxp->ieee_nan.negative = 0; - sxp->ieee_nan.mantissa0 = 0; - sxp->ieee_nan.mantissa1 = 0; -#ifdef USE_QUAD_DOUBLE - sxp->ieee_nan.mantissa2 = 0; - 
sxp->ieee_nan.mantissa3 = 0; -#endif -} - -PRIVATE inline void FFPU make_inf_negative(fpu_register & r) -{ -#if USE_LONG_DOUBLE || USE_QUAD_DOUBLE - fp_declare_init_shape(sxp, r, extended); - sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX; -#else - fp_declare_init_shape(sxp, r, double); - sxp->ieee_nan.exponent = FP_DOUBLE_EXP_MAX; -#endif - sxp->ieee_nan.negative = 1; - sxp->ieee_nan.mantissa0 = 0; - sxp->ieee_nan.mantissa1 = 0; -#ifdef USE_QUAD_DOUBLE - sxp->ieee_nan.mantissa2 = 0; - sxp->ieee_nan.mantissa3 = 0; + sxp.ieee.mantissa2 = 0; + sxp.ieee.mantissa3 = 0; #endif + r = sxp.value; } PRIVATE inline fpu_register FFPU fast_fgetexp(fpu_register const & r) { -#if USE_LONG_DOUBLE || USE_QUAD_DOUBLE - fp_declare_init_shape(sxp, r, extended); - return (sxp->ieee.exponent - FP_EXTENDED_EXP_BIAS); +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) + fp_declare_init_shape(sxp, extended); + sxp.value = r; + return ((int) sxp.ieee.exponent - FP_EXTENDED_EXP_BIAS); #else - fp_declare_init_shape(sxp, r, double); - return (sxp->ieee.exponent - FP_DOUBLE_EXP_BIAS); + fp_declare_init_shape(sxp, double); + sxp.value = r; + return ((int) sxp.ieee.exponent - FP_DOUBLE_EXP_BIAS); #endif } // Normalize to range 1..2 PRIVATE inline void FFPU fast_remove_exponent(fpu_register & r) { -#if USE_LONG_DOUBLE || USE_QUAD_DOUBLE - fp_declare_init_shape(sxp, r, extended); - sxp->ieee.exponent = FP_EXTENDED_EXP_BIAS; +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) + fp_declare_init_shape(sxp, extended); + sxp.value = r; + sxp.ieee.exponent = FP_EXTENDED_EXP_BIAS; #else - fp_declare_init_shape(sxp, r, double); - sxp->ieee.exponent = FP_DOUBLE_EXP_BIAS; + fp_declare_init_shape(sxp, double); + sxp.value = r; + sxp.ieee.exponent = FP_DOUBLE_EXP_BIAS; #endif + r = sxp.value; } // The sign of the quotient is the exclusive-OR of the sign bits // of the source and destination operands. 
PRIVATE inline uae_u32 FFPU get_quotient_sign(fpu_register const & ra, fpu_register const & rb) { -#if USE_LONG_DOUBLE || USE_QUAD_DOUBLE - fp_declare_init_shape(sap, ra, extended); - fp_declare_init_shape(sbp, rb, extended); +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) + fp_declare_init_shape(sap, extended); + fp_declare_init_shape(sbp, extended); #else - fp_declare_init_shape(sap, ra, double); - fp_declare_init_shape(sbp, rb, double); + fp_declare_init_shape(sap, double); + fp_declare_init_shape(sbp, double); #endif - return ((sap->ieee.negative ^ sbp->ieee.negative) ? FPSR_QUOTIENT_SIGN : 0); + sap.value = ra; + sbp.value = rb; + return ((sap.ieee.negative ^ sbp.ieee.negative) ? FPSR_QUOTIENT_SIGN : 0); } /* -------------------------------------------------------------------------- */ /* --- Math functions --- */ /* -------------------------------------------------------------------------- */ -#if FPU_USE_ISO_C99 -#if USE_LONG_DOUBLE || USE_QUAD_DOUBLE +#ifdef __HAIKU__ +#ifdef __cplusplus +extern "C" { +#endif +/* Haiku seems to lack some declarations, even if the functions are there */ +extern long double exp10l(long double); +#ifdef __cplusplus +} +#endif +#endif + +#if defined(FPU_USE_ISO_C99) && (defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE)) # ifdef HAVE_LOGL # define fp_log logl # endif +# ifdef HAVE_LOG1PL +# define fp_log1p log1pl +# endif +# ifdef HAVE_EXPM1L +# define fp_expm1 expm1l +# endif # ifdef HAVE_LOG10L # define fp_log10 log10l # endif +# ifdef HAVE_LOG2L +# define fp_log2 log2l +# endif # ifdef HAVE_EXPL # define fp_exp expl # endif # ifdef HAVE_POWL # define fp_pow powl # endif +# if defined(HAVE_EXP10L) +# define fp_pow10 exp10l +# elif defined(HAVE_POW10L) +# define fp_pow10 pow10l +# else +# define fp_pow10(x) fp_pow(LD(10.0), x) +# endif +# if defined(HAVE_EXP2L) +# define fp_pow2 exp2l +# elif defined(HAVE_POW2L) +# define fp_pow2 pow2l +# else +# define fp_pow2(x) fp_pow(LD(2.0), x) +# endif # ifdef HAVE_FABSL # 
define fp_fabs fabsl # endif @@ -732,15 +748,38 @@ PRIVATE inline uae_u32 FFPU get_quotient_sign(fpu_register const & ra, fpu_regis #ifndef fp_log # define fp_log log #endif +#ifndef fp_log1p +# define fp_log1p log1p +#endif +#ifndef fp_expm1 +# define fp_expm1 expm1 +#endif #ifndef fp_log10 # define fp_log10 log10 #endif +#ifndef fp_log2 +# define fp_log2 log2 +#endif #ifndef fp_exp # define fp_exp exp #endif #ifndef fp_pow # define fp_pow pow #endif +#ifndef fp_pow10 +# ifdef HAVE_POW10 +# define fp_pow10 pow10 +# else +# define fp_pow10 exp10 +# endif +#endif +#ifndef fp_pow2 +# ifdef HAVE_POW2 +# define fp_pow2 pow2 +# else +# define fp_pow2 exp2 +# endif +#endif #ifndef fp_fabs # define fp_fabs fabs #endif @@ -790,48 +829,43 @@ PRIVATE inline uae_u32 FFPU get_quotient_sign(fpu_register const & ra, fpu_regis # define fp_ceil ceil #endif -#elif defined(FPU_IEEE) && defined(USE_X87_ASSEMBLY) +#if defined(FPU_IEEE) && defined(USE_X87_ASSEMBLY) // Assembly optimized support functions. Taken from glibc 2.2.2 #undef fp_log #define fp_log fp_do_log -#if !FPU_FAST_MATH -PRIVATE fpu_extended fp_do_log(fpu_extended x); -#else PRIVATE inline fpu_extended fp_do_log(fpu_extended x) { fpu_extended value; __asm__ __volatile__("fldln2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)"); return value; } -#endif #undef fp_log10 #define fp_log10 fp_do_log10 -#if !FPU_FAST_MATH -// FIXME: unimplemented -PRIVATE fpu_extended fp_do_log10(fpu_extended x); -#else PRIVATE inline fpu_extended fp_do_log10(fpu_extended x) { fpu_extended value; __asm__ __volatile__("fldlg2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)"); return value; } -#endif +#if !defined(HAVE_EXPL) #undef fp_exp #define fp_exp fp_do_exp -#if !FPU_FAST_MATH -// FIXME: unimplemented -PRIVATE fpu_extended fp_do_exp(fpu_extended x); -#else PRIVATE inline fpu_extended fp_do_exp(fpu_extended x) { fpu_extended value, exponent; + if (isinf(x)) + { + if(isneg(x)) + return 0.; + else + return x; + } __asm__ __volatile__("fldl2e 
# e^x = 2^(x * log2(e))\n\t" "fmul %%st(1) # x * log2(e)\n\t" "fst %%st(1)\n\t" @@ -846,10 +880,12 @@ PRIVATE inline fpu_extended fp_do_exp(fpu_extended x) } #endif +#if !defined(HAVE_EXP10L) && !defined(HAVE_POW10L) #undef fp_pow #define fp_pow fp_do_pow PRIVATE fpu_extended fp_do_pow(fpu_extended x, fpu_extended y); +#endif #undef fp_fabs #define fp_fabs fp_do_fabs @@ -871,6 +907,7 @@ PRIVATE inline fpu_extended fp_do_sqrt(fpu_extended x) return value; } +#ifndef ACCURATE_SIN_COS_TAN #undef fp_sin #define fp_sin fp_do_sin @@ -896,18 +933,27 @@ PRIVATE inline fpu_extended fp_do_cos(fpu_extended x) PRIVATE inline fpu_extended fp_do_tan(fpu_extended x) { - fpu_extended value; - __asm__ __volatile__("fptan" : "=t" (value) : "0" (x)); + fpu_extended value, value2; + __asm__ __volatile__("fptan" : "=t" (value2), "=u" (value) : "0" (x)); return value; } +#endif /* ACCURATE_SIN_COS_TAN */ +#ifndef HAVE_EXPM1L #undef fp_expm1 #define fp_expm1 fp_do_expm1 // Returns: exp(X) - 1.0 PRIVATE inline fpu_extended fp_do_expm1(fpu_extended x) { - fpu_extended value, exponent, temp; + fpu_extended value, exponent, temp, temp2; + if (isinf(x)) + { + if(isneg(x)) + return -1.; + else + return x; + } __asm__ __volatile__("fldl2e # e^x - 1 = 2^(x * log2(e)) - 1\n\t" "fmul %%st(1) # x * log2(e)\n\t" "fst %%st(1)\n\t" @@ -917,51 +963,52 @@ PRIVATE inline fpu_extended fp_do_expm1(fpu_extended x) "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t" "fscale # 2^(x * log2(e)) - 2^(int(x * log2(e)))\n\t" : "=t" (value), "=u" (exponent) : "0" (x)); - __asm__ __volatile__("fscale" : "=t" (temp) : "0" (1.0), "u" (exponent)); + __asm__ __volatile__("fld1 \n\t" + "fscale \n\t" + : "=t" (temp), "=u" (temp2) : "0" (exponent)); temp -= 1.0; return temp + value ? 
temp + value : x; } +#endif #undef fp_sgn1 #define fp_sgn1 fp_do_sgn1 PRIVATE inline fpu_extended fp_do_sgn1(fpu_extended x) { -#if USE_LONG_DOUBLE || USE_QUAD_DOUBLE - fp_declare_init_shape(sxp, x, extended); - sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX; - sxp->ieee_nan.one = 1; +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) + fp_declare_init_shape(sxp, extended); + sxp.value = x; + sxp.ieee_nan.exponent = FP_EXTENDED_EXP_MAX>>1; + sxp.ieee_nan.one = 1; #else - fp_declare_init_shape(sxp, x, double); - sxp->ieee_nan.exponent = FP_DOUBLE_EXP_MAX; + fp_declare_init_shape(sxp, double); + sxp.value = x; + sxp.ieee_nan.exponent = FP_DOUBLE_EXP_MAX>>1; #endif - sxp->ieee_nan.quiet_nan = 0; - sxp->ieee_nan.mantissa0 = 0; - sxp->ieee_nan.mantissa1 = 0; + sxp.ieee_nan.quiet_nan = 0; + sxp.ieee_nan.mantissa0 = 0; + sxp.ieee_nan.mantissa1 = 0; + x = sxp.value; return x; } +#ifndef HAVE_SINHL #undef fp_sinh #define fp_sinh fp_do_sinh -#if !FPU_FAST_MATH -// FIXME: unimplemented -PRIVATE fpu_extended fp_do_sinh(fpu_extended x); -#else PRIVATE inline fpu_extended fp_do_sinh(fpu_extended x) { + if (isinf(x)) return x; fpu_extended exm1 = fp_expm1(fp_fabs(x)); return 0.5 * (exm1 / (exm1 + 1.0) + exm1) * fp_sgn1(x); } #endif +#ifndef HAVE_COSHL #undef fp_cosh #define fp_cosh fp_do_cosh -#if !FPU_FAST_MATH -// FIXME: unimplemented -PRIVATE fpu_extended fp_do_cosh(fpu_extended x); -#else PRIVATE inline fpu_extended fp_do_cosh(fpu_extended x) { fpu_extended ex = fp_exp(x); @@ -969,13 +1016,10 @@ PRIVATE inline fpu_extended fp_do_cosh(fpu_extended x) } #endif +#ifndef HAVE_TANHL #undef fp_tanh #define fp_tanh fp_do_tanh -#if !FPU_FAST_MATH -// FIXME: unimplemented -PRIVATE fpu_extended fp_do_tanh(fpu_extended x); -#else PRIVATE inline fpu_extended fp_do_tanh(fpu_extended x) { fpu_extended exm1 = fp_expm1(-fp_fabs(x + x)); @@ -993,6 +1037,7 @@ PRIVATE inline fpu_extended fp_do_atan2(fpu_extended y, fpu_extended x) return value; } +#ifndef HAVE_ASINL #undef fp_asin #define 
fp_asin fp_do_asin @@ -1000,7 +1045,9 @@ PRIVATE inline fpu_extended fp_do_asin(fpu_extended x) { return fp_atan2(x, fp_sqrt(1.0 - x * x)); } +#endif +#ifndef HAVE_ACOSL #undef fp_acos #define fp_acos fp_do_acos @@ -1008,6 +1055,7 @@ PRIVATE inline fpu_extended fp_do_acos(fpu_extended x) { return fp_atan2(fp_sqrt(1.0 - x * x), x); } +#endif #undef fp_atan #define fp_atan fp_do_atan @@ -1019,12 +1067,15 @@ PRIVATE inline fpu_extended fp_do_atan(fpu_extended x) return value; } +#ifndef HAVE_LOG1PL #undef fp_log1p #define fp_log1p fp_do_log1p // Returns: ln(1.0 + X) PRIVATE fpu_extended fp_do_log1p(fpu_extended x); +#endif +#ifndef HAVE_ASINHL #undef fp_asinh #define fp_asinh fp_do_asinh @@ -1033,7 +1084,9 @@ PRIVATE inline fpu_extended fp_do_asinh(fpu_extended x) fpu_extended y = fp_fabs(x); return (fp_log1p(y * y / (fp_sqrt(y * y + 1.0) + 1.0) + y) * fp_sgn1(x)); } +#endif +#ifndef HAVE_ACOSHL #undef fp_acosh #define fp_acosh fp_do_acosh @@ -1041,7 +1094,9 @@ PRIVATE inline fpu_extended fp_do_acosh(fpu_extended x) { return fp_log(x + fp_sqrt(x - 1.0) * fp_sqrt(x + 1.0)); } +#endif +#ifndef HAVE_ATANHL #undef fp_atanh #define fp_atanh fp_do_atanh @@ -1050,69 +1105,95 @@ PRIVATE inline fpu_extended fp_do_atanh(fpu_extended x) fpu_extended y = fp_fabs(x); return -0.5 * fp_log1p(-(y + y) / (1.0 + y)) * fp_sgn1(x); } +#endif -#undef fp_floor -#define fp_floor fp_do_floor -PRIVATE inline fpu_extended fp_do_floor(fpu_extended x) -{ - volatile unsigned int cw; - __asm__ __volatile__("fnstcw %0" : "=m" (cw)); - volatile unsigned int cw_temp = (cw & 0xf3ff) | 0x0400; // rounding down - __asm__ __volatile__("fldcw %0" : : "m" (cw_temp)); - fpu_extended value; - __asm__ __volatile__("frndint" : "=t" (value) : "0" (x)); - __asm__ __volatile__("fldcw %0" : : "m" (cw)); - return value; +/* + * LLVM 2.9 crashes on first definition, + * clang with LLVM 3.x crashes on 2nd definition... 
sigh + */ +#if defined(__clang__) || !defined(__llvm__) +#define DEFINE_ROUND_FUNC(rounding_mode_str, rounding_mode) \ +PRIVATE inline fpu_extended fp_do_round_to_ ## rounding_mode_str(fpu_extended __x) \ +{ \ + register long double __value; \ + register int __ignore; \ + volatile unsigned short __cw; \ + volatile unsigned short __cwtmp; \ + __asm __volatile ("fnstcw %3\n\t" \ + "movzwl %3, %1\n\t" \ + "andl $0xf3ff, %1\n\t" \ + "orl %5, %1\n\t" \ + "movw %w1, %2\n\t" \ + "fldcw %2\n\t" \ + "frndint\n\t" \ + "fldcw %3" \ + : "=t" (__value), "=&q" (__ignore), "=m" (__cwtmp), \ + "=m" (__cw) \ + : "0" (__x), "i"(rounding_mode)); \ + return __value; \ } - -#undef fp_ceil -#define fp_ceil fp_do_ceil - -PRIVATE inline fpu_extended fp_do_ceil(fpu_extended x) -{ - volatile unsigned int cw; - __asm__ __volatile__("fnstcw %0" : "=m" (cw)); - volatile unsigned int cw_temp = (cw & 0xf3ff) | 0x0800; // rounding up - __asm__ __volatile__("fldcw %0" : : "m" (cw_temp)); - fpu_extended value; - __asm__ __volatile__("frndint" : "=t" (value) : "0" (x)); - __asm__ __volatile__("fldcw %0" : : "m" (cw)); - return value; -} - +#else #define DEFINE_ROUND_FUNC(rounding_mode_str, rounding_mode) \ PRIVATE inline fpu_extended fp_do_round_to_ ## rounding_mode_str(fpu_extended x) \ { \ - volatile unsigned int cw; \ + volatile unsigned short cw; \ __asm__ __volatile__("fnstcw %0" : "=m" (cw)); \ - volatile unsigned int cw_temp = (cw & 0xf3ff) | (rounding_mode); \ + volatile unsigned short cw_temp = (cw & 0xf3ff) | (rounding_mode); \ __asm__ __volatile__("fldcw %0" : : "m" (cw_temp)); \ fpu_extended value; \ __asm__ __volatile__("frndint" : "=t" (value) : "0" (x)); \ __asm__ __volatile__("fldcw %0" : : "m" (cw)); \ return value; \ } +#endif #undef fp_round_to_minus_infinity +#ifdef HAVE_FLOORL +#define fp_round_to_minus_infinity floorl +#else #define fp_round_to_minus_infinity fp_do_round_to_minus_infinity - -DEFINE_ROUND_FUNC(minus_infinity, 0x400) +DEFINE_ROUND_FUNC(minus_infinity, CW_RC_DOWN) 
+#endif #undef fp_round_to_plus_infinity +#ifdef HAVE_CEILL +#define fp_round_to_plus_infinity ceill +#else #define fp_round_to_plus_infinity fp_do_round_to_plus_infinity - -DEFINE_ROUND_FUNC(plus_infinity, 0x800) +DEFINE_ROUND_FUNC(plus_infinity, CW_RC_UP) +#endif #undef fp_round_to_zero +#ifdef HAVE_TRUNCL +#define fp_round_to_zero truncl +#else #define fp_round_to_zero fp_do_round_to_zero - -DEFINE_ROUND_FUNC(zero, 0xc00) +DEFINE_ROUND_FUNC(zero, CW_RC_ZERO) +#endif #undef fp_round_to_nearest +#ifdef HAVE_ROUNDL +#define fp_round_to_nearest roundl +#else #define fp_round_to_nearest fp_do_round_to_nearest +DEFINE_ROUND_FUNC(nearest, CW_RC_NEAR) +#endif + +#undef fp_round_to_even +#ifdef HAVE_RINTL +#define fp_round_to_even rintl +#else +#define fp_round_to_even fp_do_round_to_even +DEFINE_ROUND_FUNC(even, CW_RC_NEAR) +#endif + +#undef fp_ceil +#define fp_ceil fp_do_round_to_plus_infinity + +#undef fp_floor +#define fp_floor fp_do_round_to_minus_infinity -DEFINE_ROUND_FUNC(nearest, 0x000) #endif /* USE_X87_ASSEMBLY */ @@ -1132,4 +1213,8 @@ DEFINE_ROUND_FUNC(nearest, 0x000) #define fp_round_to_nearest(x) ((int)((x) + 0.5)) #endif +#ifndef fp_round_to_even +#define fp_round_to_even fp_round_to_nearest +#endif + #endif /* FPU_MATHLIB_H */ diff --git a/BasiliskII/src/uae_cpu/fpu/rounding.cpp b/BasiliskII/src/uae_cpu/fpu/rounding.cpp index 1f8b3618..9942d4e8 100644 --- a/BasiliskII/src/uae_cpu/fpu/rounding.cpp +++ b/BasiliskII/src/uae_cpu/fpu/rounding.cpp @@ -1,28 +1,33 @@ /* - * fpu/rounding.cpp - system-dependant FPU rounding mode and precision + * fpu/rounding.cpp - system-dependant FPU rounding mode and precision * - * Basilisk II (C) 1997-2008 Christian Bauer + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II * - * MC68881/68040 fpu emulation - * - * Original UAE FPU, copyright 1996 Herman ten Brugge - * Rewrite for x86, copyright 1999-2000 Lauri Pesonen - * New framework, copyright 2000 
Gwenole Beauchesne - * Adapted for JIT compilation (c) Bernd Meyer, 2000 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * MC68881/68040 fpu emulation * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #undef PRIVATE diff --git a/BasiliskII/src/uae_cpu/fpu/rounding.h b/BasiliskII/src/uae_cpu/fpu/rounding.h index 67db5519..aa2c9ced 100644 --- a/BasiliskII/src/uae_cpu/fpu/rounding.h +++ b/BasiliskII/src/uae_cpu/fpu/rounding.h @@ -1,28 +1,33 @@ /* - * fpu/rounding.h - system-dependant FPU rounding mode and precision + * fpu/rounding.h - system-dependant FPU rounding mode and precision * - * Basilisk II (C) 1997-2008 Christian Bauer + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II * - * MC68881/68040 fpu emulation - * - * Original UAE FPU, copyright 1996 Herman ten Brugge - * Rewrite for x86, copyright 1999-2000 Lauri Pesonen - * New framework, copyright 2000 Gwenole Beauchesne - * Adapted for JIT compilation (c) Bernd Meyer, 2000 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
+ * MC68881/68040 fpu emulation * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef FPU_ROUNDING_H @@ -106,8 +111,8 @@ PRIVATE inline void set_host_control_word(void) */ x86_control_word = (x86_control_word & ~(X86_ROUNDING_MODE|X86_ROUNDING_PRECISION)) - | x86_control_word_rm_mac2host[(FPU fpcr.rounding_mode & FPCR_ROUNDING_MODE) >> 4] - | x86_control_word_rp_mac2host[(FPU fpcr.rounding_precision & FPCR_ROUNDING_PRECISION) >> 6] + | x86_control_word_rm_mac2host[(FPU fpcr & FPCR_ROUNDING_MODE) >> 4] + | x86_control_word_rp_mac2host[(FPU fpcr & FPCR_ROUNDING_PRECISION) >> 6] ; __asm__ __volatile__("fldcw %0" : : "m" (x86_control_word)); } @@ -131,11 +136,11 @@ PRIVATE inline void set_host_control_word(void) /* Return the current rounding mode in m68k format */ static inline uae_u32 FFPU get_rounding_mode(void) - { return FPU fpcr.rounding_mode; } + { return FPU fpcr & 
FPCR_ROUNDING_MODE; } /* Convert and set to native rounding mode */ -static inline void FFPU set_rounding_mode(uae_u32 new_rounding_mode) - { FPU fpcr.rounding_mode = new_rounding_mode; } +static inline void FFPU set_rounding_mode(uae_u32 /* new_rounding_mode */ ) + { } #endif @@ -143,11 +148,11 @@ static inline void FFPU set_rounding_mode(uae_u32 new_rounding_mode) /* Return the current rounding precision in m68k format */ static inline uae_u32 FFPU get_rounding_precision(void) - { return FPU fpcr.rounding_precision; } + { return FPU fpcr & FPCR_ROUNDING_PRECISION; } /* Convert and set to native rounding precision */ -static inline void FFPU set_rounding_precision(uae_u32 new_rounding_precision) - { FPU fpcr.rounding_precision = new_rounding_precision; } +static inline void FFPU set_rounding_precision(uae_u32 /* new_rounding_precision */) + { } #endif diff --git a/BasiliskII/src/uae_cpu/fpu/types.h b/BasiliskII/src/uae_cpu/fpu/types.h index c0a64192..50e07ec2 100644 --- a/BasiliskII/src/uae_cpu/fpu/types.h +++ b/BasiliskII/src/uae_cpu/fpu/types.h @@ -1,28 +1,33 @@ /* - * types.h - basic types for fpu registers + * fpu/types.h - basic types for fpu registers * - * Basilisk II (C) 1997-2008 Christian Bauer + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II * - * MC68881/68040 fpu emulation - * - * Original UAE FPU, copyright 1996 Herman ten Brugge - * Rewrite for x86, copyright 1999-2000 Lauri Pesonen - * New framework, copyright 2000 Gwenole Beauchesne - * Adapted for JIT compilation (c) Bernd Meyer, 2000 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
+ * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * MC68881/68040 fpu emulation * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef FPU_TYPES_H @@ -106,9 +111,11 @@ typedef uae_f32 fpu_single; #elif defined(FPU_IEEE) +#if 0 #if HOST_FLOAT_FORMAT != IEEE_FLOAT_FORMAT #error "No IEEE float format, you lose." 
#endif +#endif /* 4-byte floats */ #if SIZEOF_FLOAT == 4 @@ -133,7 +140,7 @@ typedef long double uae_f64; typedef long double uae_f96; typedef uae_f96 fpu_register; #define USE_LONG_DOUBLE 1 -#elif SIZEOF_LONG_DOUBLE == 16 && (defined(__i386__) || defined(__x86_64__)) +#elif SIZEOF_LONG_DOUBLE == 16 && (defined(CPU_i386) || defined(CPU_x86_64) || defined(CPU_ia64)) /* Long doubles on x86-64 are really held in old x87 FPU stack. */ typedef long double uae_f128; typedef uae_f128 fpu_register; @@ -154,6 +161,23 @@ typedef fpu_register fpu_extended; typedef uae_f64 fpu_double; typedef uae_f32 fpu_single; +#elif defined(FPU_MPFR) + +#include + +struct fpu_register { + mpfr_t f; + uae_u64 nan_bits; + int nan_sign; + operator long double (); + fpu_register &operator=(long double); +}; + #endif +union fpu_register_parts { + fpu_register val; + uae_u32 parts[sizeof(fpu_register) / 4]; +}; + #endif /* FPU_TYPES_H */ diff --git a/BasiliskII/src/uae_cpu/gencpu.c b/BasiliskII/src/uae_cpu/gencpu.c index 5045dffd..295a49e5 100644 --- a/BasiliskII/src/uae_cpu/gencpu.c +++ b/BasiliskII/src/uae_cpu/gencpu.c @@ -1,3 +1,27 @@ +/* + * gencpu.c - m68k emulation generator + * + * Copyright (c) 2009 ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ /* * UAE - The Un*x Amiga Emulator * @@ -14,42 +38,26 @@ * take care of this. * * Copyright 1995, 1996 Bernd Schmidt - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * */ +#define CC_FOR_BUILD 1 + +#include "sysdeps.h" +#include "readcpu.h" + #include #include #include #include - -#include "sysdeps.h" -#include "readcpu.h" - -#if defined(SPARC_V8_ASSEMBLY) || defined(SPARC_V9_ASSEMBLY) -#define SPARC_ASSEMBLY 0 -#endif +#include +#undef abort #define BOOL_TYPE "int" - -/* Define the minimal 680x0 where NV flags are not affected by xBCD instructions. 
*/ -#define xBCD_KEEPS_NV_FLAGS 4 +#define VERIFY_MMU_GENAMODE 0 static FILE *headerfile; static FILE *stblfile; +static FILE *functblfile; static int using_prefetch; static int using_exception_3; @@ -65,6 +73,23 @@ static int *opcode_next_clev; static int *opcode_last_postfix; static unsigned long *counts; +#define GENA_GETV_NO_FETCH 0 +#define GENA_GETV_FETCH 1 +#define GENA_GETV_FETCH_ALIGN 2 +#define GENA_MOVEM_DO_INC 0 +#define GENA_MOVEM_NO_INC 1 +#define GENA_MOVEM_MOVE16 2 + +#define XLATE_LOG 0 +#define XLATE_PHYS 1 +#define XLATE_SFC 2 +#define XLATE_DFC 3 +static char * mem_prefix[4] = { "", "phys_", "sfc_", "dfc_" }; + +/* Define the minimal 680x0 where NV flags are not affected by xBCD instructions. */ +#define xBCD_KEEPS_N_FLAG 4 +#define xBCD_KEEPS_V_FLAG 3 + static void read_counts (void) { FILE *file; @@ -75,7 +100,8 @@ static void read_counts (void) file = fopen ("frequent.68k", "r"); if (file) { - fscanf (file, "Total: %lu\n", &total); + int c = fscanf (file, "Total: %lu\n", &total); + assert(c == 1); while (fscanf (file, "%lx: %lu %s\n", &opcode, &count, name) == 3) { opcode_next_clev[nr] = 4; opcode_last_postfix[nr] = -1; @@ -106,7 +132,6 @@ static int need_endlabel; static int n_braces = 0; static int m68k_pc_offset = 0; -static int insn_n_cycles; static void start_brace (void) { @@ -159,9 +184,8 @@ static const char *gen_nextilong (void) { static char buffer[80]; int r = m68k_pc_offset; - m68k_pc_offset += 4; - insn_n_cycles += 4; + m68k_pc_offset += 4; if (using_prefetch) sprintf (buffer, "get_ilong_prefetch(%d)", r); @@ -174,9 +198,8 @@ static const char *gen_nextiword (void) { static char buffer[80]; int r = m68k_pc_offset; - m68k_pc_offset += 2; - insn_n_cycles += 2; + m68k_pc_offset += 2; if (using_prefetch) sprintf (buffer, "get_iword_prefetch(%d)", r); @@ -191,8 +214,6 @@ static const char *gen_nextibyte (void) int r = m68k_pc_offset; m68k_pc_offset += 2; - insn_n_cycles += 2; - if (using_prefetch) sprintf (buffer, 
"get_ibyte_prefetch(%d)", r); else @@ -214,9 +235,22 @@ static void fill_prefetch_2 (void) static void swap_opcode (void) { - printf ("#ifdef HAVE_GET_WORD_UNSWAPPED\n"); - printf ("\topcode = ((opcode << 8) & 0xFF00) | ((opcode >> 8) & 0xFF);\n"); - printf ("#endif\n"); + printf("#if defined(HAVE_GET_WORD_UNSWAPPED) && !defined(FULLMMU)\n"); + printf ("\topcode = do_byteswap_16(opcode);\n"); + printf("#endif\n"); +} + +static void real_opcode (int *have) +{ + if (!*have) + { + printf("#if defined(HAVE_GET_WORD_UNSWAPPED) && !defined(FULLMMU)\n"); + printf ("\tuae_u32 real_opcode = do_byteswap_16(opcode);\n"); + printf("#else\n"); + printf ("\tuae_u32 real_opcode = opcode;\n"); + printf("#endif\n"); + *have = 1; + } } static void sync_m68k_pc (void) @@ -238,33 +272,49 @@ static void sync_m68k_pc (void) m68k_pc_offset = 0; } +static void gen_set_fault_pc (void) +{ + sync_m68k_pc(); + printf ("regs.fault_pc = m68k_getpc ();\n"); + m68k_pc_offset = 0; +} + /* getv == 1: fetch data; getv != 0: check for odd address. If movem != 0, * the calling routine handles Apdi and Aipi modes. * gb-- movem == 2 means the same thing but for a MOVE16 instruction */ -static void genamode (amodes mode, char *reg, wordsizes size, char *name, int getv, int movem) + +/* fixup indicates if we want to fix up adress registers in pre decrement + * or post increment mode now (0) or later (1). A value of 2 will then be + * used to do the actual fix up. This allows to do all memory readings + * before any register is modified, and so to rerun operation without + * side effect in case a bus fault is generated by any memory access. 
+ * XJ - 2006/11/13 */ +static void genamode2 (amodes mode, char *reg, wordsizes size, char *name, int getv, int movem, int xlateflag, int fixup) { + if (fixup != 2) + { start_brace (); switch (mode) { case Dreg: if (movem) abort (); - if (getv == 1) + if (getv == GENA_GETV_FETCH) switch (size) { case sz_byte: -#if defined(AMIGA) && !defined(WARPUP) + printf("\n#if defined(AMIGA) && !defined(WARPUP)\n"); /* sam: I don't know why gcc.2.7.2.1 produces a code worse */ /* if it is not done like that: */ printf ("\tuae_s8 %s = ((uae_u8*)&m68k_dreg(regs, %s))[3];\n", name, reg); -#else + printf("#else\n"); printf ("\tuae_s8 %s = m68k_dreg(regs, %s);\n", name, reg); -#endif + printf("#endif\n"); break; case sz_word: -#if defined(AMIGA) && !defined(WARPUP) + printf("\n#if defined(AMIGA) && !defined(WARPUP)\n"); printf ("\tuae_s16 %s = ((uae_s16*)&m68k_dreg(regs, %s))[1];\n", name, reg); -#else + printf("#else\n"); printf ("\tuae_s16 %s = m68k_dreg(regs, %s);\n", name, reg); -#endif + printf("#endif\n"); break; case sz_long: printf ("\tuae_s32 %s = m68k_dreg(regs, %s);\n", name, reg); @@ -276,7 +326,7 @@ static void genamode (amodes mode, char *reg, wordsizes size, char *name, int ge case Areg: if (movem) abort (); - if (getv == 1) + if (getv == GENA_GETV_FETCH) switch (size) { case sz_word: printf ("\tuae_s16 %s = m68k_areg(regs, %s);\n", name, reg); @@ -303,10 +353,16 @@ static void genamode (amodes mode, char *reg, wordsizes size, char *name, int ge printf ("\tuaecptr %sa = m68k_areg(regs, %s) - areg_byteinc[%s];\n", name, reg, reg); break; case sz_word: - printf ("\tuaecptr %sa = m68k_areg(regs, %s) - %d;\n", name, reg, movem ? 0 : 2); + if (movem) + printf ("\tuaecptr %sa = m68k_areg(regs, %s);\n", name, reg); + else + printf ("\tuaecptr %sa = m68k_areg(regs, %s) - 2;\n", name, reg); break; case sz_long: - printf ("\tuaecptr %sa = m68k_areg(regs, %s) - %d;\n", name, reg, movem ? 
0 : 4); + if (movem) + printf ("\tuaecptr %sa = m68k_areg(regs, %s);\n", name, reg); + else + printf ("\tuaecptr %sa = m68k_areg(regs, %s) - 4;\n", name, reg); break; default: abort (); @@ -351,7 +407,7 @@ static void genamode (amodes mode, char *reg, wordsizes size, char *name, int ge printf ("\tuaecptr %sa = %s;\n", name, gen_nextilong ()); break; case imm: - if (getv != 1) + if (getv != GENA_GETV_FETCH) abort (); switch (size) { case sz_byte: @@ -368,22 +424,22 @@ static void genamode (amodes mode, char *reg, wordsizes size, char *name, int ge } return; case imm0: - if (getv != 1) + if (getv != GENA_GETV_FETCH) abort (); printf ("\tuae_s8 %s = %s;\n", name, gen_nextibyte ()); return; case imm1: - if (getv != 1) + if (getv != GENA_GETV_FETCH) abort (); printf ("\tuae_s16 %s = %s;\n", name, gen_nextiword ()); return; case imm2: - if (getv != 1) + if (getv != GENA_GETV_FETCH) abort (); printf ("\tuae_s32 %s = %s;\n", name, gen_nextilong ()); return; case immi: - if (getv != 1) + if (getv != GENA_GETV_FETCH) abort (); printf ("\tuae_u32 %s = %s;\n", name, reg); return; @@ -394,7 +450,7 @@ static void genamode (amodes mode, char *reg, wordsizes size, char *name, int ge /* We get here for all non-reg non-immediate addressing modes to * actually fetch the value. 
*/ - if (using_exception_3 && getv != 0 && size != sz_byte) { + if (using_exception_3 && getv != GENA_GETV_NO_FETCH && size != sz_byte) { printf ("\tif ((%sa & 1) != 0) {\n", name); printf ("\t\tlast_fault_for_exception_3 = %sa;\n", name); printf ("\t\tlast_op_for_exception_3 = opcode;\n"); @@ -406,20 +462,29 @@ static void genamode (amodes mode, char *reg, wordsizes size, char *name, int ge start_brace (); } - if (getv == 1) { + if (getv == GENA_GETV_FETCH) { switch (size) { - case sz_byte: insn_n_cycles += 2; break; - case sz_word: insn_n_cycles += 2; break; - case sz_long: insn_n_cycles += 4; break; + case sz_byte: break; + case sz_word: break; + case sz_long: break; default: abort (); } start_brace (); + printf("\n#ifdef FULLMMU\n"); switch (size) { - case sz_byte: printf ("\tuae_s8 %s = get_byte(%sa);\n", name, name); break; - case sz_word: printf ("\tuae_s16 %s = get_word(%sa);\n", name, name); break; - case sz_long: printf ("\tuae_s32 %s = get_long(%sa);\n", name, name); break; + case sz_byte: printf ("\tuae_s8 %s = %sget_byte(%sa);\n", name, mem_prefix[xlateflag], name); break; + case sz_word: printf ("\tuae_s16 %s = %sget_word(%sa);\n", name, mem_prefix[xlateflag], name); break; + case sz_long: printf ("\tuae_s32 %s = %sget_long(%sa);\n", name, mem_prefix[xlateflag], name); break; default: abort (); } + printf("#else\n"); + switch (size) { + case sz_byte: printf ("\tuae_s8 %s = phys_get_byte(%sa);\n", name, name); break; + case sz_word: printf ("\tuae_s16 %s = phys_get_word(%sa);\n", name, name); break; + case sz_long: printf ("\tuae_s32 %s = phys_get_long(%sa);\n", name, name); break; + default: abort (); + } + printf("#endif\n"); } /* We now might have to fix up the register for pre-dec or post-inc @@ -427,6 +492,12 @@ static void genamode (amodes mode, char *reg, wordsizes size, char *name, int ge if (!movem) switch (mode) { case Aipi: + if (fixup == 1) + { + printf ("\tfixup.flag = 1;\n"); + printf ("\tfixup.reg = %s;\n", reg); + printf ("\tfixup.value 
= m68k_areg(regs, %s);\n", reg); + } switch (size) { case sz_byte: printf ("\tm68k_areg(regs, %s) += areg_byteinc[%s];\n", reg, reg); @@ -442,14 +513,39 @@ static void genamode (amodes mode, char *reg, wordsizes size, char *name, int ge } break; case Apdi: + if (fixup == 1) + { + printf ("\tfixup.flag = 1;\n"); + printf ("\tfixup.reg = %s;\n", reg); + printf ("\tfixup.value = m68k_areg(regs, %s);\n", reg); + } printf ("\tm68k_areg (regs, %s) = %sa;\n", reg, name); break; default: break; } + + } + else /* (fixup != 2) */ + { + if (!movem) + switch (mode) { + case Aipi: + case Apdi: + printf ("\tfixup.flag = 0;\n"); + break; + default: + break; + } + } } -static void genastore (char *from, amodes mode, char *reg, wordsizes size, char *to) +static void genamode (amodes mode, char *reg, wordsizes size, char *name, int getv, int movem, int xlateflag) +{ + genamode2 (mode, reg, size, name, getv, movem, xlateflag, 0); +} + +static void genastore (char *from, amodes mode, char *reg, wordsizes size, char *to, int xlateflag) { switch (mode) { case Dreg: @@ -489,28 +585,32 @@ static void genastore (char *from, amodes mode, char *reg, wordsizes size, char case absl: case PC16: case PC8r: - if (using_prefetch) - sync_m68k_pc (); + gen_set_fault_pc (); + printf("#ifdef FULLMMU\n"); switch (size) { case sz_byte: - insn_n_cycles += 2; + printf ("\t%sput_byte(%sa,%s);\n", mem_prefix[xlateflag], to, from); + printf("#else\n"); printf ("\tput_byte(%sa,%s);\n", to, from); break; case sz_word: - insn_n_cycles += 2; if (cpu_level < 2 && (mode == PC16 || mode == PC8r)) abort (); + printf ("\t%sput_word(%sa,%s);\n", mem_prefix[xlateflag], to, from); + printf("#else\n"); printf ("\tput_word(%sa,%s);\n", to, from); break; case sz_long: - insn_n_cycles += 4; if (cpu_level < 2 && (mode == PC16 || mode == PC8r)) abort (); + printf ("\t%sput_long(%sa,%s);\n", mem_prefix[xlateflag], to, from); + printf("#else\n"); printf ("\tput_long(%sa,%s);\n", to, from); break; default: abort (); } + 
printf("#endif\n"); break; case imm: case imm0: @@ -526,23 +626,33 @@ static void genastore (char *from, amodes mode, char *reg, wordsizes size, char static void genmovemel (uae_u16 opcode) { - char getcode[100]; + char getcode1[100]; + char getcode2[100]; int size = table68k[opcode].size == sz_long ? 4 : 2; - + if (table68k[opcode].size == sz_long) { - strcpy (getcode, "get_long(srca)"); + strcpy (getcode1, ""); + strcpy (getcode2, "get_long(srca)"); } else { - strcpy (getcode, "(uae_s32)(uae_s16)get_word(srca)"); + strcpy (getcode1, "(uae_s32)(uae_s16)"); + strcpy (getcode2, "get_word(srca)"); } printf ("\tuae_u16 mask = %s;\n", gen_nextiword ()); printf ("\tunsigned int dmask = mask & 0xff, amask = (mask >> 8) & 0xff;\n"); - genamode (table68k[opcode].dmode, "dstreg", table68k[opcode].size, "src", 2, 1); + genamode (table68k[opcode].dmode, "dstreg", table68k[opcode].size, "src", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_NO_INC, XLATE_LOG); start_brace (); - printf ("\twhile (dmask) { m68k_dreg(regs, movem_index1[dmask]) = %s; srca += %d; dmask = movem_next[dmask]; }\n", - getcode, size); - printf ("\twhile (amask) { m68k_areg(regs, movem_index1[amask]) = %s; srca += %d; amask = movem_next[amask]; }\n", - getcode, size); + printf("\n#ifdef FULLMMU\n"); + printf ("\twhile (dmask) { m68k_dreg(regs, movem_index1[dmask]) = %s%s; srca += %d; dmask = movem_next[dmask]; }\n", + getcode1, getcode2, size); + printf ("\twhile (amask) { m68k_areg(regs, movem_index1[amask]) = %s%s; srca += %d; amask = movem_next[amask]; }\n", + getcode1, getcode2, size); + printf("#else\n"); + printf ("\twhile (dmask) { m68k_dreg(regs, movem_index1[dmask]) = %sphys_%s; srca += %d; dmask = movem_next[dmask]; }\n", + getcode1, getcode2, size); + printf ("\twhile (amask) { m68k_areg(regs, movem_index1[amask]) = %sphys_%s; srca += %d; amask = movem_next[amask]; }\n", + getcode1, getcode2, size); + printf("#endif\n"); if (table68k[opcode].dmode == Aipi) printf ("\tm68k_areg(regs, dstreg) = srca;\n"); @@ 
-552,6 +662,7 @@ static void genmovemle (uae_u16 opcode) { char putcode[100]; int size = table68k[opcode].size == sz_long ? 4 : 2; + if (table68k[opcode].size == sz_long) { strcpy (putcode, "put_long(srca,"); } else { @@ -559,30 +670,44 @@ static void genmovemle (uae_u16 opcode) } printf ("\tuae_u16 mask = %s;\n", gen_nextiword ()); - genamode (table68k[opcode].dmode, "dstreg", table68k[opcode].size, "src", 2, 1); - if (using_prefetch) - sync_m68k_pc (); + genamode (table68k[opcode].dmode, "dstreg", table68k[opcode].size, "src", + GENA_GETV_FETCH_ALIGN, GENA_MOVEM_NO_INC, XLATE_LOG); + sync_m68k_pc (); start_brace (); if (table68k[opcode].dmode == Apdi) { printf ("\tuae_u16 amask = mask & 0xff, dmask = (mask >> 8) & 0xff;\n"); + printf("#ifdef FULLMMU\n"); printf ("\twhile (amask) { srca -= %d; %s m68k_areg(regs, movem_index2[amask])); amask = movem_next[amask]; }\n", size, putcode); printf ("\twhile (dmask) { srca -= %d; %s m68k_dreg(regs, movem_index2[dmask])); dmask = movem_next[dmask]; }\n", size, putcode); + printf("#else\n"); + printf ("\twhile (amask) { srca -= %d; phys_%s m68k_areg(regs, movem_index2[amask])); amask = movem_next[amask]; }\n", + size, putcode); + printf ("\twhile (dmask) { srca -= %d; phys_%s m68k_dreg(regs, movem_index2[dmask])); dmask = movem_next[dmask]; }\n", + size, putcode); + printf("#endif\n"); printf ("\tm68k_areg(regs, dstreg) = srca;\n"); } else { printf ("\tuae_u16 dmask = mask & 0xff, amask = (mask >> 8) & 0xff;\n"); + printf("#ifdef FULLMMU\n"); printf ("\twhile (dmask) { %s m68k_dreg(regs, movem_index1[dmask])); srca += %d; dmask = movem_next[dmask]; }\n", putcode, size); printf ("\twhile (amask) { %s m68k_areg(regs, movem_index1[amask])); srca += %d; amask = movem_next[amask]; }\n", putcode, size); + printf("#else\n"); + printf ("\twhile (dmask) { phys_%s m68k_dreg(regs, movem_index1[dmask])); srca += %d; dmask = movem_next[dmask]; }\n", + putcode, size); + printf ("\twhile (amask) { phys_%s m68k_areg(regs, 
movem_index1[amask])); srca += %d; amask = movem_next[amask]; }\n", + putcode, size); + printf("#endif\n"); } } static void duplicate_carry (void) { - printf ("\tCOPY_CARRY;\n"); + printf ("\tCOPY_CARRY();\n"); } typedef enum { @@ -649,12 +774,10 @@ static void genflags_normal (flagtypes type, wordsizes size, char *value, char * break; case flag_add: - start_brace (); printf ("uae_u32 %s = %s + %s;\n", value, dstr, sstr); break; case flag_sub: case flag_cmp: - start_brace (); printf ("uae_u32 %s = %s - %s;\n", value, dstr, sstr); break; } @@ -673,7 +796,6 @@ static void genflags_normal (flagtypes type, wordsizes size, char *value, char * case flag_cmp: case flag_av: case flag_sv: - start_brace (); printf ("\t" BOOL_TYPE " flgs = %s < 0;\n", sstr); printf ("\t" BOOL_TYPE " flgo = %s < 0;\n", dstr); printf ("\t" BOOL_TYPE " flgn = %s < 0;\n", vstr); @@ -682,7 +804,7 @@ static void genflags_normal (flagtypes type, wordsizes size, char *value, char * switch (type) { case flag_logical: - printf ("\tCLEAR_CZNV;\n"); + printf ("\tCLEAR_CZNV();\n"); printf ("\tSET_ZFLG (%s == 0);\n", vstr); printf ("\tSET_NFLG (%s < 0);\n", vstr); break; @@ -697,10 +819,10 @@ static void genflags_normal (flagtypes type, wordsizes size, char *value, char * printf ("\tSET_VFLG ((flgs ^ flgo) & (flgn ^ flgo));\n"); break; case flag_z: - printf ("\tSET_ZFLG (GET_ZFLG & (%s == 0));\n", vstr); + printf ("\tSET_ZFLG (GET_ZFLG () & (%s == 0));\n", vstr); break; case flag_zn: - printf ("\tSET_ZFLG (GET_ZFLG & (%s == 0));\n", vstr); + printf ("\tSET_ZFLG (GET_ZFLG () & (%s == 0));\n", vstr); printf ("\tSET_NFLG (%s < 0);\n", vstr); break; case flag_add: @@ -741,11 +863,13 @@ static void genflags (flagtypes type, wordsizes size, char *value, char *src, ch /* Temporarily deleted 68k/ARM flag optimizations. I'd prefer to have them in the appropriate m68k.h files and use just one copy of this code here. The API can be changed if necessary. 
*/ -#ifdef OPTIMIZED_FLAGS + int done = 0; + + start_brace (); + printf("\n#ifdef OPTIMIZED_FLAGS\n"); switch (type) { case flag_add: case flag_sub: - start_brace (); printf ("\tuae_u32 %s;\n", value); break; default: @@ -755,7 +879,7 @@ static void genflags (flagtypes type, wordsizes size, char *value, char *src, ch /* At least some of those casts are fairly important! */ switch (type) { case flag_logical_noclobber: - printf ("\t{uae_u32 oldcznv = GET_CZNV & ~(FLAGVAL_Z | FLAGVAL_N);\n"); + printf ("\t{uae_u32 oldcznv = GET_CZNV() & ~(FLAGVAL_Z | FLAGVAL_N);\n"); if (strcmp (value, "0") == 0) { printf ("\tSET_CZNV (olcznv | FLAGVAL_Z);\n"); } else { @@ -767,8 +891,9 @@ static void genflags (flagtypes type, wordsizes size, char *value, char *src, ch printf ("\tIOR_CZNV (oldcznv);\n"); } printf ("\t}\n"); - return; - + done = 1; + break; + case flag_logical: if (strcmp (value, "0") == 0) { printf ("\tSET_CZNV (FLAGVAL_Z);\n"); @@ -779,7 +904,8 @@ static void genflags (flagtypes type, wordsizes size, char *value, char *src, ch case sz_long: printf ("\toptflag_testl ((uae_s32)(%s));\n", value); break; } } - return; + done = 1; + break; case flag_add: switch (size) { @@ -787,7 +913,8 @@ static void genflags (flagtypes type, wordsizes size, char *value, char *src, ch case sz_word: printf ("\toptflag_addw (%s, (uae_s16)(%s), (uae_s16)(%s));\n", value, src, dst); break; case sz_long: printf ("\toptflag_addl (%s, (uae_s32)(%s), (uae_s32)(%s));\n", value, src, dst); break; } - return; + done = 1; + break; case flag_sub: switch (size) { @@ -795,7 +922,8 @@ static void genflags (flagtypes type, wordsizes size, char *value, char *src, ch case sz_word: printf ("\toptflag_subw (%s, (uae_s16)(%s), (uae_s16)(%s));\n", value, src, dst); break; case sz_long: printf ("\toptflag_subl (%s, (uae_s32)(%s), (uae_s32)(%s));\n", value, src, dst); break; } - return; + done = 1; + break; case flag_cmp: switch (size) { @@ -803,13 +931,19 @@ static void genflags (flagtypes type, wordsizes size, 
char *value, char *src, ch case sz_word: printf ("\toptflag_cmpw ((uae_s16)(%s), (uae_s16)(%s));\n", src, dst); break; case sz_long: printf ("\toptflag_cmpl ((uae_s32)(%s), (uae_s32)(%s));\n", src, dst); break; } - return; + done = 1; + break; default: break; } -#endif + if (done) + printf("#else\n"); + else + printf("#endif\n"); genflags_normal (type, size, value, src, dst); + if (done) + printf("#endif\n"); } static void force_range_for_rox (const char *var, wordsizes size) @@ -837,7 +971,7 @@ static const char *cmask (wordsizes size) case sz_byte: return "0x80"; case sz_word: return "0x8000"; case sz_long: return "0x80000000"; - default: abort (); + default: abort (); return NULL; } } @@ -849,11 +983,10 @@ static int source_is_imm1_8 (struct instr *i) static void gen_opcode (unsigned long int opcode) { struct instr *curi = table68k + opcode; - insn_n_cycles = 2; start_brace (); #if 0 - printf ("uae_u8 *m68k_pc = regs.pc_p;\n"); + printf ("uae_u8 *m68k_pc = m68k_getpc();\n"); #endif m68k_pc_offset = 2; switch (curi->plev) { @@ -883,16 +1016,16 @@ static void gen_opcode (unsigned long int opcode) case i_OR: case i_AND: case i_EOR: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); printf ("\tsrc %c= dst;\n", curi->mnemo == i_OR ? '|' : curi->mnemo == i_AND ? 
'&' : '^'); genflags (flag_logical, curi->size, "src", "", ""); - genastore ("src", curi->dmode, "dstreg", curi->size, "dst"); + genastore ("src", curi->dmode, "dstreg", curi->size, "dst", XLATE_LOG); break; case i_ORSR: case i_EORSR: printf ("\tMakeSR();\n"); - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); if (curi->size == sz_byte) { printf ("\tsrc &= 0xFF;\n"); } @@ -901,7 +1034,7 @@ static void gen_opcode (unsigned long int opcode) break; case i_ANDSR: printf ("\tMakeSR();\n"); - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); if (curi->size == sz_byte) { printf ("\tsrc |= 0xFF00;\n"); } @@ -909,81 +1042,89 @@ static void gen_opcode (unsigned long int opcode) printf ("\tMakeFromSR();\n"); break; case i_SUB: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); genflags (flag_sub, curi->size, "newv", "src", "dst"); - genastore ("newv", curi->dmode, "dstreg", curi->size, "dst"); + genastore ("newv", curi->dmode, "dstreg", curi->size, "dst", XLATE_LOG); break; case i_SUBA: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", sz_long, "dst", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", sz_long, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); printf ("\tuae_u32 newv = dst - src;\n"); - genastore ("newv", curi->dmode, "dstreg", sz_long, "dst"); + genastore ("newv", curi->dmode, "dstreg", sz_long, 
"dst", XLATE_LOG); break; case i_SUBX: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode2 (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG, 1); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode2 (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG, 2); start_brace (); - printf ("\tuae_u32 newv = dst - src - (GET_XFLG ? 1 : 0);\n"); + printf ("\tuae_u32 newv = dst - src - (GET_XFLG () ? 1 : 0);\n"); genflags (flag_subx, curi->size, "newv", "src", "dst"); genflags (flag_zn, curi->size, "newv", "", ""); - genastore ("newv", curi->dmode, "dstreg", curi->size, "dst"); + genastore ("newv", curi->dmode, "dstreg", curi->size, "dst", XLATE_LOG); break; case i_SBCD: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode2 (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG, 1); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode2 (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG, 2); start_brace (); - printf ("\tuae_u16 newv_lo = (dst & 0xF) - (src & 0xF) - (GET_XFLG ? 1 : 0);\n"); + printf ("\tuae_u16 newv_lo = (dst & 0xF) - (src & 0xF) - (GET_XFLG () ? 1 : 0);\n"); printf ("\tuae_u16 newv_hi = (dst & 0xF0) - (src & 0xF0);\n"); printf ("\tuae_u16 newv, tmp_newv;\n"); printf ("\tint bcd = 0;\n"); printf ("\tnewv = tmp_newv = newv_hi + newv_lo;\n"); printf ("\tif (newv_lo & 0xF0) { newv -= 6; bcd = 6; };\n"); - printf ("\tif ((((dst & 0xFF) - (src & 0xFF) - (GET_XFLG ? 1 : 0)) & 0x100) > 0xFF) { newv -= 0x60; }\n"); - printf ("\tSET_CFLG ((((dst & 0xFF) - (src & 0xFF) - bcd - (GET_XFLG ? 
1 : 0)) & 0x300) > 0xFF);\n"); + printf ("\tif ((((dst & 0xFF) - (src & 0xFF) - (GET_XFLG () ? 1 : 0)) & 0x100) > 0xFF) { newv -= 0x60; }\n"); + printf ("\tSET_CFLG ((((dst & 0xFF) - (src & 0xFF) - bcd - (GET_XFLG () ? 1 : 0)) & 0x300) > 0xFF);\n"); duplicate_carry (); - /* Manual says bits NV are undefined though a real 68040 don't change them */ - if (cpu_level >= xBCD_KEEPS_NV_FLAGS) { - if (next_cpu_level < xBCD_KEEPS_NV_FLAGS) - next_cpu_level = xBCD_KEEPS_NV_FLAGS - 1; - genflags (flag_z, curi->size, "newv", "", ""); + /* Manual says bits NV are undefined though a real 68030 doesn't change V and 68040/060 don't change both */ + if (cpu_level >= xBCD_KEEPS_N_FLAG) { + if (next_cpu_level < xBCD_KEEPS_N_FLAG) + next_cpu_level = xBCD_KEEPS_N_FLAG - 1; + genflags (flag_z, curi->size, "newv", "", ""); + } else { + genflags (flag_zn, curi->size, "newv", "", ""); } - else { - genflags (flag_zn, curi->size, "newv", "", ""); - printf ("\tSET_VFLG ((tmp_newv & 0x80) != 0 && (newv & 0x80) == 0);\n"); + if (cpu_level >= xBCD_KEEPS_V_FLAG) { + if (next_cpu_level < xBCD_KEEPS_V_FLAG) + next_cpu_level = xBCD_KEEPS_V_FLAG - 1; + } else { + printf ("\tSET_VFLG ((tmp_newv & 0x80) != 0 && (newv & 0x80) == 0);\n"); } - genastore ("newv", curi->dmode, "dstreg", curi->size, "dst"); + genastore ("newv", curi->dmode, "dstreg", curi->size, "dst", XLATE_LOG); break; case i_ADD: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); genflags (flag_add, curi->size, "newv", "src", "dst"); - genastore ("newv", curi->dmode, "dstreg", curi->size, "dst"); + genastore ("newv", curi->dmode, "dstreg", curi->size, "dst", XLATE_LOG); break; case i_ADDA: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - 
genamode (curi->dmode, "dstreg", sz_long, "dst", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", sz_long, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); printf ("\tuae_u32 newv = dst + src;\n"); - genastore ("newv", curi->dmode, "dstreg", sz_long, "dst"); + genastore ("newv", curi->dmode, "dstreg", sz_long, "dst", XLATE_LOG); break; case i_ADDX: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode2 (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG, 1); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode2 (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG, 2); start_brace (); - printf ("\tuae_u32 newv = dst + src + (GET_XFLG ? 1 : 0);\n"); + printf ("\tuae_u32 newv = dst + src + (GET_XFLG () ? 1 : 0);\n"); genflags (flag_addx, curi->size, "newv", "src", "dst"); genflags (flag_zn, curi->size, "newv", "", ""); - genastore ("newv", curi->dmode, "dstreg", curi->size, "dst"); + genastore ("newv", curi->dmode, "dstreg", curi->size, "dst", XLATE_LOG); break; case i_ABCD: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode2 (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG, 1); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode2 (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG, 2); start_brace (); - printf ("\tuae_u16 newv_lo = (src & 0xF) + (dst & 0xF) + (GET_XFLG ? 1 : 0);\n"); + printf ("\tuae_u16 newv_lo = (src & 0xF) + (dst & 0xF) + (GET_XFLG () ? 
1 : 0);\n"); printf ("\tuae_u16 newv_hi = (src & 0xF0) + (dst & 0xF0);\n"); printf ("\tuae_u16 newv, tmp_newv;\n"); printf ("\tint cflg;\n"); @@ -993,75 +1134,85 @@ static void gen_opcode (unsigned long int opcode) printf ("\tif (cflg) newv += 0x60;\n"); printf ("\tSET_CFLG (cflg);\n"); duplicate_carry (); - /* Manual says bits NV are undefined though a real 68040 don't change them */ - if (cpu_level >= xBCD_KEEPS_NV_FLAGS) { - if (next_cpu_level < xBCD_KEEPS_NV_FLAGS) - next_cpu_level = xBCD_KEEPS_NV_FLAGS - 1; - genflags (flag_z, curi->size, "newv", "", ""); + /* Manual says bits NV are undefined though a real 68030 doesn't change V and 68040/060 don't change both */ + if (cpu_level >= xBCD_KEEPS_N_FLAG) { + if (next_cpu_level < xBCD_KEEPS_N_FLAG) + next_cpu_level = xBCD_KEEPS_N_FLAG - 1; + genflags (flag_z, curi->size, "newv", "", ""); + } else { + genflags (flag_zn, curi->size, "newv", "", ""); } - else { - genflags (flag_zn, curi->size, "newv", "", ""); - printf ("\tSET_VFLG ((tmp_newv & 0x80) == 0 && (newv & 0x80) != 0);\n"); + if (cpu_level >= xBCD_KEEPS_V_FLAG) { + if (next_cpu_level < xBCD_KEEPS_V_FLAG) + next_cpu_level = xBCD_KEEPS_V_FLAG - 1; + } else { + printf ("\tSET_VFLG ((tmp_newv & 0x80) == 0 && (newv & 0x80) != 0);\n"); } - genastore ("newv", curi->dmode, "dstreg", curi->size, "dst"); + genastore ("newv", curi->dmode, "dstreg", curi->size, "dst", XLATE_LOG); break; case i_NEG: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); genflags (flag_sub, curi->size, "dst", "src", "0"); - genastore ("dst", curi->smode, "srcreg", curi->size, "src"); + genastore ("dst", curi->smode, "srcreg", curi->size, "src", XLATE_LOG); break; case i_NEGX: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); - printf 
("\tuae_u32 newv = 0 - src - (GET_XFLG ? 1 : 0);\n"); + printf ("\tuae_u32 newv = 0 - src - (GET_XFLG () ? 1 : 0);\n"); genflags (flag_subx, curi->size, "newv", "src", "0"); genflags (flag_zn, curi->size, "newv", "", ""); - genastore ("newv", curi->smode, "srcreg", curi->size, "src"); + genastore ("newv", curi->smode, "srcreg", curi->size, "src", XLATE_LOG); break; case i_NBCD: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); - printf ("\tuae_u16 newv_lo = - (src & 0xF) - (GET_XFLG ? 1 : 0);\n"); + printf ("\tuae_u16 newv_lo = - (src & 0xF) - (GET_XFLG () ? 1 : 0);\n"); printf ("\tuae_u16 newv_hi = - (src & 0xF0);\n"); printf ("\tuae_u16 newv;\n"); - printf ("\tint cflg;\n"); + printf ("\tint cflg, tmp_newv;\n"); + printf ("\ttmp_newv = newv_hi + newv_lo;\n"); printf ("\tif (newv_lo > 9) { newv_lo -= 6; }\n"); printf ("\tnewv = newv_hi + newv_lo;\n"); printf ("\tcflg = (newv & 0x1F0) > 0x90;\n"); printf ("\tif (cflg) newv -= 0x60;\n"); printf ("\tSET_CFLG (cflg);\n"); duplicate_carry(); - /* Manual says bits NV are undefined though a real 68040 don't change them */ - if (cpu_level >= xBCD_KEEPS_NV_FLAGS) { - if (next_cpu_level < xBCD_KEEPS_NV_FLAGS) - next_cpu_level = xBCD_KEEPS_NV_FLAGS - 1; - genflags (flag_z, curi->size, "newv", "", ""); + /* Manual says bits NV are undefined though a real 68030 doesn't change V and 68040/060 don't change both */ + if (cpu_level >= xBCD_KEEPS_N_FLAG) { + if (next_cpu_level < xBCD_KEEPS_N_FLAG) + next_cpu_level = xBCD_KEEPS_N_FLAG - 1; + genflags (flag_z, curi->size, "newv", "", ""); + } else { + genflags (flag_zn, curi->size, "newv", "", ""); } - else { - genflags (flag_zn, curi->size, "newv", "", ""); + if (cpu_level >= xBCD_KEEPS_V_FLAG) { + if (next_cpu_level < xBCD_KEEPS_V_FLAG) + next_cpu_level = xBCD_KEEPS_V_FLAG - 1; + } else { + printf ("\tSET_VFLG ((tmp_newv & 0x80) != 0 && (newv & 0x80) == 
0);\n"); } - genastore ("newv", curi->smode, "srcreg", curi->size, "src"); + genastore ("newv", curi->smode, "srcreg", curi->size, "src", XLATE_LOG); break; case i_CLR: - genamode (curi->smode, "srcreg", curi->size, "src", 2, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG); genflags (flag_logical, curi->size, "0", "", ""); - genastore ("0", curi->smode, "srcreg", curi->size, "src"); + genastore ("0", curi->smode, "srcreg", curi->size, "src", XLATE_LOG); break; case i_NOT: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); printf ("\tuae_u32 dst = ~src;\n"); genflags (flag_logical, curi->size, "dst", "", ""); - genastore ("dst", curi->smode, "srcreg", curi->size, "src"); + genastore ("dst", curi->smode, "srcreg", curi->size, "src", XLATE_LOG); break; case i_TST: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); genflags (flag_logical, curi->size, "src", "", ""); break; case i_BTST: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); if (curi->size == sz_byte) printf ("\tsrc &= 7;\n"); else @@ -1069,55 +1220,55 @@ static void gen_opcode (unsigned long int opcode) printf ("\tSET_ZFLG (1 ^ ((dst >> src) & 1));\n"); break; case i_BCHG: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, 
"dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); if (curi->size == sz_byte) printf ("\tsrc &= 7;\n"); else printf ("\tsrc &= 31;\n"); printf ("\tdst ^= (1 << src);\n"); printf ("\tSET_ZFLG (((uae_u32)dst & (1 << src)) >> src);\n"); - genastore ("dst", curi->dmode, "dstreg", curi->size, "dst"); + genastore ("dst", curi->dmode, "dstreg", curi->size, "dst", XLATE_LOG); break; case i_BCLR: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); if (curi->size == sz_byte) printf ("\tsrc &= 7;\n"); else printf ("\tsrc &= 31;\n"); printf ("\tSET_ZFLG (1 ^ ((dst >> src) & 1));\n"); printf ("\tdst &= ~(1 << src);\n"); - genastore ("dst", curi->dmode, "dstreg", curi->size, "dst"); + genastore ("dst", curi->dmode, "dstreg", curi->size, "dst", XLATE_LOG); break; case i_BSET: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); if (curi->size == sz_byte) printf ("\tsrc &= 7;\n"); else printf ("\tsrc &= 31;\n"); printf ("\tSET_ZFLG (1 ^ ((dst >> src) & 1));\n"); printf ("\tdst |= (1 << src);\n"); - genastore ("dst", curi->dmode, "dstreg", curi->size, "dst"); + genastore ("dst", curi->dmode, "dstreg", curi->size, "dst", XLATE_LOG); break; case i_CMPM: case i_CMP: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "dst", 
GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); genflags (flag_cmp, curi->size, "newv", "src", "dst"); break; case i_CMPA: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", sz_long, "dst", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", sz_long, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); genflags (flag_cmp, sz_long, "newv", "src", "dst"); break; /* The next two are coded a little unconventional, but they are doing * weird things... */ case i_MVPRM: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); printf ("\tuaecptr memp = m68k_areg(regs, dstreg) + (uae_s32)(uae_s16)%s;\n", gen_nextiword ()); if (curi->size == sz_word) { @@ -1129,41 +1280,45 @@ static void gen_opcode (unsigned long int opcode) break; case i_MVPMR: printf ("\tuaecptr memp = m68k_areg(regs, srcreg) + (uae_s32)(uae_s16)%s;\n", gen_nextiword ()); - genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG); if (curi->size == sz_word) { - printf ("\tuae_u16 val = (get_byte(memp) << 8) + get_byte(memp + 2);\n"); + printf ("\tuae_u16 val = get_byte(memp) << 8;\n"); + printf ("\t val |= get_byte(memp + 2);\n"); } else { - printf ("\tuae_u32 val = (get_byte(memp) << 24) + (get_byte(memp + 2) << 16)\n"); - printf (" + (get_byte(memp + 4) << 8) + get_byte(memp + 6);\n"); + printf ("\tuae_u32 val = get_byte(memp) << 24;\n"); + printf ("\t val |= get_byte(memp + 2) << 16;\n"); + printf ("\t val |= get_byte(memp + 4) << 8;\n"); + printf ("\t val |= get_byte(memp + 6);\n"); } - genastore ("val", curi->dmode, "dstreg", curi->size, "dst"); + genastore ("val", curi->dmode, "dstreg", curi->size, "dst", XLATE_LOG); 
break; case i_MOVE: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0); + genamode2 (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG, 1); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode2 (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG, 2); genflags (flag_logical, curi->size, "src", "", ""); - genastore ("src", curi->dmode, "dstreg", curi->size, "dst"); + genastore ("src", curi->dmode, "dstreg", curi->size, "dst", XLATE_LOG); break; case i_MOVEA: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG); if (curi->size == sz_word) { printf ("\tuae_u32 val = (uae_s32)(uae_s16)src;\n"); } else { printf ("\tuae_u32 val = src;\n"); } - genastore ("val", curi->dmode, "dstreg", sz_long, "dst"); + genastore ("val", curi->dmode, "dstreg", sz_long, "dst", XLATE_LOG); break; case i_MVSR2: - genamode (curi->smode, "srcreg", sz_word, "src", 2, 0); + genamode (curi->smode, "srcreg", sz_word, "src", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG); printf ("\tMakeSR();\n"); if (curi->size == sz_byte) - genastore ("regs.sr & 0xff", curi->smode, "srcreg", sz_word, "src"); + genastore ("regs.sr & 0xff", curi->smode, "srcreg", sz_word, "src", XLATE_LOG); else - genastore ("regs.sr", curi->smode, "srcreg", sz_word, "src"); + genastore ("regs.sr", curi->smode, "srcreg", sz_word, "src", XLATE_LOG); break; case i_MV2SR: - genamode (curi->smode, "srcreg", sz_word, "src", 1, 0); + genamode (curi->smode, "srcreg", sz_word, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); if (curi->size == 
sz_byte) printf ("\tMakeSR();\n\tregs.sr &= 0xFF00;\n\tregs.sr |= src & 0xFF;\n"); else { @@ -1172,102 +1327,98 @@ static void gen_opcode (unsigned long int opcode) printf ("\tMakeFromSR();\n"); break; case i_SWAP: - genamode (curi->smode, "srcreg", sz_long, "src", 1, 0); + genamode (curi->smode, "srcreg", sz_long, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); printf ("\tuae_u32 dst = ((src >> 16)&0xFFFF) | ((src&0xFFFF)<<16);\n"); genflags (flag_logical, sz_long, "dst", "", ""); - genastore ("dst", curi->smode, "srcreg", sz_long, "src"); + genastore ("dst", curi->smode, "srcreg", sz_long, "src", XLATE_LOG); break; case i_EXG: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); - genastore ("dst", curi->smode, "srcreg", curi->size, "src"); - genastore ("src", curi->dmode, "dstreg", curi->size, "dst"); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genastore ("dst", curi->smode, "srcreg", curi->size, "src", XLATE_LOG); + genastore ("src", curi->dmode, "dstreg", curi->size, "dst", XLATE_LOG); break; case i_EXT: - genamode (curi->smode, "srcreg", sz_long, "src", 1, 0); + genamode (curi->smode, "srcreg", sz_long, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { - case sz_byte: printf ("\tuae_u32 dst = (uae_s32)(uae_s8)src;\n"); break; - case sz_word: printf ("\tuae_u16 dst = (uae_s16)(uae_s8)src;\n"); break; - case sz_long: printf ("\tuae_u32 dst = (uae_s32)(uae_s16)src;\n"); break; - default: abort (); + case sz_byte: printf ("\tuae_u32 dst = (uae_s32)(uae_s8)src;\n"); break; + case sz_word: printf ("\tuae_u16 dst = (uae_s16)(uae_s8)src;\n"); break; + case sz_long: printf ("\tuae_u32 dst = (uae_s32)(uae_s16)src;\n"); break; + default: abort (); } genflags (flag_logical, 
curi->size == sz_word ? sz_word : sz_long, "dst", "", ""); genastore ("dst", curi->smode, "srcreg", - curi->size == sz_word ? sz_word : sz_long, "src"); + curi->size == sz_word ? sz_word : sz_long, "src", XLATE_LOG); break; case i_MVMEL: - genmovemel ((uae_u16)opcode); + genmovemel (opcode); break; case i_MVMLE: - genmovemle ((uae_u16)opcode); + genmovemle (opcode); break; case i_TRAP: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - sync_m68k_pc (); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + gen_set_fault_pc (); printf ("\tException(src+32,0);\n"); - m68k_pc_offset = 0; break; case i_MVR2USP: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); printf ("\tregs.usp = src;\n"); break; case i_MVUSP2R: - genamode (curi->smode, "srcreg", curi->size, "src", 2, 0); - genastore ("regs.usp", curi->smode, "srcreg", curi->size, "src"); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG); + genastore ("regs.usp", curi->smode, "srcreg", curi->size, "src", XLATE_LOG); break; case i_RESET: + printf ("\tAtariReset();\n"); break; case i_NOP: break; case i_STOP: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - printf ("\tregs.sr = src;\n"); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + /* + * STOP undocumented features: + * if SR is not set: + * 68000 (68010?): Update SR, increase PC and then cause privilege violation exception (handled in newcpu) + * 68000 (68010?): Traced STOP also runs 4 cycles faster. 
+ * 68020 68030: STOP works normally + * 68040 68060: Immediate privilege violation exception + */ + printf ("\tuae_u16 sr = src;\n"); + if (cpu_level >= 4) { + printf("\tif (!(sr & 0x2000)) {\n"); + printf ("m68k_incpc(%d);\n", m68k_pc_offset); + printf("\t\tException(8,0); goto %s;\n", endlabelstr); + printf("\t}\n"); + } + printf("\tregs.sr = sr;\n"); printf ("\tMakeFromSR();\n"); printf ("\tm68k_setstopped(1);\n"); + sync_m68k_pc (); + /* STOP does not prefetch anything */ + /* did_prefetch = -1; */ break; case i_RTE: if (cpu_level == 0) { - genamode (Aipi, "7", sz_word, "sr", 1, 0); - genamode (Aipi, "7", sz_long, "pc", 1, 0); + genamode (Aipi, "7", sz_word, "sr", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (Aipi, "7", sz_long, "pc", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); printf ("\tregs.sr = sr; m68k_setpc_rte(pc);\n"); fill_prefetch_0 (); printf ("\tMakeFromSR();\n"); } else { - int old_brace_level = n_braces; - if (next_cpu_level < 0) + if (next_cpu_level < 0) next_cpu_level = 0; - printf ("\tuae_u16 newsr; uae_u32 newpc; for (;;) {\n"); - genamode (Aipi, "7", sz_word, "sr", 1, 0); - genamode (Aipi, "7", sz_long, "pc", 1, 0); - genamode (Aipi, "7", sz_word, "format", 1, 0); - printf ("\tnewsr = sr; newpc = pc;\n"); - printf ("\tif ((format & 0xF000) == 0x0000) { break; }\n"); - printf ("\telse if ((format & 0xF000) == 0x1000) { ; }\n"); - printf ("\telse if ((format & 0xF000) == 0x2000) { m68k_areg(regs, 7) += 4; break; }\n"); - /* gb-- the next two lines are deleted in Bernie's gencpu.c */ - printf ("\telse if ((format & 0xF000) == 0x3000) { m68k_areg(regs, 7) += 4; break; }\n"); - printf ("\telse if ((format & 0xF000) == 0x7000) { m68k_areg(regs, 7) += 52; break; }\n"); - printf ("\telse if ((format & 0xF000) == 0x8000) { m68k_areg(regs, 7) += 50; break; }\n"); - printf ("\telse if ((format & 0xF000) == 0x9000) { m68k_areg(regs, 7) += 12; break; }\n"); - printf ("\telse if ((format & 0xF000) == 0xa000) { m68k_areg(regs, 7) += 24; 
break; }\n"); - printf ("\telse if ((format & 0xF000) == 0xb000) { m68k_areg(regs, 7) += 84; break; }\n"); - printf ("\telse { Exception(14,0); goto %s; }\n", endlabelstr); - printf ("\tregs.sr = newsr; MakeFromSR();\n}\n"); - pop_braces (old_brace_level); - printf ("\tregs.sr = newsr; MakeFromSR();\n"); - printf ("\tm68k_setpc_rte(newpc);\n"); - fill_prefetch_0 (); - need_endlabel = 1; + printf ("\tex_rte();\n"); } /* PC is set and prefetch filled. */ m68k_pc_offset = 0; break; case i_RTD: - genamode (Aipi, "7", sz_long, "pc", 1, 0); - genamode (curi->smode, "srcreg", curi->size, "offs", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "offs", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (Aipi, "7", sz_long, "pc", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); printf ("\tm68k_areg(regs, 7) += offs;\n"); printf ("\tm68k_setpc_rte(pc);\n"); fill_prefetch_0 (); @@ -1275,18 +1426,18 @@ static void gen_opcode (unsigned long int opcode) m68k_pc_offset = 0; break; case i_LINK: - genamode (Apdi, "7", sz_long, "old", 2, 0); - genamode (curi->smode, "srcreg", sz_long, "src", 1, 0); - genastore ("src", Apdi, "7", sz_long, "old"); - genastore ("m68k_areg(regs, 7)", curi->smode, "srcreg", sz_long, "src"); - genamode (curi->dmode, "dstreg", curi->size, "offs", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "offs", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (Apdi, "7", sz_long, "old", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->smode, "srcreg", sz_long, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genastore ("m68k_areg(regs, 7)", curi->smode, "srcreg", sz_long, "src", XLATE_LOG); printf ("\tm68k_areg(regs, 7) += offs;\n"); + genastore ("src", Apdi, "7", sz_long, "old", XLATE_LOG); break; case i_UNLK: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); printf ("\tm68k_areg(regs, 7) = 
src;\n"); - genamode (Aipi, "7", sz_long, "old", 1, 0); - genastore ("old", curi->smode, "srcreg", curi->size, "src"); + genamode (Aipi, "7", sz_long, "old", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genastore ("old", curi->smode, "srcreg", curi->size, "src", XLATE_LOG); break; case i_RTS: printf ("\tm68k_do_rts();\n"); @@ -1294,14 +1445,16 @@ static void gen_opcode (unsigned long int opcode) m68k_pc_offset = 0; break; case i_TRAPV: + printf ("\tuaecptr oldpc = m68k_getpc();\n"); sync_m68k_pc (); - printf ("\tif (GET_VFLG) { Exception(7,m68k_getpc()); goto %s; }\n", endlabelstr); + printf ("\tif (GET_VFLG ()) { Exception(7,oldpc); goto %s; }\n", endlabelstr); need_endlabel = 1; break; case i_RTR: printf ("\tMakeSR();\n"); - genamode (Aipi, "7", sz_word, "sr", 1, 0); - genamode (Aipi, "7", sz_long, "pc", 1, 0); + genamode2 (Aipi, "7", sz_word, "sr", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG, 1); + genamode (Aipi, "7", sz_long, "pc", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode2 (Aipi, "7", sz_word, "sr", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG, 2); printf ("\tregs.sr &= 0xFF00; sr &= 0xFF;\n"); printf ("\tregs.sr |= sr; m68k_setpc(pc);\n"); fill_prefetch_0 (); @@ -1309,19 +1462,19 @@ static void gen_opcode (unsigned long int opcode) m68k_pc_offset = 0; break; case i_JSR: - genamode (curi->smode, "srcreg", curi->size, "src", 0, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_NO_FETCH, GENA_MOVEM_DO_INC, XLATE_PHYS); printf ("\tm68k_do_jsr(m68k_getpc() + %d, srca);\n", m68k_pc_offset); fill_prefetch_0 (); m68k_pc_offset = 0; break; case i_JMP: - genamode (curi->smode, "srcreg", curi->size, "src", 0, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_NO_FETCH, GENA_MOVEM_DO_INC, XLATE_PHYS); printf ("\tm68k_setpc(srca);\n"); fill_prefetch_0 (); m68k_pc_offset = 0; break; case i_BSR: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", 
GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_PHYS); printf ("\tuae_s32 s = (uae_s32)src + 2;\n"); if (using_exception_3) { printf ("\tif (src & 1) {\n"); @@ -1336,18 +1489,6 @@ static void gen_opcode (unsigned long int opcode) m68k_pc_offset = 0; break; case i_Bcc: - if (0 && !using_prefetch && !using_exception_3 && (cpu_level >= 2)) { - /* gb-- variant probably more favorable to compiler optimizations - also assumes no prefetch buffer is used - Hmm, that would make sense with processors capable of conditional moves */ - if (curi->size == sz_long && next_cpu_level < 1) - next_cpu_level = 1; - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - printf ("\tm68k_incpc (cctrue(%d) ? ((uae_s32)src + 2) : %d);\n", curi->cc, m68k_pc_offset); - m68k_pc_offset = 0; - } - else { - /* original code for branch instructions */ if (curi->size == sz_long) { if (cpu_level < 2) { printf ("\tm68k_incpc(2);\n"); @@ -1361,8 +1502,8 @@ static void gen_opcode (unsigned long int opcode) next_cpu_level = 1; } } - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - printf ("\tif (!cctrue(%d)) goto didnt_jump;\n", curi->cc); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_PHYS); + printf ("\tif (!cctrue(%d)) goto didnt_jump_%lx;\n", curi->cc, opcode); if (using_exception_3) { printf ("\tif (src & 1) {\n"); printf ("\t\tlast_addr_for_exception_3 = m68k_getpc() + 2;\n"); @@ -1374,26 +1515,25 @@ static void gen_opcode (unsigned long int opcode) printf ("\tm68k_incpc ((uae_s32)src + 2);\n"); fill_prefetch_0 (); printf ("return;\n"); - printf ("didnt_jump:;\n"); + printf ("didnt_jump_%lx:;\n", opcode); need_endlabel = 1; - } break; case i_LEA: - genamode (curi->smode, "srcreg", curi->size, "src", 0, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0); - genastore ("srca", curi->dmode, "dstreg", curi->size, "dst"); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_NO_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + 
genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG); + genastore ("srca", curi->dmode, "dstreg", curi->size, "dst", XLATE_LOG); break; case i_PEA: - genamode (curi->smode, "srcreg", curi->size, "src", 0, 0); - genamode (Apdi, "7", sz_long, "dst", 2, 0); - genastore ("srca", Apdi, "7", sz_long, "dst"); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_NO_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (Apdi, "7", sz_long, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG); + genastore ("srca", Apdi, "7", sz_long, "dst", XLATE_LOG); break; case i_DBcc: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "offs", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "offs", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); printf ("\tif (!cctrue(%d)) {\n", curi->cc); - genastore ("(src-1)", curi->smode, "srcreg", curi->size, "src"); + genastore ("(src-1)", curi->smode, "srcreg", curi->size, "src", XLATE_LOG); printf ("\t\tif (src) {\n"); if (using_exception_3) { @@ -1412,15 +1552,15 @@ static void gen_opcode (unsigned long int opcode) need_endlabel = 1; break; case i_Scc: - genamode (curi->smode, "srcreg", curi->size, "src", 2, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); printf ("\tint val = cctrue(%d) ? 
0xff : 0;\n", curi->cc); - genastore ("val", curi->smode, "srcreg", curi->size, "src"); + genastore ("val", curi->smode, "srcreg", curi->size, "src", XLATE_LOG); break; case i_DIVU: printf ("\tuaecptr oldpc = m68k_getpc();\n"); - genamode (curi->smode, "srcreg", sz_word, "src", 1, 0); - genamode (curi->dmode, "dstreg", sz_long, "dst", 1, 0); + genamode (curi->smode, "srcreg", sz_word, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", sz_long, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); sync_m68k_pc (); /* Clear V flag when dividing by zero - Alcatraz Odyssey demo depends * on this (actually, it's doing a DIVS). */ @@ -1432,16 +1572,15 @@ static void gen_opcode (unsigned long int opcode) printf ("\tif (newv > 0xffff) { SET_VFLG (1); SET_NFLG (1); SET_CFLG (0); } else\n\t{\n"); genflags (flag_logical, sz_word, "newv", "", ""); printf ("\tnewv = (newv & 0xffff) | ((uae_u32)rem << 16);\n"); - genastore ("newv", curi->dmode, "dstreg", sz_long, "dst"); + genastore ("newv", curi->dmode, "dstreg", sz_long, "dst", XLATE_LOG); printf ("\t}\n"); printf ("\t}\n"); - insn_n_cycles += 68; need_endlabel = 1; break; case i_DIVS: printf ("\tuaecptr oldpc = m68k_getpc();\n"); - genamode (curi->smode, "srcreg", sz_word, "src", 1, 0); - genamode (curi->dmode, "dstreg", sz_long, "dst", 1, 0); + genamode (curi->smode, "srcreg", sz_word, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", sz_long, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); sync_m68k_pc (); printf ("\tif (src == 0) { SET_VFLG (0); Exception(5,oldpc); goto %s; } else {\n", endlabelstr); printf ("\tuae_s32 newv = (uae_s32)dst / (uae_s32)(uae_s16)src;\n"); @@ -1450,34 +1589,31 @@ static void gen_opcode (unsigned long int opcode) printf ("\tif (((uae_s16)rem < 0) != ((uae_s32)dst < 0)) rem = -rem;\n"); genflags (flag_logical, sz_word, "newv", "", ""); printf ("\tnewv = (newv & 0xffff) | ((uae_u32)rem << 16);\n"); - genastore 
("newv", curi->dmode, "dstreg", sz_long, "dst"); + genastore ("newv", curi->dmode, "dstreg", sz_long, "dst", XLATE_LOG); printf ("\t}\n"); printf ("\t}\n"); - insn_n_cycles += 72; need_endlabel = 1; break; case i_MULU: - genamode (curi->smode, "srcreg", sz_word, "src", 1, 0); - genamode (curi->dmode, "dstreg", sz_word, "dst", 1, 0); + genamode (curi->smode, "srcreg", sz_word, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", sz_word, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); printf ("\tuae_u32 newv = (uae_u32)(uae_u16)dst * (uae_u32)(uae_u16)src;\n"); genflags (flag_logical, sz_long, "newv", "", ""); - genastore ("newv", curi->dmode, "dstreg", sz_long, "dst"); - insn_n_cycles += 32; + genastore ("newv", curi->dmode, "dstreg", sz_long, "dst", XLATE_LOG); break; case i_MULS: - genamode (curi->smode, "srcreg", sz_word, "src", 1, 0); - genamode (curi->dmode, "dstreg", sz_word, "dst", 1, 0); + genamode (curi->smode, "srcreg", sz_word, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", sz_word, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); printf ("\tuae_u32 newv = (uae_s32)(uae_s16)dst * (uae_s32)(uae_s16)src;\n"); genflags (flag_logical, sz_long, "newv", "", ""); - genastore ("newv", curi->dmode, "dstreg", sz_long, "dst"); - insn_n_cycles += 32; + genastore ("newv", curi->dmode, "dstreg", sz_long, "dst", XLATE_LOG); break; case i_CHK: printf ("\tuaecptr oldpc = m68k_getpc();\n"); - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); printf ("\tif ((uae_s32)dst < 0) { SET_NFLG (1); Exception(6,oldpc); goto %s; }\n", endlabelstr); printf ("\telse if (dst > src) { SET_NFLG 
(0); Exception(6,oldpc); goto %s; }\n", endlabelstr); need_endlabel = 1; @@ -1485,8 +1621,8 @@ static void gen_opcode (unsigned long int opcode) case i_CHK2: printf ("\tuaecptr oldpc = m68k_getpc();\n"); - genamode (curi->smode, "srcreg", curi->size, "extra", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0); + genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG); printf ("\t{uae_s32 upper,lower,reg = regs.regs[(extra >> 12) & 15];\n"); switch (curi->size) { case sz_byte: @@ -1505,13 +1641,13 @@ static void gen_opcode (unsigned long int opcode) } printf ("\tSET_ZFLG (upper == reg || lower == reg);\n"); printf ("\tSET_CFLG_ALWAYS (lower <= upper ? reg < lower || reg > upper : reg > upper || reg < lower);\n"); - printf ("\tif ((extra & 0x800) && GET_CFLG) { Exception(6,oldpc); goto %s; }\n}\n", endlabelstr); + printf ("\tif ((extra & 0x800) && GET_CFLG ()) { Exception(6,oldpc); goto %s; }\n}\n", endlabelstr); need_endlabel = 1; break; case i_ASR: - genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break; @@ -1521,9 +1657,9 @@ static void gen_opcode (unsigned long int opcode) } printf ("\tuae_u32 sign = (%s & val) >> %d;\n", cmask (curi->size), bit_size (curi->size) - 1); printf ("\tcnt &= 63;\n"); - printf ("\tCLEAR_CZNV;\n"); + printf ("\tCLEAR_CZNV();\n"); printf ("\tif (cnt >= %d) {\n", bit_size (curi->size)); - printf ("\t\tval = %s & (uae_u32)-(uae_s32)sign;\n", bit_mask (curi->size)); + printf ("\t\tval = %s & 
(uae_u32)-sign;\n", bit_mask (curi->size)); printf ("\t\tSET_CFLG (sign);\n"); duplicate_carry (); if (source_is_imm1_8 (curi)) @@ -1534,17 +1670,17 @@ static void gen_opcode (unsigned long int opcode) printf ("\t\tSET_CFLG (val & 1);\n"); duplicate_carry (); printf ("\t\tval >>= 1;\n"); - printf ("\t\tval |= (%s << (%d - cnt)) & (uae_u32)-(uae_s32)sign;\n", + printf ("\t\tval |= (%s << (%d - cnt)) & (uae_u32)-sign;\n", bit_mask (curi->size), bit_size (curi->size)); printf ("\t\tval &= %s;\n", bit_mask (curi->size)); printf ("\t}\n"); genflags (flag_logical_noclobber, curi->size, "val", "", ""); - genastore ("val", curi->dmode, "dstreg", curi->size, "data"); + genastore ("val", curi->dmode, "dstreg", curi->size, "data", XLATE_LOG); break; case i_ASL: - genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break; @@ -1553,7 +1689,7 @@ static void gen_opcode (unsigned long int opcode) default: abort (); } printf ("\tcnt &= 63;\n"); - printf ("\tCLEAR_CZNV;\n"); + printf ("\tCLEAR_CZNV();\n"); printf ("\tif (cnt >= %d) {\n", bit_size (curi->size)); printf ("\t\tSET_VFLG (val != 0);\n"); printf ("\t\tSET_CFLG (cnt == %d ? 
val & 1 : 0);\n", @@ -1576,11 +1712,11 @@ static void gen_opcode (unsigned long int opcode) printf ("\t\tval &= %s;\n", bit_mask (curi->size)); printf ("\t}\n"); genflags (flag_logical_noclobber, curi->size, "val", "", ""); - genastore ("val", curi->dmode, "dstreg", curi->size, "data"); + genastore ("val", curi->dmode, "dstreg", curi->size, "data", XLATE_LOG); break; case i_LSR: - genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break; @@ -1589,7 +1725,7 @@ static void gen_opcode (unsigned long int opcode) default: abort (); } printf ("\tcnt &= 63;\n"); - printf ("\tCLEAR_CZNV;\n"); + printf ("\tCLEAR_CZNV();\n"); printf ("\tif (cnt >= %d) {\n", bit_size (curi->size)); printf ("\t\tSET_CFLG ((cnt == %d) & (val >> %d));\n", bit_size (curi->size), bit_size (curi->size) - 1); @@ -1605,11 +1741,11 @@ static void gen_opcode (unsigned long int opcode) printf ("\t\tval >>= 1;\n"); printf ("\t}\n"); genflags (flag_logical_noclobber, curi->size, "val", "", ""); - genastore ("val", curi->dmode, "dstreg", curi->size, "data"); + genastore ("val", curi->dmode, "dstreg", curi->size, "data", XLATE_LOG); break; case i_LSL: - genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break; @@ -1618,7 +1754,7 @@ static void gen_opcode (unsigned long int 
opcode) default: abort (); } printf ("\tcnt &= 63;\n"); - printf ("\tCLEAR_CZNV;\n"); + printf ("\tCLEAR_CZNV();\n"); printf ("\tif (cnt >= %d) {\n", bit_size (curi->size)); printf ("\t\tSET_CFLG (cnt == %d ? val & 1 : 0);\n", bit_size (curi->size)); @@ -1635,11 +1771,11 @@ static void gen_opcode (unsigned long int opcode) printf ("\tval &= %s;\n", bit_mask (curi->size)); printf ("\t}\n"); genflags (flag_logical_noclobber, curi->size, "val", "", ""); - genastore ("val", curi->dmode, "dstreg", curi->size, "data"); + genastore ("val", curi->dmode, "dstreg", curi->size, "data", XLATE_LOG); break; case i_ROL: - genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break; @@ -1648,7 +1784,7 @@ static void gen_opcode (unsigned long int opcode) default: abort (); } printf ("\tcnt &= 63;\n"); - printf ("\tCLEAR_CZNV;\n"); + printf ("\tCLEAR_CZNV();\n"); if (source_is_imm1_8 (curi)) printf ("{"); else @@ -1662,11 +1798,11 @@ static void gen_opcode (unsigned long int opcode) printf ("\tSET_CFLG (val & 1);\n"); printf ("}\n"); genflags (flag_logical_noclobber, curi->size, "val", "", ""); - genastore ("val", curi->dmode, "dstreg", curi->size, "data"); + genastore ("val", curi->dmode, "dstreg", curi->size, "data", XLATE_LOG); break; case i_ROR: - genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { case sz_byte: 
printf ("\tuae_u32 val = (uae_u8)data;\n"); break; @@ -1675,7 +1811,7 @@ static void gen_opcode (unsigned long int opcode) default: abort (); } printf ("\tcnt &= 63;\n"); - printf ("\tCLEAR_CZNV;\n"); + printf ("\tCLEAR_CZNV();\n"); if (source_is_imm1_8 (curi)) printf ("{"); else @@ -1689,11 +1825,11 @@ static void gen_opcode (unsigned long int opcode) printf ("\tSET_CFLG ((val & %s) >> %d);\n", cmask (curi->size), bit_size (curi->size) - 1); printf ("\t}\n"); genflags (flag_logical_noclobber, curi->size, "val", "", ""); - genastore ("val", curi->dmode, "dstreg", curi->size, "data"); + genastore ("val", curi->dmode, "dstreg", curi->size, "data", XLATE_LOG); break; case i_ROXL: - genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break; @@ -1702,7 +1838,7 @@ static void gen_opcode (unsigned long int opcode) default: abort (); } printf ("\tcnt &= 63;\n"); - printf ("\tCLEAR_CZNV;\n"); + printf ("\tCLEAR_CZNV();\n"); if (source_is_imm1_8 (curi)) printf ("{"); else { @@ -1713,17 +1849,17 @@ static void gen_opcode (unsigned long int opcode) printf ("\t{\n\tuae_u32 carry;\n"); printf ("\tuae_u32 loval = val >> (%d - cnt);\n", bit_size (curi->size) - 1); printf ("\tcarry = loval & 1;\n"); - printf ("\tval = (((val << 1) | GET_XFLG) << cnt) | (loval >> 1);\n"); + printf ("\tval = (((val << 1) | GET_XFLG ()) << cnt) | (loval >> 1);\n"); printf ("\tSET_XFLG (carry);\n"); printf ("\tval &= %s;\n", bit_mask (curi->size)); printf ("\t} }\n"); - printf ("\tSET_CFLG (GET_XFLG);\n"); + printf ("\tSET_CFLG (GET_XFLG ());\n"); genflags (flag_logical_noclobber, curi->size, "val", "", ""); - genastore ("val", 
curi->dmode, "dstreg", curi->size, "data"); + genastore ("val", curi->dmode, "dstreg", curi->size, "data", XLATE_LOG); break; case i_ROXR: - genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break; @@ -1732,7 +1868,7 @@ static void gen_opcode (unsigned long int opcode) default: abort (); } printf ("\tcnt &= 63;\n"); - printf ("\tCLEAR_CZNV;\n"); + printf ("\tCLEAR_CZNV();\n"); if (source_is_imm1_8 (curi)) printf ("{"); else { @@ -1741,7 +1877,7 @@ static void gen_opcode (unsigned long int opcode) } printf ("\tcnt--;\n"); printf ("\t{\n\tuae_u32 carry;\n"); - printf ("\tuae_u32 hival = (val << 1) | GET_XFLG;\n"); + printf ("\tuae_u32 hival = (val << 1) | GET_XFLG ();\n"); printf ("\thival <<= (%d - cnt);\n", bit_size (curi->size) - 1); printf ("\tval >>= cnt;\n"); printf ("\tcarry = val & 1;\n"); @@ -1750,12 +1886,12 @@ static void gen_opcode (unsigned long int opcode) printf ("\tSET_XFLG (carry);\n"); printf ("\tval &= %s;\n", bit_mask (curi->size)); printf ("\t} }\n"); - printf ("\tSET_CFLG (GET_XFLG);\n"); + printf ("\tSET_CFLG (GET_XFLG ());\n"); genflags (flag_logical_noclobber, curi->size, "val", "", ""); - genastore ("val", curi->dmode, "dstreg", curi->size, "data"); + genastore ("val", curi->dmode, "dstreg", curi->size, "data", XLATE_LOG); break; case i_ASRW: - genamode (curi->smode, "srcreg", curi->size, "data", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break; @@ -1769,10 +1905,10 @@ static void gen_opcode (unsigned long int 
opcode) genflags (flag_logical, curi->size, "val", "", ""); printf ("\tSET_CFLG (cflg);\n"); duplicate_carry (); - genastore ("val", curi->smode, "srcreg", curi->size, "data"); + genastore ("val", curi->smode, "srcreg", curi->size, "data", XLATE_LOG); break; case i_ASLW: - genamode (curi->smode, "srcreg", curi->size, "data", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break; @@ -1788,11 +1924,11 @@ static void gen_opcode (unsigned long int opcode) printf ("\tSET_CFLG (sign != 0);\n"); duplicate_carry (); - printf ("\tSET_VFLG (GET_VFLG | (sign2 != sign));\n"); - genastore ("val", curi->smode, "srcreg", curi->size, "data"); + printf ("\tSET_VFLG (GET_VFLG () | (sign2 != sign));\n"); + genastore ("val", curi->smode, "srcreg", curi->size, "data", XLATE_LOG); break; case i_LSRW: - genamode (curi->smode, "srcreg", curi->size, "data", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break; @@ -1805,10 +1941,10 @@ static void gen_opcode (unsigned long int opcode) genflags (flag_logical, curi->size, "val", "", ""); printf ("SET_CFLG (carry);\n"); duplicate_carry (); - genastore ("val", curi->smode, "srcreg", curi->size, "data"); + genastore ("val", curi->smode, "srcreg", curi->size, "data", XLATE_LOG); break; case i_LSLW: - genamode (curi->smode, "srcreg", curi->size, "data", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { case sz_byte: printf ("\tuae_u8 val = data;\n"); break; @@ -1821,10 +1957,10 @@ static void gen_opcode (unsigned long int opcode) genflags (flag_logical, curi->size, "val", "", ""); printf ("SET_CFLG (carry >> %d);\n", bit_size (curi->size) - 
1); duplicate_carry (); - genastore ("val", curi->smode, "srcreg", curi->size, "data"); + genastore ("val", curi->smode, "srcreg", curi->size, "data", XLATE_LOG); break; case i_ROLW: - genamode (curi->smode, "srcreg", curi->size, "data", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { case sz_byte: printf ("\tuae_u8 val = data;\n"); break; @@ -1837,10 +1973,10 @@ static void gen_opcode (unsigned long int opcode) printf ("\tif (carry) val |= 1;\n"); genflags (flag_logical, curi->size, "val", "", ""); printf ("SET_CFLG (carry >> %d);\n", bit_size (curi->size) - 1); - genastore ("val", curi->smode, "srcreg", curi->size, "data"); + genastore ("val", curi->smode, "srcreg", curi->size, "data", XLATE_LOG); break; case i_RORW: - genamode (curi->smode, "srcreg", curi->size, "data", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { case sz_byte: printf ("\tuae_u8 val = data;\n"); break; @@ -1853,10 +1989,10 @@ static void gen_opcode (unsigned long int opcode) printf ("\tif (carry) val |= %s;\n", cmask (curi->size)); genflags (flag_logical, curi->size, "val", "", ""); printf ("SET_CFLG (carry);\n"); - genastore ("val", curi->smode, "srcreg", curi->size, "data"); + genastore ("val", curi->smode, "srcreg", curi->size, "data", XLATE_LOG); break; case i_ROXLW: - genamode (curi->smode, "srcreg", curi->size, "data", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { case sz_byte: printf ("\tuae_u8 val = data;\n"); break; @@ -1866,14 +2002,14 @@ static void gen_opcode (unsigned long int opcode) } printf ("\tuae_u32 carry = val & %s;\n", cmask (curi->size)); printf ("\tval <<= 1;\n"); - printf ("\tif (GET_XFLG) val |= 1;\n"); + printf ("\tif (GET_XFLG ()) val |= 1;\n"); genflags (flag_logical, 
curi->size, "val", "", ""); printf ("SET_CFLG (carry >> %d);\n", bit_size (curi->size) - 1); duplicate_carry (); - genastore ("val", curi->smode, "srcreg", curi->size, "data"); + genastore ("val", curi->smode, "srcreg", curi->size, "data", XLATE_LOG); break; case i_ROXRW: - genamode (curi->smode, "srcreg", curi->size, "data", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { case sz_byte: printf ("\tuae_u8 val = data;\n"); break; @@ -1883,107 +2019,133 @@ static void gen_opcode (unsigned long int opcode) } printf ("\tuae_u32 carry = val & 1;\n"); printf ("\tval >>= 1;\n"); - printf ("\tif (GET_XFLG) val |= %s;\n", cmask (curi->size)); + printf ("\tif (GET_XFLG ()) val |= %s;\n", cmask (curi->size)); genflags (flag_logical, curi->size, "val", "", ""); printf ("SET_CFLG (carry);\n"); duplicate_carry (); - genastore ("val", curi->smode, "srcreg", curi->size, "data"); + genastore ("val", curi->smode, "srcreg", curi->size, "data", XLATE_LOG); break; case i_MOVEC2: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); printf ("\tint regno = (src >> 12) & 15;\n"); printf ("\tuae_u32 *regp = regs.regs + regno;\n"); - printf ("\tif (! m68k_movec2(src & 0xFFF, regp)) goto %s;\n", endlabelstr); + printf ("\tif (!m68k_movec2(src & 0xFFF, regp)) goto %s;\n", endlabelstr); break; case i_MOVE2C: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); printf ("\tint regno = (src >> 12) & 15;\n"); printf ("\tuae_u32 *regp = regs.regs + regno;\n"); - printf ("\tif (! 
m68k_move2c(src & 0xFFF, regp)) goto %s;\n", endlabelstr); + printf ("\tif (!m68k_move2c(src & 0xFFF, regp)) goto %s;\n", endlabelstr); break; case i_CAS: { int old_brace_level; - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); printf ("\tint ru = (src >> 6) & 7;\n"); printf ("\tint rc = src & 7;\n"); genflags (flag_cmp, curi->size, "newv", "m68k_dreg(regs, rc)", "dst"); - printf ("\tif (GET_ZFLG)"); + sync_m68k_pc (); + printf ("\tif (GET_ZFLG ())"); old_brace_level = n_braces; start_brace (); - genastore ("(m68k_dreg(regs, ru))", curi->dmode, "dstreg", curi->size, "dst"); + genastore ("(m68k_dreg(regs, ru))", curi->dmode, "dstreg", curi->size, "dst", XLATE_LOG); pop_braces (old_brace_level); printf ("else"); start_brace (); - printf ("m68k_dreg(regs, rc) = dst;\n"); + switch (curi->size) { + case sz_byte: + printf ("\tm68k_dreg(regs, rc) = (m68k_dreg(regs, rc) & ~0xff) | (dst & 0xff);\n"); + break; + case sz_word: + printf ("\tm68k_dreg(regs, rc) = (m68k_dreg(regs, rc) & ~0xffff) | (dst & 0xffff);\n"); + break; + default: + printf ("\tm68k_dreg(regs, rc) = dst;\n"); + break; + } pop_braces (old_brace_level); } break; case i_CAS2: - genamode (curi->smode, "srcreg", curi->size, "extra", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); printf ("\tuae_u32 rn1 = regs.regs[(extra >> 28) & 15];\n"); printf ("\tuae_u32 rn2 = regs.regs[(extra >> 12) & 15];\n"); if (curi->size == sz_word) { int old_brace_level = n_braces; + printf ("\tuae_u32 rc1 = (extra >> 16) & 7;\n"); + printf ("\tuae_u32 rc2 = extra & 7;\n"); printf ("\tuae_u16 dst1 = get_word(rn1), dst2 = get_word(rn2);\n"); - genflags (flag_cmp, curi->size, "newv", 
"m68k_dreg(regs, (extra >> 16) & 7)", "dst1"); - printf ("\tif (GET_ZFLG) {\n"); - genflags (flag_cmp, curi->size, "newv", "m68k_dreg(regs, extra & 7)", "dst2"); - printf ("\tif (GET_ZFLG) {\n"); + genflags (flag_cmp, curi->size, "newv", "m68k_dreg(regs, rc1)", "dst1"); + printf ("\tif (GET_ZFLG ()) {\n"); + genflags (flag_cmp, curi->size, "newv", "m68k_dreg(regs, rc2)", "dst2"); + printf ("\tif (GET_ZFLG ()) {\n"); printf ("\tput_word(rn1, m68k_dreg(regs, (extra >> 22) & 7));\n"); - printf ("\tput_word(rn1, m68k_dreg(regs, (extra >> 6) & 7));\n"); + printf ("\tput_word(rn2, m68k_dreg(regs, (extra >> 6) & 7));\n"); printf ("\t}}\n"); pop_braces (old_brace_level); - printf ("\tif (! GET_ZFLG) {\n"); - printf ("\tm68k_dreg(regs, (extra >> 22) & 7) = (m68k_dreg(regs, (extra >> 22) & 7) & ~0xffff) | (dst1 & 0xffff);\n"); - printf ("\tm68k_dreg(regs, (extra >> 6) & 7) = (m68k_dreg(regs, (extra >> 6) & 7) & ~0xffff) | (dst2 & 0xffff);\n"); + printf ("\tif (! GET_ZFLG ()) {\n"); + printf ("\tm68k_dreg(regs, rc2) = (m68k_dreg(regs, rc2) & ~0xffff) | (dst2 & 0xffff);\n"); + printf ("\tm68k_dreg(regs, rc1) = (m68k_dreg(regs, rc1) & ~0xffff) | (dst1 & 0xffff);\n"); printf ("\t}\n"); } else { int old_brace_level = n_braces; + printf ("\tuae_u32 rc1 = (extra >> 16) & 7;\n"); + printf ("\tuae_u32 rc2 = extra & 7;\n"); printf ("\tuae_u32 dst1 = get_long(rn1), dst2 = get_long(rn2);\n"); - genflags (flag_cmp, curi->size, "newv", "m68k_dreg(regs, (extra >> 16) & 7)", "dst1"); - printf ("\tif (GET_ZFLG) {\n"); - genflags (flag_cmp, curi->size, "newv", "m68k_dreg(regs, extra & 7)", "dst2"); - printf ("\tif (GET_ZFLG) {\n"); + genflags (flag_cmp, curi->size, "newv", "m68k_dreg(regs, rc1)", "dst1"); + printf ("\tif (GET_ZFLG ()) {\n"); + genflags (flag_cmp, curi->size, "newv", "m68k_dreg(regs, rc2)", "dst2"); + printf ("\tif (GET_ZFLG ()) {\n"); printf ("\tput_long(rn1, m68k_dreg(regs, (extra >> 22) & 7));\n"); - printf ("\tput_long(rn1, m68k_dreg(regs, (extra >> 6) & 7));\n"); + printf 
("\tput_long(rn2, m68k_dreg(regs, (extra >> 6) & 7));\n"); printf ("\t}}\n"); pop_braces (old_brace_level); - printf ("\tif (! GET_ZFLG) {\n"); - printf ("\tm68k_dreg(regs, (extra >> 22) & 7) = dst1;\n"); - printf ("\tm68k_dreg(regs, (extra >> 6) & 7) = dst2;\n"); + printf ("\tif (! GET_ZFLG ()) {\n"); + printf ("\tm68k_dreg(regs, rc2) = dst2;\n"); + printf ("\tm68k_dreg(regs, rc1) = dst1;\n"); printf ("\t}\n"); } break; - case i_MOVES: /* ignore DFC and SFC because we have no MMU */ + case i_MOVES: { - int old_brace_level; - genamode (curi->smode, "srcreg", curi->size, "extra", 1, 0); - printf ("\tif (extra & 0x800)\n"); - old_brace_level = n_braces; - start_brace (); - printf ("\tuae_u32 src = regs.regs[(extra >> 12) & 15];\n"); - genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0); - genastore ("src", curi->dmode, "dstreg", curi->size, "dst"); - pop_braces (old_brace_level); - printf ("else"); - start_brace (); - genamode (curi->dmode, "dstreg", curi->size, "src", 1, 0); - printf ("\tif (extra & 0x8000) {\n"); - switch (curi->size) { - case sz_byte: printf ("\tm68k_areg(regs, (extra >> 12) & 7) = (uae_s32)(uae_s8)src;\n"); break; - case sz_word: printf ("\tm68k_areg(regs, (extra >> 12) & 7) = (uae_s32)(uae_s16)src;\n"); break; - case sz_long: printf ("\tm68k_areg(regs, (extra >> 12) & 7) = src;\n"); break; - default: abort (); - } - printf ("\t} else {\n"); - genastore ("src", Dreg, "(extra >> 12) & 7", curi->size, ""); - printf ("\t}\n"); - pop_braces (old_brace_level); + int old_brace_level; + + genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + start_brace(); + printf ("\tif (extra & 0x0800)\n"); /* from reg to ea */ + { + int old_m68k_pc_offset = m68k_pc_offset; + /* use DFC */ + old_brace_level = n_braces; + start_brace (); + printf ("\tuae_u32 src = regs.regs[(extra >> 12) & 15];\n"); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_DFC); + 
genastore ("src", curi->dmode, "dstreg", curi->size, "dst", XLATE_DFC); + pop_braces (old_brace_level); + m68k_pc_offset = old_m68k_pc_offset; + } + printf ("else"); /* from ea to reg */ + { + /* use SFC */ + start_brace (); + genamode (curi->dmode, "dstreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_SFC); + printf ("\tif (extra & 0x8000) {\n"); /* address/data */ + switch (curi->size) { + case sz_byte: printf ("\tm68k_areg(regs, (extra >> 12) & 7) = (uae_s32)(uae_s8)src;\n"); break; + case sz_word: printf ("\tm68k_areg(regs, (extra >> 12) & 7) = (uae_s32)(uae_s16)src;\n"); break; + case sz_long: printf ("\tm68k_areg(regs, (extra >> 12) & 7) = src;\n"); break; + default: abort (); + } + printf ("\t} else {\n"); + genastore ("src", Dreg, "(extra >> 12) & 7", curi->size, "", XLATE_LOG); + printf ("\t}\n"); + sync_m68k_pc(); + pop_braces (old_brace_level); + } } break; case i_BKPT: /* only needed for hardware emulators */ @@ -1999,23 +2161,23 @@ static void gen_opcode (unsigned long int opcode) printf ("\top_illg(opcode);\n"); break; case i_TRAPcc: + printf ("\tuaecptr oldpc = m68k_getpc();\n"); if (curi->smode != am_unknown && curi->smode != am_illg) - genamode (curi->smode, "srcreg", curi->size, "dummy", 1, 0); - printf ("\tif (cctrue(%d)) { Exception(7,m68k_getpc()); goto %s; }\n", curi->cc, endlabelstr); + genamode (curi->smode, "srcreg", curi->size, "dummy", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + sync_m68k_pc (); + printf ("\tif (cctrue(%d)) { Exception(7,oldpc); goto %s; }\n", curi->cc, endlabelstr); need_endlabel = 1; break; case i_DIVL: - sync_m68k_pc (); - start_brace (); printf ("\tuaecptr oldpc = m68k_getpc();\n"); - genamode (curi->smode, "srcreg", curi->size, "extra", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, 
XLATE_LOG); sync_m68k_pc (); printf ("\tm68k_divl(opcode, dst, extra, oldpc);\n"); break; case i_MULL: - genamode (curi->smode, "srcreg", curi->size, "extra", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); sync_m68k_pc (); printf ("\tm68k_mull(opcode, dst, extra);\n"); break; @@ -2027,34 +2189,37 @@ static void gen_opcode (unsigned long int opcode) case i_BFFFO: case i_BFSET: case i_BFINS: - genamode (curi->smode, "srcreg", curi->size, "extra", 1, 0); - genamode (curi->dmode, "dstreg", sz_long, "dst", 2, 0); + genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", sz_long, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); + printf ("\tuae_u32 bdata[2];"); printf ("\tuae_s32 offset = extra & 0x800 ? m68k_dreg(regs, (extra >> 6) & 7) : (extra >> 6) & 0x1f;\n"); printf ("\tint width = (((extra & 0x20 ? m68k_dreg(regs, extra & 7) : extra) -1) & 0x1f) +1;\n"); if (curi->dmode == Dreg) { - printf ("\tuae_u32 tmp = m68k_dreg(regs, dstreg) << (offset & 0x1f);\n"); + printf ("\tuae_u32 tmp = m68k_dreg(regs, dstreg);\n"); + printf ("\toffset &= 0x1f;\n"); + printf ("\ttmp = (tmp << offset) | (tmp >> (32 - offset));\n"); + printf ("\tbdata[0] = tmp & ((1 << (32 - width)) - 1);\n"); } else { - printf ("\tuae_u32 tmp,bf0,bf1;\n"); - printf ("\tdsta += (offset >> 3) | (offset & 0x80000000 ? 
~0x1fffffff : 0);\n"); - printf ("\tbf0 = get_long(dsta);bf1 = get_byte(dsta+4) & 0xff;\n"); - printf ("\ttmp = (bf0 << (offset & 7)) | (bf1 >> (8 - (offset & 7)));\n"); + printf ("\tuae_u32 tmp;\n"); + printf ("\tdsta += offset >> 3;\n"); + printf ("\ttmp = get_bitfield(dsta, bdata, offset, width);\n"); } - printf ("\ttmp >>= (32 - width);\n"); - printf ("\tSET_NFLG_ALWAYS (tmp & (1 << (width-1)) ? 1 : 0);\n"); + printf ("\tSET_NFLG_ALWAYS (((uae_s32)tmp) < 0 ? 1 : 0);\n"); + if (curi->mnemo == i_BFEXTS) + printf ("\ttmp = (uae_s32)tmp >> (32 - width);\n"); + else + printf ("\ttmp >>= (32 - width);\n"); printf ("\tSET_ZFLG (tmp == 0); SET_VFLG (0); SET_CFLG (0);\n"); switch (curi->mnemo) { case i_BFTST: break; case i_BFEXTU: + case i_BFEXTS: printf ("\tm68k_dreg(regs, (extra >> 12) & 7) = tmp;\n"); break; case i_BFCHG: - printf ("\ttmp = ~tmp;\n"); - break; - case i_BFEXTS: - printf ("\tif (GET_NFLG) tmp |= width == 32 ? 0 : (-1 << width);\n"); - printf ("\tm68k_dreg(regs, (extra >> 12) & 7) = tmp;\n"); + printf ("\ttmp = tmp ^ (0xffffffffu >> (32 - width));\n"); break; case i_BFCLR: printf ("\ttmp = 0;\n"); @@ -2065,10 +2230,11 @@ static void gen_opcode (unsigned long int opcode) printf ("\tm68k_dreg(regs, (extra >> 12) & 7) = offset;\n"); break; case i_BFSET: - printf ("\ttmp = 0xffffffff;\n"); + printf ("\ttmp = 0xffffffffu >> (32 - width);\n"); break; case i_BFINS: printf ("\ttmp = m68k_dreg(regs, (extra >> 12) & 7);\n"); + printf ("\ttmp = tmp & (0xffffffffu >> (32 - width));\n"); printf ("\tSET_NFLG_ALWAYS (tmp & (1 << (width - 1)) ? 1 : 0);\n"); printf ("\tSET_ZFLG (tmp == 0);\n"); break; @@ -2078,26 +2244,12 @@ static void gen_opcode (unsigned long int opcode) if (curi->mnemo == i_BFCHG || curi->mnemo == i_BFCLR || curi->mnemo == i_BFSET - || curi->mnemo == i_BFINS) - { - printf ("\ttmp <<= (32 - width);\n"); + || curi->mnemo == i_BFINS) { if (curi->dmode == Dreg) { - printf ("\tm68k_dreg(regs, dstreg) = (m68k_dreg(regs, dstreg) & ((offset & 0x1f) == 0 ? 
0 :\n"); - printf ("\t\t(0xffffffff << (32 - (offset & 0x1f))))) |\n"); - printf ("\t\t(tmp >> (offset & 0x1f)) |\n"); - printf ("\t\t(((offset & 0x1f) + width) >= 32 ? 0 :\n"); - printf (" (m68k_dreg(regs, dstreg) & ((uae_u32)0xffffffff >> ((offset & 0x1f) + width))));\n"); + printf ("\ttmp = bdata[0] | (tmp << (32 - width));\n"); + printf ("\tm68k_dreg(regs, dstreg) = (tmp >> offset) | (tmp << (32 - offset));\n"); } else { - printf ("\tbf0 = (bf0 & (0xff000000 << (8 - (offset & 7)))) |\n"); - printf ("\t\t(tmp >> (offset & 7)) |\n"); - printf ("\t\t(((offset & 7) + width) >= 32 ? 0 :\n"); - printf ("\t\t (bf0 & ((uae_u32)0xffffffff >> ((offset & 7) + width))));\n"); - printf ("\tput_long(dsta,bf0 );\n"); - printf ("\tif (((offset & 7) + width) > 32) {\n"); - printf ("\t\tbf1 = (bf1 & (0xff >> (width - 32 + (offset & 7)))) |\n"); - printf ("\t\t\t(tmp << (8 - (offset & 7)));\n"); - printf ("\t\tput_byte(dsta+4,bf1);\n"); - printf ("\t}\n"); + printf ("\tput_bitfield(dsta, bdata, tmp, offset, width);\n"); } } break; @@ -2107,11 +2259,11 @@ static void gen_opcode (unsigned long int opcode) printf ("\tm68k_dreg(regs, dstreg) = (m68k_dreg(regs, dstreg) & 0xffffff00) | ((val >> 4) & 0xf0) | (val & 0xf);\n"); } else { printf ("\tuae_u16 val;\n"); - printf ("\tm68k_areg(regs, srcreg) -= areg_byteinc[srcreg];\n"); - printf ("\tval = (uae_u16)get_byte(m68k_areg(regs, srcreg));\n"); - printf ("\tm68k_areg(regs, srcreg) -= areg_byteinc[srcreg];\n"); - printf ("\tval = (val | ((uae_u16)get_byte(m68k_areg(regs, srcreg)) << 8)) + %s;\n", gen_nextiword ()); + printf ("\tval = (uae_u16)get_byte(m68k_areg(regs, srcreg) - areg_byteinc[srcreg]);\n"); + printf ("\tval = (val | ((uae_u16)get_byte(m68k_areg(regs, srcreg) - 2 * areg_byteinc[srcreg]) << 8)) + %s;\n", gen_nextiword ()); + printf ("\tm68k_areg(regs, srcreg) -= 2;\n"); printf ("\tm68k_areg(regs, dstreg) -= areg_byteinc[dstreg];\n"); + gen_set_fault_pc (); printf ("\tput_byte(m68k_areg(regs, dstreg),((val >> 4) & 0xf0) | 
(val & 0xf));\n"); } break; @@ -2122,57 +2274,57 @@ static void gen_opcode (unsigned long int opcode) printf ("\tm68k_dreg(regs, dstreg) = (m68k_dreg(regs, dstreg) & 0xffff0000) | (val & 0xffff);\n"); } else { printf ("\tuae_u16 val;\n"); - printf ("\tm68k_areg(regs, srcreg) -= areg_byteinc[srcreg];\n"); - printf ("\tval = (uae_u16)get_byte(m68k_areg(regs, srcreg));\n"); + printf ("\tval = (uae_u16)get_byte(m68k_areg(regs, srcreg) - areg_byteinc[srcreg]);\n"); printf ("\tval = (((val << 4) & 0xf00) | (val & 0xf)) + %s;\n", gen_nextiword ()); - printf ("\tm68k_areg(regs, dstreg) -= areg_byteinc[dstreg];\n"); - printf ("\tput_byte(m68k_areg(regs, dstreg),val);\n"); - printf ("\tm68k_areg(regs, dstreg) -= areg_byteinc[dstreg];\n"); - printf ("\tput_byte(m68k_areg(regs, dstreg),val >> 8);\n"); + printf ("\tm68k_areg(regs, srcreg) -= areg_byteinc[srcreg];\n"); + printf ("\tm68k_areg(regs, dstreg) -= 2;\n"); + gen_set_fault_pc (); + printf ("\tput_word(m68k_areg(regs, dstreg), val);\n"); } break; case i_TAS: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); genflags (flag_logical, curi->size, "src", "", ""); printf ("\tsrc |= 0x80;\n"); - genastore ("src", curi->smode, "srcreg", curi->size, "src"); + genastore ("src", curi->smode, "srcreg", curi->size, "src", XLATE_LOG); break; case i_FPP: - genamode (curi->smode, "srcreg", curi->size, "extra", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); sync_m68k_pc (); swap_opcode (); printf ("\tfpuop_arithmetic(opcode, extra);\n"); break; case i_FDBcc: - genamode (curi->smode, "srcreg", curi->size, "extra", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); sync_m68k_pc (); swap_opcode (); printf ("\tfpuop_dbcc(opcode, extra);\n"); break; case i_FScc: - genamode (curi->smode, "srcreg", curi->size, 
"extra", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); sync_m68k_pc (); swap_opcode (); - printf ("\tfpuop_scc(opcode,extra);\n"); + printf ("\tfpuop_scc(opcode, extra);\n"); break; case i_FTRAPcc: sync_m68k_pc (); start_brace (); printf ("\tuaecptr oldpc = m68k_getpc();\n"); + printf ("\tuae_u16 extra = %s;\n", gen_nextiword()); if (curi->smode != am_unknown && curi->smode != am_illg) - genamode (curi->smode, "srcreg", curi->size, "dummy", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "dummy", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); sync_m68k_pc (); swap_opcode (); - printf ("\tfpuop_trapcc(opcode,oldpc);\n"); + printf ("\tfpuop_trapcc(opcode, oldpc, extra);\n"); break; case i_FBcc: sync_m68k_pc (); start_brace (); printf ("\tuaecptr pc = m68k_getpc();\n"); - genamode (curi->dmode, "srcreg", curi->size, "extra", 1, 0); + genamode (curi->dmode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); sync_m68k_pc (); swap_opcode (); - printf ("\tfpuop_bcc(opcode,pc,extra);\n"); + printf ("\tfpuop_bcc(opcode, pc, extra);\n"); break; case i_FSAVE: sync_m68k_pc (); @@ -2185,67 +2337,104 @@ static void gen_opcode (unsigned long int opcode) printf ("\tfpuop_restore(opcode);\n"); break; case i_CINVL: + printf ("\tflush_internals();\n"); + printf("#ifdef USE_JIT\n"); + printf ("\tif (opcode&0x80)\n" + "\t\tflush_icache();\n"); + printf("#endif\n"); + break; case i_CINVP: + printf ("\tflush_internals();\n"); + printf("#ifdef USE_JIT\n"); + printf ("\tif (opcode&0x80)\n" + "\t\tflush_icache();\n"); + printf("#endif\n"); + break; case i_CINVA: - /* gb-- srcreg now contains the cache field */ - printf ("\tif (srcreg&0x2)\n"); - printf ("\t\tflush_icache(%d);\n", (int)(30 + ((opcode >> 3) & 3))); + printf ("\tflush_internals();\n"); + printf("#ifdef USE_JIT\n"); + printf ("\tif (opcode&0x80)\n" + "\t\tflush_icache();\n"); + printf("#endif\n"); break; case i_CPUSHL: + printf 
("\tflush_internals();\n"); + printf("#ifdef USE_JIT\n"); + printf ("\tif (opcode&0x80)\n" + "\t\tflush_icache();\n"); + printf("#endif\n"); + break; case i_CPUSHP: + printf ("\tflush_internals();\n"); + printf("#ifdef USE_JIT\n"); + printf ("\tif (opcode&0x80)\n" + "\t\tflush_icache();\n"); + printf("#endif\n"); + break; case i_CPUSHA: - /* gb-- srcreg now contains the cache field */ - printf ("\tif (srcreg&0x2)\n"); - printf ("\t\tflush_icache(%d);\n", (int)(40 + ((opcode >> 3) & 3))); + printf ("\tflush_internals();\n"); + printf("#ifdef USE_JIT\n"); + printf ("\tif (opcode&0x80)\n" + "\t\tflush_icache();\n"); + printf("#endif\n"); break; case i_MOVE16: - if ((opcode & 0xfff8) == 0xf620) { - /* MOVE16 (Ax)+,(Ay)+ */ - printf ("\tuaecptr mems = m68k_areg(regs, srcreg) & ~15, memd;\n"); - printf ("\tdstreg = (%s >> 12) & 7;\n", gen_nextiword()); - printf ("\tmemd = m68k_areg(regs, dstreg) & ~15;\n"); - printf ("\tput_long(memd, get_long(mems));\n"); - printf ("\tput_long(memd+4, get_long(mems+4));\n"); - printf ("\tput_long(memd+8, get_long(mems+8));\n"); - printf ("\tput_long(memd+12, get_long(mems+12));\n"); - printf ("\tif (srcreg != dstreg)\n"); - printf ("\tm68k_areg(regs, srcreg) += 16;\n"); - printf ("\tm68k_areg(regs, dstreg) += 16;\n"); - } - else { - /* Other variants */ - genamode (curi->smode, "srcreg", curi->size, "mems", 0, 2); - genamode (curi->dmode, "dstreg", curi->size, "memd", 0, 2); - printf ("\tmemsa &= ~15;\n"); - printf ("\tmemda &= ~15;\n"); - printf ("\tput_long(memda, get_long(memsa));\n"); - printf ("\tput_long(memda+4, get_long(memsa+4));\n"); - printf ("\tput_long(memda+8, get_long(memsa+8));\n"); - printf ("\tput_long(memda+12, get_long(memsa+12));\n"); - if ((opcode & 0xfff8) == 0xf600) - printf ("\tm68k_areg(regs, srcreg) += 16;\n"); - else if ((opcode & 0xfff8) == 0xf608) - printf ("\tm68k_areg(regs, dstreg) += 16;\n"); - } - break; + if ((opcode & 0xfff8) == 0xf620) { + /* MOVE16 (Ax)+,(Ay)+ */ + printf ("\tuaecptr mems = 
m68k_areg(regs, srcreg) & ~15, memd;\n"); + printf ("\tdstreg = (%s >> 12) & 7;\n", gen_nextiword()); + printf ("\tmemd = m68k_areg(regs, dstreg) & ~15;\n"); + printf ("\tput_long(memd, get_long(mems));\n"); + printf ("\tput_long(memd+4, get_long(mems+4));\n"); + printf ("\tput_long(memd+8, get_long(mems+8));\n"); + printf ("\tput_long(memd+12, get_long(mems+12));\n"); + printf ("\tif (srcreg != dstreg)\n"); + printf ("\tm68k_areg(regs, srcreg) += 16;\n"); + printf ("\tm68k_areg(regs, dstreg) += 16;\n"); + } else { + /* Other variants */ + genamode (curi->smode, "srcreg", curi->size, "mems", GENA_GETV_NO_FETCH, GENA_MOVEM_MOVE16, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "memd", GENA_GETV_NO_FETCH, GENA_MOVEM_MOVE16, XLATE_LOG); + printf ("\tmemsa &= ~15;\n"); + printf ("\tmemda &= ~15;\n"); + printf ("\tput_long(memda, get_long(memsa));\n"); + printf ("\tput_long(memda+4, get_long(memsa+4));\n"); + printf ("\tput_long(memda+8, get_long(memsa+8));\n"); + printf ("\tput_long(memda+12, get_long(memsa+12));\n"); + if ((opcode & 0xfff8) == 0xf600) + printf ("\tm68k_areg(regs, srcreg) += 16;\n"); + else if ((opcode & 0xfff8) == 0xf608) + printf ("\tm68k_areg(regs, dstreg) += 16;\n"); + } + break; case i_MMUOP: - genamode (curi->smode, "srcreg", curi->size, "extra", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); sync_m68k_pc (); swap_opcode (); printf ("\tmmu_op(opcode,extra);\n"); break; - - case i_EMULOP_RETURN: + + case i_EMULOP_RETURN: printf ("\tm68k_emulop_return();\n"); m68k_pc_offset = 0; break; - case i_EMULOP: + case i_EMULOP: printf ("\n"); swap_opcode (); printf ("\tm68k_emulop(opcode);\n"); break; - + + case i_NATFEAT_ID: + printf ("\n"); + printf ("\tm68k_natfeat_id();\n"); + break; + + case i_NATFEAT_CALL: + printf ("\n"); + printf ("\tm68k_natfeat_call();\n"); + break; + default: abort (); break; @@ -2257,73 +2446,194 @@ static void gen_opcode (unsigned long int opcode) 
static void generate_includes (FILE * f) { fprintf (f, "#include \"sysdeps.h\"\n"); - fprintf (f, "#include \"m68k.h\"\n"); fprintf (f, "#include \"memory.h\"\n"); fprintf (f, "#include \"readcpu.h\"\n"); fprintf (f, "#include \"newcpu.h\"\n"); + fprintf (f, "#ifdef USE_JIT\n"); fprintf (f, "#include \"compiler/compemu.h\"\n"); + fprintf (f, "#endif\n"); fprintf (f, "#include \"fpu/fpu.h\"\n"); fprintf (f, "#include \"cputbl.h\"\n"); + fprintf (f, "#include \"cpu_emulation.h\"\n"); + fprintf (f, "#include \"debug.h\"\n"); + + fprintf (f, "#define SET_CFLG_ALWAYS(x) SET_CFLG(x)\n"); + fprintf (f, "#define SET_NFLG_ALWAYS(x) SET_NFLG(x)\n"); + fprintf (f, "#define CPUFUNC_FF(x) x##_ff\n"); + fprintf (f, "#define CPUFUNC_NF(x) x##_nf\n"); + fprintf (f, "#define CPUFUNC(x) CPUFUNC_FF(x)\n"); - fprintf (f, "#define SET_CFLG_ALWAYS(x) SET_CFLG(x)\n"); - fprintf (f, "#define SET_NFLG_ALWAYS(x) SET_NFLG(x)\n"); - fprintf (f, "#define CPUFUNC_FF(x) x##_ff\n"); - fprintf (f, "#define CPUFUNC_NF(x) x##_nf\n"); - fprintf (f, "#define CPUFUNC(x) CPUFUNC_FF(x)\n"); - - fprintf (f, "#ifdef NOFLAGS\n"); - fprintf (f, "# include \"noflags.h\"\n"); - fprintf (f, "#endif\n"); + fprintf (f, "#ifdef NOFLAGS\n"); + fprintf (f, "# include \"noflags.h\"\n"); + fprintf (f, "#endif\n"); } static int postfix; +struct gencputbl { + char handler[80]; + uae_u16 specific; + uae_u16 opcode; + int namei; +}; +struct gencputbl cpustbl[65536]; +static int n_cpustbl; + +static char *decodeEA (amodes mode, wordsizes size) +{ + static char buffer[80]; + + buffer[0] = 0; + switch (mode){ + case Dreg: + strcpy (buffer,"Dn"); + break; + case Areg: + strcpy (buffer,"An"); + break; + case Aind: + strcpy (buffer,"(An)"); + break; + case Aipi: + strcpy (buffer,"(An)+"); + break; + case Apdi: + strcpy (buffer,"-(An)"); + break; + case Ad16: + strcpy (buffer,"(d16,An)"); + break; + case Ad8r: + strcpy (buffer,"(d8,An,Xn)"); + break; + case PC16: + strcpy (buffer,"(d16,PC)"); + break; + case PC8r: + strcpy 
(buffer,"(d8,PC,Xn)"); + break; + case absw: + strcpy (buffer,"(xxx).W"); + break; + case absl: + strcpy (buffer,"(xxx).L"); + break; + case imm: + switch (size){ + case sz_byte: + strcpy (buffer,"#.B"); + break; + case sz_word: + strcpy (buffer,"#.W"); + break; + case sz_long: + strcpy (buffer,"#.L"); + break; + default: + break; + } + break; + case imm0: + strcpy (buffer,"#.B"); + break; + case imm1: + strcpy (buffer,"#.W"); + break; + case imm2: + strcpy (buffer,"#.L"); + break; + case immi: + strcpy (buffer,"#"); + break; + + default: + break; + } + return buffer; +} + +static char *outopcode (const char *name, int opcode) +{ + static char out[100]; + struct instr *ins; + + ins = &table68k[opcode]; + strcpy (out, name); + if (ins->smode == immi) + strcat (out, "Q"); + if (ins->size == sz_byte) + strcat (out,".B"); + if (ins->size == sz_word) + strcat (out,".W"); + if (ins->size == sz_long) + strcat (out,".L"); + strcat (out," "); + if (ins->suse) + strcat (out, decodeEA (ins->smode, ins->size)); + if (ins->duse) { + if (ins->suse) strcat (out,","); + strcat (out, decodeEA (ins->dmode, ins->size)); + } + return out; +} + + static void generate_one_opcode (int rp) { + int i; uae_u16 smsk, dmsk; - long int opcode = opcode_map[rp]; - const char *opcode_str; + int opcode = opcode_map[rp]; + int have_realopcode = 0; + const char *name; if (table68k[opcode].mnemo == i_ILLG - || table68k[opcode].clev > (unsigned)cpu_level) + || table68k[opcode].clev > cpu_level) return; + for (i = 0; lookuptab[i].name[0]; i++) { + if (table68k[opcode].mnemo == lookuptab[i].mnemo) + break; + } + if (table68k[opcode].handler != -1) return; - opcode_str = get_instruction_string (opcode); - + name = lookuptab[i].name; if (opcode_next_clev[rp] != cpu_level) { - if (table68k[opcode].flagdead == 0) - /* force to the "ff" variant since the instruction doesn't set at all the condition codes */ - fprintf (stblfile, "{ CPUFUNC_FF(op_%lx_%d), 0, %ld }, /* %s */\n", opcode, opcode_last_postfix[rp], 
- opcode, opcode_str); - else - fprintf (stblfile, "{ CPUFUNC(op_%lx_%d), 0, %ld }, /* %s */\n", opcode, opcode_last_postfix[rp], - opcode, opcode_str); + sprintf(cpustbl[n_cpustbl].handler, "CPUFUNC(op_%x_%d)", opcode, opcode_last_postfix[rp]); + cpustbl[n_cpustbl].specific = 0; + cpustbl[n_cpustbl].opcode = opcode; + cpustbl[n_cpustbl].namei = i; + fprintf (stblfile, "{ %s, %d, %d }, /* %s */\n", cpustbl[n_cpustbl].handler, cpustbl[n_cpustbl].specific, opcode, name); + n_cpustbl++; return; } - + if (table68k[opcode].flagdead == 0) /* force to the "ff" variant since the instruction doesn't set at all the condition codes */ - fprintf (stblfile, "{ CPUFUNC_FF(op_%lx_%d), 0, %ld }, /* %s */\n", opcode, postfix, opcode, opcode_str); + sprintf (cpustbl[n_cpustbl].handler, "CPUFUNC_FF(op_%x_%d)", opcode, postfix); else - fprintf (stblfile, "{ CPUFUNC(op_%lx_%d), 0, %ld }, /* %s */\n", opcode, postfix, opcode, opcode_str); + sprintf (cpustbl[n_cpustbl].handler, "CPUFUNC(op_%x_%d)", opcode, postfix); + cpustbl[n_cpustbl].specific = 0; + cpustbl[n_cpustbl].opcode = opcode; + cpustbl[n_cpustbl].namei = i; + fprintf (stblfile, "{ %s, %d, %d }, /* %s */\n", cpustbl[n_cpustbl].handler, cpustbl[n_cpustbl].specific, opcode, name); + n_cpustbl++; - fprintf (headerfile, "extern cpuop_func op_%lx_%d_nf;\n", opcode, postfix); - fprintf (headerfile, "extern cpuop_func op_%lx_%d_ff;\n", opcode, postfix); - + fprintf (headerfile, "extern cpuop_func op_%x_%d_nf;\n", opcode, postfix); + fprintf (headerfile, "extern cpuop_func op_%x_%d_ff;\n", opcode, postfix); + + printf ("/* %s */\n", outopcode (name, opcode)); + printf ("void REGPARAM2 CPUFUNC(op_%x_%d)(uae_u32 opcode) /* %s */\n{\n", opcode, postfix, name); + printf ("\tcpuop_begin();\n"); /* gb-- The "nf" variant for an instruction that doesn't set the condition codes at all is the same as the "ff" variant, so we don't need the "nf" variant to be compiled since it is mapped to the "ff" variant in the smalltbl. 
*/ - if (table68k[opcode].flagdead == 0) + if (table68k[opcode].flagdead == 0) printf ("#ifndef NOFLAGS\n"); - printf ("void REGPARAM2 CPUFUNC(op_%lx_%d)(uae_u32 opcode) /* %s */\n{\n", opcode, postfix, opcode_str); - printf ("\tcpuop_begin();\n"); - switch (table68k[opcode].stype) { case 0: smsk = 7; break; case 1: smsk = 255; break; @@ -2331,8 +2641,8 @@ static void generate_one_opcode (int rp) case 3: smsk = 7; break; case 4: smsk = 7; break; case 5: smsk = 63; break; - case 6: smsk = 255; break; - case 7: smsk = 3; break; + case 6: smsk = 255; break; + case 7: smsk = 3; break; default: abort (); } dmsk = 7; @@ -2363,38 +2673,17 @@ static void generate_one_opcode (int rp) if (pos < 8 && (smsk >> (8 - pos)) != 0) abort (); #endif - printf ("#ifdef HAVE_GET_WORD_UNSWAPPED\n"); - - if (pos < 8 && (smsk >> (8 - pos)) != 0) - sprintf (source, "(((opcode >> %d) | (opcode << %d)) & %d)", - pos ^ 8, 8 - pos, dmsk); - else if (pos != 8) - sprintf (source, "((opcode >> %d) & %d)", pos ^ 8, smsk); - else - sprintf (source, "(opcode & %d)", smsk); - - if (table68k[opcode].stype == 3) - printf ("\tuae_u32 srcreg = imm8_table[%s];\n", source); - else if (table68k[opcode].stype == 1) - printf ("\tuae_u32 srcreg = (uae_s32)(uae_s8)%s;\n", source); - else - printf ("\tuae_u32 srcreg = %s;\n", source); - - printf ("#else\n"); - + real_opcode(&have_realopcode); if (pos) - sprintf (source, "((opcode >> %d) & %d)", pos, smsk); + sprintf (source, "((real_opcode >> %d) & %d)", pos, smsk); else - sprintf (source, "(opcode & %d)", smsk); - + sprintf (source, "(real_opcode & %d)", smsk); if (table68k[opcode].stype == 3) printf ("\tuae_u32 srcreg = imm8_table[%s];\n", source); else if (table68k[opcode].stype == 1) printf ("\tuae_u32 srcreg = (uae_s32)(uae_s8)%s;\n", source); else printf ("\tuae_u32 srcreg = %s;\n", source); - - printf ("#endif\n"); } } if (table68k[opcode].duse @@ -2414,27 +2703,13 @@ static void generate_one_opcode (int rp) /* Check that we can do the little endian 
optimization safely. */ if (pos < 8 && (dmsk >> (8 - pos)) != 0) abort (); -#endif - printf ("#ifdef HAVE_GET_WORD_UNSWAPPED\n"); - - if (pos < 8 && (dmsk >> (8 - pos)) != 0) - printf ("\tuae_u32 dstreg = ((opcode >> %d) | (opcode << %d)) & %d;\n", - pos ^ 8, 8 - pos, dmsk); - else if (pos != 8) - printf ("\tuae_u32 dstreg = (opcode >> %d) & %d;\n", - pos ^ 8, dmsk); - else - printf ("\tuae_u32 dstreg = opcode & %d;\n", dmsk); - - printf ("#else\n"); - +#endif + real_opcode(&have_realopcode); if (pos) - printf ("\tuae_u32 dstreg = (opcode >> %d) & %d;\n", + printf ("\tuae_u32 dstreg = (real_opcode >> %d) & %d;\n", pos, dmsk); else - printf ("\tuae_u32 dstreg = opcode & %d;\n", dmsk); - - printf ("#endif\n"); + printf ("\tuae_u32 dstreg = real_opcode & %d;\n", dmsk); } } need_endlabel = 0; @@ -2443,10 +2718,10 @@ static void generate_one_opcode (int rp) gen_opcode (opcode); if (need_endlabel) printf ("%s: ;\n", endlabelstr); - printf ("\tcpuop_end();\n"); - printf ("}\n"); - if (table68k[opcode].flagdead == 0) + if (table68k[opcode].flagdead == 0) printf ("\n#endif\n"); + printf ("\tcpuop_end();\n"); + printf ("}\n"); opcode_next_clev[rp] = next_cpu_level; opcode_last_postfix[rp] = postfix; } @@ -2457,36 +2732,18 @@ static void generate_func (void) using_prefetch = 0; using_exception_3 = 0; -#if !USE_PREFETCH_BUFFER - /* gb-- No need for a prefetch buffer, nor exception 3 handling */ - /* Anyway, Basilisk2 does not use the op_smalltbl_5 table... */ - for (i = 0; i <= 4; i++) { -#else - for (i = 0; i < 6; i++) { -#endif - cpu_level = 4 - i; - if (i == 5) { - cpu_level = 0; - using_prefetch = 1; - using_exception_3 = 1; - for (rp = 0; rp < nr_cpuop_funcs; rp++) - opcode_next_clev[rp] = 0; - } - postfix = i; - fprintf (stblfile, "struct cputbl CPUFUNC(op_smalltbl_%d)[] = {\n", postfix); - /* Disable spurious warnings. 
*/ - printf ("\n" - "#ifdef _MSC_VER\n" - "#pragma warning(disable:4102) /* unreferenced label */\n" - "#endif\n"); + for (i = 0; i < 1; i++) { + cpu_level = 4 - i; + postfix = i; + fprintf (stblfile, "const struct cputbl CPUFUNC(op_smalltbl_%d)[] = {\n", postfix); /* sam: this is for people with low memory (eg. me :)) */ printf ("\n" - "#if !defined(PART_1) && !defined(PART_2) && " - "!defined(PART_3) && !defined(PART_4) && " - "!defined(PART_5) && !defined(PART_6) && " - "!defined(PART_7) && !defined(PART_8)" + "#if !defined(PART_1) && !defined(PART_2) && " + "!defined(PART_3) && !defined(PART_4) && " + "!defined(PART_5) && !defined(PART_6) && " + "!defined(PART_7) && !defined(PART_8)" "\n" "#define PART_1 1\n" "#define PART_2 1\n" @@ -2497,8 +2754,8 @@ static void generate_func (void) "#define PART_7 1\n" "#define PART_8 1\n" "#endif\n\n"); - rp = 0; + n_cpustbl = 0; for(j=1;j<=8;++j) { int k = (j*nr_cpuop_funcs)/8; printf ("#ifdef PART_%d\n",j); @@ -2506,16 +2763,90 @@ static void generate_func (void) generate_one_opcode (rp); printf ("#endif\n\n"); } - fprintf (stblfile, "{ 0, 0, 0 }};\n"); } } -int main (int argc, char **argv) +static struct { + const char *handler; + const char *name; +} cpufunctbl[65536]; +static char const op_illg_1[] = "op_illg_1"; +static char const illegal[] = "ILLEGAL"; + +static void generate_functbl (void) { - FILE *out; - read_table68k (); - do_merges (); + int i; + unsigned int opcode; + int cpu_level = 4; + struct gencputbl *tbl = cpustbl; + + for (opcode = 0; opcode < 65536; opcode++) + { + cpufunctbl[opcode].handler = op_illg_1; + cpufunctbl[opcode].name = illegal; + } + for (i = 0; i < n_cpustbl; i++) + { + if (! 
tbl[i].specific) + { + cpufunctbl[tbl[i].opcode].handler = tbl[i].handler; + cpufunctbl[tbl[i].opcode].name = lookuptab[tbl[i].namei].name; + } + } + for (opcode = 0; opcode < 65536; opcode++) + { + const char *f; + + if (table68k[opcode].mnemo == i_ILLG || (unsigned)table68k[opcode].clev > (unsigned)cpu_level) + continue; + + if (table68k[opcode].handler != -1) + { + f = cpufunctbl[table68k[opcode].handler].handler; + if (f == op_illg_1) + abort(); + cpufunctbl[opcode].handler = f; + cpufunctbl[opcode].name = cpufunctbl[table68k[opcode].handler].name; + } + } + for (i = 0; i < n_cpustbl; i++) + { + if (tbl[i].specific) + { + cpufunctbl[tbl[i].opcode].handler = tbl[i].handler; + cpufunctbl[tbl[i].opcode].name = lookuptab[tbl[i].namei].name; + } + } + + fprintf(functblfile, "\n"); + fprintf(functblfile, "cpuop_func *cpufunctbl[65536] = {\n"); + fprintf(functblfile, "#if !defined(HAVE_GET_WORD_UNSWAPPED) || defined(FULLMMU)\n"); + for (opcode = 0; opcode < 65536; opcode++) + { + fprintf(functblfile, "\t%s%s /* %s */\n", cpufunctbl[opcode].handler, opcode < 65535 ? "," : "", cpufunctbl[opcode].name); + } + fprintf(functblfile, "#else\n"); + for (opcode = 0; opcode < 65536; opcode++) + { + unsigned int map = do_byteswap_16(opcode); + fprintf(functblfile, "\t%s%s /* %s */\n", cpufunctbl[map].handler, opcode < 65535 ? "," : "", cpufunctbl[map].name); + } + fprintf(functblfile, "#endif\n"); + fprintf(functblfile, "};\n"); +} + +#if (defined(OS_cygwin) || defined(OS_mingw)) && defined(EXTENDED_SIGSEGV) +void cygwin_mingw_abort() +{ +#undef abort + abort(); +} +#endif + +int main(void) +{ + init_table68k (); opcode_map = (int *) malloc (sizeof (int) * nr_cpuop_funcs); opcode_last_postfix = (int *) malloc (sizeof (int) * nr_cpuop_funcs); @@ -2527,32 +2858,28 @@ int main (int argc, char **argv) * cputbl.h that way), but cpuopti can't cope. That could be fixed, but * I don't dare to touch the 68k version. 
*/ - headerfile = fopen ("cputbl.h", "w"); - stblfile = fopen ("cpustbl.cpp", "w"); - out = freopen ("cpuemu.cpp", "w", stdout); + if ((headerfile = fopen ("cputbl.h", "wb")) == NULL) + abort(); + if ((stblfile = fopen ("cpustbl.cpp", "wb")) == NULL) + abort(); + if ((functblfile = fopen ("cpufunctbl.cpp", "wb")) == NULL) + abort(); + if (freopen ("cpuemu.cpp", "wb", stdout) == NULL) + abort(); generate_includes (stdout); + fprintf(stdout, "#ifdef HAVE_CFLAG_NO_REDZONE\n"); + fprintf(stdout, "#ifndef NOFLAGS\n"); + fprintf(stdout, "#pragma GCC option \"-mno-red-zone\"\n"); + fprintf(stdout, "#endif\n"); + fprintf(stdout, "#endif\n"); generate_includes (stblfile); - + generate_includes (functblfile); generate_func (); - + generate_functbl (); free (table68k); - fclose (headerfile); - fclose (stblfile); - fflush (out); - - /* For build systems (IDEs mainly) that don't make it easy to compile the - * same file twice with different settings. */ - stblfile = fopen ("cpustbl_nf.cpp", "w"); - out = freopen ("cpuemu_nf.cpp", "w", stdout); - - fprintf (stblfile, "#define NOFLAGS\n"); - fprintf (stblfile, "#include \"cpustbl.cpp\"\n"); - fclose (stblfile); - - printf ("#define NOFLAGS\n"); - printf ("#include \"cpuemu.cpp\"\n"); - fflush (out); - + fclose(headerfile); + fclose(stblfile); + fclose(functblfile); return 0; } diff --git a/BasiliskII/src/uae_cpu/m68k.h b/BasiliskII/src/uae_cpu/m68k.h index f329cb3e..c307bdfd 100644 --- a/BasiliskII/src/uae_cpu/m68k.h +++ b/BasiliskII/src/uae_cpu/m68k.h @@ -1,91 +1,147 @@ -/* - * UAE - The Un*x Amiga Emulator +/* + * m68k.h - machine dependent bits + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) * - * MC68000 emulation - machine dependent bits + * Inspired by Christian Bauer's Basilisk II * - * Copyright 1996 Bernd Schmidt + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. 
* - * This program is free software; you can redistribute it and/or modify + * ARAnyM is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * - * This program is distributed in the hope that it will be useful, + * ARAnyM is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with ARAnyM; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + /* + * UAE - The Un*x Amiga Emulator + * + * MC68000 emulation - machine dependent bits + * + * Copyright 1996 Bernd Schmidt + * + */ #ifndef M68K_FLAGS_H #define M68K_FLAGS_H #ifdef OPTIMIZED_FLAGS -#if defined(X86_ASSEMBLY) || defined(X86_64_ASSEMBLY) || defined(MSVC_INTRINSICS) +#if (defined(CPU_i386) && defined(X86_ASSEMBLY)) || (defined(CPU_x86_64) && defined(X86_64_ASSEMBLY)) + +#ifdef __cplusplus +# include <cstdlib> +#else +# include <stdlib.h> +#endif #ifndef SAHF_SETO_PROFITABLE +/* + * Machine dependent structure for holding the 68k CCR flags + */ /* PUSH/POP instructions are naturally 64-bit sized on x86-64, thus unsigned long hereunder is either 64-bit or 32-bit wide depending on the target. */ struct flag_struct { - unsigned long cznv; - unsigned long x; +#if defined(CPU_x86_64) + uint64 cznv; + uint64 x; +#else + uint32 cznv; + uint32 x; +#endif }; -#define FLAGVAL_Z 0x40 -#define FLAGVAL_N 0x80 +/* + * The bits in the cznv field in the above structure are assigned to + * allow the easy mirroring of the x86 rFLAGS register.
+ * + * The 68k CZNV flags are thus assigned in cznv as: + * + * 76543210 FEDCBA98 --------- --------- + * SZxxxxxC xxxxVxxx xxxxxxxxx xxxxxxxxx + */ -#define SET_ZFLG(y) (regflags.cznv = (((uae_u32)regflags.cznv) & ~0x40) | (((y) & 1) << 6)) -#define SET_CFLG(y) (regflags.cznv = (((uae_u32)regflags.cznv) & ~1) | ((y) & 1)) -#define SET_VFLG(y) (regflags.cznv = (((uae_u32)regflags.cznv) & ~0x800) | (((y) & 1) << 11)) -#define SET_NFLG(y) (regflags.cznv = (((uae_u32)regflags.cznv) & ~0x80) | (((y) & 1) << 7)) -#define SET_XFLG(y) (regflags.x = (y)) +#define FLAGBIT_N 7 +#define FLAGBIT_Z 6 +#define FLAGBIT_C 0 +#define FLAGBIT_V 11 +#define FLAGBIT_X 0 /* must be in position 0 for duplicate_carry() to work */ -#define GET_ZFLG ((regflags.cznv >> 6) & 1) -#define GET_CFLG (regflags.cznv & 1) -#define GET_VFLG ((regflags.cznv >> 11) & 1) -#define GET_NFLG ((regflags.cznv >> 7) & 1) -#define GET_XFLG (regflags.x & 1) +#define FLAGVAL_N (1 << FLAGBIT_N) +#define FLAGVAL_Z (1 << FLAGBIT_Z) +#define FLAGVAL_C (1 << FLAGBIT_C) +#define FLAGVAL_V (1 << FLAGBIT_V) +#define FLAGVAL_X (1 << FLAGBIT_X) -#define CLEAR_CZNV (regflags.cznv = 0) -#define GET_CZNV (regflags.cznv) -#define IOR_CZNV(X) (regflags.cznv |= (X)) -#define SET_CZNV(X) (regflags.cznv = (X)) +#define SET_ZFLG(y) (regflags.cznv = (((uae_u32)regflags.cznv) & ~FLAGVAL_Z) | (((y) & 1) << FLAGBIT_Z)) +#define SET_CFLG(y) (regflags.cznv = (((uae_u32)regflags.cznv) & ~FLAGVAL_C) | (((y) & 1) << FLAGBIT_C)) +#define SET_VFLG(y) (regflags.cznv = (((uae_u32)regflags.cznv) & ~FLAGVAL_V) | (((y) & 1) << FLAGBIT_V)) +#define SET_NFLG(y) (regflags.cznv = (((uae_u32)regflags.cznv) & ~FLAGVAL_N) | (((y) & 1) << FLAGBIT_N)) +#define SET_XFLG(y) (regflags.x = ((y) & 1) << FLAGBIT_X) -#define COPY_CARRY (regflags.x = regflags.cznv) +#define GET_ZFLG() ((regflags.cznv >> FLAGBIT_Z) & 1) +#define GET_CFLG() ((regflags.cznv >> FLAGBIT_C) & 1) +#define GET_VFLG() ((regflags.cznv >> FLAGBIT_V) & 1) +#define GET_NFLG() 
((regflags.cznv >> FLAGBIT_N) & 1) +#define GET_XFLG() ((regflags.x >> FLAGBIT_X) & 1) -extern struct flag_struct regflags ASM_SYM ("regflags"); +#define CLEAR_CZNV() (regflags.cznv = 0) +#define GET_CZNV() (regflags.cznv) +#define IOR_CZNV(X) (regflags.cznv |= (X)) +#define SET_CZNV(X) (regflags.cznv = (X)) -static __inline__ int cctrue(int cc) +#define COPY_CARRY() (regflags.x = regflags.cznv >> (FLAGBIT_C - FLAGBIT_X)) + +extern struct flag_struct regflags __asm__ ("regflags"); + +/* + * Test CCR condition + */ +static inline int cctrue(int cc) { uae_u32 cznv = regflags.cznv; - switch(cc){ - case 0: return 1; /* T */ - case 1: return 0; /* F */ - case 2: return (cznv & 0x41) == 0; /* !GET_CFLG && !GET_ZFLG; HI */ - case 3: return (cznv & 0x41) != 0; /* GET_CFLG || GET_ZFLG; LS */ - case 4: return (cznv & 1) == 0; /* !GET_CFLG; CC */ - case 5: return (cznv & 1) != 0; /* GET_CFLG; CS */ - case 6: return (cznv & 0x40) == 0; /* !GET_ZFLG; NE */ - case 7: return (cznv & 0x40) != 0; /* GET_ZFLG; EQ */ - case 8: return (cznv & 0x800) == 0;/* !GET_VFLG; VC */ - case 9: return (cznv & 0x800) != 0;/* GET_VFLG; VS */ - case 10:return (cznv & 0x80) == 0; /* !GET_NFLG; PL */ - case 11:return (cznv & 0x80) != 0; /* GET_NFLG; MI */ - case 12:return (((cznv << 4) ^ cznv) & 0x800) == 0; /* GET_NFLG == GET_VFLG; GE */ - case 13:return (((cznv << 4) ^ cznv) & 0x800) != 0;/* GET_NFLG != GET_VFLG; LT */ - case 14: - cznv &= 0x8c0; - return (((cznv << 4) ^ cznv) & 0x840) == 0; /* !GET_ZFLG && (GET_NFLG == GET_VFLG); GT */ - case 15: - cznv &= 0x8c0; - return (((cznv << 4) ^ cznv) & 0x840) != 0; /* GET_ZFLG || (GET_NFLG != GET_VFLG); LE */ + + switch (cc) { + case 0: return 1; /* T */ + case 1: return 0; /* F */ + case 2: return (cznv & (FLAGVAL_C | FLAGVAL_Z)) == 0; /* !CFLG && !ZFLG HI */ + case 3: return (cznv & (FLAGVAL_C | FLAGVAL_Z)) != 0; /* CFLG || ZFLG LS */ + case 4: return (cznv & FLAGVAL_C) == 0; /* !CFLG CC */ + case 5: return (cznv & FLAGVAL_C) != 0; /* CFLG CS */ + case 
6: return (cznv & FLAGVAL_Z) == 0; /* !ZFLG NE */ + case 7: return (cznv & FLAGVAL_Z) != 0; /* ZFLG EQ */ + case 8: return (cznv & FLAGVAL_V) == 0; /* !VFLG VC */ + case 9: return (cznv & FLAGVAL_V) != 0; /* VFLG VS */ + case 10: return (cznv & FLAGVAL_N) == 0; /* !NFLG PL */ + case 11: return (cznv & FLAGVAL_N) != 0; /* NFLG MI */ +#if FLAGBIT_N > FLAGBIT_V + case 12: return (((cznv << (FLAGBIT_N - FLAGBIT_V)) ^ cznv) & FLAGVAL_N) == 0; /* NFLG == VFLG GE */ + case 13: return (((cznv << (FLAGBIT_N - FLAGBIT_V)) ^ cznv) & FLAGVAL_N) != 0; /* NFLG != VFLG LT */ + case 14: cznv &= (FLAGVAL_N | FLAGVAL_Z | FLAGVAL_V); /* !ZFLG && (NFLG == VFLG) GT */ + return (((cznv << (FLAGBIT_N - FLAGBIT_V)) ^ cznv) & (FLAGVAL_N | FLAGVAL_Z)) == 0; + case 15: cznv &= (FLAGVAL_N | FLAGVAL_Z | FLAGVAL_V); /* ZFLG || (NFLG != VFLG) LE */ + return (((cznv << (FLAGBIT_N - FLAGBIT_V)) ^ cznv) & (FLAGVAL_N | FLAGVAL_Z)) != 0; +#else + case 12: return (((cznv << (FLAGBIT_V - FLAGBIT_N)) ^ cznv) & FLAGVAL_V) == 0; /* NFLG == VFLG GE */ + case 13: return (((cznv << (FLAGBIT_V - FLAGBIT_N)) ^ cznv) & FLAGVAL_V) != 0; /* NFLG != VFLG LT */ + case 14: cznv &= (FLAGVAL_N | FLAGVAL_Z | FLAGVAL_V); /* !ZFLG && (NFLG == VFLG) GT */ + return (((cznv << (FLAGBIT_V - FLAGBIT_N)) ^ cznv) & (FLAGVAL_V | FLAGVAL_Z)) == 0; + case 15: cznv &= (FLAGVAL_N | FLAGVAL_Z | FLAGVAL_V); /* ZFLG || (NFLG != VFLG) LE */ + return (((cznv << (FLAGBIT_V - FLAGBIT_N)) ^ cznv) & (FLAGVAL_V | FLAGVAL_Z)) != 0; +#endif } + abort (); return 0; } @@ -93,34 +149,34 @@ static __inline__ int cctrue(int cc) __asm__ __volatile__ ("andl %1,%1\n\t" \ "pushf\n\t" \ "pop %0\n\t" \ - : "=r" (regflags.cznv) : "r" (v) : "cc") + : "=rm" (regflags.cznv) : "r" (v) : "memory", "cc") #define optflag_testw(v) \ __asm__ __volatile__ ("andw %w1,%w1\n\t" \ "pushf\n\t" \ "pop %0\n\t" \ - : "=r" (regflags.cznv) : "r" (v) : "cc") + : "=rm" (regflags.cznv) : "r" (v) : "memory", "cc") #define optflag_testb(v) \ __asm__ __volatile__ ("andb 
%b1,%b1\n\t" \ "pushf\n\t" \ "pop %0\n\t" \ - : "=r" (regflags.cznv) : "q" (v) : "cc") + : "=rm" (regflags.cznv) : "q" (v) : "memory", "cc") #define optflag_addl(v, s, d) do { \ __asm__ __volatile__ ("addl %k2,%k1\n\t" \ "pushf\n\t" \ "pop %0\n\t" \ - : "=r" (regflags.cznv), "=r" (v) : "rmi" (s), "1" (d) : "cc"); \ - COPY_CARRY; \ + : "=rm" (regflags.cznv), "=r" (v) : "rmi" (s), "1" (d) : "memory", "cc"); \ + COPY_CARRY(); \ } while (0) #define optflag_addw(v, s, d) do { \ __asm__ __volatile__ ("addw %w2,%w1\n\t" \ "pushf\n\t" \ "pop %0\n\t" \ - : "=r" (regflags.cznv), "=r" (v) : "rmi" (s), "1" (d) : "cc"); \ - COPY_CARRY; \ + : "=rm" (regflags.cznv), "=r" (v) : "rmi" (s), "1" (d) : "memory", "cc"); \ + COPY_CARRY(); \ } while (0) #define optflag_addb(v, s, d) do { \ @@ -128,113 +184,151 @@ static __inline__ int cctrue(int cc) "pushf\n\t" \ "pop %0\n\t" \ : "=r" (regflags.cznv), "=q" (v) : "qmi" (s), "1" (d) : "cc"); \ - COPY_CARRY; \ + COPY_CARRY(); \ } while (0) #define optflag_subl(v, s, d) do { \ __asm__ __volatile__ ("subl %k2,%k1\n\t" \ "pushf\n\t" \ "pop %0\n\t" \ - : "=r" (regflags.cznv), "=r" (v) : "rmi" (s), "1" (d) : "cc"); \ - COPY_CARRY; \ + : "=rm" (regflags.cznv), "=r" (v) : "rmi" (s), "1" (d) : "memory", "cc"); \ + COPY_CARRY(); \ } while (0) #define optflag_subw(v, s, d) do { \ __asm__ __volatile__ ("subw %w2,%w1\n\t" \ "pushf\n\t" \ "pop %0\n\t" \ - : "=r" (regflags.cznv), "=r" (v) : "rmi" (s), "1" (d) : "cc"); \ - COPY_CARRY; \ + : "=rm" (regflags.cznv), "=r" (v) : "rmi" (s), "1" (d) : "memory", "cc"); \ + COPY_CARRY(); \ } while (0) #define optflag_subb(v, s, d) do { \ __asm__ __volatile__ ("subb %b2,%b1\n\t" \ "pushf\n\t" \ "pop %0\n\t" \ - : "=r" (regflags.cznv), "=q" (v) : "qmi" (s), "1" (d) : "cc"); \ - COPY_CARRY; \ + : "=rm" (regflags.cznv), "=q" (v) : "qmi" (s), "1" (d) : "memory", "cc"); \ + COPY_CARRY(); \ } while (0) #define optflag_cmpl(s, d) \ __asm__ __volatile__ ("cmpl %k1,%k2\n\t" \ "pushf\n\t" \ "pop %0\n\t" \ - : "=r" 
(regflags.cznv) : "rmi" (s), "r" (d) : "cc") + : "=rm" (regflags.cznv) : "rmi" (s), "r" (d) : "memory", "cc") #define optflag_cmpw(s, d) \ __asm__ __volatile__ ("cmpw %w1,%w2\n\t" \ "pushf\n\t" \ "pop %0\n\t" \ - : "=r" (regflags.cznv) : "rmi" (s), "r" (d) : "cc") + : "=rm" (regflags.cznv) : "rmi" (s), "r" (d) : "memory", "cc") #define optflag_cmpb(s, d) \ __asm__ __volatile__ ("cmpb %b1,%b2\n\t" \ "pushf\n\t" \ "pop %0\n\t" \ - : "=r" (regflags.cznv) : "qmi" (s), "q" (d) : "cc") + : "=rm" (regflags.cznv) : "qmi" (s), "q" (d) : "memory", "cc") -#else +#else /* !SAHF_SETO_PROFITABLE */ +/* + * Machine dependent structure for holding the 68k CCR flags + */ struct flag_struct { - uae_u32 cznv; - uae_u32 x; + uae_u32 cznv; + uae_u32 x; }; -#define FLAGVAL_Z 0x4000 -#define FLAGVAL_N 0x8000 +extern struct flag_struct regflags __asm__ ("regflags"); -#define SET_ZFLG(y) (regflags.cznv = (regflags.cznv & ~0x4000) | (((y) & 1) << 14)) -#define SET_CFLG(y) (regflags.cznv = (regflags.cznv & ~0x100) | (((y) & 1) << 8)) -#define SET_VFLG(y) (regflags.cznv = (regflags.cznv & ~0x1) | (((y) & 1))) -#define SET_NFLG(y) (regflags.cznv = (regflags.cznv & ~0x8000) | (((y) & 1) << 15)) -#define SET_XFLG(y) (regflags.x = (y)) +/* + * The bits in the cznv field in the above structure are assigned to + * allow the easy mirroring of the x86 condition flags. (For example, + * from the AX register - the x86 overflow flag can be copied to AL + * with a setto %AL instr and the other flags copied to AH with an + * lahf instr). 
+ * + * The 68k CZNV flags are thus assigned in cznv as: + * + * <--AL--> <--AH--> + * 76543210 FEDCBA98 --------- --------- + * xxxxxxxV NZxxxxxC xxxxxxxxx xxxxxxxxx + */ -#define GET_ZFLG ((regflags.cznv >> 14) & 1) -#define GET_CFLG ((regflags.cznv >> 8) & 1) -#define GET_VFLG ((regflags.cznv >> 0) & 1) -#define GET_NFLG ((regflags.cznv >> 15) & 1) -#define GET_XFLG (regflags.x & 1) +#define FLAGBIT_N 15 +#define FLAGBIT_Z 14 +#define FLAGBIT_C 8 +#define FLAGBIT_V 0 +#define FLAGBIT_X 0 /* must be in position 0 for duplicate_carry() to work */ -#define CLEAR_CZNV (regflags.cznv = 0) -#define GET_CZNV (regflags.cznv) -#define IOR_CZNV(X) (regflags.cznv |= (X)) -#define SET_CZNV(X) (regflags.cznv = (X)) +#define FLAGVAL_N (1 << FLAGBIT_N) +#define FLAGVAL_Z (1 << FLAGBIT_Z) +#define FLAGVAL_C (1 << FLAGBIT_C) +#define FLAGVAL_V (1 << FLAGBIT_V) +#define FLAGVAL_X (1 << FLAGBIT_X) -#define COPY_CARRY (regflags.x = (regflags.cznv)>>8) +#define SET_ZFLG(y) (regflags.cznv = (((uae_u32)regflags.cznv) & ~FLAGVAL_Z) | (((y) & 1) << FLAGBIT_Z)) +#define SET_CFLG(y) (regflags.cznv = (((uae_u32)regflags.cznv) & ~FLAGVAL_C) | (((y) & 1) << FLAGBIT_C)) +#define SET_VFLG(y) (regflags.cznv = (((uae_u32)regflags.cznv) & ~FLAGVAL_V) | (((y) & 1) << FLAGBIT_V)) +#define SET_NFLG(y) (regflags.cznv = (((uae_u32)regflags.cznv) & ~FLAGVAL_N) | (((y) & 1) << FLAGBIT_N)) +#define SET_XFLG(y) (regflags.x = ((y) & 1) << FLAGBIT_X) -extern struct flag_struct regflags ASM_SYM("regflags"); +#define GET_ZFLG() ((regflags.cznv >> FLAGBIT_Z) & 1) +#define GET_CFLG() ((regflags.cznv >> FLAGBIT_C) & 1) +#define GET_VFLG() ((regflags.cznv >> FLAGBIT_V) & 1) +#define GET_NFLG() ((regflags.cznv >> FLAGBIT_N) & 1) +#define GET_XFLG() ((regflags.x >> FLAGBIT_X) & 1) -static __inline__ int cctrue(int cc) +#define CLEAR_CZNV() (regflags.cznv = 0) +#define GET_CZNV() (regflags.cznv) +#define IOR_CZNV(X) (regflags.cznv |= (X)) +#define SET_CZNV(X) (regflags.cznv = (X)) + +#define COPY_CARRY() (regflags.x 
= regflags.cznv >> (FLAGBIT_C - FLAGBIT_X)) + + +/* + * Test CCR condition + */ +static inline int cctrue(int cc) { uae_u32 cznv = regflags.cznv; - switch(cc){ - case 0: return 1; /* T */ - case 1: return 0; /* F */ - case 2: return (cznv & 0x4100) == 0; /* !GET_CFLG && !GET_ZFLG; HI */ - case 3: return (cznv & 0x4100) != 0; /* GET_CFLG || GET_ZFLG; LS */ - case 4: return (cznv & 0x100) == 0; /* !GET_CFLG; CC */ - case 5: return (cznv & 0x100) != 0; /* GET_CFLG; CS */ - case 6: return (cznv & 0x4000) == 0; /* !GET_ZFLG; NE */ - case 7: return (cznv & 0x4000) != 0; /* GET_ZFLG; EQ */ - case 8: return (cznv & 0x01) == 0; /* !GET_VFLG; VC */ - case 9: return (cznv & 0x01) != 0; /* GET_VFLG; VS */ - case 10:return (cznv & 0x8000) == 0; /* !GET_NFLG; PL */ - case 11:return (cznv & 0x8000) != 0; /* GET_NFLG; MI */ - case 12:return (((cznv << 15) ^ cznv) & 0x8000) == 0; /* GET_NFLG == GET_VFLG; GE */ - case 13:return (((cznv << 15) ^ cznv) & 0x8000) != 0;/* GET_NFLG != GET_VFLG; LT */ - case 14: - cznv &= 0xc001; - return (((cznv << 15) ^ cznv) & 0xc000) == 0; /* !GET_ZFLG && (GET_NFLG == GET_VFLG); GT */ - case 15: - cznv &= 0xc001; - return (((cznv << 15) ^ cznv) & 0xc000) != 0; /* GET_ZFLG || (GET_NFLG != GET_VFLG); LE */ + + switch (cc) { + case 0: return 1; /* T */ + case 1: return 0; /* F */ + case 2: return (cznv & (FLAGVAL_C | FLAGVAL_Z)) == 0; /* !CFLG && !ZFLG HI */ + case 3: return (cznv & (FLAGVAL_C | FLAGVAL_Z)) != 0; /* CFLG || ZFLG LS */ + case 4: return (cznv & FLAGVAL_C) == 0; /* !CFLG CC */ + case 5: return (cznv & FLAGVAL_C) != 0; /* CFLG CS */ + case 6: return (cznv & FLAGVAL_Z) == 0; /* !ZFLG NE */ + case 7: return (cznv & FLAGVAL_Z) != 0; /* ZFLG EQ */ + case 8: return (cznv & FLAGVAL_V) == 0; /* !VFLG VC */ + case 9: return (cznv & FLAGVAL_V) != 0; /* VFLG VS */ + case 10: return (cznv & FLAGVAL_N) == 0; /* !NFLG PL */ + case 11: return (cznv & FLAGVAL_N) != 0; /* NFLG MI */ +#if FLAGBIT_N > FLAGBIT_V + case 12: return (((cznv << (FLAGBIT_N - 
FLAGBIT_V)) ^ cznv) & FLAGVAL_N) == 0; /* NFLG == VFLG GE */ + case 13: return (((cznv << (FLAGBIT_N - FLAGBIT_V)) ^ cznv) & FLAGVAL_N) != 0; /* NFLG != VFLG LT */ + case 14: cznv &= (FLAGVAL_N | FLAGVAL_Z | FLAGVAL_V); /* !ZFLG && (NFLG == VFLG) GT */ + return (((cznv << (FLAGBIT_N - FLAGBIT_V)) ^ cznv) & (FLAGVAL_N | FLAGVAL_Z)) == 0; + case 15: cznv &= (FLAGVAL_N | FLAGVAL_Z | FLAGVAL_V); /* ZFLG || (NFLG != VFLG) LE */ + return (((cznv << (FLAGBIT_N - FLAGBIT_V)) ^ cznv) & (FLAGVAL_N | FLAGVAL_Z)) != 0; +#else + case 12: return (((cznv << (FLAGBIT_V - FLAGBIT_N)) ^ cznv) & FLAGVAL_V) == 0; /* NFLG == VFLG GE */ + case 13: return (((cznv << (FLAGBIT_V - FLAGBIT_N)) ^ cznv) & FLAGVAL_V) != 0; /* NFLG != VFLG LT */ + case 14: cznv &= (FLAGVAL_N | FLAGVAL_Z | FLAGVAL_V); /* !ZFLG && (NFLG == VFLG) GT */ + return (((cznv << (FLAGBIT_V - FLAGBIT_N)) ^ cznv) & (FLAGVAL_V | FLAGVAL_Z)) == 0; + case 15: cznv &= (FLAGVAL_N | FLAGVAL_Z | FLAGVAL_V); /* ZFLG || (NFLG != VFLG) LE */ + return (((cznv << (FLAGBIT_V - FLAGBIT_N)) ^ cznv) & (FLAGVAL_V | FLAGVAL_Z)) != 0; +#endif } - abort(); + abort (); return 0; } /* Manually emit LAHF instruction so that 64-bit assemblers can grok it */ -#if defined __x86_64__ && defined __GNUC__ +#if defined CPU_x86_64 && defined __GNUC__ #define ASM_LAHF ".byte 0x9f" #else #define ASM_LAHF "lahf" @@ -273,7 +367,7 @@ static __inline__ int cctrue(int cc) "movb %%al,regflags\n\t" \ "movb %%ah,regflags+1\n\t" \ : "=r" (v) : "rmi" (s), "0" (d) : "%eax","cc","memory"); \ - COPY_CARRY; \ + COPY_CARRY(); \ } while (0) #define optflag_addw(v, s, d) do { \ @@ -283,7 +377,7 @@ static __inline__ int cctrue(int cc) "movb %%al,regflags\n\t" \ "movb %%ah,regflags+1\n\t" \ : "=r" (v) : "rmi" (s), "0" (d) : "%eax","cc","memory"); \ - COPY_CARRY; \ + COPY_CARRY(); \ } while (0) #define optflag_addb(v, s, d) do { \ @@ -293,7 +387,7 @@ static __inline__ int cctrue(int cc) "movb %%al,regflags\n\t" \ "movb %%ah,regflags+1\n\t" \ : "=q" (v) : "qmi" (s), "0" (d) : 
"%eax","cc","memory"); \ - COPY_CARRY; \ + COPY_CARRY(); \ } while (0) #define optflag_subl(v, s, d) do { \ @@ -303,7 +397,7 @@ static __inline__ int cctrue(int cc) "movb %%al,regflags\n\t" \ "movb %%ah,regflags+1\n\t" \ : "=r" (v) : "rmi" (s), "0" (d) : "%eax","cc","memory"); \ - COPY_CARRY; \ + COPY_CARRY(); \ } while (0) #define optflag_subw(v, s, d) do { \ @@ -313,7 +407,7 @@ static __inline__ int cctrue(int cc) "movb %%al,regflags\n\t" \ "movb %%ah,regflags+1\n\t" \ : "=r" (v) : "rmi" (s), "0" (d) : "%eax","cc","memory"); \ - COPY_CARRY; \ + COPY_CARRY(); \ } while (0) #define optflag_subb(v, s, d) do { \ @@ -323,7 +417,7 @@ static __inline__ int cctrue(int cc) "movb %%al,regflags\n\t" \ "movb %%ah,regflags+1\n\t" \ : "=q" (v) : "qmi" (s), "0" (d) : "%eax","cc","memory"); \ - COPY_CARRY; \ + COPY_CARRY(); \ } while (0) #define optflag_cmpl(s, d) \ @@ -340,7 +434,7 @@ static __inline__ int cctrue(int cc) "seto %%al\n\t" \ "movb %%al,regflags\n\t" \ "movb %%ah,regflags+1\n\t" \ - : : "rmi" (s), "r" (d) : "%eax","cc","memory"); + : : "rmi" (s), "r" (d) : "%eax","cc","memory") #define optflag_cmpb(s, d) \ __asm__ __volatile__ ("cmpb %b0,%b1\n\t" \ @@ -350,10 +444,306 @@ static __inline__ int cctrue(int cc) "movb %%ah,regflags+1\n\t" \ : : "qmi" (s), "q" (d) : "%eax","cc","memory") +#endif /* SAHF_SETO_PROFITABLE */ + +#elif defined(CPU_arm) && defined(ARM_ASSEMBLY) + +/* + * Machine dependent structure for holding the 68k CCR flags + */ +struct flag_struct { + uae_u32 nzcv; + uae_u32 x; +}; + +#define FLAGBIT_N 31 +#define FLAGBIT_Z 30 +#define FLAGBIT_C 29 +#define FLAGBIT_V 28 +#define FLAGBIT_X FLAGBIT_C /* must be in the same position in as x flag */ + +#define FLAGVAL_N (1 << FLAGBIT_N) +#define FLAGVAL_Z (1 << FLAGBIT_Z) +#define FLAGVAL_C (1 << FLAGBIT_C) +#define FLAGVAL_V (1 << FLAGBIT_V) +#define FLAGVAL_X (1 << FLAGBIT_X) + +#define SET_NFLG(y) (regflags.nzcv = (regflags.nzcv & ~FLAGVAL_N) | (((y) & 1) << FLAGBIT_N)) +#define SET_ZFLG(y) (regflags.nzcv 
= (regflags.nzcv & ~FLAGVAL_Z) | (((y) & 1) << FLAGBIT_Z)) +#define SET_CFLG(y) (regflags.nzcv = (regflags.nzcv & ~FLAGVAL_C) | (((y) & 1) << FLAGBIT_C)) +#define SET_VFLG(y) (regflags.nzcv = (regflags.nzcv & ~FLAGVAL_V) | (((y) & 1) << FLAGBIT_V)) +#define SET_XFLG(y) (regflags.x = ((y) & 1) << FLAGBIT_X) + +#define GET_NFLG() ((regflags.nzcv >> FLAGBIT_N) & 1) +#define GET_ZFLG() ((regflags.nzcv >> FLAGBIT_Z) & 1) +#define GET_CFLG() ((regflags.nzcv >> FLAGBIT_C) & 1) +#define GET_VFLG() ((regflags.nzcv >> FLAGBIT_V) & 1) +#define GET_XFLG() ((regflags.x >> FLAGBIT_X) & 1) + +#define CLEAR_CZNV() (regflags.nzcv = 0) +#define GET_CZNV() (regflags.nzcv) +#define IOR_CZNV(X) (regflags.nzcv |= (X)) +#define SET_CZNV(X) (regflags.nzcv = (X)) + +#define COPY_CARRY() (regflags.x = regflags.nzcv >> (FLAGBIT_C - FLAGBIT_X)) + +extern struct flag_struct regflags __asm__ ("regflags"); + +/* + * Test CCR condition + */ +static inline int cctrue(int cc) +{ + unsigned int nzcv = regflags.nzcv; + switch(cc){ + case 0: return 1; /* T */ + case 1: return 0; /* F */ + case 2: return (nzcv & (FLAGVAL_C | FLAGVAL_Z)) == 0; /* !GET_CFLG && !GET_ZFLG; HI */ + case 3: return (nzcv & (FLAGVAL_C | FLAGVAL_Z)) != 0; /* GET_CFLG || GET_ZFLG; LS */ + case 4: return (nzcv & FLAGVAL_C) == 0; /* !GET_CFLG; CC */ + case 5: return (nzcv & FLAGVAL_C) != 0; /* GET_CFLG; CS */ + case 6: return (nzcv & FLAGVAL_Z) == 0; /* !GET_ZFLG; NE */ + case 7: return (nzcv & FLAGVAL_Z) != 0; /* GET_ZFLG; EQ */ + case 8: return (nzcv & FLAGVAL_V) == 0; /* !GET_VFLG; VC */ + case 9: return (nzcv & FLAGVAL_V) != 0; /* GET_VFLG; VS */ + case 10:return (nzcv & FLAGVAL_N) == 0; /* !GET_NFLG; PL */ + case 11:return (nzcv & FLAGVAL_N) != 0; /* GET_NFLG; MI */ + case 12:return (((nzcv << (FLAGBIT_N - FLAGBIT_V)) ^ nzcv) & FLAGVAL_N) == 0; /* GET_NFLG == GET_VFLG; GE */ + case 13:return (((nzcv << (FLAGBIT_N - FLAGBIT_V)) ^ nzcv) & FLAGVAL_N) != 0; /* GET_NFLG != GET_VFLG; LT */ + case 14: nzcv &= (FLAGVAL_N | FLAGVAL_Z 
| FLAGVAL_V); + return (((nzcv << (FLAGBIT_N - FLAGBIT_V)) ^ nzcv) & (FLAGVAL_N | FLAGVAL_Z)) == 0; /* !GET_ZFLG && (GET_NFLG == GET_VFLG); GT */ + case 15: nzcv &= (FLAGVAL_N | FLAGVAL_Z | FLAGVAL_V); + return (((nzcv << (FLAGBIT_N - FLAGBIT_V)) ^ nzcv) & (FLAGVAL_N | FLAGVAL_Z)) != 0; /* GET_ZFLG || (GET_NFLG != GET_VFLG); LE */ + } + return 0; +} + +#define optflag_testl(v) do {\ + __asm__ __volatile__ ("tst %[rv],%[rv]\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + "bic %[nzcv],#0x30000000\n\t" \ + : [nzcv] "=r" (regflags.nzcv) \ + : [rv] "r" (v) \ + : "cc"); \ + } while(0) + +#define optflag_addl(v, s, d) do { \ + __asm__ __volatile__ ("adds %[rv],%[rd],%[rs]\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + : [nzcv] "=r" (regflags.nzcv), [rv] "=r" (v) \ + : [rs] "ri" (s), [rd] "1" (d) \ + : "cc"); \ + COPY_CARRY(); \ + } while(0) + +#define optflag_subl(v, s, d) do { \ + __asm__ __volatile__ ("subs %[rv],%[rd],%[rs]\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + "eor %[nzcv],#0x20000000\n\t" \ + : [nzcv] "=r" (regflags.nzcv), [rv] "=r" (v) \ + : [rs] "ri" (s), [rd] "1" (d) \ + : "cc"); \ + COPY_CARRY(); \ + } while(0) + +#define optflag_cmpl(s, d) do { \ + __asm__ __volatile__ ("cmp %[rd],%[rs]\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + "eor %[nzcv],#0x20000000\n\t" \ + : [nzcv] "=r" (regflags.nzcv) \ + : [rs] "ri" (s), [rd] "0" (d) \ + : "cc"); \ + } while(0) + +#if defined(ARMV6_ASSEMBLY) + +// #pragma message "ARM/v6 Assembly optimized flags" + +#define optflag_testw(v) do { \ + __asm__ __volatile__ ("sxth %[rv],%[rv]\n\t" \ + "tst %[rv],%[rv]\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + "bic %[nzcv],#0x30000000\n\t" \ + : [nzcv] "=r" (regflags.nzcv) \ + : [rv] "0" (v) \ + : "cc"); \ + }while(0) + +#define optflag_testb(v) do {\ + __asm__ __volatile__ ("sxtb %[rv],%[rv]\n\t" \ + "tst %[rv],%[rv]\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + "bic %[nzcv],#0x30000000\n\t" \ + : [nzcv] "=r" (regflags.nzcv) \ + : [rv] "0" (v) \ + : "cc"); \ + }while(0) + +#define optflag_addw(v, s, d) do { \ + __asm__ __volatile__ ("sxth 
%[rd],%[rd]\n\t" \ + "sxth %[rs],%[rs]\n\t" \ + "adds %[rd],%[rd],%[rs]\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + : [nzcv] "=r" (regflags.nzcv), [rv] "=r" (v) \ + : [rs] "ri" (s), [rd] "1" (d) \ + : "cc"); \ + COPY_CARRY(); \ + } while(0) + +#define optflag_addb(v, s, d) do { \ + __asm__ __volatile__ ("sxtb %[rd],%[rd]\n\t" \ + "sxtb %[rs],%[rs]\n\t" \ + "adds %[rd],%[rd],%[rs]\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + : [nzcv] "=r" (regflags.nzcv), [rv] "=r" (v) \ + : [rs] "ri" (s), [rd] "1" (d) \ + : "cc"); \ + COPY_CARRY(); \ + } while(0) + +#define optflag_subw(v, s, d) do { \ + __asm__ __volatile__ ("sxth %[rd],%[rd]\n\t" \ + "sxth %[rs],%[rs]\n\t" \ + "subs %[rd],%[rd],%[rs]\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + "eor %[nzcv],#0x20000000\n\t" \ + : [nzcv] "=r" (regflags.nzcv), [rv] "=r" (v) \ + : [rs] "ri" (s), [rd] "1" (d) \ + : "cc"); \ + COPY_CARRY(); \ + } while(0) + +#define optflag_subb(v, s, d) do { \ + __asm__ __volatile__ ("sxtb %[rd],%[rd]\n\t" \ + "sxtb %[rs],%[rs]\n\t" \ + "subs %[rd],%[rd],%[rs]\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + "eor %[nzcv],#0x20000000\n\t" \ + : [nzcv] "=r" (regflags.nzcv), [rv] "=r" (v) \ + : [rs] "ri" (s), [rd] "1" (d) \ + : "cc"); \ + COPY_CARRY(); \ + } while(0) + +#define optflag_cmpw(s, d) do { \ + __asm__ __volatile__ ("sxth %[rd],%[rd]\n\t" \ + "sxth %[rs],%[rs]\n\t" \ + "cmp %[rd],%[rs]\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + "eor %[nzcv],#0x20000000\n\t" \ + : [nzcv] "=r" (regflags.nzcv) \ + : [rs] "ri" (s), [rd] "0" (d) \ + : "cc"); \ + } while(0) + +#define optflag_cmpb(s, d) do { \ + __asm__ __volatile__ ("sxtb %[rd],%[rd]\n\t" \ + "sxtb %[rs],%[rs]\n\t" \ + "cmp %[rd],%[rs]\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + "eor %[nzcv],#0x20000000\n\t" \ + : [nzcv] "=r" (regflags.nzcv) \ + : [rs] "ri" (s), [rd] "0" (d) \ + : "cc"); \ + } while(0) + +#else + +// #pragma message "ARM/generic Assembly optimized flags" + +#define optflag_testw(v) do { \ + __asm__ __volatile__ ("lsl %[rv],%[rv],#16\n\t" \ + "tst %[rv],%[rv]\n\t" \ + "mrs 
%[nzcv],cpsr\n\t" \ + "bic %[nzcv],#0x30000000\n\t" \ + : [nzcv] "=r" (regflags.nzcv) \ + : [rv] "0" (v) \ + : "cc"); \ + }while(0) + +#define optflag_testb(v) do {\ + __asm__ __volatile__ ("lsl %[rv],%[rv],#24\n\t" \ + "tst %[rv],%[rv]\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + "bic %[nzcv],#0x30000000\n\t" \ + : [nzcv] "=r" (regflags.nzcv) \ + : [rv] "0" (v) \ + : "cc"); \ + }while(0) + +#define optflag_addw(v, s, d) do { \ + __asm__ __volatile__ ("lsl %[rd],%[rd],#16\n\t" \ + "adds %[rd],%[rd],%[rs],lsl #16\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + "lsr %[rv],%[rd],#16\n\t" \ + : [nzcv] "=r" (regflags.nzcv), [rv] "=r" (v) \ + : [rs] "ri" (s), [rd] "1" (d) \ + : "cc"); \ + COPY_CARRY(); \ + } while(0) + +#define optflag_addb(v, s, d) do { \ + __asm__ __volatile__ ("lsl %[rd],%[rd],#24\n\t" \ + "adds %[rd],%[rd],%[rs],lsl #24\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + "lsr %[rv],%[rd],#24\n\t" \ + : [nzcv] "=r" (regflags.nzcv), [rv] "=r" (v) \ + : [rs] "ri" (s), [rd] "1" (d) \ + : "cc"); \ + COPY_CARRY(); \ + } while(0) + +#define optflag_subw(v, s, d) do { \ + __asm__ __volatile__ ("lsl %[rd],%[rd],#16\n\t" \ + "subs %[rd],%[rd],%[rs],lsl #16\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + "eor %[nzcv],#0x20000000\n\t" \ + "lsr %[rv],%[rd],#16\n\t" \ + : [nzcv] "=r" (regflags.nzcv), [rv] "=r" (v) \ + : [rs] "ri" (s), [rd] "1" (d) \ + : "cc"); \ + COPY_CARRY(); \ + } while(0) + +#define optflag_subb(v, s, d) do { \ + __asm__ __volatile__ ("lsl %[rd],%[rd],#24\n\t" \ + "subs %[rd],%[rd],%[rs],lsl #24\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + "eor %[nzcv],#0x20000000\n\t" \ + "lsr %[rv],%[rd],#24\n\t" \ + : [nzcv] "=r" (regflags.nzcv), [rv] "=r" (v) \ + : [rs] "ri" (s), [rd] "1" (d) \ + : "cc"); \ + COPY_CARRY(); \ + } while(0) + +#define optflag_cmpw(s, d) do { \ + __asm__ __volatile__ ("lsl %[rd],%[rd],#16\n\t" \ + "cmp %[rd],%[rs],lsl #16\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + "eor %[nzcv],#0x20000000\n\t" \ + : [nzcv] "=r" (regflags.nzcv) \ + : [rs] "ri" (s), [rd] "0" (d) \ + : "cc"); \ + } while(0) + 
+#define optflag_cmpb(s, d) do { \ + __asm__ __volatile__ ("lsl %[rd],%[rd],#24\n\t" \ + "cmp %[rd],%[rs],lsl #24\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + "eor %[nzcv],#0x20000000\n\t" \ + : [nzcv] "=r" (regflags.nzcv) \ + : [rs] "ri" (s), [rd] "0" (d) \ + : "cc"); \ + } while(0) + #endif -#elif defined(SPARC_V8_ASSEMBLY) || defined(SPARC_V9_ASSEMBLY) +#elif defined(CPU_sparc) && (defined(SPARC_V8_ASSEMBLY) || defined(SPARC_V9_ASSEMBLY)) +/* + * Machine dependent structure for holding the 68k CCR flags + */ struct flag_struct { unsigned char nzvc; unsigned char x; @@ -361,52 +751,62 @@ struct flag_struct { extern struct flag_struct regflags; -#define FLAGVAL_Z 0x04 -#define FLAGVAL_N 0x08 +#define FLAGBIT_N 3 +#define FLAGBIT_Z 2 +#define FLAGBIT_V 1 +#define FLAGBIT_C 0 +#define FLAGBIT_X FLAGBIT_C /* should be in the same position as the x flag */ -#define SET_ZFLG(y) (regflags.nzvc = (regflags.nzvc & ~0x04) | (((y) & 1) << 2)) -#define SET_CFLG(y) (regflags.nzvc = (regflags.nzvc & ~1) | ((y) & 1)) -#define SET_VFLG(y) (regflags.nzvc = (regflags.nzvc & ~0x02) | (((y) & 1) << 1)) -#define SET_NFLG(y) (regflags.nzvc = (regflags.nzvc & ~0x08) | (((y) & 1) << 3)) -#define SET_XFLG(y) (regflags.x = (y)) +#define FLAGVAL_N (1 << FLAGBIT_N) +#define FLAGVAL_Z (1 << FLAGBIT_Z) +#define FLAGVAL_C (1 << FLAGBIT_C) +#define FLAGVAL_V (1 << FLAGBIT_V) +#define FLAGVAL_X (1 << FLAGBIT_X) -#define GET_ZFLG ((regflags.nzvc >> 2) & 1) -#define GET_CFLG (regflags.nzvc & 1) -#define GET_VFLG ((regflags.nzvc >> 1) & 1) -#define GET_NFLG ((regflags.nzvc >> 3) & 1) -#define GET_XFLG (regflags.x & 1) +#define SET_ZFLG(y) (regflags.nzvc = (regflags.nzvc & ~FLAGVAL_Z) | (((y) & 1) << FLAGBIT_Z)) +#define SET_CFLG(y) (regflags.nzvc = (regflags.nzvc & ~FLAGVAL_C) | (((y) & 1) << FLAGBIT_C)) +#define SET_VFLG(y) (regflags.nzvc = (regflags.nzvc & ~FLAGVAL_V) | (((y) & 1) << FLAGBIT_V)) +#define SET_NFLG(y) (regflags.nzvc = (regflags.nzvc & ~FLAGVAL_N) | (((y) & 1) << FLAGBIT_N)) +#define
SET_XFLG(y) (regflags.x = ((y) & 1) << FLAGBIT_X)
 
-#define CLEAR_CZNV 	(regflags.nzvc = 0)
-#define GET_CZNV	(reflags.nzvc)
-#define IOR_CZNV(X)	(refglags.nzvc |= (X))
-#define SET_CZNV(X)	(regflags.nzvc = (X))
+#define GET_ZFLG()	((regflags.nzvc >> FLAGBIT_Z) & 1)
+#define GET_CFLG()	((regflags.nzvc >> FLAGBIT_C) & 1)
+#define GET_VFLG()	((regflags.nzvc >> FLAGBIT_V) & 1)
+#define GET_NFLG()	((regflags.nzvc >> FLAGBIT_N) & 1)
+#define GET_XFLG()	((regflags.x >> FLAGBIT_X) & 1)
 
-#define COPY_CARRY (regflags.x = regflags.nzvc)
+#define CLEAR_CZNV()	(regflags.nzvc = 0)
+#define GET_CZNV()	(regflags.nzvc)
+#define IOR_CZNV(X)	(regflags.nzvc |= (X))
+#define SET_CZNV(X)	(regflags.nzvc = (X))
 
-static __inline__ int cctrue(int cc)
+#define COPY_CARRY()	(regflags.x = regflags.nzvc >> (FLAGBIT_C - FLAGBIT_X))
+
+/*
+ * Test CCR condition
+ */
+static inline int cctrue(int cc)
 {
 	uae_u32 nzvc = regflags.nzvc;
-	switch(cc){
-	case 0: return 1;                       /* T */
-	case 1: return 0;                       /* F */
-	case 2: return (nzvc & 0x05) == 0; /* !GET_CFLG && !GET_ZFLG;  HI */
-	case 3: return (nzvc & 0x05) != 0; /* GET_CFLG || GET_ZFLG;    LS */
-	case 4: return (nzvc & 1) == 0;        /* !GET_CFLG;               CC */
-	case 5: return (nzvc & 1) != 0;           /* GET_CFLG;                CS */
-	case 6: return (nzvc & 0x04) == 0; /* !GET_ZFLG;               NE */
-	case 7: return (nzvc & 0x04) != 0; /* GET_ZFLG;                EQ */
-	case 8: return (nzvc & 0x02) == 0;/* !GET_VFLG;               VC */
-	case 9: return (nzvc & 0x02) != 0;/* GET_VFLG;                VS */
-	case 10:return (nzvc & 0x08) == 0; /* !GET_NFLG;               PL */
-	case 11:return (nzvc & 0x08) != 0; /* GET_NFLG;                MI */
-	case 12:return (((nzvc << 2) ^ nzvc) & 0x08) == 0; /* GET_NFLG == GET_VFLG;             GE */
-	case 13:return (((nzvc << 2) ^ nzvc) & 0x08) != 0;/* GET_NFLG != GET_VFLG;             LT */
-	case 14:
-		nzvc &= 0x0e;
-		return (((nzvc << 2) ^ nzvc) & 0x0c) == 0; /* !GET_ZFLG && (GET_NFLG == GET_VFLG);  GT */
-	case 15:
-		nzvc &= 0x0e;
-		return (((nzvc << 2) ^ nzvc) & 0x0c) != 0; /* GET_ZFLG || (GET_NFLG != GET_VFLG);   LE */
+	/* NOTE: the local is named nzvc (see the unchanged declaration above); every case must use that name. */
+	switch (cc) {
+	case 0:  return 1;								/*				T  */
+	case 1:  return 0;								/*				F  */
+	case 2:  return (nzvc & (FLAGVAL_C | FLAGVAL_Z)) == 0;				/* !CFLG && !ZFLG		HI */
+	case 3:  return (nzvc & (FLAGVAL_C | FLAGVAL_Z)) != 0;				/*  CFLG || ZFLG		LS */
+	case 4:  return (nzvc & FLAGVAL_C) == 0;					/* !CFLG			CC */
+	case 5:  return (nzvc & FLAGVAL_C) != 0;					/*  CFLG			CS */
+	case 6:  return (nzvc & FLAGVAL_Z) == 0;					/* !ZFLG			NE */
+	case 7:  return (nzvc & FLAGVAL_Z) != 0;					/*  ZFLG			EQ */
+	case 8:  return (nzvc & FLAGVAL_V) == 0;					/* !VFLG			VC */
+	case 9:  return (nzvc & FLAGVAL_V) != 0;					/*  VFLG			VS */
+	case 10: return (nzvc & FLAGVAL_N) == 0;					/* !NFLG			PL */
+	case 11: return (nzvc & FLAGVAL_N) != 0;					/*  NFLG			MI */
+	case 12: return (((nzvc << (FLAGBIT_N - FLAGBIT_V)) ^ nzvc) & FLAGVAL_N) == 0;	/*  NFLG == VFLG		GE */
+	case 13: return (((nzvc << (FLAGBIT_N - FLAGBIT_V)) ^ nzvc) & FLAGVAL_N) != 0;	/*  NFLG != VFLG		LT */
+	case 14: nzvc &= (FLAGVAL_N | FLAGVAL_Z | FLAGVAL_V);				/* !ZFLG && (NFLG == VFLG)	GT */
+		return (((nzvc << (FLAGBIT_N - FLAGBIT_V)) ^ nzvc) & (FLAGVAL_N | FLAGVAL_Z)) == 0;
+	case 15: nzvc &= (FLAGVAL_N | FLAGVAL_Z | FLAGVAL_V);				/*  ZFLG || (NFLG != VFLG)	LE */
+		return (((nzvc << (FLAGBIT_N - FLAGBIT_V)) ^ nzvc) & (FLAGVAL_N | FLAGVAL_Z)) != 0;
 	}
 	return 0;
 }
@@ -1008,6 +1408,9 @@ static inline uae_u32 sparc_v9_flag_addx_32(flag_struct *flags, uae_u32 src, uae
 
 #else
 
+/*
+ * Machine independent structure for holding the 68k CCR flags
+ */
 struct flag_struct {
 	unsigned int c;
 	unsigned int z;
@@ -1030,22 +1433,25 @@ extern struct flag_struct regflags;
 #define SET_ZFLG(x) (ZFLG = (x))
 #define SET_XFLG(x) (XFLG = (x))
 
-#define GET_CFLG CFLG
-#define GET_NFLG NFLG
-#define GET_VFLG VFLG
-#define GET_ZFLG ZFLG
-#define GET_XFLG XFLG
+#define GET_CFLG() CFLG
+#define GET_NFLG() NFLG
+#define GET_VFLG() VFLG
+#define GET_ZFLG() ZFLG
+#define GET_XFLG() XFLG
 
-#define CLEAR_CZNV do { \
+#define CLEAR_CZNV() do { \
 	SET_CFLG (0); \
 	SET_ZFLG (0); \
 	SET_NFLG (0); \
 	SET_VFLG (0); \
 } while (0)
 
-#define COPY_CARRY (SET_XFLG (GET_CFLG))
+#define COPY_CARRY() (SET_XFLG (GET_CFLG ()))
 
-static __inline__ int cctrue(const int cc)
+/*
+ * Test CCR condition
+ */
+static inline int cctrue(const int cc)
 {
 	switch(cc){
 	case 0: return 1;                       /* T */
diff --git a/BasiliskII/src/uae_cpu/memory-uae.h b/BasiliskII/src/uae_cpu/memory-uae.h
new file mode 100644
index 00000000..cbae60b0
--- /dev/null
+++ b/BasiliskII/src/uae_cpu/memory-uae.h
@@ -0,0 +1,606 @@
+/*
+ * memory.h - memory management
+ *
+ * Copyright (c) 2001-2006 Milan Jurik of ARAnyM dev team (see AUTHORS)
+ *
+ * Inspired by Christian Bauer's Basilisk II
+ *
+ * This file is part of the ARAnyM project which builds a new and powerful
+ * TOS/FreeMiNT compatible virtual machine running on almost any hardware.
+ *
+ * ARAnyM is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * ARAnyM is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with ARAnyM; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+ /*
+  * UAE - The Un*x Amiga Emulator
+  *
+  * memory management
+  *
+  * Copyright 1995 Bernd Schmidt
+  */
+
+#ifndef UAE_MEMORY_H
+#define UAE_MEMORY_H
+
+#include "sysdeps.h"
+#include "string.h"
+#include "hardware.h"
+#include "parameters.h"
+#include "registers.h"
+#include "cpummu.h"
+#include "readcpu.h"
+
+# include <SDL_endian.h>	/* SDL_SwapBE64, used by do_get_mem_quad / do_put_mem_quad */
+
+// newcpu.h
+extern void Exception (int, uaecptr);
+#ifdef EXCEPTIONS_VIA_LONGJMP
+    extern JMP_BUF excep_env;
+    #define SAVE_EXCEPTION \
+        JMP_BUF excep_env_old; \
+        memcpy(excep_env_old, excep_env, sizeof(JMP_BUF))
+    #define RESTORE_EXCEPTION \
+        memcpy(excep_env, excep_env_old, sizeof(JMP_BUF))
+    #define TRY(var) int var = SETJMP(excep_env); if (!var)
+    #define CATCH(var) else
+    #define THROW(n) LONGJMP(excep_env, n)
+    #define THROW_AGAIN(var) LONGJMP(excep_env, var)
+    #define VOLATILE volatile
+#else
+    struct m68k_exception {
+        int prb;
+        m68k_exception (int exc) : prb (exc) {}
+        operator int() { return prb; }
+    };
+    #define SAVE_EXCEPTION
+    #define RESTORE_EXCEPTION
+    #define TRY(var) try
+    #define CATCH(var) catch(m68k_exception var)
+    #define THROW(n) throw m68k_exception(n)
+    #define THROW_AGAIN(var) throw
+    #define VOLATILE
+#endif /* EXCEPTIONS_VIA_LONGJMP */
+extern int in_exception_2;
+
+#define STRAM_END	0x0e00000UL	// should be replaced by global ROMBase as soon as ROMBase will be a constant
+#define ROM_END		0x0e80000UL	// should be replaced by ROMBase + RealROMSize if we are going to work with larger TOS ROMs than 512 kilobytes
+#define FastRAM_BEGIN	0x1000000UL	// should be replaced by global FastRAMBase as soon as FastRAMBase will be a constant
+#ifdef FixedSizeFastRAM
+#define FastRAM_SIZE	(FixedSizeFastRAM * 1024 * 1024)
+#else
+#define FastRAM_SIZE	FastRAMSize
+#endif
+
+#ifdef FIXED_VIDEORAM
+#define ARANYMVRAMSTART 0xf0000000UL
+#endif
+
+#define ARANYMVRAMSIZE	0x00100000	// should be a variable to protect VGA card offscreen memory
+
+#ifdef FIXED_VIDEORAM
+extern uintptr VMEMBaseDiff;
+#else
+extern uae_u32 VideoRAMBase;
+#endif
+
+#ifdef ARAM_PAGE_CHECK
+extern uaecptr pc_page, read_page, write_page;
+extern uintptr pc_offset, read_offset, write_offset;
+# ifdef PROTECT2K
+#  define ARAM_PAGE_MASK 0x7ff
+# else
+#  ifdef FULLMMU
+#   define ARAM_PAGE_MASK 0xfff
+#  else
+#   define ARAM_PAGE_MASK 0xfffff
+#  endif
+# endif
+#endif
+
+extern uintptr MEMBaseDiff;
+extern uintptr ROMBaseDiff;
+extern uintptr FastRAMBaseDiff;
+# define InitMEMBaseDiff(va, ra)	(MEMBaseDiff = (uintptr)(va) - (uintptr)(ra))
+# define InitROMBaseDiff(va, ra)	(ROMBaseDiff = (uintptr)(va) - (uintptr)(ra))
+# define InitFastRAMBaseDiff(va, ra)	(FastRAMBaseDiff = (uintptr)(va) - (uintptr)(ra))
+
+#ifdef FIXED_VIDEORAM
+#define InitVMEMBaseDiff(va, ra)	(VMEMBaseDiff = (uintptr)(va) - (uintptr)(ra))
+#else
+#define InitVMEMBaseDiff(va, ra)	(ra = (uintptr)(va) + MEMBaseDiff)
+#endif
+
+extern "C" void breakpt(void);
+
+
+static inline uae_u64 do_get_mem_quad(uae_u64 *a) {return SDL_SwapBE64(*a);}
+static inline void do_put_mem_quad(uae_u64 *a, uae_u64 v) {*a = SDL_SwapBE64(v);}
+
+
+#ifndef NOCHECKBOUNDARY
+static ALWAYS_INLINE bool test_ram_boundary(uaecptr addr, int size, bool super, bool write)
+{
+	if (addr <= (FastRAM_BEGIN + FastRAM_SIZE - size)) {
+#ifdef PROTECT2K
+		// protect first 2kB of RAM - access in supervisor mode only
+		if (!super && addr < 0x00000800UL)
+			return false;
+#endif
+		// check for write access to protected areas:
+		// - first two longwords of ST-RAM are non-writable (ROM shadow)
+		// - non-writable area between end of ST-RAM and begin of FastRAM
+		if (!write || addr >= FastRAM_BEGIN || (addr >= 8 && addr <= (STRAM_END - size)))
+			return true;
+	}
+#ifdef FIXED_VIDEORAM
+	return addr >= ARANYMVRAMSTART && addr <= (ARANYMVRAMSTART + ARANYMVRAMSIZE - size);
+#else
+	return addr >= VideoRAMBase && addr <= (VideoRAMBase + ARANYMVRAMSIZE - size);
+#endif
+}
+/*
+ * "size" is the size of the memory access (byte = 1, word = 2, long = 4)
+ */
+static ALWAYS_INLINE void check_ram_boundary(uaecptr addr, int size, bool write)
+{
+	if (test_ram_boundary(addr, size, regs.s, write))
+		return;
+
+	// D(bug("BUS ERROR %s at $%x\n", (write ? "writing" : "reading"), addr));
+	regs.mmu_fault_addr = addr;
+	regs.mmu_ssw = ((size & 3) << 5) | (write ? 0 : (1 << 8)); /* MMU_SW_RW */
+	breakpt();
+	THROW(2);
+}
+
+#else
+static inline bool test_ram_boundary(uaecptr, int, bool, bool) { return 1; }
+static inline void check_ram_boundary(uaecptr, int, bool) { }
+#endif
+
+#ifdef FIXED_VIDEORAM
+# define do_get_real_address(a)		((uae_u8 *)(((uaecptr)(a) < ARANYMVRAMSTART) ? ((uaecptr)(a) + MEMBaseDiff) : ((uaecptr)(a) + VMEMBaseDiff)))
+#else
+# define do_get_real_address(a)		((uae_u8 *)((uintptr)(a) + MEMBaseDiff))
+#endif
+
+static inline uae_u8 *phys_get_real_address(uaecptr addr)
+{
+	return do_get_real_address(addr);
+}
+
+#ifndef NOCHECKBOUNDARY
+static inline bool phys_valid_address(uaecptr addr, bool write, int sz)
+{
+	return test_ram_boundary(addr, sz, regs.s, write);
+}
+#else
+static inline bool phys_valid_address(uaecptr, bool, int) { return true; }
+#endif
+
+static inline uae_u64 phys_get_quad(uaecptr addr)
+{
+#ifdef ARAM_PAGE_CHECK
+	if (((addr ^ read_page) <= ARAM_PAGE_MASK))
+		return do_get_mem_quad((uae_u64*)(addr + read_offset));
+#endif
+#ifndef HW_SIGSEGV
+	addr = addr < 0xff000000 ? addr : addr & 0x00ffffff;
+	if ((addr & 0xfff00000) == 0x00f00000) return HWget_l(addr); /* TODO: must be HWget_q */
+#endif
+	check_ram_boundary(addr, 8, false);
+	uae_u64 * const m = (uae_u64 *)phys_get_real_address(addr);
+#ifdef ARAM_PAGE_CHECK
+	read_page = addr;
+	read_offset = (uintptr)m - (uintptr)addr;
+#endif
+	return do_get_mem_quad(m);
+}
+
+static inline uae_u32 phys_get_long(uaecptr addr)
+{
+#ifdef ARAM_PAGE_CHECK
+	if (((addr ^ read_page) <= ARAM_PAGE_MASK))
+		return do_get_mem_long((uae_u32*)(addr + read_offset));
+#endif
+#ifndef HW_SIGSEGV
+	addr = addr < 0xff000000 ? addr : addr & 0x00ffffff;
+	if ((addr & 0xfff00000) == 0x00f00000) return HWget_l(addr);
+#endif
+	check_ram_boundary(addr, 4, false);
+	uae_u32 * const m = (uae_u32 *)phys_get_real_address(addr);
+#ifdef ARAM_PAGE_CHECK
+	read_page = addr;
+	read_offset = (uintptr)m - (uintptr)addr;
+#endif
+	return do_get_mem_long(m);
+}
+
+static inline uae_u32 phys_get_word(uaecptr addr)
+{
+#ifdef ARAM_PAGE_CHECK
+	if (((addr ^ read_page) <= ARAM_PAGE_MASK))
+		return do_get_mem_word((uae_u16*)(addr + read_offset));
+#endif
+#ifndef HW_SIGSEGV
+	addr = addr < 0xff000000 ? addr : addr & 0x00ffffff;
+	if ((addr & 0xfff00000) == 0x00f00000) return HWget_w(addr);
+#endif
+	check_ram_boundary(addr, 2, false);
+	uae_u16 * const m = (uae_u16 *)phys_get_real_address(addr);
+#ifdef ARAM_PAGE_CHECK
+	read_page = addr;
+	read_offset = (uintptr)m - (uintptr)addr;
+#endif
+	return do_get_mem_word(m);
+}
+
+static inline uae_u32 phys_get_byte(uaecptr addr)
+{
+#ifdef ARAM_PAGE_CHECK
+	if (((addr ^ read_page) <= ARAM_PAGE_MASK))
+		return do_get_mem_byte((uae_u8*)(addr + read_offset));
+#endif
+#ifndef HW_SIGSEGV
+	addr = addr < 0xff000000 ? addr : addr & 0x00ffffff;
+	if ((addr & 0xfff00000) == 0x00f00000) return HWget_b(addr);
+#endif
+	check_ram_boundary(addr, 1, false);
+	uae_u8 * const m = (uae_u8 *)phys_get_real_address(addr);
+#ifdef ARAM_PAGE_CHECK
+	read_page = addr;
+	read_offset = (uintptr)m - (uintptr)addr;
+#endif
+	return do_get_mem_byte(m);
+}
+
+static inline void phys_put_quad(uaecptr addr, uae_u64 l)
+{
+#ifdef ARAM_PAGE_CHECK
+	if (((addr ^ write_page) <= ARAM_PAGE_MASK)) {
+		do_put_mem_quad((uae_u64*)(addr + write_offset), l);
+		return;
+	}
+#endif
+#ifndef HW_SIGSEGV
+	addr = addr < 0xff000000 ? addr : addr & 0x00ffffff;
+	if ((addr & 0xfff00000) == 0x00f00000) {
+		HWput_l(addr, l); /* TODO: must be HWput_q */
+		return;
+	}
+#endif
+	check_ram_boundary(addr, 8, true);
+	uae_u64 * const m = (uae_u64 *)phys_get_real_address(addr);
+#ifdef ARAM_PAGE_CHECK
+	write_page = addr;
+	write_offset = (uintptr)m - (uintptr)addr;
+#endif
+	do_put_mem_quad(m, l);
+}
+
+static inline void phys_put_long(uaecptr addr, uae_u32 l)
+{
+#ifdef ARAM_PAGE_CHECK
+	if (((addr ^ write_page) <= ARAM_PAGE_MASK)) {
+		do_put_mem_long((uae_u32*)(addr + write_offset), l);
+		return;
+	}
+#endif
+#ifndef HW_SIGSEGV
+	addr = addr < 0xff000000 ? addr : addr & 0x00ffffff;
+	if ((addr & 0xfff00000) == 0x00f00000) {
+		HWput_l(addr, l);
+		return;
+	}
+#endif
+	check_ram_boundary(addr, 4, true);
+	uae_u32 * const m = (uae_u32 *)phys_get_real_address(addr);
+#ifdef ARAM_PAGE_CHECK
+	write_page = addr;
+	write_offset = (uintptr)m - (uintptr)addr;
+#endif
+	do_put_mem_long(m, l);
+}
+
+static inline void phys_put_word(uaecptr addr, uae_u32 w)
+{
+#ifdef ARAM_PAGE_CHECK
+	if (((addr ^ write_page) <= ARAM_PAGE_MASK)) {
+		do_put_mem_word((uae_u16*)(addr + write_offset), w);
+		return;
+	}
+#endif
+#ifndef HW_SIGSEGV
+	addr = addr < 0xff000000 ? addr : addr & 0x00ffffff;
+	if ((addr & 0xfff00000) == 0x00f00000) {
+		HWput_w(addr, w);
+		return;
+	}
+#endif
+	check_ram_boundary(addr, 2, true);
+	uae_u16 * const m = (uae_u16 *)phys_get_real_address(addr);
+#ifdef ARAM_PAGE_CHECK
+	write_page = addr;
+	write_offset = (uintptr)m - (uintptr)addr;
+#endif
+	do_put_mem_word(m, w);
+}
+
+static inline void phys_put_byte(uaecptr addr, uae_u32 b)
+{
+#ifdef ARAM_PAGE_CHECK
+	if (((addr ^ write_page) <= ARAM_PAGE_MASK)) {
+		do_put_mem_byte((uae_u8*)(addr + write_offset), b);
+		return;
+	}
+#endif
+#ifndef HW_SIGSEGV
+	addr = addr < 0xff000000 ? addr : addr & 0x00ffffff;
+	if ((addr & 0xfff00000) == 0x00f00000) {
+		HWput_b(addr, b);
+		return;
+	}
+#endif
+	check_ram_boundary(addr, 1, true);
+	uae_u8 * const m = (uae_u8 *)phys_get_real_address(addr);
+#ifdef ARAM_PAGE_CHECK
+	write_page = addr;
+	write_offset = (uintptr)m - (uintptr)addr;
+#endif
+	do_put_mem_byte(m, b);
+}
+
+#ifdef FULLMMU
+static ALWAYS_INLINE bool is_unaligned(uaecptr addr, int size)
+{
+    return unlikely((addr & (size - 1)) && (addr ^ (addr + size - 1)) & 0x1000);
+}
+
+static ALWAYS_INLINE uae_u8 *mmu_get_real_address(uaecptr addr, struct mmu_atc_line *cl)
+{
+	return do_get_real_address(cl->phys + addr);
+}
+
+static ALWAYS_INLINE uae_u64 mmu_get_quad(uaecptr addr, int data)	/* 64-bit return: a uae_u32 here would drop the upper half of the quad */
+{
+	struct mmu_atc_line *cl;
+
+	if (likely(mmu_lookup(addr, data, 0, &cl)))
+		return do_get_mem_quad((uae_u64 *)mmu_get_real_address(addr, cl));
+	return mmu_get_quad_slow(addr, regs.s, data, cl);
+}
+
+static ALWAYS_INLINE uae_u64 get_quad(uaecptr addr)
+{
+	return mmu_get_quad(addr, 1);
+}
+
+static ALWAYS_INLINE uae_u32 mmu_get_long(uaecptr addr, int data, int size)
+{
+	struct mmu_atc_line *cl;
+
+	if (likely(mmu_lookup(addr, data, 0, &cl)))
+		return do_get_mem_long((uae_u32 *)mmu_get_real_address(addr, cl));
+	return mmu_get_long_slow(addr, regs.s, data, size, cl);
+}
+
+static ALWAYS_INLINE uae_u32 get_long(uaecptr addr)
+{
+	if (unlikely(is_unaligned(addr, 4)))
+		return mmu_get_long_unaligned(addr, 1);
+	return mmu_get_long(addr, 1, sz_long);
+}
+
+static ALWAYS_INLINE uae_u16 mmu_get_word(uaecptr addr, int data, int size)
+{
+	struct mmu_atc_line *cl;
+
+	if (likely(mmu_lookup(addr, data, 0, &cl)))
+		return do_get_mem_word((uae_u16 *)mmu_get_real_address(addr, cl));
+	return mmu_get_word_slow(addr, regs.s, data, size, cl);
+}
+
+static ALWAYS_INLINE uae_u16 get_word(uaecptr addr)
+{
+	if (unlikely(is_unaligned(addr, 2)))
+		return mmu_get_word_unaligned(addr, 1);
+	return mmu_get_word(addr, 1, sz_word);
+}
+
+static ALWAYS_INLINE uae_u8 mmu_get_byte(uaecptr addr, int data, int size)
+{
+	struct mmu_atc_line *cl;
+
+	if (likely(mmu_lookup(addr, data, 0, &cl)))
+		return do_get_mem_byte((uae_u8 *)mmu_get_real_address(addr, cl));
+	return mmu_get_byte_slow(addr, regs.s, data, size, cl);
+}
+
+static ALWAYS_INLINE uae_u8 get_byte(uaecptr addr)
+{
+	return mmu_get_byte(addr, 1, sz_byte);
+}
+
+static ALWAYS_INLINE void mmu_put_quad(uaecptr addr, uae_u64 val, int data)
+{
+	struct mmu_atc_line *cl;
+
+	if (likely(mmu_lookup(addr, data, 1, &cl)))
+		do_put_mem_quad((uae_u64 *)mmu_get_real_address(addr, cl), val);
+	else
+		mmu_put_quad_slow(addr, val, regs.s, data, cl);
+}
+
+static ALWAYS_INLINE void put_quad(uaecptr addr, uae_u64 val)	/* 64-bit value: a uae_u32 parameter would truncate before mmu_put_quad sees it */
+{
+	mmu_put_quad(addr, val, 1);
+}
+
+static ALWAYS_INLINE void mmu_put_long(uaecptr addr, uae_u32 val, int data, int size)
+{
+	struct mmu_atc_line *cl;
+
+	if (likely(mmu_lookup(addr, data, 1, &cl)))
+		do_put_mem_long((uae_u32 *)mmu_get_real_address(addr, cl), val);
+	else
+		mmu_put_long_slow(addr, val, regs.s, data, size, cl);
+}
+
+static ALWAYS_INLINE void put_long(uaecptr addr, uae_u32 val)
+{
+	if (unlikely(is_unaligned(addr, 4)))
+		mmu_put_long_unaligned(addr, val, 1);
+	else
+		mmu_put_long(addr, val, 1, sz_long);
+}
+
+static ALWAYS_INLINE void mmu_put_word(uaecptr addr, uae_u16 val, int data, int size)
+{
+	struct mmu_atc_line *cl;
+
+	if (likely(mmu_lookup(addr, data, 1, &cl)))
+		do_put_mem_word((uae_u16 *)mmu_get_real_address(addr, cl), val);
+	else
+		mmu_put_word_slow(addr, val, regs.s, data, size, cl);
+}
+
+static ALWAYS_INLINE void put_word(uaecptr addr, uae_u16 val)
+{
+	if (unlikely(is_unaligned(addr, 2)))
+		mmu_put_word_unaligned(addr, val, 1);
+	else
+		mmu_put_word(addr, val, 1, sz_word);
+}
+
+static ALWAYS_INLINE void mmu_put_byte(uaecptr addr, uae_u8 val, int data, int size)
+{
+	struct mmu_atc_line *cl;
+
+	if (likely(mmu_lookup(addr, data, 1, &cl)))
+		do_put_mem_byte((uae_u8 *)mmu_get_real_address(addr, cl), val);
+	else
+		mmu_put_byte_slow(addr, val, regs.s, data, size, cl);
+}
+
+static ALWAYS_INLINE void put_byte(uaecptr addr, uae_u8 val)
+{
+	mmu_put_byte(addr, val, 1, sz_byte);
+}
+
+static inline uae_u8 *get_real_address(uaecptr addr, int write, int sz)
+{
+	(void)sz;
+    return phys_get_real_address(mmu_translate(addr, regs.s, 1, write));
+}
+
+static ALWAYS_INLINE uae_u32 mmu_get_user_long(uaecptr addr, int super, int data, int size)
+{
+	struct mmu_atc_line *cl;
+
+	if (likely(mmu_user_lookup(addr, super, data, 0, &cl)))
+		return do_get_mem_long((uae_u32 *)mmu_get_real_address(addr, cl));
+	return mmu_get_long_slow(addr, super, data, size, cl);
+}
+
+static ALWAYS_INLINE uae_u16 mmu_get_user_word(uaecptr addr, int super, int data, int size)
+{
+	struct mmu_atc_line *cl;
+
+	if (likely(mmu_user_lookup(addr, super, data, 0, &cl)))
+		return do_get_mem_word((uae_u16 *)mmu_get_real_address(addr, cl));
+	return mmu_get_word_slow(addr, super, data, size, cl);
+}
+
+static ALWAYS_INLINE uae_u8 mmu_get_user_byte(uaecptr addr, int super, int data, int size)
+{
+	struct mmu_atc_line *cl;
+
+	if (likely(mmu_user_lookup(addr, super, data, 0, &cl)))
+		return do_get_mem_byte((uae_u8 *)mmu_get_real_address(addr, cl));
+	return mmu_get_byte_slow(addr, super, data, size, cl);
+}
+
+static ALWAYS_INLINE void mmu_put_user_long(uaecptr addr, uae_u32 val, int super, int data, int size)
+{
+	struct mmu_atc_line *cl;
+
+	if (likely(mmu_user_lookup(addr, super, data, 1, &cl)))
+		do_put_mem_long((uae_u32 *)mmu_get_real_address(addr, cl), val);
+	else
+		mmu_put_long_slow(addr, val, super, data, size, cl);
+}
+
+static ALWAYS_INLINE void mmu_put_user_word(uaecptr addr, uae_u16 val, int super, int data, int size)
+{
+	struct mmu_atc_line *cl;
+
+	if (likely(mmu_user_lookup(addr, super, data, 1, &cl)))
+		do_put_mem_word((uae_u16 *)mmu_get_real_address(addr, cl), val);
+	else
+		mmu_put_word_slow(addr, val, super, data, size, cl);
+}
+
+static ALWAYS_INLINE void mmu_put_user_byte(uaecptr addr, uae_u8 val, int super, int data, int size)
+{
+	struct mmu_atc_line *cl;
+
+	if (likely(mmu_user_lookup(addr, super, data, 1, &cl)))
+		do_put_mem_byte((uae_u8 *)mmu_get_real_address(addr, cl), val);
+	else
+		mmu_put_byte_slow(addr, val, super, data, size, cl);
+}
+
+static inline bool valid_address(uaecptr addr, bool write, int sz)
+{
+    SAVE_EXCEPTION;
+    TRY(prb) {
+		(void)sz;
+		check_ram_boundary(mmu_translate(addr, regs.s, 1, (write ? 1 : 0)), sz, write);
+		RESTORE_EXCEPTION;
+		return true;
+    }
+    CATCH(prb) {
+		RESTORE_EXCEPTION;
+		return false;
+    }
+}
+
+#else
+
+#  define get_quad(a)			phys_get_quad(a)
+#  define get_long(a)			phys_get_long(a)
+#  define get_word(a)			phys_get_word(a)
+#  define get_byte(a)			phys_get_byte(a)
+#  define put_quad(a,b)			phys_put_quad(a,b)
+#  define put_long(a,b)			phys_put_long(a,b)
+#  define put_word(a,b)			phys_put_word(a,b)
+#  define put_byte(a,b)			phys_put_byte(a,b)
+#  define get_real_address(a,w,s)	phys_get_real_address(a)
+
+#define valid_address(a,w,s)		phys_valid_address(a,w,s)
+#endif
+
+static inline void flush_internals() {
+#ifdef ARAM_PAGE_CHECK
+	pc_page = 0xeeeeeeee;
+	read_page = 0xeeeeeeee;
+	write_page = 0xeeeeeeee;
+#endif
+}
+
+#endif /* UAE_MEMORY_H */
+
+/*
+vim:ts=4:sw=4:
+*/
diff --git a/BasiliskII/src/uae_cpu/memory.cpp b/BasiliskII/src/uae_cpu/memory.cpp
index 7483f506..e56f993d 100644
--- a/BasiliskII/src/uae_cpu/memory.cpp
+++ b/BasiliskII/src/uae_cpu/memory.cpp
@@ -1,642 +1,59 @@
 /*
- * UAE - The Un*x Amiga Emulator
+ * memory.cpp - memory management
  *
- * Memory management
+ * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS)
+ *
+ * Inspired by Christian Bauer's Basilisk II
  *
- * (c) 1995 Bernd Schmidt
+ * This file is part of the ARAnyM project which builds a new and powerful
+ * TOS/FreeMiNT compatible virtual machine running on almost any hardware.
  *
- * This program is free software; you can redistribute it and/or modify
+ * ARAnyM is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
  * (at your option) any later version.
  *
- * This program is distributed in the hope that it will be useful,
+ * ARAnyM is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with ARAnyM; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -#include -#include + /* + * UAE - The Un*x Amiga Emulator + * + * Memory management + * + * (c) 1995 Bernd Schmidt + */ #include "sysdeps.h" -#include "cpu_emulation.h" -#include "main.h" -#include "video.h" - -#include "m68k.h" #include "memory.h" -#include "readcpu.h" -#include "newcpu.h" +#define DEBUG 0 +#include "debug.h" -#if !REAL_ADDRESSING && !DIRECT_ADDRESSING - -static bool illegal_mem = false; - -#ifdef SAVE_MEMORY_BANKS -addrbank *mem_banks[65536]; -#else -addrbank mem_banks[65536]; +#ifdef ARAM_PAGE_CHECK +uaecptr pc_page = 0xeeeeeeee; +uintptr pc_offset = 0; +uaecptr read_page = 0xeeeeeeee; +uintptr read_offset = 0; +uaecptr write_page = 0xeeeeeeee; +uintptr write_offset = 0; #endif -#ifdef WORDS_BIGENDIAN -# define swap_words(X) (X) -#else -# define swap_words(X) (((X) >> 16) | ((X) << 16)) -#endif - -#ifdef NO_INLINE_MEMORY_ACCESS -uae_u32 longget (uaecptr addr) +extern "C" void breakpt(void) { - return call_mem_get_func (get_mem_bank (addr).lget, addr); -} -uae_u32 wordget (uaecptr addr) -{ - return call_mem_get_func (get_mem_bank (addr).wget, addr); -} -uae_u32 byteget (uaecptr addr) -{ - return call_mem_get_func (get_mem_bank (addr).bget, addr); -} -void longput (uaecptr addr, uae_u32 l) -{ - call_mem_put_func (get_mem_bank (addr).lput, addr, l); -} -void wordput (uaecptr addr, uae_u32 w) -{ - call_mem_put_func (get_mem_bank (addr).wput, addr, w); -} -void byteput (uaecptr addr, uae_u32 b) -{ - call_mem_put_func (get_mem_bank (addr).bput, addr, b); -} -#endif - -/* A dummy bank that only contains zeros */ - -static uae_u32 REGPARAM2 dummy_lget (uaecptr) REGPARAM; -static uae_u32 REGPARAM2 dummy_wget (uaecptr) REGPARAM; 
-static uae_u32 REGPARAM2 dummy_bget (uaecptr) REGPARAM; -static void REGPARAM2 dummy_lput (uaecptr, uae_u32) REGPARAM; -static void REGPARAM2 dummy_wput (uaecptr, uae_u32) REGPARAM; -static void REGPARAM2 dummy_bput (uaecptr, uae_u32) REGPARAM; - -uae_u32 REGPARAM2 dummy_lget (uaecptr addr) -{ - if (illegal_mem) - write_log ("Illegal lget at %08x\n", addr); - - return 0; + // bug("bus err: pc=%08x, sp=%08x, addr=%08x", m68k_getpc(), regs.regs[15], regs.mmu_fault_addr); } -uae_u32 REGPARAM2 dummy_wget (uaecptr addr) -{ - if (illegal_mem) - write_log ("Illegal wget at %08x\n", addr); +#if !KNOWN_ALLOC && !NORMAL_ADDRESSING +// This part need rewrite for ARAnyM !! +// It can be taken from hatari. - return 0; -} - -uae_u32 REGPARAM2 dummy_bget (uaecptr addr) -{ - if (illegal_mem) - write_log ("Illegal bget at %08x\n", addr); - - return 0; -} - -void REGPARAM2 dummy_lput (uaecptr addr, uae_u32 l) -{ - if (illegal_mem) - write_log ("Illegal lput at %08x\n", addr); -} -void REGPARAM2 dummy_wput (uaecptr addr, uae_u32 w) -{ - if (illegal_mem) - write_log ("Illegal wput at %08x\n", addr); -} -void REGPARAM2 dummy_bput (uaecptr addr, uae_u32 b) -{ - if (illegal_mem) - write_log ("Illegal bput at %08x\n", addr); -} - -/* Mac RAM (32 bit addressing) */ - -static uae_u32 REGPARAM2 ram_lget(uaecptr) REGPARAM; -static uae_u32 REGPARAM2 ram_wget(uaecptr) REGPARAM; -static uae_u32 REGPARAM2 ram_bget(uaecptr) REGPARAM; -static void REGPARAM2 ram_lput(uaecptr, uae_u32) REGPARAM; -static void REGPARAM2 ram_wput(uaecptr, uae_u32) REGPARAM; -static void REGPARAM2 ram_bput(uaecptr, uae_u32) REGPARAM; -static uae_u8 *REGPARAM2 ram_xlate(uaecptr addr) REGPARAM; - -static uintptr RAMBaseDiff; // RAMBaseHost - RAMBaseMac - -uae_u32 REGPARAM2 ram_lget(uaecptr addr) -{ - uae_u32 *m; - m = (uae_u32 *)(RAMBaseDiff + addr); - return do_get_mem_long(m); -} - -uae_u32 REGPARAM2 ram_wget(uaecptr addr) -{ - uae_u16 *m; - m = (uae_u16 *)(RAMBaseDiff + addr); - return do_get_mem_word(m); -} - -uae_u32 
REGPARAM2 ram_bget(uaecptr addr) -{ - return (uae_u32)*(uae_u8 *)(RAMBaseDiff + addr); -} - -void REGPARAM2 ram_lput(uaecptr addr, uae_u32 l) -{ - uae_u32 *m; - m = (uae_u32 *)(RAMBaseDiff + addr); - do_put_mem_long(m, l); -} - -void REGPARAM2 ram_wput(uaecptr addr, uae_u32 w) -{ - uae_u16 *m; - m = (uae_u16 *)(RAMBaseDiff + addr); - do_put_mem_word(m, w); -} - -void REGPARAM2 ram_bput(uaecptr addr, uae_u32 b) -{ - *(uae_u8 *)(RAMBaseDiff + addr) = b; -} - -uae_u8 *REGPARAM2 ram_xlate(uaecptr addr) -{ - return (uae_u8 *)(RAMBaseDiff + addr); -} - -/* Mac RAM (24 bit addressing) */ - -static uae_u32 REGPARAM2 ram24_lget(uaecptr) REGPARAM; -static uae_u32 REGPARAM2 ram24_wget(uaecptr) REGPARAM; -static uae_u32 REGPARAM2 ram24_bget(uaecptr) REGPARAM; -static void REGPARAM2 ram24_lput(uaecptr, uae_u32) REGPARAM; -static void REGPARAM2 ram24_wput(uaecptr, uae_u32) REGPARAM; -static void REGPARAM2 ram24_bput(uaecptr, uae_u32) REGPARAM; -static uae_u8 *REGPARAM2 ram24_xlate(uaecptr addr) REGPARAM; - -uae_u32 REGPARAM2 ram24_lget(uaecptr addr) -{ - uae_u32 *m; - m = (uae_u32 *)(RAMBaseDiff + (addr & 0xffffff)); - return do_get_mem_long(m); -} - -uae_u32 REGPARAM2 ram24_wget(uaecptr addr) -{ - uae_u16 *m; - m = (uae_u16 *)(RAMBaseDiff + (addr & 0xffffff)); - return do_get_mem_word(m); -} - -uae_u32 REGPARAM2 ram24_bget(uaecptr addr) -{ - return (uae_u32)*(uae_u8 *)(RAMBaseDiff + (addr & 0xffffff)); -} - -void REGPARAM2 ram24_lput(uaecptr addr, uae_u32 l) -{ - uae_u32 *m; - m = (uae_u32 *)(RAMBaseDiff + (addr & 0xffffff)); - do_put_mem_long(m, l); -} - -void REGPARAM2 ram24_wput(uaecptr addr, uae_u32 w) -{ - uae_u16 *m; - m = (uae_u16 *)(RAMBaseDiff + (addr & 0xffffff)); - do_put_mem_word(m, w); -} - -void REGPARAM2 ram24_bput(uaecptr addr, uae_u32 b) -{ - *(uae_u8 *)(RAMBaseDiff + (addr & 0xffffff)) = b; -} - -uae_u8 *REGPARAM2 ram24_xlate(uaecptr addr) -{ - return (uae_u8 *)(RAMBaseDiff + (addr & 0xffffff)); -} - -/* Mac ROM (32 bit addressing) */ - -static uae_u32 
REGPARAM2 rom_lget(uaecptr) REGPARAM; -static uae_u32 REGPARAM2 rom_wget(uaecptr) REGPARAM; -static uae_u32 REGPARAM2 rom_bget(uaecptr) REGPARAM; -static void REGPARAM2 rom_lput(uaecptr, uae_u32) REGPARAM; -static void REGPARAM2 rom_wput(uaecptr, uae_u32) REGPARAM; -static void REGPARAM2 rom_bput(uaecptr, uae_u32) REGPARAM; -static uae_u8 *REGPARAM2 rom_xlate(uaecptr addr) REGPARAM; - -static uintptr ROMBaseDiff; // ROMBaseHost - ROMBaseMac - -uae_u32 REGPARAM2 rom_lget(uaecptr addr) -{ - uae_u32 *m; - m = (uae_u32 *)(ROMBaseDiff + addr); - return do_get_mem_long(m); -} - -uae_u32 REGPARAM2 rom_wget(uaecptr addr) -{ - uae_u16 *m; - m = (uae_u16 *)(ROMBaseDiff + addr); - return do_get_mem_word(m); -} - -uae_u32 REGPARAM2 rom_bget(uaecptr addr) -{ - return (uae_u32)*(uae_u8 *)(ROMBaseDiff + addr); -} - -void REGPARAM2 rom_lput(uaecptr addr, uae_u32 b) -{ - if (illegal_mem) - write_log ("Illegal ROM lput at %08x\n", addr); -} - -void REGPARAM2 rom_wput(uaecptr addr, uae_u32 b) -{ - if (illegal_mem) - write_log ("Illegal ROM wput at %08x\n", addr); -} - -void REGPARAM2 rom_bput(uaecptr addr, uae_u32 b) -{ - if (illegal_mem) - write_log ("Illegal ROM bput at %08x\n", addr); -} - -uae_u8 *REGPARAM2 rom_xlate(uaecptr addr) -{ - return (uae_u8 *)(ROMBaseDiff + addr); -} - -/* Mac ROM (24 bit addressing) */ - -static uae_u32 REGPARAM2 rom24_lget(uaecptr) REGPARAM; -static uae_u32 REGPARAM2 rom24_wget(uaecptr) REGPARAM; -static uae_u32 REGPARAM2 rom24_bget(uaecptr) REGPARAM; -static uae_u8 *REGPARAM2 rom24_xlate(uaecptr addr) REGPARAM; - -uae_u32 REGPARAM2 rom24_lget(uaecptr addr) -{ - uae_u32 *m; - m = (uae_u32 *)(ROMBaseDiff + (addr & 0xffffff)); - return do_get_mem_long(m); -} - -uae_u32 REGPARAM2 rom24_wget(uaecptr addr) -{ - uae_u16 *m; - m = (uae_u16 *)(ROMBaseDiff + (addr & 0xffffff)); - return do_get_mem_word(m); -} - -uae_u32 REGPARAM2 rom24_bget(uaecptr addr) -{ - return (uae_u32)*(uae_u8 *)(ROMBaseDiff + (addr & 0xffffff)); -} - -uae_u8 *REGPARAM2 
rom24_xlate(uaecptr addr) -{ - return (uae_u8 *)(ROMBaseDiff + (addr & 0xffffff)); -} - -/* Frame buffer */ - -static uae_u32 REGPARAM2 frame_direct_lget(uaecptr) REGPARAM; -static uae_u32 REGPARAM2 frame_direct_wget(uaecptr) REGPARAM; -static uae_u32 REGPARAM2 frame_direct_bget(uaecptr) REGPARAM; -static void REGPARAM2 frame_direct_lput(uaecptr, uae_u32) REGPARAM; -static void REGPARAM2 frame_direct_wput(uaecptr, uae_u32) REGPARAM; -static void REGPARAM2 frame_direct_bput(uaecptr, uae_u32) REGPARAM; - -static uae_u32 REGPARAM2 frame_host_555_lget(uaecptr) REGPARAM; -static uae_u32 REGPARAM2 frame_host_555_wget(uaecptr) REGPARAM; -static void REGPARAM2 frame_host_555_lput(uaecptr, uae_u32) REGPARAM; -static void REGPARAM2 frame_host_555_wput(uaecptr, uae_u32) REGPARAM; - -static uae_u32 REGPARAM2 frame_host_565_lget(uaecptr) REGPARAM; -static uae_u32 REGPARAM2 frame_host_565_wget(uaecptr) REGPARAM; -static void REGPARAM2 frame_host_565_lput(uaecptr, uae_u32) REGPARAM; -static void REGPARAM2 frame_host_565_wput(uaecptr, uae_u32) REGPARAM; - -static uae_u32 REGPARAM2 frame_host_888_lget(uaecptr) REGPARAM; -static void REGPARAM2 frame_host_888_lput(uaecptr, uae_u32) REGPARAM; - -static uae_u8 *REGPARAM2 frame_xlate(uaecptr addr) REGPARAM; - -static uintptr FrameBaseDiff; // MacFrameBaseHost - MacFrameBaseMac - -uae_u32 REGPARAM2 frame_direct_lget(uaecptr addr) -{ - uae_u32 *m; - m = (uae_u32 *)(FrameBaseDiff + addr); - return do_get_mem_long(m); -} - -uae_u32 REGPARAM2 frame_direct_wget(uaecptr addr) -{ - uae_u16 *m; - m = (uae_u16 *)(FrameBaseDiff + addr); - return do_get_mem_word(m); -} - -uae_u32 REGPARAM2 frame_direct_bget(uaecptr addr) -{ - return (uae_u32)*(uae_u8 *)(FrameBaseDiff + addr); -} - -void REGPARAM2 frame_direct_lput(uaecptr addr, uae_u32 l) -{ - uae_u32 *m; - m = (uae_u32 *)(FrameBaseDiff + addr); - do_put_mem_long(m, l); -} - -void REGPARAM2 frame_direct_wput(uaecptr addr, uae_u32 w) -{ - uae_u16 *m; - m = (uae_u16 *)(FrameBaseDiff + addr); - 
do_put_mem_word(m, w); -} - -void REGPARAM2 frame_direct_bput(uaecptr addr, uae_u32 b) -{ - *(uae_u8 *)(FrameBaseDiff + addr) = b; -} - -uae_u32 REGPARAM2 frame_host_555_lget(uaecptr addr) -{ - uae_u32 *m, l; - m = (uae_u32 *)(FrameBaseDiff + addr); - l = *m; - return swap_words(l); -} - -uae_u32 REGPARAM2 frame_host_555_wget(uaecptr addr) -{ - uae_u16 *m; - m = (uae_u16 *)(FrameBaseDiff + addr); - return *m; -} - -void REGPARAM2 frame_host_555_lput(uaecptr addr, uae_u32 l) -{ - uae_u32 *m; - m = (uae_u32 *)(FrameBaseDiff + addr); - *m = swap_words(l); -} - -void REGPARAM2 frame_host_555_wput(uaecptr addr, uae_u32 w) -{ - uae_u16 *m; - m = (uae_u16 *)(FrameBaseDiff + addr); - *m = w; -} - -uae_u32 REGPARAM2 frame_host_565_lget(uaecptr addr) -{ - uae_u32 *m, l; - m = (uae_u32 *)(FrameBaseDiff + addr); - l = *m; - l = (l & 0x001f001f) | ((l >> 1) & 0x7fe07fe0); - return swap_words(l); -} - -uae_u32 REGPARAM2 frame_host_565_wget(uaecptr addr) -{ - uae_u16 *m, w; - m = (uae_u16 *)(FrameBaseDiff + addr); - w = *m; - return (w & 0x1f) | ((w >> 1) & 0x7fe0); -} - -void REGPARAM2 frame_host_565_lput(uaecptr addr, uae_u32 l) -{ - uae_u32 *m; - m = (uae_u32 *)(FrameBaseDiff + addr); - l = (l & 0x001f001f) | ((l << 1) & 0xffc0ffc0); - *m = swap_words(l); -} - -void REGPARAM2 frame_host_565_wput(uaecptr addr, uae_u32 w) -{ - uae_u16 *m; - m = (uae_u16 *)(FrameBaseDiff + addr); - *m = (w & 0x1f) | ((w << 1) & 0xffc0); -} - -uae_u32 REGPARAM2 frame_host_888_lget(uaecptr addr) -{ - uae_u32 *m, l; - m = (uae_u32 *)(FrameBaseDiff + addr); - return *m; -} - -void REGPARAM2 frame_host_888_lput(uaecptr addr, uae_u32 l) -{ - uae_u32 *m; - m = (uae_u32 *)(MacFrameBaseHost + addr - MacFrameBaseMac); - *m = l; -} - -uae_u8 *REGPARAM2 frame_xlate(uaecptr addr) -{ - return (uae_u8 *)(FrameBaseDiff + addr); -} - -/* Mac framebuffer RAM (24 bit addressing) - * - * This works by duplicating appropriate writes to the 32-bit - * address-space framebuffer. 
- */ - -static void REGPARAM2 fram24_lput(uaecptr, uae_u32) REGPARAM; -static void REGPARAM2 fram24_wput(uaecptr, uae_u32) REGPARAM; -static void REGPARAM2 fram24_bput(uaecptr, uae_u32) REGPARAM; - -void REGPARAM2 fram24_lput(uaecptr addr, uae_u32 l) -{ - uaecptr page_off = addr & 0xffff; - if (0xa700 <= page_off && page_off < 0xfc80) { - uae_u32 *fm; - fm = (uae_u32 *)(MacFrameBaseHost + page_off - 0xa700); - do_put_mem_long(fm, l); - } - - uae_u32 *m; - m = (uae_u32 *)(RAMBaseDiff + (addr & 0xffffff)); - do_put_mem_long(m, l); -} - -void REGPARAM2 fram24_wput(uaecptr addr, uae_u32 w) -{ - uaecptr page_off = addr & 0xffff; - if (0xa700 <= page_off && page_off < 0xfc80) { - uae_u16 *fm; - fm = (uae_u16 *)(MacFrameBaseHost + page_off - 0xa700); - do_put_mem_word(fm, w); - } - - uae_u16 *m; - m = (uae_u16 *)(RAMBaseDiff + (addr & 0xffffff)); - do_put_mem_word(m, w); -} - -void REGPARAM2 fram24_bput(uaecptr addr, uae_u32 b) -{ - uaecptr page_off = addr & 0xffff; - if (0xa700 <= page_off && page_off < 0xfc80) { - *(uae_u8 *)(MacFrameBaseHost + page_off - 0xa700) = b; - } - - *(uae_u8 *)(RAMBaseDiff + (addr & 0xffffff)) = b; -} - -/* Default memory access functions */ - -uae_u8 *REGPARAM2 default_xlate (uaecptr a) -{ - write_log("Your Mac program just did something terribly stupid\n"); - return NULL; -} - -/* Address banks */ - -addrbank dummy_bank = { - dummy_lget, dummy_wget, dummy_bget, - dummy_lput, dummy_wput, dummy_bput, - default_xlate -}; - -addrbank ram_bank = { - ram_lget, ram_wget, ram_bget, - ram_lput, ram_wput, ram_bput, - ram_xlate -}; - -addrbank ram24_bank = { - ram24_lget, ram24_wget, ram24_bget, - ram24_lput, ram24_wput, ram24_bput, - ram24_xlate -}; - -addrbank rom_bank = { - rom_lget, rom_wget, rom_bget, - rom_lput, rom_wput, rom_bput, - rom_xlate -}; - -addrbank rom24_bank = { - rom24_lget, rom24_wget, rom24_bget, - rom_lput, rom_wput, rom_bput, - rom24_xlate -}; - -addrbank frame_direct_bank = { - frame_direct_lget, frame_direct_wget, 
frame_direct_bget, - frame_direct_lput, frame_direct_wput, frame_direct_bput, - frame_xlate -}; - -addrbank frame_host_555_bank = { - frame_host_555_lget, frame_host_555_wget, frame_direct_bget, - frame_host_555_lput, frame_host_555_wput, frame_direct_bput, - frame_xlate -}; - -addrbank frame_host_565_bank = { - frame_host_565_lget, frame_host_565_wget, frame_direct_bget, - frame_host_565_lput, frame_host_565_wput, frame_direct_bput, - frame_xlate -}; - -addrbank frame_host_888_bank = { - frame_host_888_lget, frame_direct_wget, frame_direct_bget, - frame_host_888_lput, frame_direct_wput, frame_direct_bput, - frame_xlate -}; - -addrbank fram24_bank = { - ram24_lget, ram24_wget, ram24_bget, - fram24_lput, fram24_wput, fram24_bput, - ram24_xlate -}; - -void memory_init(void) -{ - for(long i=0; i<65536; i++) - put_mem_bank(i<<16, &dummy_bank); - - // Limit RAM size to not overlap ROM - uint32 ram_size = RAMSize > ROMBaseMac ? ROMBaseMac : RAMSize; - - RAMBaseDiff = (uintptr)RAMBaseHost - (uintptr)RAMBaseMac; - ROMBaseDiff = (uintptr)ROMBaseHost - (uintptr)ROMBaseMac; - FrameBaseDiff = (uintptr)MacFrameBaseHost - (uintptr)MacFrameBaseMac; - - // Map RAM, ROM and display - if (TwentyFourBitAddressing) { - map_banks(&ram24_bank, RAMBaseMac >> 16, ram_size >> 16); - map_banks(&rom24_bank, ROMBaseMac >> 16, ROMSize >> 16); - - // Map frame buffer at end of RAM. 
- map_banks(&fram24_bank, ((RAMBaseMac + ram_size) >> 16) - 1, 1); - } else { - map_banks(&ram_bank, RAMBaseMac >> 16, ram_size >> 16); - map_banks(&rom_bank, ROMBaseMac >> 16, ROMSize >> 16); - - // Map frame buffer - switch (MacFrameLayout) { - case FLAYOUT_DIRECT: - map_banks(&frame_direct_bank, MacFrameBaseMac >> 16, (MacFrameSize >> 16) + 1); - break; - case FLAYOUT_HOST_555: - map_banks(&frame_host_555_bank, MacFrameBaseMac >> 16, (MacFrameSize >> 16) + 1); - break; - case FLAYOUT_HOST_565: - map_banks(&frame_host_565_bank, MacFrameBaseMac >> 16, (MacFrameSize >> 16) + 1); - break; - case FLAYOUT_HOST_888: - map_banks(&frame_host_888_bank, MacFrameBaseMac >> 16, (MacFrameSize >> 16) + 1); - break; - } - } -} - -void map_banks(addrbank *bank, int start, int size) -{ - int bnr; - unsigned long int hioffs = 0, endhioffs = 0x100; - - if (start >= 0x100) { - for (bnr = start; bnr < start + size; bnr++) - put_mem_bank (bnr << 16, bank); - return; - } - if (TwentyFourBitAddressing) endhioffs = 0x10000; - for (hioffs = 0; hioffs < endhioffs; hioffs += 0x100) - for (bnr = start; bnr < start+size; bnr++) - put_mem_bank((bnr + hioffs) << 16, bank); -} - -#endif /* !REAL_ADDRESSING && !DIRECT_ADDRESSING */ +#error Not prepared for your platform, maybe you need memory banks from hatari +#endif /* !KNOWN_ALLOC && !NORMAL_ADDRESSING */ diff --git a/BasiliskII/src/uae_cpu/memory.h b/BasiliskII/src/uae_cpu/memory.h index 75a6303b..f7bab41d 100644 --- a/BasiliskII/src/uae_cpu/memory.h +++ b/BasiliskII/src/uae_cpu/memory.h @@ -23,107 +23,39 @@ #ifndef UAE_MEMORY_H #define UAE_MEMORY_H -#if !DIRECT_ADDRESSING && !REAL_ADDRESSING - -/* Enabling this adds one additional native memory reference per 68k memory - * access, but saves one shift (on the x86). Enabling this is probably - * better for the cache. My favourite benchmark (PP2) doesn't show a - * difference, so I leave this enabled. 
*/ - -#if 1 || defined SAVE_MEMORY -#define SAVE_MEMORY_BANKS -#endif - -typedef uae_u32 (REGPARAM2 *mem_get_func)(uaecptr) REGPARAM; -typedef void (REGPARAM2 *mem_put_func)(uaecptr, uae_u32) REGPARAM; -typedef uae_u8 *(REGPARAM2 *xlate_func)(uaecptr) REGPARAM; - -#undef DIRECT_MEMFUNCS_SUCCESSFUL - -#ifndef CAN_MAP_MEMORY -#undef USE_COMPILER -#endif - -#if defined(USE_COMPILER) && !defined(USE_MAPPED_MEMORY) -#define USE_MAPPED_MEMORY -#endif - -typedef struct { - /* These ones should be self-explanatory... */ - mem_get_func lget, wget, bget; - mem_put_func lput, wput, bput; - /* Use xlateaddr to translate an Amiga address to a uae_u8 * that can - * be used to address memory without calling the wget/wput functions. - * This doesn't work for all memory banks, so this function may call - * abort(). */ - xlate_func xlateaddr; -} addrbank; - -extern uae_u8 filesysory[65536]; - -extern addrbank ram_bank; // Mac RAM -extern addrbank rom_bank; // Mac ROM -extern addrbank frame_bank; // Frame buffer - -/* Default memory access functions */ - -extern uae_u8 *REGPARAM2 default_xlate(uaecptr addr) REGPARAM; - -#define bankindex(addr) (((uaecptr)(addr)) >> 16) - -#ifdef SAVE_MEMORY_BANKS -extern addrbank *mem_banks[65536]; -#define get_mem_bank(addr) (*mem_banks[bankindex(addr)]) -#define put_mem_bank(addr, b) (mem_banks[bankindex(addr)] = (b)) -#else -extern addrbank mem_banks[65536]; -#define get_mem_bank(addr) (mem_banks[bankindex(addr)]) -#define put_mem_bank(addr, b) (mem_banks[bankindex(addr)] = *(b)) -#endif - -extern void memory_init(void); -extern void map_banks(addrbank *bank, int first, int count); - -#ifndef NO_INLINE_MEMORY_ACCESS - -#define longget(addr) (call_mem_get_func(get_mem_bank(addr).lget, addr)) -#define wordget(addr) (call_mem_get_func(get_mem_bank(addr).wget, addr)) -#define byteget(addr) (call_mem_get_func(get_mem_bank(addr).bget, addr)) -#define longput(addr,l) (call_mem_put_func(get_mem_bank(addr).lput, addr, l)) -#define wordput(addr,w) 
(call_mem_put_func(get_mem_bank(addr).wput, addr, w)) -#define byteput(addr,b) (call_mem_put_func(get_mem_bank(addr).bput, addr, b)) - -#else - -extern uae_u32 longget(uaecptr addr); -extern uae_u32 wordget(uaecptr addr); -extern uae_u32 byteget(uaecptr addr); -extern void longput(uaecptr addr, uae_u32 l); -extern void wordput(uaecptr addr, uae_u32 w); -extern void byteput(uaecptr addr, uae_u32 b); - -#endif - -#ifndef MD_HAVE_MEM_1_FUNCS - -#define longget_1 longget -#define wordget_1 wordget -#define byteget_1 byteget -#define longput_1 longput -#define wordput_1 wordput -#define byteput_1 byteput - -#endif - -#endif /* !DIRECT_ADDRESSING && !REAL_ADDRESSING */ - -#if REAL_ADDRESSING -const uintptr MEMBaseDiff = 0; -#elif DIRECT_ADDRESSING +#if DIRECT_ADDRESSING extern uintptr MEMBaseDiff; #endif -#if REAL_ADDRESSING || DIRECT_ADDRESSING +extern void Exception (int, uaecptr); +#ifdef EXCEPTIONS_VIA_LONGJMP + extern JMP_BUF excep_env; + #define SAVE_EXCEPTION \ + JMP_BUF excep_env_old; \ + memcpy(excep_env_old, excep_env, sizeof(JMP_BUF)) + #define RESTORE_EXCEPTION \ + memcpy(excep_env, excep_env_old, sizeof(JMP_BUF)) + #define TRY(var) int var = SETJMP(excep_env); if (!var) + #define CATCH(var) else + #define THROW(n) LONGJMP(excep_env, n) + #define THROW_AGAIN(var) LONGJMP(excep_env, var) + #define VOLATILE volatile +#else + struct m68k_exception { + int prb; + m68k_exception (int exc) : prb (exc) {} + operator int() { return prb; } + }; + #define SAVE_EXCEPTION + #define RESTORE_EXCEPTION + #define TRY(var) try + #define CATCH(var) catch(m68k_exception var) + #define THROW(n) throw m68k_exception(n) + #define THROW_AGAIN(var) throw + #define VOLATILE +#endif /* EXCEPTIONS_VIA_LONGJMP */ + +#if DIRECT_ADDRESSING static __inline__ uae_u8 *do_get_real_address(uaecptr addr) { return (uae_u8 *)MEMBaseDiff + addr; @@ -137,71 +69,57 @@ static __inline__ uae_u32 get_long(uaecptr addr) uae_u32 * const m = (uae_u32 *)do_get_real_address(addr); return do_get_mem_long(m); 
} +#define phys_get_long get_long static __inline__ uae_u32 get_word(uaecptr addr) { uae_u16 * const m = (uae_u16 *)do_get_real_address(addr); return do_get_mem_word(m); } +#define phys_get_word get_word static __inline__ uae_u32 get_byte(uaecptr addr) { uae_u8 * const m = (uae_u8 *)do_get_real_address(addr); return do_get_mem_byte(m); } +#define phys_get_byte get_byte static __inline__ void put_long(uaecptr addr, uae_u32 l) { uae_u32 * const m = (uae_u32 *)do_get_real_address(addr); do_put_mem_long(m, l); } +#define phys_put_long put_long static __inline__ void put_word(uaecptr addr, uae_u32 w) { uae_u16 * const m = (uae_u16 *)do_get_real_address(addr); do_put_mem_word(m, w); } +#define phys_put_word put_word static __inline__ void put_byte(uaecptr addr, uae_u32 b) { uae_u8 * const m = (uae_u8 *)do_get_real_address(addr); do_put_mem_byte(m, b); } +#define phys_put_byte put_byte static __inline__ uae_u8 *get_real_address(uaecptr addr) { return do_get_real_address(addr); } +static inline uae_u8 *get_real_address(uaecptr addr, int write, int sz) +{ + return do_get_real_address(addr); +} +static inline uae_u8 *phys_get_real_address(uaecptr addr) +{ + return do_get_real_address(addr); +} static __inline__ uae_u32 get_virtual_address(uae_u8 *addr) { return do_get_virtual_address(addr); } -#else -static __inline__ uae_u32 get_long(uaecptr addr) -{ - return longget_1(addr); -} -static __inline__ uae_u32 get_word(uaecptr addr) -{ - return wordget_1(addr); -} -static __inline__ uae_u32 get_byte(uaecptr addr) -{ - return byteget_1(addr); -} -static __inline__ void put_long(uaecptr addr, uae_u32 l) -{ - longput_1(addr, l); -} -static __inline__ void put_word(uaecptr addr, uae_u32 w) -{ - wordput_1(addr, w); -} -static __inline__ void put_byte(uaecptr addr, uae_u32 b) -{ - byteput_1(addr, b); -} -static __inline__ uae_u8 *get_real_address(uaecptr addr) -{ - return get_mem_bank(addr).xlateaddr(addr); -} -/* gb-- deliberately not implemented since it shall not be used... 
*/ -extern uae_u32 get_virtual_address(uae_u8 *addr); -#endif /* DIRECT_ADDRESSING || REAL_ADDRESSING */ +#endif /* DIRECT_ADDRESSING */ + +static __inline__ void check_ram_boundary(uaecptr addr, int size, bool write) {} +static inline void flush_internals() {} #endif /* MEMORY_H */ diff --git a/BasiliskII/src/uae_cpu/newcpu.cpp b/BasiliskII/src/uae_cpu/newcpu.cpp index d13a6078..0832df82 100644 --- a/BasiliskII/src/uae_cpu/newcpu.cpp +++ b/BasiliskII/src/uae_cpu/newcpu.cpp @@ -1,51 +1,80 @@ /* - * UAE - The Un*x Amiga Emulator + * newcpu.cpp - CPU emulation * - * MC68000 emulation + * Copyright (c) 2010 ARAnyM dev team (see AUTHORS) + * * - * (c) 1995 Bernd Schmidt + * Inspired by Christian Bauer's Basilisk II * - * This program is free software; you can redistribute it and/or modify + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * ARAnyM is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * - * This program is distributed in the hope that it will be useful, + * ARAnyM is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with ARAnyM; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -#include -#include -#include + /* + * UAE - The Un*x Amiga Emulator + * + * MC68000 emulation + * + * (c) 1995 Bernd Schmidt + */ #include "sysdeps.h" +#include #include "cpu_emulation.h" #include "main.h" #include "emul_op.h" - -extern int intlev(void); // From baisilisk_glue.cpp - #include "m68k.h" #include "memory.h" #include "readcpu.h" #include "newcpu.h" -#include "compiler/compemu.h" +#ifdef USE_JIT +# include "compiler/compemu.h" +#endif #include "fpu/fpu.h" - -#if defined(ENABLE_EXCLUSIVE_SPCFLAGS) && !defined(HAVE_HARDWARE_LOCKS) -B2_mutex *spcflags_lock = NULL; +#include "cpummu.h" +#if 0 +#include "natfeats.h" +#include "disasm-glue.h" +#endif +#if USE_JIT +extern bool UseJIT; #endif -bool quit_program = false; +#include + +#define DEBUG 0 +#include "debug.h" + +#define SANITY_CHECK_ATC 1 + +struct fixup fixup = {0, 0, 0}; + +int quit_program = 0; +int exit_val = 0; + +// For instruction $7139 +bool cpu_debugging = false; + struct flag_struct regflags; +/* LongJump buffers */ +#ifdef EXCEPTIONS_VIA_LONGJMP +JMP_BUF excep_env; +#endif /* Opcode of faulting instruction */ uae_u16 last_op_for_exception_3; /* PC at fault time */ @@ -60,19 +89,34 @@ int movem_index1[256]; int movem_index2[256]; int movem_next[256]; -cpuop_func *cpufunctbl[65536]; +#ifdef FLIGHT_RECORDER + +// feel free to edit the following defines to customize the dump +#define FRLOG_HOTKEY 1 /* 1 = dump only when hotkey is held down */ +#define FRLOG_ALL 1 /* 1 = dump continuously to ever growing log */ +#define FRLOG_IRQ 0 /* 1 = dump also CPU in interrupts */ +#define FRLOG_REGS 0 /* 1 = dump also all data/address registers */ +#define FRLOG_SIZE 8192 /* this many instructions in single dump */ -#if FLIGHT_RECORDER 
struct rec_step { - uae_u32 pc; -#if FLIGHT_RECORDER >= 2 uae_u32 d[8]; uae_u32 a[8]; -#endif + uae_u32 pc; + uae_u16 sr; + uae_u32 usp; + uae_u32 msp; + uae_u32 isp; + uae_u16 instr; }; -const int LOG_SIZE = 32768; -static rec_step log[LOG_SIZE]; +bool cpu_flight_recorder_active = false; + +#if FRLOG_ALL +const int LOG_SIZE = 10; +#else +const int LOG_SIZE = FRLOG_SIZE; +#endif +static rec_step frlog[LOG_SIZE]; static int log_ptr = -1; // First time initialization static const char *log_filename(void) @@ -81,852 +125,627 @@ static const char *log_filename(void) return name ? name : "log.68k"; } -void m68k_record_step(uaecptr pc) -{ -#if FLIGHT_RECORDER >= 2 - /* XXX: if LSB is set, we are recording from generated code and we - don't support registers recording yet. */ - if ((pc & 1) == 0) { - for (int i = 0; i < 8; i++) { - log[log_ptr].d[i] = m68k_dreg(regs, i); - log[log_ptr].a[i] = m68k_areg(regs, i); - } - } -#endif - log[log_ptr].pc = pc; - log_ptr = (log_ptr + 1) % LOG_SIZE; -} - -static void dump_log(void) +void dump_flight_recorder(void) { +#if FRLOG_ALL + FILE *f = fopen(log_filename(), "a"); +#else FILE *f = fopen(log_filename(), "w"); +#endif if (f == NULL) return; for (int i = 0; i < LOG_SIZE; i++) { int j = (i + log_ptr) % LOG_SIZE; - uae_u32 pc = log[j].pc & ~1; - fprintf(f, "pc %08x", pc); -#if FLIGHT_RECORDER >= 2 - fprintf(f, "\n"); - if ((log[j].pc & 1) == 0) { - fprintf(f, "d0 %08x d1 %08x d2 %08x d3 %08x\n", log[j].d[0], log[j].d[1], log[j].d[2], log[j].d[3]); - fprintf(f, "d4 %08x d5 %08x d6 %08x d7 %08x\n", log[j].d[4], log[j].d[5], log[j].d[6], log[j].d[7]); - fprintf(f, "a0 %08x a1 %08x a2 %08x a3 %08x\n", log[j].a[0], log[j].a[1], log[j].a[2], log[j].a[3]); - fprintf(f, "a4 %08x a5 %08x a6 %08x a7 %08x\n", log[j].a[4], log[j].a[5], log[j].a[6], log[j].a[7]); - } -#else - fprintf(f, " | "); -#endif -#if ENABLE_MON - disass_68k(f, pc); + fprintf(f, "pc %08x instr %04x sr %04x usp %08x msp %08x isp %08x\n", frlog[j].pc, frlog[j].instr, 
frlog[j].sr, frlog[j].usp, frlog[j].msp, frlog[j].isp); + // adding a simple opcode -> assembler conversion table would help +#if FRLOG_REGS + fprintf(f, "d0 %08x d1 %08x d2 %08x d3 %08x\n", frlog[j].d[0], frlog[j].d[1], frlog[j].d[2], frlog[j].d[3]); + fprintf(f, "d4 %08x d5 %08x d6 %08x d7 %08x\n", frlog[j].d[4], frlog[j].d[5], frlog[j].d[6], frlog[j].d[7]); + fprintf(f, "a0 %08x a1 %08x a2 %08x a3 %08x\n", frlog[j].a[0], frlog[j].a[1], frlog[j].a[2], frlog[j].a[3]); + fprintf(f, "a4 %08x a5 %08x a6 %08x a7 %08x\n", frlog[j].a[4], frlog[j].a[5], frlog[j].a[6], frlog[j].a[7]); #endif + m68k_disasm(f, frlog[j].pc, NULL, 1); } fclose(f); } -#endif -#if ENABLE_MON -static void dump_regs(void) +void m68k_record_step(uaecptr pc, int opcode) { - m68k_dumpstate(NULL); -} -#endif + static bool last_state = false; -#define COUNT_INSTRS 0 +#if FRLOG_HOTKEY + if (! cpu_flight_recorder_active) { + if (last_state) { + // dump log out + dump_flight_recorder(); -#if COUNT_INSTRS -static unsigned long int instrcount[65536]; -static uae_u16 opcodenums[65536]; - -static int compfn (const void *el1, const void *el2) -{ - return instrcount[*(const uae_u16 *)el1] < instrcount[*(const uae_u16 *)el2]; -} - -static char *icountfilename (void) -{ - char *name = getenv ("INSNCOUNT"); - if (name) - return name; - return COUNT_INSTRS == 2 ? 
"frequent.68k" : "insncount"; -} - -void dump_counts (void) -{ - FILE *f = fopen (icountfilename (), "w"); - unsigned long int total; - int i; - - write_log ("Writing instruction count file...\n"); - for (i = 0; i < 65536; i++) { - opcodenums[i] = i; - total += instrcount[i]; + // remember last state + last_state = false; + } + return; } - qsort (opcodenums, 65536, sizeof(uae_u16), compfn); - - fprintf (f, "Total: %lu\n", total); - for (i=0; i < 65536; i++) { - unsigned long int cnt = instrcount[opcodenums[i]]; - struct instr *dp; - struct mnemolookup *lookup; - if (!cnt) - break; - dp = table68k + opcodenums[i]; - for (lookup = lookuptab;lookup->mnemo != dp->mnemo; lookup++) - ; - fprintf (f, "%04x: %lu %s\n", opcodenums[i], cnt, lookup->name); - } - fclose (f); -} -#else -void dump_counts (void) -{ -} #endif + if (! last_state) { + // reset old log + log_ptr = 0; + memset(frlog, 0, sizeof(frlog)); + // remember last state + last_state = true; + } + +#if FRLOG_REGS + for (int i = 0; i < 8; i++) { + frlog[log_ptr].d[i] = m68k_dreg(regs, i); + frlog[log_ptr].a[i] = m68k_areg(regs, i); + } +#endif + frlog[log_ptr].pc = pc; + + MakeSR(); +#if ! FRLOG_IRQ + // is CPU in interrupt handler? Quit if should not be logged. 
+ if (regs.s && !regs.m) return; +#endif + frlog[log_ptr].sr = regs.sr; + frlog[log_ptr].usp = regs.usp; + frlog[log_ptr].msp = regs.msp; + frlog[log_ptr].isp = regs.isp; + frlog[log_ptr].instr = opcode; + + log_ptr = (log_ptr + 1) % LOG_SIZE; +#if FRLOG_ALL + if (log_ptr == 0) dump_flight_recorder(); +#endif +} +#endif /* FLIGHT_RECORDER */ + int broken_in; -static __inline__ unsigned int cft_map (unsigned int f) +static inline unsigned int cft_map (unsigned int f) { -#ifndef HAVE_GET_WORD_UNSWAPPED - return f; +#if !defined(HAVE_GET_WORD_UNSWAPPED) || defined(FULLMMU) + return f; #else - return ((f >> 8) & 255) | ((f & 255) << 8); + return do_byteswap_16(f); #endif } -void REGPARAM2 op_illg_1 (uae_u32 opcode) REGPARAM; - void REGPARAM2 op_illg_1 (uae_u32 opcode) { - op_illg (cft_map (opcode)); + op_illg (cft_map (opcode)); } -static void build_cpufunctbl (void) -{ - int i; - unsigned long opcode; - unsigned int cpu_level = 0; // 68000 (default) - if (CPUType == 4) - cpu_level = 4; // 68040 with FPU - else { - if (FPUType) - cpu_level = 3; // 68020 with FPU - else if (CPUType >= 2) - cpu_level = 2; // 68020 - else if (CPUType == 1) - cpu_level = 1; - } - struct cputbl *tbl = ( - cpu_level == 4 ? op_smalltbl_0_ff - : cpu_level == 3 ? op_smalltbl_1_ff - : cpu_level == 2 ? op_smalltbl_2_ff - : cpu_level == 1 ? op_smalltbl_3_ff - : op_smalltbl_4_ff); - - for (opcode = 0; opcode < 65536; opcode++) - cpufunctbl[cft_map (opcode)] = op_illg_1; - for (i = 0; tbl[i].handler != NULL; i++) { - if (! 
tbl[i].specific) - cpufunctbl[cft_map (tbl[i].opcode)] = tbl[i].handler; - } - for (opcode = 0; opcode < 65536; opcode++) { - cpuop_func *f; - - if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level) - continue; - - if (table68k[opcode].handler != -1) { - f = cpufunctbl[cft_map (table68k[opcode].handler)]; - if (f == op_illg_1) - abort(); - cpufunctbl[cft_map (opcode)] = f; - } - } - for (i = 0; tbl[i].handler != NULL; i++) { - if (tbl[i].specific) - cpufunctbl[cft_map (tbl[i].opcode)] = tbl[i].handler; - } -} void init_m68k (void) { - int i; + int i; - for (i = 0 ; i < 256 ; i++) { - int j; - for (j = 0 ; j < 8 ; j++) { - if (i & (1 << j)) break; - } - movem_index1[i] = j; - movem_index2[i] = 7-j; - movem_next[i] = i & (~(1 << j)); + for (i = 0 ; i < 256 ; i++) { + int j; + for (j = 0 ; j < 8 ; j++) { + if (i & (1 << j)) break; } -#if COUNT_INSTRS - { - FILE *f = fopen (icountfilename (), "r"); - memset (instrcount, 0, sizeof instrcount); - if (f) { - uae_u32 opcode, count, total; - char name[20]; - write_log ("Reading instruction count file...\n"); - fscanf (f, "Total: %lu\n", &total); - while (fscanf (f, "%lx: %lu %s\n", &opcode, &count, name) == 3) { - instrcount[opcode] = count; - } - fclose(f); - } - } -#endif - read_table68k (); - do_merges (); - - build_cpufunctbl (); - -#if defined(ENABLE_EXCLUSIVE_SPCFLAGS) && !defined(HAVE_HARDWARE_LOCKS) - spcflags_lock = B2_create_mutex(); -#endif - fpu_init(CPUType == 4); + movem_index1[i] = j; + movem_index2[i] = 7-j; + movem_next[i] = i & (~(1 << j)); + } + fpu_init (CPUType == 4); } void exit_m68k (void) { fpu_exit (); -#if defined(ENABLE_EXCLUSIVE_SPCFLAGS) && !defined(HAVE_HARDWARE_LOCKS) - B2_delete_mutex(spcflags_lock); -#endif } -struct regstruct regs, lastint_regs; -static struct regstruct regs_backup[16]; -static int backup_pointer = 0; -static long int m68kpc_offset; -int lastint_no; +struct regstruct regs; +// MJ static struct regstruct regs_backup[16]; +// MJ static int backup_pointer 
= 0; -#if REAL_ADDRESSING || DIRECT_ADDRESSING -#define get_ibyte_1(o) get_byte(get_virtual_address(regs.pc_p) + (o) + 1) -#define get_iword_1(o) get_word(get_virtual_address(regs.pc_p) + (o)) -#define get_ilong_1(o) get_long(get_virtual_address(regs.pc_p) + (o)) + +#ifdef FULLMMU +static inline uae_u8 get_ibyte_1(uae_u32 o) +{ + return get_ibyte(o); +} +static inline uae_u16 get_iword_1(uae_u32 o) +{ + return get_iword(o); +} +static inline uae_u32 get_ilong_1(uae_u32 o) +{ + return get_ilong(o); +} #else -#define get_ibyte_1(o) get_byte(regs.pc + (regs.pc_p - regs.pc_oldp) + (o) + 1) -#define get_iword_1(o) get_word(regs.pc + (regs.pc_p - regs.pc_oldp) + (o)) -#define get_ilong_1(o) get_long(regs.pc + (regs.pc_p - regs.pc_oldp) + (o)) +# define get_ibyte_1(o) get_byte(m68k_getpc() + (o) + 1) +# define get_iword_1(o) get_word(m68k_getpc() + (o)) +# define get_ilong_1(o) get_long(m68k_getpc() + (o)) #endif -uae_s32 ShowEA (int reg, amodes mode, wordsizes size, char *buf) +/* + * extract bitfield data from memory and return it in the MSBs + * bdata caches the unmodified data for put_bitfield() + */ +uae_u32 get_bitfield(uae_u32 src, uae_u32 bdata[2], uae_s32 offset, int width) { - uae_u16 dp; - uae_s8 disp8; - uae_s16 disp16; - int r; - uae_u32 dispreg; - uaecptr addr; - uae_s32 offset = 0; - char buffer[80]; + uae_u32 tmp, res, mask; - switch (mode){ - case Dreg: - sprintf (buffer,"D%d", reg); + offset &= 7; + mask = 0xffffffffu << (32 - width); + switch ((offset + width + 7) >> 3) { + case 1: + tmp = get_byte(src); + res = tmp << (24 + offset); + bdata[0] = tmp & ~(mask >> (24 + offset)); break; - case Areg: - sprintf (buffer,"A%d", reg); + case 2: + tmp = get_word(src); + res = tmp << (16 + offset); + bdata[0] = tmp & ~(mask >> (16 + offset)); break; - case Aind: - sprintf (buffer,"(A%d)", reg); + case 3: + tmp = get_word(src); + res = tmp << (16 + offset); + bdata[0] = tmp & ~(mask >> (16 + offset)); + tmp = get_byte(src + 2); + res |= tmp << (8 + offset); + 
bdata[1] = tmp & ~(mask >> (8 + offset)); break; - case Aipi: - sprintf (buffer,"(A%d)+", reg); + case 4: + tmp = get_long(src); + res = tmp << offset; + bdata[0] = tmp & ~(mask >> offset); break; - case Apdi: - sprintf (buffer,"-(A%d)", reg); - break; - case Ad16: - disp16 = get_iword_1 (m68kpc_offset); m68kpc_offset += 2; - addr = m68k_areg(regs,reg) + (uae_s16)disp16; - sprintf (buffer,"(A%d,$%04x) == $%08lx", reg, disp16 & 0xffff, - (unsigned long)addr); - break; - case Ad8r: - dp = get_iword_1 (m68kpc_offset); m68kpc_offset += 2; - disp8 = dp & 0xFF; - r = (dp & 0x7000) >> 12; - dispreg = dp & 0x8000 ? m68k_areg(regs,r) : m68k_dreg(regs,r); - if (!(dp & 0x800)) dispreg = (uae_s32)(uae_s16)(dispreg); - dispreg <<= (dp >> 9) & 3; - - if (dp & 0x100) { - uae_s32 outer = 0, disp = 0; - uae_s32 base = m68k_areg(regs,reg); - char name[10]; - sprintf (name,"A%d, ",reg); - if (dp & 0x80) { base = 0; name[0] = 0; } - if (dp & 0x40) dispreg = 0; - if ((dp & 0x30) == 0x20) { disp = (uae_s32)(uae_s16)get_iword_1 (m68kpc_offset); m68kpc_offset += 2; } - if ((dp & 0x30) == 0x30) { disp = get_ilong_1 (m68kpc_offset); m68kpc_offset += 4; } - base += disp; - - if ((dp & 0x3) == 0x2) { outer = (uae_s32)(uae_s16)get_iword_1 (m68kpc_offset); m68kpc_offset += 2; } - if ((dp & 0x3) == 0x3) { outer = get_ilong_1 (m68kpc_offset); m68kpc_offset += 4; } - - if (!(dp & 4)) base += dispreg; - if (dp & 3) base = get_long (base); - if (dp & 4) base += dispreg; - - addr = base + outer; - sprintf (buffer,"(%s%c%d.%c*%d+%d)+%d == $%08lx", name, - dp & 0x8000 ? 'A' : 'D', (int)r, dp & 0x800 ? 'L' : 'W', - 1 << ((dp >> 9) & 3), - disp,outer, - (unsigned long)addr); - } else { - addr = m68k_areg(regs,reg) + (uae_s32)((uae_s8)disp8) + dispreg; - sprintf (buffer,"(A%d, %c%d.%c*%d, $%02x) == $%08lx", reg, - dp & 0x8000 ? 'A' : 'D', (int)r, dp & 0x800 ? 
'L' : 'W', - 1 << ((dp >> 9) & 3), disp8, - (unsigned long)addr); - } - break; - case PC16: - addr = m68k_getpc () + m68kpc_offset; - disp16 = get_iword_1 (m68kpc_offset); m68kpc_offset += 2; - addr += (uae_s16)disp16; - sprintf (buffer,"(PC,$%04x) == $%08lx", disp16 & 0xffff,(unsigned long)addr); - break; - case PC8r: - addr = m68k_getpc () + m68kpc_offset; - dp = get_iword_1 (m68kpc_offset); m68kpc_offset += 2; - disp8 = dp & 0xFF; - r = (dp & 0x7000) >> 12; - dispreg = dp & 0x8000 ? m68k_areg(regs,r) : m68k_dreg(regs,r); - if (!(dp & 0x800)) dispreg = (uae_s32)(uae_s16)(dispreg); - dispreg <<= (dp >> 9) & 3; - - if (dp & 0x100) { - uae_s32 outer = 0,disp = 0; - uae_s32 base = addr; - char name[10]; - sprintf (name,"PC, "); - if (dp & 0x80) { base = 0; name[0] = 0; } - if (dp & 0x40) dispreg = 0; - if ((dp & 0x30) == 0x20) { disp = (uae_s32)(uae_s16)get_iword_1 (m68kpc_offset); m68kpc_offset += 2; } - if ((dp & 0x30) == 0x30) { disp = get_ilong_1 (m68kpc_offset); m68kpc_offset += 4; } - base += disp; - - if ((dp & 0x3) == 0x2) { outer = (uae_s32)(uae_s16)get_iword_1 (m68kpc_offset); m68kpc_offset += 2; } - if ((dp & 0x3) == 0x3) { outer = get_ilong_1 (m68kpc_offset); m68kpc_offset += 4; } - - if (!(dp & 4)) base += dispreg; - if (dp & 3) base = get_long (base); - if (dp & 4) base += dispreg; - - addr = base + outer; - sprintf (buffer,"(%s%c%d.%c*%d+%d)+%d == $%08lx", name, - dp & 0x8000 ? 'A' : 'D', (int)r, dp & 0x800 ? 'L' : 'W', - 1 << ((dp >> 9) & 3), - disp,outer, - (unsigned long)addr); - } else { - addr += (uae_s32)((uae_s8)disp8) + dispreg; - sprintf (buffer,"(PC, %c%d.%c*%d, $%02x) == $%08lx", dp & 0x8000 ? 'A' : 'D', - (int)r, dp & 0x800 ? 
'L' : 'W', 1 << ((dp >> 9) & 3), - disp8, (unsigned long)addr); - } - break; - case absw: - sprintf (buffer,"$%08lx", (unsigned long)(uae_s32)(uae_s16)get_iword_1 (m68kpc_offset)); - m68kpc_offset += 2; - break; - case absl: - sprintf (buffer,"$%08lx", (unsigned long)get_ilong_1 (m68kpc_offset)); - m68kpc_offset += 4; - break; - case imm: - switch (size){ - case sz_byte: - sprintf (buffer,"#$%02x", (unsigned int)(get_iword_1 (m68kpc_offset) & 0xff)); - m68kpc_offset += 2; - break; - case sz_word: - sprintf (buffer,"#$%04x", (unsigned int)(get_iword_1 (m68kpc_offset) & 0xffff)); - m68kpc_offset += 2; - break; - case sz_long: - sprintf (buffer,"#$%08lx", (unsigned long)(get_ilong_1 (m68kpc_offset))); - m68kpc_offset += 4; - break; - default: - break; - } - break; - case imm0: - offset = (uae_s32)(uae_s8)get_iword_1 (m68kpc_offset); - m68kpc_offset += 2; - sprintf (buffer,"#$%02x", (unsigned int)(offset & 0xff)); - break; - case imm1: - offset = (uae_s32)(uae_s16)get_iword_1 (m68kpc_offset); - m68kpc_offset += 2; - sprintf (buffer,"#$%04x", (unsigned int)(offset & 0xffff)); - break; - case imm2: - offset = (uae_s32)get_ilong_1 (m68kpc_offset); - m68kpc_offset += 4; - sprintf (buffer,"#$%08lx", (unsigned long)offset); - break; - case immi: - offset = (uae_s32)(uae_s8)(reg & 0xff); - sprintf (buffer,"#$%08lx", (unsigned long)offset); + case 5: + tmp = get_long(src); + res = tmp << offset; + bdata[0] = tmp & ~(mask >> offset); + tmp = get_byte(src + 4); + res |= tmp >> (8 - offset); + bdata[1] = tmp & ~(mask << (8 - offset)); break; default: + /* Panic? */ + res = 0; break; } - if (buf == 0) - printf ("%s", buffer); - else - strcat (buf, buffer); - return offset; + return res; } -/* The plan is that this will take over the job of exception 3 handling - - * the CPU emulation functions will just do a longjmp to m68k_go whenever - * they hit an odd address. 
*/ -static int verify_ea (int reg, amodes mode, wordsizes size, uae_u32 *val) +/* + * write bitfield data (in the LSBs) back to memory, upper bits + * must be cleared already. + */ +void put_bitfield(uae_u32 dst, uae_u32 bdata[2], uae_u32 val, uae_s32 offset, int width) { - uae_u16 dp; - uae_s8 disp8; - uae_s16 disp16; - int r; - uae_u32 dispreg; - uaecptr addr; - uae_s32 offset = 0; - - switch (mode){ - case Dreg: - *val = m68k_dreg (regs, reg); - return 1; - case Areg: - *val = m68k_areg (regs, reg); - return 1; - - case Aind: - case Aipi: - addr = m68k_areg (regs, reg); + offset = (offset & 7) + width; + switch ((offset + 7) >> 3) { + case 1: + put_byte(dst, bdata[0] | (val << (8 - offset))); break; - case Apdi: - addr = m68k_areg (regs, reg); + case 2: + put_word(dst, bdata[0] | (val << (16 - offset))); break; - case Ad16: - disp16 = get_iword_1 (m68kpc_offset); m68kpc_offset += 2; - addr = m68k_areg(regs,reg) + (uae_s16)disp16; + case 3: + put_word(dst, bdata[0] | (val >> (offset - 16))); + put_byte(dst + 2, bdata[1] | (val << (24 - offset))); break; - case Ad8r: - addr = m68k_areg (regs, reg); -d8r_common: - dp = get_iword_1 (m68kpc_offset); m68kpc_offset += 2; - disp8 = dp & 0xFF; - r = (dp & 0x7000) >> 12; - dispreg = dp & 0x8000 ? 
m68k_areg(regs,r) : m68k_dreg(regs,r); - if (!(dp & 0x800)) dispreg = (uae_s32)(uae_s16)(dispreg); - dispreg <<= (dp >> 9) & 3; - - if (dp & 0x100) { - uae_s32 outer = 0, disp = 0; - uae_s32 base = addr; - if (dp & 0x80) base = 0; - if (dp & 0x40) dispreg = 0; - if ((dp & 0x30) == 0x20) { disp = (uae_s32)(uae_s16)get_iword_1 (m68kpc_offset); m68kpc_offset += 2; } - if ((dp & 0x30) == 0x30) { disp = get_ilong_1 (m68kpc_offset); m68kpc_offset += 4; } - base += disp; - - if ((dp & 0x3) == 0x2) { outer = (uae_s32)(uae_s16)get_iword_1 (m68kpc_offset); m68kpc_offset += 2; } - if ((dp & 0x3) == 0x3) { outer = get_ilong_1 (m68kpc_offset); m68kpc_offset += 4; } - - if (!(dp & 4)) base += dispreg; - if (dp & 3) base = get_long (base); - if (dp & 4) base += dispreg; - - addr = base + outer; - } else { - addr += (uae_s32)((uae_s8)disp8) + dispreg; - } + case 4: + put_long(dst, bdata[0] | (val << (32 - offset))); break; - case PC16: - addr = m68k_getpc () + m68kpc_offset; - disp16 = get_iword_1 (m68kpc_offset); m68kpc_offset += 2; - addr += (uae_s16)disp16; - break; - case PC8r: - addr = m68k_getpc () + m68kpc_offset; - goto d8r_common; - case absw: - addr = (uae_s32)(uae_s16)get_iword_1 (m68kpc_offset); - m68kpc_offset += 2; - break; - case absl: - addr = get_ilong_1 (m68kpc_offset); - m68kpc_offset += 4; - break; - case imm: - switch (size){ - case sz_byte: - *val = get_iword_1 (m68kpc_offset) & 0xff; - m68kpc_offset += 2; - break; - case sz_word: - *val = get_iword_1 (m68kpc_offset) & 0xffff; - m68kpc_offset += 2; - break; - case sz_long: - *val = get_ilong_1 (m68kpc_offset); - m68kpc_offset += 4; - break; - default: - break; - } - return 1; - case imm0: - *val = (uae_s32)(uae_s8)get_iword_1 (m68kpc_offset); - m68kpc_offset += 2; - return 1; - case imm1: - *val = (uae_s32)(uae_s16)get_iword_1 (m68kpc_offset); - m68kpc_offset += 2; - return 1; - case imm2: - *val = get_ilong_1 (m68kpc_offset); - m68kpc_offset += 4; - return 1; - case immi: - *val = (uae_s32)(uae_s8)(reg & 
0xff); - return 1; - default: - addr = 0; + case 5: + put_long(dst, bdata[0] | (val >> (offset - 32))); + put_byte(dst + 4, bdata[1] | (val << (40 - offset))); break; } - if ((addr & 1) == 0) - return 1; - - last_addr_for_exception_3 = m68k_getpc () + m68kpc_offset; - last_fault_for_exception_3 = addr; - return 0; } uae_u32 get_disp_ea_020 (uae_u32 base, uae_u32 dp) { - int reg = (dp >> 12) & 15; - uae_s32 regd = regs.regs[reg]; - if ((dp & 0x800) == 0) - regd = (uae_s32)(uae_s16)regd; - regd <<= (dp >> 9) & 3; - if (dp & 0x100) { - uae_s32 outer = 0; - if (dp & 0x80) base = 0; - if (dp & 0x40) regd = 0; + int reg = (dp >> 12) & 15; + uae_s32 regd = regs.regs[reg]; + if ((dp & 0x800) == 0) + regd = (uae_s32)(uae_s16)regd; + regd <<= (dp >> 9) & 3; + if (dp & 0x100) { + uae_s32 outer = 0; + if (dp & 0x80) base = 0; + if (dp & 0x40) regd = 0; - if ((dp & 0x30) == 0x20) base += (uae_s32)(uae_s16)next_iword(); - if ((dp & 0x30) == 0x30) base += next_ilong(); + if ((dp & 0x30) == 0x20) base += (uae_s32)(uae_s16)next_iword(); + if ((dp & 0x30) == 0x30) base += next_ilong(); - if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)next_iword(); - if ((dp & 0x3) == 0x3) outer = next_ilong(); + if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)next_iword(); + if ((dp & 0x3) == 0x3) outer = next_ilong(); - if ((dp & 0x4) == 0) base += regd; - if (dp & 0x3) base = get_long (base); - if (dp & 0x4) base += regd; + if ((dp & 0x4) == 0) base += regd; + if (dp & 0x3) base = get_long (base); + if (dp & 0x4) base += regd; - return base + outer; - } else { - return base + (uae_s32)((uae_s8)dp) + regd; - } + return base + outer; + } else { + return base + (uae_s32)((uae_s8)dp) + regd; + } } uae_u32 get_disp_ea_000 (uae_u32 base, uae_u32 dp) { - int reg = (dp >> 12) & 15; - uae_s32 regd = regs.regs[reg]; + int reg = (dp >> 12) & 15; + uae_s32 regd = regs.regs[reg]; #if 1 - if ((dp & 0x800) == 0) - regd = (uae_s32)(uae_s16)regd; - return base + (uae_s8)dp + regd; + if ((dp & 0x800) == 0) + 
regd = (uae_s32)(uae_s16)regd; + return base + (uae_s8)dp + regd; #else - /* Branch-free code... benchmark this again now that - * things are no longer inline. */ - uae_s32 regd16; - uae_u32 mask; - mask = ((dp & 0x800) >> 11) - 1; - regd16 = (uae_s32)(uae_s16)regd; - regd16 &= mask; - mask = ~mask; - base += (uae_s8)dp; - regd &= mask; - regd |= regd16; - return base + regd; + /* Branch-free code... benchmark this again now that + * things are no longer inline. */ + uae_s32 regd16; + uae_u32 mask; + mask = ((dp & 0x800) >> 11) - 1; + regd16 = (uae_s32)(uae_s16)regd; + regd16 &= mask; + mask = ~mask; + base += (uae_s8)dp; + regd &= mask; + regd |= regd16; + return base + regd; #endif } void MakeSR (void) { #if 0 - assert((regs.t1 & 1) == regs.t1); - assert((regs.t0 & 1) == regs.t0); - assert((regs.s & 1) == regs.s); - assert((regs.m & 1) == regs.m); - assert((XFLG & 1) == XFLG); - assert((NFLG & 1) == NFLG); - assert((ZFLG & 1) == ZFLG); - assert((VFLG & 1) == VFLG); - assert((CFLG & 1) == CFLG); + assert((regs.t1 & 1) == regs.t1); + assert((regs.t0 & 1) == regs.t0); + assert((regs.s & 1) == regs.s); + assert((regs.m & 1) == regs.m); + assert((XFLG & 1) == XFLG); + assert((NFLG & 1) == NFLG); + assert((ZFLG & 1) == ZFLG); + assert((VFLG & 1) == VFLG); + assert((CFLG & 1) == CFLG); #endif - regs.sr = ((regs.t1 << 15) | (regs.t0 << 14) - | (regs.s << 13) | (regs.m << 12) | (regs.intmask << 8) - | (GET_XFLG << 4) | (GET_NFLG << 3) | (GET_ZFLG << 2) | (GET_VFLG << 1) - | GET_CFLG); + regs.sr = ((regs.t1 << 15) | (regs.t0 << 14) + | (regs.s << 13) | (regs.m << 12) | (regs.intmask << 8) + | (GET_XFLG() << 4) | (GET_NFLG() << 3) | (GET_ZFLG() << 2) | (GET_VFLG() << 1) + | GET_CFLG()); } void MakeFromSR (void) { - int oldm = regs.m; - int olds = regs.s; + int oldm = regs.m; + int olds = regs.s; - regs.t1 = (regs.sr >> 15) & 1; - regs.t0 = (regs.sr >> 14) & 1; - regs.s = (regs.sr >> 13) & 1; - regs.m = (regs.sr >> 12) & 1; - regs.intmask = (regs.sr >> 8) & 7; - SET_XFLG 
((regs.sr >> 4) & 1); - SET_NFLG ((regs.sr >> 3) & 1); - SET_ZFLG ((regs.sr >> 2) & 1); - SET_VFLG ((regs.sr >> 1) & 1); - SET_CFLG (regs.sr & 1); - if (CPUType >= 2) { - if (olds != regs.s) { - if (olds) { - if (oldm) - regs.msp = m68k_areg(regs, 7); - else - regs.isp = m68k_areg(regs, 7); - m68k_areg(regs, 7) = regs.usp; - } else { - regs.usp = m68k_areg(regs, 7); - m68k_areg(regs, 7) = regs.m ? regs.msp : regs.isp; - } - } else if (olds && oldm != regs.m) { - if (oldm) { - regs.msp = m68k_areg(regs, 7); - m68k_areg(regs, 7) = regs.isp; - } else { - regs.isp = m68k_areg(regs, 7); - m68k_areg(regs, 7) = regs.msp; - } - } - } else { - if (olds != regs.s) { - if (olds) { - regs.isp = m68k_areg(regs, 7); - m68k_areg(regs, 7) = regs.usp; - } else { - regs.usp = m68k_areg(regs, 7); - m68k_areg(regs, 7) = regs.isp; - } - } + regs.t1 = (regs.sr >> 15) & 1; + regs.t0 = (regs.sr >> 14) & 1; + regs.s = (regs.sr >> 13) & 1; + mmu_set_super(regs.s); + regs.m = (regs.sr >> 12) & 1; + regs.intmask = (regs.sr >> 8) & 7; + SET_XFLG ((regs.sr >> 4) & 1); + SET_NFLG ((regs.sr >> 3) & 1); + SET_ZFLG ((regs.sr >> 2) & 1); + SET_VFLG ((regs.sr >> 1) & 1); + SET_CFLG (regs.sr & 1); + if (olds != regs.s) { + if (olds) { + if (oldm) + regs.msp = m68k_areg(regs, 7); + else + regs.isp = m68k_areg(regs, 7); + m68k_areg(regs, 7) = regs.usp; + } else { + regs.usp = m68k_areg(regs, 7); + m68k_areg(regs, 7) = regs.m ? regs.msp : regs.isp; + } + } else if (olds && oldm != regs.m) { + if (oldm) { + regs.msp = m68k_areg(regs, 7); + m68k_areg(regs, 7) = regs.isp; + } else { + regs.isp = m68k_areg(regs, 7); + m68k_areg(regs, 7) = regs.msp; + } } - SPCFLAGS_SET( SPCFLAG_INT ); - if (regs.t1 || regs.t0) - SPCFLAGS_SET( SPCFLAG_TRACE ); - else - /* Keep SPCFLAG_DOTRACE, we still want a trace exception for - SR-modifying instructions (including STOP). 
*/ - SPCFLAGS_CLEAR( SPCFLAG_TRACE ); + // SPCFLAGS_SET( SPCFLAG_INT ); + SPCFLAGS_SET( SPCFLAG_INT ); + if (regs.t1 || regs.t0) + SPCFLAGS_SET( SPCFLAG_TRACE ); + else + SPCFLAGS_CLEAR( SPCFLAG_TRACE ); } +/* for building exception frames */ +static inline void exc_push_word(uae_u16 w) +{ + m68k_areg(regs, 7) -= 2; + put_word(m68k_areg(regs, 7), w); +} +static inline void exc_push_long(uae_u32 l) +{ + m68k_areg(regs, 7) -= 4; + put_long (m68k_areg(regs, 7), l); +} + +static inline void exc_make_frame( + int format, + uae_u16 sr, + uae_u32 currpc, + int nr, + uae_u32 x0, + uae_u32 x1 +) +{ + switch(format) { + case 4: + exc_push_long(x1); + exc_push_long(x0); + break; + case 3: + case 2: + exc_push_long(x0); + break; + } + + exc_push_word((format << 12) + (nr * 4)); /* format | vector */ + exc_push_long(currpc); + exc_push_word(sr); +#if 0 /* debugging helpers; activate as needed */ + if (/* nr != 0x45 && */ /* Timer-C */ + nr != 0x1c && /* VBL */ + nr != 0x46) /* ACIA */ + { + memptr sp = m68k_areg(regs, 7); + uae_u16 sr = get_word(sp); + fprintf(stderr, "Exc:%02x SP: %08x USP: %08x SR: %04x PC: %08x Format: %04x", nr, sp, regs.usp, sr, get_long(sp + 2), get_word(sp + 6)); + if (nr >= 32 && nr < 48) + { + fprintf(stderr, " Opcode: $%04x", sr & 0x2000 ? 
get_word(sp + 8) : get_word(regs.usp)); + } + fprintf(stderr, "\n"); + } +#endif +} + + +void ex_rte(void) +{ + uae_u16 newsr; + uae_u32 newpc; + uae_s16 format; + + for (;;) + { + newsr = get_word(m68k_areg(regs, 7)); + m68k_areg(regs, 7) += 2; + newpc = get_long(m68k_areg(regs, 7)); + m68k_areg(regs, 7) += 4; + format = get_word(m68k_areg(regs, 7)); + m68k_areg(regs, 7) += 2; + if ((format & 0xF000) == 0x0000) break; + else if ((format & 0xF000) == 0x1000) { ; } + else if ((format & 0xF000) == 0x2000) { m68k_areg(regs, 7) += 4; break; } +// else if ((format & 0xF000) == 0x3000) { m68k_areg(regs, 7) += 4; break; } + else if ((format & 0xF000) == 0x7000) { m68k_areg(regs, 7) += 52; break; } + else if ((format & 0xF000) == 0x8000) { m68k_areg(regs, 7) += 50; break; } + else if ((format & 0xF000) == 0x9000) { m68k_areg(regs, 7) += 12; break; } + else if ((format & 0xF000) == 0xa000) { m68k_areg(regs, 7) += 24; break; } + else if ((format & 0xF000) == 0xb000) { m68k_areg(regs, 7) += 84; break; } + else { Exception(14,0); return; } + regs.sr = newsr; + MakeFromSR(); + } +#if 0 /* debugging helpers; activate as needed */ + { + memptr sp = m68k_areg(regs, 7) - 8; + int nr = (format & 0xfff) >> 2; + if (/* nr != 0x45 && */ /* Timer-C */ + nr != 0x1c && /* VBL */ + nr != 0x46) /* ACIA */ + fprintf(stderr, "RTE SP: %08x USP: %08x SR: %04x PC: %08x Format: %04x olds=%d nr=%02x -> %08x\n", sp, regs.usp, newsr, m68k_getpc(), format, regs.s, nr, newpc); + } +#endif + regs.sr = newsr; + MakeFromSR(); + m68k_setpc_rte(newpc); + fill_prefetch_0(); +} + +#ifdef EXCEPTIONS_VIA_LONGJMP +static int building_bus_fault_stack_frame=0; +#endif + void Exception(int nr, uaecptr oldpc) { - uae_u32 currpc = m68k_getpc (); - MakeSR(); - if (!regs.s) { - regs.usp = m68k_areg(regs, 7); - if (CPUType >= 2) - m68k_areg(regs, 7) = regs.m ? 
regs.msp : regs.isp; - else - m68k_areg(regs, 7) = regs.isp; - regs.s = 1; + uae_u32 currpc = m68k_getpc (); + MakeSR(); + + if (fixup.flag) + { + m68k_areg(regs, fixup.reg) = fixup.value; + fixup.flag = 0; + } + + if (!regs.s) { + regs.usp = m68k_areg(regs, 7); + m68k_areg(regs, 7) = regs.m ? regs.msp : regs.isp; + regs.s = 1; + mmu_set_super(1); + } + + if (nr == 2) { + /* BUS ERROR handler begins */ +#ifdef ENABLE_EPSLIMITER + check_eps_limit(currpc); +#endif + // panicbug("Exception Nr. %d CPC: %08x NPC: %08x SP=%08x Addr: %08x", nr, currpc, get_long (regs.vbr + 4*nr), m68k_areg(regs, 7), regs.mmu_fault_addr); +#ifdef EXCEPTIONS_VIA_LONGJMP + if (!building_bus_fault_stack_frame) +#else + try +#endif + { +#ifdef EXCEPTIONS_VIA_LONGJMP + building_bus_fault_stack_frame= 1; +#endif + /* 68040 */ + exc_push_long(0); /* PD3 */ + exc_push_long(0); /* PD2 */ + exc_push_long(0); /* PD1 */ + exc_push_long(0); /* PD0/WB1D */ + exc_push_long(0); /* WB1A */ + exc_push_long(0); /* WB2D */ + exc_push_long(0); /* WB2A */ + exc_push_long(regs.wb3_data); /* WB3D */ + exc_push_long(regs.mmu_fault_addr); /* WB3A */ + exc_push_long(regs.mmu_fault_addr); + exc_push_word(0); /* WB1S */ + exc_push_word(0); /* WB2S */ + exc_push_word(regs.wb3_status); /* WB3S */ + regs.wb3_status = 0; + exc_push_word(regs.mmu_ssw); + exc_push_long(regs.mmu_fault_addr); /* EA */ + exc_make_frame(7, regs.sr, regs.fault_pc, 2, 0, 0); + } - if (CPUType > 0) { - if (nr == 2 || nr == 3) { - int i; - /* @@@ this is probably wrong (?) 
*/ - for (i = 0 ; i < 12 ; i++) { - m68k_areg(regs, 7) -= 2; - put_word (m68k_areg(regs, 7), 0); - } - m68k_areg(regs, 7) -= 2; - put_word (m68k_areg(regs, 7), 0xa000 + nr * 4); - } else if (nr ==5 || nr == 6 || nr == 7 || nr == 9) { - m68k_areg(regs, 7) -= 4; - put_long (m68k_areg(regs, 7), oldpc); - m68k_areg(regs, 7) -= 2; - put_word (m68k_areg(regs, 7), 0x2000 + nr * 4); - } else if (regs.m && nr >= 24 && nr < 32) { - m68k_areg(regs, 7) -= 2; - put_word (m68k_areg(regs, 7), nr * 4); - m68k_areg(regs, 7) -= 4; - put_long (m68k_areg(regs, 7), currpc); - m68k_areg(regs, 7) -= 2; - put_word (m68k_areg(regs, 7), regs.sr); - regs.sr |= (1 << 13); - regs.msp = m68k_areg(regs, 7); - m68k_areg(regs, 7) = regs.isp; - m68k_areg(regs, 7) -= 2; - put_word (m68k_areg(regs, 7), 0x1000 + nr * 4); - } else { - m68k_areg(regs, 7) -= 2; - put_word (m68k_areg(regs, 7), nr * 4); - } - } else { - if (nr == 2 || nr == 3) { - m68k_areg(regs, 7) -= 12; - /* ??????? */ - if (nr == 3) { - put_long (m68k_areg(regs, 7), last_fault_for_exception_3); - put_word (m68k_areg(regs, 7)+4, last_op_for_exception_3); - put_long (m68k_areg(regs, 7)+8, last_addr_for_exception_3); - } - write_log ("Exception!\n"); - goto kludge_me_do; - } - } - m68k_areg(regs, 7) -= 4; - put_long (m68k_areg(regs, 7), currpc); -kludge_me_do: - m68k_areg(regs, 7) -= 2; - put_word (m68k_areg(regs, 7), regs.sr); - m68k_setpc (get_long (regs.vbr + 4*nr)); - SPCFLAGS_SET( SPCFLAG_JIT_END_COMPILE ); - fill_prefetch_0 (); - regs.t1 = regs.t0 = regs.m = 0; - SPCFLAGS_CLEAR( SPCFLAG_TRACE | SPCFLAG_DOTRACE ); +#ifdef EXCEPTIONS_VIA_LONGJMP + else +#else + catch (m68k_exception) +#endif + { + report_double_bus_error(); +#ifdef EXCEPTIONS_VIA_LONGJMP + building_bus_fault_stack_frame= 0; +#endif + return; + } + +#ifdef EXCEPTIONS_VIA_LONGJMP + building_bus_fault_stack_frame= 0; +#endif + /* end of BUS ERROR handler */ + } else if (nr == 3) { + exc_make_frame(2, regs.sr, last_addr_for_exception_3, nr, + last_fault_for_exception_3 & 
0xfffffffe, 0); + } else if (nr == 5 || nr == 6 || nr == 7 || nr == 9) { + /* div by zero, CHK, TRAP or TRACE */ + exc_make_frame(2, regs.sr, currpc, nr, oldpc, 0); + } else if (regs.m && nr >= 24 && nr < 32) { + /* interrupts! */ + exc_make_frame(0, regs.sr, currpc, nr, 0, 0); + regs.sr |= (1 << 13); + regs.msp = m68k_areg(regs, 7); + m68k_areg(regs, 7) = regs.isp; + + exc_make_frame(1, /* throwaway */ + regs.sr, currpc, nr, 0, 0); + } else { + exc_make_frame(0, regs.sr, currpc, nr, 0, 0); + } + m68k_setpc (get_long (regs.vbr + 4*nr)); + SPCFLAGS_SET( SPCFLAG_JIT_END_COMPILE ); + fill_prefetch_0 (); + regs.t1 = regs.t0 = regs.m = 0; + SPCFLAGS_CLEAR(SPCFLAG_TRACE | SPCFLAG_DOTRACE); } static void Interrupt(int nr) { - assert(nr < 8 && nr >= 0); - lastint_regs = regs; - lastint_no = nr; - Exception(nr+24, 0); + assert(nr < 8 && nr >= 0); + Exception(nr+24, 0); - regs.intmask = nr; - SPCFLAGS_SET( SPCFLAG_INT ); + regs.intmask = nr; + // why the hell the SPCFLAG_INT is to be set??? (joy) + // regs.spcflags |= SPCFLAG_INT; (disabled by joy) + SPCFLAGS_SET( SPCFLAG_INT ); } -static int caar, cacr, tc, itt0, itt1, dtt0, dtt1, mmusr, urp, srp; - -static int movec_illg (int regno) +static void SCCInterrupt(int nr) { - switch (CPUType) { - case 1: - if ((regno & 0x7ff) <= 1) - return 0; - break; - case 2: - case 3: - if ((regno & 0x7ff) <= 2) - return 0; - if (regno == 3 || regno == 4) - return 0; - break; - case 4: - if ((regno & 0x7ff) <= 7) { - if (regno != 0x802) - return 0; - } - break; - } - return 1; + // fprintf(stderr, "CPU: in SCCInterrupt\n"); + Exception(nr, 0); + + regs.intmask = 5;// ex 5 +} + +static void MFPInterrupt(int nr) +{ + // fprintf(stderr, "CPU: in MFPInterrupt\n"); + Exception(nr, 0); + + regs.intmask = 6; } int m68k_move2c (int regno, uae_u32 *regp) { - if (movec_illg (regno)) { - op_illg (0x4E7B); - return 0; - } else { - switch (regno) { - case 0: regs.sfc = *regp & 7; break; - case 1: regs.dfc = *regp & 7; break; - case 2: - cacr = *regp & 
(CPUType < 4 ? 0x3 : 0x80008000); -#if USE_JIT - if (CPUType < 4) { - set_cache_state(cacr&1); - if (*regp & 0x08) - flush_icache(1); - } - else { - set_cache_state(cacr&0x8000); - } + switch (regno) { + case 0: regs.sfc = *regp & 7; break; + case 1: regs.dfc = *regp & 7; break; + case 2: regs.cacr = *regp & 0x80008000; +#ifdef USE_JIT + set_cache_state(regs.cacr & 0x8000); + if (*regp & 0x08) { /* Just to be on the safe side */ + flush_icache(); + } #endif - break; - case 3: tc = *regp & 0xc000; break; - case 4: itt0 = *regp & 0xffffe364; break; - case 5: itt1 = *regp & 0xffffe364; break; - case 6: dtt0 = *regp & 0xffffe364; break; - case 7: dtt1 = *regp & 0xffffe364; break; - case 0x800: regs.usp = *regp; break; - case 0x801: regs.vbr = *regp; break; - case 0x802: caar = *regp &0xfc; break; - case 0x803: regs.msp = *regp; if (regs.m == 1) m68k_areg(regs, 7) = regs.msp; break; - case 0x804: regs.isp = *regp; if (regs.m == 0) m68k_areg(regs, 7) = regs.isp; break; - case 0x805: mmusr = *regp; break; - case 0x806: urp = *regp; break; - case 0x807: srp = *regp; break; - default: - op_illg (0x4E7B); - return 0; - } + break; + case 3: mmu_set_tc(*regp & 0xc000); break; + case 4: + case 5: + case 6: + case 7: mmu_set_ttr(regno, *regp & 0xffffe364); break; + case 0x800: regs.usp = *regp; break; + case 0x801: regs.vbr = *regp; break; + case 0x802: regs.caar = *regp & 0xfc; break; + case 0x803: regs.msp = *regp; if (regs.m == 1) m68k_areg(regs, 7) = regs.msp; break; + case 0x804: regs.isp = *regp; if (regs.m == 0) m68k_areg(regs, 7) = regs.isp; break; + case 0x805: mmu_set_mmusr(*regp); break; + case 0x806: regs.urp = *regp & MMU_ROOT_PTR_ADDR_MASK; break; + case 0x807: regs.srp = *regp & MMU_ROOT_PTR_ADDR_MASK; break; + default: + op_illg (0x4E7B); + return 0; } - return 1; + return 1; } int m68k_movec2 (int regno, uae_u32 *regp) { - if (movec_illg (regno)) - { - op_illg (0x4E7A); - return 0; - } else { - switch (regno) { - case 0: *regp = regs.sfc; break; - case 1: *regp 
= regs.dfc; break; - case 2: *regp = cacr; break; - case 3: *regp = tc; break; - case 4: *regp = itt0; break; - case 5: *regp = itt1; break; - case 6: *regp = dtt0; break; - case 7: *regp = dtt1; break; - case 0x800: *regp = regs.usp; break; - case 0x801: *regp = regs.vbr; break; - case 0x802: *regp = caar; break; - case 0x803: *regp = regs.m == 1 ? m68k_areg(regs, 7) : regs.msp; break; - case 0x804: *regp = regs.m == 0 ? m68k_areg(regs, 7) : regs.isp; break; - case 0x805: *regp = mmusr; break; - case 0x806: *regp = urp; break; - case 0x807: *regp = srp; break; - default: - op_illg (0x4E7A); - return 0; - } + switch (regno) { + case 0: *regp = regs.sfc; break; + case 1: *regp = regs.dfc; break; + case 2: *regp = regs.cacr; break; + case 3: *regp = regs.tc; break; + case 4: *regp = regs.itt0; break; + case 5: *regp = regs.itt1; break; + case 6: *regp = regs.dtt0; break; + case 7: *regp = regs.dtt1; break; + case 0x800: *regp = regs.usp; break; + case 0x801: *regp = regs.vbr; break; + case 0x802: *regp = regs.caar; break; + case 0x803: *regp = regs.m == 1 ? m68k_areg(regs, 7) : regs.msp; break; + case 0x804: *regp = regs.m == 0 ? 
m68k_areg(regs, 7) : regs.isp; break; + case 0x805: *regp = regs.mmusr; break; + case 0x806: *regp = regs.urp; break; + case 0x807: *regp = regs.srp; break; + default: + op_illg (0x4E7A); + return 0; } - return 1; + return 1; } -static __inline__ int +#if !defined(uae_s64) +static inline int div_unsigned(uae_u32 src_hi, uae_u32 src_lo, uae_u32 div, uae_u32 *quot, uae_u32 *rem) { uae_u32 q = 0, cbit = 0; int i; if (div <= src_hi) { - return 1; + return 1; } for (i = 0 ; i < 32 ; i++) { cbit = src_hi & 0x80000000ul; @@ -943,129 +762,131 @@ div_unsigned(uae_u32 src_hi, uae_u32 src_lo, uae_u32 div, uae_u32 *quot, uae_u32 *rem = src_hi; return 0; } +#endif -void m68k_divl (uae_u32 opcode, uae_u32 src, uae_u16 extra, uaecptr oldpc) +void m68k_divl (uae_u32 /*opcode*/, uae_u32 src, uae_u16 extra, uaecptr oldpc) { #if defined(uae_s64) - if (src == 0) { - Exception (5, oldpc); - return; - } - if (extra & 0x800) { - /* signed variant */ - uae_s64 a = (uae_s64)(uae_s32)m68k_dreg(regs, (extra >> 12) & 7); - uae_s64 quot, rem; + if (src == 0) { + Exception (5, oldpc); + return; + } + if (extra & 0x800) { + /* signed variant */ + uae_s64 a = (uae_s64)(uae_s32)m68k_dreg(regs, (extra >> 12) & 7); + uae_s64 quot, rem; - if (extra & 0x400) { - a &= 0xffffffffu; - a |= (uae_s64)m68k_dreg(regs, extra & 7) << 32; - } - rem = a % (uae_s64)(uae_s32)src; - quot = a / (uae_s64)(uae_s32)src; - if ((quot & UVAL64(0xffffffff80000000)) != 0 - && (quot & UVAL64(0xffffffff80000000)) != UVAL64(0xffffffff80000000)) - { - SET_VFLG (1); - SET_NFLG (1); - SET_CFLG (0); - } else { - if (((uae_s32)rem < 0) != ((uae_s64)a < 0)) rem = -rem; - SET_VFLG (0); - SET_CFLG (0); - SET_ZFLG (((uae_s32)quot) == 0); - SET_NFLG (((uae_s32)quot) < 0); - m68k_dreg(regs, extra & 7) = (uae_u32)rem; - m68k_dreg(regs, (extra >> 12) & 7) = (uae_u32)quot; - } + if (extra & 0x400) { + a &= 0xffffffffu; + a |= (uae_s64)m68k_dreg(regs, extra & 7) << 32; + } + rem = a % (uae_s64)(uae_s32)src; + quot = a / 
(uae_s64)(uae_s32)src; + if ((quot & UVAL64(0xffffffff80000000)) != 0 + && (quot & UVAL64(0xffffffff80000000)) != UVAL64(0xffffffff80000000)) + { + SET_VFLG (1); + SET_NFLG (1); + SET_CFLG (0); } else { - /* unsigned */ - uae_u64 a = (uae_u64)(uae_u32)m68k_dreg(regs, (extra >> 12) & 7); - uae_u64 quot, rem; - - if (extra & 0x400) { - a &= 0xffffffffu; - a |= (uae_u64)m68k_dreg(regs, extra & 7) << 32; - } - rem = a % (uae_u64)src; - quot = a / (uae_u64)src; - if (quot > 0xffffffffu) { - SET_VFLG (1); - SET_NFLG (1); - SET_CFLG (0); - } else { - SET_VFLG (0); - SET_CFLG (0); - SET_ZFLG (((uae_s32)quot) == 0); - SET_NFLG (((uae_s32)quot) < 0); - m68k_dreg(regs, extra & 7) = (uae_u32)rem; - m68k_dreg(regs, (extra >> 12) & 7) = (uae_u32)quot; - } + if (((uae_s32)rem < 0) != ((uae_s64)a < 0)) rem = -rem; + SET_VFLG (0); + SET_CFLG (0); + SET_ZFLG (((uae_s32)quot) == 0); + SET_NFLG (((uae_s32)quot) < 0); + m68k_dreg(regs, extra & 7) = rem; + m68k_dreg(regs, (extra >> 12) & 7) = quot; } + } else { + /* unsigned */ + uae_u64 a = (uae_u64)(uae_u32)m68k_dreg(regs, (extra >> 12) & 7); + uae_u64 quot, rem; + + if (extra & 0x400) { + a &= 0xffffffffu; + a |= (uae_u64)m68k_dreg(regs, extra & 7) << 32; + } + rem = a % (uae_u64)src; + quot = a / (uae_u64)src; + if (quot > 0xffffffffu) { + SET_VFLG (1); + SET_NFLG (1); + SET_CFLG (0); + } else { + SET_VFLG (0); + SET_CFLG (0); + SET_ZFLG (((uae_s32)quot) == 0); + SET_NFLG (((uae_s32)quot) < 0); + m68k_dreg(regs, extra & 7) = rem; + m68k_dreg(regs, (extra >> 12) & 7) = quot; + } + } #else - if (src == 0) { - Exception (5, oldpc); - return; - } - if (extra & 0x800) { - /* signed variant */ - uae_s32 lo = (uae_s32)m68k_dreg(regs, (extra >> 12) & 7); - uae_s32 hi = lo < 0 ? -1 : 0; - uae_s32 save_high; - uae_u32 quot, rem; - uae_u32 sign; + if (src == 0) { + Exception (5, oldpc); + return; + } + if (extra & 0x800) { + /* signed variant */ + uae_s32 lo = (uae_s32)m68k_dreg(regs, (extra >> 12) & 7); + uae_s32 hi = lo < 0 ? 
-1 : 0; + uae_s32 save_high; + uae_u32 quot, rem; + uae_u32 sign; - if (extra & 0x400) { - hi = (uae_s32)m68k_dreg(regs, extra & 7); - } - save_high = hi; - sign = (hi ^ src); - if (hi < 0) { - hi = ~hi; - lo = -lo; - if (lo == 0) hi++; - } - if ((uae_s32)src < 0) src = -src; - if (div_unsigned(hi, lo, src, &quot, &rem) || - (sign & 0x80000000) ? quot > 0x80000000 : quot > 0x7fffffff) { - SET_VFLG (1); - SET_NFLG (1); - SET_CFLG (0); - } else { - if (sign & 0x80000000) quot = -quot; - if (((uae_s32)rem < 0) != (save_high < 0)) rem = -rem; - SET_VFLG (0); - SET_CFLG (0); - SET_ZFLG (((uae_s32)quot) == 0); - SET_NFLG (((uae_s32)quot) < 0); - m68k_dreg(regs, extra & 7) = rem; - m68k_dreg(regs, (extra >> 12) & 7) = quot; - } + if (extra & 0x400) { + hi = (uae_s32)m68k_dreg(regs, extra & 7); + } + save_high = hi; + sign = (hi ^ src); + if (hi < 0) { + hi = ~hi; + lo = -lo; + if (lo == 0) hi++; + } + if ((uae_s32)src < 0) src = -src; + if (div_unsigned(hi, lo, src, &quot, &rem) || + (sign & 0x80000000) ?
quot > 0x80000000 : quot > 0x7fffffff) { + SET_VFLG (1); + SET_NFLG (1); + SET_CFLG (0); } else { - /* unsigned */ - uae_u32 lo = (uae_u32)m68k_dreg(regs, (extra >> 12) & 7); - uae_u32 hi = 0; - uae_u32 quot, rem; - - if (extra & 0x400) { - hi = (uae_u32)m68k_dreg(regs, extra & 7); - } - if (div_unsigned(hi, lo, src, &quot, &rem)) { - SET_VFLG (1); - SET_NFLG (1); - SET_CFLG (0); - } else { - SET_VFLG (0); - SET_CFLG (0); - SET_ZFLG (((uae_s32)quot) == 0); - SET_NFLG (((uae_s32)quot) < 0); - m68k_dreg(regs, extra & 7) = rem; - m68k_dreg(regs, (extra >> 12) & 7) = quot; - } + if (sign & 0x80000000) quot = -quot; + if (((uae_s32)rem < 0) != (save_high < 0)) rem = -rem; + SET_VFLG (0); + SET_CFLG (0); + SET_ZFLG (((uae_s32)quot) == 0); + SET_NFLG (((uae_s32)quot) < 0); + m68k_dreg(regs, extra & 7) = rem; + m68k_dreg(regs, (extra >> 12) & 7) = quot; } + } else { + /* unsigned */ + uae_u32 lo = (uae_u32)m68k_dreg(regs, (extra >> 12) & 7); + uae_u32 hi = 0; + uae_u32 quot, rem; + + if (extra & 0x400) { + hi = (uae_u32)m68k_dreg(regs, extra & 7); + } + if (div_unsigned(hi, lo, src, &quot, &rem)) { + SET_VFLG (1); + SET_NFLG (1); + SET_CFLG (0); + } else { + SET_VFLG (0); + SET_CFLG (0); + SET_ZFLG (((uae_s32)quot) == 0); + SET_NFLG (((uae_s32)quot) < 0); + m68k_dreg(regs, extra & 7) = rem; + m68k_dreg(regs, (extra >> 12) & 7) = quot; + } + } #endif } -static __inline__ void +#if !defined(uae_s64) +static inline void mul_unsigned(uae_u32 src1, uae_u32 src2, uae_u32 *dst_hi, uae_u32 *dst_lo) { uae_u32 r0 = (src1 & 0xffff) * (src2 & 0xffff); @@ -1083,148 +904,202 @@ mul_unsigned(uae_u32 src1, uae_u32 src2, uae_u32 *dst_hi, uae_u32 *dst_lo) *dst_lo = lo; *dst_hi = r3; } +#endif -void m68k_mull (uae_u32 opcode, uae_u32 src, uae_u16 extra) +void m68k_mull (uae_u32 /*opcode*/, uae_u32 src, uae_u16 extra) { #if defined(uae_s64) - if (extra & 0x800) { - /* signed variant */ - uae_s64 a = (uae_s64)(uae_s32)m68k_dreg(regs, (extra >> 12) & 7); + if (extra & 0x800) { + /* signed variant */ +
uae_s64 a = (uae_s64)(uae_s32)m68k_dreg(regs, (extra >> 12) & 7); - a *= (uae_s64)(uae_s32)src; - SET_VFLG (0); - SET_CFLG (0); - SET_ZFLG (a == 0); - SET_NFLG (a < 0); - if (extra & 0x400) - m68k_dreg(regs, extra & 7) = a >> 32; - else if ((a & UVAL64(0xffffffff80000000)) != 0 - && (a & UVAL64(0xffffffff80000000)) != UVAL64(0xffffffff80000000)) - { - SET_VFLG (1); - } - m68k_dreg(regs, (extra >> 12) & 7) = (uae_u32)a; - } else { - /* unsigned */ - uae_u64 a = (uae_u64)(uae_u32)m68k_dreg(regs, (extra >> 12) & 7); - - a *= (uae_u64)src; - SET_VFLG (0); - SET_CFLG (0); - SET_ZFLG (a == 0); - SET_NFLG (((uae_s64)a) < 0); - if (extra & 0x400) - m68k_dreg(regs, extra & 7) = a >> 32; - else if ((a & UVAL64(0xffffffff00000000)) != 0) { - SET_VFLG (1); - } - m68k_dreg(regs, (extra >> 12) & 7) = (uae_u32)a; + a *= (uae_s64)(uae_s32)src; + SET_VFLG (0); + SET_CFLG (0); + SET_ZFLG (a == 0); + SET_NFLG (a < 0); + if (extra & 0x400) + m68k_dreg(regs, extra & 7) = a >> 32; + else if ((a & UVAL64(0xffffffff80000000)) != 0 + && (a & UVAL64(0xffffffff80000000)) != UVAL64(0xffffffff80000000)) + { + SET_VFLG (1); } + m68k_dreg(regs, (extra >> 12) & 7) = (uae_u32)a; + } else { + /* unsigned */ + uae_u64 a = (uae_u64)(uae_u32)m68k_dreg(regs, (extra >> 12) & 7); + + a *= (uae_u64)src; + SET_VFLG (0); + SET_CFLG (0); + SET_ZFLG (a == 0); + SET_NFLG (((uae_s64)a) < 0); + if (extra & 0x400) + m68k_dreg(regs, extra & 7) = a >> 32; + else if ((a & UVAL64(0xffffffff00000000)) != 0) { + SET_VFLG (1); + } + m68k_dreg(regs, (extra >> 12) & 7) = (uae_u32)a; + } #else - if (extra & 0x800) { - /* signed variant */ - uae_s32 src1,src2; - uae_u32 dst_lo,dst_hi; - uae_u32 sign; + if (extra & 0x800) { + /* signed variant */ + uae_s32 src1,src2; + uae_u32 dst_lo,dst_hi; + uae_u32 sign; - src1 = (uae_s32)src; - src2 = (uae_s32)m68k_dreg(regs, (extra >> 12) & 7); - sign = (src1 ^ src2); - if (src1 < 0) src1 = -src1; - if (src2 < 0) src2 = -src2; - mul_unsigned((uae_u32)src1,(uae_u32)src2,&dst_hi,&dst_lo); 
- if (sign & 0x80000000) { - dst_hi = ~dst_hi; - dst_lo = -dst_lo; - if (dst_lo == 0) dst_hi++; - } - SET_VFLG (0); - SET_CFLG (0); - SET_ZFLG (dst_hi == 0 && dst_lo == 0); - SET_NFLG (((uae_s32)dst_hi) < 0); - if (extra & 0x400) - m68k_dreg(regs, extra & 7) = dst_hi; - else if ((dst_hi != 0 || (dst_lo & 0x80000000) != 0) - && ((dst_hi & 0xffffffff) != 0xffffffff - || (dst_lo & 0x80000000) != 0x80000000)) - { - SET_VFLG (1); - } - m68k_dreg(regs, (extra >> 12) & 7) = dst_lo; - } else { - /* unsigned */ - uae_u32 dst_lo,dst_hi; - - mul_unsigned(src,(uae_u32)m68k_dreg(regs, (extra >> 12) & 7),&dst_hi,&dst_lo); - - SET_VFLG (0); - SET_CFLG (0); - SET_ZFLG (dst_hi == 0 && dst_lo == 0); - SET_NFLG (((uae_s32)dst_hi) < 0); - if (extra & 0x400) - m68k_dreg(regs, extra & 7) = dst_hi; - else if (dst_hi != 0) { - SET_VFLG (1); - } - m68k_dreg(regs, (extra >> 12) & 7) = dst_lo; + src1 = (uae_s32)src; + src2 = (uae_s32)m68k_dreg(regs, (extra >> 12) & 7); + sign = (src1 ^ src2); + if (src1 < 0) src1 = -src1; + if (src2 < 0) src2 = -src2; + mul_unsigned((uae_u32)src1,(uae_u32)src2,&dst_hi,&dst_lo); + if (sign & 0x80000000) { + dst_hi = ~dst_hi; + dst_lo = -dst_lo; + if (dst_lo == 0) dst_hi++; } + SET_VFLG (0); + SET_CFLG (0); + SET_ZFLG (dst_hi == 0 && dst_lo == 0); + SET_NFLG (((uae_s32)dst_hi) < 0); + if (extra & 0x400) + m68k_dreg(regs, extra & 7) = dst_hi; + else if ((dst_hi != 0 || (dst_lo & 0x80000000) != 0) + && ((dst_hi & 0xffffffff) != 0xffffffff + || (dst_lo & 0x80000000) != 0x80000000)) + { + SET_VFLG (1); + } + m68k_dreg(regs, (extra >> 12) & 7) = dst_lo; + } else { + /* unsigned */ + uae_u32 dst_lo,dst_hi; + + mul_unsigned(src,(uae_u32)m68k_dreg(regs, (extra >> 12) & 7),&dst_hi,&dst_lo); + + SET_VFLG (0); + SET_CFLG (0); + SET_ZFLG (dst_hi == 0 && dst_lo == 0); + SET_NFLG (((uae_s32)dst_hi) < 0); + if (extra & 0x400) + m68k_dreg(regs, extra & 7) = dst_hi; + else if (dst_hi != 0) { + SET_VFLG (1); + } + m68k_dreg(regs, (extra >> 12) & 7) = dst_lo; + } #endif } 
-static const char* ccnames[] = -{ "T ","F ","HI","LS","CC","CS","NE","EQ", - "VC","VS","PL","MI","GE","LT","GT","LE" }; // If value is greater than zero, this means we are still processing an EmulOp // because the counter is incremented only in m68k_execute(), i.e. interpretive // execution only +#ifdef USE_JIT static int m68k_execute_depth = 0; +#endif void m68k_reset (void) { - m68k_areg (regs, 7) = 0x2000; - m68k_setpc (ROMBaseMac + 0x2a); - fill_prefetch_0 (); - regs.s = 1; - regs.m = 0; - regs.stopped = 0; - regs.t1 = 0; - regs.t0 = 0; - SET_ZFLG (0); - SET_XFLG (0); - SET_CFLG (0); - SET_VFLG (0); - SET_NFLG (0); - SPCFLAGS_INIT( 0 ); - regs.intmask = 7; - regs.vbr = regs.sfc = regs.dfc = 0; - fpu_reset(); + regs.s = 1; + regs.m = 0; + regs.stopped = 0; + regs.t1 = 0; + regs.t0 = 0; + SET_ZFLG (0); + SET_XFLG (0); + SET_CFLG (0); + SET_VFLG (0); + SET_NFLG (0); + SPCFLAGS_INIT( 0 ); + regs.intmask = 7; + regs.vbr = regs.sfc = regs.dfc = 0; -#if FLIGHT_RECORDER + // need to ensure the following order of initialization is correct + // (it is definitely better than what it was before this commit + // since it was reading from 0x00000000 in User mode and with active MMU) + mmu_set_tc(regs.tc & ~0x8000); /* disable mmu */ +#if 0 + m68k_areg (regs, 7) = phys_get_long(0x00000000); +#else + m68k_areg (regs, 7) = 0x2000; +#endif +#if 0 + m68k_setpc (phys_get_long(0x00000004)); +#else + m68k_setpc (ROMBaseMac + 0x2a); +#endif + fill_prefetch_0 (); + + /* gb-- moved into {fpp,fpu_x86}.cpp::fpu_init() + regs.fpcr = regs.fpsr = regs.fpiar = 0; */ + fpu_reset(); +#if 0 + // MMU + mmu_reset(); + mmu_set_super(1); + // Cache + regs.cacr = 0; + regs.caar = 0; +#endif +#ifdef FLIGHT_RECORDER log_ptr = 0; - memset(log, 0, sizeof(log)); -#endif - -#if ENABLE_MON - static bool first_time = true; - if (first_time) { - first_time = false; - mon_add_command("regs", dump_regs, "regs Dump m68k emulator registers\n"); -#if FLIGHT_RECORDER - // Install "log" command in mon - 
mon_add_command("log", dump_log, "log Dump m68k emulation log\n"); -#endif - } + memset(frlog, 0, sizeof(frlog)); #endif } void m68k_emulop_return(void) { SPCFLAGS_SET( SPCFLAG_BRK ); - quit_program = true; + quit_program = 1; +} + +static void save_regs(struct M68kRegisters &r) +{ + int i; + + for (i=0; i<8; i++) { + r.d[i] = m68k_dreg(regs, i); + r.a[i] = m68k_areg(regs, i); + } + r.pc = m68k_getpc(); + MakeSR(); + r.sr = regs.sr; + r.isp = regs.isp; + r.usp = regs.usp; + r.msp = regs.msp; + if ((r.sr & 0x2000) == 0) + r.usp = r.a[7]; + else if ((r.sr & 0x1000) != 0) + r.msp = r.a[7]; + else + r.isp = r.a[7]; +} + +static void restore_regs(struct M68kRegisters &r) +{ + int i; + + for (i=0; i<8; i++) { + m68k_dreg(regs, i) = r.d[i]; + m68k_areg(regs, i) = r.a[i]; + } + regs.isp = r.isp; + regs.usp = r.usp; + regs.msp = r.msp; + regs.sr = r.sr; + MakeFromSR(); } void m68k_emulop(uae_u32 opcode) { +#if 0 + struct M68kRegisters r; + save_regs(r); + if (EmulOp(opcode, &r)) + restore_regs(r); +#else struct M68kRegisters r; int i; @@ -1241,6 +1116,135 @@ void m68k_emulop(uae_u32 opcode) } regs.sr = r.sr; MakeFromSR(); +#endif +} + +#if 0 +void m68k_natfeat_id(void) +{ + struct M68kRegisters r; + + /* is it really necessary to save all registers? */ + save_regs(r); + + memptr stack = r.a[7] + 4; /* skip return address */ + r.d[0] = nf_get_id(stack); + + restore_regs(r); +} + +void m68k_natfeat_call(void) +{ + struct M68kRegisters r; + + /* is it really necessary to save all registers? 
*/ + save_regs(r); + + memptr stack = r.a[7] + 4; /* skip return address */ + bool isSupervisorMode = ((r.sr & 0x2000) == 0x2000); + r.d[0] = nf_call(stack, isSupervisorMode); + + restore_regs(r); +} +#endif + +static int m68k_call(uae_u32 pc) +{ + VOLATILE int exc = 0; + m68k_setpc(pc); + TRY(prb) { +#ifdef USE_JIT + if (UseJIT) { + exec_nostats(); + // m68k_do_compile_execute(); + // The above call to m68k_do_compile_execute fails with BadAccess in sigsegv_handler (MAC, if it is executed after the first compile_block) + // (NULL pointer to addr_instr). + // Call exec_nostats avoids calling compile_block, because stack modification is only temporary + // which will fill up compile cache with BOGUS data. + // we can call exec_nostats directly, do our code, and return back here. + } + else +#endif + m68k_do_execute(); + } + CATCH(prb) { + exc = int(prb); + } + return exc; +} + +static uae_u32 m68k_alloca(int size) +{ + uae_u32 sp = (m68k_areg(regs, 7) - size) & ~1; + m68k_areg(regs, 7) = sp; + if ((regs.sr & 0x2000) == 0) + regs.usp = sp; + else if ((regs.sr & 0x1000) != 0) + regs.msp = sp; + else + regs.isp = sp; + return sp; +} + +#if 0 +uae_u32 linea68000(volatile uae_u16 opcode) +{ + sigjmp_buf jmp; + struct M68kRegisters r; + volatile uae_u32 abase = 0; + + SAVE_EXCEPTION; + save_regs(r); + + const int sz = 8 + sizeof(void *); + volatile uae_u32 sp = 0; + uae_u32 backup[(sz + 3) / 4]; + + if (sigsetjmp(jmp, 1) == 0) + { + void *p = jmp; + uae_u8 *sp_p; + int exc; + + sp = m68k_alloca(sz); + memcpy(backup, phys_get_real_address(sp), sz); + + WriteHWMemInt16(sp, opcode); + WriteHWMemInt16(sp + 2, 0xa0ff); + WriteHWMemInt32(sp + 4, 13); + sp_p = phys_get_real_address(sp + 8); + *((void **)sp_p) = p; + if ((exc = m68k_call(sp)) != 0) + { + panicbug("exception %d in LINEA", exc); + m68k_dreg(regs, 0) = 0; + } + } else + { + abase = m68k_dreg(regs, 0); + } + + if (sp) { + memcpy(phys_get_real_address(sp), backup, sz); + } + restore_regs(r); + m68k_setpc(r.pc); + 
RESTORE_EXCEPTION; + return abase; +} +#endif + + +static void rts68000() +{ + uae_u32 SP = m68k_getpc() + 6; + sigjmp_buf *p; + uae_u8 *sp_p = phys_get_real_address(SP); + + p = (sigjmp_buf *)(*((void **)sp_p)); + SP += sizeof(void *); + m68k_areg(regs, 7) = SP; + siglongjmp(*p, 1); } void REGPARAM2 op_illg (uae_u32 opcode) @@ -1248,6 +1252,19 @@ void REGPARAM2 op_illg (uae_u32 opcode) uaecptr pc = m68k_getpc (); if ((opcode & 0xF000) == 0xA000) { +#if 0 + if (opcode == 0xa0ff) + { + uae_u32 call = ReadHWMemInt32(pc + 2); + switch (call) + { + case 13: + rts68000(); + return; + } + m68k_setpc(pc + 6); + } +#endif Exception(0xA,0); return; } @@ -1257,8 +1274,8 @@ void REGPARAM2 op_illg (uae_u32 opcode) return; } - write_log ("Illegal instruction: %04x at %08x\n", opcode, pc); -#if USE_JIT && JIT_DEBUG + D(bug("Illegal instruction: %04x at %08x", opcode, pc)); +#if defined(USE_JIT) && defined(JIT_DEBUG) compiler_dumpstate(); #endif @@ -1266,59 +1283,119 @@ void REGPARAM2 op_illg (uae_u32 opcode) return; } -void mmu_op(uae_u32 opcode, uae_u16 extra) -{ - if ((opcode & 0xFE0) == 0x0500) { - /* PFLUSH */ - mmusr = 0; - } else if ((opcode & 0x0FD8) == 0x548) { - /* PTEST */ - } else - op_illg (opcode); -} - -static int n_insns = 0, n_spcinsns = 0; - static uaecptr last_trace_ad = 0; static void do_trace (void) { - if (regs.t0 && CPUType >= 2) { - uae_u16 opcode; - /* should also include TRAP, CHK, SR modification FPcc */ - /* probably never used so why bother */ - /* We can afford this to be inefficient... 
*/ - m68k_setpc (m68k_getpc ()); - fill_prefetch_0 (); - opcode = get_word(m68k_getpc()); - if (opcode == 0x4e72 /* RTE */ - || opcode == 0x4e74 /* RTD */ - || opcode == 0x4e75 /* RTS */ - || opcode == 0x4e77 /* RTR */ - || opcode == 0x4e76 /* TRAPV */ - || (opcode & 0xffc0) == 0x4e80 /* JSR */ - || (opcode & 0xffc0) == 0x4ec0 /* JMP */ - || (opcode & 0xff00) == 0x6100 /* BSR */ - || ((opcode & 0xf000) == 0x6000 /* Bcc */ - && cctrue((opcode >> 8) & 0xf)) - || ((opcode & 0xf0f0) == 0x5050 /* DBcc */ - && !cctrue((opcode >> 8) & 0xf) - && (uae_s16)m68k_dreg(regs, opcode & 7) != 0)) - { - last_trace_ad = m68k_getpc (); - SPCFLAGS_CLEAR( SPCFLAG_TRACE ); - SPCFLAGS_SET( SPCFLAG_DOTRACE ); - } - } else if (regs.t1) { - last_trace_ad = m68k_getpc (); - SPCFLAGS_CLEAR( SPCFLAG_TRACE ); - SPCFLAGS_SET( SPCFLAG_DOTRACE ); + if (regs.t0) { + uae_u16 opcode; + /* should also include TRAP, CHK, SR modification FPcc */ + /* probably never used so why bother */ + /* We can afford this to be inefficient... 
*/ + m68k_setpc (m68k_getpc ()); + fill_prefetch_0 (); + opcode = get_word(m68k_getpc()); + if (opcode == 0x4e72 /* RTE */ + || opcode == 0x4e74 /* RTD */ + || opcode == 0x4e75 /* RTS */ + || opcode == 0x4e77 /* RTR */ + || opcode == 0x4e76 /* TRAPV */ + || (opcode & 0xffc0) == 0x4e80 /* JSR */ + || (opcode & 0xffc0) == 0x4ec0 /* JMP */ + || (opcode & 0xff00) == 0x6100 /* BSR */ + || ((opcode & 0xf000) == 0x6000 /* Bcc */ + && cctrue((opcode >> 8) & 0xf)) + || ((opcode & 0xf0f0) == 0x5050 /* DBcc */ + && !cctrue((opcode >> 8) & 0xf) + && (uae_s16)m68k_dreg(regs, opcode & 7) != 0)) + { + last_trace_ad = m68k_getpc (); + SPCFLAGS_CLEAR( SPCFLAG_TRACE ); + SPCFLAGS_SET( SPCFLAG_DOTRACE ); } + } else if (regs.t1) { + last_trace_ad = m68k_getpc (); + SPCFLAGS_CLEAR( SPCFLAG_TRACE ); + SPCFLAGS_SET( SPCFLAG_DOTRACE ); + } } -int m68k_do_specialties (void) +#if 0 +#define SERVE_VBL_MFP(resetStop) \ +{ \ + if (SPCFLAGS_TEST( SPCFLAG_INT3|SPCFLAG_VBL|SPCFLAG_INT5|SPCFLAG_SCC|SPCFLAG_MFP )) { \ + if (SPCFLAGS_TEST( SPCFLAG_INT3 )) { \ + if (3 > regs.intmask) { \ + Interrupt(3); \ + regs.stopped = 0; \ + SPCFLAGS_CLEAR( SPCFLAG_INT3 ); \ + if (resetStop) \ + SPCFLAGS_CLEAR( SPCFLAG_STOP ); \ + } \ + } \ + if (SPCFLAGS_TEST( SPCFLAG_VBL )) { \ + if (4 > regs.intmask) { \ + Interrupt(4); \ + regs.stopped = 0; \ + SPCFLAGS_CLEAR( SPCFLAG_VBL ); \ + if (resetStop) \ + SPCFLAGS_CLEAR( SPCFLAG_STOP ); \ + } \ + } \ + if (SPCFLAGS_TEST( SPCFLAG_INT5 )) { \ + if (5 > regs.intmask) { \ + Interrupt(5); \ + regs.stopped = 0; \ + SPCFLAGS_CLEAR( SPCFLAG_INT5 ); \ + if (resetStop) \ + SPCFLAGS_CLEAR( SPCFLAG_STOP ); \ + } \ + } \ + if (SPCFLAGS_TEST( SPCFLAG_SCC )) { \ + if (5 > regs.intmask) { \ + int vector_number=SCCdoInterrupt(); \ + if(vector_number){ \ + SCCInterrupt(vector_number); \ + regs.stopped = 0; \ + SPCFLAGS_CLEAR( SPCFLAG_SCC); \ + if (resetStop) \ + SPCFLAGS_CLEAR( SPCFLAG_STOP ); \ + } \ + else \ + SPCFLAGS_CLEAR( SPCFLAG_SCC ); \ + } \ + } \ + if (SPCFLAGS_TEST( 
SPCFLAG_MFP )) { \ + if (6 > regs.intmask) { \ + int vector_number = MFPdoInterrupt(); \ + if (vector_number) { \ + MFPInterrupt(vector_number); \ + regs.stopped = 0; \ + if (resetStop) \ + SPCFLAGS_CLEAR( SPCFLAG_STOP ); \ + } \ + else \ + SPCFLAGS_CLEAR( SPCFLAG_MFP ); \ + } \ + } \ + } \ +} + +#define SERVE_INTERNAL_IRQ() \ +{ \ + if (SPCFLAGS_TEST( SPCFLAG_INTERNAL_IRQ )) { \ + SPCFLAGS_CLEAR( SPCFLAG_INTERNAL_IRQ ); \ + invoke200HzInterrupt(); \ + } \ +} +#endif + +int m68k_do_specialties(void) { -#if USE_JIT +#if 0 + SERVE_INTERNAL_IRQ(); +#endif +#ifdef USE_JIT // Block was compiled SPCFLAGS_CLEAR( SPCFLAG_JIT_END_COMPILE ); @@ -1329,11 +1406,39 @@ int m68k_do_specialties (void) if ((m68k_execute_depth == 0) && SPCFLAGS_TEST( SPCFLAG_JIT_EXEC_RETURN )) SPCFLAGS_CLEAR( SPCFLAG_JIT_EXEC_RETURN ); #endif - + /*n_spcinsns++;*/ if (SPCFLAGS_TEST( SPCFLAG_DOTRACE )) { Exception (9,last_trace_ad); } +#if 0 /* not for ARAnyM; emulating 040 only */ + if ((regs.spcflags & SPCFLAG_STOP) && regs.s == 0 && currprefs.cpu_model <= 68010) { + // 68000/68010 undocumented special case: + // if STOP clears S-bit and T was not set: + // cause privilege violation exception, PC pointing to following instruction. + // If T was set before STOP: STOP works as documented. 
+ m68k_unset_stop(); + Exception(8, 0); + } +#endif while (SPCFLAGS_TEST( SPCFLAG_STOP )) { + //TODO: Check +#if 0 + if ((regs.sr & 0x700) == 0x700) + { + panicbug("STOPed with interrupts disabled, exiting; pc=$%08x", m68k_getpc()); + m68k_dumpstate (stderr, NULL); + quit_program = 1; +#ifdef FULL_HISTORY + ndebug::showHistory(20, false); + m68k_dumpstate (stderr, NULL); +#endif + return 1; + } +#endif +#if 0 + // give unused time slices back to OS + SleepAndWait(); +#endif if (SPCFLAGS_TEST( SPCFLAG_INT | SPCFLAG_DOINT )){ SPCFLAGS_CLEAR( SPCFLAG_INT | SPCFLAG_DOINT ); int intr = intlev (); @@ -1343,10 +1448,30 @@ int m68k_do_specialties (void) SPCFLAGS_CLEAR( SPCFLAG_STOP ); } } + +#if 0 + SERVE_INTERNAL_IRQ(); + SERVE_VBL_MFP(true); +#endif +#if 0 + if (SPCFLAGS_TEST( SPCFLAG_BRK )) + break; +#endif } if (SPCFLAGS_TEST( SPCFLAG_TRACE )) do_trace (); +#if 0 + SERVE_VBL_MFP(false); +#endif + +/* +// do not understand the INT vs DOINT stuff so I disabled it (joy) + if (regs.spcflags & SPCFLAG_INT) { + regs.spcflags &= ~SPCFLAG_INT; + regs.spcflags |= SPCFLAG_DOINT; + } +*/ if (SPCFLAGS_TEST( SPCFLAG_DOINT )) { SPCFLAGS_CLEAR( SPCFLAG_DOINT ); int intr = intlev (); @@ -1355,159 +1480,213 @@ int m68k_do_specialties (void) regs.stopped = 0; } } + if (SPCFLAGS_TEST( SPCFLAG_INT )) { SPCFLAGS_CLEAR( SPCFLAG_INT ); SPCFLAGS_SET( SPCFLAG_DOINT ); } - if (SPCFLAGS_TEST( SPCFLAG_BRK )) { - SPCFLAGS_CLEAR( SPCFLAG_BRK ); + + if (SPCFLAGS_TEST( SPCFLAG_BRK /*| SPCFLAG_MODE_CHANGE*/ )) { + SPCFLAGS_CLEAR( SPCFLAG_BRK /*| SPCFLAG_MODE_CHANGE*/ ); return 1; } + return 0; } void m68k_do_execute (void) { - for (;;) { - uae_u32 opcode = GET_OPCODE; -#if FLIGHT_RECORDER - m68k_record_step(m68k_getpc()); + uae_u32 pc; + uae_u32 opcode; + for (;;) { + regs.fault_pc = pc = m68k_getpc(); +#ifdef FULL_HISTORY +#ifdef NEED_TO_DEBUG_BADLY + history[lasthist] = regs; + historyf[lasthist] = regflags; +#else + history[lasthist] = m68k_getpc(); #endif - (*cpufunctbl[opcode])(opcode); - 
cpu_check_ticks(); - if (SPCFLAGS_TEST(SPCFLAG_ALL_BUT_EXEC_RETURN)) { - if (m68k_do_specialties()) - return; - } + if (++lasthist == MAX_HIST) lasthist = 0; + if (lasthist == firsthist) { + if (++firsthist == MAX_HIST) firsthist = 0; } +#endif + +#ifndef FULLMMU +#ifdef ARAM_PAGE_CHECK + if (((pc ^ pc_page) > ARAM_PAGE_MASK)) { + check_ram_boundary(pc, 2, false); + pc_page = pc; + pc_offset = (uintptr)get_real_address(pc, 0, sz_word) - pc; + } +#else + check_ram_boundary(pc, 2, false); +#endif +#endif + opcode = GET_OPCODE; +#ifdef FLIGHT_RECORDER + m68k_record_step(m68k_getpc(), cft_map(opcode)); +#endif + (*cpufunctbl[opcode])(opcode); + cpu_check_ticks(); + regs.fault_pc = m68k_getpc(); + + if (SPCFLAGS_TEST(SPCFLAG_ALL_BUT_EXEC_RETURN)) { + if (m68k_do_specialties()) + return; + } + } } void m68k_execute (void) { -#if USE_JIT - ++m68k_execute_depth; +#ifdef USE_JIT + m68k_execute_depth++; #endif +#ifdef DEBUGGER + VOLATILE bool after_exception = false; +#endif + +setjmpagain: + TRY(prb) { for (;;) { - if (quit_program) - break; - m68k_do_execute(); + if (quit_program > 0) { + if (quit_program == 1) { +#ifdef FLIGHT_RECORDER + dump_flight_recorder(); +#endif + break; + } + quit_program = 0; + m68k_reset (); + } +#ifdef DEBUGGER + if (debugging && !after_exception) debug(); + after_exception = false; +#endif + m68k_do_execute(); } -#if USE_JIT - --m68k_execute_depth; + } + CATCH(prb) { + Exception(prb, 0); +#ifdef DEBUGGER + after_exception = true; +#endif + goto setjmpagain; + } + +#ifdef USE_JIT + m68k_execute_depth--; #endif } -static void m68k_verify (uaecptr addr, uaecptr *nextpc) +void m68k_disasm (FILE *f, uaecptr addr, uaecptr *nextpc, int cnt) { - uae_u32 opcode, val; - struct instr *dp; +#ifdef HAVE_DISASM_M68K + char buf[256]; + int size; - opcode = get_iword_1(0); - last_op_for_exception_3 = opcode; - m68kpc_offset = 2; - - if (cpufunctbl[cft_map (opcode)] == op_illg_1) { - opcode = 0x4AFC; - } - dp = table68k + opcode; - - if (dp->suse) { - if 
(!verify_ea (dp->sreg, (amodes)dp->smode, (wordsizes)dp->size, &val)) { - Exception (3, 0); - return; - } - } - if (dp->duse) { - if (!verify_ea (dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, &val)) { - Exception (3, 0); - return; - } + disasm_info.memory_vma = addr; + while (cnt-- > 0) { + size = m68k_disasm_to_buf(&disasm_info, buf, 1); + fprintf(f, "%s\n", buf); + if (size < 0) + break; } + if (nextpc) + *nextpc = disasm_info.memory_vma; +#else + if (nextpc) + *nextpc = addr; + (void) f; + (void) cnt; +#endif } -void m68k_disasm (uaecptr addr, uaecptr *nextpc, int cnt) +#ifdef DEBUGGER +void newm68k_disasm(FILE *f, uaecptr addr, uaecptr *nextpc, unsigned int cnt) { - uaecptr newpc = 0; - m68kpc_offset = addr - m68k_getpc (); - while (cnt-- > 0) { - char instrname[20],*ccpt; - int opwords; - uae_u32 opcode; - struct mnemolookup *lookup; - struct instr *dp; - printf ("%08lx: ", m68k_getpc () + m68kpc_offset); - for (opwords = 0; opwords < 5; opwords++){ - printf ("%04x ", get_iword_1 (m68kpc_offset + opwords*2)); - } - opcode = get_iword_1 (m68kpc_offset); - m68kpc_offset += 2; - if (cpufunctbl[cft_map (opcode)] == op_illg_1) { - opcode = 0x4AFC; - } - dp = table68k + opcode; - for (lookup = lookuptab;lookup->mnemo != dp->mnemo; lookup++) - ; +#ifdef HAVE_DISASM_M68K + char buf[256]; - strcpy (instrname, lookup->name); - ccpt = strstr (instrname, "cc"); - if (ccpt != 0) { - strncpy (ccpt, ccnames[dp->cc], 2); - } - printf ("%s", instrname); - switch (dp->size){ - case sz_byte: printf (".B "); break; - case sz_word: printf (".W "); break; - case sz_long: printf (".L "); break; - default: printf (" "); break; - } - - if (dp->suse) { - newpc = m68k_getpc () + m68kpc_offset; - newpc += ShowEA (dp->sreg, (amodes)dp->smode, (wordsizes)dp->size, 0); - } - if (dp->suse && dp->duse) - printf (","); - if (dp->duse) { - newpc = m68k_getpc () + m68kpc_offset; - newpc += ShowEA (dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 0); - } - if (ccpt != 0) { - if 
(cctrue(dp->cc)) - printf (" == %08x (TRUE)", newpc); - else - printf (" == %08x (FALSE)", newpc); - } else if ((opcode & 0xff00) == 0x6100) /* BSR */ - printf (" == %08x", newpc); - printf ("\n"); - } - if (nextpc) - *nextpc = m68k_getpc () + m68kpc_offset; + disasm_info.memory_vma = addr; + if (cnt == 0) { + m68k_disasm_to_buf(&disasm_info, buf, 1); + } else { + while (cnt-- > 0) { + m68k_disasm_to_buf(&disasm_info, buf, 1); + fprintf(f, "%s\n", buf); + } + } + if (nextpc) + *nextpc = disasm_info.memory_vma; +#else + if (nextpc) + *nextpc = addr; + (void) cnt; +#endif } -void m68k_dumpstate (uaecptr *nextpc) -{ - int i; - for (i = 0; i < 8; i++){ - printf ("D%d: %08x ", i, m68k_dreg(regs, i)); - if ((i & 3) == 3) printf ("\n"); - } - for (i = 0; i < 8; i++){ - printf ("A%d: %08x ", i, m68k_areg(regs, i)); - if ((i & 3) == 3) printf ("\n"); - } - if (regs.s == 0) regs.usp = m68k_areg(regs, 7); - if (regs.s && regs.m) regs.msp = m68k_areg(regs, 7); - if (regs.s && regs.m == 0) regs.isp = m68k_areg(regs, 7); - printf ("USP=%08x ISP=%08x MSP=%08x VBR=%08x\n", - regs.usp,regs.isp,regs.msp,regs.vbr); - printf ("T=%d%d S=%d M=%d X=%ld N=%ld Z=%ld V=%ld C=%ld IMASK=%d\n", - regs.t1, regs.t0, regs.s, regs.m, - GET_XFLG, GET_NFLG, GET_ZFLG, GET_VFLG, GET_CFLG, regs.intmask); +#endif /* DEBUGGER */ - fpu_dump_registers(); - fpu_dump_flags(); +#ifdef FULL_HISTORY +void showDisasm(uaecptr addr) { +#ifdef HAVE_DISASM_M68K + char buf[256]; - m68k_disasm(m68k_getpc (), nextpc, 1); - if (nextpc) - printf ("next PC: %08x\n", *nextpc); + disasm_info.memory_vma = addr; + m68k_disasm_to_buf(&disasm_info, buf, 1); + bug("%s", buf); +#else + (void) addr; +#endif +} +#endif /* FULL_HISTORY */ + +void m68k_dumpstate (FILE *out, uaecptr *nextpc) +{ + int i; + for (i = 0; i < 8; i++){ + fprintf (out, "D%d: %08lx ", i, (unsigned long)m68k_dreg(regs, i)); + if ((i & 3) == 3) fprintf (out, "\n"); + } + for (i = 0; i < 8; i++){ + fprintf (out, "A%d: %08lx ", i, (unsigned long)m68k_areg(regs, 
i)); + if ((i & 3) == 3) fprintf (out, "\n"); + } + if (regs.s == 0) regs.usp = m68k_areg(regs, 7); + if (regs.s && regs.m) regs.msp = m68k_areg(regs, 7); + if (regs.s && regs.m == 0) regs.isp = m68k_areg(regs, 7); + fprintf (out, "USP=%08lx ISP=%08lx MSP=%08lx VBR=%08lx\n", + (unsigned long)regs.usp, (unsigned long)regs.isp, + (unsigned long)regs.msp, (unsigned long)regs.vbr); + fprintf (out, "T=%d%d S=%d M=%d X=%d N=%d Z=%d V=%d C=%d IMASK=%d TCE=%d TCP=%d\n", + regs.t1, regs.t0, regs.s, regs.m, + (int)GET_XFLG(), (int)GET_NFLG(), (int)GET_ZFLG(), (int)GET_VFLG(), (int)GET_CFLG(), regs.intmask, + regs.mmu_enabled, regs.mmu_pagesize_8k); + fprintf (out, "CACR=%08lx CAAR=%08lx URP=%08lx SRP=%08lx\n", + (unsigned long)regs.cacr, + (unsigned long)regs.caar, + (unsigned long)regs.urp, + (unsigned long)regs.srp); + fprintf (out, "DTT0=%08lx DTT1=%08lx ITT0=%08lx ITT1=%08lx\n", + (unsigned long)regs.dtt0, + (unsigned long)regs.dtt1, + (unsigned long)regs.itt0, + (unsigned long)regs.itt1); + for (i = 0; i < 8; i++){ + fprintf (out, "FP%d: %g ", i, (double)fpu.registers[i]); + if ((i & 3) == 3) fprintf (out, "\n"); + } +#if 0 + fprintf (out, "N=%d Z=%d I=%d NAN=%d\n", + (regs.fpsr & 0x8000000) != 0, + (regs.fpsr & 0x4000000) != 0, + (regs.fpsr & 0x2000000) != 0, + (regs.fpsr & 0x1000000) != 0); +#endif + m68k_disasm(out, m68k_getpc (), nextpc, 1); + if (nextpc) + fprintf (out, "next PC: %08lx\n", (unsigned long)*nextpc); } diff --git a/BasiliskII/src/uae_cpu/newcpu.h b/BasiliskII/src/uae_cpu/newcpu.h index e2d5b5ed..478a3785 100644 --- a/BasiliskII/src/uae_cpu/newcpu.h +++ b/BasiliskII/src/uae_cpu/newcpu.h @@ -1,41 +1,51 @@ /* - * UAE - The Un*x Amiga Emulator + * newcpu.h - CPU emulation * - * MC68000 emulation + * Copyright (c) 2009 ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II * - * Copyright 1995 Bernd Schmidt + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine 
running on almost any hardware. * - * This program is free software; you can redistribute it and/or modify + * ARAnyM is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * - * This program is distributed in the hope that it will be useful, + * ARAnyM is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with ARAnyM; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + /* + * UAE - The Un*x Amiga Emulator + * + * MC68000 emulation + * + * Copyright 1995 Bernd Schmidt + */ #ifndef NEWCPU_H #define NEWCPU_H -#ifndef FLIGHT_RECORDER -#define FLIGHT_RECORDER 0 -#endif - -#include "m68k.h" -#include "readcpu.h" +#include "sysdeps.h" +#include "registers.h" #include "spcflags.h" +#include "m68k.h" +#include "memory.h" -#if ENABLE_MON -#include "mon.h" -#include "mon_disass.h" -#endif +# include +extern struct fixup { + int flag; + uae_u32 reg; + uaecptr value; +}fixup; extern int areg_byteinc[]; extern int imm8_table[]; @@ -57,122 +67,113 @@ extern int broken_in; #endif #define cpuop_begin() do { cpuop_tag("begin"); } while (0) -#define cpuop_end() do { cpuop_tag("end"); } while (0) +#define cpuop_end() do { cpuop_tag("end"); } while (0) typedef void REGPARAM2 cpuop_func (uae_u32) REGPARAM; - + struct cputbl { cpuop_func *handler; uae_u16 specific; uae_u16 opcode; }; -extern cpuop_func *cpufunctbl[65536] ASM_SYM("cpufunctbl"); +extern cpuop_func *cpufunctbl[65536]; -#if USE_JIT +#ifdef USE_JIT typedef void compop_func 
(uae_u32) REGPARAM; struct comptbl { compop_func *handler; - uae_u32 specific; uae_u32 opcode; + uae_u32 specific; +#define COMP_OPCODE_ISJUMP 0x0001 +#define COMP_OPCODE_LONG_OPCODE 0x0002 +#define COMP_OPCODE_CMOV 0x0004 +#define COMP_OPCODE_ISADDX 0x0008 +#define COMP_OPCODE_ISCJUMP 0x0010 +#define COMP_OPCODE_USES_FPU 0x0020 }; #endif extern void REGPARAM2 op_illg (uae_u32) REGPARAM; -extern void m68k_dumpstate(uaecptr *nextpc); - -typedef char flagtype; - -struct regstruct { - uae_u32 regs[16]; - - uae_u32 pc; - uae_u8 * pc_p; - uae_u8 * pc_oldp; - - spcflags_t spcflags; - int intmask; - - uae_u32 vbr, sfc, dfc; - uaecptr usp, isp, msp; - uae_u16 sr; - flagtype t1; - flagtype t0; - flagtype s; - flagtype m; - flagtype x; - flagtype stopped; - -#if USE_PREFETCH_BUFFER - /* Fellow sources say this is 4 longwords. That's impossible. It needs - * to be at least a longword. The HRM has some cryptic comment about two - * instructions being on the same longword boundary. - * The way this is implemented now seems like a good compromise. 
- */ - uae_u32 prefetch; -#endif -}; - -extern regstruct regs, lastint_regs; #define m68k_dreg(r,num) ((r).regs[(num)]) #define m68k_areg(r,num) (((r).regs + 8)[(num)]) -#define get_ibyte(o) do_get_mem_byte((uae_u8 *)(regs.pc_p + (o) + 1)) -#define get_iword(o) do_get_mem_word((uae_u16 *)(regs.pc_p + (o))) -#define get_ilong(o) do_get_mem_long((uae_u32 *)(regs.pc_p + (o))) +#ifdef FULLMMU +static ALWAYS_INLINE uae_u8 get_ibyte(uae_u32 o) +{ + return mmu_get_byte(m68k_getpc() + o + 1, 0, sz_byte); +} +static ALWAYS_INLINE uae_u16 get_iword(uae_u32 o) +{ + return mmu_get_word(m68k_getpc() + o, 0, sz_word); +} +static ALWAYS_INLINE uae_u32 get_ilong(uae_u32 o) +{ + uaecptr addr = m68k_getpc() + o; + + if (unlikely(is_unaligned(addr, 4))) + return mmu_get_long_unaligned(addr, 0); + return mmu_get_long(addr, 0, sz_long); +} -#ifdef HAVE_GET_WORD_UNSWAPPED -#define GET_OPCODE (do_get_mem_word_unswapped (regs.pc_p)) #else -#define GET_OPCODE (get_iword (0)) +#define get_ibyte(o) do_get_mem_byte((uae_u8 *)(get_real_address(m68k_getpc(), 0, sz_byte) + (o) + 1)) +#define get_iword(o) do_get_mem_word((uae_u16 *)(get_real_address(m68k_getpc(), 0, sz_word) + (o))) +#define get_ilong(o) do_get_mem_long((uae_u32 *)(get_real_address(m68k_getpc(), 0, sz_long) + (o))) #endif -#if USE_PREFETCH_BUFFER -static __inline__ uae_u32 get_ibyte_prefetch (uae_s32 o) +#if 0 +static inline uae_u32 get_ibyte_prefetch (uae_s32 o) { if (o > 3 || o < 0) - return do_get_mem_byte((uae_u8 *)(regs.pc_p + o + 1)); + return do_get_mem_byte((uae_u8 *)(do_get_real_address(regs.pcp, false, false) + o + 1)); return do_get_mem_byte((uae_u8 *)(((uae_u8 *)®s.prefetch) + o + 1)); } -static __inline__ uae_u32 get_iword_prefetch (uae_s32 o) +static inline uae_u32 get_iword_prefetch (uae_s32 o) { if (o > 3 || o < 0) - return do_get_mem_word((uae_u16 *)(regs.pc_p + o)); + return do_get_mem_word((uae_u16 *)(do_get_real_address(regs.pcp, false, false) + o)); return do_get_mem_word((uae_u16 *)(((uae_u8 *)®s.prefetch) + 
o)); } -static __inline__ uae_u32 get_ilong_prefetch (uae_s32 o) +static inline uae_u32 get_ilong_prefetch (uae_s32 o) { if (o > 3 || o < 0) - return do_get_mem_long((uae_u32 *)(regs.pc_p + o)); + return do_get_mem_long((uae_u32 *)(do_get_real_address(regs.pcp, false, false) + o)); if (o == 0) return do_get_mem_long(®s.prefetch); - return (do_get_mem_word (((uae_u16 *)®s.prefetch) + 1) << 16) | do_get_mem_word ((uae_u16 *)(regs.pc_p + 4)); + return (do_get_mem_word (((uae_u16 *)®s.prefetch) + 1) << 16) | do_get_mem_word ((uae_u16 *)(do_get_real_address(regs.pcp, false, false) + 4)); } #endif -static __inline__ void fill_prefetch_0 (void) +#ifdef FULLMMU +#define m68k_incpc(o) (regs.pc += (o)) +#else +#define m68k_incpc(o) (regs.pc_p += (o)) +#endif + +static inline void fill_prefetch_0 (void) { #if USE_PREFETCH_BUFFER uae_u32 r; #ifdef UNALIGNED_PROFITABLE - r = *(uae_u32 *)regs.pc_p; + r = *(uae_u32 *)do_get_real_address(m68k_getpc(), false, false); regs.prefetch = r; #else - r = do_get_mem_long ((uae_u32 *)regs.pc_p); + r = do_get_mem_long ((uae_u32 *)do_get_real_address(m68k_getpc(), false, false)); do_put_mem_long (®s.prefetch, r); #endif #endif } #if 0 -static __inline__ void fill_prefetch_2 (void) +static inline void fill_prefetch_2 (void) { uae_u32 r = do_get_mem_long (®s.prefetch) << 16; - uae_u32 r2 = do_get_mem_word (((uae_u16 *)regs.pc_p) + 1); + uae_u32 r2 = do_get_mem_word (((uae_u16 *)do_get_real_address(regs.pcp, false, false)) + 1); r |= r2; do_put_mem_long (®s.prefetch, r); } @@ -180,119 +181,97 @@ static __inline__ void fill_prefetch_2 (void) #define fill_prefetch_2 fill_prefetch_0 #endif -static __inline__ uaecptr m68k_getpc (void) -{ -#if REAL_ADDRESSING || DIRECT_ADDRESSING - return get_virtual_address(regs.pc_p); -#else - return regs.pc + ((char *)regs.pc_p - (char *)regs.pc_oldp); -#endif -} - -static __inline__ void m68k_setpc (uaecptr newpc) -{ -#if ENABLE_MON - uae_u32 previous_pc = m68k_getpc(); -#endif - -#if REAL_ADDRESSING || 
DIRECT_ADDRESSING - regs.pc_p = get_real_address(newpc); -#else - regs.pc_p = regs.pc_oldp = get_real_address(newpc); - regs.pc = newpc; -#endif - -#if ENABLE_MON - if (IS_BREAK_POINT(newpc)) { - printf("Stopped at break point address: %08x. Last PC: %08x\n", newpc, previous_pc); - m68k_dumpstate(NULL); - const char *arg[4] = {"mon", "-m", "-r", NULL}; - mon(3, arg); - } -#endif // end of #if ENABLE_MON -} - -static __inline__ void m68k_incpc (uae_s32 delta) -{ -#if ENABLE_MON - uae_u32 previous_pc = m68k_getpc(); -#endif - regs.pc_p += (delta); -#if ENABLE_MON - uaecptr next_pc = m68k_getpc(); - if (IS_BREAK_POINT(next_pc)) { - printf("Stopped at break point address: %08x. Last PC: %08x\n", next_pc, previous_pc); - m68k_dumpstate(NULL); - const char *arg[4] = {"mon", "-m", "-r", NULL}; - mon(3, arg); - } -#endif // end of #if ENABLE_MON -} - /* These are only used by the 68020/68881 code, and therefore don't * need to handle prefetch. */ -static __inline__ uae_u32 next_ibyte (void) +static inline uae_u32 next_ibyte (void) { uae_u32 r = get_ibyte (0); m68k_incpc (2); return r; } -static __inline__ uae_u32 next_iword (void) +static inline uae_u32 next_iword (void) { uae_u32 r = get_iword (0); m68k_incpc (2); return r; } -static __inline__ uae_u32 next_ilong (void) +static inline uae_u32 next_ilong (void) { uae_u32 r = get_ilong (0); m68k_incpc (4); return r; } +static inline void m68k_setpc (uaecptr newpc) +{ +#ifndef FULLMMU + regs.pc_p = regs.pc_oldp = get_real_address(newpc, 0, sz_word); +#endif + regs.fault_pc = regs.pc = newpc; +} + #define m68k_setpc_fast m68k_setpc #define m68k_setpc_bcc m68k_setpc #define m68k_setpc_rte m68k_setpc -static __inline__ void m68k_do_rts(void) +static inline void m68k_do_rts(void) { - m68k_setpc(get_long(m68k_areg(regs, 7))); - m68k_areg(regs, 7) += 4; + m68k_setpc(get_long(m68k_areg(regs, 7))); + m68k_areg(regs, 7) += 4; } -static __inline__ void m68k_do_bsr(uaecptr oldpc, uae_s32 offset) +static inline void m68k_do_bsr(uaecptr 
oldpc, uae_s32 offset) { - m68k_areg(regs, 7) -= 4; - put_long(m68k_areg(regs, 7), oldpc); - m68k_incpc(offset); + put_long(m68k_areg(regs, 7) - 4, oldpc); + m68k_areg(regs, 7) -= 4; + m68k_incpc(offset); } -static __inline__ void m68k_do_jsr(uaecptr oldpc, uaecptr dest) +static inline void m68k_do_jsr(uaecptr oldpc, uaecptr dest) { - m68k_areg(regs, 7) -= 4; - put_long(m68k_areg(regs, 7), oldpc); - m68k_setpc(dest); + put_long(m68k_areg(regs, 7) - 4, oldpc); + m68k_areg(regs, 7) -= 4; + m68k_setpc(dest); } -static __inline__ void m68k_setstopped (int stop) +static inline void m68k_setstopped (int stop) { regs.stopped = stop; /* A traced STOP instruction drops through immediately without actually stopping. */ - if (stop && (regs.spcflags & SPCFLAG_DOTRACE) == 0) - SPCFLAGS_SET( SPCFLAG_STOP ); + if (stop && !( SPCFLAGS_TEST( SPCFLAG_DOTRACE ))) + SPCFLAGS_SET( SPCFLAG_STOP ); } -extern uae_u32 get_disp_ea_020 (uae_u32 base, uae_u32 dp); -extern uae_u32 get_disp_ea_000 (uae_u32 base, uae_u32 dp); +#ifdef FULLMMU +# define GET_OPCODE (get_iword (0)) +#elif defined ARAM_PAGE_CHECK +# ifdef HAVE_GET_WORD_UNSWAPPED +# define GET_OPCODE (do_get_mem_word_unswapped((uae_u16*)(pc + pc_offset))); +# else +# define GET_OPCODE (do_get_mem_word((uae_u16*)(pc + pc_offset))); +# endif +#else +# ifdef HAVE_GET_WORD_UNSWAPPED +# define GET_OPCODE (do_get_mem_word_unswapped ((uae_u16*)get_real_address(m68k_getpc(), 0, sz_word))) +# else +# define GET_OPCODE (get_iword (0)) +# endif +#endif + +extern REGPARAM uae_u32 get_disp_ea_020 (uae_u32 base, uae_u32 dp); +extern REGPARAM uae_u32 get_disp_ea_000 (uae_u32 base, uae_u32 dp); +extern REGPARAM uae_u32 get_bitfield(uae_u32 src, uae_u32 bdata[2], uae_s32 offset, int width); +extern REGPARAM void put_bitfield(uae_u32 dst, uae_u32 bdata[2], uae_u32 val, uae_s32 offset, int width); + -extern uae_s32 ShowEA (int reg, amodes mode, wordsizes size, char *buf); extern void MakeSR (void); extern void MakeFromSR (void); extern void Exception 
(int, uaecptr); +extern void ex_rte(void); extern void dump_counts (void); extern int m68k_move2c (int, uae_u32 *); extern int m68k_movec2 (int, uae_u32 *); @@ -300,15 +279,19 @@ extern void m68k_divl (uae_u32, uae_u32, uae_u16, uaecptr); extern void m68k_mull (uae_u32, uae_u32, uae_u16); extern void m68k_emulop (uae_u32); extern void m68k_emulop_return (void); +extern void m68k_natfeat_id(void); +extern void m68k_natfeat_call(void); extern void init_m68k (void); extern void exit_m68k (void); -extern void m68k_dumpstate (uaecptr *); -extern void m68k_disasm (uaecptr, uaecptr *, int); +extern void m68k_dumpstate (FILE *, uaecptr *); +extern void m68k_disasm (FILE *, uaecptr, uaecptr *, int); +extern void newm68k_disasm(FILE *, uaecptr, uaecptr *, unsigned int); +extern void showDisasm(uaecptr); extern void m68k_reset (void); extern void m68k_enter_debugger(void); extern int m68k_do_specialties(void); - -extern void mmu_op (uae_u32, uae_u16); +extern void m68k_instr_set(void); +uae_u32 linea68000(uae_u16 opcode); /* Opcode of faulting instruction */ extern uae_u16 last_op_for_exception_3; @@ -319,24 +302,19 @@ extern uaecptr last_fault_for_exception_3; #define CPU_OP_NAME(a) op ## a -/* 68020 + 68881 */ -extern struct cputbl op_smalltbl_0_ff[]; -/* 68020 */ -extern struct cputbl op_smalltbl_1_ff[]; -/* 68010 */ -extern struct cputbl op_smalltbl_2_ff[]; -/* 68000 */ -extern struct cputbl op_smalltbl_3_ff[]; -/* 68000 slow but compatible. 
*/ -extern struct cputbl op_smalltbl_4_ff[]; +/* 68040+ 68881 */ +extern const struct cputbl op_smalltbl_0_ff[]; +extern const struct cputbl op_smalltbl_0_nf[]; -#if FLIGHT_RECORDER -extern void m68k_record_step(uaecptr) REGPARAM; +#ifdef FLIGHT_RECORDER +extern void m68k_record_step(uaecptr, int); #endif + extern void m68k_do_execute(void); extern void m68k_execute(void); -#if USE_JIT +#ifdef USE_JIT extern void m68k_compile_execute(void); +extern void m68k_do_compile_execute(void); #endif #ifdef USE_CPU_EMUL_SERVICES extern int32 emulated_ticks; @@ -351,5 +329,7 @@ static inline void cpu_check_ticks(void) #define cpu_check_ticks() #define cpu_do_check_ticks() #endif - + +cpuop_func op_illg_1; + #endif /* NEWCPU_H */ diff --git a/BasiliskII/src/uae_cpu/noflags.h b/BasiliskII/src/uae_cpu/noflags.h index eacbc214..e3b7a3a5 100644 --- a/BasiliskII/src/uae_cpu/noflags.h +++ b/BasiliskII/src/uae_cpu/noflags.h @@ -33,13 +33,13 @@ #define NOFLAGS_CMP 0 #undef SET_NFLG_ALWAYS -static __inline__ void SET_NFLG_ALWAYS(uae_u32 x) +static inline void SET_NFLG_ALWAYS(uae_u32 x) { SET_NFLG(x); /* This has not yet been redefined */ } #undef SET_CFLG_ALWAYS -static __inline__ void SET_CFLG_ALWAYS(uae_u32 x) +static inline void SET_CFLG_ALWAYS(uae_u32 x) { SET_CFLG(x); /* This has not yet been redefined */ } @@ -62,13 +62,13 @@ static __inline__ void SET_CFLG_ALWAYS(uae_u32 x) #define SET_XFLG(y) do {uae_u32 dummy=(y); } while (0) #undef CLEAR_CZNV -#define CLEAR_CZNV +#define CLEAR_CZNV() #undef IOR_CZNV #define IOR_CZNV(y) do {uae_u32 dummy=(y); } while (0) #undef SET_CZNV #define SET_CZNV(y) do {uae_u32 dummy=(y); } while (0) #undef COPY_CARRY -#define COPY_CARRY +#define COPY_CARRY() #ifdef optflag_testl #undef optflag_testl diff --git a/BasiliskII/src/uae_cpu/readcpu.cpp b/BasiliskII/src/uae_cpu/readcpu.cpp index 3fccdfb7..742e0be5 100644 --- a/BasiliskII/src/uae_cpu/readcpu.cpp +++ b/BasiliskII/src/uae_cpu/readcpu.cpp @@ -1,34 +1,30 @@ +/* 2002 MJ */ /* * UAE - The Un*x Amiga 
Emulator * * Read 68000 CPU specs from file "table68k" * * Copyright 1995,1996 Bernd Schmidt - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include -#include -#include -#include - #include "sysdeps.h" #include "readcpu.h" +#include +#include +#include +#include + +using std::strncmp; +using std::abort; +using std::fprintf; +using std::strcmp; +using std::strlen; +using std::malloc; + int nr_cpuop_funcs; +struct instr *table68k; +static int readcpu_mismatch; struct mnemolookup lookuptab[] = { { i_ILLG, "ILLEGAL" }, @@ -153,16 +149,19 @@ struct mnemolookup lookuptab[] = { { i_CPUSHA, "CPUSHA" }, { i_MOVE16, "MOVE16" }, - { i_EMULOP_RETURN, "EMULOP_RETURN" }, - { i_EMULOP, "EMULOP" }, - + { i_EMULOP_RETURN, "EMULOP_RETURN" }, + { i_EMULOP, "EMULOP" }, + { i_MMUOP, "MMUOP" }, + + {i_NATFEAT_ID, "NATFEAT_ID" }, + {i_NATFEAT_CALL, "NATFEAT_CALL" }, + { i_ILLG, "" }, }; -struct instr *table68k; -static __inline__ amodes mode_from_str (const char *str) +static inline amodes mode_from_str (const char *str) { if (strncmp (str, "Dreg", 4) == 0) return Dreg; if (strncmp (str, "Areg", 4) == 0) return Areg; @@ -180,7 +179,7 @@ static __inline__ amodes mode_from_str (const char *str) return (amodes)0; } -static __inline__ amodes mode_from_mr (int mode, int reg) +static inline amodes mode_from_mr (int mode, 
int reg) { switch (mode) { case 0: return Dreg; @@ -215,31 +214,32 @@ static void build_insn (int insn) int i, n; int flaglive = 0, flagdead = 0; - int cflow = 0; + int cflow = 0; id = defs68k[insn]; - // Control flow information - cflow = id.cflow; - - // Mask of flags set/used - unsigned char flags_set(0), flags_used(0); - - for (i = 0, n = 4; i < 5; i++, n--) { - switch (id.flaginfo[i].flagset) { - case fa_unset: case fa_isjmp: break; - default: flags_set |= (1 << n); - } - - switch (id.flaginfo[i].flaguse) { - case fu_unused: case fu_isjmp: break; - default: flags_used |= (1 << n); - } + // Control flow information + cflow = id.cflow; + + // Mask of flags set/used + unsigned char flags_set(0), flags_used(0); + + for (i = 0, n = 4; i < 5; i++, n--) { + switch (id.flaginfo[i].flagset) { + case fa_unset: case fa_isjmp: break; + default: flags_set |= (1 << n); } - + + switch (id.flaginfo[i].flaguse) { + case fu_unused: case fu_isjmp: break; + default: flags_used |= (1 << n); + } + } + for (i = 0; i < 5; i++) { switch (id.flaginfo[i].flagset){ case fa_unset: break; + case fa_isjmp: break; case fa_zero: flagdead |= 1 << i; break; case fa_one: flagdead |= 1 << i; break; case fa_dontcare: flagdead |= 1 << i; break; @@ -252,6 +252,8 @@ static void build_insn (int insn) for (i = 0; i < 5; i++) { switch (id.flaginfo[i].flaguse) { case fu_unused: break; + case fu_isjmp: flaglive |= 1 << i; break; + case fu_maybecc: flaglive |= 1 << i; break; case fu_unknown: flaglive = -1; goto out2; case fu_used: flaglive |= 1 << i; break; } @@ -306,6 +308,7 @@ static void build_insn (int insn) continue; if (bitcnt[bitI] && (bitval[bitI] == 0x00 || bitval[bitI] == 0xff)) continue; + if (bitcnt[bitE] && (bitval[bitE] == 0x00)) continue; @@ -346,9 +349,9 @@ static void build_insn (int insn) } } mnp++; - if ((unsigned)mnp >= sizeof(mnemonic) - 1) { - mnemonic[sizeof(mnemonic) - 1] = 0; - fprintf(stderr, "Instruction %s overflow\n", mnemonic); + if ((unsigned)mnp >= (sizeof(mnemonic)-1)) { + 
mnemonic[sizeof(mnemonic)-1] = '\0'; + fprintf(stderr, "WTF!!! Instruction '%s' overflow\n", mnemonic); abort(); } } @@ -379,6 +382,7 @@ static void build_insn (int insn) case 'A': srcmode = Areg; switch (opcstr[pos++]) { + case 'l': srcmode = absl; break; case 'r': srcreg = bitval[bitr]; srcgather = 1; srcpos = bitpos[bitr]; break; case 'R': srcreg = bitval[bitR]; srcgather = 1; srcpos = bitpos[bitR]; break; default: abort(); @@ -388,9 +392,11 @@ static void build_insn (int insn) case 'P': srcmode = Aipi; pos++; break; } break; +#if 0 case 'L': srcmode = absl; break; +#endif case '#': switch (opcstr[pos++]) { case 'z': srcmode = imm; break; @@ -436,7 +442,7 @@ static void build_insn (int insn) srcpos = bitpos[bitK]; } break; - case 'E': srcmode = immi; srcreg = bitval[bitE]; + case 'E': srcmode = immi; srcreg = bitval[bitE]; if (CPU_EMU_SIZE < 5) { // gb-- what is CPU_EMU_SIZE used for ?? /* 1..255 */ srcgather = 1; @@ -444,8 +450,8 @@ static void build_insn (int insn) srcpos = bitpos[bitE]; } break; - case 'p': srcmode = immi; srcreg = bitval[bitp]; - if (CPU_EMU_SIZE < 5) { + case 'p': srcmode = immi; srcreg = bitval[bitp]; + if (CPU_EMU_SIZE < 5) { // gb-- what is CPU_EMU_SIZE used for ?? 
/* 0..3 */ srcgather = 1; srctype = 7; @@ -582,21 +588,22 @@ static void build_insn (int insn) case 'A': destmode = Areg; switch (opcstr[pos++]) { + case 'l': destmode = absl; break; case 'r': destreg = bitval[bitr]; dstgather = 1; dstpos = bitpos[bitr]; break; case 'R': destreg = bitval[bitR]; dstgather = 1; dstpos = bitpos[bitR]; break; - case 'x': destreg = 0; dstgather = 0; dstpos = 0; break; + case 'x': destreg = 0; dstgather = 0; dstpos = 0; break; default: abort(); } - if (dstpos < 0 || dstpos >= 32) - abort(); switch (opcstr[pos]) { case 'p': destmode = Apdi; pos++; break; case 'P': destmode = Aipi; pos++; break; } break; +#if 0 case 'L': destmode = absl; break; +#endif case '#': switch (opcstr[pos++]) { case 'z': destmode = imm; break; @@ -767,7 +774,7 @@ static void build_insn (int insn) table68k[opc].flaginfo[i].flaguse = id.flaginfo[i].flaguse; } #endif - + // Fix flags used information for Scc, Bcc, TRAPcc, DBcc instructions if ( table68k[opc].mnemo == i_Scc || table68k[opc].mnemo == i_Bcc @@ -795,7 +802,7 @@ static void build_insn (int insn) case 15:flags_used = 0x0E; break; /* LE */ } } - + #if 1 /* gb-- flagdead and flaglive would not have correct information */ table68k[opc].flagdead = flags_set; @@ -811,22 +818,6 @@ static void build_insn (int insn) } -void read_table68k (void) -{ - int i; - - table68k = (struct instr *)malloc (65536 * sizeof (struct instr)); - for (i = 0; i < 65536; i++) { - table68k[i].mnemo = i_ILLG; - table68k[i].handler = -1; - } - for (i = 0; i < n_defs68k; i++) { - build_insn (i); - } -} - -static int mismatch; - static void handle_merges (long int opcode) { uae_u16 smsk; @@ -851,9 +842,9 @@ static void handle_merges (long int opcode) case 5: smsk = 63; sbitdst = 64; break; case 6: - smsk = 255; sbitdst = 256; break; + smsk = 255; sbitdst = 256; break; case 7: - smsk = 3; sbitdst = 4; break; + smsk = 3; sbitdst = 4; break; default: smsk = 0; sbitdst = 0; abort(); @@ -869,7 +860,7 @@ static void handle_merges (long int 
opcode) } for (srcreg=0; srcreg < sbitdst; srcreg++) { for (dstreg=0; dstreg < dstend; dstreg++) { - uae_u16 code = uae_u16(opcode); + uae_u16 code = opcode; code = (code & ~smsk) | (srcreg << table68k[opcode].spos); code = (code & ~dmsk) | (dstreg << table68k[opcode].dpos); @@ -882,20 +873,20 @@ static void handle_merges (long int opcode) || table68k[code].suse != table68k[opcode].suse || table68k[code].duse != table68k[opcode].duse) { - mismatch++; continue; + readcpu_mismatch++; continue; } if (table68k[opcode].suse && (table68k[opcode].spos != table68k[code].spos || table68k[opcode].smode != table68k[code].smode || table68k[opcode].stype != table68k[code].stype)) { - mismatch++; continue; + readcpu_mismatch++; continue; } if (table68k[opcode].duse && (table68k[opcode].dpos != table68k[code].dpos || table68k[opcode].dmode != table68k[code].dmode)) { - mismatch++; continue; + readcpu_mismatch++; continue; } if (code != opcode) @@ -904,11 +895,11 @@ static void handle_merges (long int opcode) } } -void do_merges (void) +static void do_merges (void) { long int opcode; int nr = 0; - mismatch = 0; + readcpu_mismatch = 0; for (opcode = 0; opcode < 65536; opcode++) { if (table68k[opcode].handler != -1 || table68k[opcode].mnemo == i_ILLG) continue; @@ -918,116 +909,26 @@ void do_merges (void) nr_cpuop_funcs = nr; } -int get_no_mismatches (void) -{ - return mismatch; -} -const char *get_instruction_name (unsigned int opcode) +void init_table68k (void) { - struct instr *ins = &table68k[opcode]; - for (int i = 0; lookuptab[i].name[0]; i++) { - if (ins->mnemo == lookuptab[i].mnemo) - return lookuptab[i].name; + int i; + + free(table68k); + table68k = (struct instr *)malloc (65536 * sizeof (struct instr)); + for (i = 0; i < 65536; i++) { + table68k[i].mnemo = i_ILLG; + table68k[i].handler = -1; } - abort(); - return NULL; -} - -static char *get_ea_string (amodes mode, wordsizes size) -{ - static char buffer[80]; - - buffer[0] = 0; - switch (mode){ - case Dreg: - strcpy 
(buffer,"Dn"); - break; - case Areg: - strcpy (buffer,"An"); - break; - case Aind: - strcpy (buffer,"(An)"); - break; - case Aipi: - strcpy (buffer,"(An)+"); - break; - case Apdi: - strcpy (buffer,"-(An)"); - break; - case Ad16: - strcpy (buffer,"(d16,An)"); - break; - case Ad8r: - strcpy (buffer,"(d8,An,Xn)"); - break; - case PC16: - strcpy (buffer,"(d16,PC)"); - break; - case PC8r: - strcpy (buffer,"(d8,PC,Xn)"); - break; - case absw: - strcpy (buffer,"(xxx).W"); - break; - case absl: - strcpy (buffer,"(xxx).L"); - break; - case imm: - switch (size){ - case sz_byte: - strcpy (buffer,"#.B"); - break; - case sz_word: - strcpy (buffer,"#.W"); - break; - case sz_long: - strcpy (buffer,"#.L"); - break; - default: - break; - } - break; - case imm0: - strcpy (buffer,"#.B"); - break; - case imm1: - strcpy (buffer,"#.W"); - break; - case imm2: - strcpy (buffer,"#.L"); - break; - case immi: - strcpy (buffer,"#"); - break; - - default: - break; + for (i = 0; i < n_defs68k; i++) { + build_insn (i); } - return buffer; + do_merges(); } -const char *get_instruction_string (unsigned int opcode) + +void exit_table68k (void) { - static char out[100]; - struct instr *ins; - - strcpy (out, get_instruction_name (opcode)); - - ins = &table68k[opcode]; - if (ins->size == sz_byte) - strcat (out,".B"); - if (ins->size == sz_word) - strcat (out,".W"); - if (ins->size == sz_long) - strcat (out,".L"); - strcat (out," "); - if (ins->suse) - strcat (out, get_ea_string (amodes(ins->smode), wordsizes(ins->size))); - if (ins->duse) { - if (ins->suse) - strcat (out,","); - strcat (out, get_ea_string (amodes(ins->dmode), wordsizes(ins->size))); - } - return out; + free(table68k); + table68k = NULL; } diff --git a/BasiliskII/src/uae_cpu/readcpu.h b/BasiliskII/src/uae_cpu/readcpu.h index 6fba3c39..3bdc0cd6 100644 --- a/BasiliskII/src/uae_cpu/readcpu.h +++ b/BasiliskII/src/uae_cpu/readcpu.h @@ -1,16 +1,16 @@ -#ifndef READCPU_H -#define READCPU_H +#ifndef UAE_READCPU_H +#define UAE_READCPU_H #ifdef 
__cplusplus extern "C" { #endif -ENUMDECL { +typedef enum { Dreg, Areg, Aind, Aipi, Apdi, Ad16, Ad8r, absw, absl, PC16, PC8r, imm, imm0, imm1, imm2, immi, am_unknown, am_illg -} ENUMNAME (amodes); +} amodes; -ENUMDECL { +typedef enum { i_ILLG, i_OR, i_AND, i_EOR, i_ORSR, i_ANDSR, i_EORSR, @@ -35,43 +35,42 @@ ENUMDECL { i_PACK, i_UNPK, i_TAS, i_BKPT, i_CALLM, i_RTM, i_TRAPcc, i_MOVES, i_FPP, i_FDBcc, i_FScc, i_FTRAPcc, i_FBcc, i_FSAVE, i_FRESTORE, i_CINVL, i_CINVP, i_CINVA, i_CPUSHL, i_CPUSHP, i_CPUSHA, i_MOVE16, - i_MMUOP, - i_EMULOP_RETURN, i_EMULOP -} ENUMNAME (instrmnem); + i_MMUOP, i_EMULOP_RETURN, i_EMULOP, i_NATFEAT_ID, i_NATFEAT_CALL +} instrmnem; extern struct mnemolookup { instrmnem mnemo; const char *name; } lookuptab[]; -ENUMDECL { +typedef enum { sz_byte, sz_word, sz_long -} ENUMNAME (wordsizes); +} wordsizes; -ENUMDECL { - fa_set, fa_unset, fa_zero, fa_one, fa_dontcare, fa_unknown, fa_isjmp -} ENUMNAME (flagaffect); +typedef enum { + fa_set, fa_unset, fa_zero, fa_one, fa_dontcare, fa_unknown, fa_isjmp, + fa_isbranch +} flagaffect; -ENUMDECL { +typedef enum { fu_used, fu_unused, fu_maybecc, fu_unknown, fu_isjmp -} ENUMNAME (flaguse); +} flaguse; -ENUMDECL { - fl_normal = 0, +typedef enum { + fl_normal = 0, fl_branch = 1, - fl_jump = 2, - fl_return = 3, - fl_trap = 4, - fl_const_jump = 8, - - /* Instructions that can trap don't mark the end of a block */ - fl_end_block = 3 -} ENUMNAME (cflow_t); + fl_jump = 2, + fl_return = 3, + fl_trap = 4, + fl_const_jump = 8, + /* Instructions that can trap don't mark the end of a block */ + fl_end_block = 3 +} cflow_t; -ENUMDECL { +typedef enum { bit0, bit1, bitc, bitC, bitf, biti, bitI, bitj, bitJ, bitk, bitK, bits, bitS, bitd, bitD, bitr, bitR, bitz, bitE, bitp, lastbit -} ENUMNAME (bitvals); +} bitvals; struct instr_def { unsigned int bits; @@ -84,7 +83,7 @@ struct instr_def { unsigned int flaguse:3; unsigned int flagset:3; } flaginfo[5]; - unsigned char cflow; + unsigned char cflow; unsigned char sduse; const 
char *opcstr; }; @@ -103,28 +102,24 @@ extern struct instr { unsigned int mnemo:8; unsigned int cc:4; unsigned int plev:2; - unsigned int size:2; - unsigned int smode:5; + wordsizes size:2; + amodes smode:5; unsigned int stype:3; - unsigned int dmode:5; + amodes dmode:5; unsigned int suse:1; unsigned int duse:1; unsigned int unused1:1; unsigned int clev:3; - unsigned int cflow:3; + unsigned int cflow:3; unsigned int unused2:2; } *table68k; -extern void read_table68k (void); -extern void do_merges (void); -extern int get_no_mismatches (void); +extern void init_table68k(void); +extern void exit_table68k(void); extern int nr_cpuop_funcs; -extern const char *get_instruction_name (unsigned int opcode); -extern const char *get_instruction_string (unsigned int opcode); - #ifdef __cplusplus } #endif -#endif /* READCPU_H */ +#endif diff --git a/BasiliskII/src/uae_cpu/readcpua.cpp b/BasiliskII/src/uae_cpu/readcpua.cpp new file mode 100644 index 00000000..521c241f --- /dev/null +++ b/BasiliskII/src/uae_cpu/readcpua.cpp @@ -0,0 +1,5 @@ +/* + * readcpu.cpp must be compiled twice, once for the generator program + * and once for the actual executable + */ +#include "readcpu.cpp" diff --git a/BasiliskII/src/uae_cpu/registers.h b/BasiliskII/src/uae_cpu/registers.h new file mode 100644 index 00000000..16f67092 --- /dev/null +++ b/BasiliskII/src/uae_cpu/registers.h @@ -0,0 +1,115 @@ +/* 2001 MJ */ + +#ifndef REGISTERS_H +#define REGISTERS_H + +#include "sysdeps.h" +#include "spcflags.h" +typedef char flagtype; + + +struct xttrx { + uae_u32 log_addr_base : 8; + uae_u32 log_addr_mask : 8; + uae_u32 enable : 1; + uae_u32 s_field : 2; + uae_u32 : 3; + uae_u32 usr1 : 1; + uae_u32 usr0 : 1; + uae_u32 : 1; + uae_u32 cmode : 2; + uae_u32 : 2; + uae_u32 write : 1; + uae_u32 : 2; +}; + +struct mmusr_t { + uae_u32 phys_addr : 20; + uae_u32 bus_err : 1; + uae_u32 global : 1; + uae_u32 usr1 : 1; + uae_u32 usr0 : 1; + uae_u32 super : 1; + uae_u32 cmode : 2; + uae_u32 modif : 1; + uae_u32 : 1; + 
uae_u32 write : 1; + uae_u32 ttrhit : 1; + uae_u32 resident : 1; +}; + +struct log_addr4 { + uae_u32 rif : 7; + uae_u32 pif : 7; + uae_u32 paif : 6; + uae_u32 poff : 12; +}; + +struct log_addr8 { + uae_u32 rif : 7; + uae_u32 pif : 7; + uae_u32 paif : 5; + uae_u32 poff : 13; +}; + +extern struct regstruct +{ + uae_u32 regs[16]; + uaecptr usp,isp,msp; + uae_u16 sr; + flagtype t1; + flagtype t0; + flagtype s; + flagtype m; + flagtype stopped; + uint32_t intmask; + + uae_u32 pc; + uae_u32 fault_pc; + uae_u8 *pc_p; + uae_u8 *pc_oldp; + + uae_u32 vbr,sfc,dfc; + + volatile uae_u32 spcflags; + +#if 0 + uae_u32 kick_mask; + + /* Fellow sources say this is 4 longwords. That's impossible. It needs + * to be at least a longword. The HRM has some cryptic comment about two + * instructions being on the same longword boundary. + * The way this is implemented now seems like a good compromise. + */ + uae_u32 prefetch; +#endif + + /* MMU reg*/ + uae_u32 urp,srp; + uae_u32 tc; + + int mmu_enabled; /* flagtype tce; */ + int mmu_pagesize_8k; /* flagtype tcp; */ + + uae_u32 dtt0,dtt1,itt0,itt1; + uae_u32 mmusr; + + uae_u32 mmu_fslw, mmu_fault_addr; + uae_u16 mmu_ssw; + uae_u32 wb3_data; + uae_u16 wb3_status; + + /* Cache reg*/ + uae_u32 cacr,caar; +} regs; + +static inline uaecptr m68k_getpc (void) +{ +#ifdef FULLMMU + return regs.pc; +#else + return regs.pc + ((char *)regs.pc_p - (char *)regs.pc_oldp); +#endif +} + +#endif diff --git a/BasiliskII/src/uae_cpu/spcflags.h b/BasiliskII/src/uae_cpu/spcflags.h index 3c3fc032..eb465e72 100644 --- a/BasiliskII/src/uae_cpu/spcflags.h +++ b/BasiliskII/src/uae_cpu/spcflags.h @@ -1,92 +1,78 @@ -/* - * UAE - The Un*x Amiga Emulator - * - * MC68000 emulation - * - * Copyright 1995 Bernd Schmidt - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ + /* + * UAE - The Un*x Amiga Emulator + * + * MC68000 emulation + * + * Copyright 1995 Bernd Schmidt + */ #ifndef SPCFLAGS_H #define SPCFLAGS_H -typedef uae_u32 spcflags_t; +#if 0 +#include "SDL_compat.h" +#endif enum { - SPCFLAG_STOP = 0x01, - SPCFLAG_INT = 0x02, - SPCFLAG_BRK = 0x04, - SPCFLAG_TRACE = 0x08, - SPCFLAG_DOTRACE = 0x10, - SPCFLAG_DOINT = 0x20, -#if USE_JIT - SPCFLAG_JIT_END_COMPILE = 0x40, - SPCFLAG_JIT_EXEC_RETURN = 0x80, + SPCFLAG_STOP = 0x01, +#if 0 + SPCFLAG_INTERNAL_IRQ = 0x02, #else - SPCFLAG_JIT_END_COMPILE = 0, - SPCFLAG_JIT_EXEC_RETURN = 0, + SPCFLAG_INT = 0x02, #endif - - SPCFLAG_ALL = SPCFLAG_STOP - | SPCFLAG_INT - | SPCFLAG_BRK - | SPCFLAG_TRACE - | SPCFLAG_DOTRACE - | SPCFLAG_DOINT - | SPCFLAG_JIT_END_COMPILE - | SPCFLAG_JIT_EXEC_RETURN - , - + SPCFLAG_BRK = 0x04, + SPCFLAG_TRACE = 0x08, + SPCFLAG_DOTRACE = 0x10, + SPCFLAG_DOINT = 0x20, +#ifdef USE_JIT + SPCFLAG_JIT_END_COMPILE = 0x40, + SPCFLAG_JIT_EXEC_RETURN = 0x80, +#else + SPCFLAG_JIT_END_COMPILE = 0, + SPCFLAG_JIT_EXEC_RETURN = 0, +#endif + SPCFLAG_VBL = 0x100, + SPCFLAG_MFP = 0x200, + SPCFLAG_INT3 = 0x800, + SPCFLAG_INT5 = 0x1000, + SPCFLAG_SCC = 0x2000, +// SPCFLAG_MODE_CHANGE = 0x4000, + SPCFLAG_ALL = SPCFLAG_STOP +#if 0 + | SPCFLAG_INTERNAL_IRQ +#else + | SPCFLAG_INT +#endif + | SPCFLAG_BRK + | SPCFLAG_TRACE + | SPCFLAG_DOTRACE + | SPCFLAG_DOINT + | SPCFLAG_JIT_END_COMPILE + | SPCFLAG_JIT_EXEC_RETURN + | SPCFLAG_INT3 + | SPCFLAG_VBL + | SPCFLAG_INT5 + | SPCFLAG_SCC + | SPCFLAG_MFP + , + SPCFLAG_ALL_BUT_EXEC_RETURN 
= SPCFLAG_ALL & ~SPCFLAG_JIT_EXEC_RETURN + }; +#if 0 +#define SPCFLAGS_TEST(m) \ + (regs.spcflags & (m)) +#else #define SPCFLAGS_TEST(m) \ ((regs.spcflags & (m)) != 0) +#endif /* Macro only used in m68k_reset() */ #define SPCFLAGS_INIT(m) do { \ regs.spcflags = (m); \ } while (0) -#if !(ENABLE_EXCLUSIVE_SPCFLAGS) - -#define SPCFLAGS_SET(m) do { \ - regs.spcflags |= (m); \ -} while (0) - -#define SPCFLAGS_CLEAR(m) do { \ - regs.spcflags &= ~(m); \ -} while (0) - -#elif defined(X86_ASSEMBLY) - -#define HAVE_HARDWARE_LOCKS - -#define SPCFLAGS_SET(m) do { \ - __asm__ __volatile__("lock\n\torl %1,%0" : "=m" (regs.spcflags) : "i" ((m))); \ -} while (0) - -#define SPCFLAGS_CLEAR(m) do { \ - __asm__ __volatile__("lock\n\tandl %1,%0" : "=m" (regs.spcflags) : "i" (~(m))); \ -} while (0) - -#else - -#undef HAVE_HARDWARE_LOCKS - #include "main.h" extern B2_mutex *spcflags_lock; @@ -99,9 +85,91 @@ extern B2_mutex *spcflags_lock; #define SPCFLAGS_CLEAR(m) do { \ B2_lock_mutex(spcflags_lock); \ regs.spcflags &= ~(m); \ - B2_unlock_mutex(spcflags_lock); \ + B2_unlock_mutex(spcflags_lock); \ } while (0) +#define SleepAndWait() usleep(1000); + +#if 0 +#ifndef ENABLE_EXCLUSIVE_SPCFLAGS + +#define SPCFLAGS_SET(m) do { \ + regs.spcflags |= (m); \ +} while (0) + +#define SPCFLAGS_CLEAR(m) do { \ + regs.spcflags &= ~(m); \ +} while (0) + +#if 0 +#define SleepAndWait() usleep(1000) +#endif + +#elif defined(X86_ASSEMBLY) +// #elif (defined(CPU_i386) || defined(CPU_x86_64)) && defined(X86_ASSEMBLY) && !defined(ENABLE_REALSTOP) + +// #define HAVE_HARDWARE_LOCKS 1 +#define HAVE_HARDWARE_LOCKS + +#define SPCFLAGS_SET(m) do { \ + __asm__ __volatile__("lock\n\torl %1,%0" : "=m" (regs.spcflags) : "i" ((m))); \ +} while (0) + +#define SPCFLAGS_CLEAR(m) do { \ + __asm__ __volatile__("lock\n\tandl %1,%0" : "=m" (regs.spcflags) : "i" (~(m))); \ +} while (0) + +// #define SleepAndWait() usleep(1000) + +// #elif !defined(ENABLE_REALSTOP) + +// #undef HAVE_HARDWARE_LOCKS +// extern SDL_mutex 
*spcflags_lock; + +// #define SPCFLAGS_SET(m) do { \ +// SDL_LockMutex(spcflags_lock); \ +// regs.spcflags |= (m); \ +// SDL_UnlockMutex(spcflags_lock); \ +// } while (0) + +// #define SPCFLAGS_CLEAR(m) do { \ +// SDL_LockMutex(spcflags_lock); \ +// regs.spcflags &= ~(m); \ +// SDL_UnlockMutex(spcflags_lock); \ +// } while (0) + +// #define SleepAndWait() usleep(1000) + +#else +/// Full STOP instruction implementation (default configuration) + +#undef HAVE_HARDWARE_LOCKS +#if 0 +extern SDL_mutex *spcflags_lock; +extern SDL_cond *stop_condition; + +#define SPCFLAGS_SET(m) do { \ + SDL_LockMutex(spcflags_lock); \ + regs.spcflags |= (m); \ + if (regs.spcflags & SPCFLAG_STOP) \ + SDL_CondSignal(stop_condition); \ + SDL_UnlockMutex(spcflags_lock); \ +} while (0) + +#define SPCFLAGS_CLEAR(m) do { \ + SDL_LockMutex(spcflags_lock); \ + regs.spcflags &= ~(m); \ + SDL_UnlockMutex(spcflags_lock); \ +} while (0) + +#define SleepAndWait() do { \ + SDL_LockMutex(spcflags_lock); \ + SDL_CondWait(stop_condition, spcflags_lock); \ + SDL_UnlockMutex(spcflags_lock); \ +} while (0) +#endif + +#endif #endif #endif /* SPCFLAGS_H */ diff --git a/BasiliskII/src/uae_cpu/table68k b/BasiliskII/src/uae_cpu/table68k index ab9eabe1..7405bd31 100644 --- a/BasiliskII/src/uae_cpu/table68k +++ b/BasiliskII/src/uae_cpu/table68k @@ -10,7 +10,7 @@ % J: immediate 0..15 % k: immediate 0..7 % K: immediate 0..63 -% p: immediate 0..3 (CINV and CPUSH instructions: Cache Field) +% p: immediate 0..3 (CINV and CPUSH: cache field) % s: source mode % S: source reg % d: dest mode @@ -28,14 +28,15 @@ % ArP: --> (Ar)+ % L: --> (xxx.L) % -% Fields on a line: -% 16 chars bitpattern : -% CPU level / privilege level : +% Fields on a line: +% 16 chars bitpattern : +% CPU level / privilege level : % CPU level 0: 68000 % 1: 68010 % 2: 68020 % 3: 68020/68881 % 4: 68040 +% 5: 68060 % privilege level 0: not privileged % 1: unprivileged only on 68000 (check regs.s) % 2: privileged (check regs.s) @@ -46,8 +47,10 @@ % 0 means
flag reset % 1 means flag set % ? means programmer was too lazy to check or instruction may trap -% everything else means flag set/used +% + means instruction is conditional branch (ignored, only for sync) +% / means instruction is unconditional branch/call (ignored, only for sync) % x means flag is unknown and well-behaved programs shouldn't check it +% everything else means flag set/used % % Control flow % two letters, combination of @@ -108,7 +111,7 @@ 0011 DDDd ddss sSSS:00:-----:-----:--:12: MOVEA.W s,d[Areg] 0011 DDDd ddss sSSS:00:-NZ00:-----:--:12: MOVE.W s,d[!Areg] -0100 0000 zzdd dDDD:00:XxZxC:X-Z--:--:30: NEGX.z d[!Areg] +0100 0000 zzdd dDDD:00:XNZVC:X-Z--:--:30: NEGX.z d[!Areg] 0100 0000 11dd dDDD:01:-----:XNZVC:T-:10: MVSR2.W d[!Areg] 0100 0010 zzdd dDDD:00:-0100:-----:--:20: CLR.z d[!Areg] 0100 0010 11dd dDDD:10:-----:XNZVC:--:10: MVSR2.B d[!Areg] @@ -119,13 +122,13 @@ 0100 1000 0000 1rrr:20:-----:-----:--:31: LINK.L Ar,#2 0100 1000 00dd dDDD:00:X?Z?C:X-Z--:--:30: NBCD.B d[!Areg] 0100 1000 0100 1kkk:20:-----:-----:T-:10: BKPT #k -0100 1000 01ss sSSS:00:-NZ00:-----:--:30: SWAP.W s[Dreg] +0100 1000 01ss sSSS:00:-NZ00:-----:--:30: SWAP.W s[Dreg] 0100 1000 01ss sSSS:00:-----:-----:--:00: PEA.L s[!Dreg,Areg,Aipi,Apdi,Immd] -0100 1000 10dd dDDD:00:-NZ00:-----:--:30: EXT.W d[Dreg] +0100 1000 10dd dDDD:00:-NZ00:-----:--:30: EXT.W d[Dreg] 0100 1000 10dd dDDD:00:-----:-----:--:02: MVMLE.W #1,d[!Dreg,Areg,Aipi] -0100 1000 11dd dDDD:00:-NZ00:-----:--:30: EXT.L d[Dreg] +0100 1000 11dd dDDD:00:-NZ00:-----:--:30: EXT.L d[Dreg] 0100 1000 11dd dDDD:00:-----:-----:--:02: MVMLE.L #1,d[!Dreg,Areg,Aipi] -0100 1001 11dd dDDD:00:-NZ00:-----:--:30: EXT.B d[Dreg] +0100 1001 11dd dDDD:00:-NZ00:-----:--:30: EXT.B d[Dreg] 0100 1010 zzss sSSS:00:-NZ00:-----:--:10: TST.z s 0100 1010 11dd dDDD:00:-NZ00:-----:--:30: TAS.B d[!Areg] 0100 1010 1111 1100:00:-----:-----:T-:00: ILLEGAL @@ -148,21 +151,24 @@ 0100 1110 0111 0111:00:XNZVC:-----:-R:00: RTR 0100 1110 0111 
1010:12:-----:-----:T-:10: MOVEC2 #1 0100 1110 0111 1011:12:-----:-----:T-:10: MOVE2C #1 -0100 1110 10ss sSSS:00:-----:-----:-J:80: JSR.L s[!Dreg,Areg,Aipi,Apdi,Immd] +0100 1110 10ss sSSS:00://///://///:-J:80: JSR.L s[!Dreg,Areg,Aipi,Apdi,Immd] 0100 rrr1 00ss sSSS:00:-N???:-----:T-:11: CHK.L s[!Areg],Dr 0100 rrr1 10ss sSSS:00:-N???:-----:T-:11: CHK.W s[!Areg],Dr -0100 1110 11ss sSSS:00:-----:-----:-J:80: JMP.L s[!Dreg,Areg,Aipi,Apdi,Immd] +0100 1110 11ss sSSS:00://///://///:-J:80: JMP.L s[!Dreg,Areg,Aipi,Apdi,Immd] 0100 rrr1 11ss sSSS:00:-----:-----:--:02: LEA.L s[!Dreg,Areg,Aipi,Apdi,Immd],Ar -0101 jjj0 01dd dDDD:00:-----:-----:--:13: ADDA.W #j,d[Areg] -0101 jjj0 10dd dDDD:00:-----:-----:--:13: ADDA.L #j,d[Areg] +% This variant of ADDQ is word and long sized only +0101 jjj0 01dd dDDD:00:-----:-----:--:13: ADDA.W #j,d[Areg] +0101 jjj0 10dd dDDD:00:-----:-----:--:13: ADDA.L #j,d[Areg] 0101 jjj0 zzdd dDDD:00:XNZVC:-----:--:13: ADD.z #j,d[!Areg] -0101 jjj1 01dd dDDD:00:-----:-----:--:13: SUBA.W #j,d[Areg] -0101 jjj1 10dd dDDD:00:-----:-----:--:13: SUBA.L #j,d[Areg] + +% This variant of SUBQ is word and long sized only +0101 jjj1 01dd dDDD:00:-----:-----:--:13: SUBA.W #j,d[Areg] +0101 jjj1 10dd dDDD:00:-----:-----:--:13: SUBA.L #j,d[Areg] 0101 jjj1 zzdd dDDD:00:XNZVC:-----:--:13: SUB.z #j,d[!Areg] -0101 cccc 1100 1rrr:00:-----:-????:-B:31: DBcc.W Dr,#1 -0101 cccc 11dd dDDD:00:-----:-????:--:20: Scc.B d[!Areg] +0101 cccc 1100 1rrr:00:-----:-++++:-B:31: DBcc.W Dr,#1 +0101 cccc 11dd dDDD:00:-----:-++++:--:20: Scc.B d[!Areg] 0101 cccc 1111 1010:20:-----:-????:T-:10: TRAPcc #1 0101 cccc 1111 1011:20:-----:-????:T-:10: TRAPcc #2 0101 cccc 1111 1100:20:-----:-????:T-:00: TRAPcc @@ -170,30 +176,30 @@ % Bxx.L is 68020 only, but setting the CPU level to 2 would give illegal % instruction exceptions when compiling a 68000 only emulation, which isn't % what we want either. 
-0110 0001 0000 0000:00:-----:-----:-B:40: BSR.W #1 -0110 0001 IIII IIII:00:-----:-----:-B:40: BSR.B #i -0110 0001 1111 1111:00:-----:-----:-B:40: BSR.L #2 -0110 CCCC 0000 0000:00:-----:-????:-B:40: Bcc.W #1 -0110 CCCC IIII IIII:00:-----:-????:-B:40: Bcc.B #i -0110 CCCC 1111 1111:00:-----:-????:-B:40: Bcc.L #2 +0110 0001 0000 0000:00://///://///:-B:40: BSR.W #1 +0110 0001 IIII IIII:00://///://///:-B:40: BSR.B #i +0110 0001 1111 1111:00://///://///:-B:40: BSR.L #2 +0110 CCCC 0000 0000:00:-----:-++++:-B:40: Bcc.W #1 +0110 CCCC IIII IIII:00:-----:-++++:-B:40: Bcc.B #i +0110 CCCC 1111 1111:00:-----:-++++:-B:40: Bcc.L #2 0111 rrr0 iiii iiii:00:-NZ00:-----:--:12: MOVE.L #i,Dr 1000 rrr0 zzss sSSS:00:-NZ00:-----:--:13: OR.z s[!Areg],Dr 1000 rrr0 11ss sSSS:00:-NZV0:-----:T-:13: DIVU.W s[!Areg],Dr -1000 rrr1 00dd dDDD:00:XxZxC:X-Z--:--:13: SBCD.B d[Dreg],Dr -1000 rrr1 00dd dDDD:00:XxZxC:X-Z--:--:13: SBCD.B d[Areg-Apdi],Arp +1000 rrr1 00dd dDDD:00:X?Z?C:X-Z--:--:13: SBCD.B d[Dreg],Dr +1000 rrr1 00dd dDDD:00:X?Z?C:X-Z--:--:13: SBCD.B d[Areg-Apdi],Arp 1000 rrr1 zzdd dDDD:00:-NZ00:-----:--:13: OR.z Dr,d[!Areg,Dreg] -1000 rrr1 01dd dDDD:20:-----:-----:--:12: PACK d[Dreg],Dr -1000 rrr1 01dd dDDD:20:-----:-----:--:12: PACK d[Areg-Apdi],Arp -1000 rrr1 10dd dDDD:20:-----:-----:--:12: UNPK d[Dreg],Dr -1000 rrr1 10dd dDDD:20:-----:-----:--:12: UNPK d[Areg-Apdi],Arp +1000 rrr1 01dd dDDD:20:-----:-----:--:12: PACK d[Dreg],Dr +1000 rrr1 01dd dDDD:20:-----:-----:--:12: PACK d[Areg-Apdi],Arp +1000 rrr1 10dd dDDD:20:-----:-----:--:12: UNPK d[Dreg],Dr +1000 rrr1 10dd dDDD:20:-----:-----:--:12: UNPK d[Areg-Apdi],Arp 1000 rrr1 11ss sSSS:00:-NZV0:-----:T-:13: DIVS.W s[!Areg],Dr 1001 rrr0 zzss sSSS:00:XNZVC:-----:--:13: SUB.z s,Dr 1001 rrr0 11ss sSSS:00:-----:-----:--:13: SUBA.W s,Ar -1001 rrr1 zzdd dDDD:00:XNZVC:X-Z--:--:13: SUBX.z d[Dreg],Dr -1001 rrr1 zzdd dDDD:00:XNZVC:X-Z--:--:13: SUBX.z d[Areg-Apdi],Arp +1001 rrr1 zzdd dDDD:00:XNZVC:X-Z--:--:13: SUBX.z d[Dreg],Dr +1001 rrr1 zzdd 
dDDD:00:XNZVC:X-Z--:--:13: SUBX.z d[Areg-Apdi],Arp 1001 rrr1 zzdd dDDD:00:XNZVC:-----:--:13: SUB.z Dr,d[!Areg,Dreg] 1001 rrr1 11ss sSSS:00:-----:-----:--:13: SUBA.L s,Ar @@ -205,18 +211,18 @@ 1100 rrr0 zzss sSSS:00:-NZ00:-----:--:13: AND.z s[!Areg],Dr 1100 rrr0 11ss sSSS:00:-NZ00:-----:--:13: MULU.W s[!Areg],Dr -1100 rrr1 00dd dDDD:00:XxZxC:X-Z--:--:13: ABCD.B d[Dreg],Dr -1100 rrr1 00dd dDDD:00:XxZxC:X-Z--:--:13: ABCD.B d[Areg-Apdi],Arp +1100 rrr1 00dd dDDD:00:X?Z?C:X-Z--:--:13: ABCD.B d[Dreg],Dr +1100 rrr1 00dd dDDD:00:X?Z?C:X-Z--:--:13: ABCD.B d[Areg-Apdi],Arp 1100 rrr1 zzdd dDDD:00:-NZ00:-----:--:13: AND.z Dr,d[!Areg,Dreg] -1100 rrr1 01dd dDDD:00:-----:-----:--:33: EXG.L Dr,d[Dreg] -1100 rrr1 01dd dDDD:00:-----:-----:--:33: EXG.L Ar,d[Areg] -1100 rrr1 10dd dDDD:00:-----:-----:--:33: EXG.L Dr,d[Areg] +1100 rrr1 01dd dDDD:00:-----:-----:--:33: EXG.L Dr,d[Dreg] +1100 rrr1 01dd dDDD:00:-----:-----:--:33: EXG.L Ar,d[Areg] +1100 rrr1 10dd dDDD:00:-----:-----:--:33: EXG.L Dr,d[Areg] 1100 rrr1 11ss sSSS:00:-NZ00:-----:--:13: MULS.W s[!Areg],Dr 1101 rrr0 zzss sSSS:00:XNZVC:-----:--:13: ADD.z s,Dr 1101 rrr0 11ss sSSS:00:-----:-----:--:13: ADDA.W s,Ar -1101 rrr1 zzdd dDDD:00:XNZVC:X-Z--:--:13: ADDX.z d[Dreg],Dr -1101 rrr1 zzdd dDDD:00:XNZVC:X-Z--:--:13: ADDX.z d[Areg-Apdi],Arp +1101 rrr1 zzdd dDDD:00:XNZVC:X-Z--:--:13: ADDX.z d[Dreg],Dr +1101 rrr1 zzdd dDDD:00:XNZVC:X-Z--:--:13: ADDX.z d[Areg-Apdi],Arp 1101 rrr1 zzdd dDDD:00:XNZVC:-----:--:13: ADD.z Dr,d[!Areg,Dreg] 1101 rrr1 11ss sSSS:00:-----:-----:--:13: ADDA.L s,Ar @@ -224,8 +230,8 @@ 1110 jjjf zz00 1RRR:00:XNZ0C:-----:--:13: LSf.z #j,DR 1110 jjjf zz01 0RRR:00:XNZ0C:X----:--:13: ROXf.z #j,DR 1110 jjjf zz01 1RRR:00:-NZ0C:-----:--:13: ROf.z #j,DR -1110 rrrf zz10 0RRR:00:XNZVC:-----:--:13: ASf.z Dr,DR -1110 rrrf zz10 1RRR:00:XNZ0C:-----:--:13: LSf.z Dr,DR +1110 rrrf zz10 0RRR:00:XNZVC:X----:--:13: ASf.z Dr,DR +1110 rrrf zz10 1RRR:00:XNZ0C:X----:--:13: LSf.z Dr,DR 1110 rrrf zz11 0RRR:00:XNZ0C:X----:--:13: ROXf.z Dr,DR 1110 
rrrf zz11 1RRR:00:-NZ0C:-----:--:13: ROf.z Dr,DR 1110 000f 11dd dDDD:00:XNZVC:-----:--:13: ASfW.W d[!Dreg,Areg] @@ -255,7 +261,6 @@ 1111 0011 01ss sSSS:32:-----:-----:--:10: FRESTORE s[!Dreg,Areg,Apdi,Immd] % 68040 instructions -1111 0101 iiii iSSS:40:-----:-----:T-:11: MMUOP #i,s 1111 0100 pp00 1rrr:42:-----:-----:T-:02: CINVL #p,Ar 1111 0100 pp01 0rrr:42:-----:-----:T-:02: CINVP #p,Ar 1111 0100 pp01 1rrr:42:-----:-----:T-:00: CINVA #p @@ -264,11 +269,19 @@ 1111 0100 pp11 1rrr:42:-----:-----:T-:00: CPUSHA #p % destination register number is encoded in the following word 1111 0110 0010 0rrr:40:-----:-----:--:12: MOVE16 ArP,AxP -1111 0110 00ss sSSS:40:-----:-----:--:12: MOVE16 s[Dreg-Aipi],L -1111 0110 00dd dDDD:40:-----:-----:--:12: MOVE16 L,d[Areg-Aipi] -1111 0110 00ss sSSS:40:-----:-----:--:12: MOVE16 s[Aind],L -1111 0110 00dd dDDD:40:-----:-----:--:12: MOVE16 L,d[Aipi-Aind] +1111 0110 00ss sSSS:40:-----:-----:--:12: MOVE16 s[Dreg-Aipi],Al +1111 0110 00dd dDDD:40:-----:-----:--:12: MOVE16 Al,d[Areg-Aipi] +1111 0110 00ss sSSS:40:-----:-----:--:12: MOVE16 s[Aind],Al +1111 0110 00dd dDDD:40:-----:-----:--:12: MOVE16 Al,d[Aipi-Aind] -% EmulOp instructions -0111 0001 0000 0000:00:-----:-----:-R:00: EMULOP_RETURN -0111 0001 EEEE EEEE:00:-----:-----:-J:10: EMULOP #E +% MMU disabled +% 1111 0101 iiii iSSS:42:?????:?????:T-:11: MMUOP #i,s + +% EmulOp instructions (used by linux68k) +0111 0001 0000 0000:02:-----:XNZVC:-R:00: EMULOP_RETURN +0111 0001 EEEE EEEE:00:-----:XNZVC:-J:10: EMULOP #E + +% NatFea instructions (do I have the srcaddr correct?) +% NatFeat disabled +% 0111 0011 0000 0000:00:-----:XNZVC:-J:00: NATFEAT_ID +% 0111 0011 0000 0001:00:-----:XNZVC:-J:00: NATFEAT_CALL