From 1758ef58b5b677daf9d0ae9df3751177ce7e718d Mon Sep 17 00:00:00 2001 From: uyjulian Date: Sun, 15 Apr 2018 20:23:12 -0500 Subject: [PATCH] Port of CPU code from ARAnyM (currently hangs) --- BasiliskII/src/Unix/CMakeLists.txt | 9 +- BasiliskII/src/Unix/main_unix.cpp | 5 +- BasiliskII/src/Unix/sysdeps.h | 11 + BasiliskII/src/include/main.h | 7 - BasiliskII/src/uae_cpu/Makefile.am | 80 + BasiliskII/src/uae_cpu/aranym_glue.cpp | 327 + BasiliskII/src/uae_cpu/basilisk_glue.cpp | 81 +- BasiliskII/src/uae_cpu/build68k.c | 71 +- .../src/uae_cpu/compiler/codegen_arm.cpp | 2872 +++++++ BasiliskII/src/uae_cpu/compiler/codegen_arm.h | 1292 ++++ .../src/uae_cpu/compiler/codegen_x86.cpp | 5372 +++++++++++++ BasiliskII/src/uae_cpu/compiler/codegen_x86.h | 1996 +++++ BasiliskII/src/uae_cpu/compiler/compemu.h | 543 ++ BasiliskII/src/uae_cpu/compiler/compemu1.cpp | 2 + BasiliskII/src/uae_cpu/compiler/compemu2.cpp | 2 + BasiliskII/src/uae_cpu/compiler/compemu3.cpp | 2 + BasiliskII/src/uae_cpu/compiler/compemu4.cpp | 2 + BasiliskII/src/uae_cpu/compiler/compemu5.cpp | 2 + BasiliskII/src/uae_cpu/compiler/compemu6.cpp | 2 + BasiliskII/src/uae_cpu/compiler/compemu7.cpp | 2 + BasiliskII/src/uae_cpu/compiler/compemu8.cpp | 2 + .../src/uae_cpu/compiler/compemu_fpp.cpp | 1638 ++++ .../uae_cpu/compiler/compemu_midfunc_arm.cpp | 2106 +++++ .../uae_cpu/compiler/compemu_midfunc_arm.h | 184 + .../uae_cpu/compiler/compemu_midfunc_arm2.cpp | 5428 +++++++++++++ .../uae_cpu/compiler/compemu_midfunc_arm2.h | 348 + .../uae_cpu/compiler/compemu_midfunc_x86.cpp | 2982 ++++++++ .../uae_cpu/compiler/compemu_midfunc_x86.h | 252 + .../src/uae_cpu/compiler/compemu_support.cpp | 5111 +++++++++++++ BasiliskII/src/uae_cpu/compiler/compstbla.cpp | 5 + BasiliskII/src/uae_cpu/compiler/flags_arm.h | 52 + BasiliskII/src/uae_cpu/compiler/flags_x86.h | 52 + BasiliskII/src/uae_cpu/compiler/gencomp.c | 3619 +++++++++ BasiliskII/src/uae_cpu/compiler/gencomp_arm.c | 4981 ++++++++++++ 
.../src/uae_cpu/compiler/test_codegen_arm.c | 264 + .../src/uae_cpu/compiler/test_codegen_x86.cpp | 1008 +++ BasiliskII/src/uae_cpu/cpu_emulation.h | 16 +- BasiliskII/src/uae_cpu/cpudefsa.cpp | 5 + BasiliskII/src/uae_cpu/cpuemu1.cpp | 2 + BasiliskII/src/uae_cpu/cpuemu1_nf.cpp | 3 + BasiliskII/src/uae_cpu/cpuemu2.cpp | 2 + BasiliskII/src/uae_cpu/cpuemu2_nf.cpp | 3 + BasiliskII/src/uae_cpu/cpuemu3.cpp | 2 + BasiliskII/src/uae_cpu/cpuemu3_nf.cpp | 3 + BasiliskII/src/uae_cpu/cpuemu4.cpp | 2 + BasiliskII/src/uae_cpu/cpuemu4_nf.cpp | 3 + BasiliskII/src/uae_cpu/cpuemu5.cpp | 2 + BasiliskII/src/uae_cpu/cpuemu5_nf.cpp | 4 + BasiliskII/src/uae_cpu/cpuemu6.cpp | 2 + BasiliskII/src/uae_cpu/cpuemu6_nf.cpp | 3 + BasiliskII/src/uae_cpu/cpuemu7.cpp | 2 + BasiliskII/src/uae_cpu/cpuemu7_nf.cpp | 3 + BasiliskII/src/uae_cpu/cpuemu8.cpp | 2 + BasiliskII/src/uae_cpu/cpuemu8_nf.cpp | 3 + BasiliskII/src/uae_cpu/cpufunctbla.cpp | 5 + BasiliskII/src/uae_cpu/cpummu.cpp | 1096 +++ BasiliskII/src/uae_cpu/cpummu.h | 267 + BasiliskII/src/uae_cpu/cpuopti.c | 298 - BasiliskII/src/uae_cpu/cpustbl_nf.cpp | 2 + BasiliskII/src/uae_cpu/cpustbla.cpp | 5 + BasiliskII/src/uae_cpu/debug.cpp | 82 + BasiliskII/src/uae_cpu/fpu/core.h | 268 + BasiliskII/src/uae_cpu/fpu/exceptions.cpp | 193 + BasiliskII/src/uae_cpu/fpu/exceptions.h | 154 + BasiliskII/src/uae_cpu/fpu/flags.cpp | 174 + BasiliskII/src/uae_cpu/fpu/flags.h | 228 + BasiliskII/src/uae_cpu/fpu/fpu.h | 59 + BasiliskII/src/uae_cpu/fpu/fpu_ieee.cpp | 2330 ++++++ BasiliskII/src/uae_cpu/fpu/fpu_ieee.h | 154 + BasiliskII/src/uae_cpu/fpu/fpu_mpfr.cpp | 2110 +++++ BasiliskII/src/uae_cpu/fpu/fpu_uae.cpp | 2553 +++++++ BasiliskII/src/uae_cpu/fpu/fpu_uae.h | 217 + BasiliskII/src/uae_cpu/fpu/fpu_x86.cpp | 6791 +++++++++++++++++ BasiliskII/src/uae_cpu/fpu/fpu_x86.h | 384 + BasiliskII/src/uae_cpu/fpu/fpu_x86_asm.h | 104 + BasiliskII/src/uae_cpu/fpu/impl.h | 159 + BasiliskII/src/uae_cpu/fpu/mathlib.cpp | 105 + BasiliskII/src/uae_cpu/fpu/mathlib.h | 1185 +++ 
BasiliskII/src/uae_cpu/fpu/rounding.cpp | 69 + BasiliskII/src/uae_cpu/fpu/rounding.h | 159 + BasiliskII/src/uae_cpu/fpu/types.h | 181 + BasiliskII/src/uae_cpu/gencpu.c | 1766 +++-- BasiliskII/src/uae_cpu/m68k.h | 705 +- BasiliskII/src/uae_cpu/memory-uae.h | 606 ++ BasiliskII/src/uae_cpu/memory.cpp | 59 + BasiliskII/src/uae_cpu/memory.h | 45 + BasiliskII/src/uae_cpu/newcpu.cpp | 1741 +++-- BasiliskII/src/uae_cpu/newcpu.h | 366 +- BasiliskII/src/uae_cpu/noflags.h | 142 + BasiliskII/src/uae_cpu/readcpu.cpp | 143 +- BasiliskII/src/uae_cpu/readcpu.h | 59 +- BasiliskII/src/uae_cpu/readcpua.cpp | 5 + BasiliskII/src/uae_cpu/registers.h | 116 + BasiliskII/src/uae_cpu/spcflags.h | 104 + BasiliskII/src/uae_cpu/table68k | 392 +- 95 files changed, 63979 insertions(+), 2376 deletions(-) create mode 100644 BasiliskII/src/uae_cpu/Makefile.am create mode 100644 BasiliskII/src/uae_cpu/aranym_glue.cpp create mode 100644 BasiliskII/src/uae_cpu/compiler/codegen_arm.cpp create mode 100644 BasiliskII/src/uae_cpu/compiler/codegen_arm.h create mode 100644 BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp create mode 100644 BasiliskII/src/uae_cpu/compiler/codegen_x86.h create mode 100644 BasiliskII/src/uae_cpu/compiler/compemu.h create mode 100644 BasiliskII/src/uae_cpu/compiler/compemu1.cpp create mode 100644 BasiliskII/src/uae_cpu/compiler/compemu2.cpp create mode 100644 BasiliskII/src/uae_cpu/compiler/compemu3.cpp create mode 100644 BasiliskII/src/uae_cpu/compiler/compemu4.cpp create mode 100644 BasiliskII/src/uae_cpu/compiler/compemu5.cpp create mode 100644 BasiliskII/src/uae_cpu/compiler/compemu6.cpp create mode 100644 BasiliskII/src/uae_cpu/compiler/compemu7.cpp create mode 100644 BasiliskII/src/uae_cpu/compiler/compemu8.cpp create mode 100644 BasiliskII/src/uae_cpu/compiler/compemu_fpp.cpp create mode 100644 BasiliskII/src/uae_cpu/compiler/compemu_midfunc_arm.cpp create mode 100644 BasiliskII/src/uae_cpu/compiler/compemu_midfunc_arm.h create mode 100644 
BasiliskII/src/uae_cpu/compiler/compemu_midfunc_arm2.cpp create mode 100644 BasiliskII/src/uae_cpu/compiler/compemu_midfunc_arm2.h create mode 100644 BasiliskII/src/uae_cpu/compiler/compemu_midfunc_x86.cpp create mode 100644 BasiliskII/src/uae_cpu/compiler/compemu_midfunc_x86.h create mode 100644 BasiliskII/src/uae_cpu/compiler/compemu_support.cpp create mode 100644 BasiliskII/src/uae_cpu/compiler/compstbla.cpp create mode 100644 BasiliskII/src/uae_cpu/compiler/flags_arm.h create mode 100644 BasiliskII/src/uae_cpu/compiler/flags_x86.h create mode 100644 BasiliskII/src/uae_cpu/compiler/gencomp.c create mode 100644 BasiliskII/src/uae_cpu/compiler/gencomp_arm.c create mode 100644 BasiliskII/src/uae_cpu/compiler/test_codegen_arm.c create mode 100644 BasiliskII/src/uae_cpu/compiler/test_codegen_x86.cpp create mode 100644 BasiliskII/src/uae_cpu/cpudefsa.cpp create mode 100644 BasiliskII/src/uae_cpu/cpuemu1.cpp create mode 100644 BasiliskII/src/uae_cpu/cpuemu1_nf.cpp create mode 100644 BasiliskII/src/uae_cpu/cpuemu2.cpp create mode 100644 BasiliskII/src/uae_cpu/cpuemu2_nf.cpp create mode 100644 BasiliskII/src/uae_cpu/cpuemu3.cpp create mode 100644 BasiliskII/src/uae_cpu/cpuemu3_nf.cpp create mode 100644 BasiliskII/src/uae_cpu/cpuemu4.cpp create mode 100644 BasiliskII/src/uae_cpu/cpuemu4_nf.cpp create mode 100644 BasiliskII/src/uae_cpu/cpuemu5.cpp create mode 100644 BasiliskII/src/uae_cpu/cpuemu5_nf.cpp create mode 100644 BasiliskII/src/uae_cpu/cpuemu6.cpp create mode 100644 BasiliskII/src/uae_cpu/cpuemu6_nf.cpp create mode 100644 BasiliskII/src/uae_cpu/cpuemu7.cpp create mode 100644 BasiliskII/src/uae_cpu/cpuemu7_nf.cpp create mode 100644 BasiliskII/src/uae_cpu/cpuemu8.cpp create mode 100644 BasiliskII/src/uae_cpu/cpuemu8_nf.cpp create mode 100644 BasiliskII/src/uae_cpu/cpufunctbla.cpp create mode 100644 BasiliskII/src/uae_cpu/cpummu.cpp create mode 100644 BasiliskII/src/uae_cpu/cpummu.h delete mode 100644 BasiliskII/src/uae_cpu/cpuopti.c create mode 100644 
BasiliskII/src/uae_cpu/cpustbl_nf.cpp create mode 100644 BasiliskII/src/uae_cpu/cpustbla.cpp create mode 100644 BasiliskII/src/uae_cpu/debug.cpp create mode 100644 BasiliskII/src/uae_cpu/fpu/core.h create mode 100644 BasiliskII/src/uae_cpu/fpu/exceptions.cpp create mode 100644 BasiliskII/src/uae_cpu/fpu/exceptions.h create mode 100644 BasiliskII/src/uae_cpu/fpu/flags.cpp create mode 100644 BasiliskII/src/uae_cpu/fpu/flags.h create mode 100644 BasiliskII/src/uae_cpu/fpu/fpu.h create mode 100644 BasiliskII/src/uae_cpu/fpu/fpu_ieee.cpp create mode 100644 BasiliskII/src/uae_cpu/fpu/fpu_ieee.h create mode 100644 BasiliskII/src/uae_cpu/fpu/fpu_mpfr.cpp create mode 100644 BasiliskII/src/uae_cpu/fpu/fpu_uae.cpp create mode 100644 BasiliskII/src/uae_cpu/fpu/fpu_uae.h create mode 100644 BasiliskII/src/uae_cpu/fpu/fpu_x86.cpp create mode 100644 BasiliskII/src/uae_cpu/fpu/fpu_x86.h create mode 100644 BasiliskII/src/uae_cpu/fpu/fpu_x86_asm.h create mode 100644 BasiliskII/src/uae_cpu/fpu/impl.h create mode 100644 BasiliskII/src/uae_cpu/fpu/mathlib.cpp create mode 100644 BasiliskII/src/uae_cpu/fpu/mathlib.h create mode 100644 BasiliskII/src/uae_cpu/fpu/rounding.cpp create mode 100644 BasiliskII/src/uae_cpu/fpu/rounding.h create mode 100644 BasiliskII/src/uae_cpu/fpu/types.h create mode 100644 BasiliskII/src/uae_cpu/memory-uae.h create mode 100644 BasiliskII/src/uae_cpu/memory.cpp create mode 100644 BasiliskII/src/uae_cpu/noflags.h create mode 100644 BasiliskII/src/uae_cpu/readcpua.cpp create mode 100644 BasiliskII/src/uae_cpu/registers.h create mode 100644 BasiliskII/src/uae_cpu/spcflags.h diff --git a/BasiliskII/src/Unix/CMakeLists.txt b/BasiliskII/src/Unix/CMakeLists.txt index fca7bca0..65a607f8 100644 --- a/BasiliskII/src/Unix/CMakeLists.txt +++ b/BasiliskII/src/Unix/CMakeLists.txt @@ -15,7 +15,7 @@ add_custom_command(OUTPUT cpudefs.cpp add_executable(gencpu ../uae_cpu/gencpu.c ../uae_cpu/readcpu.cpp cpudefs.cpp) -add_custom_command(OUTPUT cpuemu.cpp cpustbl.cpp COMMAND gencpu 
DEPENDS gencpu) +add_custom_command(OUTPUT cpuemu.cpp cpustbl.cpp cpufunctbl.cpp COMMAND gencpu DEPENDS gencpu) set(BasiliskII_SRCS ../main.cpp @@ -76,10 +76,11 @@ set(BasiliskII_SRCS ../uae_cpu/basilisk_glue.cpp ../uae_cpu/newcpu.cpp ../uae_cpu/readcpu.cpp - # ../uae_cpu/fpp.cpp + ../uae_cpu/fpu/fpu_uae.cpp cpustbl.cpp cpudefs.cpp cpuemu.cpp + cpufunctbl.cpp #addressing mode =direct -DDIRECT_ADDRESSING #includes ) @@ -87,8 +88,10 @@ set(BasiliskII_SRCS add_executable(BasiliskII ${BasiliskII_SRCS}) set_source_files_properties(${BasiliskII_SRCS} - PROPERTIES COMPILE_FLAGS "-DDIRECT_ADDRESSING -DDATADIR=\\\".\\\"") + PROPERTIES COMPILE_FLAGS "-DDIRECT_ADDRESSING -DFPU_UAE -DDATADIR=\\\".\\\"") target_link_libraries(BasiliskII ${COREFOUNDATION_LIBRARY} ${IOKIT_LIBRARY} ${SDL_LIBRARY}) + + #keycodes -> ../SDL/keycodes diff --git a/BasiliskII/src/Unix/main_unix.cpp b/BasiliskII/src/Unix/main_unix.cpp index 644c215b..3affe236 100644 --- a/BasiliskII/src/Unix/main_unix.cpp +++ b/BasiliskII/src/Unix/main_unix.cpp @@ -199,9 +199,8 @@ static void sigsegv_dump_state(sigsegv_info_t *sip) fprintf(stderr, " [IP=%p]", fault_instruction); fprintf(stderr, "\n"); #if EMULATED_68K - uaecptr nextpc; - extern void m68k_dumpstate(uaecptr *nextpc); - m68k_dumpstate(&nextpc); + extern void m68k_dumpstate (FILE *, uaecptr *); + m68k_dumpstate(stderr, 0); #endif VideoQuitFullScreen(); diff --git a/BasiliskII/src/Unix/sysdeps.h b/BasiliskII/src/Unix/sysdeps.h index 8dd77b1d..56b7a71d 100644 --- a/BasiliskII/src/Unix/sysdeps.h +++ b/BasiliskII/src/Unix/sysdeps.h @@ -712,6 +712,17 @@ static inline uae_u32 do_byteswap_16(uae_u32 v) #endif #define REGPARAM2 + +#if __GNUC__ < 3 +# define __builtin_expect(foo,bar) (foo) +#endif +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) +#define ALWAYS_INLINE inline __attribute__((always_inline)) + +#define memptr uint32 + + #endif /* NEED_CONFIG_H_ONLY */ #endif diff --git a/BasiliskII/src/include/main.h 
b/BasiliskII/src/include/main.h index 1ba7b6ac..b55ddc6d 100644 --- a/BasiliskII/src/include/main.h +++ b/BasiliskII/src/include/main.h @@ -31,13 +31,6 @@ extern int FPUType; // Flag: 24-bit-addressing? extern bool TwentyFourBitAddressing; -// 68k register structure (for Execute68k()) -struct M68kRegisters { - uint32 d[8]; - uint32 a[8]; - uint16 sr; -}; - // General functions extern bool InitAll(const char *vmdir); extern void ExitAll(void); diff --git a/BasiliskII/src/uae_cpu/Makefile.am b/BasiliskII/src/uae_cpu/Makefile.am new file mode 100644 index 00000000..0f27219d --- /dev/null +++ b/BasiliskII/src/uae_cpu/Makefile.am @@ -0,0 +1,80 @@ +# +# Note: this Makefile only contains rules for the source +# generator tools. +# + +# +# suppress warnings about overriding LDFLAGS and CPPFLAGS +# +AUTOMAKE_OPTIONS = -Wno-gnu + +AM_CPPFLAGS = $(DEFINES) \ + "-I$(srcdir)/../include" \ + "-I$(srcdir)/../Unix" \ + "-I$(builddir)/.." \ + "-I$(builddir)" \ + "-I$(srcdir)" + +CC = $(CC_FOR_BUILD) +CXX = $(CXX_FOR_BUILD) + +LDFLAGS = $(LDFLAGS_FOR_BUILD) +CPPFLAGS = $(CPPFLAGS_FOR_BUILD) +CFLAGS = $(CFLAGS_FOR_BUILD) +CXXFLAGS = $(CXXFLAGS_FOR_BUILD) +LIBS=-lm + +CFLAGS_NOWARN = $(DBGSP) +AM_CFLAGS = $(CFLAGS_NOWARN) $(WFLAGS) +AM_CXXFLAGS = $(CFLAGS_NOWARN) $(WFLAGS) + +noinst_PROGRAMS = build68k gencpu +if USE_JIT +noinst_PROGRAMS += gencomp +endif + +BUILT_SOURCES = \ + cpudefs.cpp \ + cpuemu.cpp \ + cpustbl.cpp \ + cpufunctbl.cpp \ + cputbl.h \ + $(empty) + +build68k_SOURCES = build68k.c +gencpu_SOURCES = gencpu.c m68k.h readcpu.cpp readcpu.h cpudefs.cpp +gencomp_SOURCES = +if GENCOMP_ARCH_X86 +gencomp_SOURCES += compiler/gencomp.c +endif +if GENCOMP_ARCH_ARM +gencomp_SOURCES += compiler/gencomp_arm.c +endif +gencomp_SOURCES += readcpu.cpp cpudefs.cpp + +if USE_JIT +BUILT_SOURCES += compemu.cpp compstbl.cpp comptbl.h +endif + + +cpudefs.cpp: build68k$(EXEEXT) $(srcdir)/table68k + ./build68k <$(srcdir)/table68k > $@ +cpuemu.cpp: gencpu$(EXEEXT) + ./gencpu$(EXEEXT) +cpustbl.cpp 
cpufunctbl.cpp cputbl.h: cpuemu.cpp +compemu.cpp: gencomp$(EXEEXT) + ./gencomp$(EXEEXT) +compstbl.cpp comptbl.h: compemu.cpp + +CLEANFILES = $(BUILT_SOURCES) + +EXTRA_DIST = \ + table68k \ + compiler/codegen_arm.cpp compiler/codegen_arm.h \ + compiler/compemu_midfunc_arm.cpp compiler/compemu_midfunc_arm.h \ + compiler/compemu_midfunc_arm2.cpp compiler/compemu_midfunc_arm2.h \ + compiler/test_codegen_arm.c \ + compiler/codegen_x86.cpp compiler/codegen_x86.h \ + compiler/compemu_midfunc_x86.cpp compiler/compemu_midfunc_x86.h \ + compiler/test_codegen_x86.cpp \ + $(empty) diff --git a/BasiliskII/src/uae_cpu/aranym_glue.cpp b/BasiliskII/src/uae_cpu/aranym_glue.cpp new file mode 100644 index 00000000..7148d446 --- /dev/null +++ b/BasiliskII/src/uae_cpu/aranym_glue.cpp @@ -0,0 +1,327 @@ +/* + * aranym_glue.cpp - CPU interface + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "sysdeps.h" + +#include "cpu_emulation.h" +#include "newcpu.h" +#include "hardware.h" +#include "input.h" +#ifdef USE_JIT +# include "compiler/compemu.h" +#endif +#include "nf_objs.h" + +#include "debug.h" + +// RAM and ROM pointers +memptr RAMBase = 0; // RAM base (Atari address space) gb-- init is important +uint8 *RAMBaseHost; // RAM base (host address space) +uint32 RAMSize = 0x00e00000; // Size of RAM + +memptr ROMBase = 0x00e00000; // ROM base (Atari address space) +uint8 *ROMBaseHost; // ROM base (host address space) +uint32 ROMSize = 0x00100000; // Size of ROM + +uint32 RealROMSize; // Real size of ROM + +memptr HWBase = 0x00f00000; // HW base (Atari address space) +uint8 *HWBaseHost; // HW base (host address space) +uint32 HWSize = 0x00100000; // Size of HW space + +memptr FastRAMBase = 0x01000000; // Fast-RAM base (Atari address space) +uint8 *FastRAMBaseHost; // Fast-RAM base (host address space) + +#ifdef HW_SIGSEGV +uint8 *FakeIOBaseHost; +#endif + +#ifdef FIXED_VIDEORAM +memptr VideoRAMBase = ARANYMVRAMSTART; // VideoRAM base (Atari address space) +#else +memptr VideoRAMBase; // VideoRAM base (Atari address space) +#endif +uint8 *VideoRAMBaseHost;// VideoRAM base (host address space) +//uint32 VideoRAMSize; // Size of VideoRAM + +#ifndef NOT_MALLOC +uintptr MEMBaseDiff; // Global offset between a Atari address and its Host equivalent +uintptr ROMBaseDiff; +uintptr FastRAMBaseDiff; +#endif + +uintptr VMEMBaseDiff; // Global offset between a Atari VideoRAM address and /dev/fb0 mmap + +// From newcpu.cpp +extern int quit_program; + +#if defined(ENABLE_EXCLUSIVE_SPCFLAGS) && !defined(HAVE_HARDWARE_LOCKS) +SDL_mutex *spcflags_lock; +#endif +#if defined(ENABLE_REALSTOP) +SDL_cond *stop_condition; +#endif + + +/* + * Initialize 680x0 
emulation + */ + +bool InitMEM() { + InitMEMBaseDiff(RAMBaseHost, RAMBase); + InitROMBaseDiff(ROMBaseHost, ROMBase); + InitFastRAMBaseDiff(FastRAMBaseHost, FastRAMBase); + InitVMEMBaseDiff(VideoRAMBaseHost, VideoRAMBase); + return true; +} + +bool Init680x0(void) +{ + init_m68k(); + +#if defined(ENABLE_EXCLUSIVE_SPCFLAGS) && !defined(HAVE_HARDWARE_LOCKS) + if ((spcflags_lock = SDL_CreateMutex()) == NULL) { + panicbug("Error by SDL_CreateMutex()"); + exit(EXIT_FAILURE); + } +#endif + +#if ENABLE_REALSTOP + if ((stop_condition = SDL_CreateCond()) == NULL) { + panicbug("Error by SDL_CreateCond()"); + exit(EXIT_FAILURE); + } +#endif + +#ifdef USE_JIT + if (bx_options.jit.jit) compiler_init(); +#endif + return true; +} + +/* + * Instr. RESET + */ + +void AtariReset(void) +{ + // reset Atari hardware here + HWReset(); + // reset NatFeats here + NFReset(); + // reset the input devices (input.cpp) + InputReset(); + +} + +/* + * Reset CPU + */ + +void Reset680x0(void) +{ + m68k_reset(); +} + +/* + * Deinitialize 680x0 emulation + */ + +void Exit680x0(void) +{ +#ifdef USE_JIT + if (bx_options.jit.jit) compiler_exit(); +#endif + exit_m68k(); +} + + +/* + * Reset and start 680x0 emulation + */ + +void Start680x0(void) +{ + m68k_reset(); +#ifdef USE_JIT + if (bx_options.jit.jit) { + m68k_compile_execute(); + } + else +#endif + m68k_execute(); +} + +/* + * Restart running 680x0 emulation safely from different thread + */ +void Restart680x0(void) +{ + quit_program = 2; + TriggerNMI(); +} + +/* + * Quit 680x0 emulation safely from different thread + */ +void Quit680x0(void) +{ + quit_program = 1; + TriggerNMI(); +} + + +int MFPdoInterrupt(void) +{ + return getMFP()->doInterrupt(); +} + +int SCCdoInterrupt(void) +{ + return getSCC()->doInterrupt(); +} + +/* + * Trigger interrupts + */ +void TriggerInternalIRQ(void) +{ + SPCFLAGS_SET( SPCFLAG_INTERNAL_IRQ ); +} + +void TriggerInt3(void) +{ + SPCFLAGS_SET( SPCFLAG_INT3 ); +} + +void TriggerVBL(void) +{ + SPCFLAGS_SET( SPCFLAG_VBL ); 
+} + +void TriggerInt5(void) +{ + SPCFLAGS_SET( SPCFLAG_INT5 ); +} + +void TriggerSCC(bool enable) +{ + if (enable) + SPCFLAGS_SET( SPCFLAG_SCC ); + else + SPCFLAGS_CLEAR( SPCFLAG_SCC ); +} + +void TriggerMFP(bool enable) +{ + if (enable) + SPCFLAGS_SET( SPCFLAG_MFP ); + else + SPCFLAGS_CLEAR( SPCFLAG_MFP ); +} + +void TriggerNMI(void) +{ + SPCFLAGS_SET( SPCFLAG_BRK ); // use _BRK for NMI +} + +#ifndef REBOOT_OR_HALT +#define REBOOT_OR_HALT 0 // halt by default +#endif + +#if REBOOT_OR_HALT == 1 +# define CPU_MSG "CPU: Rebooting" +# define CPU_ACTION Restart680x0() +#else +# define CPU_MSG "CPU: Halting" +# define CPU_ACTION Quit680x0() +#endif + +#ifdef ENABLE_EPSLIMITER + +#ifndef EPS_LIMIT +# define EPS_LIMIT 10000 /* this might be too high if ARAnyM is slowed down by printing the bus errors on console */ +#endif + +void check_eps_limit(uaecptr pc) +{ + static long last_exception_time=-1; + static long exception_per_sec=0; + static long exception_per_sec_pc=0; + static uaecptr prevpc = 0; + + if (bx_options.cpu.eps_enabled) { + if (last_exception_time == -1) { + last_exception_time = SDL_GetTicks(); + } + + exception_per_sec++; + + if (pc == prevpc) { + /* BUS ERRORs occur at the same PC - watch out! */ + exception_per_sec_pc++; + } + else { + exception_per_sec_pc = 0; + prevpc = pc; + } + + if (SDL_GetTicks() - last_exception_time > 1000) { + last_exception_time = SDL_GetTicks(); + if (exception_per_sec_pc > bx_options.cpu.eps_max || + exception_per_sec > EPS_LIMIT /* make it configurable */) { + panicbug("CPU: Exception per second limit reached: %ld/%ld", + exception_per_sec_pc, exception_per_sec); + /* would be cool to open SDL dialog here: */ + /* [Exception per seconds limit reached. XXXXX exception + occured in the last second. The limit is set to YYYYY + in your config file. 
Do you want to continue emulation, + reset ARAnyM or quit ?][Continue] [Reset] [Quit] + */ + panicbug(CPU_MSG); + CPU_ACTION; + } + exception_per_sec = 0; + exception_per_sec_pc = 0; + } + } +} +#endif + +void report_double_bus_error() +{ + panicbug("CPU: Double bus fault detected !"); + /* would be cool to open SDL dialog here: */ + /* [Double bus fault detected. The emulated system crashed badly. + Do you want to reset ARAnyM or quit ?] [Reset] [Quit]" + */ + panicbug(CPU_MSG); + CPU_ACTION; +} + +#ifdef FLIGHT_RECORDER +extern bool cpu_flight_recorder_active; +void cpu_flight_recorder(int activate) { cpu_flight_recorder_active = activate; } +#endif diff --git a/BasiliskII/src/uae_cpu/basilisk_glue.cpp b/BasiliskII/src/uae_cpu/basilisk_glue.cpp index 9d344816..1b03160a 100644 --- a/BasiliskII/src/uae_cpu/basilisk_glue.cpp +++ b/BasiliskII/src/uae_cpu/basilisk_glue.cpp @@ -1,7 +1,7 @@ /* * basilisk_glue.cpp - Glue UAE CPU to Basilisk II CPU engine interface * - * Basilisk II (C) 1997-1999 Christian Bauer + * Basilisk II (C) 1997-2008 Christian Bauer * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -19,8 +19,10 @@ */ #include "sysdeps.h" + #include "cpu_emulation.h" #include "main.h" +#include "prefs.h" #include "emul_op.h" #include "rom_patches.h" #include "timer.h" @@ -28,9 +30,11 @@ #include "memory.h" #include "readcpu.h" #include "newcpu.h" +#include "compiler/compemu.h" + // RAM and ROM pointers -uint32 RAMBaseMac; // RAM base (Mac address space) +uint32 RAMBaseMac = 0; // RAM base (Mac address space) gb-- initializer is important uint8 *RAMBaseHost; // RAM base (host address space) uint32 RAMSize; // Size of RAM uint32 ROMBaseMac; // ROM base (Mac address space) @@ -48,8 +52,12 @@ int MacFrameLayout; // Frame buffer layout uintptr MEMBaseDiff; // Global offset between a Mac address and its Host equivalent #endif +#if USE_JIT +bool UseJIT = false; +#endif + // 
From newcpu.cpp -extern int quit_program; +extern bool quit_program; /* @@ -59,13 +67,41 @@ extern int quit_program; bool Init680x0(void) { #if REAL_ADDRESSING + // Mac address space = host address space + RAMBaseMac = (uintptr)RAMBaseHost; + ROMBaseMac = (uintptr)ROMBaseHost; +#elif DIRECT_ADDRESSING // Mac address space = host address space minus constant offset (MEMBaseDiff) // NOTE: MEMBaseDiff is set up in main_unix.cpp/main() RAMBaseMac = 0; ROMBaseMac = Host2MacAddr(ROMBaseHost); +#else + // Initialize UAE memory banks + RAMBaseMac = 0; + switch (ROMVersion) { + case ROM_VERSION_64K: + case ROM_VERSION_PLUS: + case ROM_VERSION_CLASSIC: + ROMBaseMac = 0x00400000; + break; + case ROM_VERSION_II: + ROMBaseMac = 0x00a00000; + break; + case ROM_VERSION_32: + ROMBaseMac = 0x40800000; + break; + default: + return false; + } + memory_init(); #endif init_m68k(); +#if USE_JIT + UseJIT = compiler_use_jit(); + if (UseJIT) + compiler_init(); +#endif return true; } @@ -76,9 +112,25 @@ bool Init680x0(void) void Exit680x0(void) { +#if USE_JIT + if (UseJIT) + compiler_exit(); +#endif + exit_m68k(); } +/* + * Initialize memory mapping of frame buffer (called upon video mode change) + */ + +void InitFrameBufferMapping(void) +{ +#if !REAL_ADDRESSING && !DIRECT_ADDRESSING + memory_init(); +#endif +} + /* * Reset and start 680x0 emulation (doesn't return) */ @@ -86,7 +138,12 @@ void Exit680x0(void) void Start680x0(void) { m68k_reset(); - m68k_go(true); +#if USE_JIT + if (UseJIT) + m68k_compile_execute(); + else +#endif + m68k_execute(); } @@ -97,7 +154,7 @@ void Start680x0(void) void TriggerInterrupt(void) { idle_resume(); - regs.spcflags |= SPCFLAG_INT; + SPCFLAGS_SET( SPCFLAG_INT ); } void TriggerNMI(void) @@ -143,8 +200,8 @@ void Execute68kTrap(uint16 trap, struct M68kRegisters *r) // Execute trap m68k_setpc(m68k_areg(regs, 7)); fill_prefetch_0(); - quit_program = 0; - m68k_go(true); + quit_program = false; + m68k_execute(); // Clean up stack m68k_areg(regs, 7) += 4; @@ -158,7 
+215,7 @@ void Execute68kTrap(uint16 trap, struct M68kRegisters *r) r->d[i] = m68k_dreg(regs, i); for (i=0; i<7; i++) r->a[i] = m68k_areg(regs, i); - quit_program = 0; + quit_program = false; } @@ -190,8 +247,8 @@ void Execute68k(uint32 addr, struct M68kRegisters *r) // Execute routine m68k_setpc(addr); fill_prefetch_0(); - quit_program = 0; - m68k_go(true); + quit_program = false; + m68k_execute(); // Clean up stack m68k_areg(regs, 7) += 2; @@ -205,5 +262,7 @@ void Execute68k(uint32 addr, struct M68kRegisters *r) r->d[i] = m68k_dreg(regs, i); for (i=0; i<7; i++) r->a[i] = m68k_areg(regs, i); - quit_program = 0; + quit_program = false; } + +void report_double_bus_error() {} diff --git a/BasiliskII/src/uae_cpu/build68k.c b/BasiliskII/src/uae_cpu/build68k.c index 42a7de8b..e996758d 100644 --- a/BasiliskII/src/uae_cpu/build68k.c +++ b/BasiliskII/src/uae_cpu/build68k.c @@ -1,3 +1,27 @@ +/* + * build68k.c - m68k CPU builder + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ /* * UAE - The Un*x Amiga Emulator * @@ -6,13 +30,15 @@ * Copyright 1995,1996 Bernd Schmidt */ -#include -#include -#include - -#include "sysdeps.h" #include "readcpu.h" +#include +#include +#include +#include +#include +#undef abort + static FILE *tablef; static int nextch = 0; @@ -51,7 +77,7 @@ static int nextchtohex(void) } } -int main(int argc, char **argv) +int main() { int no_insns = 0; @@ -76,6 +102,7 @@ int main(int argc, char **argv) char opcstr[256]; int bitpos[16]; int flagset[5], flaguse[5]; + char cflow; unsigned int bitmask,bitpattern; int n_variable; @@ -107,6 +134,8 @@ int main(int argc, char **argv) case 'r': currbit = bitr; break; case 'R': currbit = bitR; break; case 'z': currbit = bitz; break; + case 'E': currbit = bitE; break; + case 'p': currbit = bitp; break; default: abort(); } if (!(bitmask & 1)) { @@ -121,6 +150,7 @@ int main(int argc, char **argv) patbits[i] = nextch; getnextch(); } + (void) patbits; while (isspace(nextch) || nextch == ':') /* Get CPU and privilege level */ getnextch(); @@ -156,6 +186,7 @@ int main(int argc, char **argv) switch(nextch){ case '-': flagset[i] = fa_unset; break; case '/': flagset[i] = fa_isjmp; break; + case '+': flagset[i] = fa_isbranch; break; case '0': flagset[i] = fa_zero; break; case '1': flagset[i] = fa_one; break; case 'x': flagset[i] = fa_dontcare; break; @@ -182,6 +213,26 @@ int main(int argc, char **argv) } } + getnextch(); + while (isspace(nextch)) + getnextch(); + + if (nextch != ':') /* Get control flow information */ + abort(); + + cflow = 0; + for(i = 0; i < 2; i++) { + getnextch(); + switch(nextch){ + case '-': break; + case 'R': cflow |= fl_return; break; + case 'B': cflow |= fl_branch; break; + case 'J': cflow |= fl_jump; break; + case 'T': cflow |= fl_trap; break; + default: 
abort(); + } + } + getnextch(); while (isspace(nextch)) getnextch(); @@ -201,7 +252,7 @@ int main(int argc, char **argv) if (nextch != ':') abort(); - fgets(opcstr, 250, tablef); + assert(fgets(opcstr, 250, tablef) != NULL); getnextch(); { int j; @@ -209,12 +260,12 @@ int main(int argc, char **argv) char *opstrp = opcstr, *osendp; int slen = 0; - while (isspace(*opstrp)) + while (isspace((int)*opstrp)) opstrp++; osendp = opstrp; while (*osendp) { - if (!isspace (*osendp)) + if (!isspace ((int)*osendp)) slen = osendp - opstrp + 1; osendp++; } @@ -233,7 +284,7 @@ int main(int argc, char **argv) for(i = 0; i < 5; i++) { printf("{ %d, %d }%c ", flaguse[i], flagset[i], i == 4 ? ' ' : ','); } - printf("}, %d, \"%s\"}", sduse, opstrp); + printf("}, %d, %d, \"%s\"}", cflow, sduse, opstrp); } } printf("};\nint n_defs68k = %d;\n", no_insns); diff --git a/BasiliskII/src/uae_cpu/compiler/codegen_arm.cpp b/BasiliskII/src/uae_cpu/compiler/codegen_arm.cpp new file mode 100644 index 00000000..fb7e69c7 --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/codegen_arm.cpp @@ -0,0 +1,2872 @@ +/* + * compiler/codegen_arm.cpp - ARM code generator + * + * Copyright (c) 2013 Jens Heitmann of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * JIT compiler m68k -> ARM + * + * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * Adaptation for Basilisk II and improvements, copyright 2000-2004 Gwenole Beauchesne + * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Current state: + * - Experimental + * - Still optimizable + * - Not clock cycle optimized + * - as a first step this compiler emulates x86 instruction to be compatible + * with gencomp. Better would be a specialized version of gencomp compiling + * 68k instructions to ARM compatible instructions. This is a step for the + * future + * + */ + +#include "flags_arm.h" + +// Declare the built-in __clear_cache function. +extern void __clear_cache (char*, char*); + +/************************************************************************* + * Some basic information about the the target CPU * + *************************************************************************/ + +#define R0_INDEX 0 +#define R1_INDEX 1 +#define R2_INDEX 2 +#define R3_INDEX 3 +#define R4_INDEX 4 +#define R5_INDEX 5 +#define R6_INDEX 6 +#define R7_INDEX 7 +#define R8_INDEX 8 +#define R9_INDEX 9 +#define R10_INDEX 10 +#define R11_INDEX 11 +#define R12_INDEX 12 +#define R13_INDEX 13 +#define R14_INDEX 14 +#define R15_INDEX 15 + +#define RSP_INDEX 13 +#define RLR_INDEX 14 +#define RPC_INDEX 15 + +/* The register in which subroutines return an integer return value */ +#define REG_RESULT R0_INDEX + +/* The registers subroutines take their first and second argument in */ +#define REG_PAR1 R0_INDEX +#define REG_PAR2 R1_INDEX + +#define REG_WORK1 R2_INDEX +#define REG_WORK2 R3_INDEX + +//#define REG_DATAPTR R10_INDEX + +#define REG_PC_PRE R0_INDEX /* The register we use for preloading regs.pc_p */ +#define REG_PC_TMP R1_INDEX /* Another 
register that is not the above */ + +#define SHIFTCOUNT_NREG R1_INDEX /* Register that can be used for shiftcount. + -1 if any reg will do. Normally this can be set to -1 but compemu_support is tied to 1 */ +#define MUL_NREG1 R0_INDEX /* %r4 will hold the low 32 bits after a 32x32 mul */ +#define MUL_NREG2 R1_INDEX /* %r5 will hold the high 32 bits */ + +#define STACK_ALIGN 4 +#define STACK_OFFSET sizeof(void *) +#define STACK_SHADOW_SPACE 0 + +uae_s8 always_used[]={2,3,-1}; +uae_s8 can_byte[]={0,1,4,5,6,7,8,9,10,11,12,-1}; +uae_s8 can_word[]={0,1,4,5,6,7,8,9,10,11,12,-1}; + +uae_u8 call_saved[]={0,0,0,0,1,1,1,1,1,1,1,1,0,1,1,1}; + +/* This *should* be the same as call_saved. But: + - We might not really know which registers are saved, and which aren't, + so we need to preserve some, but don't want to rely on everyone else + also saving those registers + - Special registers (such like the stack pointer) should not be "preserved" + by pushing, even though they are "saved" across function calls +*/ +static const uae_u8 need_to_preserve[]={0,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0}; +static const uae_u32 PRESERVE_MASK = ((1<=-128 && x<=127); +} + +static inline int is8bit(uae_s32 x) +{ + return (x>=-255 && x<=255); +} + +static inline int isword(uae_s32 x) +{ + return (x>=-32768 && x<=32767); +} + +#define jit_unimplemented(fmt, ...) 
do{ panicbug("**** Unimplemented ****"); panicbug(fmt, ## __VA_ARGS__); abort(); }while (0) + +#if 0 /* currently unused */ +static void jit_fail(const char *msg, const char *file, int line, const char *function) +{ + panicbug("JIT failure in function %s from file %s at line %d: %s", + function, file, line, msg); + abort(); +} +#endif + +LOWFUNC(NONE,WRITE,1,raw_push_l_r,(RR4 r)) +{ + PUSH(r); +} +LENDFUNC(NONE,WRITE,1,raw_push_l_r,(RR4 r)) + +LOWFUNC(NONE,READ,1,raw_pop_l_r,(RR4 r)) +{ + POP(r); +} +LENDFUNC(NONE,READ,1,raw_pop_l_r,(RR4 r)) + +LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, RR1 s)) +{ + MVN_ri(REG_WORK1, 0); // mvn r2,#0 + LSL_rri(REG_WORK2, d, 24); // lsl r3, %[d], #24 + ORR_rrrLSRi(REG_WORK2, REG_WORK2, REG_WORK1, 8); // orr r3, r3, r2, lsr #8 + LSL_rri(REG_WORK1, s, 24); // lsl r2, %[s], #24 + + ADCS_rrr(REG_WORK2, REG_WORK2, REG_WORK1); // adcs r3, r3, r2 + + BIC_rri(d, d, 0xFF); // bic %[d],%[d],#0xFF + ORR_rrrLSRi(d, d, REG_WORK2, 24); // orr %[d],%[d], R3 LSR #24 +} +LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, RR1 s)) + +LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, RR2 s)) +{ + MVN_ri(REG_WORK1, 0); // mvn r2,#0 + LSL_rri(REG_WORK2, d, 16); // lsl r3, %[d], #16 + ORR_rrrLSRi(REG_WORK2, REG_WORK2, REG_WORK1, 16); // orr r3, r3, r2, lsr #16 + LSL_rri(REG_WORK1, s, 16); // lsl r2, %[s], #16 + + ADCS_rrr(REG_WORK2, REG_WORK2, REG_WORK1); // adds r3, r3, r2 +#ifdef ARMV6_ASSEMBLY + PKHTB_rrrASRi(d,d,REG_WORK2,16); +#else + BIC_rri(d, d, 0xff); // bic %[d],%[d],#0xff + BIC_rri(d, d, 0xff00); // bic %[d],%[d],#0xff00 + ORR_rrrLSRi(d, d, REG_WORK2, 16); // orr %[d], %[d], r3, lsr #16 +#endif +} +LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, RR2 s)) + +LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, RR4 s)) +{ + ADCS_rrr(d, d, s); // adcs %[d],%[d],%[s] +} +LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, RR4 s)) + +LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, RR1 s)) +{ + LSL_rri(REG_WORK1, s, 24); // lsl r2, %[s], #24 + LSL_rri(REG_WORK2, d, 24); // lsl r3, %[d], #24 + + ADDS_rrr(REG_WORK2, REG_WORK2, 
REG_WORK1); // adds r3, r3, r2 + + BIC_rri(d, d, 0xFF); // bic %[d],%[d],#0xFF + ORR_rrrLSRi(d, d, REG_WORK2, 24); // orr %[d],%[d], r3 LSR #24 +} +LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, RR1 s)) + +LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, RR2 s)) +{ + LSL_rri(REG_WORK1, s, 16); // lsl r2, %[s], #16 + LSL_rri(REG_WORK2, d, 16); // lsl r3, %[d], #16 + + ADDS_rrr(REG_WORK2, REG_WORK2, REG_WORK1); // adds r3, r3, r2 + +#ifdef ARMV6_ASSEMBLY + PKHTB_rrrASRi(d,d,REG_WORK2,16); +#else + BIC_rri(d, d, 0xff); // bic %[d],%[d],#0xff + BIC_rri(d, d, 0xff00); // bic %[d],%[d],#0xff00 + ORR_rrrLSRi(d, d, REG_WORK2, 16); // orr r7, r7, r3, LSR #16 +#endif +} +LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, RR2 s)) + +LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, RR4 s)) +{ + ADDS_rrr(d, d, s); // adds %[d], %[d], %[s] +} +LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, RR4 s)) + +LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_word_offs(i); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldrh r2, [pc, #offs] +#else +# ifdef ARMV6_ASSEMBLY + LDRH_rRI(REG_WORK1, RPC_INDEX, 24); // ldrh r2, [pc, #24] ; +# else + LDRH_rRI(REG_WORK1, RPC_INDEX, 16); // ldrh r2, [pc, #16] ; +# endif +#endif + LSL_rri(REG_WORK2, d, 16); // lsl r3, %[d], #16 + LSL_rri(REG_WORK1, REG_WORK1, 16); // lsl r2, r2, #16 + + ADDS_rrr(REG_WORK2, REG_WORK2, REG_WORK1); // adds r3, r3, r2 + +#ifdef ARMV6_ASSEMBLY + PKHTB_rrrASRi(d,d,REG_WORK2,16); +#else + BIC_rri(d, d, 0xff); // bic %[d],%[d],#0xff + BIC_rri(d, d, 0xff00); // bic %[d],%[d],#0xff00 + ORR_rrrLSRi(d, d, REG_WORK2, 16); // orr %[d],%[d], r3, LSR #16 +#endif + +#if !defined(USE_DATA_BUFFER) + B_i(0); // b + + //: + emit_word(i); + skip_word(0); + //: +#endif +} +LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i)) +{ + LSL_rri(REG_WORK2, d, 24); // lsl r3, %[d], #24 + + ADDS_rri(REG_WORK2, REG_WORK2, i << 24); // adds r3, r3, #0x12000000 + + BIC_rri(d, d, 0xFF); // bic 
%[d],%[d], #0xFF + ORR_rrrLSRi(d, d, REG_WORK2, 24); // orr %[d],%[d], r3, lsr #24 +} +LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(i); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] + ADDS_rrr(d, d, REG_WORK1); // adds %[d], %[d], r2 +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #4] ; + ADDS_rrr(d, d, REG_WORK1); // adds %[d], %[d], r2 + B_i(0); // b + + //: + emit_long(i); + //: +#endif +} +LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, RR1 s)) +{ + MVN_rrLSLi(REG_WORK1, s, 24); // mvn r2, %[s], lsl #24 + MVN_rrLSRi(REG_WORK1, REG_WORK1, 24); // mvn r2, %[s], lsr #24 + AND_rrr(d, d, REG_WORK1); // and %[d], %[d], r2 + + LSLS_rri(REG_WORK1, d, 24); // lsls r2, %[d], #24 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + BIC_rri(REG_WORK1, REG_WORK1, ARM_CV_FLAGS); // bic r2, r2, #0x30000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} +LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, RR1 s)) + +LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, RR2 s)) +{ + MVN_rrLSLi(REG_WORK1, s, 16); // mvn r2, %[s], lsl #16 + MVN_rrLSRi(REG_WORK1, REG_WORK1, 16); // mvn r2, %[s], lsr #16 + AND_rrr(d, d, REG_WORK1); // and %[d], %[d], r2 + + LSLS_rri(REG_WORK1, d, 16); // lsls r2, %[d], #16 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + BIC_rri(REG_WORK1, REG_WORK1, ARM_CV_FLAGS); // bic r2, r2, #0x30000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} +LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, RR2 s)) + +LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, RR4 s)) +{ + ANDS_rrr(d, d, s); // ands r7, r7, r6 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + BIC_rri(REG_WORK1, REG_WORK1, ARM_CV_FLAGS); // bic r2, r2, #0x30000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} +LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, RR4 s)) + +LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(i); 
+ LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 16); // ldr r2, [pc, #16] ; +#endif + ANDS_rrr(d, d, REG_WORK1); // ands %[d], %[d], r2 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + BIC_rri(REG_WORK1, REG_WORK1, ARM_CV_FLAGS); // bic r2, r2, #0x30000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 + +#if !defined(USE_DATA_BUFFER) + B_i(0); // b + + //: + emit_long(i); + //: +#endif +} +LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, RR4 s)) +{ + MOV_rr(REG_WORK1, s); // mov r2,%[s] + RSB_rri(REG_WORK2, REG_WORK1, 0); // rsb r3,r2,#0 + AND_rrr(REG_WORK1, REG_WORK1, REG_WORK2); // and r2,r2,r3 + CLZ_rr(REG_WORK2, REG_WORK1); // clz r3,r2 + MOV_ri(d, 32); // mov %[d],#32 + SUB_rrr(d, d, REG_WORK2); // sub %[d],%[d],r3 + + MRS_CPSR(REG_WORK2); // mrs r3,cpsr + TEQ_ri(d, 0); // teq %[d],#0 + CC_SUBS_rri(NATIVE_CC_NE, d,d,1); // sub %[d],%[d],#1 + CC_BIC_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_Z_FLAG); // bic r3,r3,#0x40000000 + CC_ORR_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_Z_FLAG); // orr r3,r3,#0x40000000 + MSR_CPSR_r(REG_WORK2); // msr cpsr,r3 +} +LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, RR4 s)) + +LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r)) +{ +#if defined(ARMV6_ASSEMBLY) + REVSH_rr(REG_WORK1,r); // revsh r2,%[r] + UXTH_rr(REG_WORK1, REG_WORK1); // utxh r2,r2 + LSR_rri(r, r, 16); + ORR_rrrLSLi(r, REG_WORK1, r, 16); // orr %[r], %[r], r2 +#else + MOV_rr(REG_WORK1, r); // mov r2, r6 + BIC_rri(REG_WORK1, REG_WORK1, 0xff0000); // bic r2, r2, #0xff0000 + BIC_rri(REG_WORK1, REG_WORK1, 0xff000000); // bic r2, r2, #0xff000000 + + EOR_rrr(r, r, REG_WORK1); // eor r6, r6, r2 + + ORR_rrrLSRi(r, r, REG_WORK1, 8); // orr r6, r6, r2, lsr #8 + BIC_rri(REG_WORK1, REG_WORK1, 0xff00); // bic r2, r2, #0xff00 + ORR_rrrLSLi(r,r,REG_WORK1, 8); // orr r6, r6, r2, lsl #8 +#endif +} +LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r)) + +LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r)) +{ +#if 
defined(ARMV6_ASSEMBLY) + REV_rr(r,r); // rev %[r],%[r] +#else + EOR_rrrRORi(REG_WORK1, r, r, 16); // eor r2, r6, r6, ror #16 + BIC_rri(REG_WORK1, REG_WORK1, 0xff0000); // bic r2, r2, #0xff0000 + ROR_rri(r, r, 8); // ror r6, r6, #8 + EOR_rrrLSRi(r, r, REG_WORK1, 8); // eor r6, r6, r2, lsr #8 +#endif +} +LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r)) + +LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(RR4 r, IMM i)) +{ + int imm = (1 << (i & 0x1f)); + + MRS_CPSR(REG_WORK2); // mrs r3, CPSR + TST_ri(r, imm); // tst r6, #0x1000000 + CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG); // bic r3, r3, #0x20000000 + CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG); // orr r3, r3, #0x20000000 + MSR_CPSR_r(REG_WORK2); // msr CPSR_fc, r3 +} +LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(RR4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(RR4 r, RR4 b)) +{ + AND_rri(REG_WORK2, b, 0x1f); // and r3, r7, #0x1f + LSR_rrr(REG_WORK1, r, REG_WORK2); // lsr r2, r6, r3 + + MRS_CPSR(REG_WORK2); // mrs r3, CPSR + TST_ri(REG_WORK1, 1); // tst r2, #1 + CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG); // orr r3, r3, #0x20000000 + CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG); // bic r3, r3, #0x20000000 + MSR_CPSR_r(REG_WORK2); // msr CPSR_fc, r3 +} +LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(RR4 r, RR4 b)) + +LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, RR4 b)) +{ + MOV_ri(REG_WORK1, 1); // mov r2, #1 + AND_rri(REG_WORK2, b, 0x1f); // and r3, r7, #0x1f + LSL_rrr(REG_WORK1, REG_WORK1, REG_WORK2); // lsl r2, r2, r3 + + MRS_CPSR(REG_WORK2); // mrs r3, CPSR + TST_rr(r, REG_WORK1); // tst r6, r2 + CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG); // orr r3, r3, #0x20000000 + CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG); // bic r3, r3, #0x20000000 + EOR_rrr(r, r, REG_WORK1); // eor r6, r6, r2 + MSR_CPSR_r(REG_WORK2); // msr CPSR_fc, r3 +} +LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, RR4 b)) + +LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, RR4 b)) +{ + MOV_ri(REG_WORK1, 1); // 
mov r2, #1 + AND_rri(REG_WORK2, b, 0x1f); // and r3, r7, #0x1f + LSL_rrr(REG_WORK1, REG_WORK1, REG_WORK2); // lsl r2, r2, r3 + + MRS_CPSR(REG_WORK2); // mrs r3, CPSR + TST_rr(r, REG_WORK1); // tst r6, r2 + CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG); // orr r3, r3, #0x20000000 + CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG); // bic r3, r3, #0x20000000 + BIC_rrr(r, r, REG_WORK1); // bic r6, r6, r2 + MSR_CPSR_r(REG_WORK2); // msr CPSR_fc, r3 +} +LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, RR4 b)) + +LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, RR4 b)) +{ + MOV_ri(REG_WORK1, 1); // mov r2, #1 + AND_rri(REG_WORK2, b, 0x1f); // and r3, r7, #0x1f + LSL_rrr(REG_WORK1, REG_WORK1, REG_WORK2); // lsl r2, r2, r3 + + MRS_CPSR(REG_WORK2); // mrs r3, CPSR + TST_rr(r, REG_WORK1); // tst r6, r2 + CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG); // orr r3, r3, #0x20000000 + CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG); // bic r3, r3, #0x20000000 + ORR_rrr(r, r, REG_WORK1); // orr r6, r6, r2 + MSR_CPSR_r(REG_WORK2); // msr CPSR_fc, r3 +} +LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, RR4 b)) + +LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, RR4 s, IMM cc)) +{ + switch (cc) { + case 9: // LS + BEQ_i(0); // beq Z != 0 + BCC_i(0); // bcc C == 0 + + //: + MOV_rr(d, s); // mov r7,r6 + break; + + case 8: // HI + BEQ_i(1); // beq Z != 0 + BCS_i(0); // bcs C != 0 + MOV_rr(d, s); // mov r7,#0 + break; + + default: + CC_MOV_rr(cc, d, s); // MOVcc R7,#1 + break; + } + //: +} +LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, RR4 s, IMM cc)) + +LOWFUNC(WRITE,NONE,2,raw_cmp_b,(RR1 d, RR1 s)) +{ +#if defined(ARMV6_ASSEMBLY) + SXTB_rr(REG_WORK1, d); // sxtb r2,%[d] + SXTB_rr(REG_WORK2, s); // sxtb r3,%[s] +#else + LSL_rri(REG_WORK1, d, 24); // lsl r2,r6,#24 + LSL_rri(REG_WORK2, s, 24); // lsl r3,r7,#24 +#endif + CMP_rr(REG_WORK1, REG_WORK2); // cmp r2, r3 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); // eor r2, r2, 
#0x20000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} +LENDFUNC(WRITE,NONE,2,raw_cmp_b,(RR1 d, RR1 s)) + +LOWFUNC(WRITE,NONE,2,raw_cmp_w,(RR2 d, RR2 s)) +{ +#if defined(ARMV6_ASSEMBLY) + SXTH_rr(REG_WORK1, d); // sxtb r2,%[d] + SXTH_rr(REG_WORK2, s); // sxtb r3,%[s] +#else + LSL_rri(REG_WORK1, d, 16); // lsl r6, r1, #16 + LSL_rri(REG_WORK2, s, 16); // lsl r7, r2, #16 +#endif + + CMP_rr(REG_WORK1, REG_WORK2); // cmp r7, r6, asr #16 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); // eor r2, r2, #0x20000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} +LENDFUNC(WRITE,NONE,2,raw_cmp_w,(RR2 d, RR2 s)) + +LOWFUNC(WRITE,NONE,2,raw_cmp_l,(RR4 d, RR4 s)) +{ + CMP_rr(d, s); // cmp r7, r6 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); // eor r2, r2, #0x20000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} +LENDFUNC(WRITE,NONE,2,raw_cmp_l,(RR4 d, RR4 s)) + +LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, RR4 s)) +{ + SMULL_rrrr(REG_WORK1, REG_WORK2, d, s); // smull r2,r3,r7,r6 + MOV_rr(d, REG_WORK1); // mov r7,r2 +} +LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, RR4 s)) + +LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s)) +{ + SMULL_rrrr(REG_WORK1, REG_WORK2, d, s); // smull r2,r3,r7,r6 + MOV_rr(MUL_NREG1, REG_WORK1); // mov r7,r2 + MOV_rr(MUL_NREG2, REG_WORK2); +} +LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s)) + +LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, RR4 s, IMM offset)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(offset); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] + ADD_rrr(d, s, REG_WORK1); // add r7, r6, r2 +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #4] ; + ADD_rrr(d, s, REG_WORK1); // add r7, r6, r2 + B_i(0); // b + + //: + emit_long(offset); + //: +#endif +} +LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, RR4 s, IMM offset)) + +LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, RR4 s, RR4 index, IMM factor, IMM offset)) +{ + int shft; + 
switch(factor) { + case 1: shft=0; break; + case 2: shft=1; break; + case 4: shft=2; break; + case 8: shft=3; break; + default: abort(); + } + +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(offset); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // LDR R2,[PC, #offs] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 8); // LDR R2,[PC, #8] +#endif + ADD_rrr(REG_WORK1, s, REG_WORK1); // ADD R7,R6,R2 + ADD_rrrLSLi(d, REG_WORK1, index, shft); // ADD R7,R7,R5,LSL #2 +#if !defined(USE_DATA_BUFFER) + B_i(0); // B jp + + emit_long(offset); + //; +#endif +} +LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, RR4 s, RR4 index, IMM factor, IMM offset)) + +LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, RR4 s, RR4 index, IMM factor)) +{ + int shft; + switch(factor) { + case 1: shft=0; break; + case 2: shft=1; break; + case 4: shft=2; break; + case 8: shft=3; break; + default: abort(); + } + + ADD_rrrLSLi(d, s, index, shft); // ADD R7,R6,R5,LSL #2 +} +LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, RR4 s, RR4 index, IMM factor)) + +LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, RR4 s, IMM offset)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(offset); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 12); // ldr r2, [pc, #12] ; +#endif + LDRB_rRR(REG_WORK1, REG_WORK1, s); // ldrb r2, [r2, r6] + + BIC_rri(d, d, 0xff); // bic r7, r7, #0xff + ORR_rrr(d, d, REG_WORK1); // orr r7, r7, r2 +#if !defined(USE_DATA_BUFFER) + B_i(0); // b + + //: + emit_long(offset); + //: +#endif +} +LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, RR4 s, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(RR4 d, RR1 s, IMM offset)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(offset); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2,[pc, #offs] + STRB_rRR(s, d, REG_WORK1); // strb r6,[r7, r2] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2,[pc,#4] + STRB_rRR(s, d, REG_WORK1); // strb r6,[r7, r2] + B_i(0); // b + + //: + 
emit_long(offset); + //: +#endif +} +LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(RR4 d, RR1 s, IMM offset)) + +LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(d); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 8); // ldr r2, [pc, #8] ; +#endif + MOV_ri(REG_WORK2, s & 0xFF); // mov r3, #0x34 + STRB_rR(REG_WORK2, REG_WORK1); // strb r3, [r2] +#if !defined(USE_DATA_BUFFER) + B_i(0); // b + + //d: + emit_long(d); + + //: +#endif +} +LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s)) + +LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, RR1 s)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(d); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] + STRB_rR(s, REG_WORK1); // strb r6, [r2] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #4] ; + STRB_rR(s, REG_WORK1); // strb r6, [r2] + B_i(0); // b + + //: + emit_long(d); + //: +#endif +} +LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, RR1 s)) + +LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s)) +{ + BIC_rri(d, d, 0xff); // bic %[d], %[d], #0xff + ORR_rri(d, d, (s & 0xff)); // orr %[d], %[d], #%[s] +} +LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s)) + +LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(s); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 12); // ldr r2, [pc, #12] ; +#endif + LDRB_rR(REG_WORK2, REG_WORK1); // ldrb r2, [r2] + BIC_rri(d, d, 0xff); // bic r7, r7, #0xff + ORR_rrr(d, REG_WORK2, d); // orr r7, r2, r7 +#if !defined(USE_DATA_BUFFER) + B_i(0); // b + + //: + emit_long(s); + //: +#endif +} +LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s)) + +LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, RR1 s)) +{ + AND_rri(REG_WORK1, s, 0xff); // and r2,r2, #0xff + BIC_rri(d, d, 0x0ff); // bic %[d], %[d], #0xff + ORR_rrr(d, d, REG_WORK1); // orr %[d], %[d], r2 +} 
+LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, RR1 s)) + +LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, RR4 s, IMM offset)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(offset); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] + LDR_rRR(d, REG_WORK1, s); // ldr r7, [r2, r6] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #4] ; + LDR_rRR(d, REG_WORK1, s); // ldr r7, [r2, r6] + + B_i(0); // b + + emit_long(offset); //: + //: +#endif +} +LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, RR4 s, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(RR4 d, RR4 s, IMM offset)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(offset); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2,[pc, #offs] + STR_rRR(s, d, REG_WORK1); // str R6,[R7, r2] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2,[pc,#4] ; + STR_rRR(s, d, REG_WORK1); // str R6,[R7, r2] + B_i(0); // b + + //: + emit_long(offset); + //: +#endif +} +LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(RR4 d, RR4 s, IMM offset)) + +LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s)) +{ + // TODO: optimize imm + +#if defined(USE_DATA_BUFFER) + data_check_end(8, 12); + long offs = data_long_offs(d); + + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; d + + offs = data_long_offs(s); + LDR_rRI(REG_WORK2, RPC_INDEX, offs); // ldr r3, [pc, #offs] ; s + + STR_rR(REG_WORK2, REG_WORK1); // str r3, [r2] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 8); // ldr r2, [pc, #8] ; + LDR_rRI(REG_WORK2, RPC_INDEX, 8); // ldr r3, [pc, #8] ; + STR_rR(REG_WORK2, REG_WORK1); // str r3, [r2] + B_i(1); // b + + emit_long(d); //: + emit_long(s); //: + + //: +#endif +} +LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s)) + +LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, RR4 s, IMM offset)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(offset); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] +#else +# ifdef ARMV6_ASSEMBLY + LDR_rRI(REG_WORK1, RPC_INDEX, 8); // ldr r2, [pc, #16] ; +# else + 
LDR_rRI(REG_WORK1, RPC_INDEX, 16); // ldr r2, [pc, #16] ; +# endif +#endif + LDRH_rRR(REG_WORK1, REG_WORK1, s); // ldrh r2, [r2, r6] + +#ifdef ARMV6_ASSEMBLY + PKHBT_rrr(d,REG_WORK1,d); +#else + BIC_rri(d, d, 0xff); // bic r7, r7, #0xff + BIC_rri(d, d, 0xff00); // bic r7, r7, #0xff00 + ORR_rrr(d, d, REG_WORK1); // orr r7, r7, r2 +#endif + +#if !defined(USE_DATA_BUFFER) + B_i(0); // b + + emit_long(offset); //: + //: +#endif +} +LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, RR4 s, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(RR4 d, RR2 s, IMM offset)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(offset); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2,[pc, #offs] + STRH_rRR(s, d, REG_WORK1); // strh r6,[r7, r2] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2,[pc,#4] + STRH_rRR(s, d, REG_WORK1); // strh r6,[r7, r2] + B_i(0); // b + + //: + emit_long(offset); + //: +#endif +} +LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(RR4 d, RR2 s, IMM offset)) + +LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, RR2 s)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(d); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc,#offs] + STRH_rR(s, REG_WORK1); // strh r3, [r2] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #4] ; + STRH_rR(s, REG_WORK1); // strh r3, [r2] + B_i(0); // b + + //: + emit_long(d); + //: +#endif +} +LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, RR2 s)) + +LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_word_offs(s); + LDR_rRI(REG_WORK2, RPC_INDEX, offs); // ldrh r3, [pc, #offs] +#else +# ifdef ARMV6_ASSEMBLY + LDRH_rRI(REG_WORK2, RPC_INDEX, 12); // ldrh r3, [pc, #12] ; +# else + LDRH_rRI(REG_WORK2, RPC_INDEX, 4); // ldrh r3, [pc, #12] ; +# endif +#endif + +#ifdef ARMV6_ASSEMBLY + PKHBT_rrr(d,REG_WORK2,d); +#else + BIC_rri(REG_WORK1, d, 0xff); // bic r2, r7, #0xff + BIC_rri(REG_WORK1, REG_WORK1, 0xff00); // bic r2, r2, #0xff00 + ORR_rrr(d, REG_WORK2, REG_WORK1); // orr r7, r3, 
r2 +#endif + +#if !defined(USE_DATA_BUFFER) + B_i(0); // b + + //: + emit_word(s); + skip_word(0); + //: +#endif +} +LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s)) + +LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s)) +{ + // TODO: optimize imm + +#if defined(USE_DATA_BUFFER) + data_check_end(8, 12); + long offs = data_long_offs(d); + + LDR_rRI(REG_WORK2, RPC_INDEX, offs); // ldr r3, [pc, #offs] ; + + offs = data_word_offs(s); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; + + STRH_rR(REG_WORK1, REG_WORK2); // strh r2, [r3] +#else + LDR_rRI(REG_WORK2, RPC_INDEX, 8); // ldr r3, [pc, #8] ; + LDRH_rRI(REG_WORK1, RPC_INDEX, 8); // ldrh r2, [pc, #8] ; + STRH_rR(REG_WORK1, REG_WORK2); // strh r2, [r3] + B_i(1); // b + + //mem: + emit_long(d); + //imm: + emit_word(s); + skip_word(0); // Alignment + + //: +#endif +} +LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s)) + +LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, RR4 s)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(d); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] + STR_rR(s, REG_WORK1); // str r3, [r2] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #4] ; + STR_rR(s, REG_WORK1); // str r3, [r2] + B_i(0); // b + + //: + emit_long(d); + //: +#endif +} +LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, RR4 s)) + +LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(RR4 d, IMM i, IMM offset)) +{ + Dif(!isbyte(offset)) abort(); + +#if defined(USE_DATA_BUFFER) + long offs = data_word_offs(i); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] +#else + LDRH_rRI(REG_WORK1, RPC_INDEX, 4); // ldrh r2, [pc, #4] ; +#endif + if (offset >= 0) + STRH_rRI(REG_WORK1, d, offset); // strh r2, [r7, #0x54] + else + STRH_rRi(REG_WORK1, d, -offset);// strh r2, [r7, #-0x54] +#if !defined(USE_DATA_BUFFER) + B_i(0); // b + + //: + emit_word(i); + skip_word(0); + //: +#endif +} +LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(RR4 d, IMM i, IMM offset)) + +LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s)) +{ +#if 
defined(USE_DATA_BUFFER) + long offs = data_long_offs(s); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 12); // ldr r2, [pc, #12] ; +#endif + LDRH_rR(REG_WORK1, REG_WORK1); // ldrh r2, [r2] + LSR_rri(d, d, 16); // lsr r7, r7, #16 + ORR_rrrLSLi(d, REG_WORK1, d, 16); // orr r7, r2, r7, lsl #16 +#if !defined(USE_DATA_BUFFER) + B_i(0); // b + + //: + emit_long(s); + //: +#endif +} +LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s)) + +LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, RR2 s)) +{ + LSL_rri(REG_WORK1, s, 16); // lsl r2, r6, #16 + ORR_rrrLSRi(d, REG_WORK1, d, 16); // orr r7, r2, r7, lsr #16 + ROR_rri(d, d, 16); // ror r7, r7, #16 +} +LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, RR2 s)) + +LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, RR4 s, IMM offset)) +{ + Dif(!isbyte(offset)) abort(); + + if (offset >= 0) + LDRH_rRI(REG_WORK1, s, offset); // ldrh r2, [r6, #12] + else + LDRH_rRi(REG_WORK1, s, -offset); // ldrh r2, [r6, #-12] + +#ifdef ARMV6_ASSEMBLY + PKHBT_rrr(d,REG_WORK1,d); +#else + BIC_rri(d, d, 0xff); // bic r7, r7, #0xff + BIC_rri(d, d, 0xff00); // bic r7, r7, #0xff00 + ORR_rrr(d, d, REG_WORK1); // orr r7, r7, r2 +#endif +} +LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, RR4 s, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(RR4 d, RR2 s, IMM offset)) +{ + Dif(!isbyte(offset)) abort(); + + if (offset >= 0) + STRH_rRI(s, d, offset); // strh r6, [r7, #0x7f] + else + STRH_rRi(s, d, -offset);// strh r6, [r7, #-0x7f] +} +LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(RR4 d, RR2 s, IMM offset)) + +LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(s); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [r10, #offs] + LDR_rR(d, REG_WORK1); // ldr r7, [r2] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #4] ; + LDR_rR(d, REG_WORK1); // ldr r7, [r2] + B_i(0); // b + + emit_long(s); //: + + //: +#endif +} +LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s)) + 
+LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, MEMR base, RR4 index, IMM factor)) +{ + int shft; + switch(factor) { + case 1: shft=0; break; + case 2: shft=1; break; + case 4: shft=2; break; + case 8: shft=3; break; + default: abort(); + } + +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(base); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] + LDR_rRR_LSLi(d, REG_WORK1, index, shft); // ldr %[d], [r2, %[index], lsl #[shift]] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #4] ; + LDR_rRR_LSLi(d, REG_WORK1, index, shft); // ldr %[d], [r2, %[index], lsl #[shift]] + + B_i(0); // b + emit_long(base); //: + //: +#endif +} +LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, MEMR base, RR4 index, IMM factor)) + +LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(RR4 d, IMM i, IMM offset8)) +{ + Dif(!isbyte(offset8)) abort(); + +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(i); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #4] ; +#endif + if (offset8 >= 0) + STR_rRI(REG_WORK1, d, offset8); // str r2, [r7, #0x54] + else + STR_rRi(REG_WORK1, d, -offset8); // str r2, [r7, #-0x54] +#if !defined(USE_DATA_BUFFER) + B_i(0); // b + + //: + emit_long(i); + //: +#endif +} +LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(RR4 d, IMM i, IMM offset)) + +LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, RR4 s, IMM offset)) +{ + Dif(!isbyte(offset)) abort(); + + if (offset >= 0) { + LDR_rRI(d, s, offset); // ldr r2, [r1, #-12] + } else + LDR_rRi(d, s, -offset); // ldr r2, [r1, #12] +} +LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, RR4 s, IMM offset)) + +LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, RR4 s)) +{ + MOV_rr(d, s); // mov %[d], %[s] +} +LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, RR4 s)) + +LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(RR4 d, RR4 s, IMM offset)) +{ + Dif(!isbyte(offset)) abort(); + + if (offset >= 0) + STR_rRI(s, d, offset); // str r6, [r7, #12] + else + STR_rRi(s, d, -offset); // str r6, [r7, #-12] +} 
LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(RR4 d, RR4 s, IMM offset))

/* 32x32 -> 64-bit unsigned multiply.  The full product is copied into the
 * fixed multiply result registers MUL_NREG1 / MUL_NREG2.
 * NOTE(review): which of REG_WORK1/REG_WORK2 holds the low half follows the
 * UMULL_rrrr operand order -- confirm against the macro definition. */
LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
{
    UMULL_rrrr(REG_WORK1, REG_WORK2, d, s);             // umull r2,r3,r7,r6
    MOV_rr(MUL_NREG1, REG_WORK1);                       // mov r7,r2
    MOV_rr(MUL_NREG2, REG_WORK2);
}
LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))

/* d.b |= s.b (upper 24 bits of d preserved).  N/Z come from the LSLS of the
 * result byte into bits 31..24; C and V are then cleared (ARM_CV_FLAGS). */
LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, RR1 s))
{
    AND_rri(REG_WORK1, s, 0xFF);                        // and r2, %[s], 0xFF
    ORR_rrr(d, d, REG_WORK1);                           // orr %[d], %[d], r2
    LSLS_rri(REG_WORK1, d, 24);                         // lsls r2, %[d], #24

    MRS_CPSR(REG_WORK1);                                // mrs r2, CPSR
    BIC_rri(REG_WORK1, REG_WORK1, ARM_CV_FLAGS);        // bic r2, r2, #0x30000000
    MSR_CPSR_r(REG_WORK1);                              // msr CPSR_fc, r2
}
LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, RR1 s))

/* d.w |= s.w (upper 16 bits of d preserved).  N/Z from the result word,
 * C/V cleared. */
LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, RR2 s))
{
#if defined(ARMV6_ASSEMBLY)
    UXTH_rr(REG_WORK1, s);                              // uxth r2, %[s]
#else
    BIC_rri(REG_WORK1, s, 0xff000000);                  // bic r2, %[s], #0xff000000
    BIC_rri(REG_WORK1, REG_WORK1, 0x00ff0000);          // bic r2, r2, #0x00ff0000
#endif
    ORR_rrr(d, d, REG_WORK1);                           // orr %[d], %[d], r2
    LSLS_rri(REG_WORK1, d, 16);                         // lsls r2, %[d], #16

    MRS_CPSR(REG_WORK1);                                // mrs r2, CPSR
    BIC_rri(REG_WORK1, REG_WORK1, ARM_CV_FLAGS);        // bic r2, r2, #0x30000000
    MSR_CPSR_r(REG_WORK1);                              // msr CPSR_fc, r2
}
LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, RR2 s))

/* d |= s (full 32 bits).  N/Z from ORRS, C/V cleared. */
LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, RR4 s))
{
    ORRS_rrr(d, d, s);                                  // orrs r7, r7, r6

    MRS_CPSR(REG_WORK1);                                // mrs r2, CPSR
    BIC_rri(REG_WORK1, REG_WORK1, ARM_CV_FLAGS);        // bic r2, r2, #0x30000000
    MSR_CPSR_r(REG_WORK1);                              // msr CPSR_fc, r2
}
LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, RR4 s))

/* d |= (32-bit immediate i).  The immediate is loaded PC-relative: either
 * from the shared data buffer, or from an inline literal skipped with B. */
LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
{
#if defined(USE_DATA_BUFFER)
    long offs = data_long_offs(i);
    LDR_rRI(REG_WORK1, RPC_INDEX, offs);                // ldr r2, [pc, #offs]
#else
    LDR_rRI(REG_WORK1, RPC_INDEX, 16);                  // ldr r2, [pc, #16] ;
#endif
    ORRS_rrr(d, d, REG_WORK1);                          // orrs r7,r7,r2

    MRS_CPSR(REG_WORK1);                                // mrs r2, CPSR
    BIC_rri(REG_WORK1, REG_WORK1, ARM_CV_FLAGS);        // bic r2, r2, #0x30000000
    MSR_CPSR_r(REG_WORK1);                              // msr CPSR_fc, r2

#if !defined(USE_DATA_BUFFER)
    B_i(0);                                             // b <over the literal>

    // value:
    emit_long(i);
    //jp:
#endif
}
LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))

/* Rotate the low byte of r left by immediate count; upper 24 bits preserved.
 * The byte is replicated across the word so a right-rotate by (32 - count)
 * implements the left-rotate.  C is set from bit 0 of the rotated value. */
LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
{
    // TODO: Check if the Bittest is necessary. compemu.c seems to do it itself, but meanwhile make sure, that carry is set correctly
    int imm = 32 - (i & 0x1f);

    MOV_rrLSLi(REG_WORK1, r, 24);                       // mov r2,r7,lsl #24
    ORR_rrrLSRi(REG_WORK1, REG_WORK1, REG_WORK1, 16);   // orr r2,r2,r2,lsr #16
    ORR_rrrLSRi(REG_WORK1, REG_WORK1, REG_WORK1, 8);    // orr r2,r2,r2,lsr #8

    RORS_rri(REG_WORK1, REG_WORK1, imm);                // rors r2,r2,#(32 - (i & 0x1f))

    MRS_CPSR(REG_WORK2);                                // mrs r3,cpsr
    TST_ri(REG_WORK1, 1);                               // tst r2,#1
    CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG); // orr r3,r3,#0x20000000
    CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG); // bic r3,r3,#0x20000000
    MSR_CPSR_r(REG_WORK2);

    AND_rri(REG_WORK1, REG_WORK1, 0xff);                // and r2,r2,#0xff
    BIC_rri(r, r, 0xff);                                // bic r7,r7,#0xff
    ORR_rrr(r, r, REG_WORK1);                           // orr r7,r7,r2
}
LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))

/* Rotate the low byte of d left by (r & 0x1f); same technique as the
 * immediate form, with the rotate count computed as 32 - count. */
LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, RR1 r))
{
    // TODO: Check if the Bittest is necessary. compemu.c seems to do it itself, but meanwhile make sure, that carry is set correctly

    MOV_ri(REG_WORK2, 32);                              // mov r3,#32
    AND_rri(REG_WORK1, r, 0x1f);                        // and r2,r6,#0x1f
    SUB_rrr(REG_WORK2, REG_WORK2, REG_WORK1);           // sub r3,r3,r2

    MOV_rrLSLi(REG_WORK1, d, 24);                       // mov r2,r7,lsl #24
    ORR_rrrLSRi(REG_WORK1, REG_WORK1, REG_WORK1, 16);   // orr r2,r2,r2,lsr #16
    ORR_rrrLSRi(REG_WORK1, REG_WORK1, REG_WORK1, 8);    // orr r2,r2,r2,lsr #8

    RORS_rrr(REG_WORK1, REG_WORK1, REG_WORK2);          // rors r2,r2,r3

    MRS_CPSR(REG_WORK2);                                // mrs r3,cpsr
    TST_ri(REG_WORK1, 1);                               // tst r2,#1
    CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG); // orr r3,r3,#0x20000000
    CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG); // bic r3,r3,#0x20000000
    MSR_CPSR_r(REG_WORK2);

    AND_rri(REG_WORK1, REG_WORK1, 0xff);                // and r2,r2,#0xff
    BIC_rri(d, d, 0xff);                                // bic r7,r7,#0xff

    ORR_rrr(d, d, REG_WORK1);                           // orr r7,r7,r2
}
LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, RR1 r))

/* Rotate the low word of r left by immediate count; upper 16 bits preserved.
 * The word is duplicated into both halves, right-rotated by (32 - count),
 * and the high half is merged back.  C from bit 0 of the rotated value. */
LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
{
    // TODO: Check if the Bittest is necessary. compemu.c seems to do it itself, but meanwhile make sure, that carry is set correctly
    int imm = 32 - (i & 0x1f);

    MOV_rrLSLi(REG_WORK1, r, 16);                       // mov r2,r7,lsl #16
    ORR_rrrLSRi(REG_WORK1, REG_WORK1, REG_WORK1, 16);   // orr r2,r2,r2,lsr #16

    RORS_rri(REG_WORK1, REG_WORK1, imm);                // rors r2,r2,#(32 - (i & 0x1f))

    MRS_CPSR(REG_WORK2);                                // mrs r3,cpsr
    TST_ri(REG_WORK1, 1);                               // tst r2,#1
    CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG); // orr r3,r3,#0x20000000
    CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG); // bic r3,r3,#0x20000000
    MSR_CPSR_r(REG_WORK2);

    BIC_rri(r, r, 0xff00);                              // bic r7,r7,#0xff00
    BIC_rri(r, r, 0xff);                                // bic r7,r7,#0xff

    ORR_rrrLSRi(r, r, REG_WORK1, 16);                   // orr r7,r7,r2,lsr #16
}
LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))

/* Rotate the low word of d left by (r & 0x1f); register-count variant of
 * raw_rol_w_ri. */
LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, RR1 r))
{
    // TODO: Check if the Bittest is necessary. compemu.c seems to do it itself, but meanwhile make sure, that carry is set correctly

    MOV_ri(REG_WORK2, 32);                              // mov r3,#32
    AND_rri(REG_WORK1, r, 0x1f);                        // and r2,r6,#0x1f
    SUB_rrr(REG_WORK2, REG_WORK2, REG_WORK1);           // sub r3,r3,r2

    MOV_rrLSLi(REG_WORK1, d, 16);                       // mov r2,r7,lsl #16
    ORR_rrrLSRi(REG_WORK1, REG_WORK1, REG_WORK1, 16);   // orr r2,r2,r2,lsr #16

    RORS_rrr(REG_WORK1, REG_WORK1, REG_WORK2);          // rors r2,r2,r3

    MRS_CPSR(REG_WORK2);                                // mrs r3,cpsr
    TST_ri(REG_WORK1, 1);                               // tst r2,#1
    CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG); // orr r3,r3,#0x20000000
    CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG); // bic r3,r3,#0x20000000
    MSR_CPSR_r(REG_WORK2);

    BIC_rri(d, d, 0xff00);                              // bic r7,r7,#0xff00
    BIC_rri(d, d, 0xff);                                // bic r7,r7,#0xff

    ORR_rrrLSRi(d, d, REG_WORK1, 16);                   // orr r7,r7,r2,lsr #16
}
LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, RR1 r))

/* Rotate all 32 bits of r left by immediate count, via a right-rotate by
 * (32 - count).  C is set from bit 0 of the result. */
LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
{
    // TODO: Check if the Bittest is necessary. compemu.c seems to do it itself, but meanwhile make sure, that carry is set correctly
    int imm = 32 - (i & 0x1f);

    RORS_rri(r, r, imm);                                // rors r7,r7,#(32 - (i & 0x1f))

    MRS_CPSR(REG_WORK2);                                // mrs r3,cpsr
    TST_ri(r, 1);                                       // tst r7,#1
    CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG); // orr r3,r3,#0x20000000
    CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG); // bic r3,r3,#0x20000000
    MSR_CPSR_r(REG_WORK2);
}
LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))

/* Rotate all 32 bits of r right by immediate count (flags from RORS only). */
LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
{
    RORS_rri(r, r, i & 0x1F);                           // rors r7,r7,#(i & 0x1f)
}
LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))

/* Rotate all 32 bits of d left by (r & 0x1f); register-count variant of
 * raw_rol_l_ri, same bit-0-into-C fixup. */
LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, RR1 r))
{
    // TODO: Check if the Bittest is necessary. compemu.c seems to do it itself, but meanwhile make sure, that carry is set correctly

    MOV_ri(REG_WORK1, 32);                              // mov r2,#32
    AND_rri(REG_WORK2, r, 0x1f);                        // and r3,r6,#0x1f
    SUB_rrr(REG_WORK1, REG_WORK1, REG_WORK2);           // sub r2,r2,r3

    RORS_rrr(d, d, REG_WORK1);                          // rors r7,r7,r2

    MRS_CPSR(REG_WORK2);                                // mrs r3,cpsr
    TST_ri(d, 1);                                       // tst r7,#1
    CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG); // orr r3,r3,#0x20000000
    CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG); // bic r3,r3,#0x20000000
    MSR_CPSR_r(REG_WORK2);
}
LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, RR1 r))

/* Rotate all 32 bits of d right by register count (flags from RORS only). */
LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, RR1 r))
{
    RORS_rrr(d, d, r);                                  // rors r7,r7,r6
}
LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, RR1 r))

/* Rotate the low byte of r right by immediate count; byte replicated across
 * the word first, result merged back into the low byte. */
LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
{
    MOV_rrLSLi(REG_WORK1, r, 24);                       // mov r2,r7,lsl #24
    ORR_rrrLSRi(REG_WORK1, REG_WORK1, REG_WORK1, 16);   // orr r2,r2,r2,lsr #16
    ORR_rrrLSRi(REG_WORK1, REG_WORK1, REG_WORK1, 8);    // orr r2,r2,r2,lsr #8

    RORS_rri(REG_WORK1, REG_WORK1, i & 0x1f);           // rors r2,r2,#(i & 0x1f)

    AND_rri(REG_WORK1, REG_WORK1, 0xff);                // and r2,r2,#0xff
    BIC_rri(r, r, 0xff);                                // bic r7,r7,#0xff
    ORR_rrr(r, r, REG_WORK1);                           // orr r7,r7,r2
}
LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))

/* Rotate the low byte of d right by register count r. */
LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, RR1 r))
{
    MOV_rrLSLi(REG_WORK1, d, 24);                       // mov r2,r7,lsl #24
    ORR_rrrLSRi(REG_WORK1, REG_WORK1, REG_WORK1, 16);   // orr r2,r2,r2,lsr #16
    ORR_rrrLSRi(REG_WORK1, REG_WORK1, REG_WORK1, 8);    // orr r2,r2,r2,lsr #8

    RORS_rrr(REG_WORK1, REG_WORK1, r);                  // rors r2,r2,r6

    AND_rri(REG_WORK1, REG_WORK1, 0xff);                // and r2,r2,#0xff
    BIC_rri(d, d, 0xff);                                // bic r7,r7,#0xff
    ORR_rrr(d, d, REG_WORK1);                           // orr r7,r7,r2
}
LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, RR1 r))

/* Rotate the low word of r right by immediate count; word duplicated into
 * both halves first, high half of the result merged back. */
LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
{
    MOV_rrLSLi(REG_WORK1, r, 16);                       // mov r2,r7,lsl #16
    ORR_rrrLSRi(REG_WORK1, REG_WORK1, REG_WORK1, 16);   // orr r2,r2,r2,lsr #16

    RORS_rri(REG_WORK1, REG_WORK1, i & 0x1f);           // rors r2,r2,#(i & 0x1f)

    BIC_rri(r, r, 0xff00);                              // bic r7,r7,#0xff00
    BIC_rri(r, r, 0xff);                                // bic r7,r7,#0xff

    ORR_rrrLSRi(r, r, REG_WORK1, 16);                   // orr r7,r7,r2,lsr #16
}
LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))

/* Rotate the low word of d right by register count r. */
LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, RR1 r))
{
    MOV_rrLSLi(REG_WORK1, d, 16);                       // mov r2,r7,lsl #16
    ORR_rrrLSRi(REG_WORK1, REG_WORK1, REG_WORK1, 16);   // orr r2,r2,r2,lsr #16

    RORS_rrr(REG_WORK1, REG_WORK1, r);                  // rors r2,r2,r6

    BIC_rri(d, d, 0xff00);                              // bic r7,r7,#0xff00
    BIC_rri(d, d, 0xff);                                // bic r7,r7,#0xff

    ORR_rrrLSRi(d, d, REG_WORK1, 16);                   // orr r7,r7,r2,lsr #16
}
LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, RR1 r))

/* Byte subtract-with-borrow: d.b := d.b - s.b - borrow.
 * NOTE(review): the EOR of ARM_C_FLAG before and after the SBC appears to
 * translate between x86-style borrow and ARM-style carry (which are inverses
 * for subtraction) -- confirm against the flag conventions used by callers. */
LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, RR1 s))
{
    MRS_CPSR(REG_WORK1);                                // mrs r2, CPSR
    EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);          // eor r2, r2, #0x20000000
    MSR_CPSR_r(REG_WORK1);                              // msr CPSR_fc, r2

    LSL_rri(REG_WORK2, d, 24);                          // lsl r3, %[d], #24
    LSL_rri(REG_WORK1, s, 24);                          // lsl r2, %[s], #24

    SBCS_rrr(REG_WORK2, REG_WORK2, REG_WORK1);          // sbcs r3, r3, r2
    BIC_rri(d, d, 0xFF);
    ORR_rrrLSRi(d, d, REG_WORK2, 24);                   // orr r7, r7, r3, lsr #24

    MRS_CPSR(REG_WORK1);                                // mrs r2, CPSR
    EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);          // eor r2, r2, #0x20000000
    MSR_CPSR_r(REG_WORK1);                              // msr CPSR_fc, r2
}
LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, RR1 s))

/* 32-bit subtract-with-borrow: d := d - s - borrow, with the same
 * carry/borrow inversion around the SBC as raw_sbb_b. */
LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, RR4 s))
{
    MRS_CPSR(REG_WORK1);                                // mrs r2, CPSR
    EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);          // eor r2, r2, #0x20000000
    MSR_CPSR_r(REG_WORK1);                              // msr CPSR_fc, r2

    SBCS_rrr(d, d, s);                                  // sbcs r7, r7, r6

    MRS_CPSR(REG_WORK1);                                // mrs r2, CPSR
    EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);          // eor r2, r2, #0x20000000
    MSR_CPSR_r(REG_WORK1);                              // msr CPSR_fc, r2
}
LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, RR4 s))

/* Word subtract-with-borrow: d.w := d.w - s.w - borrow; operands shifted to
 * the top half so the SBC sets flags for 16-bit width. */
LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, RR2 s))
{
    MRS_CPSR(REG_WORK1);                                // mrs r2, CPSR
    EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);          // eor r2, r2, #0x20000000
    MSR_CPSR_r(REG_WORK1);                              // msr CPSR_fc, r2

    LSL_rri(REG_WORK2, d, 16);                          // lsl r3, %[d], #16
    LSL_rri(REG_WORK1, s, 16);                          // lsl r2, %[s], #16

    SBCS_rrr(REG_WORK2, REG_WORK2, REG_WORK1);          // sbcs r3, r3, r2
    BIC_rri(d,d, 0xff);
    BIC_rri(d,d, 0xff00);
    ORR_rrrLSRi(d, d, REG_WORK2, 16);                   // orr r7, r7, r3, lsr #16

    MRS_CPSR(REG_WORK1);                                // mrs r2, CPSR
    EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);          // eor r2, r2, #0x20000000
    MSR_CPSR_r(REG_WORK1);                              // msr CPSR_fc, r2
}
LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, RR2 s))

/* Set d to 1 if condition cc holds, else 0.  cc values 8 (HI) and 9 (LS)
 * combine two ARM flags and need explicit branch sequences; all other
 * conditions map directly onto a pair of conditional MOVs. */
LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
{
    switch (cc) {
    case 9: // LS
        BEQ_i(0);                                       // beq  (Z set -> 1)
        BCC_i(1);                                       // bcc  (C clear -> 0)

        MOV_ri(d, 1);                                   // mov %[d],#1
        B_i(0);                                         // b

        //:
        MOV_ri(d, 0);                                   // mov %[d],#0
        break;

    case 8: // HI
        BEQ_i(2);                                       // beq  (Z set -> 0)
        BCS_i(1);                                       // bcs  (C set -> 1)

        //:
        MOV_ri(d, 1);                                   // mov %[d],#1
        B_i(0);                                         // b

        //:
        MOV_ri(d, 0);                                   // mov %[d],#0
        break;

    default:
        CC_MOV_ri(cc, d, 1);                            // MOVcc   %[d],#1
        CC_MOV_ri(cc^1, d, 0);                          // MOVcc^1 %[d],#0
        break;
    }
    //:
}
LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))

/* Memory form of setcc: compute the 0/1 result into REG_WORK1 exactly as
 * raw_setcc does, then store it as a byte at address d (loaded PC-relative,
 * from the data buffer or an inline literal). */
LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
{
    switch (cc) {
    case 9: // LS
        BEQ_i(0);                                       // beq  (Z set -> 1)
        BCC_i(1);                                       // bcc  (C clear -> 0)

        MOV_ri(REG_WORK1, 1);                           // mov r2,#1
        B_i(0);                                         // b

        //:
        MOV_ri(REG_WORK1, 0);                           // mov r2,#0
        break;

    case 8: // HI
        BEQ_i(2);                                       // beq  (Z set -> 0)
        BCS_i(1);                                       // bcs  (C set -> 1)

        MOV_ri(REG_WORK1, 1);                           // mov r2,#1
        B_i(0);                                         // b

        //:
        MOV_ri(REG_WORK1, 0);                           // mov r2,#0
        break;

    default:
        CC_MOV_ri(cc, REG_WORK1, 1);                    // MOVcc   r2,#1
        CC_MOV_ri(cc^1, REG_WORK1, 0);                  // MOVcc^1 r2,#0
        break;
    }
    //:
#if defined(USE_DATA_BUFFER)
    long offs = data_long_offs(d);
    LDR_rRI(REG_WORK2, RPC_INDEX, offs);                // ldr r3, [pc, #offs]
#else
    LDR_rRI(REG_WORK2, RPC_INDEX, 4);                   // ldr r3, [pc, #4]
#endif
    STRB_rR(REG_WORK1, REG_WORK2);                      // strb r2, [r3]
#if !defined(USE_DATA_BUFFER)
    B_i(0);                                             // b <over the literal>

    emit_long(d);
    //:
#endif
}
LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))

/* Shift the low byte of r left by immediate count; the byte is shifted in
 * bits 31..24 so LSLS sets flags for 8-bit width, then merged back. */
LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
{
    LSL_rri(REG_WORK1, r, 24);                          // lsl r2,r7,#24

    LSLS_rri(REG_WORK1, REG_WORK1, i & 0x1f);           // lsls r2,r2,#(i & 0x1f)

    BIC_rri(r, r, 0xff);                                // bic r7,r7,#0xff
    ORR_rrrLSRi(r, r, REG_WORK1, 24);                   // orr r7,r7,r2,lsr #24
}
LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))

/* Shift the low byte of d left by register count r. */
LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, RR1 r))
{
    LSL_rri(REG_WORK1, d, 24);                          // lsl r2,r7,#24
    LSLS_rrr(REG_WORK1, REG_WORK1, r);                  // lsls r2,r2,r6
    BIC_rri(d, d, 0xff);                                // bic r7,r7,#0xff
    ORR_rrrLSRi(d, d, REG_WORK1, 24);                   // orr r7,r7,r2,lsr #24
}
LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, RR1 r))

/* Shift all 32 bits of r left by immediate count. */
LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
{
    LSLS_rri(r,r, i & 0x1f);                            // lsls r7,r7,#(i & 0x1f)
}
LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))

/* Shift all 32 bits of d left by register count r. */
LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, RR1 r))
{
    LSLS_rrr(d, d, r);
}
LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, RR1 r))

/* Shift the low word of r left by immediate count; the word is shifted in
 * the top half for 16-bit flags, then rotated back into the low half with
 * the original upper 16 bits of r preserved. */
LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
{
    LSL_rri(REG_WORK1, r, 16);                          // lsl r2,r7,#16
    LSLS_rri(REG_WORK1, REG_WORK1, i&0x1f);             // lsls r2,r2,#(i & 0x1f)

    ORR_rrrLSRi(REG_WORK1, REG_WORK1, r, 16);           // orr r2,r2,r7,lsr #16

    ROR_rri(r, REG_WORK1, 16);                          // ror r7,r2,#16
}
LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))

/* Shift the low word of d left by register count r. */
LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, RR1 r))
{
    LSL_rri(REG_WORK1, d, 16);                          // lsl r2,r7,#16
    LSLS_rrr(REG_WORK1, REG_WORK1, r);                  // lsls r2,r2,r6
    ORR_rrrLSRi(REG_WORK1, REG_WORK1, d, 16);           // orr r2,r2,r7,lsr #16
    ROR_rri(d, REG_WORK1, 16);                          // ror r7,r2,#16
}
LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, RR1 r))

/* Arithmetic (sign-propagating) right shift of the low byte of r by an
 * immediate count; the byte is sign-extended first, result merged back. */
LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
{
    LSL_rri(REG_WORK1, r, 24);                          // lsl r2,r7,#24
    ASR_rri(REG_WORK1, REG_WORK1, 24);                  // asr r2,r2,#24

    ASRS_rri(REG_WORK1, REG_WORK1, i & 0x1f);           // asrs r2,r2,#(i & 0x1f)

    AND_rri(REG_WORK1, REG_WORK1, 0xff);                // and r2,r2,#0xff
    BIC_rri(r,r, 0xff);                                 // bic r7,r7,#0xff
    ORR_rrr(r,r,REG_WORK1);                             // orr r7,r7,r2
}
LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))

/* Arithmetic right shift of the low byte of d by register count r. */
LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, RR1 r))
{
    LSL_rri(REG_WORK1, d, 24);                          // lsl r2,r7,#24
    ASR_rri(REG_WORK1, REG_WORK1, 24);                  // asr r2,r2,#24

    ASRS_rrr(REG_WORK1, REG_WORK1, r);                  // asrs r2,r2,r6

    AND_rri(REG_WORK1, REG_WORK1, 0xff);                // and r2,r2,#0xff
    BIC_rri(d,d, 0xff);                                 // bic r7,r7,#0xff

    ORR_rrr(d,d,REG_WORK1);                             // orr r7,r7,r2
}
LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, RR1 r))

/* Arithmetic right shift of the low word of r by an immediate count; the
 * word is sign-extended, shifted, truncated back to 16 bits, and merged. */
LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
{
    LSL_rri(REG_WORK1, r, 16);                          // lsl r2,r7,#16
    ASR_rri(REG_WORK1, REG_WORK1, 16);                  // asr r2,r2,#16

    ASRS_rri(REG_WORK1, REG_WORK1, i & 0x1f);           // asrs r2,r2,#(i & 0x1f)

#if defined(ARMV6_ASSEMBLY)
    UXTH_rr(REG_WORK1, REG_WORK1);
#else
    BIC_rri(REG_WORK1, REG_WORK1, 0xff000000);
    BIC_rri(REG_WORK1, REG_WORK1, 0xff0000);
#endif

    BIC_rri(r,r,0xff00);                                // bic r7,r7,#0xff00
    BIC_rri(r,r,0xff);                                  // bic r7,r7,#0xff

    ORR_rrr(r,r,REG_WORK1);                             // orr r7,r7,r2
}
LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))

/* Arithmetic right shift of the low word of d by register count r. */
LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, RR1 r))
{
    LSL_rri(REG_WORK1, d, 16);                          // lsl r2,r7,#16
    ASR_rri(REG_WORK1, REG_WORK1, 16);                  // asr r2,r2,#16

    ASRS_rrr(REG_WORK1, REG_WORK1, r);                  // asrs r2,r2,r6

#if defined(ARMV6_ASSEMBLY)
    UXTH_rr(REG_WORK1, REG_WORK1);
#else
    BIC_rri(REG_WORK1, REG_WORK1, 0xff000000);          // bic r2,r2,#0xff000000
    BIC_rri(REG_WORK1, REG_WORK1, 0xff0000);            // bic r2,r2,#0xff0000
#endif

    BIC_rri(d,d, 0xff00);                               // bic r7,r7,#0xff00
    BIC_rri(d,d, 0xff);                                 // bic r7,r7,#0xff

    ORR_rrr(d,d,REG_WORK1);                             // orr r7,r7,r2
}
LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, RR1 r))

/* Arithmetic right shift of all 32 bits of r by an immediate count. */
LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
{
    ASRS_rri(r, r, i & 0x1f);                           // asrs r7,r7,#(i & 0x1f)
}
LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))

/* Arithmetic right shift of all 32 bits of d by register count r. */
LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, RR1 r))
{
    ASRS_rrr(d, d, r);                                  // asrs r7,r7,r6
}
LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, RR1 r))

+LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i)) +{ + AND_rri(REG_WORK1, r, 0xff); // AND r2,r7,#0xFF + + LSRS_rri(REG_WORK1, REG_WORK1, i & 0x1f); // LSRS r2,r2,r6 + + BIC_rri(r, r, 0xFF); // BIC r7,r7,#0xff + ORR_rrr(r, r, REG_WORK1); // ORR r7,r7,r2 +} +LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, RR1 r)) +{ + AND_rri(REG_WORK1, d, 0xff); // AND r2,r7,#0xFF + + LSRS_rrr(REG_WORK1, REG_WORK1, r); // LSRS r2,r2,r6 + + BIC_rri(d, d, 0xFF); // BIC r7,r7,#0xff + ORR_rrr(d, d, REG_WORK1); // ORR r7,r7,r2 +} +LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, RR1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i)) +{ + LSRS_rri(r, r, i & 0x1f); // LSRS r7,r7,#12 +} +LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i)) +{ +#if defined(ARMV6_ASSEMBLY) + UXTH_rr(REG_WORK1, r); +#else + BIC_rri(REG_WORK1, r, 0xff0000); // BIC r2,r7,#0xff0000 + BIC_rri(REG_WORK1, REG_WORK1, 0xff000000); // BIC r2,r2,#0xff000000 +#endif + + LSRS_rri(REG_WORK1, REG_WORK1, i & 0x1f); // LSRS r2,r2,#12 + + BIC_rri(r, r, 0xFF); // BIC r7,r7,#0xff + BIC_rri(r, r, 0xFF00); // BIC r7,r7,#0xff00 + ORR_rrr(r, r, REG_WORK1); // ORR r7,r7,r2 +} +LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, RR1 r)) +{ +#if defined(ARMV6_ASSEMBLY) + UXTH_rr(REG_WORK1, d); +#else + BIC_rri(REG_WORK1, d, 0xff0000); // BIC r2,r7,#0xff0000 + BIC_rri(REG_WORK1, REG_WORK1, 0xff000000); // BIC r2,r2,#0xff000000 +#endif + + LSRS_rrr(REG_WORK1, REG_WORK1, r); // LSRS r2,r2,r6 + + BIC_rri(d, d, 0xFF); // BIC r7,r7,#0xff + BIC_rri(d, d, 0xFF00); // BIC r7,r7,#0xff00 + ORR_rrr(d, d, REG_WORK1); // ORR r7,r7,r2 +} +LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, RR1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, RR1 r)) +{ + LSRS_rrr(d, d, r); +} +LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, RR1 r)) + +LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, RR1 s)) +{ + LSL_rri(REG_WORK1, s, 
24); // lsl r2, r6, #24 + LSL_rri(REG_WORK2, d, 24); // lsl r3, r7, #24 + + SUBS_rrr(REG_WORK2, REG_WORK2, REG_WORK1); // subs r3, r3, r2 + BIC_rri(d, d, 0xFF); + ORR_rrrLSRi(d, d, REG_WORK2, 24); // orr r7, r7, r3 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); // eor r2, r2, #0x20000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} +LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, RR1 s)) + +LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i)) +{ + LSL_rri(REG_WORK2, d, 24); // lsl r3, r7, #24 + + SUBS_rri(REG_WORK2, REG_WORK2, i << 24); // subs r3, r3, #0x12000000 + BIC_rri(d, d, 0xFF); // bic r7, r7, #0xFF + ORR_rrrLSRi(d, d, REG_WORK2, 24); // orr r7, r7, r3, lsr #24 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); // eor r2, r2, #0x20000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} +LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, RR4 s)) +{ + SUBS_rrr(d, d, s); // subs r7, r7, r6 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); // eor r2, r2, #0x20000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} +LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, RR4 s)) + +LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(i); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 16); // ldr r2, [pc, #16] ; +#endif + SUBS_rrr(d, d, REG_WORK1); // subs r7, r7, r2 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); // eor r2, r2, #0x20000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 + +#if !defined(USE_DATA_BUFFER) + B_i(0); // b + + //: + emit_long(i); + //: +#endif +} +LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, RR2 s)) +{ + LSL_rri(REG_WORK1, s, 16); // lsl r2, r6, #16 + LSL_rri(REG_WORK2, d, 16); // lsl r3, r7, #16 + + SUBS_rrr(REG_WORK2, 
REG_WORK2, REG_WORK1); // subs r3, r3, r2 + BIC_rri(d, d, 0xff); + BIC_rri(d, d, 0xff00); + ORR_rrrLSRi(d, d, REG_WORK2, 16); // orr r7, r7, r3 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); // eor r2, r2, #0x20000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} +LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, RR2 s)) + +LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i)) +{ + // TODO: optimize_imm + +#if defined(USE_DATA_BUFFER) + long offs = data_word_offs(i); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; +#else + LDRH_rRI(REG_WORK1, RPC_INDEX, 36); // ldrh r2, [pc, #36] ; +#endif + LSL_rri(REG_WORK1, REG_WORK1, 16); // lsl r2, r2, #16 + LSL_rri(REG_WORK2, d, 16); // lsl r3, r6, #16 + + SUBS_rrr(REG_WORK2, REG_WORK2, REG_WORK1); // subs r3, r3, r2 + BIC_rri(d, d, 0xff); + BIC_rri(d, d, 0xff00); + ORR_rrrLSRi(d, d, REG_WORK2, 16); // orr r6, r3, r6, lsr #16 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); // eor r2, r2, #0x20000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 + +#if !defined(USE_DATA_BUFFER) + B_i(0); // b + + emit_word(i); + skip_word(0); //: + + //: +#endif +} +LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(RR1 d, RR1 s)) +{ +#if defined(ARMV6_ASSEMBLY) + SXTB_rr(REG_WORK1, s); + SXTB_rr(REG_WORK2, d); +#else + LSL_rri(REG_WORK1, s, 24); // lsl r2, r6, #24 + LSL_rri(REG_WORK2, d, 24); // lsl r3, r7, #24 +#endif + + TST_rr(REG_WORK2, REG_WORK1); // tst r3, r2 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + BIC_rri(REG_WORK1, REG_WORK1, ARM_CV_FLAGS); // bic r2, r2, #0x30000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} +LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(RR1 d, RR1 s)) + +LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(RR4 d, IMM i)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(i); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 16); // ldr r2, [pc, #16] ; 
+#endif + TST_rr(d, REG_WORK1); // tst r7, r2 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + BIC_rri(REG_WORK1, REG_WORK1, ARM_CV_FLAGS); // bic r2, r2, #0x30000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 + +#if !defined(USE_DATA_BUFFER) + B_i(0); // b + + //: + emit_long(i); + //: +#endif +} +LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(RR4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(RR4 d, RR4 s)) +{ + TST_rr(d, s); // tst r7, r6 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + BIC_rri(REG_WORK1, REG_WORK1, ARM_CV_FLAGS); // bic r2, r2, #0x30000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} +LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(RR4 d, RR4 s)) + +LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(RR2 d, RR2 s)) +{ +#ifdef ARMV6_ASSEMBLY + SXTH_rr(REG_WORK1, s); + SXTH_rr(REG_WORK2, d); +#else + LSL_rri(REG_WORK1, s, 16); // lsl r2, r6, #16 + LSL_rri(REG_WORK2, d, 16); // lsl r3, r7, #16 +#endif + + TST_rr(REG_WORK2, REG_WORK1); // tst r3, r2 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + BIC_rri(REG_WORK1, REG_WORK1, ARM_CV_FLAGS); // bic r2, r2, #0x30000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} +LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(RR2 d, RR2 s)) + +LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, RR1 s)) +{ + AND_rri(REG_WORK1, s, 0xFF); // and r2, %[s], 0xFF + EOR_rrr(d, d, REG_WORK1); // eor %[d], %[d], r2 + LSLS_rri(REG_WORK1, d, 24); // lsls r2, %[d], #24 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + BIC_rri(REG_WORK1, REG_WORK1, ARM_CV_FLAGS); // bic r2, r2, #0x30000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} +LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, RR1 s)) + +LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, RR2 s)) +{ +#if defined(ARMV6_ASSEMBLY) + UXTH_rr(REG_WORK1, s); // UXTH r2, %[s] +#else + BIC_rri(REG_WORK1, s, 0xff000000); // bic r2, %[s], #0xff000000 + BIC_rri(REG_WORK1, REG_WORK1, 0x00ff0000); // bic r2, r2, #0x00ff0000 +#endif + EOR_rrr(d, d, REG_WORK1); // eor %[d], %[d], r2 + LSLS_rri(REG_WORK1, d, 16); // lsls r2, %[d], #16 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + 
BIC_rri(REG_WORK1, REG_WORK1, ARM_CV_FLAGS); // bic r2, r2, #0x30000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} +LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, RR2 s)) + +LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, RR4 s)) +{ + EORS_rrr(d, d, s); // eors r7, r7, r6 + + MRS_CPSR(REG_WORK1); // mrs r2, CPSR + BIC_rri(REG_WORK1, REG_WORK1, ARM_CV_FLAGS); // bic r2, r2, #0x30000000 + MSR_CPSR_r(REG_WORK1); // msr CPSR_fc, r2 +} +LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, RR4 s)) + +LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, RR2 s)) +{ +#if defined(ARMV6_ASSEMBLY) + SXTH_rr(d, s); // sxth %[d],%[s] +#else + LSL_rri(d, s, 16); // lsl r6, r7, #16 + ASR_rri(d, d, 16); // asr r6, r6, #16 +#endif +} +LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, RR2 s)) + +LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, RR1 s)) +{ +#if defined(ARMV6_ASSEMBLY) + SXTB_rr(d, s); // SXTB %[d],%[s] +#else + ROR_rri(d, s, 8); // ror r6, r7, #8 + ASR_rri(d, d, 24); // asr r6, r6, #24 +#endif +} +LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, RR1 s)) + +LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, RR1 s)) +{ +#if defined(ARMV6_ASSEMBLY) + UXTB_rr(d, s); // UXTB %[d], %[s] +#else + ROR_rri(d, s, 8); // ror r2, r1, #8 + LSR_rri(d, d, 24); // lsr r2, r2, #24 +#endif +} +LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, RR1 s)) + +LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, RR2 s)) +{ +#if defined(ARMV6_ASSEMBLY) + UXTH_rr(d, s); // UXTH %[d], %[s] +#else + BIC_rri(d, s, 0xff000000); // bic %[d], %[s], #0xff000000 + BIC_rri(d, d, 0x00ff0000); // bic %[d], %[d], #0x00ff0000 +#endif +} +LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, RR2 s)) + +static inline void raw_dec_sp(int off) +{ + if (off) { + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #4] ; + SUB_rrr(RSP_INDEX, RSP_INDEX, REG_WORK1); // sub r7, r7, r2 + B_i(0); // b + //: + emit_long(off); + } +} + +static inline void raw_inc_sp(int off) +{ + if (off) { + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #4] ; + 
ADD_rrr(RSP_INDEX, RSP_INDEX, REG_WORK1); // sub r7, r7, r2 + B_i(0); // b + //: + emit_long(off); + } +} + +static inline void raw_push_regs_to_preserve(void) { + PUSH_REGS(PRESERVE_MASK); +} + +static inline void raw_pop_preserved_regs(void) { + POP_REGS(PRESERVE_MASK); +} + +// Verify!!! +/* FLAGX is byte sized, and we *do* write it at that size */ +static inline void raw_load_flagx(uae_u32 t, uae_u32 r) +{ + raw_mov_l_rm(t,(uintptr)live.state[r].mem); +} + +static inline void raw_flags_evicted(int r) +{ + //live.state[FLAGTMP].status=CLEAN; + live.state[FLAGTMP].status=INMEM; + live.state[FLAGTMP].realreg=-1; + /* We just "evicted" FLAGTMP. */ + if (live.nat[r].nholds!=1) { + /* Huh? */ + abort(); + } + live.nat[r].nholds=0; +} + +static inline void raw_flags_init(void) { +} + +static __inline__ void raw_flags_set_zero(int s, int tmp) +{ + raw_mov_l_rr(tmp,s); + MRS_CPSR(s); + BIC_rri(s,s,ARM_Z_FLAG); + AND_rri(tmp,tmp,ARM_Z_FLAG); + EOR_rri(tmp,tmp,ARM_Z_FLAG); + ORR_rrr(s,s,tmp); + MSR_CPSR_r(s); +} + +static inline void raw_flags_to_reg(int r) +{ + MRS_CPSR(r); + raw_mov_l_mr((uintptr)live.state[FLAGTMP].mem,r); + raw_flags_evicted(r); +} + +static inline void raw_reg_to_flags(int r) +{ + MSR_CPSR_r(r); // msr CPSR_fc, %r +} + +/* Apparently, there are enough instructions between flag store and + flag reload to avoid the partial memory stall */ +static inline void raw_load_flagreg(uae_u32 t, uae_u32 r) +{ + raw_mov_l_rm(t,(uintptr)live.state[r].mem); +} + +/* %eax register is clobbered if target processor doesn't support fucomi */ +#define FFLAG_NREG_CLOBBER_CONDITION !have_cmov +#define FFLAG_NREG R0_INDEX +#define FLAG_NREG2 -1 +#define FLAG_NREG1 -1 +#define FLAG_NREG3 -1 + +static inline void raw_fflags_into_flags(int r) +{ + jit_unimplemented("raw_fflags_into_flags %x", r); +} + +static inline void raw_fp_init(void) +{ + int i; + + for (i=0;i=1) { +// emit_byte(0xde); +// emit_byte(0xd9); + live.tos-=2; + } + while (live.tos>=0) { +// emit_byte(0xdd); 
+// emit_byte(0xd8); + live.tos--; + } + raw_fp_init(); +} + +LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r)) +{ + jit_unimplemented("raw_fmov_mr_drop %x %x", m, r); +} +LENDFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r)) + +LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r)) +{ + jit_unimplemented("raw_fmov_mr %x %x", m, r); +} +LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r)) + +LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m)) +{ + jit_unimplemented("raw_fmov_rm %x %x", r, m); +} +LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m)) + +LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s)) +{ + jit_unimplemented("raw_fmov_rr %x %x", d, s); +} +LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s)) + +static inline void raw_emit_nop_filler(int nbytes) +{ + nbytes >>= 2; + while(nbytes--) { NOP(); } +} + +static inline void raw_emit_nop(void) +{ + NOP(); +} + +#ifdef UAE +static +#endif +void compiler_status() { + jit_log("compiled code starts at %p, current at %p (size 0x%x)", compiled_code, current_compile_p, (unsigned int)(current_compile_p - compiled_code)); +} + +// +// ARM doesn't have bsf, but clz is a good alternative instruction for it +// +static bool target_check_bsf(void) +{ + return false; +} + +static void raw_init_cpu(void) +{ + /* Have CMOV support, because ARM support conditions for all instructions */ + have_cmov = true; + + align_loops = 0; + align_jumps = 0; + + raw_flags_init(); +} + +// +// Arm instructions +// +LOWFUNC(WRITE,NONE,2,raw_ADD_l_rr,(RW4 d, RR4 s)) +{ + ADD_rrr(d, d, s); +} +LENDFUNC(WRITE,NONE,2,raw_ADD_l_rr,(RW4 d, RR4 s)) + +LOWFUNC(WRITE,NONE,2,raw_ADD_l_rri,(RW4 d, RR4 s, IMM i)) +{ + ADD_rri(d, s, i); +} +LENDFUNC(WRITE,NONE,2,raw_ADD_l_rri,(RW4 d, RR4 s, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_SUB_l_rri,(RW4 d, RR4 s, IMM i)) +{ + SUB_rri(d, s, i); +} +LENDFUNC(WRITE,NONE,2,raw_SUB_l_rri,(RW4 d, RR4 s, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_AND_b_rr,(RW1 d, RR1 s)) +{ + MVN_rrLSLi(REG_WORK1, s, 24); // mvn r2, %[s], lsl #24 + 
MVN_rrLSRi(REG_WORK1, REG_WORK1, 24); // mvn r2, %[s], lsr #24 + AND_rrr(d, d, REG_WORK1); // and %[d], %[d], r2 +} +LENDFUNC(WRITE,NONE,2,raw_AND_b_rr,(RW1 d, RR1 s)) + +LOWFUNC(WRITE,NONE,2,raw_AND_l_rr,(RW4 d, RR4 s)) +{ + AND_rrr(d, d, s); +} +LENDFUNC(WRITE,NONE,2,raw_AND_l_rr,(RW4 d, RR4 s)) + +LOWFUNC(WRITE,NONE,2,raw_AND_l_ri,(RW4 d, IMM i)) +{ + AND_rri(d, d, i); +} +LENDFUNC(WRITE,NONE,2,raw_AND_l_ri,(RW4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_AND_w_rr,(RW2 d, RR2 s)) +{ + MVN_rrLSLi(REG_WORK1, s, 16); // mvn r2, %[s], lsl #16 + MVN_rrLSRi(REG_WORK1, REG_WORK1, 16); // mvn r2, %[s], lsr #16 + AND_rrr(d, d, REG_WORK1); // and %[d], %[d], r2 +} +LENDFUNC(WRITE,NONE,2,raw_AND_w_rr,(RW2 d, RR2 s)) + +LOWFUNC(WRITE,NONE,2,raw_EOR_b_rr,(RW1 d, RR1 s)) +{ +#if defined(ARMV6_ASSEMBLY) + UXTB_rr(REG_WORK1, s); // UXTH r2, %[s] +#else + AND_rri(REG_WORK1, s, 0xFF); // and r2, %[s], 0xFF +#endif + EOR_rrr(d, d, REG_WORK1); // eor %[d], %[d], r2 +} +LENDFUNC(WRITE,NONE,2,raw_EOR_b_rr,(RW1 d, RR1 s)) + +LOWFUNC(WRITE,NONE,2,raw_EOR_l_rr,(RW4 d, RR4 s)) +{ + EOR_rrr(d, d, s); // eors r7, r7, r6 +} +LENDFUNC(WRITE,NONE,2,raw_EOR_l_rr,(RW4 d, RR4 s)) + +LOWFUNC(WRITE,NONE,2,raw_EOR_w_rr,(RW2 d, RR2 s)) +{ +#if defined(ARMV6_ASSEMBLY) + UXTH_rr(REG_WORK1, s); // UXTH r2, %[s] + EOR_rrr(d, d, REG_WORK1); // eor %[d], %[d], r2 +#else + LSL_rri(REG_WORK1, s, 16); // bic r2, %[s], #0xff000000 + EOR_rrrLSRi(d, d, REG_WORK1, 16); // orr %[d], %[d], r2 +#endif +} +LENDFUNC(WRITE,NONE,2,raw_EOR_w_rr,(RW2 d, RR2 s)) + +LOWFUNC(WRITE,NONE,2,raw_LDR_l_ri,(RW4 d, IMM i)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(i); + LDR_rRI(d, RPC_INDEX, offs); // ldr r2, [pc, #offs] +#else + LDR_rR(d, RPC_INDEX); + B_i(0); + emit_long(i); +#endif +} +LENDFUNC(WRITE,NONE,2,raw_LDR_l_rr,(RW4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_MOV_l_ri8,(RW4 d, IMM i)) +{ + MOV_ri(d, i); +} +LENDFUNC(WRITE,NONE,2,raw_MOV_l_ri8,(RW4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_ORR_b_rr,(RW1 d, RR1 
s)) +{ +#if defined(ARMV6_ASSEMBLY) + UXTB_rr(REG_WORK1, s); // UXTH r2, %[s] +#else + AND_rri(REG_WORK1, s, 0xFF); // and r2, %[s], 0xFF +#endif + ORR_rrr(d, d, REG_WORK1); // orr %[d], %[d], r2 +} +LENDFUNC(WRITE,NONE,2,raw_ORR_b_rr,(RW1 d, RR1 s)) + +LOWFUNC(WRITE,NONE,2,raw_ORR_l_rr,(RW4 d, RR4 s)) +{ + ORR_rrr(d, d, s); +} +LENDFUNC(WRITE,NONE,2,raw_ORR_l_rr,(RW4 d, RR4 s)) + +LOWFUNC(WRITE,NONE,2,raw_ORR_w_rr,(RW2 d, RR2 s)) +{ +#if defined(ARMV6_ASSEMBLY) + UXTH_rr(REG_WORK1, s); // UXTH r2, %[s] + ORR_rrr(d, d, REG_WORK1); // orr %[d], %[d], r2 +#else + LSL_rri(REG_WORK1, s, 16); // bic r2, %[s], #0xff000000 + ORR_rrrLSRi(d, d, REG_WORK1, 16); // orr %[d], %[d], r2 +#endif +} +LENDFUNC(WRITE,NONE,2,raw_ORR_w_rr,(RW2 d, RR2 s)) + +LOWFUNC(WRITE,NONE,2,raw_ROR_l_ri,(RW4 r, IMM i)) +{ + ROR_rri(r, r, i); +} +LENDFUNC(WRITE,NONE,2,raw_ROR_l_ri,(RW4 r, IMM i)) + +// +// compuemu_support used raw calls +// +LOWFUNC(WRITE,RMW,2,compemu_raw_add_l_mi,(IMM d, IMM s)) +{ +#if defined(USE_DATA_BUFFER) + data_check_end(8, 24); + long target = data_long(d, 24); + long offs = get_data_offset(target); + + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; d + LDR_rR(REG_WORK2, REG_WORK1); // ldr r3, [r2] + + offs = data_long_offs(s); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; s + + ADD_rrr(REG_WORK2, REG_WORK2, REG_WORK1); // adds r3, r3, r2 + + offs = get_data_offset(target); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; d + STR_rR(REG_WORK2, REG_WORK1); // str r3, [r2] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 20); // ldr r2, [pc, #20] ; + LDR_rR(REG_WORK2, REG_WORK1); // ldr r3, [r2] + + LDR_rRI(REG_WORK1, RPC_INDEX, 16); // ldr r2, [pc, #16] ; + + ADD_rrr(REG_WORK2, REG_WORK2, REG_WORK1); // adds r3, r3, r2 + + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #4] ; + STR_rR(REG_WORK2, REG_WORK1); // str r3, [r2] + + B_i(1); // b + + //: + emit_long(d); + //: + emit_long(s); + //: +#endif +} 
+LENDFUNC(WRITE,RMW,2,compemu_raw_add_l_mi,(IMM d, IMM s)) + +LOWFUNC(WRITE,NONE,2,compemu_raw_and_l_ri,(RW4 d, IMM i)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(i); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; + AND_rrr(d, d, REG_WORK1); // ands %[d], %[d], r2 +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #16] ; + AND_rrr(d, d, REG_WORK1); // ands %[d], %[d], r2 + B_i(0); + emit_long(i); +#endif +} +LENDFUNC(WRITE,NONE,2,compemu_raw_and_l_ri,(RW4 d, IMM i)) + +LOWFUNC(NONE,NONE,1,compemu_raw_bswap_32,(RW4 r)) +{ +#if defined(ARMV6_ASSEMBLY) + REV_rr(r,r); // rev %[r],%[r] +#else + EOR_rrrRORi(REG_WORK1, r, r, 16); // eor r2, r6, r6, ror #16 + BIC_rri(REG_WORK1, REG_WORK1, 0xff0000); // bic r2, r2, #0xff0000 + ROR_rri(r, r, 8); // ror r6, r6, #8 + EOR_rrrLSRi(r, r, REG_WORK1, 8); // eor r6, r6, r2, lsr #8 +#endif +} +LENDFUNC(NONE,NONE,1,compemu_raw_bswap_32,(RW4 r)) + +LOWFUNC(WRITE,NONE,2,compemu_raw_bt_l_ri,(RR4 r, IMM i)) +{ + int imm = (1 << (i & 0x1f)); + + MRS_CPSR(REG_WORK2); // mrs r3, CPSR + TST_ri(r, imm); // tst r6, #0x1000000 + CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG); // bic r3, r3, #0x20000000 + CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG); // orr r3, r3, #0x20000000 + MSR_CPSR_r(REG_WORK2); // msr CPSR_fc, r3 +} +LENDFUNC(WRITE,NONE,2,compemu_raw_bt_l_ri,(RR4 r, IMM i)) + +LOWFUNC(NONE,READ,5,compemu_raw_cmov_l_rm_indexed,(W4 d, IMM base, RR4 index, IMM factor, IMM cond)) +{ + int shft; + switch(factor) { + case 1: shft=0; break; + case 2: shft=1; break; + case 4: shft=2; break; + case 8: shft=3; break; + default: abort(); + } + + switch (cond) { + case 9: // LS + jit_unimplemented("cmov LS not implemented"); + abort(); + case 8: // HI + jit_unimplemented("cmov HI not implemented"); + abort(); + default: +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(base); + CC_LDR_rRI(cond, REG_WORK1, RPC_INDEX, offs); // ldrcc r2, [pc, #offs] ; + CC_LDR_rRR_LSLi(cond, 
d, REG_WORK1, index, shft); // ldrcc %[d], [r2, %[index], lsl #[shift]] +#else + CC_LDR_rRI(cond, REG_WORK1, RPC_INDEX, 4); // ldrcc r2, [pc, #4] ; + CC_LDR_rRR_LSLi(cond, d, REG_WORK1, index, shft); // ldrcc %[d], [r2, %[index], lsl #[shift]] + B_i(0); // b +#endif + break; + } +#if !defined(USE_DATA_BUFFER) + emit_long(base); // : + //: +#endif +} +LENDFUNC(NONE,READ,5,compemu_raw_cmov_l_rm_indexed,(W4 d, IMM base, RR4 index, IMM factor, IMM cond)) + +LOWFUNC(WRITE,READ,2,compemu_raw_cmp_l_mi,(MEMR d, IMM s)) +{ +#if defined(USE_DATA_BUFFER) + data_check_end(8, 16); + long offs = data_long_offs(d); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; d + LDR_rR(REG_WORK1, REG_WORK1); // ldr r2, [r2] + + offs = data_long_offs(s); + LDR_rRI(REG_WORK2, RPC_INDEX, offs); // ldr r3, [pc, #offs] ; s + + CMP_rr(REG_WORK1, REG_WORK2); // cmp r2, r3 + +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 12); // ldr r2, [pc, #24] ; + LDR_rR(REG_WORK1, REG_WORK1); // ldr r2, [r2] + + LDR_rRI(REG_WORK2, RPC_INDEX, 8); // ldr r3, [pc, #20] ; + + CMP_rr(REG_WORK1, REG_WORK2); // cmp r2, r3 + + B_i(1); // b + + //: + emit_long(d); + //: + emit_long(s); + //: +#endif +} +LENDFUNC(WRITE,READ,2,compemu_raw_cmp_l_mi,(MEMR d, IMM s)) + +LOWFUNC(WRITE,READ,2,compemu_raw_cmp_l_mi8,(MEMR d, IMM s)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(d); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 8); // ldr r2, [pc, #8] ; +#endif + LDR_rR(REG_WORK1, REG_WORK1); // ldr r2, [r2] + + CMP_ri(REG_WORK1, s); // cmp r2, r3 + +#if !defined(USE_DATA_BUFFER) + B_i(0); // b + + //: + emit_long(d); + //: +#endif +} +LENDFUNC(WRITE,READ,2,compemu_raw_cmp_l_mi8,(MEMR d, IMM s)) + +LOWFUNC(NONE,NONE,3,compemu_raw_lea_l_brr,(W4 d, RR4 s, IMM offset)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(offset); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; + ADD_rrr(d, s, REG_WORK1); // add r7, r6, r2 +#else + 
LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #4] ; + ADD_rrr(d, s, REG_WORK1); // add r7, r6, r2 + B_i(0); // b + + //: + emit_long(offset); + //: +#endif +} +LENDFUNC(NONE,NONE,3,compemu_raw_lea_l_brr,(W4 d, RR4 s, IMM offset)) + +LOWFUNC(NONE,NONE,4,compemu_raw_lea_l_rr_indexed,(W4 d, RR4 s, RR4 index, IMM factor)) +{ + int shft; + switch(factor) { + case 1: shft=0; break; + case 2: shft=1; break; + case 4: shft=2; break; + case 8: shft=3; break; + default: abort(); + } + + ADD_rrrLSLi(d, s, index, shft); // ADD R7,R6,R5,LSL #2 +} +LENDFUNC(NONE,NONE,4,compemu_raw_lea_l_rr_indexed,(W4 d, RR4 s, RR4 index, IMM factor)) + +LOWFUNC(NONE,WRITE,2,compemu_raw_mov_b_mr,(IMM d, RR1 s)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(d); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; + STRB_rR(s, REG_WORK1); // strb r6, [r2] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #4] ; + STRB_rR(s, REG_WORK1); // strb r6, [r2] + B_i(0); // b + + //: + emit_long(d); + //: +#endif +} +LENDFUNC(NONE,WRITE,2,compemu_raw_mov_b_mr,(IMM d, RR1 s)) + +LOWFUNC(NONE,WRITE,2,compemu_raw_mov_l_mi,(MEMW d, IMM s)) +{ + // TODO: optimize imm + +#if defined(USE_DATA_BUFFER) + data_check_end(8, 12); + long offs = data_long_offs(d); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; d + offs = data_long_offs(s); + LDR_rRI(REG_WORK2, RPC_INDEX, offs); // ldr r3, [pc, #offs] ; s + STR_rR(REG_WORK2, REG_WORK1); // str r3, [r2] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 8); // ldr r2, [pc, #8] ; + LDR_rRI(REG_WORK2, RPC_INDEX, 8); // ldr r3, [pc, #8] ; + STR_rR(REG_WORK2, REG_WORK1); // str r3, [r2] + B_i(1); // b + + emit_long(d); //: + emit_long(s); //: + + //: +#endif +} +LENDFUNC(NONE,WRITE,2,compemu_raw_mov_l_mi,(MEMW d, IMM s)) + +LOWFUNC(NONE,WRITE,2,compemu_raw_mov_l_mr,(IMM d, RR4 s)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(d); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; + STR_rR(s, REG_WORK1); 
// str r3, [r2] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #4] ; + STR_rR(s, REG_WORK1); // str r3, [r2] + B_i(0); // b + + //: + emit_long(d); + //: +#endif +} +LENDFUNC(NONE,WRITE,2,compemu_raw_mov_l_mr,(IMM d, RR4 s)) + +LOWFUNC(NONE,NONE,2,compemu_raw_mov_l_ri,(W4 d, IMM s)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(s); + LDR_rRI(d, RPC_INDEX, offs); // ldr %[d], [pc, #offs] ; +#else + LDR_rR(d, RPC_INDEX); // ldr %[d], [pc] ; + B_i(0); // b + + //: + emit_long(s); + //: +#endif +} +LENDFUNC(NONE,NONE,2,compemu_raw_mov_l_ri,(W4 d, IMM s)) + +LOWFUNC(NONE,READ,2,compemu_raw_mov_l_rm,(W4 d, MEMR s)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(s); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; + LDR_rR(d, REG_WORK1); // ldr r7, [r2] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #4] ; + LDR_rR(d, REG_WORK1); // ldr r7, [r2] + B_i(0); // b + + emit_long(s); //: + //: +#endif +} +LENDFUNC(NONE,READ,2,compemu_raw_mov_l_rm,(W4 d, MEMR s)) + +LOWFUNC(NONE,NONE,2,compemu_raw_mov_l_rr,(W4 d, RR4 s)) +{ + MOV_rr(d, s); // mov %[d], %[s] +} +LENDFUNC(NONE,NONE,2,compemu_raw_mov_l_rr,(W4 d, RR4 s)) + +LOWFUNC(NONE,WRITE,2,compemu_raw_mov_w_mr,(IMM d, RR2 s)) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(d); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; + STRH_rR(s, REG_WORK1); // strh r3, [r2] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #4] ; + STRH_rR(s, REG_WORK1); // strh r3, [r2] + B_i(0); // b + + //: + emit_long(d); + //: +#endif +} +LENDFUNC(NONE,WRITE,2,compemu_raw_mov_w_mr,(IMM d, RR2 s)) + +LOWFUNC(WRITE,RMW,2,compemu_raw_sub_l_mi,(MEMRW d, IMM s)) +{ +#if defined(USE_DATA_BUFFER) + data_check_end(8, 24); + long target = data_long(d, 24); + long offs = get_data_offset(target); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; d + LDR_rR(REG_WORK2, REG_WORK1); // ldr r3, [r2] + + offs = data_long_offs(s); + 
LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; s + + SUBS_rrr(REG_WORK2, REG_WORK2, REG_WORK1); // subs r3, r3, r2 + + offs = get_data_offset(target); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; d + STR_rR(REG_WORK2, REG_WORK1); // str r3, [r2] +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 20); // ldr r2, [pc, #32] ; + LDR_rR(REG_WORK2, REG_WORK1); // ldr r3, [r2] + + LDR_rRI(REG_WORK1, RPC_INDEX, 16); // ldr r2, [pc, #28] ; + + SUBS_rrr(REG_WORK2, REG_WORK2, REG_WORK1); // subs r3, r3, r2 + + LDR_rRI(REG_WORK1, RPC_INDEX, 4); // ldr r2, [pc, #16] ; + STR_rR(REG_WORK2, REG_WORK1); // str r3, [r2] + + B_i(1); // b + + //: + emit_long(d); + //: + emit_long(s); + //: +#endif +} +LENDFUNC(WRITE,RMW,2,compemu_raw_sub_l_mi,(MEMRW d, IMM s)) + +LOWFUNC(WRITE,NONE,2,compemu_raw_test_l_rr,(RR4 d, RR4 s)) +{ + TST_rr(d, s); // tst r7, r6 +} +LENDFUNC(WRITE,NONE,2,compemu_raw_test_l_rr,(RR4 d, RR4 s)) + +LOWFUNC(NONE,NONE,2,compemu_raw_zero_extend_16_rr,(W4 d, RR2 s)) +{ +#if defined(ARMV6_ASSEMBLY) + UXTH_rr(d, s); // UXTH %[d], %[s] +#else + BIC_rri(d, s, 0xff000000); // bic %[d], %[s], #0xff000000 + BIC_rri(d, d, 0x00ff0000); // bic %[d], %[d], #0x00ff0000 +#endif +} +LENDFUNC(NONE,NONE,2,compemu_raw_zero_extend_16_rr,(W4 d, RR2 s)) + +static inline void compemu_raw_call(uae_u32 t) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(t); + LDR_rRI(REG_WORK1, RPC_INDEX, offs); // ldr r2, [pc, #offs] ; +#else + LDR_rRI(REG_WORK1, RPC_INDEX, 12); // ldr r2, [pc, #12] ; +#endif + PUSH(RLR_INDEX); // push {lr} + BLX_r(REG_WORK1); // blx r2 + POP(RLR_INDEX); // pop {lr} +#if !defined(USE_DATA_BUFFER) + B_i(0); // b + + //: + emit_long(t); + //: +#endif +} + +static inline void compemu_raw_call_r(RR4 r) +{ + PUSH(RLR_INDEX); // push {lr} + BLX_r(r); // blx r0 + POP(RLR_INDEX); // pop {lr} +} + +static inline void compemu_raw_jcc_l_oponly(int cc) +{ + switch (cc) { + case 9: // LS + BEQ_i(0); // beq + BCC_i(2); // bcc + + //: + 
LDR_rR(REG_WORK1, RPC_INDEX); // ldr r2, [pc] ; + BX_r(REG_WORK1); // bx r2 + break; + + case 8: // HI + BEQ_i(3); // beq + BCS_i(2); // bcs + + //: + LDR_rR(REG_WORK1, RPC_INDEX); // ldr r2, [pc] ; + BX_r(REG_WORK1); // bx r2 + break; + + default: + CC_LDR_rRI(cc, REG_WORK1, RPC_INDEX, 4); // ldrlt r2, [pc, #4] ; + CC_BX_r(cc, REG_WORK1); // bxlt r2 + B_i(0); // b + break; + } + // emit of target will be done by caller +} + +static inline void compemu_raw_jl(uae_u32 t) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(t); + CC_LDR_rRI(NATIVE_CC_LT, RPC_INDEX, RPC_INDEX, offs); // ldrlt pc, [pc, offs] +#else + CC_LDR_rR(NATIVE_CC_LT, RPC_INDEX, RPC_INDEX); // ldrlt pc, [pc] + B_i(0); // b + + //: + emit_long(t); + //: +#endif +} + +static inline void compemu_raw_jmp(uae_u32 t) +{ + LDR_rR(REG_WORK1, RPC_INDEX); // ldr r2, [pc] + BX_r(REG_WORK1); // bx r2 + emit_long(t); +} + +static inline void compemu_raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m) +{ + int shft; + switch(m) { + case 1: shft=0; break; + case 2: shft=1; break; + case 4: shft=2; break; + case 8: shft=3; break; + default: abort(); + } + + LDR_rR(REG_WORK1, RPC_INDEX); // ldr r2, [pc] ; + LDR_rRR_LSLi(RPC_INDEX, REG_WORK1, r, shft); // ldr pc, [r2, r6, lsl #3] + emit_long(base); +} + +static inline void compemu_raw_jmp_r(RR4 r) +{ + BX_r(r); +} + +static inline void compemu_raw_jnz(uae_u32 t) +{ +#if defined(USE_DATA_BUFFER) + long offs = data_long_offs(t); + CC_LDR_rRI(NATIVE_CC_NE, RPC_INDEX, RPC_INDEX, offs); // ldrne pc, [pc, offs] +#else + CC_LDR_rR(NATIVE_CC_NE, RPC_INDEX, RPC_INDEX); // ldrne pc, [pc] + B_i(0); // b + + emit_long(t); + //: +#endif +} + +static inline void compemu_raw_jz_b_oponly(void) +{ + BNE_i(2); // bne jp + LDRSB_rRI(REG_WORK1, RPC_INDEX, 3); // ldrsb r2,[pc,#3] + ADD_rrr(RPC_INDEX, RPC_INDEX, REG_WORK1); // add pc,pc,r2 + + skip_n_bytes(3); + + // +} + +static inline void compemu_raw_branch(IMM d) +{ + B_i((d >> 2) - 1); +} diff --git 
a/BasiliskII/src/uae_cpu/compiler/codegen_arm.h b/BasiliskII/src/uae_cpu/compiler/codegen_arm.h new file mode 100644 index 00000000..f92bb1da --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/codegen_arm.h @@ -0,0 +1,1292 @@ +/* + * compiler/codegen_arm.h - ARM code generator + * + * Copyright (c) 2013 Jens Heitmann of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * JIT compiler m68k -> ARM + * + * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * This file is derived from CCG, copyright 1999-2003 Ian Piumarta + * Adaptation for Basilisk II and improvements, copyright 2000-2004 Gwenole Beauchesne + * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef ARM_RTASM_H +#define ARM_RTASM_H + +/* NOTES + * + */ + +/* --- Configuration ------------------------------------------------------- */ + +/* CPSR flags */ + +#define ARM_N_FLAG 0x80000000 +#define ARM_Z_FLAG 0x40000000 +#define ARM_C_FLAG 0x20000000 +#define ARM_V_FLAG 0x10000000 +#define ARM_Q_FLAG 0x08000000 +#define ARM_CV_FLAGS (ARM_C_FLAG|ARM_V_FLAG) + +#define ARM_GE3 0x00080000 +#define ARM_GE2 0x00040000 +#define ARM_GE1 0x00020000 +#define ARM_GE0 0x00010000 + +/* --- Macros -------------------------------------------------------------- */ + +/* ========================================================================= */ +/* --- UTILITY ------------------------------------------------------------- */ +/* ========================================================================= */ + +#define _W(c) emit_long(c) +#define _LS2_ADDR(a) (((a) & 0x01f0000f) | (((a) & 0xf0) << 4)) + +/* ========================================================================= */ +/* --- ENCODINGS ----------------------------------------------------------- */ +/* ========================================================================= */ + +#define IMM32(c) ((c & 0xffffff00) == 0 ? c : \ + (c & 0x3fffffc0) == 0 ? (0x100 | ((c >> 30) & 0x3) | ((c << 2) & 0xfc)) : \ + (c & 0x0ffffff0) == 0 ? (0x200 | ((c >> 28) & 0xf) | ((c << 4) & 0xf0)) : \ + (c & 0x03fffffc) == 0 ? (0x300 | ((c >> 26) & 0x3f) | ((c << 6) & 0xc0) ) : \ + (c & 0x00ffffff) == 0 ? (0x400 | ((c >> 24) & 0xff)) : \ + (c & 0xc03fffff) == 0 ? (0x500 | (c >> 22)) : \ + (c & 0xf00fffff) == 0 ? (0x600 | (c >> 20)) : \ + (c & 0xfc03ffff) == 0 ? (0x700 | (c >> 18)) : \ + (c & 0xff00ffff) == 0 ? (0x800 | (c >> 16)) : \ + (c & 0xffc03fff) == 0 ? (0x900 | (c >> 14)) : \ + (c & 0xfff00fff) == 0 ? 
(0xa00 | (c >> 12)) : \ + (c & 0xfffc03ff) == 0 ? (0xb00 | (c >> 10)) : \ + (c & 0xffff00ff) == 0 ? (0xc00 | (c >> 8)) : \ + (c & 0xffffc03f) == 0 ? (0xd00 | (c >> 6)) : \ + (c & 0xfffff00f) == 0 ? (0xe00 | (c >> 4)) : \ + (c & 0xfffffc03) == 0 ? (0xf00 | (c >> 2)) : \ + 0\ + ) + +#define SHIFT_IMM(c) (0x02000000 | (IMM32((c)))) + +#define UNSHIFTED_IMM8(c) (0x02000000 | (c)) +#define SHIFT_IMM8_ROR(c,r) (0x02000000 | (c) | ((r >> 1) << 8)) + +#define SHIFT_REG(Rm) (Rm) +#define SHIFT_LSL_i(Rm,s) ((Rm) | ((s) << 7)) +#define SHIFT_LSL_r(Rm,Rs) ((Rm) | ((Rs) << 8) | 0x10) +#define SHIFT_LSR_i(Rm,s) ((Rm) | ((s) << 7) | 0x20) +#define SHIFT_LSR_r(Rm,Rs) ((Rm) | ((Rs) << 8) | 0x30) +#define SHIFT_ASR_i(Rm,s) ((Rm) | ((s) << 7) | 0x40) +#define SHIFT_ASR_r(Rm,Rs) ((Rm) | ((Rs) << 8) | 0x50) +#define SHIFT_ROR_i(Rm,s) ((Rm) | ((s) << 7) | 0x60) +#define SHIFT_ROR_r(Rm,Rs) ((Rm) | ((Rs) << 8) | 0x70) +#define SHIFT_RRX(Rm) ((Rm) | 0x60) +#define SHIFT_PK(Rm,s) ((Rm) | ((s) << 7)) + +// Load/Store addressings +#define ADR_ADD(v) ((1 << 23) | (v)) +#define ADR_SUB(v) (v) + +#define ADR_IMM(v) ((v) | (1 << 24)) +#define ADR_IMMPOST(v) (v) +#define ADR_REG(Rm) ((1 << 25) | (1 << 24) | (Rm)) +#define ADR_REGPOST(Rm) ((1 << 25) | (Rm)) + +#define ADD_IMM(i) ADR_ADD(ADR_IMM(i)) +#define SUB_IMM(i) ADR_SUB(ADR_IMM(i)) + +#define ADD_REG(Rm) ADR_ADD(ADR_REG(Rm)) +#define SUB_REG(Rm) ADR_SUB(ADR_REG(Rm)) + +#define ADD_LSL(Rm,i) ADR_ADD(ADR_REG(Rm) | ((i) << 7)) +#define SUB_LSL(Rm,i) ADR_SUB(ADR_REG(Rm) | ((i) << 7)) + +#define ADD_LSR(Rm,i) ADR_ADD(ADR_REG(Rm) | (((i) & 0x1f) << 7) | (1 << 5)) +#define SUB_LSR(Rm,i) ADR_SUB(ADR_REG(Rm) | (((i) & 0x1f) << 7) | (1 << 5)) + +#define ADD_ASR(Rm,i) ADR_ADD(ADR_REG(Rm) | (((i) & 0x1f) << 7) | (2 << 5)) +#define SUB_ASR(Rm,i) ADR_SUB(ADR_REG(Rm) | (((i) & 0x1f) << 7) | (2 << 5)) + +#define ADD_ROR(Rm,i) ADR_ADD(ADR_REG(Rm) | (((i) & 0x1f) << 7) | (3 << 5)) +#define SUB_ROR(Rm,i) ADR_SUB(ADR_REG(Rm) | (((i) & 0x1f) << 7) | (3 << 5)) + 
+#define ADD_RRX(Rm) ADR_ADD(ADR_REG(Rm) | (3 << 5)) +#define SUB_RRX(Rm) ADR_SUB(ADR_REG(Rm) | (3 << 5)) + +#define ADD2_IMM(i) ADR_ADD(i | (1 << 22)) +#define SUB2_IMM(i) ADR_SUB(i | (1 << 22)) + +#define ADD2_REG(Rm) ADR_ADD(Rm) +#define SUB2_REG(Rm) ADR_SUB(Rm) + +// MOV, MVN +#define _OP1(cc,op,s,Rd,shift) _W(((cc) << 28) | ((op) << 21) | ((s) << 20) | ((Rd) << 12) | (shift)) + +// CMP, CMN, TST, TEQ +#define _OP2(cc,op,Rn,shift) _W(((cc) << 28) | ((op) << 21) | (1 << 20) | ((Rn) << 16) | (shift)) + +// ADD, SUB, RSB, ADC, SBC, RSC, AND, BIC, EOR, ORR +#define _OP3(cc,op,s,Rd,Rn,shift) _W(((cc) << 28) | ((op) << 21) | ((s) << 20) | ((Rn) << 16) | ((Rd) << 12) | (shift)) + +// LDR, STR +#define _LS1(cc,l,b,Rd,Rn,a) _W(((cc) << 28) | (0x01 << 26) | ((l) << 20) | ((b) << 22) | ((Rn) << 16) | ((Rd) << 12) | (a)) +#define _LS2(cc,p,l,s,h,Rd,Rn,a) _W(((cc) << 28) | ((p) << 24) | ((l) << 20) | ((Rn) << 16) | ((Rd) << 12) | ((s) << 6) | ((h) << 5) | 0x90 | _LS2_ADDR((a))) + +/* ========================================================================= */ +/* --- OPCODES ------------------------------------------------------------- */ +/* ========================================================================= */ + +/* Branch instructions */ +#ifndef __ANDROID__ +enum { + _B, _BL, _BLX, _BX, _BXJ +}; +#endif + +/* Data processing instructions */ +enum { + _AND = 0, + _EOR, + _SUB, + _RSB, + _ADD, + _ADC, + _SBC, + _RSC, + _TST, + _TEQ, + _CMP, + _CMN, + _ORR, + _MOV, + _BIC, + _MVN +}; + +/* Single instruction Multiple Data (SIMD) instructions */ + +/* Multiply instructions */ + +/* Parallel instructions */ + +/* Extend instructions */ + +/* Miscellaneous arithmetic instructions */ + +/* Status register transfer instructions */ + +/* Load and Store instructions */ + +/* Coprocessor instructions */ + +/* Exception generation instructions */ + +/* ========================================================================= */ +/* --- ASSEMBLER 
----------------------------------------------------------- */ +/* ========================================================================= */ + +#define NOP() _W(0xe1a00000) +#define SETEND_BE() _W(0xf1010200) +#define SETEND_LE() _W(0xf1010000) + +/* Data processing instructions */ + +/* Opcodes Type 1 */ +// MOVcc rd,#i +#define CC_MOV_ri8(cc,Rd,i) _OP1(cc,_MOV,0,Rd,UNSHIFTED_IMM8(i)) +// MOVcc Rd,#i ROR #s +#define CC_MOV_ri8RORi(cc,Rd,i,s) _OP1(cc,_MOV,0,Rd,SHIFT_IMM8_ROR(i,s)) +#define CC_MOV_ri(cc,Rd,i) _OP1(cc,_MOV,0,Rd,SHIFT_IMM(i)) +#define CC_MOV_rr(cc,Rd,Rm) _OP1(cc,_MOV,0,Rd,SHIFT_REG(Rm)) +#define CC_MOV_rrLSLi(cc,Rd,Rm,i) _OP1(cc,_MOV,0,Rd,SHIFT_LSL_i(Rm,i)) +#define CC_MOV_rrLSLr(cc,Rd,Rm,Rs) _OP1(cc,_MOV,0,Rd,SHIFT_LSL_r(Rm,Rs)) +#define CC_MOV_rrLSRi(cc,Rd,Rm,i) _OP1(cc,_MOV,0,Rd,SHIFT_LSR_i(Rm,i)) +#define CC_MOV_rrLSRr(cc,Rd,Rm,Rs) _OP1(cc,_MOV,0,Rd,SHIFT_LSR_r(Rm,Rs)) +#define CC_MOV_rrASRi(cc,Rd,Rm,i) _OP1(cc,_MOV,0,Rd,SHIFT_ASR_i(Rm,i)) +#define CC_MOV_rrASRr(cc,Rd,Rm,Rs) _OP1(cc,_MOV,0,Rd,SHIFT_ASR_r(Rm,Rs)) +#define CC_MOV_rrRORi(cc,Rd,Rm,i) _OP1(cc,_MOV,0,Rd,SHIFT_ROR_i(Rm,i)) +#define CC_MOV_rrRORr(cc,Rd,Rm,Rs) _OP1(cc,_MOV,0,Rd,SHIFT_ROR_r(Rm,Rs)) +#define CC_MOV_rrRRX(cc,Rd,Rm) _OP1(cc,_MOV,0,Rd,SHIFT_RRX(Rm)) + +// MOV rd,#i +#define MOV_ri8(Rd,i) CC_MOV_ri8(NATIVE_CC_AL,Rd,i) +// MOV Rd,#i ROR #s +#define MOV_ri8RORi(Rd,i,s) CC_MOV_ri8RORi(NATIVE_CC_AL,Rd,i,s) +#define MOV_ri(Rd,i) CC_MOV_ri(NATIVE_CC_AL,Rd,i) +#define MOV_rr(Rd,Rm) CC_MOV_rr(NATIVE_CC_AL,Rd,Rm) +#define MOV_rrLSLi(Rd,Rm,i) CC_MOV_rrLSLi(NATIVE_CC_AL,Rd,Rm,i) +#define MOV_rrLSLr(Rd,Rm,Rs) CC_MOV_rrLSLr(NATIVE_CC_AL,Rd,Rm,Rs) +#define MOV_rrLSRi(Rd,Rm,i) CC_MOV_rrLSRi(NATIVE_CC_AL,Rd,Rm,i) +#define MOV_rrLSRr(Rd,Rm,Rs) CC_MOV_rrLSRr(NATIVE_CC_AL,Rd,Rm,Rs) +#define MOV_rrASRi(Rd,Rm,i) CC_MOV_rrASRi(NATIVE_CC_AL,Rd,Rm,i) +#define MOV_rrASRr(Rd,Rm,Rs) CC_MOV_rrASRr(NATIVE_CC_AL,Rd,Rm,Rs) +#define MOV_rrRORi(Rd,Rm,i) CC_MOV_rrRORi(NATIVE_CC_AL,Rd,Rm,i) +#define 
MOV_rrRORr(Rd,Rm,Rs) CC_MOV_rrRORr(NATIVE_CC_AL,Rd,Rm,Rs) +#define MOV_rrRRX(Rd,Rm) CC_MOV_rrRRX(NATIVE_CC_AL,Rd,Rm) + +#define CC_MOVS_ri(cc,Rd,i) _OP1(cc,_MOV,1,Rd,SHIFT_IMM(i)) +#define CC_MOVS_rr(cc,Rd,Rm) _OP1(cc,_MOV,1,Rd,SHIFT_REG(Rm)) +#define CC_MOVS_rrLSLi(cc,Rd,Rm,i) _OP1(cc,_MOV,1,Rd,SHIFT_LSL_i(Rm,i)) +#define CC_MOVS_rrLSLr(cc,Rd,Rm,Rs) _OP1(cc,_MOV,1,Rd,SHIFT_LSL_r(Rm,Rs)) +#define CC_MOVS_rrLSRi(cc,Rd,Rm,i) _OP1(cc,_MOV,1,Rd,SHIFT_LSR_i(Rm,i)) +#define CC_MOVS_rrLSRr(cc,Rd,Rm,Rs) _OP1(cc,_MOV,1,Rd,SHIFT_LSR_r(Rm,Rs)) +#define CC_MOVS_rrASRi(cc,Rd,Rm,i) _OP1(cc,_MOV,1,Rd,SHIFT_ASR_i(Rm,i)) +#define CC_MOVS_rrASRr(cc,Rd,Rm,Rs) _OP1(cc,_MOV,1,Rd,SHIFT_ASR_r(Rm,Rs)) +#define CC_MOVS_rrRORi(cc,Rd,Rm,i) _OP1(cc,_MOV,1,Rd,SHIFT_ROR_i(Rm,i)) +#define CC_MOVS_rrRORr(cc,Rd,Rm,Rs) _OP1(cc,_MOV,1,Rd,SHIFT_ROR_r(Rm,Rs)) +#define CC_MOVS_rrRRX(cc,Rd,Rm) _OP1(cc,_MOV,1,Rd,SHIFT_RRX(Rm)) + +#define MOVS_ri(Rd,i) CC_MOVS_ri(NATIVE_CC_AL,Rd,i) +#define MOVS_rr(Rd,Rm) CC_MOVS_rr(NATIVE_CC_AL,Rd,Rm) +#define MOVS_rrLSLi(Rd,Rm,i) CC_MOVS_rrLSLi(NATIVE_CC_AL,Rd,Rm,i) +#define MOVS_rrLSLr(Rd,Rm,Rs) CC_MOVS_rrLSLr(NATIVE_CC_AL,Rd,Rm,Rs) +#define MOVS_rrLSRi(Rd,Rm,i) CC_MOVS_rrLSRi(NATIVE_CC_AL,Rd,Rm,i) +#define MOVS_rrLSRr(Rd,Rm,Rs) CC_MOVS_rrLSRr(NATIVE_CC_AL,Rd,Rm,Rs) +#define MOVS_rrASRi(Rd,Rm,i) CC_MOVS_rrASRi(NATIVE_CC_AL,Rd,Rm,i) +#define MOVS_rrASRr(Rd,Rm,Rs) CC_MOVS_rrASRr(NATIVE_CC_AL,Rd,Rm,Rs) +#define MOVS_rrRORi(Rd,Rm,i) CC_MOVS_rrRORi(NATIVE_CC_AL,Rd,Rm,i) +#define MOVS_rrRORr(Rd,Rm,Rs) CC_MOVS_rrRORr(NATIVE_CC_AL,Rd,Rm,Rs) +#define MOVS_rrRRX(Rd,Rm) CC_MOVS_rrRRX(NATIVE_CC_AL,Rd,Rm) + +// MVNcc rd,#i +#define CC_MVN_ri8(cc,Rd,i) _OP1(cc,_MVN,0,Rd,UNSHIFTED_IMM8(i)) +// MVNcc Rd,#i ROR #s +#define CC_MVN_ri8RORi(cc,Rd,i,s) _OP1(cc,_MVN,0,Rd,SHIFT_IMM8_ROR(i,s)) +#define CC_MVN_ri(cc,Rd,i) _OP1(cc,_MVN,0,Rd,SHIFT_IMM(i)) +#define CC_MVN_rr(cc,Rd,Rm) _OP1(cc,_MVN,0,Rd,SHIFT_REG(Rm)) +#define CC_MVN_rrLSLi(cc,Rd,Rm,i) _OP1(cc,_MVN,0,Rd,SHIFT_LSL_i(Rm,i)) +#define 
CC_MVN_rrLSLr(cc,Rd,Rm,Rs) _OP1(cc,_MVN,0,Rd,SHIFT_LSL_r(Rm,Rs)) +#define CC_MVN_rrLSRi(cc,Rd,Rm,i) _OP1(cc,_MVN,0,Rd,SHIFT_LSR_i(Rm,i)) +#define CC_MVN_rrLSRr(cc,Rd,Rm,Rs) _OP1(cc,_MVN,0,Rd,SHIFT_LSR_r(Rm,Rs)) +#define CC_MVN_rrASRi(cc,Rd,Rm,i) _OP1(cc,_MVN,0,Rd,SHIFT_ASR_i(Rm,i)) +#define CC_MVN_rrASRr(cc,Rd,Rm,Rs) _OP1(cc,_MVN,0,Rd,SHIFT_ASR_r(Rm,Rs)) +#define CC_MVN_rrRORi(cc,Rd,Rm,i) _OP1(cc,_MVN,0,Rd,SHIFT_ROR_i(Rm,i)) +#define CC_MVN_rrRORr(cc,Rd,Rm,Rs) _OP1(cc,_MVN,0,Rd,SHIFT_ROR_r(Rm,Rs)) +#define CC_MVN_rrRRX(cc,Rd,Rm) _OP1(cc,_MVN,0,Rd,SHIFT_RRX(Rm)) + +// MVN rd,#i +#define MVN_ri8(Rd,i) CC_MVN_ri8(NATIVE_CC_AL,Rd,i) +// MVN Rd,#i ROR #s +#define MVN_ri8RORi(Rd,i,s) CC_MVN_ri8RORi(NATIVE_CC_AL,Rd,i,s) +#define MVN_ri(Rd,i) CC_MVN_ri(NATIVE_CC_AL,Rd,i) +#define MVN_rr(Rd,Rm) CC_MVN_rr(NATIVE_CC_AL,Rd,Rm) +#define MVN_rrLSLi(Rd,Rm,i) CC_MVN_rrLSLi(NATIVE_CC_AL,Rd,Rm,i) +#define MVN_rrLSLr(Rd,Rm,Rs) CC_MVN_rrLSLr(NATIVE_CC_AL,Rd,Rm,Rs) +#define MVN_rrLSRi(Rd,Rm,i) CC_MVN_rrLSRi(NATIVE_CC_AL,Rd,Rm,i) +#define MVN_rrLSRr(Rd,Rm,Rs) CC_MVN_rrLSRr(NATIVE_CC_AL,Rd,Rm,Rs) +#define MVN_rrASRi(Rd,Rm,i) CC_MVN_rrASRi(NATIVE_CC_AL,Rd,Rm,i) +#define MVN_rrASRr(Rd,Rm,Rs) CC_MVN_rrASRr(NATIVE_CC_AL,Rd,Rm,Rs) +#define MVN_rrRORi(Rd,Rm,i) CC_MVN_rrRORi(NATIVE_CC_AL,Rd,Rm,i) +#define MVN_rrRORr(Rd,Rm,Rs) CC_MVN_rrRORr(NATIVE_CC_AL,Rd,Rm,Rs) +#define MVN_rrRRX(Rd,Rm) CC_MVN_rrRRX(NATIVE_CC_AL,Rd,Rm) + +#define CC_MVNS_ri(cc,Rd,i) _OP1(cc,_MVN,1,Rd,SHIFT_IMM(i)) +#define CC_MVNS_rr(cc,Rd,Rm) _OP1(cc,_MVN,1,Rd,SHIFT_REG(Rm)) +#define CC_MVNS_rrLSLi(cc,Rd,Rm,i) _OP1(cc,_MVN,1,Rd,SHIFT_LSL_i(Rm,i)) +#define CC_MVNS_rrLSLr(cc,Rd,Rm,Rs) _OP1(cc,_MVN,1,Rd,SHIFT_LSL_r(Rm,Rs)) +#define CC_MVNS_rrLSRi(cc,Rd,Rm,i) _OP1(cc,_MVN,1,Rd,SHIFT_LSR_i(Rm,i)) +#define CC_MVNS_rrLSRr(cc,Rd,Rm,Rs) _OP1(cc,_MVN,1,Rd,SHIFT_LSR_r(Rm,Rs)) +#define CC_MVNS_rrASRi(cc,Rd,Rm,i) _OP1(cc,_MVN,1,Rd,SHIFT_ASR_i(Rm,i)) +#define CC_MVNS_rrASRr(cc,Rd,Rm,Rs) _OP1(cc,_MVN,1,Rd,SHIFT_ASR_r(Rm,Rs)) +#define 
CC_MVNS_rrRORi(cc,Rd,Rm,i) _OP1(cc,_MVN,1,Rd,SHIFT_ROR_i(Rm,i)) +#define CC_MVNS_rrRORr(cc,Rd,Rm,Rs) _OP1(cc,_MVN,1,Rd,SHIFT_ROR_r(Rm,Rs)) +#define CC_MVNS_rrRRX(cc,Rd,Rm) _OP1(cc,_MVN,1,Rd,SHIFT_RRX(Rm)) + +#define MVNS_ri(Rd,i) CC_MVNS_ri(NATIVE_CC_AL,Rd,i) +#define MVNS_rr(Rd,Rm) CC_MVNS_rr(NATIVE_CC_AL,Rd,Rm) +#define MVNS_rrLSLi(Rd,Rm,i) CC_MVNS_rrLSLi(NATIVE_CC_AL,Rd,Rm,i) +#define MVNS_rrLSLr(Rd,Rm,Rs) CC_MVNS_rrLSLr(NATIVE_CC_AL,Rd,Rm,Rs) +#define MVNS_rrLSRi(Rd,Rm,i) CC_MVNS_rrLSRi(NATIVE_CC_AL,Rd,Rm,i) +#define MVNS_rrLSRr(Rd,Rm,Rs) CC_MVNS_rrLSRr(NATIVE_CC_AL,Rd,Rm,Rs) +#define MVNS_rrASRi(Rd,Rm,i) CC_MVNS_rrASRi(NATIVE_CC_AL,Rd,Rm,i) +#define MVNS_rrASRr(Rd,Rm,Rs) CC_MVNS_rrASRr(NATIVE_CC_AL,Rd,Rm,Rs) +#define MVNS_rrRORi(Rd,Rm,i) CC_MVNS_rrRORi(NATIVE_CC_AL,Rd,Rm,i) +#define MVNS_rrRORr(Rd,Rm,Rs) CC_MVNS_rrRORr(NATIVE_CC_AL,Rd,Rm,Rs) +#define MVNS_rrRRX(Rd,Rm) CC_MVNS_rrRRX(NATIVE_CC_AL,Rd,Rm) + +/* Opcodes Type 2 */ +#define CC_CMP_ri(cc,Rn,i) _OP2(cc,_CMP,Rn,SHIFT_IMM(i)) +#define CC_CMP_rr(cc,Rn,Rm) _OP2(cc,_CMP,Rn,SHIFT_REG(Rm)) +#define CC_CMP_rrLSLi(cc,Rn,Rm,i) _OP2(cc,_CMP,Rn,SHIFT_LSL_i(Rm,i)) +#define CC_CMP_rrLSLr(cc,Rn,Rm,Rs) _OP2(cc,_CMP,Rn,SHIFT_LSL_r(Rm,Rs)) +#define CC_CMP_rrLSRi(cc,Rn,Rm,i) _OP2(cc,_CMP,Rn,SHIFT_LSR_i(Rm,i)) +#define CC_CMP_rrLSRr(cc,Rn,Rm,Rs) _OP2(cc,_CMP,Rn,SHIFT_LSR_r(Rm,Rs)) +#define CC_CMP_rrASRi(cc,Rn,Rm,i) _OP2(cc,_CMP,Rn,SHIFT_ASR_i(Rm,i)) +#define CC_CMP_rrASRr(cc,Rn,Rm,Rs) _OP2(cc,_CMP,Rn,SHIFT_ASR_r(Rm,Rs)) +#define CC_CMP_rrRORi(cc,Rn,Rm,i) _OP2(cc,_CMP,Rn,SHIFT_ROR_i(Rm,i)) +#define CC_CMP_rrRORr(cc,Rn,Rm,Rs) _OP2(cc,_CMP,Rn,SHIFT_ROR_r(Rm,Rs)) +#define CC_CMP_rrRRX(cc,Rn,Rm) _OP2(cc,_CMP,Rn,SHIFT_RRX(Rm)) + +#define CMP_ri(Rn,i) CC_CMP_ri(NATIVE_CC_AL,Rn,i) +#define CMP_rr(Rn,Rm) CC_CMP_rr(NATIVE_CC_AL,Rn,Rm) +#define CMP_rrLSLi(Rn,Rm,i) CC_CMP_rrLSLi(NATIVE_CC_AL,Rn,Rm,i) +#define CMP_rrLSLr(Rn,Rm,Rs) CC_CMP_rrLSLr(NATIVE_CC_AL,Rn,Rm,Rs) +#define CMP_rrLSRi(Rn,Rm,i) CC_CMP_rrLSRi(NATIVE_CC_AL,Rn,Rm,i) 
+#define CMP_rrLSRr(Rn,Rm,Rs) CC_CMP_rrLSRr(NATIVE_CC_AL,Rn,Rm,Rs) +#define CMP_rrASRi(Rn,Rm,i) CC_CMP_rrASRi(NATIVE_CC_AL,Rn,Rm,i) +#define CMP_rrASRr(Rn,Rm,Rs) CC_CMP_rrASRr(NATIVE_CC_AL,Rn,Rm,Rs) +#define CMP_rrRORi(Rn,Rm,i) CC_CMP_rrRORi(NATIVE_CC_AL,Rn,Rm,i) +#define CMP_rrRORr(Rn,Rm,Rs) CC_CMP_rrRORr(NATIVE_CC_AL,Rn,Rm,Rs) +#define CMP_rrRRX(Rn,Rm) CC_CMP_rrRRX(NATIVE_CC_AL,Rn,Rm) + +#define CC_CMN_ri(cc,Rn,i) _OP2(cc,_CMN,Rn,SHIFT_IMM(i)) +#define CC_CMN_rr(cc,Rn,r) _OP2(cc,_CMN,Rn,SHIFT_REG(r)) +#define CC_CMN_rrLSLi(cc,Rn,Rm,i) _OP2(cc,_CMN,Rn,SHIFT_LSL_i(Rm,i)) +#define CC_CMN_rrLSLr(cc,Rn,Rm,Rs) _OP2(cc,_CMN,Rn,SHIFT_LSL_r(Rm,Rs)) +#define CC_CMN_rrLSRi(cc,Rn,Rm,i) _OP2(cc,_CMN,Rn,SHIFT_LSR_i(Rm,i)) +#define CC_CMN_rrLSRr(cc,Rn,Rm,Rs) _OP2(cc,_CMN,Rn,SHIFT_LSR_r(Rm,Rs)) +#define CC_CMN_rrASRi(cc,Rn,Rm,i) _OP2(cc,_CMN,Rn,SHIFT_ASR_i(Rm,i)) +#define CC_CMN_rrASRr(cc,Rn,Rm,Rs) _OP2(cc,_CMN,Rn,SHIFT_ASR_r(Rm,Rs)) +#define CC_CMN_rrRORi(cc,Rn,Rm,i) _OP2(cc,_CMN,Rn,SHIFT_ROR_i(Rm,i)) +#define CC_CMN_rrRORr(cc,Rn,Rm,Rs) _OP2(cc,_CMN,Rn,SHIFT_ROR_r(Rm,Rs)) +#define CC_CMN_rrRRX(cc,Rn,Rm) _OP2(cc,_CMN,Rn,SHIFT_RRX(Rm)) + +#define CMN_ri(Rn,i) CC_CMN_ri(NATIVE_CC_AL,Rn,i) +#define CMN_rr(Rn,r) CC_CMN_rr(NATIVE_CC_AL,Rn,r) +#define CMN_rrLSLi(Rn,Rm,i) CC_CMN_rrLSLi(NATIVE_CC_AL,Rn,Rm,i) +#define CMN_rrLSLr(Rn,Rm,Rs) CC_CMN_rrLSLr(NATIVE_CC_AL,Rn,Rm,Rs) +#define CMN_rrLSRi(Rn,Rm,i) CC_CMN_rrLSRi(NATIVE_CC_AL,Rn,Rm,i) +#define CMN_rrLSRr(Rn,Rm,Rs) CC_CMN_rrLSRr(NATIVE_CC_AL,Rn,Rm,Rs) +#define CMN_rrASRi(Rn,Rm,i) CC_CMN_rrASRi(NATIVE_CC_AL,Rn,Rm,i) +#define CMN_rrASRr(Rn,Rm,Rs) CC_CMN_rrASRr(NATIVE_CC_AL,Rn,Rm,Rs) +#define CMN_rrRORi(Rn,Rm,i) CC_CMN_rrRORi(NATIVE_CC_AL,Rn,Rm,i) +#define CMN_rrRORr(Rn,Rm,Rs) CC_CMN_rrRORr(NATIVE_CC_AL,Rn,Rm,Rs) +#define CMN_rrRRX(Rn,Rm) CC_CMN_rrRRX(NATIVE_CC_AL,Rn,Rm) + +#define CC_TST_ri(cc,Rn,i) _OP2(cc,_TST,Rn,SHIFT_IMM(i)) +#define CC_TST_rr(cc,Rn,r) _OP2(cc,_TST,Rn,SHIFT_REG(r)) +#define CC_TST_rrLSLi(cc,Rn,Rm,i) 
_OP2(cc,_TST,Rn,SHIFT_LSL_i(Rm,i)) +#define CC_TST_rrLSLr(cc,Rn,Rm,Rs) _OP2(cc,_TST,Rn,SHIFT_LSL_r(Rm,Rs)) +#define CC_TST_rrLSRi(cc,Rn,Rm,i) _OP2(cc,_TST,Rn,SHIFT_LSR_i(Rm,i)) +#define CC_TST_rrLSRr(cc,Rn,Rm,Rs) _OP2(cc,_TST,Rn,SHIFT_LSR_r(Rm,Rs)) +#define CC_TST_rrASRi(cc,Rn,Rm,i) _OP2(cc,_TST,Rn,SHIFT_ASR_i(Rm,i)) +#define CC_TST_rrASRr(cc,Rn,Rm,Rs) _OP2(cc,_TST,Rn,SHIFT_ASR_r(Rm,Rs)) +#define CC_TST_rrRORi(cc,Rn,Rm,i) _OP2(cc,_TST,Rn,SHIFT_ROR_i(Rm,i)) +#define CC_TST_rrRORr(cc,Rn,Rm,Rs) _OP2(cc,_TST,Rn,SHIFT_ROR_r(Rm,Rs)) +#define CC_TST_rrRRX(cc,Rn,Rm) _OP2(cc,_TST,Rn,SHIFT_RRX(Rm)) + +#define TST_ri(Rn,i) CC_TST_ri(NATIVE_CC_AL,Rn,i) +#define TST_rr(Rn,r) CC_TST_rr(NATIVE_CC_AL,Rn,r) +#define TST_rrLSLi(Rn,Rm,i) CC_TST_rrLSLi(NATIVE_CC_AL,Rn,Rm,i) +#define TST_rrLSLr(Rn,Rm,Rs) CC_TST_rrLSLr(NATIVE_CC_AL,Rn,Rm,Rs) +#define TST_rrLSRi(Rn,Rm,i) CC_TST_rrLSRi(NATIVE_CC_AL,Rn,Rm,i) +#define TST_rrLSRr(Rn,Rm,Rs) CC_TST_rrLSRr(NATIVE_CC_AL,Rn,Rm,Rs) +#define TST_rrASRi(Rn,Rm,i) CC_TST_rrASRi(NATIVE_CC_AL,Rn,Rm,i) +#define TST_rrASRr(Rn,Rm,Rs) CC_TST_rrASRr(NATIVE_CC_AL,Rn,Rm,Rs) +#define TST_rrRORi(Rn,Rm,i) CC_TST_rrRORi(NATIVE_CC_AL,Rn,Rm,i) +#define TST_rrRORr(Rn,Rm,Rs) CC_TST_rrRORr(NATIVE_CC_AL,Rn,Rm,Rs) +#define TST_rrRRX(Rn,Rm) CC_TST_rrRRX(NATIVE_CC_AL,Rn,Rm) + +#define CC_TEQ_ri(cc,Rn,i) _OP2(cc,_TEQ,Rn,SHIFT_IMM(i)) +#define CC_TEQ_rr(cc,Rn,r) _OP2(cc,_TEQ,Rn,SHIFT_REG(r)) +#define CC_TEQ_rrLSLi(cc,Rn,Rm,i) _OP2(cc,_TEQ,Rn,SHIFT_LSL_i(Rm,i)) +#define CC_TEQ_rrLSLr(cc,Rn,Rm,Rs) _OP2(cc,_TEQ,Rn,SHIFT_LSL_r(Rm,Rs)) +#define CC_TEQ_rrLSRi(cc,Rn,Rm,i) _OP2(cc,_TEQ,Rn,SHIFT_LSR_i(Rm,i)) +#define CC_TEQ_rrLSRr(cc,Rn,Rm,Rs) _OP2(cc,_TEQ,Rn,SHIFT_LSR_r(Rm,Rs)) +#define CC_TEQ_rrASRi(cc,Rn,Rm,i) _OP2(cc,_TEQ,Rn,SHIFT_ASR_i(Rm,i)) +#define CC_TEQ_rrASRr(cc,Rn,Rm,Rs) _OP2(cc,_TEQ,Rn,SHIFT_ASR_r(Rm,Rs)) +#define CC_TEQ_rrRORi(cc,Rn,Rm,i) _OP2(cc,_TEQ,Rn,SHIFT_ROR_i(Rm,i)) +#define CC_TEQ_rrRORr(cc,Rn,Rm,Rs) _OP2(cc,_TEQ,Rn,SHIFT_ROR_r(Rm,Rs)) +#define 
CC_TEQ_rrRRX(cc,Rn,Rm) _OP2(cc,_TEQ,Rn,SHIFT_RRX(Rm)) + +#define TEQ_ri(Rn,i) CC_TEQ_ri(NATIVE_CC_AL,Rn,i) +#define TEQ_rr(Rn,r) CC_TEQ_rr(NATIVE_CC_AL,Rn,r) +#define TEQ_rrLSLi(Rn,Rm,i) CC_TEQ_rrLSLi(NATIVE_CC_AL,Rn,Rm,i) +#define TEQ_rrLSLr(Rn,Rm,Rs) CC_TEQ_rrLSLr(NATIVE_CC_AL,Rn,Rm,Rs) +#define TEQ_rrLSRi(Rn,Rm,i) CC_TEQ_rrLSRi(NATIVE_CC_AL,Rn,Rm,i) +#define TEQ_rrLSRr(Rn,Rm,Rs) CC_TEQ_rrLSRr(NATIVE_CC_AL,Rn,Rm,Rs) +#define TEQ_rrASRi(Rn,Rm,i) CC_TEQ_rrASRi(NATIVE_CC_AL,Rn,Rm,i) +#define TEQ_rrASRr(Rn,Rm,Rs) CC_TEQ_rrASRr(NATIVE_CC_AL,Rn,Rm,Rs) +#define TEQ_rrRORi(Rn,Rm,i) CC_TEQ_rrRORi(NATIVE_CC_AL,Rn,Rm,i) +#define TEQ_rrRORr(Rn,Rm,Rs) CC_TEQ_rrRORr(NATIVE_CC_AL,Rn,Rm,Rs) +#define TEQ_rrRRX(Rn,Rm) CC_TEQ_rrRRX(NATIVE_CC_AL,Rn,Rm) + +/* Opcodes Type 3 */ +#define CC_AND_rri(cc,Rd,Rn,i) _OP3(cc,_AND,0,Rd,Rn,SHIFT_IMM(i)) +#define CC_AND_rrr(cc,Rd,Rn,Rm) _OP3(cc,_AND,0,Rd,Rn,SHIFT_REG(Rm)) +#define CC_AND_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_AND,0,Rd,Rn,SHIFT_LSL_i(Rm,i)) +#define CC_AND_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_AND,0,Rd,Rn,SHIFT_LSL_r(Rm,Rs)) +#define CC_AND_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_AND,0,Rd,Rn,SHIFT_LSR_i(Rm,i)) +#define CC_AND_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_AND,0,Rd,Rn,SHIFT_LSR_r(Rm,Rs)) +#define CC_AND_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_AND,0,Rd,Rn,SHIFT_ASR_i(Rm,i)) +#define CC_AND_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_AND,0,Rd,Rn,SHIFT_ASR_r(Rm,Rs)) +#define CC_AND_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_AND,0,Rd,Rn,SHIFT_ROR_i(Rm,i)) +#define CC_AND_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_AND,0,Rd,Rn,SHIFT_ROR_r(Rm,Rs)) +#define CC_AND_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_AND,0,Rd,Rn,SHIFT_RRX(Rm)) + +#define AND_rri(Rd,Rn,i) CC_AND_rri(NATIVE_CC_AL,Rd,Rn,i) +#define AND_rrr(Rd,Rn,Rm) CC_AND_rrr(NATIVE_CC_AL,Rd,Rn,Rm) +#define AND_rrrLSLi(Rd,Rn,Rm,i) CC_AND_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define AND_rrrLSLr(Rd,Rn,Rm,Rs) CC_AND_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define AND_rrrLSRi(Rd,Rn,Rm,i) CC_AND_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define AND_rrrLSRr(Rd,Rn,Rm,Rs) 
CC_AND_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define AND_rrrASRi(Rd,Rn,Rm,i) CC_AND_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define AND_rrrASRr(Rd,Rn,Rm,Rs) CC_AND_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define AND_rrrRORi(Rd,Rn,Rm,i) CC_AND_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define AND_rrrRORr(Rd,Rn,Rm,Rs) CC_AND_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define AND_rrrRRX(Rd,Rn,Rm) CC_AND_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm) + +#define CC_ANDS_rri(cc,Rd,Rn,i) _OP3(cc,_AND,1,Rd,Rn,SHIFT_IMM(i)) +#define CC_ANDS_rrr(cc,Rd,Rn,Rm) _OP3(cc,_AND,1,Rd,Rn,SHIFT_REG(Rm)) +#define CC_ANDS_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_AND,1,Rd,Rn,SHIFT_LSL_i(Rm,i)) +#define CC_ANDS_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_AND,1,Rd,Rn,SHIFT_LSL_r(Rm,Rs)) +#define CC_ANDS_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_AND,1,Rd,Rn,SHIFT_LSR_i(Rm,i)) +#define CC_ANDS_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_AND,1,Rd,Rn,SHIFT_LSR_r(Rm,Rs)) +#define CC_ANDS_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_AND,1,Rd,Rn,SHIFT_ASR_i(Rm,i)) +#define CC_ANDS_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_AND,1,Rd,Rn,SHIFT_ASR_r(Rm,Rs)) +#define CC_ANDS_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_AND,1,Rd,Rn,SHIFT_ROR_i(Rm,i)) +#define CC_ANDS_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_AND,1,Rd,Rn,SHIFT_ROR_r(Rm,Rs)) +#define CC_ANDS_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_AND,1,Rd,Rn,SHIFT_RRX(Rm)) + +#define ANDS_rri(Rd,Rn,i) CC_ANDS_rri(NATIVE_CC_AL,Rd,Rn,i) +#define ANDS_rrr(Rd,Rn,Rm) CC_ANDS_rrr(NATIVE_CC_AL,Rd,Rn,Rm) +#define ANDS_rrrLSLi(Rd,Rn,Rm,i) CC_ANDS_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define ANDS_rrrLSLr(Rd,Rn,Rm,Rs) CC_ANDS_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define ANDS_rrrLSRi(Rd,Rn,Rm,i) CC_ANDS_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define ANDS_rrrLSRr(Rd,Rn,Rm,Rs) CC_ANDS_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define ANDS_rrrASRi(Rd,Rn,Rm,i) CC_ANDS_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define ANDS_rrrASRr(Rd,Rn,Rm,Rs) CC_ANDS_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define ANDS_rrrRORi(Rd,Rn,Rm,i) CC_ANDS_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define ANDS_rrrRORr(Rd,Rn,Rm,Rs) CC_ANDS_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) 
+#define ANDS_rrrRRX(Rd,Rn,Rm) CC_ANDS_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm) + +#define CC_EOR_rri(cc,Rd,Rn,i) _OP3(cc,_EOR,0,Rd,Rn,SHIFT_IMM(i)) +#define CC_EOR_rrr(cc,Rd,Rn,Rm) _OP3(cc,_EOR,0,Rd,Rn,SHIFT_REG(Rm)) +#define CC_EOR_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_EOR,0,Rd,Rn,SHIFT_LSL_i(Rm,i)) +#define CC_EOR_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_EOR,0,Rd,Rn,SHIFT_LSL_r(Rm,Rs)) +#define CC_EOR_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_EOR,0,Rd,Rn,SHIFT_LSR_i(Rm,i)) +#define CC_EOR_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_EOR,0,Rd,Rn,SHIFT_LSR_r(Rm,Rs)) +#define CC_EOR_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_EOR,0,Rd,Rn,SHIFT_ASR_i(Rm,i)) +#define CC_EOR_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_EOR,0,Rd,Rn,SHIFT_ASR_r(Rm,Rs)) +#define CC_EOR_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_EOR,0,Rd,Rn,SHIFT_ROR_i(Rm,i)) +#define CC_EOR_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_EOR,0,Rd,Rn,SHIFT_ROR_r(Rm,Rs)) +#define CC_EOR_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_EOR,0,Rd,Rn,SHIFT_RRX(Rm)) + +#define EOR_rri(Rd,Rn,i) CC_EOR_rri(NATIVE_CC_AL,Rd,Rn,i) +#define EOR_rrr(Rd,Rn,Rm) CC_EOR_rrr(NATIVE_CC_AL,Rd,Rn,Rm) +#define EOR_rrrLSLi(Rd,Rn,Rm,i) CC_EOR_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define EOR_rrrLSLr(Rd,Rn,Rm,Rs) CC_EOR_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define EOR_rrrLSRi(Rd,Rn,Rm,i) CC_EOR_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define EOR_rrrLSRr(Rd,Rn,Rm,Rs) CC_EOR_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define EOR_rrrASRi(Rd,Rn,Rm,i) CC_EOR_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define EOR_rrrASRr(Rd,Rn,Rm,Rs) CC_EOR_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define EOR_rrrRORi(Rd,Rn,Rm,i) CC_EOR_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define EOR_rrrRORr(Rd,Rn,Rm,Rs) CC_EOR_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define EOR_rrrRRX(Rd,Rn,Rm) CC_EOR_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm) + +#define CC_EORS_rri(cc,Rd,Rn,i) _OP3(cc,_EOR,1,Rd,Rn,SHIFT_IMM(i)) +#define CC_EORS_rrr(cc,Rd,Rn,Rm) _OP3(cc,_EOR,1,Rd,Rn,SHIFT_REG(Rm)) +#define CC_EORS_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_EOR,1,Rd,Rn,SHIFT_LSL_i(Rm,i)) +#define CC_EORS_rrrLSLr(cc,Rd,Rn,Rm,Rs) 
_OP3(cc,_EOR,1,Rd,Rn,SHIFT_LSL_r(Rm,Rs)) +#define CC_EORS_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_EOR,1,Rd,Rn,SHIFT_LSR_i(Rm,i)) +#define CC_EORS_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_EOR,1,Rd,Rn,SHIFT_LSR_r(Rm,Rs)) +#define CC_EORS_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_EOR,1,Rd,Rn,SHIFT_ASR_i(Rm,i)) +#define CC_EORS_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_EOR,1,Rd,Rn,SHIFT_ASR_r(Rm,Rs)) +#define CC_EORS_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_EOR,1,Rd,Rn,SHIFT_ROR_i(Rm,i)) +#define CC_EORS_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_EOR,1,Rd,Rn,SHIFT_ROR_r(Rm,Rs)) +#define CC_EORS_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_EOR,1,Rd,Rn,SHIFT_RRX(Rm)) + +#define EORS_rri(Rd,Rn,i) CC_EORS_rri(NATIVE_CC_AL,Rd,Rn,i) +#define EORS_rrr(Rd,Rn,Rm) CC_EORS_rrr(NATIVE_CC_AL,Rd,Rn,Rm) +#define EORS_rrrLSLi(Rd,Rn,Rm,i) CC_EORS_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define EORS_rrrLSLr(Rd,Rn,Rm,Rs) CC_EORS_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define EORS_rrrLSRi(Rd,Rn,Rm,i) CC_EORS_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define EORS_rrrLSRr(Rd,Rn,Rm,Rs) CC_EORS_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define EORS_rrrASRi(Rd,Rn,Rm,i) CC_EORS_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define EORS_rrrASRr(Rd,Rn,Rm,Rs) CC_EORS_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define EORS_rrrRORi(Rd,Rn,Rm,i) CC_EORS_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define EORS_rrrRORr(Rd,Rn,Rm,Rs) CC_EORS_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define EORS_rrrRRX(Rd,Rn,Rm) CC_EORS_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm) + +#define CC_SUB_rri(cc,Rd,Rn,i) _OP3(cc,_SUB,0,Rd,Rn,SHIFT_IMM(i)) +#define CC_SUB_rrr(cc,Rd,Rn,Rm) _OP3(cc,_SUB,0,Rd,Rn,SHIFT_REG(Rm)) +#define CC_SUB_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_SUB,0,Rd,Rn,SHIFT_LSL_i(Rm,i)) +#define CC_SUB_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_SUB,0,Rd,Rn,SHIFT_LSL_r(Rm,Rs)) +#define CC_SUB_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_SUB,0,Rd,Rn,SHIFT_LSR_i(Rm,i)) +#define CC_SUB_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_SUB,0,Rd,Rn,SHIFT_LSR_r(Rm,Rs)) +#define CC_SUB_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_SUB,0,Rd,Rn,SHIFT_ASR_i(Rm,i)) +#define CC_SUB_rrrASRr(cc,Rd,Rn,Rm,Rs) 
_OP3(cc,_SUB,0,Rd,Rn,SHIFT_ASR_r(Rm,Rs)) +#define CC_SUB_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_SUB,0,Rd,Rn,SHIFT_ROR_i(Rm,i)) +#define CC_SUB_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_SUB,0,Rd,Rn,SHIFT_ROR_r(Rm,Rs)) +#define CC_SUB_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_SUB,0,Rd,Rn,SHIFT_RRX(Rm)) + +#define SUB_rri(Rd,Rn,i) CC_SUB_rri(NATIVE_CC_AL,Rd,Rn,i) +#define SUB_rrr(Rd,Rn,Rm) CC_SUB_rrr(NATIVE_CC_AL,Rd,Rn,Rm) +#define SUB_rrrLSLi(Rd,Rn,Rm,i) CC_SUB_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define SUB_rrrLSLr(Rd,Rn,Rm,Rs) CC_SUB_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define SUB_rrrLSRi(Rd,Rn,Rm,i) CC_SUB_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define SUB_rrrLSRr(Rd,Rn,Rm,Rs) CC_SUB_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define SUB_rrrASRi(Rd,Rn,Rm,i) CC_SUB_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define SUB_rrrASRr(Rd,Rn,Rm,Rs) CC_SUB_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define SUB_rrrRORi(Rd,Rn,Rm,i) CC_SUB_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define SUB_rrrRORr(Rd,Rn,Rm,Rs) CC_SUB_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define SUB_rrrRRX(Rd,Rn,Rm) CC_SUB_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm) + +#define CC_SUBS_rri(cc,Rd,Rn,i) _OP3(cc,_SUB,1,Rd,Rn,SHIFT_IMM(i)) +#define CC_SUBS_rrr(cc,Rd,Rn,Rm) _OP3(cc,_SUB,1,Rd,Rn,SHIFT_REG(Rm)) +#define CC_SUBS_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_SUB,1,Rd,Rn,SHIFT_LSL_i(Rm,i)) +#define CC_SUBS_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_SUB,1,Rd,Rn,SHIFT_LSL_r(Rm,Rs)) +#define CC_SUBS_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_SUB,1,Rd,Rn,SHIFT_LSR_i(Rm,i)) +#define CC_SUBS_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_SUB,1,Rd,Rn,SHIFT_LSR_r(Rm,Rs)) +#define CC_SUBS_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_SUB,1,Rd,Rn,SHIFT_ASR_i(Rm,i)) +#define CC_SUBS_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_SUB,1,Rd,Rn,SHIFT_ASR_r(Rm,Rs)) +#define CC_SUBS_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_SUB,1,Rd,Rn,SHIFT_ROR_i(Rm,i)) +#define CC_SUBS_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_SUB,1,Rd,Rn,SHIFT_ROR_r(Rm,Rs)) +#define CC_SUBS_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_SUB,1,Rd,Rn,SHIFT_RRX(Rm)) + +#define SUBS_rri(Rd,Rn,i) CC_SUBS_rri(NATIVE_CC_AL,Rd,Rn,i) +#define 
SUBS_rrr(Rd,Rn,Rm) CC_SUBS_rrr(NATIVE_CC_AL,Rd,Rn,Rm) +#define SUBS_rrrLSLi(Rd,Rn,Rm,i) CC_SUBS_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define SUBS_rrrLSLr(Rd,Rn,Rm,Rs) CC_SUBS_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define SUBS_rrrLSRi(Rd,Rn,Rm,i) CC_SUBS_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define SUBS_rrrLSRr(Rd,Rn,Rm,Rs) CC_SUBS_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define SUBS_rrrASRi(Rd,Rn,Rm,i) CC_SUBS_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define SUBS_rrrASRr(Rd,Rn,Rm,Rs) CC_SUBS_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define SUBS_rrrRORi(Rd,Rn,Rm,i) CC_SUBS_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define SUBS_rrrRORr(Rd,Rn,Rm,Rs) CC_SUBS_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define SUBS_rrrRRX(Rd,Rn,Rm) CC_SUBS_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm) + +#define CC_RSB_rri(cc,Rd,Rn,i) _OP3(cc,_RSB,0,Rd,Rn,SHIFT_IMM(i)) +#define CC_RSB_rrr(cc,Rd,Rn,Rm) _OP3(cc,_RSB,0,Rd,Rn,SHIFT_REG(Rm)) +#define CC_RSB_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_RSB,0,Rd,Rn,SHIFT_LSL_i(Rm,i)) +#define CC_RSB_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_RSB,0,Rd,Rn,SHIFT_LSL_r(Rm,Rs)) +#define CC_RSB_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_RSB,0,Rd,Rn,SHIFT_LSR_i(Rm,i)) +#define CC_RSB_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_RSB,0,Rd,Rn,SHIFT_LSR_r(Rm,Rs)) +#define CC_RSB_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_RSB,0,Rd,Rn,SHIFT_ASR_i(Rm,i)) +#define CC_RSB_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_RSB,0,Rd,Rn,SHIFT_ASR_r(Rm,Rs)) +#define CC_RSB_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_RSB,0,Rd,Rn,SHIFT_ROR_i(Rm,i)) +#define CC_RSB_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_RSB,0,Rd,Rn,SHIFT_ROR_r(Rm,Rs)) +#define CC_RSB_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_RSB,0,Rd,Rn,SHIFT_RRX(Rm)) + +#define RSB_rri(Rd,Rn,i) CC_RSB_rri(NATIVE_CC_AL,Rd,Rn,i) +#define RSB_rrr(Rd,Rn,Rm) CC_RSB_rrr(NATIVE_CC_AL,Rd,Rn,Rm) +#define RSB_rrrLSLi(Rd,Rn,Rm,i) CC_RSB_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define RSB_rrrLSLr(Rd,Rn,Rm,Rs) CC_RSB_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define RSB_rrrLSRi(Rd,Rn,Rm,i) CC_RSB_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define RSB_rrrLSRr(Rd,Rn,Rm,Rs) CC_RSB_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) 
+#define RSB_rrrASRi(Rd,Rn,Rm,i) CC_RSB_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define RSB_rrrASRr(Rd,Rn,Rm,Rs) CC_RSB_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define RSB_rrrRORi(Rd,Rn,Rm,i) CC_RSB_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define RSB_rrrRORr(Rd,Rn,Rm,Rs) CC_RSB_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define RSB_rrrRRX(Rd,Rn,Rm) CC_RSB_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm) + +#define CC_RSBS_rri(cc,Rd,Rn,i) _OP3(cc,_RSB,1,Rd,Rn,SHIFT_IMM(i)) +#define CC_RSBS_rrr(cc,Rd,Rn,Rm) _OP3(cc,_RSB,1,Rd,Rn,SHIFT_REG(Rm)) +#define CC_RSBS_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_RSB,1,Rd,Rn,SHIFT_LSL_i(Rm,i)) +#define CC_RSBS_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_RSB,1,Rd,Rn,SHIFT_LSL_r(Rm,Rs)) +#define CC_RSBS_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_RSB,1,Rd,Rn,SHIFT_LSR_i(Rm,i)) +#define CC_RSBS_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_RSB,1,Rd,Rn,SHIFT_LSR_r(Rm,Rs)) +#define CC_RSBS_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_RSB,1,Rd,Rn,SHIFT_ASR_i(Rm,i)) +#define CC_RSBS_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_RSB,1,Rd,Rn,SHIFT_ASR_r(Rm,Rs)) +#define CC_RSBS_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_RSB,1,Rd,Rn,SHIFT_ROR_i(Rm,i)) +#define CC_RSBS_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_RSB,1,Rd,Rn,SHIFT_ROR_r(Rm,Rs)) +#define CC_RSBS_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_RSB,1,Rd,Rn,SHIFT_RRX(Rm)) + +#define RSBS_rri(Rd,Rn,i) CC_RSBS_rri(NATIVE_CC_AL,Rd,Rn,i) +#define RSBS_rrr(Rd,Rn,Rm) CC_RSBS_rrr(NATIVE_CC_AL,Rd,Rn,Rm) +#define RSBS_rrrLSLi(Rd,Rn,Rm,i) CC_RSBS_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define RSBS_rrrLSLr(Rd,Rn,Rm,Rs) CC_RSBS_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define RSBS_rrrLSRi(Rd,Rn,Rm,i) CC_RSBS_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define RSBS_rrrLSRr(Rd,Rn,Rm,Rs) CC_RSBS_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define RSBS_rrrASRi(Rd,Rn,Rm,i) CC_RSBS_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define RSBS_rrrASRr(Rd,Rn,Rm,Rs) CC_RSBS_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define RSBS_rrrRORi(Rd,Rn,Rm,i) CC_RSBS_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define RSBS_rrrRORr(Rd,Rn,Rm,Rs) CC_RSBS_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define RSBS_rrrRRX(Rd,Rn,Rm) 
CC_RSBS_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm)
+
+#define CC_ADD_rri8(cc,Rd,Rn,i) _OP3(cc,_ADD,0,Rd,Rn,UNSHIFTED_IMM8(i))
+#define CC_ADD_rri8RORi(cc,Rd,Rn,Rm,i) _OP3(cc,_ADD,0,Rd,Rn,SHIFT_IMM8_ROR(Rm,i))
+
+#define CC_ADD_rri(cc,Rd,Rn,i) _OP3(cc,_ADD,0,Rd,Rn,SHIFT_IMM(i))
+#define CC_ADD_rrr(cc,Rd,Rn,Rm) _OP3(cc,_ADD,0,Rd,Rn,SHIFT_REG(Rm))
+#define CC_ADD_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_ADD,0,Rd,Rn,SHIFT_LSL_i(Rm,i))
+#define CC_ADD_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADD,0,Rd,Rn,SHIFT_LSL_r(Rm,Rs))
+#define CC_ADD_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_ADD,0,Rd,Rn,SHIFT_LSR_i(Rm,i))
+#define CC_ADD_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADD,0,Rd,Rn,SHIFT_LSR_r(Rm,Rs))
+#define CC_ADD_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_ADD,0,Rd,Rn,SHIFT_ASR_i(Rm,i))
+#define CC_ADD_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADD,0,Rd,Rn,SHIFT_ASR_r(Rm,Rs))
+#define CC_ADD_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_ADD,0,Rd,Rn,SHIFT_ROR_i(Rm,i))
+#define CC_ADD_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADD,0,Rd,Rn,SHIFT_ROR_r(Rm,Rs))
+#define CC_ADD_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_ADD,0,Rd,Rn,SHIFT_RRX(Rm))
+
+#define ADD_rri8(Rd,Rn,i) CC_ADD_rri8(NATIVE_CC_AL,Rd,Rn,i)
+#define ADD_rri8RORi(Rd,Rn,Rm,i) CC_ADD_rri8RORi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+
+#define ADD_rri(Rd,Rn,i) CC_ADD_rri(NATIVE_CC_AL,Rd,Rn,i)
+#define ADD_rrr(Rd,Rn,Rm) CC_ADD_rrr(NATIVE_CC_AL,Rd,Rn,Rm)
+#define ADD_rrrLSLi(Rd,Rn,Rm,i) CC_ADD_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define ADD_rrrLSLr(Rd,Rn,Rm,Rs) CC_ADD_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
+#define ADD_rrrLSRi(Rd,Rn,Rm,i) CC_ADD_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define ADD_rrrLSRr(Rd,Rn,Rm,Rs) CC_ADD_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
+#define ADD_rrrASRi(Rd,Rn,Rm,i) CC_ADD_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define ADD_rrrASRr(Rd,Rn,Rm,Rs) CC_ADD_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
+#define ADD_rrrRORi(Rd,Rn,Rm,i) CC_ADD_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define ADD_rrrRORr(Rd,Rn,Rm,Rs) CC_ADD_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
+#define ADD_rrrRRX(Rd,Rn,Rm) CC_ADD_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm)
+
+#define
CC_ADDS_rri(cc,Rd,Rn,i) _OP3(cc,_ADD,1,Rd,Rn,SHIFT_IMM(i)) +#define CC_ADDS_rrr(cc,Rd,Rn,Rm) _OP3(cc,_ADD,1,Rd,Rn,SHIFT_REG(Rm)) +#define CC_ADDS_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_ADD,1,Rd,Rn,SHIFT_LSL_i(Rm,i)) +#define CC_ADDS_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADD,1,Rd,Rn,SHIFT_LSL_r(Rm,Rs)) +#define CC_ADDS_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_ADD,1,Rd,Rn,SHIFT_LSR_i(Rm,i)) +#define CC_ADDS_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADD,1,Rd,Rn,SHIFT_LSR_r(Rm,Rs)) +#define CC_ADDS_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_ADD,1,Rd,Rn,SHIFT_ASR_i(Rm,i)) +#define CC_ADDS_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADD,1,Rd,Rn,SHIFT_ASR_r(Rm,Rs)) +#define CC_ADDS_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_ADD,1,Rd,Rn,SHIFT_ROR_i(Rm,i)) +#define CC_ADDS_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADD,1,Rd,Rn,SHIFT_ROR_r(Rm,Rs)) +#define CC_ADDS_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_ADD,1,Rd,Rn,SHIFT_RRX(Rm)) + +#define ADDS_rri(Rd,Rn,i) CC_ADDS_rri(NATIVE_CC_AL,Rd,Rn,i) +#define ADDS_rrr(Rd,Rn,Rm) CC_ADDS_rrr(NATIVE_CC_AL,Rd,Rn,Rm) +#define ADDS_rrrLSLi(Rd,Rn,Rm,i) CC_ADDS_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define ADDS_rrrLSLr(Rd,Rn,Rm,Rs) CC_ADDS_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define ADDS_rrrLSRi(Rd,Rn,Rm,i) CC_ADDS_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define ADDS_rrrLSRr(Rd,Rn,Rm,Rs) CC_ADDS_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define ADDS_rrrASRi(Rd,Rn,Rm,i) CC_ADDS_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define ADDS_rrrASRr(Rd,Rn,Rm,Rs) CC_ADDS_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define ADDS_rrrRORi(Rd,Rn,Rm,i) CC_ADDS_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define ADDS_rrrRORr(Rd,Rn,Rm,Rs) CC_ADDS_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define ADDS_rrrRRX(Rd,Rn,Rm) CC_ADDS_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm) + +#define CC_ADC_rri(cc,Rd,Rn,i) _OP3(cc,_ADC,0,Rd,Rn,SHIFT_IMM(i)) +#define CC_ADC_rrr(cc,Rd,Rn,Rm) _OP3(cc,_ADC,0,Rd,Rn,SHIFT_REG(Rm)) +#define CC_ADC_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_ADC,0,Rd,Rn,SHIFT_LSL_i(Rm,i)) +#define CC_ADC_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADC,0,Rd,Rn,SHIFT_LSL_r(Rm,Rs)) +#define CC_ADC_rrrLSRi(cc,Rd,Rn,Rm,i) 
_OP3(cc,_ADC,0,Rd,Rn,SHIFT_LSR_i(Rm,i)) +#define CC_ADC_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADC,0,Rd,Rn,SHIFT_LSR_r(Rm,Rs)) +#define CC_ADC_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_ADC,0,Rd,Rn,SHIFT_ASR_i(Rm,i)) +#define CC_ADC_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADC,0,Rd,Rn,SHIFT_ASR_r(Rm,Rs)) +#define CC_ADC_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_ADC,0,Rd,Rn,SHIFT_ROR_i(Rm,i)) +#define CC_ADC_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADC,0,Rd,Rn,SHIFT_ROR_r(Rm,Rs)) +#define CC_ADC_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_ADC,0,Rd,Rn,SHIFT_RRX(Rm)) + +#define ADC_rri(Rd,Rn,i) CC_ADC_rri(NATIVE_CC_AL,Rd,Rn,i) +#define ADC_rrr(Rd,Rn,Rm) CC_ADC_rrr(NATIVE_CC_AL,Rd,Rn,Rm) +#define ADC_rrrLSLi(Rd,Rn,Rm,i) CC_ADC_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define ADC_rrrLSLr(Rd,Rn,Rm,Rs) CC_ADC_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define ADC_rrrLSRi(Rd,Rn,Rm,i) CC_ADC_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define ADC_rrrLSRr(Rd,Rn,Rm,Rs) CC_ADC_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define ADC_rrrASRi(Rd,Rn,Rm,i) CC_ADC_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define ADC_rrrASRr(Rd,Rn,Rm,Rs) CC_ADC_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define ADC_rrrRORi(Rd,Rn,Rm,i) CC_ADC_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define ADC_rrrRORr(Rd,Rn,Rm,Rs) CC_ADC_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define ADC_rrrRRX(Rd,Rn,Rm) CC_ADC_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm) + +#define CC_ADCS_rri(cc,Rd,Rn,i) _OP3(cc,_ADC,1,Rd,Rn,SHIFT_IMM(i)) +#define CC_ADCS_rrr(cc,Rd,Rn,Rm) _OP3(cc,_ADC,1,Rd,Rn,SHIFT_REG(Rm)) +#define CC_ADCS_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_ADC,1,Rd,Rn,SHIFT_LSL_i(Rm,i)) +#define CC_ADCS_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADC,1,Rd,Rn,SHIFT_LSL_r(Rm,Rs)) +#define CC_ADCS_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_ADC,1,Rd,Rn,SHIFT_LSR_i(Rm,i)) +#define CC_ADCS_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADC,1,Rd,Rn,SHIFT_LSR_r(Rm,Rs)) +#define CC_ADCS_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_ADC,1,Rd,Rn,SHIFT_ASR_i(Rm,i)) +#define CC_ADCS_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADC,1,Rd,Rn,SHIFT_ASR_r(Rm,Rs)) +#define CC_ADCS_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_ADC,1,Rd,Rn,SHIFT_ROR_i(Rm,i)) 
+#define CC_ADCS_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADC,1,Rd,Rn,SHIFT_ROR_r(Rm,Rs)) +#define CC_ADCS_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_ADC,1,Rd,Rn,SHIFT_RRX(Rm)) + +#define ADCS_rri(Rd,Rn,i) CC_ADCS_rri(NATIVE_CC_AL,Rd,Rn,i) +#define ADCS_rrr(Rd,Rn,Rm) CC_ADCS_rrr(NATIVE_CC_AL,Rd,Rn,Rm) +#define ADCS_rrrLSLi(Rd,Rn,Rm,i) CC_ADCS_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define ADCS_rrrLSLr(Rd,Rn,Rm,Rs) CC_ADCS_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define ADCS_rrrLSRi(Rd,Rn,Rm,i) CC_ADCS_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define ADCS_rrrLSRr(Rd,Rn,Rm,Rs) CC_ADCS_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define ADCS_rrrASRi(Rd,Rn,Rm,i) CC_ADCS_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define ADCS_rrrASRr(Rd,Rn,Rm,Rs) CC_ADCS_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define ADCS_rrrRORi(Rd,Rn,Rm,i) CC_ADCS_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define ADCS_rrrRORr(Rd,Rn,Rm,Rs) CC_ADCS_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define ADCS_rrrRRX(Rd,Rn,Rm) CC_ADCS_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm) + +#define CC_SBC_rri(cc,Rd,Rn,i) _OP3(cc,_SBC,0,Rd,Rn,SHIFT_IMM(i)) +#define CC_SBC_rrr(cc,Rd,Rn,Rm) _OP3(cc,_SBC,0,Rd,Rn,SHIFT_REG(Rm)) +#define CC_SBC_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_SBC,0,Rd,Rn,SHIFT_LSL_i(Rm,i)) +#define CC_SBC_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_SBC,0,Rd,Rn,SHIFT_LSL_r(Rm,Rs)) +#define CC_SBC_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_SBC,0,Rd,Rn,SHIFT_LSR_i(Rm,i)) +#define CC_SBC_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_SBC,0,Rd,Rn,SHIFT_LSR_r(Rm,Rs)) +#define CC_SBC_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_SBC,0,Rd,Rn,SHIFT_ASR_i(Rm,i)) +#define CC_SBC_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_SBC,0,Rd,Rn,SHIFT_ASR_r(Rm,Rs)) +#define CC_SBC_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_SBC,0,Rd,Rn,SHIFT_ROR_i(Rm,i)) +#define CC_SBC_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_SBC,0,Rd,Rn,SHIFT_ROR_r(Rm,Rs)) +#define CC_SBC_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_SBC,0,Rd,Rn,SHIFT_RRX(Rm)) + +#define SBC_rri(Rd,Rn,i) CC_SBC_rri(NATIVE_CC_AL,Rd,Rn,i) +#define SBC_rrr(Rd,Rn,Rm) CC_SBC_rrr(NATIVE_CC_AL,Rd,Rn,Rm) +#define SBC_rrrLSLi(Rd,Rn,Rm,i) 
CC_SBC_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define SBC_rrrLSLr(Rd,Rn,Rm,Rs) CC_SBC_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define SBC_rrrLSRi(Rd,Rn,Rm,i) CC_SBC_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define SBC_rrrLSRr(Rd,Rn,Rm,Rs) CC_SBC_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define SBC_rrrASRi(Rd,Rn,Rm,i) CC_SBC_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define SBC_rrrASRr(Rd,Rn,Rm,Rs) CC_SBC_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define SBC_rrrRORi(Rd,Rn,Rm,i) CC_SBC_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define SBC_rrrRORr(Rd,Rn,Rm,Rs) CC_SBC_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define SBC_rrrRRX(Rd,Rn,Rm) CC_SBC_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm) + +#define CC_SBCS_rri(cc,Rd,Rn,i) _OP3(cc,_SBC,1,Rd,Rn,SHIFT_IMM(i)) +#define CC_SBCS_rrr(cc,Rd,Rn,Rm) _OP3(cc,_SBC,1,Rd,Rn,SHIFT_REG(Rm)) +#define CC_SBCS_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_SBC,1,Rd,Rn,SHIFT_LSL_i(Rm,i)) +#define CC_SBCS_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_SBC,1,Rd,Rn,SHIFT_LSL_r(Rm,Rs)) +#define CC_SBCS_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_SBC,1,Rd,Rn,SHIFT_LSR_i(Rm,i)) +#define CC_SBCS_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_SBC,1,Rd,Rn,SHIFT_LSR_r(Rm,Rs)) +#define CC_SBCS_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_SBC,1,Rd,Rn,SHIFT_ASR_i(Rm,i)) +#define CC_SBCS_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_SBC,1,Rd,Rn,SHIFT_ASR_r(Rm,Rs)) +#define CC_SBCS_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_SBC,1,Rd,Rn,SHIFT_ROR_i(Rm,i)) +#define CC_SBCS_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_SBC,1,Rd,Rn,SHIFT_ROR_r(Rm,Rs)) +#define CC_SBCS_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_SBC,1,Rd,Rn,SHIFT_RRX(Rm)) + +#define SBCS_rri(Rd,Rn,i) CC_SBCS_rri(NATIVE_CC_AL,Rd,Rn,i) +#define SBCS_rrr(Rd,Rn,Rm) CC_SBCS_rrr(NATIVE_CC_AL,Rd,Rn,Rm) +#define SBCS_rrrLSLi(Rd,Rn,Rm,i) CC_SBCS_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define SBCS_rrrLSLr(Rd,Rn,Rm,Rs) CC_SBCS_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define SBCS_rrrLSRi(Rd,Rn,Rm,i) CC_SBCS_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define SBCS_rrrLSRr(Rd,Rn,Rm,Rs) CC_SBCS_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define SBCS_rrrASRi(Rd,Rn,Rm,i) CC_SBCS_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i) 
+#define SBCS_rrrASRr(Rd,Rn,Rm,Rs) CC_SBCS_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define SBCS_rrrRORi(Rd,Rn,Rm,i) CC_SBCS_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define SBCS_rrrRORr(Rd,Rn,Rm,Rs) CC_SBCS_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define SBCS_rrrRRX(Rd,Rn,Rm) CC_SBCS_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm) + +#define CC_RSC_rri(cc,Rd,Rn,i) _OP3(cc,_RSC,0,Rd,Rn,SHIFT_IMM(i)) +#define CC_RSC_rrr(cc,Rd,Rn,Rm) _OP3(cc,_RSC,0,Rd,Rn,SHIFT_REG(Rm)) +#define CC_RSC_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_RSC,0,Rd,Rn,SHIFT_LSL_i(Rm,i)) +#define CC_RSC_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_RSC,0,Rd,Rn,SHIFT_LSL_r(Rm,Rs)) +#define CC_RSC_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_RSC,0,Rd,Rn,SHIFT_LSR_i(Rm,i)) +#define CC_RSC_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_RSC,0,Rd,Rn,SHIFT_LSR_r(Rm,Rs)) +#define CC_RSC_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_RSC,0,Rd,Rn,SHIFT_ASR_i(Rm,i)) +#define CC_RSC_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_RSC,0,Rd,Rn,SHIFT_ASR_r(Rm,Rs)) +#define CC_RSC_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_RSC,0,Rd,Rn,SHIFT_ROR_i(Rm,i)) +#define CC_RSC_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_RSC,0,Rd,Rn,SHIFT_ROR_r(Rm,Rs)) +#define CC_RSC_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_RSC,0,Rd,Rn,SHIFT_RRX(Rm)) + +#define RSC_rri(Rd,Rn,i) CC_RSC_rri(NATIVE_CC_AL,Rd,Rn,i) +#define RSC_rrr(Rd,Rn,Rm) CC_RSC_rrr(NATIVE_CC_AL,Rd,Rn,Rm) +#define RSC_rrrLSLi(Rd,Rn,Rm,i) CC_RSC_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define RSC_rrrLSLr(Rd,Rn,Rm,Rs) CC_RSC_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define RSC_rrrLSRi(Rd,Rn,Rm,i) CC_RSC_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define RSC_rrrLSRr(Rd,Rn,Rm,Rs) CC_RSC_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define RSC_rrrASRi(Rd,Rn,Rm,i) CC_RSC_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define RSC_rrrASRr(Rd,Rn,Rm,Rs) CC_RSC_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define RSC_rrrRORi(Rd,Rn,Rm,i) CC_RSC_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define RSC_rrrRORr(Rd,Rn,Rm,Rs) CC_RSC_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define RSC_rrrRRX(Rd,Rn,Rm) CC_RSC_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm) + +#define CC_RSCS_rri(cc,Rd,Rn,i) _OP3(cc,_RSC,1,Rd,Rn,SHIFT_IMM(i)) 
+#define CC_RSCS_rrr(cc,Rd,Rn,Rm) _OP3(cc,_RSC,1,Rd,Rn,SHIFT_REG(Rm)) +#define CC_RSCS_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_RSC,1,Rd,Rn,SHIFT_LSL_i(Rm,i)) +#define CC_RSCS_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_RSC,1,Rd,Rn,SHIFT_LSL_r(Rm,Rs)) +#define CC_RSCS_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_RSC,1,Rd,Rn,SHIFT_LSR_i(Rm,i)) +#define CC_RSCS_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_RSC,1,Rd,Rn,SHIFT_LSR_r(Rm,Rs)) +#define CC_RSCS_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_RSC,1,Rd,Rn,SHIFT_ASR_i(Rm,i)) +#define CC_RSCS_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_RSC,1,Rd,Rn,SHIFT_ASR_r(Rm,Rs)) +#define CC_RSCS_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_RSC,1,Rd,Rn,SHIFT_ROR_i(Rm,i)) +#define CC_RSCS_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_RSC,1,Rd,Rn,SHIFT_ROR_r(Rm,Rs)) +#define CC_RSCS_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_RSC,1,Rd,Rn,SHIFT_RRX(Rm)) + +#define RSCS_rri(Rd,Rn,i) CC_RSCS_rri(NATIVE_CC_AL,Rd,Rn,i) +#define RSCS_rrr(Rd,Rn,Rm) CC_RSCS_rrr(NATIVE_CC_AL,Rd,Rn,Rm) +#define RSCS_rrrLSLi(Rd,Rn,Rm,i) CC_RSCS_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define RSCS_rrrLSLr(Rd,Rn,Rm,Rs) CC_RSCS_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define RSCS_rrrLSRi(Rd,Rn,Rm,i) CC_RSCS_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define RSCS_rrrLSRr(Rd,Rn,Rm,Rs) CC_RSCS_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define RSCS_rrrASRi(Rd,Rn,Rm,i) CC_RSCS_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define RSCS_rrrASRr(Rd,Rn,Rm,Rs) CC_RSCS_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define RSCS_rrrRORi(Rd,Rn,Rm,i) CC_RSCS_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define RSCS_rrrRORr(Rd,Rn,Rm,Rs) CC_RSCS_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define RSCS_rrrRRX(Rd,Rn,Rm) CC_RSCS_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm) + +// ORRcc Rd,Rn,#i +#define CC_ORR_rri8(cc,Rd,Rn,i) _OP3(cc,_ORR,0,Rd,Rn,UNSHIFTED_IMM8(i)) +// ORRcc Rd,Rn,#i ROR #s +#define CC_ORR_rri8RORi(cc,Rd,Rn,i,s) _OP3(cc,_ORR,0,Rd,Rn,SHIFT_IMM8_ROR(i,s)) + +#define CC_ORR_rri(cc,Rd,Rn,i) _OP3(cc,_ORR,0,Rd,Rn,SHIFT_IMM(i)) +#define CC_ORR_rrr(cc,Rd,Rn,Rm) _OP3(cc,_ORR,0,Rd,Rn,SHIFT_REG(Rm)) +#define CC_ORR_rrrLSLi(cc,Rd,Rn,Rm,i) 
_OP3(cc,_ORR,0,Rd,Rn,SHIFT_LSL_i(Rm,i)) +#define CC_ORR_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ORR,0,Rd,Rn,SHIFT_LSL_r(Rm,Rs)) +#define CC_ORR_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_ORR,0,Rd,Rn,SHIFT_LSR_i(Rm,i)) +#define CC_ORR_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ORR,0,Rd,Rn,SHIFT_LSR_r(Rm,Rs)) +#define CC_ORR_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_ORR,0,Rd,Rn,SHIFT_ASR_i(Rm,i)) +#define CC_ORR_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ORR,0,Rd,Rn,SHIFT_ASR_r(Rm,Rs)) +#define CC_ORR_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_ORR,0,Rd,Rn,SHIFT_ROR_i(Rm,i)) +#define CC_ORR_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ORR,0,Rd,Rn,SHIFT_ROR_r(Rm,Rs)) +#define CC_ORR_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_ORR,0,Rd,Rn,SHIFT_RRX(Rm)) + +// ORR Rd,Rn,#i +#define ORR_rri8(Rd,Rn,i) CC_ORR_rri8(NATIVE_CC_AL,Rd,Rn,i) +// ORR Rd,Rn,#i ROR #s +#define ORR_rri8RORi(Rd,Rn,i,s) CC_ORR_rri8RORi(NATIVE_CC_AL,Rd,Rn,i,s) + +#define ORR_rri(Rd,Rn,i) CC_ORR_rri(NATIVE_CC_AL,Rd,Rn,i) +#define ORR_rrr(Rd,Rn,Rm) CC_ORR_rrr(NATIVE_CC_AL,Rd,Rn,Rm) +#define ORR_rrrLSLi(Rd,Rn,Rm,i) CC_ORR_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define ORR_rrrLSLr(Rd,Rn,Rm,Rs) CC_ORR_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define ORR_rrrLSRi(Rd,Rn,Rm,i) CC_ORR_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define ORR_rrrLSRr(Rd,Rn,Rm,Rs) CC_ORR_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define ORR_rrrASRi(Rd,Rn,Rm,i) CC_ORR_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define ORR_rrrASRr(Rd,Rn,Rm,Rs) CC_ORR_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define ORR_rrrRORi(Rd,Rn,Rm,i) CC_ORR_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define ORR_rrrRORr(Rd,Rn,Rm,Rs) CC_ORR_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define ORR_rrrRRX(Rd,Rn,Rm) CC_ORR_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm) + +#define CC_ORRS_rri(cc,Rd,Rn,i) _OP3(cc,_ORR,1,Rd,Rn,SHIFT_IMM(i)) +#define CC_ORRS_rrr(cc,Rd,Rn,Rm) _OP3(cc,_ORR,1,Rd,Rn,SHIFT_REG(Rm)) +#define CC_ORRS_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_ORR,1,Rd,Rn,SHIFT_LSL_i(Rm,i)) +#define CC_ORRS_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ORR,1,Rd,Rn,SHIFT_LSL_r(Rm,Rs)) +#define CC_ORRS_rrrLSRi(cc,Rd,Rn,Rm,i) 
_OP3(cc,_ORR,1,Rd,Rn,SHIFT_LSR_i(Rm,i)) +#define CC_ORRS_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ORR,1,Rd,Rn,SHIFT_LSR_r(Rm,Rs)) +#define CC_ORRS_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_ORR,1,Rd,Rn,SHIFT_ASR_i(Rm,i)) +#define CC_ORRS_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ORR,1,Rd,Rn,SHIFT_ASR_r(Rm,Rs)) +#define CC_ORRS_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_ORR,1,Rd,Rn,SHIFT_ROR_i(Rm,i)) +#define CC_ORRS_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ORR,1,Rd,Rn,SHIFT_ROR_r(Rm,Rs)) +#define CC_ORRS_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_ORR,1,Rd,Rn,SHIFT_RRX(Rm)) + +#define ORRS_rri(Rd,Rn,i) CC_ORRS_rri(NATIVE_CC_AL,Rd,Rn,i) +#define ORRS_rrr(Rd,Rn,Rm) CC_ORRS_rrr(NATIVE_CC_AL,Rd,Rn,Rm) +#define ORRS_rrrLSLi(Rd,Rn,Rm,i) CC_ORRS_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define ORRS_rrrLSLr(Rd,Rn,Rm,Rs) CC_ORRS_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define ORRS_rrrLSRi(Rd,Rn,Rm,i) CC_ORRS_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define ORRS_rrrLSRr(Rd,Rn,Rm,Rs) CC_ORRS_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define ORRS_rrrASRi(Rd,Rn,Rm,i) CC_ORRS_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define ORRS_rrrASRr(Rd,Rn,Rm,Rs) CC_ORRS_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define ORRS_rrrRORi(Rd,Rn,Rm,i) CC_ORRS_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define ORRS_rrrRORr(Rd,Rn,Rm,Rs) CC_ORRS_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define ORRS_rrrRRX(Rd,Rn,Rm) CC_ORRS_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm) + +#define CC_BIC_rri(cc,Rd,Rn,i) _OP3(cc,_BIC,0,Rd,Rn,SHIFT_IMM(i)) +#define CC_BIC_rrr(cc,Rd,Rn,Rm) _OP3(cc,_BIC,0,Rd,Rn,SHIFT_REG(Rm)) +#define CC_BIC_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_BIC,0,Rd,Rn,SHIFT_LSL_i(Rm,i)) +#define CC_BIC_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_BIC,0,Rd,Rn,SHIFT_LSL_r(Rm,Rs)) +#define CC_BIC_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_BIC,0,Rd,Rn,SHIFT_LSR_i(Rm,i)) +#define CC_BIC_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_BIC,0,Rd,Rn,SHIFT_LSR_r(Rm,Rs)) +#define CC_BIC_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_BIC,0,Rd,Rn,SHIFT_ASR_i(Rm,i)) +#define CC_BIC_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_BIC,0,Rd,Rn,SHIFT_ASR_r(Rm,Rs)) +#define CC_BIC_rrrRORi(cc,Rd,Rn,Rm,i) 
_OP3(cc,_BIC,0,Rd,Rn,SHIFT_ROR_i(Rm,i)) +#define CC_BIC_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_BIC,0,Rd,Rn,SHIFT_ROR_r(Rm,Rs)) +#define CC_BIC_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_BIC,0,Rd,Rn,SHIFT_RRX(Rm)) + +#define BIC_rri(Rd,Rn,i) CC_BIC_rri(NATIVE_CC_AL,Rd,Rn,i) +#define BIC_rrr(Rd,Rn,Rm) CC_BIC_rrr(NATIVE_CC_AL,Rd,Rn,Rm) +#define BIC_rrrLSLi(Rd,Rn,Rm,i) CC_BIC_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define BIC_rrrLSLr(Rd,Rn,Rm,Rs) CC_BIC_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define BIC_rrrLSRi(Rd,Rn,Rm,i) CC_BIC_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define BIC_rrrLSRr(Rd,Rn,Rm,Rs) CC_BIC_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define BIC_rrrASRi(Rd,Rn,Rm,i) CC_BIC_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define BIC_rrrASRr(Rd,Rn,Rm,Rs) CC_BIC_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define BIC_rrrRORi(Rd,Rn,Rm,i) CC_BIC_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define BIC_rrrRORr(Rd,Rn,Rm,Rs) CC_BIC_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs) +#define BIC_rrrRRX(Rd,Rn,Rm) CC_BIC_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm) + +#define CC_BICS_rri(cc,Rd,Rn,i) _OP3(cc,_BIC,1,Rd,Rn,SHIFT_IMM(i)) +#define CC_BICS_rrr(cc,Rd,Rn,Rm) _OP3(cc,_BIC,1,Rd,Rn,SHIFT_REG(Rm)) +#define CC_BICS_rrrLSLi(cc,Rd,Rn,Rm,i) _OP3(cc,_BIC,1,Rd,Rn,SHIFT_LSL_i(Rm,i)) +#define CC_BICS_rrrLSLr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_BIC,1,Rd,Rn,SHIFT_LSL_r(Rm,Rs)) +#define CC_BICS_rrrLSRi(cc,Rd,Rn,Rm,i) _OP3(cc,_BIC,1,Rd,Rn,SHIFT_LSR_i(Rm,i)) +#define CC_BICS_rrrLSRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_BIC,1,Rd,Rn,SHIFT_LSR_r(Rm,Rs)) +#define CC_BICS_rrrASRi(cc,Rd,Rn,Rm,i) _OP3(cc,_BIC,1,Rd,Rn,SHIFT_ASR_i(Rm,i)) +#define CC_BICS_rrrASRr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_BIC,1,Rd,Rn,SHIFT_ASR_r(Rm,Rs)) +#define CC_BICS_rrrRORi(cc,Rd,Rn,Rm,i) _OP3(cc,_BIC,1,Rd,Rn,SHIFT_ROR_i(Rm,i)) +#define CC_BICS_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_BIC,1,Rd,Rn,SHIFT_ROR_r(Rm,Rs)) +#define CC_BICS_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_BIC,1,Rd,Rn,SHIFT_RRX(Rm)) + +#define BICS_rri(Rd,Rn,i) CC_BICS_rri(NATIVE_CC_AL,Rd,Rn,i) +#define BICS_rrr(Rd,Rn,Rm) CC_BICS_rrr(NATIVE_CC_AL,Rd,Rn,Rm) +#define BICS_rrrLSLi(Rd,Rn,Rm,i) 
CC_BICS_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define BICS_rrrLSLr(Rd,Rn,Rm,Rs) CC_BICS_rrrLSLr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
+#define BICS_rrrLSRi(Rd,Rn,Rm,i) CC_BICS_rrrLSRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define BICS_rrrLSRr(Rd,Rn,Rm,Rs) CC_BICS_rrrLSRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
+#define BICS_rrrASRi(Rd,Rn,Rm,i) CC_BICS_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define BICS_rrrASRr(Rd,Rn,Rm,Rs) CC_BICS_rrrASRr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
+#define BICS_rrrRORi(Rd,Rn,Rm,i) CC_BICS_rrrRORi(NATIVE_CC_AL,Rd,Rn,Rm,i)
+#define BICS_rrrRORr(Rd,Rn,Rm,Rs) CC_BICS_rrrRORr(NATIVE_CC_AL,Rd,Rn,Rm,Rs)
+#define BICS_rrrRRX(Rd,Rn,Rm) CC_BICS_rrrRRX(NATIVE_CC_AL,Rd,Rn,Rm)
+
+/* Branch instructions */
+#define CC_B_i(cc,i) _W(((cc) << 28) | (10 << 24) | (i))
+#define CC_BL_i(cc,i) _W(((cc) << 28) | (11 << 24) | (i))
+#define CC_BLX_r(cc,r) _W(((cc) << 28) | (0x12 << 20) | (3 << 4) | (0xfff << 8) | (r))
+#define CC_BX_r(cc,r) _W(((cc) << 28) | (0x12 << 20) | (1 << 4) | (0xfff << 8) | (r))
+#define CC_BXJ_r(cc,r) _W(((cc) << 28) | (0x12 << 20) | (2 << 4) | (0xfff << 8) | (r))
+
+#define BEQ_i(i) CC_B_i(NATIVE_CC_EQ,i)
+#define BNE_i(i) CC_B_i(NATIVE_CC_NE,i)
+#define BCS_i(i) CC_B_i(NATIVE_CC_CS,i)
+#define BCC_i(i) CC_B_i(NATIVE_CC_CC,i)
+#define BMI_i(i) CC_B_i(NATIVE_CC_MI,i)
+#define BPL_i(i) CC_B_i(NATIVE_CC_PL,i)
+#define BVS_i(i) CC_B_i(NATIVE_CC_VS,i)
+#define BVC_i(i) CC_B_i(NATIVE_CC_VC,i)
+#define BHI_i(i) CC_B_i(NATIVE_CC_HI,i)
+#define BLS_i(i) CC_B_i(NATIVE_CC_LS,i)
+#define BGE_i(i) CC_B_i(NATIVE_CC_GE,i)
+#define BLT_i(i) CC_B_i(NATIVE_CC_LT,i)
+#define BGT_i(i) CC_B_i(NATIVE_CC_GT,i)
+#define BLE_i(i) CC_B_i(NATIVE_CC_LE,i)
+#define B_i(i) CC_B_i(NATIVE_CC_AL,i)
+
+#define BL_i(i) CC_BL_i(NATIVE_CC_AL,i)
+#define BLX_i(i) _W((0xf << 28) | (10 << 24) | (i))
+#define BLX_r(r) CC_BLX_r(NATIVE_CC_AL,r)
+#define BX_r(r) CC_BX_r(NATIVE_CC_AL,r)
+#define BXJ_r(r) CC_BXJ_r(NATIVE_CC_AL,r)
+
+/* Status register instructions */
+#define CC_MRS_CPSR(cc,Rd) _W(((cc) << 28) | (0x10 << 20) |
((Rd) << 12) | (0xf << 16)) +#define MRS_CPSR(Rd) CC_MRS_CPSR(NATIVE_CC_AL,Rd) +#define CC_MRS_SPSR(cc,Rd) _W(((cc) << 28) | (0x14 << 20) | ((Rd) << 12) | (0xf << 16)) +#define MRS_SPSR(Rd) CC_MRS_SPSR(NATIVE_CC_AL,Rd) + +#define CC_MSR_CPSR_i(cc,i) _W(((cc) << 28) | (0x32 << 20) | (0x9 << 16) | (0xf << 12) | SHIFT_IMM(i)) +#define CC_MSR_CPSR_r(cc,Rm) _W(((cc) << 28) | (0x12 << 20) | (0x9 << 16) | (0xf << 12) | (Rm)) + +#define MSR_CPSR_i(i) CC_MSR_CPSR_i(NATIVE_CC_AL,(i)) +#define MSR_CPSR_r(Rm) CC_MSR_CPSR_r(NATIVE_CC_AL,(Rm)) + +#define CC_MSR_CPSRf_i(cc,i) _W(((cc) << 28) | (0x32 << 20) | (0x8 << 16) | (0xf << 12) | SHIFT_IMM(i)) +#define CC_MSR_CPSRf_r(cc,Rm) _W(((cc) << 28) | (0x12 << 20) | (0x8 << 16) | (0xf << 12) | (Rm)) + +#define MSR_CPSRf_i(i) CC_MSR_CPSRf_i(NATIVE_CC_AL,(i)) +#define MSR_CPSRf_r(Rm) CC_MSR_CPSRf_r(NATIVE_CC_AL,(Rm)) + +#define CC_MSR_CPSRc_i(cc,i) _W(((cc) << 28) | (0x32 << 20) | (0x1 << 16) | (0xf << 12) | SHIFT_IMM(i)) +#define CC_MSR_CPSRc_r(cc,Rm) _W(((cc) << 28) | (0x12 << 20) | (0x1 << 16) | (0xf << 12) | (Rm)) + +#define MSR_CPSRc_i(i) CC_MSR_CPSRc_i(NATIVE_CC_AL,(i)) +#define MSR_CPSRc_r(Rm) CC_MSR_CPSRc_r(NATIVE_CC_AL,(Rm)) + +/* Load Store instructions */ + +#define CC_PUSH(cc,r) _W(((cc) << 28) | (0x92d << 16) | (1 << (r))) +#define PUSH(r) CC_PUSH(NATIVE_CC_AL, r) + +#define CC_PUSH_REGS(cc,r) _W(((cc) << 28) | (0x92d << 16) | (r)) +#define PUSH_REGS(r) CC_PUSH_REGS(NATIVE_CC_AL, r) + +#define CC_POP(cc,r) _W(((cc) << 28) | (0x8bd << 16) | (1 << (r))) +#define POP(r) CC_POP(NATIVE_CC_AL, r) + +#define CC_POP_REGS(cc,r) _W(((cc) << 28) | (0x8bd << 16) | (r)) +#define POP_REGS(r) CC_POP_REGS(NATIVE_CC_AL, r) + +#define CC_LDR_rR(cc,Rd,Rn) _LS1(cc,1,0,Rd,Rn,ADD_IMM(0)) +#define CC_LDR_rRI(cc,Rd,Rn,i) _LS1(cc,1,0,Rd,Rn,(i) >= 0 ? 
ADD_IMM(i) : SUB_IMM(-(i))) +#define CC_LDR_rRi(cc,Rd,Rn,i) _LS1(cc,1,0,Rd,Rn,SUB_IMM(i)) +#define CC_LDR_rRR(cc,Rd,Rn,Rm) _LS1(cc,1,0,Rd,Rn,ADD_REG(Rm)) +#define CC_LDR_rRr(cc,Rd,Rn,Rm) _LS1(cc,1,0,Rd,Rn,SUB_REG(Rm)) +#define CC_LDR_rRR_LSLi(cc,Rd,Rn,Rm,i) _LS1(cc,1,0,Rd,Rn,ADD_LSL(Rm,i)) +#define CC_LDR_rRr_LSLi(cc,Rd,Rn,Rm,i) _LS1(cc,1,0,Rd,Rn,SUB_LSL(Rm,i)) +#define CC_LDR_rRR_LSRi(cc,Rd,Rn,Rm,i) _LS1(cc,1,0,Rd,Rn,ADD_LSR(Rm,i)) +#define CC_LDR_rRr_LSRi(cc,Rd,Rn,Rm,i) _LS1(cc,1,0,Rd,Rn,SUB_LSR(Rm,i)) +#define CC_LDR_rRR_ASRi(cc,Rd,Rn,Rm,i) _LS1(cc,1,0,Rd,Rn,ADD_ASR(Rm,i)) +#define CC_LDR_rRr_ASRi(cc,Rd,Rn,Rm,i) _LS1(cc,1,0,Rd,Rn,SUB_ASR(Rm,i)) +#define CC_LDR_rRR_RORi(cc,Rd,Rn,Rm,i) _LS1(cc,1,0,Rd,Rn,ADD_ROR(Rm,i)) +#define CC_LDR_rRr_RORi(cc,Rd,Rn,Rm,i) _LS1(cc,1,0,Rd,Rn,SUB_ROR(Rm,i)) +#define CC_LDR_rRR_RRX(cc,Rd,Rn,Rm) _LS1(cc,1,0,Rd,Rn,ADD_RRX(Rm)) +#define CC_LDR_rRr_RRX(cc,Rd,Rn,Rm) _LS1(cc,1,0,Rd,Rn,SUB_RRX(Rm)) + +#define LDR_rR(Rd,Rn) CC_LDR_rR(NATIVE_CC_AL,Rd,Rn) +#define LDR_rRI(Rd,Rn,i) CC_LDR_rRI(NATIVE_CC_AL,Rd,Rn,i) +#define LDR_rRi(Rd,Rn,i) CC_LDR_rRi(NATIVE_CC_AL,Rd,Rn,i) +#define LDR_rRR(Rd,Rn,Rm) CC_LDR_rRR(NATIVE_CC_AL,Rd,Rn,Rm) +#define LDR_rRr(Rd,Rn,Rm) CC_LDR_rRr(NATIVE_CC_AL,Rd,Rn,Rm) +#define LDR_rRR_LSLi(Rd,Rn,Rm,i) CC_LDR_rRR_LSLi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define LDR_rRr_LSLi(Rd,Rn,Rm,i) CC_LDR_rRr_LSLi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define LDR_rRR_LSRi(Rd,Rn,Rm,i) CC_LDR_rRR_LSRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define LDR_rRr_LSRi(Rd,Rn,Rm,i) CC_LDR_rRr_LSRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define LDR_rRR_ASRi(Rd,Rn,Rm,i) CC_LDR_rRR_ASRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define LDR_rRr_ASRi(Rd,Rn,Rm,i) CC_LDR_rRr_ASRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define LDR_rRR_RORi(Rd,Rn,Rm,i) CC_LDR_rRR_RORi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define LDR_rRr_RORi(Rd,Rn,Rm,i) CC_LDR_rRr_RORi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define LDR_rRR_RRX(Rd,Rn,Rm) CC_LDR_rRR_RRX(NATIVE_CC_AL,Rd,Rn,Rm) +#define LDR_rRr_RRX(Rd,Rn,Rm) CC_LDR_rRr_RRX(NATIVE_CC_AL,Rd,Rn,Rm) + +#define 
CC_STR_rR(cc,Rd,Rn) _LS1(cc,0,0,Rd,Rn,ADD_IMM(0)) +#define CC_STR_rRI(cc,Rd,Rn,i) _LS1(cc,0,0,Rd,Rn,ADD_IMM(i)) +#define CC_STR_rRi(cc,Rd,Rn,i) _LS1(cc,0,0,Rd,Rn,SUB_IMM(i)) +#define CC_STR_rRR(cc,Rd,Rn,Rm) _LS1(cc,0,0,Rd,Rn,ADD_REG(Rm)) +#define CC_STR_rRr(cc,Rd,Rn,Rm) _LS1(cc,0,0,Rd,Rn,SUB_REG(Rm)) +#define CC_STR_rRR_LSLi(cc,Rd,Rn,Rm,i) _LS1(cc,0,0,Rd,Rn,ADD_LSL(Rm,i)) +#define CC_STR_rRr_LSLi(cc,Rd,Rn,Rm,i) _LS1(cc,0,0,Rd,Rn,SUB_LSL(Rm,i)) +#define CC_STR_rRR_LSRi(cc,Rd,Rn,Rm,i) _LS1(cc,0,0,Rd,Rn,ADD_LSR(Rm,i)) +#define CC_STR_rRr_LSRi(cc,Rd,Rn,Rm,i) _LS1(cc,0,0,Rd,Rn,SUB_LSR(Rm,i)) +#define CC_STR_rRR_ASRi(cc,Rd,Rn,Rm,i) _LS1(cc,0,0,Rd,Rn,ADD_ASR(Rm,i)) +#define CC_STR_rRr_ASRi(cc,Rd,Rn,Rm,i) _LS1(cc,0,0,Rd,Rn,SUB_ASR(Rm,i)) +#define CC_STR_rRR_RORi(cc,Rd,Rn,Rm,i) _LS1(cc,0,0,Rd,Rn,ADD_ROR(Rm,i)) +#define CC_STR_rRr_RORi(cc,Rd,Rn,Rm,i) _LS1(cc,0,0,Rd,Rn,SUB_ROR(Rm,i)) +#define CC_STR_rRR_RRX(cc,Rd,Rn,Rm) _LS1(cc,0,0,Rd,Rn,ADD_RRX(Rm)) +#define CC_STR_rRr_RRX(cc,Rd,Rn,Rm) _LS1(cc,0,0,Rd,Rn,SUB_RRX(Rm)) + +#define STR_rR(Rd,Rn) CC_STR_rR(NATIVE_CC_AL,Rd,Rn) +#define STR_rRI(Rd,Rn,i) CC_STR_rRI(NATIVE_CC_AL,Rd,Rn,i) +#define STR_rRi(Rd,Rn,i) CC_STR_rRi(NATIVE_CC_AL,Rd,Rn,i) +#define STR_rRR(Rd,Rn,Rm) CC_STR_rRR(NATIVE_CC_AL,Rd,Rn,Rm) +#define STR_rRr(Rd,Rn,Rm) CC_STR_rRr(NATIVE_CC_AL,Rd,Rn,Rm) +#define STR_rRR_LSLi(Rd,Rn,Rm,i) CC_STR_rRR_LSLi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define STR_rRr_LSLi(Rd,Rn,Rm,i) CC_STR_rRr_LSLi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define STR_rRR_LSRi(Rd,Rn,Rm,i) CC_STR_rRR_LSRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define STR_rRr_LSRi(Rd,Rn,Rm,i) CC_STR_rRr_LSRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define STR_rRR_ASRi(Rd,Rn,Rm,i) CC_STR_rRR_ASRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define STR_rRr_ASRi(Rd,Rn,Rm,i) CC_STR_rRr_ASRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define STR_rRR_RORi(Rd,Rn,Rm,i) CC_STR_rRR_RORi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define STR_rRr_RORi(Rd,Rn,Rm,i) CC_STR_rRr_RORi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define STR_rRR_RRX(Rd,Rn,Rm) CC_STR_rRR_RRX(NATIVE_CC_AL,Rd,Rn,Rm) +#define 
STR_rRr_RRX(Rd,Rn,Rm) CC_STR_rRr_RRX(NATIVE_CC_AL,Rd,Rn,Rm) + +#define CC_LDRB_rR(cc,Rd,Rn) _LS1(cc,1,1,Rd,Rn,ADD_IMM(0)) +#define CC_LDRB_rRI(cc,Rd,Rn,i) _LS1(cc,1,1,Rd,Rn,ADD_IMM(i)) +#define CC_LDRB_rRi(cc,Rd,Rn,i) _LS1(cc,1,1,Rd,Rn,SUB_IMM(i)) +#define CC_LDRB_rRR(cc,Rd,Rn,Rm) _LS1(cc,1,1,Rd,Rn,ADD_REG(Rm)) +#define CC_LDRB_rRr(cc,Rd,Rn,Rm) _LS1(cc,1,1,Rd,Rn,SUB_REG(Rm)) +#define CC_LDRB_rRR_LSLi(cc,Rd,Rn,Rm,i) _LS1(cc,1,1,Rd,Rn,ADD_LSL(Rm,i)) +#define CC_LDRB_rRr_LSLi(cc,Rd,Rn,Rm,i) _LS1(cc,1,1,Rd,Rn,SUB_LSL(Rm,i)) +#define CC_LDRB_rRR_LSRi(cc,Rd,Rn,Rm,i) _LS1(cc,1,1,Rd,Rn,ADD_LSR(Rm,i)) +#define CC_LDRB_rRr_LSRi(cc,Rd,Rn,Rm,i) _LS1(cc,1,1,Rd,Rn,SUB_LSR(Rm,i)) +#define CC_LDRB_rRR_ASRi(cc,Rd,Rn,Rm,i) _LS1(cc,1,1,Rd,Rn,ADD_ASR(Rm,i)) +#define CC_LDRB_rRr_ASRi(cc,Rd,Rn,Rm,i) _LS1(cc,1,1,Rd,Rn,SUB_ASR(Rm,i)) +#define CC_LDRB_rRR_RORi(cc,Rd,Rn,Rm,i) _LS1(cc,1,1,Rd,Rn,ADD_ROR(Rm,i)) +#define CC_LDRB_rRr_RORi(cc,Rd,Rn,Rm,i) _LS1(cc,1,1,Rd,Rn,SUB_ROR(Rm,i)) +#define CC_LDRB_rRR_RRX(cc,Rd,Rn,Rm) _LS1(cc,1,1,Rd,Rn,ADD_RRX(Rm)) +#define CC_LDRB_rRr_RRX(cc,Rd,Rn,Rm) _LS1(cc,1,1,Rd,Rn,SUB_RRX(Rm)) + +#define LDRB_rR(Rd,Rn) CC_LDRB_rR(NATIVE_CC_AL,Rd,Rn) +#define LDRB_rRI(Rd,Rn,i) CC_LDRB_rRI(NATIVE_CC_AL,Rd,Rn,i) +#define LDRB_rRi(Rd,Rn,i) CC_LDRB_rRi(NATIVE_CC_AL,Rd,Rn,i) +#define LDRB_rRR(Rd,Rn,Rm) CC_LDRB_rRR(NATIVE_CC_AL,Rd,Rn,Rm) +#define LDRB_rRr(Rd,Rn,Rm) CC_LDRB_rRr(NATIVE_CC_AL,Rd,Rn,Rm) +#define LDRB_rRR_LSLi(Rd,Rn,Rm,i) CC_LDRB_rRR_LSLi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define LDRB_rRr_LSLi(Rd,Rn,Rm,i) CC_LDRB_rRr_LSLi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define LDRB_rRR_LSRi(Rd,Rn,Rm,i) CC_LDRB_rRR_LSRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define LDRB_rRr_LSRi(Rd,Rn,Rm,i) CC_LDRB_rRr_LSRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define LDRB_rRR_ASRi(Rd,Rn,Rm,i) CC_LDRB_rRR_ASRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define LDRB_rRr_ASRi(Rd,Rn,Rm,i) CC_LDRB_rRr_ASRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define LDRB_rRR_RORi(Rd,Rn,Rm,i) CC_LDRB_rRR_RORi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define LDRB_rRr_RORi(Rd,Rn,Rm,i) 
CC_LDRB_rRr_RORi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define LDRB_rRR_RRX(Rd,Rn,Rm) CC_LDRB_rRR_RRX(NATIVE_CC_AL,Rd,Rn,Rm) +#define LDRB_rRr_RRX(Rd,Rn,Rm) CC_LDRB_rRr_RRX(NATIVE_CC_AL,Rd,Rn,Rm) + +#define CC_STRB_rR(cc,Rd,Rn) _LS1(cc,0,1,Rd,Rn,ADD_IMM(0)) +#define CC_STRB_rRI(cc,Rd,Rn,i) _LS1(cc,0,1,Rd,Rn,ADD_IMM(i)) +#define CC_STRB_rRi(cc,Rd,Rn,i) _LS1(cc,0,1,Rd,Rn,SUB_IMM(i)) +#define CC_STRB_rRR(cc,Rd,Rn,Rm) _LS1(cc,0,1,Rd,Rn,ADD_REG(Rm)) +#define CC_STRB_rRr(cc,Rd,Rn,Rm) _LS1(cc,0,1,Rd,Rn,SUB_REG(Rm)) +#define CC_STRB_rRR_LSLi(cc,Rd,Rn,Rm,i) _LS1(cc,0,1,Rd,Rn,ADD_LSL(Rm,i)) +#define CC_STRB_rRr_LSLi(cc,Rd,Rn,Rm,i) _LS1(cc,0,1,Rd,Rn,SUB_LSL(Rm,i)) +#define CC_STRB_rRR_LSRi(cc,Rd,Rn,Rm,i) _LS1(cc,0,1,Rd,Rn,ADD_LSR(Rm,i)) +#define CC_STRB_rRr_LSRi(cc,Rd,Rn,Rm,i) _LS1(cc,0,1,Rd,Rn,SUB_LSR(Rm,i)) +#define CC_STRB_rRR_ASRi(cc,Rd,Rn,Rm,i) _LS1(cc,0,1,Rd,Rn,ADD_ASR(Rm,i)) +#define CC_STRB_rRr_ASRi(cc,Rd,Rn,Rm,i) _LS1(cc,0,1,Rd,Rn,SUB_ASR(Rm,i)) +#define CC_STRB_rRR_RORi(cc,Rd,Rn,Rm,i) _LS1(cc,0,1,Rd,Rn,ADD_ROR(Rm,i)) +#define CC_STRB_rRr_RORi(cc,Rd,Rn,Rm,i) _LS1(cc,0,1,Rd,Rn,SUB_ROR(Rm,i)) +#define CC_STRB_rRR_RRX(cc,Rd,Rn,Rm) _LS1(cc,0,1,Rd,Rn,ADD_RRX(Rm)) +#define CC_STRB_rRr_RRX(cc,Rd,Rn,Rm) _LS1(cc,0,1,Rd,Rn,SUB_RRX(Rm)) + +#define STRB_rR(Rd,Rn) CC_STRB_rR(NATIVE_CC_AL,Rd,Rn) +#define STRB_rRI(Rd,Rn,i) CC_STRB_rRI(NATIVE_CC_AL,Rd,Rn,i) +#define STRB_rRi(Rd,Rn,i) CC_STRB_rRi(NATIVE_CC_AL,Rd,Rn,i) +#define STRB_rRR(Rd,Rn,Rm) CC_STRB_rRR(NATIVE_CC_AL,Rd,Rn,Rm) +#define STRB_rRr(Rd,Rn,Rm) CC_STRB_rRr(NATIVE_CC_AL,Rd,Rn,Rm) +#define STRB_rRR_LSLi(Rd,Rn,Rm,i) CC_STRB_rRR_LSLi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define STRB_rRr_LSLi(Rd,Rn,Rm,i) CC_STRB_rRr_LSLi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define STRB_rRR_LSRi(Rd,Rn,Rm,i) CC_STRB_rRR_LSRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define STRB_rRr_LSRi(Rd,Rn,Rm,i) CC_STRB_rRr_LSRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define STRB_rRR_ASRi(Rd,Rn,Rm,i) CC_STRB_rRR_ASRi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define STRB_rRr_ASRi(Rd,Rn,Rm,i) CC_STRB_rRr_ASRi(NATIVE_CC_AL,Rd,Rn,Rm,i) 
+#define STRB_rRR_RORi(Rd,Rn,Rm,i) CC_STRB_rRR_RORi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define STRB_rRr_RORi(Rd,Rn,Rm,i) CC_STRB_rRr_RORi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define STRB_rRR_RRX(Rd,Rn,Rm) CC_STRB_rRR_RRX(NATIVE_CC_AL,Rd,Rn,Rm) +#define STRB_rRr_RRX(Rd,Rn,Rm) CC_STRB_rRr_RRX(NATIVE_CC_AL,Rd,Rn,Rm) + +#define CC_LDRSH_rR(cc,Rd,Rn) _LS2(cc,1,1,1,1,Rd,Rn,ADD2_IMM(0)) +#define CC_LDRSH_rRI(cc,Rd,Rn,i) _LS2(cc,1,1,1,1,Rd,Rn,ADD2_IMM(i)) +#define CC_LDRSH_rRi(cc,Rd,Rn,i) _LS2(cc,1,1,1,1,Rd,Rn,SUB2_IMM(i)) +#define CC_LDRSH_rRR(cc,Rd,Rn,Rm) _LS2(cc,1,1,1,1,Rd,Rn,ADD2_REG(Rm)) +#define CC_LDRSH_rRr(cc,Rd,Rn,Rm) _LS2(cc,1,1,1,1,Rd,Rn,SUB2_REG(Rm)) + +#define LDRSH_rR(Rd,Rn) CC_LDRSH_rR(NATIVE_CC_AL,Rd,Rn) +#define LDRSH_rRI(Rd,Rn,i) CC_LDRSH_rRI(NATIVE_CC_AL,Rd,Rn,i) +#define LDRSH_rRi(Rd,Rn,i) CC_LDRSH_rRi(NATIVE_CC_AL,Rd,Rn,i) +#define LDRSH_rRR(Rd,Rn,Rm) CC_LDRSH_rRR(NATIVE_CC_AL,Rd,Rn,Rm) +#define LDRSH_rRr(Rd,Rn,Rm) CC_LDRSH_rRr(NATIVE_CC_AL,Rd,Rn,Rm) + +#define CC_LDRH_rR(cc,Rd,Rn) _LS2(cc,1,1,0,1,Rd,Rn,ADD2_IMM(0)) +#define CC_LDRH_rRI(cc,Rd,Rn,i) _LS2(cc,1,1,0,1,Rd,Rn,(i) >= 0 ? 
ADD2_IMM(i) : SUB2_IMM(-(i))) +#define CC_LDRH_rRi(cc,Rd,Rn,i) _LS2(cc,1,1,0,1,Rd,Rn,SUB2_IMM(i)) +#define CC_LDRH_rRR(cc,Rd,Rn,Rm) _LS2(cc,1,1,0,1,Rd,Rn,ADD2_REG(Rm)) +#define CC_LDRH_rRr(cc,Rd,Rn,Rm) _LS2(cc,1,1,0,1,Rd,Rn,SUB2_REG(Rm)) + +#define LDRH_rR(Rd,Rn) CC_LDRH_rR(NATIVE_CC_AL,Rd,Rn) +#define LDRH_rRI(Rd,Rn,i) CC_LDRH_rRI(NATIVE_CC_AL,Rd,Rn,i) +#define LDRH_rRi(Rd,Rn,i) CC_LDRH_rRi(NATIVE_CC_AL,Rd,Rn,i) +#define LDRH_rRR(Rd,Rn,Rm) CC_LDRH_rRR(NATIVE_CC_AL,Rd,Rn,Rm) +#define LDRH_rRr(Rd,Rn,Rm) CC_LDRH_rRr(NATIVE_CC_AL,Rd,Rn,Rm) + +#define CC_STRD_rR(cc,Rd,Rn) _LS2(cc,1,0,1,1,Rd,Rn,ADD2_IMM(0)) +#define CC_STRD_rRI(cc,Rd,Rn,i) _LS2(cc,1,0,1,1,Rd,Rn,ADD2_IMM(i)) +#define CC_STRD_rRi(cc,Rd,Rn,i) _LS2(cc,1,0,1,1,Rd,Rn,SUB2_IMM(i)) +#define CC_STRD_rRR(cc,Rd,Rn,Rm) _LS2(cc,1,0,1,1,Rd,Rn,ADD2_REG(Rm)) +#define CC_STRD_rRr(cc,Rd,Rn,Rm) _LS2(cc,1,0,1,1,Rd,Rn,SUB2_REG(Rm)) + +#define STRD_rR(Rd,Rn) CC_STRD_rR(NATIVE_CC_AL,Rd,Rn) +#define STRD_rRI(Rd,Rn,i) CC_STRD_rRI(NATIVE_CC_AL,Rd,Rn,i) +#define STRD_rRi(Rd,Rn,i) CC_STRD_rRi(NATIVE_CC_AL,Rd,Rn,i) +#define STRD_rRR(Rd,Rn,Rm) CC_STRD_rRR(NATIVE_CC_AL,Rd,Rn,Rm) +#define STRD_rRr(Rd,Rn,Rm) CC_STRD_rRr(NATIVE_CC_AL,Rd,Rn,Rm) + +#define CC_STRH_rR(cc,Rd,Rn) _LS2(cc,1,0,0,1,Rd,Rn,ADD2_IMM(0)) +#define CC_STRH_rRI(cc,Rd,Rn,i) _LS2(cc,1,0,0,1,Rd,Rn,ADD2_IMM(i)) +#define CC_STRH_rRi(cc,Rd,Rn,i) _LS2(cc,1,0,0,1,Rd,Rn,SUB2_IMM(i)) +#define CC_STRH_rRR(cc,Rd,Rn,Rm) _LS2(cc,1,0,0,1,Rd,Rn,ADD2_REG(Rm)) +#define CC_STRH_rRr(cc,Rd,Rn,Rm) _LS2(cc,1,0,0,1,Rd,Rn,SUB2_REG(Rm)) + +#define STRH_rR(Rd,Rn) CC_STRH_rR(NATIVE_CC_AL,Rd,Rn) +#define STRH_rRI(Rd,Rn,i) CC_STRH_rRI(NATIVE_CC_AL,Rd,Rn,i) +#define STRH_rRi(Rd,Rn,i) CC_STRH_rRi(NATIVE_CC_AL,Rd,Rn,i) +#define STRH_rRR(Rd,Rn,Rm) CC_STRH_rRR(NATIVE_CC_AL,Rd,Rn,Rm) +#define STRH_rRr(Rd,Rn,Rm) CC_STRH_rRr(NATIVE_CC_AL,Rd,Rn,Rm) + +#define CC_LDRSB_rR(cc,Rd,Rn) _LS2(cc,1,1,1,0,Rd,Rn,ADD2_IMM(0)) +#define CC_LDRSB_rRI(cc,Rd,Rn,i) _LS2(cc,1,1,1,0,Rd,Rn,ADD2_IMM(i)) +#define 
CC_LDRSB_rRi(cc,Rd,Rn,i) _LS2(cc,1,1,1,0,Rd,Rn,SUB2_IMM(i)) +#define CC_LDRSB_rRR(cc,Rd,Rn,Rm) _LS2(cc,1,1,1,0,Rd,Rn,ADD2_REG(Rm)) +#define CC_LDRSB_rRr(cc,Rd,Rn,Rm) _LS2(cc,1,1,1,0,Rd,Rn,SUB2_REG(Rm)) + +#define LDRSB_rR(Rd,Rn) CC_LDRSB_rR(NATIVE_CC_AL,Rd,Rn) +#define LDRSB_rRI(Rd,Rn,i) CC_LDRSB_rRI(NATIVE_CC_AL,Rd,Rn,i) +#define LDRSB_rRi(Rd,Rn,i) CC_LDRSB_rRi(NATIVE_CC_AL,Rd,Rn,i) +#define LDRSB_rRR(Rd,Rn,Rm) CC_LDRSB_rRR(NATIVE_CC_AL,Rd,Rn,Rm) +#define LDRSB_rRr(Rd,Rn,Rm) CC_LDRSB_rRr(NATIVE_CC_AL,Rd,Rn,Rm) + +#define CC_LDRD_rR(cc,Rd,Rn) _LS2(cc,1,0,1,0,Rd,Rn,ADD2_IMM(0)) +#define CC_LDRD_rRI(cc,Rd,Rn,i) _LS2(cc,1,0,1,0,Rd,Rn,ADD2_IMM(i)) +#define CC_LDRD_rRi(cc,Rd,Rn,i) _LS2(cc,1,0,1,0,Rd,Rn,SUB2_IMM(i)) +#define CC_LDRD_rRR(cc,Rd,Rn,Rm) _LS2(cc,1,0,1,0,Rd,Rn,ADD2_REG(Rm)) +#define CC_LDRD_rRr(cc,Rd,Rn,Rm) _LS2(cc,1,0,1,0,Rd,Rn,SUB2_REG(Rm)) + +#define LDRD_rR(Rd,Rn) CC_LDRD_rR(NATIVE_CC_AL,Rd,Rn) +#define LDRD_rRI(Rd,Rn,i) CC_LDRD_rRI(NATIVE_CC_AL,Rd,Rn,i) +#define LDRD_rRi(Rd,Rn,i) CC_LDRD_rRi(NATIVE_CC_AL,Rd,Rn,i) +#define LDRD_rRR(Rd,Rn,Rm) CC_LDRD_rRR(NATIVE_CC_AL,Rd,Rn,Rm) +#define LDRD_rRr(Rd,Rn,Rm) CC_LDRD_rRr(NATIVE_CC_AL,Rd,Rn,Rm) + +/* Multiply */ +#define CC_SMULL_rrrr(cc, RdLo, RdHi, Rm, Rs) _W(((cc) << 28) | (0x0C << 20) | ((RdHi) << 16) | ((RdLo) << 12) | ((Rs) << 8) | (0x9 << 4) | (Rm)) +#define SMULL_rrrr(RdLo,RdHi,Rm,Rs) CC_SMULL_rrrr(NATIVE_CC_AL,RdLo,RdHi,Rm,Rs) +#define CC_SMULLS_rrrr(cc, RdLo, RdHi, Rm, Rs) _W(((cc) << 28) | (0x0D << 20) | ((RdHi) << 16) | ((RdLo) << 12) | ((Rs) << 8) | (0x9 << 4) | (Rm)) +#define SMULLS_rrrr(RdLo,RdHi,Rm,Rs) CC_SMULLS_rrrr(NATIVE_CC_AL,RdLo,RdHi,Rm,Rs) +#define CC_MUL_rrr(cc, Rd, Rm, Rs) _W(((cc) << 28) | (0x00 << 20) | ((Rd) << 16) | ((Rs) << 8) | (0x9 << 4) | (Rm)) +#define MUL_rrr(Rd, Rm, Rs) CC_MUL_rrr(NATIVE_CC_AL, Rd, Rm, Rs) +#define CC_MULS_rrr(cc, Rd, Rm, Rs) _W(((cc) << 28) | (0x01 << 20) | ((Rd) << 16) | ((Rs) << 8) | (0x9 << 4) | (Rm)) +#define MULS_rrr(Rd, Rm, Rs) CC_MULS_rrr(NATIVE_CC_AL, 
Rd, Rm, Rs) + +#define CC_UMULL_rrrr(cc, RdLo, RdHi, Rm, Rs) _W(((cc) << 28) | (0x08 << 20) | ((RdHi) << 16) | ((RdLo) << 12) | ((Rs) << 8) | (0x9 << 4) | (Rm)) +#define UMULL_rrrr(RdLo,RdHi,Rm,Rs) CC_UMULL_rrrr(NATIVE_CC_AL,RdLo,RdHi,Rm,Rs) +#define CC_UMULLS_rrrr(cc, RdLo, RdHi, Rm, Rs) _W(((cc) << 28) | (0x09 << 20) | ((RdHi) << 16) | ((RdLo) << 12) | ((Rs) << 8) | (0x9 << 4) | (Rm)) +#define UMULLS_rrrr(RdLo,RdHi,Rm,Rs) CC_UMULLS_rrrr(NATIVE_CC_AL,RdLo,RdHi,Rm,Rs) + +/* Others */ +#define CC_CLZ_rr(cc,Rd,Rm) _W(((cc) << 28) | (0x16 << 20) | (0xf << 16) | ((Rd) << 12) | (0xf << 8) | (0x1 << 4) | SHIFT_REG(Rm)) +#define CLZ_rr(Rd,Rm) CC_CLZ_rr(NATIVE_CC_AL,Rd,Rm) + +/* Alias */ +#define LSL_rri(Rd,Rm,i) MOV_rrLSLi(Rd,Rm,i) +#define LSL_rrr(Rd,Rm,Rs) MOV_rrLSLr(Rd,Rm,Rs) +#define LSR_rri(Rd,Rm,i) MOV_rrLSRi(Rd,Rm,i) +#define LSR_rrr(Rd,Rm,Rs) MOV_rrLSRr(Rd,Rm,Rs) +#define ASR_rri(Rd,Rm,i) MOV_rrASRi(Rd,Rm,i) +#define ASR_rrr(Rd,Rm,Rs) MOV_rrASRr(Rd,Rm,Rs) +#define ROR_rri(Rd,Rm,i) MOV_rrRORi(Rd,Rm,i) +#define ROR_rrr(Rd,Rm,Rs) MOV_rrRORr(Rd,Rm,Rs) +#define RRX_rr(Rd,Rm) MOV_rrRRX(Rd,Rm) +#define LSLS_rri(Rd,Rm,i) MOVS_rrLSLi(Rd,Rm,i) +#define LSLS_rrr(Rd,Rm,Rs) MOVS_rrLSLr(Rd,Rm,Rs) +#define LSRS_rri(Rd,Rm,i) MOVS_rrLSRi(Rd,Rm,i) +#define LSRS_rrr(Rd,Rm,Rs) MOVS_rrLSRr(Rd,Rm,Rs) +#define ASRS_rri(Rd,Rm,i) MOVS_rrASRi(Rd,Rm,i) +#define ASRS_rrr(Rd,Rm,Rs) MOVS_rrASRr(Rd,Rm,Rs) +#define RORS_rri(Rd,Rm,i) MOVS_rrRORi(Rd,Rm,i) +#define RORS_rrr(Rd,Rm,Rs) MOVS_rrRORr(Rd,Rm,Rs) +#define RRXS_rr(Rd,Rm) MOVS_rrRRX(Rd,Rm) + +/* ARMV6 ops */ +#define CC_SXTB_rr(cc,Rd,Rm) _W(((cc) << 28) | (0x6a << 20) | (0xf << 16) | ((Rd) << 12) | (0x7 << 4) | SHIFT_REG(Rm)) +#define SXTB_rr(Rd,Rm) CC_SXTB_rr(NATIVE_CC_AL,Rd,Rm) + +#define CC_SXTB_rr_ROR8(cc,Rd,Rm) _W(((cc) << 28) | (0x6a << 20) | (0xf << 16) | ((Rd) << 12) | (1 << 10) | (0x7 << 4) | SHIFT_REG(Rm)) +#define SXTB_rr_ROR8(Rd,Rm) CC_SXTB_rr_ROR8(NATIVE_CC_AL,Rd,Rm) + +#define CC_SXTB_rr_ROR16(cc,Rd,Rm) _W(((cc) << 28) | (0x6a 
<< 20) | (0xf << 16) | ((Rd) << 12) | (2 << 10) | (0x7 << 4) | SHIFT_REG(Rm)) +#define SXTB_rr_ROR16(Rd,Rm) CC_SXTB_rr_ROR16(NATIVE_CC_AL,Rd,Rm) + +#define CC_SXTB_rr_ROR24(cc,Rd,Rm) _W(((cc) << 28) | (0x6a << 20) | (0xf << 16) | ((Rd) << 12) | (3 << 10) | (0x7 << 4) | SHIFT_REG(Rm)) +#define SXTB_rr_ROR24(Rd,Rm) CC_SXTB_rr_ROR24(NATIVE_CC_AL,Rd,Rm) + +#define CC_SXTH_rr(cc,Rd,Rm) _W(((cc) << 28) | (0x6b << 20) | (0xf << 16) | ((Rd) << 12) | (0x7 << 4) | SHIFT_REG(Rm)) +#define SXTH_rr(Rd,Rm) CC_SXTH_rr(NATIVE_CC_AL,Rd,Rm) + +#define CC_SXTH_rr_ROR8(cc,Rd,Rm) _W(((cc) << 28) | (0x6b << 20) | (0xf << 16) | ((Rd) << 12) | (1 << 10) | (0x7 << 4) | SHIFT_REG(Rm)) +#define SXTH_rr_ROR8(Rd,Rm) CC_SXTH_rr_ROR8(NATIVE_CC_AL,Rd,Rm) + +#define CC_SXTH_rr_ROR16(cc,Rd,Rm) _W(((cc) << 28) | (0x6b << 20) | (0xf << 16) | ((Rd) << 12) | (2 << 10) | (0x7 << 4) | SHIFT_REG(Rm)) +#define SXTH_rr_ROR16(Rd,Rm) CC_SXTH_rr_ROR16(NATIVE_CC_AL,Rd,Rm) + +#define CC_SXTH_rr_ROR24(cc,Rd,Rm) _W(((cc) << 28) | (0x6b << 20) | (0xf << 16) | ((Rd) << 12) | (3 << 10) | (0x7 << 4) | SHIFT_REG(Rm)) +#define SXTH_rr_ROR24(Rd,Rm) CC_SXTH_rr_ROR24(NATIVE_CC_AL,Rd,Rm) + +#define CC_UXTB_rr(cc,Rd,Rm) _W(((cc) << 28) | (0x6e << 20) | (0xf << 16) | ((Rd) << 12) | (0x7 << 4) | SHIFT_REG(Rm)) +#define UXTB_rr(Rd,Rm) CC_UXTB_rr(NATIVE_CC_AL,Rd,Rm) + +#define CC_UXTB_rr_ROR8(cc,Rd,Rm) _W(((cc) << 28) | (0x6e << 20) | (0xf << 16) | ((Rd) << 12) | (1 << 10) | (0x7 << 4) | SHIFT_REG(Rm)) +#define UXTB_rr_ROR8(Rd,Rm) CC_UXTB_rr_ROR8(NATIVE_CC_AL,Rd,Rm) + +#define CC_UXTB_rr_ROR16(cc,Rd,Rm) _W(((cc) << 28) | (0x6e << 20) | (0xf << 16) | ((Rd) << 12) | (2 << 10) | (0x7 << 4) | SHIFT_REG(Rm)) +#define UXTB_rr_ROR16(Rd,Rm) CC_UXTB_rr_ROR16(NATIVE_CC_AL,Rd,Rm) + +#define CC_UXTB_rr_ROR24(cc,Rd,Rm) _W(((cc) << 28) | (0x6e << 20) | (0xf << 16) | ((Rd) << 12) | (3 << 10) | (0x7 << 4) | SHIFT_REG(Rm)) +#define UXTB_rr_ROR24(Rd,Rm) CC_UXTB_rr_ROR24(NATIVE_CC_AL,Rd,Rm) + +#define CC_UXTH_rr(cc,Rd,Rm) _W(((cc) << 28) | (0x6f 
<< 20) | (0xf << 16) | ((Rd) << 12) | (0x7 << 4) | SHIFT_REG(Rm)) +#define UXTH_rr(Rd,Rm) CC_UXTH_rr(NATIVE_CC_AL,Rd,Rm) + +#define CC_UXTH_rr_ROR8(cc,Rd,Rm) _W(((cc) << 28) | (0x6f << 20) | (0xf << 16) | ((Rd) << 12) | (1 << 10) | (0x7 << 4) | SHIFT_REG(Rm)) +#define UXTH_rr_ROR8(Rd,Rm) CC_UXTH_rr_ROR8(NATIVE_CC_AL,Rd,Rm) + +#define CC_UXTH_rr_ROR16(cc,Rd,Rm) _W(((cc) << 28) | (0x6f << 20) | (0xf << 16) | ((Rd) << 12) | (2 << 10) | (0x7 << 4) | SHIFT_REG(Rm)) +#define UXTH_rr_ROR16(Rd,Rm) CC_UXTH_rr_ROR16(NATIVE_CC_AL,Rd,Rm) + +#define CC_UXTH_rr_ROR24(cc,Rd,Rm) _W(((cc) << 28) | (0x6f << 20) | (0xf << 16) | ((Rd) << 12) | (3 << 10) | (0x7 << 4) | SHIFT_REG(Rm)) +#define UXTH_rr_ROR24(Rd,Rm) CC_UXTH_rr_ROR24(NATIVE_CC_AL,Rd,Rm) + +#define CC_REV_rr(cc,Rd,Rm) _W(((cc) << 28) | (0x6b << 20) | (0xf << 16) | (0xf << 8) | ((Rd) << 12) | (0x3 << 4) | SHIFT_REG(Rm)) +#define REV_rr(Rd,Rm) CC_REV_rr(NATIVE_CC_AL,Rd,Rm) + +#define CC_REV16_rr(cc,Rd,Rm) _W(((cc) << 28) | (0x6b << 20) | (0xf << 16) | (0xf << 8) | ((Rd) << 12) | (0xB << 4) | SHIFT_REG(Rm)) +#define REV16_rr(Rd,Rm) CC_REV16_rr(NATIVE_CC_AL,Rd,Rm) + +#define CC_REVSH_rr(cc,Rd,Rm) _W(((cc) << 28) | (0x6f << 20) | (0xf << 16) | (0xf << 8) | ((Rd) << 12) | (0xB << 4) | SHIFT_REG(Rm)) +#define REVSH_rr(Rd,Rm) CC_REVSH_rr(NATIVE_CC_AL,Rd,Rm) + +#define CC_PKHBT_rrr(cc,Rd,Rn,Rm) _W(((cc) << 28) | (0x68 << 20) | (Rn << 16) | (Rd << 12) | (0x1 << 4) | (Rm)) +#define CC_PKHBT_rrrLSLi(cc,Rd,Rn,Rm,s) _W(((cc) << 28) | (0x68 << 20) | (Rn << 16) | (Rd << 12) | (0x1 << 4) | SHIFT_PK(Rm, s)) +#define PKHBT_rrr(Rd,Rn,Rm) CC_PKHBT_rrr(NATIVE_CC_AL,Rd,Rn,Rm) +#define PKHBT_rrrLSLi(Rd,Rn,Rm,s) CC_PKHBT_rrrLSLi(NATIVE_CC_AL,Rd,Rn,Rm,s) + +#define CC_PKHTB_rrrASRi(cc,Rd,Rn,Rm,s) _W(((cc) << 28) | (0x68 << 20) | (Rn << 16) | (Rd << 12) | (0x5 << 4) | SHIFT_PK(Rm, s)) +#define PKHTB_rrrASRi(Rd,Rn,Rm,s) CC_PKHTB_rrrASRi(NATIVE_CC_AL,Rd,Rn,Rm,s) + +#endif /* ARM_RTASM_H */ diff --git a/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp 
b/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp new file mode 100644 index 00000000..32e6982a --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp @@ -0,0 +1,5372 @@ +/* + * compiler/codegen_x86.cpp - IA-32 and AMD64 code generator + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * JIT compiler m68k -> IA-32 and AMD64 + * + * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * Adaptation for Basilisk II and improvements, copyright 2000-2004 Gwenole Beauchesne + * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* This should eventually end up in machdep/, but for now, x86 is the + only target, and it's easier this way... 
*/ + +#include "flags_x86.h" + +/************************************************************************* + * Some basic information about the the target CPU * + *************************************************************************/ + +#define R1 RR1 +#define R2 RR2 +#define R4 RR4 + +#define EAX_INDEX 0 +#define ECX_INDEX 1 +#define EDX_INDEX 2 +#define EBX_INDEX 3 +#define ESP_INDEX 4 +#define EBP_INDEX 5 +#define ESI_INDEX 6 +#define EDI_INDEX 7 +#if defined(CPU_x86_64) +#define R8_INDEX 8 +#define R9_INDEX 9 +#define R10_INDEX 10 +#define R11_INDEX 11 +#define R12_INDEX 12 +#define R13_INDEX 13 +#define R14_INDEX 14 +#define R15_INDEX 15 +#endif +/* XXX this has to match X86_Reg8H_Base + 4 */ +#define AH_INDEX (0x10+4+EAX_INDEX) +#define CH_INDEX (0x10+4+ECX_INDEX) +#define DH_INDEX (0x10+4+EDX_INDEX) +#define BH_INDEX (0x10+4+EBX_INDEX) + +/* The register in which subroutines return an integer return value */ +#define REG_RESULT EAX_INDEX + +/* The registers subroutines take their first and second argument in */ +#ifdef _WIN32 +/* Handle the _fastcall parameters of ECX and EDX */ +#define REG_PAR1 ECX_INDEX +#define REG_PAR2 EDX_INDEX +#elif defined(CPU_x86_64) +#define REG_PAR1 EDI_INDEX +#define REG_PAR2 ESI_INDEX +#else +#define REG_PAR1 EAX_INDEX +#define REG_PAR2 EDX_INDEX +#endif + +#define REG_PC_PRE EAX_INDEX /* The register we use for preloading regs.pc_p */ +#ifdef _WIN32 +#define REG_PC_TMP ECX_INDEX +#else +#define REG_PC_TMP ECX_INDEX /* Another register that is not the above */ +#endif + +#define SHIFTCOUNT_NREG ECX_INDEX /* Register that can be used for shiftcount. 
+ -1 if any reg will do */ +#define MUL_NREG1 EAX_INDEX /* %eax will hold the low 32 bits after a 32x32 mul */ +#define MUL_NREG2 EDX_INDEX /* %edx will hold the high 32 bits */ + +#define STACK_ALIGN 16 +#define STACK_OFFSET sizeof(void *) +#ifdef _WIN64 +/* In the Microsoft x64 calling convention, it's the caller's responsibility + * to allocate 32 bytes of "shadow space" on the stack right before calling + * the function (regardless of the actual number of parameters used). */ +#define STACK_SHADOW_SPACE 32 +#else +#define STACK_SHADOW_SPACE 0 +#endif + +#if defined(CPU_x86_64) +#ifdef UAE +uae_s8 always_used[] = { ESP_INDEX, R12_INDEX, -1 }; +#else +uae_s8 always_used[] = { ESP_INDEX, -1 }; +#endif +uae_s8 can_byte[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1}; +uae_s8 can_word[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1}; +#else +uae_s8 always_used[] = { ESP_INDEX, -1 }; +uae_s8 can_byte[]={0,1,2,3,-1}; +uae_s8 can_word[]={0,1,2,3,5,6,7,-1}; +#endif +static bool have_lahf_lm = true; // target has LAHF supported in long mode ? + +#if USE_OPTIMIZED_CALLS +/* Make sure interpretive core does not use cpuopti */ +uae_u8 call_saved[]={0,0,0,1,1,1,1,1}; +#error FIXME: code not ready +#else +/* cpuopti mutate instruction handlers to assume registers are saved + by the caller */ +uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0}; +#endif + +/* This *should* be the same as call_saved. But: + - We might not really know which registers are saved, and which aren't, + so we need to preserve some, but don't want to rely on everyone else + also saving those registers + - Special registers (such like the stack pointer) should not be "preserved" + by pushing, even though they are "saved" across function calls +*/ +#if defined(CPU_x86_64) +#ifdef _WIN64 +/* https://msdn.microsoft.com/en-us/library/6t169e9c.aspx: + * "The registers RBX, RBP, RDI, RSI, RSP, R12, R13, R14, and R15 are + * considered nonvolatile and must be saved and restored by a function that + * uses them". 
Also saving r11 for now (see comment below). */ +static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,1,1,0,0,0,1,1,1,1,1}; +#else +/* callee-saved registers as defined by Linux AMD64 ABI: rbx, rbp, rsp, r12 - r15 */ +/* preserve r11 because it's generally used to hold pointers to functions */ +/* FIXME: not really sure what the point of saving r11 is (??). If functions + * cannot assume calle preserves it, it will not be used across calls anyway? */ +static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1}; +#endif +#else +/* callee-saved registers as defined by System V IA-32 ABI: edi, esi, ebx, ebp */ +static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,1,1}; +#endif + +/* Whether classes of instructions do or don't clobber the native flags */ +#define CLOBBER_MOV +#define CLOBBER_LEA +#define CLOBBER_CMOV +#define CLOBBER_POP +#define CLOBBER_PUSH +#define CLOBBER_SUB clobber_flags() +#define CLOBBER_SBB clobber_flags() +#define CLOBBER_CMP clobber_flags() +#define CLOBBER_ADD clobber_flags() +#define CLOBBER_ADC clobber_flags() +#define CLOBBER_AND clobber_flags() +#define CLOBBER_OR clobber_flags() +#define CLOBBER_XOR clobber_flags() + +#define CLOBBER_ROL clobber_flags() +#define CLOBBER_ROR clobber_flags() +#define CLOBBER_SHLL clobber_flags() +#define CLOBBER_SHRL clobber_flags() +#define CLOBBER_SHRA clobber_flags() +#define CLOBBER_TEST clobber_flags() +#define CLOBBER_CL16 +#define CLOBBER_CL8 +#define CLOBBER_SE32 +#define CLOBBER_SE16 +#define CLOBBER_SE8 +#define CLOBBER_ZE32 +#define CLOBBER_ZE16 +#define CLOBBER_ZE8 +#define CLOBBER_SW16 clobber_flags() +#define CLOBBER_SW32 +#define CLOBBER_SETCC +#define CLOBBER_MUL clobber_flags() +#define CLOBBER_BT clobber_flags() +#define CLOBBER_BSF clobber_flags() + +/* The older code generator is now deprecated. 
*/ +#define USE_NEW_RTASM 1 + +#if USE_NEW_RTASM + +#if defined(CPU_x86_64) +#define X86_TARGET_64BIT 1 +/* The address override prefix causes a 5 cycles penalty on Intel Core + processors. Another solution would be to decompose the load in an LEA, + MOV (to zero-extend), MOV (from memory): is it better? */ +#define ADDR32 x86_emit_byte(0x67), +#else +#define ADDR32 +#endif +#define X86_FLAT_REGISTERS 0 +#define X86_OPTIMIZE_ALU 1 +#define X86_OPTIMIZE_ROTSHI 1 +#include "codegen_x86.h" + +#define x86_emit_byte(B) emit_byte(B) +#define x86_emit_word(W) emit_word(W) +#define x86_emit_long(L) emit_long(L) +#define x86_emit_quad(Q) emit_quad(Q) +#define x86_get_target() get_target() +#define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__) + +static inline void x86_64_addr32(void) +{ +#ifdef CPU_x86_64 + emit_byte(0x67); +#endif +} + +static inline void x86_64_rex(bool /* w */, uae_u32 * /* r */, uae_u32 * /* x */, uae_u32 *b) +{ +#ifdef CPU_x86_64 + int rex_byte = 0x40; + if (*b >= R8_INDEX) { + *b -= R8_INDEX; + rex_byte |= 1; + } + if (rex_byte != 0x40) { + emit_byte(rex_byte); + } +#else + UNUSED(b); +#endif +} + +static inline void x86_64_prefix( + bool addr32, bool w, uae_u32 *r, uae_u32 *x, uae_u32 *b) +{ + if (addr32) { + x86_64_addr32(); + } + x86_64_rex(w, r, x, b); +} + +// Some mappings to mark compemu_support calls as only used by compemu +// These are still mainly x86 minded. 
Should be more CPU independent in the future +#define compemu_raw_add_l_mi(a,b) raw_add_l_mi(a,b) +#define compemu_raw_and_l_ri(a,b) raw_and_l_ri(a,b) +#define compemu_raw_bswap_32(a) raw_bswap_32(a) +#define compemu_raw_bt_l_ri(a,b) raw_bt_l_ri(a,b) +#define compemu_raw_call(a) raw_call(a) +#define compemu_raw_cmov_l_rm_indexed(a,b,c,d,e) raw_cmov_l_rm_indexed(a,b,c,d,e) +#define compemu_raw_cmp_l_mi(a,b) raw_cmp_l_mi(a,b) +#define compemu_raw_cmp_l_mi8(a,b) raw_cmp_l_mi(a,b) +#define compemu_raw_jcc_b_oponly(a) raw_jcc_b_oponly(a) +#define compemu_raw_jcc_l_oponly(a) raw_jcc_l_oponly(a) +#define compemu_raw_jl(a) raw_jl(a) +#define compemu_raw_jmp(a) raw_jmp(a) +#define compemu_raw_jmp_m_indexed(a,b,c) raw_jmp_m_indexed(a,b,c) +#define compemu_raw_jmp_r(a) raw_jmp_r(a) +#define compemu_raw_jnz(a) raw_jnz(a) +#define compemu_raw_jz_b_oponly() raw_jz_b_oponly() +#define compemu_raw_lea_l_brr(a,b,c) raw_lea_l_brr(a,b,c) +#define compemu_raw_lea_l_brr_indexed(a,b,c,d,e) raw_lea_l_brr_indexed(a,b,c,d,e) +#define compemu_raw_mov_b_mr(a,b) raw_mov_b_mr(a,b) +#define compemu_raw_mov_l_mi(a,b) raw_mov_l_mi(a,b) +#define compemu_raw_mov_l_mr(a,b) raw_mov_l_mr(a,b) +#define compemu_raw_mov_l_ri(a,b) raw_mov_l_ri(a,b) +#define compemu_raw_mov_l_rm(a,b) raw_mov_l_rm(a,b) +#define compemu_raw_mov_l_rr(a,b) raw_mov_l_rr(a,b) +#define compemu_raw_mov_w_mr(a,b) raw_mov_w_mr(a,b) +#define compemu_raw_sub_l_mi(a,b) raw_sub_l_mi(a,b) +#define compemu_raw_test_l_rr(a,b) raw_test_l_rr(a,b) +#define compemu_raw_zero_extend_16_rr(a,b) raw_zero_extend_16_rr(a,b) +#define compemu_raw_lea_l_rr_indexed(a,b,c,d) raw_lea_l_rr_indexed(a,b,c,d) + +static void jit_fail(const char *msg, const char *file, int line, const char *function) +{ + jit_abort("failure in function %s from file %s at line %d: %s", + function, file, line, msg); +} + +LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r)) +{ +#if defined(CPU_x86_64) + PUSHQr(r); +#else + PUSHLr(r); +#endif +} +LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r)) + 
+LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r)) +{ +#if defined(CPU_x86_64) + POPQr(r); +#else + POPLr(r); +#endif +} +LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r)) + +LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d)) +{ +#if defined(CPU_x86_64) + POPQm(d, X86_NOREG, X86_NOREG, 1); +#else + POPLm(d, X86_NOREG, X86_NOREG, 1); +#endif +} +LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d)) + +LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i)) +{ + BTLir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b)) +{ + BTLrr(b, r); +} +LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b)) + +LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i)) +{ + BTCLir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b)) +{ + BTCLrr(b, r); +} +LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b)) + +LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i)) +{ + BTRLir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b)) +{ + BTRLrr(b, r); +} +LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b)) + +LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i)) +{ + BTSLir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b)) +{ + BTSLrr(b, r); +} +LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b)) + +LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i)) +{ + SUBWir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i)) + +LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s)) +{ + ADDR32 MOVLmr(s, X86_NOREG, X86_NOREG, 1, d); +} +LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s)) + +LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s)) +{ + ADDR32 MOVLim(s, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s)) + +LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s)) +{ + ADDR32 MOVWim(s, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s)) + 
+LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s)) +{ + ADDR32 MOVBim(s, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s)) + +LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i)) +{ + ADDR32 ROLBim(i, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i)) +{ + ROLBir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i)) +{ + ROLWir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i)) +{ + ROLLir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r)) +{ + ROLLrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r)) +{ + ROLWrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r)) +{ + ROLBrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r)) +{ + SHLLrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r)) +{ + SHLWrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r)) +{ + SHLBrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i)) +{ + RORBir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i)) +{ + RORWir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i)) + +LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s)) +{ + ADDR32 ORLmr(s, X86_NOREG, X86_NOREG, 1, d); +} +LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s)) + +LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i)) +{ + RORLir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i)) + 
+LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r)) +{ + RORLrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r)) +{ + RORWrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r)) +{ + RORBrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r)) +{ + SHRLrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r)) +{ + SHRWrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r)) +{ + SHRBrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r)) +{ + SARLrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r)) +{ + SARWrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r)) +{ + SARBrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i)) +{ + SHLLir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i)) +{ + SHLWir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i)) +{ + SHLBir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i)) +{ + SHRLir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i)) +{ + SHRWir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i)) +{ + SHRBir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i)) +{ + SARLir(i, r); +} 
+LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i)) +{ + SARWir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i)) +{ + SARBir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i)) + +LOWFUNC(WRITE,NONE,1,raw_sahf,(R2)) +{ + SAHF(); +} +LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah)) + +LOWFUNC(NONE,NONE,1,raw_cpuid,(R4)) +{ + CPUID(); +} +LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax)) + +LOWFUNC(READ,NONE,1,raw_lahf,(W2)) +{ + LAHF(); +} +LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah)) + +LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc)) +{ + SETCCir(cc, d); +} +LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc)) + +LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc)) +{ + ADDR32 SETCCim(cc, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc)) + +LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc)) +{ + if (have_cmov) + CMOVLrr(cc, s, d); + else { /* replacement using branch and mov */ + uae_s8 *target_p = (uae_s8 *)x86_get_target() + 1; + JCCSii(cc^1, 0); + MOVLrr(s, d); + *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1); + } +} +LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc)) + +LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s)) +{ + BSFLrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s)) + +LOWFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s)) +{ + MOVSLQrr(s, d); +} +LENDFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s)) + +LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s)) +{ + MOVSWLrr(s, d); +} +LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s)) + +LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s)) +{ + MOVSBLrr(s, d); +} +LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s)) + +LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s)) +{ + MOVZWLrr(s, d); +} +LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s)) + 
+LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s)) +{ + MOVZBLrr(s, d); +} +LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s)) + +LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s)) +{ + IMULLrr(s, d); +} +LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s)) + +LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s)) +{ + if (d!=MUL_NREG1 || s!=MUL_NREG2) { + jit_abort("Bad register in IMUL: d=%d, s=%d",d,s); + } + IMULLr(s); +} +LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s)) + +LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s)) +{ + if (d!=MUL_NREG1 || s!=MUL_NREG2) { + jit_abort("Bad register in MUL: d=%d, s=%d",d,s); + } + MULLr(s); +} +LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s)) + +LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4, R4)) +{ + abort(); /* %^$&%^$%#^ x86! */ +} +LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s)) + +LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s)) +{ + MOVBrr(s, d); +} +LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s)) + +LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s)) +{ + MOVWrr(s, d); +} +LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s)) + +LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor)) +{ + ADDR32 MOVLmr(0, baser, index, factor, d); +} +LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor)) + +LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor)) +{ + ADDR32 MOVWmr(0, baser, index, factor, d); +} +LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor)) + +LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor)) +{ + ADDR32 MOVBmr(0, baser, index, factor, d); +} +LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor)) + +LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s)) +{ + ADDR32 MOVLrm(s, 0, baser, index, factor); +} +LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s)) + 
+LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s)) +{ + ADDR32 MOVWrm(s, 0, baser, index, factor); +} +LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s)) + +LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s)) +{ + ADDR32 MOVBrm(s, 0, baser, index, factor); +} +LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s)) + +LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s)) +{ + ADDR32 MOVLrm(s, base, baser, index, factor); +} +LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s)) + +LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s)) +{ + ADDR32 MOVWrm(s, base, baser, index, factor); +} +LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s)) + +LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s)) +{ + ADDR32 MOVBrm(s, base, baser, index, factor); +} +LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s)) + +LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor)) +{ + ADDR32 MOVLmr(base, baser, index, factor, d); +} +LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor)) + +LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor)) +{ + ADDR32 MOVWmr(base, baser, index, factor, d); +} +LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor)) + +LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor)) +{ + ADDR32 MOVBmr(base, baser, index, factor, d); +} +LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor)) + +LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM 
factor)) +{ + ADDR32 MOVLmr(base, X86_NOREG, index, factor, d); +} +LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor)) + +LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond)) +{ + if (have_cmov) + ADDR32 CMOVLmr(cond, base, X86_NOREG, index, factor, d); + else { /* replacement using branch and mov */ + uae_s8 *target_p = (uae_s8 *)x86_get_target() + 1; + JCCSii(cond^1, 0); + ADDR32 MOVLmr(base, X86_NOREG, index, factor, d); + *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1); + } +} +LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond)) + +LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond)) +{ + if (have_cmov) + CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d); + else { /* replacement using branch and mov */ + uae_s8 *target_p = (uae_s8 *)x86_get_target() + 1; + JCCSii(cond^1, 0); + ADDR32 MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d); + *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1); + } +} +LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond)) + +LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset)) +{ + ADDR32 MOVLmr(offset, s, X86_NOREG, 1, d); +} +LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset)) + +LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset)) +{ + ADDR32 MOVWmr(offset, s, X86_NOREG, 1, d); +} +LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset)) + +LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset)) +{ + ADDR32 MOVBmr(offset, s, X86_NOREG, 1, d); +} +LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset)) + +LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset)) +{ + ADDR32 MOVLmr(offset, s, X86_NOREG, 1, d); +} +LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset)) + +LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset)) +{ + ADDR32 MOVWmr(offset, s, X86_NOREG, 1, d); +} +LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset)) + 
+LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset)) +{ + ADDR32 MOVBmr(offset, s, X86_NOREG, 1, d); +} +LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset)) +{ + ADDR32 MOVLim(i, offset, d, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset)) +{ + ADDR32 MOVWim(i, offset, d, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset)) +{ + ADDR32 MOVBim(i, offset, d, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset)) +{ + ADDR32 MOVLrm(s, offset, d, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset)) +{ + ADDR32 MOVWrm(s, offset, d, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset)) +{ + ADDR32 MOVBrm(s, offset, d, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset)) + +LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset)) +{ + ADDR32 LEALmr(offset, s, X86_NOREG, 1, d); +} +LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset)) + +LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset)) +{ + ADDR32 LEALmr(offset, s, index, factor, d); +} +LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset)) + +LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor)) +{ + ADDR32 LEALmr(0, s, index, factor, d); +} +LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor)) + +LOWFUNC(NONE,NONE,4,raw_lea_l_r_scaled,(W4 d, R4 index, IMM factor)) +{ + ADDR32 LEALmr(0, X86_NOREG, index, factor, d); +} 
+LENDFUNC(NONE,NONE,4,raw_lea_l_r_scaled,(W4 d, R4 index, IMM factor)) + +LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset)) +{ + ADDR32 MOVLrm(s, offset, d, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset)) +{ + ADDR32 MOVWrm(s, offset, d, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset)) +{ + ADDR32 MOVBrm(s, offset, d, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset)) + +LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r)) +{ + BSWAPLr(r); +} +LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r)) + +LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r)) +{ + ROLWir(8, r); +} +LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r)) + +LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s)) +{ + MOVLrr(s, d); +} +LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s)) + +LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s)) +{ + ADDR32 MOVLrm(s, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s)) + +LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s)) +{ + ADDR32 MOVWrm(s, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s)) + +LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s)) +{ + ADDR32 MOVWmr(s, X86_NOREG, X86_NOREG, 1, d); +} +LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s)) + +LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s)) +{ + ADDR32 MOVBrm(s, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s)) + +LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s)) +{ + ADDR32 MOVBmr(s, X86_NOREG, X86_NOREG, 1, d); +} +LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s)) + +LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s)) +{ + MOVLir(s, d); +} +LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s)) + +LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s)) +{ + MOVWir(s, d); +} +LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s)) + 
+LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s)) +{ + MOVBir(s, d); +} +LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s)) + +LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s)) +{ + ADDR32 ADCLim(s, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s)) + +LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s)) +{ + ADDR32 ADDLim(s, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s)) + +LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s)) +{ + ADDR32 ADDWim(s, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s)) + +LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s)) +{ + ADDR32 ADDBim(s, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s)) + +LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i)) +{ + TESTLir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s)) +{ + TESTLrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s)) +{ + TESTWrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s)) + +LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s)) +{ + TESTBrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s)) + +LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i)) +{ + XORLir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i)) +{ + ANDLir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i)) +{ + ANDWir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s)) +{ + ANDLrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s)) +{ + ANDWrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s)) + +LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s)) +{ + ANDBrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s)) + 
+LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i)) +{ + ORLir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s)) +{ + ORLrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s)) +{ + ORWrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s)) + +LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s)) +{ + ORBrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s)) + +LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s)) +{ + ADCLrr(s, d); +} +LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s)) + +LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s)) +{ + ADCWrr(s, d); +} +LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s)) + +LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s)) +{ + ADCBrr(s, d); +} +LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s)) + +LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s)) +{ + ADDLrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s)) +{ + ADDWrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s)) + +LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s)) +{ + ADDBrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s)) + +LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i)) +{ + SUBLir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i)) +{ + SUBBir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i)) +{ + ADDLir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i)) +{ + ADDWir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i)) +{ + ADDBir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i)) + +LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s)) +{ + SBBLrr(s, d); +} +LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s)) + +LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s)) +{ + SBBWrr(s, d); +} 
+LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s)) + +LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s)) +{ + SBBBrr(s, d); +} +LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s)) + +LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s)) +{ + SUBLrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s)) +{ + SUBWrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s)) + +LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s)) +{ + SUBBrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s)) + +LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s)) +{ + CMPLrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i)) +{ + CMPLir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s)) +{ + CMPWrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s)) + +LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s)) +{ + ADDR32 CMPBim(s, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s)) + +LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i)) +{ + CMPBir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s)) +{ + CMPBrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s)) + +LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor)) +{ + ADDR32 CMPLmr(offset, X86_NOREG, index, factor, d); +} +LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor)) + +LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s)) +{ + XORLrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s)) +{ + XORWrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s)) + +LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s)) +{ + XORBrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s)) + +LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s)) +{ + ADDR32 SUBLim(s, d, X86_NOREG, X86_NOREG, 1); +} 
+LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s)) + +LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s)) +{ + ADDR32 CMPLim(s, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s)) + +LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2)) +{ + XCHGLrr(r2, r1); +} +LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2)) + +LOWFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2)) +{ + XCHGBrr(r2, r1); +} +LENDFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2)) + +LOWFUNC(READ,WRITE,0,raw_pushfl,(void)) +{ + PUSHF(); +} +LENDFUNC(READ,WRITE,0,raw_pushfl,(void)) + +LOWFUNC(WRITE,READ,0,raw_popfl,(void)) +{ + POPF(); +} +LENDFUNC(WRITE,READ,0,raw_popfl,(void)) + +/* Generate floating-point instructions */ +static inline void x86_fadd_m(MEMR s) +{ + ADDR32 FADDLm(s,X86_NOREG,X86_NOREG,1); +} + +#else + +const bool optimize_accum = true; +const bool optimize_imm8 = true; +const bool optimize_shift_once = true; + +/************************************************************************* + * Actual encoding of the instructions on the target CPU * + *************************************************************************/ + +static inline int isaccum(int r) +{ + return (r == EAX_INDEX); +} + +static inline int isbyte(uae_s32 x) +{ + return (x>=-128 && x<=127); +} + +static inline int isword(uae_s32 x) +{ + return (x>=-32768 && x<=32767); +} + +LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r)) +{ + emit_byte(0x50+r); +} +LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r)) + +LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r)) +{ + emit_byte(0x58+r); +} +LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r)) + +LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d)) +{ + emit_byte(0x8f); + emit_byte(0x05); + emit_long(d); +} +LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d)) + +LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i)) +{ + emit_byte(0x0f); + emit_byte(0xba); + emit_byte(0xe0+r); + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b)) +{ + 
emit_byte(0x0f); + emit_byte(0xa3); + emit_byte(0xc0+8*b+r); +} +LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b)) + +LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i)) +{ + emit_byte(0x0f); + emit_byte(0xba); + emit_byte(0xf8+r); + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b)) +{ + emit_byte(0x0f); + emit_byte(0xbb); + emit_byte(0xc0+8*b+r); +} +LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b)) + + +LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i)) +{ + emit_byte(0x0f); + emit_byte(0xba); + emit_byte(0xf0+r); + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b)) +{ + emit_byte(0x0f); + emit_byte(0xb3); + emit_byte(0xc0+8*b+r); +} +LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b)) + +LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i)) +{ + emit_byte(0x0f); + emit_byte(0xba); + emit_byte(0xe8+r); + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b)) +{ + emit_byte(0x0f); + emit_byte(0xab); + emit_byte(0xc0+8*b+r); +} +LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b)) + +LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i)) +{ + emit_byte(0x66); + if (isbyte(i)) { + emit_byte(0x83); + emit_byte(0xe8+d); + emit_byte(i); + } + else { + if (optimize_accum && isaccum(d)) + emit_byte(0x2d); + else { + emit_byte(0x81); + emit_byte(0xe8+d); + } + emit_word(i); + } +} +LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i)) + + +LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s)) +{ + emit_byte(0x8b); + emit_byte(0x05+8*d); + emit_long(s); +} +LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s)) + +LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s)) +{ + emit_byte(0xc7); + emit_byte(0x05); + emit_long(d); + emit_long(s); +} +LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s)) + +LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s)) +{ + emit_byte(0x66); + emit_byte(0xc7); + 
emit_byte(0x05); + emit_long(d); + emit_word(s); +} +LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s)) + +LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s)) +{ + emit_byte(0xc6); + emit_byte(0x05); + emit_long(d); + emit_byte(s); +} +LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s)) + +LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i)) +{ + if (optimize_shift_once && (i == 1)) { + emit_byte(0xd0); + emit_byte(0x05); + emit_long(d); + } + else { + emit_byte(0xc0); + emit_byte(0x05); + emit_long(d); + emit_byte(i); + } +} +LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i)) +{ + if (optimize_shift_once && (i == 1)) { + emit_byte(0xd0); + emit_byte(0xc0+r); + } + else { + emit_byte(0xc0); + emit_byte(0xc0+r); + emit_byte(i); + } +} +LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i)) +{ + emit_byte(0x66); + emit_byte(0xc1); + emit_byte(0xc0+r); + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i)) +{ + if (optimize_shift_once && (i == 1)) { + emit_byte(0xd1); + emit_byte(0xc0+r); + } + else { + emit_byte(0xc1); + emit_byte(0xc0+r); + emit_byte(i); + } +} +LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r)) +{ + emit_byte(0xd3); + emit_byte(0xc0+d); +} +LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r)) +{ + emit_byte(0x66); + emit_byte(0xd3); + emit_byte(0xc0+d); +} +LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r)) +{ + emit_byte(0xd2); + emit_byte(0xc0+d); +} +LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r)) +{ + emit_byte(0xd3); + emit_byte(0xe0+d); +} +LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r)) +{ + emit_byte(0x66); + 
emit_byte(0xd3); + emit_byte(0xe0+d); +} +LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r)) +{ + emit_byte(0xd2); + emit_byte(0xe0+d); +} +LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i)) +{ + if (optimize_shift_once && (i == 1)) { + emit_byte(0xd0); + emit_byte(0xc8+r); + } + else { + emit_byte(0xc0); + emit_byte(0xc8+r); + emit_byte(i); + } +} +LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i)) +{ + emit_byte(0x66); + emit_byte(0xc1); + emit_byte(0xc8+r); + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i)) + +// gb-- used for making an fpcr value in compemu_fpp.cpp +LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s)) +{ + emit_byte(0x0b); + emit_byte(0x05+8*d); + emit_long(s); +} +LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s)) + +LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i)) +{ + if (optimize_shift_once && (i == 1)) { + emit_byte(0xd1); + emit_byte(0xc8+r); + } + else { + emit_byte(0xc1); + emit_byte(0xc8+r); + emit_byte(i); + } +} +LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r)) +{ + emit_byte(0xd3); + emit_byte(0xc8+d); +} +LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r)) +{ + emit_byte(0x66); + emit_byte(0xd3); + emit_byte(0xc8+d); +} +LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r)) +{ + emit_byte(0xd2); + emit_byte(0xc8+d); +} +LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r)) +{ + emit_byte(0xd3); + emit_byte(0xe8+d); +} +LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r)) +{ + emit_byte(0x66); + emit_byte(0xd3); + emit_byte(0xe8+d); +} +LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r)) + 
+LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r)) +{ + emit_byte(0xd2); + emit_byte(0xe8+d); +} +LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r)) +{ + emit_byte(0xd3); + emit_byte(0xf8+d); +} +LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r)) +{ + emit_byte(0x66); + emit_byte(0xd3); + emit_byte(0xf8+d); +} +LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r)) +{ + emit_byte(0xd2); + emit_byte(0xf8+d); +} +LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i)) +{ + if (optimize_shift_once && (i == 1)) { + emit_byte(0xd1); + emit_byte(0xe0+r); + } + else { + emit_byte(0xc1); + emit_byte(0xe0+r); + emit_byte(i); + } +} +LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i)) +{ + emit_byte(0x66); + emit_byte(0xc1); + emit_byte(0xe0+r); + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i)) +{ + if (optimize_shift_once && (i == 1)) { + emit_byte(0xd0); + emit_byte(0xe0+r); + } + else { + emit_byte(0xc0); + emit_byte(0xe0+r); + emit_byte(i); + } +} +LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i)) +{ + if (optimize_shift_once && (i == 1)) { + emit_byte(0xd1); + emit_byte(0xe8+r); + } + else { + emit_byte(0xc1); + emit_byte(0xe8+r); + emit_byte(i); + } +} +LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i)) +{ + emit_byte(0x66); + emit_byte(0xc1); + emit_byte(0xe8+r); + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i)) +{ + if (optimize_shift_once && (i == 1)) { + emit_byte(0xd0); + emit_byte(0xe8+r); + } + else { + emit_byte(0xc0); + emit_byte(0xe8+r); + 
emit_byte(i); + } +} +LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i)) +{ + if (optimize_shift_once && (i == 1)) { + emit_byte(0xd1); + emit_byte(0xf8+r); + } + else { + emit_byte(0xc1); + emit_byte(0xf8+r); + emit_byte(i); + } +} +LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i)) +{ + emit_byte(0x66); + emit_byte(0xc1); + emit_byte(0xf8+r); + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i)) +{ + if (optimize_shift_once && (i == 1)) { + emit_byte(0xd0); + emit_byte(0xf8+r); + } + else { + emit_byte(0xc0); + emit_byte(0xf8+r); + emit_byte(i); + } +} +LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i)) + +LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah)) +{ + emit_byte(0x9e); +} +LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah)) + +LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax)) +{ + emit_byte(0x0f); + emit_byte(0xa2); +} +LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax)) + +LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah)) +{ + emit_byte(0x9f); +} +LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah)) + +LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc)) +{ + emit_byte(0x0f); + emit_byte(0x90+cc); + emit_byte(0xc0+d); +} +LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc)) + +LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc)) +{ + emit_byte(0x0f); + emit_byte(0x90+cc); + emit_byte(0x05); + emit_long(d); +} +LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc)) + +LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc)) +{ + if (have_cmov) { + emit_byte(0x0f); + emit_byte(0x40+cc); + emit_byte(0xc0+8*d+s); + } + else { /* replacement using branch and mov */ + int uncc=(cc^1); + emit_byte(0x70+uncc); + emit_byte(2); /* skip next 2 bytes if not cc=true */ + emit_byte(0x89); + emit_byte(0xc0+8*s+d); + } +} +LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc)) + +LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s)) +{ + 
emit_byte(0x0f); + emit_byte(0xbc); + emit_byte(0xc0+8*d+s); +} +LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s)) + +LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s)) +{ + emit_byte(0x0f); + emit_byte(0xbf); + emit_byte(0xc0+8*d+s); +} +LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s)) + +LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s)) +{ + emit_byte(0x0f); + emit_byte(0xbe); + emit_byte(0xc0+8*d+s); +} +LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s)) + +LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s)) +{ + emit_byte(0x0f); + emit_byte(0xb7); + emit_byte(0xc0+8*d+s); +} +LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s)) + +LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s)) +{ + emit_byte(0x0f); + emit_byte(0xb6); + emit_byte(0xc0+8*d+s); +} +LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s)) + +LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s)) +{ + emit_byte(0x0f); + emit_byte(0xaf); + emit_byte(0xc0+8*d+s); +} +LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s)) + +LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s)) +{ + if (d!=MUL_NREG1 || s!=MUL_NREG2) { + jit_abort("Bad register in IMUL: d=%d, s=%d\n",d,s); + } + emit_byte(0xf7); + emit_byte(0xea); +} +LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s)) + +LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s)) +{ + if (d!=MUL_NREG1 || s!=MUL_NREG2) { + jit_abort("Bad register in MUL: d=%d, s=%d",d,s); + } + emit_byte(0xf7); + emit_byte(0xe2); +} +LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s)) + +LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s)) +{ + jit_abort("unsupported MUL"); /* %^$&%^$%#^ x86! 
*/ + emit_byte(0x0f); + emit_byte(0xaf); + emit_byte(0xc0+8*d+s); +} +LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s)) + +LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s)) +{ + emit_byte(0x88); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s)) + +LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s)) +{ + emit_byte(0x66); + emit_byte(0x89); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s)) + +LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor)) +{ + int isebp=(baser==5)?0x40:0; + int fi; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + + + emit_byte(0x8b); + emit_byte(0x04+8*d+isebp); + emit_byte(baser+8*index+0x40*fi); + if (isebp) + emit_byte(0x00); +} +LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor)) + +LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor)) +{ + int fi; + int isebp; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + isebp=(baser==5)?0x40:0; + + emit_byte(0x66); + emit_byte(0x8b); + emit_byte(0x04+8*d+isebp); + emit_byte(baser+8*index+0x40*fi); + if (isebp) + emit_byte(0x00); +} +LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor)) + +LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor)) +{ + int fi; + int isebp; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + isebp=(baser==5)?0x40:0; + + emit_byte(0x8a); + emit_byte(0x04+8*d+isebp); + emit_byte(baser+8*index+0x40*fi); + if (isebp) + emit_byte(0x00); +} +LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor)) + +LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s)) +{ + int fi; + int isebp; 
+ + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + + + isebp=(baser==5)?0x40:0; + + emit_byte(0x89); + emit_byte(0x04+8*s+isebp); + emit_byte(baser+8*index+0x40*fi); + if (isebp) + emit_byte(0x00); +} +LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s)) + +LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s)) +{ + int fi; + int isebp; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + isebp=(baser==5)?0x40:0; + + emit_byte(0x66); + emit_byte(0x89); + emit_byte(0x04+8*s+isebp); + emit_byte(baser+8*index+0x40*fi); + if (isebp) + emit_byte(0x00); +} +LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s)) + +LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s)) +{ + int fi; + int isebp; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + isebp=(baser==5)?0x40:0; + + emit_byte(0x88); + emit_byte(0x04+8*s+isebp); + emit_byte(baser+8*index+0x40*fi); + if (isebp) + emit_byte(0x00); +} +LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s)) + +LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s)) +{ + int fi; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + + emit_byte(0x89); + emit_byte(0x84+8*s); + emit_byte(baser+8*index+0x40*fi); + emit_long(base); +} +LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s)) + +LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s)) +{ + int fi; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; 
break; + case 8: fi=3; break; + default: abort(); + } + + emit_byte(0x66); + emit_byte(0x89); + emit_byte(0x84+8*s); + emit_byte(baser+8*index+0x40*fi); + emit_long(base); +} +LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s)) + +LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s)) +{ + int fi; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + + emit_byte(0x88); + emit_byte(0x84+8*s); + emit_byte(baser+8*index+0x40*fi); + emit_long(base); +} +LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s)) + +LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor)) +{ + int fi; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + + emit_byte(0x8b); + emit_byte(0x84+8*d); + emit_byte(baser+8*index+0x40*fi); + emit_long(base); +} +LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor)) + +LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor)) +{ + int fi; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + + emit_byte(0x66); + emit_byte(0x8b); + emit_byte(0x84+8*d); + emit_byte(baser+8*index+0x40*fi); + emit_long(base); +} +LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor)) + +LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor)) +{ + int fi; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + + emit_byte(0x8a); + emit_byte(0x84+8*d); + emit_byte(baser+8*index+0x40*fi); + emit_long(base); +} 
+LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor)) + +LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor)) +{ + int fi; + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: + jit_abort("Bad factor %d in mov_l_rm_indexed!",factor); + } + emit_byte(0x8b); + emit_byte(0x04+8*d); + emit_byte(0x05+8*index+64*fi); + emit_long(base); +} +LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor)) + +LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond)) +{ + int fi; + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: + jit_abort("Bad factor %d in mov_l_rm_indexed!",factor); + } + if (have_cmov) { + emit_byte(0x0f); + emit_byte(0x40+cond); + emit_byte(0x04+8*d); + emit_byte(0x05+8*index+64*fi); + emit_long(base); + } + else { /* replacement using branch and mov */ + int uncc=(cond^1); + emit_byte(0x70+uncc); + emit_byte(7); /* skip next 7 bytes if not cc=true */ + emit_byte(0x8b); + emit_byte(0x04+8*d); + emit_byte(0x05+8*index+64*fi); + emit_long(base); + } +} +LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond)) + +LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond)) +{ + if (have_cmov) { + emit_byte(0x0f); + emit_byte(0x40+cond); + emit_byte(0x05+8*d); + emit_long(mem); + } + else { /* replacement using branch and mov */ + int uncc=(cond^1); + emit_byte(0x70+uncc); + emit_byte(6); /* skip next 6 bytes if not cc=true */ + emit_byte(0x8b); + emit_byte(0x05+8*d); + emit_long(mem); + } +} +LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond)) + +LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset)) +{ + Dif(!isbyte(offset)) abort(); + emit_byte(0x8b); + emit_byte(0x40+8*d+s); + emit_byte(offset); +} +LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, 
IMM offset)) + +LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset)) +{ + Dif(!isbyte(offset)) abort(); + emit_byte(0x66); + emit_byte(0x8b); + emit_byte(0x40+8*d+s); + emit_byte(offset); +} +LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset)) + +LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset)) +{ + Dif(!isbyte(offset)) abort(); + emit_byte(0x8a); + emit_byte(0x40+8*d+s); + emit_byte(offset); +} +LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset)) + +LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset)) +{ + emit_byte(0x8b); + emit_byte(0x80+8*d+s); + emit_long(offset); +} +LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset)) + +LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset)) +{ + emit_byte(0x66); + emit_byte(0x8b); + emit_byte(0x80+8*d+s); + emit_long(offset); +} +LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset)) + +LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset)) +{ + emit_byte(0x8a); + emit_byte(0x80+8*d+s); + emit_long(offset); +} +LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset)) +{ + Dif(!isbyte(offset)) abort(); + emit_byte(0xc7); + emit_byte(0x40+d); + emit_byte(offset); + emit_long(i); +} +LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset)) +{ + Dif(!isbyte(offset)) abort(); + emit_byte(0x66); + emit_byte(0xc7); + emit_byte(0x40+d); + emit_byte(offset); + emit_word(i); +} +LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset)) +{ + Dif(!isbyte(offset)) abort(); + emit_byte(0xc6); + emit_byte(0x40+d); + emit_byte(offset); + emit_byte(i); +} +LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset)) +{ + Dif(!isbyte(offset)) abort(); + emit_byte(0x89); + emit_byte(0x40+8*s+d); + emit_byte(offset); 
+} +LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset)) +{ + Dif(!isbyte(offset)) abort(); + emit_byte(0x66); + emit_byte(0x89); + emit_byte(0x40+8*s+d); + emit_byte(offset); +} +LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset)) +{ + Dif(!isbyte(offset)) abort(); + emit_byte(0x88); + emit_byte(0x40+8*s+d); + emit_byte(offset); +} +LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset)) + +LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset)) +{ + if (optimize_imm8 && isbyte(offset)) { + emit_byte(0x8d); + emit_byte(0x40+8*d+s); + emit_byte(offset); + } + else { + emit_byte(0x8d); + emit_byte(0x80+8*d+s); + emit_long(offset); + } +} +LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset)) + +LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset)) +{ + int fi; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + + if (optimize_imm8 && isbyte(offset)) { + emit_byte(0x8d); + emit_byte(0x44+8*d); + emit_byte(0x40*fi+8*index+s); + emit_byte(offset); + } + else { + emit_byte(0x8d); + emit_byte(0x84+8*d); + emit_byte(0x40*fi+8*index+s); + emit_long(offset); + } +} +LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset)) + +LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor)) +{ + int isebp=(s==5)?0x40:0; + int fi; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + + emit_byte(0x8d); + emit_byte(0x04+8*d+isebp); + emit_byte(0x40*fi+8*index+s); + if (isebp) + emit_byte(0); +} +LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor)) + +LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset)) +{ + if (optimize_imm8 && 
isbyte(offset)) { + emit_byte(0x89); + emit_byte(0x40+8*s+d); + emit_byte(offset); + } + else { + emit_byte(0x89); + emit_byte(0x80+8*s+d); + emit_long(offset); + } +} +LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset)) +{ + emit_byte(0x66); + emit_byte(0x89); + emit_byte(0x80+8*s+d); + emit_long(offset); +} +LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset)) +{ + if (optimize_imm8 && isbyte(offset)) { + emit_byte(0x88); + emit_byte(0x40+8*s+d); + emit_byte(offset); + } + else { + emit_byte(0x88); + emit_byte(0x80+8*s+d); + emit_long(offset); + } +} +LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset)) + +LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r)) +{ + emit_byte(0x0f); + emit_byte(0xc8+r); +} +LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r)) + +LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r)) +{ + emit_byte(0x66); + emit_byte(0xc1); + emit_byte(0xc0+r); + emit_byte(0x08); +} +LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r)) + +LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s)) +{ + emit_byte(0x89); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s)) + +LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s)) +{ + emit_byte(0x89); + emit_byte(0x05+8*s); + emit_long(d); +} +LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s)) + +LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s)) +{ + emit_byte(0x66); + emit_byte(0x89); + emit_byte(0x05+8*s); + emit_long(d); +} +LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s)) + +LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s)) +{ + emit_byte(0x66); + emit_byte(0x8b); + emit_byte(0x05+8*d); + emit_long(s); +} +LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s)) + +LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s)) +{ + emit_byte(0x88); + emit_byte(0x05+8*(s&0xf)); /* XXX this handles %ah case (defined as 0x10+4) and others */ + emit_long(d); +} +LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 
s)) + +LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s)) +{ + emit_byte(0x8a); + emit_byte(0x05+8*d); + emit_long(s); +} +LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s)) + +LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s)) +{ + emit_byte(0xb8+d); + emit_long(s); +} +LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s)) + +LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s)) +{ + emit_byte(0x66); + emit_byte(0xb8+d); + emit_word(s); +} +LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s)) + +LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s)) +{ + emit_byte(0xb0+d); + emit_byte(s); +} +LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s)) + +LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s)) +{ + emit_byte(0x81); + emit_byte(0x15); + emit_long(d); + emit_long(s); +} +LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s)) + +LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s)) +{ + if (optimize_imm8 && isbyte(s)) { + emit_byte(0x83); + emit_byte(0x05); + emit_long(d); + emit_byte(s); + } + else { + emit_byte(0x81); + emit_byte(0x05); + emit_long(d); + emit_long(s); + } +} +LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s)) + +LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s)) +{ + emit_byte(0x66); + emit_byte(0x81); + emit_byte(0x05); + emit_long(d); + emit_word(s); +} +LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s)) + +LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s)) +{ + emit_byte(0x80); + emit_byte(0x05); + emit_long(d); + emit_byte(s); +} +LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s)) + +LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i)) +{ + if (optimize_accum && isaccum(d)) + emit_byte(0xa9); + else { + emit_byte(0xf7); + emit_byte(0xc0+d); + } + emit_long(i); +} +LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s)) +{ + emit_byte(0x85); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s)) +{ + emit_byte(0x66); + emit_byte(0x85); + emit_byte(0xc0+8*s+d); +} 
+LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s)) + +LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s)) +{ + emit_byte(0x84); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s)) + +LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i)) +{ + emit_byte(0x81); + emit_byte(0xf0+d); + emit_long(i); +} +LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i)) +{ + if (optimize_imm8 && isbyte(i)) { + emit_byte(0x83); + emit_byte(0xe0+d); + emit_byte(i); + } + else { + if (optimize_accum && isaccum(d)) + emit_byte(0x25); + else { + emit_byte(0x81); + emit_byte(0xe0+d); + } + emit_long(i); + } +} +LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i)) +{ + emit_byte(0x66); + if (optimize_imm8 && isbyte(i)) { + emit_byte(0x83); + emit_byte(0xe0+d); + emit_byte(i); + } + else { + if (optimize_accum && isaccum(d)) + emit_byte(0x25); + else { + emit_byte(0x81); + emit_byte(0xe0+d); + } + emit_word(i); + } +} +LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s)) +{ + emit_byte(0x21); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s)) +{ + emit_byte(0x66); + emit_byte(0x21); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s)) + +LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s)) +{ + emit_byte(0x20); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s)) + +LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i)) +{ + if (optimize_imm8 && isbyte(i)) { + emit_byte(0x83); + emit_byte(0xc8+d); + emit_byte(i); + } + else { + if (optimize_accum && isaccum(d)) + emit_byte(0x0d); + else { + emit_byte(0x81); + emit_byte(0xc8+d); + } + emit_long(i); + } +} +LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s)) +{ + emit_byte(0x09); + emit_byte(0xc0+8*s+d); +} 
+LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s)) +{ + emit_byte(0x66); + emit_byte(0x09); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s)) + +LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s)) +{ + emit_byte(0x08); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s)) + +LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s)) +{ + emit_byte(0x11); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s)) + +LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s)) +{ + emit_byte(0x66); + emit_byte(0x11); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s)) + +LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s)) +{ + emit_byte(0x10); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s)) + +LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s)) +{ + emit_byte(0x01); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s)) +{ + emit_byte(0x66); + emit_byte(0x01); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s)) + +LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s)) +{ + emit_byte(0x00); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s)) + +LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i)) +{ + if (isbyte(i)) { + emit_byte(0x83); + emit_byte(0xe8+d); + emit_byte(i); + } + else { + if (optimize_accum && isaccum(d)) + emit_byte(0x2d); + else { + emit_byte(0x81); + emit_byte(0xe8+d); + } + emit_long(i); + } +} +LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i)) +{ + if (optimize_accum && isaccum(d)) + emit_byte(0x2c); + else { + emit_byte(0x80); + emit_byte(0xe8+d); + } + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i)) +{ + if (isbyte(i)) { + emit_byte(0x83); + emit_byte(0xc0+d); + emit_byte(i); + } + else { + if (optimize_accum && 
isaccum(d)) + emit_byte(0x05); + else { + emit_byte(0x81); + emit_byte(0xc0+d); + } + emit_long(i); + } +} +LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i)) +{ + emit_byte(0x66); + if (isbyte(i)) { + emit_byte(0x83); + emit_byte(0xc0+d); + emit_byte(i); + } + else { + if (optimize_accum && isaccum(d)) + emit_byte(0x05); + else { + emit_byte(0x81); + emit_byte(0xc0+d); + } + emit_word(i); + } +} +LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i)) +{ + if (optimize_accum && isaccum(d)) + emit_byte(0x04); + else { + emit_byte(0x80); + emit_byte(0xc0+d); + } + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i)) + +LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s)) +{ + emit_byte(0x19); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s)) + +LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s)) +{ + emit_byte(0x66); + emit_byte(0x19); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s)) + +LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s)) +{ + emit_byte(0x18); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s)) + +LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s)) +{ + emit_byte(0x29); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s)) +{ + emit_byte(0x66); + emit_byte(0x29); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s)) + +LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s)) +{ + emit_byte(0x28); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s)) + +LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s)) +{ + emit_byte(0x39); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i)) +{ + if (optimize_imm8 && isbyte(i)) { + emit_byte(0x83); + emit_byte(0xf8+r); + emit_byte(i); + } + else { + if (optimize_accum && isaccum(r)) + 
emit_byte(0x3d); + else { + emit_byte(0x81); + emit_byte(0xf8+r); + } + emit_long(i); + } +} +LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s)) +{ + emit_byte(0x66); + emit_byte(0x39); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s)) + +LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s)) +{ + emit_byte(0x80); + emit_byte(0x3d); + emit_long(d); + emit_byte(s); +} +LENDFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s)) + +LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i)) +{ + if (optimize_accum && isaccum(d)) + emit_byte(0x3c); + else { + emit_byte(0x80); + emit_byte(0xf8+d); + } + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s)) +{ + emit_byte(0x38); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s)) + +LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor)) +{ + int fi; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + emit_byte(0x39); + emit_byte(0x04+8*d); + emit_byte(5+8*index+0x40*fi); + emit_long(offset); +} +LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor)) + +LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s)) +{ + emit_byte(0x31); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s)) +{ + emit_byte(0x66); + emit_byte(0x31); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s)) + +LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s)) +{ + emit_byte(0x30); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s)) + +LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s)) +{ + if (optimize_imm8 && isbyte(s)) { + emit_byte(0x83); + emit_byte(0x2d); + emit_long(d); + emit_byte(s); + } + else { + emit_byte(0x81); + emit_byte(0x2d); + emit_long(d); + emit_long(s); + } 
+} +LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s)) + +LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s)) +{ + if (optimize_imm8 && isbyte(s)) { + emit_byte(0x83); + emit_byte(0x3d); + emit_long(d); + emit_byte(s); + } + else { + emit_byte(0x81); + emit_byte(0x3d); + emit_long(d); + emit_long(s); + } +} +LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s)) + +LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2)) +{ + emit_byte(0x87); + emit_byte(0xc0+8*r1+r2); +} +LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2)) + +LOWFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2)) +{ + emit_byte(0x86); + emit_byte(0xc0+8*(r1&0xf)+(r2&0xf)); /* XXX this handles upper-halves registers (e.g. %ah defined as 0x10+4) */ +} +LENDFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2)) + +/************************************************************************* + * FIXME: mem access modes probably wrong * + *************************************************************************/ + +LOWFUNC(READ,WRITE,0,raw_pushfl,(void)) +{ + emit_byte(0x9c); +} +LENDFUNC(READ,WRITE,0,raw_pushfl,(void)) + +LOWFUNC(WRITE,READ,0,raw_popfl,(void)) +{ + emit_byte(0x9d); +} +LENDFUNC(WRITE,READ,0,raw_popfl,(void)) + +/* Generate floating-point instructions */ +static inline void x86_fadd_m(MEMR s) +{ + emit_byte(0xdc); + emit_byte(0x05); + emit_long(s); +} + +#endif + +/************************************************************************* + * Unoptimizable stuff --- jump * + *************************************************************************/ + +static inline void raw_call_r(R4 r) +{ +#if USE_NEW_RTASM + CALLsr(r); +#else + emit_byte(0xff); + emit_byte(0xd0+r); +#endif +} + +static inline void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m) +{ +#if USE_NEW_RTASM + ADDR32 CALLsm(base, X86_NOREG, r, m); +#else + int mu; + switch(m) { + case 1: mu=0; break; + case 2: mu=1; break; + case 4: mu=2; break; + case 8: mu=3; break; + default: abort(); + } + emit_byte(0xff); + emit_byte(0x14); + 
emit_byte(0x05+8*r+0x40*mu); + emit_long(base); +#endif +} + +static inline void raw_jmp_r(R4 r) +{ +#if USE_NEW_RTASM + JMPsr(r); +#else + emit_byte(0xff); + emit_byte(0xe0+r); +#endif +} + +static inline void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m) +{ +#if USE_NEW_RTASM + ADDR32 JMPsm(base, X86_NOREG, r, m); +#else + int mu; + switch (m) { + case 1: mu=0; break; + case 2: mu=1; break; + case 4: mu=2; break; + case 8: mu=3; break; + default: abort(); + } + emit_byte(0xff); + emit_byte(0x24); + emit_byte(0x05+8*r+0x40*mu); + emit_long(base); +#endif +} + +static inline void raw_jmp_m(uae_u32 base) +{ + emit_byte(0xff); + emit_byte(0x25); + emit_long(base); +} + + +static inline void raw_call(uae_u32 t) +{ +#if USE_NEW_RTASM + ADDR32 CALLm(t); +#else + emit_byte(0xe8); + emit_long(t-(uintptr)target-4); +#endif +} + +static inline void raw_jmp(uae_u32 t) +{ +#if USE_NEW_RTASM + ADDR32 JMPm(t); +#else + emit_byte(0xe9); + emit_long(t-(uintptr)target-4); +#endif +} + +static inline void raw_jl(uae_u32 t) +{ + emit_byte(0x0f); + emit_byte(0x8c); + emit_long(t-(uintptr)target-4); +} + +static inline void raw_jz(uae_u32 t) +{ + emit_byte(0x0f); + emit_byte(0x84); + emit_long(t-(uintptr)target-4); +} + +static inline void raw_jnz(uae_u32 t) +{ + emit_byte(0x0f); + emit_byte(0x85); + emit_long(t-(uintptr)target-4); +} + +static inline void raw_jnz_l_oponly(void) +{ + emit_byte(0x0f); + emit_byte(0x85); +} + +static inline void raw_jcc_l_oponly(int cc) +{ + emit_byte(0x0f); + emit_byte(0x80+cc); +} + +static inline void raw_jnz_b_oponly(void) +{ + emit_byte(0x75); +} + +static inline void raw_jz_b_oponly(void) +{ + emit_byte(0x74); +} + +static inline void raw_jcc_b_oponly(int cc) +{ + emit_byte(0x70+cc); +} + +static inline void raw_jmp_l_oponly(void) +{ + emit_byte(0xe9); +} + +static inline void raw_jmp_b_oponly(void) +{ + emit_byte(0xeb); +} + +static inline void raw_ret(void) +{ + emit_byte(0xc3); +} + +static inline void raw_emit_nop(void) +{ + 
emit_byte(0x90); +} + +static inline void raw_emit_nop_filler(int nbytes) +{ + +#if defined(CPU_x86_64) + /* The recommended way to pad 64bit code is to use NOPs preceded by + maximally four 0x66 prefixes. Balance the size of nops. */ + static const uae_u8 prefixes[4] = { 0x66, 0x66, 0x66, 0x66 }; + if (nbytes == 0) + return; + + int i; + int nnops = (nbytes + 3) / 4; + int len = nbytes / nnops; + int remains = nbytes - nnops * len; + + for (i = 0; i < remains; i++) { + emit_block(prefixes, len); + raw_emit_nop(); + } + for (; i < nnops; i++) { + emit_block(prefixes, len - 1); + raw_emit_nop(); + } +#else + /* Source: GNU Binutils 2.12.90.0.15 */ + /* Various efficient no-op patterns for aligning code labels. + Note: Don't try to assemble the instructions in the comments. + 0L and 0w are not legal. */ + static const uae_u8 f32_1[] = + {0x90}; /* nop */ + static const uae_u8 f32_2[] = + {0x89,0xf6}; /* movl %esi,%esi */ + static const uae_u8 f32_3[] = + {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */ + static const uae_u8 f32_4[] = + {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */ + static const uae_u8 f32_5[] = + {0x90, /* nop */ + 0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */ + static const uae_u8 f32_6[] = + {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */ + static const uae_u8 f32_7[] = + {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */ + static const uae_u8 f32_8[] = + {0x90, /* nop */ + 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */ + static const uae_u8 f32_9[] = + {0x89,0xf6, /* movl %esi,%esi */ + 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ + static const uae_u8 f32_10[] = + {0x8d,0x76,0x00, /* leal 0(%esi),%esi */ + 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ + static const uae_u8 f32_11[] = + {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */ + 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ + static const uae_u8 f32_12[] = + {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 
0L(%esi),%esi */ + 0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */ + static const uae_u8 f32_13[] = + {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */ + 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ + static const uae_u8 f32_14[] = + {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */ + 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ + static const uae_u8 f32_15[] = + {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */ + 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90}; + static const uae_u8 f32_16[] = + {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */ + 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90}; + static const uae_u8 *const f32_patt[] = { + f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8, + f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15 + }; + + int nloops = nbytes / 16; + while (nloops-- > 0) + emit_block(f32_16, sizeof(f32_16)); + + nbytes %= 16; + if (nbytes) + emit_block(f32_patt[nbytes - 1], nbytes); +#endif +} + + +/************************************************************************* + * Flag handling, to and fro UAE flag register * + *************************************************************************/ + +static inline void raw_flags_evicted(int r) +{ + //live.state[FLAGTMP].status=CLEAN; + live.state[FLAGTMP].status=INMEM; + live.state[FLAGTMP].realreg=-1; + /* We just "evicted" FLAGTMP. */ + if (live.nat[r].nholds!=1) { + /* Huh? 
*/ + abort(); + } + live.nat[r].nholds=0; +} + +#define FLAG_NREG1_FLAGREG 0 /* Set to -1 if any register will do */ +static inline void raw_flags_to_reg_FLAGREG(int r) +{ + raw_lahf(0); /* Most flags in AH */ + //raw_setcc(r,0); /* V flag in AL */ + raw_setcc_m((uintptr)live.state[FLAGTMP].mem,0); + +#if 1 /* Let's avoid those nasty partial register stalls */ + //raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r); + raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,AH_INDEX); + raw_flags_evicted(r); +#endif +} + +#define FLAG_NREG2_FLAGREG 0 /* Set to -1 if any register will do */ +static inline void raw_reg_to_flags_FLAGREG(int r) +{ + raw_cmp_b_ri(r,-127); /* set V */ + raw_sahf(0); +} + +#define FLAG_NREG3_FLAGREG 0 /* Set to -1 if any register will do */ +static __inline__ void raw_flags_set_zero_FLAGREG(int s, int tmp) +{ + raw_mov_l_rr(tmp,s); + raw_lahf(s); /* flags into ah */ + raw_and_l_ri(s,0xffffbfff); + raw_and_l_ri(tmp,0x00004000); + raw_xor_l_ri(tmp,0x00004000); + raw_or_l(s,tmp); + raw_sahf(s); +} + +static inline void raw_flags_init_FLAGREG(void) { } + +#define FLAG_NREG1_FLAGSTK -1 /* Set to -1 if any register will do */ +static inline void raw_flags_to_reg_FLAGSTK(int r) +{ + raw_pushfl(); + raw_pop_l_r(r); + raw_mov_l_mr((uintptr)live.state[FLAGTMP].mem,r); + raw_flags_evicted(r); +} + +#define FLAG_NREG2_FLAGSTK -1 /* Set to -1 if any register will do */ +static inline void raw_reg_to_flags_FLAGSTK(int r) +{ + raw_push_l_r(r); + raw_popfl(); +} + +#define FLAG_NREG3_FLAGSTK -1 /* Set to -1 if any register will do */ +static inline void raw_flags_set_zero_FLAGSTK(int s, int tmp) +{ + raw_mov_l_rr(tmp,s); + raw_pushfl(); + raw_pop_l_r(s); + raw_and_l_ri(s,0xffffffbf); + raw_and_l_ri(tmp,0x00000040); + raw_xor_l_ri(tmp,0x00000040); + raw_or_l(s,tmp); + raw_push_l_r(s); + raw_popfl(); +} + +static inline void raw_flags_init_FLAGSTK(void) { } + +#if defined(CPU_x86_64) +/* Try to use the LAHF/SETO method on x86_64 since it is faster. 
+ This can't be the default because some older CPUs don't support + LAHF/SAHF in long mode. */ +static int FLAG_NREG1_FLAGGEN = 0; +static inline void raw_flags_to_reg_FLAGGEN(int r) +{ + if (have_lahf_lm) { + // NOTE: the interpreter uses the normal EFLAGS layout + // pushf/popf CF(0) ZF( 6) SF( 7) OF(11) + // sahf/lahf CF(8) ZF(14) SF(15) OF( 0) + assert(r == 0); + raw_setcc(r,0); /* V flag in AL */ + raw_lea_l_r_scaled(0,0,8); /* move it to its EFLAGS location */ + raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,0); + raw_lahf(0); /* most flags in AH */ + raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,AH_INDEX); + raw_flags_evicted(r); + } + else + raw_flags_to_reg_FLAGSTK(r); +} + +static int FLAG_NREG2_FLAGGEN = 0; +static inline void raw_reg_to_flags_FLAGGEN(int r) +{ + if (have_lahf_lm) { + raw_xchg_b_rr(0,AH_INDEX); + raw_cmp_b_ri(r,-120); /* set V */ + raw_sahf(0); + } + else + raw_reg_to_flags_FLAGSTK(r); +} + +static int FLAG_NREG3_FLAGGEN = 0; +static inline void raw_flags_set_zero_FLAGGEN(int s, int tmp) +{ + if (have_lahf_lm) + raw_flags_set_zero_FLAGREG(s, tmp); + else + raw_flags_set_zero_FLAGSTK(s, tmp); +} + +static inline void raw_flags_init_FLAGGEN(void) +{ + if (have_lahf_lm) { + FLAG_NREG1_FLAGGEN = FLAG_NREG1_FLAGREG; + FLAG_NREG2_FLAGGEN = FLAG_NREG2_FLAGREG; + FLAG_NREG1_FLAGGEN = FLAG_NREG3_FLAGREG; + } + else { + FLAG_NREG1_FLAGGEN = FLAG_NREG1_FLAGSTK; + FLAG_NREG2_FLAGGEN = FLAG_NREG2_FLAGSTK; + FLAG_NREG1_FLAGGEN = FLAG_NREG3_FLAGSTK; + } +} +#endif + +#ifdef SAHF_SETO_PROFITABLE +#define FLAG_SUFFIX FLAGREG +#elif defined CPU_x86_64 +#define FLAG_SUFFIX FLAGGEN +#else +#define FLAG_SUFFIX FLAGSTK +#endif + +#define FLAG_GLUE_2(x, y) x ## _ ## y +#define FLAG_GLUE_1(x, y) FLAG_GLUE_2(x, y) +#define FLAG_GLUE(x) FLAG_GLUE_1(x, FLAG_SUFFIX) + +#define raw_flags_init FLAG_GLUE(raw_flags_init) +#define FLAG_NREG1 FLAG_GLUE(FLAG_NREG1) +#define raw_flags_to_reg FLAG_GLUE(raw_flags_to_reg) +#define FLAG_NREG2 FLAG_GLUE(FLAG_NREG2) +#define 
raw_reg_to_flags FLAG_GLUE(raw_reg_to_flags) +#define FLAG_NREG3 FLAG_GLUE(FLAG_NREG3) +#define raw_flags_set_zero FLAG_GLUE(raw_flags_set_zero) + +/* Apparently, there are enough instructions between flag store and + flag reload to avoid the partial memory stall */ +static inline void raw_load_flagreg(uae_u32 target, uae_u32 r) +{ +#if 1 + raw_mov_l_rm(target,(uintptr)live.state[r].mem); +#else + raw_mov_b_rm(target,(uintptr)live.state[r].mem); + raw_mov_b_rm(target+4,((uintptr)live.state[r].mem)+1); +#endif +} + +#ifdef UAE +/* FLAGX is word-sized */ +#else +/* FLAGX is byte sized, and we *do* write it at that size */ +#endif +static inline void raw_load_flagx(uae_u32 target, uae_u32 r) +{ +#ifdef UAE + if (live.nat[target].canword) +#else + if (live.nat[target].canbyte) + raw_mov_b_rm(target,(uintptr)live.state[r].mem); + else if (live.nat[target].canword) +#endif + raw_mov_w_rm(target,(uintptr)live.state[r].mem); + else + raw_mov_l_rm(target,(uintptr)live.state[r].mem); +} + +static inline void raw_dec_sp(int off) +{ + if (off) { +#ifdef CPU_x86_64 + emit_byte(0x48); /* REX prefix */ +#endif + raw_sub_l_ri(ESP_INDEX,off); + } +} + +static inline void raw_inc_sp(int off) +{ + if (off) { +#ifdef CPU_x86_64 + emit_byte(0x48); /* REX prefix */ +#endif + raw_add_l_ri(ESP_INDEX,off); + } +} + +static inline void raw_push_regs_to_preserve(void) { + for (int i=N_REGS;i--;) { + if (need_to_preserve[i]) + raw_push_l_r(i); + } +} + +static inline void raw_pop_preserved_regs(void) { + for (int i=0;ix86_vendor_id; + + if (!strcmp(v, "GenuineIntel")) + c->x86_vendor = X86_VENDOR_INTEL; + else if (!strcmp(v, "AuthenticAMD")) + c->x86_vendor = X86_VENDOR_AMD; + else if (!strcmp(v, "CyrixInstead")) + c->x86_vendor = X86_VENDOR_CYRIX; + else if (!strcmp(v, "Geode by NSC")) + c->x86_vendor = X86_VENDOR_NSC; + else if (!strcmp(v, "UMC UMC UMC ")) + c->x86_vendor = X86_VENDOR_UMC; + else if (!strcmp(v, "CentaurHauls")) + c->x86_vendor = X86_VENDOR_CENTAUR; + else if (!strcmp(v, 
"NexGenDriven")) + c->x86_vendor = X86_VENDOR_NEXGEN; + else if (!strcmp(v, "RiseRiseRise")) + c->x86_vendor = X86_VENDOR_RISE; + else if (!strcmp(v, "GenuineTMx86") || !strcmp(v, "TransmetaCPU")) + c->x86_vendor = X86_VENDOR_TRANSMETA; + else + c->x86_vendor = X86_VENDOR_UNKNOWN; +} + +/* + * Generic CPUID function + * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx + * resulting in stale register contents being returned. + */ +/* Some CPUID calls want 'count' to be placed in ecx */ +#ifdef __GNUC__ +static void cpuid_count(uae_u32 op, uae_u32 count, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx) +{ + uae_u32 _eax, _ebx, _ecx, _edx; + _eax = op; + _ecx = count; + __asm__ __volatile__( + " movl %0,%%eax \n" + " movl %2,%%ecx \n" + " cpuid \n" + " movl %%eax,%0 \n" + " movl %%ebx,%1 \n" + " movl %%ecx,%2 \n" + " movl %%edx,%3 \n" + : "+m" (_eax), + "=m" (_ebx), + "+m" (_ecx), + "=m" (_edx) + : + : "eax", "ebx", "ecx", "edx"); + *eax = _eax; + *ebx = _ebx; + *ecx = _ecx; + *edx = _edx; +} +#endif + +#ifdef _MSC_VER +#include +static void cpuid_count(uae_u32 op, uae_u32 count, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx) +{ + int cpuinfo[4]; + cpuinfo[0] = op; + cpuinfo[1] = 0; + cpuinfo[2] = count; + cpuinfo[3] = 0; + __cpuidex(cpuinfo, op, count); + *eax = cpuinfo[0]; + *ebx = cpuinfo[1]; + *ecx = cpuinfo[2]; + *edx = cpuinfo[3]; +} +#endif + +static void +cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx) +{ + cpuid_count(op, 0, eax, ebx, ecx, edx); +} + +static void +raw_init_cpu(void) +{ + struct cpuinfo_x86 *c = &cpuinfo; + uae_u32 dummy; + + /* Defaults */ + c->x86_processor = X86_PROCESSOR_max; + c->x86_vendor = X86_VENDOR_UNKNOWN; + c->cpuid_level = -1; /* CPUID not detected */ + c->x86_model = c->x86_mask = 0; /* So far unknown... 
*/ + c->x86_vendor_id[0] = '\0'; /* Unset */ + c->x86_hwcap = 0; +#ifdef CPU_x86_64 + c->x86_clflush_size = 64; +#else + c->x86_clflush_size = 32; +#endif + + /* Get vendor name */ + c->x86_vendor_id[12] = '\0'; + cpuid(0x00000000, + (uae_u32 *)&c->cpuid_level, + (uae_u32 *)&c->x86_vendor_id[0], + (uae_u32 *)&c->x86_vendor_id[8], + (uae_u32 *)&c->x86_vendor_id[4]); + x86_get_cpu_vendor(c); + + /* Intel-defined flags: level 0x00000001 */ + c->x86_brand_id = 0; + if ( c->cpuid_level >= 0x00000001 ) { + uae_u32 tfms, brand_id; + cpuid(0x00000001, &tfms, &brand_id, &dummy, &c->x86_hwcap); + c->x86 = (tfms >> 8) & 15; + if (c->x86 == 0xf) + c->x86 += (tfms >> 20) & 0xff; /* extended family */ + c->x86_model = (tfms >> 4) & 15; + if (c->x86_model == 0xf) + c->x86_model |= (tfms >> 12) & 0xf0; /* extended model */ + c->x86_brand_id = brand_id & 0xff; + c->x86_mask = tfms & 15; + if (c->x86_hwcap & (1 << 19)) + { + c->x86_clflush_size = ((brand_id >> 8) & 0xff) * 8; + } + } else { + /* Have CPUID level 0 only - unheard of */ + c->x86 = 4; + } + + /* AMD-defined flags: level 0x80000001 */ + uae_u32 xlvl; + cpuid(0x80000000, &xlvl, &dummy, &dummy, &dummy); + if ( (xlvl & 0xffff0000) == 0x80000000 ) { + if ( xlvl >= 0x80000001 ) { + uae_u32 features, extra_features; + cpuid(0x80000001, &dummy, &dummy, &extra_features, &features); + if (features & (1 << 29)) { + /* Assume x86-64 if long mode is supported */ + c->x86_processor = X86_PROCESSOR_X86_64; + } + if (extra_features & (1 << 0)) + have_lahf_lm = true; + } + } + + /* Canonicalize processor ID */ + switch (c->x86) { + case 3: + c->x86_processor = X86_PROCESSOR_I386; + break; + case 4: + c->x86_processor = X86_PROCESSOR_I486; + break; + case 5: + if (c->x86_vendor == X86_VENDOR_AMD) + c->x86_processor = X86_PROCESSOR_K6; + else + c->x86_processor = X86_PROCESSOR_PENTIUM; + break; + case 6: + if (c->x86_vendor == X86_VENDOR_AMD) + c->x86_processor = X86_PROCESSOR_ATHLON; + else + c->x86_processor = X86_PROCESSOR_PENTIUMPRO; 
+ break; + case 15: + if (c->x86_processor == X86_PROCESSOR_max) { + switch (c->x86_vendor) { + case X86_VENDOR_INTEL: + c->x86_processor = X86_PROCESSOR_PENTIUM4; + break; + case X86_VENDOR_AMD: + /* Assume a 32-bit Athlon processor if not in long mode */ + c->x86_processor = X86_PROCESSOR_ATHLON; + break; + } + } + break; + } + if (c->x86_processor == X86_PROCESSOR_max) { + c->x86_processor = X86_PROCESSOR_I386; + jit_log("Error: unknown processor type"); + jit_log(" Family : %d", c->x86); + jit_log(" Model : %d", c->x86_model); + jit_log(" Mask : %d", c->x86_mask); + jit_log(" Vendor : %s [%d]", c->x86_vendor_id, c->x86_vendor); + if (c->x86_brand_id) + { + jit_log(" BrandID : %02x", c->x86_brand_id); + } + } + + /* Have CMOV support? */ + have_cmov = (c->x86_hwcap & (1 << 15)) != 0; +#if defined(CPU_x86_64) + if (!have_cmov) { + jit_abort("x86-64 implementations are bound to have CMOV!"); + } +#endif + + c->x86_has_xmm2 = (c->x86_hwcap & (1 << 26)) != 0; + + /* Can the host CPU suffer from partial register stalls? */ + // non-RAT_STALL mode is currently broken + have_rat_stall = true; //(c->x86_vendor == X86_VENDOR_INTEL); +#if 0 + /* It appears that partial register writes are a bad idea even on + AMD K7 cores, even though they are not supposed to have the + dreaded rat stall. Why? 
Anyway, that's why we lie about it ;-) */ + if (c->x86_processor == X86_PROCESSOR_ATHLON) + have_rat_stall = true; +#endif + + /* Alignments */ + if (tune_alignment) { + align_loops = x86_alignments[c->x86_processor].align_loop; + align_jumps = x86_alignments[c->x86_processor].align_jump; + } + + jit_log(" : Max CPUID level=%d Processor is %s [%s]", + c->cpuid_level, c->x86_vendor_id, + x86_processor_string_table[c->x86_processor]); + + raw_flags_init(); +} + +#ifndef UAE +static void __attribute_noinline__ prevent_redzone_use(void) {} + +static bool target_check_bsf(void) +{ + bool mismatch = false; + for (int g_ZF = 0; g_ZF <= 1; g_ZF++) { + for (int g_CF = 0; g_CF <= 1; g_CF++) { + for (int g_OF = 0; g_OF <= 1; g_OF++) { + for (int g_SF = 0; g_SF <= 1; g_SF++) { + for (int value = -1; value <= 1; value++) { + uintptr flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF; + intptr tmp = value; + prevent_redzone_use(); + __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0" + : "+r" (flags), "+r" (tmp) : : "cc"); + int OF = (flags >> 11) & 1; + int SF = (flags >> 7) & 1; + int ZF = (flags >> 6) & 1; + int CF = flags & 1; + tmp = (value == 0); + if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF) + mismatch = true; + } + } + } + } + } + if (mismatch) + { + jit_log(" : Target CPU defines all flags on BSF instruction"); + } + return !mismatch; +} +#endif + +/************************************************************************* + * FPU stuff * + *************************************************************************/ + + +static inline void raw_fp_init(void) +{ + int i; + + for (i=0;i1) { + emit_byte(0x9b); + emit_byte(0xdb); + emit_byte(0xe3); + live.tos=-1; + } +#endif + while (live.tos>=1) { + emit_byte(0xde); + emit_byte(0xd9); + live.tos-=2; + } + while (live.tos>=0) { + emit_byte(0xdd); + emit_byte(0xd8); + live.tos--; + } + raw_fp_init(); +} + +static inline void make_tos(int r) +{ + int p,q; + + if (live.spos[r]<0) { /* Register not yet 
on stack */ + emit_byte(0xd9); + emit_byte(0xe8); /* Push '1' on the stack, just to grow it */ + live.tos++; + live.spos[r]=live.tos; + live.onstack[live.tos]=r; + return; + } + /* Register is on stack */ + if (live.tos==live.spos[r]) + return; + p=live.spos[r]; + q=live.onstack[live.tos]; + + emit_byte(0xd9); + emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */ + live.onstack[live.tos]=r; + live.spos[r]=live.tos; + live.onstack[p]=q; + live.spos[q]=p; +} + +static inline void make_tos2(int r, int r2) +{ + int q; + + make_tos(r2); /* Put the reg that's supposed to end up in position2 on top */ + + if (live.spos[r]<0) { /* Register not yet on stack */ + make_tos(r); /* This will extend the stack */ + return; + } + /* Register is on stack */ + emit_byte(0xd9); + emit_byte(0xc9); /* Move r2 into position 2 */ + + q=live.onstack[live.tos-1]; + live.onstack[live.tos]=q; + live.spos[q]=live.tos; + live.onstack[live.tos-1]=r2; + live.spos[r2]=live.tos-1; + + make_tos(r); /* And r into 1 */ +} + +static inline int stackpos(int r) +{ + if (live.spos[r]<0) + abort(); + if (live.tos=0) { + /* source is on top of stack, and we already have the dest */ + int dd=stackpos(d); + emit_byte(0xdd); + emit_byte(0xd0+dd); + } + else { + emit_byte(0xd9); + emit_byte(0xc0+ds); /* duplicate source on tos */ + tos_make(d); /* store to destination, pop if necessary */ + } +} +LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s)) + +LOWFUNC(NONE,READ,2,raw_fldcw_m_indexed,(R4 index, IMM base)) +{ + x86_64_prefix(true, false, NULL, NULL, &index); + emit_byte(0xd9); + emit_byte(0xa8 + index); + emit_long(base); +} +LENDFUNC(NONE,READ,2,raw_fldcw_m_indexed,(R4 index, IMM base)) + +LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s)) +{ + int ds; + + if (d!=s) { + usereg(s); + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* duplicate source */ + emit_byte(0xd9); + emit_byte(0xfa); /* take square root */ + tos_make(d); /* store to destination */ + } + else { + make_tos(d); 
+ emit_byte(0xd9); + emit_byte(0xfa); /* take square root */ + } +} +LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s)) +{ + int ds; + + if (d!=s) { + usereg(s); + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* duplicate source */ + emit_byte(0xd9); + emit_byte(0xe1); /* take fabs */ + tos_make(d); /* store to destination */ + } + else { + make_tos(d); + emit_byte(0xd9); + emit_byte(0xe1); /* take fabs */ + } +} +LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s)) +{ + int ds; + + if (d!=s) { + usereg(s); + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* duplicate source */ + emit_byte(0xd9); + emit_byte(0xfc); /* take frndint */ + tos_make(d); /* store to destination */ + } + else { + make_tos(d); + emit_byte(0xd9); + emit_byte(0xfc); /* take frndint */ + } +} +LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s)) +{ + int ds; + + if (d!=s) { + usereg(s); + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* duplicate source */ + emit_byte(0xd9); + emit_byte(0xff); /* take cos */ + tos_make(d); /* store to destination */ + } + else { + make_tos(d); + emit_byte(0xd9); + emit_byte(0xff); /* take cos */ + } +} +LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s)) +{ + int ds; + + if (d!=s) { + usereg(s); + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + emit_byte(0xd9); + emit_byte(0xfe); /* fsin sin(x) */ + tos_make(d); /* store to destination */ + } + else { + make_tos(d); + emit_byte(0xd9); + emit_byte(0xfe); /* fsin y=sin(x) */ + } +} +LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s)) + +static const double one = 1; + +LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s)) +{ + int ds; + + usereg(s); + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + emit_byte(0xd9); + emit_byte(0xc0); /* duplicate top of stack. 
Now up to 8 high */ + emit_byte(0xd9); + emit_byte(0xfc); /* frndint int(x) */ + emit_byte(0xd9); + emit_byte(0xc9); /* swap top two elements */ + emit_byte(0xd8); + emit_byte(0xe1); /* subtract rounded from original */ + emit_byte(0xd9); + emit_byte(0xf0); /* f2xm1 */ + x86_fadd_m((uintptr)&one); /* Add '1' without using extra stack space */ + emit_byte(0xd9); + emit_byte(0xfd); /* and scale it */ + emit_byte(0xdd); + emit_byte(0xd9); /* take he rounded value off */ + tos_make(d); /* store to destination */ +} +LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s)) +{ + int ds; + + usereg(s); + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* duplicate source */ + emit_byte(0xd9); + emit_byte(0xea); /* fldl2e log2(e) */ + emit_byte(0xde); + emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */ + + emit_byte(0xd9); + emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */ + emit_byte(0xd9); + emit_byte(0xfc); /* rndint */ + emit_byte(0xd9); + emit_byte(0xc9); /* swap top two elements */ + emit_byte(0xd8); + emit_byte(0xe1); /* subtract rounded from original */ + emit_byte(0xd9); + emit_byte(0xf0); /* f2xm1 */ + x86_fadd_m((uintptr)&one); /* Add '1' without using extra stack space */ + emit_byte(0xd9); + emit_byte(0xfd); /* and scale it */ + emit_byte(0xdd); + emit_byte(0xd9); /* take he rounded value off */ + tos_make(d); /* store to destination */ +} +LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s)) +{ + int ds; + + usereg(s); + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* duplicate source */ + emit_byte(0xd9); + emit_byte(0xe8); /* push '1' */ + emit_byte(0xd9); + emit_byte(0xc9); /* swap top two */ + emit_byte(0xd9); + emit_byte(0xf1); /* take 1*log2(x) */ + tos_make(d); /* store to destination */ +} +LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s)) + + +LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s)) +{ + int ds; + + if (d!=s) { + 
usereg(s); + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* duplicate source */ + emit_byte(0xd9); + emit_byte(0xe0); /* take fchs */ + tos_make(d); /* store to destination */ + } + else { + make_tos(d); + emit_byte(0xd9); + emit_byte(0xe0); /* take fchs */ + } +} +LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s)) +{ + int ds; + + usereg(s); + usereg(d); + + if (live.spos[s]==live.tos) { + /* Source is on top of stack */ + ds=stackpos(d); + emit_byte(0xdc); + emit_byte(0xc0+ds); /* add source to dest*/ + } + else { + make_tos(d); + ds=stackpos(s); + + emit_byte(0xd8); + emit_byte(0xc0+ds); /* add source to dest*/ + } +} +LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s)) +{ + int ds; + + usereg(s); + usereg(d); + + if (live.spos[s]==live.tos) { + /* Source is on top of stack */ + ds=stackpos(d); + emit_byte(0xdc); + emit_byte(0xe8+ds); /* sub source from dest*/ + } + else { + make_tos(d); + ds=stackpos(s); + + emit_byte(0xd8); + emit_byte(0xe0+ds); /* sub src from dest */ + } +} +LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s)) +{ + int ds; + + usereg(s); + usereg(d); + + make_tos(d); + ds=stackpos(s); + + emit_byte(0xdd); + emit_byte(0xe0+ds); /* cmp dest with source*/ +} +LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s)) +{ + int ds; + + usereg(s); + usereg(d); + + if (live.spos[s]==live.tos) { + /* Source is on top of stack */ + ds=stackpos(d); + emit_byte(0xdc); + emit_byte(0xc8+ds); /* mul dest by source*/ + } + else { + make_tos(d); + ds=stackpos(s); + + emit_byte(0xd8); + emit_byte(0xc8+ds); /* mul dest by source*/ + } +} +LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s)) +{ + int ds; + + usereg(s); + usereg(d); + + if (live.spos[s]==live.tos) { + /* Source is on top of stack */ + ds=stackpos(d); + 
emit_byte(0xdc); + emit_byte(0xf8+ds); /* div dest by source */ + } + else { + make_tos(d); + ds=stackpos(s); + + emit_byte(0xd8); + emit_byte(0xf0+ds); /* div dest by source*/ + } +} +LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s)) +{ + int ds; + + usereg(s); + usereg(d); + + make_tos2(d,s); + ds=stackpos(s); + + if (ds!=1) { + jit_abort("Failed horribly in raw_frem_rr! ds is %d",ds); + } + emit_byte(0xd9); + emit_byte(0xf8); /* take rem from dest by source */ +} +LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s)) +{ + int ds; + + usereg(s); + usereg(d); + + make_tos2(d,s); + ds=stackpos(s); + + if (ds!=1) { + jit_abort("Failed horribly in raw_frem1_rr! ds is %d",ds); + } + emit_byte(0xd9); + emit_byte(0xf5); /* take rem1 from dest by source */ +} +LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s)) + + +LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r)) +{ + make_tos(r); + emit_byte(0xd9); /* ftst */ + emit_byte(0xe4); +} +LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r)) + +LOWFUNC(NONE,NONE,2,raw_fetoxM1_rr,(FW d, FR s)) +{ + int ds; + + if (s==d) + make_tos(s); + else { + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + } + emit_byte(0xd9); + emit_byte(0xea); /* fldl2e log2(e) */ + emit_byte(0xd8); + emit_byte(0xc9); /* fmul x*log2(e) */ + emit_byte(0xdd); + emit_byte(0xd1); /* fst copy up */ + emit_byte(0xd9); + emit_byte(0xfc); /* frndint int(x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap top two elements */ + emit_byte(0xd8); + emit_byte(0xe1); /* fsub x*log2(e) - int(x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */ + emit_byte(0xd9); + emit_byte(0xfd); /* fscale ((2^frac(x))-1)*2^int(x*log2(e)) */ + emit_byte(0xdd); + emit_byte(0xd9); /* fstp copy & pop */ + if (s!=d) + tos_make(d); /* store y=(e^x)-1 */ +} +LENDFUNC(NONE,NONE,2,raw_fetoxM1_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_ftentox_rr,(FW d, FR s)) +{ + int 
ds; + + if (s==d) + make_tos(s); + else { + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + } + emit_byte(0xd9); + emit_byte(0xe9); /* fldl2t log2(10) */ + emit_byte(0xd8); + emit_byte(0xc9); /* fmul x*log2(10) */ + emit_byte(0xdd); + emit_byte(0xd1); /* fst copy up */ + emit_byte(0xd9); + emit_byte(0xfc); /* frndint int(x*log2(10)) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap top two elements */ + emit_byte(0xd8); + emit_byte(0xe1); /* fsub x*log2(10) - int(x*log2(10)) */ + emit_byte(0xd9); + emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */ + x86_fadd_m((uintptr) &one); + emit_byte(0xd9); + emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x*log2(10)) */ + emit_byte(0xdd); + emit_byte(0xd9); /* fstp copy & pop */ + if (s!=d) + tos_make(d); /* store y=10^x */ +} +LENDFUNC(NONE,NONE,2,raw_ftentox_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,3,raw_fsincos_rr,(FW d, FW c, FR s)) +{ + int ds; + + if (s==d) { + //write_log (_T("FSINCOS src = dest\n")); + make_tos(s); + emit_byte(0xd9); + emit_byte(0xfb); /* fsincos sin(x) push cos(x) */ + tos_make(c); /* store cos(x) to c */ + return; + } + + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + emit_byte(0xd9); + emit_byte(0xfb); /* fsincos sin(x) push cos(x) */ + if (live.spos[c]<0) { + if (live.spos[d]<0) { /* occupy both regs directly */ + live.tos++; + live.spos[d]=live.tos; + live.onstack[live.tos]=d; /* sin(x) comes first */ + live.tos++; + live.spos[c]=live.tos; + live.onstack[live.tos]=c; + } + else { + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap cos(x) with sin(x) */ + emit_byte(0xdd); /* store sin(x) to d & pop */ + emit_byte(0xd8+(live.tos+2)-live.spos[d]); + live.tos++; /* occupy a reg for cos(x) here */ + live.spos[c]=live.tos; + live.onstack[live.tos]=c; + } + } + else { + emit_byte(0xdd); /* store cos(x) to c & pop */ + emit_byte(0xd8+(live.tos+2)-live.spos[c]); + tos_make(d); /* store sin(x) to destination */ + } +} +LENDFUNC(NONE,NONE,3,raw_fsincos_rr,(FW d, FW c, FR 
s)) + +LOWFUNC(NONE,NONE,2,raw_fscale_rr,(FRW d, FR s)) +{ + int ds; + + if (live.spos[d]==live.tos && live.spos[s]==live.tos-1) { + //write_log (_T("fscale found x in TOS-1 and y in TOS\n")); + emit_byte(0xd9); + emit_byte(0xfd); /* fscale y*(2^x) */ + } + else { + make_tos(s); /* tos=x */ + ds=stackpos(d); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld y */ + emit_byte(0xd9); + emit_byte(0xfd); /* fscale y*(2^x) */ + tos_make(d); /* store y=y*(2^x) */ + } +} +LENDFUNC(NONE,NONE,2,raw_fscale_rr,(FRW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_ftan_rr,(FW d, FR s)) +{ + int ds; + + if (d!=s) { + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + emit_byte(0xd9); + emit_byte(0xf2); /* fptan tan(x)=y/1.0 */ + emit_byte(0xdd); + emit_byte(0xd8); /* fstp pop 1.0 */ + tos_make(d); /* store to destination */ + } + else { + make_tos(d); + emit_byte(0xd9); + emit_byte(0xf2); /* fptan tan(x)=y/1.0 */ + emit_byte(0xdd); + emit_byte(0xd8); /* fstp pop 1.0 */ + } +} +LENDFUNC(NONE,NONE,2,raw_ftan_rr,(FW d, FR s)) + +#ifdef CPU_x86_64 +#define REX64() emit_byte(0x48) +#else +#define REX64() +#endif + +LOWFUNC(NONE,NONE,1,raw_fcuts_r,(FRW r)) +{ + make_tos(r); /* TOS = r */ + REX64(); + emit_byte(0x83); + emit_byte(0xc4); + emit_byte(0xfc); /* add -4 to esp */ + emit_byte(0xd9); + emit_byte(0x1c); + emit_byte(0x24); /* fstp store r as SINGLE to [esp] and pop */ + emit_byte(0xd9); + emit_byte(0x04); + emit_byte(0x24); /* fld load r as SINGLE from [esp] */ + emit_byte(0x9b); /* let the CPU wait on FPU exceptions */ + REX64(); + emit_byte(0x83); + emit_byte(0xc4); + emit_byte(0x04); /* add +4 to esp */ +} +LENDFUNC(NONE,NONE,1,raw_fcuts_r,(FRW r)) + +LOWFUNC(NONE,NONE,1,raw_fcut_r,(FRW r)) +{ + make_tos(r); /* TOS = r */ + REX64(); + emit_byte(0x83); + emit_byte(0xc4); + emit_byte(0xf8); /* add -8 to esp */ + emit_byte(0xdd); + emit_byte(0x1c); + emit_byte(0x24); /* fstp store r as DOUBLE to [esp] and pop */ + emit_byte(0xdd); + emit_byte(0x04); + emit_byte(0x24); /* 
fld load r as DOUBLE from [esp] */ + emit_byte(0x9b); /* let the CPU wait on FPU exceptions */ + REX64(); + emit_byte(0x83); + emit_byte(0xc4); + emit_byte(0x08); /* add +8 to esp */ +} +LENDFUNC(NONE,NONE,1,raw_fcut_r,(FRW r)) + +LOWFUNC(NONE,NONE,2,raw_fgetexp_rr,(FW d, FR s)) +{ + int ds; + + if (d!=s) { + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + emit_byte(0xd9); + emit_byte(0xf4); /* fxtract exp push man */ + emit_byte(0xdd); + emit_byte(0xd8); /* fstp just pop man */ + tos_make(d); /* store exp to destination */ + } + else { + make_tos(d); /* tos=x=y */ + emit_byte(0xd9); + emit_byte(0xf4); /* fxtract exp push man */ + emit_byte(0xdd); + emit_byte(0xd8); /* fstp just pop man */ + } +} +LENDFUNC(NONE,NONE,2,raw_fgetexp_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fgetman_rr,(FW d, FR s)) +{ + int ds; + + if (d!=s) { + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + emit_byte(0xd9); + emit_byte(0xf4); /* fxtract exp push man */ + emit_byte(0xdd); + emit_byte(0xd9); /* fstp copy man up & pop */ + tos_make(d); /* store man to destination */ + } + else { + make_tos(d); /* tos=x=y */ + emit_byte(0xd9); + emit_byte(0xf4); /* fxtract exp push man */ + emit_byte(0xdd); + emit_byte(0xd9); /* fstp copy man up & pop */ + } +} +LENDFUNC(NONE,NONE,2,raw_fgetman_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_flogN_rr,(FW d, FR s)) +{ + int ds; + + if (s==d) + make_tos(s); + else { + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + } + emit_byte(0xd9); + emit_byte(0xed); /* fldln2 logN(2) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap logN(2) with x */ + emit_byte(0xd9); + emit_byte(0xf1); /* fyl2x logN(2)*log2(x) */ + if (s!=d) + tos_make(d); /* store y=logN(x) */ +} +LENDFUNC(NONE,NONE,2,raw_flogN_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_flogNP1_rr,(FW d, FR s)) +{ + int ds; + + if (s==d) + make_tos(s); + else { + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + } + 
emit_byte(0xd9); + emit_byte(0xed); /* fldln2 logN(2) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap logN(2) with x */ + emit_byte(0xd9); + emit_byte(0xf9); /* fyl2xp1 logN(2)*log2(x+1) */ + if (s!=d) + tos_make(d); /* store y=logN(x+1) */ +} +LENDFUNC(NONE,NONE,2,raw_flogNP1_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_flog10_rr,(FW d, FR s)) +{ + int ds; + + if (s==d) + make_tos(s); + else { + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + } + emit_byte(0xd9); + emit_byte(0xec); /* fldlg2 log10(2) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap log10(2) with x */ + emit_byte(0xd9); + emit_byte(0xf1); /* fyl2x log10(2)*log2(x) */ + if (s!=d) + tos_make(d); /* store y=log10(x) */ +} +LENDFUNC(NONE,NONE,2,raw_flog10_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fasin_rr,(FW d, FR s)) +{ + int ds; + + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + emit_byte(0xd8); + emit_byte(0xc8); /* fmul x*x */ + emit_byte(0xd9); + emit_byte(0xe8); /* fld 1.0 */ + emit_byte(0xde); + emit_byte(0xe1); /* fsubrp 1 - (x^2) */ + emit_byte(0xd9); + emit_byte(0xfa); /* fsqrt sqrt(1-(x^2)) */ + emit_byte(0xd9); + emit_byte(0xc1+ds); /* fld x again */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap x with sqrt(1-(x^2)) */ + emit_byte(0xd9); + emit_byte(0xf3); /* fpatan atan(x/sqrt(1-(x^2))) & pop */ + tos_make(d); /* store y=asin(x) */ +} +LENDFUNC(NONE,NONE,2,raw_fasin_rr,(FW d, FR s)) + +static uae_u32 pihalf[] = {0x2168c234, 0xc90fdaa2, 0x3fff}; // LSB=0 to get acos(1)=0 + +LOWFUNC(NONE,NONE,2,raw_facos_rr,(FW d, FR s)) +{ + int ds; + + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + emit_byte(0xd8); + emit_byte(0xc8); /* fmul x*x */ + emit_byte(0xd9); + emit_byte(0xe8); /* fld 1.0 */ + emit_byte(0xde); + emit_byte(0xe1); /* fsubrp 1 - (x^2) */ + emit_byte(0xd9); + emit_byte(0xfa); /* fsqrt sqrt(1-(x^2)) */ + emit_byte(0xd9); + emit_byte(0xc1+ds); /* fld x again */ + emit_byte(0xd9); + emit_byte(0xc9); 
/* fxch swap x with sqrt(1-(x^2)) */ + emit_byte(0xd9); + emit_byte(0xf3); /* fpatan atan(x/sqrt(1-(x^2))) & pop */ + raw_fldt((uintptr) &pihalf); /* fld load pi/2 from pihalf */ + emit_byte(0xde); + emit_byte(0xe1); /* fsubrp pi/2 - asin(x) & pop */ + tos_make(d); /* store y=acos(x) */ +} +LENDFUNC(NONE,NONE,2,raw_facos_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fatan_rr,(FW d, FR s)) +{ + int ds; + + if (s==d) + make_tos(s); + else { + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + } + emit_byte(0xd9); + emit_byte(0xe8); /* fld 1.0 */ + emit_byte(0xd9); + emit_byte(0xf3); /* fpatan atan(x)/1 & pop*/ + if (s!=d) + tos_make(d); /* store y=atan(x) */ +} +LENDFUNC(NONE,NONE,2,raw_fatan_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fatanh_rr,(FW d, FR s)) +{ + int ds; + + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + emit_byte(0xd9); + emit_byte(0xe8); /* fld 1.0 */ + emit_byte(0xdc); + emit_byte(0xc1); /* fadd 1 + x */ + emit_byte(0xd8); + emit_byte(0xe2+ds); /* fsub 1 - x */ + emit_byte(0xde); + emit_byte(0xf9); /* fdivp (1+x)/(1-x) */ + emit_byte(0xd9); + emit_byte(0xed); /* fldl2e logN(2) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap logN(2) with (1+x)/(1-x) */ + emit_byte(0xd9); + emit_byte(0xf1); /* fyl2x logN(2)*log2((1+x)/(1-x)) pop */ + emit_byte(0xd9); + emit_byte(0xe8); /* fld 1.0 */ + emit_byte(0xd9); + emit_byte(0xe0); /* fchs -1.0 */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap */ + emit_byte(0xd9); + emit_byte(0xfd); /* fscale logN((1+x)/(1-x)) * 2^(-1) */ + emit_byte(0xdd); + emit_byte(0xd9); /* fstp copy & pop */ + tos_make(d); /* store y=atanh(x) */ +} +LENDFUNC(NONE,NONE,2,raw_fatanh_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fsinh_rr,(FW d, FR s)) +{ + int ds,tr; + + tr=live.onstack[live.tos+3]; + if (s==d) + make_tos(s); + else { + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + } + emit_byte(0xd9); + emit_byte(0xea); /* fldl2e log2(e) */ + emit_byte(0xd8); + 
emit_byte(0xc9); /* fmul x*log2(e) */ + emit_byte(0xdd); + emit_byte(0xd1); /* fst copy x*log2(e) */ + if (tr>=0) { + emit_byte(0xd9); + emit_byte(0xca); /* fxch swap with temp-reg */ + REX64(); + emit_byte(0x83); + emit_byte(0xc4); + emit_byte(0xf4); /* add -12 to esp */ + emit_byte(0xdb); + emit_byte(0x3c); + emit_byte(0x24); /* fstp store temp-reg to [esp] & pop */ + } + emit_byte(0xd9); + emit_byte(0xe0); /* fchs -x*log2(e) */ + emit_byte(0xd9); + emit_byte(0xc0); /* fld -x*log2(e) again */ + emit_byte(0xd9); + emit_byte(0xfc); /* frndint int(-x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap */ + emit_byte(0xd8); + emit_byte(0xe1); /* fsub -x*log2(e) - int(-x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */ + x86_fadd_m((uintptr) &one); + emit_byte(0xd9); + emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xca); /* fxch swap e^-x with x*log2(e) in tr */ + emit_byte(0xdd); + emit_byte(0xd1); /* fst copy x*log2(e) */ + emit_byte(0xd9); + emit_byte(0xfc); /* frndint int(x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap */ + emit_byte(0xd8); + emit_byte(0xe1); /* fsub x*log2(e) - int(x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */ + x86_fadd_m((uintptr) &one); + emit_byte(0xd9); + emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x*log2(e)) */ + emit_byte(0xdd); + emit_byte(0xd9); /* fstp copy e^x & pop */ + if (tr>=0) { + emit_byte(0xdb); + emit_byte(0x2c); + emit_byte(0x24); /* fld load temp-reg from [esp] */ + emit_byte(0xd9); + emit_byte(0xca); /* fxch swap temp-reg with e^-x in tr */ + emit_byte(0xde); + emit_byte(0xe9); /* fsubp (e^x)-(e^-x) */ + REX64(); + emit_byte(0x83); + emit_byte(0xc4); + emit_byte(0x0c); /* delayed add +12 to esp */ + } + else { + emit_byte(0xde); + emit_byte(0xe1); /* fsubrp (e^x)-(e^-x) */ + } + emit_byte(0xd9); + emit_byte(0xe8); /* fld 1.0 */ + emit_byte(0xd9); + emit_byte(0xe0); /* fchs -1.0 */ + 
emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap */ + emit_byte(0xd9); + emit_byte(0xfd); /* fscale ((e^x)-(e^-x))/2 */ + emit_byte(0xdd); + emit_byte(0xd9); /* fstp copy & pop */ + if (s!=d) + tos_make(d); /* store y=sinh(x) */ +} +LENDFUNC(NONE,NONE,2,raw_fsinh_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fcosh_rr,(FW d, FR s)) +{ + int ds,tr; + + tr=live.onstack[live.tos+3]; + if (s==d) + make_tos(s); + else { + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + } + emit_byte(0xd9); + emit_byte(0xea); /* fldl2e log2(e) */ + emit_byte(0xd8); + emit_byte(0xc9); /* fmul x*log2(e) */ + emit_byte(0xdd); + emit_byte(0xd1); /* fst copy x*log2(e) */ + if (tr>=0) { + emit_byte(0xd9); + emit_byte(0xca); /* fxch swap with temp-reg */ + REX64(); + emit_byte(0x83); + emit_byte(0xc4); + emit_byte(0xf4); /* add -12 to esp */ + emit_byte(0xdb); + emit_byte(0x3c); + emit_byte(0x24); /* fstp store temp-reg to [esp] & pop */ + } + emit_byte(0xd9); + emit_byte(0xe0); /* fchs -x*log2(e) */ + emit_byte(0xd9); + emit_byte(0xc0); /* fld -x*log2(e) again */ + emit_byte(0xd9); + emit_byte(0xfc); /* frndint int(-x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap */ + emit_byte(0xd8); + emit_byte(0xe1); /* fsub -x*log2(e) - int(-x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */ + x86_fadd_m((uintptr) &one); + emit_byte(0xd9); + emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xca); /* fxch swap e^-x with x*log2(e) in tr */ + emit_byte(0xdd); + emit_byte(0xd1); /* fst copy x*log2(e) */ + emit_byte(0xd9); + emit_byte(0xfc); /* frndint int(x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap */ + emit_byte(0xd8); + emit_byte(0xe1); /* fsub x*log2(e) - int(x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */ + x86_fadd_m((uintptr) &one); + emit_byte(0xd9); + emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x*log2(e)) */ + emit_byte(0xdd); + emit_byte(0xd9); /* 
fstp copy e^x & pop */ + if (tr>=0) { + emit_byte(0xdb); + emit_byte(0x2c); + emit_byte(0x24); /* fld load temp-reg from [esp] */ + emit_byte(0xd9); + emit_byte(0xca); /* fxch swap temp-reg with e^-x in tr */ + REX64(); + emit_byte(0x83); + emit_byte(0xc4); + emit_byte(0x0c); /* delayed add +12 to esp */ + } + emit_byte(0xde); + emit_byte(0xc1); /* faddp (e^x)+(e^-x) */ + emit_byte(0xd9); + emit_byte(0xe8); /* fld 1.0 */ + emit_byte(0xd9); + emit_byte(0xe0); /* fchs -1.0 */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap */ + emit_byte(0xd9); + emit_byte(0xfd); /* fscale ((e^x)+(e^-x))/2 */ + emit_byte(0xdd); + emit_byte(0xd9); /* fstp copy & pop */ + if (s!=d) + tos_make(d); /* store y=cosh(x) */ +} +LENDFUNC(NONE,NONE,2,raw_fcosh_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_ftanh_rr,(FW d, FR s)) +{ + int ds,tr; + + tr=live.onstack[live.tos+3]; + if (s==d) + make_tos(s); + else { + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* fld x */ + } + emit_byte(0xd9); + emit_byte(0xea); /* fldl2e log2(e) */ + emit_byte(0xd8); + emit_byte(0xc9); /* fmul x*log2(e) */ + emit_byte(0xdd); + emit_byte(0xd1); /* fst copy x*log2(e) */ + if (tr>=0) { + emit_byte(0xd9); + emit_byte(0xca); /* fxch swap with temp-reg */ + REX64(); + emit_byte(0x83); + emit_byte(0xc4); + emit_byte(0xf4); /* add -12 to esp */ + emit_byte(0xdb); + emit_byte(0x3c); + emit_byte(0x24); /* fstp store temp-reg to [esp] & pop */ + } + emit_byte(0xd9); + emit_byte(0xe0); /* fchs -x*log2(e) */ + emit_byte(0xd9); + emit_byte(0xc0); /* fld -x*log2(e) again */ + emit_byte(0xd9); + emit_byte(0xfc); /* frndint int(-x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap */ + emit_byte(0xd8); + emit_byte(0xe1); /* fsub -x*log2(e) - int(-x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */ + x86_fadd_m((uintptr) &one); + emit_byte(0xd9); + emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xca); /* fxch swap e^-x with x*log2(e) */ 
+ emit_byte(0xdd); + emit_byte(0xd1); /* fst copy x*log2(e) */ + emit_byte(0xd9); + emit_byte(0xfc); /* frndint int(x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xc9); /* fxch swap */ + emit_byte(0xd8); + emit_byte(0xe1); /* fsub x*log2(e) - int(x*log2(e)) */ + emit_byte(0xd9); + emit_byte(0xf0); /* f2xm1 (2^frac(x))-1 */ + x86_fadd_m((uintptr) &one); + emit_byte(0xd9); + emit_byte(0xfd); /* fscale (2^frac(x))*2^int(x*log2(e)) */ + emit_byte(0xdd); + emit_byte(0xd1); /* fst copy e^x */ + emit_byte(0xd8); + emit_byte(0xc2); /* fadd (e^x)+(e^-x) */ + emit_byte(0xd9); + emit_byte(0xca); /* fxch swap with e^-x */ + emit_byte(0xde); + emit_byte(0xe9); /* fsubp (e^x)-(e^-x) */ + if (tr>=0) { + emit_byte(0xdb); + emit_byte(0x2c); + emit_byte(0x24); /* fld load temp-reg from [esp] */ + emit_byte(0xd9); + emit_byte(0xca); /* fxch swap temp-reg with e^-x in tr */ + emit_byte(0xde); + emit_byte(0xf9); /* fdivp ((e^x)-(e^-x))/((e^x)+(e^-x)) */ + REX64(); + emit_byte(0x83); + emit_byte(0xc4); + emit_byte(0x0c); /* delayed add +12 to esp */ + } + else { + emit_byte(0xde); + emit_byte(0xf1); /* fdivrp ((e^x)-(e^-x))/((e^x)+(e^-x)) */ + } + if (s!=d) + tos_make(d); /* store y=tanh(x) */ +} +LENDFUNC(NONE,NONE,2,raw_ftanh_rr,(FW d, FR s)) + +/* %eax register is clobbered if target processor doesn't support fucomi */ +#define FFLAG_NREG_CLOBBER_CONDITION !have_cmov +#define FFLAG_NREG EAX_INDEX + +static inline void raw_fflags_into_flags(int r) +{ + int p; + + usereg(r); + p=stackpos(r); + + emit_byte(0xd9); + emit_byte(0xee); /* Push 0 */ + emit_byte(0xd9); + emit_byte(0xc9+p); /* swap top two around */ + if (have_cmov) { + // gb-- fucomi is for P6 cores only, not K6-2 then... 
+ emit_byte(0xdb); + emit_byte(0xe9+p); /* fucomi them */ + } + else { + emit_byte(0xdd); + emit_byte(0xe1+p); /* fucom them */ + emit_byte(0x9b); + emit_byte(0xdf); + emit_byte(0xe0); /* fstsw ax */ + raw_sahf(0); /* sahf */ + } + emit_byte(0xdd); + emit_byte(0xd9+p); /* store value back, and get rid of 0 */ +} diff --git a/BasiliskII/src/uae_cpu/compiler/codegen_x86.h b/BasiliskII/src/uae_cpu/compiler/codegen_x86.h new file mode 100644 index 00000000..6743392d --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/codegen_x86.h @@ -0,0 +1,1996 @@ +/* + * compiler/codegen_x86.h - IA-32 and AMD64 code generator + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * JIT compiler m68k -> IA-32 and AMD64 + * + * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * This file is derived from CCG, copyright 1999-2003 Ian Piumarta + * Adaptation for Basilisk II and improvements, copyright 2000-2004 Gwenole Beauchesne + * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef X86_RTASM_H +#define X86_RTASM_H + +/* NOTES + * + * o Best viewed on a 1024x768 screen with fixed-6x10 font ;-) + * + * TODO + * + * o Fix FIXMEs + * o i387 FPU instructions + * o SSE instructions + * o Optimize for cases where register numbers are not integral constants + */ + +/* --- Configuration ------------------------------------------------------- */ + +/* Define to settle a "flat" register set, i.e. different regno for + each size variant. */ +#ifndef X86_FLAT_REGISTERS +#define X86_FLAT_REGISTERS 1 +#endif + +/* Define to generate x86-64 code. */ +#ifndef X86_TARGET_64BIT +#define X86_TARGET_64BIT 0 +#endif + +/* Define to optimize ALU instructions. */ +#ifndef X86_OPTIMIZE_ALU +#define X86_OPTIMIZE_ALU 1 +#endif + +/* Define to optimize rotate/shift instructions. */ +#ifndef X86_OPTIMIZE_ROTSHI +#define X86_OPTIMIZE_ROTSHI 1 +#endif + +/* Define to optimize absolute addresses for RIP relative addressing. */ +#ifndef X86_RIP_RELATIVE_ADDR +#define X86_RIP_RELATIVE_ADDR 1 +#endif + + +/* --- Macros -------------------------------------------------------------- */ + +/* Functions used to emit code. + * + * x86_emit_byte(B) + * x86_emit_word(W) + * x86_emit_long(L) + */ + +/* Get pointer to current code + * + * x86_get_target() + */ + +/* Abort assembler, fatal failure. 
+ * + * x86_emit_failure(MSG) + */ + +#define x86_emit_failure0(MSG) (x86_emit_failure(MSG),0) + + +/* --- Register set -------------------------------------------------------- */ + +enum { + X86_RIP = -2, +#if X86_FLAT_REGISTERS + X86_NOREG = 0, + X86_Reg8L_Base = 0x10, + X86_Reg8H_Base = 0x20, + X86_Reg16_Base = 0x30, + X86_Reg32_Base = 0x40, + X86_Reg64_Base = 0x50, + X86_RegMMX_Base = 0x60, + X86_RegXMM_Base = 0x70, +#else + X86_NOREG = -1, + X86_Reg8L_Base = 0, + X86_Reg8H_Base = 16, + X86_Reg16_Base = 0, + X86_Reg32_Base = 0, + X86_Reg64_Base = 0, + X86_RegMMX_Base = 0, + X86_RegXMM_Base = 0, +#endif +}; + +enum { + X86_AL = X86_Reg8L_Base, + X86_CL, X86_DL, X86_BL, + X86_SPL, X86_BPL, X86_SIL, X86_DIL, + X86_R8B, X86_R9B, X86_R10B, X86_R11B, + X86_R12B, X86_R13B, X86_R14B, X86_R15B, + X86_AH = X86_Reg8H_Base + 4, + X86_CH, X86_DH, X86_BH +}; + +enum { + X86_AX = X86_Reg16_Base, + X86_CX, X86_DX, X86_BX, + X86_SP, X86_BP, X86_SI, X86_DI, + X86_R8W, X86_R9W, X86_R10W, X86_R11W, + X86_R12W, X86_R13W, X86_R14W, X86_R15W +}; + +enum { + X86_EAX = X86_Reg32_Base, + X86_ECX, X86_EDX, X86_EBX, + X86_ESP, X86_EBP, X86_ESI, X86_EDI, + X86_R8D, X86_R9D, X86_R10D, X86_R11D, + X86_R12D, X86_R13D, X86_R14D, X86_R15D +}; + +enum { + X86_RAX = X86_Reg64_Base, + X86_RCX, X86_RDX, X86_RBX, + X86_RSP, X86_RBP, X86_RSI, X86_RDI, + X86_R8, X86_R9, X86_R10, X86_R11, + X86_R12, X86_R13, X86_R14, X86_R15 +}; + +enum { + X86_MM0 = X86_RegMMX_Base, + X86_MM1, X86_MM2, X86_MM3, + X86_MM4, X86_MM5, X86_MM6, X86_MM7, +}; + +enum { + X86_XMM0 = X86_RegXMM_Base, + X86_XMM1, X86_XMM2, X86_XMM3, + X86_XMM4, X86_XMM5, X86_XMM6, X86_XMM7, + X86_XMM8, X86_XMM9, X86_XMM10, X86_XMM11, + X86_XMM12, X86_XMM13, X86_XMM14, X86_XMM15 +}; + +/* Register control and access + * + * _r0P(R) Null register? + * _rIP(R) RIP register? + * _rXP(R) Extended register? 
+ * + * _rC(R) Class of register (only valid if X86_FLAT_REGISTERS) + * _rR(R) Full register number + * _rN(R) Short register number for encoding + * + * _r1(R) 8-bit register ID + * _r2(R) 16-bit register ID + * _r4(R) 32-bit register ID + * _r8(R) 64-bit register ID + * _rM(R) MMX register ID + * _rX(R) XMM register ID + * _rA(R) Address register ID used for EA calculation + */ + +#define _r0P(R) ((int)(R) == (int)X86_NOREG) +#define _rIP(R) ((int)(R) == (int)X86_RIP) + +#if X86_FLAT_REGISTERS +#define _rC(R) ((R) & 0xf0) +#define _rR(R) ((R) & 0x0f) +#define _rN(R) ((R) & 0x07) +#define _rXP(R) ((R) > 0 && _rR(R) > 7) +#else +#define _rN(R) ((R) & 0x07) +#define _rR(R) (int(R)) +#define _rXP(R) (_rR(R) > 7 && _rR(R) < 16) +#endif + +#if !defined(_ASM_SAFETY) || ! X86_FLAT_REGISTERS +#define _r1(R) _rN(R) +#define _r2(R) _rN(R) +#define _r4(R) _rN(R) +#define _r8(R) _rN(R) +#define _rA(R) _rN(R) +#define _rM(R) _rN(R) +#define _rX(R) _rN(R) +#else +#define _r1(R) ( ((_rC(R) & (X86_Reg8L_Base | X86_Reg8H_Base)) != 0) ? _rN(R) : x86_emit_failure0( "8-bit register required")) +#define _r2(R) ( (_rC(R) == X86_Reg16_Base) ? _rN(R) : x86_emit_failure0("16-bit register required")) +#define _r4(R) ( (_rC(R) == X86_Reg32_Base) ? _rN(R) : x86_emit_failure0("32-bit register required")) +#define _r8(R) ( (_rC(R) == X86_Reg64_Base) ? _rN(R) : x86_emit_failure0("64-bit register required")) +#define _rA(R) ( X86_TARGET_64BIT ? \ + ( (_rC(R) == X86_Reg64_Base) ? _rN(R) : x86_emit_failure0("not a valid 64-bit base/index expression")) : \ + ( (_rC(R) == X86_Reg32_Base) ? _rN(R) : x86_emit_failure0("not a valid 32-bit base/index expression")) ) +#define _rM(R) ( (_rC(R) == X86_RegMMX_Base) ? _rN(R) : x86_emit_failure0("MMX register required")) +#define _rX(R) ( (_rC(R) == X86_RegXMM_Base) ? _rN(R) : x86_emit_failure0("SSE register required")) +#endif + +#define _rSP() (X86_TARGET_64BIT ? 
(int)X86_RSP : (int)X86_ESP) +#define _r1e8lP(R) (int(R) >= X86_SPL && int(R) <= X86_DIL) +#define _rbpP(R) (_rR(R) == _rR(X86_RBP)) +#define _rspP(R) (_rR(R) == _rR(X86_RSP)) +#define _rbp13P(R) (_rN(R) == _rN(X86_RBP)) +#define _rsp12P(R) (_rN(R) == _rN(X86_RSP)) + + +/* ========================================================================= */ +/* --- UTILITY ------------------------------------------------------------- */ +/* ========================================================================= */ + +typedef signed char _sc; +typedef unsigned char _uc; +typedef signed short _ss; +typedef unsigned short _us; +typedef signed int _sl; +typedef unsigned int _ul; + +#define _UC(X) ((_uc )(uintptr_t)(X)) +#define _US(X) ((_us )(uintptr_t)(X)) +#define _SL(X) ((_sl )(uintptr_t)(X)) +#define _UL(X) ((_ul )(uintptr_t)(X)) + +#define _PUC(X) ((_uc *)(X)) +#define _PUS(X) ((_us *)(X)) +#define _PSL(X) ((_sl *)(X)) +#define _PUL(X) ((_ul *)(X)) + +#undef _B +#undef _W +#undef _L +#undef _Q + +#define _B(B) x86_emit_byte((B)) +#define _W(W) x86_emit_word((W)) +#define _L(L) x86_emit_long((L)) +#define _Q(Q) x86_emit_quad((Q)) + +#define _MASK(N) ((unsigned)((1<<(N)))-1) +#define _siP(N,I) (!((((unsigned)(I))^(((unsigned)(I))<<1))&~_MASK(N))) +#define _uiP(N,I) (!(((unsigned)(I))&~_MASK(N))) +#define _suiP(N,I) (_siP(N,I) | _uiP(N,I)) + +#ifndef _ASM_SAFETY +#define _ck_s(W,I) (_UL(I) & _MASK(W)) +#define _ck_u(W,I) (_UL(I) & _MASK(W)) +#define _ck_su(W,I) (_UL(I) & _MASK(W)) +#define _ck_d(W,I) (_UL(I) & _MASK(W)) +#else +#define _ck_s(W,I) (_siP(W,I) ? (_UL(I) & _MASK(W)) : x86_emit_failure0( "signed integer `"#I"' too large for "#W"-bit field")) +#define _ck_u(W,I) (_uiP(W,I) ? (_UL(I) & _MASK(W)) : x86_emit_failure0("unsigned integer `"#I"' too large for "#W"-bit field")) +#define _ck_su(W,I) (_suiP(W,I) ? (_UL(I) & _MASK(W)) : x86_emit_failure0( "integer `"#I"' too large for "#W"-bit field")) +#define _ck_d(W,I) (_siP(W,I) ? 
(_UL(I) & _MASK(W)) : x86_emit_failure0( "displacement `"#I"' too large for "#W"-bit field")) +#endif + +#define _s0P(I) ((I)==0) +#define _s8P(I) _siP(8,I) +#define _s16P(I) _siP(16,I) +#define _u8P(I) _uiP(8,I) +#define _u16P(I) _uiP(16,I) + +#define _su8(I) _ck_su(8,I) +#define _su16(I) _ck_su(16,I) + +#define _s1(I) _ck_s( 1,I) +#define _s2(I) _ck_s( 2,I) +#define _s3(I) _ck_s( 3,I) +#define _s4(I) _ck_s( 4,I) +#define _s5(I) _ck_s( 5,I) +#define _s6(I) _ck_s( 6,I) +#define _s7(I) _ck_s( 7,I) +#define _s8(I) _ck_s( 8,I) +#define _s9(I) _ck_s( 9,I) +#define _s10(I) _ck_s(10,I) +#define _s11(I) _ck_s(11,I) +#define _s12(I) _ck_s(12,I) +#define _s13(I) _ck_s(13,I) +#define _s14(I) _ck_s(14,I) +#define _s15(I) _ck_s(15,I) +#define _s16(I) _ck_s(16,I) +#define _s17(I) _ck_s(17,I) +#define _s18(I) _ck_s(18,I) +#define _s19(I) _ck_s(19,I) +#define _s20(I) _ck_s(20,I) +#define _s21(I) _ck_s(21,I) +#define _s22(I) _ck_s(22,I) +#define _s23(I) _ck_s(23,I) +#define _s24(I) _ck_s(24,I) +#define _s25(I) _ck_s(25,I) +#define _s26(I) _ck_s(26,I) +#define _s27(I) _ck_s(27,I) +#define _s28(I) _ck_s(28,I) +#define _s29(I) _ck_s(29,I) +#define _s30(I) _ck_s(30,I) +#define _s31(I) _ck_s(31,I) +#define _u1(I) _ck_u( 1,I) +#define _u2(I) _ck_u( 2,I) +#define _u3(I) _ck_u( 3,I) +#define _u4(I) _ck_u( 4,I) +#define _u5(I) _ck_u( 5,I) +#define _u6(I) _ck_u( 6,I) +#define _u7(I) _ck_u( 7,I) +#define _u8(I) _ck_u( 8,I) +#define _u9(I) _ck_u( 9,I) +#define _u10(I) _ck_u(10,I) +#define _u11(I) _ck_u(11,I) +#define _u12(I) _ck_u(12,I) +#define _u13(I) _ck_u(13,I) +#define _u14(I) _ck_u(14,I) +#define _u15(I) _ck_u(15,I) +#define _u16(I) _ck_u(16,I) +#define _u17(I) _ck_u(17,I) +#define _u18(I) _ck_u(18,I) +#define _u19(I) _ck_u(19,I) +#define _u20(I) _ck_u(20,I) +#define _u21(I) _ck_u(21,I) +#define _u22(I) _ck_u(22,I) +#define _u23(I) _ck_u(23,I) +#define _u24(I) _ck_u(24,I) +#define _u25(I) _ck_u(25,I) +#define _u26(I) _ck_u(26,I) +#define _u27(I) _ck_u(27,I) +#define _u28(I) _ck_u(28,I) 
+#define _u29(I) _ck_u(29,I) +#define _u30(I) _ck_u(30,I) +#define _u31(I) _ck_u(31,I) + +/* ========================================================================= */ +/* --- ASSEMBLER ----------------------------------------------------------- */ +/* ========================================================================= */ + +#define _b00 0 +#define _b01 1 +#define _b10 2 +#define _b11 3 + +#define _b000 0 +#define _b001 1 +#define _b010 2 +#define _b011 3 +#define _b100 4 +#define _b101 5 +#define _b110 6 +#define _b111 7 + +#define _OFF4(D) (_UL(D) - _UL(x86_get_target())) +#define _CKD8(D) _ck_d(8, ((_uc) _OFF4(D)) ) + +#define _D8(D) (_B(0), ((*(_PUC(x86_get_target())-1))= _CKD8(D))) +#define _D32(D) (_L(0), ((*(_PUL(x86_get_target())-1))= _OFF4(D))) + +#ifndef _ASM_SAFETY +# define _M(M) (M) +# define _r(R) (R) +# define _m(M) (M) +# define _s(S) (S) +# define _i(I) (I) +# define _b(B) (B) +#else +# define _M(M) (((M)>3) ? x86_emit_failure0("internal error: mod = " #M) : (M)) +# define _r(R) (((R)>7) ? x86_emit_failure0("internal error: reg = " #R) : (R)) +# define _m(M) (((M)>7) ? x86_emit_failure0("internal error: r/m = " #M) : (M)) +# define _s(S) (((S)>3) ? x86_emit_failure0("internal error: memory scale = " #S) : (S)) +# define _i(I) (((I)>7) ? x86_emit_failure0("internal error: memory index = " #I) : (I)) +# define _b(B) (((B)>7) ? x86_emit_failure0("internal error: memory base = " #B) : (B)) +#endif + +#define _Mrm(Md,R,M) _B((_M(Md)<<6)|(_r(R)<<3)|_m(M)) +#define _SIB(Sc,I, B) _B((_s(Sc)<<6)|(_i(I)<<3)|_b(B)) + +#define _SCL(S) ((((S)==1) ? _b00 : \ + (((S)==2) ? _b01 : \ + (((S)==4) ? _b10 : \ + (((S)==8) ? _b11 : x86_emit_failure0("illegal scale: " #S)))))) + + +/* --- Memory subformats - urgh! 
------------------------------------------- */ + +/* _r_D() is RIP addressing mode if X86_TARGET_64BIT, use _r_DSIB() instead */ +#define _r_D( R, D ) (_Mrm(_b00,_rN(R),_b101 ) ,_L((long)(D))) +#define _r_DSIB(R, D ) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(1),_b100 ,_b101 ),_L((long)(D))) +#define _r_0B( R, B ) (_Mrm(_b00,_rN(R),_rA(B)) ) +#define _r_0BIS(R, B,I,S) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_rA(B)) ) +#define _r_1B( R, D,B ) (_Mrm(_b01,_rN(R),_rA(B)) ,_B((long)(D))) +#define _r_1BIS(R, D,B,I,S) (_Mrm(_b01,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_rA(B)),_B((long)(D))) +#define _r_4B( R, D,B ) (_Mrm(_b10,_rN(R),_rA(B)) ,_L((long)(D))) +#define _r_4IS( R, D,I,S) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_b101 ),_L((long)(D))) +#define _r_4BIS(R, D,B,I,S) (_Mrm(_b10,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_rA(B)),_L((long)(D))) + +#define _r_DB( R, D,B ) ((_s0P(D) && (!_rbp13P(B)) ? _r_0B (R, B ) : (_s8P(D) ? _r_1B( R,D,B ) : _r_4B( R,D,B )))) +#define _r_DBIS(R, D,B,I,S) ((_s0P(D) && (!_rbp13P(B)) ? _r_0BIS(R, B,I,S) : (_s8P(D) ? _r_1BIS(R,D,B,I,S) : _r_4BIS(R,D,B,I,S)))) + +/* Use RIP-addressing in 64-bit mode, if possible */ +#define _x86_RIP_addressing_possible(D,O) (X86_RIP_RELATIVE_ADDR && \ + ((uintptr)x86_get_target() + 4 + (O) - (D) <= 0xffffffff)) + +#define _r_X( R, D,B,I,S,O) (_r0P(I) ? (_r0P(B) ? (!X86_TARGET_64BIT ? _r_D(R,D) : \ + (_x86_RIP_addressing_possible(D, O) ? \ + _r_D(R, (D) - ((uintptr)x86_get_target() + 4 + (O))) : \ + _r_DSIB(R,D))) : \ + (_rIP(B) ? _r_D (R,D ) : \ + (_rsp12P(B) ? _r_DBIS(R,D,_rSP(),_rSP(),1) : \ + _r_DB (R,D, B )))) : \ + (_r0P(B) ? _r_4IS (R,D, I,S) : \ + (!_rspP(I) ? _r_DBIS(R,D, B, I,S) : \ + x86_emit_failure("illegal index register: %esp")))) + + +/* --- Instruction formats ------------------------------------------------- */ + +#define _m32only(X) (! X86_TARGET_64BIT ? X : x86_emit_failure("invalid instruction in 64-bit mode")) +#define _m64only(X) ( X86_TARGET_64BIT ? 
X : x86_emit_failure("invalid instruction in 32-bit mode")) +#define _m64(X) ( X86_TARGET_64BIT ? X : ((void)0) ) + +/* _format Opcd ModR/M dN(rB,rI,Sc) imm... */ + +#define _d16() ( _B(0x66 ) ) +#define _O( OP ) ( _B( OP ) ) +#define _Or( OP,R ) ( _B( (OP)|_r(R)) ) +#define _OO( OP ) ( _B((OP)>>8), _B( (uae_u8)(OP) ) ) +#define _OOr( OP,R ) ( _B((OP)>>8), _B( (OP)|_r(R)) ) +#define _Os( OP,B ) ( _s8P(B) ? _B(((OP)|_b10)) : _B(OP) ) +#define _sW( W ) ( _s8P(W) ? _B(W):_W(W) ) +#define _sL( L ) ( _s8P(L) ? _B(L):_L(L) ) +#define _sWO( W ) ( _s8P(W) ? 1 : 2 ) +#define _sLO( L ) ( _s8P(L) ? 1 : 4 ) +#define _O_B( OP ,B ) ( _O ( OP ) ,_B(B) ) +#define _O_W( OP ,W ) ( _O ( OP ) ,_W(W) ) +#define _O_L( OP ,L ) ( _O ( OP ) ,_L(L) ) +#define _O_D8( OP ,D ) ( _O ( OP ) ,_D8(D) ) +#define _O_D32( OP ,D ) ( _O ( OP ) ,_D32(D) ) +#define _OO_D32( OP ,D ) ( _OO ( OP ) ,_D32(D) ) +#define _Os_sW( OP ,W ) ( _Os ( OP,W) ,_sW(W) ) +#define _Os_sL( OP ,L ) ( _Os ( OP,L) ,_sL(L) ) +#define _O_W_B( OP ,W,B) ( _O ( OP ) ,_W(W),_B(B)) +#define _Or_B( OP,R ,B ) ( _Or ( OP,R) ,_B(B) ) +#define _Or_W( OP,R ,W ) ( _Or ( OP,R) ,_W(W) ) +#define _Or_L( OP,R ,L ) ( _Or ( OP,R) ,_L(L) ) +#define _Or_Q( OP,R ,Q ) ( _Or ( OP,R) ,_Q(Q) ) +#define _O_Mrm( OP ,MO,R,M ) ( _O ( OP ),_Mrm(MO,R,M ) ) +#define _OO_Mrm( OP ,MO,R,M ) ( _OO ( OP ),_Mrm(MO,R,M ) ) +#define _O_Mrm_B( OP ,MO,R,M ,B ) ( _O ( OP ),_Mrm(MO,R,M ) ,_B(B) ) +#define _O_Mrm_W( OP ,MO,R,M ,W ) ( _O ( OP ),_Mrm(MO,R,M ) ,_W(W) ) +#define _O_Mrm_L( OP ,MO,R,M ,L ) ( _O ( OP ),_Mrm(MO,R,M ) ,_L(L) ) +#define _OO_Mrm_B( OP ,MO,R,M ,B ) ( _OO ( OP ),_Mrm(MO,R,M ) ,_B(B) ) +#define _Os_Mrm_sW(OP ,MO,R,M ,W ) ( _Os ( OP,W),_Mrm(MO,R,M ),_sW(W) ) +#define _Os_Mrm_sL(OP ,MO,R,M ,L ) ( _Os ( OP,L),_Mrm(MO,R,M ),_sL(L) ) +#define _O_r_X( OP ,R ,MD,MB,MI,MS ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS,0) ) +#define _OO_r_X( OP ,R ,MD,MB,MI,MS ) ( _OO ( OP ),_r_X( R ,MD,MB,MI,MS,0) ) +#define _O_r_X_B( OP ,R ,MD,MB,MI,MS,B ) ( _O ( OP ),_r_X( R 
,MD,MB,MI,MS,1) ,_B(B) ) +#define _O_r_X_W( OP ,R ,MD,MB,MI,MS,W ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS,2) ,_W(W) ) +#define _O_r_X_L( OP ,R ,MD,MB,MI,MS,L ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS,4) ,_L(L) ) +#define _OO_r_X_B( OP ,R ,MD,MB,MI,MS,B ) ( _OO ( OP ),_r_X( R ,MD,MB,MI,MS,1) ,_B(B) ) +#define _Os_r_X_sW(OP ,R ,MD,MB,MI,MS,W ) ( _Os ( OP,W),_r_X( R ,MD,MB,MI,MS,_sWO(W)),_sW(W)) +#define _Os_r_X_sL(OP ,R ,MD,MB,MI,MS,L ) ( _Os ( OP,L),_r_X( R ,MD,MB,MI,MS,_sLO(L)),_sL(L)) +#define _O_X_B( OP ,MD,MB,MI,MS,B ) ( _O_r_X_B( OP ,0 ,MD,MB,MI,MS ,B) ) +#define _O_X_W( OP ,MD,MB,MI,MS,W ) ( _O_r_X_W( OP ,0 ,MD,MB,MI,MS ,W) ) +#define _O_X_L( OP ,MD,MB,MI,MS,L ) ( _O_r_X_L( OP ,0 ,MD,MB,MI,MS ,L) ) + + +/* --- REX prefixes -------------------------------------------------------- */ + +#undef _VOID + +#define _VOID() ((void)0) +#define _BIT(X) (!!(X)) +#define _d64(W,R,X,B) (_B(0x40|(W)<<3|(R)<<2|(X)<<1|(B))) + +#define __REXwrxb(L,W,R,X,B) ((W|R|X|B) || (L) ? _d64(W,R,X,B) : _VOID()) +#define __REXwrx_(L,W,R,X,MR) (__REXwrxb(L,W,R,X,_BIT(_rIP(MR)?0:_rXP(MR)))) +#define __REXw_x_(L,W,R,X,MR) (__REXwrx_(L,W,_BIT(_rXP(R)),X,MR)) +#define __REX_reg(RR) (__REXwrxb(0,0,0,00,_BIT(_rXP(RR)))) +#define __REX_mem(MB,MI) (__REXwrxb(0,0,0,_BIT(_rXP(MI)),_BIT(_rXP(MB)))) + +// FIXME: can't mix new (SPL,BPL,SIL,DIL) with (AH,BH,CH,DH) +#define _REXBrr(RR,MR) _m64(__REXw_x_(_r1e8lP(RR)||_r1e8lP(MR),0,RR,0,MR)) +#define _REXBmr(MB,MI,RD) _m64(__REXw_x_(_r1e8lP(RD)||_r1e8lP(MB),0,RD,_BIT(_rXP(MI)),MB)) +#define _REXBrm(RS,MB,MI) _REXBmr(MB,MI,RS) + +#define _REXBLrr(RR,MR) _m64(__REXw_x_(_r1e8lP(MR),0,RR,0,MR)) +#define _REXLrr(RR,MR) _m64(__REXw_x_(0,0,RR,0,MR)) +#define _REXLmr(MB,MI,RD) _m64(__REXw_x_(0,0,RD,_BIT(_rXP(MI)),MB)) +#define _REXLrm(RS,MB,MI) _REXLmr(MB,MI,RS) +#define _REXLr(RR) _m64(__REX_reg(RR)) +#define _REXLm(MB,MI) _m64(__REX_mem(MB,MI)) + +#define _REXQrr(RR,MR) _m64only(__REXw_x_(0,1,RR,0,MR)) +#define _REXQmr(MB,MI,RD) _m64only(__REXw_x_(0,1,RD,_BIT(_rXP(MI)),MB)) 
+#define _REXQrm(RS,MB,MI) _REXQmr(MB,MI,RS) +#define _REXQr(RR) _m64only(__REX_reg(RR)) +#define _REXQm(MB,MI) _m64only(__REX_mem(MB,MI)) + + +/* ========================================================================= */ +/* --- Fully-qualified intrinsic instructions ------------------------------ */ +/* ========================================================================= */ + +/* OPCODE + i = immediate operand + * + r = register operand + * + m = memory operand (disp,base,index,scale) + * + sr/sm = a star preceding a register or memory + * + 0 = top of stack register (for FPU instructions) + * + * NOTE in x86-64 mode: a memory operand with only a valid + * displacement value will lead to the expect absolute mode. If + * RIP addressing is necessary, X86_RIP shall be used as the base + * register argument. + */ + +/* --- ALU instructions ---------------------------------------------------- */ + +enum { + X86_ADD = 0, + X86_OR = 1, + X86_ADC = 2, + X86_SBB = 3, + X86_AND = 4, + X86_SUB = 5, + X86_XOR = 6, + X86_CMP = 7, +}; + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ + +#define _ALUBrr(OP,RS, RD) (_REXBrr(RS, RD), _O_Mrm (((OP) << 3) ,_b11,_r1(RS),_r1(RD) )) +#define _ALUBmr(OP, MD, MB, MI, MS, RD) (_REXBmr(MB, MI, RD), _O_r_X (((OP) << 3) + 2,_r1(RD) ,MD,MB,MI,MS )) +#define _ALUBrm(OP, RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _O_r_X (((OP) << 3) , ,_r1(RS) ,MD,MB,MI,MS )) +#define _ALUBir(OP, IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_AL) ? 
\ + (_REXBrr(0, RD), _O_B (((OP) << 3) + 4 ,_su8(IM))) : \ + (_REXBrr(0, RD), _O_Mrm_B (0x80 ,_b11,OP ,_r1(RD) ,_su8(IM))) ) +#define _ALUBim(OP, IM, MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X_B (0x80 ,OP ,MD,MB,MI,MS ,_su8(IM))) + +#define _ALUWrr(OP, RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (((OP) << 3) + 1,_b11,_r2(RS),_r2(RD) )) +#define _ALUWmr(OP, MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _O_r_X (((OP) << 3) + 3 ,_r2(RD) ,MD,MB,MI,MS )) +#define _ALUWrm(OP, RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (((OP) << 3) + 1 ,_r2(RS) ,MD,MB,MI,MS )) +#define _ALUWir(OP, IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_AX) ? \ + (_d16(), _REXLrr(0, RD), _O_W (((OP) << 3) + 5 ,_su16(IM))) : \ + (_d16(), _REXLrr(0, RD), _Os_Mrm_sW (0x81 ,_b11,OP ,_r2(RD) ,_su16(IM))) ) +#define _ALUWim(OP, IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _Os_r_X_sW (0x81 ,OP ,MD,MB,MI,MS ,_su16(IM))) + +#define _ALULrr(OP, RS, RD) (_REXLrr(RS, RD), _O_Mrm (((OP) << 3) + 1,_b11,_r4(RS),_r4(RD) )) +#define _ALULmr(OP, MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _O_r_X (((OP) << 3) + 3 ,_r4(RD) ,MD,MB,MI,MS )) +#define _ALULrm(OP, RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _O_r_X (((OP) << 3) + 1 ,_r4(RS) ,MD,MB,MI,MS )) +#define _ALULir(OP, IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_EAX) ? \ + (_REXLrr(0, RD), _O_L (((OP) << 3) + 5 ,IM )) : \ + (_REXLrr(0, RD), _Os_Mrm_sL (0x81 ,_b11,OP ,_r4(RD) ,IM )) ) +#define _ALULim(OP, IM, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _Os_r_X_sL (0x81 ,OP ,MD,MB,MI,MS ,IM )) + +#define _ALUQrr(OP, RS, RD) (_REXQrr(RS, RD), _O_Mrm (((OP) << 3) + 1,_b11,_r8(RS),_r8(RD) )) +#define _ALUQmr(OP, MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _O_r_X (((OP) << 3) + 3 ,_r8(RD) ,MD,MB,MI,MS )) +#define _ALUQrm(OP, RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _O_r_X (((OP) << 3) + 1 ,_r8(RS) ,MD,MB,MI,MS )) +#define _ALUQir(OP, IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_RAX) ? 
\ + (_REXQrr(0, RD), _O_L (((OP) << 3) + 5 ,IM )) : \ + (_REXQrr(0, RD), _Os_Mrm_sL (0x81 ,_b11,OP ,_r8(RD) ,IM )) ) +#define _ALUQim(OP, IM, MD, MB, MI, MS) (_REXQrm(0, MB, MI), _Os_r_X_sL (0x81 ,OP ,MD,MB,MI,MS ,IM )) + +#define ADCBrr(RS, RD) _ALUBrr(X86_ADC, RS, RD) +#define ADCBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_ADC, MD, MB, MI, MS, RD) +#define ADCBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_ADC, RS, MD, MB, MI, MS) +#define ADCBir(IM, RD) _ALUBir(X86_ADC, IM, RD) +#define ADCBim(IM, MD, MB, MI, MS) _ALUBim(X86_ADC, IM, MD, MB, MI, MS) + +#define ADCWrr(RS, RD) _ALUWrr(X86_ADC, RS, RD) +#define ADCWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_ADC, MD, MB, MI, MS, RD) +#define ADCWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_ADC, RS, MD, MB, MI, MS) +#define ADCWir(IM, RD) _ALUWir(X86_ADC, IM, RD) +#define ADCWim(IM, MD, MB, MI, MS) _ALUWim(X86_ADC, IM, MD, MB, MI, MS) + +#define ADCLrr(RS, RD) _ALULrr(X86_ADC, RS, RD) +#define ADCLmr(MD, MB, MI, MS, RD) _ALULmr(X86_ADC, MD, MB, MI, MS, RD) +#define ADCLrm(RS, MD, MB, MI, MS) _ALULrm(X86_ADC, RS, MD, MB, MI, MS) +#define ADCLir(IM, RD) _ALULir(X86_ADC, IM, RD) +#define ADCLim(IM, MD, MB, MI, MS) _ALULim(X86_ADC, IM, MD, MB, MI, MS) + +#define ADCQrr(RS, RD) _ALUQrr(X86_ADC, RS, RD) +#define ADCQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_ADC, MD, MB, MI, MS, RD) +#define ADCQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_ADC, RS, MD, MB, MI, MS) +#define ADCQir(IM, RD) _ALUQir(X86_ADC, IM, RD) +#define ADCQim(IM, MD, MB, MI, MS) _ALUQim(X86_ADC, IM, MD, MB, MI, MS) + +#define ADDBrr(RS, RD) _ALUBrr(X86_ADD, RS, RD) +#define ADDBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_ADD, MD, MB, MI, MS, RD) +#define ADDBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_ADD, RS, MD, MB, MI, MS) +#define ADDBir(IM, RD) _ALUBir(X86_ADD, IM, RD) +#define ADDBim(IM, MD, MB, MI, MS) _ALUBim(X86_ADD, IM, MD, MB, MI, MS) + +#define ADDWrr(RS, RD) _ALUWrr(X86_ADD, RS, RD) +#define ADDWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_ADD, MD, MB, MI, MS, RD) +#define ADDWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_ADD, RS, MD, 
MB, MI, MS) +#define ADDWir(IM, RD) _ALUWir(X86_ADD, IM, RD) +#define ADDWim(IM, MD, MB, MI, MS) _ALUWim(X86_ADD, IM, MD, MB, MI, MS) + +#define ADDLrr(RS, RD) _ALULrr(X86_ADD, RS, RD) +#define ADDLmr(MD, MB, MI, MS, RD) _ALULmr(X86_ADD, MD, MB, MI, MS, RD) +#define ADDLrm(RS, MD, MB, MI, MS) _ALULrm(X86_ADD, RS, MD, MB, MI, MS) +#define ADDLir(IM, RD) _ALULir(X86_ADD, IM, RD) +#define ADDLim(IM, MD, MB, MI, MS) _ALULim(X86_ADD, IM, MD, MB, MI, MS) + +#define ADDQrr(RS, RD) _ALUQrr(X86_ADD, RS, RD) +#define ADDQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_ADD, MD, MB, MI, MS, RD) +#define ADDQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_ADD, RS, MD, MB, MI, MS) +#define ADDQir(IM, RD) _ALUQir(X86_ADD, IM, RD) +#define ADDQim(IM, MD, MB, MI, MS) _ALUQim(X86_ADD, IM, MD, MB, MI, MS) + +#define ANDBrr(RS, RD) _ALUBrr(X86_AND, RS, RD) +#define ANDBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_AND, MD, MB, MI, MS, RD) +#define ANDBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_AND, RS, MD, MB, MI, MS) +#define ANDBir(IM, RD) _ALUBir(X86_AND, IM, RD) +#define ANDBim(IM, MD, MB, MI, MS) _ALUBim(X86_AND, IM, MD, MB, MI, MS) + +#define ANDWrr(RS, RD) _ALUWrr(X86_AND, RS, RD) +#define ANDWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_AND, MD, MB, MI, MS, RD) +#define ANDWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_AND, RS, MD, MB, MI, MS) +#define ANDWir(IM, RD) _ALUWir(X86_AND, IM, RD) +#define ANDWim(IM, MD, MB, MI, MS) _ALUWim(X86_AND, IM, MD, MB, MI, MS) + +#define ANDLrr(RS, RD) _ALULrr(X86_AND, RS, RD) +#define ANDLmr(MD, MB, MI, MS, RD) _ALULmr(X86_AND, MD, MB, MI, MS, RD) +#define ANDLrm(RS, MD, MB, MI, MS) _ALULrm(X86_AND, RS, MD, MB, MI, MS) +#define ANDLir(IM, RD) _ALULir(X86_AND, IM, RD) +#define ANDLim(IM, MD, MB, MI, MS) _ALULim(X86_AND, IM, MD, MB, MI, MS) + +#define ANDQrr(RS, RD) _ALUQrr(X86_AND, RS, RD) +#define ANDQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_AND, MD, MB, MI, MS, RD) +#define ANDQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_AND, RS, MD, MB, MI, MS) +#define ANDQir(IM, RD) _ALUQir(X86_AND, IM, RD) +#define ANDQim(IM, MD, 
MB, MI, MS) _ALUQim(X86_AND, IM, MD, MB, MI, MS) + +#define CMPBrr(RS, RD) _ALUBrr(X86_CMP, RS, RD) +#define CMPBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_CMP, MD, MB, MI, MS, RD) +#define CMPBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_CMP, RS, MD, MB, MI, MS) +#define CMPBir(IM, RD) _ALUBir(X86_CMP, IM, RD) +#define CMPBim(IM, MD, MB, MI, MS) _ALUBim(X86_CMP, IM, MD, MB, MI, MS) + +#define CMPWrr(RS, RD) _ALUWrr(X86_CMP, RS, RD) +#define CMPWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_CMP, MD, MB, MI, MS, RD) +#define CMPWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_CMP, RS, MD, MB, MI, MS) +#define CMPWir(IM, RD) _ALUWir(X86_CMP, IM, RD) +#define CMPWim(IM, MD, MB, MI, MS) _ALUWim(X86_CMP, IM, MD, MB, MI, MS) + +#define CMPLrr(RS, RD) _ALULrr(X86_CMP, RS, RD) +#define CMPLmr(MD, MB, MI, MS, RD) _ALULmr(X86_CMP, MD, MB, MI, MS, RD) +#define CMPLrm(RS, MD, MB, MI, MS) _ALULrm(X86_CMP, RS, MD, MB, MI, MS) +#define CMPLir(IM, RD) _ALULir(X86_CMP, IM, RD) +#define CMPLim(IM, MD, MB, MI, MS) _ALULim(X86_CMP, IM, MD, MB, MI, MS) + +#define CMPQrr(RS, RD) _ALUQrr(X86_CMP, RS, RD) +#define CMPQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_CMP, MD, MB, MI, MS, RD) +#define CMPQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_CMP, RS, MD, MB, MI, MS) +#define CMPQir(IM, RD) _ALUQir(X86_CMP, IM, RD) +#define CMPQim(IM, MD, MB, MI, MS) _ALUQim(X86_CMP, IM, MD, MB, MI, MS) + +#define ORBrr(RS, RD) _ALUBrr(X86_OR, RS, RD) +#define ORBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_OR, MD, MB, MI, MS, RD) +#define ORBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_OR, RS, MD, MB, MI, MS) +#define ORBir(IM, RD) _ALUBir(X86_OR, IM, RD) +#define ORBim(IM, MD, MB, MI, MS) _ALUBim(X86_OR, IM, MD, MB, MI, MS) + +#define ORWrr(RS, RD) _ALUWrr(X86_OR, RS, RD) +#define ORWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_OR, MD, MB, MI, MS, RD) +#define ORWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_OR, RS, MD, MB, MI, MS) +#define ORWir(IM, RD) _ALUWir(X86_OR, IM, RD) +#define ORWim(IM, MD, MB, MI, MS) _ALUWim(X86_OR, IM, MD, MB, MI, MS) + +#define ORLrr(RS, RD) _ALULrr(X86_OR, RS, RD) 
+#define ORLmr(MD, MB, MI, MS, RD) _ALULmr(X86_OR, MD, MB, MI, MS, RD) +#define ORLrm(RS, MD, MB, MI, MS) _ALULrm(X86_OR, RS, MD, MB, MI, MS) +#define ORLir(IM, RD) _ALULir(X86_OR, IM, RD) +#define ORLim(IM, MD, MB, MI, MS) _ALULim(X86_OR, IM, MD, MB, MI, MS) + +#define ORQrr(RS, RD) _ALUQrr(X86_OR, RS, RD) +#define ORQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_OR, MD, MB, MI, MS, RD) +#define ORQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_OR, RS, MD, MB, MI, MS) +#define ORQir(IM, RD) _ALUQir(X86_OR, IM, RD) +#define ORQim(IM, MD, MB, MI, MS) _ALUQim(X86_OR, IM, MD, MB, MI, MS) + +#define SBBBrr(RS, RD) _ALUBrr(X86_SBB, RS, RD) +#define SBBBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_SBB, MD, MB, MI, MS, RD) +#define SBBBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_SBB, RS, MD, MB, MI, MS) +#define SBBBir(IM, RD) _ALUBir(X86_SBB, IM, RD) +#define SBBBim(IM, MD, MB, MI, MS) _ALUBim(X86_SBB, IM, MD, MB, MI, MS) + +#define SBBWrr(RS, RD) _ALUWrr(X86_SBB, RS, RD) +#define SBBWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_SBB, MD, MB, MI, MS, RD) +#define SBBWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_SBB, RS, MD, MB, MI, MS) +#define SBBWir(IM, RD) _ALUWir(X86_SBB, IM, RD) +#define SBBWim(IM, MD, MB, MI, MS) _ALUWim(X86_SBB, IM, MD, MB, MI, MS) + +#define SBBLrr(RS, RD) _ALULrr(X86_SBB, RS, RD) +#define SBBLmr(MD, MB, MI, MS, RD) _ALULmr(X86_SBB, MD, MB, MI, MS, RD) +#define SBBLrm(RS, MD, MB, MI, MS) _ALULrm(X86_SBB, RS, MD, MB, MI, MS) +#define SBBLir(IM, RD) _ALULir(X86_SBB, IM, RD) +#define SBBLim(IM, MD, MB, MI, MS) _ALULim(X86_SBB, IM, MD, MB, MI, MS) + +#define SBBQrr(RS, RD) _ALUQrr(X86_SBB, RS, RD) +#define SBBQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_SBB, MD, MB, MI, MS, RD) +#define SBBQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_SBB, RS, MD, MB, MI, MS) +#define SBBQir(IM, RD) _ALUQir(X86_SBB, IM, RD) +#define SBBQim(IM, MD, MB, MI, MS) _ALUQim(X86_SBB, IM, MD, MB, MI, MS) + +#define SUBBrr(RS, RD) _ALUBrr(X86_SUB, RS, RD) +#define SUBBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_SUB, MD, MB, MI, MS, RD) +#define SUBBrm(RS, MD, MB, 
MI, MS) _ALUBrm(X86_SUB, RS, MD, MB, MI, MS) +#define SUBBir(IM, RD) _ALUBir(X86_SUB, IM, RD) +#define SUBBim(IM, MD, MB, MI, MS) _ALUBim(X86_SUB, IM, MD, MB, MI, MS) + +#define SUBWrr(RS, RD) _ALUWrr(X86_SUB, RS, RD) +#define SUBWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_SUB, MD, MB, MI, MS, RD) +#define SUBWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_SUB, RS, MD, MB, MI, MS) +#define SUBWir(IM, RD) _ALUWir(X86_SUB, IM, RD) +#define SUBWim(IM, MD, MB, MI, MS) _ALUWim(X86_SUB, IM, MD, MB, MI, MS) + +#define SUBLrr(RS, RD) _ALULrr(X86_SUB, RS, RD) +#define SUBLmr(MD, MB, MI, MS, RD) _ALULmr(X86_SUB, MD, MB, MI, MS, RD) +#define SUBLrm(RS, MD, MB, MI, MS) _ALULrm(X86_SUB, RS, MD, MB, MI, MS) +#define SUBLir(IM, RD) _ALULir(X86_SUB, IM, RD) +#define SUBLim(IM, MD, MB, MI, MS) _ALULim(X86_SUB, IM, MD, MB, MI, MS) + +#define SUBQrr(RS, RD) _ALUQrr(X86_SUB, RS, RD) +#define SUBQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_SUB, MD, MB, MI, MS, RD) +#define SUBQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_SUB, RS, MD, MB, MI, MS) +#define SUBQir(IM, RD) _ALUQir(X86_SUB, IM, RD) +#define SUBQim(IM, MD, MB, MI, MS) _ALUQim(X86_SUB, IM, MD, MB, MI, MS) + +#define XORBrr(RS, RD) _ALUBrr(X86_XOR, RS, RD) +#define XORBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_XOR, MD, MB, MI, MS, RD) +#define XORBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_XOR, RS, MD, MB, MI, MS) +#define XORBir(IM, RD) _ALUBir(X86_XOR, IM, RD) +#define XORBim(IM, MD, MB, MI, MS) _ALUBim(X86_XOR, IM, MD, MB, MI, MS) + +#define XORWrr(RS, RD) _ALUWrr(X86_XOR, RS, RD) +#define XORWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_XOR, MD, MB, MI, MS, RD) +#define XORWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_XOR, RS, MD, MB, MI, MS) +#define XORWir(IM, RD) _ALUWir(X86_XOR, IM, RD) +#define XORWim(IM, MD, MB, MI, MS) _ALUWim(X86_XOR, IM, MD, MB, MI, MS) + +#define XORLrr(RS, RD) _ALULrr(X86_XOR, RS, RD) +#define XORLmr(MD, MB, MI, MS, RD) _ALULmr(X86_XOR, MD, MB, MI, MS, RD) +#define XORLrm(RS, MD, MB, MI, MS) _ALULrm(X86_XOR, RS, MD, MB, MI, MS) +#define XORLir(IM, RD) _ALULir(X86_XOR, 
IM, RD) +#define XORLim(IM, MD, MB, MI, MS) _ALULim(X86_XOR, IM, MD, MB, MI, MS) + +#define XORQrr(RS, RD) _ALUQrr(X86_XOR, RS, RD) +#define XORQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_XOR, MD, MB, MI, MS, RD) +#define XORQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_XOR, RS, MD, MB, MI, MS) +#define XORQir(IM, RD) _ALUQir(X86_XOR, IM, RD) +#define XORQim(IM, MD, MB, MI, MS) _ALUQim(X86_XOR, IM, MD, MB, MI, MS) + + +/* --- Shift/Rotate instructions ------------------------------------------- */ + +enum { + X86_ROL = 0, + X86_ROR = 1, + X86_RCL = 2, + X86_RCR = 3, + X86_SHL = 4, + X86_SHR = 5, + X86_SAR = 7, +}; + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ + +#define _ROTSHIBir(OP,IM,RD) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \ + (_REXBrr(0, RD), _O_Mrm (0xd0 ,_b11,OP,_r1(RD) )) : \ + (_REXBrr(0, RD), _O_Mrm_B (0xc0 ,_b11,OP,_r1(RD) ,_u8(IM))) ) +#define _ROTSHIBim(OP,IM,MD,MB,MI,MS) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \ + (_REXBrm(0, MB, MI), _O_r_X (0xd0 ,OP ,MD,MB,MI,MS )) : \ + (_REXBrm(0, MB, MI), _O_r_X_B (0xc0 ,OP ,MD,MB,MI,MS ,_u8(IM))) ) +#define _ROTSHIBrr(OP,RS,RD) (((RS) == X86_CL) ? \ + (_REXBrr(RS, RD), _O_Mrm (0xd2 ,_b11,OP,_r1(RD) )) : \ + x86_emit_failure("source register must be CL" ) ) +#define _ROTSHIBrm(OP,RS,MD,MB,MI,MS) (((RS) == X86_CL) ? \ + (_REXBrm(RS, MB, MI), _O_r_X (0xd2 ,OP ,MD,MB,MI,MS )) : \ + x86_emit_failure("source register must be CL" ) ) + +#define _ROTSHIWir(OP,IM,RD) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \ + (_d16(), _REXLrr(0, RD), _O_Mrm (0xd1 ,_b11,OP,_r2(RD) )) : \ + (_d16(), _REXLrr(0, RD), _O_Mrm_B (0xc1 ,_b11,OP,_r2(RD) ,_u8(IM))) ) +#define _ROTSHIWim(OP,IM,MD,MB,MI,MS) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \ + (_d16(), _REXLrm(0, MB, MI), _O_r_X (0xd1 ,OP ,MD,MB,MI,MS )) : \ + (_d16(), _REXLrm(0, MB, MI), _O_r_X_B (0xc1 ,OP ,MD,MB,MI,MS ,_u8(IM))) ) +#define _ROTSHIWrr(OP,RS,RD) (((RS) == X86_CL) ? 
\ + (_d16(), _REXLrr(RS, RD), _O_Mrm (0xd3 ,_b11,OP,_r2(RD) )) : \ + x86_emit_failure("source register must be CL" ) ) +#define _ROTSHIWrm(OP,RS,MD,MB,MI,MS) (((RS) == X86_CL) ? \ + (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0xd3 ,OP ,MD,MB,MI,MS )) : \ + x86_emit_failure("source register must be CL" ) ) + +#define _ROTSHILir(OP,IM,RD) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \ + (_REXLrr(0, RD), _O_Mrm (0xd1 ,_b11,OP,_r4(RD) )) : \ + (_REXLrr(0, RD), _O_Mrm_B (0xc1 ,_b11,OP,_r4(RD) ,_u8(IM))) ) +#define _ROTSHILim(OP,IM,MD,MB,MI,MS) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \ + (_REXLrm(0, MB, MI), _O_r_X (0xd1 ,OP ,MD,MB,MI,MS )) : \ + (_REXLrm(0, MB, MI), _O_r_X_B (0xc1 ,OP ,MD,MB,MI,MS ,_u8(IM))) ) +#define _ROTSHILrr(OP,RS,RD) (((RS) == X86_CL) ? \ + (_REXLrr(RS, RD), _O_Mrm (0xd3 ,_b11,OP,_r4(RD) )) : \ + x86_emit_failure("source register must be CL" ) ) +#define _ROTSHILrm(OP,RS,MD,MB,MI,MS) (((RS) == X86_CL) ? \ + (_REXLrm(RS, MB, MI), _O_r_X (0xd3 ,OP ,MD,MB,MI,MS )) : \ + x86_emit_failure("source register must be CL" ) ) + +#define _ROTSHIQir(OP,IM,RD) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \ + (_REXQrr(0, RD), _O_Mrm (0xd1 ,_b11,OP,_r8(RD) )) : \ + (_REXQrr(0, RD), _O_Mrm_B (0xc1 ,_b11,OP,_r8(RD) ,_u8(IM))) ) +#define _ROTSHIQim(OP,IM,MD,MB,MI,MS) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \ + (_REXQrm(0, MB, MI), _O_r_X (0xd1 ,OP ,MD,MB,MI,MS )) : \ + (_REXQrm(0, MB, MI), _O_r_X_B (0xc1 ,OP ,MD,MB,MI,MS ,_u8(IM))) ) +#define _ROTSHIQrr(OP,RS,RD) (((RS) == X86_CL) ? \ + (_REXQrr(RS, RD), _O_Mrm (0xd3 ,_b11,OP,_r8(RD) )) : \ + x86_emit_failure("source register must be CL" ) ) +#define _ROTSHIQrm(OP,RS,MD,MB,MI,MS) (((RS) == X86_CL) ? 
\ + (_REXQrm(RS, MB, MI), _O_r_X (0xd3 ,OP ,MD,MB,MI,MS )) : \ + x86_emit_failure("source register must be CL" ) ) + +#define ROLBir(IM, RD) _ROTSHIBir(X86_ROL, IM, RD) +#define ROLBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_ROL, IM, MD, MB, MI, MS) +#define ROLBrr(RS, RD) _ROTSHIBrr(X86_ROL, RS, RD) +#define ROLBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_ROL, RS, MD, MB, MI, MS) + +#define ROLWir(IM, RD) _ROTSHIWir(X86_ROL, IM, RD) +#define ROLWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_ROL, IM, MD, MB, MI, MS) +#define ROLWrr(RS, RD) _ROTSHIWrr(X86_ROL, RS, RD) +#define ROLWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_ROL, RS, MD, MB, MI, MS) + +#define ROLLir(IM, RD) _ROTSHILir(X86_ROL, IM, RD) +#define ROLLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_ROL, IM, MD, MB, MI, MS) +#define ROLLrr(RS, RD) _ROTSHILrr(X86_ROL, RS, RD) +#define ROLLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_ROL, RS, MD, MB, MI, MS) + +#define ROLQir(IM, RD) _ROTSHIQir(X86_ROL, IM, RD) +#define ROLQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_ROL, IM, MD, MB, MI, MS) +#define ROLQrr(RS, RD) _ROTSHIQrr(X86_ROL, RS, RD) +#define ROLQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_ROL, RS, MD, MB, MI, MS) + +#define RORBir(IM, RD) _ROTSHIBir(X86_ROR, IM, RD) +#define RORBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_ROR, IM, MD, MB, MI, MS) +#define RORBrr(RS, RD) _ROTSHIBrr(X86_ROR, RS, RD) +#define RORBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_ROR, RS, MD, MB, MI, MS) + +#define RORWir(IM, RD) _ROTSHIWir(X86_ROR, IM, RD) +#define RORWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_ROR, IM, MD, MB, MI, MS) +#define RORWrr(RS, RD) _ROTSHIWrr(X86_ROR, RS, RD) +#define RORWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_ROR, RS, MD, MB, MI, MS) + +#define RORLir(IM, RD) _ROTSHILir(X86_ROR, IM, RD) +#define RORLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_ROR, IM, MD, MB, MI, MS) +#define RORLrr(RS, RD) _ROTSHILrr(X86_ROR, RS, RD) +#define RORLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_ROR, RS, MD, MB, MI, MS) + +#define RORQir(IM, RD) _ROTSHIQir(X86_ROR, IM, RD) +#define RORQim(IM, MD, 
MB, MI, MS) _ROTSHIQim(X86_ROR, IM, MD, MB, MI, MS) +#define RORQrr(RS, RD) _ROTSHIQrr(X86_ROR, RS, RD) +#define RORQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_ROR, RS, MD, MB, MI, MS) + +#define RCLBir(IM, RD) _ROTSHIBir(X86_RCL, IM, RD) +#define RCLBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_RCL, IM, MD, MB, MI, MS) +#define RCLBrr(RS, RD) _ROTSHIBrr(X86_RCL, RS, RD) +#define RCLBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_RCL, RS, MD, MB, MI, MS) + +#define RCLWir(IM, RD) _ROTSHIWir(X86_RCL, IM, RD) +#define RCLWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_RCL, IM, MD, MB, MI, MS) +#define RCLWrr(RS, RD) _ROTSHIWrr(X86_RCL, RS, RD) +#define RCLWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_RCL, RS, MD, MB, MI, MS) + +#define RCLLir(IM, RD) _ROTSHILir(X86_RCL, IM, RD) +#define RCLLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_RCL, IM, MD, MB, MI, MS) +#define RCLLrr(RS, RD) _ROTSHILrr(X86_RCL, RS, RD) +#define RCLLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_RCL, RS, MD, MB, MI, MS) + +#define RCLQir(IM, RD) _ROTSHIQir(X86_RCL, IM, RD) +#define RCLQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_RCL, IM, MD, MB, MI, MS) +#define RCLQrr(RS, RD) _ROTSHIQrr(X86_RCL, RS, RD) +#define RCLQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_RCL, RS, MD, MB, MI, MS) + +#define RCRBir(IM, RD) _ROTSHIBir(X86_RCR, IM, RD) +#define RCRBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_RCR, IM, MD, MB, MI, MS) +#define RCRBrr(RS, RD) _ROTSHIBrr(X86_RCR, RS, RD) +#define RCRBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_RCR, RS, MD, MB, MI, MS) + +#define RCRWir(IM, RD) _ROTSHIWir(X86_RCR, IM, RD) +#define RCRWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_RCR, IM, MD, MB, MI, MS) +#define RCRWrr(RS, RD) _ROTSHIWrr(X86_RCR, RS, RD) +#define RCRWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_RCR, RS, MD, MB, MI, MS) + +#define RCRLir(IM, RD) _ROTSHILir(X86_RCR, IM, RD) +#define RCRLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_RCR, IM, MD, MB, MI, MS) +#define RCRLrr(RS, RD) _ROTSHILrr(X86_RCR, RS, RD) +#define RCRLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_RCR, RS, MD, MB, MI, MS) + +#define 
RCRQir(IM, RD) _ROTSHIQir(X86_RCR, IM, RD) +#define RCRQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_RCR, IM, MD, MB, MI, MS) +#define RCRQrr(RS, RD) _ROTSHIQrr(X86_RCR, RS, RD) +#define RCRQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_RCR, RS, MD, MB, MI, MS) + +#define SHLBir(IM, RD) _ROTSHIBir(X86_SHL, IM, RD) +#define SHLBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_SHL, IM, MD, MB, MI, MS) +#define SHLBrr(RS, RD) _ROTSHIBrr(X86_SHL, RS, RD) +#define SHLBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_SHL, RS, MD, MB, MI, MS) + +#define SHLWir(IM, RD) _ROTSHIWir(X86_SHL, IM, RD) +#define SHLWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_SHL, IM, MD, MB, MI, MS) +#define SHLWrr(RS, RD) _ROTSHIWrr(X86_SHL, RS, RD) +#define SHLWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_SHL, RS, MD, MB, MI, MS) + +#define SHLLir(IM, RD) _ROTSHILir(X86_SHL, IM, RD) +#define SHLLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_SHL, IM, MD, MB, MI, MS) +#define SHLLrr(RS, RD) _ROTSHILrr(X86_SHL, RS, RD) +#define SHLLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_SHL, RS, MD, MB, MI, MS) + +#define SHLQir(IM, RD) _ROTSHIQir(X86_SHL, IM, RD) +#define SHLQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_SHL, IM, MD, MB, MI, MS) +#define SHLQrr(RS, RD) _ROTSHIQrr(X86_SHL, RS, RD) +#define SHLQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_SHL, RS, MD, MB, MI, MS) + +#define SHRBir(IM, RD) _ROTSHIBir(X86_SHR, IM, RD) +#define SHRBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_SHR, IM, MD, MB, MI, MS) +#define SHRBrr(RS, RD) _ROTSHIBrr(X86_SHR, RS, RD) +#define SHRBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_SHR, RS, MD, MB, MI, MS) + +#define SHRWir(IM, RD) _ROTSHIWir(X86_SHR, IM, RD) +#define SHRWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_SHR, IM, MD, MB, MI, MS) +#define SHRWrr(RS, RD) _ROTSHIWrr(X86_SHR, RS, RD) +#define SHRWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_SHR, RS, MD, MB, MI, MS) + +#define SHRLir(IM, RD) _ROTSHILir(X86_SHR, IM, RD) +#define SHRLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_SHR, IM, MD, MB, MI, MS) +#define SHRLrr(RS, RD) _ROTSHILrr(X86_SHR, RS, RD) +#define SHRLrm(RS, 
MD, MB, MI, MS) _ROTSHILrm(X86_SHR, RS, MD, MB, MI, MS) + +#define SHRQir(IM, RD) _ROTSHIQir(X86_SHR, IM, RD) +#define SHRQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_SHR, IM, MD, MB, MI, MS) +#define SHRQrr(RS, RD) _ROTSHIQrr(X86_SHR, RS, RD) +#define SHRQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_SHR, RS, MD, MB, MI, MS) + +#define SALBir SHLBir +#define SALBim SHLBim +#define SALBrr SHLBrr +#define SALBrm SHLBrm + +#define SALWir SHLWir +#define SALWim SHLWim +#define SALWrr SHLWrr +#define SALWrm SHLWrm + +#define SALLir SHLLir +#define SALLim SHLLim +#define SALLrr SHLLrr +#define SALLrm SHLLrm + +#define SALQir SHLQir +#define SALQim SHLQim +#define SALQrr SHLQrr +#define SALQrm SHLQrm + +#define SARBir(IM, RD) _ROTSHIBir(X86_SAR, IM, RD) +#define SARBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_SAR, IM, MD, MB, MI, MS) +#define SARBrr(RS, RD) _ROTSHIBrr(X86_SAR, RS, RD) +#define SARBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_SAR, RS, MD, MB, MI, MS) + +#define SARWir(IM, RD) _ROTSHIWir(X86_SAR, IM, RD) +#define SARWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_SAR, IM, MD, MB, MI, MS) +#define SARWrr(RS, RD) _ROTSHIWrr(X86_SAR, RS, RD) +#define SARWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_SAR, RS, MD, MB, MI, MS) + +#define SARLir(IM, RD) _ROTSHILir(X86_SAR, IM, RD) +#define SARLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_SAR, IM, MD, MB, MI, MS) +#define SARLrr(RS, RD) _ROTSHILrr(X86_SAR, RS, RD) +#define SARLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_SAR, RS, MD, MB, MI, MS) + +#define SARQir(IM, RD) _ROTSHIQir(X86_SAR, IM, RD) +#define SARQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_SAR, IM, MD, MB, MI, MS) +#define SARQrr(RS, RD) _ROTSHIQrr(X86_SAR, RS, RD) +#define SARQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_SAR, RS, MD, MB, MI, MS) + + +/* --- Bit test instructions ----------------------------------------------- */ + +enum { + X86_BT = 4, + X86_BTS = 5, + X86_BTR = 6, + X86_BTC = 7, +}; + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ + +#define _BTWir(OP, IM, RD) (_d16(), _REXLrr(0, RD), _OO_Mrm_B (0x0fba ,_b11,OP ,_r2(RD) ,_u8(IM))) +#define _BTWim(OP, IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _OO_r_X_B (0x0fba ,OP ,MD,MB,MI,MS ,_u8(IM))) +#define _BTWrr(OP, RS, RD) (_d16(), _REXLrr(RS, RD), _OO_Mrm (0x0f83|((OP)<<3),_b11,_r2(RS),_r2(RD) )) +#define _BTWrm(OP, RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _OO_r_X (0x0f83|((OP)<<3) ,_r2(RS) ,MD,MB,MI,MS )) + +#define _BTLir(OP, IM, RD) (_REXLrr(0, RD), _OO_Mrm_B (0x0fba ,_b11,OP ,_r4(RD) ,_u8(IM))) +#define _BTLim(OP, IM, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _OO_r_X_B (0x0fba ,OP ,MD,MB,MI,MS ,_u8(IM))) +#define _BTLrr(OP, RS, RD) (_REXLrr(RS, RD), _OO_Mrm (0x0f83|((OP)<<3),_b11,_r4(RS),_r4(RD) )) +#define _BTLrm(OP, RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0f83|((OP)<<3) ,_r4(RS) ,MD,MB,MI,MS )) + +#define _BTQir(OP, IM, RD) (_REXQrr(0, RD), _OO_Mrm_B (0x0fba ,_b11,OP ,_r8(RD) ,_u8(IM))) +#define _BTQim(OP, IM, MD, MB, MI, MS) (_REXQrm(0, MB, MI), _OO_r_X_B (0x0fba ,OP ,MD,MB,MI,MS ,_u8(IM))) +#define _BTQrr(OP, RS, RD) (_REXQrr(RS, RD), _OO_Mrm (0x0f83|((OP)<<3),_b11,_r8(RS),_r8(RD) )) +#define _BTQrm(OP, RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0f83|((OP)<<3) ,_r8(RS) ,MD,MB,MI,MS )) + +#define BTWir(IM, RD) _BTWir(X86_BT, IM, RD) +#define BTWim(IM, MD, MB, MI, MS) _BTWim(X86_BT, IM, MD, MB, MI, MS) +#define BTWrr(RS, RD) _BTWrr(X86_BT, RS, RD) +#define BTWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BT, RS, MD, MB, MI, MS) + +#define BTLir(IM, RD) _BTLir(X86_BT, IM, RD) +#define BTLim(IM, MD, MB, MI, MS) _BTLim(X86_BT, IM, MD, MB, MI, MS) +#define BTLrr(RS, RD) _BTLrr(X86_BT, RS, RD) +#define BTLrm(RS, MD, MB, MI, MS) _BTLrm(X86_BT, RS, MD, MB, MI, MS) + +#define BTQir(IM, RD) _BTQir(X86_BT, IM, RD) +#define BTQim(IM, MD, MB, MI, MS) _BTQim(X86_BT, IM, MD, MB, MI, MS) +#define BTQrr(RS, RD) _BTQrr(X86_BT, RS, RD) +#define BTQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BT, RS, MD, MB, MI, MS) + +#define BTCWir(IM, RD) 
_BTWir(X86_BTC, IM, RD) +#define BTCWim(IM, MD, MB, MI, MS) _BTWim(X86_BTC, IM, MD, MB, MI, MS) +#define BTCWrr(RS, RD) _BTWrr(X86_BTC, RS, RD) +#define BTCWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BTC, RS, MD, MB, MI, MS) + +#define BTCLir(IM, RD) _BTLir(X86_BTC, IM, RD) +#define BTCLim(IM, MD, MB, MI, MS) _BTLim(X86_BTC, IM, MD, MB, MI, MS) +#define BTCLrr(RS, RD) _BTLrr(X86_BTC, RS, RD) +#define BTCLrm(RS, MD, MB, MI, MS) _BTLrm(X86_BTC, RS, MD, MB, MI, MS) + +#define BTCQir(IM, RD) _BTQir(X86_BTC, IM, RD) +#define BTCQim(IM, MD, MB, MI, MS) _BTQim(X86_BTC, IM, MD, MB, MI, MS) +#define BTCQrr(RS, RD) _BTQrr(X86_BTC, RS, RD) +#define BTCQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BTC, RS, MD, MB, MI, MS) + +#define BTRWir(IM, RD) _BTWir(X86_BTR, IM, RD) +#define BTRWim(IM, MD, MB, MI, MS) _BTWim(X86_BTR, IM, MD, MB, MI, MS) +#define BTRWrr(RS, RD) _BTWrr(X86_BTR, RS, RD) +#define BTRWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BTR, RS, MD, MB, MI, MS) + +#define BTRLir(IM, RD) _BTLir(X86_BTR, IM, RD) +#define BTRLim(IM, MD, MB, MI, MS) _BTLim(X86_BTR, IM, MD, MB, MI, MS) +#define BTRLrr(RS, RD) _BTLrr(X86_BTR, RS, RD) +#define BTRLrm(RS, MD, MB, MI, MS) _BTLrm(X86_BTR, RS, MD, MB, MI, MS) + +#define BTRQir(IM, RD) _BTQir(X86_BTR, IM, RD) +#define BTRQim(IM, MD, MB, MI, MS) _BTQim(X86_BTR, IM, MD, MB, MI, MS) +#define BTRQrr(RS, RD) _BTQrr(X86_BTR, RS, RD) +#define BTRQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BTR, RS, MD, MB, MI, MS) + +#define BTSWir(IM, RD) _BTWir(X86_BTS, IM, RD) +#define BTSWim(IM, MD, MB, MI, MS) _BTWim(X86_BTS, IM, MD, MB, MI, MS) +#define BTSWrr(RS, RD) _BTWrr(X86_BTS, RS, RD) +#define BTSWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BTS, RS, MD, MB, MI, MS) + +#define BTSLir(IM, RD) _BTLir(X86_BTS, IM, RD) +#define BTSLim(IM, MD, MB, MI, MS) _BTLim(X86_BTS, IM, MD, MB, MI, MS) +#define BTSLrr(RS, RD) _BTLrr(X86_BTS, RS, RD) +#define BTSLrm(RS, MD, MB, MI, MS) _BTLrm(X86_BTS, RS, MD, MB, MI, MS) + +#define BTSQir(IM, RD) _BTQir(X86_BTS, IM, RD) +#define BTSQim(IM, MD, MB, MI, MS) _BTQim(X86_BTS, 
IM, MD, MB, MI, MS) +#define BTSQrr(RS, RD) _BTQrr(X86_BTS, RS, RD) +#define BTSQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BTS, RS, MD, MB, MI, MS) + + +/* --- Move instructions --------------------------------------------------- */ + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ + +#define MOVBrr(RS, RD) (_REXBrr(RS, RD), _O_Mrm (0x88 ,_b11,_r1(RS),_r1(RD) )) +#define MOVBmr(MD, MB, MI, MS, RD) (_REXBmr(MB, MI, RD), _O_r_X (0x8a ,_r1(RD) ,MD,MB,MI,MS )) +#define MOVBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _O_r_X (0x88 ,_r1(RS) ,MD,MB,MI,MS )) +#define MOVBir(IM, R) (_REXBrr(0, R), _Or_B (0xb0,_r1(R) ,_su8(IM))) +#define MOVBim(IM, MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_X_B (0xc6 ,MD,MB,MI,MS ,_su8(IM))) + +#define MOVWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (0x89 ,_b11,_r2(RS),_r2(RD) )) +#define MOVWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _O_r_X (0x8b ,_r2(RD) ,MD,MB,MI,MS )) +#define MOVWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0x89 ,_r2(RS) ,MD,MB,MI,MS )) +#define MOVWir(IM, R) (_d16(), _REXLrr(0, R), _Or_W (0xb8,_r2(R) ,_su16(IM))) +#define MOVWim(IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_X_W (0xc7 ,MD,MB,MI,MS ,_su16(IM))) + +#define MOVLrr(RS, RD) (_REXLrr(RS, RD), _O_Mrm (0x89 ,_b11,_r4(RS),_r4(RD) )) +#define MOVLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _O_r_X (0x8b ,_r4(RD) ,MD,MB,MI,MS )) +#define MOVLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _O_r_X (0x89 ,_r4(RS) ,MD,MB,MI,MS )) +#define MOVLir(IM, R) (_REXLrr(0, R), _Or_L (0xb8,_r4(R) ,IM )) +#define MOVLim(IM, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_X_L (0xc7 ,MD,MB,MI,MS ,IM )) + +#define MOVQrr(RS, RD) (_REXQrr(RS, RD), _O_Mrm (0x89 ,_b11,_r8(RS),_r8(RD) )) +#define MOVQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _O_r_X (0x8b ,_r8(RD) ,MD,MB,MI,MS )) +#define MOVQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _O_r_X (0x89 ,_r8(RS) ,MD,MB,MI,MS )) +#define MOVQir(IM, R) (_REXQrr(0, R), _Or_Q (0xb8,_r8(R) ,IM )) +#define MOVQim(IM, MD, MB, MI, 
MS) (_REXQrm(0, MB, MI), _O_X_L (0xc7 ,MD,MB,MI,MS ,IM )) + + +/* --- Unary and Multiply/Divide instructions ------------------------------ */ + +enum { + X86_NOT = 2, + X86_NEG = 3, + X86_MUL = 4, + X86_IMUL = 5, + X86_DIV = 6, + X86_IDIV = 7, +}; + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ + +#define _UNARYBr(OP, RS) (_REXBrr(0, RS), _O_Mrm (0xf6 ,_b11,OP ,_r1(RS) )) +#define _UNARYBm(OP, MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X (0xf6 ,OP ,MD,MB,MI,MS )) +#define _UNARYWr(OP, RS) (_d16(), _REXLrr(0, RS), _O_Mrm (0xf7 ,_b11,OP ,_r2(RS) )) +#define _UNARYWm(OP, MD, MB, MI, MS) (_d16(), _REXLmr(MB, MI, 0), _O_r_X (0xf7 ,OP ,MD,MB,MI,MS )) +#define _UNARYLr(OP, RS) (_REXLrr(0, RS), _O_Mrm (0xf7 ,_b11,OP ,_r4(RS) )) +#define _UNARYLm(OP, MD, MB, MI, MS) (_REXLmr(MB, MI, 0), _O_r_X (0xf7 ,OP ,MD,MB,MI,MS )) +#define _UNARYQr(OP, RS) (_REXQrr(0, RS), _O_Mrm (0xf7 ,_b11,OP ,_r8(RS) )) +#define _UNARYQm(OP, MD, MB, MI, MS) (_REXQmr(MB, MI, 0), _O_r_X (0xf7 ,OP ,MD,MB,MI,MS )) + +#define NOTBr(RS) _UNARYBr(X86_NOT, RS) +#define NOTBm(MD, MB, MI, MS) _UNARYBm(X86_NOT, MD, MB, MI, MS) +#define NOTWr(RS) _UNARYWr(X86_NOT, RS) +#define NOTWm(MD, MB, MI, MS) _UNARYWm(X86_NOT, MD, MB, MI, MS) +#define NOTLr(RS) _UNARYLr(X86_NOT, RS) +#define NOTLm(MD, MB, MI, MS) _UNARYLm(X86_NOT, MD, MB, MI, MS) +#define NOTQr(RS) _UNARYQr(X86_NOT, RS) +#define NOTQm(MD, MB, MI, MS) _UNARYQm(X86_NOT, MD, MB, MI, MS) + +#define NEGBr(RS) _UNARYBr(X86_NEG, RS) +#define NEGBm(MD, MB, MI, MS) _UNARYBm(X86_NEG, MD, MB, MI, MS) +#define NEGWr(RS) _UNARYWr(X86_NEG, RS) +#define NEGWm(MD, MB, MI, MS) _UNARYWm(X86_NEG, MD, MB, MI, MS) +#define NEGLr(RS) _UNARYLr(X86_NEG, RS) +#define NEGLm(MD, MB, MI, MS) _UNARYLm(X86_NEG, MD, MB, MI, MS) +#define NEGQr(RS) _UNARYQr(X86_NEG, RS) +#define NEGQm(MD, MB, MI, MS) _UNARYQm(X86_NEG, MD, MB, MI, MS) + +#define MULBr(RS) _UNARYBr(X86_MUL, RS) +#define MULBm(MD, MB, MI, MS) _UNARYBm(X86_MUL, MD, MB, MI, MS) +#define MULWr(RS) _UNARYWr(X86_MUL, RS) 
+#define MULWm(MD, MB, MI, MS) _UNARYWm(X86_MUL, MD, MB, MI, MS) +#define MULLr(RS) _UNARYLr(X86_MUL, RS) +#define MULLm(MD, MB, MI, MS) _UNARYLm(X86_MUL, MD, MB, MI, MS) +#define MULQr(RS) _UNARYQr(X86_MUL, RS) +#define MULQm(MD, MB, MI, MS) _UNARYQm(X86_MUL, MD, MB, MI, MS) + +#define IMULBr(RS) _UNARYBr(X86_IMUL, RS) +#define IMULBm(MD, MB, MI, MS) _UNARYBm(X86_IMUL, MD, MB, MI, MS) +#define IMULWr(RS) _UNARYWr(X86_IMUL, RS) +#define IMULWm(MD, MB, MI, MS) _UNARYWm(X86_IMUL, MD, MB, MI, MS) +#define IMULLr(RS) _UNARYLr(X86_IMUL, RS) +#define IMULLm(MD, MB, MI, MS) _UNARYLm(X86_IMUL, MD, MB, MI, MS) +#define IMULQr(RS) _UNARYQr(X86_IMUL, RS) +#define IMULQm(MD, MB, MI, MS) _UNARYQm(X86_IMUL, MD, MB, MI, MS) + +#define DIVBr(RS) _UNARYBr(X86_DIV, RS) +#define DIVBm(MD, MB, MI, MS) _UNARYBm(X86_DIV, MD, MB, MI, MS) +#define DIVWr(RS) _UNARYWr(X86_DIV, RS) +#define DIVWm(MD, MB, MI, MS) _UNARYWm(X86_DIV, MD, MB, MI, MS) +#define DIVLr(RS) _UNARYLr(X86_DIV, RS) +#define DIVLm(MD, MB, MI, MS) _UNARYLm(X86_DIV, MD, MB, MI, MS) +#define DIVQr(RS) _UNARYQr(X86_DIV, RS) +#define DIVQm(MD, MB, MI, MS) _UNARYQm(X86_DIV, MD, MB, MI, MS) + +#define IDIVBr(RS) _UNARYBr(X86_IDIV, RS) +#define IDIVBm(MD, MB, MI, MS) _UNARYBm(X86_IDIV, MD, MB, MI, MS) +#define IDIVWr(RS) _UNARYWr(X86_IDIV, RS) +#define IDIVWm(MD, MB, MI, MS) _UNARYWm(X86_IDIV, MD, MB, MI, MS) +#define IDIVLr(RS) _UNARYLr(X86_IDIV, RS) +#define IDIVLm(MD, MB, MI, MS) _UNARYLm(X86_IDIV, MD, MB, MI, MS) +#define IDIVQr(RS) _UNARYQr(X86_IDIV, RS) +#define IDIVQm(MD, MB, MI, MS) _UNARYQm(X86_IDIV, MD, MB, MI, MS) + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ + +#define IMULWrr(RS, RD) (_d16(), _REXLrr(RD, RS), _OO_Mrm (0x0faf ,_b11,_r2(RD),_r2(RS) )) +#define IMULWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0faf ,_r2(RD) ,MD,MB,MI,MS )) + +#define IMULWirr(IM,RS,RD) (_d16(), _REXLrr(RS, RD), _Os_Mrm_sW (0x69 ,_b11,_r2(RS),_r2(RD) ,_su16(IM) )) +#define IMULWimr(IM,MD,MB,MI,MS,RD) (_d16(), _REXLmr(MB, MI, RD), _Os_r_X_sW (0x69 ,_r2(RD) ,MD,MB,MI,MS ,_su16(IM) )) + +#define IMULLir(IM, RD) (_REXLrr(0, RD), _Os_Mrm_sL (0x69 ,_b11,_r4(RD),_r4(RD) ,IM )) +#define IMULLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0faf ,_b11,_r4(RD),_r4(RS) )) +#define IMULLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0faf ,_r4(RD) ,MD,MB,MI,MS )) + +#define IMULQir(IM, RD) (_REXQrr(0, RD), _Os_Mrm_sL (0x69 ,_b11,_r8(RD),_r8(RD) ,IM )) +#define IMULQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0faf ,_b11,_r8(RD),_r8(RS) )) +#define IMULQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0faf ,_r8(RD) ,MD,MB,MI,MS )) + +#define IMULLirr(IM,RS,RD) (_REXLrr(RS, RD), _Os_Mrm_sL (0x69 ,_b11,_r4(RS),_r4(RD) ,IM )) +#define IMULLimr(IM,MD,MB,MI,MS,RD) (_REXLmr(MB, MI, RD), _Os_r_X_sL (0x69 ,_r4(RD) ,MD,MB,MI,MS ,IM )) + +#define IMULQirr(IM,RS,RD) (_REXQrr(RS, RD), _Os_Mrm_sL (0x69 ,_b11,_r8(RS),_r8(RD) ,IM )) +#define IMULQimr(IM,MD,MB,MI,MS,RD) (_REXQmr(MB, MI, RD), _Os_r_X_sL (0x69 ,_r8(RD) ,MD,MB,MI,MS ,IM )) + + +/* --- Control Flow related instructions ----------------------------------- */ + +enum { + X86_CC_O = 0x0, + X86_CC_NO = 0x1, + X86_CC_NAE = 0x2, + X86_CC_B = 0x2, + X86_CC_C = 0x2, + X86_CC_AE = 0x3, + X86_CC_NB = 0x3, + X86_CC_NC = 0x3, + X86_CC_E = 0x4, + X86_CC_Z = 0x4, + X86_CC_NE = 0x5, + X86_CC_NZ = 0x5, + X86_CC_BE = 0x6, + X86_CC_NA = 0x6, + X86_CC_A = 0x7, + X86_CC_NBE = 0x7, + X86_CC_S = 0x8, + X86_CC_NS = 0x9, + X86_CC_P = 0xa, + X86_CC_PE = 0xa, + X86_CC_NP = 0xb, + X86_CC_PO = 0xb, + X86_CC_L = 0xc, + X86_CC_NGE = 0xc, + X86_CC_GE = 0xd, + X86_CC_NL = 0xd, + X86_CC_LE = 0xe, + X86_CC_NG = 
0xe, + X86_CC_G = 0xf, + X86_CC_NLE = 0xf, +}; + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ + +// FIXME: no prefix is availble to encode a 32-bit operand size in 64-bit mode +#define CALLm(M) _O_D32 (0xe8 ,(int)(M) ) +#define _CALLLsr(R) (_REXLrr(0, R), _O_Mrm (0xff ,_b11,_b010,_r4(R) )) +#define _CALLQsr(R) (_REXQrr(0, R), _O_Mrm (0xff ,_b11,_b010,_r8(R) )) +#define CALLsr(R) ( X86_TARGET_64BIT ? _CALLQsr(R) : _CALLLsr(R)) +#define CALLsm(D,B,I,S) (_REXLrm(0, B, I), _O_r_X (0xff ,_b010 ,(int)(D),B,I,S )) + +// FIXME: no prefix is availble to encode a 32-bit operand size in 64-bit mode +#define JMPSm(M) _O_D8 (0xeb ,(int)(M) ) +#define JMPm(M) _O_D32 (0xe9 ,(int)(M) ) +#define _JMPLsr(R) (_REXLrr(0, R), _O_Mrm (0xff ,_b11,_b100,_r4(R) )) +#define _JMPQsr(R) (_REXQrr(0, R), _O_Mrm (0xff ,_b11,_b100,_r8(R) )) +#define JMPsr(R) ( X86_TARGET_64BIT ? _JMPQsr(R) : _JMPLsr(R)) +#define JMPsm(D,B,I,S) (_REXLrm(0, B, I), _O_r_X (0xff ,_b100 ,(int)(D),B,I,S )) + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ +#define JCCSii(CC, D) _O_B (0x70|(CC) ,(_sc)(int)(D) ) +#define JCCSim(CC, D) _O_D8 (0x70|(CC) ,(int)(D) ) +#define JOSm(D) JCCSim(0x0, D) +#define JNOSm(D) JCCSim(0x1, D) +#define JBSm(D) JCCSim(0x2, D) +#define JNAESm(D) JCCSim(0x2, D) +#define JNBSm(D) JCCSim(0x3, D) +#define JAESm(D) JCCSim(0x3, D) +#define JESm(D) JCCSim(0x4, D) +#define JZSm(D) JCCSim(0x4, D) +#define JNESm(D) JCCSim(0x5, D) +#define JNZSm(D) JCCSim(0x5, D) +#define JBESm(D) JCCSim(0x6, D) +#define JNASm(D) JCCSim(0x6, D) +#define JNBESm(D) JCCSim(0x7, D) +#define JASm(D) JCCSim(0x7, D) +#define JSSm(D) JCCSim(0x8, D) +#define JNSSm(D) JCCSim(0x9, D) +#define JPSm(D) JCCSim(0xa, D) +#define JPESm(D) JCCSim(0xa, D) +#define JNPSm(D) JCCSim(0xb, D) +#define JPOSm(D) JCCSim(0xb, D) +#define JLSm(D) JCCSim(0xc, D) +#define JNGESm(D) JCCSim(0xc, D) +#define JNLSm(D) JCCSim(0xd, D) +#define JGESm(D) JCCSim(0xd, D) +#define JLESm(D) JCCSim(0xe, D) +#define JNGSm(D) JCCSim(0xe, D) +#define JNLESm(D) JCCSim(0xf, D) +#define JGSm(D) JCCSim(0xf, D) + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ +#define JCCii(CC, D) _OO_L (0x0f80|(CC) ,(int)(D) ) +#define JCCim(CC, D) _OO_D32 (0x0f80|(CC) ,(int)(D) ) +#define JOm(D) JCCim(0x0, D) +#define JNOm(D) JCCim(0x1, D) +#define JBm(D) JCCim(0x2, D) +#define JNAEm(D) JCCim(0x2, D) +#define JNBm(D) JCCim(0x3, D) +#define JAEm(D) JCCim(0x3, D) +#define JEm(D) JCCim(0x4, D) +#define JZm(D) JCCim(0x4, D) +#define JNEm(D) JCCim(0x5, D) +#define JNZm(D) JCCim(0x5, D) +#define JBEm(D) JCCim(0x6, D) +#define JNAm(D) JCCim(0x6, D) +#define JNBEm(D) JCCim(0x7, D) +#define JAm(D) JCCim(0x7, D) +#define JSm(D) JCCim(0x8, D) +#define JNSm(D) JCCim(0x9, D) +#define JPm(D) JCCim(0xa, D) +#define JPEm(D) JCCim(0xa, D) +#define JNPm(D) JCCim(0xb, D) +#define JPOm(D) JCCim(0xb, D) +#define JLm(D) JCCim(0xc, D) +#define JNGEm(D) JCCim(0xc, D) +#define JNLm(D) JCCim(0xd, D) +#define JGEm(D) JCCim(0xd, D) +#define JLEm(D) JCCim(0xe, D) +#define JNGm(D) JCCim(0xe, D) +#define JNLEm(D) JCCim(0xf, D) +#define JGm(D) JCCim(0xf, D) + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ +#define SETCCir(CC, RD) (_REXBrr(0, RD), _OO_Mrm (0x0f90|(CC) ,_b11,_b000,_r1(RD) )) +#define SETOr(RD) SETCCir(0x0,RD) +#define SETNOr(RD) SETCCir(0x1,RD) +#define SETBr(RD) SETCCir(0x2,RD) +#define SETNAEr(RD) SETCCir(0x2,RD) +#define SETNBr(RD) SETCCir(0x3,RD) +#define SETAEr(RD) SETCCir(0x3,RD) +#define SETEr(RD) SETCCir(0x4,RD) +#define SETZr(RD) SETCCir(0x4,RD) +#define SETNEr(RD) SETCCir(0x5,RD) +#define SETNZr(RD) SETCCir(0x5,RD) +#define SETBEr(RD) SETCCir(0x6,RD) +#define SETNAr(RD) SETCCir(0x6,RD) +#define SETNBEr(RD) SETCCir(0x7,RD) +#define SETAr(RD) SETCCir(0x7,RD) +#define SETSr(RD) SETCCir(0x8,RD) +#define SETNSr(RD) SETCCir(0x9,RD) +#define SETPr(RD) SETCCir(0xa,RD) +#define SETPEr(RD) SETCCir(0xa,RD) +#define SETNPr(RD) SETCCir(0xb,RD) +#define SETPOr(RD) SETCCir(0xb,RD) +#define SETLr(RD) SETCCir(0xc,RD) +#define SETNGEr(RD) SETCCir(0xc,RD) +#define SETNLr(RD) SETCCir(0xd,RD) +#define SETGEr(RD) SETCCir(0xd,RD) +#define SETLEr(RD) SETCCir(0xe,RD) +#define SETNGr(RD) SETCCir(0xe,RD) +#define SETNLEr(RD) SETCCir(0xf,RD) +#define SETGr(RD) SETCCir(0xf,RD) + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ +#define SETCCim(CC,MD,MB,MI,MS) (_REXBrm(0, MB, MI), _OO_r_X (0x0f90|(CC) ,_b000 ,MD,MB,MI,MS )) +#define SETOm(D, B, I, S) SETCCim(0x0, D, B, I, S) +#define SETNOm(D, B, I, S) SETCCim(0x1, D, B, I, S) +#define SETBm(D, B, I, S) SETCCim(0x2, D, B, I, S) +#define SETNAEm(D, B, I, S) SETCCim(0x2, D, B, I, S) +#define SETNBm(D, B, I, S) SETCCim(0x3, D, B, I, S) +#define SETAEm(D, B, I, S) SETCCim(0x3, D, B, I, S) +#define SETEm(D, B, I, S) SETCCim(0x4, D, B, I, S) +#define SETZm(D, B, I, S) SETCCim(0x4, D, B, I, S) +#define SETNEm(D, B, I, S) SETCCim(0x5, D, B, I, S) +#define SETNZm(D, B, I, S) SETCCim(0x5, D, B, I, S) +#define SETBEm(D, B, I, S) SETCCim(0x6, D, B, I, S) +#define SETNAm(D, B, I, S) SETCCim(0x6, D, B, I, S) +#define SETNBEm(D, B, I, S) SETCCim(0x7, D, B, I, S) +#define SETAm(D, B, I, S) SETCCim(0x7, D, B, I, S) +#define SETSm(D, B, I, S) SETCCim(0x8, D, B, I, S) +#define SETNSm(D, B, I, S) SETCCim(0x9, D, B, I, S) +#define SETPm(D, B, I, S) SETCCim(0xa, D, B, I, S) +#define SETPEm(D, B, I, S) SETCCim(0xa, D, B, I, S) +#define SETNPm(D, B, I, S) SETCCim(0xb, D, B, I, S) +#define SETPOm(D, B, I, S) SETCCim(0xb, D, B, I, S) +#define SETLm(D, B, I, S) SETCCim(0xc, D, B, I, S) +#define SETNGEm(D, B, I, S) SETCCim(0xc, D, B, I, S) +#define SETNLm(D, B, I, S) SETCCim(0xd, D, B, I, S) +#define SETGEm(D, B, I, S) SETCCim(0xd, D, B, I, S) +#define SETLEm(D, B, I, S) SETCCim(0xe, D, B, I, S) +#define SETNGm(D, B, I, S) SETCCim(0xe, D, B, I, S) +#define SETNLEm(D, B, I, S) SETCCim(0xf, D, B, I, S) +#define SETGm(D, B, I, S) SETCCim(0xf, D, B, I, S) + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ +#define CMOVWrr(CC,RS,RD) (_d16(), _REXLrr(RD, RS), _OO_Mrm (0x0f40|(CC) ,_b11,_r2(RD),_r2(RS) )) +#define CMOVWmr(CC,MD,MB,MI,MS,RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0f40|(CC) ,_r2(RD) ,MD,MB,MI,MS )) +#define CMOVLrr(CC,RS,RD) (_REXLrr(RD, RS), _OO_Mrm (0x0f40|(CC) ,_b11,_r4(RD),_r4(RS) )) +#define CMOVLmr(CC,MD,MB,MI,MS,RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0f40|(CC) ,_r4(RD) ,MD,MB,MI,MS )) +#define CMOVQrr(CC,RS,RD) (_REXQrr(RD, RS), _OO_Mrm (0x0f40|(CC) ,_b11,_r8(RD),_r8(RS) )) +#define CMOVQmr(CC,MD,MB,MI,MS,RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0f40|(CC) ,_r8(RD) ,MD,MB,MI,MS )) + + +/* --- Push/Pop instructions ----------------------------------------------- */ + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ + +#define POPWr(RD) _m32only((_d16(), _Or (0x58,_r2(RD) ))) +#define POPWm(MD, MB, MI, MS) _m32only((_d16(), _O_r_X (0x8f ,_b000 ,MD,MB,MI,MS ))) + +#define POPLr(RD) _m32only( _Or (0x58,_r4(RD) )) +#define POPLm(MD, MB, MI, MS) _m32only( _O_r_X (0x8f ,_b000 ,MD,MB,MI,MS )) + +#define POPQr(RD) _m64only((_REXQr(RD), _Or (0x58,_r8(RD) ))) +#define POPQm(MD, MB, MI, MS) _m64only((_REXQm(MB, MI), _O_r_X (0x8f ,_b000 ,MD,MB,MI,MS ))) + +#define PUSHWr(RS) _m32only((_d16(), _Or (0x50,_r2(RS) ))) +#define PUSHWm(MD, MB, MI, MS) _m32only((_d16(), _O_r_X (0xff ,_b110 ,MD,MB,MI,MS ))) +#define PUSHWi(IM) _m32only((_d16(), _Os_sW (0x68 ,IM ))) + +#define PUSHLr(RS) _m32only( _Or (0x50,_r4(RS) )) +#define PUSHLm(MD, MB, MI, MS) _m32only( _O_r_X (0xff ,_b110 ,MD,MB,MI,MS )) +#define PUSHLi(IM) _m32only( _Os_sL (0x68 ,IM )) + +#define PUSHQr(RS) _m64only((_REXQr(RS), _Or (0x50,_r8(RS) ))) +#define PUSHQm(MD, MB, MI, MS) _m64only((_REXQm(MB, MI), _O_r_X (0xff ,_b110 ,MD,MB,MI,MS ))) +#define PUSHQi(IM) _m64only( _Os_sL (0x68 ,IM )) + +#define POPA() (_d16(), _O (0x61 )) +#define POPAD() _O (0x61 ) + +#define PUSHA() (_d16(), _O (0x60 )) +#define PUSHAD() _O (0x60 ) + +#define POPF() _O (0x9d ) +#define PUSHF() _O (0x9c ) + + +/* --- Test 
instructions --------------------------------------------------- */ + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ + +#define TESTBrr(RS, RD) (_REXBrr(RS, RD), _O_Mrm (0x84 ,_b11,_r1(RS),_r1(RD) )) +#define TESTBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _O_r_X (0x84 ,_r1(RS) ,MD,MB,MI,MS )) +#define TESTBir(IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_AL) ? \ + (_REXBrr(0, RD), _O_B (0xa8 ,_u8(IM))) : \ + (_REXBrr(0, RD), _O_Mrm_B (0xf6 ,_b11,_b000 ,_r1(RD) ,_u8(IM))) ) +#define TESTBim(IM, MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X_B (0xf6 ,_b000 ,MD,MB,MI,MS ,_u8(IM))) + +#define TESTWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (0x85 ,_b11,_r2(RS),_r2(RD) )) +#define TESTWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0x85 ,_r2(RS) ,MD,MB,MI,MS )) +#define TESTWir(IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_AX) ? \ + (_d16(), _REXLrr(0, RD), _O_W (0xa9 ,_u16(IM))) : \ + (_d16(), _REXLrr(0, RD), _O_Mrm_W (0xf7 ,_b11,_b000 ,_r2(RD) ,_u16(IM))) ) +#define TESTWim(IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_r_X_W (0xf7 ,_b000 ,MD,MB,MI,MS ,_u16(IM))) + +#define TESTLrr(RS, RD) (_REXLrr(RS, RD), _O_Mrm (0x85 ,_b11,_r4(RS),_r4(RD) )) +#define TESTLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _O_r_X (0x85 ,_r4(RS) ,MD,MB,MI,MS )) +#define TESTLir(IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_EAX) ? \ + (_REXLrr(0, RD), _O_L (0xa9 ,IM )) : \ + (_REXLrr(0, RD), _O_Mrm_L (0xf7 ,_b11,_b000 ,_r4(RD) ,IM )) ) +#define TESTLim(IM, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_r_X_L (0xf7 ,_b000 ,MD,MB,MI,MS ,IM )) + +#define TESTQrr(RS, RD) (_REXQrr(RS, RD), _O_Mrm (0x85 ,_b11,_r8(RS),_r8(RD) )) +#define TESTQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _O_r_X (0x85 ,_r8(RS) ,MD,MB,MI,MS )) +#define TESTQir(IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_RAX) ? 
\ + (_REXQrr(0, RD), _O_L (0xa9 ,IM )) : \ + (_REXQrr(0, RD), _O_Mrm_L (0xf7 ,_b11,_b000 ,_r8(RD) ,IM )) ) +#define TESTQim(IM, MD, MB, MI, MS) (_REXQrm(0, MB, MI), _O_r_X_L (0xf7 ,_b000 ,MD,MB,MI,MS ,IM )) + + +/* --- Exchange instructions ----------------------------------------------- */ + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ + +#define CMPXCHGBrr(RS, RD) (_REXBrr(RS, RD), _OO_Mrm (0x0fb0 ,_b11,_r1(RS),_r1(RD) )) +#define CMPXCHGBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _OO_r_X (0x0fb0 ,_r1(RS) ,MD,MB,MI,MS )) + +#define CMPXCHGWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _OO_Mrm (0x0fb1 ,_b11,_r2(RS),_r2(RD) )) +#define CMPXCHGWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _OO_r_X (0x0fb1 ,_r2(RS) ,MD,MB,MI,MS )) + +#define CMPXCHGLrr(RS, RD) (_REXLrr(RS, RD), _OO_Mrm (0x0fb1 ,_b11,_r4(RS),_r4(RD) )) +#define CMPXCHGLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0fb1 ,_r4(RS) ,MD,MB,MI,MS )) + +#define CMPXCHGQrr(RS, RD) (_REXQrr(RS, RD), _OO_Mrm (0x0fb1 ,_b11,_r8(RS),_r8(RD) )) +#define CMPXCHGQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0fb1 ,_r8(RS) ,MD,MB,MI,MS )) + +#define XADDBrr(RS, RD) (_REXBrr(RS, RD), _OO_Mrm (0x0fc0 ,_b11,_r1(RS),_r1(RD) )) +#define XADDBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _OO_r_X (0x0fc0 ,_r1(RS) ,MD,MB,MI,MS )) + +#define XADDWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _OO_Mrm (0x0fc1 ,_b11,_r2(RS),_r2(RD) )) +#define XADDWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _OO_r_X (0x0fc1 ,_r2(RS) ,MD,MB,MI,MS )) + +#define XADDLrr(RS, RD) (_REXLrr(RS, RD), _OO_Mrm (0x0fc1 ,_b11,_r4(RS),_r4(RD) )) +#define XADDLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0fc1 ,_r4(RS) ,MD,MB,MI,MS )) + +#define XADDQrr(RS, RD) (_REXQrr(RS, RD), _OO_Mrm (0x0fc1 ,_b11,_r8(RS),_r8(RD) )) +#define XADDQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0fc1 ,_r8(RS) ,MD,MB,MI,MS )) + +#define XCHGBrr(RS, RD) (_REXBrr(RS, RD), _O_Mrm (0x86 ,_b11,_r1(RS),_r1(RD) )) +#define XCHGBrm(RS, MD, 
MB, MI, MS) (_REXBrm(RS, MB, MI), _O_r_X (0x86 ,_r1(RS) ,MD,MB,MI,MS )) + +#define XCHGWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (0x87 ,_b11,_r2(RS),_r2(RD) )) +#define XCHGWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0x87 ,_r2(RS) ,MD,MB,MI,MS )) + +#define XCHGLrr(RS, RD) (_REXLrr(RS, RD), _O_Mrm (0x87 ,_b11,_r4(RS),_r4(RD) )) +#define XCHGLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _O_r_X (0x87 ,_r4(RS) ,MD,MB,MI,MS )) + +#define XCHGQrr(RS, RD) (_REXQrr(RS, RD), _O_Mrm (0x87 ,_b11,_r8(RS),_r8(RD) )) +#define XCHGQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _O_r_X (0x87 ,_r8(RS) ,MD,MB,MI,MS )) + + +/* --- Increment/Decrement instructions ------------------------------------ */ + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ + +#define DECBm(MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X (0xfe ,_b001 ,MD,MB,MI,MS )) +#define DECBr(RD) (_REXBrr(0, RD), _O_Mrm (0xfe ,_b11,_b001 ,_r1(RD) )) + +#define DECWm(MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_r_X (0xff ,_b001 ,MD,MB,MI,MS )) +#define DECWr(RD) (! X86_TARGET_64BIT ? (_d16(), _Or (0x48,_r2(RD) )) : \ + (_d16(), _REXLrr(0, RD), _O_Mrm (0xff ,_b11,_b001 ,_r2(RD) ))) + +#define DECLm(MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_r_X (0xff ,_b001 ,MD,MB,MI,MS )) +#define DECLr(RD) (! X86_TARGET_64BIT ? _Or (0x48,_r4(RD) ) : \ + (_REXLrr(0, RD), _O_Mrm (0xff ,_b11,_b001 ,_r4(RD) ))) + +#define DECQm(MD, MB, MI, MS) (_REXQrm(0, MB, MI), _O_r_X (0xff ,_b001 ,MD,MB,MI,MS )) +#define DECQr(RD) (_REXQrr(0, RD), _O_Mrm (0xff ,_b11,_b001 ,_r8(RD) )) + +#define INCBm(MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X (0xfe ,_b000 ,MD,MB,MI,MS )) +#define INCBr(RD) (_REXBrr(0, RD), _O_Mrm (0xfe ,_b11,_b000 ,_r1(RD) )) + +#define INCWm(MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_r_X (0xff ,_b000 ,MD,MB,MI,MS )) +#define INCWr(RD) (! X86_TARGET_64BIT ? 
(_d16(), _Or (0x40,_r2(RD) )) : \ + (_d16(), _REXLrr(0, RD), _O_Mrm (0xff ,_b11,_b000 ,_r2(RD) )) ) + +#define INCLm(MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_r_X (0xff ,_b000 ,MD,MB,MI,MS )) +#define INCLr(RD) (! X86_TARGET_64BIT ? _Or (0x40,_r4(RD) ) : \ + (_REXLrr(0, RD), _O_Mrm (0xff ,_b11,_b000 ,_r4(RD) ))) + +#define INCQm(MD, MB, MI, MS) (_REXQrm(0, MB, MI), _O_r_X (0xff ,_b000 ,MD,MB,MI,MS )) +#define INCQr(RD) (_REXQrr(0, RD), _O_Mrm (0xff ,_b11,_b000 ,_r8(RD) )) + + +/* --- Misc instructions --------------------------------------------------- */ + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ + +#define BSFWrr(RS, RD) (_d16(), _REXLrr(RD, RS), _OO_Mrm (0x0fbc ,_b11,_r2(RD),_r2(RS) )) +#define BSFWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0fbc ,_r2(RD) ,MD,MB,MI,MS )) +#define BSRWrr(RS, RD) (_d16(), _REXLrr(RD, RS), _OO_Mrm (0x0fbd ,_b11,_r2(RD),_r2(RS) )) +#define BSRWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0fbd ,_r2(RD) ,MD,MB,MI,MS )) + +#define BSFLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0fbc ,_b11,_r4(RD),_r4(RS) )) +#define BSFLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fbc ,_r4(RD) ,MD,MB,MI,MS )) +#define BSRLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0fbd ,_b11,_r4(RD),_r4(RS) )) +#define BSRLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fbd ,_r4(RD) ,MD,MB,MI,MS )) + +#define BSFQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fbc ,_b11,_r8(RD),_r8(RS) )) +#define BSFQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fbc ,_r8(RD) ,MD,MB,MI,MS )) +#define BSRQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fbd ,_b11,_r8(RD),_r8(RS) )) +#define BSRQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fbd ,_r8(RD) ,MD,MB,MI,MS )) + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ + +#define MOVSBWrr(RS, RD) (_d16(), _REXBLrr(RD, RS), _OO_Mrm (0x0fbe ,_b11,_r2(RD),_r1(RS) )) +#define MOVSBWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0fbe ,_r2(RD) ,MD,MB,MI,MS )) +#define MOVZBWrr(RS, RD) (_d16(), _REXBLrr(RD, RS), _OO_Mrm (0x0fb6 ,_b11,_r2(RD),_r1(RS) )) +#define MOVZBWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0fb6 ,_r2(RD) ,MD,MB,MI,MS )) + +#define MOVSBLrr(RS, RD) (_REXBLrr(RD, RS), _OO_Mrm (0x0fbe ,_b11,_r4(RD),_r1(RS) )) +#define MOVSBLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fbe ,_r4(RD) ,MD,MB,MI,MS )) +#define MOVZBLrr(RS, RD) (_REXBLrr(RD, RS), _OO_Mrm (0x0fb6 ,_b11,_r4(RD),_r1(RS) )) +#define MOVZBLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fb6 ,_r4(RD) ,MD,MB,MI,MS )) + +#define MOVSBQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fbe ,_b11,_r8(RD),_r1(RS) )) +#define MOVSBQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fbe ,_r8(RD) ,MD,MB,MI,MS )) +#define MOVZBQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fb6 ,_b11,_r8(RD),_r1(RS) )) +#define MOVZBQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fb6 ,_r8(RD) ,MD,MB,MI,MS )) + +#define MOVSWLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0fbf ,_b11,_r4(RD),_r2(RS) )) +#define MOVSWLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fbf ,_r4(RD) ,MD,MB,MI,MS )) +#define MOVZWLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0fb7 ,_b11,_r4(RD),_r2(RS) )) +#define MOVZWLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fb7 ,_r4(RD) ,MD,MB,MI,MS )) + +#define MOVSWQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fbf ,_b11,_r8(RD),_r2(RS) )) +#define MOVSWQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fbf ,_r8(RD) ,MD,MB,MI,MS )) +#define MOVZWQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fb7 ,_b11,_r8(RD),_r2(RS) )) +#define MOVZWQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fb7 ,_r8(RD) ,MD,MB,MI,MS )) + +#define MOVSLQrr(RS, RD) _m64only((_REXQrr(RD, RS), _O_Mrm (0x63 ,_b11,_r8(RD),_r4(RS) 
))) +#define MOVSLQmr(MD, MB, MI, MS, RD) _m64only((_REXQmr(MB, MI, RD), _O_r_X (0x63 ,_r8(RD) ,MD,MB,MI,MS ))) + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ + +#define LEALmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _O_r_X (0x8d ,_r4(RD) ,MD,MB,MI,MS )) + +#define BSWAPLr(R) (_REXLrr(0, R), _OOr (0x0fc8,_r4(R) )) +#define BSWAPQr(R) (_REXQrr(0, R), _OOr (0x0fc8,_r8(R) )) + +#define CLC() _O (0xf8 ) +#define STC() _O (0xf9 ) + +#define CMC() _O (0xf5 ) +#define CLD() _O (0xfc ) +#define STD() _O (0xfd ) + +#define CBTW() (_d16(), _O (0x98 )) +#define CWTL() _O (0x98 ) +#define CLTQ() _m64only((_REXQrr(0, 0), _O (0x98 ))) + +#define CBW CBTW +#define CWDE CWTL +#define CDQE CLTQ + +#define CWTD() (_d16(), _O (0x99 )) +#define CLTD() _O (0x99 ) +#define CQTO() _m64only((_REXQrr(0, 0), _O (0x99 ))) + +#define CWD CWTD +#define CDQ CLTD +#define CQO CQTO + +#define LAHF() _O (0x9f ) +#define SAHF() _O (0x9e ) + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ + +#define CPUID() _OO (0x0fa2 ) +#define RDTSC() _OO (0x0f31 ) + +#define ENTERii(W, B) _O_W_B (0xc8 ,_su16(W),_su8(B)) + +#define LEAVE() _O (0xc9 ) +#define RET() _O (0xc3 ) +#define RETi(IM) _O_W (0xc2 ,_su16(IM)) + +#define NOP() _O (0x90 ) + + +/* --- Media 128-bit instructions ------------------------------------------ */ + +enum { + X86_SSE_CVTIS = 0x2a, + X86_SSE_CVTSI = 0x2d, + X86_SSE_UCOMI = 0x2e, + X86_SSE_COMI = 0x2f, + X86_SSE_SQRT = 0x51, + X86_SSE_RSQRT = 0x52, + X86_SSE_RCP = 0x53, + X86_SSE_AND = 0x54, + X86_SSE_ANDN = 0x55, + X86_SSE_OR = 0x56, + X86_SSE_XOR = 0x57, + X86_SSE_ADD = 0x58, + X86_SSE_MUL = 0x59, + X86_SSE_CVTSD = 0x5a, + X86_SSE_CVTDT = 0x5b, + X86_SSE_SUB = 0x5c, + X86_SSE_MIN = 0x5d, + X86_SSE_DIV = 0x5e, + X86_SSE_MAX = 0x5f, +}; + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ + +#define __SSELrr(OP,RS,RSA,RD,RDA) (_REXLrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) )) +#define __SSELmr(OP,MD,MB,MI,MS,RD,RDA) (_REXLmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS )) +#define __SSELrm(OP,RS,RSA,MD,MB,MI,MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS )) + +#define __SSEQrr(OP,RS,RSA,RD,RDA) (_REXQrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) )) +#define __SSEQmr(OP,MD,MB,MI,MS,RD,RDA) (_REXQmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS )) +#define __SSEQrm(OP,RS,RSA,MD,MB,MI,MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS )) + +#define _SSELrr(PX,OP,RS,RSA,RD,RDA) (_B(PX), __SSELrr(OP, RS, RSA, RD, RDA)) +#define _SSELmr(PX,OP,MD,MB,MI,MS,RD,RDA) (_B(PX), __SSELmr(OP, MD, MB, MI, MS, RD, RDA)) +#define _SSELrm(PX,OP,RS,RSA,MD,MB,MI,MS) (_B(PX), __SSELrm(OP, RS, RSA, MD, MB, MI, MS)) + +#define _SSEQrr(PX,OP,RS,RSA,RD,RDA) (_B(PX), __SSEQrr(OP, RS, RSA, RD, RDA)) +#define _SSEQmr(PX,OP,MD,MB,MI,MS,RD,RDA) (_B(PX), __SSEQmr(OP, MD, MB, MI, MS, RD, RDA)) +#define _SSEQrm(PX,OP,RS,RSA,MD,MB,MI,MS) (_B(PX), __SSEQrm(OP, RS, RSA, MD, MB, MI, MS)) + +#define _SSEPSrr(OP,RS,RD) __SSELrr( OP, RS,_rX, RD,_rX) +#define _SSEPSmr(OP,MD,MB,MI,MS,RD) __SSELmr( OP, MD, MB, MI, MS, RD,_rX) +#define _SSEPSrm(OP,RS,MD,MB,MI,MS) __SSELrm( OP, RS,_rX, MD, MB, MI, MS) + +#define _SSEPDrr(OP,RS,RD) _SSELrr(0x66, OP, RS,_rX, RD,_rX) +#define _SSEPDmr(OP,MD,MB,MI,MS,RD) _SSELmr(0x66, OP, MD, MB, MI, MS, RD,_rX) +#define _SSEPDrm(OP,RS,MD,MB,MI,MS) _SSELrm(0x66, OP, RS,_rX, MD, MB, MI, MS) + +#define _SSESSrr(OP,RS,RD) _SSELrr(0xf3, OP, RS,_rX, RD,_rX) +#define _SSESSmr(OP,MD,MB,MI,MS,RD) _SSELmr(0xf3, OP, MD, MB, MI, MS, RD,_rX) +#define _SSESSrm(OP,RS,MD,MB,MI,MS) _SSELrm(0xf3, OP, RS,_rX, MD, MB, MI, MS) + +#define _SSESDrr(OP,RS,RD) _SSELrr(0xf2, OP, RS,_rX, RD,_rX) +#define _SSESDmr(OP,MD,MB,MI,MS,RD) _SSELmr(0xf2, OP, MD, MB, MI, MS, RD,_rX) +#define 
_SSESDrm(OP,RS,MD,MB,MI,MS) _SSELrm(0xf2, OP, RS,_rX, MD, MB, MI, MS) + +#define ADDPSrr(RS, RD) _SSEPSrr(X86_SSE_ADD, RS, RD) +#define ADDPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_ADD, MD, MB, MI, MS, RD) +#define ADDPDrr(RS, RD) _SSEPDrr(X86_SSE_ADD, RS, RD) +#define ADDPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_ADD, MD, MB, MI, MS, RD) + +#define ADDSSrr(RS, RD) _SSESSrr(X86_SSE_ADD, RS, RD) +#define ADDSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_ADD, MD, MB, MI, MS, RD) +#define ADDSDrr(RS, RD) _SSESDrr(X86_SSE_ADD, RS, RD) +#define ADDSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_ADD, MD, MB, MI, MS, RD) + +#define ANDNPSrr(RS, RD) _SSEPSrr(X86_SSE_ANDN, RS, RD) +#define ANDNPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_ANDN, MD, MB, MI, MS, RD) +#define ANDNPDrr(RS, RD) _SSEPDrr(X86_SSE_ANDN, RS, RD) +#define ANDNPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_ANDN, MD, MB, MI, MS, RD) + +#define ANDPSrr(RS, RD) _SSEPSrr(X86_SSE_AND, RS, RD) +#define ANDPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_AND, MD, MB, MI, MS, RD) +#define ANDPDrr(RS, RD) _SSEPDrr(X86_SSE_AND, RS, RD) +#define ANDPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_AND, MD, MB, MI, MS, RD) + +#define DIVPSrr(RS, RD) _SSEPSrr(X86_SSE_DIV, RS, RD) +#define DIVPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_DIV, MD, MB, MI, MS, RD) +#define DIVPDrr(RS, RD) _SSEPDrr(X86_SSE_DIV, RS, RD) +#define DIVPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_DIV, MD, MB, MI, MS, RD) + +#define DIVSSrr(RS, RD) _SSESSrr(X86_SSE_DIV, RS, RD) +#define DIVSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_DIV, MD, MB, MI, MS, RD) +#define DIVSDrr(RS, RD) _SSESDrr(X86_SSE_DIV, RS, RD) +#define DIVSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_DIV, MD, MB, MI, MS, RD) + +#define MAXPSrr(RS, RD) _SSEPSrr(X86_SSE_MAX, RS, RD) +#define MAXPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MAX, MD, MB, MI, MS, RD) +#define MAXPDrr(RS, RD) _SSEPDrr(X86_SSE_MAX, RS, RD) +#define MAXPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MAX, MD, MB, MI, MS, RD) + +#define MAXSSrr(RS, RD) 
_SSESSrr(X86_SSE_MAX, RS, RD) +#define MAXSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MAX, MD, MB, MI, MS, RD) +#define MAXSDrr(RS, RD) _SSESDrr(X86_SSE_MAX, RS, RD) +#define MAXSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MAX, MD, MB, MI, MS, RD) + +#define MINPSrr(RS, RD) _SSEPSrr(X86_SSE_MIN, RS, RD) +#define MINPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MIN, MD, MB, MI, MS, RD) +#define MINPDrr(RS, RD) _SSEPDrr(X86_SSE_MIN, RS, RD) +#define MINPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MIN, MD, MB, MI, MS, RD) + +#define MINSSrr(RS, RD) _SSESSrr(X86_SSE_MIN, RS, RD) +#define MINSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MIN, MD, MB, MI, MS, RD) +#define MINSDrr(RS, RD) _SSESDrr(X86_SSE_MIN, RS, RD) +#define MINSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MIN, MD, MB, MI, MS, RD) + +#define MULPSrr(RS, RD) _SSEPSrr(X86_SSE_MUL, RS, RD) +#define MULPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MUL, MD, MB, MI, MS, RD) +#define MULPDrr(RS, RD) _SSEPDrr(X86_SSE_MUL, RS, RD) +#define MULPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MUL, MD, MB, MI, MS, RD) + +#define MULSSrr(RS, RD) _SSESSrr(X86_SSE_MUL, RS, RD) +#define MULSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MUL, MD, MB, MI, MS, RD) +#define MULSDrr(RS, RD) _SSESDrr(X86_SSE_MUL, RS, RD) +#define MULSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MUL, MD, MB, MI, MS, RD) + +#define ORPSrr(RS, RD) _SSEPSrr(X86_SSE_OR, RS, RD) +#define ORPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_OR, MD, MB, MI, MS, RD) +#define ORPDrr(RS, RD) _SSEPDrr(X86_SSE_OR, RS, RD) +#define ORPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_OR, MD, MB, MI, MS, RD) + +#define RCPPSrr(RS, RD) _SSEPSrr(X86_SSE_RCP, RS, RD) +#define RCPPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_RCP, MD, MB, MI, MS, RD) +#define RCPSSrr(RS, RD) _SSESSrr(X86_SSE_RCP, RS, RD) +#define RCPSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_RCP, MD, MB, MI, MS, RD) + +#define RSQRTPSrr(RS, RD) _SSEPSrr(X86_SSE_RSQRT, RS, RD) +#define RSQRTPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_RSQRT, MD, MB, MI, MS, RD) 
+#define RSQRTSSrr(RS, RD) _SSESSrr(X86_SSE_RSQRT, RS, RD) +#define RSQRTSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_RSQRT, MD, MB, MI, MS, RD) + +#define SQRTPSrr(RS, RD) _SSEPSrr(X86_SSE_SQRT, RS, RD) +#define SQRTPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_SQRT, MD, MB, MI, MS, RD) +#define SQRTPDrr(RS, RD) _SSEPDrr(X86_SSE_SQRT, RS, RD) +#define SQRTPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_SQRT, MD, MB, MI, MS, RD) + +#define SQRTSSrr(RS, RD) _SSESSrr(X86_SSE_SQRT, RS, RD) +#define SQRTSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_SQRT, MD, MB, MI, MS, RD) +#define SQRTSDrr(RS, RD) _SSESDrr(X86_SSE_SQRT, RS, RD) +#define SQRTSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_SQRT, MD, MB, MI, MS, RD) + +#define SUBPSrr(RS, RD) _SSEPSrr(X86_SSE_SUB, RS, RD) +#define SUBPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_SUB, MD, MB, MI, MS, RD) +#define SUBPDrr(RS, RD) _SSEPDrr(X86_SSE_SUB, RS, RD) +#define SUBPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_SUB, MD, MB, MI, MS, RD) + +#define SUBSSrr(RS, RD) _SSESSrr(X86_SSE_SUB, RS, RD) +#define SUBSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_SUB, MD, MB, MI, MS, RD) +#define SUBSDrr(RS, RD) _SSESDrr(X86_SSE_SUB, RS, RD) +#define SUBSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_SUB, MD, MB, MI, MS, RD) + +#define XORPSrr(RS, RD) _SSEPSrr(X86_SSE_XOR, RS, RD) +#define XORPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_XOR, MD, MB, MI, MS, RD) +#define XORPDrr(RS, RD) _SSEPDrr(X86_SSE_XOR, RS, RD) +#define XORPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_XOR, MD, MB, MI, MS, RD) + +#define COMISSrr(RS, RD) _SSESSrr(X86_SSE_COMI, RS, RD) +#define COMISSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_COMI, MD, MB, MI, MS, RD) +#define COMISDrr(RS, RD) _SSESDrr(X86_SSE_COMI, RS, RD) +#define COMISDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_COMI, MD, MB, MI, MS, RD) + +#define UCOMISSrr(RS, RD) _SSESSrr(X86_SSE_UCOMI, RS, RD) +#define UCOMISSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD) +#define UCOMISDrr(RS, RD) _SSESDrr(X86_SSE_UCOMI, RS, RD) +#define 
UCOMISDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD) + +#define MOVAPSrr(RS, RD) _SSEPSrr(0x28, RS, RD) +#define MOVAPSmr(MD, MB, MI, MS, RD) _SSEPSmr(0x28, MD, MB, MI, MS, RD) +#define MOVAPSrm(RS, MD, MB, MI, MS) _SSEPSrm(0x29, RS, MD, MB, MI, MS) + +#define MOVAPDrr(RS, RD) _SSEPDrr(0x28, RS, RD) +#define MOVAPDmr(MD, MB, MI, MS, RD) _SSEPDmr(0x28, MD, MB, MI, MS, RD) +#define MOVAPDrm(RS, MD, MB, MI, MS) _SSEPDrm(0x29, RS, MD, MB, MI, MS) + +#define CVTPS2PIrr(RS, RD) __SSELrr( X86_SSE_CVTSI, RS,_rX, RD,_rM) +#define CVTPS2PImr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTSI, MD, MB, MI, MS, RD,_rM) +#define CVTPD2PIrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTSI, RS,_rX, RD,_rM) +#define CVTPD2PImr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_rM) + +#define CVTPI2PSrr(RS, RD) __SSELrr( X86_SSE_CVTIS, RS,_rM, RD,_rX) +#define CVTPI2PSmr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX) +#define CVTPI2PDrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTIS, RS,_rM, RD,_rX) +#define CVTPI2PDmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX) + +#define CVTPS2PDrr(RS, RD) __SSELrr( X86_SSE_CVTSD, RS,_rX, RD,_rX) +#define CVTPS2PDmr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTSD, MD, MB, MI, MS, RD,_rX) +#define CVTPD2PSrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTSD, RS,_rX, RD,_rX) +#define CVTPD2PSmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTSD, MD, MB, MI, MS, RD,_rX) + +#define CVTSS2SDrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTSD, RS,_rX, RD,_rX) +#define CVTSS2SDmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTSD, MD, MB, MI, MS, RD,_rX) +#define CVTSD2SSrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTSD, RS,_rX, RD,_rX) +#define CVTSD2SSmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTSD, MD, MB, MI, MS, RD,_rX) + +#define CVTSS2SILrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTSI, RS,_rX, RD,_r4) +#define CVTSS2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_r4) +#define CVTSD2SILrr(RS, RD) _SSELrr(0xf2, 
X86_SSE_CVTSI, RS,_rX, RD,_r4) +#define CVTSD2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_r4) + +#define CVTSI2SSLrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTIS, RS,_r4, RD,_rX) +#define CVTSI2SSLmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX) +#define CVTSI2SDLrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTIS, RS,_r4, RD,_rX) +#define CVTSI2SDLmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX) + +#define CVTSS2SIQrr(RS, RD) _SSEQrr(0xf3, X86_SSE_CVTSI, RS,_rX, RD,_r8) +#define CVTSS2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_r8) +#define CVTSD2SIQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTSI, RS,_rX, RD,_r8) +#define CVTSD2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_r8) + +#define CVTSI2SSQrr(RS, RD) _SSEQrr(0xf3, X86_SSE_CVTIS, RS,_r8, RD,_rX) +#define CVTSI2SSQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX) +#define CVTSI2SDQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTIS, RS,_r8, RD,_rX) +#define CVTSI2SDQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX) + +#define MOVDLXrr(RS, RD) _SSELrr(0x66, 0x6e, RS,_r4, RD,_rX) +#define MOVDLXmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x6e, MD, MB, MI, MS, RD,_rX) +#define MOVDQXrr(RS, RD) _SSEQrr(0x66, 0x6e, RS,_r8, RD,_rX) +#define MOVDQXmr(MD, MB, MI, MS, RD) _SSEQmr(0x66, 0x6e, MD, MB, MI, MS, RD,_rX) + +#define MOVDXLrr(RS, RD) _SSELrr(0x66, 0x7e, RS,_rX, RD,_r4) +#define MOVDXLrm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x7e, RS,_rX, MD, MB, MI, MS) +#define MOVDXQrr(RS, RD) _SSEQrr(0x66, 0x7e, RS,_rX, RD,_r8) +#define MOVDXQrm(RS, MD, MB, MI, MS) _SSEQrm(0x66, 0x7e, RS,_rX, MD, MB, MI, MS) + +#define MOVDLMrr(RS, RD) __SSELrr( 0x6e, RS,_r4, RD,_rM) +#define MOVDLMmr(MD, MB, MI, MS, RD) __SSELmr( 0x6e, MD, MB, MI, MS, RD,_rM) +#define MOVDQMrr(RS, RD) __SSEQrr( 0x6e, RS,_r8, RD,_rM) +#define MOVDQMmr(MD, MB, MI, MS, RD) __SSEQmr( 0x6e, MD, MB, MI, MS, RD,_rM) + 
+#define MOVDMLrr(RS, RD) __SSELrr( 0x7e, RS,_rM, RD,_r4) +#define MOVDMLrm(RS, MD, MB, MI, MS) __SSELrm( 0x7e, RS,_rM, MD, MB, MI, MS) +#define MOVDMQrr(RS, RD) __SSEQrr( 0x7e, RS,_rM, RD,_r8) +#define MOVDMQrm(RS, MD, MB, MI, MS) __SSEQrm( 0x7e, RS,_rM, MD, MB, MI, MS) + +#define MOVDQ2Qrr(RS, RD) _SSELrr(0xf2, 0xd6, RS,_rX, RD,_rM) +#define MOVHLPSrr(RS, RD) __SSELrr( 0x12, RS,_rX, RD,_rX) +#define MOVLHPSrr(RS, RD) __SSELrr( 0x16, RS,_rX, RD,_rX) + +#define MOVDQArr(RS, RD) _SSELrr(0x66, 0x6f, RS,_rX, RD,_rX) +#define MOVDQAmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x6f, MD, MB, MI, MS, RD,_rX) +#define MOVDQArm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x7f, RS,_rX, MD, MB, MI, MS) + +#define MOVDQUrr(RS, RD) _SSELrr(0xf3, 0x6f, RS,_rX, RD,_rX) +#define MOVDQUmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, 0x6f, MD, MB, MI, MS, RD,_rX) +#define MOVDQUrm(RS, MD, MB, MI, MS) _SSELrm(0xf3, 0x7f, RS,_rX, MD, MB, MI, MS) + +#define MOVHPDmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x16, MD, MB, MI, MS, RD,_rX) +#define MOVHPDrm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x17, RS,_rX, MD, MB, MI, MS) +#define MOVHPSmr(MD, MB, MI, MS, RD) __SSELmr( 0x16, MD, MB, MI, MS, RD,_rX) +#define MOVHPSrm(RS, MD, MB, MI, MS) __SSELrm( 0x17, RS,_rX, MD, MB, MI, MS) + +#define MOVLPDmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x12, MD, MB, MI, MS, RD,_rX) +#define MOVLPDrm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x13, RS,_rX, MD, MB, MI, MS) +#define MOVLPSmr(MD, MB, MI, MS, RD) __SSELmr( 0x12, MD, MB, MI, MS, RD,_rX) +#define MOVLPSrm(RS, MD, MB, MI, MS) __SSELrm( 0x13, RS,_rX, MD, MB, MI, MS) + + +/* --- FLoating-Point instructions ----------------------------------------- */ + +#define _ESCmi(D,B,I,S,OP) (_REXLrm(0,B,I), _O_r_X(0xd8|(OP & 7), (OP >> 3), D,B,I,S)) + +#define FLDr(R) _OOr(0xd9c0,_rN(R)) +#define FLDLm(D,B,I,S) _ESCmi(D,B,I,S,005) +#define FLDSm(D,B,I,S) _ESCmi(D,B,I,S,001) +#define FLDTm(D,B,I,S) _ESCmi(D,B,I,S,053) + +#define FSTr(R) _OOr(0xddd0,_rN(R)) +#define FSTSm(D,B,I,S) _ESCmi(D,B,I,S,021) +#define 
FSTLm(D,B,I,S) _ESCmi(D,B,I,S,025) + +#define FSTPr(R) _OOr(0xddd8,_rN(R)) +#define FSTPSm(D,B,I,S) _ESCmi(D,B,I,S,031) +#define FSTPLm(D,B,I,S) _ESCmi(D,B,I,S,035) +#define FSTPTm(D,B,I,S) _ESCmi(D,B,I,S,073) + +#define FADDr0(R) _OOr(0xd8c0,_rN(R)) +#define FADD0r(R) _OOr(0xdcc0,_rN(R)) +#define FADDP0r(R) _OOr(0xdec0,_rN(R)) +#define FADDSm(D,B,I,S) _ESCmi(D,B,I,S,000) +#define FADDLm(D,B,I,S) _ESCmi(D,B,I,S,004) + +#define FSUBSm(D,B,I,S) _ESCmi(D,B,I,S,040) +#define FSUBLm(D,B,I,S) _ESCmi(D,B,I,S,044) +#define FSUBr0(R) _OOr(0xd8e0,_rN(R)) +#define FSUB0r(R) _OOr(0xdce8,_rN(R)) +#define FSUBP0r(R) _OOr(0xdee8,_rN(R)) + +#define FSUBRr0(R) _OOr(0xd8e8,_rN(R)) +#define FSUBR0r(R) _OOr(0xdce0,_rN(R)) +#define FSUBRP0r(R) _OOr(0xdee0,_rN(R)) +#define FSUBRSm(D,B,I,S) _ESCmi(D,B,I,S,050) +#define FSUBRLm(D,B,I,S) _ESCmi(D,B,I,S,054) + +#define FMULr0(R) _OOr(0xd8c8,_rN(R)) +#define FMUL0r(R) _OOr(0xdcc8,_rN(R)) +#define FMULP0r(R) _OOr(0xdec8,_rN(R)) +#define FMULSm(D,B,I,S) _ESCmi(D,B,I,S,010) +#define FMULLm(D,B,I,S) _ESCmi(D,B,I,S,014) + +#define FDIVr0(R) _OOr(0xd8f0,_rN(R)) +#define FDIV0r(R) _OOr(0xdcf8,_rN(R)) +#define FDIVP0r(R) _OOr(0xdef8,_rN(R)) +#define FDIVSm(D,B,I,S) _ESCmi(D,B,I,S,060) +#define FDIVLm(D,B,I,S) _ESCmi(D,B,I,S,064) + +#define FDIVRr0(R) _OOr(0xd8f8,_rN(R)) +#define FDIVR0r(R) _OOr(0xdcf0,_rN(R)) +#define FDIVRP0r(R) _OOr(0xdef0,_rN(R)) +#define FDIVRSm(D,B,I,S) _ESCmi(D,B,I,S,070) +#define FDIVRLm(D,B,I,S) _ESCmi(D,B,I,S,074) + +#define FCMOVBr0(R) _OOr(0xdac0,_rN(R)) +#define FCMOVBEr0(R) _OOr(0xdad0,_rN(R)) +#define FCMOVEr0(R) _OOr(0xdac8,_rN(R)) +#define FCMOVNBr0(R) _OOr(0xdbc0,_rN(R)) +#define FCMOVNBEr0(R) _OOr(0xdbd0,_rN(R)) +#define FCMOVNEr0(R) _OOr(0xdbc8,_rN(R)) +#define FCMOVNUr0(R) _OOr(0xdbd8,_rN(R)) +#define FCMOVUr0(R) _OOr(0xdad8,_rN(R)) +#define FCOMIr0(R) _OOr(0xdbf0,_rN(R)) +#define FCOMIPr0(R) _OOr(0xdff0,_rN(R)) + +#define FCOMr(R) _OOr(0xd8d0,_rN(R)) +#define FCOMSm(D,B,I,S) _ESCmi(D,B,I,S,020) +#define 
FCOMLm(D,B,I,S) _ESCmi(D,B,I,S,024) + +#define FCOMPr(R) _OOr(0xd8d8,_rN(R)) +#define FCOMPSm(D,B,I,S) _ESCmi(D,B,I,S,030) +#define FCOMPLm(D,B,I,S) _ESCmi(D,B,I,S,034) + +#define FUCOMIr0(R) _OOr(0xdbe8,_rN(R)) +#define FUCOMIPr0(R) _OOr(0xdfe8,_rN(R)) +#define FUCOMPr(R) _OOr(0xdde8,_rN(R)) +#define FUCOMr(R) _OOr(0xdde0,_rN(R)) + +#define FIADDLm(D,B,I,S) _ESCmi(D,B,I,S,002) +#define FICOMLm(D,B,I,S) _ESCmi(D,B,I,S,022) +#define FICOMPLm(D,B,I,S) _ESCmi(D,B,I,S,032) +#define FIDIVLm(D,B,I,S) _ESCmi(D,B,I,S,062) +#define FIDIVRLm(D,B,I,S) _ESCmi(D,B,I,S,072) +#define FILDLm(D,B,I,S) _ESCmi(D,B,I,S,003) +#define FILDQm(D,B,I,S) _ESCmi(D,B,I,S,057) +#define FIMULLm(D,B,I,S) _ESCmi(D,B,I,S,012) +#define FISTLm(D,B,I,S) _ESCmi(D,B,I,S,023) +#define FISTPLm(D,B,I,S) _ESCmi(D,B,I,S,033) +#define FISTPQm(D,B,I,S) _ESCmi(D,B,I,S,077) +#define FISUBLm(D,B,I,S) _ESCmi(D,B,I,S,042) +#define FISUBRLm(D,B,I,S) _ESCmi(D,B,I,S,052) + +#define FREEr(R) _OOr(0xddc0,_rN(R)) +#define FXCHr(R) _OOr(0xd9c8,_rN(R)) + +#endif /* X86_RTASM_H */ diff --git a/BasiliskII/src/uae_cpu/compiler/compemu.h b/BasiliskII/src/uae_cpu/compiler/compemu.h new file mode 100644 index 00000000..6e3abb1e --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/compemu.h @@ -0,0 +1,543 @@ +/* + * compiler/compemu.h - Public interface and definitions + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. 
+ * + * JIT compiler m68k -> IA-32 and AMD64 + * + * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * Adaptation for Basilisk II and improvements, copyright 2000-2004 Gwenole Beauchesne + * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef COMPEMU_H +#define COMPEMU_H + +// #include "sysconfig.h" +#include "newcpu.h" + +#ifdef UAE +#ifdef CPU_64_BIT +typedef uae_u64 uintptr; +#else +typedef uae_u32 uintptr; +#endif +/* FIXME: cpummu.cpp also checks for USE_JIT, possibly others */ +// #define USE_JIT +#endif + +#ifdef USE_JIT + +#ifdef JIT_DEBUG +/* dump some information (m68k block, x86 block addresses) about the compiler state */ +extern void compiler_dumpstate(void); +#endif + +/* Now that we do block chaining, and also have linked lists on each tag, + TAGMASK can be much smaller and still do its job. Saves several megs + of memory! 
*/ +#define TAGMASK 0x0000ffff +#define TAGSIZE (TAGMASK+1) +#define MAXRUN 1024 +#define cacheline(x) (((uintptr)x)&TAGMASK) + +extern uae_u8* start_pc_p; +extern uae_u32 start_pc; + +struct blockinfo_t; + +struct cpu_history { + uae_u16* location; +#ifdef UAE + uae_u8 specmem; +#endif +}; + +union cacheline { + cpuop_func* handler; + blockinfo_t * bi; +}; + +/* Use new spill/reload strategy when calling external functions */ +#define USE_OPTIMIZED_CALLS 0 +#if USE_OPTIMIZED_CALLS +#error implementation in progress +#endif + +/* (gb) When on, this option can save save up to 30% compilation time + * when many lazy flushes occur (e.g. apps in MacOS 8.x). + */ +#define USE_SEPARATE_BIA 1 + +/* Use chain of checksum_info_t to compute the block checksum */ +#define USE_CHECKSUM_INFO 1 + +/* Use code inlining, aka follow-up of constant jumps */ +#define USE_INLINING 1 + +/* Inlining requires the chained checksuming information */ +#if USE_INLINING +#undef USE_CHECKSUM_INFO +#define USE_CHECKSUM_INFO 1 +#endif + +/* Does flush_icache_range() only check for blocks falling in the requested range? */ +#define LAZY_FLUSH_ICACHE_RANGE 0 + +#define USE_F_ALIAS 1 +#define USE_OFFSET 1 +#define COMP_DEBUG 1 + +#if COMP_DEBUG +#define Dif(x) if (x) +#else +#define Dif(x) if (0) +#endif + +#define SCALE 2 + +#define BYTES_PER_INST 10240 /* paranoid ;-) */ +#if defined(CPU_arm) +#define LONGEST_68K_INST 256 /* The number of bytes the longest possible + 68k instruction takes */ +#else +#define LONGEST_68K_INST 16 /* The number of bytes the longest possible + 68k instruction takes */ +#endif +#define MAX_CHECKSUM_LEN 2048 /* The maximum size we calculate checksums + for. 
Anything larger will be flushed + unconditionally even with SOFT_FLUSH */ +#define MAX_HOLD_BI 3 /* One for the current block, and up to two + for jump targets */ + +#define INDIVIDUAL_INST 0 +#define FLAG_X 0x0010 +#define FLAG_N 0x0008 +#define FLAG_Z 0x0004 +#define FLAG_V 0x0002 +#define FLAG_C 0x0001 +#define FLAG_CZNV (FLAG_C | FLAG_Z | FLAG_N | FLAG_V) +#define FLAG_ALL (FLAG_C | FLAG_Z | FLAG_N | FLAG_V | FLAG_X) +#define FLAG_ZNV (FLAG_Z | FLAG_N | FLAG_V) + +#define KILLTHERAT 1 /* Set to 1 to avoid some partial_rat_stalls */ + +#if defined(CPU_arm) +#define USE_DATA_BUFFER +#define N_REGS 13 /* really 16, but 13 to 15 are SP, LR, PC */ +#else +#if defined(CPU_x86_64) +#define N_REGS 16 /* really only 15, but they are numbered 0-3,5-15 */ +#else +#define N_REGS 8 /* really only 7, but they are numbered 0,1,2,3,5,6,7 */ +#endif +#endif +#define N_FREGS 6 /* That leaves us two positions on the stack to play with */ + +/* Functions exposed to newcpu, or to what was moved from newcpu.c to + * compemu_support.c */ +#ifdef WINUAE_ARANYM +extern void compiler_init(void); +extern void compiler_exit(void); +extern bool compiler_use_jit(void); +#endif +extern void init_comp(void); +extern void flush(int save_regs); +extern void small_flush(int save_regs); +extern void set_target(uae_u8* t); +extern uae_u8* get_target(void); +extern void freescratch(void); +extern void build_comp(void); +extern void set_cache_state(int enabled); +extern int get_cache_state(void); +extern uae_u32 get_jitted_size(void); +#ifdef JIT +#ifdef WINUAE_ARANYM +extern void (*flush_icache)(int n); +#else +extern void flush_icache(int n); +#endif +#endif +extern void alloc_cache(void); +extern int check_for_cache_miss(void); + +/* JIT FPU compilation */ +extern void comp_fpp_opp (uae_u32 opcode, uae_u16 extra); +extern void comp_fbcc_opp (uae_u32 opcode); +extern void comp_fscc_opp (uae_u32 opcode, uae_u16 extra); +void comp_fdbcc_opp (uae_u32 opcode, uae_u16 extra); +void comp_ftrapcc_opp 
(uae_u32 opcode, uaecptr oldpc); +void comp_fsave_opp (uae_u32 opcode); +void comp_frestore_opp (uae_u32 opcode); + +extern uae_u32 needed_flags; +extern uae_u8* comp_pc_p; +extern void* pushall_call_handler; + +#define VREGS 32 +#define VFREGS 16 + +#define INMEM 1 +#define CLEAN 2 +#define DIRTY 3 +#define UNDEF 4 +#define ISCONST 5 + +typedef struct { + uae_u32* mem; + uae_u32 val; + uae_u8 is_swapped; + uae_u8 status; + uae_s8 realreg; /* gb-- realreg can hold -1 */ + uae_u8 realind; /* The index in the holds[] array */ + uae_u8 needflush; + uae_u8 validsize; + uae_u8 dirtysize; + uae_u8 dummy; +} reg_status; + +typedef struct { + uae_u32* mem; + double val; + uae_u8 status; + uae_s8 realreg; /* gb-- realreg can hold -1 */ + uae_u8 realind; + uae_u8 needflush; +} freg_status; + +#define PC_P 16 +#define FLAGX 17 +#define FLAGTMP 18 +#define NEXT_HANDLER 19 +#define S1 20 +#define S2 21 +#define S3 22 +#define S4 23 +#define S5 24 +#define S6 25 +#define S7 26 +#define S8 27 +#define S9 28 +#define S10 29 +#define S11 30 +#define S12 31 + +#define FP_RESULT 8 +#define FS1 9 +#define FS2 10 +#define FS3 11 + +typedef struct { + uae_u32 touched; + uae_s8 holds[VREGS]; + uae_u8 nholds; + uae_u8 canbyte; + uae_u8 canword; + uae_u8 locked; +} n_status; + +typedef struct { + uae_u32 touched; + uae_s8 holds[VFREGS]; + uae_u8 nholds; + uae_u8 locked; +} fn_status; + +/* For flag handling */ +#define NADA 1 +#define TRASH 2 +#define VALID 3 + +/* needflush values */ +#define NF_SCRATCH 0 +#define NF_TOMEM 1 +#define NF_HANDLER 2 + +typedef struct { + /* Integer part */ + reg_status state[VREGS]; + n_status nat[N_REGS]; + uae_u32 flags_on_stack; + uae_u32 flags_in_flags; + uae_u32 flags_are_important; + /* FPU part */ + freg_status fate[VFREGS]; + fn_status fat[N_FREGS]; + + /* x86 FPU part */ + uae_s8 spos[N_FREGS]; + uae_s8 onstack[6]; + uae_s8 tos; +} bigstate; + +typedef struct { + /* Integer part */ + uae_s8 virt[VREGS]; + uae_s8 nat[N_REGS]; +} smallstate; + +extern 
int touchcnt; + +#define IMM uae_s32 +#define RR1 uae_u32 +#define RR2 uae_u32 +#define RR4 uae_u32 +/* + R1, R2, R4 collides with ARM registers defined in ucontext +#define R1 uae_u32 +#define R2 uae_u32 +#define R4 uae_u32 +*/ +#define W1 uae_u32 +#define W2 uae_u32 +#define W4 uae_u32 +#define RW1 uae_u32 +#define RW2 uae_u32 +#define RW4 uae_u32 +#define MEMR uae_u32 +#define MEMW uae_u32 +#define MEMRW uae_u32 + +#define FW uae_u32 +#define FR uae_u32 +#define FRW uae_u32 + +#define MIDFUNC(nargs,func,args) void func args +#define MENDFUNC(nargs,func,args) +#define COMPCALL(func) func + +#define LOWFUNC(flags,mem,nargs,func,args) static inline void func args +#define LENDFUNC(flags,mem,nargs,func,args) + +/* What we expose to the outside */ +#define DECLARE_MIDFUNC(func) extern void func + +#if defined(CPU_arm) + +#include "compemu_midfunc_arm.h" + +#if defined(USE_JIT2) +#include "compemu_midfunc_arm2.h" +#endif +#endif + +#if defined(CPU_i386) || defined(CPU_x86_64) +#include "compemu_midfunc_x86.h" +#endif + +#undef DECLARE_MIDFUNC + +extern int failure; +#define FAIL(x) do { failure|=x; } while (0) + +/* Convenience functions exposed to gencomp */ +extern uae_u32 m68k_pc_offset; +extern void readbyte(int address, int dest, int tmp); +extern void readword(int address, int dest, int tmp); +extern void readlong(int address, int dest, int tmp); +extern void writebyte(int address, int source, int tmp); +extern void writeword(int address, int source, int tmp); +extern void writelong(int address, int source, int tmp); +extern void writeword_clobber(int address, int source, int tmp); +extern void writelong_clobber(int address, int source, int tmp); +extern void get_n_addr(int address, int dest, int tmp); +extern void get_n_addr_jmp(int address, int dest, int tmp); +extern void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp); +/* Set native Z flag only if register is zero */ +extern void set_zero(int r, int tmp); +extern int kill_rodent(int r); +#define 
SYNC_PC_OFFSET 100 +extern void sync_m68k_pc(void); +extern uae_u32 get_const(int r); +extern int is_const(int r); +extern void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond); + +#define comp_get_ibyte(o) do_get_mem_byte((uae_u8 *)(comp_pc_p + (o) + 1)) +#define comp_get_iword(o) do_get_mem_word((uae_u16 *)(comp_pc_p + (o))) +#define comp_get_ilong(o) do_get_mem_long((uae_u32 *)(comp_pc_p + (o))) + +struct blockinfo_t; + +typedef struct dep_t { + uae_u32* jmp_off; + struct blockinfo_t* target; + struct blockinfo_t* source; + struct dep_t** prev_p; + struct dep_t* next; +} dependency; + +typedef struct checksum_info_t { + uae_u8 *start_p; + uae_u32 length; + struct checksum_info_t *next; +} checksum_info; + +typedef struct blockinfo_t { + uae_s32 count; + cpuop_func* direct_handler_to_use; + cpuop_func* handler_to_use; + /* The direct handler does not check for the correct address */ + + cpuop_func* handler; + cpuop_func* direct_handler; + + cpuop_func* direct_pen; + cpuop_func* direct_pcc; + +#ifdef UAE + uae_u8* nexthandler; +#endif + uae_u8* pc_p; + + uae_u32 c1; + uae_u32 c2; +#if USE_CHECKSUM_INFO + checksum_info *csi; +#else + uae_u32 len; + uae_u32 min_pcp; +#endif + + struct blockinfo_t* next_same_cl; + struct blockinfo_t** prev_same_cl_p; + struct blockinfo_t* next; + struct blockinfo_t** prev_p; + + uae_u8 optlevel; + uae_u8 needed_flags; + uae_u8 status; + uae_u8 havestate; + + dependency dep[2]; /* Holds things we depend on */ + dependency* deplist; /* List of things that depend on this */ + smallstate env; + +#ifdef JIT_DEBUG + /* (gb) size of the compiled block (direct handler) */ + uae_u32 direct_handler_size; +#endif +} blockinfo; + +#define BI_INVALID 0 +#define BI_ACTIVE 1 +#define BI_NEED_RECOMP 2 +#define BI_NEED_CHECK 3 +#define BI_CHECKING 4 +#define BI_COMPILING 5 +#define BI_FINALIZING 6 + +void execute_normal(void); +void exec_nostats(void); +void do_nothing(void); + +#else + +static inline void flush_icache(int) { } +static 
inline void build_comp() { } + +#endif /* !USE_JIT */ + +#ifdef UAE + +typedef struct { + uae_u8 type; + uae_u8 reg; + uae_u32 next; +} regacc; + +#define JIT_EXCEPTION_HANDLER +// #define JIT_ALWAYS_DISTRUST + +/* ARAnyM uses fpu_register name, used in scratch_t */ +/* FIXME: check that no ARAnyM code assumes different floating point type */ +typedef fptype fpu_register; + +extern void compile_block(cpu_history* pc_hist, int blocklen, int totcyles); + +#define MAXCYCLES (1000 * CYCLE_UNIT) +#define scaled_cycles(x) (currprefs.m68k_speed<0?(((x)/SCALE)?(((x)/SCALE (uintptr_t) 0xffffffff) { + jit_abort("JIT: 64-bit pointer (0x%llx) at %s:%d (fatal)", + (unsigned long long)address, file, line); + } + return (uae_u32) address; +} +#define uae_p32(x) (check_uae_p32((uintptr)(x), __FILE__, __LINE__)) +#else +#define uae_p32(x) ((uae_u32)(x)) +#endif + +#endif /* COMPEMU_H */ diff --git a/BasiliskII/src/uae_cpu/compiler/compemu1.cpp b/BasiliskII/src/uae_cpu/compiler/compemu1.cpp new file mode 100644 index 00000000..297c6250 --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/compemu1.cpp @@ -0,0 +1,2 @@ +#define PART_1 +#include "compemu.cpp" diff --git a/BasiliskII/src/uae_cpu/compiler/compemu2.cpp b/BasiliskII/src/uae_cpu/compiler/compemu2.cpp new file mode 100644 index 00000000..8c0ddeac --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/compemu2.cpp @@ -0,0 +1,2 @@ +#define PART_2 +#include "compemu.cpp" diff --git a/BasiliskII/src/uae_cpu/compiler/compemu3.cpp b/BasiliskII/src/uae_cpu/compiler/compemu3.cpp new file mode 100644 index 00000000..975e0669 --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/compemu3.cpp @@ -0,0 +1,2 @@ +#define PART_3 +#include "compemu.cpp" diff --git a/BasiliskII/src/uae_cpu/compiler/compemu4.cpp b/BasiliskII/src/uae_cpu/compiler/compemu4.cpp new file mode 100644 index 00000000..a49b5444 --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/compemu4.cpp @@ -0,0 +1,2 @@ +#define PART_4 +#include "compemu.cpp" diff --git 
a/BasiliskII/src/uae_cpu/compiler/compemu5.cpp b/BasiliskII/src/uae_cpu/compiler/compemu5.cpp new file mode 100644 index 00000000..41e872f6 --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/compemu5.cpp @@ -0,0 +1,2 @@ +#define PART_5 +#include "compemu.cpp" diff --git a/BasiliskII/src/uae_cpu/compiler/compemu6.cpp b/BasiliskII/src/uae_cpu/compiler/compemu6.cpp new file mode 100644 index 00000000..9156e597 --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/compemu6.cpp @@ -0,0 +1,2 @@ +#define PART_6 +#include "compemu.cpp" diff --git a/BasiliskII/src/uae_cpu/compiler/compemu7.cpp b/BasiliskII/src/uae_cpu/compiler/compemu7.cpp new file mode 100644 index 00000000..63108e04 --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/compemu7.cpp @@ -0,0 +1,2 @@ +#define PART_7 +#include "compemu.cpp" diff --git a/BasiliskII/src/uae_cpu/compiler/compemu8.cpp b/BasiliskII/src/uae_cpu/compiler/compemu8.cpp new file mode 100644 index 00000000..543f9dfd --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/compemu8.cpp @@ -0,0 +1,2 @@ +#define PART_8 +#include "compemu.cpp" diff --git a/BasiliskII/src/uae_cpu/compiler/compemu_fpp.cpp b/BasiliskII/src/uae_cpu/compiler/compemu_fpp.cpp new file mode 100644 index 00000000..cef6d43e --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/compemu_fpp.cpp @@ -0,0 +1,1638 @@ +/* + * compiler/compemu_fpp.cpp - Dynamic translation of FPU instructions + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. 
+ * + * JIT compiler m68k -> IA-32 and AMD64 + * + * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * Adaptation for Basilisk II and improvements, copyright 2000-2004 Gwenole Beauchesne + * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * UAE - The Un*x Amiga Emulator + * + * MC68881 emulation + * + * Copyright 1996 Herman ten Brugge + * Adapted for JIT compilation (c) Bernd Meyer, 2000 + */ + +#include "sysdeps.h" + +# include +# include +# include + +#include "memory.h" +#include "readcpu.h" +#include "newcpu.h" +#include "main.h" +#include "compiler/compemu.h" +#include "fpu/fpu.h" +#include "fpu/flags.h" +#include "fpu/exceptions.h" +#include "fpu/rounding.h" + +#define DEBUG 0 +#include "debug.h" + +// gb-- WARNING: get_fpcr() and set_fpcr() support is experimental +#define HANDLE_FPCR 0 + +// - IEEE-based fpu core must be used +#if defined(FPU_IEEE) +# define CAN_HANDLE_FPCR +#endif + +// - Generic rounding mode and precision modes are supported if set together +#if defined(FPU_USE_GENERIC_ROUNDING_MODE) && defined(FPU_USE_GENERIC_ROUNDING_PRECISION) +# define CAN_HANDLE_FPCR +#endif + +// - X86 rounding mode and precision modes are *not* supported but might work (?!) 
+#if defined(FPU_USE_X86_ROUNDING_MODE) && defined(FPU_USE_X86_ROUNDING_PRECISION) +# define CAN_HANDLE_FPCR +#endif + +#if HANDLE_FPCR && !defined(CAN_HANDLE_FPCR) +# warning "Can't handle FPCR, will FAIL(1) at runtime" +# undef HANDLE_FPCR +# define HANDLE_FPCR 0 +#endif + +#define STATIC_INLINE static inline +#define MAKE_FPSR(r) do { fmov_rr(FP_RESULT,r); } while (0) + +#define delay nop() ;nop() +#define delay2 nop() ;nop() + +#define UNKNOWN_EXTRA 0xFFFFFFFF +static void fpuop_illg(uae_u32 opcode, uae_u32 /* extra */) +{ +/* + if (extra == UNKNOWN_EXTRA) + printf("FPU opcode %x, extra UNKNOWN_EXTRA\n",opcode & 0xFFFF); + else + printf("FPU opcode %x, extra %x\n",opcode & 0xFFFF,extra & 0xFFFF); +*/ + op_illg(opcode); +} + +uae_s32 temp_fp[4]; /* To convert between FP/integer */ + +/* return register number, or -1 for failure */ +STATIC_INLINE int get_fp_value (uae_u32 opcode, uae_u16 extra) +{ + uaecptr tmppc; + uae_u16 tmp; + int size; + int mode; + int reg; + uae_u32 ad = 0; + static int sz1[8] = { 4, 4, 12, 12, 2, 8, 1, 0 }; + static int sz2[8] = { 4, 4, 12, 12, 2, 8, 2, 0 }; + + if ((extra & 0x4000) == 0) { + return ((extra >> 10) & 7); + } + + mode = (opcode >> 3) & 7; + reg = opcode & 7; + size = (extra >> 10) & 7; + switch (mode) { + case 0: + switch (size) { + case 6: + sign_extend_8_rr(S1,reg); + mov_l_mr((uintptr)temp_fp,S1); + delay2; + fmovi_rm(FS1,(uintptr)temp_fp); + return FS1; + case 4: + sign_extend_16_rr(S1,reg); + mov_l_mr((uintptr)temp_fp,S1); + delay2; + fmovi_rm(FS1,(uintptr)temp_fp); + return FS1; + case 0: + mov_l_mr((uintptr)temp_fp,reg); + delay2; + fmovi_rm(FS1,(uintptr)temp_fp); + return FS1; + case 1: + mov_l_mr((uintptr)temp_fp,reg); + delay2; + fmovs_rm(FS1,(uintptr)temp_fp); + return FS1; + default: + return -1; + } + return -1; /* Should be unreachable */ + case 1: + return -1; /* Genuine invalid instruction */ + default: + break; + } + /* OK, we *will* have to load something from an address. 
Let's make + sure we know how to handle that, or quit early --- i.e. *before* + we do any postincrement/predecrement that we may regret */ + + switch (size) { + case 3: + return -1; + case 0: + case 1: + case 2: + case 4: + case 5: + case 6: + break; + default: + return -1; + } + + switch (mode) { + case 2: + ad=S1; /* We will change it, anyway ;-) */ + mov_l_rr(ad,reg+8); + break; + case 3: + ad=S1; + mov_l_rr(ad,reg+8); + lea_l_brr(reg+8,reg+8,(reg == 7?sz2[size]:sz1[size])); + break; + case 4: + ad=S1; + + lea_l_brr(reg+8,reg+8,-(reg == 7?sz2[size]:sz1[size])); + mov_l_rr(ad,reg+8); + break; + case 5: + { + uae_u32 off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + ad=S1; + mov_l_rr(ad,reg+8); + lea_l_brr(ad,ad,off); + break; + } + case 6: + { + uae_u32 dp=comp_get_iword((m68k_pc_offset+=2)-2); + ad=S1; + calc_disp_ea_020(reg+8,dp,ad,S2); + break; + } + case 7: + switch (reg) { + case 0: + { + uae_u32 off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + ad=S1; + mov_l_ri(ad,off); + break; + } + case 1: + { + uae_u32 off=comp_get_ilong((m68k_pc_offset+=4)-4); + ad=S1; + mov_l_ri(ad,off); + break; + } + case 2: + { + uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+ + m68k_pc_offset; + uae_s32 PC16off =(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + ad=S1; + mov_l_ri(ad,address+PC16off); + break; + } + case 3: + return -1; + tmppc = m68k_getpc (); + tmp = next_iword (); + ad = get_disp_ea_020 (tmppc, tmp); + break; + case 4: + { + uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+ m68k_pc_offset; + ad=S1; + // Immediate addressing mode && Operation Length == Byte -> + // Use the low-order byte of the extension word. 
+ if (size == 6) address++; + mov_l_ri(ad,address); + m68k_pc_offset+=sz2[size]; + break; + } + default: + return -1; + } + } + + switch (size) { + case 0: + readlong(ad,S2,S3); + mov_l_mr((uintptr)temp_fp,S2); + delay2; + fmovi_rm(FS1,(uintptr)temp_fp); + break; + case 1: + readlong(ad,S2,S3); + mov_l_mr((uintptr)temp_fp,S2); + delay2; + fmovs_rm(FS1,(uintptr)temp_fp); + break; + case 2: + readword(ad,S2,S3); + mov_w_mr(((uintptr)temp_fp)+8,S2); + add_l_ri(ad,4); + readlong(ad,S2,S3); + mov_l_mr((uintptr)(temp_fp)+4,S2); + add_l_ri(ad,4); + readlong(ad,S2,S3); + mov_l_mr((uintptr)(temp_fp),S2); + delay2; + fmov_ext_rm(FS1,(uintptr)(temp_fp)); + break; + case 3: + return -1; /* Some silly "packed" stuff */ + case 4: + readword(ad,S2,S3); + sign_extend_16_rr(S2,S2); + mov_l_mr((uintptr)temp_fp,S2); + delay2; + fmovi_rm(FS1,(uintptr)temp_fp); + break; + case 5: + readlong(ad,S2,S3); + mov_l_mr(((uintptr)temp_fp)+4,S2); + add_l_ri(ad,4); + readlong(ad,S2,S3); + mov_l_mr((uintptr)(temp_fp),S2); + delay2; + fmov_rm(FS1,(uintptr)(temp_fp)); + break; + case 6: + readbyte(ad,S2,S3); + sign_extend_8_rr(S2,S2); + mov_l_mr((uintptr)temp_fp,S2); + delay2; + fmovi_rm(FS1,(uintptr)temp_fp); + break; + default: + return -1; + } + return FS1; +} + +/* return of -1 means failure, >=0 means OK */ +STATIC_INLINE int put_fp_value (int val, uae_u32 opcode, uae_u16 extra) +{ + uae_u16 tmp; + uaecptr tmppc; + int size; + int mode; + int reg; + uae_u32 ad; + static int sz1[8] = { 4, 4, 12, 12, 2, 8, 1, 0 }; + static int sz2[8] = { 4, 4, 12, 12, 2, 8, 2, 0 }; + + if ((extra & 0x4000) == 0) { + const int dest_reg = (extra >> 10) & 7; + fmov_rr(dest_reg, val); + // gb-- status register is affected + MAKE_FPSR(dest_reg); + return 0; + } + + mode = (opcode >> 3) & 7; + reg = opcode & 7; + size = (extra >> 10) & 7; + ad = (uae_u32)-1; + switch (mode) { + case 0: + switch (size) { + case 6: + fmovi_mr((uintptr)temp_fp,val); + delay; + mov_b_rm(reg,(uintptr)temp_fp); + return 0; + case 4: + 
fmovi_mr((uintptr)temp_fp,val); + delay; + mov_w_rm(reg,(uintptr)temp_fp); + return 0; + case 0: + fmovi_mr((uintptr)temp_fp,val); + delay; + mov_l_rm(reg,(uintptr)temp_fp); + return 0; + case 1: + fmovs_mr((uintptr)temp_fp,val); + delay; + mov_l_rm(reg,(uintptr)temp_fp); + return 0; + default: + return -1; + } + case 1: + return -1; /* genuine invalid instruction */ + default: break; + } + + /* Let's make sure we get out *before* doing something silly if + we can't handle the size */ + switch (size) { + case 0: + case 4: + case 5: + case 6: + case 2: + case 1: + break; + case 3: + default: + return -1; + } + + switch (mode) { + case 2: + ad=S1; + mov_l_rr(ad,reg+8); + break; + case 3: + ad=S1; + mov_l_rr(ad,reg+8); + lea_l_brr(reg+8,reg+8,(reg == 7?sz2[size]:sz1[size])); + break; + case 4: + ad=S1; + lea_l_brr(reg+8,reg+8,-(reg == 7?sz2[size]:sz1[size])); + mov_l_rr(ad,reg+8); + break; + case 5: + { + uae_u32 off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + ad=S1; + mov_l_rr(ad,reg+8); + add_l_ri(ad,off); + break; + } + case 6: + { + uae_u32 dp=comp_get_iword((m68k_pc_offset+=2)-2); + ad=S1; + calc_disp_ea_020(reg+8,dp,ad,S2); + break; + } + case 7: + switch (reg) { + case 0: + { + uae_u32 off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + ad=S1; + mov_l_ri(ad,off); + break; + } + case 1: + { + uae_u32 off=comp_get_ilong((m68k_pc_offset+=4)-4); + ad=S1; + mov_l_ri(ad,off); + break; + } + case 2: + { + uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+ + m68k_pc_offset; + uae_s32 PC16off =(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + ad=S1; + mov_l_ri(ad,address+PC16off); + break; + } + case 3: + return -1; + tmppc = m68k_getpc (); + tmp = next_iword (); + ad = get_disp_ea_020 (tmppc, tmp); + break; + case 4: + { + uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+ + m68k_pc_offset; + ad=S1; + mov_l_ri(ad,address); + m68k_pc_offset+=sz2[size]; + break; + } + default: + return -1; + } + } + switch 
(size) { + case 0: + fmovi_mr((uintptr)temp_fp,val); + delay; + mov_l_rm(S2,(uintptr)temp_fp); + writelong_clobber(ad,S2,S3); + break; + case 1: + fmovs_mr((uintptr)temp_fp,val); + delay; + mov_l_rm(S2,(uintptr)temp_fp); + writelong_clobber(ad,S2,S3); + break; + case 2: + fmov_ext_mr((uintptr)temp_fp,val); + delay; + mov_w_rm(S2,(uintptr)temp_fp+8); + writeword_clobber(ad,S2,S3); + add_l_ri(ad,4); + mov_l_rm(S2,(uintptr)temp_fp+4); + writelong_clobber(ad,S2,S3); + add_l_ri(ad,4); + mov_l_rm(S2,(uintptr)temp_fp); + writelong_clobber(ad,S2,S3); + break; + case 3: return -1; /* Packed */ + + case 4: + fmovi_mr((uintptr)temp_fp,val); + delay; + mov_l_rm(S2,(uintptr)temp_fp); + writeword_clobber(ad,S2,S3); + break; + case 5: + fmov_mr((uintptr)temp_fp,val); + delay; + mov_l_rm(S2,(uintptr)temp_fp+4); + writelong_clobber(ad,S2,S3); + add_l_ri(ad,4); + mov_l_rm(S2,(uintptr)temp_fp); + writelong_clobber(ad,S2,S3); + break; + case 6: + fmovi_mr((uintptr)temp_fp,val); + delay; + mov_l_rm(S2,(uintptr)temp_fp); + writebyte(ad,S2,S3); + break; + default: + return -1; + } + return 0; +} + +/* return -1 for failure, or register number for success */ +STATIC_INLINE int get_fp_ad (uae_u32 opcode, uae_u32 * ad) +{ + uae_u16 tmp; + uaecptr tmppc; + int mode; + int reg; + uae_s32 off; + + mode = (opcode >> 3) & 7; + reg = opcode & 7; + switch (mode) { + case 0: + case 1: + return -1; + case 2: + case 3: + case 4: + mov_l_rr(S1,8+reg); + return S1; + *ad = m68k_areg (regs, reg); + break; + case 5: + off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + + mov_l_rr(S1,8+reg); + add_l_ri(S1,off); + return S1; + case 6: + return -1; + break; + case 7: + switch (reg) { + case 0: + off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + mov_l_ri(S1,off); + return S1; + case 1: + off=comp_get_ilong((m68k_pc_offset+=4)-4); + mov_l_ri(S1,off); + return S1; + case 2: + return -1; +// *ad = m68k_getpc (); +// *ad += (uae_s32) (uae_s16) next_iword (); + off=start_pc+((char 
*)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset; + off+=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + mov_l_ri(S1,off); + return S1; + case 3: + return -1; + tmppc = m68k_getpc (); + tmp = next_iword (); + *ad = get_disp_ea_020 (tmppc, tmp); + break; + default: + return -1; + } + } + abort(); +} + +void comp_fdbcc_opp (uae_u32 /* opcode */, uae_u16 /* extra */) +{ + FAIL(1); + return; +} + +void comp_fscc_opp (uae_u32 opcode, uae_u16 extra) +{ + uae_u32 ad; + int cc; + int reg; + +#ifdef DEBUG_FPP + printf ("fscc_opp at %08lx\n", m68k_getpc ()); + fflush (stdout); +#endif + + + if (extra&0x20) { /* only cc from 00 to 1f are defined */ + FAIL(1); + return; + } + if ((opcode & 0x38) != 0) { /* We can only do to integer register */ + FAIL(1); + return; + } + + fflags_into_flags(S2); + reg=(opcode&7); + + mov_l_ri(S1,255); + mov_l_ri(S4,0); + switch(extra&0x0f) { /* according to fpp.c, the 0x10 bit is ignored + */ + case 0: break; /* set never */ + case 1: mov_l_rr(S2,S4); + cmov_l_rr(S4,S1,4); + cmov_l_rr(S4,S2,10); break; + case 2: cmov_l_rr(S4,S1,7); break; + case 3: cmov_l_rr(S4,S1,3); break; + case 4: mov_l_rr(S2,S4); + cmov_l_rr(S4,S1,2); + cmov_l_rr(S4,S2,10); break; + case 5: mov_l_rr(S2,S4); + cmov_l_rr(S4,S1,6); + cmov_l_rr(S4,S2,10); break; + case 6: cmov_l_rr(S4,S1,5); break; + case 7: cmov_l_rr(S4,S1,11); break; + case 8: cmov_l_rr(S4,S1,10); break; + case 9: cmov_l_rr(S4,S1,4); break; + case 10: cmov_l_rr(S4,S1,10); cmov_l_rr(S4,S1,7); break; + case 11: cmov_l_rr(S4,S1,4); cmov_l_rr(S4,S1,3); break; + case 12: cmov_l_rr(S4,S1,2); break; + case 13: cmov_l_rr(S4,S1,6); break; + case 14: cmov_l_rr(S4,S1,5); cmov_l_rr(S4,S1,10); break; + case 15: mov_l_rr(S4,S1); break; + } + + if ((opcode & 0x38) == 0) { + mov_b_rr(reg,S4); + } else { + abort(); + if (get_fp_ad (opcode, &ad) == 0) { + m68k_setpc (m68k_getpc () - 4); + fpuop_illg (opcode,extra); + } else + put_byte (ad, cc ? 
0xff : 0x00); + } +} + +void comp_ftrapcc_opp (uae_u32 /* opcode */, uaecptr /* oldpc */) +{ + FAIL(1); + return; +} + +void comp_fbcc_opp (uae_u32 opcode) +{ + uae_u32 start_68k_offset=m68k_pc_offset; + uae_u32 off; + uae_u32 v1; + uae_u32 v2; + int cc; + + // comp_pc_p is expected to be bound to 32-bit addresses + assert((uintptr)comp_pc_p <= 0xffffffffUL); + + if (opcode&0x20) { /* only cc from 00 to 1f are defined */ + FAIL(1); + return; + } + if ((opcode&0x40)==0) { + off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + } + else { + off=comp_get_ilong((m68k_pc_offset+=4)-4); + } + mov_l_ri(S1,(uintptr) + (comp_pc_p+off-(m68k_pc_offset-start_68k_offset))); + mov_l_ri(PC_P,(uintptr)comp_pc_p); + + /* Now they are both constant. Might as well fold in m68k_pc_offset */ + add_l_ri(S1,m68k_pc_offset); + add_l_ri(PC_P,m68k_pc_offset); + m68k_pc_offset=0; + + /* according to fpp.c, the 0x10 bit is ignored + (it handles exception handling, which we don't + do, anyway ;-) */ + cc=opcode&0x0f; + v1=get_const(PC_P); + v2=get_const(S1); + fflags_into_flags(S2); + + switch(cc) { + case 0: break; /* jump never */ + case 1: + mov_l_rr(S2,PC_P); + cmov_l_rr(PC_P,S1,4); + cmov_l_rr(PC_P,S2,10); break; + case 2: register_branch(v1,v2,7); break; + case 3: register_branch(v1,v2,3); break; + case 4: + mov_l_rr(S2,PC_P); + cmov_l_rr(PC_P,S1,2); + cmov_l_rr(PC_P,S2,10); break; + case 5: + mov_l_rr(S2,PC_P); + cmov_l_rr(PC_P,S1,6); + cmov_l_rr(PC_P,S2,10); break; + case 6: register_branch(v1,v2,5); break; + case 7: register_branch(v1,v2,11); break; + case 8: register_branch(v1,v2,10); break; + case 9: register_branch(v1,v2,4); break; + case 10: + cmov_l_rr(PC_P,S1,10); + cmov_l_rr(PC_P,S1,7); break; + case 11: + cmov_l_rr(PC_P,S1,4); + cmov_l_rr(PC_P,S1,3); break; + case 12: register_branch(v1,v2,2); break; + case 13: register_branch(v1,v2,6); break; + case 14: + cmov_l_rr(PC_P,S1,5); + cmov_l_rr(PC_P,S1,10); break; + case 15: mov_l_rr(PC_P,S1); break; + } +} + + /* Floating 
point conditions + The "NotANumber" part could be problematic; Howver, when NaN is + encountered, the ftst instruction sets bot N and Z to 1 on the x87, + so quite often things just fall into place. This is probably not + accurate wrt the 68k FPU, but it is *as* accurate as this was before. + However, some more thought should go into fixing this stuff up so + it accurately emulates the 68k FPU. +>==> 13) & 0x7) { + case 3: /* 2nd most common */ + if (put_fp_value ((extra >> 7)&7 , opcode, extra) < 0) { + FAIL(1); + return; + + } + return; + case 6: + case 7: + { + uae_u32 ad, list = 0; + int incr = 0; + if (extra & 0x2000) { + + /* FMOVEM FPP->memory */ + switch ((extra >> 11) & 3) { /* Get out early if failure */ + case 0: + case 2: + break; + case 1: + case 3: + default: + FAIL(1); return; + } + ad=get_fp_ad (opcode, &ad); + if ((uae_s32)ad<0) { + m68k_setpc (m68k_getpc () - 4); + fpuop_illg (opcode,extra); + return; + } + switch ((extra >> 11) & 3) { + case 0: /* static pred */ + list = extra & 0xff; + incr = -1; + break; + case 2: /* static postinc */ + list = extra & 0xff; + incr = 1; + break; + case 1: /* dynamic pred */ + case 3: /* dynamic postinc */ + abort(); + } + if (incr < 0) { /* Predecrement */ + for (reg = 7; reg >= 0; reg--) { + if (list & 0x80) { + fmov_ext_mr((uintptr)temp_fp,reg); + delay; + sub_l_ri(ad,4); + mov_l_rm(S2,(uintptr)temp_fp); + writelong_clobber(ad,S2,S3); + sub_l_ri(ad,4); + mov_l_rm(S2,(uintptr)temp_fp+4); + writelong_clobber(ad,S2,S3); + sub_l_ri(ad,4); + mov_w_rm(S2,(uintptr)temp_fp+8); + writeword_clobber(ad,S2,S3); + } + list <<= 1; + } + } + else { /* Postincrement */ + for (reg = 0; reg < 8; reg++) { + if (list & 0x80) { + fmov_ext_mr((uintptr)temp_fp,reg); + delay; + mov_w_rm(S2,(uintptr)temp_fp+8); + writeword_clobber(ad,S2,S3); + add_l_ri(ad,4); + mov_l_rm(S2,(uintptr)temp_fp+4); + writelong_clobber(ad,S2,S3); + add_l_ri(ad,4); + mov_l_rm(S2,(uintptr)temp_fp); + writelong_clobber(ad,S2,S3); + add_l_ri(ad,4); + } + list 
<<= 1; + } + } + if ((opcode & 0x38) == 0x18) + mov_l_rr((opcode & 7)+8,ad); + if ((opcode & 0x38) == 0x20) + mov_l_rr((opcode & 7)+8,ad); + } else { + /* FMOVEM memory->FPP */ + + uae_u32 ad; + switch ((extra >> 11) & 3) { /* Get out early if failure */ + case 0: + case 2: + break; + case 1: + case 3: + default: + FAIL(1); return; + } + ad=get_fp_ad (opcode, &ad); + if ((uae_s32)ad<0) { + m68k_setpc (m68k_getpc () - 4); + D(bug("no ad\n")); + fpuop_illg (opcode,extra); + return; + } + switch ((extra >> 11) & 3) { + case 0: /* static pred */ + list = extra & 0xff; + incr = -1; + break; + case 2: /* static postinc */ + list = extra & 0xff; + incr = 1; + break; + case 1: /* dynamic pred */ + case 3: /* dynamic postinc */ + abort(); + } + + if (incr < 0) { + // not reached + for (reg = 7; reg >= 0; reg--) { + if (list & 0x80) { + sub_l_ri(ad,4); + readlong(ad,S2,S3); + mov_l_mr((uintptr)(temp_fp),S2); + sub_l_ri(ad,4); + readlong(ad,S2,S3); + mov_l_mr((uintptr)(temp_fp)+4,S2); + sub_l_ri(ad,4); + readword(ad,S2,S3); + mov_w_mr(((uintptr)temp_fp)+8,S2); + delay2; + fmov_ext_rm(reg,(uintptr)(temp_fp)); + } + list <<= 1; + } + } + else { + for (reg = 0; reg < 8; reg++) { + if (list & 0x80) { + readword(ad,S2,S3); + mov_w_mr(((uintptr)temp_fp)+8,S2); + add_l_ri(ad,4); + readlong(ad,S2,S3); + mov_l_mr((uintptr)(temp_fp)+4,S2); + add_l_ri(ad,4); + readlong(ad,S2,S3); + mov_l_mr((uintptr)(temp_fp),S2); + add_l_ri(ad,4); + delay2; + fmov_ext_rm(reg,(uintptr)(temp_fp)); + } + list <<= 1; + } + } + if ((opcode & 0x38) == 0x18) + mov_l_rr((opcode & 7)+8,ad); + if ((opcode & 0x38) == 0x20) + mov_l_rr((opcode & 7)+8,ad); + } + } + return; + + case 4: + case 5: /* rare */ + if ((opcode & 0x30) == 0) { + if (extra & 0x2000) { + if (extra & 0x1000) { +#if HANDLE_FPCR + mov_l_rm(opcode & 15, (uintptr)&fpu.fpcr.rounding_mode); + or_l_rm(opcode & 15, (uintptr)&fpu.fpcr.rounding_precision); +#else + FAIL(1); + return; +#endif + } + if (extra & 0x0800) { + FAIL(1); + return; + } + if 
(extra & 0x0400) { + mov_l_rm(opcode & 15,(uintptr)&fpu.instruction_address); + return; + } + } else { + // gb-- moved here so that we may FAIL() without generating any code + if (extra & 0x0800) { + // set_fpsr(m68k_dreg (regs, opcode & 15)); + FAIL(1); + return; + } + if (extra & 0x1000) { +#if HANDLE_FPCR +#if defined(FPU_USE_X86_ROUNDING_MODE) && defined(FPU_USE_X86_ROUNDING_PRECISION) + FAIL(1); + return; +#endif + mov_l_rr(S1,opcode & 15); + mov_l_rr(S2,opcode & 15); + and_l_ri(S1,FPCR_ROUNDING_PRECISION); + and_l_ri(S2,FPCR_ROUNDING_MODE); + mov_l_mr((uintptr)&fpu.fpcr.rounding_precision,S1); + mov_l_mr((uintptr)&fpu.fpcr.rounding_mode,S2); +#else + FAIL(1); + return; +#endif +// return; gb-- FMOVEM could also operate on fpiar + } + if (extra & 0x0400) { + mov_l_mr((uintptr)&fpu.instruction_address,opcode & 15); +// return; gb-- we have to process all FMOVEM bits before returning + } + return; + } + } else if ((opcode & 0x3f) == 0x3c) { + if ((extra & 0x2000) == 0) { + // gb-- moved here so that we may FAIL() without generating any code + if (extra & 0x0800) { + FAIL(1); + return; + } + if (extra & 0x1000) { + comp_get_ilong((m68k_pc_offset+=4)-4); +#if HANDLE_FPCR +#if defined(FPU_USE_X86_ROUNDING_MODE) && defined(FPU_USE_X86_ROUNDING_PRECISION) + FAIL(1); + return; +#endif +// mov_l_mi((uintptr)®s.fpcr,val); + mov_l_ri(S1,val); + mov_l_ri(S2,val); + and_l_ri(S1,FPCR_ROUNDING_PRECISION); + and_l_ri(S2,FPCR_ROUNDING_MODE); + mov_l_mr((uintptr)&fpu.fpcr.rounding_precision,S1); + mov_l_mr((uintptr)&fpu.fpcr.rounding_mode,S2); +#else + FAIL(1); + return; +#endif +// return; gb-- FMOVEM could also operate on fpiar + } + if (extra & 0x0400) { + uae_u32 val=comp_get_ilong((m68k_pc_offset+=4)-4); + mov_l_mi((uintptr)&fpu.instruction_address,val); +// return; gb-- we have to process all FMOVEM bits before returning + } + return; + } + FAIL(1); + return; + } else if (extra & 0x2000) { + FAIL(1); + return; + } else { + FAIL(1); + return; + } + FAIL(1); + return; + + 
case 0: + case 2: /* Extremely common */ + reg = (extra >> 7) & 7; + if ((extra & 0xfc00) == 0x5c00) { + switch (extra & 0x7f) { + case 0x00: + fmov_pi(reg); + break; + case 0x0b: + fmov_log10_2(reg); + break; + case 0x0c: +#if USE_LONG_DOUBLE + fmov_ext_rm(reg,(uintptr)&const_e); +#else + fmov_rm(reg,(uintptr)&const_e); +#endif + break; + case 0x0d: + fmov_log2_e(reg); + break; + case 0x0e: +#if USE_LONG_DOUBLE + fmov_ext_rm(reg,(uintptr)&const_log10_e); +#else + fmov_rm(reg,(uintptr)&const_log10_e); +#endif + break; + case 0x0f: + fmov_0(reg); + break; + case 0x30: + fmov_loge_2(reg); + break; + case 0x31: +#if USE_LONG_DOUBLE + fmov_ext_rm(reg,(uintptr)&const_loge_10); +#else + fmov_rm(reg,(uintptr)&const_loge_10); +#endif + break; + case 0x32: + fmov_1(reg); + break; + case 0x33: + case 0x34: + case 0x35: + case 0x36: + case 0x37: + case 0x38: + case 0x39: + case 0x3a: + case 0x3b: +#if USE_LONG_DOUBLE + case 0x3c: + case 0x3d: + case 0x3e: + case 0x3f: + fmov_ext_rm(reg,(uintptr)(power10+(extra & 0x7f)-0x32)); +#else + fmov_rm(reg,(uintptr)(power10+(extra & 0x7f)-0x32)); +#endif + break; + default: + /* This is not valid, so we fail */ + FAIL(1); + return; + } + return; + } + + switch (extra & 0x7f) { + case 0x00: /* FMOVE */ + case 0x40: /* Explicit rounding. This is just a quick fix. 
Same + * for all other cases that have three choices */ + case 0x44: + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fmov_rr(reg,src); + MAKE_FPSR (src); + break; + case 0x01: /* FINT */ + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x02: /* FSINH */ + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x03: /* FINTRZ */ +#ifdef USE_X86_FPUCW + /* If we have control over the CW, we can do this */ + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + mov_l_ri(S1,16); /* Switch to "round to zero" mode */ + fldcw_m_indexed(S1,(uae_u32)x86_fpucw); + + frndint_rr(reg,src); + + /* restore control word */ + mov_l_rm(S1,(uintptr)®s.fpcr); + and_l_ri(S1,0x000000f0); + fldcw_m_indexed(S1,(uintptr)x86_fpucw); + + MAKE_FPSR (reg); + break; +#endif + FAIL(1); + return; + break; + case 0x04: /* FSQRT */ + case 0x41: + case 0x45: + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fsqrt_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x06: /* FLOGNP1 */ + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x08: /* FETOXM1 */ + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x09: /* FTANH */ + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x0a: /* FATAN */ + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x0c: /* FASIN */ + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x0d: /* FATANH */ + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x0e: /* FSIN */ + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fsin_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x0f: /* FTAN */ + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x10: /* FETOX */ + dont_care_fflags(); + src=get_fp_value (opcode, extra); 
+ if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fetox_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x11: /* FTWOTOX */ + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + ftwotox_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x12: /* FTENTOX */ + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x14: /* FLOGN */ + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x15: /* FLOG10 */ + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x16: /* FLOG2 */ + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + flog2_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x18: /* FABS */ + case 0x58: + case 0x5c: + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fabs_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x19: /* FCOSH */ + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x1a: /* FNEG */ + case 0x5a: + case 0x5e: + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fneg_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x1c: /* FACOS */ + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x1d: /* FCOS */ + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fcos_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x1e: /* FGETEXP */ + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x1f: /* FGETMAN */ + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x20: /* FDIV */ + case 0x60: + case 0x64: + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fdiv_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x21: /* FMOD */ + dont_care_fflags(); + 
src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + frem_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x22: /* FADD */ + case 0x62: + case 0x66: + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fadd_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x23: /* FMUL */ + case 0x63: + case 0x67: + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fmul_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x24: /* FSGLDIV */ + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fdiv_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x25: /* FREM */ + // gb-- disabled because the quotient byte must be computed + // otherwise, free rotation in ClarisWorks doesn't work. + FAIL(1); + return; + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + frem1_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x26: /* FSCALE */ + dont_care_fflags(); + FAIL(1); + return; + break; + case 0x27: /* FSGLMUL */ + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fmul_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x28: /* FSUB */ + case 0x68: + case 0x6c: + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fsub_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x30: /* FSINCOS */ + case 0x31: + case 0x32: + case 0x33: + case 0x34: + case 0x35: + case 0x36: + case 0x37: + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x38: /* FCMP */ + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fmov_rr(FP_RESULT,reg); + fsub_rr(FP_RESULT,src); /* 
Right way? */ + break; + case 0x3a: /* FTST */ + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fmov_rr(FP_RESULT,src); + break; + default: + FAIL(1); + return; + break; + } + return; + } + m68k_setpc (m68k_getpc () - 4); + fpuop_illg (opcode,extra); +} diff --git a/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_arm.cpp b/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_arm.cpp new file mode 100644 index 00000000..6c1ede09 --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_arm.cpp @@ -0,0 +1,2106 @@ +/* + * compiler/compemu_midfunc_arm.cpp - Native MIDFUNCS for ARM + * + * Copyright (c) 2014 Jens Heitmann of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * + * Adaptation for Basilisk II and improvements, copyright 2000-2002 + * Gwenole Beauchesne + * + * Basilisk II (C) 1997-2002 Christian Bauer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Note: + * File is included by compemu_support.cpp + * + */ + +/******************************************************************** + * CPU functions exposed to gencomp. 
Both CREATE and EMIT time * + ********************************************************************/ + +/* + * RULES FOR HANDLING REGISTERS: + * + * * In the function headers, order the parameters + * - 1st registers written to + * - 2nd read/modify/write registers + * - 3rd registers read from + * * Before calling raw_*, you must call readreg, writereg or rmw for + * each register + * * The order for this is + * - 1st call remove_offset for all registers written to with size<4 + * - 2nd call readreg for all registers read without offset + * - 3rd call rmw for all rmw registers + * - 4th call readreg_offset for all registers that can handle offsets + * - 5th call get_offset for all the registers from the previous step + * - 6th call writereg for all written-to registers + * - 7th call raw_* + * - 8th unlock2 all registers that were locked + */ + +MIDFUNC(0,live_flags,(void)) +{ + live.flags_on_stack=TRASH; + live.flags_in_flags=VALID; + live.flags_are_important=1; +} +MENDFUNC(0,live_flags,(void)) + +MIDFUNC(0,dont_care_flags,(void)) +{ + live.flags_are_important=0; +} +MENDFUNC(0,dont_care_flags,(void)) + +MIDFUNC(0,duplicate_carry,(void)) +{ + evict(FLAGX); + make_flags_live_internal(); + COMPCALL(setcc_m)((uintptr)live.state[FLAGX].mem,NATIVE_CC_CS); + log_vwrite(FLAGX); +} +MENDFUNC(0,duplicate_carry,(void)) + +MIDFUNC(0,restore_carry,(void)) +{ +#if defined(USE_JIT2) + RR4 r=readreg(FLAGX,4); + MRS_CPSR(REG_WORK1); + TEQ_ri(r,1); + CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_C_FLAG); + CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_C_FLAG); + MSR_CPSRf_r(REG_WORK1); + unlock2(r); +#else + if (!have_rat_stall) { /* Not a P6 core, i.e. no partial stalls */ + bt_l_ri_noclobber(FLAGX,0); + } + else { /* Avoid the stall the above creates. + This is slow on non-P6, though. 
+ */ + COMPCALL(rol_b_ri(FLAGX,8)); + isclean(FLAGX); + } +#endif +} +MENDFUNC(0,restore_carry,(void)) + +MIDFUNC(0,start_needflags,(void)) +{ + needflags=1; +} +MENDFUNC(0,start_needflags,(void)) + +MIDFUNC(0,end_needflags,(void)) +{ + needflags=0; +} +MENDFUNC(0,end_needflags,(void)) + +MIDFUNC(0,make_flags_live,(void)) +{ + make_flags_live_internal(); +} +MENDFUNC(0,make_flags_live,(void)) + +MIDFUNC(2,bt_l_ri,(RR4 r, IMM i)) /* This is defined as only affecting C */ +{ + int size=4; + if (i<16) + size=2; + CLOBBER_BT; + r=readreg(r,size); + raw_bt_l_ri(r,i); + unlock2(r); +} +MENDFUNC(2,bt_l_ri,(RR4 r, IMM i)) /* This is defined as only affecting C */ + +MIDFUNC(2,bt_l_rr,(RR4 r, RR4 b)) /* This is defined as only affecting C */ +{ + CLOBBER_BT; + r=readreg(r,4); + b=readreg(b,4); + raw_bt_l_rr(r,b); + unlock2(r); + unlock2(b); +} +MENDFUNC(2,bt_l_rr,(RR4 r, RR4 b)) /* This is defined as only affecting C */ + +MIDFUNC(2,btc_l_rr,(RW4 r, RR4 b)) +{ + CLOBBER_BT; + b=readreg(b,4); + r=rmw(r,4,4); + raw_btc_l_rr(r,b); + unlock2(r); + unlock2(b); +} +MENDFUNC(2,btc_l_rr,(RW4 r, RR4 b)) + +MIDFUNC(2,btr_l_rr,(RW4 r, RR4 b)) +{ + CLOBBER_BT; + b=readreg(b,4); + r=rmw(r,4,4); + raw_btr_l_rr(r,b); + unlock2(r); + unlock2(b); +} +MENDFUNC(2,btr_l_rr,(RW4 r, RR4 b)) + +MIDFUNC(2,bts_l_rr,(RW4 r, RR4 b)) +{ + CLOBBER_BT; + b=readreg(b,4); + r=rmw(r,4,4); + raw_bts_l_rr(r,b); + unlock2(r); + unlock2(b); +} +MENDFUNC(2,bts_l_rr,(RW4 r, RR4 b)) + +MIDFUNC(2,mov_l_rm,(W4 d, IMM s)) +{ + CLOBBER_MOV; + d=writereg(d,4); + raw_mov_l_rm(d,s); + unlock2(d); +} +MENDFUNC(2,mov_l_rm,(W4 d, IMM s)) + +MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, RR4 index, IMM factor)) +{ + CLOBBER_MOV; + index=readreg(index,4); + d=writereg(d,4); + raw_mov_l_rm_indexed(d,base,index,factor); + unlock2(index); + unlock2(d); +} +MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, RR4 index, IMM factor)) + +MIDFUNC(2,mov_l_mi,(IMM d, IMM s)) +{ + CLOBBER_MOV; + raw_mov_l_mi(d,s); +} +MENDFUNC(2,mov_l_mi,(IMM d, 
IMM s)) + +MIDFUNC(2,mov_w_mi,(IMM d, IMM s)) +{ + CLOBBER_MOV; + raw_mov_w_mi(d,s); +} +MENDFUNC(2,mov_w_mi,(IMM d, IMM s)) + +MIDFUNC(2,mov_b_mi,(IMM d, IMM s)) +{ + CLOBBER_MOV; + raw_mov_b_mi(d,s); +} +MENDFUNC(2,mov_b_mi,(IMM d, IMM s)) + +MIDFUNC(2,rol_b_ri,(RW1 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_ROL; + r=rmw(r,1,1); + raw_rol_b_ri(r,i); + unlock2(r); +} +MENDFUNC(2,rol_b_ri,(RW1 r, IMM i)) + +MIDFUNC(2,rol_w_ri,(RW2 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_ROL; + r=rmw(r,2,2); + raw_rol_w_ri(r,i); + unlock2(r); +} +MENDFUNC(2,rol_w_ri,(RW2 r, IMM i)) + +MIDFUNC(2,rol_l_ri,(RW4 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_ROL; + r=rmw(r,4,4); + raw_rol_l_ri(r,i); + unlock2(r); +} +MENDFUNC(2,rol_l_ri,(RW4 r, IMM i)) + +MIDFUNC(2,rol_l_rr,(RW4 d, RR1 r)) +{ + if (isconst(r)) { + COMPCALL(rol_l_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_ROL; + r=readreg(r,1); + d=rmw(d,4,4); + raw_rol_l_rr(d,r); + unlock2(r); + unlock2(d); +} +MENDFUNC(2,rol_l_rr,(RW4 d, RR1 r)) + +MIDFUNC(2,rol_w_rr,(RW2 d, RR1 r)) +{ /* Can only do this with r==1, i.e. cl */ + + if (isconst(r)) { + COMPCALL(rol_w_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_ROL; + r=readreg(r,1); + d=rmw(d,2,2); + raw_rol_w_rr(d,r); + unlock2(r); + unlock2(d); +} +MENDFUNC(2,rol_w_rr,(RW2 d, RR1 r)) + +MIDFUNC(2,rol_b_rr,(RW1 d, RR1 r)) +{ /* Can only do this with r==1, i.e. 
cl */ + + if (isconst(r)) { + COMPCALL(rol_b_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_ROL; + r=readreg(r,1); + d=rmw(d,1,1); + raw_rol_b_rr(d,r); + unlock2(r); + unlock2(d); +} +MENDFUNC(2,rol_b_rr,(RW1 d, RR1 r)) + +MIDFUNC(2,shll_l_rr,(RW4 d, RR1 r)) +{ + if (isconst(r)) { + COMPCALL(shll_l_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_SHLL; + r=readreg(r,1); + d=rmw(d,4,4); + raw_shll_l_rr(d,r); + unlock2(r); + unlock2(d); +} +MENDFUNC(2,shll_l_rr,(RW4 d, RR1 r)) + +MIDFUNC(2,shll_w_rr,(RW2 d, RR1 r)) +{ /* Can only do this with r==1, i.e. cl */ + + if (isconst(r)) { + COMPCALL(shll_w_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_SHLL; + r=readreg(r,1); + d=rmw(d,2,2); + raw_shll_w_rr(d,r); + unlock2(r); + unlock2(d); +} +MENDFUNC(2,shll_w_rr,(RW2 d, RR1 r)) + +MIDFUNC(2,shll_b_rr,(RW1 d, RR1 r)) +{ /* Can only do this with r==1, i.e. cl */ + + if (isconst(r)) { + COMPCALL(shll_b_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_SHLL; + r=readreg(r,1); + d=rmw(d,1,1); + raw_shll_b_rr(d,r); + unlock2(r); + unlock2(d); +} +MENDFUNC(2,shll_b_rr,(RW1 d, RR1 r)) + +MIDFUNC(2,ror_b_ri,(RR1 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_ROR; + r=rmw(r,1,1); + raw_ror_b_ri(r,i); + unlock2(r); +} +MENDFUNC(2,ror_b_ri,(RR1 r, IMM i)) + +MIDFUNC(2,ror_w_ri,(RR2 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_ROR; + r=rmw(r,2,2); + raw_ror_w_ri(r,i); + unlock2(r); +} +MENDFUNC(2,ror_w_ri,(RR2 r, IMM i)) + +MIDFUNC(2,ror_l_ri,(RR4 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_ROR; + r=rmw(r,4,4); + raw_ror_l_ri(r,i); + unlock2(r); +} +MENDFUNC(2,ror_l_ri,(RR4 r, IMM i)) + +MIDFUNC(2,ror_l_rr,(RR4 d, RR1 r)) +{ + if (isconst(r)) { + COMPCALL(ror_l_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_ROR; + r=readreg(r,1); + d=rmw(d,4,4); + raw_ror_l_rr(d,r); + unlock2(r); + unlock2(d); +} +MENDFUNC(2,ror_l_rr,(RR4 d, RR1 r)) + +MIDFUNC(2,ror_w_rr,(RR2 d, RR1 r)) +{ + if (isconst(r)) { + 
COMPCALL(ror_w_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_ROR; + r=readreg(r,1); + d=rmw(d,2,2); + raw_ror_w_rr(d,r); + unlock2(r); + unlock2(d); +} +MENDFUNC(2,ror_w_rr,(RR2 d, RR1 r)) + +MIDFUNC(2,ror_b_rr,(RR1 d, RR1 r)) +{ + if (isconst(r)) { + COMPCALL(ror_b_ri)(d,(uae_u8)live.state[r].val); + return; + } + + CLOBBER_ROR; + r=readreg(r,1); + d=rmw(d,1,1); + raw_ror_b_rr(d,r); + unlock2(r); + unlock2(d); +} +MENDFUNC(2,ror_b_rr,(RR1 d, RR1 r)) + +MIDFUNC(2,shrl_l_rr,(RW4 d, RR1 r)) +{ + if (isconst(r)) { + COMPCALL(shrl_l_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_SHRL; + r=readreg(r,1); + d=rmw(d,4,4); + raw_shrl_l_rr(d,r); + unlock2(r); + unlock2(d); +} +MENDFUNC(2,shrl_l_rr,(RW4 d, RR1 r)) + +MIDFUNC(2,shrl_w_rr,(RW2 d, RR1 r)) +{ /* Can only do this with r==1, i.e. cl */ + + if (isconst(r)) { + COMPCALL(shrl_w_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_SHRL; + r=readreg(r,1); + d=rmw(d,2,2); + raw_shrl_w_rr(d,r); + unlock2(r); + unlock2(d); +} +MENDFUNC(2,shrl_w_rr,(RW2 d, RR1 r)) + +MIDFUNC(2,shrl_b_rr,(RW1 d, RR1 r)) +{ /* Can only do this with r==1, i.e. 
cl */ + + if (isconst(r)) { + COMPCALL(shrl_b_ri)(d,(uae_u8)live.state[r].val); + return; + } + + CLOBBER_SHRL; + r=readreg(r,1); + d=rmw(d,1,1); + raw_shrl_b_rr(d,r); + unlock2(r); + unlock2(d); +} +MENDFUNC(2,shrl_b_rr,(RW1 d, RR1 r)) + +MIDFUNC(2,shll_l_ri,(RW4 r, IMM i)) +{ + if (!i && !needflags) + return; + if (isconst(r) && !needflags) { + live.state[r].val<<=i; + return; + } + CLOBBER_SHLL; + r=rmw(r,4,4); + raw_shll_l_ri(r,i); + unlock2(r); +} +MENDFUNC(2,shll_l_ri,(RW4 r, IMM i)) + +MIDFUNC(2,shll_w_ri,(RW2 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHLL; + r=rmw(r,2,2); + raw_shll_w_ri(r,i); + unlock2(r); +} +MENDFUNC(2,shll_w_ri,(RW2 r, IMM i)) + +MIDFUNC(2,shll_b_ri,(RW1 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHLL; + r=rmw(r,1,1); + raw_shll_b_ri(r,i); + unlock2(r); +} +MENDFUNC(2,shll_b_ri,(RW1 r, IMM i)) + +MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i)) +{ + if (!i && !needflags) + return; + if (isconst(r) && !needflags) { + live.state[r].val>>=i; + return; + } + CLOBBER_SHRL; + r=rmw(r,4,4); + raw_shrl_l_ri(r,i); + unlock2(r); +} +MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i)) + +MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHRL; + r=rmw(r,2,2); + raw_shrl_w_ri(r,i); + unlock2(r); +} +MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i)) + +MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHRL; + r=rmw(r,1,1); + raw_shrl_b_ri(r,i); + unlock2(r); +} +MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i)) + +MIDFUNC(2,shra_l_ri,(RW4 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHRA; + r=rmw(r,4,4); + raw_shra_l_ri(r,i); + unlock2(r); +} +MENDFUNC(2,shra_l_ri,(RW4 r, IMM i)) + +MIDFUNC(2,shra_w_ri,(RW2 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHRA; + r=rmw(r,2,2); + raw_shra_w_ri(r,i); + unlock2(r); +} +MENDFUNC(2,shra_w_ri,(RW2 r, IMM i)) + +MIDFUNC(2,shra_b_ri,(RW1 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHRA; + r=rmw(r,1,1); + raw_shra_b_ri(r,i); + 
unlock2(r); +} +MENDFUNC(2,shra_b_ri,(RW1 r, IMM i)) + +MIDFUNC(2,shra_l_rr,(RW4 d, RR1 r)) +{ + if (isconst(r)) { + COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_SHRA; + r=readreg(r,1); + d=rmw(d,4,4); + raw_shra_l_rr(d,r); + unlock2(r); + unlock2(d); +} +MENDFUNC(2,shra_l_rr,(RW4 d, RR1 r)) + +MIDFUNC(2,shra_w_rr,(RW2 d, RR1 r)) +{ /* Can only do this with r==1, i.e. cl */ + + if (isconst(r)) { + COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_SHRA; + r=readreg(r,1); + d=rmw(d,2,2); + raw_shra_w_rr(d,r); + unlock2(r); + unlock2(d); +} +MENDFUNC(2,shra_w_rr,(RW2 d, RR1 r)) + +MIDFUNC(2,shra_b_rr,(RW1 d, RR1 r)) +{ /* Can only do this with r==1, i.e. cl */ + + if (isconst(r)) { + COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val); + return; + } + + CLOBBER_SHRA; + r=readreg(r,1); + d=rmw(d,1,1); + raw_shra_b_rr(d,r); + unlock2(r); + unlock2(d); +} +MENDFUNC(2,shra_b_rr,(RW1 d, RR1 r)) + +MIDFUNC(2,setcc,(W1 d, IMM cc)) +{ + CLOBBER_SETCC; + d=writereg(d,1); + raw_setcc(d,cc); + unlock2(d); +} +MENDFUNC(2,setcc,(W1 d, IMM cc)) + +MIDFUNC(2,setcc_m,(IMM d, IMM cc)) +{ + CLOBBER_SETCC; + raw_setcc_m(d,cc); +} +MENDFUNC(2,setcc_m,(IMM d, IMM cc)) + +MIDFUNC(3,cmov_l_rr,(RW4 d, RR4 s, IMM cc)) +{ + if (d==s) + return; + CLOBBER_CMOV; + s=readreg(s,4); + d=rmw(d,4,4); + raw_cmov_l_rr(d,s,cc); + unlock2(s); + unlock2(d); +} +MENDFUNC(3,cmov_l_rr,(RW4 d, RR4 s, IMM cc)) + +MIDFUNC(2,bsf_l_rr,(W4 d, W4 s)) +{ + CLOBBER_BSF; + s = readreg(s, 4); + d = writereg(d, 4); + raw_bsf_l_rr(d, s); + unlock2(s); + unlock2(d); +} +MENDFUNC(2,bsf_l_rr,(W4 d, W4 s)) + +/* Set the Z flag depending on the value in s. Note that the + value has to be 0 or -1 (or, more precisely, for non-zero + values, bit 14 must be set)! 
*/ +MIDFUNC(2,simulate_bsf,(W4 tmp, RW4 s)) +{ + CLOBBER_BSF; + s=rmw_specific(s,4,4,FLAG_NREG3); + tmp=writereg(tmp,4); + raw_flags_set_zero(s, tmp); + unlock2(tmp); + unlock2(s); +} +MENDFUNC(2,simulate_bsf,(W4 tmp, RW4 s)) + +MIDFUNC(2,imul_32_32,(RW4 d, RR4 s)) +{ + CLOBBER_MUL; + s=readreg(s,4); + d=rmw(d,4,4); + raw_imul_32_32(d,s); + unlock2(s); + unlock2(d); +} +MENDFUNC(2,imul_32_32,(RW4 d, RR4 s)) + +MIDFUNC(2,imul_64_32,(RW4 d, RW4 s)) +{ + CLOBBER_MUL; + s=rmw_specific(s,4,4,MUL_NREG2); + d=rmw_specific(d,4,4,MUL_NREG1); + raw_imul_64_32(d,s); + unlock2(s); + unlock2(d); +} +MENDFUNC(2,imul_64_32,(RW4 d, RW4 s)) + +MIDFUNC(2,mul_64_32,(RW4 d, RW4 s)) +{ + CLOBBER_MUL; + s=rmw_specific(s,4,4,MUL_NREG2); + d=rmw_specific(d,4,4,MUL_NREG1); + raw_mul_64_32(d,s); + unlock2(s); + unlock2(d); +} +MENDFUNC(2,mul_64_32,(RW4 d, RW4 s)) + +MIDFUNC(2,sign_extend_16_rr,(W4 d, RR2 s)) +{ + int isrmw; + + if (isconst(s)) { + set_const(d,(uae_s32)(uae_s16)live.state[s].val); + return; + } + + CLOBBER_SE16; + isrmw=(s==d); + if (!isrmw) { + s=readreg(s,2); + d=writereg(d,4); + } + else { /* If we try to lock this twice, with different sizes, we + are int trouble! */ + s=d=rmw(s,4,2); + } + raw_sign_extend_16_rr(d,s); + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } +} +MENDFUNC(2,sign_extend_16_rr,(W4 d, RR2 s)) + +MIDFUNC(2,sign_extend_8_rr,(W4 d, RR1 s)) +{ + int isrmw; + + if (isconst(s)) { + set_const(d,(uae_s32)(uae_s8)live.state[s].val); + return; + } + + isrmw=(s==d); + CLOBBER_SE8; + if (!isrmw) { + s=readreg(s,1); + d=writereg(d,4); + } + else { /* If we try to lock this twice, with different sizes, we + are int trouble! 
*/ + s=d=rmw(s,4,1); + } + + raw_sign_extend_8_rr(d,s); + + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } +} +MENDFUNC(2,sign_extend_8_rr,(W4 d, RR1 s)) + +MIDFUNC(2,zero_extend_16_rr,(W4 d, RR2 s)) +{ + int isrmw; + + if (isconst(s)) { + set_const(d,(uae_u32)(uae_u16)live.state[s].val); + return; + } + + isrmw=(s==d); + CLOBBER_ZE16; + if (!isrmw) { + s=readreg(s,2); + d=writereg(d,4); + } + else { /* If we try to lock this twice, with different sizes, we + are int trouble! */ + s=d=rmw(s,4,2); + } + raw_zero_extend_16_rr(d,s); + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } +} +MENDFUNC(2,zero_extend_16_rr,(W4 d, RR2 s)) + +MIDFUNC(2,zero_extend_8_rr,(W4 d, RR1 s)) +{ + int isrmw; + if (isconst(s)) { + set_const(d,(uae_u32)(uae_u8)live.state[s].val); + return; + } + + isrmw=(s==d); + CLOBBER_ZE8; + if (!isrmw) { + s=readreg(s,1); + d=writereg(d,4); + } + else { /* If we try to lock this twice, with different sizes, we + are int trouble! 
*/ + s=d=rmw(s,4,1); + } + + raw_zero_extend_8_rr(d,s); + + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } +} +MENDFUNC(2,zero_extend_8_rr,(W4 d, RR1 s)) + +MIDFUNC(2,mov_b_rr,(W1 d, RR1 s)) +{ + if (d==s) + return; + if (isconst(s)) { + COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val); + return; + } + + CLOBBER_MOV; + s=readreg(s,1); + d=writereg(d,1); + raw_mov_b_rr(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,mov_b_rr,(W1 d, RR1 s)) + +MIDFUNC(2,mov_w_rr,(W2 d, RR2 s)) +{ + if (d==s) + return; + if (isconst(s)) { + COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val); + return; + } + + CLOBBER_MOV; + s=readreg(s,2); + d=writereg(d,2); + raw_mov_w_rr(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,mov_w_rr,(W2 d, RR2 s)) + +/* read the long at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_l_rR,(W4 d, RR4 s, IMM offset)) +{ + if (isconst(s)) { + COMPCALL(mov_l_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + s=readreg(s,4); + d=writereg(d,4); + + raw_mov_l_rR(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_l_rR,(W4 d, RR4 s, IMM offset)) + +/* read the word at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_w_rR,(W2 d, RR4 s, IMM offset)) +{ + if (isconst(s)) { + COMPCALL(mov_w_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + s=readreg(s,4); + d=writereg(d,2); + + raw_mov_w_rR(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_w_rR,(W2 d, RR4 s, IMM offset)) + +/* read the long at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_l_brR,(W4 d, RR4 s, IMM offset)) +{ + int sreg=s; + if (isconst(s)) { + COMPCALL(mov_l_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + s=readreg_offset(s,4); + offset+=get_offset(sreg); + d=writereg(d,4); + + raw_mov_l_brR(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_l_brR,(W4 d, RR4 s, IMM offset)) + +/* read the word at the address contained in s+offset and store in d */ 
+MIDFUNC(3,mov_w_brR,(W2 d, RR4 s, IMM offset)) +{ + int sreg=s; + if (isconst(s)) { + COMPCALL(mov_w_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + remove_offset(d,-1); + s=readreg_offset(s,4); + offset+=get_offset(sreg); + d=writereg(d,2); + + raw_mov_w_brR(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_w_brR,(W2 d, RR4 s, IMM offset)) + +/* read the word at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_b_brR,(W1 d, RR4 s, IMM offset)) +{ + int sreg=s; + if (isconst(s)) { + COMPCALL(mov_b_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + remove_offset(d,-1); + s=readreg_offset(s,4); + offset+=get_offset(sreg); + d=writereg(d,1); + + raw_mov_b_brR(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_b_brR,(W1 d, RR4 s, IMM offset)) + +MIDFUNC(3,mov_l_Ri,(RR4 d, IMM i, IMM offset)) +{ + int dreg=d; + if (isconst(d)) { + COMPCALL(mov_l_mi)(live.state[d].val+offset,i); + return; + } + + CLOBBER_MOV; + d=readreg_offset(d,4); + offset+=get_offset(dreg); + raw_mov_l_Ri(d,i,offset); + unlock2(d); +} +MENDFUNC(3,mov_l_Ri,(RR4 d, IMM i, IMM offset)) + +MIDFUNC(3,mov_w_Ri,(RR4 d, IMM i, IMM offset)) +{ + int dreg=d; + if (isconst(d)) { + COMPCALL(mov_w_mi)(live.state[d].val+offset,i); + return; + } + + CLOBBER_MOV; + d=readreg_offset(d,4); + offset+=get_offset(dreg); + raw_mov_w_Ri(d,i,offset); + unlock2(d); +} +MENDFUNC(3,mov_w_Ri,(RR4 d, IMM i, IMM offset)) + +/* Warning! OFFSET is byte sized only! 
*/ +MIDFUNC(3,mov_l_Rr,(RR4 d, RR4 s, IMM offset)) +{ + if (isconst(d)) { + COMPCALL(mov_l_mr)(live.state[d].val+offset,s); + return; + } + if (isconst(s)) { + COMPCALL(mov_l_Ri)(d,live.state[s].val,offset); + return; + } + + CLOBBER_MOV; + s=readreg(s,4); + d=readreg(d,4); + + raw_mov_l_Rr(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_l_Rr,(RR4 d, RR4 s, IMM offset)) + +MIDFUNC(3,mov_w_Rr,(RR4 d, RR2 s, IMM offset)) +{ + if (isconst(d)) { + COMPCALL(mov_w_mr)(live.state[d].val+offset,s); + return; + } + if (isconst(s)) { + COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset); + return; + } + + CLOBBER_MOV; + s=readreg(s,2); + d=readreg(d,4); + raw_mov_w_Rr(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_w_Rr,(RR4 d, RR2 s, IMM offset)) + +MIDFUNC(3,lea_l_brr,(W4 d, RR4 s, IMM offset)) +{ + if (isconst(s)) { + COMPCALL(mov_l_ri)(d,live.state[s].val+offset); + return; + } +#if USE_OFFSET + if (d==s) { + add_offset(d,offset); + return; + } +#endif + CLOBBER_LEA; + s=readreg(s,4); + d=writereg(d,4); + raw_lea_l_brr(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,lea_l_brr,(W4 d, RR4 s, IMM offset)) + +MIDFUNC(5,lea_l_brr_indexed,(W4 d, RR4 s, RR4 index, IMM factor, IMM offset)) +{ + if (!offset) { + COMPCALL(lea_l_rr_indexed)(d,s,index,factor); + return; + } + CLOBBER_LEA; + s=readreg(s,4); + index=readreg(index,4); + d=writereg(d,4); + + raw_lea_l_brr_indexed(d,s,index,factor,offset); + unlock2(d); + unlock2(index); + unlock2(s); +} +MENDFUNC(5,lea_l_brr_indexed,(W4 d, RR4 s, RR4 index, IMM factor, IMM offset)) + +MIDFUNC(4,lea_l_rr_indexed,(W4 d, RR4 s, RR4 index, IMM factor)) +{ + CLOBBER_LEA; + s=readreg(s,4); + index=readreg(index,4); + d=writereg(d,4); + + raw_lea_l_rr_indexed(d,s,index,factor); + unlock2(d); + unlock2(index); + unlock2(s); +} +MENDFUNC(4,lea_l_rr_indexed,(W4 d, RR4 s, RR4 index, IMM factor)) + +/* write d to the long at the address contained in s+offset */ +MIDFUNC(3,mov_l_bRr,(RR4 d, RR4 s, IMM offset)) +{ 
+ int dreg=d; + if (isconst(d)) { + COMPCALL(mov_l_mr)(live.state[d].val+offset,s); + return; + } + + CLOBBER_MOV; + s=readreg(s,4); + d=readreg_offset(d,4); + offset+=get_offset(dreg); + + raw_mov_l_bRr(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_l_bRr,(RR4 d, RR4 s, IMM offset)) + +/* write the word at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_w_bRr,(RR4 d, RR2 s, IMM offset)) +{ + int dreg=d; + + if (isconst(d)) { + COMPCALL(mov_w_mr)(live.state[d].val+offset,s); + return; + } + + CLOBBER_MOV; + s=readreg(s,2); + d=readreg_offset(d,4); + offset+=get_offset(dreg); + raw_mov_w_bRr(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_w_bRr,(RR4 d, RR2 s, IMM offset)) + +MIDFUNC(3,mov_b_bRr,(RR4 d, RR1 s, IMM offset)) +{ + int dreg=d; + if (isconst(d)) { + COMPCALL(mov_b_mr)(live.state[d].val+offset,s); + return; + } + + CLOBBER_MOV; + s=readreg(s,1); + d=readreg_offset(d,4); + offset+=get_offset(dreg); + raw_mov_b_bRr(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_b_bRr,(RR4 d, RR1 s, IMM offset)) + +MIDFUNC(1,mid_bswap_32,(RW4 r)) +{ + + if (isconst(r)) { + uae_u32 oldv=live.state[r].val; + live.state[r].val=reverse32(oldv); + return; + } + + CLOBBER_SW32; + r=rmw(r,4,4); + raw_bswap_32(r); + unlock2(r); +} +MENDFUNC(1,mid_bswap_32,(RW4 r)) + +MIDFUNC(1,mid_bswap_16,(RW2 r)) +{ + if (isconst(r)) { + uae_u32 oldv=live.state[r].val; + live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) | + (oldv&0xffff0000); + return; + } + + CLOBBER_SW16; + r=rmw(r,2,2); + + raw_bswap_16(r); + unlock2(r); +} +MENDFUNC(1,mid_bswap_16,(RW2 r)) + +MIDFUNC(2,mov_l_rr,(W4 d, RR4 s)) +{ + int olds; + + if (d==s) { /* How pointless! 
*/ + return; + } + if (isconst(s)) { + COMPCALL(mov_l_ri)(d,live.state[s].val); + return; + } + olds=s; + disassociate(d); + s=readreg_offset(s,4); + live.state[d].realreg=s; + live.state[d].realind=live.nat[s].nholds; + live.state[d].val=live.state[olds].val; + live.state[d].validsize=4; + live.state[d].dirtysize=4; + set_status(d,DIRTY); + + live.nat[s].holds[live.nat[s].nholds]=d; + live.nat[s].nholds++; + log_clobberreg(d); + D2(panicbug("Added %d to nreg %d(%d), now holds %d regs", d,s,live.state[d].realind,live.nat[s].nholds)); + unlock2(s); +} +MENDFUNC(2,mov_l_rr,(W4 d, RR4 s)) + +MIDFUNC(2,mov_l_mr,(IMM d, RR4 s)) +{ + if (isconst(s)) { + COMPCALL(mov_l_mi)(d,live.state[s].val); + return; + } + CLOBBER_MOV; + s=readreg(s,4); + + raw_mov_l_mr(d,s); + unlock2(s); +} +MENDFUNC(2,mov_l_mr,(IMM d, RR4 s)) + +MIDFUNC(2,mov_w_mr,(IMM d, RR2 s)) +{ + if (isconst(s)) { + COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val); + return; + } + CLOBBER_MOV; + s=readreg(s,2); + + raw_mov_w_mr(d,s); + unlock2(s); +} +MENDFUNC(2,mov_w_mr,(IMM d, RR2 s)) + +MIDFUNC(2,mov_w_rm,(W2 d, IMM s)) +{ + CLOBBER_MOV; + d=writereg(d,2); + + raw_mov_w_rm(d,s); + unlock2(d); +} +MENDFUNC(2,mov_w_rm,(W2 d, IMM s)) + +MIDFUNC(2,mov_b_mr,(IMM d, RR1 s)) +{ + if (isconst(s)) { + COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val); + return; + } + + CLOBBER_MOV; + s=readreg(s,1); + + raw_mov_b_mr(d,s); + unlock2(s); +} +MENDFUNC(2,mov_b_mr,(IMM d, RR1 s)) + +MIDFUNC(2,mov_b_rm,(W1 d, IMM s)) +{ + CLOBBER_MOV; + d=writereg(d,1); + + raw_mov_b_rm(d,s); + unlock2(d); +} +MENDFUNC(2,mov_b_rm,(W1 d, IMM s)) + +MIDFUNC(2,mov_l_ri,(W4 d, IMM s)) +{ + set_const(d,s); + return; +} +MENDFUNC(2,mov_l_ri,(W4 d, IMM s)) + +MIDFUNC(2,mov_w_ri,(W2 d, IMM s)) +{ + CLOBBER_MOV; + d=writereg(d,2); + + raw_mov_w_ri(d,s); + unlock2(d); +} +MENDFUNC(2,mov_w_ri,(W2 d, IMM s)) + +MIDFUNC(2,mov_b_ri,(W1 d, IMM s)) +{ + CLOBBER_MOV; + d=writereg(d,1); + + raw_mov_b_ri(d,s); + unlock2(d); +} +MENDFUNC(2,mov_b_ri,(W1 d, IMM 
s)) + +MIDFUNC(2,test_l_ri,(RR4 d, IMM i)) +{ + CLOBBER_TEST; + d=readreg(d,4); + + raw_test_l_ri(d,i); + unlock2(d); +} +MENDFUNC(2,test_l_ri,(RR4 d, IMM i)) + +MIDFUNC(2,test_l_rr,(RR4 d, RR4 s)) +{ + CLOBBER_TEST; + d=readreg(d,4); + s=readreg(s,4); + + raw_test_l_rr(d,s);; + unlock2(d); + unlock2(s); +} +MENDFUNC(2,test_l_rr,(RR4 d, RR4 s)) + +MIDFUNC(2,test_w_rr,(RR2 d, RR2 s)) +{ + CLOBBER_TEST; + d=readreg(d,2); + s=readreg(s,2); + + raw_test_w_rr(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,test_w_rr,(RR2 d, RR2 s)) + +MIDFUNC(2,test_b_rr,(RR1 d, RR1 s)) +{ + CLOBBER_TEST; + d=readreg(d,1); + s=readreg(s,1); + + raw_test_b_rr(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,test_b_rr,(RR1 d, RR1 s)) + +MIDFUNC(2,and_l_ri,(RW4 d, IMM i)) +{ + if (isconst(d) && !needflags) { + live.state[d].val &= i; + return; + } + + CLOBBER_AND; + d=rmw(d,4,4); + + raw_and_l_ri(d,i); + unlock2(d); +} +MENDFUNC(2,and_l_ri,(RW4 d, IMM i)) + +MIDFUNC(2,and_l,(RW4 d, RR4 s)) +{ + CLOBBER_AND; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_and_l(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,and_l,(RW4 d, RR4 s)) + +MIDFUNC(2,and_w,(RW2 d, RR2 s)) +{ + CLOBBER_AND; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_and_w(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,and_w,(RW2 d, RR2 s)) + +MIDFUNC(2,and_b,(RW1 d, RR1 s)) +{ + CLOBBER_AND; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_and_b(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,and_b,(RW1 d, RR1 s)) + +MIDFUNC(2,or_l_ri,(RW4 d, IMM i)) +{ + if (isconst(d) && !needflags) { + live.state[d].val|=i; + return; + } + CLOBBER_OR; + d=rmw(d,4,4); + + raw_or_l_ri(d,i); + unlock2(d); +} +MENDFUNC(2,or_l_ri,(RW4 d, IMM i)) + +MIDFUNC(2,or_l,(RW4 d, RR4 s)) +{ + if (isconst(d) && isconst(s) && !needflags) { + live.state[d].val|=live.state[s].val; + return; + } + CLOBBER_OR; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_or_l(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,or_l,(RW4 d, RR4 s)) + +MIDFUNC(2,or_w,(RW2 d, RR2 s)) +{ + CLOBBER_OR; 
+ s=readreg(s,2); + d=rmw(d,2,2); + + raw_or_w(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,or_w,(RW2 d, RR2 s)) + +MIDFUNC(2,or_b,(RW1 d, RR1 s)) +{ + CLOBBER_OR; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_or_b(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,or_b,(RW1 d, RR1 s)) + +MIDFUNC(2,adc_l,(RW4 d, RR4 s)) +{ + CLOBBER_ADC; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_adc_l(d,s); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,adc_l,(RW4 d, RR4 s)) + +MIDFUNC(2,adc_w,(RW2 d, RR2 s)) +{ + CLOBBER_ADC; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_adc_w(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,adc_w,(RW2 d, RR2 s)) + +MIDFUNC(2,adc_b,(RW1 d, RR1 s)) +{ + CLOBBER_ADC; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_adc_b(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,adc_b,(RW1 d, RR1 s)) + +MIDFUNC(2,add_l,(RW4 d, RR4 s)) +{ + if (isconst(s)) { + COMPCALL(add_l_ri)(d,live.state[s].val); + return; + } + + CLOBBER_ADD; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_add_l(d,s); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,add_l,(RW4 d, RR4 s)) + +MIDFUNC(2,add_w,(RW2 d, RR2 s)) +{ + if (isconst(s)) { + COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val); + return; + } + + CLOBBER_ADD; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_add_w(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,add_w,(RW2 d, RR2 s)) + +MIDFUNC(2,add_b,(RW1 d, RR1 s)) +{ + if (isconst(s)) { + COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val); + return; + } + + CLOBBER_ADD; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_add_b(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,add_b,(RW1 d, RR1 s)) + +MIDFUNC(2,sub_l_ri,(RW4 d, IMM i)) +{ + if (!i && !needflags) + return; + if (isconst(d) && !needflags) { + live.state[d].val-=i; + return; + } +#if USE_OFFSET + if (!needflags) { + add_offset(d,-i); + return; + } +#endif + + CLOBBER_SUB; + d=rmw(d,4,4); + + raw_sub_l_ri(d,i); + unlock2(d); +} +MENDFUNC(2,sub_l_ri,(RW4 d, IMM i)) + +MIDFUNC(2,sub_w_ri,(RW2 d, IMM i)) +{ + if (!i && !needflags) + return; + + 
CLOBBER_SUB; + d=rmw(d,2,2); + + raw_sub_w_ri(d,i); + unlock2(d); +} +MENDFUNC(2,sub_w_ri,(RW2 d, IMM i)) + +MIDFUNC(2,sub_b_ri,(RW1 d, IMM i)) +{ + if (!i && !needflags) + return; + + CLOBBER_SUB; + d=rmw(d,1,1); + + raw_sub_b_ri(d,i); + + unlock2(d); +} +MENDFUNC(2,sub_b_ri,(RW1 d, IMM i)) + +MIDFUNC(2,add_l_ri,(RW4 d, IMM i)) +{ + if (!i && !needflags) + return; + if (isconst(d) && !needflags) { + live.state[d].val+=i; + return; + } +#if USE_OFFSET + if (!needflags) { + add_offset(d,i); + return; + } +#endif + CLOBBER_ADD; + d=rmw(d,4,4); + raw_add_l_ri(d,i); + unlock2(d); +} +MENDFUNC(2,add_l_ri,(RW4 d, IMM i)) + +MIDFUNC(2,add_w_ri,(RW2 d, IMM i)) +{ + if (!i && !needflags) + return; + + CLOBBER_ADD; + d=rmw(d,2,2); + + raw_add_w_ri(d,i); + unlock2(d); +} +MENDFUNC(2,add_w_ri,(RW2 d, IMM i)) + +MIDFUNC(2,add_b_ri,(RW1 d, IMM i)) +{ + if (!i && !needflags) + return; + + CLOBBER_ADD; + d=rmw(d,1,1); + + raw_add_b_ri(d,i); + + unlock2(d); +} +MENDFUNC(2,add_b_ri,(RW1 d, IMM i)) + +MIDFUNC(2,sbb_l,(RW4 d, RR4 s)) +{ + CLOBBER_SBB; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_sbb_l(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,sbb_l,(RW4 d, RR4 s)) + +MIDFUNC(2,sbb_w,(RW2 d, RR2 s)) +{ + CLOBBER_SBB; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_sbb_w(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,sbb_w,(RW2 d, RR2 s)) + +MIDFUNC(2,sbb_b,(RW1 d, RR1 s)) +{ + CLOBBER_SBB; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_sbb_b(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,sbb_b,(RW1 d, RR1 s)) + +MIDFUNC(2,sub_l,(RW4 d, RR4 s)) +{ + if (isconst(s)) { + COMPCALL(sub_l_ri)(d,live.state[s].val); + return; + } + + CLOBBER_SUB; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_sub_l(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,sub_l,(RW4 d, RR4 s)) + +MIDFUNC(2,sub_w,(RW2 d, RR2 s)) +{ + if (isconst(s)) { + COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val); + return; + } + + CLOBBER_SUB; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_sub_w(d,s); + unlock2(d); + unlock2(s); +} 
+MENDFUNC(2,sub_w,(RW2 d, RR2 s)) + +MIDFUNC(2,sub_b,(RW1 d, RR1 s)) +{ + if (isconst(s)) { + COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val); + return; + } + + CLOBBER_SUB; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_sub_b(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,sub_b,(RW1 d, RR1 s)) + +MIDFUNC(2,cmp_l,(RR4 d, RR4 s)) +{ + CLOBBER_CMP; + s=readreg(s,4); + d=readreg(d,4); + + raw_cmp_l(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,cmp_l,(RR4 d, RR4 s)) + +MIDFUNC(2,cmp_w,(RR2 d, RR2 s)) +{ + CLOBBER_CMP; + s=readreg(s,2); + d=readreg(d,2); + + raw_cmp_w(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,cmp_w,(RR2 d, RR2 s)) + +MIDFUNC(2,cmp_b,(RR1 d, RR1 s)) +{ + CLOBBER_CMP; + s=readreg(s,1); + d=readreg(d,1); + + raw_cmp_b(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,cmp_b,(RR1 d, RR1 s)) + +MIDFUNC(2,xor_l,(RW4 d, RR4 s)) +{ + CLOBBER_XOR; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_xor_l(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,xor_l,(RW4 d, RR4 s)) + +MIDFUNC(2,xor_w,(RW2 d, RR2 s)) +{ + CLOBBER_XOR; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_xor_w(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,xor_w,(RW2 d, RR2 s)) + +MIDFUNC(2,xor_b,(RW1 d, RR1 s)) +{ + CLOBBER_XOR; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_xor_b(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,xor_b,(RW1 d, RR1 s)) + +MIDFUNC(5,call_r_02,(RR4 r, RR4 in1, RR4 in2, IMM isize1, IMM isize2)) +{ + clobber_flags(); + in1=readreg_specific(in1,isize1,REG_PAR1); + in2=readreg_specific(in2,isize2,REG_PAR2); + r=readreg(r,4); + prepare_for_call_1(); + unlock2(r); + unlock2(in1); + unlock2(in2); + prepare_for_call_2(); + compemu_raw_call_r(r); +} +MENDFUNC(5,call_r_02,(RR4 r, RR4 in1, RR4 in2, IMM isize1, IMM isize2)) + +MIDFUNC(5,call_r_11,(W4 out1, RR4 r, RR4 in1, IMM osize, IMM isize)) +{ + clobber_flags(); + + if (osize==4) { + if (out1!=in1 && out1!=r) { + COMPCALL(forget_about)(out1); + } + } + else { + tomem_c(out1); + } + + in1=readreg_specific(in1,isize,REG_PAR1); + 
r=readreg(r,4); + + prepare_for_call_1(); + unlock2(in1); + unlock2(r); + + prepare_for_call_2(); + + compemu_raw_call_r(r); + + live.nat[REG_RESULT].holds[0]=out1; + live.nat[REG_RESULT].nholds=1; + live.nat[REG_RESULT].touched=touchcnt++; + + live.state[out1].realreg=REG_RESULT; + live.state[out1].realind=0; + live.state[out1].val=0; + live.state[out1].validsize=osize; + live.state[out1].dirtysize=osize; + set_status(out1,DIRTY); +} +MENDFUNC(5,call_r_11,(W4 out1, RR4 r, RR4 in1, IMM osize, IMM isize)) + +MIDFUNC(0,nop,(void)) +{ + raw_emit_nop(); +} +MENDFUNC(0,nop,(void)) + +/* forget_about() takes a mid-layer register */ +MIDFUNC(1,forget_about,(W4 r)) +{ + if (isinreg(r)) + disassociate(r); + live.state[r].val=0; + set_status(r,UNDEF); +} +MENDFUNC(1,forget_about,(W4 r)) + +MIDFUNC(1,f_forget_about,(FW r)) +{ + if (f_isinreg(r)) + f_disassociate(r); + live.fate[r].status=UNDEF; +} +MENDFUNC(1,f_forget_about,(FW r)) + +// ARM optimized functions + +MIDFUNC(2,arm_ADD_l,(RW4 d, RR4 s)) +{ + if (isconst(s)) { + COMPCALL(arm_ADD_l_ri)(d,live.state[s].val); + return; + } + + s=readreg(s,4); + d=rmw(d,4,4); + + raw_ADD_l_rr(d,s); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,arm_ADD_l,(RW4 d, RR4 s)) + +MIDFUNC(2,arm_ADD_l_ri,(RW4 d, IMM i)) +{ + if (!i) return; + if (isconst(d)) { + live.state[d].val+=i; + return; + } +#if USE_OFFSET + add_offset(d,i); + return; +#endif + d=rmw(d,4,4); + + raw_LDR_l_ri(REG_WORK1, i); + raw_ADD_l_rr(d,REG_WORK1); + unlock2(d); +} +MENDFUNC(2,arm_ADD_l_ri,(RW4 d, IMM i)) + +MIDFUNC(2,arm_ADD_l_ri8,(RW4 d, IMM i)) +{ + if (!i) return; + if (isconst(d)) { + live.state[d].val+=i; + return; + } +#if USE_OFFSET + add_offset(d,i); + return; +#endif + d=rmw(d,4,4); + + raw_ADD_l_rri(d,d,i); + unlock2(d); +} +MENDFUNC(2,arm_ADD_l_ri8,(RW4 d, IMM i)) + +MIDFUNC(2,arm_SUB_l_ri8,(RW4 d, IMM i)) +{ + if (!i) return; + if (isconst(d)) { + live.state[d].val-=i; + return; + } +#if USE_OFFSET + add_offset(d,-i); + return; +#endif + d=rmw(d,4,4); + + 
raw_SUB_l_rri(d,d,i); + unlock2(d); +} +MENDFUNC(2,arm_SUB_l_ri8,(RW4 d, IMM i)) + +MIDFUNC(2,arm_AND_l,(RW4 d, RR4 s)) +{ + s=readreg(s,4); + d=rmw(d,4,4); + + raw_AND_l_rr(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,arm_AND_l,(RW4 d, RR4 s)) + +MIDFUNC(2,arm_AND_w,(RW2 d, RR2 s)) +{ + s=readreg(s,2); + d=rmw(d,2,2); + + raw_AND_w_rr(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,arm_AND_w,(RW2 d, RR2 s)) + +MIDFUNC(2,arm_AND_b,(RW1 d, RR1 s)) +{ + s=readreg(s,1); + d=rmw(d,1,1); + + raw_AND_b_rr(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,arm_AND_b,(RW1 d, RR1 s)) + +MIDFUNC(2,arm_AND_l_ri8,(RW4 d, IMM i)) +{ + if (isconst(d)) { + live.state[d].val &= i; + return; + } + + d=rmw(d,4,4); + + raw_AND_l_ri(d,i); + unlock2(d); +} +MENDFUNC(2,arm_AND_l_ri8,(RW4 d, IMM i)) + +MIDFUNC(2,arm_EOR_b,(RW1 d, RR1 s)) +{ + s=readreg(s,1); + d=rmw(d,1,1); + + raw_EOR_b_rr(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,arm_EOR_b,(RW1 d, RR1 s)) + +MIDFUNC(2,arm_EOR_l,(RW4 d, RR4 s)) +{ + s=readreg(s,4); + d=rmw(d,4,4); + + raw_EOR_l_rr(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,arm_EOR_l,(RW4 d, RR4 s)) + +MIDFUNC(2,arm_EOR_w,(RW2 d, RR2 s)) +{ + s=readreg(s,2); + d=rmw(d,2,2); + + raw_EOR_w_rr(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,arm_EOR_w,(RW2 d, RR2 s)) + +MIDFUNC(2,arm_ORR_b,(RW1 d, RR1 s)) +{ + s=readreg(s,1); + d=rmw(d,1,1); + + raw_ORR_b_rr(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,arm_ORR_b,(RW1 d, RR1 s)) + +MIDFUNC(2,arm_ORR_l,(RW4 d, RR4 s)) +{ + if (isconst(d) && isconst(s)) { + live.state[d].val|=live.state[s].val; + return; + } + s=readreg(s,4); + d=rmw(d,4,4); + + raw_ORR_l_rr(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,arm_ORR_l,(RW4 d, RR4 s)) + +MIDFUNC(2,arm_ORR_w,(RW2 d, RR2 s)) +{ + s=readreg(s,2); + d=rmw(d,2,2); + + raw_ORR_w_rr(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,arm_ORR_w,(RW2 d, RR2 s)) + +MIDFUNC(2,arm_ROR_l_ri8,(RW4 r, IMM i)) +{ + if (!i) + return; + + r=rmw(r,4,4); + raw_ROR_l_ri(r,i); + 
unlock2(r); +} +MENDFUNC(2,arm_ROR_l_ri8,(RW4 r, IMM i)) + +// Other +static inline void flush_cpu_icache(void *start, void *stop) +{ + + register void *_beg __asm ("a1") = start; + register void *_end __asm ("a2") = stop; + register void *_flg __asm ("a3") = 0; +#ifdef __ARM_EABI__ + register unsigned long _scno __asm ("r7") = 0xf0002; + __asm __volatile ("swi 0x0 @ sys_cacheflush" + : "=r" (_beg) + : "0" (_beg), "r" (_end), "r" (_flg), "r" (_scno)); +#else + __asm __volatile ("swi 0x9f0002 @ sys_cacheflush" + : "=r" (_beg) + : "0" (_beg), "r" (_end), "r" (_flg)); +#endif +} + +static inline void write_jmp_target(uae_u32* jmpaddr, cpuop_func* a) { + *(jmpaddr) = (uae_u32) a; + flush_cpu_icache((void *) jmpaddr, (void *) &jmpaddr[1]); +} + +static inline void emit_jmp_target(uae_u32 a) { + emit_long((uae_u32) a); +} diff --git a/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_arm.h b/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_arm.h new file mode 100644 index 00000000..52541326 --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_arm.h @@ -0,0 +1,184 @@ +/* + * compiler/compemu_midfunc_arm.h - Native MIDFUNCS for ARM + * + * Copyright (c) 2014 Jens Heitmann of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * + * Adaptation for Basilisk II and improvements, copyright 2000-2002 + * Gwenole Beauchesne + * + * Basilisk II (C) 1997-2002 Christian Bauer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Note: + * File is included by compemu.h + * + */ + +// Arm optimized midfunc +DECLARE_MIDFUNC(arm_ADD_l(RW4 d, RR4 s)); +DECLARE_MIDFUNC(arm_ADD_l_ri(RW4 d, IMM i)); +DECLARE_MIDFUNC(arm_ADD_l_ri8(RW4 d, IMM i)); +DECLARE_MIDFUNC(arm_SUB_l_ri8(RW4 d, IMM i)); +DECLARE_MIDFUNC(arm_AND_b(RW1 d, RR1 s)); +DECLARE_MIDFUNC(arm_AND_w(RW2 d, RR2 s)); +DECLARE_MIDFUNC(arm_AND_l(RW4 d, RR4 s)); +DECLARE_MIDFUNC(arm_AND_l_ri8(RW4 d, IMM i)); +DECLARE_MIDFUNC(arm_EOR_b(RW1 d, RR1 s)); +DECLARE_MIDFUNC(arm_EOR_l(RW4 d, RR4 s)); +DECLARE_MIDFUNC(arm_EOR_w(RW2 d, RR2 s)); +DECLARE_MIDFUNC(arm_ORR_b(RW1 d, RR1 s)); +DECLARE_MIDFUNC(arm_ORR_l(RW4 d, RR4 s)); +DECLARE_MIDFUNC(arm_ORR_w(RW2 d, RR2 s)); +DECLARE_MIDFUNC(arm_ROR_l_ri8(RW4 r, IMM i)); + +// Emulated midfunc +DECLARE_MIDFUNC(bt_l_ri(RR4 r, IMM i)); +DECLARE_MIDFUNC(bt_l_rr(RR4 r, RR4 b)); +DECLARE_MIDFUNC(btc_l_rr(RW4 r, RR4 b)); +DECLARE_MIDFUNC(bts_l_rr(RW4 r, RR4 b)); +DECLARE_MIDFUNC(btr_l_rr(RW4 r, RR4 b)); +DECLARE_MIDFUNC(mov_l_rm(W4 d, IMM s)); +DECLARE_MIDFUNC(mov_l_rm_indexed(W4 d, IMM base, RR4 index, IMM factor)); +DECLARE_MIDFUNC(mov_l_mi(IMM d, IMM s)); +DECLARE_MIDFUNC(mov_w_mi(IMM d, IMM s)); +DECLARE_MIDFUNC(mov_b_mi(IMM d, IMM s)); +DECLARE_MIDFUNC(rol_b_ri(RW1 r, IMM i)); +DECLARE_MIDFUNC(rol_w_ri(RW2 r, IMM i)); +DECLARE_MIDFUNC(rol_l_rr(RW4 d, RR1 r)); +DECLARE_MIDFUNC(rol_w_rr(RW2 d, RR1 r)); +DECLARE_MIDFUNC(rol_b_rr(RW1 d, RR1 r)); +DECLARE_MIDFUNC(shll_l_rr(RW4 d, RR1 r)); +DECLARE_MIDFUNC(shll_w_rr(RW2 d, RR1 r)); +DECLARE_MIDFUNC(shll_b_rr(RW1 d, RR1 r)); +DECLARE_MIDFUNC(ror_b_ri(RR1 r, IMM i)); +DECLARE_MIDFUNC(ror_w_ri(RR2 r, IMM i)); +DECLARE_MIDFUNC(ror_l_ri(RR4 r, IMM i)); +DECLARE_MIDFUNC(ror_l_rr(RR4 d, RR1 r)); 
+DECLARE_MIDFUNC(ror_w_rr(RR2 d, RR1 r)); +DECLARE_MIDFUNC(ror_b_rr(RR1 d, RR1 r)); +DECLARE_MIDFUNC(shrl_l_rr(RW4 d, RR1 r)); +DECLARE_MIDFUNC(shrl_w_rr(RW2 d, RR1 r)); +DECLARE_MIDFUNC(shrl_b_rr(RW1 d, RR1 r)); +DECLARE_MIDFUNC(shra_l_rr(RW4 d, RR1 r)); +DECLARE_MIDFUNC(shra_w_rr(RW2 d, RR1 r)); +DECLARE_MIDFUNC(shra_b_rr(RW1 d, RR1 r)); +DECLARE_MIDFUNC(shll_l_ri(RW4 r, IMM i)); +DECLARE_MIDFUNC(shll_w_ri(RW2 r, IMM i)); +DECLARE_MIDFUNC(shll_b_ri(RW1 r, IMM i)); +DECLARE_MIDFUNC(shrl_l_ri(RW4 r, IMM i)); +DECLARE_MIDFUNC(shrl_w_ri(RW2 r, IMM i)); +DECLARE_MIDFUNC(shrl_b_ri(RW1 r, IMM i)); +DECLARE_MIDFUNC(shra_l_ri(RW4 r, IMM i)); +DECLARE_MIDFUNC(shra_w_ri(RW2 r, IMM i)); +DECLARE_MIDFUNC(shra_b_ri(RW1 r, IMM i)); +DECLARE_MIDFUNC(setcc(W1 d, IMM cc)); +DECLARE_MIDFUNC(setcc_m(IMM d, IMM cc)); +DECLARE_MIDFUNC(cmov_l_rr(RW4 d, RR4 s, IMM cc)); +DECLARE_MIDFUNC(bsf_l_rr(W4 d, RR4 s)); +DECLARE_MIDFUNC(pop_l(W4 d)); +DECLARE_MIDFUNC(push_l(RR4 s)); +DECLARE_MIDFUNC(sign_extend_16_rr(W4 d, RR2 s)); +DECLARE_MIDFUNC(sign_extend_8_rr(W4 d, RR1 s)); +DECLARE_MIDFUNC(zero_extend_16_rr(W4 d, RR2 s)); +DECLARE_MIDFUNC(zero_extend_8_rr(W4 d, RR1 s)); +DECLARE_MIDFUNC(imul_64_32(RW4 d, RW4 s)); +DECLARE_MIDFUNC(mul_64_32(RW4 d, RW4 s)); +DECLARE_MIDFUNC(imul_32_32(RW4 d, RR4 s)); +DECLARE_MIDFUNC(mov_b_rr(W1 d, RR1 s)); +DECLARE_MIDFUNC(mov_w_rr(W2 d, RR2 s)); +DECLARE_MIDFUNC(mov_l_rR(W4 d, RR4 s, IMM offset)); +DECLARE_MIDFUNC(mov_w_rR(W2 d, RR4 s, IMM offset)); +DECLARE_MIDFUNC(mov_l_brR(W4 d, RR4 s, IMM offset)); +DECLARE_MIDFUNC(mov_w_brR(W2 d, RR4 s, IMM offset)); +DECLARE_MIDFUNC(mov_b_brR(W1 d, RR4 s, IMM offset)); +DECLARE_MIDFUNC(mov_l_Ri(RR4 d, IMM i, IMM offset)); +DECLARE_MIDFUNC(mov_w_Ri(RR4 d, IMM i, IMM offset)); +DECLARE_MIDFUNC(mov_l_Rr(RR4 d, RR4 s, IMM offset)); +DECLARE_MIDFUNC(mov_w_Rr(RR4 d, RR2 s, IMM offset)); +DECLARE_MIDFUNC(lea_l_brr(W4 d, RR4 s, IMM offset)); +DECLARE_MIDFUNC(lea_l_brr_indexed(W4 d, RR4 s, RR4 index, IMM factor, IMM offset)); 
+DECLARE_MIDFUNC(lea_l_rr_indexed(W4 d, RR4 s, RR4 index, IMM factor)); +DECLARE_MIDFUNC(mov_l_bRr(RR4 d, RR4 s, IMM offset)); +DECLARE_MIDFUNC(mov_w_bRr(RR4 d, RR2 s, IMM offset)); +DECLARE_MIDFUNC(mov_b_bRr(RR4 d, RR1 s, IMM offset)); +DECLARE_MIDFUNC(mid_bswap_32(RW4 r)); +DECLARE_MIDFUNC(mid_bswap_16(RW2 r)); +DECLARE_MIDFUNC(mov_l_rr(W4 d, RR4 s)); +DECLARE_MIDFUNC(mov_l_mr(IMM d, RR4 s)); +DECLARE_MIDFUNC(mov_w_mr(IMM d, RR2 s)); +DECLARE_MIDFUNC(mov_w_rm(W2 d, IMM s)); +DECLARE_MIDFUNC(mov_b_mr(IMM d, RR1 s)); +DECLARE_MIDFUNC(mov_b_rm(W1 d, IMM s)); +DECLARE_MIDFUNC(mov_l_ri(W4 d, IMM s)); +DECLARE_MIDFUNC(mov_w_ri(W2 d, IMM s)); +DECLARE_MIDFUNC(mov_b_ri(W1 d, IMM s)); +DECLARE_MIDFUNC(test_l_ri(RR4 d, IMM i)); +DECLARE_MIDFUNC(test_l_rr(RR4 d, RR4 s)); +DECLARE_MIDFUNC(test_w_rr(RR2 d, RR2 s)); +DECLARE_MIDFUNC(test_b_rr(RR1 d, RR1 s)); +DECLARE_MIDFUNC(and_l_ri(RW4 d, IMM i)); +DECLARE_MIDFUNC(and_l(RW4 d, RR4 s)); +DECLARE_MIDFUNC(and_w(RW2 d, RR2 s)); +DECLARE_MIDFUNC(and_b(RW1 d, RR1 s)); +DECLARE_MIDFUNC(or_l_ri(RW4 d, IMM i)); +DECLARE_MIDFUNC(or_l(RW4 d, RR4 s)); +DECLARE_MIDFUNC(or_w(RW2 d, RR2 s)); +DECLARE_MIDFUNC(or_b(RW1 d, RR1 s)); +DECLARE_MIDFUNC(adc_l(RW4 d, RR4 s)); +DECLARE_MIDFUNC(adc_w(RW2 d, RR2 s)); +DECLARE_MIDFUNC(adc_b(RW1 d, RR1 s)); +DECLARE_MIDFUNC(add_l(RW4 d, RR4 s)); +DECLARE_MIDFUNC(add_w(RW2 d, RR2 s)); +DECLARE_MIDFUNC(add_b(RW1 d, RR1 s)); +DECLARE_MIDFUNC(sub_l_ri(RW4 d, IMM i)); +DECLARE_MIDFUNC(sub_w_ri(RW2 d, IMM i)); +DECLARE_MIDFUNC(sub_b_ri(RW1 d, IMM i)); +DECLARE_MIDFUNC(add_l_ri(RW4 d, IMM i)); +DECLARE_MIDFUNC(add_w_ri(RW2 d, IMM i)); +DECLARE_MIDFUNC(add_b_ri(RW1 d, IMM i)); +DECLARE_MIDFUNC(sbb_l(RW4 d, RR4 s)); +DECLARE_MIDFUNC(sbb_w(RW2 d, RR2 s)); +DECLARE_MIDFUNC(sbb_b(RW1 d, RR1 s)); +DECLARE_MIDFUNC(sub_l(RW4 d, RR4 s)); +DECLARE_MIDFUNC(sub_w(RW2 d, RR2 s)); +DECLARE_MIDFUNC(sub_b(RW1 d, RR1 s)); +DECLARE_MIDFUNC(cmp_l(RR4 d, RR4 s)); +DECLARE_MIDFUNC(cmp_w(RR2 d, RR2 s)); +DECLARE_MIDFUNC(cmp_b(RR1 
d, RR1 s)); +DECLARE_MIDFUNC(xor_l(RW4 d, RR4 s)); +DECLARE_MIDFUNC(xor_w(RW2 d, RR2 s)); +DECLARE_MIDFUNC(xor_b(RW1 d, RR1 s)); +DECLARE_MIDFUNC(call_r_02(RR4 r, RR4 in1, RR4 in2, IMM isize1, IMM isize2)); +DECLARE_MIDFUNC(call_r_11(W4 out1, RR4 r, RR4 in1, IMM osize, IMM isize)); +DECLARE_MIDFUNC(live_flags(void)); +DECLARE_MIDFUNC(dont_care_flags(void)); +DECLARE_MIDFUNC(duplicate_carry(void)); +DECLARE_MIDFUNC(restore_carry(void)); +DECLARE_MIDFUNC(start_needflags(void)); +DECLARE_MIDFUNC(end_needflags(void)); +DECLARE_MIDFUNC(make_flags_live(void)); +DECLARE_MIDFUNC(forget_about(W4 r)); +DECLARE_MIDFUNC(nop(void)); + +DECLARE_MIDFUNC(f_forget_about(FW r)); + + + + diff --git a/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_arm2.cpp b/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_arm2.cpp new file mode 100644 index 00000000..9da2c058 --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_arm2.cpp @@ -0,0 +1,5428 @@ +/* + * compiler/compemu_midfunc_arm.cpp - Native MIDFUNCS for ARM (JIT v2) + * + * Copyright (c) 2014 Jens Heitmann of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * + * Adaptation for Basilisk II and improvements, copyright 2000-2002 + * Gwenole Beauchesne + * + * Basilisk II (C) 1997-2002 Christian Bauer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Note: + * File is included by compemu_support.cpp + * + */ + +const uae_u32 ARM_CCR_MAP[] = { 0, ARM_C_FLAG, // 1 C + ARM_V_FLAG, // 2 V + ARM_C_FLAG | ARM_V_FLAG, // 3 VC + ARM_Z_FLAG, // 4 Z + ARM_Z_FLAG | ARM_C_FLAG, // 5 ZC + ARM_Z_FLAG | ARM_V_FLAG, // 6 ZV + ARM_Z_FLAG | ARM_C_FLAG | ARM_V_FLAG, // 7 ZVC + ARM_N_FLAG, // 8 N + ARM_N_FLAG | ARM_C_FLAG, // 9 NC + ARM_N_FLAG | ARM_V_FLAG, // 10 NV + ARM_N_FLAG | ARM_C_FLAG | ARM_V_FLAG, // 11 NVC + ARM_N_FLAG | ARM_Z_FLAG, // 12 NZ + ARM_N_FLAG | ARM_Z_FLAG | ARM_C_FLAG, // 13 NZC + ARM_N_FLAG | ARM_Z_FLAG | ARM_V_FLAG, // 14 NZV + ARM_N_FLAG | ARM_Z_FLAG | ARM_C_FLAG | ARM_V_FLAG, // 15 NZVC + }; + +// First we start with some helper functions (may be moved to codegen_arm) +static inline void UNSIGNED8_IMM_2_REG(W4 r, IMM v) { + MOV_ri8(r, (uint8) v); +} + +static inline void SIGNED8_IMM_2_REG(W4 r, IMM v) { + if (v & 0x80) { + MVN_ri8(r, (uint8) ~v); + } else { + MOV_ri8(r, (uint8) v); + } +} + +static inline void UNSIGNED16_IMM_2_REG(W4 r, IMM v) { + MOV_ri8(r, (uint8) v); + ORR_rri8RORi(r, r, (uint8)(v >> 8), 24); +} + +static inline void SIGNED16_IMM_2_REG(W4 r, IMM v) { +#if defined(ARMV6_ASSEMBLY) + MOV_ri8(r, (uint8) v); + ORR_rri8RORi(r, r, (uint8)(v >> 8), 24); + SXTH_rr(r, r); +#else + MOV_ri8(r, (uint8)(v << 16)); + ORR_rri8RORi(r, r, (uint8)(v >> 8), 8); + ASR_rri(r, r, 16); +#endif +} + +static inline void UNSIGNED8_REG_2_REG(W4 d, RR4 s) { +#if defined(ARMV6_ASSEMBLY) + UXTB_rr(d, s); +#else + ROR_rri(d, s, 8); + LSR_rri(d, d, 24); +#endif +} + +static inline void SIGNED8_REG_2_REG(W4 d, RR4 s) { +#if defined(ARMV6_ASSEMBLY) + SXTB_rr(d, s); +#else + ROR_rri(d, s, 8); + ASR_rri(d, d, 24); +#endif +} + +static inline void UNSIGNED16_REG_2_REG(W4 d, RR4 s) { +#if 
defined(ARMV6_ASSEMBLY) + UXTH_rr(d, s); +#else + LSL_rri(d, s, 16); + LSR_rri(d, d, 16); +#endif +} + +static inline void SIGNED16_REG_2_REG(W4 d, RR4 s) { +#if defined(ARMV6_ASSEMBLY) + SXTH_rr(d, s); +#else + LSL_rri(d, s, 16); + ASR_rri(d, d, 16); +#endif +} + +#define ZERO_EXTEND_8_REG_2_REG(d,s) UNSIGNED8_REG_2_REG(d,s) +#define ZERO_EXTEND_16_REG_2_REG(d,s) UNSIGNED16_REG_2_REG(d,s) +#define SIGN_EXTEND_8_REG_2_REG(d,s) SIGNED8_REG_2_REG(d,s) +#define SIGN_EXTEND_16_REG_2_REG(d,s) SIGNED16_REG_2_REG(d,s) + +MIDFUNC(0,restore_inverted_carry,(void)) +{ + RR4 r=readreg(FLAGX,4); + MRS_CPSR(REG_WORK1); + TEQ_ri(r,1); + CC_BIC_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_C_FLAG); + CC_ORR_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_C_FLAG); + MSR_CPSRf_r(REG_WORK1); + unlock2(r); +} +MENDFUNC(0,restore_inverted_carry,(void)) + +/* + * ADD + * Operand Syntax: , Dn + * Dn, + * + * Operand Size: 8,16,32 + * + * X Set the same as the carry bit. + * N Set if the result is negative. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Set if an overflow is generated. Cleared otherwise. + * C Set if a carry is generated. Cleared otherwise. 
+ * + */ +MIDFUNC(3,jnf_ADD_imm,(W4 d, RR4 s, IMM v)) +{ + if (isconst(s)) { + set_const(d,live.state[s].val+v); + return; + } + + s=readreg(s,4); + d=writereg(d,4); + + compemu_raw_mov_l_ri(REG_WORK1, v); + ADD_rrr(d,s,REG_WORK1); + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jnf_ADD_imm,(W4 d, RR4 s, IMM v)) + +MIDFUNC(3,jnf_ADD,(W4 d, RR4 s, RR4 v)) +{ + if (isconst(v)) { + COMPCALL(jnf_ADD_imm)(d,s,live.state[v].val); + return; + } + + v=readreg(v,4); + s=readreg(s,4); + d=writereg(d,4); + + ADD_rrr(d,s,v); + + unlock2(d); + unlock2(s); + unlock2(v); +} +MENDFUNC(3,jnf_ADD,(W4 d, RR4 s, RR4 v)) + +MIDFUNC(3,jff_ADD_b_imm,(W4 d, RR1 s, IMM v)) +{ + s=readreg(s,4); + d=writereg(d,4); + + SIGNED8_IMM_2_REG(REG_WORK2, (uint8)v); + SIGNED8_REG_2_REG(REG_WORK1, s); + ADDS_rrr(d,REG_WORK1,REG_WORK2); + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jff_ADD_b_imm,(W4 d, RR1 s, IMM v)) + +MIDFUNC(3,jff_ADD_b,(W4 d, RR1 s, RR1 v)) +{ + if (isconst(v)) { + COMPCALL(jff_ADD_b_imm)(d,s,live.state[v].val); + return; + } + + v=readreg(v,4); + s=readreg(s,4); + d=writereg(d,4); + + SIGNED8_REG_2_REG(REG_WORK1, s); + SIGNED8_REG_2_REG(REG_WORK2, v); + ADDS_rrr(d,REG_WORK1,REG_WORK2); + + unlock2(d); + unlock2(s); + unlock2(v); +} +MENDFUNC(3,jff_ADD_b,(W4 d, RR1 s, RR1 v)) + +MIDFUNC(3,jff_ADD_w_imm,(W4 d, RR2 s, IMM v)) +{ + s=readreg(s,4); + d=writereg(d,4); + + SIGNED16_IMM_2_REG(REG_WORK2, (uint16)v); + SIGNED16_REG_2_REG(REG_WORK1, s); + ADDS_rrr(d,REG_WORK1,REG_WORK2); + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jff_ADD_w_imm,(W4 d, RR2 s, IMM v)) + +MIDFUNC(3,jff_ADD_w,(W4 d, RR2 s, RR2 v)) +{ + if (isconst(v)) { + COMPCALL(jff_ADD_w_imm)(d,s,live.state[v].val); + return; + } + v=readreg(v,4); + s=readreg(s,4); + d=writereg(d,4); + + SIGNED16_REG_2_REG(REG_WORK1, s); + SIGNED16_REG_2_REG(REG_WORK2, v); + ADDS_rrr(d,REG_WORK1,REG_WORK2); + + unlock2(d); + unlock2(s); + unlock2(v); +} +MENDFUNC(3,jff_ADD_w,(W4 d, RR2 s, RR2 v)) + +MIDFUNC(3,jff_ADD_l_imm,(W4 d, RR4 s, IMM v)) 
+{ + s=readreg(s,4); + d=writereg(d,4); + + compemu_raw_mov_l_ri(REG_WORK2, v); + ADDS_rrr(d,s,REG_WORK2); + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jff_ADD_l_imm,(W4 d, RR4 s, IMM v)) + +MIDFUNC(3,jff_ADD_l,(W4 d, RR4 s, RR4 v)) +{ + if (isconst(v)) { + COMPCALL(jff_ADD_l_imm)(d,s,live.state[v].val); + return; + } + + v=readreg(v,4); + s=readreg(s,4); + d=writereg(d,4); + + ADDS_rrr(d,s,v); + + unlock2(d); + unlock2(s); + unlock2(v); +} +MENDFUNC(3,jff_ADD_l,(W4 d, RR4 s, RR4 v)) + +/* + * ADDA + * Operand Syntax: , An + * + * Operand Size: 16,32 + * + * Flags: Not affected. + * + */ +MIDFUNC(2,jnf_ADDA_b,(W4 d, RR1 s)) +{ + s=readreg(s,4); + d=rmw(d,4,4); + + SIGNED8_REG_2_REG(REG_WORK1,s); + ADD_rrr(d,d,REG_WORK1); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jnf_ADDA_b,(W4 d, RR1 s)) + +MIDFUNC(2,jnf_ADDA_w,(W4 d, RR2 s)) +{ + s=readreg(s,4); + d=rmw(d,4,4); + + SIGNED16_REG_2_REG(REG_WORK1,s); + ADD_rrr(d,d,REG_WORK1); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jnf_ADDA_w,(W4 d, RR2 s)) + +MIDFUNC(2,jnf_ADDA_l,(W4 d, RR4 s)) +{ + s=readreg(s,4); + d=rmw(d,4,4); + + ADD_rrr(d,d,s); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jnf_ADDA_l,(W4 d, RR4 s)) + +/* + * ADDX + * Operand Syntax: Dy, Dx + * -(Ay), -(Ax) + * + * Operand Size: 8,16,32 + * + * X Set the same as the carry bit. + * N Set if the result is negative. Cleared otherwise. + * Z Cleared if the result is nonzero; unchanged otherwise. + * V Set if an overflow is generated. Cleared otherwise. + * C Set if a carry is generated. Cleared otherwise. + * + * Attention: Z is cleared only if the result is nonzero. 
Unchanged otherwise + * + */ +MIDFUNC(3,jnf_ADDX,(W4 d, RR4 s, RR4 v)) +{ + s=readreg(s,4); + v=readreg(v,4); + d=writereg(d,4); + + ADC_rrr(d,s,v); + + unlock2(d); + unlock2(s); + unlock2(v); +} +MENDFUNC(3,jnf_ADDX,(W4 d, RR4 s, RR4 v)) + +MIDFUNC(3,jff_ADDX_b,(W4 d, RR1 s, RR1 v)) +{ + s=readreg(s,4); + v=readreg(v,4); + d=writereg(d,4); + + CC_MVN_ri(NATIVE_CC_EQ, REG_WORK2, 0); + CC_MVN_ri(NATIVE_CC_NE, REG_WORK2, ARM_Z_FLAG); + PUSH(REG_WORK2); + + SIGNED8_REG_2_REG(REG_WORK1, s); + SIGNED8_REG_2_REG(REG_WORK2, v); + ADCS_rrr(d,REG_WORK1,REG_WORK2); + + POP(REG_WORK2); + MRS_CPSR(REG_WORK1); + AND_rrr(REG_WORK1, REG_WORK1, REG_WORK2); + MSR_CPSR_r(REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(v); +} +MENDFUNC(3,jff_ADDX_b,(W4 d, RR1 s, RR1 v)) + +MIDFUNC(3,jff_ADDX_w,(W4 d, RR2 s, RR2 v)) +{ + s=readreg(s,4); + v=readreg(v,4); + d=writereg(d,4); + + CC_MVN_ri(NATIVE_CC_EQ, REG_WORK2, 0); + CC_MVN_ri(NATIVE_CC_NE, REG_WORK2, ARM_Z_FLAG); + PUSH(REG_WORK2); + + SIGNED16_REG_2_REG(REG_WORK1, s); + SIGNED16_REG_2_REG(REG_WORK2, v); + ADCS_rrr(d,REG_WORK1,REG_WORK2); + + POP(REG_WORK2); + MRS_CPSR(REG_WORK1); + AND_rrr(REG_WORK1, REG_WORK1, REG_WORK2); + MSR_CPSR_r(REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(v); +} +MENDFUNC(3,jff_ADDX_w,(W4 d, RR2 s, RR2 v)) + +MIDFUNC(3,jff_ADDX_l,(W4 d, RR4 s, RR4 v)) +{ + s=readreg(s,4); + v=readreg(v,4); + d=writereg(d,4); + + CC_MVN_ri(NATIVE_CC_EQ, REG_WORK2, 0); + CC_MVN_ri(NATIVE_CC_NE, REG_WORK2, ARM_Z_FLAG); + PUSH(REG_WORK2); + + ADCS_rrr(d,s,v); + + POP(REG_WORK2); + MRS_CPSR(REG_WORK1); + AND_rrr(REG_WORK1, REG_WORK1, REG_WORK2); + MSR_CPSR_r(REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(v); +} +MENDFUNC(3,jff_ADDX_l,(W4 d, RR4 s, RR4 v)) + +/* + * ANDI + * Operand Syntax: #, CCR + * + * Operand Size: 8 + * + * X Cleared if bit 4 of immediate operand is zero. Unchanged otherwise. + * N Cleared if bit 3 of immediate operand is zero. Unchanged otherwise. 
+ * Z Cleared if bit 2 of immediate operand is zero. Unchanged otherwise. + * V Cleared if bit 1 of immediate operand is zero. Unchanged otherwise. + * C Cleared if bit 0 of immediate operand is zero. Unchanged otherwise. + * + */ +MIDFUNC(1,jff_ANDSR,(IMM s, IMM x)) +{ + MRS_CPSR(REG_WORK1); + AND_rri(REG_WORK1, REG_WORK1, s); + MSR_CPSRf_r(REG_WORK1); + + if (!x) { + compemu_raw_mov_l_ri(REG_WORK1, (uintptr)live.state[FLAGX].mem); + MOV_ri(REG_WORK2, 0); + STRB_rR(REG_WORK2, REG_WORK1); + } +} +MENDFUNC(1,jff_ANDSR,(IMM s)) + +/* + * AND + * Operand Syntax: , Dn + * Dn, + * + * Operand Size: 8,16,32 + * + * X Not affected. + * N Set if the most significant bit of the result is set. + * Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Always cleared. + * C Always cleared. + * + */ +MIDFUNC(3,jnf_AND,(W4 d, RR4 s, RR4 v)) +{ + if (isconst(s) && isconst(v)) { + set_const(d, + live.state[s].val&live.state[v].val); + return; + } + + v=readreg(v,4); + s=readreg(s,4); + d=writereg(d,4); + + AND_rrr(d, s, v); + + unlock2(v); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jnf_AND,(RW4 d, RR4 s, RR4 v)) + +MIDFUNC(3,jff_AND_b,(W4 d, RR1 s, RR1 v)) +{ + v=readreg(v,4); + s=readreg(s,4); + d=writereg(d,4); + + SIGNED8_REG_2_REG(REG_WORK1, s); + SIGNED8_REG_2_REG(REG_WORK2, v); + MSR_CPSRf_i(0); + ANDS_rrr(d, REG_WORK1, REG_WORK2); + + unlock2(v); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jff_AND_b,(RW4 d, RR1 s, RR1 v)) + +MIDFUNC(3,jff_AND_w,(W4 d, RR2 s, RR2 v)) +{ + v=readreg(v,4); + s=readreg(s,4); + d=writereg(d,4); + + SIGNED16_REG_2_REG(REG_WORK1, s); + SIGNED16_REG_2_REG(REG_WORK2, v); + MSR_CPSRf_i(0); + ANDS_rrr(d, REG_WORK1, REG_WORK2); + + unlock2(v); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jff_AND_w,(RW4 d, RR2 s, RR2 v)) + +MIDFUNC(3,jff_AND_l,(W4 d, RR4 s, RR4 v)) +{ + v=readreg(v,4); + s=readreg(s,4); + d=writereg(d,4); + + MSR_CPSRf_i(0); + ANDS_rrr(d, s,v); + + unlock2(v); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jff_AND_l,(RW4 
d, RR4 s, RR4 v)) + +/* + * ASL + * Operand Syntax: Dx, Dy + * #, Dy + * + * + * Operand Size: 8,16,32 + * + * X Set according to the last bit shifted out of the operand. Unaffected for a shift count of zero. + * N Set if the most significant bit of the result is set. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Set if the most significant bit is changed at any time during the shift operation. Cleared otherwise. + * C Set according to the last bit shifted out of the operand. Unaffected for a shift count of zero. + * + */ +MIDFUNC(3,jff_ASL_b_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + LSL_rri(d, s, 24); + if (i) { + MRS_CPSR(REG_WORK1); // store flags + BIC_rri(REG_WORK1, REG_WORK1, ARM_N_FLAG|ARM_Z_FLAG|ARM_V_FLAG);// Clear everything except C + PUSH(REG_WORK1); + + // Calculate V Flag + MVN_ri(REG_WORK2, 0); + LSR_rri(REG_WORK2, REG_WORK2, (i+1)); + MVN_rr(REG_WORK2, REG_WORK2); + AND_rrr(REG_WORK1, d, REG_WORK2); + TST_rr(REG_WORK1, REG_WORK1); + CC_TEQ_rr(NATIVE_CC_NE, REG_WORK1, REG_WORK2); + POP(REG_WORK1); + CC_ORR_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_V_FLAG); + + MSR_CPSRf_r(REG_WORK1);// restore flags + + LSLS_rri(d,d,i); + } else { + MSR_CPSRf_i(0); + TST_rr(d,d); + } + REV_rr(d,d); + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jff_ASL_b_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jff_ASL_w_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + LSL_rri(d, s, 16); + if (i) { + MRS_CPSR(REG_WORK1); // store flags + BIC_rri(REG_WORK1, REG_WORK1, ARM_N_FLAG|ARM_Z_FLAG|ARM_V_FLAG);// Clear everything except C + PUSH(REG_WORK1); + + // Calculate V Flag + MVN_ri(REG_WORK2, 0); + LSR_rri(REG_WORK2, REG_WORK2, (i+1)); + MVN_rr(REG_WORK2, REG_WORK2); + AND_rrr(REG_WORK1, d, REG_WORK2); + TST_rr(REG_WORK1, REG_WORK1); + CC_TEQ_rr(NATIVE_CC_NE, REG_WORK1, REG_WORK2); + POP(REG_WORK1); + CC_ORR_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_V_FLAG); + + MSR_CPSRf_r(REG_WORK1);// restore 
flags + + LSLS_rri(d,d,i); + } else { + MSR_CPSRf_i(0); + TST_rr(d,d); + } + ASR_rri(d,d, 16); + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jff_ASL_w_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jff_ASL_l_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + if (i) { + MRS_CPSR(REG_WORK1); // store flags + BIC_rri(REG_WORK1, REG_WORK1, ARM_N_FLAG|ARM_Z_FLAG|ARM_V_FLAG);// Clear everything except C + PUSH(REG_WORK1); + + // Calculate V Flag + MVN_ri(REG_WORK2, 0); + LSR_rri(REG_WORK2, REG_WORK2, (i+1)); + MVN_rr(REG_WORK2, REG_WORK2); + AND_rrr(REG_WORK1, s, REG_WORK2); + TST_rr(REG_WORK1, REG_WORK1); + CC_TEQ_rr(NATIVE_CC_NE, REG_WORK1, REG_WORK2); + POP(REG_WORK1); + CC_ORR_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_V_FLAG); + + MSR_CPSRf_r(REG_WORK1);// restore flags + + LSLS_rri(d,s,i); + } else { + MSR_CPSRf_i(0); + MOVS_rr(d, s); + } + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jff_ASL_l_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jff_ASL_b_reg,(W4 d, RR4 s, RR4 i)) +{ + i=readreg(i,4); + s=readreg(s,4); + d=writereg(d,4); + + // Calculate V Flag + MRS_CPSR(REG_WORK1);// store flags + BIC_rri(REG_WORK1, REG_WORK1, ARM_N_FLAG|ARM_Z_FLAG|ARM_V_FLAG);// Clear everything except C + PUSH(REG_WORK1); + + LSL_rri(d, s, 24); + // Calculate V Flag + MVN_ri(REG_WORK2, 0); + LSR_rrr(REG_WORK2, REG_WORK2, i); + LSR_rri(REG_WORK2, REG_WORK2, 1); + MVN_rr(REG_WORK2, REG_WORK2); + AND_rrr(REG_WORK1, d, REG_WORK2); + TST_rr(REG_WORK1, REG_WORK1); + CC_TEQ_rr(NATIVE_CC_NE, REG_WORK1, REG_WORK2); + POP(REG_WORK1); + CC_ORR_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_V_FLAG); + + MSR_CPSRf_r(REG_WORK1);// restore flags + + AND_rri(REG_WORK2, i, 63); + LSLS_rrr(d,d,REG_WORK2); + ASR_rri(d,d, 24); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jff_ASL_b_reg,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jff_ASL_w_reg,(W4 d, RR4 s, RR4 i)) +{ + i=readreg(i,4); + s=readreg(s,4); + d=writereg(d,4); + + // Calculate V Flag + MRS_CPSR(REG_WORK1);// store flags + BIC_rri(REG_WORK1, 
REG_WORK1, ARM_N_FLAG|ARM_Z_FLAG|ARM_V_FLAG);// Clear everything except C + PUSH(REG_WORK1); + + LSL_rri(d, s, 16); + // Calculate V Flag + MVN_ri(REG_WORK2, 0); + LSR_rrr(REG_WORK2, REG_WORK2, i); + LSR_rri(REG_WORK2, REG_WORK2, 1); + MVN_rr(REG_WORK2, REG_WORK2); + AND_rrr(REG_WORK1, d, REG_WORK2); + TST_rr(REG_WORK1, REG_WORK1); + CC_TEQ_rr(NATIVE_CC_NE, REG_WORK1, REG_WORK2); + POP(REG_WORK1); + CC_ORR_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_V_FLAG); + + MSR_CPSRf_r(REG_WORK1);// restore flags + + AND_rri(REG_WORK2, i, 63); + LSLS_rrr(d,d,REG_WORK2); + ASR_rri(d,d, 16); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jff_ASL_w_reg,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jff_ASL_l_reg,(W4 d, RR4 s, RR4 i)) +{ + i=readreg(i,4); + s=readreg(s,4); + d=writereg(d,4); + + // Calculate V Flag + MRS_CPSR(REG_WORK1);// store flags + BIC_rri(REG_WORK1, REG_WORK1, ARM_N_FLAG|ARM_Z_FLAG|ARM_V_FLAG);// Clear everything except C + PUSH(REG_WORK1); + + // Calculate V Flag + MVN_ri(REG_WORK2, 0); + LSR_rrr(REG_WORK2, REG_WORK2, i); + LSR_rri(REG_WORK2, REG_WORK2, 1); + MVN_rr(REG_WORK2, REG_WORK2); + AND_rrr(REG_WORK1, s, REG_WORK2); + TST_rr(REG_WORK1, REG_WORK1); + CC_TEQ_rr(NATIVE_CC_NE, REG_WORK1, REG_WORK2); + POP(REG_WORK1); + CC_ORR_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_V_FLAG); + + MSR_CPSRf_r(REG_WORK1);// restore flags + + AND_rri(REG_WORK2, i, 63); + LSLS_rrr(d,s,REG_WORK2); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jff_ASL_l_reg,(W4 d, RR4 s, RR4 i)) + +/* + * ASLW + * Operand Syntax: + * + * Operand Size: 16 + * + * X Set according to the last bit shifted out of the operand. + * N Set if the most significant bit of the result is set. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Set if the most significant bit is changed at any time during the shift operation. Cleared otherwise. + * C Set according to the last bit shifted out of the operand. 
+ * + */ +MIDFUNC(2,jnf_ASLW,(W4 d, RR4 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + LSL_rri(d,s,1); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jnf_ASLW,(W4 d, RR4 s)) + +MIDFUNC(2,jff_ASLW,(W4 d, RR4 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + MSR_CPSRf_i(0); + LSLS_rri(d,s,17); + + MRS_CPSR(REG_WORK1); + CC_ORR_rri(NATIVE_CC_MI, REG_WORK1, REG_WORK1, ARM_V_FLAG); + CC_EOR_rri(NATIVE_CC_CS, REG_WORK1, REG_WORK1, ARM_V_FLAG); + MSR_CPSRf_r(REG_WORK1); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jff_ASLW,(W4 d, RR4 s)) + +/* + * ASR + * Operand Syntax: Dx, Dy + * #, Dy + * + * + * Operand Size: 8,16,32 + * + * X Set according to the last bit shifted out of the operand. Unaffected for a shift count of zero. + * N Set if the most significant bit of the result is set. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Set if the most significant bit is changed at any time during the shift operation. Cleared otherwise. + * C Set according to the last bit shifted out of the operand. Unaffected for a shift count of zero. 
+ * + */ +MIDFUNC(3,jnf_ASR_b_imm,(W4 d, RR4 s, IMM i)) +{ + if (!i) return; + + s=readreg(s,4); + d=writereg(d,4); + + SIGNED8_REG_2_REG(d, s); + ASR_rri(d,d,i); + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jnf_ASR_b_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jnf_ASR_w_imm,(W4 d, RR4 s, IMM i)) +{ + if (!i) return; + + s=readreg(s,4); + d=writereg(d,4); + + SIGNED16_REG_2_REG(d, s); + ASR_rri(d,d,i); + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jnf_ASR_w_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jnf_ASR_l_imm,(W4 d, RR4 s, IMM i)) +{ + if (!i) return; + + s=readreg(s,4); + d=writereg(d,4); + + ASR_rri(d,s,i); + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jnf_ASR_l_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jff_ASR_b_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + SIGNED8_REG_2_REG(d, s); + if (i) { + MSR_CPSRf_i(0); + ASRS_rri(d,d,i); + } else { + CC_MSR_CPSRf_r(NATIVE_CC_CC, 0); // Clear everything except C + CC_MSR_CPSRf_r(NATIVE_CC_CS, ARM_C_FLAG);// Clear everything except C + TST_rr(d,d); + } + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jff_ASR_b_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jff_ASR_w_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + SIGNED16_REG_2_REG(d, s); + if (i) { + MSR_CPSRf_i(0); + ASRS_rri(d,d,i); + } else { + CC_MSR_CPSRf_r(NATIVE_CC_CC, 0); // Clear everything except C + CC_MSR_CPSRf_r(NATIVE_CC_CS, ARM_C_FLAG);// Clear everything except C + TST_rr(d,d); + } + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jff_ASR_w_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jff_ASR_l_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + if (i) { + MSR_CPSRf_i(0); + ASRS_rri(d,s,i); + } else { + CC_MSR_CPSRf_r(NATIVE_CC_CC, 0); // Clear everything except C + CC_MSR_CPSRf_r(NATIVE_CC_CS, ARM_C_FLAG);// Clear everything except C + TST_rr(s,s); + } + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jff_ASR_l_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jnf_ASR_b_reg,(W4 d, RR4 s, RR4 i)) +{ + i=readreg(i,4); + s=readreg(s,4); + 
d=writereg(d,4); + + SIGNED8_REG_2_REG(d, s); + AND_rri(REG_WORK1, i, 63); + ASR_rrr(d,d,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jnf_ASR_b_reg,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jnf_ASR_w_reg,(W4 d, RR4 s, RR4 i)) +{ + i=readreg(i,4); + s=readreg(s,4); + d=writereg(d,4); + + SIGNED16_REG_2_REG(d, s); + AND_rri(REG_WORK1, i, 63); + ASR_rrr(d,d,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jnf_ASR_w_reg,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jnf_ASR_l_reg,(W4 d, RR4 s, RR4 i)) +{ + i=readreg(i,4); + s=readreg(s,4); + d=writereg(d,4); + + AND_rri(REG_WORK1, i, 63); + ASR_rrr(d,s,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jnf_ASR_l_reg,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jff_ASR_b_reg,(W4 d, RR4 s, RR4 i)) +{ + i=readreg(i,4); + s=readreg(s,4); + d=writereg(d,4); + + SIGNED8_REG_2_REG(d, s); + CC_MSR_CPSRf_r(NATIVE_CC_CC, 0); // Clear everything except C + CC_MSR_CPSRf_r(NATIVE_CC_CS, ARM_C_FLAG);// Clear everything except C + AND_rri(REG_WORK1, i, 63); + ASRS_rrr(d,d,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jff_ASR_b_reg,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jff_ASR_w_reg,(W4 d, RR4 s, RR4 i)) +{ + i=readreg(i,4); + s=readreg(s,4); + d=writereg(d,4); + + SIGNED16_REG_2_REG(d, s); + CC_MSR_CPSRf_r(NATIVE_CC_CC, 0); // Clear everything except C + CC_MSR_CPSRf_r(NATIVE_CC_CS, ARM_C_FLAG);// Clear everything except C + AND_rri(REG_WORK1, i, 63); + ASRS_rrr(d,d,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jff_ASR_w_reg,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jff_ASR_l_reg,(W4 d, RR4 s, RR4 i)) +{ + i=readreg(i,4); + s=readreg(s,4); + d=writereg(d,4); + + CC_MSR_CPSRf_r(NATIVE_CC_CC, 0); // Clear everything except C + CC_MSR_CPSRf_r(NATIVE_CC_CS, ARM_C_FLAG);// Clear everything except C + AND_rri(REG_WORK1, i, 63); + ASRS_rrr(d,s,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jff_ASR_l_reg,(W4 d, RR4 s, RR4 i)) + +/* + * ASRW 
+ * Operand Syntax: + * + * Operand Size: 16 + * + * X Set according to the last bit shifted out of the operand. + * N Set if the most significant bit of the result is set. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Set if the most significant bit is changed at any time during the shift operation. Cleared otherwise. + * C Set according to the last bit shifted out of the operand. + * + */ +MIDFUNC(2,jnf_ASRW,(W4 d, RR4 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + SIGNED16_REG_2_REG(d, s); + ASR_rri(d,d,1); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jnf_ASRW,(W4 d, RR4 s)) + +MIDFUNC(2,jff_ASRW,(W4 d, RR4 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + SIGNED16_REG_2_REG(d, s); + MSR_CPSRf_i(0); + ASR_rri(d,d,1); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jff_ASRW,(W4 d, RR4 s)) + +/* + * BCHG + * Operand Syntax: Dn, + * #, + * + * Operand Size: 8,32 + * + * X Not affected. + * N Not affected. + * Z Set if the bit tested is zero. Cleared otherwise. + * V Not affected. + * C Not affected. 
+ * + */ +MIDFUNC(2,jnf_BCHG_b_imm,(RW4 d, IMM s)) +{ + d=rmw(d,4,4); + EOR_rri(d,d,(1 << s)); + unlock2(d); +} +MENDFUNC(2,jnf_BCHG_b_imm,(RW4 d, IMM s)) + +MIDFUNC(2,jnf_BCHG_l_imm,(RW4 d, IMM s)) +{ + d=rmw(d,4,4); + EOR_rri(d,d,(1 << s)); + unlock2(d); +} +MENDFUNC(2,jnf_BCHG_l_imm,(RW4 d, IMM s)) + +MIDFUNC(2,jnf_BCHG_b,(RW4 d, RR4 s)) +{ + if (isconst(s)) { + COMPCALL(jnf_BCHG_b_imm)(d,live.state[s].val&7); + return; + } + s=readreg(s,4); + d=rmw(d,4,4); + + AND_rri(REG_WORK1, s, 7); + MOV_ri(REG_WORK2, 1); + LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1); + + EOR_rrr(d,d,REG_WORK2); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jnf_BCHG_b,(RW4 d, RR4 s)) + +MIDFUNC(2,jnf_BCHG_l,(RW4 d, RR4 s)) +{ + if (isconst(s)) { + COMPCALL(jnf_BCHG_l_imm)(d,live.state[s].val&31); + return; + } + + s=readreg(s,4); + d=rmw(d,4,4); + + AND_rri(REG_WORK1, s, 31); + MOV_ri(REG_WORK2, 1); + LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1); + + EOR_rrr(d,d,REG_WORK2); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jnf_BCHG_l,(RW4 d, RR4 s)) + +MIDFUNC(2,jff_BCHG_b_imm,(RW4 d, IMM s)) +{ + d=rmw(d,4,4); + + uae_u32 v = (1 << s); + MRS_CPSR(REG_WORK1); + TST_ri(d,v); + CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_Z_FLAG); + CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_Z_FLAG); + MSR_CPSR_r(REG_WORK1); + EOR_rri(d,d,v); + + unlock2(d); +} +MENDFUNC(2,jff_BCHG_b_imm,(RW4 d, IMM s)) + +MIDFUNC(2,jff_BCHG_l_imm,(RW4 d, IMM s)) +{ + d=rmw(d,4,4); + + uae_u32 v = (1 << s); + MRS_CPSR(REG_WORK1); + TST_ri(d,v); + CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_Z_FLAG); + CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_Z_FLAG); + MSR_CPSR_r(REG_WORK1); + EOR_rri(d,d,v); + + unlock2(d); +} +MENDFUNC(2,jff_BCHG_l_imm,(RW4 d, IMM s)) + +MIDFUNC(2,jff_BCHG_b,(RW4 d, RR4 s)) +{ + if (isconst(s)) { + COMPCALL(jff_BCHG_b_imm)(d,live.state[s].val&7); + return; + } + s=readreg(s,4); + d=rmw(d,4,4); + + AND_rri(REG_WORK1, s, 7); + MOV_ri(REG_WORK2, 1); + LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1); 
+ + MRS_CPSR(REG_WORK1); + TST_rr(d,REG_WORK2); + CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_Z_FLAG); + CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_Z_FLAG); + MSR_CPSR_r(REG_WORK1); + EOR_rrr(d,d,REG_WORK2); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jff_BCHG_b,(RW4 d, RR4 s)) + +MIDFUNC(2,jff_BCHG_l,(RW4 d, RR4 s)) +{ + if (isconst(s)) { + COMPCALL(jff_BCHG_l_imm)(d,live.state[s].val&31); + return; + } + + s=readreg(s,4); + d=rmw(d,4,4); + + AND_rri(REG_WORK1, s, 31); + MOV_ri(REG_WORK2, 1); + LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1); + + MRS_CPSR(REG_WORK1); + TST_rr(d,REG_WORK2); + CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_Z_FLAG); + CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_Z_FLAG); + MSR_CPSR_r(REG_WORK1); + EOR_rrr(d,d,REG_WORK2); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jff_BCHG_l,(RW4 d, RR4 s)) + +/* + * BCLR + * Operand Syntax: Dn, + * #, + * + * Operand Size: 8,32 + * + * X Not affected. + * N Not affected. + * Z Set if the bit tested is zero. Cleared otherwise. + * V Not affected. + * C Not affected. 
+ * + */ +MIDFUNC(2,jnf_BCLR_b_imm,(RW4 d, IMM s)) +{ + d=rmw(d,4,4); + BIC_rri(d,d,(1 << s)); + unlock2(d); +} +MENDFUNC(2,jnf_BCLR_b_imm,(RW4 d, IMM s)) + +MIDFUNC(2,jnf_BCLR_l_imm,(RW4 d, IMM s)) +{ + d=rmw(d,4,4); + BIC_rri(d,d,(1 << s)); + unlock2(d); +} +MENDFUNC(2,jnf_BCLR_l_imm,(RW4 d, IMM s)) + +MIDFUNC(2,jnf_BCLR_b,(RW4 d, RR4 s)) +{ + if (isconst(s)) { + COMPCALL(jnf_BCLR_b_imm)(d,live.state[s].val&7); + return; + } + s=readreg(s,4); + d=rmw(d,4,4); + + AND_rri(REG_WORK1, s, 7); + MOV_ri(REG_WORK2, 1); + LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1); + + BIC_rrr(d,d,REG_WORK2); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jnf_BCLR_b,(RW4 d, RR4 s)) + +MIDFUNC(2,jnf_BCLR_l,(RW4 d, RR4 s)) +{ + if (isconst(s)) { + COMPCALL(jnf_BCLR_l_imm)(d,live.state[s].val&31); + return; + } + + s=readreg(s,4); + d=rmw(d,4,4); + + AND_rri(REG_WORK1, s, 31); + MOV_ri(REG_WORK2, 1); + LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1); + + BIC_rrr(d,d,REG_WORK2); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jnf_BCLR_l,(RW4 d, RR4 s)) + +MIDFUNC(2,jff_BCLR_b_imm,(RW4 d, IMM s)) +{ + d=rmw(d,4,4); + + uae_u32 v = (1 << s); + MRS_CPSR(REG_WORK1); + TST_ri(d,v); + CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_Z_FLAG); + CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_Z_FLAG); + MSR_CPSR_r(REG_WORK1); + BIC_rri(d,d,v); + + unlock2(d); +} +MENDFUNC(2,jff_BCLR_b_imm,(RW4 d, IMM s)) + +MIDFUNC(2,jff_BCLR_l_imm,(RW4 d, IMM s)) +{ + d=rmw(d,4,4); + + uae_u32 v = (1 << s); + MRS_CPSR(REG_WORK1); + TST_ri(d,v); + CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_Z_FLAG); + CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_Z_FLAG); + MSR_CPSR_r(REG_WORK1); + BIC_rri(d,d,v); + + unlock2(d); +} +MENDFUNC(2,jff_BCLR_l_imm,(RW4 d, IMM s)) + +MIDFUNC(2,jff_BCLR_b,(RW4 d, RR4 s)) +{ + if (isconst(s)) { + COMPCALL(jff_BCLR_b_imm)(d,live.state[s].val&7); + return; + } + s=readreg(s,4); + d=rmw(d,4,4); + + AND_rri(REG_WORK1, s, 7); + MOV_ri(REG_WORK2, 1); + LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1); 
+ + MRS_CPSR(REG_WORK1); + TST_rr(d,REG_WORK2); + CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_Z_FLAG); + CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_Z_FLAG); + MSR_CPSR_r(REG_WORK1); + BIC_rrr(d,d,REG_WORK2); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jff_BCLR_b,(RW4 d, RR4 s)) + +MIDFUNC(2,jff_BCLR_l,(RW4 d, RR4 s)) +{ + if (isconst(s)) { + COMPCALL(jff_BCLR_l_imm)(d,live.state[s].val&31); + return; + } + + s=readreg(s,4); + d=rmw(d,4,4); + + AND_rri(REG_WORK1, s, 31); + MOV_ri(REG_WORK2, 1); + LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1); + + MRS_CPSR(REG_WORK1); + TST_rr(d,REG_WORK2); + CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_Z_FLAG); + CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_Z_FLAG); + MSR_CPSR_r(REG_WORK1); + BIC_rrr(d,d,REG_WORK2); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jff_BCLR_l,(RW4 d, RR4 s)) + +/* + * BSET + * Operand Syntax: Dn, + * #, + * + * Operand Size: 8,32 + * + * X Not affected. + * N Not affected. + * Z Set if the bit tested is zero. Cleared otherwise. + * V Not affected. + * C Not affected. 
+ * + */ +MIDFUNC(2,jnf_BSET_b_imm,(RW4 d, IMM s)) +{ + d=rmw(d,4,4); + ORR_rri(d,d,(1 << s)); + unlock2(d); +} +MENDFUNC(2,jnf_BSET_b_imm,(RW4 d, IMM s)) + +MIDFUNC(2,jnf_BSET_l_imm,(RW4 d, IMM s)) +{ + d=rmw(d,4,4); + ORR_rri(d,d,(1 << s)); + unlock2(d); +} +MENDFUNC(2,jnf_BSET_l_imm,(RW4 d, IMM s)) + +MIDFUNC(2,jnf_BSET_b,(RW4 d, RR4 s)) +{ + if (isconst(s)) { + COMPCALL(jnf_BSET_b_imm)(d,live.state[s].val&7); + return; + } + s=readreg(s,4); + d=rmw(d,4,4); + + AND_rri(REG_WORK1, s, 7); + MOV_ri(REG_WORK2, 1); + LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1); + + ORR_rrr(d,d,REG_WORK2); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jnf_BSET_b,(RW4 d, RR4 s)) + +MIDFUNC(2,jnf_BSET_l,(RW4 d, RR4 s)) +{ + if (isconst(s)) { + COMPCALL(jnf_BSET_l_imm)(d,live.state[s].val&31); + return; + } + + s=readreg(s,4); + d=rmw(d,4,4); + + AND_rri(REG_WORK1, s, 31); + MOV_ri(REG_WORK2, 1); + LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1); + + ORR_rrr(d,d,REG_WORK2); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jnf_BSET_l,(RW4 d, RR4 s)) + +MIDFUNC(2,jff_BSET_b_imm,(RW4 d, IMM s)) +{ + d=rmw(d,4,4); + + uae_u32 v = (1 << s); + MRS_CPSR(REG_WORK1); + TST_ri(d,v); + CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_Z_FLAG); + CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_Z_FLAG); + MSR_CPSR_r(REG_WORK1); + ORR_rri(d,d,v); + + unlock2(d); +} +MENDFUNC(2,jff_BSET_b_imm,(RW4 d, IMM s)) + +MIDFUNC(2,jff_BSET_l_imm,(RW4 d, IMM s)) +{ + d=rmw(d,4,4); + + uae_u32 v = (1 << s); + MRS_CPSR(REG_WORK1); + TST_ri(d,v); + CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_Z_FLAG); + CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_Z_FLAG); + MSR_CPSR_r(REG_WORK1); + ORR_rri(d,d,v); + + unlock2(d); +} +MENDFUNC(2,jff_BSET_l_imm,(RW4 d, IMM s)) + +MIDFUNC(2,jff_BSET_b,(RW4 d, RR4 s)) +{ + if (isconst(s)) { + COMPCALL(jff_BSET_b_imm)(d,live.state[s].val&7); + return; + } + s=readreg(s,4); + d=rmw(d,4,4); + + AND_rri(REG_WORK1, s, 7); + MOV_ri(REG_WORK2, 1); + LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1); 
+ + MRS_CPSR(REG_WORK1); + TST_rr(d,REG_WORK2); + CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_Z_FLAG); + CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_Z_FLAG); + MSR_CPSR_r(REG_WORK1); + ORR_rrr(d,d,REG_WORK2); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jff_BSET_b,(RW4 d, RR4 s)) + +MIDFUNC(2,jff_BSET_l,(RW4 d, RR4 s)) +{ + if (isconst(s)) { + COMPCALL(jff_BSET_l_imm)(d,live.state[s].val&31); + return; + } + + s=readreg(s,4); + d=rmw(d,4,4); + + AND_rri(REG_WORK1, s, 31); + MOV_ri(REG_WORK2, 1); + LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1); + + MRS_CPSR(REG_WORK1); + TST_rr(d,REG_WORK2); + CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_Z_FLAG); + CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_Z_FLAG); + MSR_CPSR_r(REG_WORK1); + ORR_rrr(d,d,REG_WORK2); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jff_BSET_l,(RW4 d, RR4 s)) + +/* + * BTST + * Operand Syntax: Dn, + * #, + * + * Operand Size: 8,32 + * + * X Not affected + * N Not affected + * Z Set if the bit tested is zero. 
Cleared otherwise + * V Not affected + * C Not affected + * + */ +MIDFUNC(2,jff_BTST_b_imm,(RR4 d, IMM s)) +{ + d=readreg(d,4); + + MRS_CPSR(REG_WORK1); + TST_ri(d,(1 << s)); + CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_Z_FLAG); + CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_Z_FLAG); + MSR_CPSR_r(REG_WORK1); + + unlock2(d); +} +MENDFUNC(2,jff_BTST_b_imm,(RR4 d, IMM s)) + +MIDFUNC(2,jff_BTST_l_imm,(RR4 d, IMM s)) +{ + d=readreg(d,4); + + MRS_CPSR(REG_WORK1); + TST_ri(d,(1 << s)); + CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_Z_FLAG); + CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_Z_FLAG); + MSR_CPSR_r(REG_WORK1); + + unlock2(d); +} +MENDFUNC(2,jff_BTST_l_imm,(RR4 d, IMM s)) + +MIDFUNC(2,jff_BTST_b,(RR4 d, RR4 s)) +{ + if (isconst(s)) { + COMPCALL(jff_BTST_b_imm)(d,live.state[s].val&7); + return; + } + s=readreg(s,4); + d=readreg(d,4); + + AND_rri(REG_WORK1, s, 7); + MOV_ri(REG_WORK2, 1); + LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1); + + MRS_CPSR(REG_WORK1); + TST_rr(d,REG_WORK2); + CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_Z_FLAG); + CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_Z_FLAG); + MSR_CPSR_r(REG_WORK1); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jff_BTST_b,(RR4 d, RR4 s)) + +MIDFUNC(2,jff_BTST_l,(RR4 d, RR4 s)) +{ + if (isconst(s)) { + COMPCALL(jff_BTST_l_imm)(d,live.state[s].val&31); + return; + } + + s=readreg(s,4); + d=readreg(d,4); + + AND_rri(REG_WORK1, s, 31); + MOV_ri(REG_WORK2, 1); + LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1); + + MRS_CPSR(REG_WORK1); + TST_rr(d,REG_WORK2); + CC_BIC_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_Z_FLAG); + CC_ORR_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, ARM_Z_FLAG); + MSR_CPSR_r(REG_WORK1); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jff_BTST_l,(RR4 d, RR4 s)) + +/* + * CLR + * Operand Syntax: + * + * Operand Size: 8,16,32 + * + * X Not affected. + * N Always cleared. + * Z Always set. + * V Always cleared. + * C Always cleared. 
+ * + */ +MIDFUNC(1,jnf_CLR,(W4 d)) +{ + d=writereg(d,4); + MOV_ri(d,0); + unlock2(d); +} +MENDFUNC(1,jnf_CLR,(W4 d)) + +MIDFUNC(1,jff_CLR,(W4 d)) +{ + d=writereg(d,4); + MOV_ri(d,0); + MSR_CPSR_i(ARM_Z_FLAG); + unlock2(d); +} +MENDFUNC(1,jff_CLR,(W4 d)) + +/* + * CMP + * Operand Syntax: , Dn + * + * Operand Size: 8,16,32 + * + * X Not affected. + * N Set if the result is negative. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Set if an overflow occurs. Cleared otherwise. + * C Set if a borrow occurs. Cleared otherwise. + * + */ +MIDFUNC(2,jff_CMP_b,(RR1 d, RR1 s)) +{ + d=readreg(d,4); + s=readreg(s,4); + + SIGNED8_REG_2_REG(REG_WORK1, d); + SIGNED8_REG_2_REG(REG_WORK2, s); + CMP_rr(REG_WORK1,REG_WORK2); + + MRS_CPSR(REG_WORK1); + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); + MSR_CPSR_r(REG_WORK1); + // inverted_carry = true; + + unlock2(s); + unlock2(d); +} +MENDFUNC(2,jff_CMP_b,(RR1 d, RR1 s)) + +MIDFUNC(2,jff_CMP_w,(RR2 d, RR2 s)) +{ + d=readreg(d,4); + s=readreg(s,4); + + SIGNED16_REG_2_REG(REG_WORK1, d); + SIGNED16_REG_2_REG(REG_WORK2, s); + CMP_rr(REG_WORK1,REG_WORK2); + + MRS_CPSR(REG_WORK1); + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); + MSR_CPSR_r(REG_WORK1); + // inverted_carry = true; + + unlock2(s); + unlock2(d); +} +MENDFUNC(2,jff_CMP_w,(RR2 d, RR2 s)) + +MIDFUNC(2,jff_CMP_l,(RR4 d, RR4 s)) +{ + d=readreg(d,4); + s=readreg(s,4); + + CMP_rr(d,s); + + MRS_CPSR(REG_WORK1); + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); + MSR_CPSR_r(REG_WORK1); + // inverted_carry = true; + + unlock2(s); + unlock2(d); +} +MENDFUNC(2,jff_CMP_l,(RR4 d, RR4 s)) + +/* + * CMPA + * Operand Syntax: , An + * + * Operand Size: 16,32 + * + * X Not affected. + * N Set if the result is negative. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Set if an overflow occurs. Cleared otherwise. + * C Set if a borrow occurs. Cleared otherwise. 
+ * + */ +MIDFUNC(2,jff_CMPA_b,(RR1 d, RR1 s)) +{ + d=readreg(d,4); + s=readreg(s,4); + + SIGNED8_REG_2_REG(REG_WORK2, s); + CMP_rr(d,REG_WORK2); + + MRS_CPSR(REG_WORK1); + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); + MSR_CPSR_r(REG_WORK1); + // inverted_carry = true; + + unlock2(s); + unlock2(d); +} +MENDFUNC(2,jff_CMPA_b,(RR1 d, RR1 s)) + +MIDFUNC(2,jff_CMPA_w,(RR2 d, RR2 s)) +{ + d=readreg(d,4); + s=readreg(s,4); + + SIGNED16_REG_2_REG(REG_WORK2, s); + CMP_rr(d,REG_WORK2); + + MRS_CPSR(REG_WORK1); + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); + MSR_CPSR_r(REG_WORK1); + // inverted_carry = true; + + unlock2(s); + unlock2(d); +} +MENDFUNC(2,jff_CMPA_w,(RR2 d, RR2 s)) + +MIDFUNC(2,jff_CMPA_l,(RR4 d, RR4 s)) +{ + d=readreg(d,4); + s=readreg(s,4); + + CMP_rr(d,s); + + MRS_CPSR(REG_WORK1); + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); + MSR_CPSR_r(REG_WORK1); + // inverted_carry = true; + + unlock2(s); + unlock2(d); +} +MENDFUNC(2,jff_CMPA_l,(RR4 d, RR4 s)) + +/* + * EOR + * Operand Syntax: Dn, + * + * Operand Size: 8,16,32 + * + * X Not affected. + * N Set if the most significant bit of the result is set. + * Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Always cleared. + * C Always cleared. 
+ * + */ +MIDFUNC(3,jnf_EOR,(W4 d, RR4 s, RR4 v)) +{ + if (isconst(s) && isconst(v)) { + set_const(d, + live.state[s].val^live.state[v].val); + return; + } + + v=readreg(v,4); + s=readreg(s,4); + d=writereg(d,4); + + EOR_rrr(d, s, v); + + unlock2(v); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jnf_EOR,(RW4 d, RR4 s, RR4 v)) + +MIDFUNC(3,jff_EOR_b,(W4 d, RR1 s, RR1 v)) +{ + v=readreg(v,4); + s=readreg(s,4); + d=writereg(d,4); + + SIGNED8_REG_2_REG(REG_WORK1, s); + SIGNED8_REG_2_REG(REG_WORK2, v); + MSR_CPSRf_i(0); + EORS_rrr(d, REG_WORK1, REG_WORK2); + + unlock2(v); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jff_EOR_b,(RW4 d, RR1 s, RR1 v)) + +MIDFUNC(3,jff_EOR_w,(W4 d, RR2 s, RR2 v)) +{ + v=readreg(v,4); + s=readreg(s,4); + d=writereg(d,4); + + SIGNED16_REG_2_REG(REG_WORK1, s); + SIGNED16_REG_2_REG(REG_WORK2, v); + MSR_CPSRf_i(0); + EORS_rrr(d, REG_WORK1, REG_WORK2); + + unlock2(v); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jff_EOR_w,(RW4 d, RR2 s, RR2 v)) + +MIDFUNC(3,jff_EOR_l,(W4 d, RR4 s, RR4 v)) +{ + v=readreg(v,4); + s=readreg(s,4); + d=writereg(d,4); + + MSR_CPSRf_i(0); + EORS_rrr(d, s,v); + + unlock2(v); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jff_EOR_l,(RW4 d, RR4 s, RR4 v)) + +/* + * EORI + * Operand Syntax: #, CCR + * + * Operand Size: 8 + * + * X — Changed if bit 4 of immediate operand is one; unchanged otherwise. + * N — Changed if bit 3 of immediate operand is one; unchanged otherwise. + * Z — Changed if bit 2 of immediate operand is one; unchanged otherwise. + * V — Changed if bit 1 of immediate operand is one; unchanged otherwise. + * C — Changed if bit 0 of immediate operand is one; unchanged otherwise. 
+ * + */ +MIDFUNC(1,jff_EORSR,(IMM s, IMM x)) +{ + MRS_CPSR(REG_WORK1); + EOR_rri(REG_WORK1, REG_WORK1, s); + MSR_CPSRf_r(REG_WORK1); + + if (x) { + compemu_raw_mov_l_ri(REG_WORK1, (uintptr)live.state[FLAGX].mem); + LDRB_rR(REG_WORK2, REG_WORK1); + EOR_rri(REG_WORK2, REG_WORK2, 1); + STRB_rR(REG_WORK2, REG_WORK1); + } +} +MENDFUNC(1,jff_EORSR,(IMM s)) + +/* + * EXT + * Operand Syntax: + * + * Operand Size: 16,32 + * + * X Not affected. + * N Set if the result is negative. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Always cleared. + * C Always cleared. + * + */ +MIDFUNC(2,jnf_EXT_b,(W4 d, RR4 s)) +{ + if (isconst(s)) { + set_const(d,(uae_s32)(uae_s8)live.state[s].val); + return; + } + + s=readreg(s,4); + d=writereg(d,4); + + SIGNED8_REG_2_REG(d, s); + + unlock2(s); + unlock2(d); +} +MENDFUNC(2,jnf_EXT_b,(W4 d, RR4 s)) + +MIDFUNC(2,jnf_EXT_w,(W4 d, RR4 s)) +{ + if (isconst(s)) { + set_const(d,(uae_s32)(uae_s8)live.state[s].val); + return; + } + + s=readreg(s,4); + d=writereg(d,4); + + SIGNED8_REG_2_REG(d, s); + + unlock2(s); + unlock2(d); +} +MENDFUNC(2,jnf_EXT_w,(W4 d, RR4 s)) + +MIDFUNC(2,jnf_EXT_l,(W4 d, RR4 s)) +{ + if (isconst(s)) { + set_const(d,(uae_s32)(uae_s16)live.state[s].val); + return; + } + + s=readreg(s,4); + d=writereg(d,4); + + SIGNED16_REG_2_REG(d, s); + + unlock2(s); + unlock2(d); +} +MENDFUNC(2,jnf_EXT_l,(W4 d, RR4 s)) + +MIDFUNC(2,jff_EXT_b,(W4 d, RR4 s)) +{ + if (isconst(s)) { + d=writereg(d,4); + SIGNED8_IMM_2_REG(d, (uint8)live.state[s].val); + } else { + s=readreg(s,4); + d=writereg(d,4); + SIGNED8_REG_2_REG(d, s); + unlock2(s); + } + + MSR_CPSRf_i(0); + TST_rr(d,d); + + unlock2(d); +} +MENDFUNC(2,jff_EXT_b,(W4 d, RR4 s)) + +MIDFUNC(2,jff_EXT_w,(W4 d, RR4 s)) +{ + if (isconst(s)) { + d=writereg(d,4); + SIGNED8_IMM_2_REG(d, (uint8)live.state[s].val); + } else { + s=readreg(s,4); + d=writereg(d,4); + SIGNED8_REG_2_REG(d, s); + unlock2(s); + } + + MSR_CPSRf_i(0); + TST_rr(d,d); + + unlock2(d); +} 
+MENDFUNC(2,jff_EXT_w,(W4 d, RR4 s)) + +MIDFUNC(2,jff_EXT_l,(W4 d, RR4 s)) +{ + if (isconst(s)) { + d=writereg(d,4); + SIGNED16_IMM_2_REG(d, (uint16)live.state[s].val); + } else { + s=readreg(s,4); + d=writereg(d,4); + SIGNED16_REG_2_REG(d, s); + unlock2(s); + } + MSR_CPSRf_i(0); + TST_rr(d,d); + + unlock2(d); +} +MENDFUNC(2,jff_EXT_l,(W4 d, RR4 s)) + +/* + * LSL + * Operand Syntax: Dx, Dy + * #, Dy + * + * + * Operand Size: 8,16,32 + * + * X Set according to the last bit shifted out of the operand. Unaffected for a shift count of zero. + * N Set if the result is negative. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Always cleared. + * C Set according to the last bit shifted out of the operand. Cleared for a shift count of zero. + * + */ +MIDFUNC(3,jnf_LSL_imm,(W4 d, RR4 s, IMM i)) +{ + if (!i) return; + + s=readreg(s,4); + d=writereg(d,4); + + LSL_rri(d,s,i); + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jnf_LSL_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jnf_LSL_reg,(W4 d, RR4 s, RR4 i)) +{ + i=readreg(i,4); + s=readreg(s,4); + d=writereg(d,4); + + AND_rri(REG_WORK1, i, 63); + LSL_rrr(d,s,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jnf_LSL_reg,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jff_LSL_b_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + UNSIGNED8_REG_2_REG(d, s); + MSR_CPSRf_i(0); + + REV_rr(d,d); + if (i) { + LSLS_rri(d,d,i); + } else { + TST_rr(d,d); + } + REV_rr(d,d); + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jff_LSL_b_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jff_LSL_w_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + MSR_CPSRf_i(0); + + LSL_rri(d,s,16); + if (i) { + LSLS_rri(d,d,i); + } else { + TST_rr(d,d); + } + LSR_rri(d,d,16); + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jff_LSL_w_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jff_LSL_l_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + MSR_CPSRf_i(0); + if (i) { + LSLS_rri(d,s,i); + } else { + 
MOV_rr(d,s); + TST_rr(d,d); + } + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jff_LSL_l_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jff_LSL_b_reg,(W4 d, RR4 s, RR4 i)) +{ + i=readreg(i,4); + s=readreg(s,4); + d=writereg(d,4); + + UNSIGNED8_REG_2_REG(d,s); + CC_MSR_CPSRf_r(NATIVE_CC_CC, 0); // Clear everything except C + CC_MSR_CPSRf_r(NATIVE_CC_CS, ARM_C_FLAG);// Clear everything except C + REV_rr(d,d); + AND_rri(REG_WORK1, i, 63); + LSLS_rrr(d,d,REG_WORK1); + REV_rr(d,d); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jff_LSL_b_reg,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jff_LSL_w_reg,(W4 d, RR4 s, RR4 i)) +{ + i=readreg(i,4); + s=readreg(s,4); + d=writereg(d,4); + + CC_MSR_CPSRf_r(NATIVE_CC_CC, 0); // Clear everything except C + CC_MSR_CPSRf_r(NATIVE_CC_CS, ARM_C_FLAG);// Clear everything except C + LSL_rri(d, s, 16); + AND_rri(REG_WORK1, i, 63); + LSLS_rrr(d,d,REG_WORK1); + LSR_rri(d, d, 16); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jff_LSL_w_reg,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jff_LSL_l_reg,(W4 d, RR4 s, RR4 i)) +{ + i=readreg(i,4); + s=readreg(s,4); + d=writereg(d,4); + + CC_MSR_CPSRf_r(NATIVE_CC_CC, 0); // Clear everything except C + CC_MSR_CPSRf_r(NATIVE_CC_CS, ARM_C_FLAG);// Clear everything except C + AND_rri(REG_WORK1, i, 63); + LSLS_rrr(d,s,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jff_LSL_l_reg,(W4 d, RR4 s, RR4 i)) + +/* + * LSLW + * Operand Syntax: + * + * Operand Size: 16 + * + * X Set according to the last bit shifted out of the operand. Unaffected for a shift count of zero. + * N Set if the result is negative. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Always cleared. + * C Set according to the last bit shifted out of the operand. Cleared for a shift count of zero. 
+ * + */ +MIDFUNC(2,jnf_LSLW,(W4 d, RR4 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + LSL_rri(d,s,1); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jnf_LSLW,(W4 d, RR4 s)) + +MIDFUNC(2,jff_LSLW,(W4 d, RR4 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + MSR_CPSRf_i(0); + LSLS_rri(d,s,17); + LSR_rri(d,d,16); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jff_LSLW,(W4 d, RR4 s)) + +/* + * LSR + * Operand Syntax: Dx, Dy + * #, Dy + * + * + * Operand Size: 8,16,32 + * + * X Set according to the last bit shifted out of the operand. + * Unaffected for a shift count of zero. + * N Set if the result is negative. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Always cleared. + * C Set according to the last bit shifted out of the operand. + * Cleared for a shift count of zero. + * + */ +MIDFUNC(3,jnf_LSR_b_imm,(W4 d, RR4 s, IMM i)) +{ + int isrmw; + + if (!i) + return; + + isrmw=(s==d); + if (!isrmw) { + s=readreg(s,4); + d=writereg(d,4); + } + else { + s=d=rmw(s,4,4); + } + + UNSIGNED8_REG_2_REG(d, s); + LSR_rri(d,d,i); + + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } +} +MENDFUNC(3,jnf_LSR_b_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jnf_LSR_w_imm,(W4 d, RR4 s, IMM i)) +{ + int isrmw; + + if (!i) + return; + + isrmw=(s==d); + if (!isrmw) { + s=readreg(s,4); + d=writereg(d,4); + } + else { + s=d=rmw(s,4,4); + } + + UNSIGNED16_REG_2_REG(d, s); + LSR_rri(d,d,i); + + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } +} +MENDFUNC(3,jnf_LSR_w_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jnf_LSR_l_imm,(W4 d, RR4 s, IMM i)) +{ + int isrmw; + + if (!i) + return; + + isrmw=(s==d); + if (!isrmw) { + s=readreg(s,4); + d=writereg(d,4); + } + else { + s=d=rmw(s,4,4); + } + + LSR_rri(d,s,i); + + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } +} +MENDFUNC(3,jnf_LSR_l_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jff_LSR_b_imm,(W4 d, RR4 s, IMM i)) +{ + int isrmw; + + isrmw=(s==d); + if (!isrmw) { + 
s=readreg(s,4); + d=writereg(d,4); + } + else { + s=d=rmw(s,4,4); + } + + UNSIGNED8_REG_2_REG(d, s); + MSR_CPSRf_i(0); + if (i) { + LSRS_rri(d,d,i); + } else { + TST_rr(d,d); + } + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } +} +MENDFUNC(3,jff_LSR_b_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jff_LSR_w_imm,(W4 d, RR4 s, IMM i)) +{ + int isrmw; + + isrmw=(s==d); + if (!isrmw) { + s=readreg(s,4); + d=writereg(d,4); + } + else { + s=d=rmw(s,4,4); + } + + UNSIGNED16_REG_2_REG(d, s); + MSR_CPSRf_i(0); + if (i) { + LSRS_rri(d,d,i); + } else { + TST_rr(d,d); + } + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } +} +MENDFUNC(3,jff_LSR_w_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jff_LSR_l_imm,(W4 d, RR4 s, IMM i)) +{ + int isrmw; + + isrmw=(s==d); + if (!isrmw) { + s=readreg(s,4); + d=writereg(d,4); + } + else { + s=d=rmw(s,4,4); + } + + MSR_CPSRf_i(0); + if (i) { + LSRS_rri(d,s,i); + } else { + TST_rr(s,s); + } + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } +} +MENDFUNC(3,jff_LSR_l_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jnf_LSR_b_reg,(W4 d, RR4 s, RR4 i)) +{ + int isrmw; + + i=readreg(i,4); + isrmw=(s==d); + if (!isrmw) { + s=readreg(s,4); + d=writereg(d,4); + } + else { + s=d=rmw(s,4,4); + } + + UNSIGNED8_REG_2_REG(d, s); + AND_rri(REG_WORK1, i, 63); + LSR_rrr(d,d,REG_WORK1); + + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } + unlock2(i); +} +MENDFUNC(3,jnf_LSR_b_reg,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jnf_LSR_w_reg,(W4 d, RR4 s, RR4 i)) +{ + int isrmw; + + i=readreg(i,4); + isrmw=(s==d); + if (!isrmw) { + s=readreg(s,4); + d=writereg(d,4); + } + else { + s=d=rmw(s,4,4); + } + + UNSIGNED16_REG_2_REG(d, s); + AND_rri(REG_WORK1, i, 63); + LSR_rrr(d,d,REG_WORK1); + + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } + unlock2(i); +} +MENDFUNC(3,jnf_LSR_w_reg,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jnf_LSR_l_reg,(W4 d, RR4 s, RR4 i)) +{ + int isrmw; + + 
i=readreg(i,4); + isrmw=(s==d); + if (!isrmw) { + s=readreg(s,4); + d=writereg(d,4); + } + else { + s=d=rmw(s,4,4); + } + + AND_rri(REG_WORK1, i, 63); + LSR_rrr(d,s,REG_WORK1); + + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } + unlock2(i); +} +MENDFUNC(3,jnf_LSR_l_reg,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jff_LSR_b_reg,(W4 d, RR4 s, RR4 i)) +{ + int isrmw; + + i=readreg(i,4); + isrmw=(s==d); + if (!isrmw) { + s=readreg(s,4); + d=writereg(d,4); + } + else { + s=d=rmw(s,4,4); + } + + UNSIGNED8_REG_2_REG(d, s); + CC_MSR_CPSRf_r(NATIVE_CC_CC, 0); // Clear everything except C + CC_MSR_CPSRf_r(NATIVE_CC_CS, ARM_C_FLAG);// Clear everything except C + AND_rri(REG_WORK1, i, 63); + LSRS_rrr(d,d,REG_WORK1); + + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } + unlock2(i); +} +MENDFUNC(3,jff_LSR_b_reg,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jff_LSR_w_reg,(W4 d, RR4 s, RR4 i)) +{ + int isrmw; + + i=readreg(i,4); + isrmw=(s==d); + if (!isrmw) { + s=readreg(s,4); + d=writereg(d,4); + } + else { + s=d=rmw(s,4,4); + } + + UNSIGNED16_REG_2_REG(d, s); + CC_MSR_CPSRf_r(NATIVE_CC_CC, 0); // Clear everything except C + CC_MSR_CPSRf_r(NATIVE_CC_CS, ARM_C_FLAG);// Clear everything except C + AND_rri(REG_WORK1, i, 63); + LSRS_rrr(d,d,REG_WORK1); + + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } + unlock2(i); +} +MENDFUNC(3,jff_LSR_w_reg,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jff_LSR_l_reg,(W4 d, RR4 s, RR4 i)) +{ + int isrmw; + + i=readreg(i,4); + isrmw=(s==d); + if (!isrmw) { + s=readreg(s,4); + d=writereg(d,4); + } + else { + s=d=rmw(s,4,4); + } + + CC_MSR_CPSRf_r(NATIVE_CC_CC, 0); // Clear everything except C + CC_MSR_CPSRf_r(NATIVE_CC_CS, ARM_C_FLAG);// Clear everything except C + AND_rri(REG_WORK1, i, 63); + LSRS_rrr(d,s,REG_WORK1); + + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } + unlock2(i); +} +MENDFUNC(3,jff_LSR_l_reg,(W4 d, RR4 s, RR4 i)) + +/* + * LSRW + * Operand Syntax: + * + * 
Operand Size: 16
+ *
+ * X Set according to the last bit shifted out of the operand. Unaffected for a shift count of zero.
+ * N Set if the result is negative. Cleared otherwise.
+ * Z Set if the result is zero. Cleared otherwise.
+ * V Always cleared.
+ * C Set according to the last bit shifted out of the operand. Cleared for a shift count of zero.
+ *
+ */
+/* Memory-form LSR (shift count fixed at 1), no flags needed. */
+MIDFUNC(2,jnf_LSRW,(W4 d, RR4 s))
+{
+	s=readreg(s,4);
+	d=writereg(d,4);
+
+	UNSIGNED16_REG_2_REG(d, s);
+	LSR_rri(d,d,1);
+
+	unlock2(d);
+	unlock2(s);
+}
+MENDFUNC(2,jnf_LSRW,(W4 d, RR4 s))
+
+/* Memory-form LSR (shift count fixed at 1), flag-generating variant. */
+MIDFUNC(2,jff_LSRW,(W4 d, RR4 s))
+{
+	s=readreg(s,4);
+	d=writereg(d,4);
+
+	UNSIGNED16_REG_2_REG(d, s);
+	MSR_CPSRf_i(0);
+	LSRS_rri(d,d,1);	// was LSR_rri: must be the flag-setting form so N/Z come from the result and C holds the bit shifted out, matching jff_LSLW and jff_LSR_w_imm
+
+	unlock2(d);
+	unlock2(s);
+}
+MENDFUNC(2,jff_LSRW,(W4 d, RR4 s))
+
+/*
+ * MOVE
+ * Operand Syntax: <ea>, <ea>
+ *
+ * Operand Size: 8,16,32
+ *
+ * X Not affected.
+ * N Set if the result is negative. Cleared otherwise.
+ * Z Set if the result is zero. Cleared otherwise.
+ * V Always cleared.
+ * C Always cleared.
+ * + */ +MIDFUNC(2,jnf_MOVE,(W4 d, RR4 s)) +{ + if (isconst(s)) { + set_const(d,live.state[s].val); + return; + } + s=readreg(s,4); + d=writereg(d,4); + + MOV_rr(d, s); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jnf_MOVE,(W4 d, RR4 s)) + +MIDFUNC(2,jff_MOVE_b_imm,(W4 d, IMM s)) +{ + d=writereg(d,4); + + SIGNED8_IMM_2_REG(d, (uint8)s); + MSR_CPSRf_i(0); + TST_rr(d,d); + + unlock2(d); +} +MENDFUNC(2,jff_MOVE_b_imm,(W4 d, IMM s)) + +MIDFUNC(2,jff_MOVE_w_imm,(W4 d, IMM s)) +{ + d=writereg(d,4); + + SIGNED16_IMM_2_REG(d, (uint16)s); + MSR_CPSRf_i(0); + TST_rr(d,d); + + unlock2(d); +} +MENDFUNC(2,jff_MOVE_w_imm,(W4 d, IMM s)) + +MIDFUNC(2,jff_MOVE_l_imm,(W4 d, IMM s)) +{ + d=writereg(d,4); + + compemu_raw_mov_l_ri(d, s); + MSR_CPSRf_i(0); + TST_rr(d,d); + + unlock2(d); +} +MENDFUNC(2,jff_MOVE_l_imm,(W4 d, IMM s)) + +MIDFUNC(2,jff_MOVE_b,(W4 d, RR1 s)) +{ + if (isconst(s)) { + COMPCALL(jff_MOVE_b_imm)(d,live.state[s].val); + return; + } + + s=readreg(s,4); + d=writereg(d,4); + + SIGNED8_REG_2_REG(d, s); + MSR_CPSRf_i(0); + TST_rr(d,d); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jff_MOVE_b,(W4 d, RR1 s)) + +MIDFUNC(2,jff_MOVE_w,(W4 d, RR2 s)) +{ + if (isconst(s)) { + COMPCALL(jff_MOVE_w_imm)(d,live.state[s].val); + return; + } + + s=readreg(s,4); + d=writereg(d,4); + + SIGNED16_REG_2_REG(d, s); + MSR_CPSRf_i(0); + TST_rr(d,d); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jff_MOVE_w,(W4 d, RR2 s)) + +MIDFUNC(2,jff_MOVE_l,(W4 d, RR4 s)) +{ + if (isconst(s)) { + COMPCALL(jff_MOVE_l_imm)(d,live.state[s].val); + return; + } + + s=readreg(s,4); + d=writereg(d,4); + + MSR_CPSRf_i(0); + MOVS_rr(d,s); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jff_MOVE_l,(W4 d, RR4 s)) + +/* + * MOVE16 + * + * Flags: Not affected. 
+ * + */ +MIDFUNC(2,jnf_MOVE16,(RR4 d, RR4 s)) +{ + s=readreg(s,4); + d=readreg(d,4); + + BIC_rri(s, s, 0x000000FF); + BIC_rri(d, d, 0x000000FF); + + compemu_raw_mov_l_ri(REG_WORK1, (IMM)MEMBaseDiff); + ADD_rrr(s, s, REG_WORK1); + ADD_rrr(d, d, REG_WORK1); + + LDR_rRI(REG_WORK1, s, 8); + LDR_rRI(REG_WORK2, s, 12); + + PUSH_REGS((1<, An + * + * Operand Size: 16,32 + * + * Flags: Not affected. + * + */ +MIDFUNC(2,jnf_MOVEA_w,(W4 d, RR2 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + SIGNED16_REG_2_REG(d,s); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jnf_MOVEA_w,(W4 d, RR2 s)) + +MIDFUNC(2,jnf_MOVEA_l,(W4 d, RR4 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + MOV_rr(d,s); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jnf_MOVEA_l,(W4 d, RR4 s)) + +/* + * MULS + * Operand Syntax: , Dn + * + * Operand Size: 16 + * + * X Not affected. + * N Set if the result is negative. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Set if overflow. Cleared otherwise. (32 Bit multiply only) + * C Always cleared. 
+ * + */ +MIDFUNC(2,jnf_MULS,(RW4 d, RR4 s)) +{ + s = readreg(s, 4); + d = rmw(d, 4, 4); + + SIGN_EXTEND_16_REG_2_REG(d,d); + SIGN_EXTEND_16_REG_2_REG(REG_WORK1,s); + MUL_rrr(d, d, REG_WORK1); + + unlock2(s); + unlock2(d); +} +MENDFUNC(2,jnf_MULS,(RW4 d, RR4 s)) + +MIDFUNC(2,jff_MULS,(RW4 d, RR4 s)) +{ + s = readreg(s, 4); + d = rmw(d, 4, 4); + + SIGN_EXTEND_16_REG_2_REG(d,d); + SIGN_EXTEND_16_REG_2_REG(REG_WORK1,s); + + MSR_CPSRf_i(0); + MULS_rrr(d, d, REG_WORK1); + + unlock2(s); + unlock2(d); +} +MENDFUNC(2,jff_MULS,(RW4 d, RR4 s)) + +MIDFUNC(2,jnf_MULS32,(RW4 d, RR4 s)) +{ + s = readreg(s, 4); + d = rmw(d, 4, 4); + + MUL_rrr(d, d, s); + + unlock2(s); + unlock2(d); +} +MENDFUNC(2,jnf_MULS32,(RW4 d, RR4 s)) + +MIDFUNC(2,jff_MULS32,(RW4 d, RR4 s)) +{ + s = readreg(s, 4); + d = rmw(d, 4, 4); + + MSR_CPSRf_i(0); + // L, H, + SMULLS_rrrr(d, REG_WORK2, d, s); + MRS_CPSR(REG_WORK1); + TEQ_rrASRi(REG_WORK2,d,31); + CC_ORR_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_V_FLAG); + MSR_CPSRf_r(REG_WORK1); + + unlock2(s); + unlock2(d); +} +MENDFUNC(2,jff_MULS32,(RW4 d, RR4 s)) + +MIDFUNC(2,jnf_MULS64,(RW4 d, RW4 s)) +{ + s = rmw(s, 4, 4); + d = rmw(d, 4, 4); + + // L, H, + SMULL_rrrr(d, s, d, s); + + unlock2(s); + unlock2(d); +} +MENDFUNC(2,jnf_MULS64,(RW4 d, RW4 s)) + +MIDFUNC(2,jff_MULS64,(RW4 d, RW4 s)) +{ + s = rmw(s, 4, 4); + d = rmw(d, 4, 4); + + MSR_CPSRf_i(0); + // L, H, + SMULLS_rrrr(d, s, d, s); + MRS_CPSR(REG_WORK1); + TEQ_rrASRi(s,d,31); + CC_ORR_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_V_FLAG); + MSR_CPSRf_r(REG_WORK1); + + unlock2(s); + unlock2(d); +} +MENDFUNC(2,jff_MULS64,(RW4 d, RW4 s)) + +/* + * MULU + * Operand Syntax: , Dn + * + * Operand Size: 16 + * + * X Not affected. + * N Set if the result is negative. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Set if overflow. Cleared otherwise. (32 Bit multiply only) + * C Always cleared. 
+ * + */ +MIDFUNC(2,jnf_MULU,(RW4 d, RR4 s)) +{ + s = readreg(s, 4); + d = rmw(d, 4, 4); + + ZERO_EXTEND_16_REG_2_REG(d,d); + ZERO_EXTEND_16_REG_2_REG(REG_WORK1,s); + + MUL_rrr(d, d, REG_WORK1); + + unlock2(s); + unlock2(d); +} +MENDFUNC(2,jnf_MULU,(RW4 d, RR4 s)) + +MIDFUNC(2,jff_MULU,(RW4 d, RR4 s)) +{ + s = readreg(s, 4); + d = rmw(d, 4, 4); + + ZERO_EXTEND_16_REG_2_REG(d,d); + ZERO_EXTEND_16_REG_2_REG(REG_WORK1, s); + + MSR_CPSRf_i(0); + MULS_rrr(d, d, REG_WORK1); + + unlock2(s); + unlock2(d); +} +MENDFUNC(2,jff_MULU,(RW4 d, RR4 s)) + +MIDFUNC(2,jnf_MULU32,(RW4 d, RR4 s)) +{ + s = readreg(s, 4); + d = rmw(d, 4, 4); + + MUL_rrr(d, d, s); + + unlock2(s); + unlock2(d); +} +MENDFUNC(2,jnf_MULU32,(RW4 d, RR4 s)) + +MIDFUNC(2,jff_MULU32,(RW4 d, RR4 s)) +{ + s = readreg(s, 4); + d = rmw(d, 4, 4); + + // L, H, + MSR_CPSRf_i(0); + UMULLS_rrrr(d, REG_WORK2, d, s); + MRS_CPSR(REG_WORK1); + TST_rr(REG_WORK2,REG_WORK2); + CC_ORR_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_V_FLAG); + MSR_CPSRf_r(REG_WORK1); + + unlock2(s); + unlock2(d); +} +MENDFUNC(2,jff_MULU32,(RW4 d, RR4 s)) + +MIDFUNC(2,jnf_MULU64,(RW4 d, RW4 s)) +{ + s = rmw(s, 4, 4); + d = rmw(d, 4, 4); + + // L, H, + UMULL_rrrr(d, s, d, s); + + unlock2(s); + unlock2(d); +} +MENDFUNC(2,jnf_MULU64,(RW4 d, RW4 s)) + +MIDFUNC(2,jff_MULU64,(RW4 d, RW4 s)) +{ + s = rmw(s, 4, 4); + d = rmw(d, 4, 4); + + // L, H, + MSR_CPSRf_i(0); + UMULLS_rrrr(d, s, d, s); + MRS_CPSR(REG_WORK1); + TST_rr(s,s); + CC_ORR_rri(NATIVE_CC_NE, REG_WORK1, REG_WORK1, ARM_V_FLAG); + MSR_CPSRf_r(REG_WORK1); + + unlock2(s); + unlock2(d); +} +MENDFUNC(2,jff_MULU64,(RW4 d, RW4 s)) + +/* + * NEG + * Operand Syntax: + * + * Operand Size: 8,16,32 + * + * X Set the same as the carry bit. + * N Set if the result is negative. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Set if an overflow occurs. Cleared otherwise. + * C Cleared if the result is zero. Set otherwise. 
+ * + */ +MIDFUNC(2,jnf_NEG,(W4 d, RR4 s)) +{ + d=writereg(d,4); + s=readreg(s,4); + + RSB_rri(d,s,0); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jnf_NEG,(W4 d, RR4 s)) + +MIDFUNC(2,jff_NEG_b,(W4 d, RR1 s)) +{ + d=writereg(d,4); + s=readreg(s,4); + + SIGNED8_REG_2_REG(REG_WORK1, s); + RSBS_rri(d,REG_WORK1,0); + + // inverted_carry = true; + MRS_CPSR(REG_WORK1); + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); + MSR_CPSR_r(REG_WORK1); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jff_NEG_b,(W4 d, RR1 s)) + +MIDFUNC(2,jff_NEG_w,(W4 d, RR2 s)) +{ + d=writereg(d,4); + s=readreg(s,4); + + SIGNED16_REG_2_REG(REG_WORK1, s); + RSBS_rri(d,REG_WORK1,0); + + // inverted_carry = true; + MRS_CPSR(REG_WORK1); + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); + MSR_CPSR_r(REG_WORK1); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jff_NEG_w,(W4 d, RR2 s)) + +MIDFUNC(2,jff_NEG_l,(W4 d, RR4 s)) +{ + d=writereg(d,4); + s=readreg(s,4); + + RSBS_rri(d,s,0); + + // inverted_carry = true; + MRS_CPSR(REG_WORK1); + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); + MSR_CPSR_r(REG_WORK1); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jff_NEG_l,(W4 d, RR4 s)) + +/* + * NEGX + * Operand Syntax: + * + * Operand Size: 8,16,32 + * + * X Set the same as the carry bit. + * N Set if the result is negative. Cleared otherwise. + * Z Cleared if the result is nonzero; unchanged otherwise. + * V Set if an overflow occurs. Cleared otherwise. + * C Cleared if the result is zero. Set otherwise. + * + * Attention: Z is cleared only if the result is nonzero. 
Unchanged otherwise + * + */ +MIDFUNC(2,jnf_NEGX,(W4 d, RR4 s)) +{ + d=writereg(d,4); + s=readreg(s,4); + + RSC_rri(d,s,0); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jnf_NEGX,(W4 d, RR4 s)) + +MIDFUNC(2,jff_NEGX_b,(W4 d, RR1 s)) +{ + d=writereg(d,4); + s=readreg(s,4); + + MRS_CPSR(REG_WORK2); + CC_MVN_ri(NATIVE_CC_EQ, REG_WORK2, 0); + CC_MVN_ri(NATIVE_CC_NE, REG_WORK2, ARM_Z_FLAG); + + SIGNED8_REG_2_REG(REG_WORK1, s); + RSCS_rri(d,REG_WORK1,0); + + MRS_CPSR(REG_WORK1); + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); + AND_rrr(REG_WORK1, REG_WORK1, REG_WORK2); + MSR_CPSR_r(REG_WORK1); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jff_NEGX_b,(W4 d, RR1 s)) + +MIDFUNC(2,jff_NEGX_w,(W4 d, RR2 s)) +{ + d=writereg(d,4); + s=readreg(s,4); + + MRS_CPSR(REG_WORK2); + CC_MVN_ri(NATIVE_CC_EQ, REG_WORK2, 0); + CC_MVN_ri(NATIVE_CC_NE, REG_WORK2, ARM_Z_FLAG); + + SIGNED16_REG_2_REG(REG_WORK1, s); + RSCS_rri(d,REG_WORK1,0); + + MRS_CPSR(REG_WORK1); + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); + AND_rrr(REG_WORK1, REG_WORK1, REG_WORK2); + MSR_CPSR_r(REG_WORK1); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jff_NEGX_w,(W4 d, RR2 s)) + +MIDFUNC(2,jff_NEGX_l,(W4 d, RR4 s)) +{ + d=writereg(d,4); + s=readreg(s,4); + + MRS_CPSR(REG_WORK2); + CC_MVN_ri(NATIVE_CC_EQ, REG_WORK2, 0); + CC_MVN_ri(NATIVE_CC_NE, REG_WORK2, ARM_Z_FLAG); + + RSCS_rri(d,s,0); + + MRS_CPSR(REG_WORK1); + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); + AND_rrr(REG_WORK1, REG_WORK1, REG_WORK2); + MSR_CPSR_r(REG_WORK1); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jff_NEGX_l,(W4 d, RR4 s)) + +/* + * NOT + * Operand Syntax: + * + * Operand Size: 8,16,32 + * + * X Not affected. + * N Set if the result is negative. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Always cleared. + * C Always cleared. 
+ * + */ +MIDFUNC(2,jnf_NOT,(W4 d, RR4 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + MVN_rr(d,s); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jnf_NOT,(W4 d, RR4 s)) + +MIDFUNC(2,jff_NOT_b,(W4 d, RR1 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + UNSIGNED8_REG_2_REG(d,s); + MSR_CPSRf_i(0); // Clear flags + MVNS_rr(d,d); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jff_NOT_b,(W4 d, RR1 s)) + +MIDFUNC(2,jff_NOT_w,(W4 d, RR2 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + UNSIGNED16_REG_2_REG(d,s); + MSR_CPSRf_i(0); // Clear flags + MVNS_rr(d,d); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jff_NOT_w,(W4 d, RR2 s)) + +MIDFUNC(2,jff_NOT_l,(W4 d, RR4 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + MSR_CPSRf_i(0); // Clear flags + MVNS_rr(d,s); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jff_NOT_l,(W4 d, RR4 s)) + +/* + * OR + * Operand Syntax: , Dn + * Dn, + * + * Operand Size: 8,16,32 + * + * X Not affected. + * N Set if the most significant bit of the result is set. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Always cleared. + * C Always cleared. 
+ *
+ */
+MIDFUNC(3,jnf_OR,(W4 d, RR4 s, RR4 v))
+{
+	// Constant-fold when both operands are known at compile time.
+	if (isconst(s) && isconst(v)) {
+		set_const(d,
+				live.state[s].val|live.state[v].val);
+		return;
+	}
+
+	v=readreg(v,4);
+	s=readreg(s,4);
+	d=writereg(d,4);
+
+	ORR_rrr(d, s, v);
+
+	unlock2(v);
+	unlock2(d);
+	unlock2(s);
+}
+MENDFUNC(3,jnf_OR,(W4 d, RR4 s, RR4 v))	// was (RW4 d, ...): MENDFUNC signature must mirror the MIDFUNC declaration
+
+MIDFUNC(3,jff_OR_b,(W4 d, RR1 s, RR1 v))
+{
+	v=readreg(v,4);
+	s=readreg(s,4);
+	d=writereg(d,4);
+
+	// Sign-extend the byte operands so ORRS produces the 68k N flag directly.
+	SIGNED8_REG_2_REG(REG_WORK1, s);
+	SIGNED8_REG_2_REG(REG_WORK2, v);
+	MSR_CPSRf_i(0);
+	ORRS_rrr(d, REG_WORK1, REG_WORK2);
+
+	unlock2(v);
+	unlock2(d);
+	unlock2(s);
+}
+MENDFUNC(3,jff_OR_b,(W4 d, RR1 s, RR1 v))	// was (RW4 d, ...)
+
+MIDFUNC(3,jff_OR_w,(W4 d, RR2 s, RR2 v))
+{
+	v=readreg(v,4);
+	s=readreg(s,4);
+	d=writereg(d,4);
+
+	// Sign-extend the word operands so ORRS produces the 68k N flag directly.
+	SIGNED16_REG_2_REG(REG_WORK1, s);
+	SIGNED16_REG_2_REG(REG_WORK2, v);
+	MSR_CPSRf_i(0);
+	ORRS_rrr(d, REG_WORK1, REG_WORK2);
+
+	unlock2(v);
+	unlock2(d);
+	unlock2(s);
+}
+MENDFUNC(3,jff_OR_w,(W4 d, RR2 s, RR2 v))	// was (RW4 d, ...)
+
+MIDFUNC(3,jff_OR_l,(W4 d, RR4 s, RR4 v))
+{
+	v=readreg(v,4);
+	s=readreg(s,4);
+	d=writereg(d,4);
+
+	MSR_CPSRf_i(0);
+	ORRS_rrr(d, s,v);
+
+	unlock2(v);
+	unlock2(d);
+	unlock2(s);
+}
+MENDFUNC(3,jff_OR_l,(W4 d, RR4 s, RR4 v))	// was (RW4 d, ...)
+
+/*
+ * ORI
+ * Operand Syntax: #<data>, CCR
+ *
+ * Operand Size: 8
+ *
+ * X — Set if bit 4 of immediate operand is one; unchanged otherwise.
+ * N — Set if bit 3 of immediate operand is one; unchanged otherwise.
+ * Z — Set if bit 2 of immediate operand is one; unchanged otherwise.
+ * V — Set if bit 1 of immediate operand is one; unchanged otherwise.
+ * C — Set if bit 0 of immediate operand is one; unchanged otherwise.
+ * + */ +MIDFUNC(1,jff_ORSR,(IMM s, IMM x)) +{ + MRS_CPSR(REG_WORK1); + ORR_rri(REG_WORK1, REG_WORK1, s); + MSR_CPSRf_r(REG_WORK1); + + if (x) { + compemu_raw_mov_l_ri(REG_WORK1, (uintptr)live.state[FLAGX].mem); + MOV_ri(REG_WORK2, 1); + STRB_rR(REG_WORK2, REG_WORK1); + } +} +MENDFUNC(1,jff_ORSR,(IMM s)) + +/* + * ROL + * Operand Syntax: Dx, Dy + * #, Dy + * + * + * Operand Size: 8,16,32 + * + * X Not affected. + * N Set if the most significant bit of the result is set. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Always cleared. + * C Set according to the last bit rotated out of the operand. Cleared when the rotate count is zero. + * + */ +MIDFUNC(3,jnf_ROL_b_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + LSL_rri(d,s,24); + ORR_rrrLSRi(d,d,d,8); + ORR_rrrLSRi(d,d,d,16); + ROR_rri(d,d,(32-(i&0x1f))); + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jnf_ROL_b_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jnf_ROL_w_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + LSL_rri(d,s,16); + ORR_rrrLSRi(d,d,d,16); + ROR_rri(d,d,(32-(i&0x1f))); + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jnf_ROL_w_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jnf_ROL_l_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + ROR_rri(d,s,(32-(i&0x1f))); + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jnf_ROL_l_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jff_ROL_b_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + LSL_rri(d,s,24); + ORR_rrrLSRi(d,d,d,8); + ORR_rrrLSRi(d,d,d,16); + MSR_CPSRf_i(0); + if (i) { + RORS_rri(d,d,(32-(i&0x1f))); + + MRS_CPSR(REG_WORK2); + TST_ri(d, 1); + CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG); + CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG); + MSR_CPSR_r(REG_WORK2); + + } else { + TST_rr(d,d); + } + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jff_ROL_b_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jff_ROL_w_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + 
d=writereg(d,4); + + LSL_rri(d,s,16); + ORR_rrrLSRi(d,d,d,16); + MSR_CPSRf_i(0); + if (i) { + RORS_rri(d,d,(32-(i&0x1f))); + + MRS_CPSR(REG_WORK2); + TST_ri(d, 1); + CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG); + CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG); + MSR_CPSR_r(REG_WORK2); + + } else { + TST_rr(d,d); + } + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jff_ROL_w_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jff_ROL_l_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + MSR_CPSRf_i(0); + if (i) { + RORS_rri(d,s,(32-(i&0x1f))); + + MRS_CPSR(REG_WORK2); + TST_ri(d, 1); + CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG); + CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG); + MSR_CPSR_r(REG_WORK2); + + } else { + MOVS_rr(d,s); + } + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jff_ROL_l_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jnf_ROL_b,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jnf_ROL_b_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + AND_rri(REG_WORK1, i, 0x1f); + RSB_rri(REG_WORK1, REG_WORK1, 32); + + LSL_rri(d,s,24); + ORR_rrrLSRi(d,d,d,8); + ORR_rrrLSRi(d,d,d,16); + ROR_rrr(d,d,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jnf_ROL_b,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jnf_ROL_w,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jnf_ROL_w_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + AND_rri(REG_WORK1, i, 0x1f); + RSB_rri(REG_WORK1, REG_WORK1, 32); + + LSL_rri(d,s,16); + ORR_rrrLSRi(d,d,d,16); + ROR_rrr(d,d,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jnf_ROL_w,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jnf_ROL_l,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jnf_ROL_l_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + AND_rri(REG_WORK1, i, 0x1f); + 
RSB_rri(REG_WORK1, REG_WORK1, 32); + + ROR_rrr(d,s,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jnf_ROL_l,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jff_ROL_b,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jff_ROL_b_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + AND_rri(REG_WORK1, i, 0x1f); + RSB_rri(REG_WORK1, REG_WORK1, 32); + + LSL_rri(d,s,24); + ORR_rrrLSRi(d,d,d,8); + ORR_rrrLSRi(d,d,d,16); + MSR_CPSRf_i(0); + RORS_rrr(d,d,REG_WORK1); + + MRS_CPSR(REG_WORK2); + TST_ri(d, 1); + CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG); + CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG); + MSR_CPSR_r(REG_WORK2); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jff_ROL_b,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jff_ROL_w,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jff_ROL_w_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + AND_rri(REG_WORK1, i, 0x1f); + RSB_rri(REG_WORK1, REG_WORK1, 32); + + LSL_rri(d,s,16); + ORR_rrrLSRi(d,d,d,16); + MSR_CPSRf_i(0); + RORS_rrr(d,d,REG_WORK1); + + MRS_CPSR(REG_WORK2); + TST_ri(d, 1); + CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG); + CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG); + MSR_CPSR_r(REG_WORK2); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jff_ROL_w,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jff_ROL_l,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jff_ROL_l_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + AND_rri(REG_WORK1, i, 0x1f); + RSB_rri(REG_WORK1, REG_WORK1, 32); + + MSR_CPSRf_i(0); + RORS_rrr(d,s,REG_WORK1); + + MRS_CPSR(REG_WORK2); + TST_ri(d, 1); + CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG); + CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG); + MSR_CPSR_r(REG_WORK2); + + unlock2(d); + unlock2(s); + 
unlock2(i); +} +MENDFUNC(3,jff_ROL_l,(W4 d, RR4 s, RR4 i)) + +/* + * ROLW + * Operand Syntax: + * + * Operand Size: 16 + * + * X Not affected. + * N Set if the most significant bit of the result is set. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Always cleared. + * C Set according to the last bit rotated out of the operand. Cleared when the rotate count is zero. + * + */ +MIDFUNC(2,jnf_ROLW,(W4 d, RR4 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + LSL_rri(d,s,16); + ORR_rrrLSRi(d,d,d,16); + ROR_rri(d,d,(32-1)); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jnf_ROLW,(W4 d, RR4 s)) + +MIDFUNC(2,jff_ROLW,(W4 d, RR4 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + LSL_rri(d,s,16); + ORR_rrrLSRi(d,d,d,16); + MSR_CPSRf_i(0); + RORS_rri(d,d,(32-1)); + + MRS_CPSR(REG_WORK2); + TST_ri(d, 1); + CC_ORR_rri(NATIVE_CC_NE, REG_WORK2, REG_WORK2, ARM_C_FLAG); + CC_BIC_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, ARM_C_FLAG); + MSR_CPSR_r(REG_WORK2); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jff_ROLW,(W4 d, RR4 s)) + +/* + * RORW + * Operand Syntax: + * + * Operand Size: 16 + * + * X Not affected. + * N Set if the most significant bit of the result is set. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Always cleared. + * C Set according to the last bit rotated out of the operand. + * + */ +MIDFUNC(2,jnf_RORW,(W4 d, RR4 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + LSL_rri(d,s,16); + ORR_rrrLSRi(d,d,d,16); + ROR_rri(d,d,1); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jnf_RORW,(W4 d, RR4 s)) + +MIDFUNC(2,jff_RORW,(W4 d, RR4 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + LSL_rri(d,s,16); + ORR_rrrLSRi(d,d,d,16); + MSR_CPSRf_i(0); + RORS_rri(d,d,1); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jff_RORW,(W4 d, RR4 s)) + +/* + * ROXL + * Operand Syntax: Dx, Dy + * #, Dy + * + * Operand Size: 8,16,32 + * + * X Set according to the last bit rotated out of the operand. Cleared when the rotate count is zero. 
+ * N Set if the most significant bit of the result is set. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Always cleared. + * C Set according to the last bit rotated out of the operand. Cleared when the rotate count is zero. + * + */ +MIDFUNC(3,jnf_ROXL_b_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + if (i > 0) { + UNSIGNED8_REG_2_REG(d,s); + LSL_rri(d,d,i); + CC_ORR_rri(NATIVE_CC_CS, d,d, (1 << (i - 1))); + if (i > 1) ORR_rrrLSRi(d,d,d,9); + } else { + MOV_rr(d,s); + } + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jnf_ROXL_b_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jnf_ROXL_w_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + if (i > 0) { + UNSIGNED16_REG_2_REG(d,s); + LSL_rri(d,d,i); + CC_ORR_rri(NATIVE_CC_CS, d,d, (1 << (i - 1))); + if (i > 1) ORR_rrrLSRi(d,d,d,17); + } else { + MOV_rr(d,s); + } + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jnf_ROXL_w_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jnf_ROXL_l_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + if (i > 0) { + LSL_rri(d,s,i); + CC_ORR_rri(NATIVE_CC_CS, d,d, (1 << (i - 1))); + if (i > 1) ORR_rrrLSRi(d,d,s,(32-i)); + } else { + MOV_rr(d,s); + } + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jnf_ROXL_l_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jff_ROXL_b_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + if (i > 0) { + UNSIGNED8_REG_2_REG(d,s); + LSL_rri(d,d,i); + CC_ORR_rri(NATIVE_CC_CS, d,d, (1 << (i - 1))); + if (i > 1) ORR_rrrLSRi(d,d,d,9); + TST_ri(s, (1<<(8-i))); + CC_MSR_CPSRf_i(NATIVE_CC_NE, ARM_C_FLAG); + CC_MSR_CPSRf_i(NATIVE_CC_EQ, 0); + } else { + MOV_rr(d,s); + MSR_CPSRf_i(0); + } + + SIGNED8_REG_2_REG(d,d); + TST_rr(d,d); + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jff_ROXL_b_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jff_ROXL_w_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + if (i > 0) { + UNSIGNED16_REG_2_REG(d,s); + LSL_rri(d,d,i); + CC_ORR_rri(NATIVE_CC_CS, d,d, (1 << (i - 
1))); + if (i > 1) ORR_rrrLSRi(d,d,d,17); + TST_ri(s, (1<<(16-i))); + CC_MSR_CPSRf_i(NATIVE_CC_NE, ARM_C_FLAG); + CC_MSR_CPSRf_i(NATIVE_CC_EQ, 0); + } else { + MOV_rr(d,s); + MSR_CPSRf_i(0); + } + + SIGNED16_REG_2_REG(d,d); + TST_rr(d,d); + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jff_ROXL_w_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jff_ROXL_l_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + if (i > 0) { + LSL_rri(d,s,i); + CC_ORR_rri(NATIVE_CC_CS, d,d, (1 << (i - 1))); + if (i > 1) ORR_rrrLSRi(d,d,s,(32-i)); + TST_ri(s, (1<<(32-i))); + CC_MSR_CPSRf_i(NATIVE_CC_NE, ARM_C_FLAG); + CC_MSR_CPSRf_i(NATIVE_CC_EQ, 0); + } else { + MOV_rr(d,s); + MSR_CPSRf_i(0); + } + + TST_rr(d,d); + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jff_ROXL_l_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jnf_ROXL_b,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jnf_ROXL_b_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + MOV_rr(d,s); + MRS_CPSR(REG_WORK2); + + AND_rri(REG_WORK1, i, 0x3f); + CMP_ri(REG_WORK1, 36); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 36); + CMP_ri(REG_WORK1, 18); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 18); + CMP_ri(REG_WORK1, 9); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 9); + CMP_ri(REG_WORK1, 0); +#if defined(ARMV6_ASSEMBLY) + BLE_i(8-1); +#else + BLE_i(9-1); +#endif + + SUB_rri(REG_WORK1, REG_WORK1, 1); + LSL_rri(d, d, 1); + MSR_CPSRf_r(REG_WORK2); + CC_ORR_rri(NATIVE_CC_CS, d,d,1); + LSL_rrr(d, d, REG_WORK1); + RSB_rri(REG_WORK1, REG_WORK1, 8); +#if defined(ARMV6_ASSEMBLY) + UXTB_rr(REG_WORK2, s); +#else + ROR_rri(REG_WORK2, s, 8); + LSR_rri(REG_WORK2, REG_WORK2, 24); +#endif + ORR_rrrLSRr(d,d,REG_WORK2,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jnf_ROXL_b,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jnf_ROXL_w,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jnf_ROXL_w_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + s=readreg(s,4); + 
i=readreg(i,4); + d=writereg(d,4); + + UNSIGNED16_REG_2_REG(d,s); + MRS_CPSR(REG_WORK2); + + CMP_ri(REG_WORK1, 34); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 34); + CMP_ri(REG_WORK1, 17); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 17); + CMP_ri(REG_WORK1, 0); +#if defined(ARMV6_ASSEMBLY) + BLE_i(8-1); +#else + BLE_i(9-1); +#endif + + SUB_rri(REG_WORK1, REG_WORK1, 1); + LSL_rri(d, d, 1); + MSR_CPSRf_r(REG_WORK2); + CC_ORR_rri(NATIVE_CC_CS, d,d,1); + LSL_rrr(d, d, REG_WORK1); + RSB_rri(REG_WORK1, REG_WORK1, 16); +#if defined(ARMV6_ASSEMBLY) + UXTH_rr(REG_WORK2, s); +#else + LSL_rri(REG_WORK2, s, 16); + LSR_rri(REG_WORK2, REG_WORK2, 16); +#endif + ORR_rrrLSRr(d,d,REG_WORK2,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jnf_ROXL_w,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jnf_ROXL_l,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jnf_ROXL_l_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + MOV_rr(d,s); + MRS_CPSR(REG_WORK2); + + CMP_ri(REG_WORK1, 33); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 33); + CMP_ri(REG_WORK1, 0); + BLE_i(7-1); + + SUB_rri(REG_WORK1, REG_WORK1, 1); + LSL_rri(d, d, 1); + MSR_CPSRf_r(REG_WORK2); + CC_ORR_rri(NATIVE_CC_CS, d,d,1); + LSL_rrr(d, d, REG_WORK1); + RSB_rri(REG_WORK1, REG_WORK1, 32); + ORR_rrrLSRr(d,d,s,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jnf_ROXL_l,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jff_ROXL_b,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jff_ROXL_b_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + MOV_rr(d,s); + MRS_CPSR(REG_WORK2); + + AND_rri(REG_WORK1, i, 0x3f); + CMP_ri(REG_WORK1, 36); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 36); + CMP_ri(REG_WORK1, 18); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 18); + CMP_ri(REG_WORK1, 9); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 9); + CMP_ri(REG_WORK1, 0); +#if 
defined(ARMV6_ASSEMBLY) + BLE_i(16-1); // label +#else + BLE_i(17-1); // label +#endif + + SUB_rri(REG_WORK1, REG_WORK1, 1); + LSL_rri(d, d, 1); + MSR_CPSRf_r(REG_WORK2); + CC_ORR_rri(NATIVE_CC_CS, d,d,1); + LSL_rrr(d, d, REG_WORK1); + + MOV_ri(REG_WORK2, 0x80); + LSR_rrr(REG_WORK2, REG_WORK2, REG_WORK1); + PUSH(REG_WORK2); + + RSB_rri(REG_WORK1, REG_WORK1, 8); +#if defined(ARMV6_ASSEMBLY) + UXTB_rr(REG_WORK2, s); +#else + ROR_rri(REG_WORK2, s, 8); + LSR_rri(REG_WORK2, REG_WORK2, 24); +#endif + ORR_rrrLSRr(d,d,REG_WORK2,REG_WORK1); + + POP(REG_WORK2); + TST_rr(s, REG_WORK2); + CC_MSR_CPSRf_i(NATIVE_CC_NE, ARM_C_FLAG); + CC_MSR_CPSRf_i(NATIVE_CC_EQ, 0); + B_i(0); // label2 + +// label: + MSR_CPSRf_i(0); + +// label2: + raw_sign_extend_8_rr(d,d); + TST_rr(d,d); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jff_ROXL_b,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jff_ROXL_w,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jff_ROXL_w_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + MOV_rr(d,s); + MRS_CPSR(REG_WORK2); + + AND_rri(REG_WORK1, i, 0x3f); + CMP_ri(REG_WORK1, 34); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 34); + CMP_ri(REG_WORK1, 17); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 17); + CMP_ri(REG_WORK1, 0); +#if defined(ARMV6_ASSEMBLY) + BLE_i(16-1); // label +#else + BLE_i(17-1); // label +#endif + + SUB_rri(REG_WORK1, REG_WORK1, 1); + LSL_rri(d, d, 1); + MSR_CPSRf_r(REG_WORK2); + CC_ORR_rri(NATIVE_CC_CS, d,d,1); + LSL_rrr(d, d, REG_WORK1); + + MOV_ri(REG_WORK2, 0x8000); + LSR_rrr(REG_WORK2, REG_WORK2, REG_WORK1); + PUSH(REG_WORK2); + +#if defined(ARMV6_ASSEMBLY) + UXTH_rr(REG_WORK2, s); +#else + LSL_rri(REG_WORK2, s, 16); + LSR_rri(REG_WORK2, REG_WORK2, 16); +#endif + + RSB_rri(REG_WORK1, REG_WORK1, 16); + ORR_rrrLSRr(d,d,REG_WORK2,REG_WORK1); + + POP(REG_WORK2); + TST_rr(s, REG_WORK2); + CC_MSR_CPSRf_i(NATIVE_CC_NE, ARM_C_FLAG); + CC_MSR_CPSRf_i(NATIVE_CC_EQ, 0); + B_i(0); 
// label2 + +// label: + MSR_CPSRf_i(0); + +// label2: + SIGNED16_REG_2_REG(d,d); + TST_rr(d,d); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jff_ROXL_w,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jff_ROXL_l,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jff_ROXL_l_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + MOV_rr(d,s); + MRS_CPSR(REG_WORK2); + + AND_rri(REG_WORK1, i, 0x3f); + CMP_ri(REG_WORK1, 33); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 33); + CMP_ri(REG_WORK1, 0); + BLE_i(13-1); // label + + SUB_rri(REG_WORK1, REG_WORK1, 1); + LSL_rri(d, d, 1); + MSR_CPSRf_r(REG_WORK2); + CC_ORR_rri(NATIVE_CC_CS, d,d,1); + LSL_rrr(d, d, REG_WORK1); + + MOV_ri(REG_WORK2, 0x80000000); + LSR_rrr(REG_WORK2, REG_WORK2, REG_WORK1); + + RSB_rri(REG_WORK1, REG_WORK1, 32); + ORR_rrrLSRr(d,d,s,REG_WORK1); + + TST_rr(s, REG_WORK2); + CC_MSR_CPSRf_i(NATIVE_CC_NE, ARM_C_FLAG); + CC_MSR_CPSRf_i(NATIVE_CC_EQ, 0); + B_i(0);// label2 + +// label: + MSR_CPSRf_i(0); + +// label2: + TST_rr(d,d); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jff_ROXL_l,(W4 d, RR4 s, RR4 i)) + +/* + * ROXLW + * Operand Syntax: + * + * Operand Size: 16 + * + * X Not affected. + * N Set if the most significant bit of the result is set. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Always cleared. + * C Set according to the last bit rotated out of the operand. 
+ * + */ +MIDFUNC(2,jnf_ROXLW,(W4 d, RR4 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + LSL_rri(d,s,1); + ADC_rri(d,d,0); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jnf_ROXLW,(W4 d, RR4 s)) + +MIDFUNC(2,jff_ROXLW,(W4 d, RR4 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + LSL_rri(d,s,1); + ADC_rri(d,d,0); + MSR_CPSRf_i(0); + LSLS_rri(d,d,15); + LSR_rri(d,d,16); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jff_ROXLW,(W4 d, RR4 s)) + +/* + * ROR + * Operand Syntax: Dx, Dy + * #, Dy + * + * + * Operand Size: 8,16,32 + * + * X Not affected. + * N Set if the most significant bit of the result is set. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Always cleared. + * C Set according to the last bit rotated out of the operand. Cleared when the rotate count is zero. + * + */ +MIDFUNC(3,jnf_ROR_b_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + LSL_rri(d,s,24); + ORR_rrrLSRi(d,d,d,8); + ORR_rrrLSRi(d,d,d,16); + ROR_rri(d,d,i); + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jnf_ROR_b_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jnf_ROR_w_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + LSL_rri(d,s,16); + ORR_rrrLSRi(d,d,d,16); + ROR_rri(d,d,i); + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jnf_ROR_w_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jnf_ROR_l_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + ROR_rri(d,s,i); + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jnf_ROR_l_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jff_ROR_b_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + LSL_rri(d,s,24); + ORR_rrrLSRi(d,d,d,8); + ORR_rrrLSRi(d,d,d,16); + MSR_CPSRf_i(0); + RORS_rri(d,d,i); + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jff_ROR_b_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jff_ROR_w_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + LSL_rri(d,s,16); + ORR_rrrLSRi(d,d,d,16); + MSR_CPSRf_i(0); + RORS_rrr(d,d,i); + + unlock2(d); + unlock2(s); +} 
+MENDFUNC(3,jff_ROR_w_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jff_ROR_l_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + MSR_CPSRf_i(0); + RORS_rrr(d,s,i); + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jff_ROR_l_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jnf_ROR_b,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jnf_ROR_b_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + LSL_rri(d,s,24); + ORR_rrrLSRi(d,d,d,8); + ORR_rrrLSRi(d,d,d,16); + ROR_rrr(d,d,i); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jnf_ROR_b,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jnf_ROR_w,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jnf_ROR_w_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + LSL_rri(d,s,16); + ORR_rrrLSRi(d,d,d,16); + ROR_rrr(d,d,i); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jnf_ROR_w,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jnf_ROR_l,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jnf_ROR_l_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + ROR_rrr(d,s,i); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jnf_ROR_l,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jff_ROR_b,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jff_ROR_b_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + LSL_rri(d,s,24); + ORR_rrrLSRi(d,d,d,8); + ORR_rrrLSRi(d,d,d,16); + MSR_CPSRf_i(0); + AND_rri(REG_WORK1, i, 63); + RORS_rrr(d,d,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jff_ROR_b,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jff_ROR_w,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jff_ROR_w_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + LSL_rri(d,s,16); + ORR_rrrLSRi(d,d,d,16); + MSR_CPSRf_i(0); + 
AND_rri(REG_WORK1, i, 63); + RORS_rrr(d,d,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jff_ROR_w,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jff_ROR_l,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jff_ROR_l_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + MSR_CPSRf_i(0); + AND_rri(REG_WORK1, i, 63); + RORS_rrr(d,s,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jff_ROR_l,(W4 d, RR4 s, RR4 i)) + +/* + * ROXR + * Operand Syntax: Dx, Dy + * #, Dy + * + * Operand Size: 8,16,32 + * + * X Set according to the last bit rotated out of the operand. Cleared when the rotate count is zero. + * N Set if the most significant bit of the result is set. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Always cleared. + * C Set according to the last bit rotated out of the operand. Cleared when the rotate count is zero. + * + */ +MIDFUNC(3,jnf_ROXR_b_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + if (i > 0) { + LSR_rri(d,s,i); + CC_ORR_rri(NATIVE_CC_CS, d,d, (0x80 >> (i - 1))); + if (i > 1) ORR_rrrLSLi(d,d,s,(9-i)); + } else { + MOV_rr(d,s); + } + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jnf_ROXR_b_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jnf_ROXR_w_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + if (i > 0) { + LSR_rri(d,s,i); + CC_ORR_rri(NATIVE_CC_CS, d,d, (0x8000 >> (i - 1))); + if (i > 1) ORR_rrrLSLi(d,d,s,(17-i)); + } else { + MOV_rr(d,s); + } + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jnf_ROXR_w_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jnf_ROXR_l_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + if (i > 0) { + LSR_rri(d,s,i); + CC_ORR_rri(NATIVE_CC_CS, d,d, (0x80000000 >> (i - 1))); + if (i > 1) ORR_rrrLSLi(d,d,s,(33-i)); + } else { + MOV_rr(d,s); + } + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jnf_ROXR_l_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jff_ROXR_b_imm,(W4 d, RR4 s, 
IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + if (i > 0) { + UNSIGNED8_REG_2_REG(d,s); + LSR_rri(d,d,i); + CC_ORR_rri(NATIVE_CC_CS, d,d, (0x80 >> (i - 1))); + if (i > 1) ORR_rrrLSLi(d,d,s,(9-i)); + TST_ri(s, (1<<(i-1))); + CC_MSR_CPSRf_i(NATIVE_CC_NE, ARM_C_FLAG); + CC_MSR_CPSRf_i(NATIVE_CC_EQ, 0); + } else { + MOV_rr(d,s); + MSR_CPSRf_i(0); + } + + SIGNED8_REG_2_REG(d,d); + TST_rr(d,d); + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jff_ROXR_b_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jff_ROXR_w_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + if (i > 0) { + UNSIGNED16_REG_2_REG(d,s); + LSR_rri(d,d,i); + CC_ORR_rri(NATIVE_CC_CS, d,d, (0x8000 >> (i - 1))); + if (i > 1) ORR_rrrLSLi(d,d,s,(17-i)); + TST_ri(s, (1<<(i-1))); + CC_MSR_CPSRf_i(NATIVE_CC_NE, ARM_C_FLAG); + CC_MSR_CPSRf_i(NATIVE_CC_EQ, 0); + } else { + MOV_rr(d,s); + MSR_CPSRf_i(0); + } + + SIGNED16_REG_2_REG(d,d); + TST_rr(d,d); + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jff_ROXR_w_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jff_ROXR_l_imm,(W4 d, RR4 s, IMM i)) +{ + s=readreg(s,4); + d=writereg(d,4); + + if (i > 0) { + LSR_rri(d,s,i); + CC_ORR_rri(NATIVE_CC_CS, d,d, (0x80000000 >> (i - 1))); + if (i > 1) ORR_rrrLSLi(d,d,s,(33-i)); + TST_ri(s, (1<<(i-1))); + CC_MSR_CPSRf_i(NATIVE_CC_NE, ARM_C_FLAG); + CC_MSR_CPSRf_i(NATIVE_CC_EQ, 0); + } else { + MOV_rr(d,s); + MSR_CPSRf_i(0); + } + + TST_rr(d,d); + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jff_ROXR_l_imm,(W4 d, RR4 s, IMM i)) + +MIDFUNC(3,jnf_ROXR_b,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jnf_ROXR_b_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + UNSIGNED8_REG_2_REG(d,s); + MRS_CPSR(REG_WORK2); + + AND_rri(REG_WORK1, i, 0x3f); + CMP_ri(REG_WORK1, 36); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 36); + CMP_ri(REG_WORK1, 18); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 18); + CMP_ri(REG_WORK1, 9); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 9); 
+ CMP_ri(REG_WORK1, 0); + BLE_i(7-1); + + SUB_rri(REG_WORK1, REG_WORK1, 1); + LSR_rri(d, d, 1); + MSR_CPSRf_r(REG_WORK2); + CC_ORR_rri(NATIVE_CC_CS, d,d,0x80); + LSR_rrr(d, d, REG_WORK1); + RSB_rri(REG_WORK1, REG_WORK1, 8); + ORR_rrrLSLr(d,d,s,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jnf_ROXR_b,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jnf_ROXR_w,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jnf_ROXR_w_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + UNSIGNED16_REG_2_REG(d,s); + MRS_CPSR(REG_WORK2); + + CMP_ri(REG_WORK1, 34); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 34); + CMP_ri(REG_WORK1, 17); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 17); + CMP_ri(REG_WORK1, 0); + BLE_i(7-1); + + SUB_rri(REG_WORK1, REG_WORK1, 1); + LSR_rri(d, d, 1); + MSR_CPSRf_r(REG_WORK2); + CC_ORR_rri(NATIVE_CC_CS, d,d,0x8000); + LSR_rrr(d, d, REG_WORK1); + RSB_rri(REG_WORK1, REG_WORK1, 16); + ORR_rrrLSLr(d,d,s,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jnf_ROXR_w,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jnf_ROXR_l,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jnf_ROXR_l_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + MOV_rr(d,s); + MRS_CPSR(REG_WORK2); + + CMP_ri(REG_WORK1, 33); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 33); + CMP_ri(REG_WORK1, 0); + BLE_i(7-1); + + SUB_rri(REG_WORK1, REG_WORK1, 1); + LSR_rri(d, d, 1); + MSR_CPSRf_r(REG_WORK2); + CC_ORR_rri(NATIVE_CC_CS, d,d,0x80000000); + LSR_rrr(d, d, REG_WORK1); + RSB_rri(REG_WORK1, REG_WORK1, 32); + ORR_rrrLSLr(d,d,s,REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jnf_ROXR_l,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jff_ROXR_b,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jff_ROXR_b_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + 
UNSIGNED8_REG_2_REG(d,s); + MRS_CPSR(REG_WORK2); + + AND_rri(REG_WORK1, i, 0x3f); + CMP_ri(REG_WORK1, 36); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 36); + CMP_ri(REG_WORK1, 18); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 18); + CMP_ri(REG_WORK1, 9); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 9); + CMP_ri(REG_WORK1, 0); + BLE_i(13-1); // label + + SUB_rri(REG_WORK1, REG_WORK1, 1); + LSR_rri(d, d, 1); + MSR_CPSRf_r(REG_WORK2); + CC_ORR_rri(NATIVE_CC_CS, d,d,0x80); + LSR_rrr(d, d, REG_WORK1); + + MOV_ri(REG_WORK2, 1); + LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1); + + RSB_rri(REG_WORK1, REG_WORK1, 8); + ORR_rrrLSLr(d,d,s,REG_WORK1); + + TST_rr(s, REG_WORK2); + CC_MSR_CPSRf_i(NATIVE_CC_NE, ARM_C_FLAG); + CC_MSR_CPSRf_i(NATIVE_CC_EQ, 0); + B_i(0);// label2 + +// label: + MSR_CPSRf_i(0); + +// label2: + SIGNED8_REG_2_REG(d,d); + TST_rr(d,d); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jff_ROXR_b,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jff_ROXR_w,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jff_ROXR_w_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + UNSIGNED16_REG_2_REG(d,s); + MRS_CPSR(REG_WORK2); + + AND_rri(REG_WORK1, i, 0x3f); + CMP_ri(REG_WORK1, 34); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 34); + CMP_ri(REG_WORK1, 17); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 17); + CMP_ri(REG_WORK1, 0); + BLE_i(13-1); // label + + SUB_rri(REG_WORK1, REG_WORK1, 1); + LSR_rri(d, d, 1); + MSR_CPSRf_r(REG_WORK2); + CC_ORR_rri(NATIVE_CC_CS, d,d,0x8000); + LSR_rrr(d, d, REG_WORK1); + + MOV_ri(REG_WORK2, 1); + LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1); + + RSB_rri(REG_WORK1, REG_WORK1, 16); + ORR_rrrLSLr(d,d,s,REG_WORK1); + + TST_rr(s, REG_WORK2); + CC_MSR_CPSRf_i(NATIVE_CC_NE, ARM_C_FLAG); + CC_MSR_CPSRf_i(NATIVE_CC_EQ, 0); + B_i(0);// label2 + +// label: + MSR_CPSRf_i(0); + +// label2: + SIGNED16_REG_2_REG(d,d); + TST_rr(d,d); + + unlock2(d); + unlock2(s); + unlock2(i); +} 
+MENDFUNC(3,jff_ROXR_w,(W4 d, RR4 s, RR4 i)) + +MIDFUNC(3,jff_ROXR_l,(W4 d, RR4 s, RR4 i)) +{ + if (isconst(i)) { + COMPCALL(jff_ROXR_l_imm)(d,s,(uae_u8)live.state[i].val); + return; + } + + s=readreg(s,4); + i=readreg(i,4); + d=writereg(d,4); + + MOV_rr(d,s); + MRS_CPSR(REG_WORK2); + + AND_rri(REG_WORK1, i, 0x3f); + CMP_ri(REG_WORK1, 33); + CC_SUB_rri(NATIVE_CC_GE, REG_WORK1, REG_WORK1, 33); + CMP_ri(REG_WORK1, 0); + BLE_i(13-1); // label + + SUB_rri(REG_WORK1, REG_WORK1, 1); + LSR_rri(d, d, 1); + MSR_CPSRf_r(REG_WORK2); + CC_ORR_rri(NATIVE_CC_CS, d,d,0x80000000); + LSR_rrr(d, d, REG_WORK1); + + MOV_ri(REG_WORK2, 1); + LSL_rrr(REG_WORK2, REG_WORK2, REG_WORK1); + + RSB_rri(REG_WORK1, REG_WORK1, 32); + ORR_rrrLSLr(d,d,s,REG_WORK1); + + TST_rr(s, REG_WORK2); + CC_MSR_CPSRf_i(NATIVE_CC_NE, ARM_C_FLAG); + CC_MSR_CPSRf_i(NATIVE_CC_EQ, 0); + B_i(0);// label2 + +// label: + MSR_CPSRf_i(0); + +// label2: + TST_rr(d,d); + + unlock2(d); + unlock2(s); + unlock2(i); +} +MENDFUNC(3,jff_ROXR_l,(W4 d, RR4 s, RR4 i)) + +/* + * ROXRW + * Operand Syntax: + * + * Operand Size: 16 + * + * X Not affected. + * N Set if the most significant bit of the result is set. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. + * V Always cleared. + * C Set according to the last bit rotated out of the operand. + * + */ +MIDFUNC(2,jnf_ROXRW,(W4 d, RR4 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + LSL_rri(d,s,16); + RRX_rr(d,d); + LSR_rri(d,d,16); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jnf_ROXRW,(W4 d, RR4 s)) + +MIDFUNC(2,jff_ROXRW,(W4 d, RR4 s)) +{ + s=readreg(s,4); + d=writereg(d,4); + + LSL_rri(d,s,16); + MSR_CPSRf_i(0); + RRXS_rr(d,d); + LSR_rri(d,d,16); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jff_ROXRW,(W4 d, RR4 s)) + +/* + * SUB + * Operand Syntax: , Dn + * Dn, + * + * Operand Size: 8,16,32 + * + * X Set the same as the carry bit. + * N Set if the result is negative. Cleared otherwise. + * Z Set if the result is zero. Cleared otherwise. 
+ * V Set if an overflow is generated. Cleared otherwise. + * C Set if a carry is generated. Cleared otherwise. + * + */ +MIDFUNC(3,jnf_SUB_b_imm,(W4 d, RR4 s, IMM v)) +{ + if (isconst(s)) { + set_const(d,live.state[s].val-v); + return; + } + + s=readreg(s,4); + d=writereg(d,4); + + UNSIGNED8_IMM_2_REG(REG_WORK1, (uint8)v); + SUB_rrr(d,s,REG_WORK1); + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jnf_SUB_b_imm,(W4 d, RR4 s, IMM v)) + +MIDFUNC(3,jnf_SUB_b,(W4 d, RR4 s, RR4 v)) +{ + if (isconst(v)) { + COMPCALL(jnf_SUB_b_imm)(d,s,live.state[v].val); + return; + } + + // d has to be different to s and v + v=readreg(v,4); + s=readreg(s,4); + d=writereg(d,4); + + SUB_rrr(d,s,v); + + unlock2(d); + unlock2(s); + unlock2(v); +} +MENDFUNC(3,jnf_SUB_b,(W4 d, RR4 s, RR4 v)) + +MIDFUNC(3,jnf_SUB_w_imm,(W4 d, RR4 s, IMM v)) +{ + if (isconst(s)) { + set_const(d,live.state[s].val-v); + return; + } + + s=readreg(s,4); + d=writereg(d,4); + + UNSIGNED16_IMM_2_REG(REG_WORK1, (uint16)v); + SUB_rrr(d,s,REG_WORK1); + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jnf_SUB_w_imm,(W4 d, RR4 s, IMM v)) + +MIDFUNC(3,jnf_SUB_w,(W4 d, RR4 s, RR4 v)) +{ + if (isconst(v)) { + COMPCALL(jnf_SUB_w_imm)(d,s,live.state[v].val); + return; + } + + // d has to be different to s and v + v=readreg(v,4); + s=readreg(s,4); + d=writereg(d,4); + + SUB_rrr(d,s,v); + + unlock2(d); + unlock2(s); + unlock2(v); +} +MENDFUNC(3,jnf_SUB_w,(W4 d, RR4 s, RR4 v)) + +MIDFUNC(3,jnf_SUB_l_imm,(W4 d, RR4 s, IMM v)) +{ + if (isconst(s)) { + set_const(d,live.state[s].val-v); + return; + } + + s=readreg(s,4); + d=writereg(d,4); + + compemu_raw_mov_l_ri(REG_WORK1, v); + SUB_rrr(d,s,REG_WORK1); + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jnf_SUB_l_imm,(W4 d, RR4 s, IMM v)) + +MIDFUNC(3,jnf_SUB_l,(W4 d, RR4 s, RR4 v)) +{ + if (isconst(v)) { + COMPCALL(jnf_SUB_l_imm)(d,s,live.state[v].val); + return; + } + + // d has to be different to s and v + v=readreg(v,4); + s=readreg(s,4); + d=writereg(d,4); + + SUB_rrr(d,s,v); + + unlock2(d); + 
unlock2(s); + unlock2(v); +} +MENDFUNC(3,jnf_SUB_l,(W4 d, RR4 s, RR4 v)) + +MIDFUNC(3,jff_SUB_b_imm,(W4 d, RR1 s, IMM v)) +{ + s=readreg(s,4); + d=writereg(d,4); + + SIGNED8_IMM_2_REG(REG_WORK2, (uint8)v); + SIGNED8_REG_2_REG(REG_WORK1, s); + SUBS_rrr(d,REG_WORK1,REG_WORK2); + + // Todo: Handle this with inverted carry + MRS_CPSR(REG_WORK1);// mrs r2, CPSR + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);// eor r2, r2, #0x20000000 + MSR_CPSR_r(REG_WORK1);// msr CPSR_fc, r2 + // inverted_carry = true; + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jff_SUB_b_imm,(W4 d, RR1 s, IMM v)) + +MIDFUNC(3,jff_SUB_b,(W4 d, RR1 s, RR1 v)) +{ + if (isconst(v)) { + COMPCALL(jff_SUB_b_imm)(d,s,live.state[v].val); + return; + } + + v=readreg(v,4); + s=readreg(s,4); + d=writereg(d,4); + + SIGNED8_REG_2_REG(REG_WORK1, s); + SIGNED8_REG_2_REG(REG_WORK2, v); + SUBS_rrr(d,REG_WORK1,REG_WORK2); + + // Todo: Handle this with inverted carry + MRS_CPSR(REG_WORK1);// mrs r2, CPSR + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);// eor r2, r2, #0x20000000 + MSR_CPSR_r(REG_WORK1);// msr CPSR_fc, r2 + // inverted_carry = true; + + unlock2(d); + unlock2(s); + unlock2(v); +} +MENDFUNC(3,jff_SUB_b,(W4 d, RR1 s, RR1 v)) + +MIDFUNC(3,jff_SUB_w_imm,(W4 d, RR2 s, IMM v)) +{ + s=readreg(s,4); + d=writereg(d,4); + + SIGNED16_IMM_2_REG(REG_WORK2, (uint16)v); + SIGNED16_REG_2_REG(REG_WORK1, s); + SUBS_rrr(d,REG_WORK1,REG_WORK2); + + // Todo: Handle this with inverted carry + MRS_CPSR(REG_WORK1);// mrs r2, CPSR + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);// eor r2, r2, #0x20000000 + MSR_CPSR_r(REG_WORK1);// msr CPSR_fc, r2 + // inverted_carry = true; + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jitc_SUB_ff_w2l_rri,(W4 d, RR2 s, IMM v)) + +MIDFUNC(3,jff_SUB_w,(W4 d, RR2 s, RR2 v)) +{ + if (isconst(v)) { + COMPCALL(jff_SUB_w_imm)(d,s,live.state[v].val); + return; + } + + v=readreg(v,4); + s=readreg(s,4); + d=writereg(d,4); + + SIGNED16_REG_2_REG(REG_WORK1, s); + SIGNED16_REG_2_REG(REG_WORK2, v); + 
SUBS_rrr(d,REG_WORK1,REG_WORK2); + + // Todo: Handle this with inverted carry + MRS_CPSR(REG_WORK1);// mrs r2, CPSR + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);// eor r2, r2, #0x20000000 + MSR_CPSR_r(REG_WORK1);// msr CPSR_fc, r2 + // inverted_carry = true; + + unlock2(d); + unlock2(s); + unlock2(v); +} +MENDFUNC(3,jff_SUB_w,(W4 d, RR2 s, RR2 v)) + +MIDFUNC(3,jff_SUB_l_imm,(W4 d, RR4 s, IMM v)) +{ + s=readreg(s,4); + d=writereg(d,4); + + compemu_raw_mov_l_ri(REG_WORK2, v); + SUBS_rrr(d,s,REG_WORK2); + + // Todo: Handle this with inverted carry + MRS_CPSR(REG_WORK1);// mrs r2, CPSR + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);// eor r2, r2, #0x20000000 + MSR_CPSR_r(REG_WORK1);// msr CPSR_fc, r2 + // inverted_carry = true; + + unlock2(d); + unlock2(s); +} +MENDFUNC(3,jff_SUB_l_imm,(W4 d, RR4 s, IMM v)) + +MIDFUNC(3,jff_SUB_l,(W4 d, RR4 s, RR4 v)) +{ + if (isconst(v)) { + COMPCALL(jff_SUB_l_imm)(d,s,live.state[v].val); + return; + } + + v=readreg(v,4); + s=readreg(s,4); + d=writereg(d,4); + + SUBS_rrr(d,s,v); + + // Todo: Handle this with inverted carry + MRS_CPSR(REG_WORK1);// mrs r2, CPSR + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);// eor r2, r2, #0x20000000 + MSR_CPSR_r(REG_WORK1);// msr CPSR_fc, r2 + // inverted_carry = true; + + unlock2(d); + unlock2(s); + unlock2(v); +} +MENDFUNC(3,jff_SUB_l,(W4 d, RR4 s, RR4 v)) + +/* + * SUBA + * + * Operand Syntax: , Dn + * + * Operand Size: 16,32 + * + * Flags: Not affected. 
+ * + */ +MIDFUNC(2,jnf_SUBA_b,(W4 d, RR1 s)) +{ + s=readreg(s,4); + d=rmw(d,4,4); + + SIGNED8_REG_2_REG(REG_WORK1,s); + SUB_rrr(d,d,REG_WORK1); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jnf_SUBA_b,(W4 d, RR1 s)) + +MIDFUNC(2,jnf_SUBA_w,(W4 d, RR2 s)) +{ + s=readreg(s,4); + d=rmw(d,4,4); + + SIGNED16_REG_2_REG(REG_WORK1,s); + SUB_rrr(d,d,REG_WORK1); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jnf_SUBA_w,(W4 d, RR2 s)) + +MIDFUNC(2,jnf_SUBA_l,(W4 d, RR4 s)) +{ + s=readreg(s,4); + d=rmw(d,4,4); + + SUB_rrr(d,d,s); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,jnf_SUBA_l,(W4 d, RR4 s)) + +/* + * SUBX + * Operand Syntax: Dy, Dx + * -(Ay), -(Ax) + * + * Operand Size: 8,16,32 + * + * X Set the same as the carry bit. + * N Set if the result is negative. Cleared otherwise. + * Z Cleared if the result is nonzero. Unchanged otherwise. + * V Set if an overflow is generated. Cleared otherwise. + * C Set if a carry is generated. Cleared otherwise. + * + * Attention: Z is cleared only if the result is nonzero. 
Unchanged otherwise + * + */ +MIDFUNC(3,jnf_SUBX,(W4 d, RR4 s, RR4 v)) +{ + s=readreg(s,4); + v=readreg(v,4); + d=writereg(d,4); + + SBC_rrr(d,s,v); + + unlock2(d); + unlock2(s); + unlock2(v); +} +MENDFUNC(3,jnf_SUBX,(W4 d, RR4 s, RR4 v)) + +MIDFUNC(3,jff_SUBX_b,(W4 d, RR1 s, RR1 v)) +{ + s=readreg(s,4); + v=readreg(v,4); + d=writereg(d,4); + + MRS_CPSR(REG_WORK1); + CC_MVN_ri(NATIVE_CC_EQ, REG_WORK1, 0); + CC_MVN_ri(NATIVE_CC_NE, REG_WORK1, ARM_Z_FLAG); + PUSH(REG_WORK1); + + SIGNED8_REG_2_REG(REG_WORK1, s); + SIGNED8_REG_2_REG(REG_WORK2, v); + SBCS_rrr(d,REG_WORK1,REG_WORK2); + + POP(REG_WORK2); + MRS_CPSR(REG_WORK1); + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); + AND_rrr(REG_WORK1, REG_WORK1, REG_WORK2); + MSR_CPSR_r(REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(v); +} +MENDFUNC(3,jff_SUBX_b,(W4 d, RR1 s, RR1 v)) + +MIDFUNC(3,jff_SUBX_w,(W4 d, RR2 s, RR2 v)) +{ + s=readreg(s,4); + v=readreg(v,4); + d=writereg(d,4); + + MRS_CPSR(REG_WORK1); + CC_MVN_ri(NATIVE_CC_EQ, REG_WORK1, 0); + CC_MVN_ri(NATIVE_CC_NE, REG_WORK1, ARM_Z_FLAG); + PUSH(REG_WORK1); + + SIGNED16_REG_2_REG(REG_WORK1, s); + SIGNED16_REG_2_REG(REG_WORK2, v); + SBCS_rrr(d,REG_WORK1,REG_WORK2); + + POP(REG_WORK2); + MRS_CPSR(REG_WORK1); + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); + AND_rrr(REG_WORK1, REG_WORK1, REG_WORK2); + MSR_CPSR_r(REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(v); +} +MENDFUNC(3,jff_SUBX_w,(W4 d, RR2 s, RR2 v)) + +MIDFUNC(3,jff_SUBX_l,(W4 d, RR4 s, RR4 v)) +{ + s=readreg(s,4); + v=readreg(v,4); + d=writereg(d,4); + + MRS_CPSR(REG_WORK2); + CC_MVN_ri(NATIVE_CC_EQ, REG_WORK2, 0); + CC_MVN_ri(NATIVE_CC_NE, REG_WORK2, ARM_Z_FLAG); + + SBCS_rrr(d,s,v); + + MRS_CPSR(REG_WORK1); + EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); + AND_rrr(REG_WORK1, REG_WORK1, REG_WORK2); + MSR_CPSR_r(REG_WORK1); + + unlock2(d); + unlock2(s); + unlock2(v); +} +MENDFUNC(3,jff_SUBX_l,(W4 d, RR4 s, RR4 v)) + +/* + * SWAP + * Operand Syntax: Dn + * + * Operand Size: 16 + * + * X Not affected. 
+ * N Set if the most significant bit of the 32-bit result is set. Cleared otherwise. + * Z Set if the 32-bit result is zero. Cleared otherwise. + * V Always cleared. + * C Always cleared. + * + */ +MIDFUNC(1,jnf_SWAP,(RW4 d)) +{ + d=rmw(d,4,4); + + ROR_rri(d,d,16); + + unlock2(d); +} +MENDFUNC(1,jnf_SWAP,(RW4 d)) + +MIDFUNC(1,jff_SWAP,(RW4 d)) +{ + d=rmw(d,4,4); + + ROR_rri(d,d,16); + MSR_CPSRf_i(0); + TST_rr(d,d); + + unlock2(d); +} +MENDFUNC(1,jff_SWAP,(RW4 d)) + +/* + * TST + * Operand Syntax: + * + * Operand Size: 8,16,32 + * + * X Not affected. + * N Set if the operand is negative. Cleared otherwise. + * Z Set if the operand is zero. Cleared otherwise. + * V Always cleared. + * C Always cleared. + * + */ +MIDFUNC(1,jff_TST_b,(RR1 s)) +{ + if (isconst(s)) { + SIGNED8_IMM_2_REG(REG_WORK1, (uint8)live.state[s].val); + } else { + s=readreg(s,4); + SIGNED8_REG_2_REG(REG_WORK1, s); + unlock2(s); + } + MSR_CPSRf_i(0); + TST_rr(REG_WORK1,REG_WORK1); +} +MENDFUNC(1,jff_TST_b,(RR1 s)) + +MIDFUNC(1,jff_TST_w,(RR2 s)) +{ + if (isconst(s)) { + SIGNED16_IMM_2_REG(REG_WORK1, (uint16)live.state[s].val); + } else { + s=readreg(s,4); + SIGNED16_REG_2_REG(REG_WORK1, s); + unlock2(s); + } + MSR_CPSRf_i(0); + TST_rr(REG_WORK1,REG_WORK1); +} +MENDFUNC(1,jff_TST_w,(RR2 s)) + +MIDFUNC(1,jff_TST_l,(RR4 s)) +{ + MSR_CPSRf_i(0); + + if (isconst(s)) { + compemu_raw_mov_l_ri(REG_WORK1, live.state[s].val); + TST_rr(REG_WORK1,REG_WORK1); + } + else { + s=readreg(s,4); + TST_rr(s,s); + unlock2(s); + } +} +MENDFUNC(1,jff_TST_l,(RR4 s)) diff --git a/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_arm2.h b/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_arm2.h new file mode 100644 index 00000000..ecbc2fdf --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_arm2.h @@ -0,0 +1,348 @@ +/* + * compiler/compemu_midfunc_arm2.h - Native MIDFUNCS for ARM (JIT v2) + * + * Copyright (c) 2014 Jens Heitmann of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + 
* + * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * + * Adaptation for Basilisk II and improvements, copyright 2000-2002 + * Gwenole Beauchesne + * + * Basilisk II (C) 1997-2002 Christian Bauer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Note: + * File is included by compemu.h + * + */ + +// Arm optimized midfunc +extern const uae_u32 ARM_CCR_MAP[]; + +DECLARE_MIDFUNC(restore_inverted_carry(void)); + +// ADD +DECLARE_MIDFUNC(jnf_ADD(W4 d, RR4 s, RR4 v)); +DECLARE_MIDFUNC(jnf_ADD_imm(W4 d, RR4 s, IMM v)); +DECLARE_MIDFUNC(jff_ADD_b(W4 d, RR1 s, RR1 v)); +DECLARE_MIDFUNC(jff_ADD_w(W4 d, RR2 s, RR2 v)); +DECLARE_MIDFUNC(jff_ADD_l(W4 d, RR4 s, RR4 v)); +DECLARE_MIDFUNC(jff_ADD_b_imm(W4 d, RR1 s, IMM v)); +DECLARE_MIDFUNC(jff_ADD_w_imm(W4 d, RR2 s, IMM v)); +DECLARE_MIDFUNC(jff_ADD_l_imm(W4 d, RR4 s, IMM v)); + +// ADDA +DECLARE_MIDFUNC(jnf_ADDA_b(W4 d, RR1 s)); +DECLARE_MIDFUNC(jnf_ADDA_w(W4 d, RR2 s)); +DECLARE_MIDFUNC(jnf_ADDA_l(W4 d, RR4 s)); + +// ADDX +DECLARE_MIDFUNC(jnf_ADDX(W4 d, RR4 s, RR4 v)); +DECLARE_MIDFUNC(jff_ADDX_b(W4 d, RR1 s, RR4 v)); +DECLARE_MIDFUNC(jff_ADDX_w(W4 d, RR2 s, RR4 v)); +DECLARE_MIDFUNC(jff_ADDX_l(W4 d, RR4 s, RR4 v)); + +// AND +DECLARE_MIDFUNC(jnf_AND(W4 d, RR4 s, RR4 v)); +DECLARE_MIDFUNC(jff_AND_b(W4 d, RR1 s, RR1 v)); 
+DECLARE_MIDFUNC(jff_AND_w(W4 d, RR2 s, RR2 v)); +DECLARE_MIDFUNC(jff_AND_l(W4 d, RR4 s, RR4 v)); + +// ANDSR +DECLARE_MIDFUNC(jff_ANDSR(IMM s, IMM x)); + +// ASL +DECLARE_MIDFUNC(jff_ASL_b_imm(W4 d, RR4 s, IMM i)); +DECLARE_MIDFUNC(jff_ASL_w_imm(W4 d, RR4 s, IMM i)); +DECLARE_MIDFUNC(jff_ASL_l_imm(W4 d, RR4 s, IMM i)); +DECLARE_MIDFUNC(jff_ASL_b_reg(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jff_ASL_w_reg(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jff_ASL_l_reg(W4 d, RR4 s, RR4 i)); + +// ASLW +DECLARE_MIDFUNC(jff_ASLW(W4 d, RR4 s)); +DECLARE_MIDFUNC(jnf_ASLW(W4 d, RR4 s)); + +// ASR +DECLARE_MIDFUNC(jnf_ASR_b_imm(W4 d, RR4 s, IMM i)); +DECLARE_MIDFUNC(jnf_ASR_w_imm(W4 d, RR4 s, IMM i)); +DECLARE_MIDFUNC(jnf_ASR_l_imm(W4 d, RR4 s, IMM i)); +DECLARE_MIDFUNC(jff_ASR_b_imm(W4 d, RR4 s, IMM i)); +DECLARE_MIDFUNC(jff_ASR_w_imm(W4 d, RR4 s, IMM i)); +DECLARE_MIDFUNC(jff_ASR_l_imm(W4 d, RR4 s, IMM i)); +DECLARE_MIDFUNC(jnf_ASR_b_reg(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jnf_ASR_w_reg(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jnf_ASR_l_reg(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jff_ASR_b_reg(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jff_ASR_w_reg(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jff_ASR_l_reg(W4 d, RR4 s, RR4 i)); + +// ASRW +DECLARE_MIDFUNC(jff_ASRW(W4 d, RR4 s)); +DECLARE_MIDFUNC(jnf_ASRW(W4 d, RR4 s)); + +// BCHG +DECLARE_MIDFUNC(jnf_BCHG_b_imm(RW4 d, IMM s)); +DECLARE_MIDFUNC(jnf_BCHG_l_imm(RW4 d, IMM s)); + +DECLARE_MIDFUNC(jff_BCHG_b_imm(RW4 d, IMM s)); +DECLARE_MIDFUNC(jff_BCHG_l_imm(RW4 d, IMM s)); + +DECLARE_MIDFUNC(jnf_BCHG_b(RW4 d, RR4 s)); +DECLARE_MIDFUNC(jnf_BCHG_l(RW4 d, RR4 s)); + +DECLARE_MIDFUNC(jff_BCHG_b(RW4 d, RR4 s)); +DECLARE_MIDFUNC(jff_BCHG_l(RW4 d, RR4 s)); + +// BCLR +DECLARE_MIDFUNC(jnf_BCLR_b_imm(RW4 d, IMM s)); +DECLARE_MIDFUNC(jnf_BCLR_l_imm(RW4 d, IMM s)); + +DECLARE_MIDFUNC(jnf_BCLR_b(RW4 d, RR4 s)); +DECLARE_MIDFUNC(jnf_BCLR_l(RW4 d, RR4 s)); + +DECLARE_MIDFUNC(jff_BCLR_b_imm(RW4 d, IMM s)); +DECLARE_MIDFUNC(jff_BCLR_l_imm(RW4 d, IMM s)); + 
+DECLARE_MIDFUNC(jff_BCLR_b(RW4 d, RR4 s)); +DECLARE_MIDFUNC(jff_BCLR_l(RW4 d, RR4 s)); + +// BSET +DECLARE_MIDFUNC(jnf_BSET_b_imm(RW4 d, IMM s)); +DECLARE_MIDFUNC(jnf_BSET_l_imm(RW4 d, IMM s)); + +DECLARE_MIDFUNC(jnf_BSET_b(RW4 d, RR4 s)); +DECLARE_MIDFUNC(jnf_BSET_l(RW4 d, RR4 s)); + +DECLARE_MIDFUNC(jff_BSET_b_imm(RW4 d, IMM s)); +DECLARE_MIDFUNC(jff_BSET_l_imm(RW4 d, IMM s)); + +DECLARE_MIDFUNC(jff_BSET_b(RW4 d, RR4 s)); +DECLARE_MIDFUNC(jff_BSET_l(RW4 d, RR4 s)); + +// BTST +DECLARE_MIDFUNC(jff_BTST_b_imm(RR4 d, IMM s)); +DECLARE_MIDFUNC(jff_BTST_l_imm(RR4 d, IMM s)); + +DECLARE_MIDFUNC(jff_BTST_b(RR4 d, RR4 s)); +DECLARE_MIDFUNC(jff_BTST_l(RR4 d, RR4 s)); + +// CLR +DECLARE_MIDFUNC (jnf_CLR(W4 d)); +DECLARE_MIDFUNC (jff_CLR(W4 d)); + +// CMP +DECLARE_MIDFUNC(jff_CMP_b(RR1 d, RR1 s)); +DECLARE_MIDFUNC(jff_CMP_w(RR2 d, RR2 s)); +DECLARE_MIDFUNC(jff_CMP_l(RR4 d, RR4 s)); + +// CMPA +DECLARE_MIDFUNC(jff_CMPA_b(RR1 d, RR1 s)); +DECLARE_MIDFUNC(jff_CMPA_w(RR2 d, RR2 s)); +DECLARE_MIDFUNC(jff_CMPA_l(RR4 d, RR4 s)); + +// EOR +DECLARE_MIDFUNC(jnf_EOR(W4 d, RR4 s, RR4 v)); +DECLARE_MIDFUNC(jff_EOR_b(W4 d, RR1 s, RR1 v)); +DECLARE_MIDFUNC(jff_EOR_w(W4 d, RR2 s, RR2 v)); +DECLARE_MIDFUNC(jff_EOR_l(W4 d, RR4 s, RR4 v)); + +// EORSR +DECLARE_MIDFUNC(jff_EORSR(IMM s, IMM x)); + +// EXT +DECLARE_MIDFUNC(jnf_EXT_b(W4 d, RR4 s)); +DECLARE_MIDFUNC(jnf_EXT_w(W4 d, RR4 s)); +DECLARE_MIDFUNC(jnf_EXT_l(W4 d, RR4 s)); +DECLARE_MIDFUNC(jff_EXT_b(W4 d, RR4 s)); +DECLARE_MIDFUNC(jff_EXT_w(W4 d, RR4 s)); +DECLARE_MIDFUNC(jff_EXT_l(W4 d, RR4 s)); + +// LSL +DECLARE_MIDFUNC(jnf_LSL_imm(W4 d, RR4 s, IMM i)); +DECLARE_MIDFUNC(jnf_LSL_reg(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jff_LSL_b_imm(W4 d, RR4 s, IMM i)); +DECLARE_MIDFUNC(jff_LSL_w_imm(W4 d, RR4 s, IMM i)); +DECLARE_MIDFUNC(jff_LSL_l_imm(W4 d, RR4 s, IMM i)); +DECLARE_MIDFUNC(jff_LSL_b_reg(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jff_LSL_w_reg(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jff_LSL_l_reg(W4 d, RR4 s, RR4 i)); + +// LSLW 
+DECLARE_MIDFUNC(jff_LSLW(W4 d, RR4 s)); +DECLARE_MIDFUNC(jnf_LSLW(W4 d, RR4 s)); + +// LSR +DECLARE_MIDFUNC(jnf_LSR_b_imm(W4 d, RR4 s, IMM i)); +DECLARE_MIDFUNC(jnf_LSR_w_imm(W4 d, RR4 s, IMM i)); +DECLARE_MIDFUNC(jnf_LSR_l_imm(W4 d, RR4 s, IMM i)); +DECLARE_MIDFUNC(jff_LSR_b_imm(W4 d, RR4 s, IMM i)); +DECLARE_MIDFUNC(jff_LSR_w_imm(W4 d, RR4 s, IMM i)); +DECLARE_MIDFUNC(jff_LSR_l_imm(W4 d, RR4 s, IMM i)); +DECLARE_MIDFUNC(jnf_LSR_b_reg(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jnf_LSR_w_reg(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jnf_LSR_l_reg(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jff_LSR_b_reg(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jff_LSR_w_reg(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jff_LSR_l_reg(W4 d, RR4 s, RR4 i)); + +// LSRW +DECLARE_MIDFUNC(jff_LSRW(W4 d, RR4 s)); +DECLARE_MIDFUNC(jnf_LSRW(W4 d, RR4 s)); + +// MOVE +DECLARE_MIDFUNC(jnf_MOVE(W4 d, RR4 s)); +DECLARE_MIDFUNC(jff_MOVE_b_imm(W4 d, IMM i)); +DECLARE_MIDFUNC(jff_MOVE_w_imm(W4 d, IMM i)); +DECLARE_MIDFUNC(jff_MOVE_l_imm(W4 d, IMM i)); +DECLARE_MIDFUNC(jff_MOVE_b(W4 d, RR1 s)); +DECLARE_MIDFUNC(jff_MOVE_w(W4 d, RR2 s)); +DECLARE_MIDFUNC(jff_MOVE_l(W4 d, RR4 s)); + +// MOVE16 +DECLARE_MIDFUNC(jnf_MOVE16(RR4 d, RR4 s)); + +// MOVEA +DECLARE_MIDFUNC(jnf_MOVEA_w(W4 d, RR2 s)); +DECLARE_MIDFUNC(jnf_MOVEA_l(W4 d, RR4 s)); + +// MULS +DECLARE_MIDFUNC (jnf_MULS(RW4 d, RR4 s)); +DECLARE_MIDFUNC (jff_MULS(RW4 d, RR4 s)); +DECLARE_MIDFUNC (jnf_MULS32(RW4 d, RR4 s)); +DECLARE_MIDFUNC (jff_MULS32(RW4 d, RR4 s)); +DECLARE_MIDFUNC (jnf_MULS64(RW4 d, RW4 s)); +DECLARE_MIDFUNC (jff_MULS64(RW4 d, RW4 s)); + +// MULU +DECLARE_MIDFUNC (jnf_MULU(RW4 d, RR4 s)); +DECLARE_MIDFUNC (jff_MULU(RW4 d, RR4 s)); +DECLARE_MIDFUNC (jnf_MULU32(RW4 d, RR4 s)); +DECLARE_MIDFUNC (jff_MULU32(RW4 d, RR4 s)); +DECLARE_MIDFUNC (jnf_MULU64(RW4 d, RW4 s)); +DECLARE_MIDFUNC (jff_MULU64(RW4 d, RW4 s)); + +// NEG +DECLARE_MIDFUNC(jnf_NEG(W4 d, RR4 s)); +DECLARE_MIDFUNC(jff_NEG_b(W4 d, RR1 s)); +DECLARE_MIDFUNC(jff_NEG_w(W4 d, RR2 s)); 
+DECLARE_MIDFUNC(jff_NEG_l(W4 d, RR4 s)); + +// NEGX +DECLARE_MIDFUNC(jnf_NEGX(W4 d, RR4 s)); +DECLARE_MIDFUNC(jff_NEGX_b(W4 d, RR1 s)); +DECLARE_MIDFUNC(jff_NEGX_w(W4 d, RR2 s)); +DECLARE_MIDFUNC(jff_NEGX_l(W4 d, RR4 s)); + +// NOT +DECLARE_MIDFUNC(jnf_NOT(W4 d, RR4 s)); +DECLARE_MIDFUNC(jff_NOT_b(W4 d, RR1 s)); +DECLARE_MIDFUNC(jff_NOT_w(W4 d, RR2 s)); +DECLARE_MIDFUNC(jff_NOT_l(W4 d, RR4 s)); + +// OR +DECLARE_MIDFUNC(jnf_OR(W4 d, RR4 s, RR4 v)); +DECLARE_MIDFUNC(jff_OR_b(W4 d, RR1 s, RR1 v)); +DECLARE_MIDFUNC(jff_OR_w(W4 d, RR2 s, RR2 v)); +DECLARE_MIDFUNC(jff_OR_l(W4 d, RR4 s, RR4 v)); + +// ORSR +DECLARE_MIDFUNC(jff_ORSR(IMM s, IMM x)); + +// ROL +DECLARE_MIDFUNC(jnf_ROL_b(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jnf_ROL_w(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jnf_ROL_l(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jff_ROL_b(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jff_ROL_w(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jff_ROL_l(W4 d, RR4 s, RR4 i)); + +// ROLW +DECLARE_MIDFUNC(jff_ROLW(W4 d, RR4 s)); +DECLARE_MIDFUNC(jnf_ROLW(W4 d, RR4 s)); + +// RORW +DECLARE_MIDFUNC(jff_RORW(W4 d, RR4 s)); +DECLARE_MIDFUNC(jnf_RORW(W4 d, RR4 s)); + +// ROXL +DECLARE_MIDFUNC(jnf_ROXL_b(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jnf_ROXL_w(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jnf_ROXL_l(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jff_ROXL_b(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jff_ROXL_w(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jff_ROXL_l(W4 d, RR4 s, RR4 i)); + +// ROXLW +DECLARE_MIDFUNC(jff_ROXLW(W4 d, RR4 s)); +DECLARE_MIDFUNC(jnf_ROXLW(W4 d, RR4 s)); + +// ROR +DECLARE_MIDFUNC(jnf_ROR_b(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jnf_ROR_w(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jnf_ROR_l(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jff_ROR_b(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jff_ROR_w(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jff_ROR_l(W4 d, RR4 s, RR4 i)); + +// ROXR +DECLARE_MIDFUNC(jnf_ROXR_b(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jnf_ROXR_w(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jnf_ROXR_l(W4 d, 
RR4 s, RR4 i)); +DECLARE_MIDFUNC(jff_ROXR_b(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jff_ROXR_w(W4 d, RR4 s, RR4 i)); +DECLARE_MIDFUNC(jff_ROXR_l(W4 d, RR4 s, RR4 i)); + +// ROXRW +DECLARE_MIDFUNC(jff_ROXRW(W4 d, RR4 s)); +DECLARE_MIDFUNC(jnf_ROXRW(W4 d, RR4 s)); + +// SUB +DECLARE_MIDFUNC(jnf_SUB_b_imm(W4 d, RR4 s, IMM v)); +DECLARE_MIDFUNC(jnf_SUB_b(W4 d, RR4 s, RR4 v)); +DECLARE_MIDFUNC(jnf_SUB_w_imm(W4 d, RR4 s, IMM v)); +DECLARE_MIDFUNC(jnf_SUB_w(W4 d, RR4 s, RR4 v)); +DECLARE_MIDFUNC(jnf_SUB_l_imm(W4 d, RR4 s, IMM v)); +DECLARE_MIDFUNC(jnf_SUB_l(W4 d, RR4 s, RR4 v)); +DECLARE_MIDFUNC(jff_SUB_b(W4 d, RR1 s, RR1 v)); +DECLARE_MIDFUNC(jff_SUB_w(W4 d, RR2 s, RR2 v)); +DECLARE_MIDFUNC(jff_SUB_l(W4 d, RR4 s, RR4 v)); +DECLARE_MIDFUNC(jff_SUB_b_imm(W4 d, RR1 s, IMM v)); +DECLARE_MIDFUNC(jff_SUB_w_imm(W4 d, RR2 s, IMM v)); +DECLARE_MIDFUNC(jff_SUB_l_imm(W4 d, RR4 s, IMM v)); + +// SUBA +DECLARE_MIDFUNC(jnf_SUBA_b(W4 d, RR1 s)); +DECLARE_MIDFUNC(jnf_SUBA_w(W4 d, RR2 s)); +DECLARE_MIDFUNC(jnf_SUBA_l(W4 d, RR4 s)); + +// SUBX +DECLARE_MIDFUNC(jnf_SUBX(W4 d, RR4 s, RR4 v)); +DECLARE_MIDFUNC(jff_SUBX_b(W4 d, RR1 s, RR4 v)); +DECLARE_MIDFUNC(jff_SUBX_w(W4 d, RR2 s, RR4 v)); +DECLARE_MIDFUNC(jff_SUBX_l(W4 d, RR4 s, RR4 v)); + +// SWAP +DECLARE_MIDFUNC (jnf_SWAP(RW4 d)); +DECLARE_MIDFUNC (jff_SWAP(RW4 d)); + +// TST +DECLARE_MIDFUNC (jff_TST_b(RR1 s)); +DECLARE_MIDFUNC (jff_TST_w(RR2 s)); +DECLARE_MIDFUNC (jff_TST_l(RR4 s)); + diff --git a/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_x86.cpp b/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_x86.cpp new file mode 100644 index 00000000..d5e2e053 --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_x86.cpp @@ -0,0 +1,2982 @@ +/* + * compiler/compemu_midfunc_arm.cpp - Native MIDFUNCS for IA-32 and AMD64 + * + * Copyright (c) 2014 Jens Heitmann of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * + * 
Adaptation for Basilisk II and improvements, copyright 2000-2002 + * Gwenole Beauchesne + * + * Basilisk II (C) 1997-2002 Christian Bauer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Note: + * File is included by compemu_support.cpp + * + */ + +static int f_rmw(int r) +{ + int n; + + f_make_exclusive(r,0); + if (f_isinreg(r)) { + n=live.fate[r].realreg; + } + else + n=f_alloc_reg(r,0); + live.fate[r].status=DIRTY; + live.fat[n].locked++; + live.fat[n].touched=touchcnt++; + return n; +} + +static void fflags_into_flags_internal(uae_u32 tmp) +{ + int r; + + clobber_flags(); + r=f_readreg(FP_RESULT); + if (FFLAG_NREG_CLOBBER_CONDITION) { + int tmp2=tmp; + tmp=writereg_specific(tmp,4,FFLAG_NREG); + raw_fflags_into_flags(r); + unlock2(tmp); + forget_about(tmp2); + } + else + raw_fflags_into_flags(r); + f_unlock(r); + live_flags(); +} + + +/******************************************************************** + * CPU functions exposed to gencomp. 
Both CREATE and EMIT time * + ********************************************************************/ + + +/* + * RULES FOR HANDLING REGISTERS: + * + * * In the function headers, order the parameters + * - 1st registers written to + * - 2nd read/modify/write registers + * - 3rd registers read from + * * Before calling raw_*, you must call readreg, writereg or rmw for + * each register + * * The order for this is + * - 1st call remove_offset for all registers written to with size<4 + * - 2nd call readreg for all registers read without offset + * - 3rd call rmw for all rmw registers + * - 4th call readreg_offset for all registers that can handle offsets + * - 5th call get_offset for all the registers from the previous step + * - 6th call writereg for all written-to registers + * - 7th call raw_* + * - 8th unlock2 all registers that were locked + */ + +MIDFUNC(0,live_flags,(void)) +{ + live.flags_on_stack=TRASH; + live.flags_in_flags=VALID; + live.flags_are_important=1; +} +MENDFUNC(0,live_flags,(void)) + +MIDFUNC(0,dont_care_flags,(void)) +{ + live.flags_are_important=0; +} +MENDFUNC(0,dont_care_flags,(void)) + +MIDFUNC(0,duplicate_carry,(void)) +{ + evict(FLAGX); + make_flags_live_internal(); +#ifdef UAE + COMPCALL(setcc_m)((uintptr)live.state[FLAGX].mem + 1, NATIVE_CC_CS); +#else + COMPCALL(setcc_m)((uintptr)live.state[FLAGX].mem, NATIVE_CC_CS); +#endif + log_vwrite(FLAGX); +} +MENDFUNC(0,duplicate_carry,(void)) + +MIDFUNC(0,restore_carry,(void)) +{ + if (!have_rat_stall) { /* Not a P6 core, i.e. no partial stalls */ +#ifdef UAE + bt_l_ri_noclobber(FLAGX, 8); +#else + bt_l_ri_noclobber(FLAGX, 0); +#endif + } + else { /* Avoid the stall the above creates. + This is slow on non-P6, though. 
+ */ +#ifdef UAE + COMPCALL(rol_w_ri(FLAGX, 8)); +#else + COMPCALL(rol_b_ri(FLAGX, 8)); +#endif + isclean(FLAGX); + } +} +MENDFUNC(0,restore_carry,(void)) + +MIDFUNC(0,start_needflags,(void)) +{ + needflags=1; +} +MENDFUNC(0,start_needflags,(void)) + +MIDFUNC(0,end_needflags,(void)) +{ + needflags=0; +} +MENDFUNC(0,end_needflags,(void)) + +MIDFUNC(0,make_flags_live,(void)) +{ + make_flags_live_internal(); +} +MENDFUNC(0,make_flags_live,(void)) + +MIDFUNC(1,fflags_into_flags,(W2 tmp)) +{ + clobber_flags(); + fflags_into_flags_internal(tmp); +} +MENDFUNC(1,fflags_into_flags,(W2 tmp)) + +MIDFUNC(2,bt_l_ri,(RR4 r, IMM i)) /* This is defined as only affecting C */ +{ + int size=4; + if (i<16) + size=2; + CLOBBER_BT; + r=readreg(r,size); + raw_bt_l_ri(r,i); + unlock2(r); +} +MENDFUNC(2,bt_l_ri,(RR4 r, IMM i)) /* This is defined as only affecting C */ + +MIDFUNC(2,bt_l_rr,(RR4 r, RR4 b)) /* This is defined as only affecting C */ +{ + CLOBBER_BT; + r=readreg(r,4); + b=readreg(b,4); + raw_bt_l_rr(r,b); + unlock2(r); + unlock2(b); +} +MENDFUNC(2,bt_l_rr,(RR4 r, RR4 b)) /* This is defined as only affecting C */ + +MIDFUNC(2,btc_l_ri,(RW4 r, IMM i)) +{ + int size=4; + if (i<16) + size=2; + CLOBBER_BT; + r=rmw(r,size,size); + raw_btc_l_ri(r,i); + unlock2(r); +} +MENDFUNC(2,btc_l_ri,(RW4 r, IMM i)) + +MIDFUNC(2,btc_l_rr,(RW4 r, RR4 b)) +{ + CLOBBER_BT; + b=readreg(b,4); + r=rmw(r,4,4); + raw_btc_l_rr(r,b); + unlock2(r); + unlock2(b); +} +MENDFUNC(2,btc_l_rr,(RW4 r, RR4 b)) + +MIDFUNC(2,btr_l_ri,(RW4 r, IMM i)) +{ + int size=4; + if (i<16) + size=2; + CLOBBER_BT; + r=rmw(r,size,size); + raw_btr_l_ri(r,i); + unlock2(r); +} +MENDFUNC(2,btr_l_ri,(RW4 r, IMM i)) + +MIDFUNC(2,btr_l_rr,(RW4 r, RR4 b)) +{ + CLOBBER_BT; + b=readreg(b,4); + r=rmw(r,4,4); + raw_btr_l_rr(r,b); + unlock2(r); + unlock2(b); +} +MENDFUNC(2,btr_l_rr,(RW4 r, RR4 b)) + +MIDFUNC(2,bts_l_ri,(RW4 r, IMM i)) +{ + int size=4; + if (i<16) + size=2; + CLOBBER_BT; + r=rmw(r,size,size); + raw_bts_l_ri(r,i); + unlock2(r); 
+} +MENDFUNC(2,bts_l_ri,(RW4 r, IMM i)) + +MIDFUNC(2,bts_l_rr,(RW4 r, RR4 b)) +{ + CLOBBER_BT; + b=readreg(b,4); + r=rmw(r,4,4); + raw_bts_l_rr(r,b); + unlock2(r); + unlock2(b); +} +MENDFUNC(2,bts_l_rr,(RW4 r, RR4 b)) + +MIDFUNC(2,mov_l_rm,(W4 d, IMM s)) +{ + CLOBBER_MOV; + d=writereg(d,4); + raw_mov_l_rm(d,s); + unlock2(d); +} +MENDFUNC(2,mov_l_rm,(W4 d, IMM s)) + +MIDFUNC(1,call_r,(RR4 r)) /* Clobbering is implicit */ +{ + r=readreg(r,4); + raw_dec_sp(STACK_SHADOW_SPACE); + raw_call_r(r); + raw_inc_sp(STACK_SHADOW_SPACE); + unlock2(r); +} +MENDFUNC(1,call_r,(RR4 r)) /* Clobbering is implicit */ + +MIDFUNC(2,sub_l_mi,(IMM d, IMM s)) +{ + CLOBBER_SUB; + raw_sub_l_mi(d,s) ; +} +MENDFUNC(2,sub_l_mi,(IMM d, IMM s)) + +MIDFUNC(2,mov_l_mi,(IMM d, IMM s)) +{ + CLOBBER_MOV; + raw_mov_l_mi(d,s) ; +} +MENDFUNC(2,mov_l_mi,(IMM d, IMM s)) + +MIDFUNC(2,mov_w_mi,(IMM d, IMM s)) +{ + CLOBBER_MOV; + raw_mov_w_mi(d,s) ; +} +MENDFUNC(2,mov_w_mi,(IMM d, IMM s)) + +MIDFUNC(2,mov_b_mi,(IMM d, IMM s)) +{ + CLOBBER_MOV; + raw_mov_b_mi(d,s) ; +} +MENDFUNC(2,mov_b_mi,(IMM d, IMM s)) + +MIDFUNC(2,rol_b_ri,(RW1 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_ROL; + r=rmw(r,1,1); + raw_rol_b_ri(r,i); + unlock2(r); +} +MENDFUNC(2,rol_b_ri,(RW1 r, IMM i)) + +MIDFUNC(2,rol_w_ri,(RW2 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_ROL; + r=rmw(r,2,2); + raw_rol_w_ri(r,i); + unlock2(r); +} +MENDFUNC(2,rol_w_ri,(RW2 r, IMM i)) + +MIDFUNC(2,rol_l_ri,(RW4 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_ROL; + r=rmw(r,4,4); + raw_rol_l_ri(r,i); + unlock2(r); +} +MENDFUNC(2,rol_l_ri,(RW4 r, IMM i)) + +MIDFUNC(2,rol_l_rr,(RW4 d, RR1 r)) +{ + if (isconst(r)) { + COMPCALL(rol_l_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_ROL; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,4,4); + Dif (r!=1) { + jit_abort("Illegal register %d in raw_rol_b",r); + } + raw_rol_l_rr(d,r) ; + unlock2(r); + unlock2(d); +} +MENDFUNC(2,rol_l_rr,(RW4 d, RR1 r)) + 
+MIDFUNC(2,rol_w_rr,(RW2 d, RR1 r)) +{ /* Can only do this with r==1, i.e. cl */ + + if (isconst(r)) { + COMPCALL(rol_w_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_ROL; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,2,2); + Dif (r!=1) { + jit_abort("Illegal register %d in raw_rol_b",r); + } + raw_rol_w_rr(d,r) ; + unlock2(r); + unlock2(d); +} +MENDFUNC(2,rol_w_rr,(RW2 d, RR1 r)) + +MIDFUNC(2,rol_b_rr,(RW1 d, RR1 r)) +{ /* Can only do this with r==1, i.e. cl */ + + if (isconst(r)) { + COMPCALL(rol_b_ri)(d,(uae_u8)live.state[r].val); + return; + } + + CLOBBER_ROL; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,1,1); + Dif (r!=1) { + jit_abort("Illegal register %d in raw_rol_b",r); + } + raw_rol_b_rr(d,r) ; + unlock2(r); + unlock2(d); +} +MENDFUNC(2,rol_b_rr,(RW1 d, RR1 r)) + + +MIDFUNC(2,shll_l_rr,(RW4 d, RR1 r)) +{ + if (isconst(r)) { + COMPCALL(shll_l_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_SHLL; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,4,4); + Dif (r!=1) { + jit_abort("Illegal register %d in raw_rol_b",r); + } + raw_shll_l_rr(d,r) ; + unlock2(r); + unlock2(d); +} +MENDFUNC(2,shll_l_rr,(RW4 d, RR1 r)) + +MIDFUNC(2,shll_w_rr,(RW2 d, RR1 r)) +{ /* Can only do this with r==1, i.e. cl */ + + if (isconst(r)) { + COMPCALL(shll_w_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_SHLL; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,2,2); + Dif (r!=1) { + jit_abort("Illegal register %d in raw_shll_b",r); + } + raw_shll_w_rr(d,r) ; + unlock2(r); + unlock2(d); +} +MENDFUNC(2,shll_w_rr,(RW2 d, RR1 r)) + +MIDFUNC(2,shll_b_rr,(RW1 d, RR1 r)) +{ /* Can only do this with r==1, i.e. 
cl */ + + if (isconst(r)) { + COMPCALL(shll_b_ri)(d,(uae_u8)live.state[r].val); + return; + } + + CLOBBER_SHLL; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,1,1); + Dif (r!=1) { + jit_abort("Illegal register %d in raw_shll_b",r); + } + raw_shll_b_rr(d,r) ; + unlock2(r); + unlock2(d); +} +MENDFUNC(2,shll_b_rr,(RW1 d, RR1 r)) + + +MIDFUNC(2,ror_b_ri,(RR1 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_ROR; + r=rmw(r,1,1); + raw_ror_b_ri(r,i); + unlock2(r); +} +MENDFUNC(2,ror_b_ri,(RR1 r, IMM i)) + +MIDFUNC(2,ror_w_ri,(RR2 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_ROR; + r=rmw(r,2,2); + raw_ror_w_ri(r,i); + unlock2(r); +} +MENDFUNC(2,ror_w_ri,(RR2 r, IMM i)) + +MIDFUNC(2,ror_l_ri,(RR4 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_ROR; + r=rmw(r,4,4); + raw_ror_l_ri(r,i); + unlock2(r); +} +MENDFUNC(2,ror_l_ri,(RR4 r, IMM i)) + +MIDFUNC(2,ror_l_rr,(RR4 d, RR1 r)) +{ + if (isconst(r)) { + COMPCALL(ror_l_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_ROR; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,4,4); + raw_ror_l_rr(d,r) ; + unlock2(r); + unlock2(d); +} +MENDFUNC(2,ror_l_rr,(RR4 d, RR1 r)) + +MIDFUNC(2,ror_w_rr,(RR2 d, RR1 r)) +{ + if (isconst(r)) { + COMPCALL(ror_w_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_ROR; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,2,2); + raw_ror_w_rr(d,r) ; + unlock2(r); + unlock2(d); +} +MENDFUNC(2,ror_w_rr,(RR2 d, RR1 r)) + +MIDFUNC(2,ror_b_rr,(RR1 d, RR1 r)) +{ + if (isconst(r)) { + COMPCALL(ror_b_ri)(d,(uae_u8)live.state[r].val); + return; + } + + CLOBBER_ROR; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,1,1); + raw_ror_b_rr(d,r) ; + unlock2(r); + unlock2(d); +} +MENDFUNC(2,ror_b_rr,(RR1 d, RR1 r)) + +MIDFUNC(2,shrl_l_rr,(RW4 d, RR1 r)) +{ + if (isconst(r)) { + COMPCALL(shrl_l_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_SHRL; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,4,4); + Dif (r!=1) { + jit_abort("Illegal register 
%d in raw_rol_b",r); + } + raw_shrl_l_rr(d,r) ; + unlock2(r); + unlock2(d); +} +MENDFUNC(2,shrl_l_rr,(RW4 d, RR1 r)) + +MIDFUNC(2,shrl_w_rr,(RW2 d, RR1 r)) +{ /* Can only do this with r==1, i.e. cl */ + + if (isconst(r)) { + COMPCALL(shrl_w_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_SHRL; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,2,2); + Dif (r!=1) { + jit_abort("Illegal register %d in raw_shrl_b",r); + } + raw_shrl_w_rr(d,r) ; + unlock2(r); + unlock2(d); +} +MENDFUNC(2,shrl_w_rr,(RW2 d, RR1 r)) + +MIDFUNC(2,shrl_b_rr,(RW1 d, RR1 r)) +{ /* Can only do this with r==1, i.e. cl */ + + if (isconst(r)) { + COMPCALL(shrl_b_ri)(d,(uae_u8)live.state[r].val); + return; + } + + CLOBBER_SHRL; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,1,1); + Dif (r!=1) { + jit_abort("Illegal register %d in raw_shrl_b",r); + } + raw_shrl_b_rr(d,r) ; + unlock2(r); + unlock2(d); +} +MENDFUNC(2,shrl_b_rr,(RW1 d, RR1 r)) + + + +MIDFUNC(2,shll_l_ri,(RW4 r, IMM i)) +{ + if (!i && !needflags) + return; + if (isconst(r) && !needflags) { + live.state[r].val<<=i; + return; + } + CLOBBER_SHLL; + r=rmw(r,4,4); + raw_shll_l_ri(r,i); + unlock2(r); +} +MENDFUNC(2,shll_l_ri,(RW4 r, IMM i)) + +MIDFUNC(2,shll_w_ri,(RW2 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHLL; + r=rmw(r,2,2); + raw_shll_w_ri(r,i); + unlock2(r); +} +MENDFUNC(2,shll_w_ri,(RW2 r, IMM i)) + +MIDFUNC(2,shll_b_ri,(RW1 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHLL; + r=rmw(r,1,1); + raw_shll_b_ri(r,i); + unlock2(r); +} +MENDFUNC(2,shll_b_ri,(RW1 r, IMM i)) + +MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i)) +{ + if (!i && !needflags) + return; + if (isconst(r) && !needflags) { + live.state[r].val>>=i; + return; + } + CLOBBER_SHRL; + r=rmw(r,4,4); + raw_shrl_l_ri(r,i); + unlock2(r); +} +MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i)) + +MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHRL; + r=rmw(r,2,2); + raw_shrl_w_ri(r,i); + unlock2(r); +} 
+MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i)) + +MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHRL; + r=rmw(r,1,1); + raw_shrl_b_ri(r,i); + unlock2(r); +} +MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i)) + +MIDFUNC(2,shra_l_ri,(RW4 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHRA; + r=rmw(r,4,4); + raw_shra_l_ri(r,i); + unlock2(r); +} +MENDFUNC(2,shra_l_ri,(RW4 r, IMM i)) + +MIDFUNC(2,shra_w_ri,(RW2 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHRA; + r=rmw(r,2,2); + raw_shra_w_ri(r,i); + unlock2(r); +} +MENDFUNC(2,shra_w_ri,(RW2 r, IMM i)) + +MIDFUNC(2,shra_b_ri,(RW1 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHRA; + r=rmw(r,1,1); + raw_shra_b_ri(r,i); + unlock2(r); +} +MENDFUNC(2,shra_b_ri,(RW1 r, IMM i)) + +MIDFUNC(2,shra_l_rr,(RW4 d, RR1 r)) +{ + if (isconst(r)) { + COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_SHRA; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,4,4); + Dif (r!=1) { + jit_abort("Illegal register %d in raw_rol_b",r); + } + raw_shra_l_rr(d,r) ; + unlock2(r); + unlock2(d); +} +MENDFUNC(2,shra_l_rr,(RW4 d, RR1 r)) + +MIDFUNC(2,shra_w_rr,(RW2 d, RR1 r)) +{ /* Can only do this with r==1, i.e. cl */ + + if (isconst(r)) { + COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_SHRA; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,2,2); + Dif (r!=1) { + jit_abort("Illegal register %d in raw_shra_b",r); + } + raw_shra_w_rr(d,r) ; + unlock2(r); + unlock2(d); +} +MENDFUNC(2,shra_w_rr,(RW2 d, RR1 r)) + +MIDFUNC(2,shra_b_rr,(RW1 d, RR1 r)) +{ /* Can only do this with r==1, i.e. 
cl */ + + if (isconst(r)) { + COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val); + return; + } + + CLOBBER_SHRA; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,1,1); + Dif (r!=1) { + jit_abort("Illegal register %d in raw_shra_b",r); + } + raw_shra_b_rr(d,r) ; + unlock2(r); + unlock2(d); +} +MENDFUNC(2,shra_b_rr,(RW1 d, RR1 r)) + + +MIDFUNC(2,setcc,(W1 d, IMM cc)) +{ + CLOBBER_SETCC; + d=writereg(d,1); + raw_setcc(d,cc); + unlock2(d); +} +MENDFUNC(2,setcc,(W1 d, IMM cc)) + +MIDFUNC(2,setcc_m,(IMM d, IMM cc)) +{ + CLOBBER_SETCC; + raw_setcc_m(d,cc); +} +MENDFUNC(2,setcc_m,(IMM d, IMM cc)) + +MIDFUNC(3,cmov_l_rr,(RW4 d, RR4 s, IMM cc)) +{ + if (d==s) + return; + CLOBBER_CMOV; + s=readreg(s,4); + d=rmw(d,4,4); + raw_cmov_l_rr(d,s,cc); + unlock2(s); + unlock2(d); +} +MENDFUNC(3,cmov_l_rr,(RW4 d, RR4 s, IMM cc)) + +MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc)) +{ + CLOBBER_CMOV; + d=rmw(d,4,4); + raw_cmov_l_rm(d,s,cc); + unlock2(d); +} +MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc)) + +MIDFUNC(2,bsf_l_rr,(W4 d, RR4 s)) +{ + CLOBBER_BSF; + s = readreg(s, 4); + d = writereg(d, 4); + raw_bsf_l_rr(d, s); + unlock2(s); + unlock2(d); +} +MENDFUNC(2,bsf_l_rr,(W4 d, RR4 s)) + +/* Set the Z flag depending on the value in s. Note that the + value has to be 0 or -1 (or, more precisely, for non-zero + values, bit 14 must be set)! 
*/ +MIDFUNC(2,simulate_bsf,(W4 tmp, RW4 s)) +{ + CLOBBER_BSF; + s=rmw_specific(s,4,4,FLAG_NREG3); + tmp=writereg(tmp,4); + raw_flags_set_zero(s, tmp); + unlock2(tmp); + unlock2(s); +} +MENDFUNC(2,simulate_bsf,(W4 tmp, RW4 s)) + +MIDFUNC(2,imul_32_32,(RW4 d, RR4 s)) +{ + CLOBBER_MUL; + s=readreg(s,4); + d=rmw(d,4,4); + raw_imul_32_32(d,s); + unlock2(s); + unlock2(d); +} +MENDFUNC(2,imul_32_32,(RW4 d, RR4 s)) + +MIDFUNC(2,imul_64_32,(RW4 d, RW4 s)) +{ + CLOBBER_MUL; + s=rmw_specific(s,4,4,MUL_NREG2); + d=rmw_specific(d,4,4,MUL_NREG1); + raw_imul_64_32(d,s); + unlock2(s); + unlock2(d); +} +MENDFUNC(2,imul_64_32,(RW4 d, RW4 s)) + +MIDFUNC(2,mul_64_32,(RW4 d, RW4 s)) +{ + CLOBBER_MUL; + s=rmw_specific(s,4,4,MUL_NREG2); + d=rmw_specific(d,4,4,MUL_NREG1); + raw_mul_64_32(d,s); + unlock2(s); + unlock2(d); +} +MENDFUNC(2,mul_64_32,(RW4 d, RW4 s)) + +MIDFUNC(2,mul_32_32,(RW4 d, RR4 s)) +{ + CLOBBER_MUL; + s=readreg(s,4); + d=rmw(d,4,4); + raw_mul_32_32(d,s); + unlock2(s); + unlock2(d); +} +MENDFUNC(2,mul_32_32,(RW4 d, RR4 s)) + +#if SIZEOF_VOID_P == 8 +MIDFUNC(2,sign_extend_32_rr,(W4 d, RR2 s)) +{ + int isrmw; + + if (isconst(s)) { + set_const(d,(uae_s32)live.state[s].val); + return; + } + + CLOBBER_SE32; + isrmw=(s==d); + if (!isrmw) { + s=readreg(s,4); + d=writereg(d,4); + } + else { /* If we try to lock this twice, with different sizes, we + are int trouble! */ + s=d=rmw(s,4,4); + } + raw_sign_extend_32_rr(d,s); + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } +} +MENDFUNC(2,sign_extend_32_rr,(W4 d, RR2 s)) +#endif + +MIDFUNC(2,sign_extend_16_rr,(W4 d, RR2 s)) +{ + int isrmw; + + if (isconst(s)) { + set_const(d,(uae_s32)(uae_s16)live.state[s].val); + return; + } + + CLOBBER_SE16; + isrmw=(s==d); + if (!isrmw) { + s=readreg(s,2); + d=writereg(d,4); + } + else { /* If we try to lock this twice, with different sizes, we + are int trouble! 
*/ + s=d=rmw(s,4,2); + } + raw_sign_extend_16_rr(d,s); + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } +} +MENDFUNC(2,sign_extend_16_rr,(W4 d, RR2 s)) + +MIDFUNC(2,sign_extend_8_rr,(W4 d, RR1 s)) +{ + int isrmw; + + if (isconst(s)) { + set_const(d,(uae_s32)(uae_s8)live.state[s].val); + return; + } + + isrmw=(s==d); + CLOBBER_SE8; + if (!isrmw) { + s=readreg(s,1); + d=writereg(d,4); + } + else { /* If we try to lock this twice, with different sizes, we + are int trouble! */ + s=d=rmw(s,4,1); + } + + raw_sign_extend_8_rr(d,s); + + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } +} +MENDFUNC(2,sign_extend_8_rr,(W4 d, RR1 s)) + + +MIDFUNC(2,zero_extend_16_rr,(W4 d, RR2 s)) +{ + int isrmw; + + if (isconst(s)) { + set_const(d,(uae_u32)(uae_u16)live.state[s].val); + return; + } + + isrmw=(s==d); + CLOBBER_ZE16; + if (!isrmw) { + s=readreg(s,2); + d=writereg(d,4); + } + else { /* If we try to lock this twice, with different sizes, we + are int trouble! */ + s=d=rmw(s,4,2); + } + raw_zero_extend_16_rr(d,s); + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } +} +MENDFUNC(2,zero_extend_16_rr,(W4 d, RR2 s)) + +MIDFUNC(2,zero_extend_8_rr,(W4 d, RR1 s)) +{ + int isrmw; + if (isconst(s)) { + set_const(d,(uae_u32)(uae_u8)live.state[s].val); + return; + } + + isrmw=(s==d); + CLOBBER_ZE8; + if (!isrmw) { + s=readreg(s,1); + d=writereg(d,4); + } + else { /* If we try to lock this twice, with different sizes, we + are int trouble! 
*/ + s=d=rmw(s,4,1); + } + + raw_zero_extend_8_rr(d,s); + + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } +} +MENDFUNC(2,zero_extend_8_rr,(W4 d, RR1 s)) + +MIDFUNC(2,mov_b_rr,(W1 d, RR1 s)) +{ + if (d==s) + return; + if (isconst(s)) { + COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val); + return; + } + + CLOBBER_MOV; + s=readreg(s,1); + d=writereg(d,1); + raw_mov_b_rr(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,mov_b_rr,(W1 d, RR1 s)) + +MIDFUNC(2,mov_w_rr,(W2 d, RR2 s)) +{ + if (d==s) + return; + if (isconst(s)) { + COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val); + return; + } + + CLOBBER_MOV; + s=readreg(s,2); + d=writereg(d,2); + raw_mov_w_rr(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,mov_w_rr,(W2 d, RR2 s)) + +MIDFUNC(4,mov_l_rrm_indexed,(W4 d,RR4 baser, RR4 index, IMM factor)) +{ + CLOBBER_MOV; + baser=readreg(baser,4); + index=readreg(index,4); + d=writereg(d,4); + + raw_mov_l_rrm_indexed(d,baser,index,factor); + unlock2(d); + unlock2(baser); + unlock2(index); +} +MENDFUNC(4,mov_l_rrm_indexed,(W4 d,RR4 baser, RR4 index, IMM factor)) + +MIDFUNC(4,mov_w_rrm_indexed,(W2 d, RR4 baser, RR4 index, IMM factor)) +{ + CLOBBER_MOV; + baser=readreg(baser,4); + index=readreg(index,4); + d=writereg(d,2); + + raw_mov_w_rrm_indexed(d,baser,index,factor); + unlock2(d); + unlock2(baser); + unlock2(index); +} +MENDFUNC(4,mov_w_rrm_indexed,(W2 d, RR4 baser, RR4 index, IMM factor)) + +MIDFUNC(4,mov_b_rrm_indexed,(W1 d, RR4 baser, RR4 index, IMM factor)) +{ + CLOBBER_MOV; + baser=readreg(baser,4); + index=readreg(index,4); + d=writereg(d,1); + + raw_mov_b_rrm_indexed(d,baser,index,factor); + + unlock2(d); + unlock2(baser); + unlock2(index); +} +MENDFUNC(4,mov_b_rrm_indexed,(W1 d, RR4 baser, RR4 index, IMM factor)) + + +MIDFUNC(4,mov_l_mrr_indexed,(RR4 baser, RR4 index, IMM factor, RR4 s)) +{ + CLOBBER_MOV; + baser=readreg(baser,4); + index=readreg(index,4); + s=readreg(s,4); + + Dif (baser==s || index==s) + jit_abort("mov_l_mrr_indexed"); + + 
+ raw_mov_l_mrr_indexed(baser,index,factor,s); + unlock2(s); + unlock2(baser); + unlock2(index); +} +MENDFUNC(4,mov_l_mrr_indexed,(RR4 baser, RR4 index, IMM factor, RR4 s)) + +MIDFUNC(4,mov_w_mrr_indexed,(RR4 baser, RR4 index, IMM factor, RR2 s)) +{ + CLOBBER_MOV; + baser=readreg(baser,4); + index=readreg(index,4); + s=readreg(s,2); + + raw_mov_w_mrr_indexed(baser,index,factor,s); + unlock2(s); + unlock2(baser); + unlock2(index); +} +MENDFUNC(4,mov_w_mrr_indexed,(RR4 baser, RR4 index, IMM factor, RR2 s)) + +MIDFUNC(4,mov_b_mrr_indexed,(RR4 baser, RR4 index, IMM factor, RR1 s)) +{ + CLOBBER_MOV; + s=readreg(s,1); + baser=readreg(baser,4); + index=readreg(index,4); + + raw_mov_b_mrr_indexed(baser,index,factor,s); + unlock2(s); + unlock2(baser); + unlock2(index); +} +MENDFUNC(4,mov_b_mrr_indexed,(RR4 baser, RR4 index, IMM factor, RR1 s)) + + +MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, RR4 baser, RR4 index, IMM factor, RR4 s)) +{ + int basereg=baser; + int indexreg=index; + + CLOBBER_MOV; + s=readreg(s,4); + baser=readreg_offset(baser,4); + index=readreg_offset(index,4); + + base+=get_offset(basereg); + base+=factor*get_offset(indexreg); + + raw_mov_l_bmrr_indexed(base,baser,index,factor,s); + unlock2(s); + unlock2(baser); + unlock2(index); +} +MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, RR4 baser, RR4 index, IMM factor, RR4 s)) + +MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, RR4 baser, RR4 index, IMM factor, RR2 s)) +{ + int basereg=baser; + int indexreg=index; + + CLOBBER_MOV; + s=readreg(s,2); + baser=readreg_offset(baser,4); + index=readreg_offset(index,4); + + base+=get_offset(basereg); + base+=factor*get_offset(indexreg); + + raw_mov_w_bmrr_indexed(base,baser,index,factor,s); + unlock2(s); + unlock2(baser); + unlock2(index); +} +MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, RR4 baser, RR4 index, IMM factor, RR2 s)) + +MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, RR4 baser, RR4 index, IMM factor, RR1 s)) +{ + int basereg=baser; + int indexreg=index; + + CLOBBER_MOV; + s=readreg(s,1); 
+ baser=readreg_offset(baser,4); + index=readreg_offset(index,4); + + base+=get_offset(basereg); + base+=factor*get_offset(indexreg); + + raw_mov_b_bmrr_indexed(base,baser,index,factor,s); + unlock2(s); + unlock2(baser); + unlock2(index); +} +MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, RR4 baser, RR4 index, IMM factor, RR1 s)) + + + +/* Read a long from base+baser+factor*index */ +MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, RR4 baser, RR4 index, IMM factor)) +{ + int basereg=baser; + int indexreg=index; + + CLOBBER_MOV; + baser=readreg_offset(baser,4); + index=readreg_offset(index,4); + base+=get_offset(basereg); + base+=factor*get_offset(indexreg); + d=writereg(d,4); + raw_mov_l_brrm_indexed(d,base,baser,index,factor); + unlock2(d); + unlock2(baser); + unlock2(index); +} +MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, RR4 baser, RR4 index, IMM factor)) + + +MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, RR4 baser, RR4 index, IMM factor)) +{ + int basereg=baser; + int indexreg=index; + + CLOBBER_MOV; + remove_offset(d,-1); + baser=readreg_offset(baser,4); + index=readreg_offset(index,4); + base+=get_offset(basereg); + base+=factor*get_offset(indexreg); + d=writereg(d,2); + raw_mov_w_brrm_indexed(d,base,baser,index,factor); + unlock2(d); + unlock2(baser); + unlock2(index); +} +MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, RR4 baser, RR4 index, IMM factor)) + + +MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, RR4 baser, RR4 index, IMM factor)) +{ + int basereg=baser; + int indexreg=index; + + CLOBBER_MOV; + remove_offset(d,-1); + baser=readreg_offset(baser,4); + index=readreg_offset(index,4); + base+=get_offset(basereg); + base+=factor*get_offset(indexreg); + d=writereg(d,1); + raw_mov_b_brrm_indexed(d,base,baser,index,factor); + unlock2(d); + unlock2(baser); + unlock2(index); +} +MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, RR4 baser, RR4 index, IMM factor)) + +/* Read a long from base+factor*index */ +MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, RR4 index, IMM 
factor)) +{ + int indexreg=index; + + if (isconst(index)) { + COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val); + return; + } + + CLOBBER_MOV; + index=readreg_offset(index,4); + base+=get_offset(indexreg)*factor; + d=writereg(d,4); + + raw_mov_l_rm_indexed(d,base,index,factor); + unlock2(index); + unlock2(d); +} +MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, RR4 index, IMM factor)) + +/* read the long at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_l_rR,(W4 d, RR4 s, IMM offset)) +{ + if (isconst(s)) { + COMPCALL(mov_l_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + s=readreg(s,4); + d=writereg(d,4); + + raw_mov_l_rR(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_l_rR,(W4 d, RR4 s, IMM offset)) + +/* read the word at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_w_rR,(W2 d, RR4 s, IMM offset)) +{ + if (isconst(s)) { + COMPCALL(mov_w_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + s=readreg(s,4); + d=writereg(d,2); + + raw_mov_w_rR(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_w_rR,(W2 d, RR4 s, IMM offset)) + +/* read the word at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_b_rR,(W1 d, RR4 s, IMM offset)) +{ + if (isconst(s)) { + COMPCALL(mov_b_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + s=readreg(s,4); + d=writereg(d,1); + + raw_mov_b_rR(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_b_rR,(W1 d, RR4 s, IMM offset)) + +/* read the long at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_l_brR,(W4 d, RR4 s, IMM offset)) +{ + int sreg=s; + if (isconst(s)) { + COMPCALL(mov_l_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + s=readreg_offset(s,4); + offset+=get_offset(sreg); + d=writereg(d,4); + + raw_mov_l_brR(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_l_brR,(W4 d, RR4 s, IMM offset)) + +/* read the word at the address contained in s+offset and store in d */ 
+MIDFUNC(3,mov_w_brR,(W2 d, RR4 s, IMM offset)) +{ + int sreg=s; + if (isconst(s)) { + COMPCALL(mov_w_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + remove_offset(d,-1); + s=readreg_offset(s,4); + offset+=get_offset(sreg); + d=writereg(d,2); + + raw_mov_w_brR(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_w_brR,(W2 d, RR4 s, IMM offset)) + +/* read the word at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_b_brR,(W1 d, RR4 s, IMM offset)) +{ + int sreg=s; + if (isconst(s)) { + COMPCALL(mov_b_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + remove_offset(d,-1); + s=readreg_offset(s,4); + offset+=get_offset(sreg); + d=writereg(d,1); + + raw_mov_b_brR(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_b_brR,(W1 d, RR4 s, IMM offset)) + +MIDFUNC(3,mov_l_Ri,(RR4 d, IMM i, IMM offset)) +{ + int dreg=d; + if (isconst(d)) { + COMPCALL(mov_l_mi)(live.state[d].val+offset,i); + return; + } + + CLOBBER_MOV; + d=readreg_offset(d,4); + offset+=get_offset(dreg); + raw_mov_l_Ri(d,i,offset); + unlock2(d); +} +MENDFUNC(3,mov_l_Ri,(RR4 d, IMM i, IMM offset)) + +MIDFUNC(3,mov_w_Ri,(RR4 d, IMM i, IMM offset)) +{ + int dreg=d; + if (isconst(d)) { + COMPCALL(mov_w_mi)(live.state[d].val+offset,i); + return; + } + + CLOBBER_MOV; + d=readreg_offset(d,4); + offset+=get_offset(dreg); + raw_mov_w_Ri(d,i,offset); + unlock2(d); +} +MENDFUNC(3,mov_w_Ri,(RR4 d, IMM i, IMM offset)) + +MIDFUNC(3,mov_b_Ri,(RR4 d, IMM i, IMM offset)) +{ + int dreg=d; + if (isconst(d)) { + COMPCALL(mov_b_mi)(live.state[d].val+offset,i); + return; + } + + CLOBBER_MOV; + d=readreg_offset(d,4); + offset+=get_offset(dreg); + raw_mov_b_Ri(d,i,offset); + unlock2(d); +} +MENDFUNC(3,mov_b_Ri,(RR4 d, IMM i, IMM offset)) + +/* Warning! OFFSET is byte sized only! 
*/ +MIDFUNC(3,mov_l_Rr,(RR4 d, RR4 s, IMM offset)) +{ + if (isconst(d)) { + COMPCALL(mov_l_mr)(live.state[d].val+offset,s); + return; + } + if (isconst(s)) { + COMPCALL(mov_l_Ri)(d,live.state[s].val,offset); + return; + } + + CLOBBER_MOV; + s=readreg(s,4); + d=readreg(d,4); + + raw_mov_l_Rr(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_l_Rr,(RR4 d, RR4 s, IMM offset)) + +MIDFUNC(3,mov_w_Rr,(RR4 d, RR2 s, IMM offset)) +{ + if (isconst(d)) { + COMPCALL(mov_w_mr)(live.state[d].val+offset,s); + return; + } + if (isconst(s)) { + COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset); + return; + } + + CLOBBER_MOV; + s=readreg(s,2); + d=readreg(d,4); + raw_mov_w_Rr(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_w_Rr,(RR4 d, RR2 s, IMM offset)) + +MIDFUNC(3,mov_b_Rr,(RR4 d, RR1 s, IMM offset)) +{ + if (isconst(d)) { + COMPCALL(mov_b_mr)(live.state[d].val+offset,s); + return; + } + if (isconst(s)) { + COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset); + return; + } + + CLOBBER_MOV; + s=readreg(s,1); + d=readreg(d,4); + raw_mov_b_Rr(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_b_Rr,(RR4 d, RR1 s, IMM offset)) + +MIDFUNC(3,lea_l_brr,(W4 d, RR4 s, IMM offset)) +{ + if (isconst(s)) { + COMPCALL(mov_l_ri)(d,live.state[s].val+offset); + return; + } +#if USE_OFFSET + if (d==s) { + add_offset(d,offset); + return; + } +#endif + CLOBBER_LEA; + s=readreg(s,4); + d=writereg(d,4); + raw_lea_l_brr(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,lea_l_brr,(W4 d, RR4 s, IMM offset)) + +MIDFUNC(5,lea_l_brr_indexed,(W4 d, RR4 s, RR4 index, IMM factor, IMM offset)) +{ + if (!offset) { + COMPCALL(lea_l_rr_indexed)(d,s,index,factor); + return; + } + CLOBBER_LEA; + s=readreg(s,4); + index=readreg(index,4); + d=writereg(d,4); + + raw_lea_l_brr_indexed(d,s,index,factor,offset); + unlock2(d); + unlock2(index); + unlock2(s); +} +MENDFUNC(5,lea_l_brr_indexed,(W4 d, RR4 s, RR4 index, IMM factor, IMM offset)) + +MIDFUNC(4,lea_l_rr_indexed,(W4 d, 
RR4 s, RR4 index, IMM factor)) +{ + CLOBBER_LEA; + s=readreg(s,4); + index=readreg(index,4); + d=writereg(d,4); + + raw_lea_l_rr_indexed(d,s,index,factor); + unlock2(d); + unlock2(index); + unlock2(s); +} +MENDFUNC(4,lea_l_rr_indexed,(W4 d, RR4 s, RR4 index, IMM factor)) + +/* write d to the long at the address contained in s+offset */ +MIDFUNC(3,mov_l_bRr,(RR4 d, RR4 s, IMM offset)) +{ + int dreg=d; + if (isconst(d)) { + COMPCALL(mov_l_mr)(live.state[d].val+offset,s); + return; + } + + CLOBBER_MOV; + s=readreg(s,4); + d=readreg_offset(d,4); + offset+=get_offset(dreg); + + raw_mov_l_bRr(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_l_bRr,(RR4 d, RR4 s, IMM offset)) + +/* write the word at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_w_bRr,(RR4 d, RR2 s, IMM offset)) +{ + int dreg=d; + + if (isconst(d)) { + COMPCALL(mov_w_mr)(live.state[d].val+offset,s); + return; + } + + CLOBBER_MOV; + s=readreg(s,2); + d=readreg_offset(d,4); + offset+=get_offset(dreg); + raw_mov_w_bRr(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_w_bRr,(RR4 d, RR2 s, IMM offset)) + +MIDFUNC(3,mov_b_bRr,(RR4 d, RR1 s, IMM offset)) +{ + int dreg=d; + if (isconst(d)) { + COMPCALL(mov_b_mr)(live.state[d].val+offset,s); + return; + } + + CLOBBER_MOV; + s=readreg(s,1); + d=readreg_offset(d,4); + offset+=get_offset(dreg); + raw_mov_b_bRr(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_b_bRr,(RR4 d, RR1 s, IMM offset)) + +MIDFUNC(1,mid_bswap_32,(RW4 r)) +{ + + if (isconst(r)) { + uae_u32 oldv=live.state[r].val; + live.state[r].val=reverse32(oldv); + return; + } + + CLOBBER_SW32; + r=rmw(r,4,4); + raw_bswap_32(r); + unlock2(r); +} +MENDFUNC(1,mid_bswap_32,(RW4 r)) + +MIDFUNC(1,mid_bswap_16,(RW2 r)) +{ + if (isconst(r)) { + uae_u32 oldv=live.state[r].val; + live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) | (oldv&0xffff0000); + return; + } + + CLOBBER_SW16; + r=rmw(r,2,2); + + raw_bswap_16(r); + unlock2(r); +} +MENDFUNC(1,mid_bswap_16,(RW2 
r)) + + + +MIDFUNC(2,mov_l_rr,(W4 d, RR4 s)) +{ + int olds; + + if (d==s) { /* How pointless! */ + return; + } + if (isconst(s)) { + COMPCALL(mov_l_ri)(d,live.state[s].val); + return; + } + olds=s; + disassociate(d); + s=readreg_offset(s,4); + live.state[d].realreg=s; + live.state[d].realind=live.nat[s].nholds; + live.state[d].val=live.state[olds].val; + live.state[d].validsize=4; + live.state[d].dirtysize=4; + set_status(d,DIRTY); + + live.nat[s].holds[live.nat[s].nholds]=d; + live.nat[s].nholds++; + log_clobberreg(d); + jit_log2("Added %d to nreg %d(%d), now holds %d regs", d,s,live.state[d].realind,live.nat[s].nholds); + unlock2(s); +} +MENDFUNC(2,mov_l_rr,(W4 d, RR4 s)) + +MIDFUNC(2,mov_l_mr,(IMM d, RR4 s)) +{ + if (isconst(s)) { + COMPCALL(mov_l_mi)(d,live.state[s].val); + return; + } + CLOBBER_MOV; + s=readreg(s,4); + + raw_mov_l_mr(d,s); + unlock2(s); +} +MENDFUNC(2,mov_l_mr,(IMM d, RR4 s)) + + +MIDFUNC(2,mov_w_mr,(IMM d, RR2 s)) +{ + if (isconst(s)) { + COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val); + return; + } + CLOBBER_MOV; + s=readreg(s,2); + + raw_mov_w_mr(d,s); + unlock2(s); +} +MENDFUNC(2,mov_w_mr,(IMM d, RR2 s)) + +MIDFUNC(2,mov_w_rm,(W2 d, IMM s)) +{ + CLOBBER_MOV; + d=writereg(d,2); + + raw_mov_w_rm(d,s); + unlock2(d); +} +MENDFUNC(2,mov_w_rm,(W2 d, IMM s)) + +MIDFUNC(2,mov_b_mr,(IMM d, RR1 s)) +{ + if (isconst(s)) { + COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val); + return; + } + + CLOBBER_MOV; + s=readreg(s,1); + + raw_mov_b_mr(d,s); + unlock2(s); +} +MENDFUNC(2,mov_b_mr,(IMM d, RR1 s)) + +MIDFUNC(2,mov_b_rm,(W1 d, IMM s)) +{ + CLOBBER_MOV; + d=writereg(d,1); + + raw_mov_b_rm(d,s); + unlock2(d); +} +MENDFUNC(2,mov_b_rm,(W1 d, IMM s)) + +MIDFUNC(2,mov_l_ri,(W4 d, IMM s)) +{ + set_const(d,s); + return; +} +MENDFUNC(2,mov_l_ri,(W4 d, IMM s)) + +MIDFUNC(2,mov_w_ri,(W2 d, IMM s)) +{ + CLOBBER_MOV; + d=writereg(d,2); + + raw_mov_w_ri(d,s); + unlock2(d); +} +MENDFUNC(2,mov_w_ri,(W2 d, IMM s)) + +MIDFUNC(2,mov_b_ri,(W1 d, IMM s)) +{ + CLOBBER_MOV; 
+ d=writereg(d,1); + + raw_mov_b_ri(d,s); + unlock2(d); +} +MENDFUNC(2,mov_b_ri,(W1 d, IMM s)) + +MIDFUNC(2,add_l_mi,(IMM d, IMM s)) +{ + CLOBBER_ADD; + raw_add_l_mi(d,s) ; +} +MENDFUNC(2,add_l_mi,(IMM d, IMM s)) + +MIDFUNC(2,add_w_mi,(IMM d, IMM s)) +{ + CLOBBER_ADD; + raw_add_w_mi(d,s) ; +} +MENDFUNC(2,add_w_mi,(IMM d, IMM s)) + +MIDFUNC(2,add_b_mi,(IMM d, IMM s)) +{ + CLOBBER_ADD; + raw_add_b_mi(d,s) ; +} +MENDFUNC(2,add_b_mi,(IMM d, IMM s)) + +MIDFUNC(2,test_l_ri,(RR4 d, IMM i)) +{ + CLOBBER_TEST; + d=readreg(d,4); + + raw_test_l_ri(d,i); + unlock2(d); +} +MENDFUNC(2,test_l_ri,(RR4 d, IMM i)) + +MIDFUNC(2,test_l_rr,(RR4 d, RR4 s)) +{ + CLOBBER_TEST; + d=readreg(d,4); + s=readreg(s,4); + + raw_test_l_rr(d,s);; + unlock2(d); + unlock2(s); +} +MENDFUNC(2,test_l_rr,(RR4 d, RR4 s)) + +MIDFUNC(2,test_w_rr,(RR2 d, RR2 s)) +{ + CLOBBER_TEST; + d=readreg(d,2); + s=readreg(s,2); + + raw_test_w_rr(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,test_w_rr,(RR2 d, RR2 s)) + +MIDFUNC(2,test_b_rr,(RR1 d, RR1 s)) +{ + CLOBBER_TEST; + d=readreg(d,1); + s=readreg(s,1); + + raw_test_b_rr(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,test_b_rr,(RR1 d, RR1 s)) + + +MIDFUNC(2,and_l_ri,(RW4 d, IMM i)) +{ + if (isconst(d) && !needflags) { + live.state[d].val &= i; + return; + } + + CLOBBER_AND; + d=rmw(d,4,4); + + raw_and_l_ri(d,i); + unlock2(d); +} +MENDFUNC(2,and_l_ri,(RW4 d, IMM i)) + +MIDFUNC(2,and_l,(RW4 d, RR4 s)) +{ + CLOBBER_AND; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_and_l(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,and_l,(RW4 d, RR4 s)) + +MIDFUNC(2,and_w,(RW2 d, RR2 s)) +{ + CLOBBER_AND; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_and_w(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,and_w,(RW2 d, RR2 s)) + +MIDFUNC(2,and_b,(RW1 d, RR1 s)) +{ + CLOBBER_AND; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_and_b(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,and_b,(RW1 d, RR1 s)) + +// gb-- used for making an fpcr value in compemu_fpp.cpp +MIDFUNC(2,or_l_rm,(RW4 d, IMM 
s)) +{ + CLOBBER_OR; + d=rmw(d,4,4); + + raw_or_l_rm(d,s); + unlock2(d); +} +MENDFUNC(2,or_l_rm,(RW4 d, IMM s)) + +MIDFUNC(2,or_l_ri,(RW4 d, IMM i)) +{ + if (isconst(d) && !needflags) { + live.state[d].val|=i; + return; + } + CLOBBER_OR; + d=rmw(d,4,4); + + raw_or_l_ri(d,i); + unlock2(d); +} +MENDFUNC(2,or_l_ri,(RW4 d, IMM i)) + +MIDFUNC(2,or_l,(RW4 d, RR4 s)) +{ + if (isconst(d) && isconst(s) && !needflags) { + live.state[d].val|=live.state[s].val; + return; + } + CLOBBER_OR; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_or_l(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,or_l,(RW4 d, RR4 s)) + +MIDFUNC(2,or_w,(RW2 d, RR2 s)) +{ + CLOBBER_OR; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_or_w(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,or_w,(RW2 d, RR2 s)) + +MIDFUNC(2,or_b,(RW1 d, RR1 s)) +{ + CLOBBER_OR; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_or_b(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,or_b,(RW1 d, RR1 s)) + +MIDFUNC(2,adc_l,(RW4 d, RR4 s)) +{ + CLOBBER_ADC; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_adc_l(d,s); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,adc_l,(RW4 d, RR4 s)) + +MIDFUNC(2,adc_w,(RW2 d, RR2 s)) +{ + CLOBBER_ADC; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_adc_w(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,adc_w,(RW2 d, RR2 s)) + +MIDFUNC(2,adc_b,(RW1 d, RR1 s)) +{ + CLOBBER_ADC; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_adc_b(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,adc_b,(RW1 d, RR1 s)) + +MIDFUNC(2,add_l,(RW4 d, RR4 s)) +{ + if (isconst(s)) { + COMPCALL(add_l_ri)(d,live.state[s].val); + return; + } + + CLOBBER_ADD; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_add_l(d,s); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,add_l,(RW4 d, RR4 s)) + +MIDFUNC(2,add_w,(RW2 d, RR2 s)) +{ + if (isconst(s)) { + COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val); + return; + } + + CLOBBER_ADD; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_add_w(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,add_w,(RW2 d, RR2 s)) + +MIDFUNC(2,add_b,(RW1 d, RR1 
s)) +{ + if (isconst(s)) { + COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val); + return; + } + + CLOBBER_ADD; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_add_b(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,add_b,(RW1 d, RR1 s)) + +MIDFUNC(2,sub_l_ri,(RW4 d, IMM i)) +{ + if (!i && !needflags) + return; + if (isconst(d) && !needflags) { + live.state[d].val-=i; + return; + } +#if USE_OFFSET + if (!needflags) { + add_offset(d,-i); + return; + } +#endif + + CLOBBER_SUB; + d=rmw(d,4,4); + + raw_sub_l_ri(d,i); + unlock2(d); +} +MENDFUNC(2,sub_l_ri,(RW4 d, IMM i)) + +MIDFUNC(2,sub_w_ri,(RW2 d, IMM i)) +{ + if (!i && !needflags) + return; + + CLOBBER_SUB; + d=rmw(d,2,2); + + raw_sub_w_ri(d,i); + unlock2(d); +} +MENDFUNC(2,sub_w_ri,(RW2 d, IMM i)) + +MIDFUNC(2,sub_b_ri,(RW1 d, IMM i)) +{ + if (!i && !needflags) + return; + + CLOBBER_SUB; + d=rmw(d,1,1); + + raw_sub_b_ri(d,i); + + unlock2(d); +} +MENDFUNC(2,sub_b_ri,(RW1 d, IMM i)) + +MIDFUNC(2,add_l_ri,(RW4 d, IMM i)) +{ + if (!i && !needflags) + return; + if (isconst(d) && !needflags) { + live.state[d].val+=i; + return; + } +#if USE_OFFSET + if (!needflags) { + add_offset(d,i); + return; + } +#endif + CLOBBER_ADD; + d=rmw(d,4,4); + raw_add_l_ri(d,i); + unlock2(d); +} +MENDFUNC(2,add_l_ri,(RW4 d, IMM i)) + +MIDFUNC(2,add_w_ri,(RW2 d, IMM i)) +{ + if (!i && !needflags) + return; + + CLOBBER_ADD; + d=rmw(d,2,2); + + raw_add_w_ri(d,i); + unlock2(d); +} +MENDFUNC(2,add_w_ri,(RW2 d, IMM i)) + +MIDFUNC(2,add_b_ri,(RW1 d, IMM i)) +{ + if (!i && !needflags) + return; + + CLOBBER_ADD; + d=rmw(d,1,1); + + raw_add_b_ri(d,i); + + unlock2(d); +} +MENDFUNC(2,add_b_ri,(RW1 d, IMM i)) + +MIDFUNC(2,sbb_l,(RW4 d, RR4 s)) +{ + CLOBBER_SBB; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_sbb_l(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,sbb_l,(RW4 d, RR4 s)) + +MIDFUNC(2,sbb_w,(RW2 d, RR2 s)) +{ + CLOBBER_SBB; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_sbb_w(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,sbb_w,(RW2 d, RR2 s)) + 
+MIDFUNC(2,sbb_b,(RW1 d, RR1 s)) +{ + CLOBBER_SBB; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_sbb_b(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,sbb_b,(RW1 d, RR1 s)) + +MIDFUNC(2,sub_l,(RW4 d, RR4 s)) +{ + if (isconst(s)) { + COMPCALL(sub_l_ri)(d,live.state[s].val); + return; + } + + CLOBBER_SUB; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_sub_l(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,sub_l,(RW4 d, RR4 s)) + +MIDFUNC(2,sub_w,(RW2 d, RR2 s)) +{ + if (isconst(s)) { + COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val); + return; + } + + CLOBBER_SUB; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_sub_w(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,sub_w,(RW2 d, RR2 s)) + +MIDFUNC(2,sub_b,(RW1 d, RR1 s)) +{ + if (isconst(s)) { + COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val); + return; + } + + CLOBBER_SUB; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_sub_b(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,sub_b,(RW1 d, RR1 s)) + +MIDFUNC(2,cmp_l,(RR4 d, RR4 s)) +{ + CLOBBER_CMP; + s=readreg(s,4); + d=readreg(d,4); + + raw_cmp_l(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,cmp_l,(RR4 d, RR4 s)) + +MIDFUNC(2,cmp_l_ri,(RR4 r, IMM i)) +{ + CLOBBER_CMP; + r=readreg(r,4); + + raw_cmp_l_ri(r,i); + unlock2(r); +} +MENDFUNC(2,cmp_l_ri,(RR4 r, IMM i)) + +MIDFUNC(2,cmp_w,(RR2 d, RR2 s)) +{ + CLOBBER_CMP; + s=readreg(s,2); + d=readreg(d,2); + + raw_cmp_w(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,cmp_w,(RR2 d, RR2 s)) + +MIDFUNC(2,cmp_b,(RR1 d, RR1 s)) +{ + CLOBBER_CMP; + s=readreg(s,1); + d=readreg(d,1); + + raw_cmp_b(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,cmp_b,(RR1 d, RR1 s)) + + +MIDFUNC(2,xor_l,(RW4 d, RR4 s)) +{ + CLOBBER_XOR; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_xor_l(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,xor_l,(RW4 d, RR4 s)) + +MIDFUNC(2,xor_w,(RW2 d, RR2 s)) +{ + CLOBBER_XOR; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_xor_w(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,xor_w,(RW2 d, RR2 s)) + +MIDFUNC(2,xor_b,(RW1 d, RR1 s)) +{ + 
CLOBBER_XOR; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_xor_b(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,xor_b,(RW1 d, RR1 s)) + +MIDFUNC(5,call_r_11,(W4 out1, RR4 r, RR4 in1, IMM osize, IMM isize)) +{ + clobber_flags(); + remove_all_offsets(); + if (osize==4) { + if (out1!=in1 && out1!=r) { + COMPCALL(forget_about)(out1); + } + } + else { + tomem_c(out1); + } + + in1=readreg_specific(in1,isize,REG_PAR1); + r=readreg(r,4); + prepare_for_call_1(); /* This should ensure that there won't be + any need for swapping nregs in prepare_for_call_2 + */ +#if USE_NORMAL_CALLING_CONVENTION + raw_push_l_r(in1); +#endif + unlock2(in1); + unlock2(r); + + prepare_for_call_2(); + raw_dec_sp(STACK_SHADOW_SPACE); + raw_call_r(r); + raw_inc_sp(STACK_SHADOW_SPACE); + +#if USE_NORMAL_CALLING_CONVENTION + raw_inc_sp(4); +#endif + + + live.nat[REG_RESULT].holds[0]=out1; + live.nat[REG_RESULT].nholds=1; + live.nat[REG_RESULT].touched=touchcnt++; + + live.state[out1].realreg=REG_RESULT; + live.state[out1].realind=0; + live.state[out1].val=0; + live.state[out1].validsize=osize; + live.state[out1].dirtysize=osize; + set_status(out1,DIRTY); +} +MENDFUNC(5,call_r_11,(W4 out1, RR4 r, RR4 in1, IMM osize, IMM isize)) + +MIDFUNC(5,call_r_02,(RR4 r, RR4 in1, RR4 in2, IMM isize1, IMM isize2)) +{ + clobber_flags(); + remove_all_offsets(); + in1=readreg_specific(in1,isize1,REG_PAR1); + in2=readreg_specific(in2,isize2,REG_PAR2); + r=readreg(r,4); + prepare_for_call_1(); /* This should ensure that there won't be + any need for swapping nregs in prepare_for_call_2 + */ +#if USE_NORMAL_CALLING_CONVENTION + raw_push_l_r(in2); + raw_push_l_r(in1); +#endif + unlock2(r); + unlock2(in1); + unlock2(in2); + prepare_for_call_2(); + raw_dec_sp(STACK_SHADOW_SPACE); + raw_call_r(r); + raw_inc_sp(STACK_SHADOW_SPACE); +#if USE_NORMAL_CALLING_CONVENTION + raw_inc_sp(8); +#endif +} +MENDFUNC(5,call_r_02,(RR4 r, RR4 in1, RR4 in2, IMM isize1, IMM isize2)) + +/* forget_about() takes a mid-layer register */ 
+MIDFUNC(1,forget_about,(W4 r)) +{ + if (isinreg(r)) + disassociate(r); + live.state[r].val=0; + set_status(r,UNDEF); +} +MENDFUNC(1,forget_about,(W4 r)) + +MIDFUNC(0,nop,(void)) +{ + raw_emit_nop(); +} +MENDFUNC(0,nop,(void)) + +MIDFUNC(1,f_forget_about,(FW r)) +{ + if (f_isinreg(r)) + f_disassociate(r); + live.fate[r].status=UNDEF; +} +MENDFUNC(1,f_forget_about,(FW r)) + +MIDFUNC(1,fmov_pi,(FW r)) +{ + r=f_writereg(r); + raw_fmov_pi(r); + f_unlock(r); +} +MENDFUNC(1,fmov_pi,(FW r)) + +MIDFUNC(1,fmov_log10_2,(FW r)) +{ + r=f_writereg(r); + raw_fmov_log10_2(r); + f_unlock(r); +} +MENDFUNC(1,fmov_log10_2,(FW r)) + +MIDFUNC(1,fmov_log2_e,(FW r)) +{ + r=f_writereg(r); + raw_fmov_log2_e(r); + f_unlock(r); +} +MENDFUNC(1,fmov_log2_e,(FW r)) + +MIDFUNC(1,fmov_loge_2,(FW r)) +{ + r=f_writereg(r); + raw_fmov_loge_2(r); + f_unlock(r); +} +MENDFUNC(1,fmov_loge_2,(FW r)) + +MIDFUNC(1,fmov_1,(FW r)) +{ + r=f_writereg(r); + raw_fmov_1(r); + f_unlock(r); +} +MENDFUNC(1,fmov_1,(FW r)) + +MIDFUNC(1,fmov_0,(FW r)) +{ + r=f_writereg(r); + raw_fmov_0(r); + f_unlock(r); +} +MENDFUNC(1,fmov_0,(FW r)) + +MIDFUNC(2,fmov_rm,(FW r, MEMR m)) +{ + r=f_writereg(r); + raw_fmov_rm(r,m); + f_unlock(r); +} +MENDFUNC(2,fmov_rm,(FW r, MEMR m)) + +MIDFUNC(2,fmovi_rm,(FW r, MEMR m)) +{ + r=f_writereg(r); + raw_fmovi_rm(r,m); + f_unlock(r); +} +MENDFUNC(2,fmovi_rm,(FW r, MEMR m)) + +MIDFUNC(2,fmovi_mr,(MEMW m, FR r)) +{ + r=f_readreg(r); + raw_fmovi_mr(m,r); + f_unlock(r); +} +MENDFUNC(2,fmovi_mr,(MEMW m, FR r)) + +MIDFUNC(3,fmovi_mrb,(MEMW m, FR r, double *bounds)) +{ + r=f_readreg(r); + raw_fmovi_mrb(m,r,bounds); + f_unlock(r); +} +MENDFUNC(3,fmovi_mrb,(MEMW m, FR r, double *bounds)) + +MIDFUNC(2,fmovs_rm,(FW r, MEMR m)) +{ + r=f_writereg(r); + raw_fmovs_rm(r,m); + f_unlock(r); +} +MENDFUNC(2,fmovs_rm,(FW r, MEMR m)) + +MIDFUNC(2,fmovs_mr,(MEMW m, FR r)) +{ + r=f_readreg(r); + raw_fmovs_mr(m,r); + f_unlock(r); +} +MENDFUNC(2,fmovs_mr,(MEMW m, FR r)) + +MIDFUNC(1,fcuts_r,(FRW r)) +{ + r=f_rmw(r); + 
raw_fcuts_r(r); + f_unlock(r); +} +MENDFUNC(1,fcuts_r,(FRW r)) + +MIDFUNC(1,fcut_r,(FRW r)) +{ + r=f_rmw(r); + raw_fcut_r(r); + f_unlock(r); +} +MENDFUNC(1,fcut_r,(FRW r)) + +MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r)) +{ + r=f_readreg(r); + raw_fmov_ext_mr(m,r); + f_unlock(r); +} +MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r)) + +MIDFUNC(2,fmov_mr,(MEMW m, FR r)) +{ + r=f_readreg(r); + raw_fmov_mr(m,r); + f_unlock(r); +} +MENDFUNC(2,fmov_mr,(MEMW m, FR r)) + +MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m)) +{ + r=f_writereg(r); + raw_fmov_ext_rm(r,m); + f_unlock(r); +} +MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m)) + +MIDFUNC(2,fmov_rr,(FW d, FR s)) +{ + if (d==s) { /* How pointless! */ + return; + } +#if USE_F_ALIAS + f_disassociate(d); + s=f_readreg(s); + live.fate[d].realreg=s; + live.fate[d].realind=live.fat[s].nholds; + live.fate[d].status=DIRTY; + live.fat[s].holds[live.fat[s].nholds]=d; + live.fat[s].nholds++; + f_unlock(s); +#else + s=f_readreg(s); + d=f_writereg(d); + raw_fmov_rr(d,s); + f_unlock(s); + f_unlock(d); +#endif +} +MENDFUNC(2,fmov_rr,(FW d, FR s)) + +MIDFUNC(2,fldcw_m_indexed,(RR4 index, IMM base)) +{ + index=readreg(index,4); + + raw_fldcw_m_indexed(index,base); + unlock2(index); +} +MENDFUNC(2,fldcw_m_indexed,(RR4 index, IMM base)) + +MIDFUNC(1,ftst_r,(FR r)) +{ + r=f_readreg(r); + raw_ftst_r(r); + f_unlock(r); +} +MENDFUNC(1,ftst_r,(FR r)) + +MIDFUNC(0,dont_care_fflags,(void)) +{ + f_disassociate(FP_RESULT); +} +MENDFUNC(0,dont_care_fflags,(void)) + +MIDFUNC(2,fsqrt_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fsqrt_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fsqrt_rr,(FW d, FR s)) + +MIDFUNC(2,fabs_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fabs_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fabs_rr,(FW d, FR s)) + +MIDFUNC(2,fgetexp_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fgetexp_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fgetexp_rr,(FW d, FR s)) + +MIDFUNC(2,fgetman_rr,(FW d, FR 
s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fgetman_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fgetman_rr,(FW d, FR s)) + +MIDFUNC(2,fsin_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fsin_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fsin_rr,(FW d, FR s)) + +MIDFUNC(2,fcos_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fcos_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fcos_rr,(FW d, FR s)) + +MIDFUNC(2,ftan_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_ftan_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,ftan_rr,(FW d, FR s)) + +MIDFUNC(3,fsincos_rr,(FW d, FW c, FR s)) +{ + s=f_readreg(s); /* s for source */ + d=f_writereg(d); /* d for sine */ + c=f_writereg(c); /* c for cosine */ + raw_fsincos_rr(d,c,s); + f_unlock(s); + f_unlock(d); + f_unlock(c); +} +MENDFUNC(3,fsincos_rr,(FW d, FW c, FR s)) + +MIDFUNC(2,fscale_rr,(FRW d, FR s)) +{ + s=f_readreg(s); + d=f_rmw(d); + raw_fscale_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fscale_rr,(FRW d, FR s)) + +MIDFUNC(2,ftwotox_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_ftwotox_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,ftwotox_rr,(FW d, FR s)) + +MIDFUNC(2,fetox_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fetox_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fetox_rr,(FW d, FR s)) + +MIDFUNC(2,frndint_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_frndint_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,frndint_rr,(FW d, FR s)) + +MIDFUNC(2,fetoxM1_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fetoxM1_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fetoxM1_rr,(FW d, FR s)) + +MIDFUNC(2,ftentox_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_ftentox_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,ftentox_rr,(FW d, FR s)) + +MIDFUNC(2,flog2_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + 
raw_flog2_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,flog2_rr,(FW d, FR s)) + +MIDFUNC(2,flogN_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_flogN_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,flogN_rr,(FW d, FR s)) + +MIDFUNC(2,flogNP1_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_flogNP1_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,flogNP1_rr,(FW d, FR s)) + +MIDFUNC(2,flog10_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_flog10_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,flog10_rr,(FW d, FR s)) + +MIDFUNC(2,fasin_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fasin_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fasin_rr,(FW d, FR s)) + +MIDFUNC(2,facos_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_facos_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,facos_rr,(FW d, FR s)) + +MIDFUNC(2,fatan_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fatan_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fatan_rr,(FW d, FR s)) + +MIDFUNC(2,fatanh_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fatanh_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fatanh_rr,(FW d, FR s)) + +MIDFUNC(2,fsinh_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fsinh_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fsinh_rr,(FW d, FR s)) + +MIDFUNC(2,fcosh_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fcosh_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fcosh_rr,(FW d, FR s)) + +MIDFUNC(2,ftanh_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_ftanh_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,ftanh_rr,(FW d, FR s)) + +MIDFUNC(2,fneg_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fneg_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fneg_rr,(FW d, FR s)) + +MIDFUNC(2,fadd_rr,(FRW d, FR s)) +{ + s=f_readreg(s); + d=f_rmw(d); + 
raw_fadd_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fadd_rr,(FRW d, FR s)) + +MIDFUNC(2,fsub_rr,(FRW d, FR s)) +{ + s=f_readreg(s); + d=f_rmw(d); + raw_fsub_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fsub_rr,(FRW d, FR s)) + +MIDFUNC(2,fcmp_rr,(FR d, FR s)) +{ + d=f_readreg(d); + s=f_readreg(s); + raw_fcmp_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fcmp_rr,(FR d, FR s)) + +MIDFUNC(2,fdiv_rr,(FRW d, FR s)) +{ + s=f_readreg(s); + d=f_rmw(d); + raw_fdiv_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fdiv_rr,(FRW d, FR s)) + +MIDFUNC(2,frem_rr,(FRW d, FR s)) +{ + s=f_readreg(s); + d=f_rmw(d); + raw_frem_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,frem_rr,(FRW d, FR s)) + +MIDFUNC(2,frem1_rr,(FRW d, FR s)) +{ + s=f_readreg(s); + d=f_rmw(d); + raw_frem1_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,frem1_rr,(FRW d, FR s)) + +MIDFUNC(2,fmul_rr,(FRW d, FR s)) +{ + s=f_readreg(s); + d=f_rmw(d); + raw_fmul_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fmul_rr,(FRW d, FR s)) + +#ifdef __GNUC__ + +static inline void mfence(void) +{ +#ifdef CPU_i386 + if (!cpuinfo.x86_has_xmm2) + __asm__ __volatile__("lock; addl $0,0(%%esp)":::"memory"); + else +#endif + __asm__ __volatile__("mfence":::"memory"); +} + +static inline void clflush(volatile void *__p) +{ + __asm__ __volatile__("clflush %0" : "+m" (*(volatile char *)__p)); +} + +static inline void flush_cpu_icache(void *start, void *stop) +{ + mfence(); + if (cpuinfo.x86_clflush_size != 0) + { + volatile char *vaddr = (volatile char *)(((uintptr)start / cpuinfo.x86_clflush_size) * cpuinfo.x86_clflush_size); + volatile char *vend = (volatile char *)((((uintptr)stop + cpuinfo.x86_clflush_size - 1) / cpuinfo.x86_clflush_size) * cpuinfo.x86_clflush_size); + while (vaddr < vend) + { + clflush(vaddr); + vaddr += cpuinfo.x86_clflush_size; + } + } + mfence(); +} + +#else + +static inline void flush_cpu_icache(void *start, void *stop) +{ + UNUSED(start); + UNUSED(stop); +} + 
+#endif + +static inline void write_jmp_target(uae_u32 *jmpaddr, cpuop_func* a) { + uintptr rel = (uintptr) a - ((uintptr) jmpaddr + 4); + *(jmpaddr) = (uae_u32) rel; + flush_cpu_icache((void *) jmpaddr, (void *) &jmpaddr[1]); +} + +static inline void emit_jmp_target(uae_u32 a) { + emit_long(a-((uintptr)target+4)); +} diff --git a/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_x86.h b/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_x86.h new file mode 100644 index 00000000..a0f5cf92 --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/compemu_midfunc_x86.h @@ -0,0 +1,252 @@ +/* + * compiler/compemu_midfunc_x86.h - Native MIDFUNCS for IA-32 and AMD64 + * + * Copyright (c) 2014 Jens Heitmann of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * + * Adaptation for Basilisk II and improvements, copyright 2000-2002 + * Gwenole Beauchesne + * + * Basilisk II (C) 1997-2002 Christian Bauer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Note: + * File is included by compemu.h + * + */ + +DECLARE_MIDFUNC(bt_l_ri(RR4 r, IMM i)); +DECLARE_MIDFUNC(bt_l_rr(RR4 r, RR4 b)); +DECLARE_MIDFUNC(btc_l_ri(RW4 r, IMM i)); +DECLARE_MIDFUNC(btc_l_rr(RW4 r, RR4 b)); +DECLARE_MIDFUNC(bts_l_ri(RW4 r, IMM i)); +DECLARE_MIDFUNC(bts_l_rr(RW4 r, RR4 b)); +DECLARE_MIDFUNC(btr_l_ri(RW4 r, IMM i)); +DECLARE_MIDFUNC(btr_l_rr(RW4 r, RR4 b)); +DECLARE_MIDFUNC(mov_l_rm(W4 d, IMM s)); +DECLARE_MIDFUNC(call_r(RR4 r)); +DECLARE_MIDFUNC(sub_l_mi(IMM d, IMM s)); +DECLARE_MIDFUNC(mov_l_mi(IMM d, IMM s)); +DECLARE_MIDFUNC(mov_w_mi(IMM d, IMM s)); +DECLARE_MIDFUNC(mov_b_mi(IMM d, IMM s)); +DECLARE_MIDFUNC(rol_b_ri(RW1 r, IMM i)); +DECLARE_MIDFUNC(rol_w_ri(RW2 r, IMM i)); +DECLARE_MIDFUNC(rol_l_ri(RW4 r, IMM i)); +DECLARE_MIDFUNC(rol_l_rr(RW4 d, RR1 r)); +DECLARE_MIDFUNC(rol_w_rr(RW2 d, RR1 r)); +DECLARE_MIDFUNC(rol_b_rr(RW1 d, RR1 r)); +DECLARE_MIDFUNC(shll_l_rr(RW4 d, RR1 r)); +DECLARE_MIDFUNC(shll_w_rr(RW2 d, RR1 r)); +DECLARE_MIDFUNC(shll_b_rr(RW1 d, RR1 r)); +DECLARE_MIDFUNC(ror_b_ri(RR1 r, IMM i)); +DECLARE_MIDFUNC(ror_w_ri(RR2 r, IMM i)); +DECLARE_MIDFUNC(ror_l_ri(RR4 r, IMM i)); +DECLARE_MIDFUNC(ror_l_rr(RR4 d, RR1 r)); +DECLARE_MIDFUNC(ror_w_rr(RR2 d, RR1 r)); +DECLARE_MIDFUNC(ror_b_rr(RR1 d, RR1 r)); +DECLARE_MIDFUNC(shrl_l_rr(RW4 d, RR1 r)); +DECLARE_MIDFUNC(shrl_w_rr(RW2 d, RR1 r)); +DECLARE_MIDFUNC(shrl_b_rr(RW1 d, RR1 r)); +DECLARE_MIDFUNC(shra_l_rr(RW4 d, RR1 r)); +DECLARE_MIDFUNC(shra_w_rr(RW2 d, RR1 r)); +DECLARE_MIDFUNC(shra_b_rr(RW1 d, RR1 r)); +DECLARE_MIDFUNC(shll_l_ri(RW4 r, IMM i)); +DECLARE_MIDFUNC(shll_w_ri(RW2 r, IMM i)); +DECLARE_MIDFUNC(shll_b_ri(RW1 r, IMM i)); +DECLARE_MIDFUNC(shrl_l_ri(RW4 r, IMM i)); +DECLARE_MIDFUNC(shrl_w_ri(RW2 r, IMM i)); +DECLARE_MIDFUNC(shrl_b_ri(RW1 r, IMM i)); 
+DECLARE_MIDFUNC(shra_l_ri(RW4 r, IMM i)); +DECLARE_MIDFUNC(shra_w_ri(RW2 r, IMM i)); +DECLARE_MIDFUNC(shra_b_ri(RW1 r, IMM i)); +DECLARE_MIDFUNC(setcc(W1 d, IMM cc)); +DECLARE_MIDFUNC(setcc_m(IMM d, IMM cc)); +DECLARE_MIDFUNC(cmov_l_rr(RW4 d, RR4 s, IMM cc)); +DECLARE_MIDFUNC(cmov_l_rm(RW4 d, IMM s, IMM cc)); +DECLARE_MIDFUNC(bsf_l_rr(W4 d, RR4 s)); +DECLARE_MIDFUNC(pop_m(IMM d)); +DECLARE_MIDFUNC(push_m(IMM d)); +DECLARE_MIDFUNC(pop_l(W4 d)); +DECLARE_MIDFUNC(push_l_i(IMM i)); +DECLARE_MIDFUNC(push_l(RR4 s)); +DECLARE_MIDFUNC(clear_16(RW4 r)); +DECLARE_MIDFUNC(clear_8(RW4 r)); +DECLARE_MIDFUNC(sign_extend_32_rr(W4 d, RR2 s)); +DECLARE_MIDFUNC(sign_extend_16_rr(W4 d, RR2 s)); +DECLARE_MIDFUNC(sign_extend_8_rr(W4 d, RR1 s)); +DECLARE_MIDFUNC(zero_extend_16_rr(W4 d, RR2 s)); +DECLARE_MIDFUNC(zero_extend_8_rr(W4 d, RR1 s)); +DECLARE_MIDFUNC(imul_64_32(RW4 d, RW4 s)); +DECLARE_MIDFUNC(mul_64_32(RW4 d, RW4 s)); +DECLARE_MIDFUNC(simulate_bsf(W4 tmp, RW4 s)); +DECLARE_MIDFUNC(imul_32_32(RW4 d, RR4 s)); +DECLARE_MIDFUNC(mul_32_32(RW4 d, RR4 s)); +DECLARE_MIDFUNC(mov_b_rr(W1 d, RR1 s)); +DECLARE_MIDFUNC(mov_w_rr(W2 d, RR2 s)); +DECLARE_MIDFUNC(mov_l_rrm_indexed(W4 d,RR4 baser, RR4 index, IMM factor)); +DECLARE_MIDFUNC(mov_w_rrm_indexed(W2 d, RR4 baser, RR4 index, IMM factor)); +DECLARE_MIDFUNC(mov_b_rrm_indexed(W1 d, RR4 baser, RR4 index, IMM factor)); +DECLARE_MIDFUNC(mov_l_mrr_indexed(RR4 baser, RR4 index, IMM factor, RR4 s)); +DECLARE_MIDFUNC(mov_w_mrr_indexed(RR4 baser, RR4 index, IMM factor, RR2 s)); +DECLARE_MIDFUNC(mov_b_mrr_indexed(RR4 baser, RR4 index, IMM factor, RR1 s)); +DECLARE_MIDFUNC(mov_l_bmrr_indexed(IMM base, RR4 baser, RR4 index, IMM factor, RR4 s)); +DECLARE_MIDFUNC(mov_w_bmrr_indexed(IMM base, RR4 baser, RR4 index, IMM factor, RR2 s)); +DECLARE_MIDFUNC(mov_b_bmrr_indexed(IMM base, RR4 baser, RR4 index, IMM factor, RR1 s)); +DECLARE_MIDFUNC(mov_l_brrm_indexed(W4 d, IMM base, RR4 baser, RR4 index, IMM factor)); +DECLARE_MIDFUNC(mov_w_brrm_indexed(W2 d, 
IMM base, RR4 baser, RR4 index, IMM factor)); +DECLARE_MIDFUNC(mov_b_brrm_indexed(W1 d, IMM base, RR4 baser, RR4 index, IMM factor)); +DECLARE_MIDFUNC(mov_l_rm_indexed(W4 d, IMM base, RR4 index, IMM factor)); +DECLARE_MIDFUNC(mov_l_rR(W4 d, RR4 s, IMM offset)); +DECLARE_MIDFUNC(mov_w_rR(W2 d, RR4 s, IMM offset)); +DECLARE_MIDFUNC(mov_b_rR(W1 d, RR4 s, IMM offset)); +DECLARE_MIDFUNC(mov_l_brR(W4 d, RR4 s, IMM offset)); +DECLARE_MIDFUNC(mov_w_brR(W2 d, RR4 s, IMM offset)); +DECLARE_MIDFUNC(mov_b_brR(W1 d, RR4 s, IMM offset)); +DECLARE_MIDFUNC(mov_l_Ri(RR4 d, IMM i, IMM offset)); +DECLARE_MIDFUNC(mov_w_Ri(RR4 d, IMM i, IMM offset)); +DECLARE_MIDFUNC(mov_b_Ri(RR4 d, IMM i, IMM offset)); +DECLARE_MIDFUNC(mov_l_Rr(RR4 d, RR4 s, IMM offset)); +DECLARE_MIDFUNC(mov_w_Rr(RR4 d, RR2 s, IMM offset)); +DECLARE_MIDFUNC(mov_b_Rr(RR4 d, RR1 s, IMM offset)); +DECLARE_MIDFUNC(lea_l_brr(W4 d, RR4 s, IMM offset)); +DECLARE_MIDFUNC(lea_l_brr_indexed(W4 d, RR4 s, RR4 index, IMM factor, IMM offset)); +DECLARE_MIDFUNC(lea_l_rr_indexed(W4 d, RR4 s, RR4 index, IMM factor)); +DECLARE_MIDFUNC(mov_l_bRr(RR4 d, RR4 s, IMM offset)); +DECLARE_MIDFUNC(mov_w_bRr(RR4 d, RR2 s, IMM offset)); +DECLARE_MIDFUNC(mov_b_bRr(RR4 d, RR1 s, IMM offset)); +DECLARE_MIDFUNC(mid_bswap_32(RW4 r)); +DECLARE_MIDFUNC(mid_bswap_16(RW2 r)); +DECLARE_MIDFUNC(mov_l_rr(W4 d, RR4 s)); +DECLARE_MIDFUNC(mov_l_mr(IMM d, RR4 s)); +DECLARE_MIDFUNC(mov_w_mr(IMM d, RR2 s)); +DECLARE_MIDFUNC(mov_w_rm(W2 d, IMM s)); +DECLARE_MIDFUNC(mov_b_mr(IMM d, RR1 s)); +DECLARE_MIDFUNC(mov_b_rm(W1 d, IMM s)); +DECLARE_MIDFUNC(mov_l_ri(W4 d, IMM s)); +DECLARE_MIDFUNC(mov_w_ri(W2 d, IMM s)); +DECLARE_MIDFUNC(mov_b_ri(W1 d, IMM s)); +DECLARE_MIDFUNC(add_l_mi(IMM d, IMM s) ); +DECLARE_MIDFUNC(add_w_mi(IMM d, IMM s) ); +DECLARE_MIDFUNC(add_b_mi(IMM d, IMM s) ); +DECLARE_MIDFUNC(test_l_ri(RR4 d, IMM i)); +DECLARE_MIDFUNC(test_l_rr(RR4 d, RR4 s)); +DECLARE_MIDFUNC(test_w_rr(RR2 d, RR2 s)); +DECLARE_MIDFUNC(test_b_rr(RR1 d, RR1 s)); 
+DECLARE_MIDFUNC(and_l_ri(RW4 d, IMM i)); +DECLARE_MIDFUNC(and_l(RW4 d, RR4 s)); +DECLARE_MIDFUNC(and_w(RW2 d, RR2 s)); +DECLARE_MIDFUNC(and_b(RW1 d, RR1 s)); +DECLARE_MIDFUNC(or_l_rm(RW4 d, IMM s)); +DECLARE_MIDFUNC(or_l_ri(RW4 d, IMM i)); +DECLARE_MIDFUNC(or_l(RW4 d, RR4 s)); +DECLARE_MIDFUNC(or_w(RW2 d, RR2 s)); +DECLARE_MIDFUNC(or_b(RW1 d, RR1 s)); +DECLARE_MIDFUNC(adc_l(RW4 d, RR4 s)); +DECLARE_MIDFUNC(adc_w(RW2 d, RR2 s)); +DECLARE_MIDFUNC(adc_b(RW1 d, RR1 s)); +DECLARE_MIDFUNC(add_l(RW4 d, RR4 s)); +DECLARE_MIDFUNC(add_w(RW2 d, RR2 s)); +DECLARE_MIDFUNC(add_b(RW1 d, RR1 s)); +DECLARE_MIDFUNC(sub_l_ri(RW4 d, IMM i)); +DECLARE_MIDFUNC(sub_w_ri(RW2 d, IMM i)); +DECLARE_MIDFUNC(sub_b_ri(RW1 d, IMM i)); +DECLARE_MIDFUNC(add_l_ri(RW4 d, IMM i)); +DECLARE_MIDFUNC(add_w_ri(RW2 d, IMM i)); +DECLARE_MIDFUNC(add_b_ri(RW1 d, IMM i)); +DECLARE_MIDFUNC(sbb_l(RW4 d, RR4 s)); +DECLARE_MIDFUNC(sbb_w(RW2 d, RR2 s)); +DECLARE_MIDFUNC(sbb_b(RW1 d, RR1 s)); +DECLARE_MIDFUNC(sub_l(RW4 d, RR4 s)); +DECLARE_MIDFUNC(sub_w(RW2 d, RR2 s)); +DECLARE_MIDFUNC(sub_b(RW1 d, RR1 s)); +DECLARE_MIDFUNC(cmp_l(RR4 d, RR4 s)); +DECLARE_MIDFUNC(cmp_l_ri(RR4 r, IMM i)); +DECLARE_MIDFUNC(cmp_w(RR2 d, RR2 s)); +DECLARE_MIDFUNC(cmp_b(RR1 d, RR1 s)); +DECLARE_MIDFUNC(xor_l(RW4 d, RR4 s)); +DECLARE_MIDFUNC(xor_w(RW2 d, RR2 s)); +DECLARE_MIDFUNC(xor_b(RW1 d, RR1 s)); +DECLARE_MIDFUNC(live_flags(void)); +DECLARE_MIDFUNC(dont_care_flags(void)); +DECLARE_MIDFUNC(duplicate_carry(void)); +DECLARE_MIDFUNC(restore_carry(void)); +DECLARE_MIDFUNC(start_needflags(void)); +DECLARE_MIDFUNC(end_needflags(void)); +DECLARE_MIDFUNC(make_flags_live(void)); +DECLARE_MIDFUNC(call_r_11(RR4 r, W4 out1, RR4 in1, IMM osize, IMM isize)); +DECLARE_MIDFUNC(call_r_02(RR4 r, RR4 in1, RR4 in2, IMM isize1, IMM isize2)); +DECLARE_MIDFUNC(forget_about(W4 r)); +DECLARE_MIDFUNC(nop(void)); + +DECLARE_MIDFUNC(f_forget_about(FW r)); +DECLARE_MIDFUNC(fmov_pi(FW r)); +DECLARE_MIDFUNC(fmov_log10_2(FW r)); +DECLARE_MIDFUNC(fmov_log2_e(FW r)); 
+DECLARE_MIDFUNC(fmov_loge_2(FW r)); +DECLARE_MIDFUNC(fmov_1(FW r)); +DECLARE_MIDFUNC(fmov_0(FW r)); +DECLARE_MIDFUNC(fmov_rm(FW r, MEMR m)); +DECLARE_MIDFUNC(fmov_mr(MEMW m, FR r)); +DECLARE_MIDFUNC(fmovi_rm(FW r, MEMR m)); +DECLARE_MIDFUNC(fmovi_mr(MEMW m, FR r)); +DECLARE_MIDFUNC(fmovi_mrb(MEMW m, FR r, double *bounds)); +DECLARE_MIDFUNC(fmovs_rm(FW r, MEMR m)); +DECLARE_MIDFUNC(fmovs_mr(MEMW m, FR r)); +DECLARE_MIDFUNC(fcuts_r(FRW r)); +DECLARE_MIDFUNC(fcut_r(FRW r)); +DECLARE_MIDFUNC(fmov_ext_mr(MEMW m, FR r)); +DECLARE_MIDFUNC(fmov_ext_rm(FW r, MEMR m)); +DECLARE_MIDFUNC(fmov_rr(FW d, FR s)); +DECLARE_MIDFUNC(fldcw_m_indexed(RR4 index, IMM base)); +DECLARE_MIDFUNC(ftst_r(FR r)); +DECLARE_MIDFUNC(dont_care_fflags(void)); +DECLARE_MIDFUNC(fsqrt_rr(FW d, FR s)); +DECLARE_MIDFUNC(fabs_rr(FW d, FR s)); +DECLARE_MIDFUNC(frndint_rr(FW d, FR s)); +DECLARE_MIDFUNC(fgetexp_rr(FW d, FR s)); +DECLARE_MIDFUNC(fgetman_rr(FW d, FR s)); +DECLARE_MIDFUNC(fsin_rr(FW d, FR s)); +DECLARE_MIDFUNC(fcos_rr(FW d, FR s)); +DECLARE_MIDFUNC(ftan_rr(FW d, FR s)); +DECLARE_MIDFUNC(fsincos_rr(FW d, FW c, FR s)); +DECLARE_MIDFUNC(fscale_rr(FRW d, FR s)); +DECLARE_MIDFUNC(ftwotox_rr(FW d, FR s)); +DECLARE_MIDFUNC(fetox_rr(FW d, FR s)); +DECLARE_MIDFUNC(fetoxM1_rr(FW d, FR s)); +DECLARE_MIDFUNC(ftentox_rr(FW d, FR s)); +DECLARE_MIDFUNC(flog2_rr(FW d, FR s)); +DECLARE_MIDFUNC(flogN_rr(FW d, FR s)); +DECLARE_MIDFUNC(flogNP1_rr(FW d, FR s)); +DECLARE_MIDFUNC(flog10_rr(FW d, FR s)); +DECLARE_MIDFUNC(fasin_rr(FW d, FR s)); +DECLARE_MIDFUNC(facos_rr(FW d, FR s)); +DECLARE_MIDFUNC(fatan_rr(FW d, FR s)); +DECLARE_MIDFUNC(fatanh_rr(FW d, FR s)); +DECLARE_MIDFUNC(fsinh_rr(FW d, FR s)); +DECLARE_MIDFUNC(fcosh_rr(FW d, FR s)); +DECLARE_MIDFUNC(ftanh_rr(FW d, FR s)); +DECLARE_MIDFUNC(fneg_rr(FW d, FR s)); +DECLARE_MIDFUNC(fadd_rr(FRW d, FR s)); +DECLARE_MIDFUNC(fsub_rr(FRW d, FR s)); +DECLARE_MIDFUNC(fmul_rr(FRW d, FR s)); +DECLARE_MIDFUNC(frem_rr(FRW d, FR s)); +DECLARE_MIDFUNC(frem1_rr(FRW d, FR s)); 
+DECLARE_MIDFUNC(fdiv_rr(FRW d, FR s)); +DECLARE_MIDFUNC(fcmp_rr(FR d, FR s)); +DECLARE_MIDFUNC(fflags_into_flags(W2 tmp)); diff --git a/BasiliskII/src/uae_cpu/compiler/compemu_support.cpp b/BasiliskII/src/uae_cpu/compiler/compemu_support.cpp new file mode 100644 index 00000000..c7b94244 --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/compemu_support.cpp @@ -0,0 +1,5111 @@ +/* + * compiler/compemu_support.cpp - Core dynamic translation engine + * + * Copyright (c) 2001-2009 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * JIT compiler m68k -> IA-32 and AMD64 / ARM + * + * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * Adaptation for Basilisk II and improvements, copyright 2000-2004 Gwenole Beauchesne + * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifdef UAE + +#define writemem_special writemem +#define readmem_special readmem + +#else +#if !FIXED_ADDRESSING +#error "Only Fixed Addressing is supported with the JIT Compiler" +#endif + +#if defined(X86_ASSEMBLY) && !SAHF_SETO_PROFITABLE +#error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler" +#endif + +/* NOTE: support for AMD64 assumes translation cache and other code + * buffers are allocated into a 32-bit address space because (i) B2/JIT + * code is not 64-bit clean and (ii) it's faster to resolve branches + * that way. + */ +#if !defined(CPU_i386) && !defined(CPU_x86_64) && !defined(CPU_arm) +#error "Only IA-32, X86-64 and ARM v6 targets are supported with the JIT Compiler" +#endif +#endif + +#define USE_MATCH 0 + +/* kludge for Brian, so he can compile under MSVC++ */ +#define USE_NORMAL_CALLING_CONVENTION 0 + +#include "sysconfig.h" +#include "sysdeps.h" + +#ifdef JIT + +#ifdef UAE +#include "options.h" +#include "events.h" +#include "memory.h" +#include "custom.h" +#else +#include "cpu_emulation.h" +#include "main.h" +#include "vm_alloc.h" + +#include "m68k.h" +#include "memory.h" +#include "readcpu.h" +#endif +#include "newcpu.h" +#include "comptbl.h" +#ifdef UAE +#include "compemu.h" +#else +#include "compiler/compemu.h" +#include "fpu/fpu.h" +#include "fpu/flags.h" +#include "parameters.h" +#endif +#include "verify.h" + +#ifdef UAE +#include "uae/log.h" + +#include "uae/vm.h" +#define VM_PAGE_READ UAE_VM_READ +#define VM_PAGE_WRITE UAE_VM_WRITE +#define VM_PAGE_EXECUTE UAE_VM_EXECUTE +#define VM_MAP_FAILED UAE_VM_ALLOC_FAILED +#define VM_MAP_DEFAULT 1 +#define VM_MAP_32BIT 1 +#define vm_protect(address, size, protect) uae_vm_protect(address, size, protect) +#define vm_release(address, size) uae_vm_free(address, 
size) + +static inline void *vm_acquire(size_t size, int options = VM_MAP_DEFAULT) +{ + assert(options == (VM_MAP_DEFAULT | VM_MAP_32BIT)); + return uae_vm_alloc(size, UAE_VM_32BIT, UAE_VM_READ_WRITE); +} + +#define UNUSED(x) +#include "uae.h" +#include "uae/log.h" +#define jit_log(format, ...) \ + uae_log("JIT: " format "\n", ##__VA_ARGS__); +#define jit_log2(format, ...) + +#define MEMBaseDiff uae_p32(NATMEM_OFFSET) + +#ifdef NATMEM_OFFSET +#define FIXED_ADDRESSING 1 +#endif + +#define SAHF_SETO_PROFITABLE + +// %%% BRIAN KING WAS HERE %%% +extern bool canbang; + +#include "compemu_prefs.cpp" + +#define uint32 uae_u32 +#define uint8 uae_u8 + +static inline int distrust_check(int value) +{ +#ifdef JIT_ALWAYS_DISTRUST + return 1; +#else + int distrust = value; + return distrust; +#endif +} + +static inline int distrust_byte(void) +{ + return distrust_check(currprefs.comptrustbyte); +} + +static inline int distrust_word(void) +{ + return distrust_check(currprefs.comptrustword); +} + +static inline int distrust_long(void) +{ + return distrust_check(currprefs.comptrustlong); +} + +static inline int distrust_addr(void) +{ + return distrust_check(currprefs.comptrustnaddr); +} + +#else +#define DEBUG 0 +#include "debug.h" + +#define NATMEM_OFFSET MEMBaseDiff +#define canbang 1 +#define op_illg op_illg_1 + +#ifdef WINUAE_ARANYM +void jit_abort(const char *format, ...) 
+{ + va_list args; + va_start(args, format); + ndebug::pdbvprintf(format, args); + va_end(args); + abort(); +} +#endif + +#if DEBUG +#define PROFILE_COMPILE_TIME 1 +#define PROFILE_UNTRANSLATED_INSNS 1 +#endif +#endif + +# include +# include +# include +# include + +#if defined(CPU_x86_64) && 0 +#define RECORD_REGISTER_USAGE 1 +#endif + +#ifdef JIT_DEBUG +#undef abort +#define abort() do { \ + fprintf(stderr, "Abort in file %s at line %d\n", __FILE__, __LINE__); \ + compiler_dumpstate(); \ + exit(EXIT_FAILURE); \ +} while (0) +#endif + +#ifdef RECORD_REGISTER_USAGE +static uint64 reg_count[16]; +static int reg_count_local[16]; + +static int reg_count_compare(const void *ap, const void *bp) +{ + const int a = *((int *)ap); + const int b = *((int *)bp); + return reg_count[b] - reg_count[a]; +} +#endif + +#ifdef PROFILE_COMPILE_TIME +#include +static uae_u32 compile_count = 0; +static clock_t compile_time = 0; +static clock_t emul_start_time = 0; +static clock_t emul_end_time = 0; +#endif + +#ifdef PROFILE_UNTRANSLATED_INSNS +static const int untranslated_top_ten = 20; +static uae_u32 raw_cputbl_count[65536] = { 0, }; +static uae_u16 opcode_nums[65536]; + + +static int untranslated_compfn(const void *e1, const void *e2) +{ + return raw_cputbl_count[*(const uae_u16 *)e1] < raw_cputbl_count[*(const uae_u16 *)e2]; +} +#endif + +static compop_func *compfunctbl[65536]; +static compop_func *nfcompfunctbl[65536]; +#ifdef NOFLAGS_SUPPORT +static cpuop_func *nfcpufunctbl[65536]; +#endif +uae_u8* comp_pc_p; + +#ifdef UAE +/* defined in uae.h */ +#else +// External variables +// newcpu.cpp +extern int quit_program; +#endif + +// gb-- Extra data for Basilisk II/JIT +#ifdef JIT_DEBUG +static bool JITDebug = false; // Enable runtime disassemblers through mon? 
+#endif +#if USE_INLINING +#ifdef UAE +#define follow_const_jumps (currprefs.comp_constjump != 0) +#else +static bool follow_const_jumps = true; // Flag: translation through constant jumps +#endif +#else +const bool follow_const_jumps = false; +#endif + +const uae_u32 MIN_CACHE_SIZE = 1024; // Minimal translation cache size (1 MB) +static uae_u32 cache_size = 0; // Size of total cache allocated for compiled blocks +static uae_u32 current_cache_size = 0; // Cache grows upwards: how much has been consumed already +static bool lazy_flush = true; // Flag: lazy translation cache invalidation +#ifdef UAE +#ifdef USE_JIT_FPU +#define avoid_fpu (!currprefs.compfpu) +#else +#define avoid_fpu (true) +#endif +#else +static bool avoid_fpu = true; // Flag: compile FPU instructions ? +#endif +static bool have_cmov = false; // target has CMOV instructions ? +static bool have_rat_stall = true; // target has partial register stalls ? +const bool tune_alignment = true; // Tune code alignments for running CPU ? +const bool tune_nop_fillers = true; // Tune no-op fillers for architecture +static bool setzflg_uses_bsf = false; // setzflg virtual instruction can use native BSF instruction correctly? 
+static int align_loops = 32; // Align the start of loops +static int align_jumps = 32; // Align the start of jumps +static int optcount[10] = { +#ifdef UAE + 4, // How often a block has to be executed before it is translated +#else + 10, // How often a block has to be executed before it is translated +#endif + 0, // How often to use naive translation + 0, 0, 0, 0, + -1, -1, -1, -1 +}; + +#ifdef UAE +/* FIXME: op_properties is currently in compemu.h */ + +op_properties prop[65536]; + +static inline bool is_const_jump(uae_u32 opcode) +{ + return prop[opcode].is_const_jump != 0; +} +#else +struct op_properties { + uae_u8 use_flags; + uae_u8 set_flags; + uae_u8 is_addx; + uae_u8 cflow; +}; +static op_properties prop[65536]; + +static inline int end_block(uae_u32 opcode) +{ + return (prop[opcode].cflow & fl_end_block); +} + +static inline bool is_const_jump(uae_u32 opcode) +{ + return (prop[opcode].cflow == fl_const_jump); +} + +#if 0 +static inline bool may_trap(uae_u32 opcode) +{ + return (prop[opcode].cflow & fl_trap); +} +#endif + +#endif + +static inline unsigned int cft_map (unsigned int f) +{ +#ifdef UAE + return f; +#else +#if !defined(HAVE_GET_WORD_UNSWAPPED) || defined(FULLMMU) + return f; +#else + return ((f >> 8) & 255) | ((f & 255) << 8); +#endif +#endif +} + +uae_u8* start_pc_p; +uae_u32 start_pc; +uae_u32 current_block_pc_p; +static uintptr current_block_start_target; +uae_u32 needed_flags; +static uintptr next_pc_p; +static uintptr taken_pc_p; +static int branch_cc; +static int redo_current_block; + +int segvcount=0; +int soft_flush_count=0; +int hard_flush_count=0; +int checksum_count=0; +static uae_u8* current_compile_p=NULL; +static uae_u8* max_compile_start; +static uae_u8* compiled_code=NULL; +static uae_s32 reg_alloc_run; +const int POPALLSPACE_SIZE = 2048; /* That should be enough space */ +static uae_u8 *popallspace=NULL; + +void* pushall_call_handler=NULL; +static void* popall_do_nothing=NULL; +static void* popall_exec_nostats=NULL; +static 
void* popall_execute_normal=NULL; +static void* popall_cache_miss=NULL; +static void* popall_recompile_block=NULL; +static void* popall_check_checksum=NULL; + +/* The 68k only ever executes from even addresses. So right now, we + * waste half the entries in this array + * UPDATE: We now use those entries to store the start of the linked + * lists that we maintain for each hash result. + */ +static cacheline cache_tags[TAGSIZE]; +int letit=0; +static blockinfo* hold_bi[MAX_HOLD_BI]; +static blockinfo* active; +static blockinfo* dormant; + +#ifdef NOFLAGS_SUPPORT +/* 68040 */ +extern const struct cputbl op_smalltbl_0_nf[]; +#endif +extern const struct comptbl op_smalltbl_0_comp_nf[]; +extern const struct comptbl op_smalltbl_0_comp_ff[]; + +#ifdef NOFLAGS_SUPPORT +/* 68020 + 68881 */ +extern const struct cputbl op_smalltbl_1_nf[]; +/* 68020 */ +extern const struct cputbl op_smalltbl_2_nf[]; +/* 68010 */ +extern const struct cputbl op_smalltbl_3_nf[]; +/* 68000 */ +extern const struct cputbl op_smalltbl_4_nf[]; +/* 68000 slow but compatible. 
*/ +extern const struct cputbl op_smalltbl_5_nf[]; +#endif + +#ifdef WINUAE_ARANYM +static void flush_icache_hard(int n); +static void flush_icache_lazy(int n); +static void flush_icache_none(int n); +void (*flush_icache)(int n) = flush_icache_none; +#endif + +static bigstate live; +static smallstate empty_ss; +static smallstate default_ss; +static int optlev; + +static int writereg(int r, int size); +static void unlock2(int r); +static void setlock(int r); +static int readreg_specific(int r, int size, int spec); +static int writereg_specific(int r, int size, int spec); +static void prepare_for_call_1(void); +static void prepare_for_call_2(void); +static void align_target(uae_u32 a); + +static void inline flush_cpu_icache(void *from, void *to); +static void inline write_jmp_target(uae_u32 *jmpaddr, cpuop_func* a); +static void inline emit_jmp_target(uae_u32 a); + +uae_u32 m68k_pc_offset; + +/* Some arithmetic operations can be optimized away if the operands + * are known to be constant. But that's only a good idea when the + * side effects they would have on the flags are not important. This + * variable indicates whether we need the side effects or not + */ +uae_u32 needflags=0; + +/* Flag handling is complicated. + * + * x86 instructions create flags, which quite often are exactly what we + * want. So at times, the "68k" flags are actually in the x86 flags. + * + * Then again, sometimes we do x86 instructions that clobber the x86 + * flags, but don't represent a corresponding m68k instruction. In that + * case, we have to save them. + * + * We used to save them to the stack, but now store them back directly + * into the regflags.cznv of the traditional emulation. Thus some odd + * names. + * + * So flags can be in either of two places (used to be three; boy were + * things complicated back then!); And either place can contain either + * valid flags or invalid trash (and on the stack, there was also the + * option of "nothing at all", now gone). 
A couple of variables keep + * track of the respective states. + * + * To make things worse, we might or might not be interested in the flags. + * by default, we are, but a call to dont_care_flags can change that + * until the next call to live_flags. If we are not, pretty much whatever + * is in the register and/or the native flags is seen as valid. + */ + +static inline blockinfo* get_blockinfo(uae_u32 cl) +{ + return cache_tags[cl+1].bi; +} + +static inline blockinfo* get_blockinfo_addr(void* addr) +{ + blockinfo* bi=get_blockinfo(cacheline(addr)); + + while (bi) { + if (bi->pc_p==addr) + return bi; + bi=bi->next_same_cl; + } + return NULL; +} + +#ifdef WINUAE_ARANYM +/******************************************************************* + * Disassembler support * + *******************************************************************/ + +#define TARGET_M68K 0 +#define TARGET_POWERPC 1 +#define TARGET_X86 2 +#define TARGET_X86_64 3 +#define TARGET_ARM 4 +#if defined(CPU_i386) +#define TARGET_NATIVE TARGET_X86 +#endif +#if defined(CPU_powerpc) +#define TARGET_NATIVE TARGET_POWERPC +#endif +#if defined(CPU_x86_64) +#define TARGET_NATIVE TARGET_X86_64 +#endif +#if defined(CPU_arm) +#define TARGET_NATIVE TARGET_ARM +#endif +#include "disasm-glue.h" + +#ifdef JIT_DEBUG +static void disasm_block(int disasm_target, const uint8 *start, size_t length) +{ + UNUSED(start); + UNUSED(length); + switch (disasm_target) + { + case TARGET_M68K: +#if defined(HAVE_DISASM_M68K) + { + char buf[256]; + + disasm_info.memory_vma = ((memptr)((uintptr_t)(start) - MEMBaseDiff)); + while (length > 0) + { + int isize = m68k_disasm_to_buf(&disasm_info, buf); + bug("%s", buf); + if (isize < 0) + break; + if ((uintptr)isize > length) + break; + length -= isize; + } + } +#endif + break; + case TARGET_X86: + case TARGET_X86_64: +#if defined(HAVE_DISASM_X86) + { + const uint8 *end = start + length; + char buf[256]; + + while (start < end) + { + start = x86_disasm(start, buf); + bug("%s", buf); + } + 
} +#endif + break; + case TARGET_ARM: +#if defined(HAVE_DISASM_ARM) + { + const uint8 *end = start + length; + char buf[256]; + + while (start < end) + { + start = arm_disasm(start, buf); + bug("%s", buf); + } + } +#endif + break; + } +} + +static inline void disasm_native_block(const uint8 *start, size_t length) +{ + disasm_block(TARGET_NATIVE, start, length); +} + +static inline void disasm_m68k_block(const uint8 *start, size_t length) +{ + disasm_block(TARGET_M68K, start, length); +} +#endif +#endif + + +/******************************************************************* + * All sorts of list related functions for all of the lists * + *******************************************************************/ + +static inline void remove_from_cl_list(blockinfo* bi) +{ + uae_u32 cl=cacheline(bi->pc_p); + + if (bi->prev_same_cl_p) + *(bi->prev_same_cl_p)=bi->next_same_cl; + if (bi->next_same_cl) + bi->next_same_cl->prev_same_cl_p=bi->prev_same_cl_p; + if (cache_tags[cl+1].bi) + cache_tags[cl].handler=cache_tags[cl+1].bi->handler_to_use; + else + cache_tags[cl].handler=(cpuop_func*)popall_execute_normal; +} + +static inline void remove_from_list(blockinfo* bi) +{ + if (bi->prev_p) + *(bi->prev_p)=bi->next; + if (bi->next) + bi->next->prev_p=bi->prev_p; +} + +#if 0 +static inline void remove_from_lists(blockinfo* bi) +{ + remove_from_list(bi); + remove_from_cl_list(bi); +} +#endif + +static inline void add_to_cl_list(blockinfo* bi) +{ + uae_u32 cl=cacheline(bi->pc_p); + + if (cache_tags[cl+1].bi) + cache_tags[cl+1].bi->prev_same_cl_p=&(bi->next_same_cl); + bi->next_same_cl=cache_tags[cl+1].bi; + + cache_tags[cl+1].bi=bi; + bi->prev_same_cl_p=&(cache_tags[cl+1].bi); + + cache_tags[cl].handler=bi->handler_to_use; +} + +static inline void raise_in_cl_list(blockinfo* bi) +{ + remove_from_cl_list(bi); + add_to_cl_list(bi); +} + +static inline void add_to_active(blockinfo* bi) +{ + if (active) + active->prev_p=&(bi->next); + bi->next=active; + + active=bi; + bi->prev_p=&active; 
+} + +static inline void add_to_dormant(blockinfo* bi) +{ + if (dormant) + dormant->prev_p=&(bi->next); + bi->next=dormant; + + dormant=bi; + bi->prev_p=&dormant; +} + +static inline void remove_dep(dependency* d) +{ + if (d->prev_p) + *(d->prev_p)=d->next; + if (d->next) + d->next->prev_p=d->prev_p; + d->prev_p=NULL; + d->next=NULL; +} + +/* This block's code is about to be thrown away, so it no longer + depends on anything else */ +static inline void remove_deps(blockinfo* bi) +{ + remove_dep(&(bi->dep[0])); + remove_dep(&(bi->dep[1])); +} + +static inline void adjust_jmpdep(dependency* d, cpuop_func* a) +{ + write_jmp_target(d->jmp_off, a); +} + +/******************************************************************** + * Soft flush handling support functions * + ********************************************************************/ + +static inline void set_dhtu(blockinfo* bi, cpuop_func *dh) +{ + jit_log2("bi is %p",bi); + if (dh!=bi->direct_handler_to_use) { + dependency* x=bi->deplist; + jit_log2("bi->deplist=%p",bi->deplist); + while (x) { + jit_log2("x is %p",x); + jit_log2("x->next is %p",x->next); + jit_log2("x->prev_p is %p",x->prev_p); + + if (x->jmp_off) { + adjust_jmpdep(x,dh); + } + x=x->next; + } + bi->direct_handler_to_use=dh; + } +} + +static inline void invalidate_block(blockinfo* bi) +{ + int i; + + bi->optlevel=0; + bi->count=optcount[0]-1; + bi->handler=NULL; + bi->handler_to_use=(cpuop_func*)popall_execute_normal; + bi->direct_handler=NULL; + set_dhtu(bi,bi->direct_pen); + bi->needed_flags=0xff; + bi->status=BI_INVALID; + for (i=0;i<2;i++) { + bi->dep[i].jmp_off=NULL; + bi->dep[i].target=NULL; + } + remove_deps(bi); +} + +static inline void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target) +{ + blockinfo* tbi=get_blockinfo_addr((void*)(uintptr)target); + + Dif(!tbi) { + jit_abort("Could not create jmpdep!"); + } + bi->dep[i].jmp_off=jmpaddr; + bi->dep[i].source=bi; + bi->dep[i].target=tbi; + bi->dep[i].next=tbi->deplist; + if 
(bi->dep[i].next) + bi->dep[i].next->prev_p=&(bi->dep[i].next); + bi->dep[i].prev_p=&(tbi->deplist); + tbi->deplist=&(bi->dep[i]); +} + +static inline void block_need_recompile(blockinfo * bi) +{ + uae_u32 cl = cacheline(bi->pc_p); + + set_dhtu(bi, bi->direct_pen); + bi->direct_handler = bi->direct_pen; + + bi->handler_to_use = (cpuop_func *)popall_execute_normal; + bi->handler = (cpuop_func *)popall_execute_normal; + if (bi == cache_tags[cl + 1].bi) + cache_tags[cl].handler = (cpuop_func *)popall_execute_normal; + bi->status = BI_NEED_RECOMP; +} + +#if USE_MATCH +static inline void mark_callers_recompile(blockinfo * bi) +{ + dependency *x = bi->deplist; + + while (x) { + dependency *next = x->next; /* This disappears when we mark for + * recompilation and thus remove the + * blocks from the lists */ + if (x->jmp_off) { + blockinfo *cbi = x->source; + + Dif(cbi->status == BI_INVALID) { + jit_log("invalid block in dependency list"); // FIXME? + // abort(); + } + if (cbi->status == BI_ACTIVE || cbi->status == BI_NEED_CHECK) { + block_need_recompile(cbi); + mark_callers_recompile(cbi); + } + else if (cbi->status == BI_COMPILING) { + redo_current_block = 1; + } + else if (cbi->status == BI_NEED_RECOMP) { + /* nothing */ + } + else { + jit_log2("Status %d in mark_callers",cbi->status); // FIXME? + } + } + x = next; + } +} +#endif + +static inline blockinfo* get_blockinfo_addr_new(void* addr, int /* setstate */) +{ + blockinfo* bi=get_blockinfo_addr(addr); + int i; + + if (!bi) { + for (i=0;ipc_p=(uae_u8*)addr; + invalidate_block(bi); + add_to_active(bi); + add_to_cl_list(bi); + + } + } + } + if (!bi) { + jit_abort("Looking for blockinfo, can't find free one"); + } + return bi; +} + +static void prepare_block(blockinfo* bi); + +/* Managment of blockinfos. + + A blockinfo struct is allocated whenever a new block has to be + compiled. 
If the list of free blockinfos is empty, we allocate a new + pool of blockinfos and link the newly created blockinfos altogether + into the list of free blockinfos. Otherwise, we simply pop a structure + of the free list. + + Blockinfo are lazily deallocated, i.e. chained altogether in the + list of free blockinfos whenvever a translation cache flush (hard or + soft) request occurs. +*/ + +template< class T > +class LazyBlockAllocator +{ + enum { + kPoolSize = 1 + (16384 - sizeof(T) - sizeof(void *)) / sizeof(T) + }; + struct Pool { + T chunk[kPoolSize]; + Pool * next; + }; + Pool * mPools; + T * mChunks; +public: + LazyBlockAllocator() : mPools(0), mChunks(0) { } +#ifdef UAE +#else + ~LazyBlockAllocator(); +#endif + T * acquire(); + void release(T * const); +}; + +#ifdef UAE +/* uae_vm_release may do logging, which isn't safe to do when the application + * is shutting down. Better to release memory manually with a function call + * to a release_all method on shutdown, or even simpler, just let the OS + * handle it (we're shutting down anyway). 
*/ +#else +template< class T > +LazyBlockAllocator::~LazyBlockAllocator() +{ + Pool * currentPool = mPools; + while (currentPool) { + Pool * deadPool = currentPool; + currentPool = currentPool->next; + vm_release(deadPool, sizeof(Pool)); + } +} +#endif + +template< class T > +T * LazyBlockAllocator::acquire() +{ + if (!mChunks) { + // There is no chunk left, allocate a new pool and link the + // chunks into the free list + Pool * newPool = (Pool *)vm_acquire(sizeof(Pool), VM_MAP_DEFAULT | VM_MAP_32BIT); + if (newPool == VM_MAP_FAILED) { + jit_abort("Could not allocate block pool!"); + } + for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) { + chunk->next = mChunks; + mChunks = chunk; + } + newPool->next = mPools; + mPools = newPool; + } + T * chunk = mChunks; + mChunks = chunk->next; + return chunk; +} + +template< class T > +void LazyBlockAllocator::release(T * const chunk) +{ + chunk->next = mChunks; + mChunks = chunk; +} + +template< class T > +class HardBlockAllocator +{ +public: + T * acquire() { + T * data = (T *)current_compile_p; + current_compile_p += sizeof(T); + return data; + } + + void release(T * const ) { + // Deallocated on invalidation + } +}; + +#if USE_SEPARATE_BIA +static LazyBlockAllocator BlockInfoAllocator; +static LazyBlockAllocator ChecksumInfoAllocator; +#else +static HardBlockAllocator BlockInfoAllocator; +static HardBlockAllocator ChecksumInfoAllocator; +#endif + +static inline checksum_info *alloc_checksum_info(void) +{ + checksum_info *csi = ChecksumInfoAllocator.acquire(); + csi->next = NULL; + return csi; +} + +static inline void free_checksum_info(checksum_info *csi) +{ + csi->next = NULL; + ChecksumInfoAllocator.release(csi); +} + +static inline void free_checksum_info_chain(checksum_info *csi) +{ + while (csi != NULL) { + checksum_info *csi2 = csi->next; + free_checksum_info(csi); + csi = csi2; + } +} + +static inline blockinfo *alloc_blockinfo(void) +{ + blockinfo *bi = BlockInfoAllocator.acquire(); 
+#if USE_CHECKSUM_INFO + bi->csi = NULL; +#endif + return bi; +} + +static inline void free_blockinfo(blockinfo *bi) +{ +#if USE_CHECKSUM_INFO + free_checksum_info_chain(bi->csi); + bi->csi = NULL; +#endif + BlockInfoAllocator.release(bi); +} + +static inline void alloc_blockinfos(void) +{ + int i; + blockinfo* bi; + + for (i=0;i>24)&0xff) | ((v>>8)&0xff00) | ((v<<8)&0xff0000) | ((v<<24)&0xff000000); +#endif +} + +void set_target(uae_u8* t) +{ + target=t; +} + +static inline uae_u8* get_target_noopt(void) +{ + return target; +} + +inline uae_u8* get_target(void) +{ + return get_target_noopt(); +} + +/******************************************************************** + * New version of data buffer: interleave data and code * + ********************************************************************/ +#if defined(USE_DATA_BUFFER) + +#define DATA_BUFFER_SIZE 1024 // Enlarge POPALLSPACE_SIZE if this value is greater than 768 +#define DATA_BUFFER_MAXOFFSET 4096 - 32 // max range between emit of data and use of data +static uae_u8* data_writepos = 0; +static uae_u8* data_endpos = 0; +#if DEBUG +static long data_wasted = 0; +#endif + +static inline void compemu_raw_branch(IMM d); + +static inline void data_check_end(long n, long codesize) +{ + if(data_writepos + n > data_endpos || get_target_noopt() + codesize - data_writepos > DATA_BUFFER_MAXOFFSET) + { + // Start new buffer +#if DEBUG + if(data_writepos < data_endpos) + data_wasted += data_endpos - data_writepos; +#endif + compemu_raw_branch(DATA_BUFFER_SIZE); + data_writepos = get_target_noopt(); + data_endpos = data_writepos + DATA_BUFFER_SIZE; + set_target(get_target_noopt() + DATA_BUFFER_SIZE); + } +} + +static inline long data_word_offs(uae_u16 x) +{ + data_check_end(4, 4); +#ifdef WORDS_BIGENDIAN + *((uae_u16*)data_writepos)=x; + data_writepos += 2; + *((uae_u16*)data_writepos)=0; + data_writepos += 2; +#else + *((uae_u32*)data_writepos)=x; + data_writepos += 4; +#endif + return (long)data_writepos - 
(long)get_target_noopt() - 12; +} + +static inline long data_long(uae_u32 x, long codesize) +{ + data_check_end(4, codesize); + *((uae_u32*)data_writepos)=x; + data_writepos += 4; + return (long)data_writepos - 4; +} + +static inline long data_long_offs(uae_u32 x) +{ + data_check_end(4, 4); + *((uae_u32*)data_writepos)=x; + data_writepos += 4; + return (long)data_writepos - (long)get_target_noopt() - 12; +} + +static inline long get_data_offset(long t) +{ + return t - (long)get_target_noopt() - 8; +} + +static inline void reset_data_buffer(void) +{ + data_writepos = 0; + data_endpos = 0; +} + +#endif +/******************************************************************** + * Getting the information about the target CPU * + ********************************************************************/ + +#if defined(CPU_arm) +#include "codegen_arm.cpp" +#endif +#if defined(CPU_i386) || defined(CPU_x86_64) +#include "codegen_x86.cpp" +#endif + + +/******************************************************************** + * Flags status handling. EMIT TIME! * + ********************************************************************/ + +static void bt_l_ri_noclobber(RR4 r, IMM i); + +static void make_flags_live_internal(void) +{ + if (live.flags_in_flags==VALID) + return; + Dif (live.flags_on_stack==TRASH) { + jit_abort("Want flags, got something on stack, but it is TRASH"); + } + if (live.flags_on_stack==VALID) { + int tmp; + tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2); + raw_reg_to_flags(tmp); + unlock2(tmp); + + live.flags_in_flags=VALID; + return; + } + jit_abort("Huh? 
live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live", + live.flags_in_flags,live.flags_on_stack); +} + +static void flags_to_stack(void) +{ + if (live.flags_on_stack==VALID) + return; + if (!live.flags_are_important) { + live.flags_on_stack=VALID; + return; + } + Dif (live.flags_in_flags!=VALID) + jit_abort("flags_to_stack != VALID"); + else { + int tmp; + tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1); + raw_flags_to_reg(tmp); + unlock2(tmp); + } + live.flags_on_stack=VALID; +} + +static inline void clobber_flags(void) +{ + if (live.flags_in_flags==VALID && live.flags_on_stack!=VALID) + flags_to_stack(); + live.flags_in_flags=TRASH; +} + +/* Prepare for leaving the compiled stuff */ +static inline void flush_flags(void) +{ + flags_to_stack(); + return; +} + +int touchcnt; + +/******************************************************************** + * Partial register flushing for optimized calls * + ********************************************************************/ + +struct regusage { + uae_u16 rmask; + uae_u16 wmask; +}; + +#if 0 +static inline void ru_set(uae_u16 *mask, int reg) +{ +#if USE_OPTIMIZED_CALLS + *mask |= 1 << reg; +#else + UNUSED(mask); + UNUSED(reg); +#endif +} + +static inline bool ru_get(const uae_u16 *mask, int reg) +{ +#if USE_OPTIMIZED_CALLS + return (*mask & (1 << reg)); +#else + UNUSED(mask); + UNUSED(reg); + /* Default: instruction reads & write to register */ + return true; +#endif +} + +static inline void ru_set_read(regusage *ru, int reg) +{ + ru_set(&ru->rmask, reg); +} + +static inline void ru_set_write(regusage *ru, int reg) +{ + ru_set(&ru->wmask, reg); +} + +static inline bool ru_read_p(const regusage *ru, int reg) +{ + return ru_get(&ru->rmask, reg); +} + +static inline bool ru_write_p(const regusage *ru, int reg) +{ + return ru_get(&ru->wmask, reg); +} + +static void ru_fill_ea(regusage *ru, int reg, amodes mode, + wordsizes size, int write_mode) +{ + switch (mode) { + case Areg: + reg += 8; + /* fall through */ + 
case Dreg: + ru_set(write_mode ? &ru->wmask : &ru->rmask, reg); + break; + case Ad16: + /* skip displacment */ + m68k_pc_offset += 2; + case Aind: + case Aipi: + case Apdi: + ru_set_read(ru, reg+8); + break; + case Ad8r: + ru_set_read(ru, reg+8); + /* fall through */ + case PC8r: { + uae_u16 dp = comp_get_iword((m68k_pc_offset+=2)-2); + reg = (dp >> 12) & 15; + ru_set_read(ru, reg); + if (dp & 0x100) + m68k_pc_offset += (((dp & 0x30) >> 3) & 7) + ((dp & 3) * 2); + break; + } + case PC16: + case absw: + case imm0: + case imm1: + m68k_pc_offset += 2; + break; + case absl: + case imm2: + m68k_pc_offset += 4; + break; + case immi: + m68k_pc_offset += (size == sz_long) ? 4 : 2; + break; + } +} + +/* TODO: split into a static initialization part and a dynamic one + (instructions depending on extension words) */ + +static void ru_fill(regusage *ru, uae_u32 opcode) +{ + m68k_pc_offset += 2; + + /* Default: no register is used or written to */ + ru->rmask = 0; + ru->wmask = 0; + + uae_u32 real_opcode = cft_map(opcode); + struct instr *dp = &table68k[real_opcode]; + + bool rw_dest = true; + bool handled = false; + + /* Handle some instructions specifically */ + uae_u16 ext; + switch (dp->mnemo) { + case i_BFCHG: + case i_BFCLR: + case i_BFEXTS: + case i_BFEXTU: + case i_BFFFO: + case i_BFINS: + case i_BFSET: + case i_BFTST: + ext = comp_get_iword((m68k_pc_offset+=2)-2); + if (ext & 0x800) ru_set_read(ru, (ext >> 6) & 7); + if (ext & 0x020) ru_set_read(ru, ext & 7); + ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1); + if (dp->dmode == Dreg) + ru_set_read(ru, dp->dreg); + switch (dp->mnemo) { + case i_BFEXTS: + case i_BFEXTU: + case i_BFFFO: + ru_set_write(ru, (ext >> 12) & 7); + break; + case i_BFINS: + ru_set_read(ru, (ext >> 12) & 7); + /* fall through */ + case i_BFCHG: + case i_BFCLR: + case i_BSET: + if (dp->dmode == Dreg) + ru_set_write(ru, dp->dreg); + break; + } + handled = true; + rw_dest = false; + break; + + case i_BTST: + rw_dest = false; + 
break; + + case i_CAS: + { + ext = comp_get_iword((m68k_pc_offset+=2)-2); + int Du = ext & 7; + ru_set_read(ru, Du); + int Dc = (ext >> 6) & 7; + ru_set_read(ru, Dc); + ru_set_write(ru, Dc); + break; + } + case i_CAS2: + { + int Dc1, Dc2, Du1, Du2, Rn1, Rn2; + ext = comp_get_iword((m68k_pc_offset+=2)-2); + Rn1 = (ext >> 12) & 15; + Du1 = (ext >> 6) & 7; + Dc1 = ext & 7; + ru_set_read(ru, Rn1); + ru_set_read(ru, Du1); + ru_set_read(ru, Dc1); + ru_set_write(ru, Dc1); + ext = comp_get_iword((m68k_pc_offset+=2)-2); + Rn2 = (ext >> 12) & 15; + Du2 = (ext >> 6) & 7; + Dc2 = ext & 7; + ru_set_read(ru, Rn2); + ru_set_read(ru, Du2); + ru_set_write(ru, Dc2); + break; + } + case i_DIVL: case i_MULL: + m68k_pc_offset += 2; + break; + case i_LEA: + case i_MOVE: case i_MOVEA: case i_MOVE16: + rw_dest = false; + break; + case i_PACK: case i_UNPK: + rw_dest = false; + m68k_pc_offset += 2; + break; + case i_TRAPcc: + m68k_pc_offset += (dp->size == sz_long) ? 4 : 2; + break; + case i_RTR: + /* do nothing, just for coverage debugging */ + break; + /* TODO: handle EXG instruction */ + } + + /* Handle A-Traps better */ + if ((real_opcode & 0xf000) == 0xa000) { + handled = true; + } + + /* Handle EmulOps better */ + if ((real_opcode & 0xff00) == 0x7100) { + handled = true; + ru->rmask = 0xffff; + ru->wmask = 0; + } + + if (dp->suse && !handled) + ru_fill_ea(ru, dp->sreg, (amodes)dp->smode, (wordsizes)dp->size, 0); + + if (dp->duse && !handled) + ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1); + + if (rw_dest) + ru->rmask |= ru->wmask; + + handled = handled || dp->suse || dp->duse; + + /* Mark all registers as used/written if the instruction may trap */ + if (may_trap(opcode)) { + handled = true; + ru->rmask = 0xffff; + ru->wmask = 0xffff; + } + + if (!handled) { + jit_abort("ru_fill: %04x = { %04x, %04x }", + real_opcode, ru->rmask, ru->wmask); + } +} +#endif + +/******************************************************************** + * register allocation per block 
logging * + ********************************************************************/ + +static uae_s8 vstate[VREGS]; +static uae_s8 vwritten[VREGS]; +static uae_s8 nstate[N_REGS]; + +#define L_UNKNOWN -127 +#define L_UNAVAIL -1 +#define L_NEEDED -2 +#define L_UNNEEDED -3 + +#if USE_MATCH +static inline void big_to_small_state(bigstate * /* b */, smallstate * s) +{ + int i; + + for (i = 0; i < VREGS; i++) + s->virt[i] = vstate[i]; + for (i = 0; i < N_REGS; i++) + s->nat[i] = nstate[i]; +} + +static inline int callers_need_recompile(bigstate * /* b */, smallstate * s) +{ + int i; + int reverse = 0; + + for (i = 0; i < VREGS; i++) { + if (vstate[i] != L_UNNEEDED && s->virt[i] == L_UNNEEDED) + return 1; + if (vstate[i] == L_UNNEEDED && s->virt[i] != L_UNNEEDED) + reverse++; + } + for (i = 0; i < N_REGS; i++) { + if (nstate[i] >= 0 && nstate[i] != s->nat[i]) + return 1; + if (nstate[i] < 0 && s->nat[i] >= 0) + reverse++; + } + if (reverse >= 2 && USE_MATCH) + return 1; /* In this case, it might be worth recompiling the + * callers */ + return 0; +} +#endif + +static inline void log_startblock(void) +{ + int i; + + for (i = 0; i < VREGS; i++) { + vstate[i] = L_UNKNOWN; + vwritten[i] = 0; + } + for (i = 0; i < N_REGS; i++) + nstate[i] = L_UNKNOWN; +} + +/* Using an n-reg for a temp variable */ +static inline void log_isused(int n) +{ + if (nstate[n] == L_UNKNOWN) + nstate[n] = L_UNAVAIL; +} + +static inline void log_visused(int r) +{ + if (vstate[r] == L_UNKNOWN) + vstate[r] = L_NEEDED; +} + +static inline void do_load_reg(int n, int r) +{ + if (r == FLAGTMP) + raw_load_flagreg(n, r); + else if (r == FLAGX) + raw_load_flagx(n, r); + else + compemu_raw_mov_l_rm(n, (uintptr) live.state[r].mem); +} + +#if 0 +static inline void check_load_reg(int n, int r) +{ + compemu_raw_mov_l_rm(n, (uintptr) live.state[r].mem); +} +#endif + +static inline void log_vwrite(int r) +{ + vwritten[r] = 1; +} + +/* Using an n-reg to hold a v-reg */ +static inline void log_isreg(int n, int r) +{ + if 
(nstate[n] == L_UNKNOWN && r < 16 && !vwritten[r] && USE_MATCH) + nstate[n] = r; + else { + do_load_reg(n, r); + if (nstate[n] == L_UNKNOWN) + nstate[n] = L_UNAVAIL; + } + if (vstate[r] == L_UNKNOWN) + vstate[r] = L_NEEDED; +} + +static inline void log_clobberreg(int r) +{ + if (vstate[r] == L_UNKNOWN) + vstate[r] = L_UNNEEDED; +} + +/* This ends all possibility of clever register allocation */ + +static inline void log_flush(void) +{ + int i; + + for (i = 0; i < VREGS; i++) + if (vstate[i] == L_UNKNOWN) + vstate[i] = L_NEEDED; + for (i = 0; i < N_REGS; i++) + if (nstate[i] == L_UNKNOWN) + nstate[i] = L_UNAVAIL; +} + +static inline void log_dump(void) +{ + int i; + + return; + + jit_log("----------------------"); + for (i = 0; i < N_REGS; i++) { + switch (nstate[i]) { + case L_UNKNOWN: + jit_log("Nat %d : UNKNOWN", i); + break; + case L_UNAVAIL: + jit_log("Nat %d : UNAVAIL", i); + break; + default: + jit_log("Nat %d : %d", i, nstate[i]); + break; + } + } + for (i = 0; i < VREGS; i++) { + if (vstate[i] == L_UNNEEDED) { + jit_log("Virt %d: UNNEEDED", i); + } + } +} + +/******************************************************************** + * register status handling. EMIT TIME! 
* + ********************************************************************/ + +static inline void set_status(int r, int status) +{ + if (status == ISCONST) + log_clobberreg(r); + live.state[r].status=status; +} + +static inline int isinreg(int r) +{ + return live.state[r].status==CLEAN || live.state[r].status==DIRTY; +} + +static inline void adjust_nreg(int r, uae_u32 val) +{ + if (!val) + return; + compemu_raw_lea_l_brr(r,r,val); +} + +static void tomem(int r) +{ + int rr=live.state[r].realreg; + + if (isinreg(r)) { + if (live.state[r].val && live.nat[rr].nholds==1 + && !live.nat[rr].locked) { + jit_log2("RemovingA offset %x from reg %d (%d) at %p", live.state[r].val,r,rr,target); + adjust_nreg(rr,live.state[r].val); + live.state[r].val=0; + live.state[r].dirtysize=4; + set_status(r,DIRTY); + } + } + + if (live.state[r].status==DIRTY) { + switch (live.state[r].dirtysize) { + case 1: compemu_raw_mov_b_mr((uintptr)live.state[r].mem,rr); break; + case 2: compemu_raw_mov_w_mr((uintptr)live.state[r].mem,rr); break; + case 4: compemu_raw_mov_l_mr((uintptr)live.state[r].mem,rr); break; + default: abort(); + } + log_vwrite(r); + set_status(r,CLEAN); + live.state[r].dirtysize=0; + } +} + +static inline int isconst(int r) +{ + return live.state[r].status==ISCONST; +} + +int is_const(int r) +{ + return isconst(r); +} + +static inline void writeback_const(int r) +{ + if (!isconst(r)) + return; + Dif (live.state[r].needflush==NF_HANDLER) { + jit_abort("Trying to write back constant NF_HANDLER!"); + } + + compemu_raw_mov_l_mi((uintptr)live.state[r].mem,live.state[r].val); + log_vwrite(r); + live.state[r].val=0; + set_status(r,INMEM); +} + +static inline void tomem_c(int r) +{ + if (isconst(r)) { + writeback_const(r); + } + else + tomem(r); +} + +static void evict(int r) +{ + int rr; + + if (!isinreg(r)) + return; + tomem(r); + rr=live.state[r].realreg; + + Dif (live.nat[rr].locked && + live.nat[rr].nholds==1) { + jit_abort("register %d in nreg %d is 
locked!",r,live.state[r].realreg); + } + + live.nat[rr].nholds--; + if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */ + int topreg=live.nat[rr].holds[live.nat[rr].nholds]; + int thisind=live.state[r].realind; + + live.nat[rr].holds[thisind]=topreg; + live.state[topreg].realind=thisind; + } + live.state[r].realreg=-1; + set_status(r,INMEM); +} + +static inline void free_nreg(int r) +{ + int i=live.nat[r].nholds; + + while (i) { + int vr; + + --i; + vr=live.nat[r].holds[i]; + evict(vr); + } + Dif (live.nat[r].nholds!=0) { + jit_abort("Failed to free nreg %d, nholds is %d",r,live.nat[r].nholds); + } +} + +/* Use with care! */ +static inline void isclean(int r) +{ + if (!isinreg(r)) + return; + live.state[r].validsize=4; + live.state[r].dirtysize=0; + live.state[r].val=0; + set_status(r,CLEAN); +} + +static inline void disassociate(int r) +{ + isclean(r); + evict(r); +} + +static inline void set_const(int r, uae_u32 val) +{ + disassociate(r); + live.state[r].val=val; + set_status(r,ISCONST); +} + +static inline uae_u32 get_offset(int r) +{ + return live.state[r].val; +} + +static int alloc_reg_hinted(int r, int size, int willclobber, int hint) +{ + int bestreg; + uae_s32 when; + int i; + uae_s32 badness=0; /* to shut up gcc */ + bestreg=-1; + when=2000000000; + + /* XXX use a regalloc_order table? 
*/ + for (i=0;i0) { + free_nreg(bestreg); + } + if (isinreg(r)) { + int rr=live.state[r].realreg; + /* This will happen if we read a partially dirty register at a + bigger size */ + Dif (willclobber || live.state[r].validsize>=size) + jit_abort("willclobber || live.state[r].validsize>=size"); + Dif (live.nat[rr].nholds!=1) + jit_abort("live.nat[rr].nholds!=1"); + if (size==4 && live.state[r].validsize==2) { + log_isused(bestreg); + log_visused(r); + compemu_raw_mov_l_rm(bestreg,(uintptr)live.state[r].mem); + compemu_raw_bswap_32(bestreg); + compemu_raw_zero_extend_16_rr(rr,rr); + compemu_raw_zero_extend_16_rr(bestreg,bestreg); + compemu_raw_bswap_32(bestreg); + compemu_raw_lea_l_rr_indexed(rr, rr, bestreg, 1); + live.state[r].validsize=4; + live.nat[rr].touched=touchcnt++; + return rr; + } + if (live.state[r].validsize==1) { + /* Nothing yet */ + } + evict(r); + } + + if (!willclobber) { + if (live.state[r].status!=UNDEF) { + if (isconst(r)) { + compemu_raw_mov_l_ri(bestreg,live.state[r].val); + live.state[r].val=0; + live.state[r].dirtysize=4; + set_status(r,DIRTY); + log_isused(bestreg); + } + else { + log_isreg(bestreg, r); /* This will also load it! */ + live.state[r].dirtysize=0; + set_status(r,CLEAN); + } + } + else { + live.state[r].val=0; + live.state[r].dirtysize=0; + set_status(r,CLEAN); + log_isused(bestreg); + } + live.state[r].validsize=4; + } + else { /* this is the easiest way, but not optimal. FIXME! 
*/ + /* Now it's trickier, but hopefully still OK */ + if (!isconst(r) || size==4) { + live.state[r].validsize=size; + live.state[r].dirtysize=size; + live.state[r].val=0; + set_status(r,DIRTY); + if (size == 4) { + log_clobberreg(r); + log_isused(bestreg); + } + else { + log_visused(r); + log_isused(bestreg); + } + } + else { + if (live.state[r].status!=UNDEF) + compemu_raw_mov_l_ri(bestreg,live.state[r].val); + live.state[r].val=0; + live.state[r].validsize=4; + live.state[r].dirtysize=4; + set_status(r,DIRTY); + log_isused(bestreg); + } + } + live.state[r].realreg=bestreg; + live.state[r].realind=live.nat[bestreg].nholds; + live.nat[bestreg].touched=touchcnt++; + live.nat[bestreg].holds[live.nat[bestreg].nholds]=r; + live.nat[bestreg].nholds++; + + return bestreg; +} + +/* +static int alloc_reg(int r, int size, int willclobber) +{ + return alloc_reg_hinted(r,size,willclobber,-1); +} +*/ + +static void unlock2(int r) +{ + Dif (!live.nat[r].locked) + jit_abort("unlock2 %d not locked", r); + live.nat[r].locked--; +} + +static void setlock(int r) +{ + live.nat[r].locked++; +} + + +static void mov_nregs(int d, int s) +{ + int nd=live.nat[d].nholds; + int i; + + if (s==d) + return; + + if (nd>0) + free_nreg(d); + + log_isused(d); + compemu_raw_mov_l_rr(d,s); + + for (i=0;i=size) { + n=live.state[r].realreg; + switch(size) { + case 1: + if (live.nat[n].canbyte || spec>=0) { + answer=n; + } + break; + case 2: + if (live.nat[n].canword || spec>=0) { + answer=n; + } + break; + case 4: + answer=n; + break; + default: abort(); + } + if (answer<0) + evict(r); + } + /* either the value was in memory to start with, or it was evicted and + is in memory now */ + if (answer<0) { + answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec); + } + + if (spec>=0 && spec!=answer) { + /* Too bad */ + mov_nregs(spec,answer); + answer=spec; + } + live.nat[answer].locked++; + live.nat[answer].touched=touchcnt++; + return answer; +} + + + +static int readreg(int r, int size) +{ + return 
readreg_general(r,size,-1,0); +} + +static int readreg_specific(int r, int size, int spec) +{ + return readreg_general(r,size,spec,0); +} + +static int readreg_offset(int r, int size) +{ + return readreg_general(r,size,-1,1); +} + +/* writereg_general(r, size, spec) + * + * INPUT + * - r : mid-layer register + * - size : requested size (1/2/4) + * - spec : -1 if find or make a register free, otherwise specifies + * the physical register to use in any case + * + * OUTPUT + * - hard (physical, x86 here) register allocated to virtual register r + */ +static inline int writereg_general(int r, int size, int spec) +{ + int n; + int answer=-1; + + record_register(r); + if (size<4) { + remove_offset(r,spec); + } + + make_exclusive(r,size,spec); + if (isinreg(r)) { + int nvsize=size>live.state[r].validsize?size:live.state[r].validsize; + int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize; + n=live.state[r].realreg; + + Dif (live.nat[n].nholds!=1) + jit_abort("live.nat[%d].nholds!=1", n); + switch(size) { + case 1: + if (live.nat[n].canbyte || spec>=0) { + live.state[r].dirtysize=ndsize; + live.state[r].validsize=nvsize; + answer=n; + } + break; + case 2: + if (live.nat[n].canword || spec>=0) { + live.state[r].dirtysize=ndsize; + live.state[r].validsize=nvsize; + answer=n; + } + break; + case 4: + live.state[r].dirtysize=ndsize; + live.state[r].validsize=nvsize; + answer=n; + break; + default: abort(); + } + if (answer<0) + evict(r); + } + /* either the value was in memory to start with, or it was evicted and + is in memory now */ + if (answer<0) { + answer=alloc_reg_hinted(r,size,1,spec); + } + if (spec>=0 && spec!=answer) { + mov_nregs(spec,answer); + answer=spec; + } + if (live.state[r].status==UNDEF) + live.state[r].validsize=4; + live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize; + live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize; + + live.nat[answer].locked++; + 
live.nat[answer].touched=touchcnt++; + if (size==4) { + live.state[r].val=0; + } + else { + Dif (live.state[r].val) { + jit_abort("Problem with val"); + } + } + set_status(r,DIRTY); + return answer; +} + +static int writereg(int r, int size) +{ + return writereg_general(r,size,-1); +} + +static int writereg_specific(int r, int size, int spec) +{ + return writereg_general(r,size,spec); +} + +static inline int rmw_general(int r, int wsize, int rsize, int spec) +{ + int n; + int answer=-1; + + record_register(r); + if (live.state[r].status==UNDEF) { + jit_log("WARNING: Unexpected read of undefined register %d",r); + } + remove_offset(r,spec); + make_exclusive(r,0,spec); + + Dif (wsize=rsize) { + n=live.state[r].realreg; + Dif (live.nat[n].nholds!=1) + jit_abort("live.nat[%d].nholds!=1", n); + + switch(rsize) { + case 1: + if (live.nat[n].canbyte || spec>=0) { + answer=n; + } + break; + case 2: + if (live.nat[n].canword || spec>=0) { + answer=n; + } + break; + case 4: + answer=n; + break; + default: abort(); + } + if (answer<0) + evict(r); + } + /* either the value was in memory to start with, or it was evicted and + is in memory now */ + if (answer<0) { + answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec); + } + + if (spec>=0 && spec!=answer) { + /* Too bad */ + mov_nregs(spec,answer); + answer=spec; + } + if (wsize>live.state[r].dirtysize) + live.state[r].dirtysize=wsize; + if (wsize>live.state[r].validsize) + live.state[r].validsize=wsize; + set_status(r,DIRTY); + + live.nat[answer].locked++; + live.nat[answer].touched=touchcnt++; + + Dif (live.state[r].val) { + jit_abort("Problem with val(rmw)"); + } + return answer; +} + +static int rmw(int r, int wsize, int rsize) +{ + return rmw_general(r,wsize,rsize,-1); +} + +static int rmw_specific(int r, int wsize, int rsize, int spec) +{ + return rmw_general(r,wsize,rsize,spec); +} + + +/* needed for restoring the carry flag on non-P6 cores */ +static void bt_l_ri_noclobber(RR4 r, IMM i) +{ + int size=4; + if (i<16) + 
size=2; + r=readreg(r,size); + compemu_raw_bt_l_ri(r,i); + unlock2(r); +} + +/******************************************************************** + * FPU register status handling. EMIT TIME! * + ********************************************************************/ + +static void f_tomem(int r) +{ + if (live.fate[r].status==DIRTY) { +#if defined(USE_LONG_DOUBLE) + raw_fmov_ext_mr((uintptr)live.fate[r].mem,live.fate[r].realreg); +#else + raw_fmov_mr((uintptr)live.fate[r].mem,live.fate[r].realreg); +#endif + live.fate[r].status=CLEAN; + } +} + +static void f_tomem_drop(int r) +{ + if (live.fate[r].status==DIRTY) { +#if defined(USE_LONG_DOUBLE) + raw_fmov_ext_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg); +#else + raw_fmov_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg); +#endif + live.fate[r].status=INMEM; + } +} + + +static inline int f_isinreg(int r) +{ + return live.fate[r].status==CLEAN || live.fate[r].status==DIRTY; +} + +static void f_evict(int r) +{ + int rr; + + if (!f_isinreg(r)) + return; + rr=live.fate[r].realreg; + if (live.fat[rr].nholds==1) + f_tomem_drop(r); + else + f_tomem(r); + + Dif (live.fat[rr].locked && + live.fat[rr].nholds==1) { + jit_abort("FPU register %d in nreg %d is locked!",r,live.fate[r].realreg); + } + + live.fat[rr].nholds--; + if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */ + int topreg=live.fat[rr].holds[live.fat[rr].nholds]; + int thisind=live.fate[r].realind; + live.fat[rr].holds[thisind]=topreg; + live.fate[topreg].realind=thisind; + } + live.fate[r].status=INMEM; + live.fate[r].realreg=-1; +} + +static inline void f_free_nreg(int r) +{ + int i=live.fat[r].nholds; + + while (i) { + int vr; + + --i; + vr=live.fat[r].holds[i]; + f_evict(vr); + } + Dif (live.fat[r].nholds!=0) { + jit_abort("Failed to free nreg %d, nholds is %d",r,live.fat[r].nholds); + } +} + + +/* Use with care! 
*/ +static inline void f_isclean(int r) +{ + if (!f_isinreg(r)) + return; + live.fate[r].status=CLEAN; +} + +static inline void f_disassociate(int r) +{ + f_isclean(r); + f_evict(r); +} + + + +static int f_alloc_reg(int r, int willclobber) +{ + int bestreg; + uae_s32 when; + int i; + uae_s32 badness; + bestreg=-1; + when=2000000000; + for (i=N_FREGS;i--;) { + badness=live.fat[i].touched; + if (live.fat[i].nholds==0) + badness=0; + + if (!live.fat[i].locked && badness0) { + f_free_nreg(bestreg); + } + if (f_isinreg(r)) { + f_evict(r); + } + + if (!willclobber) { + if (live.fate[r].status!=UNDEF) { +#if defined(USE_LONG_DOUBLE) + raw_fmov_ext_rm(bestreg,(uintptr)live.fate[r].mem); +#else + raw_fmov_rm(bestreg,(uintptr)live.fate[r].mem); +#endif + } + live.fate[r].status=CLEAN; + } + else { + live.fate[r].status=DIRTY; + } + live.fate[r].realreg=bestreg; + live.fate[r].realind=live.fat[bestreg].nholds; + live.fat[bestreg].touched=touchcnt++; + live.fat[bestreg].holds[live.fat[bestreg].nholds]=r; + live.fat[bestreg].nholds++; + + return bestreg; +} + +static void f_unlock(int r) +{ + Dif (!live.fat[r].locked) + jit_abort ("unlock %d", r); + live.fat[r].locked--; +} + +static void f_setlock(int r) +{ + live.fat[r].locked++; +} + +static inline int f_readreg(int r) +{ + int n; + int answer=-1; + + if (f_isinreg(r)) { + n=live.fate[r].realreg; + answer=n; + } + /* either the value was in memory to start with, or it was evicted and + is in memory now */ + if (answer<0) + answer=f_alloc_reg(r,0); + + live.fat[answer].locked++; + live.fat[answer].touched=touchcnt++; + return answer; +} + +static inline void f_make_exclusive(int r, int clobber) +{ + freg_status oldstate; + int rr=live.fate[r].realreg; + int nr; + int nind; + int ndirt=0; + int i; + + if (!f_isinreg(r)) + return; + if (live.fat[rr].nholds==1) + return; + for (i=0;i : enable runtime disassemblers : %s", JITDebug ? "yes" : "no"); + +#ifdef USE_JIT_FPU + // Use JIT compiler for FPU instructions ? 
+ avoid_fpu = !bx_options.jit.jitfpu; +#else + // JIT FPU is always disabled + avoid_fpu = true; +#endif + jit_log(" : compile FPU instructions : %s", !avoid_fpu ? "yes" : "no"); + + // Get size of the translation cache (in KB) + cache_size = bx_options.jit.jitcachesize; + jit_log(" : requested translation cache size : %d KB", cache_size); + + // Initialize target CPU (check for features, e.g. CMOV, rat stalls) + raw_init_cpu(); + setzflg_uses_bsf = target_check_bsf(); + jit_log(" : target processor has CMOV instructions : %s", have_cmov ? "yes" : "no"); + jit_log(" : target processor can suffer from partial register stalls : %s", have_rat_stall ? "yes" : "no"); + jit_log(" : alignment for loops, jumps are %d, %d", align_loops, align_jumps); +#if defined(CPU_i386) || defined(CPU_x86_64) + jit_log(" : target processor has SSE2 instructions : %s", cpuinfo.x86_has_xmm2 ? "yes" : "no"); + jit_log(" : cache linesize is %lu", (unsigned long)cpuinfo.x86_clflush_size); +#endif + + // Translation cache flush mechanism + lazy_flush = (bx_options.jit.jitlazyflush == 0) ? false : true; + jit_log(" : lazy translation cache invalidation : %s", str_on_off(lazy_flush)); + flush_icache = lazy_flush ? 
flush_icache_lazy : flush_icache_hard; + + // Compiler features + jit_log(" : register aliasing : %s", str_on_off(1)); + jit_log(" : FP register aliasing : %s", str_on_off(USE_F_ALIAS)); + jit_log(" : lazy constant offsetting : %s", str_on_off(USE_OFFSET)); +#if USE_INLINING + follow_const_jumps = bx_options.jit.jitinline; +#endif + jit_log(" : block inlining : %s", str_on_off(follow_const_jumps)); + jit_log(" : separate blockinfo allocation : %s", str_on_off(USE_SEPARATE_BIA)); + + // Build compiler tables + build_comp(); +#endif + + initialized = true; + +#ifdef PROFILE_UNTRANSLATED_INSNS + jit_log(" : gather statistics on untranslated insns count"); +#endif + +#ifdef PROFILE_COMPILE_TIME + jit_log(" : gather statistics on translation time"); + emul_start_time = clock(); +#endif +} + +#ifdef UAE +static +#endif +void compiler_exit(void) +{ +#ifdef PROFILE_COMPILE_TIME + emul_end_time = clock(); +#endif + +#ifdef UAE +#else +#if DEBUG +#if defined(USE_DATA_BUFFER) + jit_log("data_wasted = %d bytes", data_wasted); +#endif +#endif + + // Deallocate translation cache + if (compiled_code) { + vm_release(compiled_code, cache_size * 1024); + compiled_code = 0; + } + + // Deallocate popallspace + if (popallspace) { + vm_release(popallspace, POPALLSPACE_SIZE); + popallspace = 0; + } +#endif + +#ifdef PROFILE_COMPILE_TIME + jit_log("### Compile Block statistics"); + jit_log("Number of calls to compile_block : %d", compile_count); + uae_u32 emul_time = emul_end_time - emul_start_time; + jit_log("Total emulation time : %.1f sec", double(emul_time)/double(CLOCKS_PER_SEC)); + jit_log("Total compilation time : %.1f sec (%.1f%%)", double(compile_time)/double(CLOCKS_PER_SEC), 100.0*double(compile_time)/double(emul_time)); +#endif + +#ifdef PROFILE_UNTRANSLATED_INSNS + uae_u64 untranslated_count = 0; + for (int i = 0; i < 65536; i++) { + opcode_nums[i] = i; + untranslated_count += raw_cputbl_count[i]; + } + jit_log("Sorting out untranslated instructions count..."); + 
qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn); + jit_log("Rank Opc Count Name"); + for (int i = 0; i < untranslated_top_ten; i++) { + uae_u32 count = raw_cputbl_count[opcode_nums[i]]; + struct instr *dp; + struct mnemolookup *lookup; + if (!count) + break; + dp = table68k + opcode_nums[i]; + for (lookup = lookuptab; lookup->mnemo != (instrmnem)dp->mnemo; lookup++) + ; + jit_log("%03d: %04x %10u %s", i, opcode_nums[i], count, lookup->name); + } +#endif + +#ifdef RECORD_REGISTER_USAGE + int reg_count_ids[16]; + uint64 tot_reg_count = 0; + for (int i = 0; i < 16; i++) { + reg_count_ids[i] = i; + tot_reg_count += reg_count[i]; + } + qsort(reg_count_ids, 16, sizeof(int), reg_count_compare); + uint64 cum_reg_count = 0; + for (int i = 0; i < 16; i++) { + int r = reg_count_ids[i]; + cum_reg_count += reg_count[r]; + jit_log("%c%d : %16ld %2.1f%% [%2.1f]", r < 8 ? 'D' : 'A', r % 8, + reg_count[r], + 100.0*double(reg_count[r])/double(tot_reg_count), + 100.0*double(cum_reg_count)/double(tot_reg_count)); + } +#endif +} + +#ifdef UAE +#else +bool compiler_use_jit(void) +{ + // Check for the "jit" prefs item + if (!bx_options.jit.jit) + return false; + + // Don't use JIT if translation cache size is less then MIN_CACHE_SIZE KB + if (bx_options.jit.jitcachesize < MIN_CACHE_SIZE) { + panicbug(" : translation cache size is less than %d KB. 
Disabling JIT.\n", MIN_CACHE_SIZE); + return false; + } + + return true; +} +#endif + +void init_comp(void) +{ + int i; + uae_s8* cb=can_byte; + uae_s8* cw=can_word; + uae_s8* au=always_used; + +#ifdef RECORD_REGISTER_USAGE + for (i=0;i<16;i++) + reg_count_local[i] = 0; +#endif + + for (i=0;i= uae_p32(kickmem_bank.baseaddr) && + addr < uae_p32(kickmem_bank.baseaddr + 8 * 65536)); +#else + return ((addr >= (uintptr)ROMBaseHost) && (addr < (uintptr)ROMBaseHost + ROMSize)); +#endif +} + +static void flush_all(void) +{ + int i; + + log_flush(); + for (i=0;i0) + free_nreg(i); + + for (i=0;i0) + f_free_nreg(i); + + live.flags_in_flags=TRASH; /* Note: We assume we already rescued the + flags at the very start of the call_r + functions! */ +} + +/******************************************************************** + * Memory access and related functions, CREATE time * + ********************************************************************/ + +void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond) +{ + next_pc_p=not_taken; + taken_pc_p=taken; + branch_cc=cond; +} + +/* Note: get_handler may fail in 64 Bit environments, if direct_handler_to_use is + * outside 32 bit + */ +static uintptr get_handler(uintptr addr) +{ + blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0); + return (uintptr)bi->direct_handler_to_use; +} + +/* This version assumes that it is writing *real* memory, and *will* fail + * if that assumption is wrong! No branches, no second chances, just + * straight go-for-it attitude */ + +static void writemem_real(int address, int source, int size, int tmp, int clobber) +{ + int f=tmp; + +#ifdef NATMEM_OFFSET + if (canbang) { /* Woohoo! go directly at the memory! 
*/ + if (clobber) + f=source; + + switch(size) { + case 1: mov_b_bRr(address,source,MEMBaseDiff); break; + case 2: mov_w_rr(f,source); mid_bswap_16(f); mov_w_bRr(address,f,MEMBaseDiff); break; + case 4: mov_l_rr(f,source); mid_bswap_32(f); mov_l_bRr(address,f,MEMBaseDiff); break; + } + forget_about(tmp); + forget_about(f); + return; + } +#endif + +#ifdef UAE + mov_l_rr(f,address); + shrl_l_ri(f,16); /* The index into the baseaddr table */ + mov_l_rm_indexed(f,uae_p32(baseaddr),f,SIZEOF_VOID_P); /* FIXME: is SIZEOF_VOID_P correct? */ + + if (address==source) { /* IBrowse does this! */ + if (size > 1) { + add_l(f,address); /* f now holds the final address */ + switch (size) { + case 2: mid_bswap_16(source); mov_w_Rr(f,source,0); + mid_bswap_16(source); return; + case 4: mid_bswap_32(source); mov_l_Rr(f,source,0); + mid_bswap_32(source); return; + } + } + } + switch (size) { /* f now holds the offset */ + case 1: mov_b_mrr_indexed(address,f,1,source); break; + case 2: mid_bswap_16(source); mov_w_mrr_indexed(address,f,1,source); + mid_bswap_16(source); break; /* base, index, source */ + case 4: mid_bswap_32(source); mov_l_mrr_indexed(address,f,1,source); + mid_bswap_32(source); break; + } +#endif +} + +#ifdef UAE +static inline void writemem(int address, int source, int offset, int size, int tmp) +{ + int f=tmp; + + mov_l_rr(f,address); + shrl_l_ri(f,16); /* The index into the mem bank table */ + mov_l_rm_indexed(f,uae_p32(mem_banks),f,SIZEOF_VOID_P); /* FIXME: is SIZEOF_VOID_P correct? 
*/ + /* Now f holds a pointer to the actual membank */ + mov_l_rR(f,f,offset); + /* Now f holds the address of the b/w/lput function */ + call_r_02(f,address,source,4,size); + forget_about(tmp); +} +#endif + +void writebyte(int address, int source, int tmp) +{ +#ifdef UAE + if ((special_mem & S_WRITE) || distrust_byte()) + writemem_special(address, source, 5 * SIZEOF_VOID_P, 1, tmp); + else +#endif + writemem_real(address,source,1,tmp,0); +} + +static inline void writeword_general(int address, int source, int tmp, + int clobber) +{ +#ifdef UAE + if ((special_mem & S_WRITE) || distrust_word()) + writemem_special(address, source, 4 * SIZEOF_VOID_P, 2, tmp); + else +#endif + writemem_real(address,source,2,tmp,clobber); +} + +void writeword_clobber(int address, int source, int tmp) +{ + writeword_general(address,source,tmp,1); +} + +void writeword(int address, int source, int tmp) +{ + writeword_general(address,source,tmp,0); +} + +static inline void writelong_general(int address, int source, int tmp, + int clobber) +{ +#ifdef UAE + if ((special_mem & S_WRITE) || distrust_long()) + writemem_special(address, source, 3 * SIZEOF_VOID_P, 4, tmp); + else +#endif + writemem_real(address,source,4,tmp,clobber); +} + +void writelong_clobber(int address, int source, int tmp) +{ + writelong_general(address,source,tmp,1); +} + +void writelong(int address, int source, int tmp) +{ + writelong_general(address,source,tmp,0); +} + + + +/* This version assumes that it is reading *real* memory, and *will* fail + * if that assumption is wrong! No branches, no second chances, just + * straight go-for-it attitude */ + +static void readmem_real(int address, int dest, int size, int tmp) +{ + int f=tmp; + + if (size==4 && address!=dest) + f=dest; + +#ifdef NATMEM_OFFSET + if (canbang) { /* Woohoo! go directly at the memory! 
*/ + switch(size) { + case 1: mov_b_brR(dest,address,MEMBaseDiff); break; + case 2: mov_w_brR(dest,address,MEMBaseDiff); mid_bswap_16(dest); break; + case 4: mov_l_brR(dest,address,MEMBaseDiff); mid_bswap_32(dest); break; + } + forget_about(tmp); + (void) f; + return; + } +#endif + +#ifdef UAE + mov_l_rr(f,address); + shrl_l_ri(f,16); /* The index into the baseaddr table */ + mov_l_rm_indexed(f,uae_p32(baseaddr),f,SIZEOF_VOID_P); /* FIXME: is SIZEOF_VOID_P correct? */ + /* f now holds the offset */ + + switch(size) { + case 1: mov_b_rrm_indexed(dest,address,f,1); break; + case 2: mov_w_rrm_indexed(dest,address,f,1); mid_bswap_16(dest); break; + case 4: mov_l_rrm_indexed(dest,address,f,1); mid_bswap_32(dest); break; + } + forget_about(tmp); +#endif +} + + + +#ifdef UAE +static inline void readmem(int address, int dest, int offset, int size, int tmp) +{ + int f=tmp; + + mov_l_rr(f,address); + shrl_l_ri(f,16); /* The index into the mem bank table */ + mov_l_rm_indexed(f,uae_p32(mem_banks),f,SIZEOF_VOID_P); /* FIXME: is SIZEOF_VOID_P correct? 
*/ + /* Now f holds a pointer to the actual membank */ + mov_l_rR(f,f,offset); + /* Now f holds the address of the b/w/lget function */ + call_r_11(dest,f,address,size,4); + forget_about(tmp); +} +#endif + +void readbyte(int address, int dest, int tmp) +{ +#ifdef UAE + if ((special_mem & S_READ) || distrust_byte()) + readmem_special(address, dest, 2 * SIZEOF_VOID_P, 1, tmp); + else +#endif + readmem_real(address,dest,1,tmp); +} + +void readword(int address, int dest, int tmp) +{ +#ifdef UAE + if ((special_mem & S_READ) || distrust_word()) + readmem_special(address, dest, 1 * SIZEOF_VOID_P, 2, tmp); + else +#endif + readmem_real(address,dest,2,tmp); +} + +void readlong(int address, int dest, int tmp) +{ +#ifdef UAE + if ((special_mem & S_READ) || distrust_long()) + readmem_special(address, dest, 0 * SIZEOF_VOID_P, 4, tmp); + else +#endif + readmem_real(address,dest,4,tmp); +} + +void get_n_addr(int address, int dest, int tmp) +{ +#ifdef UAE + if (special_mem || distrust_addr()) { + /* This one might appear a bit odd... */ + readmem(address, dest, 6 * SIZEOF_VOID_P, 4, tmp); + return; + } +#endif + + // a is the register containing the virtual address + // after the offset had been fetched + int a=tmp; + + // f is the register that will contain the offset + int f=tmp; + + // a == f == tmp if (address == dest) + if (address!=dest) { + a=address; + f=dest; + } + +#ifdef NATMEM_OFFSET + if (canbang) { +#if FIXED_ADDRESSING + lea_l_brr(dest,address,MEMBaseDiff); +#else +# error "Only fixed adressing mode supported" +#endif + forget_about(tmp); + (void) f; + (void) a; + return; + } +#endif + +#ifdef UAE + mov_l_rr(f,address); + mov_l_rr(dest,address); // gb-- nop if dest==address + shrl_l_ri(f,16); + mov_l_rm_indexed(f,uae_p32(baseaddr),f,SIZEOF_VOID_P); /* FIXME: is SIZEOF_VOID_P correct? 
*/ + add_l(dest,f); + forget_about(tmp); +#endif +} + +void get_n_addr_jmp(int address, int dest, int tmp) +{ +#ifdef WINUAE_ARANYM + /* For this, we need to get the same address as the rest of UAE + would --- otherwise we end up translating everything twice */ + get_n_addr(address,dest,tmp); +#else + int f=tmp; + if (address!=dest) + f=dest; + mov_l_rr(f,address); + shrl_l_ri(f,16); /* The index into the baseaddr bank table */ + mov_l_rm_indexed(dest,uae_p32(baseaddr),f,SIZEOF_VOID_P); /* FIXME: is SIZEOF_VOID_P correct? */ + add_l(dest,address); + and_l_ri (dest, ~1); + forget_about(tmp); +#endif +} + + +/* base is a register, but dp is an actual value. + target is a register, as is tmp */ +void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp) +{ + int reg = (dp >> 12) & 15; + int regd_shift=(dp >> 9) & 3; + + if (dp & 0x100) { + int ignorebase=(dp&0x80); + int ignorereg=(dp&0x40); + int addbase=0; + int outer=0; + + if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4); + + if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4); + + if ((dp & 0x4) == 0) { /* add regd *before* the get_long */ + if (!ignorereg) { + if ((dp & 0x800) == 0) + sign_extend_16_rr(target,reg); + else + mov_l_rr(target,reg); + shll_l_ri(target,regd_shift); + } + else + mov_l_ri(target,0); + + /* target is now regd */ + if (!ignorebase) + add_l(target,base); + add_l_ri(target,addbase); + if (dp&0x03) readlong(target,target,tmp); + } else { /* do the getlong first, then add regd */ + if (!ignorebase) { + mov_l_rr(target,base); + add_l_ri(target,addbase); + } + else + mov_l_ri(target,addbase); + if (dp&0x03) readlong(target,target,tmp); + + if (!ignorereg) { + if ((dp & 0x800) == 0) + sign_extend_16_rr(tmp,reg); + else + mov_l_rr(tmp,reg); + shll_l_ri(tmp,regd_shift); 
+ /* tmp is now regd */ + add_l(target,tmp); + } + } + add_l_ri(target,outer); + } + else { /* 68000 version */ + if ((dp & 0x800) == 0) { /* Sign extend */ + sign_extend_16_rr(target,reg); + lea_l_brr_indexed(target,base,target,1<= CODE_ALLOC_MAX_ATTEMPTS) + return NULL; + + return do_alloc_code(size, depth + 1); +#else + UNUSED(depth); + uint8 *code = (uint8 *)vm_acquire(size, VM_MAP_DEFAULT | VM_MAP_32BIT); + return code == VM_MAP_FAILED ? NULL : code; +#endif +} + +static inline uint8 *alloc_code(uint32 size) +{ + uint8 *ptr = do_alloc_code(size, 0); + /* allocated code must fit in 32-bit boundaries */ + assert((uintptr)ptr <= 0xffffffff); + return ptr; +} + +void alloc_cache(void) +{ + if (compiled_code) { + flush_icache_hard(6); + vm_release(compiled_code, cache_size * 1024); + compiled_code = 0; + } + +#ifdef UAE + cache_size = currprefs.cachesize; +#endif + if (cache_size == 0) + return; + + while (!compiled_code && cache_size) { + if ((compiled_code = alloc_code(cache_size * 1024)) == NULL) { + compiled_code = 0; + cache_size /= 2; + } + } + vm_protect(compiled_code, cache_size * 1024, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE); + + if (compiled_code) { + jit_log(" : actual translation cache size : %d KB at %p-%p", cache_size, compiled_code, compiled_code + cache_size*1024); +#ifdef USE_DATA_BUFFER + max_compile_start = compiled_code + cache_size*1024 - BYTES_PER_INST - DATA_BUFFER_SIZE; +#else + max_compile_start = compiled_code + cache_size*1024 - BYTES_PER_INST; +#endif + current_compile_p = compiled_code; + current_cache_size = 0; +#if defined(USE_DATA_BUFFER) + reset_data_buffer(); +#endif + } +} + +static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2) +{ + uae_u32 k1 = 0; + uae_u32 k2 = 0; + +#if USE_CHECKSUM_INFO + checksum_info *csi = bi->csi; + Dif(!csi) abort(); + while (csi) { + uae_s32 len = csi->length; + uintptr tmp = (uintptr)csi->start_p; +#else + uae_s32 len = bi->len; + uintptr tmp = (uintptr)bi->min_pcp; +#endif + 
uae_u32* pos; + + len += (tmp & 3); + tmp &= ~((uintptr)3); + pos = (uae_u32 *)tmp; + + if (len >= 0 && len <= MAX_CHECKSUM_LEN) { + while (len > 0) { + k1 += *pos; + k2 ^= *pos; + pos++; + len -= 4; + } + } + +#if USE_CHECKSUM_INFO + csi = csi->next; + } +#endif + + *c1 = k1; + *c2 = k2; +} + +#if 0 +static void show_checksum(CSI_TYPE* csi) +{ + uae_u32 k1=0; + uae_u32 k2=0; + uae_s32 len=CSI_LENGTH(csi); + uae_u32 tmp=(uintptr)CSI_START_P(csi); + uae_u32* pos; + + len+=(tmp&3); + tmp&=(~3); + pos=(uae_u32*)tmp; + + if (len<0 || len>MAX_CHECKSUM_LEN) { + return; + } + else { + while (len>0) { + jit_log("%08x ",*pos); + pos++; + len-=4; + } + jit_log(" bla"); + } +} +#endif + + +int check_for_cache_miss(void) +{ + blockinfo* bi=get_blockinfo_addr(regs.pc_p); + + if (bi) { + int cl=cacheline(regs.pc_p); + if (bi!=cache_tags[cl+1].bi) { + raise_in_cl_list(bi); + return 1; + } + } + return 0; +} + + +static void recompile_block(void) +{ + /* An existing block's countdown code has expired. We need to make + sure that execute_normal doesn't refuse to recompile due to a + perceived cache miss... 
*/ + blockinfo* bi=get_blockinfo_addr(regs.pc_p); + + Dif (!bi) + jit_abort("recompile_block"); + raise_in_cl_list(bi); + execute_normal(); + return; +} +static void cache_miss(void) +{ + blockinfo* bi=get_blockinfo_addr(regs.pc_p); +#if COMP_DEBUG + uae_u32 cl=cacheline(regs.pc_p); + blockinfo* bi2=get_blockinfo(cl); +#endif + + if (!bi) { + execute_normal(); /* Compile this block now */ + return; + } + Dif (!bi2 || bi==bi2) { + jit_abort("Unexplained cache miss %p %p",bi,bi2); + } + raise_in_cl_list(bi); + return; +} + +static int called_check_checksum(blockinfo* bi); + +static inline int block_check_checksum(blockinfo* bi) +{ + uae_u32 c1,c2; + bool isgood; + + if (bi->status!=BI_NEED_CHECK) + return 1; /* This block is in a checked state */ + + checksum_count++; + + if (bi->c1 || bi->c2) + calc_checksum(bi,&c1,&c2); + else { + c1=c2=1; /* Make sure it doesn't match */ + } + + isgood=(c1==bi->c1 && c2==bi->c2); + + if (isgood) { + /* This block is still OK. So we reactivate. Of course, that + means we have to move it into the needs-to-be-flushed list */ + bi->handler_to_use=bi->handler; + set_dhtu(bi,bi->direct_handler); + bi->status=BI_CHECKING; + isgood=called_check_checksum(bi) != 0; + } + if (isgood) { + jit_log2("reactivate %p/%p (%x %x/%x %x)",bi,bi->pc_p, c1,c2,bi->c1,bi->c2); + remove_from_list(bi); + add_to_active(bi); + raise_in_cl_list(bi); + bi->status=BI_ACTIVE; + } + else { + /* This block actually changed. 
We need to invalidate it, + and set it up to be recompiled */ + jit_log2("discard %p/%p (%x %x/%x %x)",bi,bi->pc_p, c1,c2,bi->c1,bi->c2); + invalidate_block(bi); + raise_in_cl_list(bi); + } + return isgood; +} + +static int called_check_checksum(blockinfo* bi) +{ + int isgood=1; + int i; + + for (i=0;i<2 && isgood;i++) { + if (bi->dep[i].jmp_off) { + isgood=block_check_checksum(bi->dep[i].target); + } + } + return isgood; +} + +static void check_checksum(void) +{ + blockinfo* bi=get_blockinfo_addr(regs.pc_p); + uae_u32 cl=cacheline(regs.pc_p); + blockinfo* bi2=get_blockinfo(cl); + + /* These are not the droids you are looking for... */ + if (!bi) { + /* Whoever is the primary target is in a dormant state, but + calling it was accidental, and we should just compile this + new block */ + execute_normal(); + return; + } + if (bi!=bi2) { + /* The block was hit accidentally, but it does exist. Cache miss */ + cache_miss(); + return; + } + + if (!block_check_checksum(bi)) + execute_normal(); +} + +static inline void match_states(blockinfo* bi) +{ + int i; + smallstate* s=&(bi->env); + + if (bi->status==BI_NEED_CHECK) { + block_check_checksum(bi); + } + if (bi->status==BI_ACTIVE || + bi->status==BI_FINALIZING) { /* Deal with the *promises* the + block makes (about not using + certain vregs) */ + for (i=0;i<16;i++) { + if (s->virt[i]==L_UNNEEDED) { + jit_log2("unneeded reg %d at %p",i,target); + COMPCALL(forget_about)(i); // FIXME + } + } + } + flush(1); + + /* And now deal with the *demands* the block makes */ + for (i=0;inat[i]; + if (v>=0) { + // printf("Loading reg %d into %d at %p\n",v,i,target); + readreg_specific(v,4,i); + // do_load_reg(i,v); + // setlock(i); + } + } + for (i=0;inat[i]; + if (v>=0) { + unlock2(i); + } + } +} + +static inline void create_popalls(void) +{ + int i,r; + + if (popallspace == NULL) { + if ((popallspace = alloc_code(POPALLSPACE_SIZE)) == NULL) { + jit_log("WARNING: Could not allocate popallspace!"); +#ifdef UAE + if (currprefs.cachesize > 
0) +#endif + { + jit_abort("Could not allocate popallspace!"); + } +#ifdef UAE + /* This is not fatal if JIT is not used. If JIT is + * turned on, it will crash, but it would have crashed + * anyway. */ + return; +#endif + } + } + vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_WRITE); + + int stack_space = STACK_OFFSET; + for (i=0;idirect_pen=(cpuop_func*)get_target(); + compemu_raw_mov_l_rm(0,(uintptr)&(bi->pc_p)); + compemu_raw_mov_l_mr((uintptr)®s.pc_p,0); + compemu_raw_jmp((uintptr)popall_execute_normal); + + align_target(align_jumps); + bi->direct_pcc=(cpuop_func*)get_target(); + compemu_raw_mov_l_rm(0,(uintptr)&(bi->pc_p)); + compemu_raw_mov_l_mr((uintptr)®s.pc_p,0); + compemu_raw_jmp((uintptr)popall_check_checksum); + flush_cpu_icache((void *)current_compile_p, (void *)target); + current_compile_p=get_target(); + + bi->deplist=NULL; + for (i=0;i<2;i++) { + bi->dep[i].prev_p=NULL; + bi->dep[i].next=NULL; + } + bi->env=default_ss; + bi->status=BI_INVALID; + bi->havestate=0; + //bi->env=empty_ss; +} + +#ifdef UAE +void compemu_reset(void) +{ + set_cache_state(0); +} +#endif + +#ifdef UAE +#else +// OPCODE is in big endian format, use cft_map() beforehand, if needed. 
+#endif +static inline void reset_compop(int opcode) +{ + compfunctbl[opcode] = NULL; + nfcompfunctbl[opcode] = NULL; +} + +static int read_opcode(const char *p) +{ + int opcode = 0; + for (int i = 0; i < 4; i++) { + int op = p[i]; + switch (op) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + opcode = (opcode << 4) | (op - '0'); + break; + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + opcode = (opcode << 4) | ((op - 'a') + 10); + break; + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + opcode = (opcode << 4) | ((op - 'A') + 10); + break; + default: + return -1; + } + } + return opcode; +} + +static bool merge_blacklist() +{ +#ifdef UAE + const char *blacklist = ""; +#else + const char *blacklist = bx_options.jit.jitblacklist; +#endif + if (blacklist[0] != '\0') { + const char *p = blacklist; + for (;;) { + if (*p == 0) + return true; + + int opcode1 = read_opcode(p); + if (opcode1 < 0) + return false; + p += 4; + + int opcode2 = opcode1; + if (*p == '-') { + p++; + opcode2 = read_opcode(p); + if (opcode2 < 0) + return false; + p += 4; + } + + if (*p == 0 || *p == ',') { + jit_log(" : blacklist opcodes : %04x-%04x", opcode1, opcode2); + for (int opcode = opcode1; opcode <= opcode2; opcode++) + reset_compop(cft_map(opcode)); + + if (*(p++) == ',') + continue; + + return true; + } + + return false; + } + } + return true; +} + +void build_comp(void) +{ + int i; + unsigned long opcode; + const struct comptbl* tbl=op_smalltbl_0_comp_ff; + const struct comptbl* nftbl=op_smalltbl_0_comp_nf; + int count; +#ifdef WINUAE_ARANYM + unsigned int cpu_level = 4; // 68040 + const struct cputbl *nfctbl = op_smalltbl_0_nf; +#else +#ifdef NOFLAGS_SUPPORT + struct comptbl *nfctbl = (currprefs.cpu_level >= 5 ? op_smalltbl_0_nf + : currprefs.cpu_level == 4 ? op_smalltbl_1_nf + : (currprefs.cpu_level == 2 || currprefs.cpu_level == 3) ? op_smalltbl_2_nf + : currprefs.cpu_level == 1 ? 
op_smalltbl_3_nf + : ! currprefs.cpu_compatible ? op_smalltbl_4_nf + : op_smalltbl_5_nf); +#endif +#endif + +#ifdef NATMEM_OFFSET +#ifdef UAE +#ifdef JIT_EXCEPTION_HANDLER + install_exception_handler(); +#endif +#endif +#endif + + jit_log(" : building compiler function tables"); + + for (opcode = 0; opcode < 65536; opcode++) { + reset_compop(opcode); +#ifdef NOFLAGS_SUPPORT + nfcpufunctbl[opcode] = op_illg; +#endif + prop[opcode].use_flags = FLAG_ALL; + prop[opcode].set_flags = FLAG_ALL; +#ifdef UAE + prop[opcode].is_jump=1; +#else + prop[opcode].cflow = fl_trap; // ILLEGAL instructions do trap +#endif + } + + for (i = 0; tbl[i].opcode < 65536; i++) { +#ifdef UAE + int isjmp = (tbl[i].specific & COMP_OPCODE_ISJUMP); + int isaddx = (tbl[i].specific & COMP_OPCODE_ISADDX); + int iscjmp = (tbl[i].specific & COMP_OPCODE_ISCJUMP); + + prop[cft_map(tbl[i].opcode)].is_jump = isjmp; + prop[cft_map(tbl[i].opcode)].is_const_jump = iscjmp; + prop[cft_map(tbl[i].opcode)].is_addx = isaddx; +#else + int cflow = table68k[tbl[i].opcode].cflow; + if (follow_const_jumps && (tbl[i].specific & COMP_OPCODE_ISCJUMP)) + cflow = fl_const_jump; + else + cflow &= ~fl_const_jump; + prop[cft_map(tbl[i].opcode)].cflow = cflow; +#endif + + bool uses_fpu = (tbl[i].specific & COMP_OPCODE_USES_FPU) != 0; + if (uses_fpu && avoid_fpu) + compfunctbl[cft_map(tbl[i].opcode)] = NULL; + else + compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler; + } + + for (i = 0; nftbl[i].opcode < 65536; i++) { + bool uses_fpu = (tbl[i].specific & COMP_OPCODE_USES_FPU) != 0; + if (uses_fpu && avoid_fpu) + nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL; + else + nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler; +#ifdef NOFLAGS_SUPPORT + nfcpufunctbl[cft_map(nftbl[i].opcode)] = nfctbl[i].handler; +#endif + } + +#ifdef NOFLAGS_SUPPORT + for (i = 0; nfctbl[i].handler; i++) { + nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler; + } +#endif + + for (opcode = 0; opcode < 65536; opcode++) { + compop_func 
*f; + compop_func *nff; +#ifdef NOFLAGS_SUPPORT + cpuop_func *nfcf; +#endif + int isaddx; +#ifdef UAE + int isjmp,iscjmp; +#else + int cflow; +#endif + +#ifdef UAE + int cpu_level = (currprefs.cpu_model - 68000) / 10; + if (cpu_level > 4) + cpu_level--; +#endif + if ((instrmnem)table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level) + continue; + + if (table68k[opcode].handler != -1) { + f = compfunctbl[cft_map(table68k[opcode].handler)]; + nff = nfcompfunctbl[cft_map(table68k[opcode].handler)]; +#ifdef NOFLAGS_SUPPORT + nfcf = nfcpufunctbl[cft_map(table68k[opcode].handler)]; +#endif + isaddx = prop[cft_map(table68k[opcode].handler)].is_addx; + prop[cft_map(opcode)].is_addx = isaddx; +#ifdef UAE + isjmp = prop[cft_map(table68k[opcode].handler)].is_jump; + iscjmp = prop[cft_map(table68k[opcode].handler)].is_const_jump; + prop[cft_map(opcode)].is_jump = isjmp; + prop[cft_map(opcode)].is_const_jump = iscjmp; +#else + cflow = prop[cft_map(table68k[opcode].handler)].cflow; + prop[cft_map(opcode)].cflow = cflow; +#endif + compfunctbl[cft_map(opcode)] = f; + nfcompfunctbl[cft_map(opcode)] = nff; +#ifdef NOFLAGS_SUPPORT + Dif (nfcf == op_illg) + abort(); + nfcpufunctbl[cft_map(opcode)] = nfcf; +#endif + } + prop[cft_map(opcode)].set_flags = table68k[opcode].flagdead; + prop[cft_map(opcode)].use_flags = table68k[opcode].flaglive; + /* Unconditional jumps don't evaluate condition codes, so they + * don't actually use any flags themselves */ +#ifdef UAE + if (prop[cft_map(opcode)].is_const_jump) +#else + if (prop[cft_map(opcode)].cflow & fl_const_jump) +#endif + prop[cft_map(opcode)].use_flags = 0; + } +#ifdef NOFLAGS_SUPPORT + for (i = 0; nfctbl[i].handler != NULL; i++) { + if (nfctbl[i].specific) + nfcpufunctbl[cft_map(tbl[i].opcode)] = nfctbl[i].handler; + } +#endif + + /* Merge in blacklist */ + if (!merge_blacklist()) + { + jit_log(" : blacklist merge failure!"); + } + + count=0; + for (opcode = 0; opcode < 65536; opcode++) { + if 
(compfunctbl[cft_map(opcode)]) + count++; + } + jit_log(" : supposedly %d compileable opcodes!",count); + + /* Initialise state */ + create_popalls(); + alloc_cache(); + reset_lists(); + + for (i=0;ipc_p)].handler=(cpuop_func*)popall_execute_normal; + cache_tags[cacheline(bi->pc_p)+1].bi=NULL; + dbi=bi; bi=bi->next; + free_blockinfo(dbi); + } + bi=dormant; + while(bi) { + cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func*)popall_execute_normal; + cache_tags[cacheline(bi->pc_p)+1].bi=NULL; + dbi=bi; bi=bi->next; + free_blockinfo(dbi); + } + + reset_lists(); + if (!compiled_code) + return; + +#if defined(USE_DATA_BUFFER) + reset_data_buffer(); +#endif + + current_compile_p=compiled_code; +#ifdef UAE + set_special(0); /* To get out of compiled code */ +#else + SPCFLAGS_SET( SPCFLAG_JIT_EXEC_RETURN ); /* To get out of compiled code */ +#endif +} + + +/* "Soft flushing" --- instead of actually throwing everything away, + we simply mark everything as "needs to be checked". +*/ + +#ifdef WINUAE_ARANYM +static inline void flush_icache_lazy(int) +#else +void flush_icache(int n) +#endif +{ + blockinfo* bi; + blockinfo* bi2; + +#ifdef UAE + if (currprefs.comp_hardflush) { + flush_icache_hard(n); + return; + } +#endif + soft_flush_count++; + if (!active) + return; + + bi=active; + while (bi) { + uae_u32 cl=cacheline(bi->pc_p); + if (bi->status==BI_INVALID || + bi->status==BI_NEED_RECOMP) { + if (bi==cache_tags[cl+1].bi) + cache_tags[cl].handler=(cpuop_func*)popall_execute_normal; + bi->handler_to_use=(cpuop_func*)popall_execute_normal; + set_dhtu(bi,bi->direct_pen); + bi->status=BI_INVALID; + } + else { + if (bi==cache_tags[cl+1].bi) + cache_tags[cl].handler=(cpuop_func*)popall_check_checksum; + bi->handler_to_use=(cpuop_func*)popall_check_checksum; + set_dhtu(bi,bi->direct_pcc); + bi->status=BI_NEED_CHECK; + } + bi2=bi; + bi=bi->next; + } + /* bi2 is now the last entry in the active list */ + bi2->next=dormant; + if (dormant) + dormant->prev_p=&(bi2->next); + + 
dormant=active; + active->prev_p=&dormant; + active=NULL; +} + +#ifdef UAE +static +#endif +void flush_icache_range(uae_u32 start, uae_u32 length) +{ + if (!active) + return; + +#if LAZY_FLUSH_ICACHE_RANGE + uae_u8 *start_p = get_real_address(start); + blockinfo *bi = active; + while (bi) { +#if USE_CHECKSUM_INFO + bool invalidate = false; + for (checksum_info *csi = bi->csi; csi && !invalidate; csi = csi->next) + invalidate = (((start_p - csi->start_p) < csi->length) || + ((csi->start_p - start_p) < length)); +#else + // Assume system is consistent and would invalidate the right range + const bool invalidate = (bi->pc_p - start_p) < length; +#endif + if (invalidate) { + uae_u32 cl = cacheline(bi->pc_p); + if (bi == cache_tags[cl + 1].bi) + cache_tags[cl].handler = (cpuop_func *)popall_execute_normal; + bi->handler_to_use = (cpuop_func *)popall_execute_normal; + set_dhtu(bi, bi->direct_pen); + bi->status = BI_NEED_RECOMP; + } + bi = bi->next; + } + return; +#else + UNUSED(start); + UNUSED(length); +#endif + flush_icache(-1); +} + +/* +static void catastrophe(void) +{ + jit_abort("catastprophe"); +} +*/ + +int failure; + +#ifdef UAE +static inline unsigned int get_opcode_cft_map(unsigned int f) +{ + return ((f >> 8) & 255) | ((f & 255) << 8); +} +#define DO_GET_OPCODE(a) (get_opcode_cft_map((uae_u16)*(a))) +#else +#if defined(HAVE_GET_WORD_UNSWAPPED) && !defined(FULLMMU) +# define DO_GET_OPCODE(a) (do_get_mem_word_unswapped((uae_u16 *)(a))) +#else +# define DO_GET_OPCODE(a) (do_get_mem_word((uae_u16 *)(a))) +#endif +#endif + +#ifdef JIT_DEBUG +static uae_u8 *last_regs_pc_p = 0; +static uae_u8 *last_compiled_block_addr = 0; + +void compiler_dumpstate(void) +{ + if (!JITDebug) + return; + + bug("### Host addresses"); + bug("MEM_BASE : %lx", (unsigned long)MEMBaseDiff); + bug("PC_P : %p", ®s.pc_p); + bug("SPCFLAGS : %p", ®s.spcflags); + bug("D0-D7 : %p-%p", ®s.regs[0], ®s.regs[7]); + bug("A0-A7 : %p-%p", ®s.regs[8], ®s.regs[15]); + bug(" "); + + bug("### M68k processor 
state"); + m68k_dumpstate(stderr, 0); + bug(" "); + + bug("### Block in Atari address space"); + bug("M68K block : %p", + (void *)(uintptr)last_regs_pc_p); + if (last_regs_pc_p != 0) { + bug("Native block : %p (%d bytes)", + (void *)last_compiled_block_addr, + get_blockinfo_addr(last_regs_pc_p)->direct_handler_size); + } + bug(" "); +} +#endif + +#ifdef UAE +void compile_block(cpu_history *pc_hist, int blocklen, int totcycles) +{ + if (letit && compiled_code && currprefs.cpu_model >= 68020) { +#else +static void compile_block(cpu_history* pc_hist, int blocklen) +{ + if (letit && compiled_code) { +#endif +#ifdef PROFILE_COMPILE_TIME + compile_count++; + clock_t start_time = clock(); +#endif +#ifdef JIT_DEBUG + bool disasm_block = true; +#endif + + /* OK, here we need to 'compile' a block */ + int i; + int r; + int was_comp=0; + uae_u8 liveflags[MAXRUN+1]; +#if USE_CHECKSUM_INFO + bool trace_in_rom = isinrom((uintptr)pc_hist[0].location) != 0; + uintptr max_pcp=(uintptr)pc_hist[blocklen - 1].location; + uintptr min_pcp=max_pcp; +#else + uintptr max_pcp=(uintptr)pc_hist[0].location; + uintptr min_pcp=max_pcp; +#endif + uae_u32 cl=cacheline(pc_hist[0].location); + void* specflags=(void*)®s.spcflags; + blockinfo* bi=NULL; + blockinfo* bi2; + int extra_len=0; + + redo_current_block=0; + if (current_compile_p >= MAX_COMPILE_PTR) + flush_icache_hard(7); + + alloc_blockinfos(); + + bi=get_blockinfo_addr_new(pc_hist[0].location,0); + bi2=get_blockinfo(cl); + + optlev=bi->optlevel; + if (bi->status!=BI_INVALID) { + Dif (bi!=bi2) { + /* I don't think it can happen anymore. Shouldn't, in + any case. So let's make sure... */ + jit_abort("WOOOWOO count=%d, ol=%d %p %p", bi->count,bi->optlevel,bi->handler_to_use, cache_tags[cl].handler); + } + + Dif (bi->count!=-1 && bi->status!=BI_NEED_RECOMP) { + jit_abort("bi->count=%d, bi->status=%d,bi->optlevel=%d",bi->count,bi->status,bi->optlevel); + /* What the heck? We are not supposed to be here! 
*/ + } + } + if (bi->count==-1) { + optlev++; + while (!optcount[optlev]) + optlev++; + bi->count=optcount[optlev]-1; + } + current_block_pc_p=(uintptr)pc_hist[0].location; + + remove_deps(bi); /* We are about to create new code */ + bi->optlevel=optlev; + bi->pc_p=(uae_u8*)pc_hist[0].location; +#if USE_CHECKSUM_INFO + free_checksum_info_chain(bi->csi); + bi->csi = NULL; +#endif + + liveflags[blocklen]=FLAG_ALL; /* All flags needed afterwards */ + i=blocklen; + while (i--) { + uae_u16* currpcp=pc_hist[i].location; + uae_u32 op=DO_GET_OPCODE(currpcp); + +#if USE_CHECKSUM_INFO + trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp); + if (follow_const_jumps && is_const_jump(op)) { + checksum_info *csi = alloc_checksum_info(); + csi->start_p = (uae_u8 *)min_pcp; + csi->length = max_pcp - min_pcp + LONGEST_68K_INST; + csi->next = bi->csi; + bi->csi = csi; + max_pcp = (uintptr)currpcp; + } + min_pcp = (uintptr)currpcp; +#else + if ((uintptr)currpcpmax_pcp) + max_pcp=(uintptr)currpcp; +#endif + +#ifdef UAE + if (!currprefs.compnf) { + liveflags[i]=FLAG_ALL; + } + else +#endif + { + liveflags[i] = ((liveflags[i+1] & (~prop[op].set_flags))|prop[op].use_flags); + if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0) + liveflags[i]&= ~FLAG_Z; + } + } + +#if USE_CHECKSUM_INFO + checksum_info *csi = alloc_checksum_info(); + csi->start_p = (uae_u8 *)min_pcp; + csi->length = max_pcp - min_pcp + LONGEST_68K_INST; + csi->next = bi->csi; + bi->csi = csi; +#endif + + bi->needed_flags=liveflags[0]; + + align_target(align_loops); + was_comp=0; + + bi->direct_handler=(cpuop_func*)get_target(); + set_dhtu(bi,bi->direct_handler); + bi->status=BI_COMPILING; + current_block_start_target=(uintptr)get_target(); + + log_startblock(); + + if (bi->count>=0) { /* Need to generate countdown code */ + compemu_raw_mov_l_mi((uintptr)®s.pc_p,(uintptr)pc_hist[0].location); + compemu_raw_sub_l_mi((uintptr)&(bi->count),1); + compemu_raw_jl((uintptr)popall_recompile_block); + } + if (optlev==0) { /* No 
need to actually translate */ + /* Execute normally without keeping stats */ + compemu_raw_mov_l_mi((uintptr)®s.pc_p,(uintptr)pc_hist[0].location); + compemu_raw_jmp((uintptr)popall_exec_nostats); + } + else { + reg_alloc_run=0; + next_pc_p=0; + taken_pc_p=0; + branch_cc=0; // Only to be initialized. Will be set together with next_pc_p + + comp_pc_p=(uae_u8*)pc_hist[0].location; + init_comp(); + was_comp=1; + +#ifdef USE_CPU_EMUL_SERVICES + compemu_raw_sub_l_mi((uintptr)&emulated_ticks,blocklen); + compemu_raw_jcc_b_oponly(NATIVE_CC_GT); + uae_s8 *branchadd=(uae_s8*)get_target(); + skip_byte(); + raw_dec_sp(STACK_SHADOW_SPACE); + compemu_raw_call((uintptr)cpu_do_check_ticks); + raw_inc_sp(STACK_SHADOW_SPACE); + *branchadd=(uintptr)get_target()-((uintptr)branchadd+1); +#endif + +#ifdef JIT_DEBUG + if (JITDebug) { + compemu_raw_mov_l_mi((uintptr)&last_regs_pc_p,(uintptr)pc_hist[0].location); + compemu_raw_mov_l_mi((uintptr)&last_compiled_block_addr,current_block_start_target); + } +#endif + + for (i=0;i1) { + failure=0; + if (!was_comp) { + comp_pc_p=(uae_u8*)pc_hist[i].location; + init_comp(); + } + was_comp=1; + + bool isnop = do_get_mem_word(pc_hist[i].location) == 0x4e71 || + ((i + 1) < blocklen && do_get_mem_word(pc_hist[i+1].location) == 0x4e71); + + if (isnop) + compemu_raw_mov_l_mi((uintptr)®s.fault_pc, ((uintptr)(pc_hist[i].location)) - MEMBaseDiff); + + comptbl[opcode](opcode); + freescratch(); + if (!(liveflags[i+1] & FLAG_CZNV)) { + /* We can forget about flags */ + dont_care_flags(); + } +#if INDIVIDUAL_INST + flush(1); + nop(); + flush(1); + was_comp=0; +#endif + /* + * workaround for buserror handling: on a "nop", write registers back + */ + if (isnop) + { + flush(1); + nop(); + was_comp=0; + } + } + + if (failure) { + if (was_comp) { + flush(1); + was_comp=0; + } + compemu_raw_mov_l_ri(REG_PAR1,(uae_u32)opcode); +#if USE_NORMAL_CALLING_CONVENTION + raw_push_l_r(REG_PAR1); +#endif + compemu_raw_mov_l_mi((uintptr)®s.pc_p, + 
(uintptr)pc_hist[i].location); + raw_dec_sp(STACK_SHADOW_SPACE); + compemu_raw_call((uintptr)cputbl[opcode]); + raw_inc_sp(STACK_SHADOW_SPACE); +#ifdef PROFILE_UNTRANSLATED_INSNS + // raw_cputbl_count[] is indexed with plain opcode (in m68k order) + compemu_raw_add_l_mi((uintptr)&raw_cputbl_count[cft_map(opcode)],1); +#endif +#if USE_NORMAL_CALLING_CONVENTION + raw_inc_sp(4); +#endif + + if (i < blocklen - 1) { + uae_s8* branchadd; + + /* if (SPCFLAGS_TEST(SPCFLAG_STOP)) popall_do_nothing() */ + compemu_raw_mov_l_rm(0,(uintptr)specflags); + compemu_raw_test_l_rr(0,0); +#if defined(USE_DATA_BUFFER) + data_check_end(8, 64); // just a pessimistic guess... +#endif + compemu_raw_jz_b_oponly(); + branchadd=(uae_s8*)get_target(); + skip_byte(); +#ifdef UAE + raw_sub_l_mi(uae_p32(&countdown),scaled_cycles(totcycles)); +#endif + compemu_raw_jmp((uintptr)popall_do_nothing); + *branchadd=(uintptr)get_target()-(uintptr)branchadd-1; + } + } + } +#if 1 /* This isn't completely kosher yet; It really needs to be + be integrated into a general inter-block-dependency scheme */ + if (next_pc_p && taken_pc_p && + was_comp && taken_pc_p==current_block_pc_p) + { + blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0); + blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0); + uae_u8 x=bi1->needed_flags; + + if (x==0xff || 1) { /* To be on the safe side */ + uae_u16* next=(uae_u16*)next_pc_p; + uae_u32 op=DO_GET_OPCODE(next); + + x=FLAG_ALL; + x&=(~prop[op].set_flags); + x|=prop[op].use_flags; + } + + x|=bi2->needed_flags; + if (!(x & FLAG_CZNV)) { + /* We can forget about flags */ + dont_care_flags(); + extra_len+=2; /* The next instruction now is part of this block */ + } + } +#endif + log_flush(); + + if (next_pc_p) { /* A branch was registered */ + uintptr t1=next_pc_p; + uintptr t2=taken_pc_p; + int cc=branch_cc; + + uae_u32* branchadd; + uae_u32* tba; + bigstate tmp; + blockinfo* tbi; + + if (taken_pc_penv))) { + mark_callers_recompile(bi); + } + + 
big_to_small_state(&live,&(bi->env)); +#endif + +#if USE_CHECKSUM_INFO + remove_from_list(bi); + if (trace_in_rom) { + // No need to checksum that block trace on cache invalidation + free_checksum_info_chain(bi->csi); + bi->csi = NULL; + add_to_dormant(bi); + } + else { + calc_checksum(bi,&(bi->c1),&(bi->c2)); + add_to_active(bi); + } +#else + if (next_pc_p+extra_len>=max_pcp && + next_pc_p+extra_lenlen=max_pcp-min_pcp; + bi->min_pcp=min_pcp; + + remove_from_list(bi); + if (isinrom(min_pcp) && isinrom(max_pcp)) { + add_to_dormant(bi); /* No need to checksum it on cache flush. + Please don't start changing ROMs in + flight! */ + } + else { + calc_checksum(bi,&(bi->c1),&(bi->c2)); + add_to_active(bi); + } +#endif + + current_cache_size += get_target() - (uae_u8 *)current_compile_p; + +#ifdef JIT_DEBUG + bi->direct_handler_size = get_target() - (uae_u8 *)current_block_start_target; + + if (JITDebug && disasm_block) { + uaecptr block_addr = start_pc + ((char *)pc_hist[0].location - (char *)start_pc_p); + jit_log("M68K block @ 0x%08x (%d insns)", block_addr, blocklen); + uae_u32 block_size = ((uae_u8 *)pc_hist[blocklen - 1].location - (uae_u8 *)pc_hist[0].location) + 1; + disasm_m68k_block((const uae_u8 *)pc_hist[0].location, block_size); + jit_log("Compiled block @ %p", pc_hist[0].location); + disasm_native_block((const uae_u8 *)current_block_start_target, bi->direct_handler_size); + UNUSED(block_addr); + } +#endif + + log_dump(); + align_target(align_jumps); + +#ifdef UAE +#ifdef USE_UDIS86 + UDISFN(current_block_start_target, target) +#endif +#endif + + /* This is the non-direct handler */ + bi->handler= + bi->handler_to_use=(cpuop_func *)get_target(); + compemu_raw_cmp_l_mi((uintptr)®s.pc_p,(uintptr)pc_hist[0].location); + compemu_raw_jnz((uintptr)popall_cache_miss); + comp_pc_p=(uae_u8*)pc_hist[0].location; + + bi->status=BI_FINALIZING; + init_comp(); + match_states(bi); + flush(1); + + compemu_raw_jmp((uintptr)bi->direct_handler); + + flush_cpu_icache((void 
*)current_block_start_target, (void *)target); + current_compile_p=get_target(); + raise_in_cl_list(bi); +#ifdef UAE + bi->nexthandler=current_compile_p; +#endif + + /* We will flush soon, anyway, so let's do it now */ + if (current_compile_p >= MAX_COMPILE_PTR) + flush_icache_hard(7); + + bi->status=BI_ACTIVE; + if (redo_current_block) + block_need_recompile(bi); + +#ifdef PROFILE_COMPILE_TIME + compile_time += (clock() - start_time); +#endif +#ifdef UAE + /* Account for compilation time */ + do_extra_cycles(totcycles); +#endif + } + +#ifndef UAE + /* Account for compilation time */ + cpu_do_check_ticks(); +#endif +} + +#ifdef UAE + /* Slightly different function defined in newcpu.cpp */ +#else +void do_nothing(void) +{ + /* What did you expect this to do? */ +} +#endif + +#ifdef UAE + /* Different implementation in newcpu.cpp */ +#else +void exec_nostats(void) +{ + for (;;) { + uae_u32 opcode = GET_OPCODE; +#ifdef FLIGHT_RECORDER + m68k_record_step(m68k_getpc(), opcode); +#endif + (*cpufunctbl[opcode])(opcode); + cpu_check_ticks(); + if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) { + return; /* We will deal with the spcflags in the caller */ + } + } +} +#endif + +#ifdef UAE +/* FIXME: check differences against UAE execute_normal (newcpu.cpp) */ +#else +void execute_normal(void) +{ + if (!check_for_cache_miss()) { + cpu_history pc_hist[MAXRUN]; + int blocklen = 0; +#if 0 && FIXED_ADDRESSING + start_pc_p = regs.pc_p; + start_pc = get_virtual_address(regs.pc_p); +#else + start_pc_p = regs.pc_oldp; + start_pc = regs.pc; +#endif + for (;;) { /* Take note: This is the do-it-normal loop */ + pc_hist[blocklen++].location = (uae_u16 *)regs.pc_p; + uae_u32 opcode = GET_OPCODE; +#ifdef FLIGHT_RECORDER + m68k_record_step(m68k_getpc(), opcode); +#endif + (*cpufunctbl[opcode])(opcode); + cpu_check_ticks(); + if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) { + compile_block(pc_hist, blocklen); + return; /* We will deal with the spcflags in the 
caller */ + } + /* No need to check regs.spcflags, because if they were set, + we'd have ended up inside that "if" */ + } + } +} +#endif + +typedef void (*compiled_handler)(void); + +#ifdef UAE +/* FIXME: check differences against UAE m68k_do_compile_execute */ +#else +void m68k_do_compile_execute(void) +{ + for (;;) { + ((compiled_handler)(pushall_call_handler))(); + /* Whenever we return from that, we should check spcflags */ + if (SPCFLAGS_TEST(SPCFLAG_ALL)) { + if (m68k_do_specialties ()) + return; + } + } +} +#endif + +#ifdef UAE +/* FIXME: check differences against UAE m68k_compile_execute */ +#else +void m68k_compile_execute (void) +{ +setjmpagain: + TRY(prb) { + for (;;) { + if (quit_program > 0) { + if (quit_program == 1) { +#ifdef FLIGHT_RECORDER + dump_flight_recorder(); +#endif + break; + } + quit_program = 0; + m68k_reset (); + } + m68k_do_compile_execute(); + } + } + CATCH(prb) { + jit_log("m68k_compile_execute: exception %d pc=%08x (%08x+%p-%p) fault_pc=%08x addr=%08x -> %08x sp=%08x", + int(prb), + m68k_getpc(), + regs.pc, regs.pc_p, regs.pc_oldp, + regs.fault_pc, + regs.mmu_fault_addr, get_long (regs.vbr + 4*prb), + regs.regs[15]); + flush_icache(0); + Exception(prb, 0); + goto setjmpagain; + } +} +#endif + +#endif /* JIT */ diff --git a/BasiliskII/src/uae_cpu/compiler/compstbla.cpp b/BasiliskII/src/uae_cpu/compiler/compstbla.cpp new file mode 100644 index 00000000..e2f36d1e --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/compstbla.cpp @@ -0,0 +1,5 @@ +/* + * compstbl.cpp must be compiled twice, once for the generator program + * and once for the actual executable + */ +#include "compstbl.cpp" diff --git a/BasiliskII/src/uae_cpu/compiler/flags_arm.h b/BasiliskII/src/uae_cpu/compiler/flags_arm.h new file mode 100644 index 00000000..c9a60490 --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/flags_arm.h @@ -0,0 +1,52 @@ +/* + * compiler/flags_arm.h - Native flags definitions for ARM + * + * Copyright (c) 2013 Jens Heitmann of ARAnyM dev team 
(see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * + * Adaptation for Basilisk II and improvements, copyright 2000-2002 + * Gwenole Beauchesne + * + * Basilisk II (C) 1997-2002 Christian Bauer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef NATIVE_FLAGS_ARM_H +#define NATIVE_FLAGS_ARM_H + +/* Native integer code conditions */ +enum { + NATIVE_CC_EQ = 0, + NATIVE_CC_NE = 1, + NATIVE_CC_CS = 2, + NATIVE_CC_CC = 3, + NATIVE_CC_MI = 4, + NATIVE_CC_PL = 5, + NATIVE_CC_VS = 6, + NATIVE_CC_VC = 7, + NATIVE_CC_HI = 8, + NATIVE_CC_LS = 9, + NATIVE_CC_GE = 10, + NATIVE_CC_LT = 11, + NATIVE_CC_GT = 12, + NATIVE_CC_LE = 13, + NATIVE_CC_AL = 14 +}; + +#endif /* NATIVE_FLAGS_ARM_H */ diff --git a/BasiliskII/src/uae_cpu/compiler/flags_x86.h b/BasiliskII/src/uae_cpu/compiler/flags_x86.h new file mode 100644 index 00000000..310dbcc3 --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/flags_x86.h @@ -0,0 +1,52 @@ +/* + * compiler/flags_x86.h - Native flags definitions for IA-32 + * + * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * + * Adaptation for Basilisk II and improvements, copyright 2000-2002 + * Gwenole Beauchesne + * + * Basilisk II (C) 1997-2002 Christian Bauer + * + * This 
program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef NATIVE_FLAGS_X86_H +#define NATIVE_FLAGS_X86_H + +/* Native integer code conditions */ +enum { + NATIVE_CC_HI = 7, + NATIVE_CC_LS = 6, + NATIVE_CC_CC = 3, + NATIVE_CC_CS = 2, + NATIVE_CC_NE = 5, + NATIVE_CC_EQ = 4, + NATIVE_CC_VC = 1, + NATIVE_CC_VS = 0, + NATIVE_CC_PL = 9, + NATIVE_CC_MI = 8, + NATIVE_CC_GE = 13, + NATIVE_CC_LT = 12, + NATIVE_CC_GT = 15, + NATIVE_CC_LE = 14 +}; + +/* FIXME: include/flags_x86.h in UAE had the following values: + NATIVE_CC_VC = 11, + NATIVE_CC_VS = 10, +*/ + +#endif /* NATIVE_FLAGS_X86_H */ diff --git a/BasiliskII/src/uae_cpu/compiler/gencomp.c b/BasiliskII/src/uae_cpu/compiler/gencomp.c new file mode 100644 index 00000000..a7c4ee2b --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/gencomp.c @@ -0,0 +1,3619 @@ +/* + * compiler/gencomp.c - MC680x0 compilation generator + * + * Based on work Copyright 1995, 1996 Bernd Schmidt + * Changes for UAE-JIT Copyright 2000 Bernd Meyer + * + * Adaptation for ARAnyM/ARM, copyright 2001-2014 + * Milan Jurik, Jens Heitmann + * + * Adaptation for Basilisk II and improvements, copyright 2000-2005 + * Gwenole Beauchesne + * + * Basilisk II (C) 1997-2005 Christian Bauer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU 
General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#define CC_FOR_BUILD 1 +#include "sysconfig.h" + +#include "sysdeps.h" +#include "readcpu.h" + +#undef NDEBUG +#include +#include +#include +#include +#include +#include +#undef abort + +#ifdef UAE +/* +#define DISABLE_I_OR_AND_EOR +#define DISABLE_I_SUB +#define DISABLE_I_SUBA +#define DISABLE_I_SUBX +#define DISABLE_I_ADD +#define DISABLE_I_ADDA +#define DISABLE_I_ADDX +#define DISABLE_I_NEG +#define DISABLE_I_NEGX +#define DISABLE_I_CLR +#define DISABLE_I_NOT +#define DISABLE_I_TST +#define DISABLE_I_BCHG_BCLR_BSET_BTST +#define DISABLE_I_CMPM_CMP +#define DISABLE_I_CMPA +#define DISABLE_I_MOVE +#define DISABLE_I_MOVEA +#define DISABLE_I_SWAP +#define DISABLE_I_EXG +#define DISABLE_I_EXT +#define DISABLE_I_MVEL +#define DISABLE_I_MVMLE +#define DISABLE_I_RTD +#define DISABLE_I_LINK +#define DISABLE_I_UNLK +#define DISABLE_I_RTS +#define DISABLE_I_JSR +#define DISABLE_I_JMP +#define DISABLE_I_BSR +#define DISABLE_I_BCC +#define DISABLE_I_LEA +#define DISABLE_I_PEA +#define DISABLE_I_DBCC +#define DISABLE_I_SCC +#define DISABLE_I_MULU +#define DISABLE_I_MULS +#define DISABLE_I_ASR +#define DISABLE_I_ASL +#define DISABLE_I_LSR +#define DISABLE_I_LSL +#define DISABLE_I_ROL +#define DISABLE_I_ROR +#define DISABLE_I_MULL +#define DISABLE_I_FPP +#define DISABLE_I_FBCC +#define DISABLE_I_FSCC +#define DISABLE_I_MOVE16 +*/ +#endif /* UAE */ + +#ifdef UAE 
+#define JIT_PATH "jit/" +#define GEN_PATH "jit/" +#define RETURN "return 0;" +#define RETTYPE "uae_u32" +#define NEXT_CPU_LEVEL 5 +#else +#define JIT_PATH "compiler/" +#define GEN_PATH "" +#define RETURN "return;" +#define RETTYPE "void" +#define NEXT_CPU_LEVEL 4 +#define ua(s) s +#endif + +#define BOOL_TYPE "int" +#define failure global_failure=1 +#define FAILURE global_failure=1 +#define isjump global_isjump=1 +#define is_const_jump global_iscjump=1 +#define isaddx global_isaddx=1 +#define uses_cmov global_cmov=1 +#define mayfail global_mayfail=1 +#define uses_fpu global_fpu=1 + +int hack_opcode; + +static int global_failure; +static int global_isjump; +static int global_iscjump; +static int global_isaddx; +static int global_cmov; +static int long_opcode; +static int global_mayfail; +static int global_fpu; + +static char endstr[1000]; +static char lines[100000]; +static int comp_index=0; + +#include "flags_x86.h" + +#ifndef __attribute__ +# ifndef __GNUC__ +# define __attribute__(x) +# endif +#endif + + +static int cond_codes[]={-1,-1, + NATIVE_CC_HI,NATIVE_CC_LS, + NATIVE_CC_CC,NATIVE_CC_CS, + NATIVE_CC_NE,NATIVE_CC_EQ, + -1,-1, + NATIVE_CC_PL,NATIVE_CC_MI, + NATIVE_CC_GE,NATIVE_CC_LT, + NATIVE_CC_GT,NATIVE_CC_LE + }; + +__attribute__((format(printf, 1, 2))) +static void comprintf(const char *format, ...) +{ + va_list args; + + va_start(args, format); + comp_index += vsprintf(lines + comp_index, format, args); + va_end(args); +} + +static void com_discard(void) +{ + comp_index = 0; +} + +static void com_flush(void) +{ + int i; + for (i = 0; i < comp_index; i++) + putchar(lines[i]); + com_discard(); +} + + +static FILE *headerfile; +static FILE *stblfile; + +static int using_prefetch; +static int using_exception_3; +static int cpu_level; +static int noflags; + +/* For the current opcode, the next lower level that will have different code. + * Initialized to -1 for each opcode. If it remains unchanged, indicates we + * are done with that opcode. 
*/ +static int next_cpu_level; + +static int *opcode_map; +static int *opcode_next_clev; +static int *opcode_last_postfix; +static unsigned long *counts; + +static void read_counts(void) +{ + FILE *file; + unsigned long opcode, count, total; + char name[20]; + int nr = 0; + memset (counts, 0, 65536 * sizeof *counts); + + file = fopen ("frequent.68k", "r"); + if (file) + { + if (fscanf (file, "Total: %lu\n", &total) != 1) { + assert(0); + } + while (fscanf (file, "%lx: %lu %s\n", &opcode, &count, name) == 3) + { + opcode_next_clev[nr] = NEXT_CPU_LEVEL; + opcode_last_postfix[nr] = -1; + opcode_map[nr++] = opcode; + counts[opcode] = count; + } + fclose (file); + } + if (nr == nr_cpuop_funcs) + return; + for (opcode = 0; opcode < 0x10000; opcode++) + { + if (table68k[opcode].handler == -1 && table68k[opcode].mnemo != i_ILLG + && counts[opcode] == 0) + { + opcode_next_clev[nr] = NEXT_CPU_LEVEL; + opcode_last_postfix[nr] = -1; + opcode_map[nr++] = opcode; + counts[opcode] = count; + } + } + assert (nr == nr_cpuop_funcs); +} + +static int n_braces = 0; +static int insn_n_cycles; + +static void +start_brace (void) +{ + n_braces++; + comprintf ("{"); +} + +static void +close_brace (void) +{ + assert (n_braces > 0); + n_braces--; + comprintf ("}"); +} + +static void +finish_braces (void) +{ + while (n_braces > 0) + close_brace (); +} + +static inline void gen_update_next_handler(void) +{ + return; /* Can anything clever be done here? 
*/ +} + +static void gen_writebyte(const char *address, const char *source) +{ + comprintf("\twritebyte(%s, %s, scratchie);\n", address, source); +} + +static void gen_writeword(const char *address, const char *source) +{ + comprintf("\twriteword(%s, %s, scratchie);\n", address, source); +} + +static void gen_writelong(const char *address, const char *source) +{ + comprintf("\twritelong(%s, %s, scratchie);\n", address, source); +} + +static void gen_readbyte(const char *address, const char* dest) +{ + comprintf("\treadbyte(%s, %s, scratchie);\n", address, dest); +} + +static void gen_readword(const char *address, const char *dest) +{ + comprintf("\treadword(%s,%s,scratchie);\n", address, dest); +} + +static void gen_readlong(const char *address, const char *dest) +{ + comprintf("\treadlong(%s, %s, scratchie);\n", address, dest); +} + + + +static const char * +gen_nextilong (void) +{ + static char buffer[80]; + + sprintf (buffer, "comp_get_ilong((m68k_pc_offset+=4)-4)"); + insn_n_cycles += 4; + + long_opcode=1; + return buffer; +} + +static const char * +gen_nextiword (void) +{ + static char buffer[80]; + + sprintf (buffer, "comp_get_iword((m68k_pc_offset+=2)-2)"); + insn_n_cycles+=2; + + long_opcode=1; + return buffer; +} + +static const char * +gen_nextibyte (void) +{ + static char buffer[80]; + + sprintf (buffer, "comp_get_ibyte((m68k_pc_offset+=2)-2)"); + insn_n_cycles += 2; + + long_opcode=1; + return buffer; +} + + +static void +swap_opcode (void) +{ +#ifdef UAE + /* no-op */ +#else + comprintf("#ifdef USE_JIT_FPU\n"); + comprintf("#if defined(HAVE_GET_WORD_UNSWAPPED) && !defined(FULLMMU)\n"); + comprintf("\topcode = do_byteswap_16(opcode);\n"); + comprintf("#endif\n"); + comprintf("#endif\n"); +#endif +} + +static void +sync_m68k_pc (void) +{ + comprintf("\t if (m68k_pc_offset > SYNC_PC_OFFSET) sync_m68k_pc();\n"); +} + + +/* getv == 1: fetch data; getv != 0: check for odd address. If movem != 0, + * the calling routine handles Apdi and Aipi modes. 
+ * gb-- movem == 2 means the same thing but for a MOVE16 instruction */ +static void genamode(amodes mode, const char *reg, wordsizes size, const char *name, int getv, int movem) +{ + start_brace(); + switch (mode) + { + case Dreg: /* Do we need to check dodgy here? */ + assert (!movem); + if (getv == 1 || getv == 2) + { + /* We generate the variable even for getv==2, so we can use + it as a destination for MOVE */ + comprintf("\tint %s = %s;\n", name, reg); + } + return; + + case Areg: + assert (!movem); + if (getv == 1 || getv == 2) + { + /* see above */ + comprintf("\tint %s = dodgy ? scratchie++ : %s + 8;\n", name, reg); + if (getv == 1) + { + comprintf("\tif (dodgy) \n"); + comprintf("\t\tmov_l_rr(%s, %s + 8);\n", name, reg); + } + } + return; + + case Aind: + comprintf("\tint %sa = dodgy ? scratchie++ : %s + 8;\n", name, reg); + comprintf("\tif (dodgy)\n"); + comprintf("\t\tmov_l_rr(%sa, %s + 8);\n", name, reg); + break; + case Aipi: + comprintf("\tint %sa = scratchie++;\n", name); + comprintf("\tmov_l_rr(%sa, %s + 8);\n", name, reg); + break; + case Apdi: + switch (size) + { + case sz_byte: + if (movem) + { + comprintf("\tint %sa = dodgy ? scratchie++ : %s + 8;\n", name, reg); + comprintf("\tif (dodgy)\n"); + comprintf("\t\tmov_l_rr(%sa, 8 + %s);\n", name, reg); + } else + { + start_brace(); + comprintf("\tint %sa = dodgy ? scratchie++ : %s + 8;\n", name, reg); + comprintf("\tlea_l_brr(%s + 8, %s + 8, (uae_s32)-areg_byteinc[%s]);\n", reg, reg, reg); + comprintf("\tif (dodgy)\n"); + comprintf("\t\tmov_l_rr(%sa, 8 + %s);\n", name, reg); + } + break; + case sz_word: + if (movem) + { + comprintf("\tint %sa=dodgy?scratchie++:%s+8;\n", name, reg); + comprintf("\tif (dodgy) \n"); + comprintf("\tmov_l_rr(%sa,8+%s);\n", name, reg); + } else + { + start_brace(); + comprintf("\tint %sa = dodgy ? 
scratchie++ : %s + 8;\n", name, reg); + comprintf("\tlea_l_brr(%s + 8, %s + 8, -2);\n", reg, reg); + comprintf("\tif (dodgy)\n"); + comprintf("\t\tmov_l_rr(%sa, 8 + %s);\n", name, reg); + } + break; + case sz_long: + if (movem) + { + comprintf("\tint %sa = dodgy ? scratchie++ : %s + 8;\n", name, reg); + comprintf("\tif (dodgy)\n"); + comprintf("\t\tmov_l_rr(%sa, 8 + %s);\n", name, reg); + } else + { + start_brace(); + comprintf("\tint %sa = dodgy ? scratchie++ : %s + 8;\n", name, reg); + comprintf("\tlea_l_brr(%s + 8, %s + 8, -4);\n", reg, reg); + comprintf("\tif (dodgy)\n"); + comprintf("\t\tmov_l_rr(%sa, 8 + %s);\n", name, reg); + } + break; + default: + assert(0); + break; + } + break; + case Ad16: + comprintf("\tint %sa = scratchie++;\n", name); + comprintf("\tmov_l_rr(%sa, 8 + %s);\n", name, reg); + comprintf("\tlea_l_brr(%sa, %sa, (uae_s32)(uae_s16)%s);\n", name, name, gen_nextiword()); + break; + case Ad8r: + comprintf("\tint %sa = scratchie++;\n", name); + comprintf("\tcalc_disp_ea_020(%s + 8, %s, %sa, scratchie);\n", reg, gen_nextiword(), name); + break; + + case PC16: + comprintf("\tint %sa = scratchie++;\n", name); + comprintf("\tuae_u32 address = start_pc + ((char *)comp_pc_p - (char *)start_pc_p) + m68k_pc_offset;\n"); + comprintf("\tuae_s32 PC16off = (uae_s32)(uae_s16)%s;\n", gen_nextiword()); + comprintf("\tmov_l_ri(%sa, address + PC16off);\n", name); + break; + + case PC8r: + comprintf("\tint pctmp = scratchie++;\n"); + comprintf("\tint %sa = scratchie++;\n", name); + comprintf("\tuae_u32 address = start_pc + ((char *)comp_pc_p - (char *)start_pc_p) + m68k_pc_offset;\n"); + start_brace(); + comprintf("\tmov_l_ri(pctmp,address);\n"); + + comprintf("\tcalc_disp_ea_020(pctmp, %s, %sa, scratchie);\n", gen_nextiword(), name); + break; + case absw: + comprintf("\tint %sa = scratchie++;\n", name); + comprintf("\tmov_l_ri(%sa, (uae_s32)(uae_s16)%s);\n", name, gen_nextiword()); + break; + case absl: + comprintf("\tint %sa = scratchie++;\n", name); + 
comprintf("\tmov_l_ri(%sa, %s); /* absl */\n", name, gen_nextilong()); + break; + case imm: + assert (getv == 1); + switch (size) + { + case sz_byte: + comprintf("\tint %s = scratchie++;\n", name); + comprintf("\tmov_l_ri(%s, (uae_s32)(uae_s8)%s);\n", name, gen_nextibyte()); + break; + case sz_word: + comprintf("\tint %s = scratchie++;\n", name); + comprintf("\tmov_l_ri(%s, (uae_s32)(uae_s16)%s);\n", name, gen_nextiword()); + break; + case sz_long: + comprintf("\tint %s = scratchie++;\n", name); + comprintf("\tmov_l_ri(%s, %s);\n", name, gen_nextilong()); + break; + default: + assert(0); + break; + } + return; + case imm0: + assert (getv == 1); + comprintf("\tint %s = scratchie++;\n", name); + comprintf("\tmov_l_ri(%s, (uae_s32)(uae_s8)%s);\n", name, gen_nextibyte()); + return; + case imm1: + assert (getv == 1); + comprintf("\tint %s = scratchie++;\n", name); + comprintf("\tmov_l_ri(%s, (uae_s32)(uae_s16)%s);\n", name, gen_nextiword()); + return; + case imm2: + assert (getv == 1); + comprintf("\tint %s = scratchie++;\n", name); + comprintf("\tmov_l_ri(%s, %s);\n", name, gen_nextilong()); + return; + case immi: + assert (getv == 1); + comprintf("\tint %s = scratchie++;\n", name); + comprintf("\tmov_l_ri(%s, %s);\n", name, reg); + return; + default: + assert(0); + break; + } + + /* We get here for all non-reg non-immediate addressing modes to + * actually fetch the value. 
*/ + if (getv == 1) + { + char astring[80]; + sprintf(astring, "%sa", name); + switch (size) + { + case sz_byte: + insn_n_cycles += 2; + break; + case sz_word: + insn_n_cycles += 2; + break; + case sz_long: + insn_n_cycles += 4; + break; + default: + assert(0); + break; + } + start_brace(); + comprintf("\tint %s = scratchie++;\n", name); + switch (size) + { + case sz_byte: + gen_readbyte(astring, name); + break; + case sz_word: + gen_readword(astring, name); + break; + case sz_long: + gen_readlong(astring, name); + break; + default: + assert(0); + break; + } + } + + /* We now might have to fix up the register for pre-dec or post-inc + * addressing modes. */ + if (!movem) + { + switch (mode) + { + case Aipi: + switch (size) + { + case sz_byte: + comprintf("\tlea_l_brr(%s + 8,%s + 8, areg_byteinc[%s]);\n", reg, reg, reg); + break; + case sz_word: + comprintf("\tlea_l_brr(%s + 8, %s + 8, 2);\n", reg, reg); + break; + case sz_long: + comprintf("\tlea_l_brr(%s + 8, %s + 8, 4);\n", reg, reg); + break; + default: + assert(0); + break; + } + break; + case Apdi: + break; + default: + break; + } + } +} + +static void genastore(const char *from, amodes mode, const char *reg, wordsizes size, const char *to) +{ + switch (mode) + { + case Dreg: + switch (size) + { + case sz_byte: + comprintf("\tif(%s != %s)\n", reg, from); + comprintf("\t\tmov_b_rr(%s, %s);\n", reg, from); + break; + case sz_word: + comprintf("\tif(%s != %s)\n", reg, from); + comprintf("\t\tmov_w_rr(%s, %s);\n", reg, from); + break; + case sz_long: + comprintf("\tif(%s != %s)\n", reg, from); + comprintf("\t\tmov_l_rr(%s, %s);\n", reg, from); + break; + default: + assert(0); + break; + } + break; + case Areg: + switch (size) + { + case sz_word: + comprintf("\tif(%s + 8 != %s)\n", reg, from); + comprintf("\t\tmov_w_rr(%s + 8, %s);\n", reg, from); + break; + case sz_long: + comprintf("\tif(%s + 8 != %s)\n", reg, from); + comprintf("\t\tmov_l_rr(%s + 8, %s);\n", reg, from); + break; + default: + assert(0); + break; 
+ } + break; + + case Apdi: + case absw: + case PC16: + case PC8r: + case Ad16: + case Ad8r: + case Aipi: + case Aind: + case absl: + { + char astring[80]; + sprintf(astring, "%sa", to); + + switch (size) + { + case sz_byte: + insn_n_cycles += 2; + gen_writebyte(astring, from); + break; + case sz_word: + insn_n_cycles += 2; + gen_writeword(astring, from); + break; + case sz_long: + insn_n_cycles += 4; + gen_writelong(astring, from); + break; + default: + assert(0); + break; + } + } + break; + case imm: + case imm0: + case imm1: + case imm2: + case immi: + assert(0); + break; + default: + assert(0); + break; + } +} + +static void genmov16(uae_u32 opcode, struct instr *curi) +{ + comprintf("\tint src=scratchie++;\n"); + comprintf("\tint dst=scratchie++;\n"); + + if ((opcode & 0xfff8) == 0xf620) { + /* MOVE16 (Ax)+,(Ay)+ */ + comprintf("\tuae_u16 dstreg=((%s)>>12)&0x07;\n", gen_nextiword()); + comprintf("\tmov_l_rr(src,8+srcreg);\n"); + comprintf("\tmov_l_rr(dst,8+dstreg);\n"); + } + else { + /* Other variants */ + genamode (curi->smode, "srcreg", curi->size, "src", 0, 2); + genamode (curi->dmode, "dstreg", curi->size, "dst", 0, 2); + comprintf("\tmov_l_rr(src,srca);\n"); + comprintf("\tmov_l_rr(dst,dsta);\n"); + } + + /* Align on 16-byte boundaries */ + comprintf("\tand_l_ri(src,~15);\n"); + comprintf("\tand_l_ri(dst,~15);\n"); + + if ((opcode & 0xfff8) == 0xf620) { + comprintf("\tif (srcreg != dstreg)\n"); + comprintf("\tadd_l_ri(srcreg+8,16);\n"); + comprintf("\tadd_l_ri(dstreg+8,16);\n"); + } + else if ((opcode & 0xfff8) == 0xf600) + comprintf("\tadd_l_ri(srcreg+8,16);\n"); + else if ((opcode & 0xfff8) == 0xf608) + comprintf("\tadd_l_ri(dstreg+8,16);\n"); + +#ifdef UAE + comprintf("\tif (special_mem) {\n"); + comprintf("\t\tint tmp=scratchie;\n"); + comprintf("\tscratchie+=4;\n" + "\treadlong(src,tmp,scratchie);\n" + "\twritelong_clobber(dst,tmp,scratchie);\n" + "\tadd_l_ri(src,4);\n" + "\tadd_l_ri(dst,4);\n" + "\treadlong(src,tmp,scratchie);\n" + 
"\twritelong_clobber(dst,tmp,scratchie);\n" + "\tadd_l_ri(src,4);\n" + "\tadd_l_ri(dst,4);\n" + "\treadlong(src,tmp,scratchie);\n" + "\twritelong_clobber(dst,tmp,scratchie);\n" + "\tadd_l_ri(src,4);\n" + "\tadd_l_ri(dst,4);\n" + "\treadlong(src,tmp,scratchie);\n" + "\twritelong_clobber(dst,tmp,scratchie);\n"); + comprintf("\t} else {\n"); +#endif + comprintf("\tint tmp=scratchie;\n"); + comprintf("\tscratchie+=4;\n" + "\tget_n_addr(src,src,scratchie);\n" + "\tget_n_addr(dst,dst,scratchie);\n" + "\tmov_l_rR(tmp+0,src,0);\n" + "\tmov_l_rR(tmp+1,src,4);\n" + "\tmov_l_rR(tmp+2,src,8);\n" + "\tmov_l_rR(tmp+3,src,12);\n" + "\tmov_l_Rr(dst,tmp+0,0);\n" + "\tforget_about(tmp+0);\n" + "\tmov_l_Rr(dst,tmp+1,4);\n" + "\tforget_about(tmp+1);\n" + "\tmov_l_Rr(dst,tmp+2,8);\n" + "\tforget_about(tmp+2);\n" + "\tmov_l_Rr(dst,tmp+3,12);\n"); +#ifdef UAE + comprintf("\t}\n"); +#endif +} + +static void +genmovemel (uae_u16 opcode) +{ + comprintf ("\tuae_u16 mask = %s;\n", gen_nextiword ()); + comprintf ("\tint native=scratchie++;\n"); + comprintf ("\tint i;\n"); + comprintf ("\tsigned char offset=0;\n"); + genamode (table68k[opcode].dmode, "dstreg", table68k[opcode].size, "src", 2, 1); +#ifdef UAE + if (table68k[opcode].size == sz_long) + comprintf("\tif (1 && !special_mem) {\n"); + else + comprintf("\tif (1 && !special_mem) {\n"); +#endif + + /* Fast but unsafe... 
*/ + comprintf("\tget_n_addr(srca,native,scratchie);\n"); + + comprintf("\tfor (i=0;i<16;i++) {\n" + "\t\tif ((mask>>i)&1) {\n"); + switch(table68k[opcode].size) { + case sz_long: + comprintf("\t\t\tmov_l_rR(i,native,offset);\n" + "\t\t\tmid_bswap_32(i);\n" + "\t\t\toffset+=4;\n"); + break; + case sz_word: + comprintf("\t\t\tmov_w_rR(i,native,offset);\n" + "\t\t\tmid_bswap_16(i);\n" + "\t\t\tsign_extend_16_rr(i,i);\n" + "\t\t\toffset+=2;\n"); + break; + default: assert(0); + } + comprintf("\t\t}\n" + "\t}"); + if (table68k[opcode].dmode == Aipi) { + comprintf("\t\t\tlea_l_brr(8+dstreg,srca,offset);\n"); + } + /* End fast but unsafe. */ + +#ifdef UAE + comprintf("\t} else {\n"); + + comprintf ("\t\tint tmp=scratchie++;\n"); + + comprintf("\t\tmov_l_rr(tmp,srca);\n"); + comprintf("\t\tfor (i=0;i<16;i++) {\n" + "\t\t\tif ((mask>>i)&1) {\n"); + switch(table68k[opcode].size) { + case sz_long: + comprintf("\t\t\t\treadlong(tmp,i,scratchie);\n" + "\t\t\t\tadd_l_ri(tmp,4);\n"); + break; + case sz_word: + comprintf("\t\t\t\treadword(tmp,i,scratchie);\n" + "\t\t\t\tadd_l_ri(tmp,2);\n"); + break; + default: assert(0); + } + + comprintf("\t\t\t}\n" + "\t\t}\n"); + if (table68k[opcode].dmode == Aipi) { + comprintf("\t\tmov_l_rr(8+dstreg,tmp);\n"); + } + comprintf("\t}\n"); +#endif + +} + + +static void +genmovemle (uae_u16 opcode) +{ + comprintf ("\tuae_u16 mask = %s;\n", gen_nextiword ()); + comprintf ("\tint native=scratchie++;\n"); + comprintf ("\tint i;\n"); + comprintf ("\tint tmp=scratchie++;\n"); + comprintf ("\tsigned char offset=0;\n"); + genamode (table68k[opcode].dmode, "dstreg", table68k[opcode].size, "src", 2, 1); + +#ifdef UAE + /* *Sigh* Some clever geek realized that the fastest way to copy a + buffer from main memory to the gfx card is by using movmle. 
Good + on her, but unfortunately, gfx mem isn't "real" mem, and thus that + act of cleverness means that movmle must pay attention to special_mem, + or Genetic Species is a rather boring-looking game ;-) */ + if (table68k[opcode].size == sz_long) + comprintf("\tif (1 && !special_mem) {\n"); + else + comprintf("\tif (1 && !special_mem) {\n"); +#endif + comprintf("\tget_n_addr(srca,native,scratchie);\n"); + + if (table68k[opcode].dmode!=Apdi) { + comprintf("\tfor (i=0;i<16;i++) {\n" + "\t\tif ((mask>>i)&1) {\n"); + switch(table68k[opcode].size) { + case sz_long: + comprintf("\t\t\tmov_l_rr(tmp,i);\n" + "\t\t\tmid_bswap_32(tmp);\n" + "\t\t\tmov_l_Rr(native,tmp,offset);\n" + "\t\t\toffset+=4;\n"); + break; + case sz_word: + comprintf("\t\t\tmov_l_rr(tmp,i);\n" + "\t\t\tmid_bswap_16(tmp);\n" + "\t\t\tmov_w_Rr(native,tmp,offset);\n" + "\t\t\toffset+=2;\n"); + break; + default: assert(0); + } + } + else { /* Pre-decrement */ + comprintf("\tfor (i=0;i<16;i++) {\n" + "\t\tif ((mask>>i)&1) {\n"); + switch(table68k[opcode].size) { + case sz_long: + comprintf("\t\t\toffset-=4;\n" + "\t\t\tmov_l_rr(tmp,15-i);\n" + "\t\t\tmid_bswap_32(tmp);\n" + "\t\t\tmov_l_Rr(native,tmp,offset);\n" + ); + break; + case sz_word: + comprintf("\t\t\toffset-=2;\n" + "\t\t\tmov_l_rr(tmp,15-i);\n" + "\t\t\tmid_bswap_16(tmp);\n" + "\t\t\tmov_w_Rr(native,tmp,offset);\n" + ); + break; + default: assert(0); + } + } + + + comprintf("\t\t}\n" + "\t}"); + if (table68k[opcode].dmode == Apdi) { + comprintf("\t\t\tlea_l_brr(8+dstreg,srca,(uae_s32)offset);\n"); + } +#ifdef UAE + comprintf("\t} else {\n"); + + if (table68k[opcode].dmode!=Apdi) { + comprintf("\tmov_l_rr(tmp,srca);\n"); + comprintf("\tfor (i=0;i<16;i++) {\n" + "\t\tif ((mask>>i)&1) {\n"); + switch(table68k[opcode].size) { + case sz_long: + comprintf("\t\t\twritelong(tmp,i,scratchie);\n" + "\t\t\tadd_l_ri(tmp,4);\n"); + break; + case sz_word: + comprintf("\t\t\twriteword(tmp,i,scratchie);\n" + "\t\t\tadd_l_ri(tmp,2);\n"); + break; + default: 
assert(0); + } + } + else { /* Pre-decrement */ + comprintf("\tfor (i=0;i<16;i++) {\n" + "\t\tif ((mask>>i)&1) {\n"); + switch(table68k[opcode].size) { + case sz_long: + comprintf("\t\t\tsub_l_ri(srca,4);\n" + "\t\t\twritelong(srca,15-i,scratchie);\n"); + break; + case sz_word: + comprintf("\t\t\tsub_l_ri(srca,2);\n" + "\t\t\twriteword(srca,15-i,scratchie);\n"); + break; + default: assert(0); + } + } + + + comprintf("\t\t}\n" + "\t}"); + if (table68k[opcode].dmode == Apdi) { + comprintf("\t\t\tmov_l_rr(8+dstreg,srca);\n"); + } + comprintf("\t}\n"); +#endif +} + + +static void +duplicate_carry (void) +{ + comprintf ("\tif (needed_flags&FLAG_X) duplicate_carry();\n"); +} + +typedef enum +{ + flag_logical_noclobber, flag_logical, flag_add, flag_sub, flag_cmp, + flag_addx, flag_subx, flag_zn, flag_av, flag_sv, flag_and, flag_or, + flag_eor, flag_mov +} +flagtypes; + + +static void +genflags (flagtypes type, wordsizes size, const char *value, const char *src, const char *dst) +{ + if (noflags) { + switch(type) { + case flag_cmp: + comprintf("\tdont_care_flags();\n"); + comprintf("/* Weird --- CMP with noflags ;-) */\n"); + return; + case flag_add: + case flag_sub: + comprintf("\tdont_care_flags();\n"); + { + const char* op; + switch(type) { + case flag_add: op="add"; break; + case flag_sub: op="sub"; break; + default: assert(0); + } + switch (size) + { + case sz_byte: + comprintf("\t%s_b(%s,%s);\n",op,dst,src); + break; + case sz_word: + comprintf("\t%s_w(%s,%s);\n",op,dst,src); + break; + case sz_long: + comprintf("\t%s_l(%s,%s);\n",op,dst,src); + break; + } + return; + } + break; + + case flag_and: + comprintf("\tdont_care_flags();\n"); + switch (size) + { + case sz_byte: + comprintf("if (kill_rodent(dst)) {\n"); + comprintf("\tzero_extend_8_rr(scratchie,%s);\n",src); + comprintf("\tor_l_ri(scratchie,0xffffff00);\n"); + comprintf("\tand_l(%s,scratchie);\n",dst); + comprintf("\tforget_about(scratchie);\n"); + comprintf("\t} else \n" + "\tand_b(%s,%s);\n",dst,src); + 
break; + case sz_word: + comprintf("if (kill_rodent(dst)) {\n"); + comprintf("\tzero_extend_16_rr(scratchie,%s);\n",src); + comprintf("\tor_l_ri(scratchie,0xffff0000);\n"); + comprintf("\tand_l(%s,scratchie);\n",dst); + comprintf("\tforget_about(scratchie);\n"); + comprintf("\t} else \n" + "\tand_w(%s,%s);\n",dst,src); + break; + case sz_long: + comprintf("\tand_l(%s,%s);\n",dst,src); + break; + } + return; + + case flag_mov: + comprintf("\tdont_care_flags();\n"); + switch (size) + { + case sz_byte: + comprintf("if (kill_rodent(dst)) {\n"); + comprintf("\tzero_extend_8_rr(scratchie,%s);\n",src); + comprintf("\tand_l_ri(%s,0xffffff00);\n",dst); + comprintf("\tor_l(%s,scratchie);\n",dst); + comprintf("\tforget_about(scratchie);\n"); + comprintf("\t} else \n" + "\tmov_b_rr(%s,%s);\n",dst,src); + break; + case sz_word: + comprintf("if (kill_rodent(dst)) {\n"); + comprintf("\tzero_extend_16_rr(scratchie,%s);\n",src); + comprintf("\tand_l_ri(%s,0xffff0000);\n",dst); + comprintf("\tor_l(%s,scratchie);\n",dst); + comprintf("\tforget_about(scratchie);\n"); + comprintf("\t} else \n" + "\tmov_w_rr(%s,%s);\n",dst,src); + break; + case sz_long: + comprintf("\tmov_l_rr(%s,%s);\n",dst,src); + break; + } + return; + + case flag_or: + case flag_eor: + comprintf("\tdont_care_flags();\n"); + start_brace(); + { + const char* op; + switch(type) { + case flag_or: op="or"; break; + case flag_eor: op="xor"; break; + default: assert(0); + } + switch (size) + { + case sz_byte: + comprintf("if (kill_rodent(dst)) {\n"); + comprintf("\tzero_extend_8_rr(scratchie,%s);\n",src); + comprintf("\t%s_l(%s,scratchie);\n",op,dst); + comprintf("\tforget_about(scratchie);\n"); + comprintf("\t} else \n" + "\t%s_b(%s,%s);\n",op,dst,src); + break; + case sz_word: + comprintf("if (kill_rodent(dst)) {\n"); + comprintf("\tzero_extend_16_rr(scratchie,%s);\n",src); + comprintf("\t%s_l(%s,scratchie);\n",op,dst); + comprintf("\tforget_about(scratchie);\n"); + comprintf("\t} else \n" + 
"\t%s_w(%s,%s);\n",op,dst,src); + break; + case sz_long: + comprintf("\t%s_l(%s,%s);\n",op,dst,src); + break; + } + close_brace(); + return; + } + + + case flag_addx: + case flag_subx: + comprintf("\tdont_care_flags();\n"); + { + const char* op; + switch(type) { + case flag_addx: op="adc"; break; + case flag_subx: op="sbb"; break; + default: assert(0); + } + comprintf("\trestore_carry();\n"); /* Reload the X flag into C */ + switch (size) + { + case sz_byte: + comprintf("\t%s_b(%s,%s);\n",op,dst,src); + break; + case sz_word: + comprintf("\t%s_w(%s,%s);\n",op,dst,src); + break; + case sz_long: + comprintf("\t%s_l(%s,%s);\n",op,dst,src); + break; + } + return; + } + break; + default: return; + } + } + + /* Need the flags, but possibly not all of them */ + switch (type) + { + case flag_logical_noclobber: + failure; + /* fall through */ + + case flag_and: + case flag_or: + case flag_eor: + comprintf("\tdont_care_flags();\n"); + start_brace(); + { + const char* op; + switch(type) { + case flag_and: op="and"; break; + case flag_or: op="or"; break; + case flag_eor: op="xor"; break; + default: assert(0); + } + switch (size) + { + case sz_byte: + comprintf("\tstart_needflags();\n" + "\t%s_b(%s,%s);\n",op,dst,src); + break; + case sz_word: + comprintf("\tstart_needflags();\n" + "\t%s_w(%s,%s);\n",op,dst,src); + break; + case sz_long: + comprintf("\tstart_needflags();\n" + "\t%s_l(%s,%s);\n",op,dst,src); + break; + } + comprintf("\tlive_flags();\n"); + comprintf("\tend_needflags();\n"); + close_brace(); + return; + } + + case flag_mov: + comprintf("\tdont_care_flags();\n"); + start_brace(); + { + switch (size) + { + case sz_byte: + comprintf("\tif (%s!=%s) {\n",src,dst); + comprintf("\tmov_b_ri(%s,0);\n" + "\tstart_needflags();\n",dst); + comprintf("\tor_b(%s,%s);\n",dst,src); + comprintf("\t} else {\n"); + comprintf("\tmov_b_rr(%s,%s);\n",dst,src); + comprintf("\ttest_b_rr(%s,%s);\n",dst,dst); + comprintf("\t}\n"); + break; + case sz_word: + comprintf("\tif (%s!=%s) 
{\n",src,dst); + comprintf("\tmov_w_ri(%s,0);\n" + "\tstart_needflags();\n",dst); + comprintf("\tor_w(%s,%s);\n",dst,src); + comprintf("\t} else {\n"); + comprintf("\tmov_w_rr(%s,%s);\n",dst,src); + comprintf("\ttest_w_rr(%s,%s);\n",dst,dst); + comprintf("\t}\n"); + break; + case sz_long: + comprintf("\tif (%s!=%s) {\n",src,dst); + comprintf("\tmov_l_ri(%s,0);\n" + "\tstart_needflags();\n",dst); + comprintf("\tor_l(%s,%s);\n",dst,src); + comprintf("\t} else {\n"); + comprintf("\tmov_l_rr(%s,%s);\n",dst,src); + comprintf("\ttest_l_rr(%s,%s);\n",dst,dst); + comprintf("\t}\n"); + break; + } + comprintf("\tlive_flags();\n"); + comprintf("\tend_needflags();\n"); + close_brace(); + return; + } + + case flag_logical: + comprintf("\tdont_care_flags();\n"); + start_brace(); + switch (size) + { + case sz_byte: + comprintf("\tstart_needflags();\n" + "\ttest_b_rr(%s,%s);\n",value,value); + break; + case sz_word: + comprintf("\tstart_needflags();\n" + "\ttest_w_rr(%s,%s);\n",value,value); + break; + case sz_long: + comprintf("\tstart_needflags();\n" + "\ttest_l_rr(%s,%s);\n",value,value); + break; + } + comprintf("\tlive_flags();\n"); + comprintf("\tend_needflags();\n"); + close_brace(); + return; + + + case flag_add: + case flag_sub: + case flag_cmp: + comprintf("\tdont_care_flags();\n"); + { + const char* op; + switch(type) { + case flag_add: op="add"; break; + case flag_sub: op="sub"; break; + case flag_cmp: op="cmp"; break; + default: assert(0); + } + switch (size) + { + case sz_byte: + comprintf("\tstart_needflags();\n" + "\t%s_b(%s,%s);\n",op,dst,src); + break; + case sz_word: + comprintf("\tstart_needflags();\n" + "\t%s_w(%s,%s);\n",op,dst,src); + break; + case sz_long: + comprintf("\tstart_needflags();\n" + "\t%s_l(%s,%s);\n",op,dst,src); + break; + } + comprintf("\tlive_flags();\n"); + comprintf("\tend_needflags();\n"); + if (type!=flag_cmp) { + duplicate_carry(); + } + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + + return; + } + + case 
flag_addx: + case flag_subx: + uses_cmov; + comprintf("\tdont_care_flags();\n"); + { + const char* op; + switch(type) { + case flag_addx: op="adc"; break; + case flag_subx: op="sbb"; break; + default: assert(0); + } + start_brace(); + comprintf("\tint zero=scratchie++;\n" + "\tint one=scratchie++;\n" + "\tif (needed_flags&FLAG_Z) {\n" + "\tmov_l_ri(zero,0);\n" + "\tmov_l_ri(one,-1);\n" + "\tmake_flags_live();\n" + "\tcmov_l_rr(zero,one,%d);\n" + "\t}\n",NATIVE_CC_NE); + comprintf("\trestore_carry();\n"); /* Reload the X flag into C */ + switch (size) + { + case sz_byte: + comprintf("\tstart_needflags();\n" + "\t%s_b(%s,%s);\n",op,dst,src); + break; + case sz_word: + comprintf("\tstart_needflags();\n" + "\t%s_w(%s,%s);\n",op,dst,src); + break; + case sz_long: + comprintf("\tstart_needflags();\n" + "\t%s_l(%s,%s);\n",op,dst,src); + break; + } + comprintf("\tlive_flags();\n"); + comprintf("\tif (needed_flags&FLAG_Z) {\n" + "\tcmov_l_rr(zero,one,%d);\n" + "\tset_zero(zero, one);\n" /* No longer need one */ + "\tlive_flags();\n" + "\t}\n",NATIVE_CC_NE); + comprintf("\tend_needflags();\n"); + duplicate_carry(); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + return; + } + default: + failure; + break; + } +} + +static int /* returns zero for success, non-zero for failure */ +gen_opcode (unsigned int opcode) +{ + struct instr *curi = table68k + opcode; + const char* ssize=NULL; + + insn_n_cycles = 2; + global_failure=0; + long_opcode=0; + global_isjump=0; + global_iscjump=0; + global_isaddx=0; + global_cmov=0; + global_fpu=0; + global_mayfail=0; + hack_opcode=opcode; + endstr[0]=0; + + start_brace (); + comprintf("\tuae_u8 scratchie=S1;\n"); + switch (curi->plev) + { + case 0: /* not privileged */ + break; + case 1: /* unprivileged only on 68000 */ + if (cpu_level == 0) + break; + if (next_cpu_level < 0) + next_cpu_level = 0; + + /* fall through */ + case 2: /* priviledged */ + failure; /* Easy ones first */ + break; + case 3: /* privileged if size 
== word */ + if (curi->size == sz_byte) + break; + failure; + break; + } + switch (curi->size) { + case sz_byte: ssize="b"; break; + case sz_word: ssize="w"; break; + case sz_long: ssize="l"; break; + default: assert(0); + } + (void)ssize; + + switch (curi->mnemo) + { + case i_OR: + case i_AND: + case i_EOR: +#ifdef DISABLE_I_OR_AND_EOR + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + switch(curi->mnemo) { + case i_OR: genflags (flag_or, curi->size, "", "src", "dst"); break; + case i_AND: genflags (flag_and, curi->size, "", "src", "dst"); break; + case i_EOR: genflags (flag_eor, curi->size, "", "src", "dst"); break; + } + genastore ("dst", curi->dmode, "dstreg", curi->size, "dst"); + break; + + case i_ORSR: + case i_EORSR: + failure; + isjump; + break; + + case i_ANDSR: + failure; + isjump; + break; + + case i_SUB: +#ifdef DISABLE_I_SUB + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genflags (flag_sub, curi->size, "", "src", "dst"); + genastore ("dst", curi->dmode, "dstreg", curi->size, "dst"); + break; + + case i_SUBA: +#ifdef DISABLE_I_SUBA + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->dmode, "dstreg", sz_long, "dst", 1, 0); + start_brace(); + comprintf("\tint tmp=scratchie++;\n"); + switch(curi->size) { + case sz_byte: comprintf("\tsign_extend_8_rr(tmp,src);\n"); break; + case sz_word: comprintf("\tsign_extend_16_rr(tmp,src);\n"); break; + case sz_long: comprintf("\ttmp=src;\n"); break; + default: assert(0); + } + comprintf("\tsub_l(dst,tmp);\n"); + genastore ("dst", curi->dmode, "dstreg", sz_long, "dst"); + break; + + case i_SUBX: +#ifdef DISABLE_I_SUBX + failure; +#endif + isaddx; + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genflags (flag_subx, 
curi->size, "", "src", "dst"); + genastore ("dst", curi->dmode, "dstreg", curi->size, "dst"); + break; + + case i_SBCD: + failure; + /* I don't think so! */ + break; + + case i_ADD: +#ifdef DISABLE_I_ADD + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genflags (flag_add, curi->size, "", "src", "dst"); + genastore ("dst", curi->dmode, "dstreg", curi->size, "dst"); + break; + + case i_ADDA: +#ifdef DISABLE_I_ADDA + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->dmode, "dstreg", sz_long, "dst", 1, 0); + start_brace(); + comprintf("\tint tmp=scratchie++;\n"); + switch(curi->size) { + case sz_byte: comprintf("\tsign_extend_8_rr(tmp,src);\n"); break; + case sz_word: comprintf("\tsign_extend_16_rr(tmp,src);\n"); break; + case sz_long: comprintf("\ttmp=src;\n"); break; + default: assert(0); + } + comprintf("\tadd_l(dst,tmp);\n"); + genastore ("dst", curi->dmode, "dstreg", sz_long, "dst"); + break; + + case i_ADDX: +#ifdef DISABLE_I_ADDX + failure; +#endif + isaddx; + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + start_brace(); + genflags (flag_addx, curi->size, "", "src", "dst"); + genastore ("dst", curi->dmode, "dstreg", curi->size, "dst"); + break; + + case i_ABCD: + failure; + /* No BCD maths for me.... 
*/ + break; + + case i_NEG: +#ifdef DISABLE_I_NEG + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace (); + comprintf("\tint dst=scratchie++;\n"); + comprintf("\tmov_l_ri(dst,0);\n"); + genflags (flag_sub, curi->size, "", "src", "dst"); + genastore ("dst", curi->smode, "srcreg", curi->size, "src"); + break; + + case i_NEGX: +#ifdef DISABLE_I_NEGX + failure; +#endif + isaddx; + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace (); + comprintf("\tint dst=scratchie++;\n"); + comprintf("\tmov_l_ri(dst,0);\n"); + genflags (flag_subx, curi->size, "", "src", "dst"); + genastore ("dst", curi->smode, "srcreg", curi->size, "src"); + break; + + case i_NBCD: + failure; + /* Nope! */ + break; + + case i_CLR: +#ifdef DISABLE_I_CLR + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", 2, 0); + start_brace(); + comprintf("\tint dst=scratchie++;\n"); + comprintf("\tmov_l_ri(dst,0);\n"); + genflags (flag_logical, curi->size, "dst", "", ""); + genastore ("dst", curi->smode, "srcreg", curi->size, "src"); + break; + + case i_NOT: +#ifdef DISABLE_I_NOT + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace (); + comprintf("\tint dst=scratchie++;\n"); + comprintf("\tmov_l_ri(dst,0xffffffff);\n"); + genflags (flag_eor, curi->size, "", "src", "dst"); + genastore ("dst", curi->smode, "srcreg", curi->size, "src"); + break; + + case i_TST: +#ifdef DISABLE_I_TST + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genflags (flag_logical, curi->size, "src", "", ""); + break; + case i_BCHG: + case i_BCLR: + case i_BSET: + case i_BTST: +#ifdef DISABLE_I_BCHG_BCLR_BSET_BTST + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + start_brace(); + comprintf("\tint s=scratchie++;\n" + "\tint tmp=scratchie++;\n" + "\tmov_l_rr(s,src);\n"); + if (curi->size == sz_byte) 
+ comprintf("\tand_l_ri(s,7);\n"); + else + comprintf("\tand_l_ri(s,31);\n"); + + { + const char* op; + int need_write=1; + + switch(curi->mnemo) { + case i_BCHG: op="btc"; break; + case i_BCLR: op="btr"; break; + case i_BSET: op="bts"; break; + case i_BTST: op="bt"; need_write=0; break; + default: op=""; assert(0); + } + comprintf("\t%s_l_rr(dst,s);\n" /* Answer now in C */ + "\tsbb_l(s,s);\n" /* s is 0 if bit was 0, -1 otherwise */ + "\tmake_flags_live();\n" /* Get the flags back */ + "\tdont_care_flags();\n",op); + if (!noflags) { + comprintf("\tstart_needflags();\n" + "\tset_zero(s,tmp);\n" + "\tlive_flags();\n" + "\tend_needflags();\n"); + } + if (need_write) + genastore ("dst", curi->dmode, "dstreg", curi->size, "dst"); + } + break; + + case i_CMPM: + case i_CMP: +#ifdef DISABLE_I_CMPM_CMP + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + start_brace (); + genflags (flag_cmp, curi->size, "", "src", "dst"); + break; + + case i_CMPA: +#ifdef DISABLE_I_CMPA + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->dmode, "dstreg", sz_long, "dst", 1, 0); + start_brace(); + comprintf("\tint tmps=scratchie++;\n"); + switch(curi->size) { + case sz_byte: comprintf("\tsign_extend_8_rr(tmps,src);\n"); break; + case sz_word: comprintf("\tsign_extend_16_rr(tmps,src);\n"); break; + case sz_long: comprintf("tmps=src;\n"); break; + default: assert(0); + } + genflags (flag_cmp, sz_long, "", "tmps", "dst"); + break; + /* The next two are coded a little unconventional, but they are doing + * weird things... 
*/ + + case i_MVPRM: + isjump; + failure; + break; + + case i_MVPMR: + isjump; + failure; + break; + + case i_MOVE: +#ifdef DISABLE_I_MOVE + failure; +#endif + switch(curi->dmode) { + case Dreg: + case Areg: + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0); + genflags (flag_mov, curi->size, "", "src", "dst"); + genastore ("dst", curi->dmode, "dstreg", curi->size, "dst"); + break; + default: /* It goes to memory, not a register */ + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0); + genflags (flag_logical, curi->size, "src", "", ""); + genastore ("src", curi->dmode, "dstreg", curi->size, "dst"); + break; + } + break; + + case i_MOVEA: +#ifdef DISABLE_I_MOVEA + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0); + + start_brace(); + comprintf("\tint tmps=scratchie++;\n"); + switch(curi->size) { + case sz_word: comprintf("\tsign_extend_16_rr(dst,src);\n"); break; + case sz_long: comprintf("\tmov_l_rr(dst,src);\n"); break; + default: assert(0); + } + genastore ("dst", curi->dmode, "dstreg", sz_long, "dst"); + break; + + case i_MVSR2: + isjump; + failure; + break; + + case i_MV2SR: + isjump; + failure; + break; + + case i_SWAP: +#ifdef DISABLE_I_SWAP + failure; +#endif + genamode (curi->smode, "srcreg", sz_long, "src", 1, 0); + comprintf("\tdont_care_flags();\n"); + comprintf("\trol_l_ri(src,16);\n"); + genflags (flag_logical, sz_long, "src", "", ""); + genastore ("src", curi->smode, "srcreg", sz_long, "src"); + break; + + case i_EXG: +#ifdef DISABLE_I_EXG + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + start_brace(); + comprintf("\tint tmp=scratchie++;\n" + "\tmov_l_rr(tmp,src);\n"); + genastore ("dst", curi->smode, "srcreg", curi->size, "src"); + 
genastore ("tmp", curi->dmode, "dstreg", curi->size, "dst"); + break; + + case i_EXT: +#ifdef DISABLE_I_EXT + failure; +#endif + genamode (curi->smode, "srcreg", sz_long, "src", 1, 0); + comprintf("\tdont_care_flags();\n"); + start_brace (); + switch (curi->size) + { + case sz_byte: + comprintf ("\tint dst = src;\n" + "\tsign_extend_8_rr(src,src);\n"); + break; + case sz_word: + comprintf ("\tint dst = scratchie++;\n" + "\tsign_extend_8_rr(dst,src);\n"); + break; + case sz_long: + comprintf ("\tint dst = src;\n" + "\tsign_extend_16_rr(src,src);\n"); + break; + default: + assert(0); + } + genflags (flag_logical, + curi->size == sz_word ? sz_word : sz_long, "dst", "", ""); + genastore ("dst", curi->smode, "srcreg", + curi->size == sz_word ? sz_word : sz_long, "src"); + break; + + case i_MVMEL: +#ifdef DISABLE_I_MVEL + failure; +#endif + genmovemel (opcode); + break; + + case i_MVMLE: +#ifdef DISABLE_I_MVMLE + failure; +#endif + genmovemle (opcode); + break; + + case i_TRAP: + isjump; + failure; + break; + + case i_MVR2USP: + isjump; + failure; + break; + + case i_MVUSP2R: + isjump; + failure; + break; + + case i_RESET: + isjump; + failure; + break; + + case i_NOP: + break; + + case i_STOP: + isjump; + failure; + break; + + case i_RTE: + isjump; + failure; + break; + + case i_RTD: +#ifdef DISABLE_I_RTD + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "offs", 1, 0); + /* offs is constant */ + comprintf("\tadd_l_ri(offs,4);\n"); + start_brace(); + comprintf("\tint newad=scratchie++;\n" + "\treadlong(15,newad,scratchie);\n" + "\tmov_l_mr((uintptr)®s.pc,newad);\n" + "\tget_n_addr_jmp(newad,PC_P,scratchie);\n" + "\tmov_l_mr((uintptr)®s.pc_oldp,PC_P);\n" + "\tm68k_pc_offset=0;\n" + "\tadd_l(15,offs);\n"); + gen_update_next_handler(); + isjump; + break; + + case i_LINK: +#ifdef DISABLE_I_LINK + failure; +#endif + genamode (curi->smode, "srcreg", sz_long, "src", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "offs", 1, 0); + 
comprintf("\tsub_l_ri(15,4);\n" + "\twritelong_clobber(15,src,scratchie);\n" + "\tmov_l_rr(src,15);\n"); + if (curi->size==sz_word) + comprintf("\tsign_extend_16_rr(offs,offs);\n"); + comprintf("\tadd_l(15,offs);\n"); + genastore ("src", curi->smode, "srcreg", sz_long, "src"); + break; + + case i_UNLK: +#ifdef DISABLE_I_UNLK + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + comprintf("\tmov_l_rr(15,src);\n" + "\treadlong(15,src,scratchie);\n" + "\tadd_l_ri(15,4);\n"); + genastore ("src", curi->smode, "srcreg", curi->size, "src"); + break; + + case i_RTS: +#ifdef DISABLE_I_RTS + failure; +#endif + comprintf("\tint newad=scratchie++;\n" + "\treadlong(15,newad,scratchie);\n" + "\tmov_l_mr((uintptr)®s.pc,newad);\n" + "\tget_n_addr_jmp(newad,PC_P,scratchie);\n" + "\tmov_l_mr((uintptr)®s.pc_oldp,PC_P);\n" + "\tm68k_pc_offset=0;\n" + "\tlea_l_brr(15,15,4);\n"); + gen_update_next_handler(); + isjump; + break; + + case i_TRAPV: + isjump; + failure; + break; + + case i_RTR: + isjump; + failure; + break; + + case i_JSR: +#ifdef DISABLE_I_JSR + failure; +#endif + isjump; + genamode (curi->smode, "srcreg", curi->size, "src", 0, 0); + start_brace(); + comprintf("\tuae_u32 retadd=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n"); + comprintf("\tint ret=scratchie++;\n" + "\tmov_l_ri(ret,retadd);\n" + "\tsub_l_ri(15,4);\n" + "\twritelong_clobber(15,ret,scratchie);\n"); + comprintf("\tmov_l_mr((uintptr)®s.pc,srca);\n" + "\tget_n_addr_jmp(srca,PC_P,scratchie);\n" + "\tmov_l_mr((uintptr)®s.pc_oldp,PC_P);\n" + "\tm68k_pc_offset=0;\n"); + gen_update_next_handler(); + break; + + case i_JMP: +#ifdef DISABLE_I_JMP + failure; +#endif + isjump; + genamode (curi->smode, "srcreg", curi->size, "src", 0, 0); + comprintf("\tmov_l_mr((uintptr)®s.pc,srca);\n" + "\tget_n_addr_jmp(srca,PC_P,scratchie);\n" + "\tmov_l_mr((uintptr)®s.pc_oldp,PC_P);\n" + "\tm68k_pc_offset=0;\n"); + gen_update_next_handler(); + break; + + case i_BSR: +#ifdef DISABLE_I_BSR 
+ failure; +#endif + is_const_jump; + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace(); + comprintf("\tuae_u32 retadd=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n"); + comprintf("\tint ret=scratchie++;\n" + "\tmov_l_ri(ret,retadd);\n" + "\tsub_l_ri(15,4);\n" + "\twritelong_clobber(15,ret,scratchie);\n"); + comprintf("\tadd_l_ri(src,m68k_pc_offset_thisinst+2);\n"); + comprintf("\tm68k_pc_offset=0;\n"); + comprintf("\tadd_l(PC_P,src);\n"); + + comprintf("\tcomp_pc_p=(uae_u8*)(uintptr)get_const(PC_P);\n"); + break; + + case i_Bcc: +#ifdef DISABLE_I_BCC + failure; +#endif + comprintf("\tuae_u32 v,v1,v2;\n"); + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + /* That source is an immediate, so we can clobber it with abandon */ + switch(curi->size) { + case sz_byte: comprintf("\tsign_extend_8_rr(src,src);\n"); break; + case sz_word: comprintf("\tsign_extend_16_rr(src,src);\n"); break; + case sz_long: break; + } + comprintf("\tsub_l_ri(src,m68k_pc_offset-m68k_pc_offset_thisinst-2);\n"); + /* Leave the following as "add" --- it will allow it to be optimized + away due to src being a constant ;-) */ + comprintf("\tadd_l_ri(src,(uintptr)comp_pc_p);\n"); + comprintf("\tmov_l_ri(PC_P,(uintptr)comp_pc_p);\n"); + /* Now they are both constant. Might as well fold in m68k_pc_offset */ + comprintf("\tadd_l_ri(src,m68k_pc_offset);\n"); + comprintf("\tadd_l_ri(PC_P,m68k_pc_offset);\n"); + comprintf("\tm68k_pc_offset=0;\n"); + + if (curi->cc>=2) { + comprintf("\tv1=get_const(PC_P);\n" + "\tv2=get_const(src);\n" + "\tregister_branch(v1,v2,%d);\n", + cond_codes[curi->cc]); + comprintf("\tmake_flags_live();\n"); /* Load the flags */ + isjump; + } + else { + is_const_jump; + } + + switch(curi->cc) { + case 0: /* Unconditional jump */ + comprintf("\tmov_l_rr(PC_P,src);\n"); + comprintf("\tcomp_pc_p=(uae_u8*)(uintptr)get_const(PC_P);\n"); + break; + case 1: break; /* This is silly! */ + case 8: failure; break; /* Work out details! 
FIXME */ + case 9: failure; break; /* Not critical, though! */ + + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + break; + default: assert(0); + } + break; + + case i_LEA: +#ifdef DISABLE_I_LEA + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", 0, 0); + genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0); + genastore ("srca", curi->dmode, "dstreg", curi->size, "dst"); + break; + + case i_PEA: +#ifdef DISABLE_I_PEA + failure; +#endif + if (table68k[opcode].smode==Areg || + table68k[opcode].smode==Aind || + table68k[opcode].smode==Aipi || + table68k[opcode].smode==Apdi || + table68k[opcode].smode==Ad16 || + table68k[opcode].smode==Ad8r) + comprintf("if (srcreg==7) dodgy=1;\n"); + + genamode (curi->smode, "srcreg", curi->size, "src", 0, 0); + genamode (Apdi, "7", sz_long, "dst", 2, 0); + genastore ("srca", Apdi, "7", sz_long, "dst"); + break; + + case i_DBcc: +#ifdef DISABLE_I_DBCC + failure; +#endif + isjump; + uses_cmov; + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "offs", 1, 0); + + /* That offs is an immediate, so we can clobber it with abandon */ + switch(curi->size) { + case sz_word: comprintf("\tsign_extend_16_rr(offs,offs);\n"); break; + default: assert(0); /* Seems this only comes in word flavour */ + } + comprintf("\tsub_l_ri(offs,m68k_pc_offset-m68k_pc_offset_thisinst-2);\n"); + comprintf("\tadd_l_ri(offs,(uintptr)comp_pc_p);\n"); /* New PC, + once the + offset_68k is + * also added */ + /* Let's fold in the m68k_pc_offset at this point */ + comprintf("\tadd_l_ri(offs,m68k_pc_offset);\n"); + comprintf("\tadd_l_ri(PC_P,m68k_pc_offset);\n"); + comprintf("\tm68k_pc_offset=0;\n"); + + start_brace(); + comprintf("\tint nsrc=scratchie++;\n"); + + if (curi->cc>=2) { + comprintf("\tmake_flags_live();\n"); /* Load the flags */ + } + + assert (curi->size==sz_word); + + switch(curi->cc) { + case 0: /* 
This is an elaborate nop? */ + break; + case 1: + comprintf("\tstart_needflags();\n"); + comprintf("\tsub_w_ri(src,1);\n"); + comprintf("\t end_needflags();\n"); + start_brace(); + comprintf("\tuae_u32 v2,v;\n" + "\tuae_u32 v1=get_const(PC_P);\n"); + comprintf("\tv2=get_const(offs);\n" + "\tregister_branch(v1,v2,%d);\n", NATIVE_CC_CC); + break; + + case 8: failure; break; /* Work out details! FIXME */ + case 9: failure; break; /* Not critical, though! */ + + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + comprintf("\tmov_l_rr(nsrc,src);\n"); + comprintf("\tlea_l_brr(scratchie,src,(uae_s32)-1);\n" + "\tmov_w_rr(src,scratchie);\n"); + comprintf("\tcmov_l_rr(offs,PC_P,%d);\n", + cond_codes[curi->cc]); + comprintf("\tcmov_l_rr(src,nsrc,%d);\n", + cond_codes[curi->cc]); + /* OK, now for cc=true, we have src==nsrc and offs==PC_P, + so whether we move them around doesn't matter. However, + if cc=false, we have offs==jump_pc, and src==nsrc-1 */ + + comprintf("\t start_needflags();\n"); + comprintf("\ttest_w_rr(nsrc,nsrc);\n"); + comprintf("\t end_needflags();\n"); + comprintf("\tcmov_l_rr(PC_P,offs,%d);\n", NATIVE_CC_NE); + break; + default: assert(0); + } + genastore ("src", curi->smode, "srcreg", curi->size, "src"); + gen_update_next_handler(); + break; + + case i_Scc: +#ifdef DISABLE_I_SCC + failure; +#endif + genamode (curi->smode, "srcreg", curi->size, "src", 2, 0); + start_brace (); + comprintf ("\tint val = scratchie++;\n"); + + /* We set val to 0 if we really should use 255, and to 1 for real 0 */ + switch(curi->cc) { + case 0: /* Unconditional set */ + comprintf("\tmov_l_ri(val,0);\n"); + break; + case 1: + /* Unconditional not-set */ + comprintf("\tmov_l_ri(val,1);\n"); + break; + case 8: failure; break; /* Work out details! FIXME */ + case 9: failure; break; /* Not critical, though! 
*/ + + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + comprintf("\tmake_flags_live();\n"); /* Load the flags */ + /* All condition codes can be inverted by changing the LSB */ + comprintf("\tsetcc(val,%d);\n", + cond_codes[curi->cc]^1); break; + default: assert(0); + } + comprintf("\tsub_b_ri(val,1);\n"); + genastore ("val", curi->smode, "srcreg", curi->size, "src"); + break; + + case i_DIVU: + isjump; + failure; + break; + + case i_DIVS: + isjump; + failure; + break; + + case i_MULU: +#ifdef DISABLE_I_MULU + failure; +#endif + comprintf("\tdont_care_flags();\n"); + genamode (curi->smode, "srcreg", sz_word, "src", 1, 0); + genamode (curi->dmode, "dstreg", sz_word, "dst", 1, 0); + /* To do 16x16 unsigned multiplication, we actually use + 32x32 signed, and zero-extend the registers first. + That solves the problem of MUL needing dedicated registers + on the x86 */ + comprintf("\tzero_extend_16_rr(scratchie,src);\n" + "\tzero_extend_16_rr(dst,dst);\n" + "\timul_32_32(dst,scratchie);\n"); + genflags (flag_logical, sz_long, "dst", "", ""); + genastore ("dst", curi->dmode, "dstreg", sz_long, "dst"); + break; + + case i_MULS: +#ifdef DISABLE_I_MULS + failure; +#endif + comprintf("\tdont_care_flags();\n"); + genamode (curi->smode, "srcreg", sz_word, "src", 1, 0); + genamode (curi->dmode, "dstreg", sz_word, "dst", 1, 0); + comprintf("\tsign_extend_16_rr(scratchie,src);\n" + "\tsign_extend_16_rr(dst,dst);\n" + "\timul_32_32(dst,scratchie);\n"); + genflags (flag_logical, sz_long, "dst", "", ""); + genastore ("dst", curi->dmode, "dstreg", sz_long, "dst"); + break; + + case i_CHK: + isjump; + failure; + break; + + case i_CHK2: + isjump; + failure; + break; + + case i_ASR: +#ifdef DISABLE_I_ASR + failure; +#endif + mayfail; + if (curi->smode==Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " " RETURN "\n" + "} \n"); + start_brace(); + } + 
comprintf("\tdont_care_flags();\n"); + + genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0); + if (curi->smode!=immi) { + if (!noflags) { + uses_cmov; + start_brace(); + comprintf("\tint highmask;\n" + "\tint width;\n" + "\tint cdata=scratchie++;\n" + "\tint tmpcnt=scratchie++;\n" + "\tint highshift=scratchie++;\n"); + comprintf("\tmov_l_rr(tmpcnt,cnt);\n" + "\tand_l_ri(tmpcnt,63);\n" + "\tmov_l_ri(cdata,0);\n" + "\tcmov_l_rr(cdata,data,%d);\n", NATIVE_CC_NE); + /* cdata is now either data (for shift count!=0) or + 0 (for shift count==0) */ + switch(curi->size) { + case sz_byte: comprintf("\tshra_b_rr(data,cnt);\n" + "\thighmask=0x38;\n" + "\twidth=8;\n"); + break; + case sz_word: comprintf("\tshra_w_rr(data,cnt);\n" + "\thighmask=0x30;\n" + "\twidth=16;\n"); + break; + case sz_long: comprintf("\tshra_l_rr(data,cnt);\n" + "\thighmask=0x20;\n" + "\twidth=32;\n"); + break; + default: assert(0); + } + comprintf("test_l_ri(cnt,highmask);\n" + "mov_l_ri(highshift,0);\n" + "mov_l_ri(scratchie,width/2);\n" + "cmov_l_rr(highshift,scratchie,%d);\n", NATIVE_CC_NE); + /* The x86 masks out bits, so we now make sure that things + really get shifted as much as planned */ + switch(curi->size) { + case sz_byte: comprintf("\tshra_b_rr(data,highshift);\n");break; + case sz_word: comprintf("\tshra_w_rr(data,highshift);\n");break; + case sz_long: comprintf("\tshra_l_rr(data,highshift);\n");break; + default: assert(0); + } + /* And again */ + switch(curi->size) { + case sz_byte: comprintf("\tshra_b_rr(data,highshift);\n");break; + case sz_word: comprintf("\tshra_w_rr(data,highshift);\n");break; + case sz_long: comprintf("\tshra_l_rr(data,highshift);\n");break; + default: assert(0); + } + + /* Result of shift is now in data. 
Now we need to determine + the carry by shifting cdata one less */ + comprintf("\tsub_l_ri(tmpcnt,1);\n"); + switch(curi->size) { + case sz_byte: comprintf("\tshra_b_rr(cdata,tmpcnt);\n");break; + case sz_word: comprintf("\tshra_w_rr(cdata,tmpcnt);\n");break; + case sz_long: comprintf("\tshra_l_rr(cdata,tmpcnt);\n");break; + default: assert(0); + } + /* If the shift count was higher than the width, we need + to pick up the sign from data */ + comprintf("test_l_ri(tmpcnt,highmask);\n" + "cmov_l_rr(cdata,data,%d);\n", NATIVE_CC_NE); + /* And create the flags */ + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch(curi->size) { + case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break; + case sz_word: comprintf("\t test_w_rr(data,data);\n"); break; + case sz_long: comprintf("\t test_l_rr(data,data);\n"); break; + } + comprintf("\t bt_l_ri(cdata,0);\n"); /* Set C */ + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + genastore ("data", curi->dmode, "dstreg", curi->size, "data"); + } + else { + uses_cmov; + start_brace(); + comprintf("\tint highmask;\n" + "\tint width;\n" + "\tint highshift=scratchie++;\n"); + switch(curi->size) { + case sz_byte: comprintf("\tshra_b_rr(data,cnt);\n" + "\thighmask=0x38;\n" + "\twidth=8;\n"); + break; + case sz_word: comprintf("\tshra_w_rr(data,cnt);\n" + "\thighmask=0x30;\n" + "\twidth=16;\n"); + break; + case sz_long: comprintf("\tshra_l_rr(data,cnt);\n" + "\thighmask=0x20;\n" + "\twidth=32;\n"); + break; + default: assert(0); + } + comprintf("test_l_ri(cnt,highmask);\n" + "mov_l_ri(highshift,0);\n" + "mov_l_ri(scratchie,width/2);\n" + "cmov_l_rr(highshift,scratchie,%d);\n",NATIVE_CC_NE); + /* The x86 masks out bits, so we now make sure that things + really get shifted as much as planned */ + switch(curi->size) { + case sz_byte: 
comprintf("\tshra_b_rr(data,highshift);\n");break; + case sz_word: comprintf("\tshra_w_rr(data,highshift);\n");break; + case sz_long: comprintf("\tshra_l_rr(data,highshift);\n");break; + default: assert(0); + } + /* And again */ + switch(curi->size) { + case sz_byte: comprintf("\tshra_b_rr(data,highshift);\n");break; + case sz_word: comprintf("\tshra_w_rr(data,highshift);\n");break; + case sz_long: comprintf("\tshra_l_rr(data,highshift);\n");break; + default: assert(0); + } + genastore ("data", curi->dmode, "dstreg", curi->size, "data"); + } + } + else { + start_brace(); + comprintf("\tint tmp=scratchie++;\n" + "\tint bp;\n" + "\tmov_l_rr(tmp,data);\n"); + switch(curi->size) { + case sz_byte: comprintf("\tshra_b_ri(data,srcreg);\n" + "\tbp=srcreg-1;\n"); break; + case sz_word: comprintf("\tshra_w_ri(data,srcreg);\n" + "\tbp=srcreg-1;\n"); break; + case sz_long: comprintf("\tshra_l_ri(data,srcreg);\n" + "\tbp=srcreg-1;\n"); break; + default: assert(0); + } + + if (!noflags) { + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch(curi->size) { + case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break; + case sz_word: comprintf("\t test_w_rr(data,data);\n"); break; + case sz_long: comprintf("\t test_l_rr(data,data);\n"); break; + } + comprintf("\t bt_l_ri(tmp,bp);\n"); /* Set C */ + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } + genastore ("data", curi->dmode, "dstreg", curi->size, "data"); + } + break; + + case i_ASL: +#ifdef DISABLE_I_ASL + failure; +#endif + mayfail; + if (curi->smode==Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " " RETURN "\n" + "} \n"); + start_brace(); + } + comprintf("\tdont_care_flags();\n"); + /* Except for the handling of the V flag, this is identical to + LSL. 
The handling of V is, uhm, unpleasant, so if it's needed, + let the normal emulation handle it. Shoulders of giants kinda + thing ;-) */ + comprintf("if (needed_flags & FLAG_V) {\n" + " FAIL(1);\n" + " " RETURN "\n" + "} \n"); + + genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0); + if (curi->smode!=immi) { + if (!noflags) { + uses_cmov; + start_brace(); + comprintf("\tint highmask;\n" + "\tint cdata=scratchie++;\n" + "\tint tmpcnt=scratchie++;\n"); + comprintf("\tmov_l_rr(tmpcnt,cnt);\n" + "\tand_l_ri(tmpcnt,63);\n" + "\tmov_l_ri(cdata,0);\n" + "\tcmov_l_rr(cdata,data,%d);\n",NATIVE_CC_NE); + /* cdata is now either data (for shift count!=0) or + 0 (for shift count==0) */ + switch(curi->size) { + case sz_byte: comprintf("\tshll_b_rr(data,cnt);\n" + "\thighmask=0x38;\n"); + break; + case sz_word: comprintf("\tshll_w_rr(data,cnt);\n" + "\thighmask=0x30;\n"); + break; + case sz_long: comprintf("\tshll_l_rr(data,cnt);\n" + "\thighmask=0x20;\n"); + break; + default: assert(0); + } + comprintf("test_l_ri(cnt,highmask);\n" + "mov_l_ri(scratchie,0);\n" + "cmov_l_rr(scratchie,data,%d);\n",NATIVE_CC_EQ); + switch(curi->size) { + case sz_byte: comprintf("\tmov_b_rr(data,scratchie);\n");break; + case sz_word: comprintf("\tmov_w_rr(data,scratchie);\n");break; + case sz_long: comprintf("\tmov_l_rr(data,scratchie);\n");break; + default: assert(0); + } + /* Result of shift is now in data. 
Now we need to determine + the carry by shifting cdata one less */ + comprintf("\tsub_l_ri(tmpcnt,1);\n"); + switch(curi->size) { + case sz_byte: comprintf("\tshll_b_rr(cdata,tmpcnt);\n");break; + case sz_word: comprintf("\tshll_w_rr(cdata,tmpcnt);\n");break; + case sz_long: comprintf("\tshll_l_rr(cdata,tmpcnt);\n");break; + default: assert(0); + } + comprintf("test_l_ri(tmpcnt,highmask);\n" + "mov_l_ri(scratchie,0);\n" + "cmov_l_rr(cdata,scratchie,%d);\n",NATIVE_CC_NE); + /* And create the flags */ + comprintf("\tstart_needflags();\n"); + + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch(curi->size) { + case sz_byte: comprintf("\t test_b_rr(data,data);\n"); + comprintf("\t bt_l_ri(cdata,7);\n"); break; + case sz_word: comprintf("\t test_w_rr(data,data);\n"); + comprintf("\t bt_l_ri(cdata,15);\n"); break; + case sz_long: comprintf("\t test_l_rr(data,data);\n"); + comprintf("\t bt_l_ri(cdata,31);\n"); break; + } + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + genastore ("data", curi->dmode, "dstreg", curi->size, "data"); + } + else { + uses_cmov; + start_brace(); + comprintf("\tint highmask;\n"); + switch(curi->size) { + case sz_byte: comprintf("\tshll_b_rr(data,cnt);\n" + "\thighmask=0x38;\n"); + break; + case sz_word: comprintf("\tshll_w_rr(data,cnt);\n" + "\thighmask=0x30;\n"); + break; + case sz_long: comprintf("\tshll_l_rr(data,cnt);\n" + "\thighmask=0x20;\n"); + break; + default: assert(0); + } + comprintf("test_l_ri(cnt,highmask);\n" + "mov_l_ri(scratchie,0);\n" + "cmov_l_rr(scratchie,data,%d);\n",NATIVE_CC_EQ); + switch(curi->size) { + case sz_byte: comprintf("\tmov_b_rr(data,scratchie);\n");break; + case sz_word: comprintf("\tmov_w_rr(data,scratchie);\n");break; + case sz_long: comprintf("\tmov_l_rr(data,scratchie);\n");break; + default: assert(0); + } + genastore ("data", curi->dmode, "dstreg", curi->size, 
"data"); + } + } + else { + start_brace(); + comprintf("\tint tmp=scratchie++;\n" + "\tint bp;\n" + "\tmov_l_rr(tmp,data);\n"); + switch(curi->size) { + case sz_byte: comprintf("\tshll_b_ri(data,srcreg);\n" + "\tbp=8-srcreg;\n"); break; + case sz_word: comprintf("\tshll_w_ri(data,srcreg);\n" + "\tbp=16-srcreg;\n"); break; + case sz_long: comprintf("\tshll_l_ri(data,srcreg);\n" + "\tbp=32-srcreg;\n"); break; + default: assert(0); + } + + if (!noflags) { + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch(curi->size) { + case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break; + case sz_word: comprintf("\t test_w_rr(data,data);\n"); break; + case sz_long: comprintf("\t test_l_rr(data,data);\n"); break; + } + comprintf("\t bt_l_ri(tmp,bp);\n"); /* Set C */ + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } + genastore ("data", curi->dmode, "dstreg", curi->size, "data"); + } + break; + + case i_LSR: +#ifdef DISABLE_I_LSR + failure; +#endif + mayfail; + if (curi->smode==Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " " RETURN "\n" + "} \n"); + start_brace(); + } + comprintf("\tdont_care_flags();\n"); + + genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0); + if (curi->smode!=immi) { + if (!noflags) { + uses_cmov; + start_brace(); + comprintf("\tint highmask;\n" + "\tint cdata=scratchie++;\n" + "\tint tmpcnt=scratchie++;\n"); + comprintf("\tmov_l_rr(tmpcnt,cnt);\n" + "\tand_l_ri(tmpcnt,63);\n" + "\tmov_l_ri(cdata,0);\n" + "\tcmov_l_rr(cdata,data,%d);\n",NATIVE_CC_NE); + /* cdata is now either data (for shift count!=0) or + 0 (for shift count==0) */ + switch(curi->size) { + case sz_byte: comprintf("\tshrl_b_rr(data,cnt);\n" + "\thighmask=0x38;\n"); + break; + case sz_word: 
comprintf("\tshrl_w_rr(data,cnt);\n" + "\thighmask=0x30;\n"); + break; + case sz_long: comprintf("\tshrl_l_rr(data,cnt);\n" + "\thighmask=0x20;\n"); + break; + default: assert(0); + } + comprintf("test_l_ri(cnt,highmask);\n" + "mov_l_ri(scratchie,0);\n" + "cmov_l_rr(scratchie,data,%d);\n",NATIVE_CC_EQ); + switch(curi->size) { + case sz_byte: comprintf("\tmov_b_rr(data,scratchie);\n");break; + case sz_word: comprintf("\tmov_w_rr(data,scratchie);\n");break; + case sz_long: comprintf("\tmov_l_rr(data,scratchie);\n");break; + default: assert(0); + } + /* Result of shift is now in data. Now we need to determine + the carry by shifting cdata one less */ + comprintf("\tsub_l_ri(tmpcnt,1);\n"); + switch(curi->size) { + case sz_byte: comprintf("\tshrl_b_rr(cdata,tmpcnt);\n");break; + case sz_word: comprintf("\tshrl_w_rr(cdata,tmpcnt);\n");break; + case sz_long: comprintf("\tshrl_l_rr(cdata,tmpcnt);\n");break; + default: assert(0); + } + comprintf("test_l_ri(tmpcnt,highmask);\n" + "mov_l_ri(scratchie,0);\n" + "cmov_l_rr(cdata,scratchie,%d);\n",NATIVE_CC_NE); + /* And create the flags */ + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch(curi->size) { + case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break; + case sz_word: comprintf("\t test_w_rr(data,data);\n"); break; + case sz_long: comprintf("\t test_l_rr(data,data);\n"); break; + } + comprintf("\t bt_l_ri(cdata,0);\n"); /* Set C */ + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + genastore ("data", curi->dmode, "dstreg", curi->size, "data"); + } + else { + uses_cmov; + start_brace(); + comprintf("\tint highmask;\n"); + switch(curi->size) { + case sz_byte: comprintf("\tshrl_b_rr(data,cnt);\n" + "\thighmask=0x38;\n"); + break; + case sz_word: comprintf("\tshrl_w_rr(data,cnt);\n" + "\thighmask=0x30;\n"); + break; + case sz_long: 
comprintf("\tshrl_l_rr(data,cnt);\n" + "\thighmask=0x20;\n"); + break; + default: assert(0); + } + comprintf("test_l_ri(cnt,highmask);\n" + "mov_l_ri(scratchie,0);\n" + "cmov_l_rr(scratchie,data,%d);\n",NATIVE_CC_EQ); + switch(curi->size) { + case sz_byte: comprintf("\tmov_b_rr(data,scratchie);\n");break; + case sz_word: comprintf("\tmov_w_rr(data,scratchie);\n");break; + case sz_long: comprintf("\tmov_l_rr(data,scratchie);\n");break; + default: assert(0); + } + genastore ("data", curi->dmode, "dstreg", curi->size, "data"); + } + } + else { + start_brace(); + comprintf("\tint tmp=scratchie++;\n" + "\tint bp;\n" + "\tmov_l_rr(tmp,data);\n"); + switch(curi->size) { + case sz_byte: comprintf("\tshrl_b_ri(data,srcreg);\n" + "\tbp=srcreg-1;\n"); break; + case sz_word: comprintf("\tshrl_w_ri(data,srcreg);\n" + "\tbp=srcreg-1;\n"); break; + case sz_long: comprintf("\tshrl_l_ri(data,srcreg);\n" + "\tbp=srcreg-1;\n"); break; + default: assert(0); + } + + if (!noflags) { + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch(curi->size) { + case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break; + case sz_word: comprintf("\t test_w_rr(data,data);\n"); break; + case sz_long: comprintf("\t test_l_rr(data,data);\n"); break; + } + comprintf("\t bt_l_ri(tmp,bp);\n"); /* Set C */ + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } + genastore ("data", curi->dmode, "dstreg", curi->size, "data"); + } + break; + + case i_LSL: +#ifdef DISABLE_I_LSL + failure; +#endif + mayfail; + if (curi->smode==Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " " RETURN "\n" + "} \n"); + start_brace(); + } + comprintf("\tdont_care_flags();\n"); + + genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0); + if 
(curi->smode!=immi) { + if (!noflags) { + uses_cmov; + start_brace(); + comprintf("\tint highmask;\n" + "\tint cdata=scratchie++;\n" + "\tint tmpcnt=scratchie++;\n"); + comprintf("\tmov_l_rr(tmpcnt,cnt);\n" + "\tand_l_ri(tmpcnt,63);\n" + "\tmov_l_ri(cdata,0);\n" + "\tcmov_l_rr(cdata,data,%d);\n",NATIVE_CC_NE); + /* cdata is now either data (for shift count!=0) or + 0 (for shift count==0) */ + switch(curi->size) { + case sz_byte: comprintf("\tshll_b_rr(data,cnt);\n" + "\thighmask=0x38;\n"); + break; + case sz_word: comprintf("\tshll_w_rr(data,cnt);\n" + "\thighmask=0x30;\n"); + break; + case sz_long: comprintf("\tshll_l_rr(data,cnt);\n" + "\thighmask=0x20;\n"); + break; + default: assert(0); + } + comprintf("test_l_ri(cnt,highmask);\n" + "mov_l_ri(scratchie,0);\n" + "cmov_l_rr(scratchie,data,%d);\n",NATIVE_CC_EQ); + switch(curi->size) { + case sz_byte: comprintf("\tmov_b_rr(data,scratchie);\n");break; + case sz_word: comprintf("\tmov_w_rr(data,scratchie);\n");break; + case sz_long: comprintf("\tmov_l_rr(data,scratchie);\n");break; + default: assert(0); + } + /* Result of shift is now in data. 
Now we need to determine + the carry by shifting cdata one less */ + comprintf("\tsub_l_ri(tmpcnt,1);\n"); + switch(curi->size) { + case sz_byte: comprintf("\tshll_b_rr(cdata,tmpcnt);\n");break; + case sz_word: comprintf("\tshll_w_rr(cdata,tmpcnt);\n");break; + case sz_long: comprintf("\tshll_l_rr(cdata,tmpcnt);\n");break; + default: assert(0); + } + comprintf("test_l_ri(tmpcnt,highmask);\n" + "mov_l_ri(scratchie,0);\n" + "cmov_l_rr(cdata,scratchie,%d);\n",NATIVE_CC_NE); + /* And create the flags */ + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch(curi->size) { + case sz_byte: comprintf("\t test_b_rr(data,data);\n"); + comprintf("\t bt_l_ri(cdata,7);\n"); break; + case sz_word: comprintf("\t test_w_rr(data,data);\n"); + comprintf("\t bt_l_ri(cdata,15);\n"); break; + case sz_long: comprintf("\t test_l_rr(data,data);\n"); + comprintf("\t bt_l_ri(cdata,31);\n"); break; + } + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + genastore ("data", curi->dmode, "dstreg", curi->size, "data"); + } + else { + uses_cmov; + start_brace(); + comprintf("\tint highmask;\n"); + switch(curi->size) { + case sz_byte: comprintf("\tshll_b_rr(data,cnt);\n" + "\thighmask=0x38;\n"); + break; + case sz_word: comprintf("\tshll_w_rr(data,cnt);\n" + "\thighmask=0x30;\n"); + break; + case sz_long: comprintf("\tshll_l_rr(data,cnt);\n" + "\thighmask=0x20;\n"); + break; + default: assert(0); + } + comprintf("test_l_ri(cnt,highmask);\n" + "mov_l_ri(scratchie,0);\n" + "cmov_l_rr(scratchie,data,%d);\n",NATIVE_CC_EQ); + switch(curi->size) { + case sz_byte: comprintf("\tmov_b_rr(data,scratchie);\n");break; + case sz_word: comprintf("\tmov_w_rr(data,scratchie);\n");break; + case sz_long: comprintf("\tmov_l_rr(data,scratchie);\n");break; + default: assert(0); + } + genastore ("data", curi->dmode, "dstreg", curi->size, "data"); 
+ } + } + else { + start_brace(); + comprintf("\tint tmp=scratchie++;\n" + "\tint bp;\n" + "\tmov_l_rr(tmp,data);\n"); + switch(curi->size) { + case sz_byte: comprintf("\tshll_b_ri(data,srcreg);\n" + "\tbp=8-srcreg;\n"); break; + case sz_word: comprintf("\tshll_w_ri(data,srcreg);\n" + "\tbp=16-srcreg;\n"); break; + case sz_long: comprintf("\tshll_l_ri(data,srcreg);\n" + "\tbp=32-srcreg;\n"); break; + default: assert(0); + } + + if (!noflags) { + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch(curi->size) { + case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break; + case sz_word: comprintf("\t test_w_rr(data,data);\n"); break; + case sz_long: comprintf("\t test_l_rr(data,data);\n"); break; + } + comprintf("\t bt_l_ri(tmp,bp);\n"); /* Set C */ + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } + genastore ("data", curi->dmode, "dstreg", curi->size, "data"); + } + break; + + case i_ROL: +#ifdef DISABLE_I_ROL + failure; +#endif + mayfail; + if (curi->smode==Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " " RETURN "\n" + "} \n"); + start_brace(); + } + comprintf("\tdont_care_flags();\n"); + genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0); + start_brace (); + + switch(curi->size) { + case sz_long: comprintf("\t rol_l_rr(data,cnt);\n"); break; + case sz_word: comprintf("\t rol_w_rr(data,cnt);\n"); break; + case sz_byte: comprintf("\t rol_b_rr(data,cnt);\n"); break; + } + + if (!noflags) { + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch(curi->size) { + case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break; + case sz_word: comprintf("\t test_w_rr(data,data);\n"); break; + case sz_long: comprintf("\t test_l_rr(data,data);\n"); 
break; + } + comprintf("\t bt_l_ri(data,0x00);\n"); /* Set C */ + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } + genastore ("data", curi->dmode, "dstreg", curi->size, "data"); + break; + + case i_ROR: +#ifdef DISABLE_I_ROR + failure; +#endif + mayfail; + if (curi->smode==Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " " RETURN "\n" + "} \n"); + start_brace(); + } + comprintf("\tdont_care_flags();\n"); + genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0); + start_brace (); + + switch(curi->size) { + case sz_long: comprintf("\t ror_l_rr(data,cnt);\n"); break; + case sz_word: comprintf("\t ror_w_rr(data,cnt);\n"); break; + case sz_byte: comprintf("\t ror_b_rr(data,cnt);\n"); break; + } + + if (!noflags) { + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch(curi->size) { + case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break; + case sz_word: comprintf("\t test_w_rr(data,data);\n"); break; + case sz_long: comprintf("\t test_l_rr(data,data);\n"); break; + } + switch(curi->size) { + case sz_byte: comprintf("\t bt_l_ri(data,0x07);\n"); break; + case sz_word: comprintf("\t bt_l_ri(data,0x0f);\n"); break; + case sz_long: comprintf("\t bt_l_ri(data,0x1f);\n"); break; + } + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } + genastore ("data", curi->dmode, "dstreg", curi->size, "data"); + break; + + case i_ROXL: + failure; + break; + + case i_ROXR: + failure; + break; + + case i_ASRW: + failure; + break; + + case i_ASLW: + failure; + break; + + case i_LSRW: + failure; + break; + + case i_LSLW: + failure; + break; + + case i_ROLW: + failure; + break; + + case i_RORW: + failure; + break; + + case i_ROXLW: + failure; + break; + + case i_ROXRW: + failure; + break; + + case i_MOVEC2: + isjump; + failure; + break; + + case i_MOVE2C: + isjump; + failure; + break; + + case 
i_CAS: + failure; + break; + + case i_CAS2: + failure; + break; + + case i_MOVES: /* ignore DFC and SFC because we have no MMU */ + isjump; + failure; + break; + + case i_BKPT: /* only needed for hardware emulators */ + isjump; + failure; + break; + + case i_CALLM: /* not present in 68030 */ + isjump; + failure; + break; + + case i_RTM: /* not present in 68030 */ + isjump; + failure; + break; + + case i_TRAPcc: + isjump; + failure; + break; + + case i_DIVL: + isjump; + failure; + break; + + case i_MULL: +#ifdef DISABLE_I_MULL + failure; +#endif + if (!noflags) { + failure; + break; + } + comprintf("\tuae_u16 extra=%s;\n",gen_nextiword()); + comprintf("\tint r2=(extra>>12)&7;\n" + "\tint tmp=scratchie++;\n"); + + genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + /* The two operands are in dst and r2 */ + comprintf("\tif (extra&0x0400) {\n" /* Need full 64 bit result */ + "\tint r3=(extra&7);\n" + "\tmov_l_rr(r3,dst);\n"); /* operands now in r3 and r2 */ + comprintf("\tif (extra&0x0800) { \n" /* signed */ + "\t\timul_64_32(r2,r3);\n" + "\t} else { \n" + "\t\tmul_64_32(r2,r3);\n" + "\t} \n"); + /* The result is in r2/tmp, with r2 holding the lower 32 bits */ + comprintf("\t} else {\n"); /* Only want 32 bit result */ + /* operands in dst and r2, result foes into r2 */ + /* shouldn't matter whether it's signed or unsigned?!? 
*/ + comprintf("\timul_32_32(r2,dst);\n" + "\t}\n"); + break; + + case i_BFTST: + case i_BFEXTU: + case i_BFCHG: + case i_BFEXTS: + case i_BFCLR: + case i_BFFFO: + case i_BFSET: + case i_BFINS: + failure; + break; + + case i_PACK: + failure; + break; + + case i_UNPK: + failure; + break; + + case i_TAS: + failure; + break; + + case i_FPP: +#ifdef DISABLE_I_FPP + failure; +#endif + uses_fpu; + mayfail; + comprintf("#ifdef USE_JIT_FPU\n"); + comprintf("\tuae_u16 extra=%s;\n",gen_nextiword()); + swap_opcode(); + comprintf("\tcomp_fpp_opp(opcode,extra);\n"); + comprintf("#else\n"); + comprintf("\tfailure = 1;\n"); + comprintf("#endif\n"); + break; + + case i_FBcc: +#ifdef DISABLE_I_FBCC + failure; +#endif + uses_fpu; + isjump; + uses_cmov; + mayfail; + comprintf("#ifdef USE_JIT_FPU\n"); + swap_opcode(); + comprintf("\tcomp_fbcc_opp(opcode);\n"); + comprintf("#else\n"); + comprintf("\tfailure = 1;\n"); + comprintf("#endif\n"); + break; + + case i_FDBcc: + uses_fpu; + isjump; + failure; + break; + + case i_FScc: +#ifdef DISABLE_I_FSCC + failure; +#endif + uses_fpu; + mayfail; + uses_cmov; + comprintf("#ifdef USE_JIT_FPU\n"); + comprintf("\tuae_u16 extra=%s;\n",gen_nextiword()); + swap_opcode(); + comprintf("\tcomp_fscc_opp(opcode,extra);\n"); + comprintf("#else\n"); + comprintf("\tfailure = 1;\n"); + comprintf("#endif\n"); + break; + + case i_FTRAPcc: + uses_fpu; + isjump; + failure; + break; + + case i_FSAVE: + uses_fpu; + failure; + break; + + case i_FRESTORE: + uses_fpu; + failure; + break; + + case i_CINVL: + case i_CINVP: + case i_CINVA: + isjump; /* Not really, but it's probably a good idea to stop + translating at this point */ + failure; + comprintf ("\tflush_icache();\n"); /* Differentiate a bit more? 
*/ + break; + + case i_CPUSHL: + case i_CPUSHP: + case i_CPUSHA: + isjump; /* Not really, but it's probably a good idea to stop + translating at this point */ + failure; + break; + + case i_MOVE16: +#ifdef DISABLE_I_MOVE16 + failure; +#endif + genmov16(opcode,curi); + break; + +#ifdef UAE + case i_MMUOP030: + case i_PFLUSHN: + case i_PFLUSH: + case i_PFLUSHAN: + case i_PFLUSHA: + case i_PLPAR: + case i_PLPAW: + case i_PTESTR: + case i_PTESTW: + case i_LPSTOP: + isjump; + failure; + break; +#endif + +#ifdef WINUAE_ARANYM + case i_EMULOP_RETURN: + isjump; + failure; + break; + + case i_EMULOP: + failure; + break; + + case i_NATFEAT_ID: + case i_NATFEAT_CALL: + failure; + break; + + case i_MMUOP: + isjump; + failure; + break; +#endif + + default: + assert(0); + break; + } + comprintf("%s",endstr); + finish_braces (); + sync_m68k_pc (); + if (global_mayfail) + comprintf("\tif (failure) m68k_pc_offset=m68k_pc_offset_thisinst;\n"); + return global_failure; +} + +static void +generate_includes (FILE * f) +{ + fprintf (f, "#include \"sysconfig.h\"\n"); + fprintf (f, "#if defined(JIT)\n"); + fprintf (f, "#include \"sysdeps.h\"\n"); +#ifdef UAE + fprintf (f, "#include \"options.h\"\n"); + fprintf (f, "#include \"memory.h\"\n"); +#else + fprintf (f, "#include \"m68k.h\"\n"); + fprintf (f, "#include \"memory.h\"\n"); +#endif + fprintf (f, "#include \"readcpu.h\"\n"); + fprintf (f, "#include \"newcpu.h\"\n"); + fprintf (f, "#include \"comptbl.h\"\n"); + fprintf (f, "#include \"debug.h\"\n"); +} + +static int postfix; + + +#ifdef UAE +static char *decodeEA (amodes mode, wordsizes size) +{ + static char buffer[80]; + + buffer[0] = 0; + switch (mode){ + case Dreg: + strcpy (buffer,"Dn"); + break; + case Areg: + strcpy (buffer,"An"); + break; + case Aind: + strcpy (buffer,"(An)"); + break; + case Aipi: + strcpy (buffer,"(An)+"); + break; + case Apdi: + strcpy (buffer,"-(An)"); + break; + case Ad16: + strcpy (buffer,"(d16,An)"); + break; + case Ad8r: + strcpy (buffer,"(d8,An,Xn)"); 
+ break; + case PC16: + strcpy (buffer,"(d16,PC)"); + break; + case PC8r: + strcpy (buffer,"(d8,PC,Xn)"); + break; + case absw: + strcpy (buffer,"(xxx).W"); + break; + case absl: + strcpy (buffer,"(xxx).L"); + break; + case imm: + switch (size){ + case sz_byte: + strcpy (buffer,"#.B"); + break; + case sz_word: + strcpy (buffer,"#.W"); + break; + case sz_long: + strcpy (buffer,"#.L"); + break; + default: + break; + } + break; + case imm0: + strcpy (buffer,"#.B"); + break; + case imm1: + strcpy (buffer,"#.W"); + break; + case imm2: + strcpy (buffer,"#.L"); + break; + case immi: + strcpy (buffer,"#"); + break; + + default: + break; + } + return buffer; +} + +static char *outopcode (int opcode) +{ + static char out[100]; + struct instr *ins; + int i; + + ins = &table68k[opcode]; + for (i = 0; lookuptab[i].name[0]; i++) { + if (ins->mnemo == lookuptab[i].mnemo) + break; + } + { + char *s = ua (lookuptab[i].name); + strcpy (out, s); + xfree (s); + } + if (ins->smode == immi) + strcat (out, "Q"); + if (ins->size == sz_byte) + strcat (out,".B"); + if (ins->size == sz_word) + strcat (out,".W"); + if (ins->size == sz_long) + strcat (out,".L"); + strcat (out," "); + if (ins->suse) + strcat (out, decodeEA (ins->smode, ins->size)); + if (ins->duse) { + if (ins->suse) strcat (out,","); + strcat (out, decodeEA (ins->dmode, ins->size)); + } + return out; +} +#endif + + +static void +generate_one_opcode (int rp, int noflags) +{ + int i; + uae_u16 smsk, dmsk; + unsigned int opcode = opcode_map[rp]; + int aborted=0; + int have_srcreg=0; + int have_dstreg=0; +#ifdef UAE + char *name; +#else + const char *name; +#endif + + if (table68k[opcode].mnemo == i_ILLG + || table68k[opcode].clev > cpu_level) + return; + + for (i = 0; lookuptab[i].name[0]; i++) + { + if (table68k[opcode].mnemo == lookuptab[i].mnemo) + break; + } + + if (table68k[opcode].handler != -1) + return; + + switch (table68k[opcode].stype) + { + case 0: + smsk = 7; + break; + case 1: + smsk = 255; + break; + case 2: + smsk 
= 15; + break; + case 3: + smsk = 7; + break; + case 4: + smsk = 7; + break; + case 5: + smsk = 63; + break; +#ifndef UAE + case 6: + smsk = 255; + break; +#endif + case 7: + smsk = 3; + break; + default: + smsk = 0; + assert(0); + } + dmsk = 7; + + next_cpu_level = -1; + if (table68k[opcode].suse + && table68k[opcode].smode != imm && table68k[opcode].smode != imm0 + && table68k[opcode].smode != imm1 && table68k[opcode].smode != imm2 + && table68k[opcode].smode != absw && table68k[opcode].smode != absl + && table68k[opcode].smode != PC8r && table68k[opcode].smode != PC16) + { + have_srcreg=1; + if (table68k[opcode].spos == -1) + { + if (((int) table68k[opcode].sreg) >= 128) + comprintf ("\tuae_s32 srcreg = (uae_s32)(uae_s8)%d;\n", (int) table68k[opcode].sreg); + else + comprintf ("\tuae_s32 srcreg = %d;\n", (int) table68k[opcode].sreg); + } + else + { + char source[100]; + int pos = table68k[opcode].spos; + +#ifndef UAE + comprintf ("#if defined(HAVE_GET_WORD_UNSWAPPED) && !defined(FULLMMU)\n"); + + if (pos < 8 && (smsk >> (8 - pos)) != 0) + sprintf (source, "(((opcode >> %d) | (opcode << %d)) & %d)", + pos ^ 8, 8 - pos, dmsk); + else if (pos != 8) + sprintf (source, "((opcode >> %d) & %d)", pos ^ 8, smsk); + else + sprintf (source, "(opcode & %d)", smsk); + + if (table68k[opcode].stype == 3) + comprintf ("\tuae_u32 srcreg = imm8_table[%s];\n", source); + else if (table68k[opcode].stype == 1) + comprintf ("\tuae_u32 srcreg = (uae_s32)(uae_s8)%s;\n", source); + else + comprintf ("\tuae_u32 srcreg = %s;\n", source); + + comprintf ("#else\n"); +#endif + + if (pos) + sprintf (source, "((opcode >> %d) & %d)", pos, smsk); + else + sprintf (source, "(opcode & %d)", smsk); + + if (table68k[opcode].stype == 3) + comprintf ("\tuae_s32 srcreg = imm8_table[%s];\n", source); + else if (table68k[opcode].stype == 1) + comprintf ("\tuae_s32 srcreg = (uae_s32)(uae_s8)%s;\n", source); + else + comprintf ("\tuae_s32 srcreg = %s;\n", source); + +#ifndef UAE + comprintf ("#endif\n"); 
+#endif + } + } + if (table68k[opcode].duse + /* Yes, the dmode can be imm, in case of LINK or DBcc */ + && table68k[opcode].dmode != imm && table68k[opcode].dmode != imm0 + && table68k[opcode].dmode != imm1 && table68k[opcode].dmode != imm2 + && table68k[opcode].dmode != absw && table68k[opcode].dmode != absl) + { + have_dstreg=1; + if (table68k[opcode].dpos == -1) + { + if (((int) table68k[opcode].dreg) >= 128) + comprintf ("\tuae_s32 dstreg = (uae_s32)(uae_s8)%d;\n", (int) table68k[opcode].dreg); + else + comprintf ("\tuae_s32 dstreg = %d;\n", (int) table68k[opcode].dreg); + } + else + { + int pos = table68k[opcode].dpos; + +#ifndef UAE + comprintf ("#if defined(HAVE_GET_WORD_UNSWAPPED) && !defined(FULLMMU)\n"); + + if (pos < 8 && (dmsk >> (8 - pos)) != 0) + comprintf ("\tuae_u32 dstreg = ((opcode >> %d) | (opcode << %d)) & %d;\n", + pos ^ 8, 8 - pos, dmsk); + else if (pos != 8) + comprintf ("\tuae_u32 dstreg = (opcode >> %d) & %d;\n", + pos ^ 8, dmsk); + else + comprintf ("\tuae_u32 dstreg = opcode & %d;\n", dmsk); + + comprintf ("#else\n"); +#endif + + if (pos) + comprintf ("\tuae_u32 dstreg = (opcode >> %d) & %d;\n", + pos, dmsk); + else + comprintf ("\tuae_u32 dstreg = opcode & %d;\n", dmsk); + +#ifndef UAE + comprintf ("#endif\n"); +#endif + } + } + + if (have_srcreg && have_dstreg && + (table68k[opcode].dmode==Areg || + table68k[opcode].dmode==Aind || + table68k[opcode].dmode==Aipi || + table68k[opcode].dmode==Apdi || + table68k[opcode].dmode==Ad16 || + table68k[opcode].dmode==Ad8r) && + (table68k[opcode].smode==Areg || + table68k[opcode].smode==Aind || + table68k[opcode].smode==Aipi || + table68k[opcode].smode==Apdi || + table68k[opcode].smode==Ad16 || + table68k[opcode].smode==Ad8r) + ) { + comprintf("\tuae_u32 dodgy=(srcreg==(uae_s32)dstreg);\n"); + } + else { + comprintf("\tuae_u32 dodgy=0;\n"); + } + comprintf("\tuae_u32 m68k_pc_offset_thisinst=m68k_pc_offset;\n"); + comprintf("\tm68k_pc_offset+=2;\n"); + + aborted=gen_opcode (opcode); + { + char 
flags[64 * 6]; + *flags = '\0'; + if (global_isjump) strcat(flags, "COMP_OPCODE_ISJUMP|"); + if (long_opcode) strcat(flags, "COMP_OPCODE_LONG_OPCODE|"); + if (global_cmov) strcat(flags, "COMP_OPCODE_CMOV|"); + if (global_isaddx) strcat(flags, "COMP_OPCODE_ISADDX|"); + if (global_iscjump) strcat(flags, "COMP_OPCODE_ISCJUMP|"); + if (global_fpu) strcat(flags, "COMP_OPCODE_USES_FPU|"); + if (*flags) + flags[strlen(flags) - 1] = '\0'; + else + strcpy(flags, "0"); + +#ifdef UAE + comprintf ("return 0;\n"); +#endif + comprintf ("}\n"); + +#ifdef UAE + name = ua (lookuptab[i].name); +#else + name = lookuptab[i].name; +#endif + if (aborted) { + fprintf (stblfile, "{ NULL, %u, %s }, /* %s */\n", opcode, flags, name); + com_discard(); + } else { + const char *tbl = noflags ? "nf" : "ff"; +#ifdef UAE + printf ("/* %s */\n", outopcode (opcode)); +#else + printf ("/* %s */\n", name); +#endif + fprintf (stblfile, "{ op_%x_%d_comp_%s, %u, %s }, /* %s */\n", opcode, postfix, tbl, opcode, flags, name); + fprintf (headerfile, "extern compop_func op_%x_%d_comp_%s;\n", opcode, postfix, tbl); + printf (RETTYPE " REGPARAM2 op_%x_%d_comp_%s(uae_u32 opcode)\n{\n", opcode, postfix, tbl); + com_flush(); + } +#ifdef UAE + xfree (name); +#endif + } + opcode_next_clev[rp] = next_cpu_level; + opcode_last_postfix[rp] = postfix; +} + +static void +generate_func (int noflags) +{ + int i, j, rp; + const char *tbl = noflags ? "nf" : "ff"; + + using_prefetch = 0; + using_exception_3 = 0; + for (i = 0; i < 1; i++) /* We only do one level! */ + { + cpu_level = NEXT_CPU_LEVEL - i; + postfix = i; + + fprintf (stblfile, "const struct comptbl op_smalltbl_%d_comp_%s[] = {\n", postfix, tbl); + + /* sam: this is for people with low memory (eg. 
me :)) */ + printf ("\n" + "#if !defined(PART_1) && !defined(PART_2) && " + "!defined(PART_3) && !defined(PART_4) && " + "!defined(PART_5) && !defined(PART_6) && " + "!defined(PART_7) && !defined(PART_8)" + "\n" + "#define PART_1 1\n" + "#define PART_2 1\n" + "#define PART_3 1\n" + "#define PART_4 1\n" + "#define PART_5 1\n" + "#define PART_6 1\n" + "#define PART_7 1\n" + "#define PART_8 1\n" + "#endif\n\n"); +#ifdef UAE + printf ("extern void comp_fpp_opp();\n" + "extern void comp_fscc_opp();\n" + "extern void comp_fbcc_opp();\n\n"); +#endif + + rp = 0; + for (j = 1; j <= 8; ++j) + { + int k = (j * nr_cpuop_funcs) / 8; + printf ("#ifdef PART_%d\n", j); + for (; rp < k; rp++) + generate_one_opcode (rp,noflags); + printf ("#endif\n\n"); + } + + fprintf (stblfile, "{ 0, 65536, 0 }};\n"); + } + +} + +#if (defined(OS_cygwin) || defined(OS_mingw)) && defined(EXTENDED_SIGSEGV) +void cygwin_mingw_abort() +{ +#undef abort + abort(); +} +#endif + +int main(void) +{ + read_table68k (); + do_merges (); + + opcode_map = (int *) malloc (sizeof (int) * nr_cpuop_funcs); + opcode_last_postfix = (int *) malloc (sizeof (int) * nr_cpuop_funcs); + opcode_next_clev = (int *) malloc (sizeof (int) * nr_cpuop_funcs); + counts = (unsigned long *) malloc (65536 * sizeof (unsigned long)); + read_counts (); + + /* It would be a lot nicer to put all in one file (we'd also get rid of + * cputbl.h that way), but cpuopti can't cope. That could be fixed, but + * I don't dare to touch the 68k version. 
*/ + + headerfile = fopen (GEN_PATH "comptbl.h", "wb"); + fprintf (headerfile, "" + "extern const struct comptbl op_smalltbl_0_comp_nf[];\n" + "extern const struct comptbl op_smalltbl_0_comp_ff[];\n" + ""); + + stblfile = fopen (GEN_PATH "compstbl.cpp", "wb"); + if (freopen (GEN_PATH "compemu.cpp", "wb", stdout) == NULL) { + abort(); + } + + generate_includes (stdout); + generate_includes (stblfile); + + printf("#include \"" JIT_PATH "compemu.h\"\n"); + + noflags=0; + generate_func (noflags); + + free(opcode_map); + free(opcode_last_postfix); + free(opcode_next_clev); + free(counts); + + opcode_map = (int *) malloc (sizeof (int) * nr_cpuop_funcs); + opcode_last_postfix = (int *) malloc (sizeof (int) * nr_cpuop_funcs); + opcode_next_clev = (int *) malloc (sizeof (int) * nr_cpuop_funcs); + counts = (unsigned long *) malloc (65536 * sizeof (unsigned long)); + read_counts (); + noflags=1; + generate_func (noflags); + + printf ("#endif\n"); + fprintf (stblfile, "#endif\n"); + + free(opcode_map); + free(opcode_last_postfix); + free(opcode_next_clev); + free(counts); + + free (table68k); + fclose (stblfile); + fclose (headerfile); + return 0; +} + +#ifdef UAE +void write_log (const TCHAR *format,...) 
+{ +} +#endif diff --git a/BasiliskII/src/uae_cpu/compiler/gencomp_arm.c b/BasiliskII/src/uae_cpu/compiler/gencomp_arm.c new file mode 100644 index 00000000..13e2776e --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/gencomp_arm.c @@ -0,0 +1,4981 @@ +/* + * compiler/gencomp_arm2.c - MC680x0 compilation generator (ARM Adaption JIT v1 & JIT v2) + * + * Based on work Copyright 1995, 1996 Bernd Schmidt + * Changes for UAE-JIT Copyright 2000 Bernd Meyer + * + * Adaptation for ARAnyM/ARM, copyright 2001-2015 + * Milan Jurik, Jens Heitmann + * + * Basilisk II (C) 1997-2005 Christian Bauer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Notes + * ===== + * + * Advantages of JIT v2 + * - Processor independent style + * - Reduced overhead + * - Easier to understand / read + * - Easier to optimize + * - More precise flag handling + * - Better optimization for different CPU version ARM, ARMv6 etc.. 
+ * + * Disadvantages of JIT v2 + * - Less generated + * - Requires more code implementation by hand (MidFunc) + * - MIDFUNCS are more CPU minded (closer to raw) + * - Separate code for each instruction (but this could be also an advantage, because you can concentrate on it) + * + * Additional note: + * - current using jnf_xxx calls for non-flag operations and + * jff_xxx for flag operations + * + * Still todo: + * - Optimize genamode, genastore, gen_writeXXX, gen_readXXX, genmovemXXX + * + */ + +#define CC_FOR_BUILD 1 +#include "sysconfig.h" + +#include "sysdeps.h" +#include "readcpu.h" + +#include +#include +#include +#include +#include +#include +#undef abort + +#define BOOL_TYPE "int" +#define failure global_failure=1 +#define FAILURE global_failure=1 +#define isjump global_isjump=1 +#define is_const_jump global_iscjump=1 +#define isaddx global_isaddx=1 +#define uses_cmov global_cmov=1 +#define mayfail global_mayfail=1 +#define uses_fpu global_fpu=1 + +int hack_opcode; + +static int global_failure; +static int global_isjump; +static int global_iscjump; +static int global_isaddx; +static int global_cmov; +static int long_opcode; +static int global_mayfail; +static int global_fpu; + +static char endstr[1000]; +static char lines[100000]; +static int comp_index = 0; + +#include "flags_arm.h" + +#ifndef __attribute__ +# ifndef __GNUC__ +# define __attribute__(x) +# endif +#endif + + +static int cond_codes[] = { // + NATIVE_CC_AL, -1, // + NATIVE_CC_HI, NATIVE_CC_LS, // + NATIVE_CC_CC, NATIVE_CC_CS, // + NATIVE_CC_NE, NATIVE_CC_EQ, // + NATIVE_CC_VC, NATIVE_CC_VS, // + NATIVE_CC_PL, NATIVE_CC_MI, // + NATIVE_CC_GE, NATIVE_CC_LT, // + NATIVE_CC_GT, NATIVE_CC_LE // + }; + +__attribute__((format(printf, 1, 2))) +static void comprintf(const char *format, ...) 
+{ + va_list args; + + va_start(args, format); + comp_index += vsprintf(lines + comp_index, format, args); + va_end(args); +} + +static void com_discard(void) +{ + comp_index = 0; +} + +static void com_flush(void) +{ + int i; + for (i = 0; i < comp_index; i++) + putchar(lines[i]); + com_discard(); +} + + +static FILE *headerfile; +static FILE *stblfile; + +static int using_prefetch; +static int using_exception_3; +static int cpu_level; +static int noflags; + +/* For the current opcode, the next lower level that will have different code. + * Initialized to -1 for each opcode. If it remains unchanged, indicates we + * are done with that opcode. */ +static int next_cpu_level; + +static int *opcode_map; +static int *opcode_next_clev; +static int *opcode_last_postfix; +static unsigned long *counts; + +static void read_counts(void) +{ + FILE *file; + unsigned long opcode, count, total; + char name[20]; + int nr = 0; + memset(counts, 0, 65536 * sizeof *counts); + + file = fopen("frequent.68k", "r"); + if (file) { + if (fscanf(file, "Total: %lu\n", &total) != 1) + { + assert(0); + } + while (fscanf(file, "%lx: %lu %s\n", &opcode, &count, name) == 3) { + opcode_next_clev[nr] = 4; + opcode_last_postfix[nr] = -1; + opcode_map[nr++] = opcode; + counts[opcode] = count; + } + fclose(file); + } + if (nr == nr_cpuop_funcs) + return; + for (opcode = 0; opcode < 0x10000; opcode++) { + if (table68k[opcode].handler == -1 && table68k[opcode].mnemo != i_ILLG + && counts[opcode] == 0) { + opcode_next_clev[nr] = 4; + opcode_last_postfix[nr] = -1; + opcode_map[nr++] = opcode; + counts[opcode] = count; + } + } + assert (nr == nr_cpuop_funcs); +} + +static int n_braces = 0; +static int insn_n_cycles; + +static void start_brace(void) { + n_braces++; + comprintf("{"); +} + +static void close_brace(void) { + assert(n_braces > 0); + n_braces--; + comprintf("}"); +} + +static void finish_braces(void) { + while (n_braces > 0) + close_brace(); +} + +static inline void gen_update_next_handler(void) 
{ + return; /* Can anything clever be done here? */ +} + +static void gen_writebyte(const char *address, const char *source) +{ + comprintf("\twritebyte(%s, %s, scratchie);\n", address, source); +} + +static void gen_writeword(const char *address, const char *source) +{ + comprintf("\twriteword(%s, %s, scratchie);\n", address, source); +} + +static void gen_writelong(const char *address, const char *source) +{ + comprintf("\twritelong(%s, %s, scratchie);\n", address, source); +} + +static void gen_readbyte(const char *address, const char* dest) +{ + comprintf("\treadbyte(%s, %s, scratchie);\n", address, dest); +} + +static void gen_readword(const char *address, const char *dest) +{ + comprintf("\treadword(%s,%s,scratchie);\n", address, dest); +} + +static void gen_readlong(const char *address, const char *dest) +{ + comprintf("\treadlong(%s, %s, scratchie);\n", address, dest); +} + +static const char * +gen_nextilong(void) { + static char buffer[80]; + + sprintf(buffer, "comp_get_ilong((m68k_pc_offset+=4)-4)"); + insn_n_cycles += 4; + + long_opcode = 1; + return buffer; +} + +static const char * +gen_nextiword(void) { + static char buffer[80]; + + sprintf(buffer, "comp_get_iword((m68k_pc_offset+=2)-2)"); + insn_n_cycles += 2; + + long_opcode = 1; + return buffer; +} + +static const char * +gen_nextibyte(void) { + static char buffer[80]; + + sprintf(buffer, "comp_get_ibyte((m68k_pc_offset+=2)-2)"); + insn_n_cycles += 2; + + long_opcode = 1; + return buffer; +} + +#if defined(USE_JIT_FPU) +// Only used by FPU (future), get rid of unused warning +static void +swap_opcode (void) +{ + comprintf("#if defined(HAVE_GET_WORD_UNSWAPPED) && !defined(FULLMMU)\n"); + comprintf("\topcode = do_byteswap_16(opcode);\n"); + comprintf("#endif\n"); +} +#endif + +static void sync_m68k_pc(void) { + comprintf("\t if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc();\n"); +} + +/* getv == 1: fetch data; getv != 0: check for odd address. 
If movem != 0, + * the calling routine handles Apdi and Aipi modes. + * gb-- movem == 2 means the same thing but for a MOVE16 instruction */ +static void genamode(amodes mode, const char *reg, wordsizes size, const char *name, int getv, int movem) +{ + start_brace(); + switch (mode) + { + case Dreg: /* Do we need to check dodgy here? */ + assert (!movem); + if (getv == 1 || getv == 2) + { + /* We generate the variable even for getv==2, so we can use + it as a destination for MOVE */ + comprintf("\tint %s = %s;\n", name, reg); + } + return; + + case Areg: + assert (!movem); + if (getv == 1 || getv == 2) + { + /* see above */ + comprintf("\tint %s = dodgy ? scratchie++ : %s + 8;\n", name, reg); + if (getv == 1) + { + comprintf("\tif (dodgy) \n"); + comprintf("\t\tmov_l_rr(%s, %s + 8);\n", name, reg); + } + } + return; + + case Aind: + comprintf("\tint %sa = dodgy ? scratchie++ : %s + 8;\n", name, reg); + comprintf("\tif (dodgy)\n"); + comprintf("\t\tmov_l_rr(%sa, %s + 8);\n", name, reg); + break; + case Aipi: + comprintf("\tint %sa = scratchie++;\n", name); + comprintf("\tmov_l_rr(%sa, %s + 8);\n", name, reg); + break; + case Apdi: + switch (size) + { + case sz_byte: + if (movem) + { + comprintf("\tint %sa = dodgy ? scratchie++ : %s + 8;\n", name, reg); + comprintf("\tif (dodgy)\n"); + comprintf("\t\tmov_l_rr(%sa, 8 + %s);\n", name, reg); + } else + { + start_brace(); + comprintf("\tint %sa = dodgy ? scratchie++ : %s + 8;\n", name, reg); + comprintf("\tlea_l_brr(%s + 8, %s + 8, (uae_s32)-areg_byteinc[%s]);\n", reg, reg, reg); + comprintf("\tif (dodgy)\n"); + comprintf("\t\tmov_l_rr(%sa, 8 + %s);\n", name, reg); + } + break; + case sz_word: + if (movem) + { + comprintf("\tint %sa=dodgy?scratchie++:%s+8;\n", name, reg); + comprintf("\tif (dodgy) \n"); + comprintf("\tmov_l_rr(%sa,8+%s);\n", name, reg); + } else + { + start_brace(); + comprintf("\tint %sa = dodgy ? 
scratchie++ : %s + 8;\n", name, reg); + comprintf("\tlea_l_brr(%s + 8, %s + 8, -2);\n", reg, reg); + comprintf("\tif (dodgy)\n"); + comprintf("\t\tmov_l_rr(%sa, 8 + %s);\n", name, reg); + } + break; + case sz_long: + if (movem) + { + comprintf("\tint %sa = dodgy ? scratchie++ : %s + 8;\n", name, reg); + comprintf("\tif (dodgy)\n"); + comprintf("\t\tmov_l_rr(%sa, 8 + %s);\n", name, reg); + } else + { + start_brace(); + comprintf("\tint %sa = dodgy ? scratchie++ : %s + 8;\n", name, reg); + comprintf("\tlea_l_brr(%s + 8, %s + 8, -4);\n", reg, reg); + comprintf("\tif (dodgy)\n"); + comprintf("\t\tmov_l_rr(%sa, 8 + %s);\n", name, reg); + } + break; + default: + assert(0); + break; + } + break; + case Ad16: + comprintf("\tint %sa = scratchie++;\n", name); + comprintf("\tmov_l_rr(%sa, 8 + %s);\n", name, reg); + comprintf("\tlea_l_brr(%sa, %sa, (uae_s32)(uae_s16)%s);\n", name, name, gen_nextiword()); + break; + case Ad8r: + comprintf("\tint %sa = scratchie++;\n", name); + comprintf("\tcalc_disp_ea_020(%s + 8, %s, %sa, scratchie);\n", reg, gen_nextiword(), name); + break; + + case PC16: + comprintf("\tint %sa = scratchie++;\n", name); + comprintf("\tuae_u32 address = start_pc + ((char *)comp_pc_p - (char *)start_pc_p) + m68k_pc_offset;\n"); + comprintf("\tuae_s32 PC16off = (uae_s32)(uae_s16)%s;\n", gen_nextiword()); + comprintf("\tmov_l_ri(%sa, address + PC16off);\n", name); + break; + + case PC8r: + comprintf("\tint pctmp = scratchie++;\n"); + comprintf("\tint %sa = scratchie++;\n", name); + comprintf("\tuae_u32 address = start_pc + ((char *)comp_pc_p - (char *)start_pc_p) + m68k_pc_offset;\n"); + start_brace(); + comprintf("\tmov_l_ri(pctmp,address);\n"); + + comprintf("\tcalc_disp_ea_020(pctmp, %s, %sa, scratchie);\n", gen_nextiword(), name); + break; + case absw: + comprintf("\tint %sa = scratchie++;\n", name); + comprintf("\tmov_l_ri(%sa, (uae_s32)(uae_s16)%s);\n", name, gen_nextiword()); + break; + case absl: + comprintf("\tint %sa = scratchie++;\n", name); + 
comprintf("\tmov_l_ri(%sa, %s); /* absl */\n", name, gen_nextilong()); + break; + case imm: + assert (getv == 1); + switch (size) + { + case sz_byte: + comprintf("\tint %s = scratchie++;\n", name); + comprintf("\tmov_l_ri(%s, (uae_s32)(uae_s8)%s);\n", name, gen_nextibyte()); + break; + case sz_word: + comprintf("\tint %s = scratchie++;\n", name); + comprintf("\tmov_l_ri(%s, (uae_s32)(uae_s16)%s);\n", name, gen_nextiword()); + break; + case sz_long: + comprintf("\tint %s = scratchie++;\n", name); + comprintf("\tmov_l_ri(%s, %s);\n", name, gen_nextilong()); + break; + default: + assert(0); + break; + } + return; + case imm0: + assert (getv == 1); + comprintf("\tint %s = scratchie++;\n", name); + comprintf("\tmov_l_ri(%s, (uae_s32)(uae_s8)%s);\n", name, gen_nextibyte()); + return; + case imm1: + assert (getv == 1); + comprintf("\tint %s = scratchie++;\n", name); + comprintf("\tmov_l_ri(%s, (uae_s32)(uae_s16)%s);\n", name, gen_nextiword()); + return; + case imm2: + assert (getv == 1); + comprintf("\tint %s = scratchie++;\n", name); + comprintf("\tmov_l_ri(%s, %s);\n", name, gen_nextilong()); + return; + case immi: + assert (getv == 1); + comprintf("\tint %s = scratchie++;\n", name); + comprintf("\tmov_l_ri(%s, %s);\n", name, reg); + return; + default: + assert(0); + break; + } + + /* We get here for all non-reg non-immediate addressing modes to + * actually fetch the value. 
*/ + if (getv == 1) + { + char astring[80]; + sprintf(astring, "%sa", name); + switch (size) + { + case sz_byte: + insn_n_cycles += 2; + break; + case sz_word: + insn_n_cycles += 2; + break; + case sz_long: + insn_n_cycles += 4; + break; + default: + assert(0); + break; + } + start_brace(); + comprintf("\tint %s = scratchie++;\n", name); + switch (size) + { + case sz_byte: + gen_readbyte(astring, name); + break; + case sz_word: + gen_readword(astring, name); + break; + case sz_long: + gen_readlong(astring, name); + break; + default: + assert(0); + break; + } + } + + /* We now might have to fix up the register for pre-dec or post-inc + * addressing modes. */ + if (!movem) + { + switch (mode) + { + case Aipi: + switch (size) + { + case sz_byte: + comprintf("\tlea_l_brr(%s + 8,%s + 8, areg_byteinc[%s]);\n", reg, reg, reg); + break; + case sz_word: + comprintf("\tlea_l_brr(%s + 8, %s + 8, 2);\n", reg, reg); + break; + case sz_long: + comprintf("\tlea_l_brr(%s + 8, %s + 8, 4);\n", reg, reg); + break; + default: + assert(0); + break; + } + break; + case Apdi: + break; + default: + break; + } + } +} + +static void genastore(const char *from, amodes mode, const char *reg, wordsizes size, const char *to) +{ + switch (mode) + { + case Dreg: + switch (size) + { + case sz_byte: + comprintf("\tif(%s != %s)\n", reg, from); + comprintf("\t\tmov_b_rr(%s, %s);\n", reg, from); + break; + case sz_word: + comprintf("\tif(%s != %s)\n", reg, from); + comprintf("\t\tmov_w_rr(%s, %s);\n", reg, from); + break; + case sz_long: + comprintf("\tif(%s != %s)\n", reg, from); + comprintf("\t\tmov_l_rr(%s, %s);\n", reg, from); + break; + default: + assert(0); + break; + } + break; + case Areg: + switch (size) + { + case sz_word: + comprintf("\tif(%s + 8 != %s)\n", reg, from); + comprintf("\t\tmov_w_rr(%s + 8, %s);\n", reg, from); + break; + case sz_long: + comprintf("\tif(%s + 8 != %s)\n", reg, from); + comprintf("\t\tmov_l_rr(%s + 8, %s);\n", reg, from); + break; + default: + assert(0); + break; 
+ } + break; + + case Apdi: + case absw: + case PC16: + case PC8r: + case Ad16: + case Ad8r: + case Aipi: + case Aind: + case absl: + { + char astring[80]; + sprintf(astring, "%sa", to); + + switch (size) + { + case sz_byte: + insn_n_cycles += 2; + gen_writebyte(astring, from); + break; + case sz_word: + insn_n_cycles += 2; + gen_writeword(astring, from); + break; + case sz_long: + insn_n_cycles += 4; + gen_writelong(astring, from); + break; + default: + assert(0); + break; + } + } + break; + case imm: + case imm0: + case imm1: + case imm2: + case immi: + assert(0); + break; + default: + assert(0); + break; + } +} + +static void gen_move16(uae_u32 opcode, struct instr *curi) { +#if defined(USE_JIT2) + comprintf("\tint src=scratchie++;\n"); + comprintf("\tint dst=scratchie++;\n"); + + uae_u32 masked_op = (opcode & 0xfff8); + if (masked_op == 0xf620) { + // POSTINCREMENT SOURCE AND DESTINATION version + comprintf("\t uae_u16 dstreg = ((%s)>>12) & 0x07;\n", gen_nextiword()); + comprintf("\t jnf_MOVE(src, srcreg + 8);"); + comprintf("\t jnf_MOVE(dst, dstreg + 8);"); + comprintf("\t if (srcreg != dstreg)\n"); + comprintf("\t jnf_ADD_imm(srcreg + 8, srcreg + 8, 16);"); + comprintf("\t jnf_ADD_imm(dstreg + 8, dstreg + 8, 16);"); + } else { + /* Other variants */ + genamode(curi->smode, "srcreg", curi->size, "src", 0, 2); + genamode(curi->dmode, "dstreg", curi->size, "dst", 0, 2); + switch (masked_op) { + case 0xf600: + comprintf("\t jnf_ADD_imm(srcreg + 8, srcreg + 8, 16);"); + break; + case 0xf608: + comprintf("\t jnf_ADD_imm(dstreg + 8, dstreg + 8, 16);"); + break; + } + } + comprintf("\t jnf_MOVE16(dst, src);"); +#else + comprintf("\tint src=scratchie++;\n"); + comprintf("\tint dst=scratchie++;\n"); + + if ((opcode & 0xfff8) == 0xf620) { + /* MOVE16 (Ax)+,(Ay)+ */ + comprintf("\tuae_u16 dstreg=((%s)>>12)&0x07;\n", gen_nextiword()); + comprintf("\tmov_l_rr(src,8+srcreg);\n"); + comprintf("\tmov_l_rr(dst,8+dstreg);\n"); + } else { + /* Other variants */ + 
genamode(curi->smode, "srcreg", curi->size, "src", 0, 2); + genamode(curi->dmode, "dstreg", curi->size, "dst", 0, 2); + comprintf("\tmov_l_rr(src,srca);\n"); + comprintf("\tmov_l_rr(dst,dsta);\n"); + } + + /* Align on 16-byte boundaries */ + comprintf("\tand_l_ri(src,~15);\n"); + comprintf("\tand_l_ri(dst,~15);\n"); + + if ((opcode & 0xfff8) == 0xf620) { + comprintf("\tif (srcreg != dstreg)\n"); + comprintf("\tarm_ADD_l_ri8(srcreg+8,16);\n"); + comprintf("\tarm_ADD_l_ri8(dstreg+8,16);\n"); + } else if ((opcode & 0xfff8) == 0xf600) + comprintf("\tarm_ADD_l_ri8(srcreg+8,16);\n"); + else if ((opcode & 0xfff8) == 0xf608) + comprintf("\tarm_ADD_l_ri8(dstreg+8,16);\n"); + + comprintf("\tint tmp=scratchie;\n"); + comprintf("\tscratchie+=4;\n"); + + comprintf("\tget_n_addr(src,src,scratchie);\n" + "\tget_n_addr(dst,dst,scratchie);\n" + "\tmov_l_rR(tmp+0,src,0);\n" + "\tmov_l_rR(tmp+1,src,4);\n" + "\tmov_l_rR(tmp+2,src,8);\n" + "\tmov_l_rR(tmp+3,src,12);\n" + "\tmov_l_Rr(dst,tmp+0,0);\n" + "\tforget_about(tmp+0);\n" + "\tmov_l_Rr(dst,tmp+1,4);\n" + "\tforget_about(tmp+1);\n" + "\tmov_l_Rr(dst,tmp+2,8);\n" + "\tforget_about(tmp+2);\n" + "\tmov_l_Rr(dst,tmp+3,12);\n"); +#endif +} + +static void genmovemel(uae_u16 opcode) { + comprintf("\tuae_u16 mask = %s;\n", gen_nextiword()); + comprintf("\tint native=scratchie++;\n"); + comprintf("\tint i;\n"); + comprintf("\tsigned char offset=0;\n"); + genamode(table68k[opcode].dmode, "dstreg", table68k[opcode].size, "src", 2, + 1); + comprintf("\tget_n_addr(srca,native,scratchie);\n"); + + comprintf("\tfor (i=0;i<16;i++) {\n" + "\t\tif ((mask>>i)&1) {\n"); + switch (table68k[opcode].size) { + case sz_long: + comprintf("\t\t\tmov_l_rR(i,native,offset);\n" + "\t\t\tmid_bswap_32(i);\n" + "\t\t\toffset+=4;\n"); + break; + case sz_word: + comprintf("\t\t\tmov_w_rR(i,native,offset);\n" + "\t\t\tmid_bswap_16(i);\n" + "\t\t\tsign_extend_16_rr(i,i);\n" + "\t\t\toffset+=2;\n"); + break; + default: + assert(0); + break; + } + comprintf("\t\t}\n" + 
"\t}"); + if (table68k[opcode].dmode == Aipi) { + comprintf("\t\t\tlea_l_brr(8+dstreg,srca,offset);\n"); + } +} + +static void genmovemle(uae_u16 opcode) { + comprintf("\tuae_u16 mask = %s;\n", gen_nextiword()); + comprintf("\tint native=scratchie++;\n"); + comprintf("\tint i;\n"); + comprintf("\tint tmp=scratchie++;\n"); + comprintf("\tsigned char offset=0;\n"); + genamode(table68k[opcode].dmode, "dstreg", table68k[opcode].size, "src", 2, + 1); + + comprintf("\tget_n_addr(srca,native,scratchie);\n"); + + if (table68k[opcode].dmode != Apdi) { + comprintf("\tfor (i=0;i<16;i++) {\n" + "\t\tif ((mask>>i)&1) {\n"); + switch (table68k[opcode].size) { + case sz_long: + comprintf("\t\t\tmov_l_rr(tmp,i);\n" + "\t\t\tmid_bswap_32(tmp);\n" + "\t\t\tmov_l_Rr(native,tmp,offset);\n" + "\t\t\toffset+=4;\n"); + break; + case sz_word: + comprintf("\t\t\tmov_l_rr(tmp,i);\n" + "\t\t\tmid_bswap_16(tmp);\n" + "\t\t\tmov_w_Rr(native,tmp,offset);\n" + "\t\t\toffset+=2;\n"); + break; + default: + assert(0); + break; + } + } else { /* Pre-decrement */ + comprintf("\tfor (i=0;i<16;i++) {\n" + "\t\tif ((mask>>i)&1) {\n"); + switch (table68k[opcode].size) { + case sz_long: + comprintf("\t\t\toffset-=4;\n" + "\t\t\tmov_l_rr(tmp,15-i);\n" + "\t\t\tmid_bswap_32(tmp);\n" + "\t\t\tmov_l_Rr(native,tmp,offset);\n"); + break; + case sz_word: + comprintf("\t\t\toffset-=2;\n" + "\t\t\tmov_l_rr(tmp,15-i);\n" + "\t\t\tmid_bswap_16(tmp);\n" + "\t\t\tmov_w_Rr(native,tmp,offset);\n"); + break; + default: + assert(0); + break; + } + } + + comprintf("\t\t}\n" + "\t}"); + if (table68k[opcode].dmode == Apdi) { + comprintf("\t\t\tlea_l_brr(8+dstreg,srca,(uae_s32)offset);\n"); + } +} + +static void duplicate_carry(void) { + comprintf("\tif (needed_flags&FLAG_X) duplicate_carry();\n"); +} + +typedef enum { + flag_logical_noclobber, + flag_logical, + flag_add, + flag_sub, + flag_cmp, + flag_addx, + flag_subx, + flag_zn, + flag_av, + flag_sv, + flag_and, + flag_or, + flag_eor, + flag_mov +} flagtypes; + +#if 
!defined(USE_JIT2) +static void genflags(flagtypes type, wordsizes size, const char *value, const char *src, const char *dst) +{ + if (noflags) { + switch (type) { + case flag_cmp: + comprintf("\tdont_care_flags();\n"); + comprintf("/* Weird --- CMP with noflags ;-) */\n"); + return; + case flag_add: + case flag_sub: + comprintf("\tdont_care_flags();\n"); + { + const char* op; + switch (type) { + case flag_add: + op = "add"; + break; // nf + case flag_sub: + op = "sub"; + break; // nf + default: + assert(0); + break; + } + switch (size) { + case sz_byte: + comprintf("\t%s_b(%s,%s);\n", op, dst, src); + break; + case sz_word: + comprintf("\t%s_w(%s,%s);\n", op, dst, src); + break; + case sz_long: + comprintf("\t%s_l(%s,%s);\n", op, dst, src); + break; + } + return; + } + break; + + case flag_and: + comprintf("\tdont_care_flags();\n"); + switch (size) { + case sz_byte: + comprintf("if (kill_rodent(dst)) {\n"); + comprintf("\tzero_extend_8_rr(scratchie,%s);\n", src); + comprintf("\tor_l_ri(scratchie,0xffffff00);\n"); // nf + comprintf("\tarm_AND_l(%s,scratchie);\n", dst); + comprintf("\tforget_about(scratchie);\n"); + comprintf("\t} else \n" + "\tarm_AND_b(%s,%s);\n", dst, src); + break; + case sz_word: + comprintf("if (kill_rodent(dst)) {\n"); + comprintf("\tzero_extend_16_rr(scratchie,%s);\n", src); + comprintf("\tor_l_ri(scratchie,0xffff0000);\n"); // nf + comprintf("\tarm_AND_l(%s,scratchie);\n", dst); + comprintf("\tforget_about(scratchie);\n"); + comprintf("\t} else \n" + "\tarm_AND_w(%s,%s);\n", dst, src); + break; + case sz_long: + comprintf("\tarm_AND_l(%s,%s);\n", dst, src); + break; + } + return; + + case flag_mov: + comprintf("\tdont_care_flags();\n"); + switch (size) { + case sz_byte: + comprintf("if (kill_rodent(dst)) {\n"); + comprintf("\tzero_extend_8_rr(scratchie,%s);\n", src); + comprintf("\tand_l_ri(%s,0xffffff00);\n", dst); // nf + comprintf("\tarm_ORR_l(%s,scratchie);\n", dst); + comprintf("\tforget_about(scratchie);\n"); + comprintf("\t} else \n" 
+ "\tmov_b_rr(%s,%s);\n", dst, src); + break; + case sz_word: + comprintf("if (kill_rodent(dst)) {\n"); + comprintf("\tzero_extend_16_rr(scratchie,%s);\n", src); + comprintf("\tand_l_ri(%s,0xffff0000);\n", dst); // nf + comprintf("\tarm_ORR_l(%s,scratchie);\n", dst); + comprintf("\tforget_about(scratchie);\n"); + comprintf("\t} else \n" + "\tmov_w_rr(%s,%s);\n", dst, src); + break; + case sz_long: + comprintf("\tmov_l_rr(%s,%s);\n", dst, src); + break; + } + return; + + case flag_or: + case flag_eor: + comprintf("\tdont_care_flags();\n"); + start_brace(); + { + const char* op; + switch (type) { + case flag_or: + op = "ORR"; + break; // nf + case flag_eor: + op = "EOR"; + break; // nf + default: + assert(0); + break; + } + switch (size) { + case sz_byte: + comprintf("if (kill_rodent(dst)) {\n"); + comprintf("\tzero_extend_8_rr(scratchie,%s);\n", src); + comprintf("\tarm_%s_l(%s,scratchie);\n", op, dst); + comprintf("\tforget_about(scratchie);\n"); + comprintf("\t} else \n" + "\tarm_%s_b(%s,%s);\n", op, dst, src); + break; + case sz_word: + comprintf("if (kill_rodent(dst)) {\n"); + comprintf("\tzero_extend_16_rr(scratchie,%s);\n", src); + comprintf("\tarm_%s_l(%s,scratchie);\n", op, dst); + comprintf("\tforget_about(scratchie);\n"); + comprintf("\t} else \n" + "\tarm_%s_w(%s,%s);\n", op, dst, src); + break; + case sz_long: + comprintf("\tarm_%s_l(%s,%s);\n", op, dst, src); + break; + } + close_brace(); + return; + } + + case flag_addx: + case flag_subx: + comprintf("\tdont_care_flags();\n"); + { + const char* op; + switch (type) { + case flag_addx: + op = "adc"; + break; + case flag_subx: + op = "sbb"; + break; + default: + assert(0); + break; + } + comprintf("\trestore_carry();\n"); /* Reload the X flag into C */ + switch (size) { + case sz_byte: + comprintf("\t%s_b(%s,%s);\n", op, dst, src); + break; + case sz_word: + comprintf("\t%s_w(%s,%s);\n", op, dst, src); + break; + case sz_long: + comprintf("\t%s_l(%s,%s);\n", op, dst, src); + break; + } + return; + } + 
break; + default: + return; + } + } + + /* Need the flags, but possibly not all of them */ + switch (type) { + case flag_logical_noclobber: + failure; + /* fall through */ + + case flag_and: + case flag_or: + case flag_eor: + comprintf("\tdont_care_flags();\n"); + start_brace(); + { + const char* op; + switch (type) { + case flag_and: + op = "and"; + break; + case flag_or: + op = "or"; + break; + case flag_eor: + op = "xor"; + break; + default: + assert(0); + break; + } + switch (size) { + case sz_byte: + comprintf("\tstart_needflags();\n" + "\t%s_b(%s,%s);\n", op, dst, src); + break; + case sz_word: + comprintf("\tstart_needflags();\n" + "\t%s_w(%s,%s);\n", op, dst, src); + break; + case sz_long: + comprintf("\tstart_needflags();\n" + "\t%s_l(%s,%s);\n", op, dst, src); + break; + } + comprintf("\tlive_flags();\n"); + comprintf("\tend_needflags();\n"); + close_brace(); + return; + } + + case flag_mov: + comprintf("\tdont_care_flags();\n"); + start_brace(); + { + switch (size) { + case sz_byte: + comprintf("\tif (%s!=%s) {\n", src, dst); + comprintf("\tmov_b_ri(%s,0);\n" + "\tstart_needflags();\n", dst); + comprintf("\tor_b(%s,%s);\n", dst, src); + comprintf("\t} else {\n"); + comprintf("\tmov_b_rr(%s,%s);\n", dst, src); + comprintf("\ttest_b_rr(%s,%s);\n", dst, dst); + comprintf("\t}\n"); + break; + case sz_word: + comprintf("\tif (%s!=%s) {\n", src, dst); + comprintf("\tmov_w_ri(%s,0);\n" + "\tstart_needflags();\n", dst); + comprintf("\tor_w(%s,%s);\n", dst, src); + comprintf("\t} else {\n"); + comprintf("\tmov_w_rr(%s,%s);\n", dst, src); + comprintf("\ttest_w_rr(%s,%s);\n", dst, dst); + comprintf("\t}\n"); + break; + case sz_long: + comprintf("\tif (%s!=%s) {\n", src, dst); + comprintf("\tmov_l_ri(%s,0);\n" + "\tstart_needflags();\n", dst); + comprintf("\tor_l(%s,%s);\n", dst, src); + comprintf("\t} else {\n"); + comprintf("\tmov_l_rr(%s,%s);\n", dst, src); + comprintf("\ttest_l_rr(%s,%s);\n", dst, dst); + comprintf("\t}\n"); + break; + } + 
comprintf("\tlive_flags();\n"); + comprintf("\tend_needflags();\n"); + close_brace(); + return; + } + + case flag_logical: + comprintf("\tdont_care_flags();\n"); + start_brace(); + switch (size) { + case sz_byte: + comprintf("\tstart_needflags();\n" + "\ttest_b_rr(%s,%s);\n", value, value); + break; + case sz_word: + comprintf("\tstart_needflags();\n" + "\ttest_w_rr(%s,%s);\n", value, value); + break; + case sz_long: + comprintf("\tstart_needflags();\n" + "\ttest_l_rr(%s,%s);\n", value, value); + break; + } + comprintf("\tlive_flags();\n"); + comprintf("\tend_needflags();\n"); + close_brace(); + return; + + case flag_add: + case flag_sub: + case flag_cmp: + comprintf("\tdont_care_flags();\n"); + { + const char* op; + switch (type) { + case flag_add: + op = "add"; + break; + case flag_sub: + op = "sub"; + break; + case flag_cmp: + op = "cmp"; + break; + default: + assert(0); + break; + } + switch (size) { + case sz_byte: + comprintf("\tstart_needflags();\n" + "\t%s_b(%s,%s);\n", op, dst, src); + break; + case sz_word: + comprintf("\tstart_needflags();\n" + "\t%s_w(%s,%s);\n", op, dst, src); + break; + case sz_long: + comprintf("\tstart_needflags();\n" + "\t%s_l(%s,%s);\n", op, dst, src); + break; + } + comprintf("\tlive_flags();\n"); + comprintf("\tend_needflags();\n"); + if (type != flag_cmp) { + duplicate_carry(); + } + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + + return; + } + + case flag_addx: + case flag_subx: + uses_cmov; + comprintf("\tdont_care_flags();\n"); + { + const char* op; + switch (type) { + case flag_addx: + op = "adc"; + break; + case flag_subx: + op = "sbb"; + break; + default: + assert(0); + break; + } + start_brace(); + comprintf("\tint zero=scratchie++;\n" + "\tint one=scratchie++;\n" + "\tif (needed_flags&FLAG_Z) {\n" + "\tmov_l_ri(zero,0);\n" + "\tmov_l_ri(one,-1);\n" + "\tmake_flags_live();\n" + "\tcmov_l_rr(zero,one,%d);\n" + "\t}\n", NATIVE_CC_NE); + comprintf("\trestore_carry();\n"); /* Reload the X flag into C 
*/ + switch (size) { + case sz_byte: + comprintf("\tstart_needflags();\n" + "\t%s_b(%s,%s);\n", op, dst, src); + break; + case sz_word: + comprintf("\tstart_needflags();\n" + "\t%s_w(%s,%s);\n", op, dst, src); + break; + case sz_long: + comprintf("\tstart_needflags();\n" + "\t%s_l(%s,%s);\n", op, dst, src); + break; + } + comprintf("\tlive_flags();\n"); + comprintf("\tif (needed_flags&FLAG_Z) {\n" + "\tcmov_l_rr(zero,one,%d);\n" + "\tset_zero(zero, one);\n" /* No longer need one */ + "\tlive_flags();\n" + "\t}\n", NATIVE_CC_NE); + comprintf("\tend_needflags();\n"); + duplicate_carry(); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + return; + } + default: + failure; + break; + } +} +#endif + +static void gen_abcd(uae_u32 opcode, struct instr *curi, const char* ssize) { +#if 0 +#else + (void) opcode; + (void) curi; + (void) ssize; + failure; + /* No BCD maths for me.... */ +#endif +} + +static void gen_add(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + + comprintf("\t dont_care_flags();\n"); + start_brace(); + comprintf("\t int tmp=scratchie++;\n"); + // Use tmp register to avoid destroying upper part in .B., .W cases + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_ADD_%s(tmp,dst,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + duplicate_carry(); + comprintf( + "\t if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } else { + comprintf("\t jnf_ADD(tmp,dst,src);\n"); + } + genastore("tmp", curi->dmode, "dstreg", curi->size, "dst"); +#else + (void) ssize; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genflags(flag_add, curi->size, "", "src", "dst"); + genastore("dst", curi->dmode, "dstreg", curi->size, "dst"); +#endif +} 
+ +static void gen_adda(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", sz_long, "dst", 1, 0); + start_brace(); + comprintf("\t jnf_ADDA_%s(dst, src);\n", ssize); + genastore("dst", curi->dmode, "dstreg", sz_long, "dst"); +#else + (void) ssize; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", sz_long, "dst", 1, 0); + start_brace(); + comprintf("\tint tmp=scratchie++;\n"); + switch (curi->size) { + case sz_byte: + comprintf("\tsign_extend_8_rr(tmp,src);\n"); + break; + case sz_word: + comprintf("\tsign_extend_16_rr(tmp,src);\n"); + break; + case sz_long: + comprintf("\ttmp=src;\n"); + break; + default: + assert(0); + break; + } + comprintf("\tarm_ADD_l(dst,tmp);\n"); + genastore("dst", curi->dmode, "dstreg", sz_long, "dst"); +#endif +} + +static void gen_addx(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + isaddx; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + start_brace(); + + // Use tmp register to avoid destroying upper part in .B., .W cases + comprintf("\t dont_care_flags();\n"); + comprintf("\t int tmp=scratchie++;\n"); + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t restore_carry();\n"); /* Reload the X flag into C */ + comprintf("\t start_needflags();\n"); + comprintf("\t jff_ADDX_%s(tmp,dst,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + duplicate_carry(); + comprintf("\t if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } else { + comprintf("\t restore_carry();\n"); /* Reload the X flag into C */ + comprintf("\t jnf_ADDX(tmp,dst,src);\n"); + } + genastore("tmp", curi->dmode, "dstreg", curi->size, "dst"); +#else + (void) ssize; + isaddx; + genamode(curi->smode, "srcreg", 
curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + start_brace(); + genflags(flag_addx, curi->size, "", "src", "dst"); + genastore("dst", curi->dmode, "dstreg", curi->size, "dst"); +#endif +} + +static void gen_and(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + + comprintf("\t dont_care_flags();\n"); + comprintf("\t int tmp=scratchie++;\n"); + start_brace(); + if (!noflags) { + comprintf("\t jff_AND_%s(tmp,dst,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_AND(tmp,dst,src);\n"); + } + genastore("tmp", curi->dmode, "dstreg", curi->size, "dst"); +#else + (void) ssize; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genflags(flag_and, curi->size, "", "src", "dst"); + genastore("dst", curi->dmode, "dstreg", curi->size, "dst"); +#endif +} + +static void gen_andsr(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t start_needflags();\n"); + comprintf("\t jff_ANDSR(ARM_CCR_MAP[src & 0xF], (src & 0x10));\n"); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } +#else + (void) curi; + failure; + isjump; +#endif +} + +static void gen_asl(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + mayfail; + if (curi->smode == Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + comprintf("\t dont_care_flags();\n"); + comprintf("\t int tmp=scratchie++;\n"); + + genamode(curi->smode, "srcreg", 
curi->size, "cnt", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "data", 1, 0); + + if (curi->smode != immi) { + if (!noflags) { + start_brace(); + comprintf("\t make_flags_live();\n"); + comprintf("\t start_needflags();\n"); + comprintf("\t jff_ASL_%s_reg(tmp,data,cnt);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf( + "\t if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } else { + start_brace(); + comprintf("\t jnf_LSL_reg(tmp,data,cnt);\n"); + } + } else { + start_brace(); + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t start_needflags();\n"); + comprintf("\t jff_ASL_%s_imm(tmp,data,srcreg);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf( + "\t if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } else { + comprintf("\t jnf_LSL_imm(tmp,data,srcreg);\n"); + } + } + genastore("tmp", curi->dmode, "dstreg", curi->size, "data"); +#else + (void) ssize; + + mayfail; + if (curi->smode == Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + comprintf("\tdont_care_flags();\n"); + /* Except for the handling of the V flag, this is identical to + LSL. The handling of V is, uhm, unpleasant, so if it's needed, + let the normal emulation handle it. 
Shoulders of giants kinda + thing ;-) */ + comprintf("if (needed_flags & FLAG_V) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + + genamode(curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "data", 1, 0); + if (curi->smode != immi) { + if (!noflags) { + uses_cmov; + start_brace(); + comprintf("\tint highmask;\n" + "\tint cdata=scratchie++;\n" + "\tint tmpcnt=scratchie++;\n"); + comprintf("\tmov_l_rr(tmpcnt,cnt);\n" + "\tand_l_ri(tmpcnt,63);\n" + "\tmov_l_ri(cdata,0);\n" + "\tcmov_l_rr(cdata,data,%d);\n", NATIVE_CC_NE); + /* cdata is now either data (for shift count!=0) or + 0 (for shift count==0) */ + switch (curi->size) { + case sz_byte: + comprintf("\tshll_b_rr(data,cnt);\n" + "\thighmask=0x38;\n"); + break; + case sz_word: + comprintf("\tshll_w_rr(data,cnt);\n" + "\thighmask=0x30;\n"); + break; + case sz_long: + comprintf("\tshll_l_rr(data,cnt);\n" + "\thighmask=0x20;\n"); + break; + default: + assert(0); + break; + } + comprintf("test_l_ri(cnt,highmask);\n" + "mov_l_ri(scratchie,0);\n" + "cmov_l_rr(scratchie,data,%d);\n", NATIVE_CC_EQ); + switch (curi->size) { + case sz_byte: + comprintf("\tmov_b_rr(data,scratchie);\n"); + break; + case sz_word: + comprintf("\tmov_w_rr(data,scratchie);\n"); + break; + case sz_long: + comprintf("\tmov_l_rr(data,scratchie);\n"); + break; + default: + assert(0); + break; + } + /* Result of shift is now in data. 
Now we need to determine + the carry by shifting cdata one less */ + comprintf("\tsub_l_ri(tmpcnt,1);\n"); + switch (curi->size) { + case sz_byte: + comprintf("\tshll_b_rr(cdata,tmpcnt);\n"); + break; + case sz_word: + comprintf("\tshll_w_rr(cdata,tmpcnt);\n"); + break; + case sz_long: + comprintf("\tshll_l_rr(cdata,tmpcnt);\n"); + break; + default: + assert(0); + break; + } + comprintf("test_l_ri(tmpcnt,highmask);\n" + "mov_l_ri(scratchie,0);\n" + "cmov_l_rr(cdata,scratchie,%d);\n", NATIVE_CC_NE); + /* And create the flags */ + comprintf("\tstart_needflags();\n"); + + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch (curi->size) { + case sz_byte: + comprintf("\t test_b_rr(data,data);\n"); + comprintf("\t bt_l_ri(cdata,7);\n"); + break; + case sz_word: + comprintf("\t test_w_rr(data,data);\n"); + comprintf("\t bt_l_ri(cdata,15);\n"); + break; + case sz_long: + comprintf("\t test_l_rr(data,data);\n"); + comprintf("\t bt_l_ri(cdata,31);\n"); + break; + } + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + genastore("data", curi->dmode, "dstreg", curi->size, "data"); + } else { + uses_cmov; + start_brace(); + comprintf("\tint highmask;\n"); + switch (curi->size) { + case sz_byte: + comprintf("\tshll_b_rr(data,cnt);\n" + "\thighmask=0x38;\n"); + break; + case sz_word: + comprintf("\tshll_w_rr(data,cnt);\n" + "\thighmask=0x30;\n"); + break; + case sz_long: + comprintf("\tshll_l_rr(data,cnt);\n" + "\thighmask=0x20;\n"); + break; + default: + assert(0); + break; + } + comprintf("test_l_ri(cnt,highmask);\n" + "mov_l_ri(scratchie,0);\n" + "cmov_l_rr(scratchie,data,%d);\n", NATIVE_CC_EQ); + switch (curi->size) { + case sz_byte: + comprintf("\tmov_b_rr(data,scratchie);\n"); + break; + case sz_word: + comprintf("\tmov_w_rr(data,scratchie);\n"); + break; + case sz_long: + comprintf("\tmov_l_rr(data,scratchie);\n"); + break; + default: + 
assert(0); + break; + } + genastore("data", curi->dmode, "dstreg", curi->size, "data"); + } + } else { + start_brace(); + comprintf("\tint tmp=scratchie++;\n" + "\tint bp;\n" + "\tmov_l_rr(tmp,data);\n"); + switch (curi->size) { + case sz_byte: + comprintf("\tshll_b_ri(data,srcreg);\n" + "\tbp=8-srcreg;\n"); + break; + case sz_word: + comprintf("\tshll_w_ri(data,srcreg);\n" + "\tbp=16-srcreg;\n"); + break; + case sz_long: + comprintf("\tshll_l_ri(data,srcreg);\n" + "\tbp=32-srcreg;\n"); + break; + default: + assert(0); + break; + } + + if (!noflags) { + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch (curi->size) { + case sz_byte: + comprintf("\t test_b_rr(data,data);\n"); + break; + case sz_word: + comprintf("\t test_w_rr(data,data);\n"); + break; + case sz_long: + comprintf("\t test_l_rr(data,data);\n"); + break; + } + comprintf("\t bt_l_ri(tmp,bp);\n"); /* Set C */ + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } + genastore("data", curi->dmode, "dstreg", curi->size, "data"); + } +#endif +} + +static void gen_aslw(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + comprintf("\t dont_care_flags();\n"); + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace(); + comprintf("\t int tmp=scratchie++;\n"); + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_ASLW(tmp,src);\n"); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_ASLW(tmp,src);\n"); + } + genastore("tmp", curi->smode, "srcreg", curi->size, "src"); +#else + (void) curi; + failure; +#endif +} + +static void gen_asr(uae_u32 opcode, struct instr *curi, const char* ssize) { +#if defined(USE_JIT2) + (void)opcode; + + mayfail; + if (curi->smode == Dreg) { + comprintf("if 
((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + comprintf("\t dont_care_flags();\n"); + + genamode(curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "data", 1, 0); + + start_brace(); + comprintf("\t int tmp=scratchie++;\n"); + if (curi->smode != immi) { + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t start_needflags();\n"); + comprintf("\t jff_ASR_%s_reg(tmp,data,cnt);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf( + "if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } else { + comprintf("\t jnf_ASR_%s_reg(tmp,data,cnt);\n", ssize); + } + } else { + char *op; + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t start_needflags();\n"); + op = "ff"; + } else + op = "nf"; + + comprintf("\t j%s_ASR_%s_imm(tmp,data,srcreg);\n", op, ssize); + if (!noflags) { + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf( + "\t if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } + } + genastore("tmp", curi->dmode, "dstreg", curi->size, "data"); +#else + (void) opcode; + (void) ssize; + + mayfail; + if (curi->smode == Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + comprintf("\tdont_care_flags();\n"); + + genamode(curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "data", 1, 0); + if (curi->smode != immi) { + if (!noflags) { + uses_cmov; + start_brace(); + comprintf("\tint highmask;\n" + "\tint width;\n" + "\tint cdata=scratchie++;\n" + "\tint tmpcnt=scratchie++;\n" + "\tint highshift=scratchie++;\n"); + comprintf("\tmov_l_rr(tmpcnt,cnt);\n" + "\tand_l_ri(tmpcnt,63);\n" + "\tmov_l_ri(cdata,0);\n" + "\tcmov_l_rr(cdata,data,%d);\n", 
NATIVE_CC_NE); + /* cdata is now either data (for shift count!=0) or + 0 (for shift count==0) */ + switch (curi->size) { + case sz_byte: + comprintf("\tshra_b_rr(data,cnt);\n" + "\thighmask=0x38;\n" + "\twidth=8;\n"); + break; + case sz_word: + comprintf("\tshra_w_rr(data,cnt);\n" + "\thighmask=0x30;\n" + "\twidth=16;\n"); + break; + case sz_long: + comprintf("\tshra_l_rr(data,cnt);\n" + "\thighmask=0x20;\n" + "\twidth=32;\n"); + break; + default: + assert(0); + break; + } + comprintf("test_l_ri(cnt,highmask);\n" + "mov_l_ri(highshift,0);\n" + "mov_l_ri(scratchie,width/2);\n" + "cmov_l_rr(highshift,scratchie,%d);\n", NATIVE_CC_NE); + /* The x86 masks out bits, so we now make sure that things + really get shifted as much as planned */ + switch (curi->size) { + case sz_byte: + comprintf("\tshra_b_rr(data,highshift);\n"); + break; + case sz_word: + comprintf("\tshra_w_rr(data,highshift);\n"); + break; + case sz_long: + comprintf("\tshra_l_rr(data,highshift);\n"); + break; + default: + assert(0); + break; + } + /* And again */ + switch (curi->size) { + case sz_byte: + comprintf("\tshra_b_rr(data,highshift);\n"); + break; + case sz_word: + comprintf("\tshra_w_rr(data,highshift);\n"); + break; + case sz_long: + comprintf("\tshra_l_rr(data,highshift);\n"); + break; + default: + assert(0); + break; + } + + /* Result of shift is now in data. 
Now we need to determine + the carry by shifting cdata one less */ + comprintf("\tsub_l_ri(tmpcnt,1);\n"); + switch (curi->size) { + case sz_byte: + comprintf("\tshra_b_rr(cdata,tmpcnt);\n"); + break; + case sz_word: + comprintf("\tshra_w_rr(cdata,tmpcnt);\n"); + break; + case sz_long: + comprintf("\tshra_l_rr(cdata,tmpcnt);\n"); + break; + default: + assert(0); + break; + } + /* If the shift count was higher than the width, we need + to pick up the sign from data */ + comprintf("test_l_ri(tmpcnt,highmask);\n" + "cmov_l_rr(cdata,data,%d);\n", NATIVE_CC_NE); + /* And create the flags */ + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch (curi->size) { + case sz_byte: + comprintf("\t test_b_rr(data,data);\n"); + break; + case sz_word: + comprintf("\t test_w_rr(data,data);\n"); + break; + case sz_long: + comprintf("\t test_l_rr(data,data);\n"); + break; + } + comprintf("\t bt_l_ri(cdata,0);\n"); /* Set C */ + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + genastore("data", curi->dmode, "dstreg", curi->size, "data"); + } else { + uses_cmov; + start_brace(); + comprintf("\tint highmask;\n" + "\tint width;\n" + "\tint highshift=scratchie++;\n"); + switch (curi->size) { + case sz_byte: + comprintf("\tshra_b_rr(data,cnt);\n" + "\thighmask=0x38;\n" + "\twidth=8;\n"); + break; + case sz_word: + comprintf("\tshra_w_rr(data,cnt);\n" + "\thighmask=0x30;\n" + "\twidth=16;\n"); + break; + case sz_long: + comprintf("\tshra_l_rr(data,cnt);\n" + "\thighmask=0x20;\n" + "\twidth=32;\n"); + break; + default: + assert(0); + break; + } + comprintf("test_l_ri(cnt,highmask);\n" + "mov_l_ri(highshift,0);\n" + "mov_l_ri(scratchie,width/2);\n" + "cmov_l_rr(highshift,scratchie,%d);\n", NATIVE_CC_NE); + /* The x86 masks out bits, so we now make sure that things + really get shifted as much as planned */ + switch 
(curi->size) { + case sz_byte: + comprintf("\tshra_b_rr(data,highshift);\n"); + break; + case sz_word: + comprintf("\tshra_w_rr(data,highshift);\n"); + break; + case sz_long: + comprintf("\tshra_l_rr(data,highshift);\n"); + break; + default: + assert(0); + break; + } + /* And again */ + switch (curi->size) { + case sz_byte: + comprintf("\tshra_b_rr(data,highshift);\n"); + break; + case sz_word: + comprintf("\tshra_w_rr(data,highshift);\n"); + break; + case sz_long: + comprintf("\tshra_l_rr(data,highshift);\n"); + break; + default: + assert(0); + break; + } + genastore("data", curi->dmode, "dstreg", curi->size, "data"); + } + } else { + start_brace(); + comprintf("\tint tmp=scratchie++;\n" + "\tint bp;\n" + "\tmov_l_rr(tmp,data);\n"); + switch (curi->size) { + case sz_byte: + comprintf("\tshra_b_ri(data,srcreg);\n" + "\tbp=srcreg-1;\n"); + break; + case sz_word: + comprintf("\tshra_w_ri(data,srcreg);\n" + "\tbp=srcreg-1;\n"); + break; + case sz_long: + comprintf("\tshra_l_ri(data,srcreg);\n" + "\tbp=srcreg-1;\n"); + break; + default: + assert(0); + break; + } + + if (!noflags) { + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch (curi->size) { + case sz_byte: + comprintf("\t test_b_rr(data,data);\n"); + break; + case sz_word: + comprintf("\t test_w_rr(data,data);\n"); + break; + case sz_long: + comprintf("\t test_l_rr(data,data);\n"); + break; + } + comprintf("\t bt_l_ri(tmp,bp);\n"); /* Set C */ + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } + genastore("data", curi->dmode, "dstreg", curi->size, "data"); + } +#endif +} + +static void gen_asrw(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + comprintf("\t dont_care_flags();\n"); + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace(); + 
comprintf("\t int tmp = scratchie++;\n"); + + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_ASRW(tmp,src);\n"); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_ASRW(tmp,src);\n"); + } + genastore("tmp", curi->smode, "srcreg", curi->size, "src"); +#else + (void) curi; + failure; +#endif +} + +static void gen_bchg(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + start_brace(); + + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t start_needflags();\n"); + comprintf("\t jff_BCHG_%s(dst,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_BCHG_%s(dst,src);\n", ssize); + comprintf("\t dont_care_flags();\n"); + } + genastore("dst", curi->dmode, "dstreg", curi->size, "dst"); +#else + (void) ssize; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + start_brace(); + comprintf("\tint s=scratchie++;\n" + "\tint tmp=scratchie++;\n" + "\tmov_l_rr(s,src);\n"); + if (curi->size == sz_byte) + comprintf("\tand_l_ri(s,7);\n"); + else + comprintf("\tand_l_ri(s,31);\n"); + + comprintf("\tbtc_l_rr(dst,s);\n" /* Answer now in C */ + "\tsbb_l(s,s);\n" /* s is 0 if bit was 0, -1 otherwise */ + "\tmake_flags_live();\n" /* Get the flags back */ + "\tdont_care_flags();\n"); + if (!noflags) { + comprintf("\tstart_needflags();\n" + "\tset_zero(s,tmp);\n" + "\tlive_flags();\n" + "\tend_needflags();\n"); + } + genastore("dst", curi->dmode, "dstreg", curi->size, "dst"); +#endif +} + +static void gen_bclr(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, 
"dstreg", curi->size, "dst", 1, 0); + start_brace(); + + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t start_needflags();\n"); + comprintf("\t jff_BCLR_%s(dst,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_BCLR_%s(dst,src);\n", ssize); + comprintf("\t dont_care_flags();\n"); + } + genastore("dst", curi->dmode, "dstreg", curi->size, "dst"); +#else + (void) ssize; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + start_brace(); + comprintf("\tint s=scratchie++;\n" + "\tint tmp=scratchie++;\n" + "\tmov_l_rr(s,src);\n"); + if (curi->size == sz_byte) + comprintf("\tand_l_ri(s,7);\n"); + else + comprintf("\tand_l_ri(s,31);\n"); + + comprintf("\tbtr_l_rr(dst,s);\n" /* Answer now in C */ + "\tsbb_l(s,s);\n" /* s is 0 if bit was 0, -1 otherwise */ + "\tmake_flags_live();\n" /* Get the flags back */ + "\tdont_care_flags();\n"); + if (!noflags) { + comprintf("\tstart_needflags();\n" + "\tset_zero(s,tmp);\n" + "\tlive_flags();\n" + "\tend_needflags();\n"); + } + genastore("dst", curi->dmode, "dstreg", curi->size, "dst"); +#endif +} + +static void gen_bset(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + start_brace(); + + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t start_needflags();\n"); + comprintf("\t jff_BSET_%s(dst,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_BSET_%s(dst,src);\n", ssize); + comprintf("\t dont_care_flags();\n"); + } + genastore("dst", curi->dmode, "dstreg", curi->size, "dst"); +#else + (void) ssize; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); 
+ start_brace(); + comprintf("\tint s=scratchie++;\n" + "\tint tmp=scratchie++;\n" + "\tmov_l_rr(s,src);\n"); + if (curi->size == sz_byte) + comprintf("\tand_l_ri(s,7);\n"); + else + comprintf("\tand_l_ri(s,31);\n"); + + comprintf("\tbts_l_rr(dst,s);\n" /* Answer now in C */ + "\tsbb_l(s,s);\n" /* s is 0 if bit was 0, -1 otherwise */ + "\tmake_flags_live();\n" /* Get the flags back */ + "\tdont_care_flags();\n"); + if (!noflags) { + comprintf("\tstart_needflags();\n" + "\tset_zero(s,tmp);\n" + "\tlive_flags();\n" + "\tend_needflags();\n"); + } + genastore("dst", curi->dmode, "dstreg", curi->size, "dst"); +#endif +} + +static void gen_btst(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + start_brace(); + + // If we are not interested in flags it is not necessary to do + // anything with the data + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t start_needflags();\n"); + comprintf("\t jff_BTST_%s(dst,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t dont_care_flags();\n"); + } +#else + (void) ssize; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + start_brace(); + comprintf("\tint s=scratchie++;\n" + "\tint tmp=scratchie++;\n" + "\tmov_l_rr(s,src);\n"); + if (curi->size == sz_byte) + comprintf("\tand_l_ri(s,7);\n"); + else + comprintf("\tand_l_ri(s,31);\n"); + + comprintf("\tbt_l_rr(dst,s);\n" /* Answer now in C */ + "\tsbb_l(s,s);\n" /* s is 0 if bit was 0, -1 otherwise */ + "\tmake_flags_live();\n" /* Get the flags back */ + "\tdont_care_flags();\n"); + if (!noflags) { + comprintf("\tstart_needflags();\n" + "\tset_zero(s,tmp);\n" + "\tlive_flags();\n" + "\tend_needflags();\n"); + } +#endif +} + +static void gen_clr(uae_u32 opcode, 
struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 2, 0); + comprintf("\t dont_care_flags();\n"); + start_brace(); + comprintf("\t int tmp=scratchie++;\n"); + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_CLR(tmp);\n"); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_CLR(tmp);\n"); + } + genastore("tmp", curi->smode, "srcreg", curi->size, "src"); +#else + genamode(curi->smode, "srcreg", curi->size, "src", 2, 0); + start_brace(); + comprintf("\tint dst=scratchie++;\n"); + comprintf("\tmov_l_ri(dst,0);\n"); + genflags(flag_logical, curi->size, "dst", "", ""); + genastore("dst", curi->smode, "srcreg", curi->size, "src"); +#endif +} + +static void gen_cmp(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + start_brace(); + comprintf("\t dont_care_flags();\n"); + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_CMP_%s(dst,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } else { + comprintf("/* Weird --- CMP with noflags ;-) */\n"); + } +#else + (void) ssize; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + start_brace(); + genflags(flag_cmp, curi->size, "", "src", "dst"); +#endif +} + +static void gen_cmpa(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", sz_long, "dst", 1, 0); + start_brace(); + if (!noflags) { + comprintf("\t dont_care_flags();\n"); + comprintf("\t 
start_needflags();\n"); + comprintf("\t jff_CMPA_%s(dst,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } else { + comprintf("\tdont_care_flags();\n"); + comprintf("/* Weird --- CMP with noflags ;-) */\n"); + } +#else + (void) ssize; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", sz_long, "dst", 1, 0); + start_brace(); + comprintf("\tint tmps=scratchie++;\n"); + switch (curi->size) { + case sz_byte: + comprintf("\tsign_extend_8_rr(tmps,src);\n"); + break; + case sz_word: + comprintf("\tsign_extend_16_rr(tmps,src);\n"); + break; + case sz_long: + comprintf("tmps=src;\n"); + break; + default: + assert(0); + break; + } + genflags(flag_cmp, sz_long, "", "tmps", "dst"); +#endif +} + +static void gen_dbcc(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if 0 + isjump; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "offs", 1, 0); + + comprintf("uae_u32 voffs;\n"); + comprintf("voffs = get_const(offs);\n"); + /* That offs is an immediate, so we can clobber it with abandon */ + switch (curi->size) { + case sz_word: + comprintf("\t voffs = (uae_s32)((uae_s16)voffs);\n"); + break; + default: + assert(0); /* Seems this only comes in word flavour */ + break; + } + comprintf("\t voffs -= m68k_pc_offset - m68k_pc_offset_thisinst - 2;\n"); + comprintf("\t voffs += (uintptr)comp_pc_p + m68k_pc_offset;\n"); + + comprintf("\t add_const_v(PC_P, m68k_pc_offset);\n"); + comprintf("\t m68k_pc_offset = 0;\n"); + + start_brace(); + + if (curi->cc >= 2) { + comprintf("\t make_flags_live();\n"); /* Load the flags */ + } + + assert(curi->size == sz_word); + + switch (curi->cc) { + case 0: /* This is an elaborate nop? 
*/ + break; + case 1: + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + case 8: + case 9: + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + comprintf("\t start_needflags();\n"); + comprintf("\t jnf_DBcc(src,voffs,%d);\n", curi->cc); + comprintf("\t end_needflags();\n"); + break; + default: + assert(0); + break; + } + genastore("src", curi->smode, "srcreg", curi->size, "src"); + gen_update_next_handler(); +#else + isjump; + uses_cmov; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "offs", 1, 0); + + /* That offs is an immediate, so we can clobber it with abandon */ + switch (curi->size) { + case sz_word: + comprintf("\tsign_extend_16_rr(offs,offs);\n"); + break; + default: + assert(0); /* Seems this only comes in word flavour */ + break; + } + comprintf("\tsub_l_ri(offs,m68k_pc_offset-m68k_pc_offset_thisinst-2);\n"); + comprintf("\tarm_ADD_l_ri(offs,(uintptr)comp_pc_p);\n"); + /* New PC, + once the + offset_68k is + * also added */ + /* Let's fold in the m68k_pc_offset at this point */ + comprintf("\tarm_ADD_l_ri(offs,m68k_pc_offset);\n"); + comprintf("\tarm_ADD_l_ri(PC_P,m68k_pc_offset);\n"); + comprintf("\tm68k_pc_offset=0;\n"); + + start_brace(); + comprintf("\tint nsrc=scratchie++;\n"); + + if (curi->cc >= 2) { + comprintf("\tmake_flags_live();\n"); /* Load the flags */ + } + + assert (curi->size == sz_word); + + switch (curi->cc) { + case 0: /* This is an elaborate nop? */ + break; + case 1: + comprintf("\tstart_needflags();\n"); + comprintf("\tsub_w_ri(src,1);\n"); + comprintf("\t end_needflags();\n"); + start_brace(); + comprintf("\tuae_u32 v2,v;\n" + "\tuae_u32 v1=get_const(PC_P);\n"); + comprintf("\tv2=get_const(offs);\n" + "\tregister_branch(v1,v2,%d);\n", NATIVE_CC_CC); + break; + + case 8: + failure; + break; /* Work out details! FIXME */ + case 9: + failure; + break; /* Not critical, though! 
*/ + + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + comprintf("\tmov_l_rr(nsrc,src);\n"); + comprintf("\tlea_l_brr(scratchie,src,(uae_s32)-1);\n" + "\tmov_w_rr(src,scratchie);\n"); + comprintf("\tcmov_l_rr(offs,PC_P,%d);\n", cond_codes[curi->cc]); + comprintf("\tcmov_l_rr(src,nsrc,%d);\n", cond_codes[curi->cc]); + /* OK, now for cc=true, we have src==nsrc and offs==PC_P, + so whether we move them around doesn't matter. However, + if cc=false, we have offs==jump_pc, and src==nsrc-1 */ + + comprintf("\t start_needflags();\n"); + comprintf("\ttest_w_rr(nsrc,nsrc);\n"); + comprintf("\t end_needflags();\n"); + comprintf("\tcmov_l_rr(PC_P,offs,%d);\n", NATIVE_CC_NE); + break; + default: + assert(0); + break; + } + genastore("src", curi->smode, "srcreg", curi->size, "src"); + gen_update_next_handler(); +#endif +} + +static void gen_eor(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + + comprintf("\t dont_care_flags();\n"); + start_brace(); + comprintf("\t int tmp=scratchie++;\n"); + if (!noflags) { + comprintf("\t jff_EOR_%s(tmp,dst,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_EOR(tmp,dst,src);\n"); + } + genastore("tmp", curi->dmode, "dstreg", curi->size, "dst"); +#else + (void) ssize; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genflags(flag_eor, curi->size, "", "src", "dst"); + genastore("dst", curi->dmode, "dstreg", curi->size, "dst"); +#endif +} + +static void gen_eorsr(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + if (!noflags) { + 
comprintf("\t make_flags_live();\n"); + comprintf("\t start_needflags();\n"); + comprintf("\t jff_EORSR(ARM_CCR_MAP[src & 0xF], ((src & 0x10) >> 4));\n"); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } +#else + (void) curi; + failure; + isjump; +#endif +} + +static void gen_exg(uae_u32 opcode, struct instr *curi, const char* ssize) { +#if 0 +#else + (void) opcode; + (void) ssize; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + start_brace(); + comprintf("\tint tmp=scratchie++;\n" + "\tmov_l_rr(tmp,src);\n"); + genastore("dst", curi->smode, "srcreg", curi->size, "src"); + genastore("tmp", curi->dmode, "dstreg", curi->size, "dst"); +#endif +} + +static void gen_ext(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", sz_long, "src", 1, 0); + comprintf("\t dont_care_flags();\n"); + start_brace(); + comprintf("\t int tmp=scratchie++;\n"); + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_EXT_%s(tmp,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_EXT_%s(tmp,src);\n", ssize); + } + genastore("tmp", curi->smode, "srcreg", + curi->size == sz_word ? sz_word : sz_long, "src"); +#else + (void) ssize; + genamode(curi->smode, "srcreg", sz_long, "src", 1, 0); + comprintf("\tdont_care_flags();\n"); + start_brace(); + switch (curi->size) { + case sz_byte: + comprintf("\tint dst = src;\n" + "\tsign_extend_8_rr(src,src);\n"); + break; + case sz_word: + comprintf("\tint dst = scratchie++;\n" + "\tsign_extend_8_rr(dst,src);\n"); + break; + case sz_long: + comprintf("\tint dst = src;\n" + "\tsign_extend_16_rr(src,src);\n"); + break; + default: + assert(0); + break; + } + genflags(flag_logical, curi->size == sz_word ? 
sz_word : sz_long, "dst", "", + ""); + genastore("dst", curi->smode, "srcreg", + curi->size == sz_word ? sz_word : sz_long, "src"); +#endif +} + +static void gen_lsl(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + mayfail; + if (curi->smode == Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + comprintf("\tdont_care_flags();\n"); + + genamode(curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "data", 1, 0); + comprintf("\t int tmp=scratchie++;\n"); + if (curi->smode != immi) { + if (!noflags) { + start_brace(); + comprintf("\t make_flags_live();\n"); + comprintf("\t start_needflags();\n"); + comprintf("\t jff_LSL_%s_reg(tmp,data,cnt);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf( + "\t if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } else { + start_brace(); + comprintf("\t jnf_LSL_reg(tmp,data,cnt);\n"); + } + } else { + start_brace(); + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t start_needflags();\n"); + comprintf("\t jff_LSL_%s_imm(tmp,data,srcreg);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf( + "\t if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } else { + comprintf("\t jnf_LSL_imm(tmp,data,srcreg);\n"); + } + } + genastore("tmp", curi->dmode, "dstreg", curi->size, "data"); +#else + mayfail; + if (curi->smode == Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + comprintf("\tdont_care_flags();\n"); + + genamode(curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "data", 1, 0); + if (curi->smode != immi) { + if (!noflags) { + 
uses_cmov; + start_brace(); + comprintf("\tint highmask;\n" + "\tint cdata=scratchie++;\n" + "\tint tmpcnt=scratchie++;\n"); + comprintf("\tmov_l_rr(tmpcnt,cnt);\n" + "\tand_l_ri(tmpcnt,63);\n" + "\tmov_l_ri(cdata,0);\n" + "\tcmov_l_rr(cdata,data,%d);\n", NATIVE_CC_NE); + /* cdata is now either data (for shift count!=0) or + 0 (for shift count==0) */ + switch (curi->size) { + case sz_byte: + comprintf("\tshll_b_rr(data,cnt);\n" + "\thighmask=0x38;\n"); + break; + case sz_word: + comprintf("\tshll_w_rr(data,cnt);\n" + "\thighmask=0x30;\n"); + break; + case sz_long: + comprintf("\tshll_l_rr(data,cnt);\n" + "\thighmask=0x20;\n"); + break; + default: + assert(0); + break; + } + comprintf("test_l_ri(cnt,highmask);\n" + "mov_l_ri(scratchie,0);\n" + "cmov_l_rr(scratchie,data,%d);\n", NATIVE_CC_EQ); + switch (curi->size) { + case sz_byte: + comprintf("\tmov_b_rr(data,scratchie);\n"); + break; + case sz_word: + comprintf("\tmov_w_rr(data,scratchie);\n"); + break; + case sz_long: + comprintf("\tmov_l_rr(data,scratchie);\n"); + break; + default: + assert(0); + break; + } + /* Result of shift is now in data. 
Now we need to determine + the carry by shifting cdata one less */ + comprintf("\tsub_l_ri(tmpcnt,1);\n"); + switch (curi->size) { + case sz_byte: + comprintf("\tshll_b_rr(cdata,tmpcnt);\n"); + break; + case sz_word: + comprintf("\tshll_w_rr(cdata,tmpcnt);\n"); + break; + case sz_long: + comprintf("\tshll_l_rr(cdata,tmpcnt);\n"); + break; + default: + assert(0); + break; + } + comprintf("test_l_ri(tmpcnt,highmask);\n" + "mov_l_ri(scratchie,0);\n" + "cmov_l_rr(cdata,scratchie,%d);\n", NATIVE_CC_NE); + /* And create the flags */ + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch (curi->size) { + case sz_byte: + comprintf("\t test_b_rr(data,data);\n"); + comprintf("\t bt_l_ri(cdata,7);\n"); + break; + case sz_word: + comprintf("\t test_w_rr(data,data);\n"); + comprintf("\t bt_l_ri(cdata,15);\n"); + break; + case sz_long: + comprintf("\t test_l_rr(data,data);\n"); + comprintf("\t bt_l_ri(cdata,31);\n"); + break; + } + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + genastore("data", curi->dmode, "dstreg", curi->size, "data"); + } else { + uses_cmov; + start_brace(); + comprintf("\tint highmask;\n"); + switch (curi->size) { + case sz_byte: + comprintf("\tshll_b_rr(data,cnt);\n" + "\thighmask=0x38;\n"); + break; + case sz_word: + comprintf("\tshll_w_rr(data,cnt);\n" + "\thighmask=0x30;\n"); + break; + case sz_long: + comprintf("\tshll_l_rr(data,cnt);\n" + "\thighmask=0x20;\n"); + break; + default: + assert(0); + break; + } + comprintf("test_l_ri(cnt,highmask);\n" + "mov_l_ri(scratchie,0);\n" + "cmov_l_rr(scratchie,data,%d);\n", NATIVE_CC_EQ); + switch (curi->size) { + case sz_byte: + comprintf("\tmov_b_rr(data,scratchie);\n"); + break; + case sz_word: + comprintf("\tmov_w_rr(data,scratchie);\n"); + break; + case sz_long: + comprintf("\tmov_l_rr(data,scratchie);\n"); + break; + default: + 
assert(0); + break; + } + genastore("data", curi->dmode, "dstreg", curi->size, "data"); + } + } else { + start_brace(); + comprintf("\tint tmp=scratchie++;\n" + "\tint bp;\n" + "\tmov_l_rr(tmp,data);\n"); + switch (curi->size) { + case sz_byte: + comprintf("\tshll_b_ri(data,srcreg);\n" + "\tbp=8-srcreg;\n"); + break; + case sz_word: + comprintf("\tshll_w_ri(data,srcreg);\n" + "\tbp=16-srcreg;\n"); + break; + case sz_long: + comprintf("\tshll_l_ri(data,srcreg);\n" + "\tbp=32-srcreg;\n"); + break; + default: + assert(0); + break; + } + + if (!noflags) { + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch (curi->size) { + case sz_byte: + comprintf("\t test_b_rr(data,data);\n"); + break; + case sz_word: + comprintf("\t test_w_rr(data,data);\n"); + break; + case sz_long: + comprintf("\t test_l_rr(data,data);\n"); + break; + } + comprintf("\t bt_l_ri(tmp,bp);\n"); /* Set C */ + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } + genastore("data", curi->dmode, "dstreg", curi->size, "data"); + } +#endif +} + +static void gen_lslw(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + comprintf("\t dont_care_flags();\n"); + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace(); + comprintf("\t int tmp=scratchie++;\n"); + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_LSLW(tmp,src);\n"); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_LSLW(tmp,src);\n"); + } + genastore("tmp", curi->smode, "srcreg", curi->size, "src"); +#else + (void) curi; + failure; +#endif +} + +static void gen_lsr(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + mayfail; + if (curi->smode == Dreg) { + comprintf("if 
((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + comprintf("\t dont_care_flags();\n"); + + genamode(curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "data", 1, 0); + comprintf("\t int tmp=scratchie++;\n"); + if (curi->smode != immi) { + if (!noflags) { + start_brace(); + comprintf("\t make_flags_live();\n"); + comprintf("\t start_needflags();\n"); + comprintf("\t jff_LSR_%s_reg(tmp,data,cnt);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } else { + start_brace(); + comprintf("\t jnf_LSR_%s_reg(tmp,data,cnt);\n", ssize); + } + } else { + start_brace(); + char *op; + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t start_needflags();\n"); + op = "ff"; + } else + op = "nf"; + + comprintf("\t j%s_LSR_%s_imm(tmp,data,srcreg);\n", op, ssize); + + if (!noflags) { + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf( + "\t if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } + } + genastore("tmp", curi->dmode, "dstreg", curi->size, "data"); +#else + (void) ssize; + mayfail; + if (curi->smode == Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + comprintf("\tdont_care_flags();\n"); + + genamode(curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "data", 1, 0); + if (curi->smode != immi) { + if (!noflags) { + uses_cmov; + start_brace(); + comprintf("\tint highmask;\n" + "\tint cdata=scratchie++;\n" + "\tint tmpcnt=scratchie++;\n"); + comprintf("\tmov_l_rr(tmpcnt,cnt);\n" + "\tand_l_ri(tmpcnt,63);\n" + "\tmov_l_ri(cdata,0);\n" + "\tcmov_l_rr(cdata,data,%d);\n", NATIVE_CC_NE); + /* cdata is now either data 
(for shift count!=0) or + 0 (for shift count==0) */ + switch (curi->size) { + case sz_byte: + comprintf("\tshrl_b_rr(data,cnt);\n" + "\thighmask=0x38;\n"); + break; + case sz_word: + comprintf("\tshrl_w_rr(data,cnt);\n" + "\thighmask=0x30;\n"); + break; + case sz_long: + comprintf("\tshrl_l_rr(data,cnt);\n" + "\thighmask=0x20;\n"); + break; + default: + assert(0); + break; + } + comprintf("test_l_ri(cnt,highmask);\n" + "mov_l_ri(scratchie,0);\n" + "cmov_l_rr(scratchie,data,%d);\n", NATIVE_CC_EQ); + switch (curi->size) { + case sz_byte: + comprintf("\tmov_b_rr(data,scratchie);\n"); + break; + case sz_word: + comprintf("\tmov_w_rr(data,scratchie);\n"); + break; + case sz_long: + comprintf("\tmov_l_rr(data,scratchie);\n"); + break; + default: + assert(0); + break; + } + /* Result of shift is now in data. Now we need to determine + the carry by shifting cdata one less */ + comprintf("\tsub_l_ri(tmpcnt,1);\n"); + switch (curi->size) { + case sz_byte: + comprintf("\tshrl_b_rr(cdata,tmpcnt);\n"); + break; + case sz_word: + comprintf("\tshrl_w_rr(cdata,tmpcnt);\n"); + break; + case sz_long: + comprintf("\tshrl_l_rr(cdata,tmpcnt);\n"); + break; + default: + assert(0); + break; + } + comprintf("test_l_ri(tmpcnt,highmask);\n" + "mov_l_ri(scratchie,0);\n" + "cmov_l_rr(cdata,scratchie,%d);\n", NATIVE_CC_NE); + /* And create the flags */ + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch (curi->size) { + case sz_byte: + comprintf("\t test_b_rr(data,data);\n"); + break; + case sz_word: + comprintf("\t test_w_rr(data,data);\n"); + break; + case sz_long: + comprintf("\t test_l_rr(data,data);\n"); + break; + } + comprintf("\t bt_l_ri(cdata,0);\n"); /* Set C */ + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + genastore("data", curi->dmode, "dstreg", curi->size, "data"); + } else { + uses_cmov; + 
start_brace(); + comprintf("\tint highmask;\n"); + switch (curi->size) { + case sz_byte: + comprintf("\tshrl_b_rr(data,cnt);\n" + "\thighmask=0x38;\n"); + break; + case sz_word: + comprintf("\tshrl_w_rr(data,cnt);\n" + "\thighmask=0x30;\n"); + break; + case sz_long: + comprintf("\tshrl_l_rr(data,cnt);\n" + "\thighmask=0x20;\n"); + break; + default: + assert(0); + break; + } + comprintf("test_l_ri(cnt,highmask);\n" + "mov_l_ri(scratchie,0);\n" + "cmov_l_rr(scratchie,data,%d);\n", NATIVE_CC_EQ); + switch (curi->size) { + case sz_byte: + comprintf("\tmov_b_rr(data,scratchie);\n"); + break; + case sz_word: + comprintf("\tmov_w_rr(data,scratchie);\n"); + break; + case sz_long: + comprintf("\tmov_l_rr(data,scratchie);\n"); + break; + default: + assert(0); + break; + } + genastore("data", curi->dmode, "dstreg", curi->size, "data"); + } + } else { + start_brace(); + comprintf("\tint tmp=scratchie++;\n" + "\tint bp;\n" + "\tmov_l_rr(tmp,data);\n"); + switch (curi->size) { + case sz_byte: + comprintf("\tshrl_b_ri(data,srcreg);\n" + "\tbp=srcreg-1;\n"); + break; + case sz_word: + comprintf("\tshrl_w_ri(data,srcreg);\n" + "\tbp=srcreg-1;\n"); + break; + case sz_long: + comprintf("\tshrl_l_ri(data,srcreg);\n" + "\tbp=srcreg-1;\n"); + break; + default: + assert(0); + break; + } + + if (!noflags) { + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch (curi->size) { + case sz_byte: + comprintf("\t test_b_rr(data,data);\n"); + break; + case sz_word: + comprintf("\t test_w_rr(data,data);\n"); + break; + case sz_long: + comprintf("\t test_l_rr(data,data);\n"); + break; + } + comprintf("\t bt_l_ri(tmp,bp);\n"); /* Set C */ + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } + genastore("data", curi->dmode, "dstreg", curi->size, "data"); + } +#endif +} + +static void gen_lsrw(uae_u32 opcode, struct instr 
*curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + comprintf("\t dont_care_flags();\n"); + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace(); + comprintf("\t int tmp = scratchie++;\n"); + + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_LSRW(tmp,src);\n"); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_LSRW(tmp,src);\n"); + } + genastore("tmp", curi->smode, "srcreg", curi->size, "src"); +#else + (void) curi; + failure; +#endif +} + +static void gen_move(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + switch (curi->dmode) { + case Dreg: + case Areg: + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 2, 0); + comprintf("\t dont_care_flags();\n"); + start_brace(); + comprintf("\t int tmp=scratchie++;\n"); + if (!noflags && curi->dmode == Dreg) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_MOVE_%s(tmp, src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t tmp = src;\n"); + } + genastore("tmp", curi->dmode, "dstreg", curi->size, "dst"); + break; + + default: /* It goes to memory, not a register */ + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 2, 0); + comprintf("\t dont_care_flags();\n"); + start_brace(); + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_TST_%s(src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } + genastore("src", curi->dmode, "dstreg", curi->size, "dst"); + break; + } +#else + (void) ssize; + + switch (curi->dmode) { + case Dreg: + case Areg: + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 2, 0); + genflags(flag_mov, 
curi->size, "", "src", "dst"); + genastore("dst", curi->dmode, "dstreg", curi->size, "dst"); + break; + default: /* It goes to memory, not a register */ + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 2, 0); + genflags(flag_logical, curi->size, "src", "", ""); + genastore("src", curi->dmode, "dstreg", curi->size, "dst"); + break; + } +#endif +} + +static void gen_movea(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 2, 0); + + start_brace(); + comprintf("\t jnf_MOVEA_%s(dst, src);\n", ssize); + genastore("dst", curi->dmode, "dstreg", sz_long, "dst"); +#else + (void) ssize; + + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 2, 0); + + start_brace(); + comprintf("\tint tmps=scratchie++;\n"); + switch (curi->size) { + case sz_word: + comprintf("\tsign_extend_16_rr(dst,src);\n"); + break; + case sz_long: + comprintf("\tmov_l_rr(dst,src);\n"); + break; + default: + assert(0); + break; + } + genastore("dst", curi->dmode, "dstreg", sz_long, "dst"); +#endif +} + +static void gen_mull(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + comprintf("\t uae_u16 extra=%s;\n", gen_nextiword()); + comprintf("\t int r2=(extra>>12)&7;\n" + "\t int tmp=scratchie++;\n"); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + /* The two operands are in dst and r2 */ + if (!noflags) { + comprintf("\t if (extra & 0x0400) {\n"); /* Need full 64 bit result */ + comprintf("\t int r3=(extra & 7);\n"); + comprintf("\t mov_l_rr(r3,dst);\n"); /* operands now in r3 and r2 */ + comprintf("\t if (extra & 0x0800) { \n"); /* signed */ + comprintf("\t\t jff_MULS64(r2,r3);\n"); + comprintf("\t } else { \n"); + comprintf("\t\t 
jff_MULU64(r2,r3);\n"); + comprintf("\t } \n"); /* The result is in r2/r3, with r2 holding the lower 32 bits */ + comprintf("\t } else {\n"); /* Only want 32 bit result */ + /* operands in dst and r2, result goes into r2 */ + /* shouldn't matter whether it's signed or unsigned?!? */ + comprintf("\t if (extra & 0x0800) { \n"); /* signed */ + comprintf("\t jff_MULS32(r2,dst);\n"); + comprintf("\t } else { \n"); + comprintf("\t\t jff_MULU32(r2,dst);\n"); + comprintf("\t } \n"); /* The result is in r2, with r2 holding the lower 32 bits */ + comprintf("\t }\n"); + } else { + comprintf("\t if (extra & 0x0400) {\n"); /* Need full 64 bit result */ + comprintf("\t int r3=(extra & 7);\n"); + comprintf("\t mov_l_rr(r3,dst);\n"); /* operands now in r3 and r2 */ + comprintf("\t if (extra & 0x0800) { \n"); /* signed */ + comprintf("\t\t jnf_MULS64(r2,r3);\n"); + comprintf("\t } else { \n"); + comprintf("\t\t jnf_MULU64(r2,r3);\n"); + comprintf("\t } \n"); /* The result is in r2/r3, with r2 holding the lower 32 bits */ + comprintf("\t } else {\n"); /* Only want 32 bit result */ + /* operands in dst and r2, result foes into r2 */ + /* shouldn't matter whether it's signed or unsigned?!? 
*/ + comprintf("\t if (extra & 0x0800) { \n"); /* signed */ + comprintf("\t jnf_MULS32(r2,dst);\n"); + comprintf("\t } else { \n"); + comprintf("\t\t jnf_MULU32(r2,dst);\n"); + comprintf("\t } \n"); /* The result is in r2, with r2 holding the lower 32 bits */ + comprintf("\t }\n"); + } +#else + if (!noflags) { + failure; + return; + } + comprintf("\tuae_u16 extra=%s;\n", gen_nextiword()); + comprintf("\tint r2=(extra>>12)&7;\n" + "\tint tmp=scratchie++;\n"); + + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + /* The two operands are in dst and r2 */ + comprintf("\tif (extra&0x0400) {\n" /* Need full 64 bit result */ + "\tint r3=(extra&7);\n" + "\tmov_l_rr(r3,dst);\n"); /* operands now in r3 and r2 */ + comprintf("\tif (extra&0x0800) { \n" /* signed */ + "\t\timul_64_32(r2,r3);\n" + "\t} else { \n" + "\t\tmul_64_32(r2,r3);\n" + "\t} \n"); + /* The result is in r2/tmp, with r2 holding the lower 32 bits */ + comprintf("\t} else {\n"); /* Only want 32 bit result */ + /* operands in dst and r2, result foes into r2 */ + /* shouldn't matter whether it's signed or unsigned?!? 
*/ + comprintf("\timul_32_32(r2,dst);\n" + "\t}\n"); +#endif +} + +static void gen_muls(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + comprintf("\t dont_care_flags();\n"); + genamode(curi->smode, "srcreg", sz_word, "src", 1, 0); + genamode(curi->dmode, "dstreg", sz_word, "dst", 1, 0); + start_brace(); + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_MULS(dst,src);\n"); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_MULS(dst,src);\n"); + } + genastore("dst", curi->dmode, "dstreg", sz_long, "dst"); +#else + comprintf("\tdont_care_flags();\n"); + genamode(curi->smode, "srcreg", sz_word, "src", 1, 0); + genamode(curi->dmode, "dstreg", sz_word, "dst", 1, 0); + comprintf("\tsign_extend_16_rr(scratchie,src);\n" + "\tsign_extend_16_rr(dst,dst);\n" + "\timul_32_32(dst,scratchie);\n"); + genflags(flag_logical, sz_long, "dst", "", ""); + genastore("dst", curi->dmode, "dstreg", sz_long, "dst"); +#endif +} + +static void gen_mulu(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + comprintf("\t dont_care_flags();\n"); + genamode(curi->smode, "srcreg", sz_word, "src", 1, 0); + genamode(curi->dmode, "dstreg", sz_word, "dst", 1, 0); + start_brace(); + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_MULU(dst,src);\n"); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_MULU(dst,src);\n"); + } + genastore("dst", curi->dmode, "dstreg", sz_long, "dst"); +#else + comprintf("\tdont_care_flags();\n"); + genamode(curi->smode, "srcreg", sz_word, "src", 1, 0); + genamode(curi->dmode, "dstreg", sz_word, "dst", 1, 0); + /* To do 16x16 unsigned multiplication, we actually use + 32x32 signed, and zero-extend the registers first. 
+ That solves the problem of MUL needing dedicated registers + on the x86 */ + comprintf("\tzero_extend_16_rr(scratchie,src);\n" + "\tzero_extend_16_rr(dst,dst);\n" + "\timul_32_32(dst,scratchie);\n"); + genflags(flag_logical, sz_long, "dst", "", ""); + genastore("dst", curi->dmode, "dstreg", sz_long, "dst"); + +#endif +} + +static void gen_nbcd(uae_u32 opcode, struct instr *curi, const char* ssize) { +#if 0 +#else + (void) opcode; + (void) curi; + (void) ssize; + failure; + /* Nope! */ +#endif +} + +static void gen_neg(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace(); + comprintf("\t int tmp=scratchie++;\n"); + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_NEG_%s(tmp,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + duplicate_carry(); + comprintf("\t if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } else { + comprintf("\t jnf_NEG(tmp,src);\n"); + } + + genastore("tmp", curi->smode, "srcreg", curi->size, "src"); +#else + (void) ssize; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace(); + comprintf("\tint dst=scratchie++;\n"); + comprintf("\tmov_l_ri(dst,0);\n"); + genflags(flag_sub, curi->size, "", "src", "dst"); + genastore("dst", curi->smode, "srcreg", curi->size, "src"); +#endif +} + +static void gen_negx(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + isaddx; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace(); + comprintf("\t int dst=scratchie++;\n"); + + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t restore_inverted_carry();\n"); /* Reload the X flag into C */ + comprintf("\t start_needflags();\n"); + comprintf("\t jff_NEGX_%s(dst,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + 
duplicate_carry(); + comprintf("\t if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } else { + comprintf("\t restore_inverted_carry();\n"); /* Reload the X flag into C */ + comprintf("\t jnf_NEGX(dst,src);\n"); + } + + genastore("dst", curi->smode, "srcreg", curi->size, "src"); +#else + (void) ssize; + isaddx; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace(); + comprintf("\tint dst=scratchie++;\n"); + comprintf("\tmov_l_ri(dst,0);\n"); + genflags(flag_subx, curi->size, "", "src", "dst"); + genastore("dst", curi->smode, "srcreg", curi->size, "src"); +#endif +} + +static void gen_not(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + comprintf("\t dont_care_flags();\n"); + start_brace(); + comprintf("\t int tmp=scratchie++;\n"); + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_NOT_%s(tmp,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_NOT(tmp,src);\n", ssize); + } + genastore("tmp", curi->smode, "srcreg", curi->size, "src"); +#else + (void) ssize; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace(); + comprintf("\tint dst=scratchie++;\n"); + comprintf("\tmov_l_ri(dst,0xffffffff);\n"); + genflags(flag_eor, curi->size, "", "src", "dst"); + genastore("dst", curi->smode, "srcreg", curi->size, "src"); +#endif +} + +static void gen_or(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + + comprintf("\t dont_care_flags();\n"); + start_brace(); + comprintf("\t int tmp=scratchie++;\n"); + if (!noflags) { + comprintf("\t jff_OR_%s(tmp, dst,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + 
comprintf("\t jnf_OR(tmp, dst,src);\n"); + } + genastore("tmp", curi->dmode, "dstreg", curi->size, "dst"); +#else + (void) ssize; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genflags(flag_or, curi->size, "", "src", "dst"); + genastore("dst", curi->dmode, "dstreg", curi->size, "dst"); +#endif +} + +static void gen_orsr(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t start_needflags();\n"); + comprintf("\t jff_ORSR(ARM_CCR_MAP[src & 0xF], ((src & 0x10) >> 4));\n"); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } +#else + (void) curi; + failure; + isjump; +#endif +} + +static void gen_rol(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + mayfail; + if (curi->smode == Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + comprintf("\t dont_care_flags();\n"); + genamode(curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "data", 1, 0); + start_brace(); + comprintf("\t int tmp=scratchie++;\n"); + + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_ROL_%s(tmp,data,cnt);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_ROL_%s(tmp,data,cnt);\n", ssize); + } + genastore("tmp", curi->dmode, "dstreg", curi->size, "data"); +#else + (void) ssize; + + mayfail; + if (curi->smode == Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + comprintf("\tdont_care_flags();\n"); + genamode(curi->smode, "srcreg", curi->size, "cnt", 1, 0); + 
genamode(curi->dmode, "dstreg", curi->size, "data", 1, 0); + start_brace(); + + switch (curi->size) { + case sz_long: + comprintf("\t rol_l_rr(data,cnt);\n"); + break; + case sz_word: + comprintf("\t rol_w_rr(data,cnt);\n"); + break; + case sz_byte: + comprintf("\t rol_b_rr(data,cnt);\n"); + break; + } + + if (!noflags) { + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch (curi->size) { + case sz_byte: + comprintf("\t test_b_rr(data,data);\n"); + break; + case sz_word: + comprintf("\t test_w_rr(data,data);\n"); + break; + case sz_long: + comprintf("\t test_l_rr(data,data);\n"); + break; + } + comprintf("\t bt_l_ri(data,0x00);\n"); /* Set C */ + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } + genastore("data", curi->dmode, "dstreg", curi->size, "data"); +#endif +} + +static void gen_rolw(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + comprintf("\t dont_care_flags();\n"); + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace(); + comprintf("\t int tmp = scratchie++;\n"); + + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_ROLW(tmp,src);\n"); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_ROLW(tmp,src);\n"); + } + genastore("tmp", curi->smode, "srcreg", curi->size, "src"); +#else + (void) curi; + failure; +#endif +} + +static void gen_ror(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + mayfail; + if (curi->smode == Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + comprintf("\t dont_care_flags();\n"); + genamode(curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "data", 1, 0); + start_brace(); + comprintf("\t int tmp=scratchie++;\n"); + + if (!noflags) { 
+ comprintf("\t start_needflags();\n"); + comprintf("\t jff_ROR_%s(tmp,data,cnt);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_ROR_%s(tmp,data,cnt);\n", ssize); + } + genastore("tmp", curi->dmode, "dstreg", curi->size, "data"); +#else + (void) ssize; + mayfail; + if (curi->smode == Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + comprintf("\tdont_care_flags();\n"); + genamode(curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "data", 1, 0); + start_brace(); + + switch (curi->size) { + case sz_long: + comprintf("\t ror_l_rr(data,cnt);\n"); + break; + case sz_word: + comprintf("\t ror_w_rr(data,cnt);\n"); + break; + case sz_byte: + comprintf("\t ror_b_rr(data,cnt);\n"); + break; + } + + if (!noflags) { + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch (curi->size) { + case sz_byte: + comprintf("\t test_b_rr(data,data);\n"); + break; + case sz_word: + comprintf("\t test_w_rr(data,data);\n"); + break; + case sz_long: + comprintf("\t test_l_rr(data,data);\n"); + break; + } + switch (curi->size) { + case sz_byte: + comprintf("\t bt_l_ri(data,0x07);\n"); + break; + case sz_word: + comprintf("\t bt_l_ri(data,0x0f);\n"); + break; + case sz_long: + comprintf("\t bt_l_ri(data,0x1f);\n"); + break; + } + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } + genastore("data", curi->dmode, "dstreg", curi->size, "data"); +#endif +} + +static void gen_rorw(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + comprintf("\t dont_care_flags();\n"); + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace(); + comprintf("\t int tmp = scratchie++;\n"); + + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t 
jff_RORW(tmp,src);\n"); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } else { + comprintf("\t jnf_RORW(tmp,src);\n"); + } + genastore("tmp", curi->smode, "srcreg", curi->size, "src"); +#else + (void) curi; + failure; +#endif +} + +static void gen_roxl(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + mayfail; + if (curi->smode == Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + isaddx; + comprintf("\t dont_care_flags();\n"); + genamode(curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "data", 1, 0); + start_brace(); + comprintf("\t int tmp=scratchie++;\n"); + + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t restore_carry();\n"); /* Reload the X flag into C */ + comprintf("\t start_needflags();\n"); + comprintf("\t jff_ROXL_%s(tmp,data,cnt);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + duplicate_carry(); + } else { + comprintf("\t restore_carry();\n"); /* Reload the X flag into C */ + comprintf("\t jnf_ROXL_%s(tmp,data,cnt);\n", ssize); + } + genastore("tmp", curi->dmode, "dstreg", curi->size, "data"); +#else + (void) curi; + (void) ssize; + failure; +#endif +} + +static void gen_roxlw(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + isaddx; + comprintf("\t dont_care_flags();\n"); + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace(); + comprintf("\t int tmp = scratchie++;\n"); + + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t restore_carry();\n"); /* Reload the X flag into C */ + comprintf("\t start_needflags();\n"); + comprintf("\t jff_ROXLW(tmp,src);\n"); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + duplicate_carry(); + } else { + comprintf("\t 
restore_carry();\n"); /* Reload the X flag into C */ + comprintf("\t jnf_ROXLW(tmp,src);\n"); + } + genastore("tmp", curi->smode, "srcreg", curi->size, "src"); +#else + (void) curi; + failure; +#endif +} + +static void gen_roxr(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + mayfail; + if (curi->smode == Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + isaddx; + comprintf("\t dont_care_flags();\n"); + genamode(curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "data", 1, 0); + start_brace(); + comprintf("\t int tmp=scratchie++;\n"); + + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t restore_carry();\n"); /* Reload the X flag into C */ + comprintf("\t start_needflags();\n"); + comprintf("\t jff_ROXR_%s(tmp,data,cnt);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + duplicate_carry(); + } else { + comprintf("\t restore_carry();\n"); /* Reload the X flag into C */ + comprintf("\t jnf_ROXR_%s(tmp,data,cnt);\n", ssize); + } + genastore("tmp", curi->dmode, "dstreg", curi->size, "data"); +#else + (void) curi; + failure; +#endif +} + +static void gen_roxrw(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + isaddx; + comprintf("\t dont_care_flags();\n"); + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace(); + comprintf("\t int tmp = scratchie++;\n"); + + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t restore_carry();\n"); /* Reload the X flag into C */ + comprintf("\t start_needflags();\n"); + comprintf("\t jff_ROXRW(tmp,src);\n"); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + duplicate_carry(); + } else { + comprintf("\t restore_carry();\n"); /* Reload the X flag into C */ + 
comprintf("\t jnf_ROXRW(tmp,src);\n"); + } + genastore("tmp", curi->smode, "srcreg", curi->size, "src"); +#else + (void) curi; + failure; +#endif +} + +static void gen_sbcd(uae_u32 opcode, struct instr *curi, const char* ssize) { +#if 0 +#else + (void) opcode; + (void) curi; + (void) ssize; + failure; + /* I don't think so! */ +#endif +} + +static void gen_scc(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if 0 + genamode(curi->smode, "srcreg", curi->size, "src", 2, 0); + start_brace(); + comprintf("\t int val = scratchie++;\n"); + switch (curi->cc) { + case 0: /* Unconditional set */ + case 1: + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + case 8: + case 9: + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + comprintf("\t make_flags_live();\n"); /* Load the flags */ + comprintf("\t jnf_Scc_ri(val,%d);\n", curi->cc); + break; + default: + assert(0); + break; + } + genastore("val", curi->smode, "srcreg", curi->size, "src"); +#else + genamode(curi->smode, "srcreg", curi->size, "src", 2, 0); + start_brace(); + comprintf("\tint val = scratchie++;\n"); + + /* We set val to 0 if we really should use 255, and to 1 for real 0 */ + switch (curi->cc) { + case 0: /* Unconditional set */ + comprintf("\tmov_l_ri(val,0);\n"); + break; + case 1: + /* Unconditional not-set */ + comprintf("\tmov_l_ri(val,1);\n"); + break; + case 8: + failure; + break; /* Work out details! FIXME */ + case 9: + failure; + break; /* Not critical, though! 
*/ + + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + comprintf("\tmake_flags_live();\n"); /* Load the flags */ + /* All condition codes can be inverted by changing the LSB */ + comprintf("\tsetcc(val,%d);\n", cond_codes[curi->cc] ^ 1); + break; + default: + assert(0); + break; + } + comprintf("\tsub_b_ri(val,1);\n"); + genastore("val", curi->smode, "srcreg", curi->size, "src"); +#endif +} + +static void gen_sub(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + + comprintf("\t dont_care_flags();\n"); + start_brace(); + // Use tmp register to avoid destroying upper part in .B., .W cases + comprintf("\t int tmp=scratchie++;\n"); + if (!noflags) { + comprintf("\t start_needflags();\n"); + comprintf("\t jff_SUB_%s(tmp,dst,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + duplicate_carry(); + comprintf( + "\t if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } else { + comprintf("\t jnf_SUB_%s(tmp,dst,src);\n", ssize); + } + genastore("tmp", curi->dmode, "dstreg", curi->size, "dst"); +#else + (void) ssize; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genflags(flag_sub, curi->size, "", "src", "dst"); + genastore("dst", curi->dmode, "dstreg", curi->size, "dst"); +#endif +} + +static void gen_suba(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", sz_long, "dst", 1, 0); + start_brace(); + comprintf("\t jnf_SUBA_%s(dst, src);\n", ssize); + genastore("dst", curi->dmode, "dstreg", sz_long, "dst"); +#else + (void) ssize; + genamode(curi->smode, "srcreg", curi->size, 
"src", 1, 0); + genamode(curi->dmode, "dstreg", sz_long, "dst", 1, 0); + start_brace(); + comprintf("\tint tmp=scratchie++;\n"); + switch (curi->size) { + case sz_byte: + comprintf("\tsign_extend_8_rr(tmp,src);\n"); + break; + case sz_word: + comprintf("\tsign_extend_16_rr(tmp,src);\n"); + break; + case sz_long: + comprintf("\ttmp=src;\n"); + break; + default: + assert(0); + break; + } + comprintf("\tsub_l(dst,tmp);\n"); + genastore("dst", curi->dmode, "dstreg", sz_long, "dst"); +#endif +} + +static void gen_subx(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; +#if defined(USE_JIT2) + isaddx; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + start_brace(); + comprintf("\tint tmp=scratchie++;\n"); + comprintf("\tdont_care_flags();\n"); + if (!noflags) { + comprintf("\t make_flags_live();\n"); + comprintf("\t restore_inverted_carry();\n"); /* Reload the X flag into C */ + comprintf("\t start_needflags();\n"); + comprintf("\t jff_SUBX_%s(tmp,dst,src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + duplicate_carry(); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } else { + comprintf("\t restore_inverted_carry();\n"); /* Reload the X flag into C */ + comprintf("\t jnf_SUBX(tmp,dst,src);\n"); + } + genastore("tmp", curi->dmode, "dstreg", curi->size, "dst"); +#else + (void) ssize; + isaddx; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genflags(flag_subx, curi->size, "", "src", "dst"); + genastore("dst", curi->dmode, "dstreg", curi->size, "dst"); +#endif +} + +static void gen_swap(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", sz_long, "src", 1, 0); + comprintf("\t dont_care_flags();\n"); + start_brace(); + + if (!noflags) { + 
comprintf("\t start_needflags();\n"); + comprintf("\t jff_SWAP(src);\n"); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } else { + comprintf("\t jnf_SWAP(src);\n"); + } + genastore("src", curi->smode, "srcreg", sz_long, "src"); +#else + genamode(curi->smode, "srcreg", sz_long, "src", 1, 0); + comprintf("\tdont_care_flags();\n"); + comprintf("\tarm_ROR_l_ri8(src,16);\n"); + genflags(flag_logical, sz_long, "src", "", ""); + genastore("src", curi->smode, "srcreg", sz_long, "src"); +#endif +} + +static void gen_tst(uae_u32 opcode, struct instr *curi, const char* ssize) { + (void) opcode; + (void) ssize; +#if defined(USE_JIT2) + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + comprintf("\t dont_care_flags();\n"); + if (!noflags) { + start_brace(); + comprintf("\t start_needflags();\n"); + comprintf("\t jff_TST_%s(src);\n", ssize); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } +#else + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + genflags(flag_logical, curi->size, "src", "", ""); +#endif +} + +static int /* returns zero for success, non-zero for failure */ +gen_opcode(unsigned long int opcode) { + struct instr *curi = table68k + opcode; + const char* ssize = NULL; + + insn_n_cycles = 2; + global_failure = 0; + long_opcode = 0; + global_isjump = 0; + global_iscjump = 0; + global_isaddx = 0; + global_cmov = 0; + global_fpu = 0; + global_mayfail = 0; + hack_opcode = opcode; + endstr[0] = 0; + + start_brace(); + comprintf("\tuae_u8 scratchie=S1;\n"); + switch (curi->plev) { + case 0: /* not privileged */ + break; + case 1: /* unprivileged only on 68000 */ + if (cpu_level == 0) + break; + if (next_cpu_level < 0) + next_cpu_level = 0; + + /* fall through */ + case 2: /* priviledged */ + failure; /* Easy ones first */ + break; + case 3: /* privileged if size == word */ + if (curi->size == sz_byte) + break; + failure; + 
break; + } + switch (curi->size) { + case sz_byte: + ssize = "b"; + break; + case sz_word: + ssize = "w"; + break; + case sz_long: + ssize = "l"; + break; + default: + assert(0); + break; + } + (void) ssize; + + switch (curi->mnemo) { + case i_AND: + gen_and(opcode, curi, ssize); + break; + + case i_OR: + gen_or(opcode, curi, ssize); + break; + + case i_EOR: + gen_eor(opcode, curi, ssize); + break; + + case i_ORSR: + gen_orsr(opcode, curi, ssize); + break; + + case i_EORSR: + gen_eorsr(opcode, curi, ssize); + break; + + case i_ANDSR: + gen_andsr(opcode, curi, ssize); + break; + + case i_SUB: + gen_sub(opcode, curi, ssize); + break; + + case i_SUBA: + gen_suba(opcode, curi, ssize); + break; + + case i_SUBX: + gen_subx(opcode, curi, ssize); + break; + + case i_SBCD: + gen_sbcd(opcode, curi, ssize); + break; + + case i_ADD: + gen_add(opcode, curi, ssize); + break; + + case i_ADDA: + gen_adda(opcode, curi, ssize); + break; + + case i_ADDX: + gen_addx(opcode, curi, ssize); + break; + + case i_ABCD: + gen_abcd(opcode, curi, ssize); + break; + + case i_NEG: + gen_neg(opcode, curi, ssize); + break; + + case i_NEGX: + gen_negx(opcode, curi, ssize); + break; + + case i_NBCD: + gen_nbcd(opcode, curi, ssize); + break; + + case i_CLR: + gen_clr(opcode, curi, ssize); + break; + + case i_NOT: + gen_not(opcode, curi, ssize); + break; + + case i_TST: + gen_tst(opcode, curi, ssize); + break; + + case i_BCHG: + gen_bchg(opcode, curi, ssize); + break; + + case i_BCLR: + gen_bclr(opcode, curi, ssize); + break; + + case i_BSET: + gen_bset(opcode, curi, ssize); + break; + + case i_BTST: + gen_btst(opcode, curi, ssize); + break; + + case i_CMPM: + case i_CMP: + gen_cmp(opcode, curi, ssize); + break; + + case i_CMPA: + gen_cmpa(opcode, curi, ssize); + break; + + /* The next two are coded a little unconventional, but they are doing + * weird things... 
*/ + case i_MVPRM: + isjump; + failure; + break; + + case i_MVPMR: + isjump; + failure; + break; + + case i_MOVE: + gen_move(opcode, curi, ssize); + break; + + case i_MOVEA: + gen_movea(opcode, curi, ssize); + break; + + case i_MVSR2: + isjump; + failure; + break; + + case i_MV2SR: + isjump; + failure; + break; + + case i_SWAP: + gen_swap(opcode, curi, ssize); + break; + + case i_EXG: + gen_exg(opcode, curi, ssize); + break; + + case i_EXT: + gen_ext(opcode, curi, ssize); + break; + + case i_MVMEL: + genmovemel(opcode); + break; + + case i_MVMLE: + genmovemle(opcode); + break; + + case i_TRAP: + isjump; + failure; + break; + + case i_MVR2USP: + isjump; + failure; + break; + + case i_MVUSP2R: + isjump; + failure; + break; + + case i_RESET: + isjump; + failure; + break; + + case i_NOP: + break; + + case i_STOP: + isjump; + failure; + break; + + case i_RTE: + isjump; + failure; + break; + + case i_RTD: + genamode(curi->smode, "srcreg", curi->size, "offs", 1, 0); + /* offs is constant */ + comprintf("\tarm_ADD_l_ri8(offs,4);\n"); + start_brace(); + comprintf("\tint newad=scratchie++;\n" + "\treadlong(15,newad,scratchie);\n" + "\tmov_l_mr((uintptr)®s.pc,newad);\n" + "\tget_n_addr_jmp(newad,PC_P,scratchie);\n" + "\tmov_l_mr((uintptr)®s.pc_oldp,PC_P);\n" + "\tm68k_pc_offset=0;\n" + "\tarm_ADD_l(15,offs);\n"); + gen_update_next_handler(); + isjump; + break; + + case i_LINK: + genamode(curi->smode, "srcreg", sz_long, "src", 1, 0); + genamode(curi->dmode, "dstreg", curi->size, "offs", 1, 0); + comprintf("\tsub_l_ri(15,4);\n" + "\twritelong_clobber(15,src,scratchie);\n" + "\tmov_l_rr(src,15);\n"); + if (curi->size == sz_word) + comprintf("\tsign_extend_16_rr(offs,offs);\n"); + comprintf("\tarm_ADD_l(15,offs);\n"); + genastore("src", curi->smode, "srcreg", sz_long, "src"); + break; + + case i_UNLK: + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + comprintf("\tmov_l_rr(15,src);\n" + "\treadlong(15,src,scratchie);\n" + "\tarm_ADD_l_ri8(15,4);\n"); + genastore("src", 
curi->smode, "srcreg", curi->size, "src"); + break; + + case i_RTS: + comprintf("\tint newad=scratchie++;\n" + "\treadlong(15,newad,scratchie);\n" + "\tmov_l_mr((uintptr)®s.pc,newad);\n" + "\tget_n_addr_jmp(newad,PC_P,scratchie);\n" + "\tmov_l_mr((uintptr)®s.pc_oldp,PC_P);\n" + "\tm68k_pc_offset=0;\n" + "\tlea_l_brr(15,15,4);\n"); + gen_update_next_handler(); + isjump; + break; + + case i_TRAPV: + isjump; + failure; + break; + + case i_RTR: + isjump; + failure; + break; + + case i_JSR: + isjump; + genamode(curi->smode, "srcreg", curi->size, "src", 0, 0); + start_brace(); + comprintf( + "\tuae_u32 retadd=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n"); + comprintf("\tint ret=scratchie++;\n" + "\tmov_l_ri(ret,retadd);\n" + "\tsub_l_ri(15,4);\n" + "\twritelong_clobber(15,ret,scratchie);\n"); + comprintf("\tmov_l_mr((uintptr)®s.pc,srca);\n" + "\tget_n_addr_jmp(srca,PC_P,scratchie);\n" + "\tmov_l_mr((uintptr)®s.pc_oldp,PC_P);\n" + "\tm68k_pc_offset=0;\n"); + gen_update_next_handler(); + break; + + case i_JMP: + isjump; + genamode(curi->smode, "srcreg", curi->size, "src", 0, 0); + comprintf("\tmov_l_mr((uintptr)®s.pc,srca);\n" + "\tget_n_addr_jmp(srca,PC_P,scratchie);\n" + "\tmov_l_mr((uintptr)®s.pc_oldp,PC_P);\n" + "\tm68k_pc_offset=0;\n"); + gen_update_next_handler(); + break; + + case i_BSR: + is_const_jump; + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace(); + comprintf( + "\tuae_u32 retadd=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n"); + comprintf("\tint ret=scratchie++;\n" + "\tmov_l_ri(ret,retadd);\n" + "\tsub_l_ri(15,4);\n" + "\twritelong_clobber(15,ret,scratchie);\n"); + comprintf("\tarm_ADD_l_ri(src,m68k_pc_offset_thisinst+2);\n"); + comprintf("\tm68k_pc_offset=0;\n"); + comprintf("\tarm_ADD_l(PC_P,src);\n"); + comprintf("\tcomp_pc_p=(uae_u8*)get_const(PC_P);\n"); + break; + + case i_Bcc: + comprintf("\tuae_u32 v,v1,v2;\n"); + genamode(curi->smode, "srcreg", curi->size, "src", 1, 0); + /* That 
source is an immediate, so we can clobber it with abandon */ + switch (curi->size) { + case sz_byte: + comprintf("\tsign_extend_8_rr(src,src);\n"); + break; + case sz_word: + comprintf("\tsign_extend_16_rr(src,src);\n"); + break; + case sz_long: + break; + } + comprintf( + "\tsub_l_ri(src,m68k_pc_offset-m68k_pc_offset_thisinst-2);\n"); + /* Leave the following as "add" --- it will allow it to be optimized + away due to src being a constant ;-) */ + comprintf("\tarm_ADD_l_ri(src,(uintptr)comp_pc_p);\n"); + comprintf("\tmov_l_ri(PC_P,(uintptr)comp_pc_p);\n"); + /* Now they are both constant. Might as well fold in m68k_pc_offset */ + comprintf("\tarm_ADD_l_ri(src,m68k_pc_offset);\n"); + comprintf("\tarm_ADD_l_ri(PC_P,m68k_pc_offset);\n"); + comprintf("\tm68k_pc_offset=0;\n"); + + if (curi->cc >= 2) { + comprintf("\tv1=get_const(PC_P);\n" + "\tv2=get_const(src);\n" + "\tregister_branch(v1,v2,%d);\n", cond_codes[curi->cc]); + comprintf("\tmake_flags_live();\n"); /* Load the flags */ + isjump; + } else { + is_const_jump; + } + + switch (curi->cc) { + case 0: /* Unconditional jump */ + comprintf("\tmov_l_rr(PC_P,src);\n"); + comprintf("\tcomp_pc_p=(uae_u8*)get_const(PC_P);\n"); + break; + case 1: + break; /* This is silly! */ + case 8: + failure; + break; /* Work out details! FIXME */ + case 9: + failure; + break; /* Not critical, though! 
*/ + + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + break; + default: + assert(0); + break; + } + break; + + case i_LEA: + genamode(curi->smode, "srcreg", curi->size, "src", 0, 0); + genamode(curi->dmode, "dstreg", curi->size, "dst", 2, 0); + genastore("srca", curi->dmode, "dstreg", curi->size, "dst"); + break; + + case i_PEA: + if (table68k[opcode].smode == Areg || table68k[opcode].smode == Aind + || table68k[opcode].smode == Aipi + || table68k[opcode].smode == Apdi + || table68k[opcode].smode == Ad16 + || table68k[opcode].smode == Ad8r) + comprintf("if (srcreg==7) dodgy=1;\n"); + + genamode(curi->smode, "srcreg", curi->size, "src", 0, 0); + genamode(Apdi, "7", sz_long, "dst", 2, 0); + genastore("srca", Apdi, "7", sz_long, "dst"); + break; + + case i_DBcc: + gen_dbcc(opcode, curi, ssize); + break; + + case i_Scc: + gen_scc(opcode, curi, ssize); + break; + + case i_DIVU: + isjump; + failure; + break; + + case i_DIVS: + isjump; + failure; + break; + + case i_MULU: + gen_mulu(opcode, curi, ssize); + break; + + case i_MULS: + gen_muls(opcode, curi, ssize); + break; + + case i_CHK: + isjump; + failure; + break; + + case i_CHK2: + isjump; + failure; + break; + + case i_ASR: + gen_asr(opcode, curi, ssize); + break; + + case i_ASL: + gen_asl(opcode, curi, ssize); + break; + + case i_LSR: + gen_lsr(opcode, curi, ssize); + break; + + case i_LSL: + gen_lsl(opcode, curi, ssize); + break; + + case i_ROL: + gen_rol(opcode, curi, ssize); + break; + + case i_ROR: + gen_ror(opcode, curi, ssize); + break; + + case i_ROXL: + gen_roxl(opcode, curi, ssize); + break; + + case i_ROXR: + gen_roxr(opcode, curi, ssize); + break; + + case i_ASRW: + gen_asrw(opcode, curi, ssize); + break; + + case i_ASLW: + gen_aslw(opcode, curi, ssize); + break; + + case i_LSRW: + gen_lsrw(opcode, curi, ssize); + break; + + case i_LSLW: + gen_lslw(opcode, curi, ssize); + break; + + case i_ROLW: + gen_rolw(opcode, curi, ssize); 
+ break; + + case i_RORW: + gen_rorw(opcode, curi, ssize); + break; + + case i_ROXLW: + gen_roxlw(opcode, curi, ssize); + break; + + case i_ROXRW: + gen_roxrw(opcode, curi, ssize); + break; + + case i_MOVEC2: + isjump; + failure; + break; + + case i_MOVE2C: + isjump; + failure; + break; + + case i_CAS: + failure; + break; + + case i_CAS2: + failure; + break; + + case i_MOVES: + /* ignore DFC and SFC because we have no MMU */ + isjump; + failure; + break; + + case i_BKPT: + /* only needed for hardware emulators */ + isjump; + failure; + break; + + case i_CALLM: + /* not present in 68030 */ + isjump; + failure; + break; + + case i_RTM: + /* not present in 68030 */ + isjump; + failure; + break; + + case i_TRAPcc: + isjump; + failure; + break; + + case i_DIVL: + isjump; + failure; + break; + + case i_MULL: + gen_mull(opcode, curi, ssize); + break; + + case i_BFTST: + case i_BFEXTU: + case i_BFCHG: + case i_BFEXTS: + case i_BFCLR: + case i_BFFFO: + case i_BFSET: + case i_BFINS: + failure; + break; + case i_PACK: + failure; + break; + case i_UNPK: + failure; + break; + case i_TAS: + failure; + break; + case i_FPP: + uses_fpu; +#ifdef USE_JIT_FPU + mayfail; + comprintf("\tuae_u16 extra=%s;\n",gen_nextiword()); + swap_opcode(); + comprintf("\tcomp_fpp_opp(opcode,extra);\n"); +#else + failure; +#endif + break; + case i_FBcc: + uses_fpu; +#ifdef USE_JIT_FPU + isjump; + uses_cmov; + mayfail; + swap_opcode(); + comprintf("\tcomp_fbcc_opp(opcode);\n"); +#else + isjump; + failure; +#endif + break; + case i_FDBcc: + uses_fpu; + isjump; + failure; + break; + case i_FScc: + uses_fpu; +#ifdef USE_JIT_FPU + mayfail; + uses_cmov; + comprintf("\tuae_u16 extra=%s;\n",gen_nextiword()); + swap_opcode(); + comprintf("\tcomp_fscc_opp(opcode,extra);\n"); +#else + failure; +#endif + break; + case i_FTRAPcc: + uses_fpu; + isjump; + failure; + break; + case i_FSAVE: + uses_fpu; + failure; + break; + case i_FRESTORE: + uses_fpu; + failure; + break; + + case i_CINVL: + case i_CINVP: + case 
i_CINVA: + isjump; /* Not really, but it's probably a good idea to stop + translating at this point */ + failure; + comprintf("\tflush_icache();\n"); /* Differentiate a bit more? */ + break; + case i_CPUSHL: + case i_CPUSHP: + case i_CPUSHA: + isjump; /* Not really, but it's probably a good idea to stop + translating at this point */ + failure; + break; + + case i_MOVE16: + gen_move16(opcode, curi); + break; + + case i_EMULOP_RETURN: + isjump; + failure; + break; + + case i_EMULOP: + failure; + break; + + case i_NATFEAT_ID: + case i_NATFEAT_CALL: + failure; + break; + + case i_MMUOP: + isjump; + failure; + break; + default: + assert(0); + break; + } + comprintf("%s", endstr); + finish_braces(); + sync_m68k_pc(); + if (global_mayfail) + comprintf("\tif (failure) m68k_pc_offset=m68k_pc_offset_thisinst;\n"); + return global_failure; +} + +static void generate_includes(FILE * f) { + fprintf(f, "#include \"sysdeps.h\"\n"); + fprintf(f, "#include \"m68k.h\"\n"); + fprintf(f, "#include \"memory.h\"\n"); + fprintf(f, "#include \"readcpu.h\"\n"); + fprintf(f, "#include \"newcpu.h\"\n"); + fprintf(f, "#include \"comptbl.h\"\n"); + fprintf(f, "#include \"debug.h\"\n"); +} + +static int postfix; + +static void generate_one_opcode(int rp, int noflags) { + int i; + uae_u16 smsk, dmsk; + int opcode = opcode_map[rp]; + int aborted = 0; + int have_srcreg = 0; + int have_dstreg = 0; + const char *name; + + if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level) + return; + + for (i = 0; lookuptab[i].name[0]; i++) { + if (table68k[opcode].mnemo == lookuptab[i].mnemo) + break; + } + + if (table68k[opcode].handler != -1) + return; + + switch (table68k[opcode].stype) { + case 0: + smsk = 7; + break; + case 1: + smsk = 255; + break; + case 2: + smsk = 15; + break; + case 3: + smsk = 7; + break; + case 4: + smsk = 7; + break; + case 5: + smsk = 63; + break; + case 6: + smsk = 255; + break; + case 7: + smsk = 3; + break; + default: + assert(0); + break; + } + dmsk = 7; + 
+ next_cpu_level = -1; + if (table68k[opcode].suse && table68k[opcode].smode != imm + && table68k[opcode].smode != imm0 && table68k[opcode].smode != imm1 + && table68k[opcode].smode != imm2 && table68k[opcode].smode != absw + && table68k[opcode].smode != absl && table68k[opcode].smode != PC8r + && table68k[opcode].smode != PC16) { + have_srcreg = 1; + if (table68k[opcode].spos == -1) { + if (((int) table68k[opcode].sreg) >= 128) + comprintf("\tuae_s32 srcreg = (uae_s32)(uae_s8)%d;\n", + (int) table68k[opcode].sreg); + else + comprintf("\tuae_s32 srcreg = %d;\n", + (int) table68k[opcode].sreg); + } else { + char source[100]; + int pos = table68k[opcode].spos; + + comprintf( + "#if defined(HAVE_GET_WORD_UNSWAPPED) && !defined(FULLMMU)\n"); + + if (pos < 8 && (smsk >> (8 - pos)) != 0) + sprintf(source, "(((opcode >> %d) | (opcode << %d)) & %d)", + pos ^ 8, 8 - pos, dmsk); + else if (pos != 8) + sprintf(source, "((opcode >> %d) & %d)", pos ^ 8, smsk); + else + sprintf(source, "(opcode & %d)", smsk); + + if (table68k[opcode].stype == 3) + comprintf("\tuae_u32 srcreg = imm8_table[%s];\n", source); + else if (table68k[opcode].stype == 1) + comprintf("\tuae_u32 srcreg = (uae_s32)(uae_s8)%s;\n", source); + else + comprintf("\tuae_u32 srcreg = %s;\n", source); + + comprintf("#else\n"); + + if (pos) + sprintf(source, "((opcode >> %d) & %d)", pos, smsk); + else + sprintf(source, "(opcode & %d)", smsk); + + if (table68k[opcode].stype == 3) + comprintf("\tuae_s32 srcreg = imm8_table[%s];\n", source); + else if (table68k[opcode].stype == 1) + comprintf("\tuae_s32 srcreg = (uae_s32)(uae_s8)%s;\n", source); + else + comprintf("\tuae_s32 srcreg = %s;\n", source); + + comprintf("#endif\n"); + } + } + if (table68k[opcode].duse + /* Yes, the dmode can be imm, in case of LINK or DBcc */ + && table68k[opcode].dmode != imm && table68k[opcode].dmode != imm0 + && table68k[opcode].dmode != imm1 && table68k[opcode].dmode != imm2 + && table68k[opcode].dmode != absw + && table68k[opcode].dmode 
!= absl) { + have_dstreg = 1; + if (table68k[opcode].dpos == -1) { + if (((int) table68k[opcode].dreg) >= 128) + comprintf("\tuae_s32 dstreg = (uae_s32)(uae_s8)%d;\n", + (int) table68k[opcode].dreg); + else + comprintf("\tuae_s32 dstreg = %d;\n", + (int) table68k[opcode].dreg); + } else { + int pos = table68k[opcode].dpos; + + comprintf( + "#if defined(HAVE_GET_WORD_UNSWAPPED) && !defined(FULLMMU)\n"); + + if (pos < 8 && (dmsk >> (8 - pos)) != 0) + comprintf( + "\tuae_u32 dstreg = ((opcode >> %d) | (opcode << %d)) & %d;\n", + pos ^ 8, 8 - pos, dmsk); + else if (pos != 8) + comprintf("\tuae_u32 dstreg = (opcode >> %d) & %d;\n", pos ^ 8, + dmsk); + else + comprintf("\tuae_u32 dstreg = opcode & %d;\n", dmsk); + + comprintf("#else\n"); + + if (pos) + comprintf("\tuae_u32 dstreg = (opcode >> %d) & %d;\n", pos, + dmsk); + else + comprintf("\tuae_u32 dstreg = opcode & %d;\n", dmsk); + + comprintf("#endif\n"); + } + } + + if (have_srcreg && have_dstreg + && (table68k[opcode].dmode == Areg || table68k[opcode].dmode == Aind + || table68k[opcode].dmode == Aipi + || table68k[opcode].dmode == Apdi + || table68k[opcode].dmode == Ad16 + || table68k[opcode].dmode == Ad8r) + && (table68k[opcode].smode == Areg || table68k[opcode].smode == Aind + || table68k[opcode].smode == Aipi + || table68k[opcode].smode == Apdi + || table68k[opcode].smode == Ad16 + || table68k[opcode].smode == Ad8r)) { + comprintf("\tuae_u32 dodgy=(srcreg==(uae_s32)dstreg);\n"); + } else { + comprintf("\tuae_u32 dodgy=0;\n"); + } + comprintf("\tuae_u32 m68k_pc_offset_thisinst=m68k_pc_offset;\n"); + comprintf("\tm68k_pc_offset+=2;\n"); + + aborted = gen_opcode(opcode); + { + int flags = 0; + if (global_isjump) + flags |= 1; + if (long_opcode) + flags |= 2; + if (global_cmov) + flags |= 4; + if (global_isaddx) + flags |= 8; + if (global_iscjump) + flags |= 16; + if (global_fpu) + flags |= 32; + + comprintf("}\n"); + + name = lookuptab[i].name; + if (aborted) { + fprintf(stblfile, "{ NULL, 0x%08x, %d }, /* %s */\n", 
opcode, flags, name); + com_discard(); + } else { + const char *tbl = noflags ? "nf" : "ff"; + fprintf(stblfile, + "{ op_%x_%d_comp_%s, %d, 0x%08x }, /* %s */\n", + opcode, postfix, tbl, opcode, flags, name); + fprintf(headerfile, "extern compop_func op_%x_%d_comp_%s;\n", + opcode, postfix, tbl); + printf( + "void REGPARAM2 op_%x_%d_comp_%s(uae_u32 opcode) /* %s */\n{\n", + opcode, postfix, tbl, name); + com_flush(); + } + } + opcode_next_clev[rp] = next_cpu_level; + opcode_last_postfix[rp] = postfix; +} + +static void generate_func(int noflags) { + int i, j, rp; + const char *tbl = noflags ? "nf" : "ff"; + + using_prefetch = 0; + using_exception_3 = 0; + for (i = 0; i < 1; i++) /* We only do one level! */ + { + cpu_level = 4 - i; + postfix = i; + + fprintf(stblfile, "const struct comptbl op_smalltbl_%d_comp_%s[] = {\n", + postfix, tbl); + + /* sam: this is for people with low memory (eg. me :)) */ + printf("\n" + "#if !defined(PART_1) && !defined(PART_2) && " + "!defined(PART_3) && !defined(PART_4) && " + "!defined(PART_5) && !defined(PART_6) && " + "!defined(PART_7) && !defined(PART_8)" + "\n" + "#define PART_1 1\n" + "#define PART_2 1\n" + "#define PART_3 1\n" + "#define PART_4 1\n" + "#define PART_5 1\n" + "#define PART_6 1\n" + "#define PART_7 1\n" + "#define PART_8 1\n" + "#endif\n\n"); + + rp = 0; + for (j = 1; j <= 8; ++j) { + int k = (j * nr_cpuop_funcs) / 8; + printf("#ifdef PART_%d\n", j); + for (; rp < k; rp++) + generate_one_opcode(rp, noflags); + printf("#endif\n\n"); + } + + fprintf(stblfile, "{ 0, 65536, 0 }};\n"); + } + +} + +#if (defined(OS_cygwin) || defined(OS_mingw)) && defined(EXTENDED_SIGSEGV) +void cygwin_mingw_abort() +{ +#undef abort + abort(); +} +#endif + +int main(void) +{ + read_table68k(); + do_merges(); + + opcode_map = (int *) malloc(sizeof(int) * nr_cpuop_funcs); + opcode_last_postfix = (int *) malloc(sizeof(int) * nr_cpuop_funcs); + opcode_next_clev = (int *) malloc(sizeof(int) * nr_cpuop_funcs); + counts = (unsigned long *) 
malloc(65536 * sizeof(unsigned long)); + read_counts(); + + /* It would be a lot nicer to put all in one file (we'd also get rid of + * cputbl.h that way), but cpuopti can't cope. That could be fixed, but + * I don't dare to touch the 68k version. */ + + headerfile = fopen("comptbl.h", "wb"); + fprintf (headerfile, "" + "extern const struct comptbl op_smalltbl_0_comp_nf[];\n" + "extern const struct comptbl op_smalltbl_0_comp_ff[];\n" + ""); + + stblfile = fopen("compstbl.cpp", "wb"); + if (freopen("compemu.cpp", "wb", stdout) == NULL) + { + assert(0); + } + + generate_includes(stdout); + generate_includes(stblfile); + + printf("#include \"compiler/compemu.h\"\n"); + + noflags = 0; + generate_func(noflags); + + free(opcode_map); + free(opcode_last_postfix); + free(opcode_next_clev); + free(counts); + + opcode_map = (int *) malloc(sizeof(int) * nr_cpuop_funcs); + opcode_last_postfix = (int *) malloc(sizeof(int) * nr_cpuop_funcs); + opcode_next_clev = (int *) malloc(sizeof(int) * nr_cpuop_funcs); + counts = (unsigned long *) malloc(65536 * sizeof(unsigned long)); + read_counts(); + noflags = 1; + generate_func(noflags); + + free(opcode_map); + free(opcode_last_postfix); + free(opcode_next_clev); + free(counts); + + free(table68k); + fclose(stblfile); + fclose(headerfile); + return 0; +} diff --git a/BasiliskII/src/uae_cpu/compiler/test_codegen_arm.c b/BasiliskII/src/uae_cpu/compiler/test_codegen_arm.c new file mode 100644 index 00000000..227a99d3 --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/test_codegen_arm.c @@ -0,0 +1,264 @@ +/* Example of using sigaction() to setup a signal handler with 3 arguments + * including siginfo_t. 
+ */ +#include +#include +#include +#include + +#include "flags_arm.h" +#include "codegen_arm.h" + +#define TEST(c,ex,s) { c; if (opcode != ex) printf("(%s) Invalid opcode %x expected %x\n", s, opcode, ex); } + +int opcode; + +void emit_long(v) { + opcode = v; +} + +int main (int argc, char *argv[]) +{ +TEST(MOV_ri(8, 15), 0xe3a0800f, "mov r8,#15"); +TEST(MOV_rr(8,9), 0xe1a08009, "mov r8, r9"); +TEST(MOV_rrLSLi(8,9,5), 0xe1a08289, "lsl r8, r9, #5"); +TEST(MOV_rrLSLr(8,9,7), 0xe1a08719, "lsl r8, r9, r7"); +TEST(MOV_rrLSRi(8,9,5), 0xe1a082a9, "lsr r8, r9, #5"); +TEST(MOV_rrLSRr(8,9,7), 0xe1a08739, "lsr r8, r9, r7"); +TEST(MOV_rrASRi(8,9,5), 0xe1a082c9, "asr r8, r9, #5"); +TEST(MOV_rrASRr(8,9,7), 0xe1a08759, "asr r8, r9, r7"); +TEST(MOV_rrRORi(8,9,5), 0xe1a082e9, "ror r8, r9, #5"); +TEST(MOV_rrRORr(8,9,7), 0xe1a08779, "ror r8, r9, r7"); +TEST(MOV_rrRRX(8,9), 0xe1a08069, "rrx r8, r9"); + +TEST(MOVS_ri(8, 15), 0xe3b0800f, "movs r8,#15"); +TEST(MOVS_rr(8,9), 0xe1b08009, "movs r8, r9"); +TEST(MOVS_rrLSLi(8,9,5), 0xe1b08289, "lsls r8, r9, #5"); +TEST(MOVS_rrLSLr(8,9,7), 0xe1b08719, "lsls r8, r9, r7"); +TEST(MOVS_rrLSRi(8,9,5), 0xe1b082a9, "lsrs r8, r9, #5"); +TEST(MOVS_rrLSRr(8,9,7), 0xe1b08739, "lsrs r8, r9, r7"); +TEST(MOVS_rrASRi(8,9,5), 0xe1b082c9, "asrs r8, r9, #5"); +TEST(MOVS_rrASRr(8,9,7), 0xe1b08759, "asrs r8, r9, r7"); +TEST(MOVS_rrRORi(8,9,5), 0xe1b082e9, "rors r8, r9, #5"); +TEST(MOVS_rrRORr(8,9,7), 0xe1b08779, "rors r8, r9, r7"); +TEST(MOVS_rrRRX(8,9), 0xe1b08069, "rrxs r8, r9"); + +TEST(MVN_ri(8, 15), 0xe3e0800f, "mvn r8,#15"); +TEST(MVN_rr(8,9), 0xe1e08009, "mvn r8, r9"); +TEST(MVN_rrLSLi(8,9,5), 0xe1e08289, "mvn r8, r9, lsl #5"); +TEST(MVN_rrLSLr(8,9,7), 0xe1e08719, "mvn r8, r9, lsl r7"); +TEST(MVN_rrLSRi(8,9,5), 0xe1e082a9, "mvn r8, r9, lsr #5"); +TEST(MVN_rrLSRr(8,9,7), 0xe1e08739, "mvn r8, r9, lsr r7"); +TEST(MVN_rrASRi(8,9,5), 0xe1e082c9, "mvn r8, r9, asr #5"); +TEST(MVN_rrASRr(8,9,7), 0xe1e08759, "mvn r8, r9, asr r7"); +TEST(MVN_rrRORi(8,9,5), 
0xe1e082e9, "mvn r8, r9, ror #5"); +TEST(MVN_rrRORr(8,9,7), 0xe1e08779, "mvn r8, r9, ror r7"); +TEST(MVN_rrRRX(8,9), 0xe1e08069, "mvn r8, r9, rrx"); + +TEST(CMP_ri(8, 15), 0xe358000f, "cmp r8,#15"); +TEST(CMP_rr(8,9), 0xe1580009, "cmp r8, r9"); +TEST(CMP_rrLSLi(8,9,5), 0xe1580289, "cmp r8, r9, #5"); +TEST(CMP_rrLSLr(8,9,7), 0xe1580719, "cmp r8, r9, r7"); +TEST(CMP_rrLSRi(8,9,5), 0xe15802a9, "cmp r8, r9, #5"); +TEST(CMP_rrLSRr(8,9,7), 0xe1580739, "cmp r8, r9, r7"); +TEST(CMP_rrASRi(8,9,5), 0xe15802c9, "cmp r8, r9, #5"); +TEST(CMP_rrASRr(8,9,7), 0xe1580759, "cmp r8, r9, r7"); +TEST(CMP_rrRORi(8,9,5), 0xe15802e9, "cmp r8, r9, #5"); +TEST(CMP_rrRORr(8,9,7), 0xe1580779, "cmp r8, r9, r7"); +TEST(CMP_rrRRX(8,9), 0xe1580069, "cmp r8, r9"); + +TEST(CMP_ri(8, 0x81), 0xe3580081, "cmp r8,#0x81"); +TEST(CMP_ri(8, 0x204), 0xe3580f81, "cmp r8,#0x204"); +TEST(CMP_ri(8, 0x810), 0xe3580e81, "cmp r8,#0x8100"); +TEST(CMP_ri(8, 0x2040), 0xe3580d81, "cmp r8,#0x2040"); +TEST(CMP_ri(8, 0x8100), 0xe3580c81, "cmp r8,#0x8100"); +TEST(CMP_ri(8, 0x20400), 0xe3580b81, "cmp r8,#0x20400"); +TEST(CMP_ri(8, 0x81000), 0xe3580a81, "cmp r8,#0x81000"); +TEST(CMP_ri(8, 0x204000), 0xe3580981, "cmp r8,#0x204000"); +TEST(CMP_ri(8, 0x810000), 0xe3580881, "cmp r8,#0x810000"); +TEST(CMP_ri(8, 0x2040000), 0xe3580781, "cmp r8,#0x2040000"); +TEST(CMP_ri(8, 0x8100000), 0xe3580681, "cmp r8,#0x8100000"); +TEST(CMP_ri(8, 0x20400000), 0xe3580581, "cmp r8,#0x20400000"); +TEST(CMP_ri(8, 0x81000000), 0xe3580481, "cmp r8,#0x81000000"); +TEST(CMP_ri(8, 0x04000002), 0xe3580381, "cmp r8,#0x04000002"); +TEST(CMP_ri(8, 0x10000008), 0xe3580281, "cmp r8,#0x10000008"); +TEST(CMP_ri(8, 0x40000020), 0xe3580181, "cmp r8,#0x40000020"); + +TEST(CMP_ri(8, 0x1200), 0xe3580c12, "cmp r8,#0x1200"); +TEST(CMP_ri(8, 0x120000), 0xe3580812, "cmp r8,#0x120000"); +TEST(CMP_ri(8, 0x12000000), 0xe3580412, "cmp r8,#0x12000000"); + +TEST(BEQ_i(5), 0x0a000005, "beq #5"); +TEST(BNE_i(5), 0x1a000005, "bne #5"); +TEST(BCS_i(5), 0x2a000005, "bcs #5"); 
+TEST(BCC_i(5), 0x3a000005, "bcc #5"); +TEST(BMI_i(5), 0x4a000005, "bmi #5"); +TEST(BPL_i(5), 0x5a000005, "bpl #5"); +TEST(BVS_i(5), 0x6a000005, "bvs #5"); +TEST(BVC_i(5), 0x7a000005, "bvc #5"); +TEST(BHI_i(5), 0x8a000005, "bhi #5"); +TEST(BLS_i(5), 0x9a000005, "bls #5"); +TEST(BGE_i(5), 0xaa000005, "bge #5"); +TEST(BLT_i(5), 0xba000005, "blt #5"); +TEST(BGT_i(5), 0xca000005, "bgt #5"); +TEST(BLE_i(5), 0xda000005, "ble #5"); +TEST(B_i(5), 0xea000005, "b #5"); + +TEST(BL_i(5), 0xeb000005, "bl #5"); +TEST(BLX_r(8), 0xe12fff38, "blx r8"); +TEST(BX_r(8), 0xe12fff18, "bx r8"); + +TEST(EOR_rri(6, 8, 15), 0xe228600f, "eor r6, r8,#15"); +TEST(EOR_rrr(6, 8,9), 0xe0286009, "eor r6, r8, r9"); +TEST(EOR_rrrLSLi(6,8,9,5), 0xe0286289, "eor r6, r8, r9, lsl #5"); +TEST(EOR_rrrLSLr(6,8,9,7), 0xe0286719, "eor r6, r8, r9, lsl r7"); +TEST(EOR_rrrLSRi(6,8,9,5), 0xe02862a9, "eor r6, r8, r9, lsr #5"); +TEST(EOR_rrrLSRr(6,8,9,7), 0xe0286739, "eor r6, r8, r9, lsr r7"); +TEST(EOR_rrrASRi(6,8,9,5), 0xe02862c9, "eor r6, r8, r9, asr #5"); +TEST(EOR_rrrASRr(6,8,9,7), 0xe0286759, "eor r6, r8, r9, asr r7"); +TEST(EOR_rrrRORi(6,8,9,5), 0xe02862e9, "eor r6, r8, r9, ror #5"); +TEST(EOR_rrrRORr(6,8,9,7), 0xe0286779, "eor r6, r8, r9, ror r7"); +TEST(EOR_rrrRRX(6,8,9), 0xe0286069, "eor r6, r8, r9, rrx"); + +TEST(EORS_rri(6, 8, 15), 0xe238600f, "eors r6, r8,#15"); +TEST(EORS_rrr(6, 8,9), 0xe0386009, "eors r6, r8, r9"); +TEST(EORS_rrrLSLi(6,8,9,5), 0xe0386289, "eors r6, r8, r9, lsl #5"); +TEST(EORS_rrrLSLr(6,8,9,7), 0xe0386719, "eors r6, r8, r9, lsr r7"); +TEST(EORS_rrrLSRi(6,8,9,5), 0xe03862a9, "eors r6, r8, r9, lsr #5"); +TEST(EORS_rrrLSRr(6,8,9,7), 0xe0386739, "eors r6, r8, r9, lsr r7"); +TEST(EORS_rrrASRi(6,8,9,5), 0xe03862c9, "eors r6, r8, r9, asr #5"); +TEST(EORS_rrrASRr(6,8,9,7), 0xe0386759, "eors r6, r8, r9, asr r7"); +TEST(EORS_rrrRORi(6,8,9,5), 0xe03862e9, "eors r6, r8, r9, ror #5"); +TEST(EORS_rrrRORr(6,8,9,7), 0xe0386779, "eors r6, r8, r9, ror r7"); +TEST(EORS_rrrRRX(6,8,9), 0xe0386069, "eors 
r6, r8, r9, rrx"); + +TEST(MRS_CPSR(6), 0xe10f6000, "mrs r6, CPSR"); +TEST(MRS_SPSR(6), 0xe14f6000, "mrs r6, SPSR"); + +TEST(MSR_CPSR_i(5), 0xe329f005, "msr CPSR_fc, #5"); +TEST(MSR_CPSR_r(5), 0xe129f005, "msr CPSR_fc, r5"); + +TEST(MSR_CPSRf_i(5), 0xe328f005, "msr CPSR_f, #5"); +TEST(MSR_CPSRf_r(5), 0xe128f005, "msr CPSR_f, r5"); + +TEST(MSR_CPSRc_i(5), 0xe321f005, "msr CPSR_c, #5"); +TEST(MSR_CPSRc_r(5), 0xe121f005, "msr CPSR_c, r5"); + +TEST(PUSH(6), 0xe92d0040, "push {r6}"); +TEST(POP(6), 0xe8bd0040, "pop {r6}"); + +TEST(BIC_rri(0, 0, 0x9f000000), 0xe3c0049f, "bic r0, r0, #0x9f000000"); +TEST(BIC_rri(2, 3, 0xff00), 0xe3c32cff, "bic r2, r3, #0xff00"); +TEST(BIC_rri(3, 4, 0xff), 0xe3c430ff, "bic r3, r4, #0xff"); + +TEST(ORR_rrrLSRi(0, 1, 2, 16), 0xe1810822, "orr r0, r1, r2, lsr #16"); +TEST(ORR_rrrLSRi(0, 1, 2, 24), 0xe1810c22, "orr r0, r1, r2, lsr #24"); + +TEST(LDR_rR(8, 9), 0xe5998000, "ldr r8, [r9]"); +TEST(LDR_rRI(8, 9, 4), 0xe5998004, "ldr r8, [r9, #4]"); +TEST(LDR_rRi(8, 9, 4), 0xe5198004, "ldr r8, [r9, #-4]"); +TEST(LDR_rRR(8, 9, 7), 0xe7998007, "ldr r8, [r9, r7]"); +TEST(LDR_rRr(8, 9, 7), 0xe7198007, "ldr r8, [r9, -r7]"); +TEST(LDR_rRR_LSLi(8, 9, 7, 5), 0xe7998287, "ldr r8, [r9, r7, lsl #5]"); +TEST(LDR_rRr_LSLi(8, 9, 7, 5), 0xe7198287, "ldr r8, [r9, -r7, lsl #5]"); +TEST(LDR_rRR_LSRi(8, 9, 7, 5), 0xe79982a7, "ldr r8, [r9, r7, lsr #5]"); +TEST(LDR_rRr_LSRi(8, 9, 7, 5), 0xe71982a7, "ldr r8, [r9, -r7, lsr #5]"); +TEST(LDR_rRR_ASRi(8, 9, 7, 5), 0xe79982c7, "ldr r8, [r9, r7, asr #5]"); +TEST(LDR_rRr_ASRi(8, 9, 7, 5), 0xe71982c7, "ldr r8, [r9, -r7, asr #5]"); +TEST(LDR_rRR_RORi(8, 9, 7, 5), 0xe79982e7, "ldr r8, [r9, r7, ror #5]"); +TEST(LDR_rRr_RORi(8, 9, 7, 5), 0xe71982e7, "ldr r8, [r9, -r7, ror #5]"); +TEST(LDR_rRR_RRX(8, 9, 7), 0xe7998067, "ldr r8, [r9, r7, rrx]"); +TEST(LDR_rRr_RRX(8, 9, 7), 0xe7198067, "ldr r8, [r9, -r7, rrx]"); + +TEST(LDRB_rR(8, 9), 0xe5d98000, "ldrb r8, [r9]"); +TEST(LDRB_rRI(8, 9, 4), 0xe5d98004, "ldrb r8, [r9, #4]"); 
+TEST(LDRB_rRi(8, 9, 4), 0xe5598004, "ldrb r8, [r9, #-4]"); +TEST(LDRB_rRR(8, 9, 7), 0xe7d98007, "ldrb r8, [r9, r7]"); +TEST(LDRB_rRr(8, 9, 7), 0xe7598007, "ldrb r8, [r9, -r7]"); +TEST(LDRB_rRR_LSLi(8, 9, 7, 5), 0xe7d98287, "ldrb r8, [r9, r7, lsl #5]"); +TEST(LDRB_rRr_LSLi(8, 9, 7, 5), 0xe7598287, "ldrb r8, [r9, -r7, lsl #5]"); +TEST(LDRB_rRR_LSRi(8, 9, 7, 5), 0xe7d982a7, "ldrb r8, [r9, r7, lsr #5]"); +TEST(LDRB_rRr_LSRi(8, 9, 7, 5), 0xe75982a7, "ldrb r8, [r9, -r7, lsr #5]"); +TEST(LDRB_rRR_ASRi(8, 9, 7, 5), 0xe7d982c7, "ldrb r8, [r9, r7, asr #5]"); +TEST(LDRB_rRr_ASRi(8, 9, 7, 5), 0xe75982c7, "ldrb r8, [r9, -r7, asr #5]"); +TEST(LDRB_rRR_RORi(8, 9, 7, 5), 0xe7d982e7, "ldrb r8, [r9, r7, ror #5]"); +TEST(LDRB_rRr_RORi(8, 9, 7, 5), 0xe75982e7, "ldrb r8, [r9, -r7, ror #5]"); +TEST(LDRB_rRR_RRX(8, 9, 7), 0xe7d98067, "ldrb r8, [r9, r7, rrx]"); +TEST(LDRB_rRr_RRX(8, 9, 7), 0xe7598067, "ldrb r8, [r9, -r7, rrx]"); + +TEST(LDRSB_rR(8, 9), 0xe1d980d0, "ldrsb r8, [r9]"); +TEST(LDRSB_rRI(8, 9, 4), 0xe1d980d4, "ldrsb r8, [r9, #4]"); +TEST(LDRSB_rRi(8, 9, 4), 0xe15980d4, "ldrsb r8, [r9, #-4]"); +TEST(LDRSB_rRR(8, 9, 7), 0xe19980d7, "ldrsb r8, [r9, r7]"); +TEST(LDRSB_rRr(8, 9, 7), 0xe11980d7, "ldrsb r8, [r9, -r7]"); + +TEST(LDRSH_rR(8, 9), 0xe1d980f0, "ldrsh r8, [r9]"); +TEST(LDRSH_rRI(8, 9, 4), 0xe1d980f4, "ldrsh r8, [r9, #4]"); +TEST(LDRSH_rRi(8, 9, 4), 0xe15980f4, "ldrsh r8, [r9, #-4]"); +TEST(LDRSH_rRR(8, 9, 7), 0xe19980f7, "ldrsh r8, [r9, r7]"); +TEST(LDRSH_rRr(8, 9, 7), 0xe11980f7, "ldrsh r8, [r9, -r7]"); + +TEST(LDRH_rR(8, 9), 0xe1d980b0, "ldrh r8, [r9]"); +TEST(LDRH_rRI(8, 9, 4), 0xe1d980b4, "ldrh r8, [r9, #4]"); +TEST(LDRH_rRi(8, 9, 4), 0xe15980b4, "ldrh r8, [r9, #-4]"); +TEST(LDRH_rRR(8, 9, 7), 0xe19980b7, "ldrh r8, [r9, r7]"); +TEST(LDRH_rRr(8, 9, 7), 0xe11980b7, "ldrh r8, [r9, -r7]"); + +TEST(STR_rRR(8,9,7), 0xe7898007, "str r8, [r9, r7]"); +TEST(STR_rRr(8,9,7), 0xe7098007, "str r8, [r9, -r7]"); + +TEST(STRB_rR(5, 6), 0xe5c65000, "strb r5,[r6]"); + +TEST(STRH_rR(8, 
9), 0xe1c980b0, "strh r8, [r9]"); +TEST(STRH_rRI(8, 9, 4), 0xe1c980b4, "strh r8, [r9, #4]"); +TEST(STRH_rRi(8, 9, 4), 0xe14980b4, "strh r8, [r9, #-4]"); +TEST(STRH_rRR(8, 9, 7), 0xe18980b7, "strh r8, [r9, r7]"); +TEST(STRH_rRr(8, 9, 7), 0xe10980b7, "strh r8, [r9, -r7]"); + +TEST(CLZ_rr(2, 3), 0xe16f2f13, "clz r2,r3"); +TEST(REV_rr(2, 3), 0xe6bf2f33, "rev r2, r3"); +TEST(REV16_rr(2, 3), 0xe6bf2fb3, "rev16 r2, r3"); +TEST(REVSH_rr(2, 3), 0xe6ff2fb3, "revsh r2, r3"); + +TEST(SXTB_rr(2,3), 0xe6af2073, "sxtb r2,r3"); +TEST(SXTB_rr(3,4), 0xe6af3074, "sxtb r3,r4"); + +TEST(SXTB_rr_ROR8(2,3), 0xe6af2473, "sxtb r2, r3, ror #8"); +TEST(SXTB_rr_ROR16(2,3), 0xe6af2873, "sxtb r2, r3, ror #16"); +TEST(SXTB_rr_ROR24(2,3), 0xe6af2c73, "sxtb r2, r3, ror #24"); +TEST(SXTH_rr(2,3), 0xe6bf2073, "sxth r2, r3"); +TEST(SXTH_rr_ROR8(2,3), 0xe6bf2473, "sxth r2, r3, ror #8"); +TEST(SXTH_rr_ROR16(2,3), 0xe6bf2873, "sxth r2, r3, ror #16"); +TEST(SXTH_rr_ROR24(2,3), 0xe6bf2c73, "sxth r2, r3, ror #24"); +TEST(UXTB_rr(2,3), 0xe6ef2073, "uxtb r2, r3"); +TEST(UXTB_rr_ROR8(2,3), 0xe6ef2473, "uxtb r2, r3, ror #8"); +TEST(UXTB_rr_ROR16(2,3), 0xe6ef2873, "uxtb r2, r3, ror #16"); +TEST(UXTB_rr_ROR24(2,3), 0xe6ef2c73, "uxtb r2, r3, ror #24"); +TEST(UXTH_rr(2,3), 0xe6ff2073, "uxth r2, r3"); +TEST(UXTH_rr_ROR8(2,3), 0xe6ff2473, "uxth r2, r3, ror #8"); +TEST(UXTH_rr_ROR16(2,3), 0xe6ff2873, "uxth r2, r3, ror #16"); +TEST(UXTH_rr_ROR24(2,3), 0xe6ff2c73, "uxth r2, r3, ror #24"); + +TEST(REV_rr(2,3), 0xe6bf2f33, "rev r2, r3"); +TEST(REV16_rr(2,3), 0xe6bf2fb3, "rev16 r2, r3"); +TEST(REVSH_rr(2,3), 0xe6ff2fb3, "revsh r2, r3"); + +TEST(CC_MOV_ri(NATIVE_CC_CS, 4,1), 0x23a04001, "movcs r4, #1"); +TEST(CC_MOV_ri(NATIVE_CC_CC, 4,1), 0x33a04001, "movcc r4, #1"); + +int imm = 0x9f; +TEST(ADDS_rri(0, 0, imm << 24), 0xe290049f, "adds r0, r0, 0x9f000000"); + +TEST(PKHBT_rrr(1, 2, 3), 0xe6821013, "pkhbt r1,r2,r3"); +TEST(MVN_ri8(1,2), 0xe3e01002, "mvn r1,#2"); + +TEST(ORR_rri8RORi(1,2,0x12,24), 0xe3821c12, "orr r1, r2, 
#0x1200"); +TEST(PKHTB_rrrASRi(1, 2, 3, 4), 0xe6821253, "pkhtb r1,r2,r3,ASR #4"); +TEST(PKHBT_rrrLSLi(1, 2, 3, 4), 0xe6821213, "pkhbt r1,r2,r3,LSL #4"); + +TEST(MUL_rrr(1,2,3), 0xe0010392, "mul r1, r2, r3"); +TEST(MULS_rrr(1,2,3), 0xe0110392, "muls r1, r2, r3"); + + +} + diff --git a/BasiliskII/src/uae_cpu/compiler/test_codegen_x86.cpp b/BasiliskII/src/uae_cpu/compiler/test_codegen_x86.cpp new file mode 100644 index 00000000..216effe5 --- /dev/null +++ b/BasiliskII/src/uae_cpu/compiler/test_codegen_x86.cpp @@ -0,0 +1,1008 @@ +/******************** -*- mode: C; tab-width: 8 -*- ******************** + * + * Dumb and Brute Force Run-time assembler verifier for IA-32 and AMD64 + * + ***********************************************************************/ + + +/*********************************************************************** + * + * Copyright 2004 Gwenole Beauchesne + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + ***********************************************************************/ + +/* + * STATUS: 5.5M variations covering unary register based operations, + * reg/reg operations, imm/reg operations. 
+ * + * TODO: + * - Rewrite to use internal BFD/opcodes format instead of string compares + * - Add reg/mem, imm/mem variations + */ + +#define _BSD_SOURCE 1 +#include +#include +#include +#include +#include +#include + +#include "sysdeps.h" + +#undef abort +#define abort() do { \ + fprintf(stderr, "ABORT: %s, line %d\n", __FILE__, __LINE__); \ + (abort)(); \ +} while (0) + +#define X86_TARGET_64BIT 1 +#define X86_FLAT_REGISTERS 0 +#define X86_OPTIMIZE_ALU 1 +#define X86_OPTIMIZE_ROTSHI 1 +#include "compiler/codegen_x86.h" + +#define x86_emit_byte(B) emit_byte(B) +#define x86_emit_word(W) emit_word(W) +#define x86_emit_long(L) emit_long(L) +#define x86_emit_quad(Q) emit_quad(Q) +#define x86_get_target() get_target() +#define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__) + +static void jit_fail(const char *msg, const char *file, int line, const char *function) +{ + fprintf(stderr, "JIT failure in function %s from file %s at line %d: %s\n", + function, file, line, msg); + abort(); +} + +static uint8 *target; + +static inline void emit_byte(uint8 x) +{ + *target++ = x; +} + +static inline void emit_word(uint16 x) +{ + *((uint16 *)target) = x; + target += 2; +} + +static inline void emit_long(uint32 x) +{ + *((uint32 *)target) = x; + target += 4; +} + +static inline void emit_quad(uint64 x) +{ + *((uint64 *)target) = x; + target += 8; +} + +static inline void set_target(uint8 *t) +{ + target = t; +} + +static inline uint8 *get_target(void) +{ + return target; +} + +static uint32 mon_read_byte(uintptr addr) +{ + uint8 *m = (uint8 *)addr; + return (uint32)(*m); +} + +extern "C" { +#include "disass/dis-asm.h" + +int buffer_read_memory(bfd_vma from, bfd_byte *to, unsigned int length, struct disassemble_info *info) +{ + while (length--) + *to++ = mon_read_byte(from++); + return 0; +} + +void perror_memory(int status, bfd_vma memaddr, struct disassemble_info *info) +{ + info->fprintf_func(info->stream, "Unknown error %d\n", status); +} + +void 
generic_print_address(bfd_vma addr, struct disassemble_info *info) +{ + if (addr >= UVAL64(0x100000000)) + info->fprintf_func(info->stream, "$%08x%08x", (uint32)(addr >> 32), (uint32)addr); + else + info->fprintf_func(info->stream, "$%08x", (uint32)addr); +} + +int generic_symbol_at_address(bfd_vma addr, struct disassemble_info *info) +{ + return 0; +} +} + +struct SFILE { + char *buffer; + char *current; +}; + +static int mon_sprintf(SFILE *f, const char *format, ...) +{ + int n; + va_list args; + va_start(args, format); + vsprintf(f->current, format, args); + f->current += n = strlen(f->current); + va_end(args); + return n; +} + +static int disass_x86(char *buf, uintptr adr) +{ + disassemble_info info; + SFILE sfile; + sfile.buffer = buf; + sfile.current = buf; + INIT_DISASSEMBLE_INFO(info, (FILE *)&sfile, (fprintf_ftype)mon_sprintf); + info.mach = bfd_mach_x86_64; + info.disassembler_options = "suffix"; + return print_insn_i386(adr, &info); +} + +enum { + op_disp, + op_reg, + op_base, + op_index, + op_scale, + op_imm, +}; +struct operand_t { + int32 disp; + int8 reg; + int8 base; + int8 index; + int8 scale; + int64 imm; + + void clear() { + disp = imm = 0; + reg = base = index = -1; + scale = 1; + } + + void fill(int optype, int value) { + switch (optype) { + case op_disp: disp = value; break; + case op_reg: reg = value; break; + case op_base: base = value; break; + case op_index: index = value; break; + case op_scale: scale = value; break; + case op_imm: imm = value; break; + default: abort(); + } + } +}; + +struct insn_t { + char name[16]; + int n_operands; +#define MAX_OPERANDS 3 + operand_t operands[MAX_OPERANDS]; + + void clear() { + memset(name, 0, sizeof(name)); + n_operands = 0; + for (int i = 0; i < MAX_OPERANDS; i++) + operands[i].clear(); + } + + void pretty_print() { + printf("%s, %d operands\n", name, n_operands); + for (int i = 0; i < n_operands; i++) { + operand_t *op = &operands[i]; + if (op->reg != -1) + printf(" reg r%d\n", op->reg); + else { + 
printf(" mem 0x%08x(", op->disp); + if (op->base != -1) + printf("r%d", op->base); + printf(","); + if (op->index != -1) + printf("r%d", op->index); + printf(","); + if (op->base != -1 || op->index != -1) + printf("%d", op->scale); + printf(")\n"); + } + } + } +}; + +static const struct { + const char *name; + int reg; +} +regnames[] = { +#define _(REG) { #REG, X86_##REG } + + _(AL), _(CL), _(DL), _(BL), + _(AH), _(CH), _(DH), _(BH), + _(SPL), _(BPL), _(SIL), _(DIL), + _(R8B), _(R9B), _(R10B), _(R11B), _(R12B), _(R13B), _(R14B), _(R15B), + + _(AX), _(CX), _(DX), _(BX), _(SP), _(BP), _(SI), _(DI), + _(R8W), _(R9W), _(R10W), _(R11W), _(R12W), _(R13W), _(R14W), _(R15W), + + _(EAX), _(ECX), _(EDX), _(EBX), _(ESP), _(EBP), _(ESI), _(EDI), + _(R8D), _(R9D), _(R10D), _(R11D), _(R12D), _(R13D), _(R14D), _(R15D), + + _(RAX), _(RCX), _(RDX), _(RBX), _(RSP), _(RBP), _(RSI), _(RDI), + _(R8), _(R9), _(R10), _(R11), _(R12), _(R13), _(R14), _(R15), + + { NULL, -1 } +#undef _ +}; + +static int parse_reg(operand_t *op, int optype, char *buf) +{ + for (int i = 0; regnames[i].name; i++) { + int len = strlen(regnames[i].name); + if (strncasecmp(regnames[i].name, buf, len) == 0) { + op->fill(optype, regnames[i].reg); + return len; + } + } + return 0; +} + +static int parse_mem(operand_t *op, char *buf) +{ + char *p = buf; + + if (strncmp(buf, "0x", 2) == 0) { + unsigned long val = strtoul(buf, &p, 16); + if (val == 0 && errno == EINVAL) + abort(); + op->disp = val; + } + + if (*p == '(') { + p++; + + if (*p == '%') { + p++; + + int n = parse_reg(op, op_base, p); + if (n <= 0) + return -3; + p += n; + } + + if (*p == ',') { + p++; + + if (*p == '%') { + int n = parse_reg(op, op_index, ++p); + if (n <= 0) + return -4; + p += n; + + if (*p != ',') + return -5; + p++; + + goto do_parse_scale; + } + else if (isdigit(*p)) { + do_parse_scale: + long val = strtol(p, &p, 10); + if (val == 0 && errno == EINVAL) + abort(); + op->scale = val; + } + } + + if (*p != ')') + return -6; + p++; + } + + 
return p - buf; +} + +static void parse_insn(insn_t *ii, char *buf) +{ + char *p = buf; + ii->clear(); + + for (int i = 0; !isspace(*p); i++) + ii->name[i] = *p++; + + while (*p && isspace(*p)) + p++; + if (*p == '\0') + return; + + int n_operands = 0; + int optype = op_reg; + bool done = false; + while (!done) { + int n; + switch (*p) { + case '%': + n = parse_reg(&ii->operands[n_operands], optype, ++p); + if (n <= 0) { + fprintf(stderr, "parse_reg(%s) error %d\n", p, n); + abort(); + } + p += n; + break; + case '0': case '(': + n = parse_mem(&ii->operands[n_operands], p); + if (n <= 0) { + fprintf(stderr, "parse_mem(%s) error %d\n", p, n); + abort(); + } + p += n; + break; + case '$': { + unsigned long val = strtoul(++p, &p, 16); + if (val == 0 && errno == EINVAL) + abort(); + ii->operands[n_operands].imm = val; + break; + } + case '*': + p++; + break; + case ',': + n_operands++; + p++; + break; + case ' ': case '\t': + p++; + break; + case '\0': + done = true; + break; + default: + fprintf(stderr, "parse error> %s\n", p); + abort(); + } + } + ii->n_operands = n_operands + 1; +} + +static long n_tests, n_failures; +static long n_all_tests, n_all_failures; + +static bool check_reg(insn_t *ii, const char *name, int r) +{ + if (strcasecmp(ii->name, name) != 0) { + fprintf(stderr, "ERROR: instruction mismatch, expected %s, got %s\n", name, ii->name); + return false; + } + + if (ii->n_operands != 1) { + fprintf(stderr, "ERROR: instruction expected 1 operand, got %d\n", ii->n_operands); + return false; + } + + int reg = ii->operands[0].reg; + + if (reg != r) { + fprintf(stderr, "ERROR: instruction expected r%d as source, got ", r); + if (reg == -1) + fprintf(stderr, "nothing\n"); + else + fprintf(stderr, "%d\n", reg); + return false; + } + + return true; +} + +static bool check_reg_reg(insn_t *ii, const char *name, int s, int d) +{ + if (strcasecmp(ii->name, name) != 0) { + fprintf(stderr, "ERROR: instruction mismatch, expected %s, got %s\n", name, ii->name); + return 
false; + } + + if (ii->n_operands != 2) { + fprintf(stderr, "ERROR: instruction expected 2 operands, got %d\n", ii->n_operands); + return false; + } + + int srcreg = ii->operands[0].reg; + int dstreg = ii->operands[1].reg; + + if (srcreg != s) { + fprintf(stderr, "ERROR: instruction expected r%d as source, got ", s); + if (srcreg == -1) + fprintf(stderr, "nothing\n"); + else + fprintf(stderr, "%d\n", srcreg); + return false; + } + + if (dstreg != d) { + fprintf(stderr, "ERROR: instruction expected r%d as destination, got ", d); + if (dstreg == -1) + fprintf(stderr, "nothing\n"); + else + fprintf(stderr, "%d\n", dstreg); + return false; + } + + return true; +} + +static bool check_imm_reg(insn_t *ii, const char *name, uint32 v, int d, int mode = -1) +{ + if (strcasecmp(ii->name, name) != 0) { + fprintf(stderr, "ERROR: instruction mismatch, expected %s, got %s\n", name, ii->name); + return false; + } + + if (ii->n_operands != 2) { + fprintf(stderr, "ERROR: instruction expected 2 operands, got %d\n", ii->n_operands); + return false; + } + + uint32 imm = ii->operands[0].imm; + int dstreg = ii->operands[1].reg; + + if (mode == -1) { + char suffix = name[strlen(name) - 1]; + switch (suffix) { + case 'b': mode = 1; break; + case 'w': mode = 2; break; + case 'l': mode = 4; break; + case 'q': mode = 8; break; + } + } + switch (mode) { + case 1: v &= 0xff; break; + case 2: v &= 0xffff; break; + } + + if (imm != v) { + fprintf(stderr, "ERROR: instruction expected 0x%08x as immediate, got ", v); + if (imm == -1) + fprintf(stderr, "nothing\n"); + else + fprintf(stderr, "0x%08x\n", imm); + return false; + } + + if (dstreg != d) { + fprintf(stderr, "ERROR: instruction expected r%d as destination, got ", d); + if (dstreg == -1) + fprintf(stderr, "nothing\n"); + else + fprintf(stderr, "%d\n", dstreg); + return false; + } + + return true; +} + +static bool check_mem_reg(insn_t *ii, const char *name, uint32 D, int B, int I, int S, int R) +{ + if (strcasecmp(ii->name, name) != 0) { + 
fprintf(stderr, "ERROR: instruction mismatch, expected %s, got %s\n", name, ii->name); + return false; + } + + if (ii->n_operands != 2) { + fprintf(stderr, "ERROR: instruction expected 2 operands, got %d\n", ii->n_operands); + return false; + } + + operand_t *mem = &ii->operands[0]; + operand_t *reg = &ii->operands[1]; + + uint32 d = mem->disp; + int b = mem->base; + int i = mem->index; + int s = mem->scale; + int r = reg->reg; + + if (d != D) { + fprintf(stderr, "ERROR: instruction expected 0x%08x as displacement, got 0x%08x\n", D, d); + return false; + } + + if (b != B) { + fprintf(stderr, "ERROR: instruction expected r%d as base, got r%d\n", B, b); + return false; + } + + if (i != I) { + fprintf(stderr, "ERROR: instruction expected r%d as index, got r%d\n", I, i); + return false; + } + + if (s != S) { + fprintf(stderr, "ERROR: instruction expected %d as scale factor, got %d\n", S, s); + return false; + } + + if (r != R) { + fprintf(stderr, "ERROR: instruction expected r%d as reg operand, got r%d\n", R, r); + return false; + } + + return true; +} + +static int verbose = 2; + +int main(void) +{ + static char buffer[1024]; +#define MAX_INSN_LENGTH 16 +#define MAX_INSNS 1024 + static uint8 block[MAX_INSNS * MAX_INSN_LENGTH]; + static char *insns[MAX_INSNS]; + static int modes[MAX_INSNS]; + n_all_tests = n_all_failures = 0; + + printf("Testing reg forms\n"); + n_tests = n_failures = 0; + for (int r = 0; r < 16; r++) { + set_target(block); + uint8 *b = get_target(); + int i = 0; +#define GEN(INSN, GENOP) do { \ + insns[i++] = INSN; \ + GENOP##r(r); \ +} while (0) +#define GENA(INSN, GENOP) do { \ + GEN(INSN "b", GENOP##B); \ + GEN(INSN "w", GENOP##W); \ + GEN(INSN "l", GENOP##L); \ + GEN(INSN "q", GENOP##Q); \ +} while (0) + GENA("not", NOT); + GENA("neg", NEG); + GENA("mul", MUL); + GENA("imul", IMUL); + GENA("div", DIV); + GENA("idiv", IDIV); + GENA("dec", DEC); + GENA("inc", INC); + GEN("callq", CALLs); + GEN("jmpq", JMPs); + GEN("pushl", PUSHQ); // FIXME: disass 
bug? wrong suffix + GEN("popl", POPQ); // FIXME: disass bug? wrong suffix + GEN("bswap", BSWAPL); // FIXME: disass bug? no suffix + GEN("bswap", BSWAPQ); // FIXME: disass bug? no suffix + GEN("seto", SETO); + GEN("setno", SETNO); + GEN("setb", SETB); + GEN("setae", SETAE); + GEN("sete", SETE); + GEN("setne", SETNE); + GEN("setbe", SETBE); + GEN("seta", SETA); + GEN("sets", SETS); + GEN("setns", SETNS); + GEN("setp", SETP); + GEN("setnp", SETNP); + GEN("setl", SETL); + GEN("setge", SETGE); + GEN("setle", SETLE); + GEN("setg", SETG); +#undef GENA +#undef GEN + int last_insn = i; + uint8 *e = get_target(); + + uint8 *p = b; + i = 0; + while (p < e) { + int n = disass_x86(buffer, (uintptr)p); + insn_t ii; + parse_insn(&ii, buffer); + + if (!check_reg(&ii, insns[i], r)) { + if (verbose > 1) + fprintf(stderr, "%s\n", buffer); + n_failures++; + } + + p += n; + i += 1; + n_tests++; + } + if (i != last_insn) + abort(); + } + printf(" done %ld/%ld\n", n_tests - n_failures, n_tests); + n_all_tests += n_tests; + n_all_failures += n_failures; + + printf("Testing reg,reg forms\n"); + n_tests = n_failures = 0; + for (int s = 0; s < 16; s++) { + for (int d = 0; d < 16; d++) { + set_target(block); + uint8 *b = get_target(); + int i = 0; +#define GEN(INSN, GENOP) do { \ + insns[i++] = INSN; \ + GENOP##rr(s, d); \ +} while (0) +#define GEN1(INSN, GENOP, OP) do { \ + insns[i++] = INSN; \ + GENOP##rr(OP, s, d); \ +} while (0) +#define GENA(INSN, GENOP) do { \ + GEN(INSN "b", GENOP##B); \ + GEN(INSN "w", GENOP##W); \ + GEN(INSN "l", GENOP##L); \ + GEN(INSN "q", GENOP##Q); \ +} while (0) + GENA("adc", ADC); + GENA("add", ADD); + GENA("and", AND); + GENA("cmp", CMP); + GENA("or", OR); + GENA("sbb", SBB); + GENA("sub", SUB); + GENA("xor", XOR); + GENA("mov", MOV); + GEN("btw", BTW); + GEN("btl", BTL); + GEN("btq", BTQ); + GEN("btcw", BTCW); + GEN("btcl", BTCL); + GEN("btcq", BTCQ); + GEN("btrw", BTRW); + GEN("btrl", BTRL); + GEN("btrq", BTRQ); + GEN("btsw", BTSW); + GEN("btsl", BTSL); + 
GEN("btsq", BTSQ); + GEN("imulw", IMULW); + GEN("imull", IMULL); + GEN("imulq", IMULQ); + GEN1("cmove", CMOVW, X86_CC_Z); + GEN1("cmove", CMOVL, X86_CC_Z); + GEN1("cmove", CMOVQ, X86_CC_Z); + GENA("test", TEST); + GENA("cmpxchg", CMPXCHG); + GENA("xadd", XADD); + GENA("xchg", XCHG); + GEN("bsfw", BSFW); + GEN("bsfl", BSFL); + GEN("bsfq", BSFQ); + GEN("bsrw", BSRW); + GEN("bsrl", BSRL); + GEN("bsrq", BSRQ); + GEN("movsbw", MOVSBW); + GEN("movsbl", MOVSBL); + GEN("movsbq", MOVSBQ); + GEN("movzbw", MOVZBW); + GEN("movzbl", MOVZBL); + GEN("movzbq", MOVZBQ); + GEN("movswl", MOVSWL); + GEN("movswq", MOVSWQ); + GEN("movzwl", MOVZWL); + GEN("movzwq", MOVZWQ); + GEN("movslq", MOVSLQ); +#undef GENA +#undef GEN1 +#undef GEN + int last_insn = i; + uint8 *e = get_target(); + + uint8 *p = b; + i = 0; + while (p < e) { + int n = disass_x86(buffer, (uintptr)p); + insn_t ii; + parse_insn(&ii, buffer); + + if (!check_reg_reg(&ii, insns[i], s, d)) { + if (verbose > 1) + fprintf(stderr, "%s\n", buffer); + n_failures++; + } + + p += n; + i += 1; + n_tests++; + } + if (i != last_insn) + abort(); + } + } + printf(" done %ld/%ld\n", n_tests - n_failures, n_tests); + n_all_tests += n_tests; + n_all_failures += n_failures; + + printf("Testing cl,reg forms\n"); + n_tests = n_failures = 0; + for (int d = 0; d < 16; d++) { + set_target(block); + uint8 *b = get_target(); + int i = 0; +#define GEN(INSN, GENOP) do { \ + insns[i++] = INSN; \ + GENOP##rr(X86_CL, d); \ +} while (0) +#define GENA(INSN, GENOP) do { \ + GEN(INSN "b", GENOP##B); \ + GEN(INSN "w", GENOP##W); \ + GEN(INSN "l", GENOP##L); \ + GEN(INSN "q", GENOP##Q); \ +} while (0) + GENA("rol", ROL); + GENA("ror", ROR); + GENA("rcl", RCL); + GENA("rcr", RCR); + GENA("shl", SHL); + GENA("shr", SHR); + GENA("sar", SAR); +#undef GENA +#undef GEN + int last_insn = i; + uint8 *e = get_target(); + + uint8 *p = b; + i = 0; + while (p < e) { + int n = disass_x86(buffer, (uintptr)p); + insn_t ii; + parse_insn(&ii, buffer); + + if 
(!check_reg_reg(&ii, insns[i], X86_CL, d)) { + if (verbose > 1) + fprintf(stderr, "%s\n", buffer); + n_failures++; + } + + p += n; + i += 1; + n_tests++; + } + if (i != last_insn) + abort(); + } + printf(" done %ld/%ld\n", n_tests - n_failures, n_tests); + n_all_tests += n_tests; + n_all_failures += n_failures; + + printf("Testing imm,reg forms\n"); + static const uint32 imm_table[] = { + 0x00000000, 0x00000001, 0x00000002, 0x00000004, + 0x00000008, 0x00000010, 0x00000020, 0x00000040, + 0x00000080, 0x000000fe, 0x000000ff, 0x00000100, + 0x00000101, 0x00000102, 0xfffffffe, 0xffffffff, + 0x00000000, 0x10000000, 0x20000000, 0x30000000, + 0x40000000, 0x50000000, 0x60000000, 0x70000000, + 0x80000000, 0x90000000, 0xa0000000, 0xb0000000, + 0xc0000000, 0xd0000000, 0xe0000000, 0xf0000000, + 0xfffffffd, 0xfffffffe, 0xffffffff, 0x00000001, + 0x00000002, 0x00000003, 0x11111111, 0x22222222, + 0x33333333, 0x44444444, 0x55555555, 0x66666666, + 0x77777777, 0x88888888, 0x99999999, 0xaaaaaaaa, + 0xbbbbbbbb, 0xcccccccc, 0xdddddddd, 0xeeeeeeee, + }; + const int n_imm_tab_count = sizeof(imm_table)/sizeof(imm_table[0]); + n_tests = n_failures = 0; + for (int j = 0; j < n_imm_tab_count; j++) { + const uint32 value = imm_table[j]; + for (int d = 0; d < 16; d++) { + set_target(block); + uint8 *b = get_target(); + int i = 0; +#define GEN(INSN, GENOP) do { \ + insns[i] = INSN; \ + modes[i] = -1; \ + i++; GENOP##ir(value, d); \ + } while (0) +#define GENM(INSN, GENOP, MODE) do { \ + insns[i] = INSN; \ + modes[i] = MODE; \ + i++; GENOP##ir(value, d); \ + } while (0) +#define GENA(INSN, GENOP) do { \ + GEN(INSN "b", GENOP##B); \ + GEN(INSN "w", GENOP##W); \ + GEN(INSN "l", GENOP##L); \ + GEN(INSN "q", GENOP##Q); \ + } while (0) +#define GENAM(INSN, GENOP, MODE) do { \ + GENM(INSN "b", GENOP##B, MODE); \ + GENM(INSN "w", GENOP##W, MODE); \ + GENM(INSN "l", GENOP##L, MODE); \ + GENM(INSN "q", GENOP##Q, MODE); \ + } while (0) + GENA("adc", ADC); + GENA("add", ADD); + GENA("and", AND); + GENA("cmp", 
CMP); + GENA("or", OR); + GENA("sbb", SBB); + GENA("sub", SUB); + GENA("xor", XOR); + GENA("mov", MOV); + GENM("btw", BTW, 1); + GENM("btl", BTL, 1); + GENM("btq", BTQ, 1); + GENM("btcw", BTCW, 1); + GENM("btcl", BTCL, 1); + GENM("btcq", BTCQ, 1); + GENM("btrw", BTRW, 1); + GENM("btrl", BTRL, 1); + GENM("btrq", BTRQ, 1); + GENM("btsw", BTSW, 1); + GENM("btsl", BTSL, 1); + GENM("btsq", BTSQ, 1); + if (value != 1) { + GENAM("rol", ROL, 1); + GENAM("ror", ROR, 1); + GENAM("rcl", RCL, 1); + GENAM("rcr", RCR, 1); + GENAM("shl", SHL, 1); + GENAM("shr", SHR, 1); + GENAM("sar", SAR, 1); + } + GENA("test", TEST); +#undef GENAM +#undef GENA +#undef GENM +#undef GEN + int last_insn = i; + uint8 *e = get_target(); + + uint8 *p = b; + i = 0; + while (p < e) { + int n = disass_x86(buffer, (uintptr)p); + insn_t ii; + parse_insn(&ii, buffer); + + if (!check_imm_reg(&ii, insns[i], value, d, modes[i])) { + if (verbose > 1) + fprintf(stderr, "%s\n", buffer); + n_failures++; + } + + p += n; + i += 1; + n_tests++; + } + if (i != last_insn) + abort(); + } + } + printf(" done %ld/%ld\n", n_tests - n_failures, n_tests); + n_all_tests += n_tests; + n_all_failures += n_failures; + + printf("Testing mem,reg forms\n"); + n_tests = n_failures = 0; + static const uint32 off_table[] = { + 0x00000000, + 0x00000001, + 0x00000040, + 0x00000080, + 0x000000ff, + 0x00000100, + 0xfffffffe, + 0xffffffff, + }; + const int off_table_count = sizeof(off_table) / sizeof(off_table[0]); + for (int d = 0; d < off_table_count; d++) { + const uint32 D = off_table[d]; + for (int B = -1; B < 16; B++) { + for (int I = -1; I < 16; I++) { + if (I == X86_RSP) + continue; + for (int S = 1; S < 8; S *= 2) { + if (I == -1) + continue; + for (int r = 0; r < 16; r++) { + set_target(block); + uint8 *b = get_target(); + int i = 0; +#define GEN(INSN, GENOP) do { \ + insns[i++] = INSN; \ + GENOP##mr(D, B, I, S, r); \ + } while (0) +#define GENA(INSN, GENOP) do { \ + GEN(INSN "b", GENOP##B); \ + GEN(INSN "w", GENOP##W); \ + 
GEN(INSN "l", GENOP##L); \ + GEN(INSN "q", GENOP##Q); \ + } while (0) + GENA("adc", ADC); + GENA("add", ADD); + GENA("and", AND); + GENA("cmp", CMP); + GENA("or", OR); + GENA("sbb", SBB); + GENA("sub", SUB); + GENA("xor", XOR); + GENA("mov", MOV); + GEN("imulw", IMULW); + GEN("imull", IMULL); + GEN("imulq", IMULQ); + GEN("bsfw", BSFW); + GEN("bsfl", BSFL); + GEN("bsfq", BSFQ); + GEN("bsrw", BSRW); + GEN("bsrl", BSRL); + GEN("bsrq", BSRQ); + GEN("movsbw", MOVSBW); + GEN("movsbl", MOVSBL); + GEN("movsbq", MOVSBQ); + GEN("movzbw", MOVZBW); + GEN("movzbl", MOVZBL); + GEN("movzbq", MOVZBQ); + GEN("movswl", MOVSWL); + GEN("movswq", MOVSWQ); + GEN("movzwl", MOVZWL); + GEN("movzwq", MOVZWQ); + GEN("movslq", MOVSLQ); +#undef GENA +#undef GEN + int last_insn = i; + uint8 *e = get_target(); + + uint8 *p = b; + i = 0; + while (p < e) { + int n = disass_x86(buffer, (uintptr)p); + insn_t ii; + parse_insn(&ii, buffer); + + if (!check_mem_reg(&ii, insns[i], D, B, I, S, r)) { + if (verbose > 1) + fprintf(stderr, "%s\n", buffer); + n_failures++; + } + + p += n; + i += 1; + n_tests++; + } + if (i != last_insn) + abort(); + } + } + } + } + } + printf(" done %ld/%ld\n", n_tests - n_failures, n_tests); + n_all_tests += n_tests; + n_all_failures += n_failures; + + printf("\n"); + printf("All %ld tests run, %ld failures\n", n_all_tests, n_all_failures); +} diff --git a/BasiliskII/src/uae_cpu/cpu_emulation.h b/BasiliskII/src/uae_cpu/cpu_emulation.h index cc1d6e05..2e926874 100644 --- a/BasiliskII/src/uae_cpu/cpu_emulation.h +++ b/BasiliskII/src/uae_cpu/cpu_emulation.h @@ -80,7 +80,13 @@ extern bool Init680x0(void); // This routine may want to look at CPUType/FPUType extern void Exit680x0(void); // 680x0 emulation functions -struct M68kRegisters; +struct M68kRegisters { + uint32 d[8]; + memptr a[8]; + uint16 sr; + memptr usp, isp, msp; + memptr pc; +}; extern void Start680x0(void); // Reset and start 680x0 extern "C" void Execute68k(uint32 addr, M68kRegisters *r); // Execute 68k code from 
EMUL_OP routine extern "C" void Execute68kTrap(uint16 trap, M68kRegisters *r); // Execute MacOS 68k trap from EMUL_OP routine @@ -89,4 +95,12 @@ extern "C" void Execute68kTrap(uint16 trap, M68kRegisters *r); // Execute MacOS extern void TriggerInterrupt(void); // Trigger interrupt level 1 (InterruptFlag must be set first) extern void TriggerNMI(void); // Trigger interrupt level 7 +// CPU looping handlers +void check_eps_limit(uaecptr); +void report_double_bus_error(void); + +extern int intlev(void); + +static inline void AtariReset(void) {} + #endif diff --git a/BasiliskII/src/uae_cpu/cpudefsa.cpp b/BasiliskII/src/uae_cpu/cpudefsa.cpp new file mode 100644 index 00000000..ad7d6979 --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpudefsa.cpp @@ -0,0 +1,5 @@ +/* + * cpudefs.cpp must be compiled twice, once for the generator program + * and once for the actual executable + */ +#include "cpudefs.cpp" diff --git a/BasiliskII/src/uae_cpu/cpuemu1.cpp b/BasiliskII/src/uae_cpu/cpuemu1.cpp new file mode 100644 index 00000000..089eefd4 --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpuemu1.cpp @@ -0,0 +1,2 @@ +#define PART_1 +#include "cpuemu.cpp" diff --git a/BasiliskII/src/uae_cpu/cpuemu1_nf.cpp b/BasiliskII/src/uae_cpu/cpuemu1_nf.cpp new file mode 100644 index 00000000..58acf444 --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpuemu1_nf.cpp @@ -0,0 +1,3 @@ +#define NOFLAGS 1 +#define PART_1 +#include "cpuemu.cpp" diff --git a/BasiliskII/src/uae_cpu/cpuemu2.cpp b/BasiliskII/src/uae_cpu/cpuemu2.cpp new file mode 100644 index 00000000..1e18b587 --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpuemu2.cpp @@ -0,0 +1,2 @@ +#define PART_2 +#include "cpuemu.cpp" diff --git a/BasiliskII/src/uae_cpu/cpuemu2_nf.cpp b/BasiliskII/src/uae_cpu/cpuemu2_nf.cpp new file mode 100644 index 00000000..8e5136c4 --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpuemu2_nf.cpp @@ -0,0 +1,3 @@ +#define NOFLAGS 1 +#define PART_2 +#include "cpuemu.cpp" diff --git a/BasiliskII/src/uae_cpu/cpuemu3.cpp 
b/BasiliskII/src/uae_cpu/cpuemu3.cpp new file mode 100644 index 00000000..0385e2f0 --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpuemu3.cpp @@ -0,0 +1,2 @@ +#define PART_3 +#include "cpuemu.cpp" diff --git a/BasiliskII/src/uae_cpu/cpuemu3_nf.cpp b/BasiliskII/src/uae_cpu/cpuemu3_nf.cpp new file mode 100644 index 00000000..6565dc8c --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpuemu3_nf.cpp @@ -0,0 +1,3 @@ +#define NOFLAGS 1 +#define PART_3 +#include "cpuemu.cpp" diff --git a/BasiliskII/src/uae_cpu/cpuemu4.cpp b/BasiliskII/src/uae_cpu/cpuemu4.cpp new file mode 100644 index 00000000..13d27e7a --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpuemu4.cpp @@ -0,0 +1,2 @@ +#define PART_4 +#include "cpuemu.cpp" diff --git a/BasiliskII/src/uae_cpu/cpuemu4_nf.cpp b/BasiliskII/src/uae_cpu/cpuemu4_nf.cpp new file mode 100644 index 00000000..a16c36cb --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpuemu4_nf.cpp @@ -0,0 +1,3 @@ +#define NOFLAGS 1 +#define PART_4 +#include "cpuemu.cpp" diff --git a/BasiliskII/src/uae_cpu/cpuemu5.cpp b/BasiliskII/src/uae_cpu/cpuemu5.cpp new file mode 100644 index 00000000..9b33a654 --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpuemu5.cpp @@ -0,0 +1,2 @@ +#define PART_5 +#include "cpuemu.cpp" diff --git a/BasiliskII/src/uae_cpu/cpuemu5_nf.cpp b/BasiliskII/src/uae_cpu/cpuemu5_nf.cpp new file mode 100644 index 00000000..5bf24360 --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpuemu5_nf.cpp @@ -0,0 +1,4 @@ +#define NOFLAGS 1 +#define PART_5 +#include "cpuemu.cpp" + diff --git a/BasiliskII/src/uae_cpu/cpuemu6.cpp b/BasiliskII/src/uae_cpu/cpuemu6.cpp new file mode 100644 index 00000000..e4b1efb0 --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpuemu6.cpp @@ -0,0 +1,2 @@ +#define PART_6 +#include "cpuemu.cpp" diff --git a/BasiliskII/src/uae_cpu/cpuemu6_nf.cpp b/BasiliskII/src/uae_cpu/cpuemu6_nf.cpp new file mode 100644 index 00000000..7afe15d4 --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpuemu6_nf.cpp @@ -0,0 +1,3 @@ +#define NOFLAGS 1 +#define PART_6 +#include "cpuemu.cpp" diff 
--git a/BasiliskII/src/uae_cpu/cpuemu7.cpp b/BasiliskII/src/uae_cpu/cpuemu7.cpp new file mode 100644 index 00000000..faec7ef8 --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpuemu7.cpp @@ -0,0 +1,2 @@ +#define PART_7 +#include "cpuemu.cpp" diff --git a/BasiliskII/src/uae_cpu/cpuemu7_nf.cpp b/BasiliskII/src/uae_cpu/cpuemu7_nf.cpp new file mode 100644 index 00000000..1e404dea --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpuemu7_nf.cpp @@ -0,0 +1,3 @@ +#define NOFLAGS 1 +#define PART_7 +#include "cpuemu.cpp" diff --git a/BasiliskII/src/uae_cpu/cpuemu8.cpp b/BasiliskII/src/uae_cpu/cpuemu8.cpp new file mode 100644 index 00000000..c4efcfa3 --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpuemu8.cpp @@ -0,0 +1,2 @@ +#define PART_8 +#include "cpuemu.cpp" diff --git a/BasiliskII/src/uae_cpu/cpuemu8_nf.cpp b/BasiliskII/src/uae_cpu/cpuemu8_nf.cpp new file mode 100644 index 00000000..7c7f8f6e --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpuemu8_nf.cpp @@ -0,0 +1,3 @@ +#define NOFLAGS 1 +#define PART_8 +#include "cpuemu.cpp" diff --git a/BasiliskII/src/uae_cpu/cpufunctbla.cpp b/BasiliskII/src/uae_cpu/cpufunctbla.cpp new file mode 100644 index 00000000..17dd0d3f --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpufunctbla.cpp @@ -0,0 +1,5 @@ +/* + * cpufunctbl.cpp must be compiled twice, once for the generator program + * and once for the actual executable + */ +#include "cpufunctbl.cpp" diff --git a/BasiliskII/src/uae_cpu/cpummu.cpp b/BasiliskII/src/uae_cpu/cpummu.cpp new file mode 100644 index 00000000..1630bc78 --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpummu.cpp @@ -0,0 +1,1096 @@ +/* + * cpummu.cpp - MMU emulation + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by UAE MMU patch + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. 
+ * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#define DEBUG 0 +#include "sysdeps.h" + +#include "cpummu.h" +#include "memory.h" +#include "newcpu.h" +#include "debug.h" +#ifdef USE_JIT +# include "compiler/compemu.h" +#endif + +#define DBG_MMU_VERBOSE 1 +#define DBG_MMU_SANITY 1 + +#ifdef FULLMMU + +mmu_atc_l1_array atc_l1[2]; +mmu_atc_l1_array *current_atc; +struct mmu_atc_line atc_l2[2][ATC_L2_SIZE]; + +# ifdef ATC_STATS +static unsigned int mmu_atc_hits[ATC_L2_SIZE]; +# endif + + +static void mmu_dump_ttr(const char * label, uae_u32 ttr) +{ + DUNUSED(label); +#if DEBUG + uae_u32 from_addr, to_addr; + + from_addr = ttr & MMU_TTR_LOGICAL_BASE; + to_addr = (ttr & MMU_TTR_LOGICAL_MASK) << 8; + + D(bug("%s: [%08x] %08x - %08x enabled=%d supervisor=%d wp=%d cm=%02d", + label, ttr, + from_addr, to_addr, + ttr & MMU_TTR_BIT_ENABLED ? 1 : 0, + (ttr & (MMU_TTR_BIT_SFIELD_ENABLED | MMU_TTR_BIT_SFIELD_SUPER)) >> MMU_TTR_SFIELD_SHIFT, + ttr & MMU_TTR_BIT_WRITE_PROTECT ? 1 : 0, + (ttr & MMU_TTR_CACHE_MASK) >> MMU_TTR_CACHE_SHIFT + )); +#else + DUNUSED(ttr); +#endif +} + +void mmu_make_transparent_region(uaecptr baseaddr, uae_u32 size, int datamode) +{ + uae_u32 * ttr; + uae_u32 * ttr0 = datamode ? &regs.dtt0 : &regs.itt0; + uae_u32 * ttr1 = datamode ?
&regs.dtt1 : &regs.itt1; + + if ((*ttr1 & MMU_TTR_BIT_ENABLED) == 0) + ttr = ttr1; + else if ((*ttr0 & MMU_TTR_BIT_ENABLED) == 0) + ttr = ttr0; + else + return; + + *ttr = baseaddr & MMU_TTR_LOGICAL_BASE; + *ttr |= ((baseaddr + size - 1) & MMU_TTR_LOGICAL_BASE) >> 8; + *ttr |= MMU_TTR_BIT_ENABLED; + + D(bug("MMU: map transparent mapping of %08x", *ttr)); +} + +/* check if an address matches a ttr */ +static int mmu_do_match_ttr(uae_u32 ttr, uaecptr addr, int super) +{ + if (ttr & MMU_TTR_BIT_ENABLED) { /* TTR enabled */ + uae_u8 msb, mask; + + msb = ((addr ^ ttr) & MMU_TTR_LOGICAL_BASE) >> 24; + mask = (ttr & MMU_TTR_LOGICAL_MASK) >> 16; + + if (!(msb & ~mask)) { + + if ((ttr & MMU_TTR_BIT_SFIELD_ENABLED) == 0) { + if (((ttr & MMU_TTR_BIT_SFIELD_SUPER) == 0) != (super == 0)) { + return TTR_NO_MATCH; + } + } + + return (ttr & MMU_TTR_BIT_WRITE_PROTECT) ? TTR_NO_WRITE : TTR_OK_MATCH; + } + } + return TTR_NO_MATCH; +} + +static inline int mmu_match_ttr(uaecptr addr, int super, int data) +{ + int res; + + if (data) { + res = mmu_do_match_ttr(regs.dtt0, addr, super); + if (res == TTR_NO_MATCH) + res = mmu_do_match_ttr(regs.dtt1, addr, super); + } else { + res = mmu_do_match_ttr(regs.itt0, addr, super); + if (res == TTR_NO_MATCH) + res = mmu_do_match_ttr(regs.itt1, addr, super); + } + return res; +} + +#if DEBUG +/* {{{ mmu_dump_table */ +static void mmu_dump_table(const char * label, uaecptr root_ptr) +{ + DUNUSED(label); + const int ROOT_TABLE_SIZE = 128, + PTR_TABLE_SIZE = 128, + PAGE_TABLE_SIZE = regs.mmu_pagesize_8k ? 32 : 64, + ROOT_INDEX_SHIFT = 25, + PTR_INDEX_SHIFT = 18; + const uae_u32 ptr_addr_mask = (regs.mmu_pagesize_8k ? MMU_PTR_PAGE_ADDR_MASK_8 : MMU_PTR_PAGE_ADDR_MASK_4); + const uae_u32 page_addr_mask = (regs.mmu_pagesize_8k ? MMU_PAGE_ADDR_MASK_8 : MMU_PAGE_ADDR_MASK_4); + const uae_u32 page_ur_mask = (regs.mmu_pagesize_8k ? MMU_PAGE_UR_MASK_8 : MMU_PAGE_UR_MASK_4); + const uae_u32 page_size = (regs.mmu_pagesize_8k ?
(1 << 13) : (1 << 12)); + int root_idx, ptr_idx, page_idx; + uae_u32 root_des, ptr_des, page_des; + uaecptr ptr_des_addr, page_addr, + root_log, ptr_log, page_log; + + D(bug("%s: root=%x", label, root_ptr)); + + for (root_idx = 0; root_idx < ROOT_TABLE_SIZE; root_idx++) { + root_des = phys_get_long(root_ptr + (root_idx << 2)); + + if ((root_des & 2) == 0) + continue; /* invalid */ + + D(bug("ROOT: %03d U=%d W=%d UDT=%02d", root_idx, + root_des & 8 ? 1 : 0, + root_des & 4 ? 1 : 0, + root_des & 3 + )); + + root_log = root_idx << ROOT_INDEX_SHIFT; + + ptr_des_addr = root_des & MMU_ROOT_PTR_ADDR_MASK; + + for (ptr_idx = 0; ptr_idx < PTR_TABLE_SIZE; ptr_idx++) { + struct { + uaecptr log, phys; + int start_idx, n_pages; /* number of pages covered by this entry */ + uae_u32 match; + } page_info[PAGE_TABLE_SIZE]; + int n_pages_used; + + ptr_des = phys_get_long(ptr_des_addr + (ptr_idx << 2)); + ptr_log = root_log | (ptr_idx << PTR_INDEX_SHIFT); + + if ((ptr_des & 2) == 0) + continue; /* invalid */ + + page_addr = ptr_des & ptr_addr_mask; + + n_pages_used = -1; + for (page_idx = 0; page_idx < PAGE_TABLE_SIZE; page_idx++) { + + page_des = phys_get_long(page_addr + (page_idx << 2)); + page_log = ptr_log | (page_idx * page_size); + + switch (page_des & 3) { + case 0: /* invalid */ + continue; + case 1: case 3: /* resident */ + case 2: /* indirect */ + if (n_pages_used == -1 || + (page_info[n_pages_used].match & ~page_addr_mask) != (page_des & ~page_addr_mask) || + page_info[n_pages_used].phys + (page_info[n_pages_used].n_pages * page_size) != (page_des & page_addr_mask)) + { + /* use the next entry */ + n_pages_used++; + + page_info[n_pages_used].match = page_des; + page_info[n_pages_used].n_pages = 1; + page_info[n_pages_used].start_idx = page_idx; + page_info[n_pages_used].log = page_log; + page_info[n_pages_used].phys = page_des & page_addr_mask; + } else { + page_info[n_pages_used].n_pages++; + } + break; + } + } + + if (n_pages_used == -1) + continue; + + D(bug(" PTR: %03d 
U=%d W=%d UDT=%02d", ptr_idx, + ptr_des & 8 ? 1 : 0, + ptr_des & 4 ? 1 : 0, + ptr_des & 3 + )); + + + for (page_idx = 0; page_idx <= n_pages_used; page_idx++) { + page_des = page_info[page_idx].match; + + if ((page_des & MMU_PDT_MASK) == 2) { + D(bug(" PAGE: %03d-%03d log=%08x INDIRECT --> addr=%08x", + page_info[page_idx].start_idx, + page_info[page_idx].start_idx + page_info[page_idx].n_pages - 1, + page_info[page_idx].log, + page_des & MMU_PAGE_INDIRECT_MASK + )); + + } else { + D(bug(" PAGE: %03d-%03d log=%08x addr=%08x UR=%02d G=%d U1/0=%d S=%d CM=%d M=%d U=%d W=%d", + page_info[page_idx].start_idx, + page_info[page_idx].start_idx + page_info[page_idx].n_pages - 1, + page_info[page_idx].log, + page_info[page_idx].phys, + (page_des & page_ur_mask) >> MMU_PAGE_UR_SHIFT, + page_des & MMU_DES_GLOBAL ? 1 : 0, + (page_des & MMU_TTR_UX_MASK) >> MMU_TTR_UX_SHIFT, + page_des & MMU_DES_SUPER ? 1 : 0, + (page_des & MMU_TTR_CACHE_MASK) >> MMU_TTR_CACHE_SHIFT, + page_des & MMU_DES_MODIFIED ? 1 : 0, + page_des & MMU_DES_USED ? 1 : 0, + page_des & MMU_DES_WP ? 
1 : 0 + )); + } + } + } + + } +} +/* }}} */ +#endif + +/* {{{ mmu_dump_atc */ +void mmu_dump_atc(void) +{ + int i, j; + for (i = 0; i < 2; i++) { + for (j = 0; j < ATC_L2_SIZE; j++) { + if (atc_l2[i][j].tag == 0x8000) + continue; + D(bug("ATC[%02d] G=%d TT=%d M=%d WP=%d VD=%d VI=%d tag=%08x --> phys=%08x", + j, atc_l2[i][j].global, atc_l2[i][j].tt, atc_l2[i][j].modified, + atc_l2[i][j].write_protect, atc_l2[i][j].valid_data, atc_l2[i][j].valid_inst, + atc_l2[i][j].tag, atc_l2[i][j].phys)); + } + } +} +/* }}} */ + +/* {{{ mmu_dump_tables */ +void mmu_dump_tables(void) +{ + D(bug("URP: %08x SRP: %08x MMUSR: %x TC: %x", regs.urp, regs.srp, regs.mmusr, regs.tc)); + mmu_dump_ttr("DTT0", regs.dtt0); + mmu_dump_ttr("DTT1", regs.dtt1); + mmu_dump_ttr("ITT0", regs.itt0); + mmu_dump_ttr("ITT1", regs.itt1); + mmu_dump_atc(); + //mmu_dump_table("SRP", regs.srp); +} +/* }}} */ + +static uaecptr REGPARAM2 mmu_lookup_pagetable(uaecptr addr, int super, int write); + +static ALWAYS_INLINE int mmu_get_fc(bool super, bool data) +{ + return (super ? 4 : 0) | (data ? 1 : 2); +} + +static void mmu_bus_error(uaecptr addr, int fc, int write, int size) +{ + uae_u16 ssw = 0; + + ssw |= fc & MMU_SSW_TM; /* Copy TM */ + switch (size) { + case sz_byte: + ssw |= MMU_SSW_SIZE_B; + break; + case sz_word: + ssw |= MMU_SSW_SIZE_W; + break; + case sz_long: + ssw |= MMU_SSW_SIZE_L; + break; + } + + regs.wb3_status = write ? 0x80 | ssw : 0; + if (!write) + ssw |= MMU_SSW_RW; + + regs.mmu_fault_addr = addr; + regs.mmu_ssw = ssw | MMU_SSW_ATC; + + D(bug("BUS ERROR: fc=%d w=%d log=%08x ssw=%04x", fc, write, addr, ssw)); + + breakpt(); + THROW(2); +} + +/* + * Update the atc line for a given address by doing a mmu lookup. 
+ */ +static uaecptr mmu_fill_atc_l2(uaecptr addr, int super, int data, int write, + struct mmu_atc_line *l) +{ + int res; + uae_u32 desc; + + l->tag = ATC_TAG(addr); + l->hw = l->bus_fault = 0; + + /* check ttr0 */ + res = mmu_match_ttr(addr, super, data); + if (res != TTR_NO_MATCH) { + l->tt = 1; + if (data) { + l->valid_data = 1; + l->valid_inst = mmu_match_ttr(addr, super, 0) == res; + } else { + l->valid_inst = 1; + l->valid_data = mmu_match_ttr(addr, super, 1) == res; + } + l->global = 1; + l->modified = 1; + l->write_protect = (res == TTR_NO_WRITE); + l->phys = 0; + + return 0; + } + + l->tt = 0; + if (!regs.mmu_enabled) { + l->valid_data = l->valid_inst = 1; + l->global = 1; + l->modified = 1; + l->write_protect = 0; + l->phys = 0; + return 0; + } + + SAVE_EXCEPTION; + TRY(prb) { + desc = mmu_lookup_pagetable(addr, super, write); + D(bug("translate: %x,%u,%u,%u -> %x", addr, super, write, data, desc)); + RESTORE_EXCEPTION; + } + CATCH(prb) { + RESTORE_EXCEPTION; + /* bus error during table search */ + desc = 0; + goto fail; + } + + if ((desc & 1) == 0 || (!super && desc & MMU_MMUSR_S)) { + fail: + l->valid_data = l->valid_inst = 0; + l->global = 0; + } else { + l->valid_data = l->valid_inst = 1; + if (regs.mmu_pagesize_8k) + l->phys = (desc & ~0x1fff) - (addr & ~0x1fff); + else + l->phys = (desc & ~0xfff) - (addr & ~0xfff); + l->global = (desc & MMU_MMUSR_G) != 0; + l->modified = (desc & MMU_MMUSR_M) != 0; + l->write_protect = (desc & MMU_MMUSR_W) != 0; + } + + return desc; +} + +static ALWAYS_INLINE bool +mmu_fill_atc_l1(uaecptr addr, int super, int data, int write, + struct mmu_atc_line *l1) +{ + int idx = ATC_L2_INDEX(addr); + int tag = ATC_TAG(addr); + struct mmu_atc_line *l = &atc_l2[super][idx]; + uaecptr phys_addr; + + if (l->tag != tag) { + restart: + mmu_fill_atc_l2(addr, super, data, write, l); + } + if (!(data ? 
l->valid_data : l->valid_inst)) { + D(bug("MMU: non-resident page (%x,%x,%x)!", addr, regs.pc, regs.fault_pc)); + goto fail; + } + if (write) { + if (l->write_protect) { + D(bug("MMU: write protected (via %s) %x", l->tt ? "ttr" : "atc", addr)); + goto fail; + } + if (!l->modified) + goto restart; + } + *l1 = *l; + + phys_addr = addr + l1->phys; + if ((phys_addr & 0xfff00000) == 0x00f00000) { + l1->hw = 1; + goto fail; + } + if ((phys_addr & 0xfff00000) == 0xfff00000) { + l1->hw = 1; + l1->phys -= 0xff000000; + goto fail; + } + + if (!test_ram_boundary(phys_addr, 1, super, write)) { + l1->bus_fault = 1; + goto fail; + } + + return true; + +fail: + l1->tag = ~l1->tag; + return false; +} + +uaecptr mmu_translate(uaecptr addr, int super, int data, int write) +{ + struct mmu_atc_line *l; + + l = &atc_l2[super][ATC_L2_INDEX(addr)]; + mmu_fill_atc_l2(addr, super, data, write, l); + if (!(data ? l->valid_data : l->valid_inst)) + { + breakpt(); + THROW(2); + } + + return addr + l->phys; +} + +/* + * Lookup the address by walking the page table and updating + * the page descriptors accordingly. Returns the found descriptor + * or produces a bus error. + */ +static uaecptr REGPARAM2 mmu_lookup_pagetable(uaecptr addr, int super, int write) +{ + uae_u32 desc, desc_addr, wp; + int i; + + wp = 0; + desc = super ? 
regs.srp : regs.urp; + + /* fetch root table descriptor */ + i = (addr >> 23) & 0x1fc; + desc_addr = (desc & MMU_ROOT_PTR_ADDR_MASK) | i; + desc = phys_get_long(desc_addr); + if ((desc & 2) == 0) { + D(bug("MMU: invalid root descriptor for %x", addr)); + return 0; + } + + wp |= desc; + if ((desc & MMU_DES_USED) == 0) + phys_put_long(desc_addr, desc | MMU_DES_USED); + + /* fetch pointer table descriptor */ + i = (addr >> 16) & 0x1fc; + desc_addr = (desc & MMU_ROOT_PTR_ADDR_MASK) | i; + desc = phys_get_long(desc_addr); + if ((desc & 2) == 0) { + D(bug("MMU: invalid ptr descriptor for %x", addr)); + return 0; + } + wp |= desc; + if ((desc & MMU_DES_USED) == 0) + phys_put_long(desc_addr, desc | MMU_DES_USED); + + /* fetch page table descriptor */ + if (regs.mmu_pagesize_8k) { + i = (addr >> 11) & 0x7c; + desc_addr = (desc & MMU_PTR_PAGE_ADDR_MASK_8) | i; + } else { + i = (addr >> 10) & 0xfc; + desc_addr = (desc & MMU_PTR_PAGE_ADDR_MASK_4) | i; + } + + desc = phys_get_long(desc_addr); + if ((desc & 3) == 2) { + /* indirect */ + desc_addr = desc & MMU_PAGE_INDIRECT_MASK; + desc = phys_get_long(desc_addr); + } + if ((desc & 1) == 0) { + D(bug("MMU: invalid page descriptor log=%08x desc=%08x @%08x", addr, desc, desc_addr)); + return desc; + } + + desc |= wp & MMU_DES_WP; + if (write) { + if (desc & MMU_DES_WP) { + if ((desc & MMU_DES_USED) == 0) { + desc |= MMU_DES_USED; + phys_put_long(desc_addr, desc); + } + } else if ((desc & (MMU_DES_USED|MMU_DES_MODIFIED)) != + (MMU_DES_USED|MMU_DES_MODIFIED)) { + desc |= MMU_DES_USED|MMU_DES_MODIFIED; + phys_put_long(desc_addr, desc); + } + } else { + if ((desc & MMU_DES_USED) == 0) { + desc |= MMU_DES_USED; + phys_put_long(desc_addr, desc); + } + } + return desc; +} + +uae_u16 mmu_get_word_unaligned(uaecptr addr, int data) +{ + uae_u16 res; + + res = (uae_u16)mmu_get_byte(addr, data, sz_word) << 8; + SAVE_EXCEPTION; + TRY(prb) { + res |= mmu_get_byte(addr + 1, data, sz_word); + RESTORE_EXCEPTION; + } + CATCH(prb) { + 
RESTORE_EXCEPTION; + regs.mmu_fault_addr = addr; + regs.mmu_ssw |= MMU_SSW_MA; + breakpt(); + THROW_AGAIN(prb); + } + return res; +} + +uae_u32 mmu_get_long_unaligned(uaecptr addr, int data) +{ + uae_u32 res; + + if (likely(!(addr & 1))) { + res = (uae_u32)mmu_get_word(addr, data, sz_long) << 16; + SAVE_EXCEPTION; + TRY(prb) { + res |= mmu_get_word(addr + 2, data, sz_long); + RESTORE_EXCEPTION; + } + CATCH(prb) { + RESTORE_EXCEPTION; + regs.mmu_fault_addr = addr; + regs.mmu_ssw |= MMU_SSW_MA; + breakpt(); + THROW_AGAIN(prb); + } + } else { + res = (uae_u32)mmu_get_byte(addr, data, sz_long) << 8; + SAVE_EXCEPTION; + TRY(prb) { + res = (res | mmu_get_byte(addr + 1, data, sz_long)) << 8; + res = (res | mmu_get_byte(addr + 2, data, sz_long)) << 8; + res |= mmu_get_byte(addr + 3, data, sz_long); + RESTORE_EXCEPTION; + } + CATCH(prb) { + RESTORE_EXCEPTION; + regs.mmu_fault_addr = addr; + regs.mmu_ssw |= MMU_SSW_MA; + breakpt(); + THROW_AGAIN(prb); + } + } + return res; +} + +uae_u8 mmu_get_byte_slow(uaecptr addr, int super, int data, + int size, struct mmu_atc_line *cl) +{ + uae_u32 tag = ATC_TAG(addr); + + if (cl->tag == (uae_u16)~tag) { + redo: + if (cl->hw) + return HWget_b(cl->phys + addr); + mmu_bus_error(addr, mmu_get_fc(super, data), 0, size); + return 0; + } + + if (!mmu_fill_atc_l1(addr, super, data, 0, cl)) + goto redo; + + return do_get_mem_byte((uae_u8 *)mmu_get_real_address(addr, cl)); +} + +uae_u16 mmu_get_word_slow(uaecptr addr, int super, int data, + int size, struct mmu_atc_line *cl) +{ + uae_u32 tag = ATC_TAG(addr); + + if (cl->tag == (uae_u16)~tag) { + redo: + if (cl->hw) + return HWget_w(cl->phys + addr); + mmu_bus_error(addr, mmu_get_fc(super, data), 0, size); + return 0; + } + + if (!mmu_fill_atc_l1(addr, super, data, 0, cl)) + goto redo; + + return do_get_mem_word((uae_u16 *)mmu_get_real_address(addr, cl)); +} + +uae_u32 mmu_get_long_slow(uaecptr addr, int super, int data, + int size, struct mmu_atc_line *cl) +{ + uae_u32 tag = ATC_TAG(addr); + + 
if (cl->tag == (uae_u16)~tag) { + redo: + if (cl->hw) + return HWget_l(cl->phys + addr); + mmu_bus_error(addr, mmu_get_fc(super, data), 0, size); + return 0; + } + + if (!mmu_fill_atc_l1(addr, super, data, 0, cl)) + goto redo; + + return do_get_mem_long((uae_u32 *)mmu_get_real_address(addr, cl)); +} + + +uae_u64 mmu_get_quad_slow(uaecptr addr, int super, int data, + struct mmu_atc_line *cl) +{ + uae_u64 h = mmu_get_long_slow(addr, super, data, sz_long, cl); + uae_u64 l = mmu_get_long_slow(addr + 4, super, data, sz_long, cl); + return (h << 32) | l; +} + +REGPARAM2 void mmu_put_long_unaligned(uaecptr addr, uae_u32 val, int data) +{ + SAVE_EXCEPTION; + TRY(prb) { + if (likely(!(addr & 1))) { + mmu_put_word(addr, val >> 16, data, sz_long); + mmu_put_word(addr + 2, val, data, sz_long); + } else { + mmu_put_byte(addr, val >> 24, data, sz_long); + mmu_put_byte(addr + 1, val >> 16, data, sz_long); + mmu_put_byte(addr + 2, val >> 8, data, sz_long); + mmu_put_byte(addr + 3, val, data, sz_long); + } + RESTORE_EXCEPTION; + } + CATCH(prb) { + RESTORE_EXCEPTION; + regs.wb3_data = val; + if (regs.mmu_fault_addr != addr) { + regs.mmu_fault_addr = addr; + regs.mmu_ssw |= MMU_SSW_MA; + } + breakpt(); + THROW_AGAIN(prb); + } +} + +REGPARAM2 void mmu_put_word_unaligned(uaecptr addr, uae_u16 val, int data) +{ + SAVE_EXCEPTION; + TRY(prb) { + mmu_put_byte(addr, val >> 8, data, sz_word); + mmu_put_byte(addr + 1, val, data, sz_word); + RESTORE_EXCEPTION; + } + CATCH(prb) { + RESTORE_EXCEPTION; + regs.wb3_data = val; + if (regs.mmu_fault_addr != addr) { + regs.mmu_fault_addr = addr; + regs.mmu_ssw |= MMU_SSW_MA; + } + breakpt(); + THROW_AGAIN(prb); + } +} + +REGPARAM2 void mmu_put_byte_slow(uaecptr addr, uae_u8 val, int super, int data, + int size, struct mmu_atc_line *cl) +{ + uae_u32 tag = ATC_TAG(addr); + + if (cl->tag == (uae_u16)~tag) { + redo: + if (cl->hw) { + HWput_b(cl->phys + addr, val); + return; + } + regs.wb3_data = val; + mmu_bus_error(addr, mmu_get_fc(super, data), 1, 
size); + return; + } + + if (!mmu_fill_atc_l1(addr, super, data, 1, cl)) + goto redo; + + do_put_mem_byte((uae_u8 *)mmu_get_real_address(addr, cl), val); +} + +REGPARAM2 void mmu_put_word_slow(uaecptr addr, uae_u16 val, int super, int data, + int size, struct mmu_atc_line *cl) +{ + uae_u32 tag = ATC_TAG(addr); + + if (cl->tag == (uae_u16)~tag) { + redo: + if (cl->hw) { + HWput_w(cl->phys + addr, val); + return; + } + regs.wb3_data = val; + mmu_bus_error(addr, mmu_get_fc(super, data), 1, size); + return; + } + + if (!mmu_fill_atc_l1(addr, super, data, 1, cl)) + goto redo; + + do_put_mem_word((uae_u16 *)mmu_get_real_address(addr, cl), val); +} + +REGPARAM2 void mmu_put_long_slow(uaecptr addr, uae_u32 val, int super, int data, + int size, struct mmu_atc_line *cl) +{ + uae_u32 tag = ATC_TAG(addr); + + if (cl->tag == (uae_u16)~tag) { + redo: + if (cl->hw) { + HWput_l(cl->phys + addr, val); + return; + } + regs.wb3_data = val; + mmu_bus_error(addr, mmu_get_fc(super, data), 1, size); + return; + } + + if (!mmu_fill_atc_l1(addr, super, data, 1, cl)) + goto redo; + + do_put_mem_long((uae_u32 *)mmu_get_real_address(addr, cl), val); +} + +REGPARAM2 void mmu_put_quad_slow(uaecptr addr, uae_u64 val, int super, int data, + struct mmu_atc_line *cl) +{ + mmu_put_long_slow(addr, (uae_u32)(val >> 32), super, data, sz_long, cl); + mmu_put_long_slow(addr + 4, (uae_u32)(val), super, data, sz_long, cl); +} + +uae_u32 sfc_get_long(uaecptr addr) +{ + int super = (regs.sfc & 4) != 0; + int data = (regs.sfc & 3) != 2; + uae_u32 res; + + if (likely(!is_unaligned(addr, 4))) + return mmu_get_user_long(addr, super, data, sz_long); + + if (likely(!(addr & 1))) { + res = (uae_u32)mmu_get_user_word(addr, super, data, sz_long) << 16; + SAVE_EXCEPTION; + TRY(prb) { + res |= mmu_get_user_word(addr + 2, super, data, sz_long); + RESTORE_EXCEPTION; + } + CATCH(prb) { + RESTORE_EXCEPTION; + regs.mmu_fault_addr = addr; + regs.mmu_ssw |= MMU_SSW_MA; + breakpt(); + THROW_AGAIN(prb); + } + } else { + res = 
(uae_u32)mmu_get_user_byte(addr, super, data, sz_long) << 8; + SAVE_EXCEPTION; + TRY(prb) { + res = (res | mmu_get_user_byte(addr + 1, super, data, sz_long)) << 8; + res = (res | mmu_get_user_byte(addr + 2, super, data, sz_long)) << 8; + res |= mmu_get_user_byte(addr + 3, super, data, sz_long); + RESTORE_EXCEPTION; + } + CATCH(prb) { + RESTORE_EXCEPTION; + regs.mmu_fault_addr = addr; + regs.mmu_ssw |= MMU_SSW_MA; + breakpt(); + THROW_AGAIN(prb); + } + } + return res; +} + +uae_u16 sfc_get_word(uaecptr addr) +{ + int super = (regs.sfc & 4) != 0; + int data = (regs.sfc & 3) != 2; + uae_u16 res; + + if (likely(!is_unaligned(addr, 2))) + return mmu_get_user_word(addr, super, data, sz_word); + + res = (uae_u16)mmu_get_user_byte(addr, super, data, sz_word) << 8; + SAVE_EXCEPTION; + TRY(prb) { + res |= mmu_get_user_byte(addr + 1, super, data, sz_word); + RESTORE_EXCEPTION; + } + CATCH(prb) { + RESTORE_EXCEPTION; + regs.mmu_fault_addr = addr; + regs.mmu_ssw |= MMU_SSW_MA; + breakpt(); + THROW_AGAIN(prb); + } + return res; +} + +uae_u8 sfc_get_byte(uaecptr addr) +{ + int super = (regs.sfc & 4) != 0; + int data = (regs.sfc & 3) != 2; + + return mmu_get_user_byte(addr, super, data, sz_byte); +} + +void dfc_put_long(uaecptr addr, uae_u32 val) +{ + int super = (regs.dfc & 4) != 0; + int data = (regs.dfc & 3) != 2; + + SAVE_EXCEPTION; + TRY(prb) { + if (likely(!is_unaligned(addr, 4))) + mmu_put_user_long(addr, val, super, data, sz_long); + else if (likely(!(addr & 1))) { + mmu_put_user_word(addr, val >> 16, super, data, sz_long); + mmu_put_user_word(addr + 2, val, super, data, sz_long); + } else { + mmu_put_user_byte(addr, val >> 24, super, data, sz_long); + mmu_put_user_byte(addr + 1, val >> 16, super, data, sz_long); + mmu_put_user_byte(addr + 2, val >> 8, super, data, sz_long); + mmu_put_user_byte(addr + 3, val, super, data, sz_long); + } + RESTORE_EXCEPTION; + } + CATCH(prb) { + RESTORE_EXCEPTION; + regs.wb3_data = val; + if (regs.mmu_fault_addr != addr) { + 
regs.mmu_fault_addr = addr; + regs.mmu_ssw |= MMU_SSW_MA; + } + breakpt(); + THROW_AGAIN(prb); + } +} + +void dfc_put_word(uaecptr addr, uae_u16 val) +{ + int super = (regs.dfc & 4) != 0; + int data = (regs.dfc & 3) != 2; + + SAVE_EXCEPTION; + TRY(prb) { + if (likely(!is_unaligned(addr, 2))) + mmu_put_user_word(addr, val, super, data, sz_word); + else { + mmu_put_user_byte(addr, val >> 8, super, data, sz_word); + mmu_put_user_byte(addr + 1, val, super, data, sz_word); + } + RESTORE_EXCEPTION; + } + CATCH(prb) { + RESTORE_EXCEPTION; + regs.wb3_data = val; + if (regs.mmu_fault_addr != addr) { + regs.mmu_fault_addr = addr; + regs.mmu_ssw |= MMU_SSW_MA; + } + breakpt(); + THROW_AGAIN(prb); + } +} + +void dfc_put_byte(uaecptr addr, uae_u8 val) +{ + int super = (regs.dfc & 4) != 0; + int data = (regs.dfc & 3) != 2; + + SAVE_EXCEPTION; + TRY(prb) { + mmu_put_user_byte(addr, val, super, data, sz_byte); + RESTORE_EXCEPTION; + } + CATCH(prb) { + RESTORE_EXCEPTION; + regs.wb3_data = val; + breakpt(); + THROW_AGAIN(prb); + } +} + +void mmu_op(uae_u32 opcode, uae_u16 extra) +{ + int super = (regs.dfc & 4) != 0; + DUNUSED(extra); + if ((opcode & 0xFE0) == 0x0500) { + int regno, glob; + //D(didflush = 0); + uae_u32 addr; + /* PFLUSH */ + regno = opcode & 7; + glob = (opcode & 8) != 0; + + if (opcode & 16) { + D(bug("pflusha(%u,%u)", glob, regs.dfc)); + mmu_flush_atc_all(glob); + } else { + addr = m68k_areg(regs, regno); + D(bug("pflush(%u,%u,%x)", glob, regs.dfc, addr)); + mmu_flush_atc(addr, super, glob); + } + flush_internals(); +#ifdef USE_JIT + flush_icache(0); +#endif + } else if ((opcode & 0x0FD8) == 0x548) { + int write, regno; + uae_u32 addr; + + regno = opcode & 7; + write = (opcode & 32) == 0; + addr = m68k_areg(regs, regno); + //bug("ptest(%u,%u,%x)", write, regs.dfc, addr); + D(bug("PTEST%c (A%d) %08x DFC=%d", write ? 
'W' : 'R', regno, addr, regs.dfc)); + mmu_flush_atc(addr, super, true); + SAVE_EXCEPTION; + TRY(prb) { + struct mmu_atc_line *l; + uae_u32 desc; + bool data = (regs.dfc & 3) != 2; + + l = &atc_l2[super][ATC_L2_INDEX(addr)]; + desc = mmu_fill_atc_l2(addr, super, data, write, l); + if (!(data ? l->valid_data : l->valid_inst)) + regs.mmusr = MMU_MMUSR_B; + else if (l->tt) + regs.mmusr = MMU_MMUSR_T | MMU_MMUSR_R; + else { + regs.mmusr = desc & (~0xfff|MMU_MMUSR_G|MMU_MMUSR_Ux|MMU_MMUSR_S| + MMU_MMUSR_CM|MMU_MMUSR_M|MMU_MMUSR_W); + regs.mmusr |= MMU_MMUSR_R; + } + } + CATCH(prb) { + regs.mmusr = MMU_MMUSR_B; + } + RESTORE_EXCEPTION; + D(bug("PTEST result: mmusr %08x", regs.mmusr)); + } else + op_illg (opcode); +} + +void mmu_flush_atc(uaecptr addr, bool super, bool global) +{ + struct mmu_atc_line *l; + int i, j; + + l = atc_l1[super][0][0]; + i = ATC_L1_INDEX(addr); + for (j = 0; j < 4; j++) { + if (global || !l[i].global) + l[i].tag = 0x8000; + l += ATC_L1_SIZE; + } + if (regs.mmu_pagesize_8k) { + i = ATC_L1_INDEX(addr) ^ 1; + for (j = 0; j < 4; j++) { + if (global || !l[i].global) + l[i].tag = 0x8000; + l += ATC_L1_SIZE; + } + } + l = atc_l2[super]; + i = ATC_L2_INDEX(addr); + if (global || !l[i].global) + l[i].tag = 0x8000; + if (regs.mmu_pagesize_8k) { + i ^= 1; + if (global || !l[i].global) + l[i].tag = 0x8000; + } +} + +void mmu_flush_atc_all(bool global) +{ + struct mmu_atc_line *l; + unsigned int i; + + l = atc_l1[0][0][0]; + for (i = 0; i < sizeof(atc_l1) / sizeof(*l); l++, i++) { + if (global || !l->global) + l->tag = 0x8000; + } + + l = atc_l2[0]; + for (i = 0; i < sizeof(atc_l2) / sizeof(*l); l++, i++) { + if (global || !l->global) + l->tag = 0x8000; + } +} + +void mmu_reset(void) +{ + mmu_flush_atc_all(true); + + regs.urp = regs.srp = 0; + regs.itt0 = regs.itt1 = 0; + regs.dtt0 = regs.dtt1 = 0; + regs.mmusr = 0; +} + + +void mmu_set_tc(uae_u16 tc) +{ + if (regs.tc == tc) + return; + + regs.tc = tc; + regs.mmu_enabled = tc & 0x8000 ? 
1 : 0; + regs.mmu_pagesize_8k = tc & 0x4000 ? 1 : 0; + mmu_flush_atc_all(true); + + D(bug("MMU: enabled=%d page8k=%d\n", regs.mmu_enabled, regs.mmu_pagesize_8k)); +} + +void mmu_set_super(bool super) +{ + current_atc = &atc_l1[super]; +} + +#else + +void mmu_op(uae_u32 opcode, uae_u16 /*extra*/) +{ + if ((opcode & 0xFE0) == 0x0500) { + /* PFLUSH instruction */ + flush_internals(); + } else if ((opcode & 0x0FD8) == 0x548) { + /* PTEST instruction */ + } else + op_illg(opcode); +} + +#endif + +/* +vim:ts=4:sw=4: +*/ diff --git a/BasiliskII/src/uae_cpu/cpummu.h b/BasiliskII/src/uae_cpu/cpummu.h new file mode 100644 index 00000000..01359f6f --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpummu.h @@ -0,0 +1,267 @@ +/* + * cpummu.h - MMU emulation + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by UAE MMU patch + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef CPUMMU_H +#define CPUMMU_H + +#include "registers.h" + +# include + +#define MMU_TEST_PTEST 1 +#define MMU_TEST_VERBOSE 2 +#define MMU_TEST_FORCE_TABLE_SEARCH 4 +#define MMU_TEST_NO_BUSERR 8 + +extern void mmu_dump_tables(void); + +#define MMU_TTR_LOGICAL_BASE 0xff000000 +#define MMU_TTR_LOGICAL_MASK 0x00ff0000 +#define MMU_TTR_BIT_ENABLED (1 << 15) +#define MMU_TTR_BIT_SFIELD_ENABLED (1 << 14) +#define MMU_TTR_BIT_SFIELD_SUPER (1 << 13) +#define MMU_TTR_SFIELD_SHIFT 13 +#define MMU_TTR_UX_MASK ((1 << 9) | (1 << 8)) +#define MMU_TTR_UX_SHIFT 8 +#define MMU_TTR_CACHE_MASK ((1 << 6) | (1 << 5)) +#define MMU_TTR_CACHE_SHIFT 5 +#define MMU_TTR_BIT_WRITE_PROTECT (1 << 2) + +#define MMU_UDT_MASK 3 +#define MMU_PDT_MASK 3 + +#define MMU_DES_WP 4 +#define MMU_DES_USED 8 + +/* page descriptors only */ +#define MMU_DES_MODIFIED 16 +#define MMU_DES_SUPER (1 << 7) +#define MMU_DES_GLOBAL (1 << 10) + +#define MMU_ROOT_PTR_ADDR_MASK 0xfffffe00 +#define MMU_PTR_PAGE_ADDR_MASK_8 0xffffff80 +#define MMU_PTR_PAGE_ADDR_MASK_4 0xffffff00 + +#define MMU_PAGE_INDIRECT_MASK 0xfffffffc +#define MMU_PAGE_ADDR_MASK_8 0xffffe000 +#define MMU_PAGE_ADDR_MASK_4 0xfffff000 +#define MMU_PAGE_UR_MASK_8 ((1 << 12) | (1 << 11)) +#define MMU_PAGE_UR_MASK_4 (1 << 11) +#define MMU_PAGE_UR_SHIFT 11 + +#define MMU_MMUSR_ADDR_MASK 0xfffff000 +#define MMU_MMUSR_B (1 << 11) +#define MMU_MMUSR_G (1 << 10) +#define MMU_MMUSR_U1 (1 << 9) +#define MMU_MMUSR_U0 (1 << 8) +#define MMU_MMUSR_Ux (MMU_MMUSR_U1 | MMU_MMUSR_U0) +#define MMU_MMUSR_S (1 << 7) +#define MMU_MMUSR_CM ((1 << 6) | ( 1 << 5)) +#define MMU_MMUSR_M (1 << 4) +#define MMU_MMUSR_W (1 << 2) +#define MMU_MMUSR_T (1 << 1) +#define MMU_MMUSR_R (1 << 0) + +/* special status word (access error stack frame) */ +#define MMU_SSW_TM 
0x0007 +#define MMU_SSW_TT 0x0018 +#define MMU_SSW_SIZE 0x0060 +#define MMU_SSW_SIZE_B 0x0020 +#define MMU_SSW_SIZE_W 0x0040 +#define MMU_SSW_SIZE_L 0x0000 +#define MMU_SSW_RW 0x0100 +#define MMU_SSW_LK 0x0200 +#define MMU_SSW_ATC 0x0400 +#define MMU_SSW_MA 0x0800 + +#define TTR_I0 4 +#define TTR_I1 5 +#define TTR_D0 6 +#define TTR_D1 7 + +#define TTR_NO_MATCH 0 +#define TTR_NO_WRITE 1 +#define TTR_OK_MATCH 2 + +struct mmu_atc_line { + uae_u16 tag; + unsigned tt : 1; + unsigned valid_data : 1; + unsigned valid_inst : 1; + unsigned global : 1; + unsigned modified : 1; + unsigned write_protect : 1; + unsigned hw : 1; + unsigned bus_fault : 1; + uaecptr phys; +}; + +/* + * We don't need to store the whole logical address in the atc cache, as part of + * it is encoded as index into the cache. 14 bits of the address are stored in + * the tag, this means at least 6 bits must go into the index. The upper two + * bits of the tag define the type of data in the atc line: + * - 00: a normal memory address + * - 11: invalid memory address or hardware access + * (generated via ~ATC_TAG(addr) in the slow path) + * - 10: empty atc line + */ + +#define ATC_TAG_SHIFT 18 +#define ATC_TAG(addr) ((uae_u32)(addr) >> ATC_TAG_SHIFT) + + +#define ATC_L1_SIZE_LOG 8 +#define ATC_L1_SIZE (1 << ATC_L1_SIZE_LOG) + +#define ATC_L1_INDEX(addr) (((addr) >> 12) % ATC_L1_SIZE) + +/* + * first level atc cache + * indexed by [super][data][rw][idx] + */ + +typedef struct mmu_atc_line mmu_atc_l1_array[2][2][ATC_L1_SIZE]; +extern mmu_atc_l1_array atc_l1[2]; +extern mmu_atc_l1_array *current_atc; + +#define ATC_L2_SIZE_LOG 12 +#define ATC_L2_SIZE (1 << ATC_L2_SIZE_LOG) + +#define ATC_L2_INDEX(addr) ((((addr) >> 12) ^ ((addr) >> (32 - ATC_L2_SIZE_LOG))) % ATC_L2_SIZE) + +extern struct mmu_atc_line atc_l2[2][ATC_L2_SIZE]; + +/* + * lookup address in the level 1 atc cache, + * the data and write arguments are constant in the common, + * thus allows gcc to generate a constant offset. 
+ */ +static ALWAYS_INLINE int mmu_lookup(uaecptr addr, bool data, bool write, + struct mmu_atc_line **cl) +{ + addr >>= 12; + *cl = &(*current_atc)[data][write][addr % ATC_L1_SIZE]; + return (*cl)->tag == addr >> (ATC_TAG_SHIFT - 12); +} + +/* + * similiar to mmu_user_lookup, but for the use of the moves instruction + */ +static ALWAYS_INLINE int mmu_user_lookup(uaecptr addr, bool super, bool data, + bool write, struct mmu_atc_line **cl) +{ + addr >>= 12; + *cl = &atc_l1[super][data][write][addr % ATC_L1_SIZE]; + return (*cl)->tag == addr >> (ATC_TAG_SHIFT - 12); +} + +extern REGPARAM2 uae_u16 mmu_get_word_unaligned(uaecptr addr, int data); +extern REGPARAM2 uae_u32 mmu_get_long_unaligned(uaecptr addr, int data); + +extern REGPARAM2 uae_u8 mmu_get_byte_slow(uaecptr addr, int super, int data, + int size, struct mmu_atc_line *cl); +extern REGPARAM2 uae_u16 mmu_get_word_slow(uaecptr addr, int super, int data, + int size, struct mmu_atc_line *cl); +extern REGPARAM2 uae_u32 mmu_get_long_slow(uaecptr addr, int super, int data, + int size, struct mmu_atc_line *cl); +extern REGPARAM2 uae_u64 mmu_get_quad_slow(uaecptr addr, int super, int data, + struct mmu_atc_line *cl); + +extern REGPARAM2 void mmu_put_word_unaligned(uaecptr addr, uae_u16 val, int data); +extern REGPARAM2 void mmu_put_long_unaligned(uaecptr addr, uae_u32 val, int data); + +extern REGPARAM2 void mmu_put_byte_slow(uaecptr addr, uae_u8 val, int super, int data, + int size, struct mmu_atc_line *cl); +extern REGPARAM2 void mmu_put_word_slow(uaecptr addr, uae_u16 val, int super, int data, + int size, struct mmu_atc_line *cl); +extern REGPARAM2 void mmu_put_long_slow(uaecptr addr, uae_u32 val, int super, int data, + int size, struct mmu_atc_line *cl); +extern REGPARAM2 void mmu_put_quad_slow(uaecptr addr, uae_u64 val, int super, int data, + struct mmu_atc_line *cl); + +extern void mmu_make_transparent_region(uaecptr baseaddr, uae_u32 size, int datamode); + +static inline void mmu_set_ttr(int regno, uae_u32 val) 
+{ + uae_u32 * ttr; + switch(regno) { + case TTR_I0: ttr = ®s.itt0; break; + case TTR_I1: ttr = ®s.itt1; break; + case TTR_D0: ttr = ®s.dtt0; break; + case TTR_D1: ttr = ®s.dtt1; break; + default: abort(); + } + *ttr = val; +} + +static inline void mmu_set_mmusr(uae_u32 val) +{ + regs.mmusr = val; +} + +#define FC_DATA (regs.s ? 5 : 1) +#define FC_INST (regs.s ? 6 : 2) + +extern uaecptr REGPARAM2 mmu_translate(uaecptr addr, int super, int data, int write); + +extern uae_u32 REGPARAM2 sfc_get_long(uaecptr addr); +extern uae_u16 REGPARAM2 sfc_get_word(uaecptr addr); +extern uae_u8 REGPARAM2 sfc_get_byte(uaecptr addr); +extern void REGPARAM2 dfc_put_long(uaecptr addr, uae_u32 val); +extern void REGPARAM2 dfc_put_word(uaecptr addr, uae_u16 val); +extern void REGPARAM2 dfc_put_byte(uaecptr addr, uae_u8 val); + + +extern void REGPARAM2 mmu_flush_atc(uaecptr addr, bool super, bool global); +extern void REGPARAM2 mmu_flush_atc_all(bool global); +extern void REGPARAM2 mmu_op(uae_u32 opcode, uae_u16 extra); + +#ifdef FULLMMU + +extern void REGPARAM2 mmu_reset(void); +extern void REGPARAM2 mmu_set_tc(uae_u16 tc); +extern void REGPARAM2 mmu_set_super(bool super); + +#else + +static inline void mmu_reset(void) +{ +} + +static inline void mmu_set_tc(uae_u16 /*tc*/) +{ +} + +static inline void mmu_set_super(bool /*super*/) +{ +} + +#endif + +#endif /* CPUMMU_H */ +/* +vim:ts=4:sw=4: +*/ diff --git a/BasiliskII/src/uae_cpu/cpuopti.c b/BasiliskII/src/uae_cpu/cpuopti.c deleted file mode 100644 index 2dc10507..00000000 --- a/BasiliskII/src/uae_cpu/cpuopti.c +++ /dev/null @@ -1,298 +0,0 @@ -/* - * UAE - The Un*x Amiga Emulator - * - * cpuopti.c - Small optimizer for cpu*.s files - * Based on work by Tauno Taipaleenmaki - * - * Copyright 1996 Bernd Schmidt - */ - -#include -#include -#include - -#include "sysdeps.h" - -struct line { - struct line *next, *prev; - int delet; - char *data; -}; - -struct func { - struct line *first_line, *last_line; - int initial_offset; -}; - -static void 
oops(void) -{ - fprintf(stderr, "Don't know how to optimize this file.\n"); - abort(); -} - -static char * match(struct line *l, const char *m) -{ - char *str = l->data; - int len = strlen(m); - while (isspace(*str)) - str++; - - if (strncmp(str, m, len) != 0) - return NULL; - return str + len; -} - -static int insn_references_reg (struct line *l, char *reg) -{ - if (reg[0] != 'e') { - fprintf(stderr, "Unknown register?!?\n"); - abort(); - } - if (strstr (l->data, reg) != 0) - return 1; - if (strstr (l->data, reg+1) != 0) - return 1; - if (strcmp (reg, "eax") == 0 - && (strstr (l->data, "%al") != 0 || strstr (l->data, "%ah") != 0)) - return 1; - if (strcmp (reg, "ebx") == 0 - && (strstr (l->data, "%bl") != 0 || strstr (l->data, "%bh") != 0)) - return 1; - if (strcmp (reg, "ecx") == 0 - && (strstr (l->data, "%cl") != 0 || strstr (l->data, "%ch") != 0)) - return 1; - if (strcmp (reg, "edx") == 0 - && (strstr (l->data, "%dl") != 0 || strstr (l->data, "%dh") != 0)) - return 1; - return 0; -} - -static void do_function(struct func *f) -{ - int v; - int pops_at_end = 0; - struct line *l, *l1, *fl, *l2; - char *s, *s2; - int in_pop_area = 1; - - f->initial_offset = 0; - - l = f->last_line; - fl = f->first_line; - - if (match(l,".LFE")) - l = l->prev; - if (!match(l,"ret")) - oops(); - - while (!match(fl, "op_")) - fl = fl->next; - fl = fl->next; - - /* Try reordering the insns at the end of the function so that the - * pops are all at the end. */ - l2 = l->prev; - /* Tolerate one stack adjustment */ - if (match (l2, "addl $") && strstr(l2->data, "esp") != 0) - l2 = l2->prev; - for (;;) { - char *forbidden_reg; - struct line *l3, *l4; - - while (match (l2, "popl %")) - l2 = l2->prev; - - l3 = l2; - for (;;) { - forbidden_reg = match (l3, "popl %"); - if (forbidden_reg) - break; - if (l3 == fl) - goto reordered; - /* Jumps and labels put an end to our attempts... */ - if (strstr (l3->data, ".L") != 0) - goto reordered; - /* Likewise accesses to the stack pointer... 
*/ - if (strstr (l3->data, "esp") != 0) - goto reordered; - /* Function calls... */ - if (strstr (l3->data, "call") != 0) - goto reordered; - l3 = l3->prev; - } - if (l3 == l2) - abort(); - for (l4 = l2; l4 != l3; l4 = l4->prev) { - /* The register may not be referenced by any of the insns that we - * move the popl past */ - if (insn_references_reg (l4, forbidden_reg)) - goto reordered; - } - l3->prev->next = l3->next; - l3->next->prev = l3->prev; - l2->next->prev = l3; - l3->next = l2->next; - l2->next = l3; - l3->prev = l2; - } -reordered: - - l = l->prev; - - s = match (l, "addl $"); - s2 = match (fl, "subl $"); - - l1 = l; - if (s == 0) { - char *t = match (l, "popl %"); - if (t != 0 && (strcmp (t, "ecx") == 0 || strcmp (t, "edx") == 0)) { - s = "4,%esp"; - l = l->prev; - t = match (l, "popl %"); - if (t != 0 && (strcmp (t, "ecx") == 0 || strcmp (t, "edx") == 0)) { - s = "8,%esp"; - l = l->prev; - } - } - } else { - l = l->prev; - } - - if (s && s2) { - int v = 0; - if (strcmp (s, s2) != 0) { - fprintf (stderr, "Stack adjustment not matching.\n"); - return; - } - - while (isdigit(*s)) { - v = v * 10 + (*s) - '0'; - s++; - } - - if (strcmp (s, ",%esp") != 0) { - fprintf (stderr, "Not adjusting the stack pointer.\n"); - return; - } - f->initial_offset = v; - fl->delet = 3; - fl = fl->next; - l1->delet = 2; - l1 = l1->prev; - while (l1 != l) { - l1->delet = 1; - l1 = l1->prev; - } - } - - while (in_pop_area) { - char *popm, *pushm; - popm = match (l, "popl %"); - pushm = match (fl, "pushl %"); - if (popm && pushm && strcmp(pushm, popm) == 0) { - pops_at_end++; - fl->delet = l->delet = 1; - } else - in_pop_area = 0; - l = l->prev; - fl = fl->next; - } - if (f->initial_offset) - f->initial_offset += 4 * pops_at_end; -} - -static void output_function(struct func *f) -{ - struct line *l = f->first_line; - - while (l) { - switch (l->delet) { - case 1: - break; - case 0: - printf("%s\n", l->data); - break; - case 2: - if (f->initial_offset) - printf("\taddl 
$%d,%%esp\n", f->initial_offset); - break; - case 3: - if (f->initial_offset) - printf("\tsubl $%d,%%esp\n", f->initial_offset); - break; - } - l = l->next; - } -} - -int main(int argc, char **argv) -{ - FILE *infile = stdin; - char tmp[4096]; - -#ifdef __mc68000__ - if(system("perl machdep/cpuopti")==-1) { - perror("perl machdep/cpuopti"); - return 10; - } else return 0; -#endif - - /* For debugging... */ - if (argc == 2) - infile = fopen (argv[1], "r"); - - for(;;) { - char *s; - - if ((fgets(tmp, 4095, infile)) == NULL) - break; - - s = strchr (tmp, '\n'); - if (s != NULL) - *s = 0; - - if (strncmp(tmp, ".globl op_", 10) == 0) { - struct line *first_line = NULL, *prev = NULL; - struct line **nextp = &first_line; - struct func f; - int nr_rets = 0; - int can_opt = 1; - - do { - struct line *current; - - if (strcmp (tmp, "#APP") != 0 && strcmp (tmp, "#NO_APP") != 0) { - current = *nextp = (struct line *)malloc(sizeof (struct line)); - nextp = ¤t->next; - current->prev = prev; prev = current; - current->next = NULL; - current->delet = 0; - current->data = strdup (tmp); - if (match (current, "movl %esp,%ebp") || match (current, "enter")) { - fprintf (stderr, "GCC failed to eliminate fp: %s\n", first_line->data); - can_opt = 0; - } - - if (match (current, "ret")) - nr_rets++; - } - if ((fgets(tmp, 4095, infile)) == NULL) - oops(); - s = strchr (tmp, '\n'); - if (s != NULL) - *s = 0; - } while (strncmp (tmp,".Lfe", 4) != 0); - - f.first_line = first_line; - f.last_line = prev; - - if (nr_rets == 1 && can_opt) - do_function(&f); - /*else - fprintf(stderr, "Too many RET instructions: %s\n", first_line->data);*/ - output_function(&f); - } - printf("%s\n", tmp); - } - return 0; -} diff --git a/BasiliskII/src/uae_cpu/cpustbl_nf.cpp b/BasiliskII/src/uae_cpu/cpustbl_nf.cpp new file mode 100644 index 00000000..0ea66010 --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpustbl_nf.cpp @@ -0,0 +1,2 @@ +#define NOFLAGS 1 +#include "cpustbl.cpp" diff --git 
a/BasiliskII/src/uae_cpu/cpustbla.cpp b/BasiliskII/src/uae_cpu/cpustbla.cpp new file mode 100644 index 00000000..f3f8e320 --- /dev/null +++ b/BasiliskII/src/uae_cpu/cpustbla.cpp @@ -0,0 +1,5 @@ +/* + * cpustbl.cpp must be compiled twice, once for the generator program + * and once for the actual executable + */ +#include "cpustbl.cpp" diff --git a/BasiliskII/src/uae_cpu/debug.cpp b/BasiliskII/src/uae_cpu/debug.cpp new file mode 100644 index 00000000..8b2f14e0 --- /dev/null +++ b/BasiliskII/src/uae_cpu/debug.cpp @@ -0,0 +1,82 @@ +/* + * debug.cpp - CPU debugger + * + * Copyright (c) 2001-2010 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Bernd Schmidt's UAE + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * UAE - The Un*x Amiga Emulator + * + * Debugger + * + * (c) 1995 Bernd Schmidt + * + */ + +#include "sysdeps.h" + +#include "memory.h" +#include "newcpu.h" +#include "debug.h" + +#include "input.h" +#include "cpu_emulation.h" + +#include "main.h" + +static int debugger_active = 0; +int debugging = 0; +int irqindebug = 0; + +int ignore_irq = 0; + + +void activate_debugger (void) +{ +#ifdef DEBUGGER + ndebug::do_skip = false; +#endif + debugger_active = 1; + SPCFLAGS_SET( SPCFLAG_BRK ); + debugging = 1; + /* use_debugger = 1; */ +} + +void deactivate_debugger(void) +{ + debugging = 0; + debugger_active = 0; +} + +void debug (void) +{ + if (ignore_irq && regs.s && !regs.m ) { + SPCFLAGS_SET( SPCFLAG_BRK ); + return; + } +#ifdef DEBUGGER + ndebug::run(); +#endif +} + +/* +vim:ts=4:sw=4: +*/ diff --git a/BasiliskII/src/uae_cpu/fpu/core.h b/BasiliskII/src/uae_cpu/fpu/core.h new file mode 100644 index 00000000..1801ff7c --- /dev/null +++ b/BasiliskII/src/uae_cpu/fpu/core.h @@ -0,0 +1,268 @@ +/* + * fpu/core.h - base fpu context definition + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. 
+ * + * MC68881/68040 fpu emulation + * + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef FPU_CORE_H +#define FPU_CORE_H + +#include "sysdeps.h" +#include "fpu/types.h" + +/* Always use x87 FPU stack on IA-32. */ +#if defined(X86_ASSEMBLY) +#define USE_X87_ASSEMBLY 1 +#ifndef USE_JIT_FPU +#define ACCURATE_SIN_COS_TAN 1 +#endif +#endif + +/* Only use x87 FPU on x86-64 if long double precision is requested. */ +#if defined(X86_64_ASSEMBLY) && defined(USE_LONG_DOUBLE) +#define USE_X87_ASSEMBLY 1 +#define ACCURATE_SIN_COS_TAN 1 +#endif + +/* ========================================================================== */ +/* ========================= FPU CONTEXT DEFINITION ========================= */ +/* ========================================================================== */ + +/* We don't use all features of the C++ language so that we may still + * easily backport that code to C. 
+ */ + +struct fpu_t { + + /* ---------------------------------------------------------------------- */ + /* --- Floating-Point Data Registers --- */ + /* ---------------------------------------------------------------------- */ + + /* The eight %fp0 .. %fp7 registers */ + fpu_register registers[8]; + + /* Used for lazy evalualation of FPU flags */ + fpu_register result; + + /* ---------------------------------------------------------------------- */ + /* --- Floating-Point Control Register --- */ + /* ---------------------------------------------------------------------- */ + + struct { + + /* Exception Enable Byte */ + uae_u32 exception_enable; + #define FPCR_EXCEPTION_ENABLE 0x0000ff00 + #define FPCR_EXCEPTION_BSUN 0x00008000 + #define FPCR_EXCEPTION_SNAN 0x00004000 + #define FPCR_EXCEPTION_OPERR 0x00002000 + #define FPCR_EXCEPTION_OVFL 0x00001000 + #define FPCR_EXCEPTION_UNFL 0x00000800 + #define FPCR_EXCEPTION_DZ 0x00000400 + #define FPCR_EXCEPTION_INEX2 0x00000200 + #define FPCR_EXCEPTION_INEX1 0x00000100 + + /* Mode Control Byte Mask */ + #define FPCR_MODE_CONTROL 0x000000ff + + /* Rounding precision */ + uae_u32 rounding_precision; + #define FPCR_ROUNDING_PRECISION 0x000000c0 + #define FPCR_PRECISION_SINGLE 0x00000040 + #define FPCR_PRECISION_DOUBLE 0x00000080 + #define FPCR_PRECISION_EXTENDED 0x00000000 + + /* Rounding mode */ + uae_u32 rounding_mode; + #define FPCR_ROUNDING_MODE 0x00000030 + #define FPCR_ROUND_NEAR 0x00000000 + #define FPCR_ROUND_ZERO 0x00000010 + #define FPCR_ROUND_MINF 0x00000020 + #define FPCR_ROUND_PINF 0x00000030 + + } fpcr; + + /* ---------------------------------------------------------------------- */ + /* --- Floating-Point Status Register --- */ + /* ---------------------------------------------------------------------- */ + + struct { + + /* Floating-Point Condition Code Byte */ + uae_u32 condition_codes; + #define FPSR_CCB 0x0f000000 + #define FPSR_CCB_NEGATIVE 0x08000000 + #define FPSR_CCB_ZERO 0x04000000 + #define 
FPSR_CCB_INFINITY 0x02000000 + #define FPSR_CCB_NAN 0x01000000 + + /* Quotient Byte */ + uae_u32 quotient; + #define FPSR_QUOTIENT 0x00ff0000 + #define FPSR_QUOTIENT_SIGN 0x00800000 + #define FPSR_QUOTIENT_VALUE 0x007f0000 + + /* Exception Status Byte */ + uae_u32 exception_status; + #define FPSR_EXCEPTION_STATUS FPCR_EXCEPTION_ENABLE + #define FPSR_EXCEPTION_BSUN FPCR_EXCEPTION_BSUN + #define FPSR_EXCEPTION_SNAN FPCR_EXCEPTION_SNAN + #define FPSR_EXCEPTION_OPERR FPCR_EXCEPTION_OPERR + #define FPSR_EXCEPTION_OVFL FPCR_EXCEPTION_OVFL + #define FPSR_EXCEPTION_UNFL FPCR_EXCEPTION_UNFL + #define FPSR_EXCEPTION_DZ FPCR_EXCEPTION_DZ + #define FPSR_EXCEPTION_INEX2 FPCR_EXCEPTION_INEX2 + #define FPSR_EXCEPTION_INEX1 FPCR_EXCEPTION_INEX1 + + /* Accrued Exception Byte */ + uae_u32 accrued_exception; + #define FPSR_ACCRUED_EXCEPTION 0x000000ff + #define FPSR_ACCR_IOP 0x00000080 + #define FPSR_ACCR_OVFL 0x00000040 + #define FPSR_ACCR_UNFL 0x00000020 + #define FPSR_ACCR_DZ 0x00000010 + #define FPSR_ACCR_INEX 0x00000008 + + } fpsr; + + /* ---------------------------------------------------------------------- */ + /* --- Floating-Point Instruction Address Register --- */ + /* ---------------------------------------------------------------------- */ + + uae_u32 instruction_address; + + /* ---------------------------------------------------------------------- */ + /* --- Initialization / Finalization --- */ + /* ---------------------------------------------------------------------- */ + + /* Flag set if we emulate an integral 68040 FPU */ + bool is_integral; + + /* ---------------------------------------------------------------------- */ + /* --- Extra FPE-dependant defines --- */ + /* ---------------------------------------------------------------------- */ + + #if defined(FPU_X86) \ + || (defined(FPU_UAE) && defined(USE_X87_ASSEMBLY)) \ + || (defined(FPU_IEEE) && defined(USE_X87_ASSEMBLY)) + + #define CW_RESET 0x0040 // initial CW value after RESET + #define CW_FINIT 0x037F // 
initial CW value after FINIT + #define SW_RESET 0x0000 // initial SW value after RESET + #define SW_FINIT 0x0000 // initial SW value after FINIT + #define TW_RESET 0x5555 // initial TW value after RESET + #define TW_FINIT 0x0FFF // initial TW value after FINIT + + #define CW_X 0x1000 // infinity control + #define CW_RC_ZERO 0x0C00 // rounding control toward zero + #define CW_RC_UP 0x0800 // rounding control toward + + #define CW_RC_DOWN 0x0400 // rounding control toward - + #define CW_RC_NEAR 0x0000 // rounding control toward even + #define CW_PC_EXTENDED 0x0300 // precision control 64bit + #define CW_PC_DOUBLE 0x0200 // precision control 53bit + #define CW_PC_RESERVED 0x0100 // precision control reserved + #define CW_PC_SINGLE 0x0000 // precision control 24bit + #define CW_PM 0x0020 // precision exception mask + #define CW_UM 0x0010 // underflow exception mask + #define CW_OM 0x0008 // overflow exception mask + #define CW_ZM 0x0004 // zero divide exception mask + #define CW_DM 0x0002 // denormalized operand exception mask + #define CW_IM 0x0001 // invalid operation exception mask + + #define SW_B 0x8000 // busy flag + #define SW_C3 0x4000 // condition code flag 3 + #define SW_TOP_7 0x3800 // top of stack = ST(7) + #define SW_TOP_6 0x3000 // top of stack = ST(6) + #define SW_TOP_5 0x2800 // top of stack = ST(5) + #define SW_TOP_4 0x2000 // top of stack = ST(4) + #define SW_TOP_3 0x1800 // top of stack = ST(3) + #define SW_TOP_2 0x1000 // top of stack = ST(2) + #define SW_TOP_1 0x0800 // top of stack = ST(1) + #define SW_TOP_0 0x0000 // top of stack = ST(0) + #define SW_C2 0x0400 // condition code flag 2 + #define SW_C1 0x0200 // condition code flag 1 + #define SW_C0 0x0100 // condition code flag 0 + #define SW_ES 0x0080 // error summary status flag + #define SW_SF 0x0040 // stack fault flag + #define SW_PE 0x0020 // precision exception flag + #define SW_UE 0x0010 // underflow exception flag + #define SW_OE 0x0008 // overflow exception flag + #define SW_ZE 0x0004 // 
zero divide exception flag + #define SW_DE 0x0002 // denormalized operand exception flag + #define SW_IE 0x0001 // invalid operation exception flag + + #define X86_ROUNDING_MODE 0x0C00 + #define X86_ROUNDING_PRECISION 0x0300 + + #endif /* FPU_X86 */ + +}; + +/* We handle only one global fpu */ +extern fpu_t fpu; + +/* Return the address of a particular register */ +inline fpu_register * fpu_register_address(int i) + { return &fpu.registers[i]; } + +/* Dump functions for m68k_dumpstate */ +extern void fpu_dump_registers(void); +extern void fpu_dump_flags(void); + +/* Accessors to FPU Control Register */ +//static inline uae_u32 get_fpcr(void); +//static inline void set_fpcr(uae_u32 new_fpcr); + +/* Accessors to FPU Status Register */ +//static inline uae_u32 get_fpsr(void); +//static inline void set_fpsr(uae_u32 new_fpsr); + +/* Accessors to FPU Instruction Address Register */ +//static inline uae_u32 get_fpiar(); +//static inline void set_fpiar(uae_u32 new_fpiar); + +/* Initialization / Finalization */ +extern void fpu_init(bool integral_68040); +extern void fpu_exit(void); +extern void fpu_reset(void); + +/* Floating-point arithmetic instructions */ +void fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) REGPARAM; + +/* Floating-point program control operations */ +void fpuop_bcc(uae_u32 opcode, uaecptr pc, uae_u32 extra) REGPARAM; +void fpuop_dbcc(uae_u32 opcode, uae_u32 extra) REGPARAM; +void fpuop_scc(uae_u32 opcode, uae_u32 extra) REGPARAM; + +/* Floating-point system control operations */ +void fpuop_save(uae_u32 opcode) REGPARAM; +void fpuop_restore(uae_u32 opcode) REGPARAM; +void fpuop_trapcc(uae_u32 opcode, uaecptr oldpc, uae_u32 extra) REGPARAM; + +#endif /* FPU_CORE_H */ diff --git a/BasiliskII/src/uae_cpu/fpu/exceptions.cpp b/BasiliskII/src/uae_cpu/fpu/exceptions.cpp new file mode 100644 index 00000000..2a597997 --- /dev/null +++ b/BasiliskII/src/uae_cpu/fpu/exceptions.cpp @@ -0,0 +1,193 @@ +/* + * fpu/exceptions.cpp - system-dependant FPU exceptions 
management + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * MC68881/68040 fpu emulation + * + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#undef PRIVATE +#define PRIVATE /**/ + +#undef PUBLIC +#define PUBLIC /**/ + +#undef FFPU +#define FFPU /**/ + +#undef FPU +#define FPU fpu. 
+ +/* -------------------------------------------------------------------------- */ +/* --- Native X86 exceptions --- */ +/* -------------------------------------------------------------------------- */ + +#ifdef FPU_USE_X86_EXCEPTIONS +void FFPU fpu_init_native_exceptions(void) +{ + // Mapping for "sw" -> fpsr exception byte + for (uae_u32 i = 0; i < 0x80; i++) { + exception_host2mac[i] = 0; + + if(i & SW_FAKE_BSUN) { + exception_host2mac[i] |= FPSR_EXCEPTION_BSUN; + } + // precision exception + if(i & SW_PE) { + exception_host2mac[i] |= FPSR_EXCEPTION_INEX2; + } + // underflow exception + if(i & SW_UE) { + exception_host2mac[i] |= FPSR_EXCEPTION_UNFL; + } + // overflow exception + if(i & SW_OE) { + exception_host2mac[i] |= FPSR_EXCEPTION_OVFL; + } + // zero divide exception + if(i & SW_ZE) { + exception_host2mac[i] |= FPSR_EXCEPTION_DZ; + } + // denormalized operand exception. + // wrong, but should not get here, normalization is done in elsewhere + if(i & SW_DE) { + exception_host2mac[i] |= FPSR_EXCEPTION_SNAN; + } + // invalid operation exception + if(i & SW_IE) { + exception_host2mac[i] |= FPSR_EXCEPTION_OPERR; + } + } + + // Mapping for fpsr exception byte -> "sw" + for (uae_u32 i = 0; i < 0x100; i++) { + uae_u32 fpsr = (i << 8); + exception_mac2host[i] = 0; + + // BSUN; make sure that you don't generate FPU stack faults. 
+ if(fpsr & FPSR_EXCEPTION_BSUN) { + exception_mac2host[i] |= SW_FAKE_BSUN; + } + // precision exception + if(fpsr & FPSR_EXCEPTION_INEX2) { + exception_mac2host[i] |= SW_PE; + } + // underflow exception + if(fpsr & FPSR_EXCEPTION_UNFL) { + exception_mac2host[i] |= SW_UE; + } + // overflow exception + if(fpsr & FPSR_EXCEPTION_OVFL) { + exception_mac2host[i] |= SW_OE; + } + // zero divide exception + if(fpsr & FPSR_EXCEPTION_DZ) { + exception_mac2host[i] |= SW_ZE; + } + // denormalized operand exception + if(fpsr & FPSR_EXCEPTION_SNAN) { + exception_mac2host[i] |= SW_DE; //Wrong + } + // invalid operation exception + if(fpsr & FPSR_EXCEPTION_OPERR) { + exception_mac2host[i] |= SW_IE; + } + } +} +#endif + +#ifdef FPU_USE_X86_ACCRUED_EXCEPTIONS +void FFPU fpu_init_native_accrued_exceptions(void) +{ + /* + 68881/68040 accrued exceptions accumulate as follows: + Accrued.IOP |= (Exception.SNAN | Exception.OPERR) + Accrued.OVFL |= (Exception.OVFL) + Accrued.UNFL |= (Exception.UNFL | Exception.INEX2) + Accrued.DZ |= (Exception.DZ) + Accrued.INEX |= (Exception.INEX1 | Exception.INEX2 | Exception.OVFL) + */ + + // Mapping for "fpsr.accrued_exception" -> fpsr accrued exception byte + for (uae_u32 i = 0; i < 0x40; i++ ) { + accrued_exception_host2mac[i] = 0; + + // precision exception + if(i & SW_PE) { + accrued_exception_host2mac[i] |= FPSR_ACCR_INEX; + } + // underflow exception + if(i & SW_UE) { + accrued_exception_host2mac[i] |= FPSR_ACCR_UNFL; + } + // overflow exception + if(i & SW_OE) { + accrued_exception_host2mac[i] |= FPSR_ACCR_OVFL; + } + // zero divide exception + if(i & SW_ZE) { + accrued_exception_host2mac[i] |= FPSR_ACCR_DZ; + } + // denormalized operand exception + if(i & SW_DE) { + accrued_exception_host2mac[i] |= FPSR_ACCR_IOP; //?????? 
+ } + // invalid operation exception + if(i & SW_IE) { + accrued_exception_host2mac[i] |= FPSR_ACCR_IOP; + } + } + + // Mapping for fpsr accrued exception byte -> "fpsr.accrued_exception" + for (uae_u32 i = 0; i < 0x20; i++) { + int fpsr = (i << 3); + accrued_exception_mac2host[i] = 0; + + // precision exception + if(fpsr & FPSR_ACCR_INEX) { + accrued_exception_mac2host[i] |= SW_PE; + } + // underflow exception + if(fpsr & FPSR_ACCR_UNFL) { + accrued_exception_mac2host[i] |= SW_UE; + } + // overflow exception + if(fpsr & FPSR_ACCR_OVFL) { + accrued_exception_mac2host[i] |= SW_OE; + } + // zero divide exception + if(fpsr & FPSR_ACCR_DZ) { + accrued_exception_mac2host[i] |= SW_ZE; + } + // What about SW_DE; //?????? + // invalid operation exception + if(fpsr & FPSR_ACCR_IOP) { + accrued_exception_mac2host[i] |= SW_IE; + } + } +} +#endif diff --git a/BasiliskII/src/uae_cpu/fpu/exceptions.h b/BasiliskII/src/uae_cpu/fpu/exceptions.h new file mode 100644 index 00000000..f943da04 --- /dev/null +++ b/BasiliskII/src/uae_cpu/fpu/exceptions.h @@ -0,0 +1,154 @@ +/* + * fpu/exceptions.h - system-dependant FPU exceptions management + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * MC68881/68040 fpu emulation + * + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef FPU_EXCEPTIONS_H +#define FPU_EXCEPTIONS_H + +/* NOTE: this file shall be included only from fpu/fpu_*.cpp */ +#undef PUBLIC +#define PUBLIC extern + +#undef PRIVATE +#define PRIVATE static + +#undef FFPU +#define FFPU /**/ + +#undef FPU +#define FPU fpu. + +/* Defaults to generic exceptions */ +#define FPU_USE_GENERIC_EXCEPTIONS +#define FPU_USE_GENERIC_ACCRUED_EXCEPTIONS + +/* -------------------------------------------------------------------------- */ +/* --- Selection of floating-point exceptions handling mode --- */ +/* -------------------------------------------------------------------------- */ + +/* Optimized i386 fpu core must use native exceptions */ +#if defined(FPU_X86) && defined(USE_X87_ASSEMBLY) +# undef FPU_USE_GENERIC_EXCEPTIONS +# define FPU_USE_X86_EXCEPTIONS +#endif + +/* Optimized i386 fpu core must use native accrued exceptions */ +#if defined(FPU_X86) && defined(USE_X87_ASSEMBLY) +# undef FPU_USE_GENERIC_ACCRUED_EXCEPTIONS +# define FPU_USE_X86_ACCRUED_EXCEPTIONS +#endif + +/* -------------------------------------------------------------------------- */ +/* --- Native X86 Exceptions --- */ +/* -------------------------------------------------------------------------- */ + +#ifdef FPU_USE_X86_EXCEPTIONS + +/* Extend the SW_* codes */ +#define SW_FAKE_BSUN SW_SF + +/* Shorthand */ +#define SW_EXCEPTION_MASK (SW_ES|SW_SF|SW_PE|SW_UE|SW_OE|SW_ZE|SW_DE|SW_IE) +// #define SW_EXCEPTION_MASK (SW_SF|SW_PE|SW_UE|SW_OE|SW_ZE|SW_DE|SW_IE) + +/* Lookup tables */ +PRIVATE uae_u32 
exception_host2mac[ 0x80 ]; +PRIVATE uae_u32 exception_mac2host[ 0x100 ]; + +/* Initialize native exception management */ +PUBLIC void FFPU fpu_init_native_exceptions(void); + +/* Return m68k floating-point exception status */ +PRIVATE inline uae_u32 FFPU get_exception_status(void) + { return exception_host2mac[FPU fpsr.exception_status & (SW_FAKE_BSUN|SW_PE|SW_UE|SW_OE|SW_ZE|SW_DE|SW_IE)]; } + +/* Set new exception status. Assumes mask against FPSR_EXCEPTION to be already performed */ +PRIVATE inline void FFPU set_exception_status(uae_u32 new_status) + { FPU fpsr.exception_status = exception_mac2host[new_status >> 8]; } + +#endif /* FPU_USE_X86_EXCEPTIONS */ + +#ifdef FPU_USE_X86_ACCRUED_EXCEPTIONS + +/* Lookup tables */ +PRIVATE uae_u32 accrued_exception_host2mac[ 0x40 ]; +PRIVATE uae_u32 accrued_exception_mac2host[ 0x20 ]; + +/* Initialize native accrued exception management */ +PUBLIC void FFPU fpu_init_native_accrued_exceptions(void); + +/* Return m68k accrued exception byte */ +PRIVATE inline uae_u32 FFPU get_accrued_exception(void) + { return accrued_exception_host2mac[FPU fpsr.accrued_exception & (SW_PE|SW_UE|SW_OE|SW_ZE|SW_DE|SW_IE)]; } + +/* Set new accrued exception byte */ +PRIVATE inline void FFPU set_accrued_exception(uae_u32 new_status) + { FPU fpsr.accrued_exception = accrued_exception_mac2host[(new_status & 0xF8) >> 3]; } + +#endif /* FPU_USE_X86_ACCRUED_EXCEPTIONS */ + +/* -------------------------------------------------------------------------- */ +/* --- Default Exceptions Handling --- */ +/* -------------------------------------------------------------------------- */ + +#ifdef FPU_USE_GENERIC_EXCEPTIONS + +/* Initialize native exception management */ +static inline void FFPU fpu_init_native_exceptions(void) + { } + +/* Return m68k floating-point exception status */ +PRIVATE inline uae_u32 FFPU get_exception_status(void) + { return FPU fpsr.exception_status; } + +/* Set new exception status. 
Assumes mask against FPSR_EXCEPTION to be already performed */ +PRIVATE inline void FFPU set_exception_status(uae_u32 new_status) + { FPU fpsr.exception_status = new_status; } + +#endif /* FPU_USE_GENERIC_EXCEPTIONS */ + +#ifdef FPU_USE_GENERIC_ACCRUED_EXCEPTIONS + +/* Initialize native accrued exception management */ +PRIVATE inline void FFPU fpu_init_native_accrued_exceptions(void) + { } + +/* Return m68k accrued exception byte */ +PRIVATE inline uae_u32 FFPU get_accrued_exception(void) + { return FPU fpsr.accrued_exception; } + +/* Set new accrued exception byte */ +PRIVATE inline void FFPU set_accrued_exception(uae_u32 new_status) + { FPU fpsr.accrued_exception = new_status; } + +#endif /* FPU_USE_GENERIC_ACCRUED_EXCEPTIONS */ + +#endif /* FPU_EXCEPTIONS_H */ diff --git a/BasiliskII/src/uae_cpu/fpu/flags.cpp b/BasiliskII/src/uae_cpu/fpu/flags.cpp new file mode 100644 index 00000000..4b0972df --- /dev/null +++ b/BasiliskII/src/uae_cpu/fpu/flags.cpp @@ -0,0 +1,174 @@ +/* + * fpu/flags.cpp - Floating-point flags + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * MC68881/68040 fpu emulation + * + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* NOTE: this file shall be included only from fpu/fpu_*.cpp */ +#undef PRIVATE +#define PRIVATE /**/ + +#undef PUBLIC +#define PUBLIC /**/ + +#undef FFPU +#define FFPU /**/ + +#undef FPU +#define FPU fpu. + +/* -------------------------------------------------------------------------- */ +/* --- Native X86 floating-point flags --- */ +/* -------------------------------------------------------------------------- */ + +#ifdef FPU_USE_X86_FLAGS + +/* Initialization */ +void FFPU fpu_init_native_fflags(void) +{ + // Adapted from fpu_x86.cpp + #define SW_Z_I_NAN_MASK (SW_C0|SW_C2|SW_C3) + #define SW_Z (SW_C3) + #define SW_I (SW_C0|SW_C2) + #define SW_NAN (SW_C0) + #define SW_FINITE (SW_C2) + #define SW_EMPTY_REGISTER (SW_C0|SW_C3) + #define SW_DENORMAL (SW_C2|SW_C3) + #define SW_UNSUPPORTED (0) + #define SW_N (SW_C1) + + // Sanity checks + #if (SW_Z != NATIVE_FFLAG_ZERO) + #error "Incorrect X86 Z fflag" + #endif + #if (SW_I != NATIVE_FFLAG_INFINITY) + #error "Incorrect X86 I fflag" + #endif + #if (SW_N != NATIVE_FFLAG_NEGATIVE) + #error "Incorrect X86 N fflag" + #endif + #if (SW_NAN != NATIVE_FFLAG_NAN) + #error "Incorrect X86 NAN fflag" + #endif + + // Native status word to m68k mappings + for (uae_u32 i = 0; i < 0x48; i++) { + to_m68k_fpcond[i] = 0; + const uae_u32 native_fpcond = i << 8; + switch (native_fpcond & SW_Z_I_NAN_MASK) { +#ifndef FPU_UAE +// gb-- enabling it would lead to incorrect drawing of digits +// in Speedometer Performance Test + case SW_UNSUPPORTED: +#endif + case SW_NAN: + case 
SW_EMPTY_REGISTER: + to_m68k_fpcond[i] |= FPSR_CCB_NAN; + break; + case SW_FINITE: + case SW_DENORMAL: + break; + case SW_I: + to_m68k_fpcond[i] |= FPSR_CCB_INFINITY; + break; + case SW_Z: + to_m68k_fpcond[i] |= FPSR_CCB_ZERO; + break; + } + if (native_fpcond & SW_N) + to_m68k_fpcond[i] |= FPSR_CCB_NEGATIVE; + } + + // m68k to native status word mappings + for (uae_u32 i = 0; i < 0x10; i++) { + const uae_u32 m68k_fpcond = i << 24; + if (m68k_fpcond & FPSR_CCB_NAN) + to_host_fpcond[i] = SW_NAN; + else if (m68k_fpcond & FPSR_CCB_ZERO) + to_host_fpcond[i] = SW_Z; + else if (m68k_fpcond & FPSR_CCB_INFINITY) + to_host_fpcond[i] = SW_I; + else + to_host_fpcond[i] = SW_FINITE; + if (m68k_fpcond & FPSR_CCB_NEGATIVE) + to_host_fpcond[i] |= SW_N; + } + + // truth-table for FPU conditions + for (uae_u32 host_fpcond = 0; host_fpcond < 0x08; host_fpcond++) { + // host_fpcond: C3 on bit 2, C1 and C0 are respectively on bits 1 and 0 + const uae_u32 real_host_fpcond = ((host_fpcond & 4) << 12) | ((host_fpcond & 3) << 8); + const bool N = ((real_host_fpcond & NATIVE_FFLAG_NEGATIVE) == NATIVE_FFLAG_NEGATIVE); + const bool Z = ((real_host_fpcond & NATIVE_FFLAG_ZERO) == NATIVE_FFLAG_ZERO); + const bool NaN = ((real_host_fpcond & NATIVE_FFLAG_NAN) == NATIVE_FFLAG_NAN); + + int value; + for (uae_u32 m68k_fpcond = 0; m68k_fpcond < 0x20; m68k_fpcond++) { + switch (m68k_fpcond) { + case 0x00: value = 0; break; // False + case 0x01: value = Z; break; // Equal + case 0x02: value = !(NaN || Z || N); break; // Ordered Greater Than + case 0x03: value = Z || !(NaN || N); break; // Ordered Greater Than or Equal + case 0x04: value = N && !(NaN || Z); break; // Ordered Less Than + case 0x05: value = Z || (N && !NaN); break; // Ordered Less Than or Equal + case 0x06: value = !(NaN || Z); break; // Ordered Greater or Less Than + case 0x07: value = !NaN; break; // Ordered + case 0x08: value = NaN; break; // Unordered + case 0x09: value = NaN || Z; break; // Unordered or Equal + case 0x0a: value = NaN 
|| !(N || Z); break; // Unordered or Greater Than + case 0x0b: value = NaN || Z || !N; break; // Unordered or Greater or Equal + case 0x0c: value = NaN || (N && !Z); break; // Unordered or Less Than + case 0x0d: value = NaN || Z || N; break; // Unordered or Less or Equal + case 0x0e: value = !Z; break; // Not Equal + case 0x0f: value = 1; break; // True + case 0x10: value = 0; break; // Signaling False + case 0x11: value = Z; break; // Signaling Equal + case 0x12: value = !(NaN || Z || N); break; // Greater Than + case 0x13: value = Z || !(NaN || N); break; // Greater Than or Equal + case 0x14: value = N && !(NaN || Z); break; // Less Than + case 0x15: value = Z || (N && !NaN); break; // Less Than or Equal + case 0x16: value = !(NaN || Z); break; // Greater or Less Than + case 0x17: value = !NaN; break; // Greater, Less or Equal + case 0x18: value = NaN; break; // Not Greater, Less or Equal + case 0x19: value = NaN || Z; break; // Not Greater or Less Than + case 0x1a: value = NaN || !(N || Z); break; // Not Less Than or Equal + case 0x1b: value = NaN || Z || !N; break; // Not Less Than + case 0x1c: value = NaN || (N && !Z); break; // Not Greater Than or Equal +// case 0x1c: value = !Z && (NaN || N); break; // Not Greater Than or Equal + case 0x1d: value = NaN || Z || N; break; // Not Greater Than + case 0x1e: value = !Z; break; // Signaling Not Equal + case 0x1f: value = 1; break; // Signaling True + default: value = -1; + } + fpcond_truth_table[m68k_fpcond][host_fpcond] = value; + } + } +} + +#endif diff --git a/BasiliskII/src/uae_cpu/fpu/flags.h b/BasiliskII/src/uae_cpu/fpu/flags.h new file mode 100644 index 00000000..3d144ac2 --- /dev/null +++ b/BasiliskII/src/uae_cpu/fpu/flags.h @@ -0,0 +1,228 @@ +/* + * fpu/flags.h - Floating-point flags + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT 
compatible virtual machine running on almost any hardware. + * + * MC68881/68040 fpu emulation + * + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef FPU_FLAGS_H +#define FPU_FLAGS_H + +/* NOTE: this file shall be included only from fpu/fpu_*.cpp */ +#undef PUBLIC +#define PUBLIC extern + +#undef PRIVATE +#define PRIVATE static + +#undef FFPU +#define FFPU /**/ + +#undef FPU +#define FPU fpu. 
+ +/* Defaults to generic flags */ +#define FPU_USE_GENERIC_FLAGS + +/* -------------------------------------------------------------------------- */ +/* --- Selection of floating-point flags handling mode --- */ +/* -------------------------------------------------------------------------- */ + +/* Optimized i386 fpu core must use native flags */ +#if defined(FPU_X86) && defined(USE_X87_ASSEMBLY) +# undef FPU_USE_GENERIC_FLAGS +# define FPU_USE_X86_FLAGS +#endif + +/* Old UAE FPU core can use native flags */ +#if defined(FPU_UAE) && defined(USE_X87_ASSEMBLY) +# undef FPU_USE_GENERIC_FLAGS +# define FPU_USE_X86_FLAGS +#endif + +/* IEEE-based implementation must use lazy flag evaluation */ +#if defined(FPU_IEEE) +# undef FPU_USE_GENERIC_FLAGS +# define FPU_USE_LAZY_FLAGS +#endif + +/* JIT Compilation for FPU only works with lazy evaluation of FPU flags */ +#if defined(FPU_IEEE) && defined(USE_X87_ASSEMBLY) && defined(USE_JIT_FPU) +# undef FPU_USE_GENERIC_FLAGS +# define FPU_USE_LAZY_FLAGS +#endif + +#ifdef FPU_IMPLEMENTATION + +/* -------------------------------------------------------------------------- */ +/* --- Native X86 Floating-Point Flags --- */ +/* -------------------------------------------------------------------------- */ + +/* FPU_X86 has its own set of lookup functions */ + +#ifdef FPU_USE_X86_FLAGS + +#define FPU_USE_NATIVE_FLAGS + +#define NATIVE_FFLAG_NEGATIVE 0x0200 +#define NATIVE_FFLAG_ZERO 0x4000 +#define NATIVE_FFLAG_INFINITY 0x0500 +#define NATIVE_FFLAG_NAN 0x0100 + +/* Translation tables between native and m68k floating-point flags */ +PRIVATE uae_u32 to_m68k_fpcond[0x48]; +PRIVATE uae_u32 to_host_fpcond[0x10]; + +/* Truth table for floating-point condition codes */ +PRIVATE uae_u32 fpcond_truth_table[32][8]; // 32 m68k conditions x 8 host condition codes + +/* Initialization */ +PUBLIC void FFPU fpu_init_native_fflags(void); + +#ifdef FPU_UAE + +/* Native to m68k floating-point condition codes */ +PRIVATE inline uae_u32 FFPU get_fpccr(void) 
+ { return to_m68k_fpcond[(FPU fpsr.condition_codes >> 8) & 0x47]; } + +/* M68k to native floating-point condition codes */ +PRIVATE inline void FFPU set_fpccr(uae_u32 new_fpcond) + /* Precondition: new_fpcond is only valid for floating-point condition codes */ + { FPU fpsr.condition_codes = to_host_fpcond[new_fpcond >> 24]; } + +/* Make FPSR according to the value passed in argument */ +PRIVATE inline void FFPU make_fpsr(fpu_register const & r) + { uae_u16 sw; __asm__ __volatile__ ("fxam\n\tfnstsw %0" : "=a" (sw) : "f" (r)); FPU fpsr.condition_codes = sw; } + +/* Return the corresponding ID of the current floating-point condition codes */ +/* NOTE: only valid for evaluation of a condition */ +PRIVATE inline int FFPU host_fpcond_id(void) + { return ((FPU fpsr.condition_codes >> 12) & 4) | ((FPU fpsr.condition_codes >> 8) & 3); } + +/* Return true if the floating-point condition is satisfied */ +PRIVATE inline bool FFPU fpcctrue(int condition) + { return fpcond_truth_table[condition][host_fpcond_id()]; } + +#endif /* FPU_UAE */ + +/* Return the address of the floating-point condition codes truth table */ +static inline uae_u8 * const FFPU address_of_fpcond_truth_table(void) + { return ((uae_u8*)&fpcond_truth_table[0][0]); } + +#endif /* FPU_X86_USE_NATIVE_FLAGS */ + +/* -------------------------------------------------------------------------- */ +/* --- Use Original M68K FPU Mappings --- */ +/* -------------------------------------------------------------------------- */ + +#ifdef FPU_USE_GENERIC_FLAGS + +#undef FPU_USE_NATIVE_FLAGS + +#define NATIVE_FFLAG_NEGATIVE 0x08000000 +#define NATIVE_FFLAG_ZERO 0x04000000 +#define NATIVE_FFLAG_INFINITY 0x02000000 +#define NATIVE_FFLAG_NAN 0x01000000 + +/* Initialization - NONE */ +PRIVATE inline void FFPU fpu_init_native_fflags(void) + { } + +/* Native to m68k floating-point condition codes - SELF */ +PRIVATE inline uae_u32 FFPU get_fpccr(void) + { return FPU fpsr.condition_codes; } + +/* M68k to native floating-point 
condition codes - SELF */ +PRIVATE inline void FFPU set_fpccr(uae_u32 new_fpcond) + { FPU fpsr.condition_codes = new_fpcond; } + +#endif /* FPU_USE_GENERIC_FLAGS */ + +/* -------------------------------------------------------------------------- */ +/* --- Use Lazy Flags Evaluation --- */ +/* -------------------------------------------------------------------------- */ + +#ifdef FPU_USE_LAZY_FLAGS + +#undef FPU_USE_NATIVE_FLAGS + +#define NATIVE_FFLAG_NEGATIVE 0x08000000 +#define NATIVE_FFLAG_ZERO 0x04000000 +#define NATIVE_FFLAG_INFINITY 0x02000000 +#define NATIVE_FFLAG_NAN 0x01000000 + +/* Initialization - NONE */ +PRIVATE inline void FFPU fpu_init_native_fflags(void) + { } + +/* Native to m68k floating-point condition codes - SELF */ +PRIVATE inline uae_u32 FFPU get_fpccr(void) +{ + uae_u32 fpccr = 0; + if (isnan(FPU result)) + fpccr |= FPSR_CCB_NAN; + else if (FPU result == 0.0) + fpccr |= FPSR_CCB_ZERO; + else if (FPU result < 0.0) + fpccr |= FPSR_CCB_NEGATIVE; + if (isinf(FPU result)) + fpccr |= FPSR_CCB_INFINITY; + return fpccr; +} + +/* M68k to native floating-point condition codes - SELF */ +PRIVATE inline void FFPU set_fpccr(uae_u32 new_fpcond) +{ + if (new_fpcond & FPSR_CCB_NAN) + make_nan(FPU result); + else if (new_fpcond & FPSR_CCB_ZERO) + FPU result = 0.0; + else if (new_fpcond & FPSR_CCB_NEGATIVE) + FPU result = -1.0; + else + FPU result = +1.0; + /* gb-- where is Infinity ? 
*/ +} + +/* Make FPSR according to the value passed in argument */ +PRIVATE inline void FFPU make_fpsr(fpu_register const & r) + { FPU result = r; } + +#endif /* FPU_USE_LAZY_FLAGS */ + +#endif + +/* -------------------------------------------------------------------------- */ +/* --- Common methods --- */ +/* -------------------------------------------------------------------------- */ + +/* Return the address of the floating-point condition codes register */ +static inline uae_u32 * FFPU address_of_fpccr(void) + { return ((uae_u32 *)& FPU fpsr.condition_codes); } + +#endif /* FPU_FLAGS_H */ diff --git a/BasiliskII/src/uae_cpu/fpu/fpu.h b/BasiliskII/src/uae_cpu/fpu/fpu.h new file mode 100644 index 00000000..d1fe6dd2 --- /dev/null +++ b/BasiliskII/src/uae_cpu/fpu/fpu.h @@ -0,0 +1,59 @@ +/* + * fpu/fpu.h - public header + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * MC68881/68040 fpu emulation + * + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef FPU_PUBLIC_HEADER_H +#define FPU_PUBLIC_HEADER_H + +#ifndef FPU_DEBUG +#define FPU_DEBUG 0 +#endif + +#if FPU_DEBUG +#define fpu_debug(args) printf args; +#define FPU_DUMP_REGISTERS 0 +#define FPU_DUMP_FIRST_BYTES 0 +#else +#define fpu_debug(args) ; +#undef FPU_DUMP_REGISTERS +#undef FPU_DUMP_FIRST_BYTES +#endif + +#include "sysdeps.h" +#include "fpu/types.h" +#include "fpu/core.h" + +void fpu_set_fpsr(uae_u32 new_fpsr); +uae_u32 fpu_get_fpsr(void); +void fpu_set_fpcr(uae_u32 new_fpcr); +uae_u32 fpu_get_fpcr(void); + +#endif /* FPU_PUBLIC_HEADER_H */ diff --git a/BasiliskII/src/uae_cpu/fpu/fpu_ieee.cpp b/BasiliskII/src/uae_cpu/fpu/fpu_ieee.cpp new file mode 100644 index 00000000..5fa1ad0b --- /dev/null +++ b/BasiliskII/src/uae_cpu/fpu/fpu_ieee.cpp @@ -0,0 +1,2330 @@ +/* + * fpu_ieee.cpp - the IEEE FPU + * + * Copyright (c) 2001-2008 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * MC68881/68040 fpu emulation + * + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * UAE - The Un*x Amiga Emulator + * + * MC68881/MC68040 emulation + * + * Copyright 1996 Herman ten Brugge + * + * + * Following fixes by Lauri Pesonen, July 1999: + * + * FMOVEM list handling: + * The lookup tables did not work correctly, rewritten. + * FINT: + * (int) cast does not work, fixed. + * Further, now honors the FPU fpcr rounding modes. + * FINTRZ: + * (int) cast cannot be used, fixed. + * FGETEXP: + * Input argument value 0 returned erroneous value. + * FMOD: + * (int) cast cannot be used. Replaced by proper rounding. + * Quotient byte handling was missing. + * FREM: + * (int) cast cannot be used. Replaced by proper rounding. + * Quotient byte handling was missing. + * FSCALE: + * Input argument value 0 was not handled correctly. + * FMOVEM Control Registers to/from address FPU registers An: + * A bug caused the code never been called. + * FMOVEM Control Registers pre-decrement: + * Moving of control regs from memory to FPP was not handled properly, + * if not all of the three FPU registers were moved. + * Condition code "Not Greater Than or Equal": + * Returned erroneous value. + * FSINCOS: + * Cosine must be loaded first if same register. + * FMOVECR: + * Status register was not updated (yes, this affects it). + * FMOVE -> reg: + * Status register was not updated (yes, this affects it). + * FMOVE reg -> reg: + * Status register was not updated. + * FDBcc: + * The loop termination condition was wrong. + * Possible leak from int16 to int32 fixed. 
+ * get_fp_value: + * Immediate addressing mode && Operation Length == Byte -> + * Use the low-order byte of the extension word. + * Now FPU fpcr high 16 bits are always read as zeroes, no matter what was + * written to them. + * + * Other: + * - Optimized single/double/extended to/from conversion functions. + * Huge speed boost, but not (necessarily) portable to other systems. + * Enabled/disabled by #define FPU_HAVE_IEEE_DOUBLE 1 + * - Optimized versions of FSCALE, FGETEXP, FGETMAN + * - Conversion routines now handle NaN and infinity better. + * - Some constants precalculated. Not all compilers can optimize the + * expressions previously used. + * + * TODO: + * - Floating point exceptions. + * - More Infinity/NaN/overflow/underflow checking. + * - FPU instruction_address (only needed when exceptions are implemented) + * - Should be written in assembly to support long doubles. + * - Precision rounding single/double + */ + +#include "sysdeps.h" +#include +#include "memory.h" +#include "readcpu.h" +#include "newcpu.h" +#include "main.h" +#define FPU_IMPLEMENTATION +#include "fpu/fpu.h" +#include "fpu/fpu_ieee.h" + +/* Global FPU context */ +fpu_t fpu; + +/* -------------------------------------------------------------------------- */ +/* --- Scopes Definition --- */ +/* -------------------------------------------------------------------------- */ + +#undef PUBLIC +#define PUBLIC /**/ + +#undef PRIVATE +#define PRIVATE static + +#undef FFPU +#define FFPU /**/ + +#undef FPU +#define FPU fpu. 
+ +/* -------------------------------------------------------------------------- */ +/* --- Native Support --- */ +/* -------------------------------------------------------------------------- */ + +#include "fpu/mathlib.h" +#include "fpu/flags.h" +#include "fpu/exceptions.h" +#include "fpu/rounding.h" +#include "fpu/impl.h" + +#include "fpu/mathlib.cpp" +#include "fpu/flags.cpp" +#include "fpu/exceptions.cpp" +#include "fpu/rounding.cpp" + +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) +#define LD(x) x ## L +#ifdef HAVE_POWL +#define POWL(x, y) powl(x, y) +#else +#define POWL(x, y) pow(x, y) +#endif +#ifdef HAVE_LOG10L +#define LOG10L(x) log10l(x) +#else +#define LOG10L(x) log10(x) +#endif +#else +#define LD(x) x +#define POWL(x, y) pow(x, y) +#define LOG10L(x) log10(x) +#endif + +/* -------------------------------------------------------------------------- */ +/* --- Debugging --- */ +/* -------------------------------------------------------------------------- */ + +PUBLIC void FFPU fpu_dump_registers(void) +{ + for (int i = 0; i < 8; i++){ + printf ("FP%d: %g ", i, fpu_get_register(i)); + if ((i & 3) == 3) + printf ("\n"); + } +} + +PUBLIC void FFPU fpu_dump_flags(void) +{ + printf ("N=%d Z=%d I=%d NAN=%d\n", + (get_fpsr() & FPSR_CCB_NEGATIVE) != 0, + (get_fpsr() & FPSR_CCB_ZERO)!= 0, + (get_fpsr() & FPSR_CCB_INFINITY) != 0, + (get_fpsr() & FPSR_CCB_NAN) != 0); +} + +#if FPU_DEBUG && FPU_DUMP_REGISTERS +PRIVATE void FFPU dump_registers(const char * str) +{ + char temp_str[512]; + + sprintf(temp_str, "%s: %.04f, %.04f, %.04f, %.04f, %.04f, %.04f, %.04f, %.04f\n", + str, + fpu_get_register(0), fpu_get_register(1), fpu_get_register(2), + fpu_get_register(3), fpu_get_register(4), fpu_get_register(5), + fpu_get_register(6), fpu_get_register(7) ); + + fpu_debug((temp_str)); +#else +PRIVATE void FFPU dump_registers(const char *) +{ +#endif +} + +#if FPU_DEBUG && FPU_DUMP_FIRST_BYTES +PRIVATE void FFPU dump_first_bytes(uae_u8 * buffer, uae_s32 actual) +{ + 
char temp_buf1[256], temp_buf2[10]; + int bytes = sizeof(temp_buf1)/3-1-3; + if (actual < bytes) + bytes = actual; + + temp_buf1[0] = 0; + for (int i = 0; i < bytes; i++) { + sprintf(temp_buf2, "%02x ", (uae_u32)buffer[i]); + strcat(temp_buf1, temp_buf2); + } + + strcat(temp_buf1, "\n"); + fpu_debug((temp_buf1)); +#else + PRIVATE void FFPU dump_first_bytes(uae_u8 *, uae_s32) +{ +#endif +} + +// Quotient Byte is loaded with the sign and least significant +// seven bits of the quotient. +PRIVATE inline void FFPU make_quotient(fpu_register const & quotient, uae_u32 sign) +{ + uae_u32 lsb = (uae_u32)fp_fabs(quotient) & 0x7f; + FPU fpsr.quotient = sign | (lsb << 16); +} + +// to_single +PRIVATE inline fpu_register FFPU make_single(uae_u32 value) +{ +#if 1 + // Use a single, otherwise some checks for NaN, Inf, Zero would have to + // be performed + fpu_single result = 0; + fp_declare_init_shape(srp, single); + srp.ieee.negative = (value >> 31) & 1; + srp.ieee.exponent = (value >> 23) & FP_SINGLE_EXP_MAX; + srp.ieee.mantissa = value & 0x007fffff; + result = srp.value; + fpu_debug(("make_single (%X) = %.04f\n",value,(double)result)); + return result; +#elif 0 /* Original code */ + if ((value & 0x7fffffff) == 0) + return (0.0); + + fpu_register result; + fpu_register_parts *p = (fpu_register_parts *)&result; + + uae_u32 sign = (value & 0x80000000); + uae_u32 exp = ((value & 0x7F800000) >> 23) + 1023 - 127; + + p->parts[FLO] = value << 29; + p->parts[FHI] = sign | (exp << 20) | ((value & 0x007FFFFF) >> 3); + + fpu_debug(("make_single (%X) = %.04f\n",value,(double)result)); + + return(result); +#endif +} + +// from_single +PRIVATE inline uae_u32 FFPU extract_single(fpu_register const & src) +{ +#if 1 + fpu_single input = (fpu_single) src; + fp_declare_init_shape(sip, single); + sip.value = input; + uae_u32 result = (sip.ieee.negative << 31) + | (sip.ieee.exponent << 23) + | sip.ieee.mantissa; + fpu_debug(("extract_single (%.04f) = %X\n",(double)src,result)); + return result; 
+#elif 0 /* Original code */ + if (src == 0.0) + return 0; + + uae_u32 result; + fpu_register_parts const *p = (fpu_register_parts const *)&src; + + uae_u32 sign = (p->parts[FHI] & 0x80000000); + uae_u32 exp = (p->parts[FHI] & 0x7FF00000) >> 20; + + if(exp + 127 < 1023) { + exp = 0; + } else if(exp > 1023 + 127) { + exp = 255; + } else { + exp = exp + 127 - 1023; + } + + result = sign | (exp << 23) | ((p->parts[FHI] & 0x000FFFFF) << 3) | (p->parts[FLO] >> 29); + + fpu_debug(("extract_single (%.04f) = %X\n",(double)src,result)); + + return (result); +#endif +} + +// to_exten +PRIVATE inline fpu_register FFPU make_extended(uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3) +{ + // is it zero? + if ((wrd1 & 0x7fff0000) == 0 && wrd2 == 0 && wrd3 == 0) + return (wrd1 & 0x80000000) ? -0.0 : 0.0; + + fpu_register result; +#if defined(USE_QUAD_DOUBLE) + // is it NaN? + if ((wrd1 & 0x7fff0000) == 0x7fff0000 && ((wrd2 & 0x7fffffff) != 0 || wrd3 != 0)) { + make_nan(result); + return result; + } + // is it inf? 
+ if ((wrd1 & 0x7ffff000) == 0x7fff0000 && (wrd2 & 0x7fffffff) == 0 && wrd3 == 0) { + if ((wrd1 & 0x80000000) == 0) + make_inf_positive(result); + else + make_inf_negative(result); + return result; + } + fp_declare_init_shape(srp, extended); + srp.ieee.negative = (wrd1 >> 31) & 1; + srp.ieee.exponent = (wrd1 >> 16) & FP_EXTENDED_EXP_MAX; + srp.ieee.mantissa0 = (wrd2 >> 16) & 0xffff; + srp.ieee.mantissa1 = ((wrd2 & 0xffff) << 16) | ((wrd3 >> 16) & 0xffff); + srp.ieee.mantissa2 = (wrd3 & 0xffff) << 16; + srp.ieee.mantissa3 = 0; +#elif defined(USE_LONG_DOUBLE) + fp_declare_init_shape(srp, extended); + srp.ieee.negative = (wrd1 >> 31) & 1; + srp.ieee.exponent = (wrd1 >> 16) & FP_EXTENDED_EXP_MAX; + srp.ieee.mantissa0 = wrd2; + srp.ieee.mantissa1 = wrd3; + +#else + uae_u32 sgn = (wrd1 >> 31) & 1; + uae_u32 exp = (wrd1 >> 16) & 0x7fff; + + // the explicit integer bit is not set, must normalize + if ((wrd2 & 0x80000000) == 0) { + fpu_debug(("make_extended denormalized mantissa (%X,%X,%X)\n",wrd1,wrd2,wrd3)); + if (wrd2 | wrd3) { + // mantissa, not fraction. 
+ uae_u64 man = ((uae_u64)wrd2 << 32) | wrd3; + while (exp > 0 && (man & UVAL64(0x8000000000000000)) == 0) { + man <<= 1; + exp--; + } + wrd2 = (uae_u32)(man >> 32); + wrd3 = (uae_u32)(man & 0xFFFFFFFF); + } + else if (exp != 0x7fff) // zero + exp = FP_EXTENDED_EXP_BIAS - FP_DOUBLE_EXP_BIAS; + } + + if (exp < FP_EXTENDED_EXP_BIAS - FP_DOUBLE_EXP_BIAS) + exp = 0; + else if (exp > FP_EXTENDED_EXP_BIAS + FP_DOUBLE_EXP_BIAS) + exp = FP_DOUBLE_EXP_MAX; + else + exp += FP_DOUBLE_EXP_BIAS - FP_EXTENDED_EXP_BIAS; + + fp_declare_init_shape(srp, double); + srp.ieee.negative = sgn; + srp.ieee.exponent = exp; + // drop the explicit integer bit + srp.ieee.mantissa0 = (wrd2 & 0x7fffffff) >> 11; + srp.ieee.mantissa1 = (wrd2 << 21) | (wrd3 >> 11); +#endif + result = srp.value; + fpu_debug(("make_extended (%X,%X,%X) = %.04f\n",wrd1,wrd2,wrd3,(double)result)); + return result; +} + +/* + Would be so much easier with full size floats :( + ... this is so vague. +*/ +// make_extended_no_normalize +PRIVATE inline void FFPU make_extended_no_normalize( + uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3, fpu_register & result +) +{ + // is it zero? + if ((wrd1 & 0x7fff0000) == 0 && wrd2 == 0 && wrd3 == 0) { + if (wrd1 & 0x80000000) + make_zero_negative(result); + else + make_zero_positive(result); + return; + } + // is it NaN? + if ((wrd1 & 0x7fff0000) == 0x7fff0000 && ((wrd2 & 0x7fffffff) != 0 || wrd3 != 0)) { + make_nan(result); + return; + } +#if defined(USE_QUAD_DOUBLE) + // is it inf? 
+ if ((wrd1 & 0x7ffff000) == 0x7fff0000 && (wrd2 & 0x7fffffff) == 0 && wrd3 == 0) { + if ((wrd1 & 0x80000000) == 0) + make_inf_positive(result); + else + make_inf_negative(result); + return; + } + fp_declare_init_shape(srp, extended); + srp.ieee.negative = (wrd1 >> 31) & 1; + srp.ieee.exponent = (wrd1 >> 16) & FP_EXTENDED_EXP_MAX; + srp.ieee.mantissa0 = (wrd2 >> 16) & 0xffff; + srp.ieee.mantissa1 = ((wrd2 & 0xffff) << 16) | ((wrd3 >> 16) & 0xffff); + srp.ieee.mantissa2 = (wrd3 & 0xffff) << 16; + srp.ieee.mantissa3 = 0; +#elif defined(USE_LONG_DOUBLE) + fp_declare_init_shape(srp, extended); + srp.ieee.negative = (wrd1 >> 31) & 1; + srp.ieee.exponent = (wrd1 >> 16) & FP_EXTENDED_EXP_MAX; + srp.ieee.mantissa0 = wrd2; + srp.ieee.mantissa1 = wrd3; +#else + uae_u32 exp = (wrd1 >> 16) & 0x7fff; + if (exp < FP_EXTENDED_EXP_BIAS - FP_DOUBLE_EXP_BIAS) + exp = 0; + else if (exp > FP_EXTENDED_EXP_BIAS + FP_DOUBLE_EXP_BIAS) + exp = FP_DOUBLE_EXP_MAX; + else + exp += FP_DOUBLE_EXP_BIAS - FP_EXTENDED_EXP_BIAS; + + fp_declare_init_shape(srp, double); + srp.ieee.negative = (wrd1 >> 31) & 1; + srp.ieee.exponent = exp; + // drop the explicit integer bit + srp.ieee.mantissa0 = (wrd2 & 0x7fffffff) >> 11; + srp.ieee.mantissa1 = (wrd2 << 21) | (wrd3 >> 11); +#endif + result = srp.value; + fpu_debug(("make_extended (%X,%X,%X) = %.04f\n",wrd1,wrd2,wrd3,(double)result)); +} + +// from_exten +PRIVATE inline void FFPU extract_extended(fpu_register const & src, + uae_u32 * wrd1, uae_u32 * wrd2, uae_u32 * wrd3 +) +{ + if (src == 0.0) { + *wrd1 = *wrd2 = *wrd3 = 0; + return; + } +#if defined(USE_QUAD_DOUBLE) + // FIXME: deal with denormals? + fp_declare_init_shape(srp, extended); + srp.value = src; + *wrd1 = (srp.ieee.negative << 31) | (srp.ieee.exponent << 16); + // always set the explicit integer bit. 
+ *wrd2 = 0x80000000 | (srp.ieee.mantissa0 << 15) | ((srp.ieee.mantissa1 & 0xfffe0000) >> 17); + *wrd3 = (srp.ieee.mantissa1 << 15) | ((srp.ieee.mantissa2 & 0xfffe0000) >> 17); +#elif defined(USE_LONG_DOUBLE) + fpu_register_parts p = { src }; +#ifdef WORDS_BIGENDIAN + *wrd1 = p.parts[0]; + *wrd2 = p.parts[1]; + *wrd3 = p.parts[2]; +#else + *wrd3 = p.parts[0]; + *wrd2 = p.parts[1]; + *wrd1 = (p.parts[2] & 0xffff) << 16; +#endif +#else + fp_declare_init_shape(srp, double); + srp.value = src; + fpu_debug(("extract_extended (%d,%d,%X,%X)\n", + srp.ieee.negative , srp.ieee.exponent, + srp.ieee.mantissa0, srp.ieee.mantissa1)); + + uae_u32 exp = srp.ieee.exponent; + + if (exp == FP_DOUBLE_EXP_MAX) + exp = FP_EXTENDED_EXP_MAX; + else + exp += FP_EXTENDED_EXP_BIAS - FP_DOUBLE_EXP_BIAS; + + *wrd1 = (srp.ieee.negative << 31) | (exp << 16); + // always set the explicit integer bit. + *wrd2 = 0x80000000 | (srp.ieee.mantissa0 << 11) | ((srp.ieee.mantissa1 & 0xffe00000) >> 21); + *wrd3 = srp.ieee.mantissa1 << 11; +#endif + fpu_debug(("extract_extended (%.04f) = %X,%X,%X\n",(double)src,*wrd1,*wrd2,*wrd3)); +} + +// to_double +PRIVATE inline fpu_register FFPU make_double(uae_u32 wrd1, uae_u32 wrd2) +{ + union { + fpu_double value; + uae_u32 parts[2]; + } dest; +#ifdef WORDS_BIGENDIAN + dest.parts[0] = wrd1; + dest.parts[1] = wrd2; +#else + dest.parts[0] = wrd2; + dest.parts[1] = wrd1; +#endif + fpu_debug(("make_double (%X,%X) = %.04f\n",wrd1,wrd2,dest.value)); + return (fpu_register)(dest.value); +} + +// from_double +PRIVATE inline void FFPU extract_double(fpu_register const & src, + uae_u32 * wrd1, uae_u32 * wrd2 +) +{ + union { + fpu_double value; + uae_u32 parts[2]; + } dest; + dest.value = (fpu_double)src; +#ifdef WORDS_BIGENDIAN + *wrd1 = dest.parts[0]; + *wrd2 = dest.parts[1]; +#else + *wrd2 = dest.parts[0]; + *wrd1 = dest.parts[1]; +#endif + fpu_debug(("extract_double (%.04f) = %X,%X\n",(double)src,*wrd1,*wrd2)); +} + +// to_pack +PRIVATE inline fpu_register FFPU 
make_packed(uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3) +{ + fpu_register d; + bool sm = (wrd1 & 0x80000000) != 0; + bool se = (wrd1 & 0x40000000) != 0; + int exp = (wrd1 & 0x7fff0000) >> 16; + unsigned int dig; + fpu_register pwr; + + if (exp == 0x7fff) + { + if (wrd2 == 0 && wrd3 == 0) + { + sm ? make_inf_negative(d) : make_inf_positive(d); + } else + { + make_nan(d); + } + return d; + } + dig = wrd1 & 0x0000000f; + if (dig == 0 && wrd2 == 0 && wrd3 == 0) + { + sm ? make_zero_negative(d) : make_zero_positive(d); + return d; + } + + /* + * Convert the bcd exponent to binary by successive adds and + * muls. Set the sign according to SE. Subtract 16 to compensate + * for the mantissa which is to be interpreted as 17 integer + * digits, rather than 1 integer and 16 fraction digits. + * Note: this operation can never overflow. + */ + exp = ((wrd1 >> 24) & 0xf); + exp = exp * 10 + ((wrd1 >> 20) & 0xf); + exp = exp * 10 + ((wrd1 >> 16) & 0xf); + if (se) + exp = -exp; + /* sub to compensate for shift of mant */ + exp = exp - 16; + + /* + * Convert the bcd mantissa to binary by successive + * adds and muls. Set the sign according to SM. + * The mantissa digits will be converted with the decimal point + * assumed following the least-significant digit. + * Note: this operation can never overflow. 
+ */ + d = wrd1 & 0xf; + d = (d * LD(10.0)) + ((wrd2 >> 28) & 0xf); + d = (d * LD(10.0)) + ((wrd2 >> 24) & 0xf); + d = (d * LD(10.0)) + ((wrd2 >> 20) & 0xf); + d = (d * LD(10.0)) + ((wrd2 >> 16) & 0xf); + d = (d * LD(10.0)) + ((wrd2 >> 12) & 0xf); + d = (d * LD(10.0)) + ((wrd2 >> 8) & 0xf); + d = (d * LD(10.0)) + ((wrd2 >> 4) & 0xf); + d = (d * LD(10.0)) + ((wrd2 ) & 0xf); + d = (d * LD(10.0)) + ((wrd3 >> 28) & 0xf); + d = (d * LD(10.0)) + ((wrd3 >> 24) & 0xf); + d = (d * LD(10.0)) + ((wrd3 >> 20) & 0xf); + d = (d * LD(10.0)) + ((wrd3 >> 16) & 0xf); + d = (d * LD(10.0)) + ((wrd3 >> 12) & 0xf); + d = (d * LD(10.0)) + ((wrd3 >> 8) & 0xf); + d = (d * LD(10.0)) + ((wrd3 >> 4) & 0xf); + d = (d * LD(10.0)) + ((wrd3 ) & 0xf); + + /* Check the sign of the mant and make the value in fp0 the same sign. */ + if (sm) + d = -d; + + /* + * Calculate power-of-ten factor from exponent. + */ + if (exp < 0) + { + exp = -exp; + pwr = POWL(LD(10.0), exp); + d = d / pwr; + } else + { + pwr = POWL(LD(10.0), exp); + d = d * pwr; + } + + fpu_debug(("make_packed(%X,%X,%X) = %.04f\n",wrd1,wrd2,wrd3,(double)d)); + return d; +} + +// from_pack +PRIVATE inline void FFPU extract_packed(fpu_register const & src, uae_u32 * wrd1, uae_u32 * wrd2, uae_u32 * wrd3) +{ + fpu_register pwr; + int exp; + fpu_register d; + bool sm, se; + int dig; + + *wrd1 = *wrd2 = *wrd3 = 0; + + d = src; + sm = false; + if (isneg(src)) + { + d = -d; + sm = true; + } + + if (isnan(src)) + { + *wrd1 = sm ? 0xffff0000 : 0x7fff0000; + *wrd2 = 0xffffffff; + *wrd3 = 0xffffffff; + return; + } + if (isinf(src)) + { + *wrd1 = sm ? 0xffff0000 : 0x7fff0000; + *wrd2 = *wrd3 = 0; + return; + } + if (iszero(src)) + { + *wrd1 = sm ? 
0x80000000 : 0x00000000; + *wrd2 = *wrd3 = 0; + return; + } + sm = false; + if (isneg(src)) + { + d = -d; + sm = true; + } + exp = (int)floor(LOG10L(d)); + se = false; + if (exp < 0) + { + exp = -exp; + se = true; + pwr = POWL(LD(10.0), exp); + d = d * pwr; + } else + { + pwr = POWL(LD(10.0), exp); + d = d / pwr; + } + dig = (int)d; d = LD(10) * (d - dig); *wrd1 |= dig; + dig = (int)d; d = LD(10) * (d - dig); *wrd2 |= dig << 28; + dig = (int)d; d = LD(10) * (d - dig); *wrd2 |= dig << 24; + dig = (int)d; d = LD(10) * (d - dig); *wrd2 |= dig << 20; + dig = (int)d; d = LD(10) * (d - dig); *wrd2 |= dig << 16; + dig = (int)d; d = LD(10) * (d - dig); *wrd2 |= dig << 12; + dig = (int)d; d = LD(10) * (d - dig); *wrd2 |= dig << 8; + dig = (int)d; d = LD(10) * (d - dig); *wrd2 |= dig << 4; + dig = (int)d; d = LD(10) * (d - dig); *wrd2 |= dig; + dig = (int)d; d = LD(10) * (d - dig); *wrd3 |= dig << 28; + dig = (int)d; d = LD(10) * (d - dig); *wrd3 |= dig << 24; + dig = (int)d; d = LD(10) * (d - dig); *wrd3 |= dig << 20; + dig = (int)d; d = LD(10) * (d - dig); *wrd3 |= dig << 16; + dig = (int)d; d = LD(10) * (d - dig); *wrd3 |= dig << 12; + dig = (int)d; d = LD(10) * (d - dig); *wrd3 |= dig << 8; + dig = (int)d; d = LD(10) * (d - dig); *wrd3 |= dig << 4; + dig = (int)d; *wrd3 |= dig; + + dig = (exp / 100) % 10; + *wrd1 |= dig << 24; + dig = (exp / 10) % 10; + *wrd1 |= dig << 20; + dig = (exp) % 10; + *wrd1 |= dig << 16; + if (sm) + *wrd1 |= 0x80000000; + if (se) + *wrd1 |= 0x40000000; + fpu_debug(("extract_packed(%.04f) = %X,%X,%X\n",(double)src,*wrd1,*wrd2,*wrd3)); +} + +PRIVATE inline int FFPU get_fp_value (uae_u32 opcode, uae_u16 extra, fpu_register & src) +{ + uaecptr tmppc; + uae_u16 tmp; + int size; + int mode; + int reg; + uae_u32 ad = 0; + static int sz1[8] = {4, 4, 12, 12, 2, 8, 1, 0}; + static int sz2[8] = {4, 4, 12, 12, 2, 8, 2, 0}; + + // fpu_debug(("get_fp_value(%X,%X)\n",(int)opcode,(int)extra)); + // dump_first_bytes( regs.pc_p-4, 16 ); + + if ((extra & 0x4000) 
== 0) { + src = FPU registers[(extra >> 10) & 7]; + return 1; + } + mode = (opcode >> 3) & 7; + reg = opcode & 7; + size = (extra >> 10) & 7; + + fpu_debug(("get_fp_value mode=%d, reg=%d, size=%d\n",(int)mode,(int)reg,(int)size)); + + switch (mode) { + case 0: + switch (size) { + case 6: + src = (fpu_register) (uae_s8) m68k_dreg (regs, reg); + break; + case 4: + src = (fpu_register) (uae_s16) m68k_dreg (regs, reg); + break; + case 0: + src = (fpu_register) (uae_s32) m68k_dreg (regs, reg); + break; + case 1: + src = make_single(m68k_dreg (regs, reg)); + break; + default: + return 0; + } + return 1; + case 1: + return 0; + case 2: + ad = m68k_areg (regs, reg); + break; + case 3: + ad = m68k_areg (regs, reg); + break; + case 4: + ad = m68k_areg (regs, reg) - (reg == 7 ? sz2[size] : sz1[size]); + break; + case 5: + ad = m68k_areg (regs, reg) + (uae_s32) (uae_s16) next_iword(); + break; + case 6: + ad = get_disp_ea_020 (m68k_areg (regs, reg), next_iword()); + break; + case 7: + switch (reg) { + case 0: + ad = (uae_s32) (uae_s16) next_iword(); + break; + case 1: + ad = next_ilong(); + break; + case 2: + ad = m68k_getpc (); + ad += (uae_s32) (uae_s16) next_iword(); + fpu_debug(("get_fp_value next_iword()=%X\n",ad-m68k_getpc()-2)); + break; + case 3: + tmppc = m68k_getpc (); + tmp = (uae_u16)next_iword(); + ad = get_disp_ea_020 (tmppc, tmp); + break; + case 4: + ad = m68k_getpc (); + m68k_setpc (ad + sz2[size]); + // Immediate addressing mode && Operation Length == Byte -> + // Use the low-order byte of the extension word. 
+ if(size == 6) ad++; + break; + default: + return 0; + } + } + + fpu_debug(("get_fp_value m68k_getpc()=%X\n",m68k_getpc())); + fpu_debug(("get_fp_value ad=%X\n",ad)); + fpu_debug(("get_fp_value get_long (ad)=%X\n",get_long (ad))); + //dump_first_bytes( get_real_address(ad, 0, 0)-64, 64 ); + //dump_first_bytes( get_real_address(ad, 0, 0), 64 ); + + switch (size) { + case 0: + src = (fpu_register) (uae_s32) get_long (ad); + break; + case 1: + src = make_single(get_long (ad)); + break; + case 2: { + uae_u32 wrd1, wrd2, wrd3; + wrd1 = get_long (ad); + ad += 4; + wrd2 = get_long (ad); + ad += 4; + wrd3 = get_long (ad); + src = make_extended(wrd1, wrd2, wrd3); + break; + } + case 3: { + uae_u32 wrd1, wrd2, wrd3; + wrd1 = get_long (ad); + ad += 4; + wrd2 = get_long (ad); + ad += 4; + wrd3 = get_long (ad); + src = make_packed(wrd1, wrd2, wrd3); + break; + } + case 4: + src = (fpu_register) (uae_s16) get_word(ad); + break; + case 5: { + uae_u32 wrd1, wrd2; + wrd1 = get_long (ad); + ad += 4; + wrd2 = get_long (ad); + src = make_double(wrd1, wrd2); + break; + } + case 6: + src = (fpu_register) (uae_s8) get_byte(ad); + break; + default: + return 0; + } + + switch (mode) { + case 3: + m68k_areg (regs, reg) += reg == 7 ? sz2[size] : sz1[size]; + break; + case 4: + m68k_areg (regs, reg) -= reg == 7 ? 
sz2[size] : sz1[size]; + break; + } + + // fpu_debug(("get_fp_value result = %.04f\n",(float)src)); + return 1; +} + +/* Convert the FP value to integer according to the current m68k rounding mode */ +PRIVATE inline uae_s32 FFPU toint(fpu_register const & src) +{ + fpu_register result; + switch (get_fpcr() & FPCR_ROUNDING_MODE) { + case FPCR_ROUND_ZERO: + result = fp_round_to_zero(src); + break; + case FPCR_ROUND_MINF: + result = fp_round_to_minus_infinity(src); + break; + case FPCR_ROUND_NEAR: + result = fp_round_to_nearest(src); + break; + case FPCR_ROUND_PINF: + result = fp_round_to_plus_infinity(src); + break; + default: + result = src; /* should never be reached */ + break; + } + return (uae_s32)result; +} + +PRIVATE inline int FFPU put_fp_value (uae_u32 opcode, uae_u16 extra, fpu_register const & value) +{ + uae_u16 tmp; + uaecptr tmppc; + int size; + int mode; + int reg; + uae_u32 ad; + static int sz1[8] = {4, 4, 12, 12, 2, 8, 1, 0}; + static int sz2[8] = {4, 4, 12, 12, 2, 8, 2, 0}; + + // fpu_debug(("put_fp_value(%.04f,%X,%X)\n",(float)value,(int)opcode,(int)extra)); + + if ((extra & 0x4000) == 0) { + int dest_reg = (extra >> 10) & 7; + FPU registers[dest_reg] = value; + make_fpsr(FPU registers[dest_reg]); + return 1; + } + mode = (opcode >> 3) & 7; + reg = opcode & 7; + size = (extra >> 10) & 7; + ad = 0xffffffff; + switch (mode) { + case 0: + switch (size) { + case 6: + m68k_dreg (regs, reg) = ((toint(value) & 0xff) + | (m68k_dreg (regs, reg) & ~0xff)); + break; + case 4: + m68k_dreg (regs, reg) = ((toint(value) & 0xffff) + | (m68k_dreg (regs, reg) & ~0xffff)); + break; + case 0: + m68k_dreg (regs, reg) = toint(value); + break; + case 1: + m68k_dreg (regs, reg) = extract_single(value); + break; + default: + return 0; + } + return 1; + case 1: + return 0; + case 2: + ad = m68k_areg (regs, reg); + break; + case 3: + ad = m68k_areg (regs, reg); + m68k_areg (regs, reg) += reg == 7 ? 
sz2[size] : sz1[size]; + break; + case 4: + m68k_areg (regs, reg) -= reg == 7 ? sz2[size] : sz1[size]; + ad = m68k_areg (regs, reg); + break; + case 5: + ad = m68k_areg (regs, reg) + (uae_s32) (uae_s16) next_iword(); + break; + case 6: + ad = get_disp_ea_020 (m68k_areg (regs, reg), next_iword()); + break; + case 7: + switch (reg) { + case 0: + ad = (uae_s32) (uae_s16) next_iword(); + break; + case 1: + ad = next_ilong(); + break; + case 2: + ad = m68k_getpc (); + ad += (uae_s32) (uae_s16) next_iword(); + break; + case 3: + tmppc = m68k_getpc (); + tmp = (uae_u16)next_iword(); + ad = get_disp_ea_020 (tmppc, tmp); + break; + case 4: + ad = m68k_getpc (); + m68k_setpc (ad + sz2[size]); + break; + default: + return 0; + } + } + switch (size) { + case 0: + put_long (ad, toint(value)); + break; + case 1: + put_long (ad, extract_single(value)); + break; + case 2: { + uae_u32 wrd1, wrd2, wrd3; + extract_extended(value, &wrd1, &wrd2, &wrd3); + put_long (ad, wrd1); + ad += 4; + put_long (ad, wrd2); + ad += 4; + put_long (ad, wrd3); + break; + } + case 3: { + uae_u32 wrd1, wrd2, wrd3; + extract_packed(value, &wrd1, &wrd2, &wrd3); + put_long (ad, wrd1); + ad += 4; + put_long (ad, wrd2); + ad += 4; + put_long (ad, wrd3); + break; + } + case 4: + put_word(ad, (uae_s16) toint(value)); + break; + case 5: { + uae_u32 wrd1, wrd2; + extract_double(value, &wrd1, &wrd2); + put_long (ad, wrd1); + ad += 4; + put_long (ad, wrd2); + break; + } + case 6: + put_byte(ad, (uae_s8) toint(value)); + break; + default: + return 0; + } + return 1; +} + +PRIVATE inline int FFPU get_fp_ad(uae_u32 opcode, uae_u32 * ad) +{ + uae_u16 tmp; + uaecptr tmppc; + int mode; + int reg; + + mode = (opcode >> 3) & 7; + reg = opcode & 7; + switch (mode) { + case 0: + case 1: + return 0; + case 2: + *ad = m68k_areg (regs, reg); + break; + case 3: + *ad = m68k_areg (regs, reg); + break; + case 4: + *ad = m68k_areg (regs, reg); + break; + case 5: + *ad = m68k_areg (regs, reg) + (uae_s32) (uae_s16) next_iword(); + 
break; + case 6: + *ad = get_disp_ea_020 (m68k_areg (regs, reg), next_iword()); + break; + case 7: + switch (reg) { + case 0: + *ad = (uae_s32) (uae_s16) next_iword(); + break; + case 1: + *ad = next_ilong(); + break; + case 2: + *ad = m68k_getpc (); + *ad += (uae_s32) (uae_s16) next_iword(); + break; + case 3: + tmppc = m68k_getpc (); + tmp = (uae_u16)next_iword(); + *ad = get_disp_ea_020 (tmppc, tmp); + break; + default: + return 0; + } + } + return 1; +} + +#if FPU_DEBUG +# define CONDRET(s,x) fpu_debug(("fpp_cond %s = %d\n",s,(uint32)(x))); return (x) +#else +# define CONDRET(s,x) return (x) +#endif + +PRIVATE inline int FFPU fpp_cond(int condition) +{ + int N = (FPU result < 0.0); + int Z = (FPU result == 0.0); + int NaN = isnan(FPU result); + + if (NaN) + N = Z = 0; + + switch (condition & 0x1f) { + case 0x00: CONDRET("False",0); + case 0x01: CONDRET("Equal",Z); + case 0x02: CONDRET("Ordered Greater Than",!(NaN || Z || N)); + case 0x03: CONDRET("Ordered Greater Than or Equal",Z || !(NaN || N)); + case 0x04: CONDRET("Ordered Less Than",N && !(NaN || Z)); + case 0x05: CONDRET("Ordered Less Than or Equal",Z || (N && !NaN)); + case 0x06: CONDRET("Ordered Greater or Less Than",!(NaN || Z)); + case 0x07: CONDRET("Ordered",!NaN); + case 0x08: CONDRET("Unordered",NaN); + case 0x09: CONDRET("Unordered or Equal",NaN || Z); + case 0x0a: CONDRET("Unordered or Greater Than",NaN || !(N || Z)); + case 0x0b: CONDRET("Unordered or Greater or Equal",NaN || Z || !N); + case 0x0c: CONDRET("Unordered or Less Than",NaN || (N && !Z)); + case 0x0d: CONDRET("Unordered or Less or Equal",NaN || Z || N); + case 0x0e: CONDRET("Not Equal",!Z); + case 0x0f: CONDRET("True",1); + case 0x10: CONDRET("Signaling False",0); + case 0x11: CONDRET("Signaling Equal",Z); + case 0x12: CONDRET("Greater Than",!(NaN || Z || N)); + case 0x13: CONDRET("Greater Than or Equal",Z || !(NaN || N)); + case 0x14: CONDRET("Less Than",N && !(NaN || Z)); + case 0x15: CONDRET("Less Than or Equal",Z || (N && !NaN)); + 
case 0x16: CONDRET("Greater or Less Than",!(NaN || Z)); + case 0x17: CONDRET("Greater, Less or Equal",!NaN); + case 0x18: CONDRET("Not Greater, Less or Equal",NaN); + case 0x19: CONDRET("Not Greater or Less Than",NaN || Z); + case 0x1a: CONDRET("Not Less Than or Equal",NaN || !(N || Z)); + case 0x1b: CONDRET("Not Less Than",NaN || Z || !N); + case 0x1c: CONDRET("Not Greater Than or Equal", NaN || (N && !Z)); + case 0x1d: CONDRET("Not Greater Than",NaN || Z || N); + case 0x1e: CONDRET("Signaling Not Equal",!Z); + case 0x1f: CONDRET("Signaling True",1); + default: CONDRET("",-1); + } +} + +void FFPU fpuop_dbcc(uae_u32 opcode, uae_u32 extra) +{ + fpu_debug(("fdbcc_opp %X, %X at %08lx\n", (uae_u32)opcode, (uae_u32)extra, m68k_getpc ())); + + uaecptr pc = (uae_u32) m68k_getpc (); + uae_s32 disp = (uae_s32) (uae_s16) next_iword(); + int cc = fpp_cond(extra & 0x3f); + if (cc == -1) { + m68k_setpc (pc - 4); + op_illg (opcode); + } else if (!cc) { + int reg = opcode & 0x7; + + // this may have leaked. + /* + m68k_dreg (regs, reg) = ((m68k_dreg (regs, reg) & ~0xffff) + | ((m68k_dreg (regs, reg) - 1) & 0xffff)); + */ + m68k_dreg (regs, reg) = ((m68k_dreg (regs, reg) & 0xffff0000) + | (((m68k_dreg (regs, reg) & 0xffff) - 1) & 0xffff)); + + + // condition reversed. + // if ((m68k_dreg (regs, reg) & 0xffff) == 0xffff) + if ((m68k_dreg (regs, reg) & 0xffff) != 0xffff) + m68k_setpc (pc + disp); + } +} + +void FFPU fpuop_scc(uae_u32 opcode, uae_u32 extra) +{ + fpu_debug(("fscc_opp %X, %X at %08lx\n", (uae_u32)opcode, (uae_u32)extra, m68k_getpc ())); + + uae_u32 ad = 0; + int cc = fpp_cond(extra & 0x3f); + if (cc == -1) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + } + else if ((opcode & 0x38) == 0) { + m68k_dreg (regs, opcode & 7) = (m68k_dreg (regs, opcode & 7) & ~0xff) | + (cc ? 0xff : 0x00); + } + else if (get_fp_ad(opcode, &ad) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + } + else + put_byte(ad, cc ? 
0xff : 0x00); +} + +void FFPU fpuop_trapcc(uae_u32 opcode, uaecptr oldpc, uae_u32 extra) +{ + fpu_debug(("ftrapcc_opp %X, %X at %08lx\n", (uae_u32)opcode, (uae_u32)extra, m68k_getpc ())); + + int cc = fpp_cond(extra & 0x3f); + if (cc == -1) { + m68k_setpc (oldpc); + op_illg (opcode); + } + if (cc) + Exception(7, oldpc - 2); +} + +// NOTE that we get here also when there is a FNOP (nontrapping false, displ 0) +void FFPU fpuop_bcc(uae_u32 opcode, uaecptr pc, uae_u32 extra) +{ + fpu_debug(("fbcc_opp %X, %X at %08lx, jumpto=%X\n", (uae_u32)opcode, (uae_u32)extra, m68k_getpc (), extra )); + + int cc = fpp_cond(opcode & 0x3f); + if (cc == -1) { + m68k_setpc (pc); + op_illg (opcode); + } + else if (cc) { + if ((opcode & 0x40) == 0) + extra = (uae_s32) (uae_s16) extra; + m68k_setpc (pc + extra); + } +} + +// FSAVE has no post-increment +// 0x1f180000 == IDLE state frame, coprocessor version number 1F +void FFPU fpuop_save(uae_u32 opcode) +{ + fpu_debug(("fsave_opp at %08lx\n", m68k_getpc ())); + + uae_u32 ad = 0; + int incr = (opcode & 0x38) == 0x20 ? -1 : 1; + int i; + + if (get_fp_ad(opcode, &ad) == 0) { + m68k_setpc (m68k_getpc () - 2); + op_illg (opcode); + return; + } + + if (CPUType == 4) { + // Put 4 byte 68040 IDLE frame. + if (incr < 0) { + ad -= 4; + put_long (ad, 0x41000000); + } + else { + put_long (ad, 0x41000000); + ad += 4; + } + } else { + // Put 28 byte 68881 IDLE frame. + if (incr < 0) { + fpu_debug(("fsave_opp pre-decrement\n")); + ad -= 4; + // What's this? Some BIU flags, or (incorrectly placed) command/condition? + put_long (ad, 0x70000000); + for (i = 0; i < 5; i++) { + ad -= 4; + put_long (ad, 0x00000000); + } + ad -= 4; + put_long (ad, 0x1f180000); // IDLE, vers 1f + } + else { + put_long (ad, 0x1f180000); // IDLE, vers 1f + ad += 4; + for (i = 0; i < 5; i++) { + put_long (ad, 0x00000000); + ad += 4; + } + // What's this? Some BIU flags, or (incorrectly placed) command/condition? 
+ put_long (ad, 0x70000000); + ad += 4; + } + } + if ((opcode & 0x38) == 0x18) { + m68k_areg (regs, opcode & 7) = ad; // Never executed on a 68881 + fpu_debug(("PROBLEM: fsave_opp post-increment\n")); + } + if ((opcode & 0x38) == 0x20) { + m68k_areg (regs, opcode & 7) = ad; + fpu_debug(("fsave_opp pre-decrement %X -> A%d\n",ad,opcode & 7)); + } +} + +// FRESTORE has no pre-decrement +void FFPU fpuop_restore(uae_u32 opcode) +{ + fpu_debug(("frestore_opp at %08lx\n", m68k_getpc ())); + + uae_u32 ad = 0; + uae_u32 d; + int incr = (opcode & 0x38) == 0x20 ? -1 : 1; + + if (get_fp_ad(opcode, &ad) == 0) { + m68k_setpc (m68k_getpc () - 2); + op_illg (opcode); + return; + } + + if (CPUType == 4) { + // 68040 + if (incr < 0) { + fpu_debug(("PROBLEM: frestore_opp incr < 0\n")); + // this may be wrong, but it's never called. + ad -= 4; + d = get_long (ad); + if ((d & 0xff000000) != 0) { // Not a NULL frame? + if ((d & 0x00ff0000) == 0) { // IDLE + fpu_debug(("frestore_opp found IDLE frame at %X\n",ad-4)); + } + else if ((d & 0x00ff0000) == 0x00300000) { // UNIMP + fpu_debug(("PROBLEM: frestore_opp found UNIMP frame at %X\n",ad-4)); + ad -= 44; + } + else if ((d & 0x00ff0000) == 0x00600000) { // BUSY + fpu_debug(("PROBLEM: frestore_opp found BUSY frame at %X\n",ad-4)); + ad -= 92; + } + } + } + else { + d = get_long (ad); + fpu_debug(("frestore_opp frame at %X = %X\n",ad,d)); + ad += 4; + if ((d & 0xff000000) != 0) { // Not a NULL frame? + if ((d & 0x00ff0000) == 0) { // IDLE + fpu_debug(("frestore_opp found IDLE frame at %X\n",ad-4)); + } + else if ((d & 0x00ff0000) == 0x00300000) { // UNIMP + fpu_debug(("PROBLEM: frestore_opp found UNIMP frame at %X\n",ad-4)); + ad += 44; + } + else if ((d & 0x00ff0000) == 0x00600000) { // BUSY + fpu_debug(("PROBLEM: frestore_opp found BUSY frame at %X\n",ad-4)); + ad += 92; + } + } + } + } + else { + // 68881 + if (incr < 0) { + fpu_debug(("PROBLEM: frestore_opp incr < 0\n")); + // this may be wrong, but it's never called. 
+ ad -= 4; + d = get_long (ad); + if ((d & 0xff000000) != 0) { + if ((d & 0x00ff0000) == 0x00180000) + ad -= 6 * 4; + else if ((d & 0x00ff0000) == 0x00380000) + ad -= 14 * 4; + else if ((d & 0x00ff0000) == 0x00b40000) + ad -= 45 * 4; + } + } + else { + d = get_long (ad); + fpu_debug(("frestore_opp frame at %X = %X\n",ad,d)); + ad += 4; + if ((d & 0xff000000) != 0) { // Not a NULL frame? + if ((d & 0x00ff0000) == 0x00180000) { // IDLE + fpu_debug(("frestore_opp found IDLE frame at %X\n",ad-4)); + ad += 6 * 4; + } + else if ((d & 0x00ff0000) == 0x00380000) {// UNIMP? shouldn't it be 3C? + ad += 14 * 4; + fpu_debug(("PROBLEM: frestore_opp found UNIMP? frame at %X\n",ad-4)); + } + else if ((d & 0x00ff0000) == 0x00b40000) {// BUSY + fpu_debug(("PROBLEM: frestore_opp found BUSY frame at %X\n",ad-4)); + ad += 45 * 4; + } + } + } + } + if ((opcode & 0x38) == 0x18) { + m68k_areg (regs, opcode & 7) = ad; + fpu_debug(("frestore_opp post-increment %X -> A%d\n",ad,opcode & 7)); + } + if ((opcode & 0x38) == 0x20) { + m68k_areg (regs, opcode & 7) = ad; // Never executed on a 68881 + fpu_debug(("PROBLEM: frestore_opp pre-decrement\n")); + } +} + +void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) +{ + int reg; + fpu_register src; + + fpu_debug(("FPP %04lx %04x at %08lx\n", opcode & 0xffff, extra & 0xffff, + m68k_getpc () - 4)); + + dump_registers( "START"); + + switch ((extra >> 13) & 0x7) { + case 3: + fpu_debug(("FMOVE -> \n")); + if (put_fp_value (opcode, extra, FPU registers[(extra >> 7) & 7]) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + } + dump_registers( "END "); + return; + case 4: + case 5: + if ((opcode & 0x38) == 0) { + if (extra & 0x2000) { // dr bit + if (extra & 0x1000) { + // according to the manual, the msb bits are always zero. 
+ m68k_dreg (regs, opcode & 7) = get_fpcr() & 0xFFFF; + fpu_debug(("FMOVEM FPU fpcr (%X) -> D%d\n", get_fpcr(), opcode & 7)); + } + if (extra & 0x0800) { + m68k_dreg (regs, opcode & 7) = get_fpsr(); + fpu_debug(("FMOVEM FPU fpsr (%X) -> D%d\n", get_fpsr(), opcode & 7)); + } + if (extra & 0x0400) { + m68k_dreg (regs, opcode & 7) = FPU instruction_address; + fpu_debug(("FMOVEM FPU instruction_address (%X) -> D%d\n", FPU instruction_address, opcode & 7)); + } + } + else { + if (extra & 0x1000) { + set_fpcr( m68k_dreg (regs, opcode & 7) ); + fpu_debug(("FMOVEM D%d (%X) -> FPU fpcr\n", opcode & 7, get_fpcr())); + } + if (extra & 0x0800) { + set_fpsr( m68k_dreg (regs, opcode & 7) ); + fpu_debug(("FMOVEM D%d (%X) -> FPU fpsr\n", opcode & 7, get_fpsr())); + } + if (extra & 0x0400) { + FPU instruction_address = m68k_dreg (regs, opcode & 7); + fpu_debug(("FMOVEM D%d (%X) -> FPU instruction_address\n", opcode & 7, FPU instruction_address)); + } + } +// } else if ((opcode & 0x38) == 1) { + } + else if ((opcode & 0x38) == 8) { + if (extra & 0x2000) { // dr bit + if (extra & 0x1000) { + // according to the manual, the msb bits are always zero. 
+ m68k_areg (regs, opcode & 7) = get_fpcr() & 0xFFFF; + fpu_debug(("FMOVEM FPU fpcr (%X) -> A%d\n", get_fpcr(), opcode & 7)); + } + if (extra & 0x0800) { + m68k_areg (regs, opcode & 7) = get_fpsr(); + fpu_debug(("FMOVEM FPU fpsr (%X) -> A%d\n", get_fpsr(), opcode & 7)); + } + if (extra & 0x0400) { + m68k_areg (regs, opcode & 7) = FPU instruction_address; + fpu_debug(("FMOVEM FPU instruction_address (%X) -> A%d\n", FPU instruction_address, opcode & 7)); + } + } else { + if (extra & 0x1000) { + set_fpcr( m68k_areg (regs, opcode & 7) ); + fpu_debug(("FMOVEM A%d (%X) -> FPU fpcr\n", opcode & 7, get_fpcr())); + } + if (extra & 0x0800) { + set_fpsr( m68k_areg (regs, opcode & 7) ); + fpu_debug(("FMOVEM A%d (%X) -> FPU fpsr\n", opcode & 7, get_fpsr())); + } + if (extra & 0x0400) { + FPU instruction_address = m68k_areg (regs, opcode & 7); + fpu_debug(("FMOVEM A%d (%X) -> FPU instruction_address\n", opcode & 7, FPU instruction_address)); + } + } + } + else if ((opcode & 0x3f) == 0x3c) { + if ((extra & 0x2000) == 0) { + if (extra & 0x1000) { + set_fpcr( next_ilong() ); + fpu_debug(("FMOVEM #<%X> -> FPU fpcr\n", get_fpcr())); + } + if (extra & 0x0800) { + set_fpsr( next_ilong() ); + fpu_debug(("FMOVEM #<%X> -> FPU fpsr\n", get_fpsr())); + } + if (extra & 0x0400) { + FPU instruction_address = next_ilong(); + fpu_debug(("FMOVEM #<%X> -> FPU instruction_address\n", FPU instruction_address)); + } + } + } + else if (extra & 0x2000) { + /* FMOVEM FPP->memory */ + uae_u32 ad = 0; + int incr = 0; + + if (get_fp_ad(opcode, &ad) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + if ((opcode & 0x38) == 0x20) { + if (extra & 0x1000) + incr += 4; + if (extra & 0x0800) + incr += 4; + if (extra & 0x0400) + incr += 4; + } + ad -= incr; + if (extra & 0x1000) { + // according to the manual, the msb bits are always zero. 
+ put_long (ad, get_fpcr() & 0xFFFF); + fpu_debug(("FMOVEM FPU fpcr (%X) -> mem %X\n", get_fpcr(), ad )); + ad += 4; + } + if (extra & 0x0800) { + put_long (ad, get_fpsr()); + fpu_debug(("FMOVEM FPU fpsr (%X) -> mem %X\n", get_fpsr(), ad )); + ad += 4; + } + if (extra & 0x0400) { + put_long (ad, FPU instruction_address); + fpu_debug(("FMOVEM FPU instruction_address (%X) -> mem %X\n", FPU instruction_address, ad )); + ad += 4; + } + ad -= incr; + if ((opcode & 0x38) == 0x18) // post-increment? + m68k_areg (regs, opcode & 7) = ad; + if ((opcode & 0x38) == 0x20) // pre-decrement? + m68k_areg (regs, opcode & 7) = ad; + } + else { + /* FMOVEM memory->FPP */ + uae_u32 ad = 0; + + if (get_fp_ad(opcode, &ad) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + + // ad = (opcode & 0x38) == 0x20 ? ad - 12 : ad; + int incr = 0; + if((opcode & 0x38) == 0x20) { + if (extra & 0x1000) + incr += 4; + if (extra & 0x0800) + incr += 4; + if (extra & 0x0400) + incr += 4; + ad = ad - incr; + } + + if (extra & 0x1000) { + set_fpcr( get_long (ad) ); + fpu_debug(("FMOVEM mem %X (%X) -> FPU fpcr\n", ad, get_fpcr() )); + ad += 4; + } + if (extra & 0x0800) { + set_fpsr( get_long (ad) ); + fpu_debug(("FMOVEM mem %X (%X) -> FPU fpsr\n", ad, get_fpsr() )); + ad += 4; + } + if (extra & 0x0400) { + FPU instruction_address = get_long (ad); + fpu_debug(("FMOVEM mem %X (%X) -> FPU instruction_address\n", ad, FPU instruction_address )); + ad += 4; + } + if ((opcode & 0x38) == 0x18) // post-increment? + m68k_areg (regs, opcode & 7) = ad; + if ((opcode & 0x38) == 0x20) // pre-decrement? 
+// m68k_areg (regs, opcode & 7) = ad - 12; + m68k_areg (regs, opcode & 7) = ad - incr; + } + dump_registers( "END "); + return; + case 6: + case 7: { + uae_u32 ad = 0, list = 0; + int incr = 0; + if (extra & 0x2000) { + /* FMOVEM FPP->memory */ + fpu_debug(("FMOVEM FPP->memory\n")); + + if (get_fp_ad(opcode, &ad) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + switch ((extra >> 11) & 3) { + case 0: /* static pred */ + list = extra & 0xff; + incr = -1; + break; + case 1: /* dynamic pred */ + list = m68k_dreg (regs, (extra >> 4) & 3) & 0xff; + incr = -1; + break; + case 2: /* static postinc */ + list = extra & 0xff; + incr = 1; + break; + case 3: /* dynamic postinc */ + list = m68k_dreg (regs, (extra >> 4) & 3) & 0xff; + incr = 1; + break; + } + + if (incr < 0) { + for(reg=7; reg>=0; reg--) { + uae_u32 wrd1, wrd2, wrd3; + if( list & 0x80 ) { + extract_extended(FPU registers[reg],&wrd1, &wrd2, &wrd3); + ad -= 4; + put_long (ad, wrd3); + ad -= 4; + put_long (ad, wrd2); + ad -= 4; + put_long (ad, wrd1); + } + list <<= 1; + } + } + else { + for(reg=0; reg<8; reg++) { + uae_u32 wrd1, wrd2, wrd3; + if( list & 0x80 ) { + extract_extended(FPU registers[reg],&wrd1, &wrd2, &wrd3); + put_long (ad, wrd1); + ad += 4; + put_long (ad, wrd2); + ad += 4; + put_long (ad, wrd3); + ad += 4; + } + list <<= 1; + } + } + if ((opcode & 0x38) == 0x18) // post-increment? + m68k_areg (regs, opcode & 7) = ad; + if ((opcode & 0x38) == 0x20) // pre-decrement? 
+ m68k_areg (regs, opcode & 7) = ad; + } + else { + /* FMOVEM memory->FPP */ + fpu_debug(("FMOVEM memory->FPP\n")); + + if (get_fp_ad(opcode, &ad) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + switch ((extra >> 11) & 3) { + case 0: /* static pred */ + fpu_debug(("memory->FMOVEM FPP not legal mode.\n")); + list = extra & 0xff; + incr = -1; + break; + case 1: /* dynamic pred */ + fpu_debug(("memory->FMOVEM FPP not legal mode.\n")); + list = m68k_dreg (regs, (extra >> 4) & 3) & 0xff; + incr = -1; + break; + case 2: /* static postinc */ + list = extra & 0xff; + incr = 1; + break; + case 3: /* dynamic postinc */ + list = m68k_dreg (regs, (extra >> 4) & 3) & 0xff; + incr = 1; + break; + } + + /**/ + if (incr < 0) { + // not reached + for(reg=7; reg>=0; reg--) { + uae_u32 wrd1, wrd2, wrd3; + if( list & 0x80 ) { + ad -= 4; + wrd3 = get_long (ad); + ad -= 4; + wrd2 = get_long (ad); + ad -= 4; + wrd1 = get_long (ad); + // FPU registers[reg] = make_extended(wrd1, wrd2, wrd3); + make_extended_no_normalize (wrd1, wrd2, wrd3, FPU registers[reg]); + } + list <<= 1; + } + } + else { + for(reg=0; reg<8; reg++) { + uae_u32 wrd1, wrd2, wrd3; + if( list & 0x80 ) { + wrd1 = get_long (ad); + ad += 4; + wrd2 = get_long (ad); + ad += 4; + wrd3 = get_long (ad); + ad += 4; + // FPU registers[reg] = make_extended(wrd1, wrd2, wrd3); + make_extended_no_normalize (wrd1, wrd2, wrd3, FPU registers[reg]); + } + list <<= 1; + } + } + if ((opcode & 0x38) == 0x18) // post-increment? + m68k_areg (regs, opcode & 7) = ad; + if ((opcode & 0x38) == 0x20) // pre-decrement? 
+ m68k_areg (regs, opcode & 7) = ad; + } + dump_registers( "END "); + return; + } + case 0: + case 2: + reg = (extra >> 7) & 7; + if ((extra & 0xfc00) == 0x5c00) { + fpu_debug(("FMOVECR memory->FPP\n")); + switch (extra & 0x7f) { + case 0x00: + // FPU registers[reg] = 4.0 * atan(1.0); + FPU registers[reg] = LD(3.1415926535897932384626433832795029); + fpu_debug(("FP const: Pi\n")); + break; + case 0x0b: + // FPU registers[reg] = log10 (2.0); + FPU registers[reg] = LD(0.30102999566398119521); // 0.3010299956639811952137388947244930L + fpu_debug(("FP const: Log 10 (2)\n")); + break; + case 0x0c: + // FPU registers[reg] = exp (1.0); + FPU registers[reg] = LD(2.7182818284590452353); // 2.7182818284590452353602874713526625L + fpu_debug(("FP const: e\n")); + break; + case 0x0d: + // FPU registers[reg] = log (exp (1.0)) / log (2.0); + FPU registers[reg] = LD(1.4426950408889634073599246810019); + fpu_debug(("FP const: Log 2 (e)\n")); + break; + case 0x0e: + // FPU registers[reg] = log (exp (1.0)) / log (10.0); + FPU registers[reg] = LD(0.4342944819032518276511289189166051); + fpu_debug(("FP const: Log 10 (e)\n")); + break; + case 0x0f: + FPU registers[reg] = 0.0; + fpu_debug(("FP const: zero\n")); + break; + case 0x30: + // FPU registers[reg] = log (2.0); + FPU registers[reg] = LD(0.6931471805599453094172321214581766); + fpu_debug(("FP const: ln(2)\n")); + break; + case 0x31: + // FPU registers[reg] = log (10.0); + FPU registers[reg] = LD(2.3025850929940456840179914546843642); + fpu_debug(("FP const: ln(10)\n")); + break; + case 0x32: + // ?? 
+ FPU registers[reg] = LD(1.0e0); + fpu_debug(("FP const: 1.0e0\n")); + break; + case 0x33: + FPU registers[reg] = LD(1.0e1); + fpu_debug(("FP const: 1.0e1\n")); + break; + case 0x34: + FPU registers[reg] = LD(1.0e2); + fpu_debug(("FP const: 1.0e2\n")); + break; + case 0x35: + FPU registers[reg] = LD(1.0e4); + fpu_debug(("FP const: 1.0e4\n")); + break; + case 0x36: + FPU registers[reg] = LD(1.0e8); + fpu_debug(("FP const: 1.0e8\n")); + break; + case 0x37: + FPU registers[reg] = LD(1.0e16); + fpu_debug(("FP const: 1.0e16\n")); + break; + case 0x38: + FPU registers[reg] = LD(1.0e32); + fpu_debug(("FP const: 1.0e32\n")); + break; + case 0x39: + FPU registers[reg] = LD(1.0e64); + fpu_debug(("FP const: 1.0e64\n")); + break; + case 0x3a: + FPU registers[reg] = LD(1.0e128); + fpu_debug(("FP const: 1.0e128\n")); + break; + case 0x3b: + FPU registers[reg] = LD(1.0e256); + fpu_debug(("FP const: 1.0e256\n")); + break; +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) + case 0x3c: + FPU registers[reg] = LD(1.0e512); + fpu_debug(("FP const: 1.0e512\n")); + break; + case 0x3d: + FPU registers[reg] = LD(1.0e1024); + fpu_debug(("FP const: 1.0e1024\n")); + break; + case 0x3e: + FPU registers[reg] = LD(1.0e2048); + fpu_debug(("FP const: 1.0e2048\n")); + break; + case 0x3f: + FPU registers[reg] = LD(1.0e4096); + fpu_debug(("FP const: 1.0e4096\n")); +#endif + break; + default: + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + break; + } + // these *do* affect the status reg + make_fpsr(FPU registers[reg]); + dump_registers( "END "); + return; + } + + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + fpu_debug(("returned from get_fp_value m68k_getpc()=%X\n",m68k_getpc())); + + if (FPU is_integral) { + // 68040-specific operations + switch (extra & 0x7f) { + case 0x40: /* FSMOVE */ + fpu_debug(("FSMOVE %.04f\n",(double)src)); + FPU registers[reg] = (float)src; + make_fpsr(FPU 
registers[reg]); + break; + case 0x44: /* FDMOVE */ + fpu_debug(("FDMOVE %.04f\n",(double)src)); + FPU registers[reg] = (double)src; + make_fpsr(FPU registers[reg]); + break; + case 0x41: /* FSSQRT */ + fpu_debug(("FSQRT %.04f\n",(double)src)); + FPU registers[reg] = (float)fp_sqrt (src); + make_fpsr(FPU registers[reg]); + break; + case 0x45: /* FDSQRT */ + fpu_debug(("FSQRT %.04f\n",(double)src)); + FPU registers[reg] = (double)fp_sqrt (src); + make_fpsr(FPU registers[reg]); + break; + case 0x58: /* FSABS */ + fpu_debug(("FSABS %.04f\n",(double)src)); + FPU registers[reg] = (float)fp_fabs(src); + make_fpsr(FPU registers[reg]); + break; + case 0x5c: /* FDABS */ + fpu_debug(("FDABS %.04f\n",(double)src)); + FPU registers[reg] = (double)fp_fabs(src); + make_fpsr(FPU registers[reg]); + break; + case 0x5a: /* FSNEG */ + fpu_debug(("FSNEG %.04f\n",(double)src)); + FPU registers[reg] = (float)-src; + make_fpsr(FPU registers[reg]); + break; + case 0x5e: /* FDNEG */ + fpu_debug(("FDNEG %.04f\n",(double)src)); + FPU registers[reg] = (double)-src; + make_fpsr(FPU registers[reg]); + break; + case 0x60: /* FSDIV */ + fpu_debug(("FSDIV %.04f\n",(double)src)); + FPU registers[reg] = (float)(FPU registers[reg] / src); + make_fpsr(FPU registers[reg]); + break; + case 0x64: /* FDDIV */ + fpu_debug(("FDDIV %.04f\n",(double)src)); + FPU registers[reg] = (double)(FPU registers[reg] / src); + make_fpsr(FPU registers[reg]); + break; + case 0x62: /* FSADD */ + fpu_debug(("FSADD %.04f\n",(double)src)); + FPU registers[reg] = (float)(FPU registers[reg] + src); + make_fpsr(FPU registers[reg]); + break; + case 0x66: /* FDADD */ + fpu_debug(("FDADD %.04f\n",(double)src)); + FPU registers[reg] = (double)(FPU registers[reg] + src); + make_fpsr(FPU registers[reg]); + break; + case 0x68: /* FSSUB */ + fpu_debug(("FSSUB %.04f\n",(double)src)); + FPU registers[reg] = (float)(FPU registers[reg] - src); + make_fpsr(FPU registers[reg]); + break; + case 0x6c: /* FDSUB */ + fpu_debug(("FDSUB 
%.04f\n",(double)src)); + FPU registers[reg] = (double)(FPU registers[reg] - src); + make_fpsr(FPU registers[reg]); + break; + case 0x63: /* FSMUL */ + case 0x67: /* FDMUL */ + fpu_debug(("FMUL %.04f\n",(double)src)); + get_dest_flags(FPU registers[reg]); + get_source_flags(src); + if (fl_dest.in_range && fl_source.in_range) { + if ((extra & 0x7f) == 0x63) + FPU registers[reg] = (float)(FPU registers[reg] * src); + else + FPU registers[reg] = (double)(FPU registers[reg] * src); + } + else if (fl_dest.nan || fl_source.nan || + (fl_dest.zero && fl_source.infinity) || + (fl_dest.infinity && fl_source.zero) ) { + make_nan( FPU registers[reg] ); + } + else if (fl_dest.zero || fl_source.zero ) { + if ( (fl_dest.negative && !fl_source.negative) || + (!fl_dest.negative && fl_source.negative) ) { + make_zero_negative(FPU registers[reg]); + } + else { + make_zero_positive(FPU registers[reg]); + } + } + else { + if ( (fl_dest.negative && !fl_source.negative) || + (!fl_dest.negative && fl_source.negative) ) { + make_inf_negative(FPU registers[reg]); + } + else { + make_inf_positive(FPU registers[reg]); + } + } + make_fpsr(FPU registers[reg]); + break; + default: + // Continue decode-execute 6888x instructions below + goto process_6888x_instructions; + } + fpu_debug(("END m68k_getpc()=%X\n",m68k_getpc())); + dump_registers( "END "); + return; + } + + process_6888x_instructions: + switch (extra & 0x7f) { + case 0x00: /* FMOVE */ + fpu_debug(("FMOVE %.04f\n",(double)src)); + FPU registers[reg] = src; + make_fpsr(FPU registers[reg]); + break; + case 0x01: /* FINT */ + fpu_debug(("FINT %.04f\n",(double)src)); + FPU registers[reg] = toint(src); + make_fpsr(FPU registers[reg]); + break; + case 0x02: /* FSINH */ + fpu_debug(("FSINH %.04f\n",(double)src)); + FPU registers[reg] = fp_sinh (src); + make_fpsr(FPU registers[reg]); + break; + case 0x03: /* FINTRZ */ + fpu_debug(("FINTRZ %.04f\n",(double)src)); + FPU registers[reg] = fp_round_to_zero(src); + make_fpsr(FPU registers[reg]); + 
break; + case 0x04: /* FSQRT */ + fpu_debug(("FSQRT %.04f\n",(double)src)); + FPU registers[reg] = fp_sqrt (src); + make_fpsr(FPU registers[reg]); + break; + case 0x06: /* FLOGNP1 */ + fpu_debug(("FLOGNP1 %.04f\n",(double)src)); + FPU registers[reg] = fp_log (src + 1.0); + make_fpsr(FPU registers[reg]); + break; + case 0x08: /* FETOXM1 */ + fpu_debug(("FETOXM1 %.04f\n",(double)src)); + FPU registers[reg] = fp_exp (src) - 1.0; + make_fpsr(FPU registers[reg]); + break; + case 0x09: /* FTANH */ + fpu_debug(("FTANH %.04f\n",(double)src)); + FPU registers[reg] = fp_tanh (src); + make_fpsr(FPU registers[reg]); + break; + case 0x0a: /* FATAN */ + fpu_debug(("FATAN %.04f\n",(double)src)); + FPU registers[reg] = fp_atan (src); + make_fpsr(FPU registers[reg]); + break; + case 0x0c: /* FASIN */ + fpu_debug(("FASIN %.04f\n",(double)src)); + FPU registers[reg] = fp_asin (src); + make_fpsr(FPU registers[reg]); + break; + case 0x0d: /* FATANH */ + fpu_debug(("FATANH %.04f\n",(double)src)); + FPU registers[reg] = fp_atanh (src); + make_fpsr(FPU registers[reg]); + break; + case 0x0e: /* FSIN */ + fpu_debug(("FSIN %.04f\n",(double)src)); + FPU registers[reg] = fp_sin (src); + make_fpsr(FPU registers[reg]); + break; + case 0x0f: /* FTAN */ + fpu_debug(("FTAN %.04f\n",(double)src)); + FPU registers[reg] = fp_tan (src); + make_fpsr(FPU registers[reg]); + break; + case 0x10: /* FETOX */ + fpu_debug(("FETOX %.04f\n",(double)src)); + FPU registers[reg] = fp_exp (src); + make_fpsr(FPU registers[reg]); + break; + case 0x11: /* FTWOTOX */ + fpu_debug(("FTWOTOX %.04f\n",(double)src)); + FPU registers[reg] = fp_pow(2.0, src); + make_fpsr(FPU registers[reg]); + break; + case 0x12: /* FTENTOX */ + fpu_debug(("FTENTOX %.04f\n",(double)src)); + FPU registers[reg] = fp_pow(10.0, src); + make_fpsr(FPU registers[reg]); + break; + case 0x14: /* FLOGN */ + fpu_debug(("FLOGN %.04f\n",(double)src)); + FPU registers[reg] = fp_log (src); + make_fpsr(FPU registers[reg]); + break; + case 0x15: /* FLOG10 */ + 
fpu_debug(("FLOG10 %.04f\n",(double)src)); + FPU registers[reg] = fp_log10 (src); + make_fpsr(FPU registers[reg]); + break; + case 0x16: /* FLOG2 */ + fpu_debug(("FLOG2 %.04f\n",(double)src)); + FPU registers[reg] = fp_log (src) / fp_log (2.0); + make_fpsr(FPU registers[reg]); + break; + case 0x18: /* FABS */ + fpu_debug(("FABS %.04f\n",(double)src)); + FPU registers[reg] = fp_fabs(src); + make_fpsr(FPU registers[reg]); + break; + case 0x19: /* FCOSH */ + fpu_debug(("FCOSH %.04f\n",(double)src)); + FPU registers[reg] = fp_cosh(src); + make_fpsr(FPU registers[reg]); + break; + case 0x1a: /* FNEG */ + fpu_debug(("FNEG %.04f\n",(double)src)); + FPU registers[reg] = -src; + make_fpsr(FPU registers[reg]); + break; + case 0x1c: /* FACOS */ + fpu_debug(("FACOS %.04f\n",(double)src)); + FPU registers[reg] = fp_acos(src); + make_fpsr(FPU registers[reg]); + break; + case 0x1d: /* FCOS */ + fpu_debug(("FCOS %.04f\n",(double)src)); + FPU registers[reg] = fp_cos(src); + make_fpsr(FPU registers[reg]); + break; + case 0x1e: /* FGETEXP */ + fpu_debug(("FGETEXP %.04f\n",(double)src)); + if( isinf(src) ) { + make_nan( FPU registers[reg] ); + } + else { + FPU registers[reg] = fast_fgetexp( src ); + } + make_fpsr(FPU registers[reg]); + break; + case 0x1f: /* FGETMAN */ + fpu_debug(("FGETMAN %.04f\n",(double)src)); + if( src == 0 ) { + FPU registers[reg] = 0; + } + else if( isinf(src) ) { + make_nan( FPU registers[reg] ); + } + else { + FPU registers[reg] = src; + fast_remove_exponent( FPU registers[reg] ); + } + make_fpsr(FPU registers[reg]); + break; + case 0x20: /* FDIV */ + fpu_debug(("FDIV %.04f\n",(double)src)); + FPU registers[reg] /= src; + make_fpsr(FPU registers[reg]); + break; + case 0x21: /* FMOD */ + fpu_debug(("FMOD %.04f\n",(double)src)); + // FPU registers[reg] = FPU registers[reg] - (fpu_register) ((int) (FPU registers[reg] / src)) * src; + { + fpu_register quot = fp_round_to_zero(FPU registers[reg] / src); + uae_u32 sign = get_quotient_sign(FPU registers[reg],src); + 
FPU registers[reg] = FPU registers[reg] - quot * src; + make_fpsr(FPU registers[reg]); + make_quotient(quot, sign); + } + break; + case 0x23: /* FMUL */ + fpu_debug(("FMUL %.04f\n",(double)src)); + get_dest_flags(FPU registers[reg]); + get_source_flags(src); + if (fl_dest.in_range && fl_source.in_range) { + FPU registers[reg] *= src; + if (unlikely(isinf(FPU registers[reg]))) + { + isneg(FPU registers[reg]) ? make_inf_negative(FPU registers[reg]) : make_inf_positive(FPU registers[reg]); + } + } + else if (fl_dest.nan || fl_source.nan || + (fl_dest.zero && fl_source.infinity) || + (fl_dest.infinity && fl_source.zero) ) { + make_nan( FPU registers[reg] ); + } + else if (fl_dest.zero || fl_source.zero ) { + if ( (fl_dest.negative && !fl_source.negative) || + (!fl_dest.negative && fl_source.negative) ) { + make_zero_negative(FPU registers[reg]); + } + else { + make_zero_positive(FPU registers[reg]); + } + } + else { + if ( (fl_dest.negative && !fl_source.negative) || + (!fl_dest.negative && fl_source.negative) ) { + make_inf_negative(FPU registers[reg]); + } + else { + make_inf_positive(FPU registers[reg]); + } + } + make_fpsr(FPU registers[reg]); + break; + case 0x24: /* FSGLDIV */ + fpu_debug(("FSGLDIV %.04f\n",(double)src)); + FPU registers[reg] = (float)(FPU registers[reg] / src); + make_fpsr(FPU registers[reg]); + break; + case 0x25: /* FREM */ + fpu_debug(("FREM %.04f\n",(double)src)); + // FPU registers[reg] = FPU registers[reg] - (double) ((int) (FPU registers[reg] / src + 0.5)) * src; + { + fpu_register quot = fp_round_to_nearest(FPU registers[reg] / src); + uae_u32 sign = get_quotient_sign(FPU registers[reg],src); + FPU registers[reg] = FPU registers[reg] - quot * src; + make_fpsr(FPU registers[reg]); + make_quotient(quot,sign); + } + break; + + case 0x26: /* FSCALE */ + fpu_debug(("FSCALE %.04f\n",(double)src)); + // TODO: overflow flags + get_dest_flags(FPU registers[reg]); + get_source_flags(src); + if (fl_source.in_range && fl_dest.in_range) { + // When 
the absolute value of the source operand is >= 2^14, + // an overflow or underflow always results. + // Here (int) cast is okay. + int scale_factor = (int)fp_round_to_zero(src); +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) + fp_declare_init_shape(sxp, extended); + sxp.value = FPU registers[reg]; + sxp.ieee.exponent += scale_factor; + FPU registers[reg] = sxp.value; +#else + fp_declare_init_shape(sxp, double); + sxp.value = FPU registers[reg]; + uae_u32 exp = sxp.ieee.exponent + scale_factor; + if (exp < FP_EXTENDED_EXP_BIAS - FP_DOUBLE_EXP_BIAS) + exp = 0; + else if (exp > FP_EXTENDED_EXP_BIAS + FP_DOUBLE_EXP_BIAS) + exp = FP_DOUBLE_EXP_MAX; + else + exp += FP_DOUBLE_EXP_BIAS - FP_EXTENDED_EXP_BIAS; + sxp.ieee.exponent = exp; + FPU registers[reg] = sxp.value; +#endif + } + else if (fl_source.infinity) { + // Returns NaN for any Infinity source + make_nan( FPU registers[reg] ); + } + make_fpsr(FPU registers[reg]); + break; + case 0x27: /* FSGLMUL */ + fpu_debug(("FSGLMUL %.04f\n",(double)src)); + FPU registers[reg] = (float)(FPU registers[reg] * src); + make_fpsr(FPU registers[reg]); + break; + case 0x28: /* FSUB */ + fpu_debug(("FSUB %.04f\n",(double)src)); + FPU registers[reg] -= src; + make_fpsr(FPU registers[reg]); + break; + case 0x22: /* FADD */ + fpu_debug(("FADD %.04f\n",(double)src)); + FPU registers[reg] += src; + if (unlikely(isinf(FPU registers[reg]))) + { + isneg(FPU registers[reg]) ? 
make_inf_negative(FPU registers[reg]) : make_inf_positive(FPU registers[reg]); + } + make_fpsr(FPU registers[reg]); + break; + case 0x30: /* FSINCOS */ + case 0x31: + case 0x32: + case 0x33: + case 0x34: + case 0x35: + case 0x36: + case 0x37: + fpu_debug(("FSINCOS %.04f\n",(double)src)); + // Cosine must be calculated first if same register + FPU registers[extra & 7] = fp_cos(src); + FPU registers[reg] = fp_sin (src); + // Set FPU fpsr according to the sine result + make_fpsr(FPU registers[reg]); + break; + case 0x38: /* FCMP */ + fpu_debug(("FCMP %.04f\n",(double)src)); + set_fpsr(0); + if (isinf(FPU registers[reg])) + { + if (isinf(src) && isneg(FPU registers[reg]) == isneg (src)) + make_fpsr(0); + else + make_fpsr(FPU registers[reg]); + } + else if (isinf(src)) + make_fpsr(-src); + else + make_fpsr(FPU registers[reg] - src); + break; + case 0x3a: /* FTST */ + fpu_debug(("FTST %.04f\n",(double)src)); + set_fpsr(0); + make_fpsr(src); + break; + default: + fpu_debug(("ILLEGAL F OP %X\n",opcode)); + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + break; + } + fpu_debug(("END m68k_getpc()=%X\n",m68k_getpc())); + dump_registers( "END "); + return; + } + + fpu_debug(("ILLEGAL F OP 2 %X\n",opcode)); + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); +} + + +void fpu_set_fpsr(uae_u32 new_fpsr) +{ + set_fpsr(new_fpsr); +} + +uae_u32 fpu_get_fpsr(void) +{ + return get_fpsr(); +} + +void fpu_set_fpcr(uae_u32 new_fpcr) +{ + set_fpcr(new_fpcr); +} + +uae_u32 fpu_get_fpcr(void) +{ + return get_fpcr(); +} + +/* -------------------------- Initialization -------------------------- */ + +PRIVATE uae_u8 m_fpu_state_original[108]; // 90/94/108 + +PUBLIC void FFPU fpu_init (bool integral_68040) +{ + fpu_debug(("fpu_init\n")); + + static bool initialized_lookup_tables = false; + if (!initialized_lookup_tables) { + fpu_init_native_fflags(); + fpu_init_native_exceptions(); + fpu_init_native_accrued_exceptions(); + initialized_lookup_tables = true; + } 
+ + FPU is_integral = integral_68040; + FPU instruction_address = 0; + FPU fpsr.quotient = 0; + set_fpcr(0); + set_fpsr(0); + +#if defined(FPU_USE_X86_ROUNDING) + // Initial state after boot, reset and frestore(null frame) + x86_control_word = CW_INITIAL; +#elif defined(USE_X87_ASSEMBLY) + volatile unsigned short int cw; + __asm__ __volatile__("fnstcw %0" : "=m" (cw)); + cw &= ~0x0300; cw |= 0x0300; // CW_PC_EXTENDED + cw &= ~0x0C00; cw |= 0x0000; // CW_RC_NEAR + __asm__ __volatile__("fldcw %0" : : "m" (cw)); +#endif + + FPU result = 1; + + for (int i = 0; i < 8; i++) + make_nan(FPU registers[i]); +} + +PUBLIC void FFPU fpu_exit (void) +{ + fpu_debug(("fpu_exit\n")); +} + +PUBLIC void FFPU fpu_reset (void) +{ + fpu_debug(("fpu_reset\n")); + fpu_exit(); + fpu_init(FPU is_integral); +} diff --git a/BasiliskII/src/uae_cpu/fpu/fpu_ieee.h b/BasiliskII/src/uae_cpu/fpu/fpu_ieee.h new file mode 100644 index 00000000..3321891a --- /dev/null +++ b/BasiliskII/src/uae_cpu/fpu/fpu_ieee.h @@ -0,0 +1,154 @@ +/* + * fpu/fpu_ieee.h - Extra Definitions for the IEEE FPU core + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * MC68881/68040 fpu emulation + * + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef FPU_IEEE_H +#define FPU_IEEE_H + +/* NOTE: this file shall be included from fpu/fpu_uae.cpp */ +#undef PUBLIC +#define PUBLIC extern + +#undef PRIVATE +#define PRIVATE static + +#undef FFPU +#define FFPU /**/ + +#undef FPU +#define FPU fpu. + +// Lauri-- full words to avoid partial register stalls. +struct double_flags { + uae_u32 in_range; + uae_u32 zero; + uae_u32 infinity; + uae_u32 nan; + uae_u32 negative; +}; +PRIVATE double_flags fl_source; +PRIVATE double_flags fl_dest; +PRIVATE inline void FFPU get_dest_flags(fpu_register const & r); +PRIVATE inline void FFPU get_source_flags(fpu_register const & r); + +PRIVATE inline void FFPU make_nan(fpu_register & r); +PRIVATE inline void FFPU make_zero_positive(fpu_register & r); +PRIVATE inline void FFPU make_zero_negative(fpu_register & r); +PRIVATE inline void FFPU make_inf_positive(fpu_register & r); +PRIVATE inline void FFPU make_inf_negative(fpu_register & r); + +// MJ PRIVATE inline void FFPU fast_scale(fpu_register & r, int add); +PRIVATE inline fpu_register FFPU fast_fgetexp(fpu_register const & r); + +// May be optimized for particular processors +#ifndef FPU_USE_NATIVE_FLAGS +PRIVATE inline void FFPU make_fpsr(fpu_register const & r); +#endif + +// Normalize to range 1..2 +PRIVATE inline void FFPU fast_remove_exponent(fpu_register & r); + +// The sign of the quotient is the exclusive-OR of the sign bits +// of the source and destination operands. 
+PRIVATE inline uae_u32 FFPU get_quotient_sign( + fpu_register const & ra, fpu_register const & rb +); + +// Quotient Byte is loaded with the sign and least significant +// seven bits of the quotient. +PRIVATE inline void FFPU make_quotient( + fpu_register const & quotient, uae_u32 sign +); + +// to_single +PRIVATE inline fpu_register FFPU make_single( + uae_u32 value +); + +// from_single +PRIVATE inline uae_u32 FFPU extract_single( + fpu_register const & src +); + +// to_exten +PRIVATE inline fpu_register FFPU make_extended( + uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3 +); + +/* + Would be so much easier with full size floats :( + ... this is so vague. +*/ +// to_exten_no_normalize +PRIVATE inline void FFPU make_extended_no_normalize( + uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3, fpu_register & result +); + +// from_exten +PRIVATE inline void FFPU extract_extended(fpu_register const & src, + uae_u32 * wrd1, uae_u32 * wrd2, uae_u32 * wrd3 +); + +// to_double +PRIVATE inline fpu_register FFPU make_double( + uae_u32 wrd1, uae_u32 wrd2 +); + +// from_double +PRIVATE inline void FFPU extract_double(fpu_register const & src, + uae_u32 * wrd1, uae_u32 * wrd2 +); + +PRIVATE inline fpu_register FFPU make_packed( + uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3 +); + +PRIVATE inline void FFPU extract_packed( + fpu_register const & src, uae_u32 * wrd1, uae_u32 * wrd2, uae_u32 * wrd3 +); + +PRIVATE inline int FFPU get_fp_value( + uae_u32 opcode, uae_u16 extra, fpu_register & src +); + +PRIVATE inline int FFPU put_fp_value( + uae_u32 opcode, uae_u16 extra, fpu_register const & value +); + +PRIVATE inline int FFPU get_fp_ad( + uae_u32 opcode, uae_u32 * ad +); + +PRIVATE inline int FFPU fpp_cond( + int condition +); + +#endif /* FPU_IEEE_H */ diff --git a/BasiliskII/src/uae_cpu/fpu/fpu_mpfr.cpp b/BasiliskII/src/uae_cpu/fpu/fpu_mpfr.cpp new file mode 100644 index 00000000..1975aba8 --- /dev/null +++ b/BasiliskII/src/uae_cpu/fpu/fpu_mpfr.cpp @@ -0,0 +1,2110 @@ +/* + * fpu_mpfr.cpp - 
emulate 68881/68040 fpu with mpfr + * + * Copyright (c) 2012, 2013 Andreas Schwab + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "sysdeps.h" +#include +#include "memory.h" +#include "readcpu.h" +#include "newcpu.h" +#include "main.h" +#define FPU_IMPLEMENTATION +#include "fpu/fpu.h" + +#include "fpu/flags.h" +#include "fpu/exceptions.h" +#include "fpu/rounding.h" +#include "fpu/impl.h" + +#define SINGLE_PREC 24 +#define SINGLE_MIN_EXP -126 +#define SINGLE_MAX_EXP 127 +#define SINGLE_BIAS 127 +#define DOUBLE_PREC 53 +#define DOUBLE_MIN_EXP -1022 +#define DOUBLE_MAX_EXP 1023 +#define DOUBLE_BIAS 1023 +#define EXTENDED_PREC 64 +#define EXTENDED_MIN_EXP -16383 +#define EXTENDED_MAX_EXP 16383 +#define EXTENDED_BIAS 16383 + +fpu_t fpu; +// The constant ROM +// Constants 48 to 63 are mapped to index 16 to 31 +const int num_fpu_constants = 32; +static mpfr_t fpu_constant_rom[num_fpu_constants]; +#define FPU_CONSTANT_ONE fpu_constant_rom[18] +// Exceptions generated during execution in addition to the ones +// maintained by mpfr +static uae_u32 cur_exceptions; +static uaecptr cur_instruction_address; + +static void +set_format (int prec) +{ + // MPFR represents numbers as 0.m*2^e + switch (prec) + { + case SINGLE_PREC: + mpfr_set_emin (SINGLE_MIN_EXP + 1 - (SINGLE_PREC - 1)); + mpfr_set_emax (SINGLE_MAX_EXP 
+ 1); + break; + case DOUBLE_PREC: + mpfr_set_emin (DOUBLE_MIN_EXP + 1 - (DOUBLE_PREC - 1)); + mpfr_set_emax (DOUBLE_MAX_EXP + 1); + break; + case EXTENDED_PREC: + mpfr_set_emin (EXTENDED_MIN_EXP + 1 - (EXTENDED_PREC - 1)); + mpfr_set_emax (EXTENDED_MAX_EXP + 1); + break; + } +} + +static mpfr_rnd_t +get_cur_rnd () +{ + switch (get_rounding_mode ()) + { + default: + case FPCR_ROUND_NEAR: + return MPFR_RNDN; + case FPCR_ROUND_ZERO: + return MPFR_RNDZ; + case FPCR_ROUND_MINF: + return MPFR_RNDD; + case FPCR_ROUND_PINF: + return MPFR_RNDU; + } +} + +static mpfr_prec_t +get_cur_prec () +{ + switch (get_rounding_precision ()) + { + default: + case FPCR_PRECISION_EXTENDED: + return EXTENDED_PREC; + case FPCR_PRECISION_SINGLE: + return SINGLE_PREC; + case FPCR_PRECISION_DOUBLE: + return DOUBLE_PREC; + } +} + +#define DEFAULT_NAN_BITS 0xffffffffffffffffULL + +static void +set_nan (fpu_register ®, uae_u64 nan_bits, int nan_sign) +{ + mpfr_set_nan (reg.f); + reg.nan_bits = nan_bits; + reg.nan_sign = nan_sign; +} + +static void +set_nan (fpu_register ®) +{ + set_nan (reg, DEFAULT_NAN_BITS, 0); +} + +static bool fpu_inited; + +void +fpu_init (bool integral_68040) +{ + fpu.is_integral = integral_68040; + + mpfr_set_default_prec (EXTENDED_PREC); + mpfr_set_default_rounding_mode (MPFR_RNDN); + set_format (EXTENDED_PREC); + + for (int i = 0; i < 8; i++) + mpfr_init (fpu.registers[i].f); + mpfr_init (fpu.result.f); + + // Initialize constant ROM + for (int i = 0; i < num_fpu_constants; i++) + mpfr_init (fpu_constant_rom[i]); + + // 0: pi + mpfr_const_pi (fpu_constant_rom[0], MPFR_RNDN); + // 11: log10 (2) + mpfr_set_ui (fpu_constant_rom[11], 2, MPFR_RNDN); + mpfr_log10 (fpu_constant_rom[11], fpu_constant_rom[11], MPFR_RNDZ); + // 12: e + mpfr_set_ui (fpu_constant_rom[12], 1, MPFR_RNDN); + mpfr_exp (fpu_constant_rom[12], fpu_constant_rom[12], MPFR_RNDZ); + // 13: log2 (e) + mpfr_log2 (fpu_constant_rom[13], fpu_constant_rom[12], MPFR_RNDU); + // 14: log10 (e) + mpfr_log10 
(fpu_constant_rom[14], fpu_constant_rom[12], MPFR_RNDU); + // 15: 0 + mpfr_set_zero (fpu_constant_rom[15], 0); + // 48: ln (2) + mpfr_const_log2 (fpu_constant_rom[16], MPFR_RNDN); + // 49: ln (10) + mpfr_set_ui (fpu_constant_rom[17], 10, MPFR_RNDN); + mpfr_log (fpu_constant_rom[17], fpu_constant_rom[17], MPFR_RNDN); + // 50 to 63: powers of 10 + mpfr_set_ui (fpu_constant_rom[18], 1, MPFR_RNDN); + for (int i = 19; i < 32; i++) + { + mpfr_set_ui (fpu_constant_rom[i], 1L << (i - 19) , MPFR_RNDN); + mpfr_exp10 (fpu_constant_rom[i], fpu_constant_rom[i], MPFR_RNDN); + } + + fpu_inited = true; + + fpu_reset (); +} + +void +fpu_exit () +{ + if (!fpu_inited) return; + + for (int i = 0; i < 8; i++) + mpfr_clear (fpu.registers[i].f); + mpfr_clear (fpu.result.f); + for (int i = 0; i < num_fpu_constants; i++) + mpfr_clear (fpu_constant_rom[i]); +} + +void +fpu_reset () +{ + set_fpcr (0); + set_fpsr (0); + fpu.instruction_address = 0; + + for (int i = 0; i < 8; i++) + set_nan (fpu.registers[i]); +} + +fpu_register::operator long double () +{ + return mpfr_get_ld (f, MPFR_RNDN); +} + +fpu_register & +fpu_register::operator= (long double x) +{ + mpfr_set_ld (f, x, MPFR_RNDN); + nan_bits = DEFAULT_NAN_BITS; + nan_sign = 0; + return *this; +} + +static bool +get_fp_addr (uae_u32 opcode, uae_u32 *addr, bool write) +{ + uaecptr pc; + int mode; + int reg; + + mode = (opcode >> 3) & 7; + reg = opcode & 7; + switch (mode) + { + case 0: + case 1: + return false; + case 2: + *addr = m68k_areg (regs, reg); + break; + case 3: + *addr = m68k_areg (regs, reg); + break; + case 4: + *addr = m68k_areg (regs, reg); + break; + case 5: + *addr = m68k_areg (regs, reg) + (uae_s16) next_iword(); + break; + case 6: + *addr = get_disp_ea_020 (m68k_areg (regs, reg), next_iword()); + break; + case 7: + switch (reg) + { + case 0: + *addr = (uae_s16) next_iword(); + break; + case 1: + *addr = next_ilong(); + break; + case 2: + if (write) + return false; + pc = m68k_getpc (); + *addr = pc + (uae_s16) 
next_iword(); + break; + case 3: + if (write) + return false; + pc = m68k_getpc (); + *addr = get_disp_ea_020 (pc, next_iword()); + break; + default: + return false; + } + } + return true; +} + +static void +set_from_single (fpu_register &value, uae_u32 data) +{ + int s = data >> 31; + int e = (data >> 23) & 0xff; + uae_u32 m = data & 0x7fffff; + + if (e == 0xff) + { + if (m != 0) + { + if (!(m & 0x400000)) + cur_exceptions |= FPSR_EXCEPTION_SNAN; + set_nan (value, (uae_u64) (m | 0xc00000) << (32 + 8), s); + } + else + mpfr_set_inf (value.f, 0); + } + else + { + if (e != 0) + // Add integer bit + m |= 0x800000; + else + e++; + // Remove bias + e -= SINGLE_BIAS; + mpfr_set_ui_2exp (value.f, m, e - (SINGLE_PREC - 1), MPFR_RNDN); + } + mpfr_setsign (value.f, value.f, s, MPFR_RNDN); +} + +static void +set_from_double (fpu_register &value, uae_u32 words[2]) +{ + int s = words[0] >> 31; + int e = (words[0] >> 20) & 0x7ff; + uae_u32 m = words[0] & 0xfffff; + + if (e == 0x7ff) + { + if ((m | words[1]) != 0) + { + if (!(m & 0x80000)) + cur_exceptions |= FPSR_EXCEPTION_SNAN; + set_nan (value, (((uae_u64) (m | 0x180000) << (32 + 11)) + | ((uae_u64) words[1] << 11)), s); + } + else + mpfr_set_inf (value.f, 0); + } + else + { + if (e != 0) + // Add integer bit + m |= 0x100000; + else + e++; + // Remove bias + e -= DOUBLE_BIAS; + mpfr_set_uj_2exp (value.f, ((uintmax_t) m << 32) | words[1], + e - (DOUBLE_PREC - 1), MPFR_RNDN); + } + mpfr_setsign (value.f, value.f, s, MPFR_RNDN); +} + +static void +set_from_extended (fpu_register &value, uae_u32 words[3], bool check_snan) +{ + int s = words[0] >> 31; + int e = (words[0] >> 16) & 0x7fff; + + if (e == 0x7fff) + { + if (((words[1] & 0x7fffffff) | words[2]) != 0) + { + if (check_snan) + { + if ((words[1] & 0x40000000) == 0) + cur_exceptions |= FPSR_EXCEPTION_SNAN; + words[1] |= 0x40000000; + } + set_nan (value, ((uae_u64) words[1] << 32) | words[2], s); + } + else + mpfr_set_inf (value.f, 0); + } + else + { + // Remove bias + e -= 
EXTENDED_BIAS; + mpfr_set_uj_2exp (value.f, ((uintmax_t) words[1] << 32) | words[2], + e - (EXTENDED_PREC - 1), MPFR_RNDN); + } + mpfr_setsign (value.f, value.f, s, MPFR_RNDN); +} + +#define from_bcd(d) ((d) < 10 ? (d) : (d) - 10) + +static void +set_from_packed (fpu_register &value, uae_u32 words[3]) +{ + char str[32], *p = str; + int sm = words[0] >> 31; + int se = (words[0] >> 30) & 1; + int i; + + if (((words[0] >> 16) & 0x7fff) == 0x7fff) + { + if ((words[1] | words[2]) != 0) + { + if ((words[1] & 0x40000000) == 0) + cur_exceptions |= FPSR_EXCEPTION_SNAN; + set_nan (value, ((uae_u64) (words[1] | 0x40000000) << 32) | words[2], + sm); + } + else + mpfr_set_inf (value.f, 0); + } + else + { + if (sm) + *p++ = '-'; + *p++ = from_bcd (words[0] & 15) + '0'; + *p++ = '.'; + for (i = 0; i < 8; i++) + { + p[i] = from_bcd ((words[1] >> (28 - i * 4)) & 15) + '0'; + p[i + 8] = from_bcd ((words[2] >> (28 - i * 4)) & 15) + '0'; + } + p += 16; + *p++ = 'e'; + if (se) + *p++ = '-'; + *p++ = from_bcd ((words[0] >> 24) & 15) + '0'; + *p++ = from_bcd ((words[0] >> 20) & 15) + '0'; + *p++ = from_bcd ((words[0] >> 16) & 15) + '0'; + *p = 0; + mpfr_set_str (value.f, str, 10, MPFR_RNDN); + } + mpfr_setsign (value.f, value.f, sm, MPFR_RNDN); +} + +static bool +get_fp_value (uae_u32 opcode, uae_u32 extra, fpu_register &value) +{ + int mode, reg, size; + uaecptr pc; + uae_u32 addr; + uae_u32 words[3]; + static const int sz1[8] = {4, 4, 12, 12, 2, 8, 1, 0}; + static const int sz2[8] = {4, 4, 12, 12, 2, 8, 2, 0}; + + if ((extra & 0x4000) == 0) + { + mpfr_set (value.f, fpu.registers[(extra >> 10) & 7].f, MPFR_RNDN); + value.nan_bits = fpu.registers[(extra >> 10) & 7].nan_bits; + value.nan_sign = fpu.registers[(extra >> 10) & 7].nan_sign; + /* Check for SNaN. 
*/ + if (mpfr_nan_p (value.f) && (value.nan_bits & (1ULL << 62)) == 0) + { + value.nan_bits |= 1ULL << 62; + cur_exceptions |= FPSR_EXCEPTION_SNAN; + } + return true; + } + mode = (opcode >> 3) & 7; + reg = opcode & 7; + size = (extra >> 10) & 7; + switch (mode) + { + case 0: + switch (size) + { + case 6: + mpfr_set_si (value.f, (uae_s8) m68k_dreg (regs, reg), MPFR_RNDN); + break; + case 4: + mpfr_set_si (value.f, (uae_s16) m68k_dreg (regs, reg), MPFR_RNDN); + break; + case 0: + mpfr_set_si (value.f, (uae_s32) m68k_dreg (regs, reg), MPFR_RNDN); + break; + case 1: + set_from_single (value, m68k_dreg (regs, reg)); + break; + default: + return false; + } + return true; + case 1: + return false; + case 2: + case 3: + addr = m68k_areg (regs, reg); + break; + case 4: + addr = m68k_areg (regs, reg) - (reg == 7 ? sz2[size] : sz1[size]); + break; + case 5: + addr = m68k_areg (regs, reg) + (uae_s16) next_iword (); + break; + case 6: + addr = get_disp_ea_020 (m68k_areg (regs, reg), next_iword ()); + break; + case 7: + switch (reg) + { + case 0: + addr = (uae_s16) next_iword (); + break; + case 1: + addr = next_ilong (); + break; + case 2: + pc = m68k_getpc (); + addr = pc + (uae_s16) next_iword (); + break; + case 3: + pc = m68k_getpc (); + addr = get_disp_ea_020 (pc, next_iword ()); + break; + case 4: + addr = m68k_getpc (); + m68k_incpc (sz2[size]); + if (size == 6) // Immediate byte + addr++; + break; + default: + return false; + } + } + + switch (size) + { + case 0: + mpfr_set_si (value.f, (uae_s32) get_long (addr), MPFR_RNDN); + break; + case 1: + set_from_single (value, get_long (addr)); + break; + case 2: + words[0] = get_long (addr); + words[1] = get_long (addr + 4); + words[2] = get_long (addr + 8); + set_from_extended (value, words, true); + break; + case 3: + words[0] = get_long (addr); + words[1] = get_long (addr + 4); + words[2] = get_long (addr + 8); + set_from_packed (value, words); + break; + case 4: + mpfr_set_si (value.f, (uae_s16) get_word (addr), 
MPFR_RNDN); + break; + case 5: + words[0] = get_long (addr); + words[1] = get_long (addr + 4); + set_from_double (value, words); + break; + case 6: + mpfr_set_si (value.f, (uae_s8) get_byte (addr), MPFR_RNDN); + break; + default: + return false; + } + + switch (mode) + { + case 3: + m68k_areg (regs, reg) += reg == 7 ? sz2[size] : sz1[size]; + break; + case 4: + m68k_areg (regs, reg) -= reg == 7 ? sz2[size] : sz1[size]; + break; + } + + return true; +} + +static void +update_exceptions () +{ + uae_u32 exc, aexc; + + exc = cur_exceptions; + // Add any mpfr detected exceptions + if (mpfr_underflow_p ()) + exc |= FPSR_EXCEPTION_UNFL; + if (mpfr_overflow_p ()) + exc |= FPSR_EXCEPTION_OVFL; + if (mpfr_inexflag_p ()) + exc |= FPSR_EXCEPTION_INEX2; + set_exception_status (exc); + + aexc = get_accrued_exception (); + if (exc & (FPSR_EXCEPTION_SNAN|FPSR_EXCEPTION_OPERR)) + aexc |= FPSR_ACCR_IOP; + if (exc & FPSR_EXCEPTION_OVFL) + aexc |= FPSR_ACCR_OVFL; + if ((exc & (FPSR_EXCEPTION_UNFL|FPSR_EXCEPTION_INEX2)) + == (FPSR_EXCEPTION_UNFL|FPSR_EXCEPTION_INEX2)) + aexc |= FPSR_ACCR_UNFL; + if (exc & FPSR_EXCEPTION_DZ) + aexc |= FPSR_ACCR_DZ; + if (exc & (FPSR_EXCEPTION_INEX1|FPSR_EXCEPTION_INEX2|FPSR_EXCEPTION_OVFL)) + aexc |= FPSR_ACCR_INEX; + set_accrued_exception (aexc); + + if ((fpu.fpcr.exception_enable & exc) != 0) + { + fpu.instruction_address = cur_instruction_address; + // TODO: raise exceptions + // Problem: FPSP040 depends on proper FPU stack frames, it would suffer + // undefined behaviour with our dummy FSAVE implementation + } +} + +static void +set_fp_register (int reg, mpfr_t value, uae_u64 nan_bits, int nan_sign, + int t, mpfr_rnd_t rnd, bool do_flags) +{ + mpfr_subnormalize (value, t, rnd); + mpfr_set (fpu.registers[reg].f, value, rnd); + fpu.registers[reg].nan_bits = nan_bits; + fpu.registers[reg].nan_sign = nan_sign; + if (do_flags) + { + uae_u32 flags = 0; + + if (mpfr_zero_p (fpu.registers[reg].f)) + flags |= FPSR_CCB_ZERO; + if (mpfr_signbit 
(fpu.registers[reg].f)) + flags |= FPSR_CCB_NEGATIVE; + if (mpfr_nan_p (fpu.registers[reg].f)) + flags |= FPSR_CCB_NAN; + if (mpfr_inf_p (fpu.registers[reg].f)) + flags |= FPSR_CCB_INFINITY; + set_fpccr (flags); + } +} + +static void +set_fp_register (int reg, mpfr_t value, int t, mpfr_rnd_t rnd, bool do_flags) +{ + set_fp_register (reg, value, DEFAULT_NAN_BITS, 0, t, rnd, do_flags); +} + +static void +set_fp_register (int reg, fpu_register &value, int t, mpfr_rnd_t rnd, + bool do_flags) +{ + set_fp_register (reg, value.f, value.nan_bits, value.nan_sign, t, rnd, + do_flags); +} + +static uae_u32 +extract_to_single (fpu_register &value) +{ + uae_u32 word; + int t; + mpfr_rnd_t rnd = get_cur_rnd (); + MPFR_DECL_INIT (single, SINGLE_PREC); + + set_format (SINGLE_PREC); + // Round to single + t = mpfr_set (single, value.f, rnd); + t = mpfr_check_range (single, t, rnd); + mpfr_subnormalize (single, t, rnd); + set_format (EXTENDED_PREC); + + if (mpfr_inf_p (single)) + word = 0x7f800000; + else if (mpfr_nan_p (single)) + { + if ((value.nan_bits & (1ULL << 62)) == 0) + { + value.nan_bits |= 1ULL << 62; + cur_exceptions |= FPSR_EXCEPTION_SNAN; + } + word = 0x7f800000 | ((value.nan_bits >> (32 + 8)) & 0x7fffff); + if (value.nan_sign) + word |= 0x80000000; + } + else if (mpfr_zero_p (single)) + word = 0; + else + { + int e; + mpz_t f; + mpz_init (f); + word = 0; + // Get exponent and mantissa + e = mpfr_get_z_2exp (f, single); + // Move binary point + e += SINGLE_PREC - 1; + // Add bias + e += SINGLE_BIAS; + if (e <= 0) + { + // Denormalized number + mpz_tdiv_q_2exp (f, f, -e + 1); + e = 0; + } + mpz_export (&word, 0, 1, 4, 0, 0, f); + // Remove integer bit + word &= 0x7fffff; + word |= e << 23; + mpz_clear (f); + } + if (mpfr_signbit (single)) + word |= 0x80000000; + return word; +} + +static void +extract_to_double (fpu_register &value, uint32_t *words) +{ + int t; + mpfr_rnd_t rnd = get_cur_rnd (); + MPFR_DECL_INIT (dbl, DOUBLE_PREC); + + set_format (DOUBLE_PREC); + // 
Round to double + t = mpfr_set (dbl, value.f, rnd); + t = mpfr_check_range (dbl, t, rnd); + mpfr_subnormalize (dbl, t, rnd); + set_format (EXTENDED_PREC); + + if (mpfr_inf_p (dbl)) + { + words[0] = 0x7ff00000; + words[1] = 0; + } + else if (mpfr_nan_p (dbl)) + { + if ((value.nan_bits & (1ULL << 62)) == 0) + { + value.nan_bits |= 1ULL << 62; + cur_exceptions |= FPSR_EXCEPTION_SNAN; + } + words[0] = 0x7ff00000 | ((value.nan_bits >> (32 + 11)) & 0xfffff); + words[1] = value.nan_bits >> 11; + if (value.nan_sign) + words[0] |= 0x80000000; + } + else if (mpfr_zero_p (dbl)) + { + words[0] = 0; + words[1] = 0; + } + else + { + int e, off = 0; + mpz_t f; + mpz_init (f); + words[0] = words[1] = 0; + // Get exponent and mantissa + e = mpfr_get_z_2exp (f, dbl); + // Move binary point + e += DOUBLE_PREC - 1; + // Add bias + e += DOUBLE_BIAS; + if (e <= 0) + { + // Denormalized number + mpz_tdiv_q_2exp (f, f, -e + 1); + if (e <= -20) + // No more than 32 bits left + off = 1; + e = 0; + } + mpz_export (&words[off], 0, 1, 4, 0, 0, f); + // Remove integer bit + words[0] &= 0xfffff; + words[0] |= e << 20; + mpz_clear (f); + } + if (mpfr_signbit (dbl)) + words[0] |= 0x80000000; +} + +static void +extract_to_extended (fpu_register &value, uint32_t *words) +{ + if (mpfr_inf_p (value.f)) + { + words[0] = 0x7fff0000; + words[1] = 0; + words[2] = 0; + } + else if (mpfr_nan_p (value.f)) + { + words[0] = 0x7fff0000; + words[1] = value.nan_bits >> 32; + words[2] = value.nan_bits; + if (value.nan_sign) + words[0] |= 0x80000000; + } + else if (mpfr_zero_p (value.f)) + { + words[0] = 0; + words[1] = 0; + words[2] = 0; + } + else + { + int e, off = 0; + mpz_t f; + + mpz_init (f); + words[0] = words[1] = words[2] = 0; + // Get exponent and mantissa + e = mpfr_get_z_2exp (f, value.f); + // Move binary point + e += EXTENDED_PREC - 1; + // Add bias + e += EXTENDED_BIAS; + if (e < 0) + { + // Denormalized number + mpz_tdiv_q_2exp (f, f, -e); + if (e <= -32) + // No more than 32 bits left + off = 1; + 
e = 0; + } + mpz_export (&words[1 + off], 0, 1, 4, 0, 0, f); + words[0] = e << 16; + mpz_clear (f); + } + if (mpfr_signbit (value.f)) + words[0] |= 0x80000000; +} + +static void +extract_to_packed (fpu_register &value, int k, uae_u32 *words) +{ + if (mpfr_inf_p (value.f)) + { + words[0] = 0x7fff0000; + words[1] = 0; + words[2] = 0; + } + else if (mpfr_nan_p (value.f)) + { + words[0] = 0x7fff0000; + words[1] = value.nan_bits >> 32; + words[2] = value.nan_bits; + if (value.nan_sign) + words[0] |= 0x80000000; + } + else if (mpfr_zero_p (value.f)) + { + words[0] = 0; + words[1] = 0; + words[2] = 0; + } + else + { + char str[100], *p = str; + mpfr_exp_t e; + mpfr_rnd_t rnd = get_cur_rnd (); + + words[0] = words[1] = words[2] = 0; + if (k >= 64) + k -= 128; + else if (k >= 18) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + if (k <= 0) + { + MPFR_DECL_INIT (temp, 16); + + mpfr_log10 (temp, value.f, rnd); + k = mpfr_get_si (temp, MPFR_RNDZ) - k + 1; + } + if (k <= 0) + k = 1; + else if (k >= 18) + k = 17; + mpfr_get_str (str, &e, 10, k, value.f, rnd); + e--; + if (*p == '-') + p++; + // Pad to 17 digits + while (k < 17) + p[k++] = '0'; + if (e < 0) + { + words[0] |= 0x40000000; + e = -e; + } + words[0] |= (e % 10) << 16; + e /= 10; + words[0] |= (e % 10) << 20; + e /= 10; + words[0] |= (e % 10) << 24; + e /= 10; + if (e) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + words[0] |= e << 12; + words[0] |= *p++ & 15; + for (k = 0; k < 8; k++) + words[1] = (words[1] << 4) | (*p++ & 15); + for (k = 0; k < 8; k++) + words[2] = (words[2] << 4) | (*p++ & 15); + + } + if (mpfr_signbit (value.f)) + words[0] |= 0x80000000; +} + +static long +extract_to_integer (mpfr_t value, long min, long max) +{ + long result; + mpfr_rnd_t rnd = get_cur_rnd (); + + if (mpfr_fits_slong_p (value, rnd)) + { + result = mpfr_get_si (value, rnd); + if (result > max) + { + result = max; + cur_exceptions |= FPSR_EXCEPTION_OPERR; + } + else if (result < min) + { + result = min; + cur_exceptions |= 
FPSR_EXCEPTION_OPERR; + } + } + else + { + if (!mpfr_signbit (value)) + result = max; + else + result = min; + cur_exceptions |= FPSR_EXCEPTION_OPERR; + } + return result; +} + +static bool +fpuop_fmove_memory (uae_u32 opcode, uae_u32 extra) +{ + int mode, reg, size; + uaecptr pc; + uae_u32 addr; + uae_u32 words[3]; + static const int sz1[8] = {4, 4, 12, 12, 2, 8, 1, 0}; + static const int sz2[8] = {4, 4, 12, 12, 2, 8, 2, 0}; + + mpfr_clear_flags (); + cur_exceptions = 0; + mode = (opcode >> 3) & 7; + reg = opcode & 7; + size = (extra >> 10) & 7; + fpu_register &value = fpu.registers[(extra >> 7) & 7]; + + switch (mode) + { + case 0: + switch (size) + { + case 0: + m68k_dreg (regs, reg) = extract_to_integer (value.f, -0x7fffffff-1, 0x7fffffff); + break; + case 1: + m68k_dreg (regs, reg) = extract_to_single (value); + break; + case 4: + m68k_dreg (regs, reg) &= ~0xffff; + m68k_dreg (regs, reg) |= extract_to_integer (value.f, -32768, 32767) & 0xffff; + break; + case 6: + m68k_dreg (regs, reg) &= ~0xff; + m68k_dreg (regs, reg) |= extract_to_integer (value.f, -128, 127) & 0xff; + break; + default: + return false; + } + update_exceptions (); + return true; + case 1: + return false; + case 2: + addr = m68k_areg (regs, reg); + break; + case 3: + addr = m68k_areg (regs, reg); + break; + case 4: + addr = m68k_areg (regs, reg) - (reg == 7 ? 
sz2[size] : sz1[size]); + break; + case 5: + addr = m68k_areg (regs, reg) + (uae_s16) next_iword(); + break; + case 6: + addr = get_disp_ea_020 (m68k_areg (regs, reg), next_iword()); + break; + case 7: + switch (reg) + { + case 0: + addr = (uae_s16) next_iword(); + break; + case 1: + addr = next_ilong(); + break; + case 2: + pc = m68k_getpc (); + addr = pc + (uae_s16) next_iword(); + break; + case 3: + pc = m68k_getpc (); + addr = get_disp_ea_020 (pc, next_iword ()); + break; + case 4: + addr = m68k_getpc (); + m68k_incpc (sz2[size]); + break; + default: + return false; + } + } + + switch (size) + { + case 0: + put_long (addr, extract_to_integer (value.f, -0x7fffffff-1, 0x7fffffff)); + break; + case 1: + put_long (addr, extract_to_single (value)); + break; + case 2: + extract_to_extended (value, words); + put_long (addr, words[0]); + put_long (addr + 4, words[1]); + put_long (addr + 8, words[2]); + break; + case 3: + extract_to_packed (value, extra & 0x7f, words); + put_long (addr, words[0]); + put_long (addr + 4, words[1]); + put_long (addr + 8, words[2]); + break; + case 4: + put_word (addr, extract_to_integer (value.f, -32768, 32767)); + break; + case 5: + extract_to_double (value, words); + put_long (addr, words[0]); + put_long (addr + 4, words[1]); + break; + case 6: + put_byte (addr, extract_to_integer (value.f, -128, 127)); + break; + case 7: + extract_to_packed (value, m68k_dreg (regs, (extra >> 4) & 7) & 0x7f, words); + put_long (addr, words[0]); + put_long (addr + 4, words[1]); + put_long (addr + 8, words[2]); + break; + } + + switch (mode) + { + case 3: + m68k_areg (regs, reg) += reg == 7 ? sz2[size] : sz1[size]; + break; + case 4: + m68k_areg (regs, reg) -= reg == 7 ? 
sz2[size] : sz1[size]; + break; + } + + update_exceptions (); + return true; +} + +static bool +fpuop_fmovem_control (uae_u32 opcode, uae_u32 extra) +{ + int list, mode, reg; + uae_u32 addr; + + list = (extra >> 10) & 7; + mode = (opcode >> 3) & 7; + reg = opcode & 7; + + if (list == 0) + return false; + + if (extra & 0x2000) + { + // FMOVEM to + if (mode == 0) + { + switch (list) + { + case 1: + m68k_dreg (regs, reg) = fpu.instruction_address; + break; + case 2: + m68k_dreg (regs, reg) = get_fpsr (); + break; + case 4: + m68k_dreg (regs, reg) = get_fpcr (); + break; + default: + return false; + } + } + else if (mode == 1) + { + if (list != 1) + return false; + m68k_areg (regs, reg) = fpu.instruction_address; + } + else + { + int nwords; + + if (!get_fp_addr (opcode, &addr, true)) + return false; + nwords = (list & 1) + ((list >> 1) & 1) + ((list >> 2) & 1); + if (mode == 4) + addr -= nwords * 4; + if (list & 4) + { + put_long (addr, get_fpcr ()); + addr += 4; + } + if (list & 2) + { + put_long (addr, get_fpsr ()); + addr += 4; + } + if (list & 1) + { + put_long (addr, fpu.instruction_address); + addr += 4; + } + if (mode == 4) + m68k_areg (regs, reg) = addr - nwords * 4; + else if (mode == 3) + m68k_areg (regs, reg) = addr; + } + } + else + { + // FMOVEM from + + if (mode == 0) + { + switch (list) + { + case 1: + fpu.instruction_address = m68k_dreg (regs, reg); + break; + case 2: + set_fpsr (m68k_dreg (regs, reg)); + break; + case 4: + set_fpcr (m68k_dreg (regs, reg)); + break; + default: + return false; + } + } + else if (mode == 1) + { + if (list != 1) + return false; + fpu.instruction_address = m68k_areg (regs, reg); + } + else if ((opcode & 077) == 074) + { + switch (list) + { + case 1: + fpu.instruction_address = next_ilong (); + break; + case 2: + set_fpsr (next_ilong ()); + break; + case 4: + set_fpcr (next_ilong ()); + break; + default: + return false; + } + } + else + { + int nwords; + + if (!get_fp_addr (opcode, &addr, false)) + return false; + nwords = 
(list & 1) + ((list >> 1) & 1) + ((list >> 2) & 1); + if (mode == 4) + addr -= nwords * 4; + if (list & 4) + { + set_fpcr (get_long (addr)); + addr += 4; + } + if (list & 2) + { + set_fpsr (get_long (addr)); + addr += 4; + } + if (list & 1) + { + fpu.instruction_address = get_long (addr); + addr += 4; + } + if (mode == 4) + m68k_areg (regs, reg) = addr - nwords * 4; + else if (mode == 3) + m68k_areg (regs, reg) = addr; + } + } + + return true; +} + +static bool +fpuop_fmovem_register (uae_u32 opcode, uae_u32 extra) +{ + uae_u32 addr; + uae_u32 words[3]; + int list; + int i; + + set_format (EXTENDED_PREC); + if (!get_fp_addr (opcode, &addr, extra & 0x2000)) + return false; + if (extra & 0x800) + list = m68k_dreg (regs, (extra >> 4) & 7) & 0xff; + else + list = extra & 0xff; + + if (extra & 0x2000) + { + // FMOVEM to memory + + switch (opcode & 070) + { + case 030: + return false; + case 040: + if (extra & 0x1000) + return false; + for (i = 7; i >= 0; i--) + if (list & (1 << i)) + { + extract_to_extended (fpu.registers[i], words); + addr -= 12; + put_long (addr, words[0]); + put_long (addr + 4, words[1]); + put_long (addr + 8, words[2]); + } + m68k_areg (regs, opcode & 7) = addr; + break; + default: + if ((extra & 0x1000) == 0) + return false; + for (i = 0; i < 8; i++) + if (list & (0x80 >> i)) + { + extract_to_extended (fpu.registers[i], words); + put_long (addr, words[0]); + put_long (addr + 4, words[1]); + put_long (addr + 8, words[2]); + addr += 12; + } + if ((opcode & 070) == 030) + m68k_areg (regs, opcode & 7) = addr; + break; + } + } + else + { + // FMOVEM from memory + + if ((opcode & 070) == 040) + return false; + + if ((extra & 0x1000) == 0) + return false; + for (i = 0; i < 8; i++) + if (list & (0x80 >> i)) + { + words[0] = get_long (addr); + words[1] = get_long (addr + 4); + words[2] = get_long (addr + 8); + addr += 12; + set_from_extended (fpu.registers[i], words, false); + } + if ((opcode & 070) == 030) + m68k_areg (regs, opcode & 7) = addr; + } + 
return true; +} + +static int +do_getexp (mpfr_t value, mpfr_rnd_t rnd) +{ + int t = 0; + + if (mpfr_inf_p (value)) + { + mpfr_set_nan (value); + cur_exceptions |= FPSR_EXCEPTION_OPERR; + } + else if (!mpfr_nan_p (value) && !mpfr_zero_p (value)) + t = mpfr_set_si (value, mpfr_get_exp (value) - 1, rnd); + return t; +} + +static int +do_getman (mpfr_t value) +{ + if (mpfr_inf_p (value)) + { + mpfr_set_nan (value); + cur_exceptions |= FPSR_EXCEPTION_OPERR; + } + else if (!mpfr_nan_p (value) && !mpfr_zero_p (value)) + mpfr_set_exp (value, 1); + return 0; +} + +static int +do_scale (mpfr_t value, mpfr_t reg, mpfr_rnd_t rnd) +{ + long scale; + int t = 0; + + if (mpfr_nan_p (value)) + ; + else if (mpfr_inf_p (value)) + { + mpfr_set_nan (value); + cur_exceptions |= FPSR_EXCEPTION_OPERR; + } + else if (mpfr_fits_slong_p (value, rnd)) + { + scale = mpfr_get_si (value, MPFR_RNDZ); + mpfr_clear_inexflag (); + t = mpfr_mul_2si (value, reg, scale, rnd); + } + else + mpfr_set_inf (value, -mpfr_signbit (value)); + return t; +} + +static int +do_remainder (mpfr_t value, mpfr_t reg, mpfr_rnd_t rnd) +{ + long quo; + int t = 0; + + if (mpfr_nan_p (value) || mpfr_nan_p (reg)) + ; + else if (mpfr_zero_p (value) || mpfr_inf_p (reg)) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_remquo (value, &quo, reg, value, rnd); + if (quo < 0) + quo = (-quo & 0x7f) | 0x80; + else + quo &= 0x7f; + fpu.fpsr.quotient = quo << 16; + return t; +} + +// Unfortunately, mpfr_fmod does not return the quotient bits, so we +// have to reimplement it here +static int +mpfr_rem1 (mpfr_t rem, int *quo, mpfr_t x, mpfr_t y, mpfr_rnd_t rnd) +{ + mpfr_exp_t ex, ey; + int inex, sign, signx = mpfr_signbit (x); + mpz_t mx, my, r; + + mpz_init (mx); + mpz_init (my); + mpz_init (r); + + ex = mpfr_get_z_2exp (mx, x); /* x = mx*2^ex */ + ey = mpfr_get_z_2exp (my, y); /* y = my*2^ey */ + + /* to get rid of sign problems, we compute it separately: + quo(-x,-y) = quo(x,y), rem(-x,-y) = -rem(x,y) + quo(-x,y) = -quo(x,y), 
rem(-x,y) = -rem(x,y) + thus quo = sign(x/y)*quo(|x|,|y|), rem = sign(x)*rem(|x|,|y|) */ + sign = (signx != mpfr_signbit (y)); + mpz_abs (mx, mx); + mpz_abs (my, my); + + /* divide my by 2^k if possible to make operations mod my easier */ + { + unsigned long k = mpz_scan1 (my, 0); + ey += k; + mpz_fdiv_q_2exp (my, my, k); + } + + if (ex <= ey) + { + /* q = x/y = mx/(my*2^(ey-ex)) */ + mpz_mul_2exp (my, my, ey - ex); /* divide mx by my*2^(ey-ex) */ + /* 0 <= |r| <= |my|, r has the same sign as mx */ + mpz_tdiv_qr (mx, r, mx, my); + /* mx is the quotient */ + mpz_tdiv_r_2exp (mx, mx, 7); + *quo = mpz_get_si (mx); + } + else /* ex > ey */ + { + /* to get the low 7 more bits of the quotient, we first compute + R = X mod Y*2^7, where X and Y are defined below. Then the + low 7 of the quotient are floor(R/Y). */ + mpz_mul_2exp (my, my, 7); /* 2^7*Y */ + + mpz_set_ui (r, 2); + mpz_powm_ui (r, r, ex - ey, my); /* 2^(ex-ey) mod my */ + mpz_mul (r, r, mx); + mpz_mod (r, r, my); + + /* now 0 <= r < 2^7*Y */ + mpz_fdiv_q_2exp (my, my, 7); /* back to Y */ + mpz_tdiv_qr (mx, r, r, my); + /* oldr = mx*my + newr */ + *quo = mpz_get_si (mx); + + /* now 0 <= |r| < |my| */ + } + + if (mpz_cmp_ui (r, 0) == 0) + { + inex = mpfr_set_ui (rem, 0, MPFR_RNDN); + /* take into account sign of x */ + if (signx) + mpfr_neg (rem, rem, MPFR_RNDN); + } + else + { + /* take into account sign of x */ + if (signx) + mpz_neg (r, r); + inex = mpfr_set_z_2exp (rem, r, ex > ey ? 
ey : ex, rnd); + } + + if (sign) + *quo |= 0x80; + + mpz_clear (mx); + mpz_clear (my); + mpz_clear (r); + + return inex; +} + +static int +do_fmod (mpfr_t value, mpfr_t reg, mpfr_rnd_t rnd) +{ + int t = 0; + + if (mpfr_nan_p (value) || mpfr_nan_p (reg)) + mpfr_set_nan (value); + else if (mpfr_zero_p (value) || mpfr_inf_p (reg)) + { + mpfr_set_nan (value); + cur_exceptions |= FPSR_EXCEPTION_OPERR; + } + else if (mpfr_zero_p (reg) || mpfr_inf_p (value)) + { + fpu.fpsr.quotient = 0; + t = mpfr_set (value, reg, rnd); + } + else + { + int quo; + + t = mpfr_rem1 (value, &quo, reg, value, rnd); + fpu.fpsr.quotient = quo << 16; + } + return t; +} + +static void +do_fcmp (mpfr_t source, mpfr_t dest) +{ + uae_u32 flags = 0; + + if (mpfr_nan_p (source) || mpfr_nan_p (dest)) + flags |= FPSR_CCB_NAN; + else + { + int cmp = mpfr_cmp (dest, source); + if (cmp < 0) + flags |= FPSR_CCB_NEGATIVE; + else if (cmp == 0) + { + flags |= FPSR_CCB_ZERO; + if ((mpfr_zero_p (dest) || mpfr_inf_p (dest)) && mpfr_signbit (dest)) + flags |= FPSR_CCB_NEGATIVE; + } + } + set_fpccr (flags); +} + +static void +do_ftst (mpfr_t value) +{ + uae_u32 flags = 0; + + if (mpfr_signbit (value)) + flags |= FPSR_CCB_NEGATIVE; + if (mpfr_nan_p (value)) + flags |= FPSR_CCB_NAN; + else if (mpfr_zero_p (value)) + flags |= FPSR_CCB_ZERO; + else if (mpfr_inf_p (value)) + flags |= FPSR_CCB_INFINITY; + set_fpccr (flags); +} + +static bool +fpuop_general (uae_u32 opcode, uae_u32 extra) +{ + mpfr_prec_t prec = get_cur_prec (); + mpfr_rnd_t rnd = get_cur_rnd (); + int reg = (extra >> 7) & 7; + int t = 0; + fpu_register value; + bool ret; + + mpfr_init2 (value.f, prec); + value.nan_bits = DEFAULT_NAN_BITS; + value.nan_sign = 0; + + mpfr_clear_flags (); + set_format (prec); + cur_exceptions = 0; + cur_instruction_address = m68k_getpc () - 4; + if ((extra & 0xfc00) == 0x5c00) + { + // FMOVECR + int rom_index = extra & 0x7f; + if (rom_index == 0 || (rom_index >= 11 && rom_index <= 15)) + t = mpfr_set (value.f, 
fpu_constant_rom[rom_index], rnd); + else if (rom_index >= 48 && rom_index <= 63) + t = mpfr_set (value.f, fpu_constant_rom[rom_index - 32], rnd); + else + mpfr_set_zero (value.f, 0); + set_fp_register (reg, value, t, rnd, true); + } + else if (extra & 0x40) + { + static const char valid[64] = + { + 1, 1, 0, 0, 1, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 1, 1, 0, 1, 1, + 1, 0, 0, 0, 1, 0, 0, 0 + }; + + if (extra & 4) + // FD... + prec = DOUBLE_PREC; + else + // FS... + prec = SINGLE_PREC; + set_format (prec); + MPFR_DECL_INIT (value2, prec); + + if (!fpu.is_integral) + { + ret = false; + goto out; + } + if (!valid[extra & 0x3b]) + { + ret = false; + goto out; + } + if (!get_fp_value (opcode, extra, value)) + { + ret = false; + goto out; + } + + switch (extra & 0x3f) + { + case 0: // FSMOVE + case 4: // FDMOVE + mpfr_set (value2, value.f, rnd); + break; + case 1: // FSSQRT + case 5: // FDSQRT + if (mpfr_sgn (value.f) < 0) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_sqrt (value2, value.f, rnd); + break; + case 24: // FSABS + case 28: // FDABS + t = mpfr_abs (value2, value.f, rnd); + break; + case 26: // FSNEG + case 30: // FDNEG + t = mpfr_neg (value2, value.f, rnd); + break; + case 32: // FSDIV + case 36: // FDDIV + if (mpfr_zero_p (value.f)) + { + if (mpfr_regular_p (fpu.registers[reg].f)) + cur_exceptions |= FPSR_EXCEPTION_DZ; + else if (mpfr_zero_p (fpu.registers[reg].f)) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + } + else if (mpfr_inf_p (value.f) && mpfr_inf_p (fpu.registers[reg].f)) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_div (value2, fpu.registers[reg].f, value.f, rnd); + break; + case 34: // FSADD + case 38: // FDADD + if (mpfr_inf_p (fpu.registers[reg].f) && mpfr_inf_p (value.f) + && mpfr_signbit (fpu.registers[reg].f) != mpfr_signbit (value.f)) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_add (value2, fpu.registers[reg].f, value.f, rnd); + break; + case 35: // FSMUL + 
case 39: // FDMUL + if ((mpfr_zero_p (value.f) && mpfr_inf_p (fpu.registers[reg].f)) + || (mpfr_inf_p (value.f) && mpfr_zero_p (fpu.registers[reg].f))) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_mul (value2, fpu.registers[reg].f, value.f, rnd); + break; + case 40: // FSSUB + case 44: // FDSUB + if (mpfr_inf_p (fpu.registers[reg].f) && mpfr_inf_p (value.f) + && mpfr_signbit (fpu.registers[reg].f) == mpfr_signbit (value.f)) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_sub (value2, fpu.registers[reg].f, value.f, rnd); + break; + } + set_fp_register (reg, value2, t, rnd, true); + } + else if ((extra & 0x30) == 0x30) + { + if ((extra & 15) > 10 || (extra & 15) == 9) + { + ret = false; + goto out; + } + if (!get_fp_value (opcode, extra, value)) + { + ret = false; + goto out; + } + + if ((extra & 15) < 8) + { + // FSINCOS + int reg2 = extra & 7; + MPFR_DECL_INIT (value2, prec); + + if (mpfr_inf_p (value.f)) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_sin_cos (value.f, value2, value.f, rnd); + if (reg2 != reg) + set_fp_register (reg2, value2, t >> 2, rnd, false); + set_fp_register (reg, value, t & 3, rnd, true); + } + else if ((extra & 15) == 8) + // FCMP + do_fcmp (value.f, fpu.registers[reg].f); + else + // FTST + do_ftst (value.f); + } + else + { + static const char valid[64] = + { + 1, 1, 1, 1, 1, 0, 1, 0, + 1, 1, 1, 0, 1, 1, 1, 1, + 1, 1, 1, 0, 1, 1, 1, 0, + 1, 1, 1, 0, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1 + }; + if (!valid[extra & 0x3f]) + { + ret = false; + goto out; + } + if (!get_fp_value (opcode, extra, value)) + { + ret = false; + goto out; + } + + switch (extra & 0x3f) + { + case 0: // FMOVE + break; + case 1: // FINT + t = mpfr_rint (value.f, value.f, rnd); + break; + case 2: // FSINH + t = mpfr_sinh (value.f, value.f, rnd); + break; + case 3: // FINTRZ + t = mpfr_rint (value.f, value.f, MPFR_RNDZ); + break; + case 4: // FSQRT + if (mpfr_sgn (value.f) < 0) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_sqrt (value.f, 
value.f, rnd); + break; + case 6: // FLOGNP1 + if (!mpfr_nan_p (value.f)) + { + int cmp = mpfr_cmp_si (value.f, -1); + if (cmp == 0) + cur_exceptions |= FPSR_EXCEPTION_DZ; + else if (cmp < 0) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + } + t = mpfr_log1p (value.f, value.f, rnd); + break; + case 8: // FETOXM1 + t = mpfr_expm1 (value.f, value.f, rnd); + break; + case 9: // FTANH + t = mpfr_tanh (value.f, value.f, rnd); + break; + case 10: // FATAN + t = mpfr_atan (value.f, value.f, rnd); + break; + case 12: // FASIN + if (mpfr_cmpabs (value.f, FPU_CONSTANT_ONE) > 0) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_asin (value.f, value.f, rnd); + break; + case 13: // FATANH + if (mpfr_cmpabs (value.f, FPU_CONSTANT_ONE) > 0) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_atanh (value.f, value.f, rnd); + break; + case 14: // FSIN + if (mpfr_inf_p (value.f)) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_sin (value.f, value.f, rnd); + break; + case 15: // FTAN + if (mpfr_inf_p (value.f)) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_tan (value.f, value.f, rnd); + break; + case 16: // FETOX + t = mpfr_exp (value.f, value.f, rnd); + break; + case 17: // FTWOTOX + t = mpfr_ui_pow (value.f, 2, value.f, rnd); + break; + case 18: // FTENTOX + t = mpfr_ui_pow (value.f, 10, value.f, rnd); + break; + case 20: // FLOGN + if (mpfr_zero_p (value.f)) + cur_exceptions |= FPSR_EXCEPTION_DZ; + else if (mpfr_sgn (value.f) < 0) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_log (value.f, value.f, rnd); + break; + case 21: // FLOG10 + if (mpfr_zero_p (value.f)) + cur_exceptions |= FPSR_EXCEPTION_DZ; + else if (mpfr_sgn (value.f) < 0) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_log10 (value.f, value.f, rnd); + break; + case 22: // FLOG2 + if (mpfr_zero_p (value.f)) + cur_exceptions |= FPSR_EXCEPTION_DZ; + else if (mpfr_sgn (value.f) < 0) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_log2 (value.f, value.f, rnd); + break; + case 24: // FABS + t = 
mpfr_abs (value.f, value.f, rnd); + value.nan_sign = 0; + break; + case 25: // FCOSH + t = mpfr_cosh (value.f, value.f, rnd); + break; + case 26: // FNEG + t = mpfr_neg (value.f, value.f, rnd); + value.nan_sign = !value.nan_sign; + break; + case 28: // FACOS + if (mpfr_cmpabs (value.f, FPU_CONSTANT_ONE) > 0) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_acos (value.f, value.f, rnd); + break; + case 29: // FCOS + if (mpfr_inf_p (value.f)) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_cos (value.f, value.f, rnd); + break; + case 30: // FGETEXP + t = do_getexp (value.f, rnd); + break; + case 31: // FGETMAN + t = do_getman (value.f); + break; + case 32: // FDIV + if (mpfr_zero_p (value.f)) + { + if (mpfr_regular_p (fpu.registers[reg].f)) + cur_exceptions |= FPSR_EXCEPTION_DZ; + else if (mpfr_zero_p (fpu.registers[reg].f)) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + } + else if (mpfr_inf_p (value.f) && mpfr_inf_p (fpu.registers[reg].f)) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_div (value.f, fpu.registers[reg].f, value.f, rnd); + break; + case 33: // FMOD + t = do_fmod (value.f, fpu.registers[reg].f, rnd); + break; + case 34: // FADD + if (mpfr_inf_p (fpu.registers[reg].f) && mpfr_inf_p (value.f) + && mpfr_signbit (fpu.registers[reg].f) != mpfr_signbit (value.f)) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_add (value.f, fpu.registers[reg].f, value.f, rnd); + break; + case 35: // FMUL + if ((mpfr_zero_p (value.f) && mpfr_inf_p (fpu.registers[reg].f)) + || (mpfr_inf_p (value.f) && mpfr_zero_p (fpu.registers[reg].f))) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_mul (value.f, fpu.registers[reg].f, value.f, rnd); + break; + case 36: // FSGLDIV + { + MPFR_DECL_INIT (value2, SINGLE_PREC); + + set_format (SINGLE_PREC); + if (mpfr_zero_p (value.f)) + { + if (mpfr_regular_p (fpu.registers[reg].f)) + cur_exceptions |= FPSR_EXCEPTION_DZ; + else if (mpfr_zero_p (fpu.registers[reg].f)) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + } + else if 
(mpfr_inf_p (value.f) && mpfr_inf_p (fpu.registers[reg].f)) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_div (value2, fpu.registers[reg].f, value.f, rnd); + mpfr_set (value.f, value2, rnd); + } + break; + case 37: // FREM + t = do_remainder (value.f, fpu.registers[reg].f, rnd); + break; + case 38: // FSCALE + t = do_scale (value.f, fpu.registers[reg].f, rnd); + break; + case 39: // FSGLMUL + { + MPFR_DECL_INIT (value2, SINGLE_PREC); + + set_format (SINGLE_PREC); + if ((mpfr_zero_p (value.f) && mpfr_inf_p (fpu.registers[reg].f)) + || (mpfr_inf_p (value.f) && mpfr_zero_p (fpu.registers[reg].f))) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_mul (value2, fpu.registers[reg].f, value.f, rnd); + mpfr_set (value.f, value2, rnd); + } + break; + case 40: // FSUB + if (mpfr_inf_p (fpu.registers[reg].f) && mpfr_inf_p (value.f) + && mpfr_signbit (fpu.registers[reg].f) == mpfr_signbit (value.f)) + cur_exceptions |= FPSR_EXCEPTION_OPERR; + t = mpfr_sub (value.f, fpu.registers[reg].f, value.f, rnd); + break; + } + set_fp_register (reg, value, t, rnd, true); + } + update_exceptions (); + ret = true; + out: + mpfr_clear (value.f); + return ret; +} + +void +fpuop_arithmetic (uae_u32 opcode, uae_u32 extra) +{ + bool valid; + + switch ((extra >> 13) & 7) + { + case 3: + valid = fpuop_fmove_memory (opcode, extra); + break; + case 4: + case 5: + valid = fpuop_fmovem_control (opcode, extra); + break; + case 6: + case 7: + valid = fpuop_fmovem_register (opcode, extra); + break; + case 0: + case 2: + valid = fpuop_general (opcode, extra); + break; + default: + valid = false; + break; + } + + if (!valid) + { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + } +} + +static bool +check_fp_cond (uae_u32 pred) +{ + uae_u32 fpcc = get_fpccr (); + + if ((pred & 16) != 0 && (fpcc & FPSR_CCB_NAN) != 0) + { + // IEEE non-aware test + set_exception_status (get_exception_status () | FPSR_EXCEPTION_BSUN); + set_accrued_exception (get_accrued_exception () | FPSR_ACCR_IOP); + } + + 
switch (pred & 15) + { + case 0: // F / SF + return false; + case 1: // EQ /SEQ + return (fpcc & FPSR_CCB_ZERO) != 0; + case 2: // OGT / GT + return (fpcc & (FPSR_CCB_NAN | FPSR_CCB_ZERO | FPSR_CCB_NEGATIVE)) == 0; + case 3: // OGE / GE + return (fpcc & FPSR_CCB_ZERO) != 0 || (fpcc & (FPSR_CCB_NAN | FPSR_CCB_NEGATIVE)) == 0; + case 4: // OLT / LT + return (fpcc & (FPSR_CCB_NEGATIVE | FPSR_CCB_NAN | FPSR_CCB_ZERO)) == FPSR_CCB_NEGATIVE; + case 5: // OLE / LE + return (fpcc & FPSR_CCB_ZERO) != 0 || (fpcc & (FPSR_CCB_NEGATIVE | FPSR_CCB_NAN)) == FPSR_CCB_NEGATIVE; + case 6: // OGL / GL + return (fpcc & (FPSR_CCB_NAN | FPSR_CCB_ZERO)) == 0; + case 7: // OR / GLE + return (fpcc & FPSR_CCB_NAN) == 0; + case 8: // UN / NGLE + return (fpcc & FPSR_CCB_NAN) != 0; + case 9: // UEQ / NGL + return (fpcc & (FPSR_CCB_NAN | FPSR_CCB_ZERO)) != 0; + case 10: // UGT / NLE + return (fpcc & FPSR_CCB_NAN) != 0 || (fpcc & (FPSR_CCB_NEGATIVE | FPSR_CCB_ZERO)) == 0; + case 11: // UGE / NLT + return (fpcc & (FPSR_CCB_NEGATIVE | FPSR_CCB_NAN | FPSR_CCB_ZERO)) != FPSR_CCB_NEGATIVE; + case 12: // ULT / NGE + return (fpcc & FPSR_CCB_NAN) != 0 || (fpcc & (FPSR_CCB_NEGATIVE | FPSR_CCB_ZERO)) == FPSR_CCB_NEGATIVE; + case 13: // ULE / NGT + return (fpcc & (FPSR_CCB_NAN | FPSR_CCB_ZERO | FPSR_CCB_NEGATIVE)) != 0; + case 14: // NE / SNE + return (fpcc & FPSR_CCB_ZERO) == 0; + case 15: // T / ST + return true; + default: + return false; + } +} + +void +fpuop_bcc (uae_u32 opcode, uaecptr pc, uae_u32 disp) +{ + if (check_fp_cond (opcode)) + { + if (!(opcode & (1 << 6))) + disp = (uae_s16) disp; + m68k_setpc (pc + disp); + } +} + +void +fpuop_scc (uae_u32 opcode, uae_u32 extra) +{ + uae_u32 addr; + int value = check_fp_cond (extra) ? 
0xff : 0; + if ((opcode & 070) == 0) + { + int reg = opcode & 7; + m68k_dreg (regs, reg) = (m68k_dreg (regs, reg) & ~0xff) | value; + } + else if (!get_fp_addr (opcode, &addr, true)) + { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + } + else + { + switch (opcode & 070) + { + case 030: + m68k_areg (regs, opcode & 7) += (opcode & 7) == 7 ? 2 : 1; + break; + case 040: + addr -= (opcode & 7) == 7 ? 2 : 1; + m68k_areg (regs, opcode & 7) = addr; + } + put_byte (addr, value); + } +} + +void +fpuop_dbcc (uae_u32 opcode, uae_u32 extra) +{ + uaecptr pc = m68k_getpc (); + uae_s16 disp = next_iword (); + + if (!check_fp_cond (extra)) + { + int reg = opcode & 7; + uae_u16 cnt = (m68k_dreg (regs, reg) & 0xffff) - 1; + m68k_dreg (regs, reg) = (m68k_dreg (regs, reg) & ~0xffff) | cnt; + if (cnt != 0xffff) + m68k_setpc (pc + disp); + } +} + +void +fpuop_trapcc (uae_u32, uaecptr oldpc, uae_u32 extra) +{ + if (check_fp_cond (extra)) + Exception (7, oldpc - 2); +} + +void +fpuop_save (uae_u32 opcode) +{ + uae_u32 addr; + + if ((opcode & 070) == 030 + || !get_fp_addr (opcode, &addr, true)) + { + m68k_setpc (m68k_getpc () - 2); + op_illg (opcode); + return; + } + + if (fpu.is_integral) + { + // 4 byte 68040 IDLE frame + // FIXME: generate proper FPU stack frames that does not result + // in undefined behaviour from FPSP040 + if ((opcode & 070) == 040) + { + addr -= 4; + m68k_areg (regs, opcode & 7) = addr; + } + put_long (addr, 0x41000000); + } + else + { + // 28 byte 68881 IDLE frame + if ((opcode & 070) == 040) + { + addr -= 28; + m68k_areg (regs, opcode & 7) = addr; + } + put_long (addr, 0x1f180000); + for (int i = 0; i < 6; i++) + { + addr += 4; + put_long (addr, 0); + } + } +} + +void +fpuop_restore (uae_u32 opcode) +{ + uae_u32 addr; + uae_u32 format; + + if ((opcode & 070) == 040 + || !get_fp_addr (opcode, &addr, false)) + { + m68k_setpc (m68k_getpc () - 2); + op_illg (opcode); + return; + } + + format = get_long (addr); + addr += 4; + if ((format & 0xff000000) == 0) + 
// NULL frame + fpu_reset (); + else + addr += (format & 0xff0000) >> 16; + if ((opcode & 070) == 030) + m68k_areg (regs, opcode & 7) = addr; +} + +void fpu_set_fpsr(uae_u32 new_fpsr) +{ + set_fpsr(new_fpsr); +} + +uae_u32 fpu_get_fpsr(void) +{ + return get_fpsr(); +} + +void fpu_set_fpcr(uae_u32 new_fpcr) +{ + set_fpcr(new_fpcr); +} + +uae_u32 fpu_get_fpcr(void) +{ + return get_fpcr(); +} diff --git a/BasiliskII/src/uae_cpu/fpu/fpu_uae.cpp b/BasiliskII/src/uae_cpu/fpu/fpu_uae.cpp new file mode 100644 index 00000000..23efd8ef --- /dev/null +++ b/BasiliskII/src/uae_cpu/fpu/fpu_uae.cpp @@ -0,0 +1,2553 @@ +/* + * fpu/fpu_uae.cpp - the old UAE FPU + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * MC68881/68040 fpu emulation + * + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * UAE - The Un*x Amiga Emulator + * + * MC68881 emulation + * + * Copyright 1996 Herman ten Brugge + * + * + * Following fixes by Lauri Pesonen, July 1999: + * + * FMOVEM list handling: + * The lookup tables did not work correctly, rewritten. + * FINT: + * (int) cast does not work, fixed. + * Further, now honors the FPU fpcr rounding modes. + * FINTRZ: + * (int) cast cannot be used, fixed. + * FGETEXP: + * Input argument value 0 returned erroneous value. + * FMOD: + * (int) cast cannot be used. Replaced by proper rounding. + * Quotient byte handling was missing. + * FREM: + * (int) cast cannot be used. Replaced by proper rounding. + * Quotient byte handling was missing. + * FSCALE: + * Input argument value 0 was not handled correctly. + * FMOVEM Control Registers to/from address FPU registers An: + * A bug caused the code never been called. + * FMOVEM Control Registers pre-decrement: + * Moving of control regs from memory to FPP was not handled properly, + * if not all of the three FPU registers were moved. + * Condition code "Not Greater Than or Equal": + * Returned erroneous value. + * FSINCOS: + * Cosine must be loaded first if same register. + * FMOVECR: + * Status register was not updated (yes, this affects it). + * FMOVE -> reg: + * Status register was not updated (yes, this affects it). + * FMOVE reg -> reg: + * Status register was not updated. + * FDBcc: + * The loop termination condition was wrong. + * Possible leak from int16 to int32 fixed. + * get_fp_value: + * Immediate addressing mode && Operation Length == Byte -> + * Use the low-order byte of the extension word. + * Now FPU fpcr high 16 bits are always read as zeroes, no matter what was + * written to them. 
+ * + * Other: + * - Optimized single/double/extended to/from conversion functions. + * Huge speed boost, but not (necessarily) portable to other systems. + * Enabled/disabled by #define FPU_HAVE_IEEE_DOUBLE 1 + * - Optimized versions of FSCALE, FGETEXP, FGETMAN + * - Conversion routines now handle NaN and infinity better. + * - Some constants precalculated. Not all compilers can optimize the + * expressions previously used. + * + * TODO: + * - Floating point exceptions. + * - More Infinity/NaN/overflow/underflow checking. + * - FPU instruction_address (only needed when exceptions are implemented) + * - Should be written in assembly to support long doubles. + * - Precision rounding single/double + */ + + +#include "sysdeps.h" +#include "memory.h" +#include "readcpu.h" +#include "newcpu.h" +#include "main.h" +#define FPU_IMPLEMENTATION +#include "fpu/fpu.h" +#include "fpu/fpu_uae.h" + +#ifdef HAVE_NEW_HEADERS +#define _GLIBCPP_USE_C99 1 +# include +# include +using namespace __gnu_cxx; +#undef _GLIBCPP_USE_C99 +#else +# include +# include +#endif + +/* Global FPU context */ +fpu_t fpu; + +/* -------------------------------------------------------------------------- */ +/* --- Native Support --- */ +/* -------------------------------------------------------------------------- */ + +#include "fpu/flags.h" +#include "fpu/exceptions.h" +#include "fpu/rounding.h" +#include "fpu/impl.h" + +#include "fpu/flags.cpp" +#include "fpu/exceptions.cpp" + +/* -------------------------------------------------------------------------- */ +/* --- Scopes Definition --- */ +/* -------------------------------------------------------------------------- */ + +#undef PUBLIC +#define PUBLIC /**/ + +#undef PRIVATE +#define PRIVATE static + +#undef FFPU +#define FFPU /**/ + +#undef FPU +#define FPU fpu. 
+ +/* -------------------------------------------------------------------------- */ +/* --- Debugging --- */ +/* -------------------------------------------------------------------------- */ + +PUBLIC void FFPU fpu_dump_registers(void) +{ + for (int i = 0; i < 8; i++){ + printf ("FP%d: %g ", i, fpu_get_register(i)); + if ((i & 3) == 3) + printf ("\n"); + } +} + +PUBLIC void FFPU fpu_dump_flags(void) +{ + printf ("N=%d Z=%d I=%d NAN=%d\n", + (get_fpsr() & FPSR_CCB_NEGATIVE) != 0, + (get_fpsr() & FPSR_CCB_ZERO)!= 0, + (get_fpsr() & FPSR_CCB_INFINITY) != 0, + (get_fpsr() & FPSR_CCB_NAN) != 0); +} + +/* single : S 8*E 23*F */ +/* double : S 11*E 52*F */ +/* extended : S 15*E 64*F */ +/* E = 0 & F = 0 -> 0 */ +/* E = MAX & F = 0 -> Infin */ +/* E = MAX & F # 0 -> NotANumber */ +/* E = biased by 127 (single) ,1023 (double) ,16383 (extended) */ + +#if FPU_DEBUG + +PUBLIC void FFPU dump_registers(const char * str) +{ + char temp_str[512]; + + sprintf(temp_str, "%s: %.04f, %.04f, %.04f, %.04f, %.04f, %.04f, %.04f, %.04f\n", + str, + fpu_get_register(0), fpu_get_register(1), fpu_get_register(2), fpu_get_register(3), + fpu_get_register(4), fpu_get_register(5), fpu_get_register(6), fpu_get_register(7) ); + + fpu_debug((temp_str)); +} + +PUBLIC void FFPU dump_first_bytes(uae_u8 * buffer, uae_s32 actual) +{ + char temp_buf1[256], temp_buf2[10]; + int bytes = sizeof(temp_buf1)/3-1-3; + if (actual < bytes) + bytes = actual; + + temp_buf1[0] = 0; + for (int i = 0; i < bytes; i++) { + sprintf(temp_buf2, "%02x ", (uae_u32)buffer[i]); + strcat(temp_buf1, temp_buf2); + } + + strcat(temp_buf1, "\n"); + fpu_debug((temp_buf1)); +} + +#else + +PUBLIC void FFPU dump_registers(const char *) +{ +} + +#define dump_first_bytes(a,b) + +#endif + +PRIVATE inline fpu_register FFPU round_to_zero(fpu_register const & x) +{ + return (x < 0.0 ? 
ceil(x) : floor(x)); +} + +PRIVATE inline fpu_register FFPU round_to_nearest(fpu_register const & x) +{ + return floor(x + 0.5); +} + +#if FPU_HAVE_IEEE_DOUBLE + +#ifndef HAVE_ISNAN +#define isnan(x) do_isnan((x)) +#endif + +PRIVATE inline bool FFPU do_isnan(fpu_register const & r) +{ + fpu_register_parts const p = { r }; + if ((p.parts[FHI] & 0x7FF00000) == 0x7FF00000) { + // logical or is faster here. + if ((p.parts[FHI] & 0x000FFFFF) || p.parts[FLO]) { + return true; + } + } + return false; +} + +#ifndef HAVE_ISINF +#define isinf(x) do_isinf((x)) +#endif + +PRIVATE inline bool FFPU do_isinf(fpu_register const & r) +{ + fpu_register_parts const p = { r }; + if ((p.parts[FHI] & 0x7FF00000) == 0x7FF00000 && p.parts[FLO] == 0) { + return true; + } + return false; +} + +#ifndef HAVE_ISNEG +#define isneg(x) do_isneg((x)) +#endif + +PRIVATE inline bool FFPU do_isneg(fpu_register const & r) +{ + fpu_register_parts const p = { r }; + return ((p.parts[FHI] & 0x80000000) != 0); +} + +#ifndef HAVE_ISZERO +#define iszero(x) do_iszero((x)) +#endif + +PRIVATE inline bool FFPU do_iszero(fpu_register const & r) +{ + fpu_register_parts const p = { r }; + return (((p.parts[FHI] & 0x7FF00000) == 0) && p.parts[FLO] == 0); +} + +// May be optimized for particular processors +#ifndef FPU_USE_NATIVE_FLAGS +PRIVATE inline void FFPU make_fpsr(fpu_register const & r) +{ + FPU fpsr.condition_codes + = (iszero(r) ? NATIVE_FFLAG_ZERO : 0) + | (isneg(r) ? NATIVE_FFLAG_NEGATIVE : 0) + | (isnan(r) ? NATIVE_FFLAG_NAN : 0) + | (isinf(r) ? 
NATIVE_FFLAG_INFINITY : 0) + ; +} +#endif + +PRIVATE inline void FFPU get_dest_flags(fpu_register const & r) +{ + fl_dest.negative = isneg(r); + fl_dest.zero = iszero(r); + fl_dest.infinity = isinf(r); + fl_dest.nan = isnan(r); + fl_dest.in_range = !fl_dest.zero && !fl_dest.infinity && !fl_dest.nan; +} + +PRIVATE inline void FFPU get_source_flags(fpu_register const & r) +{ + fl_source.negative = isneg(r); + fl_source.zero = iszero(r); + fl_source.infinity = isinf(r); + fl_source.nan = isnan(r); + fl_source.in_range = !fl_source.zero && !fl_source.infinity && !fl_source.nan; +} + +PRIVATE inline void FFPU make_nan(fpu_register & r) +{ + fpu_register_parts p; + p.parts[FLO] = 0xffffffff; + p.parts[FHI] = 0x7fffffff; + r = p.val; +} + +PRIVATE inline void FFPU make_zero_positive(fpu_register & r) +{ + fpu_register_parts p; + p.parts[FLO] = p.parts[FHI] = 0; + r = p.val; +} + +PRIVATE inline void FFPU make_zero_negative(fpu_register & r) +{ + fpu_register_parts p; + p.parts[FLO] = 0; + p.parts[FHI] = 0x80000000; + r = p.val; +} + +PRIVATE inline void FFPU make_inf_positive(fpu_register & r) +{ + fpu_register_parts p; + p.parts[FLO] = 0; + p.parts[FHI] = 0x7FF00000; + r = p.val; +} + +PRIVATE inline void FFPU make_inf_negative(fpu_register & r) +{ + fpu_register_parts p; + p.parts[FLO] = 0; + p.parts[FHI] = 0xFFF00000; + r = p.val; +} + +PRIVATE inline void FFPU fast_scale(fpu_register & r, int add) +{ + fpu_register_parts p = { r }; + int exp = (p.parts[FHI] & 0x7FF00000) >> 20; + // TODO: overflow flags + exp += add; + if(exp >= 2047) { + make_inf_positive(r); + return; + } else if(exp < 0) { + // keep sign (+/- 0) + p.parts[FHI] &= 0x80000000; + } else { + p.parts[FHI] = (p.parts[FHI] & 0x800FFFFF) | ((uae_u32)exp << 20); + } + r = p.val; +} + +PRIVATE inline fpu_register FFPU fast_fgetexp(fpu_register const & r) +{ + fpu_register_parts const p = { r }; + int exp = (p.parts[FHI] & 0x7FF00000) >> 20; + return( exp - 1023 ); +} + +// Normalize to range 1..2 +PRIVATE 
inline void FFPU fast_remove_exponent(fpu_register & r) +{ + fpu_register_parts p = { r }; + p.parts[FHI] = (p.parts[FHI] & 0x800FFFFF) | 0x3FF00000; + r = p.val; +} + +// The sign of the quotient is the exclusive-OR of the sign bits +// of the source and destination operands. +PRIVATE inline uae_u32 FFPU get_quotient_sign(fpu_register const & ra, fpu_register const & rb) +{ + fpu_register_parts const a = { ra }; + fpu_register_parts const b = { rb }; + return (((a.parts[FHI] ^ b.parts[FHI]) & 0x80000000) ? FPSR_QUOTIENT_SIGN : 0); +} + +// Quotient Byte is loaded with the sign and least significant +// seven bits of the quotient. +PRIVATE inline void FFPU make_quotient(fpu_register const & quotient, uae_u32 sign) +{ + uae_u32 lsb = (uae_u32)fabs(quotient) & 0x7f; + FPU fpsr.quotient = sign | (lsb << 16); +} + +// to_single +PRIVATE inline fpu_register FFPU make_single(uae_u32 value) +{ + if ((value & 0x7fffffff) == 0) + return (0.0); + + fpu_register result; + fpu_register_parts p; + + uae_u32 sign = (value & 0x80000000); + uae_u32 exp = ((value & 0x7F800000) >> 23) + 1023 - 127; + + p.parts[FLO] = value << 29; + p.parts[FHI] = sign | (exp << 20) | ((value & 0x007FFFFF) >> 3); + + result = p.val; + + fpu_debug(("make_single (%X) = %.04f\n",value,(double)result)); + + return(result); +} + +// from_single +PRIVATE inline uae_u32 FFPU extract_single(fpu_register const & src) +{ + if (src == 0.0) + return 0; + + uae_u32 result; + fpu_register_parts const p = { src }; + + uae_u32 sign = (p.parts[FHI] & 0x80000000); + uae_u32 exp = (p.parts[FHI] & 0x7FF00000) >> 20; + + if(exp + 127 < 1023) { + exp = 0; + } else if(exp > 1023 + 127) { + exp = 255; + } else { + exp = exp + 127 - 1023; + } + + result = sign | (exp << 23) | ((p.parts[FHI] & 0x000FFFFF) << 3) | (p.parts[FLO] >> 29); + + fpu_debug(("extract_single (%.04f) = %X\n",(double)src,result)); + + return (result); +} + +// to_exten +PRIVATE inline fpu_register FFPU make_extended(uae_u32 wrd1, uae_u32 wrd2, uae_u32 
wrd3) +{ + if ((wrd1 & 0x7fff0000) == 0 && wrd2 == 0 && wrd3 == 0) + return 0.0; + + fpu_register result; + fpu_register_parts p; + + uae_u32 sign = wrd1 & 0x80000000; + uae_u32 exp = (wrd1 >> 16) & 0x7fff; + + // The explicit integer bit is not set, must normalize. + if((wrd2 & 0x80000000) == 0) { + fpu_debug(("make_extended denormalized mantissa (%X,%X,%X)\n",wrd1,wrd2,wrd3)); + if( wrd2 | wrd3 ) { + // mantissa, not fraction. + uae_u64 man = ((uae_u64)wrd2 << 32) | wrd3; + while( exp > 0 && (man & UVAL64(0x8000000000000000)) == 0 ) { + man <<= 1; + exp--; + } + wrd2 = (uae_u32)( man >> 32 ); + wrd3 = (uae_u32)( man & 0xFFFFFFFF ); + } else { + if(exp == 0x7FFF) { + // Infinity. + } else { + // Zero + exp = 16383 - 1023; + } + } + } + + if(exp < 16383 - 1023) { + // should set underflow. + exp = 0; + } else if(exp > 16383 + 1023) { + // should set overflow. + exp = 2047; + } else { + exp = exp + 1023 - 16383; + } + + // drop the explicit integer bit. + p.parts[FLO] = (wrd2 << 21) | (wrd3 >> 11); + p.parts[FHI] = sign | (exp << 20) | ((wrd2 & 0x7FFFFFFF) >> 11); + + result = p.val; + + fpu_debug(("make_extended (%X,%X,%X) = %.04f\n",wrd1,wrd2,wrd3,(double)result)); + + return(result); +} + +/* + Would be so much easier with full size floats :( + ... this is so vague. +*/ +// make_extended_no_normalize +PRIVATE inline void FFPU make_extended_no_normalize( + uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3, fpu_register & result +) +{ + // Is it zero? + if ((wrd1 & 0x7fff0000) == 0 && wrd2 == 0 && wrd3 == 0) { + make_zero_positive(result); + return; + } + + // Is it NaN? + if( (wrd1 & 0x7FFF0000) == 0x7FFF0000 ) { + if( (wrd1 & 0x0000FFFF) || wrd2 || wrd3 ) { + make_nan(result); + return; + } + } + + uae_u32 sign = wrd1 & 0x80000000; + uae_u32 exp = (wrd1 >> 16) & 0x7fff; + + if(exp < 16383 - 1023) { + // should set underflow. + exp = 0; + } else if(exp > 16383 + 1023) { + // should set overflow. 
+ exp = 2047; + } else { + exp = exp + 1023 - 16383; + } + + // drop the explicit integer bit. + fpu_register_parts p; + p.parts[FLO] = (wrd2 << 21) | (wrd3 >> 11); + p.parts[FHI] = sign | (exp << 20) | ((wrd2 & 0x7FFFFFFF) >> 11); + + result = p.val; + + fpu_debug(("make_extended (%X,%X,%X) = %.04f\n",wrd1,wrd2,wrd3,(double)result)); +} + +// from_exten +PRIVATE inline void FFPU extract_extended(fpu_register const & src, + uae_u32 * wrd1, uae_u32 * wrd2, uae_u32 * wrd3 +) +{ + if (src == 0.0) { + *wrd1 = *wrd2 = *wrd3 = 0; + return; + } + + fpu_register_parts const p = { src }; + + fpu_debug(("extract_extended (%X,%X)\n",p.parts[FLO],p.parts[FHI])); + + uae_u32 sign = p.parts[FHI] & 0x80000000; + + uae_u32 exp = ((p.parts[FHI] >> 20) & 0x7ff); + // Check for maximum + if(exp == 0x7FF) { + exp = 0x7FFF; + } else { + exp += 16383 - 1023; + } + + *wrd1 = sign | (exp << 16); + // always set the explicit integer bit. + *wrd2 = 0x80000000 | ((p.parts[FHI] & 0x000FFFFF) << 11) | ((p.parts[FLO] & 0xFFE00000) >> 21); + *wrd3 = p.parts[FLO] << 11; + + fpu_debug(("extract_extended (%.04f) = %X,%X,%X\n",(double)src,*wrd1,*wrd2,*wrd3)); +} + +// to_double +PRIVATE inline fpu_register FFPU make_double(uae_u32 wrd1, uae_u32 wrd2) +{ + if ((wrd1 & 0x7fffffff) == 0 && wrd2 == 0) + return 0.0; + + fpu_register result; + fpu_register_parts p; + p.parts[FLO] = wrd2; + p.parts[FHI] = wrd1; + + result = p.val; + + fpu_debug(("make_double (%X,%X) = %.04f\n",wrd1,wrd2,(double)result)); + + return(result); +} + +// from_double +PRIVATE inline void FFPU extract_double(fpu_register const & src, + uae_u32 * wrd1, uae_u32 * wrd2 +) +{ +/* + if (src == 0.0) { + *wrd1 = *wrd2 = 0; + return; + } +*/ + fpu_register_parts const p = { src }; + *wrd2 = p.parts[FLO]; + *wrd1 = p.parts[FHI]; + + fpu_debug(("extract_double (%.04f) = %X,%X\n",(double)src,*wrd1,*wrd2)); +} + +#else // !FPU_HAVE_IEEE_DOUBLE + +#ifndef FPU_USE_NATIVE_FLAGS +PRIVATE inline void FFPU make_fpsr(fpu_register const & r) +{ + 
FPU fpsr.condition_codes + = ((r == 0.0) ? NATIVE_FFLAG_ZERO : 0) + | ((r < 0.0) ? NATIVE_FFLAG_NEGATIVE : 0) + ; +} +#endif + +// make_single +PRIVATE inline fpu_register FFPU make_single(uae_u32 value) +{ + if ((value & 0x7fffffff) == 0) + return (0.0); + + fpu_register frac = (fpu_register) ((value & 0x7fffff) | 0x800000) / 8388608.0; + if (value & 0x80000000) + frac = -frac; + + fpu_register result = ldexp (frac, (int)((value >> 23) & 0xff) - 127); + fpu_debug(("make_single (%X) = %.04f\n",value,(double)result)); + + return (result); +} + +// extract_single +PRIVATE inline uae_u32 FFPU extract_single(fpu_register const & src) +{ + int expon; + uae_u32 tmp, result; + fpu_register frac; +#if FPU_DEBUG + fpu_register src0 = src; +#endif + + if (src == 0.0) + return 0; + if (src < 0) { + tmp = 0x80000000; + src = -src; + } else { + tmp = 0; + } + frac = frexp (src, &expon); + frac += 0.5 / 16777216.0; + if (frac >= 1.0) { + frac /= 2.0; + expon++; + } + result = tmp | (((expon + 127 - 1) & 0xff) << 23) | (((int) (frac * 16777216.0)) & 0x7fffff); + + // fpu_debug(("extract_single (%.04f) = %X\n",(float)src0,result)); + + return (result); +} + +// to exten +PRIVATE inline fpu_register FFPU make_extended(uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3) +{ + fpu_register frac, result; + + if ((wrd1 & 0x7fff0000) == 0 && wrd2 == 0 && wrd3 == 0) + return 0.0; + frac = (fpu_register) wrd2 / 2147483648.0 + + (fpu_register) wrd3 / 9223372036854775808.0; + if (wrd1 & 0x80000000) + frac = -frac; + result = ldexp (frac, (int)((wrd1 >> 16) & 0x7fff) - 16383); + + fpu_debug(("make_extended (%X,%X,%X) = %.04f\n",wrd1,wrd2,wrd3,(double)result)); + + return result; +} + +// extract_extended +PRIVATE inline void FFPU extract_extended(fpu_register const & src, uae_u32 * wrd1, uae_u32 * wrd2, uae_u32 * wrd3) +{ + int expon; + fpu_register frac; +#if FPU_DEBUG + fpu_register src0 = src; +#endif + + if (src == 0.0) { + *wrd1 = 0; + *wrd2 = 0; + *wrd3 = 0; + return; + } + if (src < 0) { + *wrd1 
= 0x80000000; + src = -src; + } else { + *wrd1 = 0; + } + frac = frexp (src, &expon); + frac += 0.5 / 18446744073709551616.0; + if (frac >= 1.0) { + frac /= 2.0; + expon++; + } + *wrd1 |= (((expon + 16383 - 1) & 0x7fff) << 16); + *wrd2 = (uae_u32) (frac * 4294967296.0); + *wrd3 = (uae_u32) (frac * 18446744073709551616.0 - *wrd2 * 4294967296.0); + + // fpu_debug(("extract_extended (%.04f) = %X,%X,%X\n",(float)src0,*wrd1,*wrd2,*wrd3)); +} + +// make_double +PRIVATE inline fpu_register FFPU make_double(uae_u32 wrd1, uae_u32 wrd2) +{ + if ((wrd1 & 0x7fffffff) == 0 && wrd2 == 0) + return 0.0; + + fpu_register frac = + (fpu_register) ((wrd1 & 0xfffff) | 0x100000) / 1048576.0 + + (fpu_register) wrd2 / 4503599627370496.0; + + if (wrd1 & 0x80000000) + frac = -frac; + + fpu_register result = ldexp (frac, (int)((wrd1 >> 20) & 0x7ff) - 1023); + fpu_debug(("make_double (%X,%X) = %.04f\n",wrd1,wrd2,(double)result)); + + return result; +} + +// extract_double +PRIVATE inline void FFPU extract_double(fpu_register const & src, uae_u32 * wrd1, uae_u32 * wrd2) +{ + int expon; + int tmp; + fpu_register frac frac; +#if FPU_DEBUG + fpu_register src0 = src; +#endif + + if (src == 0.0) { + *wrd1 = 0; + *wrd2 = 0; + return; + } + if (src < 0) { + *wrd1 = 0x80000000; + src = -src; + } else { + *wrd1 = 0; + } + frac = frexp (src, &expon); + frac += 0.5 / 9007199254740992.0; + if (frac >= 1.0) { + frac /= 2.0; + expon++; + } + tmp = (uae_u32) (frac * 2097152.0); + *wrd1 |= (((expon + 1023 - 1) & 0x7ff) << 20) | (tmp & 0xfffff); + *wrd2 = (uae_u32) (frac * 9007199254740992.0 - tmp * 4294967296.0); + + // fpu_debug(("extract_double (%.04f) = %X,%X\n",(float)src0,*wrd1,*wrd2)); +} + +#endif + +// to_pack +PRIVATE inline fpu_register FFPU make_packed(uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3) +{ + fpu_double d; + char *cp; + char str[100]; + + cp = str; + if (wrd1 & 0x80000000) + *cp++ = '-'; + *cp++ = (char)((wrd1 & 0xf) + '0'); + *cp++ = '.'; + *cp++ = (char)(((wrd2 >> 28) & 0xf) + '0'); + *cp++ 
= (char)(((wrd2 >> 24) & 0xf) + '0'); + *cp++ = (char)(((wrd2 >> 20) & 0xf) + '0'); + *cp++ = (char)(((wrd2 >> 16) & 0xf) + '0'); + *cp++ = (char)(((wrd2 >> 12) & 0xf) + '0'); + *cp++ = (char)(((wrd2 >> 8) & 0xf) + '0'); + *cp++ = (char)(((wrd2 >> 4) & 0xf) + '0'); + *cp++ = (char)(((wrd2 >> 0) & 0xf) + '0'); + *cp++ = (char)(((wrd3 >> 28) & 0xf) + '0'); + *cp++ = (char)(((wrd3 >> 24) & 0xf) + '0'); + *cp++ = (char)(((wrd3 >> 20) & 0xf) + '0'); + *cp++ = (char)(((wrd3 >> 16) & 0xf) + '0'); + *cp++ = (char)(((wrd3 >> 12) & 0xf) + '0'); + *cp++ = (char)(((wrd3 >> 8) & 0xf) + '0'); + *cp++ = (char)(((wrd3 >> 4) & 0xf) + '0'); + *cp++ = (char)(((wrd3 >> 0) & 0xf) + '0'); + *cp++ = 'E'; + if (wrd1 & 0x40000000) + *cp++ = '-'; + *cp++ = (char)(((wrd1 >> 24) & 0xf) + '0'); + *cp++ = (char)(((wrd1 >> 20) & 0xf) + '0'); + *cp++ = (char)(((wrd1 >> 16) & 0xf) + '0'); + *cp = 0; + sscanf(str, "%le", &d); + + fpu_debug(("make_packed str = %s\n",str)); + + fpu_debug(("make_packed(%X,%X,%X) = %.04f\n",wrd1,wrd2,wrd3,(double)d)); + return d; +} + +// from_pack +PRIVATE inline void FFPU extract_packed(fpu_register const & src, uae_u32 * wrd1, uae_u32 * wrd2, uae_u32 * wrd3) +{ + int i; + int t; + char *cp; + char str[100]; + + sprintf(str, "%.16e", src); + + fpu_debug(("extract_packed(%.04f,%s)\n",(double)src,str)); + + cp = str; + *wrd1 = *wrd2 = *wrd3 = 0; + if (*cp == '-') { + cp++; + *wrd1 = 0x80000000; + } + if (*cp == '+') + cp++; + *wrd1 |= (*cp++ - '0'); + if (*cp == '.') + cp++; + for (i = 0; i < 8; i++) { + *wrd2 <<= 4; + if (*cp >= '0' && *cp <= '9') + *wrd2 |= *cp++ - '0'; + } + for (i = 0; i < 8; i++) { + *wrd3 <<= 4; + if (*cp >= '0' && *cp <= '9') + *wrd3 |= *cp++ - '0'; + } + if (*cp == 'e' || *cp == 'E') { + cp++; + if (*cp == '-') { + cp++; + *wrd1 |= 0x40000000; + } + if (*cp == '+') + cp++; + t = 0; + for (i = 0; i < 3; i++) { + if (*cp >= '0' && *cp <= '9') + t = (t << 4) | (*cp++ - '0'); + } + *wrd1 |= t << 16; + } + + fpu_debug(("extract_packed(%.04f) = 
%X,%X,%X\n",(double)src,*wrd1,*wrd2,*wrd3)); +} + +PRIVATE inline int FFPU get_fp_value (uae_u32 opcode, uae_u16 extra, fpu_register & src) +{ + uaecptr tmppc; + uae_u16 tmp; + int size; + int mode; + int reg; + uae_u32 ad = 0; + static int sz1[8] = {4, 4, 12, 12, 2, 8, 1, 0}; + static int sz2[8] = {4, 4, 12, 12, 2, 8, 2, 0}; + + // fpu_debug(("get_fp_value(%X,%X)\n",(int)opcode,(int)extra)); + // dump_first_bytes( regs.pc_p-4, 16 ); + + if ((extra & 0x4000) == 0) { + src = FPU registers[(extra >> 10) & 7]; + return 1; + } + mode = (opcode >> 3) & 7; + reg = opcode & 7; + size = (extra >> 10) & 7; + + fpu_debug(("get_fp_value mode=%d, reg=%d, size=%d\n",(int)mode,(int)reg,(int)size)); + + switch (mode) { + case 0: + switch (size) { + case 6: + src = (fpu_register) (uae_s8) m68k_dreg (regs, reg); + break; + case 4: + src = (fpu_register) (uae_s16) m68k_dreg (regs, reg); + break; + case 0: + src = (fpu_register) (uae_s32) m68k_dreg (regs, reg); + break; + case 1: + src = make_single(m68k_dreg (regs, reg)); + break; + default: + return 0; + } + return 1; + case 1: + return 0; + case 2: + ad = m68k_areg (regs, reg); + break; + case 3: + ad = m68k_areg (regs, reg); + break; + case 4: + ad = m68k_areg (regs, reg) - (reg == 7 ? 
sz2[size] : sz1[size]); + break; + case 5: + ad = m68k_areg (regs, reg) + (uae_s32) (uae_s16) next_iword(); + break; + case 6: + ad = get_disp_ea_020 (m68k_areg (regs, reg), next_iword()); + break; + case 7: + switch (reg) { + case 0: + ad = (uae_s32) (uae_s16) next_iword(); + break; + case 1: + ad = next_ilong(); + break; + case 2: + ad = m68k_getpc (); + ad += (uae_s32) (uae_s16) next_iword(); + fpu_debug(("get_fp_value next_iword()=%X\n",ad-m68k_getpc()-2)); + break; + case 3: + tmppc = m68k_getpc (); + tmp = (uae_u16)next_iword(); + ad = get_disp_ea_020 (tmppc, tmp); + break; + case 4: + ad = m68k_getpc (); + m68k_setpc (ad + sz2[size]); + // Immediate addressing mode && Operation Length == Byte -> + // Use the low-order byte of the extension word. + if(size == 6) ad++; + break; + default: + return 0; + } + } + + fpu_debug(("get_fp_value m68k_getpc()=%X\n",m68k_getpc())); + fpu_debug(("get_fp_value ad=%X\n",ad)); + fpu_debug(("get_fp_value get_long (ad)=%X\n",get_long (ad))); + dump_first_bytes( get_real_address(ad, 0, 0)-64, 64 ); + dump_first_bytes( get_real_address(ad, 0, 0), 64 ); + + switch (size) { + case 0: + src = (fpu_register) (uae_s32) get_long (ad); + break; + case 1: + src = make_single(get_long (ad)); + break; + case 2: { + uae_u32 wrd1, wrd2, wrd3; + wrd1 = get_long (ad); + ad += 4; + wrd2 = get_long (ad); + ad += 4; + wrd3 = get_long (ad); + src = make_extended(wrd1, wrd2, wrd3); + break; + } + case 3: { + uae_u32 wrd1, wrd2, wrd3; + wrd1 = get_long (ad); + ad += 4; + wrd2 = get_long (ad); + ad += 4; + wrd3 = get_long (ad); + src = make_packed(wrd1, wrd2, wrd3); + break; + } + case 4: + src = (fpu_register) (uae_s16) get_word(ad); + break; + case 5: { + uae_u32 wrd1, wrd2; + wrd1 = get_long (ad); + ad += 4; + wrd2 = get_long (ad); + src = make_double(wrd1, wrd2); + break; + } + case 6: + src = (fpu_register) (uae_s8) get_byte(ad); + break; + default: + return 0; + } + + switch (mode) { + case 3: + m68k_areg (regs, reg) += reg == 7 ? 
sz2[size] : sz1[size]; + break; + case 4: + m68k_areg (regs, reg) -= reg == 7 ? sz2[size] : sz1[size]; + break; + } + + // fpu_debug(("get_fp_value result = %.04f\n",(float)src)); + return 1; +} + +PRIVATE inline int FFPU put_fp_value (uae_u32 opcode, uae_u16 extra, fpu_register const & value) +{ + uae_u16 tmp; + uaecptr tmppc; + int size; + int mode; + int reg; + uae_u32 ad; + static int sz1[8] = {4, 4, 12, 12, 2, 8, 1, 0}; + static int sz2[8] = {4, 4, 12, 12, 2, 8, 2, 0}; + + // fpu_debug(("put_fp_value(%.04f,%X,%X)\n",(float)value,(int)opcode,(int)extra)); + + if ((extra & 0x4000) == 0) { + int dest_reg = (extra >> 10) & 7; + FPU registers[dest_reg] = value; + make_fpsr(FPU registers[dest_reg]); + return 1; + } + mode = (opcode >> 3) & 7; + reg = opcode & 7; + size = (extra >> 10) & 7; + ad = 0xffffffff; + switch (mode) { + case 0: + switch (size) { + case 6: + m68k_dreg (regs, reg) + = (((uae_s32) value & 0xff) + | (m68k_dreg (regs, reg) & ~0xff)); + break; + case 4: + m68k_dreg (regs, reg) + = (((uae_s32) value & 0xffff) + | (m68k_dreg (regs, reg) & ~0xffff)); + break; + case 0: + m68k_dreg (regs, reg) = (uae_s32) value; + break; + case 1: + m68k_dreg (regs, reg) = extract_single(value); + break; + default: + return 0; + } + return 1; + case 1: + return 0; + case 2: + ad = m68k_areg (regs, reg); + break; + case 3: + ad = m68k_areg (regs, reg); + m68k_areg (regs, reg) += reg == 7 ? sz2[size] : sz1[size]; + break; + case 4: + m68k_areg (regs, reg) -= reg == 7 ? 
sz2[size] : sz1[size]; + ad = m68k_areg (regs, reg); + break; + case 5: + ad = m68k_areg (regs, reg) + (uae_s32) (uae_s16) next_iword(); + break; + case 6: + ad = get_disp_ea_020 (m68k_areg (regs, reg), next_iword()); + break; + case 7: + switch (reg) { + case 0: + ad = (uae_s32) (uae_s16) next_iword(); + break; + case 1: + ad = next_ilong(); + break; + case 2: + ad = m68k_getpc (); + ad += (uae_s32) (uae_s16) next_iword(); + break; + case 3: + tmppc = m68k_getpc (); + tmp = (uae_u16)next_iword(); + ad = get_disp_ea_020 (tmppc, tmp); + break; + case 4: + ad = m68k_getpc (); + m68k_setpc (ad + sz2[size]); + break; + default: + return 0; + } + } + switch (size) { + case 0: + put_long (ad, (uae_s32) value); + break; + case 1: + put_long (ad, extract_single(value)); + break; + case 2: { + uae_u32 wrd1, wrd2, wrd3; + extract_extended(value, &wrd1, &wrd2, &wrd3); + put_long (ad, wrd1); + ad += 4; + put_long (ad, wrd2); + ad += 4; + put_long (ad, wrd3); + break; + } + case 3: { + uae_u32 wrd1, wrd2, wrd3; + extract_packed(value, &wrd1, &wrd2, &wrd3); + put_long (ad, wrd1); + ad += 4; + put_long (ad, wrd2); + ad += 4; + put_long (ad, wrd3); + break; + } + case 4: + put_word(ad, (uae_s16) value); + break; + case 5: { + uae_u32 wrd1, wrd2; + extract_double(value, &wrd1, &wrd2); + put_long (ad, wrd1); + ad += 4; + put_long (ad, wrd2); + break; + } + case 6: + put_byte(ad, (uae_s8) value); + break; + default: + return 0; + } + return 1; +} + +PRIVATE inline int FFPU get_fp_ad(uae_u32 opcode, uae_u32 * ad) +{ + uae_u16 tmp; + uaecptr tmppc; + int mode; + int reg; + + mode = (opcode >> 3) & 7; + reg = opcode & 7; + switch (mode) { + case 0: + case 1: + return 0; + case 2: + *ad = m68k_areg (regs, reg); + break; + case 3: + *ad = m68k_areg (regs, reg); + break; + case 4: + *ad = m68k_areg (regs, reg); + break; + case 5: + *ad = m68k_areg (regs, reg) + (uae_s32) (uae_s16) next_iword(); + break; + case 6: + *ad = get_disp_ea_020 (m68k_areg (regs, reg), next_iword()); + break; + 
case 7: + switch (reg) { + case 0: + *ad = (uae_s32) (uae_s16) next_iword(); + break; + case 1: + *ad = next_ilong(); + break; + case 2: + *ad = m68k_getpc (); + *ad += (uae_s32) (uae_s16) next_iword(); + break; + case 3: + tmppc = m68k_getpc (); + tmp = (uae_u16)next_iword(); + *ad = get_disp_ea_020 (tmppc, tmp); + break; + default: + return 0; + } + } + return 1; +} + +#if FPU_DEBUG +# define CONDRET(s,x) fpu_debug(("fpp_cond %s = %d\n",s,(uint32)(x))); return (x) +#else +# define CONDRET(s,x) return (x) +#endif + +PRIVATE inline int FFPU fpp_cond(int condition) +{ +#if 1 +# define N ((FPU fpsr.condition_codes & NATIVE_FFLAG_NEGATIVE) == NATIVE_FFLAG_NEGATIVE) +# define Z ((FPU fpsr.condition_codes & NATIVE_FFLAG_ZERO) == NATIVE_FFLAG_ZERO) +# define I ((FPU fpsr.condition_codes & NATIVE_FFLAG_INFINITY) == NATIVE_FFLAG_INFINITY) +# define NaN ((FPU fpsr.condition_codes & NATIVE_FFLAG_NAN) == NATIVE_FFLAG_NAN) +#else +# define N ((FPU fpsr.condition_codes & NATIVE_FFLAG_NEGATIVE) != 0) +# define Z ((FPU fpsr.condition_codes & NATIVE_FFLAG_ZERO) != 0) +# define I ((FPU fpsr.condition_codes & NATIVE_FFLAG_INFINITY) != 0) +# define NaN ((FPU fpsr.condition_codes & NATIVE_FFLAG_NAN) != 0) +#endif + +#if 0 + return fpcctrue(condition); +#else + switch (condition & 0x1f) { + case 0x00: CONDRET("False",0); + case 0x01: CONDRET("Equal",Z); + case 0x02: CONDRET("Ordered Greater Than",!(NaN || Z || N)); + case 0x03: CONDRET("Ordered Greater Than or Equal",Z || !(NaN || N)); + case 0x04: CONDRET("Ordered Less Than",N && !(NaN || Z)); + case 0x05: CONDRET("Ordered Less Than or Equal",Z || (N && !NaN)); + case 0x06: CONDRET("Ordered Greater or Less Than",!(NaN || Z)); + case 0x07: CONDRET("Ordered",!NaN); + case 0x08: CONDRET("Unordered",NaN); + case 0x09: CONDRET("Unordered or Equal",NaN || Z); + case 0x0a: CONDRET("Unordered or Greater Than",NaN || !(N || Z)); + case 0x0b: CONDRET("Unordered or Greater or Equal",NaN || Z || !N); + case 0x0c: CONDRET("Unordered or Less 
Than",NaN || (N && !Z)); + case 0x0d: CONDRET("Unordered or Less or Equal",NaN || Z || N); + case 0x0e: CONDRET("Not Equal",!Z); + case 0x0f: CONDRET("True",1); + case 0x10: CONDRET("Signaling False",0); + case 0x11: CONDRET("Signaling Equal",Z); + case 0x12: CONDRET("Greater Than",!(NaN || Z || N)); + case 0x13: CONDRET("Greater Than or Equal",Z || !(NaN || N)); + case 0x14: CONDRET("Less Than",N && !(NaN || Z)); + case 0x15: CONDRET("Less Than or Equal",Z || (N && !NaN)); + case 0x16: CONDRET("Greater or Less Than",!(NaN || Z)); + case 0x17: CONDRET("Greater, Less or Equal",!NaN); + case 0x18: CONDRET("Not Greater, Less or Equal",NaN); + case 0x19: CONDRET("Not Greater or Less Than",NaN || Z); + case 0x1a: CONDRET("Not Less Than or Equal",NaN || !(N || Z)); + case 0x1b: CONDRET("Not Less Than",NaN || Z || !N); + case 0x1c: CONDRET("Not Greater Than or Equal", NaN || (N && !Z)); +// case 0x1c: CONDRET("Not Greater Than or Equal",!Z && (NaN || N)); + case 0x1d: CONDRET("Not Greater Than",NaN || Z || N); + case 0x1e: CONDRET("Signaling Not Equal",!Z); + case 0x1f: CONDRET("Signaling True",1); + default: CONDRET("",-1); + } +#endif + +# undef N +# undef Z +# undef I +# undef NaN +} + +void FFPU fpuop_dbcc(uae_u32 opcode, uae_u32 extra) +{ + fpu_debug(("fdbcc_opp %X, %X at %08lx\n", (uae_u32)opcode, (uae_u32)extra, m68k_getpc ())); + + uaecptr pc = (uae_u32) m68k_getpc (); + uae_s32 disp = (uae_s32) (uae_s16) next_iword(); + int cc = fpp_cond(extra & 0x3f); + if (cc == -1) { + m68k_setpc (pc - 4); + op_illg (opcode); + } else if (!cc) { + int reg = opcode & 0x7; + + // this may have leaked. + /* + m68k_dreg (regs, reg) = ((m68k_dreg (regs, reg) & ~0xffff) + | ((m68k_dreg (regs, reg) - 1) & 0xffff)); + */ + m68k_dreg (regs, reg) = ((m68k_dreg (regs, reg) & 0xffff0000) + | (((m68k_dreg (regs, reg) & 0xffff) - 1) & 0xffff)); + + + // condition reversed. 
+ // if ((m68k_dreg (regs, reg) & 0xffff) == 0xffff) + if ((m68k_dreg (regs, reg) & 0xffff) != 0xffff) + m68k_setpc (pc + disp); + } +} + +void FFPU fpuop_scc(uae_u32 opcode, uae_u32 extra) +{ + fpu_debug(("fscc_opp %X, %X at %08lx\n", (uae_u32)opcode, (uae_u32)extra, m68k_getpc ())); + + uae_u32 ad; + int cc = fpp_cond(extra & 0x3f); + if (cc == -1) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + } + else if ((opcode & 0x38) == 0) { + m68k_dreg (regs, opcode & 7) = (m68k_dreg (regs, opcode & 7) & ~0xff) | + (cc ? 0xff : 0x00); + } + else if (get_fp_ad(opcode, &ad) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + } + else + put_byte(ad, cc ? 0xff : 0x00); +} + +void FFPU fpuop_trapcc(uae_u32 opcode, uaecptr oldpc, uae_u32 extra) +{ + fpu_debug(("ftrapcc_opp %X, %X at %08lx\n", (uae_u32)opcode, (uae_u32)extra, m68k_getpc ())); + + int cc = fpp_cond(extra & 0x3f); + if (cc == -1) { + m68k_setpc (oldpc); + op_illg (opcode); + } + if (cc) + Exception(7, oldpc - 2); +} + +// NOTE that we get here also when there is a FNOP (nontrapping false, displ 0) +void FFPU fpuop_bcc(uae_u32 opcode, uaecptr pc, uae_u32 extra) +{ + fpu_debug(("fbcc_opp %X, %X at %08lx, jumpto=%X\n", (uae_u32)opcode, (uae_u32)extra, m68k_getpc (), extra )); + + int cc = fpp_cond(opcode & 0x3f); + if (cc == -1) { + m68k_setpc (pc); + op_illg (opcode); + } + else if (cc) { + if ((opcode & 0x40) == 0) + extra = (uae_s32) (uae_s16) extra; + m68k_setpc (pc + extra); + } +} + +// FSAVE has no post-increment +// 0x1f180000 == IDLE state frame, coprocessor version number 1F +void FFPU fpuop_save(uae_u32 opcode) +{ + fpu_debug(("fsave_opp at %08lx\n", m68k_getpc ())); + + uae_u32 ad; + int incr = (opcode & 0x38) == 0x20 ? -1 : 1; + int i; + + if (get_fp_ad(opcode, &ad) == 0) { + m68k_setpc (m68k_getpc () - 2); + op_illg (opcode); + return; + } + + if (CPUType == 4) { + // Put 4 byte 68040 IDLE frame. 
+ if (incr < 0) { + ad -= 4; + put_long (ad, 0x41000000); + } + else { + put_long (ad, 0x41000000); + ad += 4; + } + } else { + // Put 28 byte 68881 IDLE frame. + if (incr < 0) { + fpu_debug(("fsave_opp pre-decrement\n")); + ad -= 4; + // What's this? Some BIU flags, or (incorrectly placed) command/condition? + put_long (ad, 0x70000000); + for (i = 0; i < 5; i++) { + ad -= 4; + put_long (ad, 0x00000000); + } + ad -= 4; + put_long (ad, 0x1f180000); // IDLE, vers 1f + } + else { + put_long (ad, 0x1f180000); // IDLE, vers 1f + ad += 4; + for (i = 0; i < 5; i++) { + put_long (ad, 0x00000000); + ad += 4; + } + // What's this? Some BIU flags, or (incorrectly placed) command/condition? + put_long (ad, 0x70000000); + ad += 4; + } + } + if ((opcode & 0x38) == 0x18) { + m68k_areg (regs, opcode & 7) = ad; // Never executed on a 68881 + fpu_debug(("PROBLEM: fsave_opp post-increment\n")); + } + if ((opcode & 0x38) == 0x20) { + m68k_areg (regs, opcode & 7) = ad; + fpu_debug(("fsave_opp pre-decrement %X -> A%d\n",ad,opcode & 7)); + } +} + +// FRESTORE has no pre-decrement +void FFPU fpuop_restore(uae_u32 opcode) +{ + fpu_debug(("frestore_opp at %08lx\n", m68k_getpc ())); + + uae_u32 ad; + uae_u32 d; + int incr = (opcode & 0x38) == 0x20 ? -1 : 1; + + if (get_fp_ad(opcode, &ad) == 0) { + m68k_setpc (m68k_getpc () - 2); + op_illg (opcode); + return; + } + + if (CPUType == 4) { + // 68040 + if (incr < 0) { + fpu_debug(("PROBLEM: frestore_opp incr < 0\n")); + // this may be wrong, but it's never called. + ad -= 4; + d = get_long (ad); + if ((d & 0xff000000) != 0) { // Not a NULL frame? 
+ if ((d & 0x00ff0000) == 0) { // IDLE + fpu_debug(("frestore_opp found IDLE frame at %X\n",ad-4)); + } + else if ((d & 0x00ff0000) == 0x00300000) { // UNIMP + fpu_debug(("PROBLEM: frestore_opp found UNIMP frame at %X\n",ad-4)); + ad -= 44; + } + else if ((d & 0x00ff0000) == 0x00600000) { // BUSY + fpu_debug(("PROBLEM: frestore_opp found BUSY frame at %X\n",ad-4)); + ad -= 92; + } + } + } + else { + d = get_long (ad); + fpu_debug(("frestore_opp frame at %X = %X\n",ad,d)); + ad += 4; + if ((d & 0xff000000) != 0) { // Not a NULL frame? + if ((d & 0x00ff0000) == 0) { // IDLE + fpu_debug(("frestore_opp found IDLE frame at %X\n",ad-4)); + } + else if ((d & 0x00ff0000) == 0x00300000) { // UNIMP + fpu_debug(("PROBLEM: frestore_opp found UNIMP frame at %X\n",ad-4)); + ad += 44; + } + else if ((d & 0x00ff0000) == 0x00600000) { // BUSY + fpu_debug(("PROBLEM: frestore_opp found BUSY frame at %X\n",ad-4)); + ad += 92; + } + } + } + } + else { + // 68881 + if (incr < 0) { + fpu_debug(("PROBLEM: frestore_opp incr < 0\n")); + // this may be wrong, but it's never called. + ad -= 4; + d = get_long (ad); + if ((d & 0xff000000) != 0) { + if ((d & 0x00ff0000) == 0x00180000) + ad -= 6 * 4; + else if ((d & 0x00ff0000) == 0x00380000) + ad -= 14 * 4; + else if ((d & 0x00ff0000) == 0x00b40000) + ad -= 45 * 4; + } + } + else { + d = get_long (ad); + fpu_debug(("frestore_opp frame at %X = %X\n",ad,d)); + ad += 4; + if ((d & 0xff000000) != 0) { // Not a NULL frame? + if ((d & 0x00ff0000) == 0x00180000) { // IDLE + fpu_debug(("frestore_opp found IDLE frame at %X\n",ad-4)); + ad += 6 * 4; + } + else if ((d & 0x00ff0000) == 0x00380000) {// UNIMP? shouldn't it be 3C? + ad += 14 * 4; + fpu_debug(("PROBLEM: frestore_opp found UNIMP? 
frame at %X\n",ad-4)); + } + else if ((d & 0x00ff0000) == 0x00b40000) {// BUSY + fpu_debug(("PROBLEM: frestore_opp found BUSY frame at %X\n",ad-4)); + ad += 45 * 4; + } + } + } + } + if ((opcode & 0x38) == 0x18) { + m68k_areg (regs, opcode & 7) = ad; + fpu_debug(("frestore_opp post-increment %X -> A%d\n",ad,opcode & 7)); + } + if ((opcode & 0x38) == 0x20) { + m68k_areg (regs, opcode & 7) = ad; // Never executed on a 68881 + fpu_debug(("PROBLEM: frestore_opp pre-decrement\n")); + } +} + +void FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) +{ + int reg; + fpu_register src; + + fpu_debug(("FPP %04lx %04x at %08lx\n", opcode & 0xffff, extra & 0xffff, + m68k_getpc () - 4)); + + dump_registers( "START"); + + switch ((extra >> 13) & 0x7) { + case 3: + fpu_debug(("FMOVE -> \n")); + if (put_fp_value (opcode, extra, FPU registers[(extra >> 7) & 7]) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + } + dump_registers( "END "); + return; + case 4: + case 5: + if ((opcode & 0x38) == 0) { + if (extra & 0x2000) { // dr bit + if (extra & 0x1000) { + // according to the manual, the msb bits are always zero. 
+ m68k_dreg (regs, opcode & 7) = get_fpcr() & 0xFFFF; + fpu_debug(("FMOVEM FPU fpcr (%X) -> D%d\n", get_fpcr(), opcode & 7)); + } + if (extra & 0x0800) { + m68k_dreg (regs, opcode & 7) = get_fpsr(); + fpu_debug(("FMOVEM FPU fpsr (%X) -> D%d\n", get_fpsr(), opcode & 7)); + } + if (extra & 0x0400) { + m68k_dreg (regs, opcode & 7) = FPU instruction_address; + fpu_debug(("FMOVEM FPU instruction_address (%X) -> D%d\n", FPU instruction_address, opcode & 7)); + } + } + else { + if (extra & 0x1000) { + set_fpcr( m68k_dreg (regs, opcode & 7) ); + fpu_debug(("FMOVEM D%d (%X) -> FPU fpcr\n", opcode & 7, get_fpcr())); + } + if (extra & 0x0800) { + set_fpsr( m68k_dreg (regs, opcode & 7) ); + fpu_debug(("FMOVEM D%d (%X) -> FPU fpsr\n", opcode & 7, get_fpsr())); + } + if (extra & 0x0400) { + FPU instruction_address = m68k_dreg (regs, opcode & 7); + fpu_debug(("FMOVEM D%d (%X) -> FPU instruction_address\n", opcode & 7, FPU instruction_address)); + } + } +// } else if ((opcode & 0x38) == 1) { + } + else if ((opcode & 0x38) == 8) { + if (extra & 0x2000) { // dr bit + if (extra & 0x1000) { + // according to the manual, the msb bits are always zero. 
+ m68k_areg (regs, opcode & 7) = get_fpcr() & 0xFFFF; + fpu_debug(("FMOVEM FPU fpcr (%X) -> A%d\n", get_fpcr(), opcode & 7)); + } + if (extra & 0x0800) { + m68k_areg (regs, opcode & 7) = get_fpsr(); + fpu_debug(("FMOVEM FPU fpsr (%X) -> A%d\n", get_fpsr(), opcode & 7)); + } + if (extra & 0x0400) { + m68k_areg (regs, opcode & 7) = FPU instruction_address; + fpu_debug(("FMOVEM FPU instruction_address (%X) -> A%d\n", FPU instruction_address, opcode & 7)); + } + } else { + if (extra & 0x1000) { + set_fpcr( m68k_areg (regs, opcode & 7) ); + fpu_debug(("FMOVEM A%d (%X) -> FPU fpcr\n", opcode & 7, get_fpcr())); + } + if (extra & 0x0800) { + set_fpsr( m68k_areg (regs, opcode & 7) ); + fpu_debug(("FMOVEM A%d (%X) -> FPU fpsr\n", opcode & 7, get_fpsr())); + } + if (extra & 0x0400) { + FPU instruction_address = m68k_areg (regs, opcode & 7); + fpu_debug(("FMOVEM A%d (%X) -> FPU instruction_address\n", opcode & 7, FPU instruction_address)); + } + } + } + else if ((opcode & 0x3f) == 0x3c) { + if ((extra & 0x2000) == 0) { + if (extra & 0x1000) { + set_fpcr( next_ilong() ); + fpu_debug(("FMOVEM #<%X> -> FPU fpcr\n", get_fpcr())); + } + if (extra & 0x0800) { + set_fpsr( next_ilong() ); + fpu_debug(("FMOVEM #<%X> -> FPU fpsr\n", get_fpsr())); + } + if (extra & 0x0400) { + FPU instruction_address = next_ilong(); + fpu_debug(("FMOVEM #<%X> -> FPU instruction_address\n", FPU instruction_address)); + } + } + } + else if (extra & 0x2000) { + /* FMOVEM FPP->memory */ + uae_u32 ad; + int incr = 0; + + if (get_fp_ad(opcode, &ad) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + if ((opcode & 0x38) == 0x20) { + if (extra & 0x1000) + incr += 4; + if (extra & 0x0800) + incr += 4; + if (extra & 0x0400) + incr += 4; + } + ad -= incr; + if (extra & 0x1000) { + // according to the manual, the msb bits are always zero. 
+ put_long (ad, get_fpcr() & 0xFFFF); + fpu_debug(("FMOVEM FPU fpcr (%X) -> mem %X\n", get_fpcr(), ad )); + ad += 4; + } + if (extra & 0x0800) { + put_long (ad, get_fpsr()); + fpu_debug(("FMOVEM FPU fpsr (%X) -> mem %X\n", get_fpsr(), ad )); + ad += 4; + } + if (extra & 0x0400) { + put_long (ad, FPU instruction_address); + fpu_debug(("FMOVEM FPU instruction_address (%X) -> mem %X\n", FPU instruction_address, ad )); + ad += 4; + } + ad -= incr; + if ((opcode & 0x38) == 0x18) // post-increment? + m68k_areg (regs, opcode & 7) = ad; + if ((opcode & 0x38) == 0x20) // pre-decrement? + m68k_areg (regs, opcode & 7) = ad; + } + else { + /* FMOVEM memory->FPP */ + uae_u32 ad; + + if (get_fp_ad(opcode, &ad) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + + // ad = (opcode & 0x38) == 0x20 ? ad - 12 : ad; + int incr = 0; + if((opcode & 0x38) == 0x20) { + if (extra & 0x1000) + incr += 4; + if (extra & 0x0800) + incr += 4; + if (extra & 0x0400) + incr += 4; + ad = ad - incr; + } + + if (extra & 0x1000) { + set_fpcr( get_long (ad) ); + fpu_debug(("FMOVEM mem %X (%X) -> FPU fpcr\n", ad, get_fpcr() )); + ad += 4; + } + if (extra & 0x0800) { + set_fpsr( get_long (ad) ); + fpu_debug(("FMOVEM mem %X (%X) -> FPU fpsr\n", ad, get_fpsr() )); + ad += 4; + } + if (extra & 0x0400) { + FPU instruction_address = get_long (ad); + fpu_debug(("FMOVEM mem %X (%X) -> FPU instruction_address\n", ad, FPU instruction_address )); + ad += 4; + } + if ((opcode & 0x38) == 0x18) // post-increment? + m68k_areg (regs, opcode & 7) = ad; + if ((opcode & 0x38) == 0x20) // pre-decrement? 
+// m68k_areg (regs, opcode & 7) = ad - 12; + m68k_areg (regs, opcode & 7) = ad - incr; + } + dump_registers( "END "); + return; + case 6: + case 7: { + uae_u32 ad, list = 0; + int incr = 0; + if (extra & 0x2000) { + /* FMOVEM FPP->memory */ + fpu_debug(("FMOVEM FPP->memory\n")); + + if (get_fp_ad(opcode, &ad) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + switch ((extra >> 11) & 3) { + case 0: /* static pred */ + list = extra & 0xff; + incr = -1; + break; + case 1: /* dynamic pred */ + list = m68k_dreg (regs, (extra >> 4) & 3) & 0xff; + incr = -1; + break; + case 2: /* static postinc */ + list = extra & 0xff; + incr = 1; + break; + case 3: /* dynamic postinc */ + list = m68k_dreg (regs, (extra >> 4) & 3) & 0xff; + incr = 1; + break; + } + + if (incr < 0) { + for(reg=7; reg>=0; reg--) { + uae_u32 wrd1, wrd2, wrd3; + if( list & 0x80 ) { + extract_extended(FPU registers[reg],&wrd1, &wrd2, &wrd3); + ad -= 4; + put_long (ad, wrd3); + ad -= 4; + put_long (ad, wrd2); + ad -= 4; + put_long (ad, wrd1); + } + list <<= 1; + } + } + else { + for(reg=0; reg<8; reg++) { + uae_u32 wrd1, wrd2, wrd3; + if( list & 0x80 ) { + extract_extended(FPU registers[reg],&wrd1, &wrd2, &wrd3); + put_long (ad, wrd1); + ad += 4; + put_long (ad, wrd2); + ad += 4; + put_long (ad, wrd3); + ad += 4; + } + list <<= 1; + } + } + if ((opcode & 0x38) == 0x18) // post-increment? + m68k_areg (regs, opcode & 7) = ad; + if ((opcode & 0x38) == 0x20) // pre-decrement? 
+ m68k_areg (regs, opcode & 7) = ad; + } + else { + /* FMOVEM memory->FPP */ + fpu_debug(("FMOVEM memory->FPP\n")); + + if (get_fp_ad(opcode, &ad) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + switch ((extra >> 11) & 3) { + case 0: /* static pred */ + fpu_debug(("memory->FMOVEM FPP not legal mode.\n")); + list = extra & 0xff; + incr = -1; + break; + case 1: /* dynamic pred */ + fpu_debug(("memory->FMOVEM FPP not legal mode.\n")); + list = m68k_dreg (regs, (extra >> 4) & 3) & 0xff; + incr = -1; + break; + case 2: /* static postinc */ + list = extra & 0xff; + incr = 1; + break; + case 3: /* dynamic postinc */ + list = m68k_dreg (regs, (extra >> 4) & 3) & 0xff; + incr = 1; + break; + } + + /**/ + if (incr < 0) { + // not reached + for(reg=7; reg>=0; reg--) { + uae_u32 wrd1, wrd2, wrd3; + if( list & 0x80 ) { + ad -= 4; + wrd3 = get_long (ad); + ad -= 4; + wrd2 = get_long (ad); + ad -= 4; + wrd1 = get_long (ad); + // FPU registers[reg] = make_extended(wrd1, wrd2, wrd3); + make_extended_no_normalize (wrd1, wrd2, wrd3, FPU registers[reg]); + } + list <<= 1; + } + } + else { + for(reg=0; reg<8; reg++) { + uae_u32 wrd1, wrd2, wrd3; + if( list & 0x80 ) { + wrd1 = get_long (ad); + ad += 4; + wrd2 = get_long (ad); + ad += 4; + wrd3 = get_long (ad); + ad += 4; + // FPU registers[reg] = make_extended(wrd1, wrd2, wrd3); + make_extended_no_normalize (wrd1, wrd2, wrd3, FPU registers[reg]); + } + list <<= 1; + } + } + if ((opcode & 0x38) == 0x18) // post-increment? + m68k_areg (regs, opcode & 7) = ad; + if ((opcode & 0x38) == 0x20) // pre-decrement? 
+ m68k_areg (regs, opcode & 7) = ad; + } + dump_registers( "END "); + return; + } + case 0: + case 2: + reg = (extra >> 7) & 7; + if ((extra & 0xfc00) == 0x5c00) { + fpu_debug(("FMOVECR memory->FPP\n")); + switch (extra & 0x7f) { + case 0x00: + // FPU registers[reg] = 4.0 * atan(1.0); + FPU registers[reg] = 3.1415926535897932384626433832795; + fpu_debug(("FP const: Pi\n")); + break; + case 0x0b: + // FPU registers[reg] = log10 (2.0); + FPU registers[reg] = 0.30102999566398119521373889472449; + fpu_debug(("FP const: Log 10 (2)\n")); + break; + case 0x0c: + // FPU registers[reg] = exp (1.0); + FPU registers[reg] = 2.7182818284590452353602874713527; + fpu_debug(("FP const: e\n")); + break; + case 0x0d: + // FPU registers[reg] = log (exp (1.0)) / log (2.0); + FPU registers[reg] = 1.4426950408889634073599246810019; + fpu_debug(("FP const: Log 2 (e)\n")); + break; + case 0x0e: + // FPU registers[reg] = log (exp (1.0)) / log (10.0); + FPU registers[reg] = 0.43429448190325182765112891891661; + fpu_debug(("FP const: Log 10 (e)\n")); + break; + case 0x0f: + FPU registers[reg] = 0.0; + fpu_debug(("FP const: zero\n")); + break; + case 0x30: + // FPU registers[reg] = log (2.0); + FPU registers[reg] = 0.69314718055994530941723212145818; + fpu_debug(("FP const: ln(2)\n")); + break; + case 0x31: + // FPU registers[reg] = log (10.0); + FPU registers[reg] = 2.3025850929940456840179914546844; + fpu_debug(("FP const: ln(10)\n")); + break; + case 0x32: + // ?? 
+ FPU registers[reg] = 1.0e0; + fpu_debug(("FP const: 1.0e0\n")); + break; + case 0x33: + FPU registers[reg] = 1.0e1; + fpu_debug(("FP const: 1.0e1\n")); + break; + case 0x34: + FPU registers[reg] = 1.0e2; + fpu_debug(("FP const: 1.0e2\n")); + break; + case 0x35: + FPU registers[reg] = 1.0e4; + fpu_debug(("FP const: 1.0e4\n")); + break; + case 0x36: + FPU registers[reg] = 1.0e8; + fpu_debug(("FP const: 1.0e8\n")); + break; + case 0x37: + FPU registers[reg] = 1.0e16; + fpu_debug(("FP const: 1.0e16\n")); + break; + case 0x38: + FPU registers[reg] = 1.0e32; + fpu_debug(("FP const: 1.0e32\n")); + break; + case 0x39: + FPU registers[reg] = 1.0e64; + fpu_debug(("FP const: 1.0e64\n")); + break; + case 0x3a: + FPU registers[reg] = 1.0e128; + fpu_debug(("FP const: 1.0e128\n")); + break; + case 0x3b: + FPU registers[reg] = 1.0e256; + fpu_debug(("FP const: 1.0e256\n")); + break; + + // Valid for 64 bits only (see fpu.cpp) +#if 0 + case 0x3c: + FPU registers[reg] = 1.0e512; + fpu_debug(("FP const: 1.0e512\n")); + break; + case 0x3d: + FPU registers[reg] = 1.0e1024; + fpu_debug(("FP const: 1.0e1024\n")); + break; + case 0x3e: + FPU registers[reg] = 1.0e2048; + fpu_debug(("FP const: 1.0e2048\n")); + break; + case 0x3f: + FPU registers[reg] = 1.0e4096; + fpu_debug(("FP const: 1.0e4096\n")); + break; +#endif + default: + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + break; + } + // these *do* affect the status reg + make_fpsr(FPU registers[reg]); + dump_registers( "END "); + return; + } + + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + fpu_debug(("returned from get_fp_value m68k_getpc()=%X\n",m68k_getpc())); +#if 0 // MJ added, not tested now + if (FPU is_integral) { + // 68040-specific operations + switch (extra & 0x7f) { + case 0x40: /* FSMOVE */ + fpu_debug(("FSMOVE %.04f\n",(double)src)); + FPU registers[reg] = (float)src; + make_fpsr(FPU registers[reg]); + break; + case 
0x44: /* FDMOVE */ + fpu_debug(("FDMOVE %.04f\n",(double)src)); + FPU registers[reg] = (double)src; + make_fpsr(FPU registers[reg]); + break; + case 0x41: /* FSSQRT */ + fpu_debug(("FSQRT %.04f\n",(double)src)); + FPU registers[reg] = (float)sqrt (src); + make_fpsr(FPU registers[reg]); + break; + case 0x45: /* FDSQRT */ + fpu_debug(("FSQRT %.04f\n",(double)src)); + FPU registers[reg] = (double)sqrt (src); + make_fpsr(FPU registers[reg]); + break; + case 0x58: /* FSABS */ + fpu_debug(("FSABS %.04f\n",(double)src)); + FPU registers[reg] = (float)fabs(src); + make_fpsr(FPU registers[reg]); + break; + case 0x5c: /* FDABS */ + fpu_debug(("FDABS %.04f\n",(double)src)); + FPU registers[reg] = (double)fabs(src); + make_fpsr(FPU registers[reg]); + break; + case 0x5a: /* FSNEG */ + fpu_debug(("FSNEG %.04f\n",(double)src)); + FPU registers[reg] = (float)-src; + make_fpsr(FPU registers[reg]); + break; + case 0x5e: /* FDNEG */ + fpu_debug(("FDNEG %.04f\n",(double)src)); + FPU registers[reg] = (double)-src; + make_fpsr(FPU registers[reg]); + break; + case 0x60: /* FSDIV */ + fpu_debug(("FSDIV %.04f\n",(double)src)); + FPU registers[reg] = (float)(FPU registers[reg] / src); + make_fpsr(FPU registers[reg]); + break; + case 0x64: /* FDDIV */ + fpu_debug(("FDDIV %.04f\n",(double)src)); + FPU registers[reg] = (double)(FPU registers[reg] / src); + make_fpsr(FPU registers[reg]); + break; + case 0x62: /* FSADD */ + fpu_debug(("FSADD %.04f\n",(double)src)); + FPU registers[reg] = (float)(FPU registers[reg] + src); + make_fpsr(FPU registers[reg]); + break; + case 0x66: /* FDADD */ + fpu_debug(("FDADD %.04f\n",(double)src)); + FPU registers[reg] = (double)(FPU registers[reg] + src); + make_fpsr(FPU registers[reg]); + break; + case 0x68: /* FSSUB */ + fpu_debug(("FSSUB %.04f\n",(double)src)); + FPU registers[reg] = (float)(FPU registers[reg] - src); + make_fpsr(FPU registers[reg]); + break; + case 0x6c: /* FDSUB */ + fpu_debug(("FDSUB %.04f\n",(double)src)); + FPU registers[reg] = 
(double)(FPU registers[reg] - src); + make_fpsr(FPU registers[reg]); + break; + case 0x63: /* FSMUL */ + case 0x67: /* FDMUL */ + get_dest_flags(FPU registers[reg]); + get_source_flags(src); + if(fl_dest.in_range && fl_source.in_range) { + if ((extra & 0x7f) == 0x63) + FPU registers[reg] = (float)(FPU registers[reg] * src); + else + FPU registers[reg] = (double)(FPU registers[reg] * src); + } + else if (fl_dest.nan || fl_source.nan || + fl_dest.zero && fl_source.infinity || + fl_dest.infinity && fl_source.zero ) { + make_nan( FPU registers[reg] ); + } + else if (fl_dest.zero || fl_source.zero ) { + if (fl_dest.negative && !fl_source.negative || + !fl_dest.negative && fl_source.negative) { + make_zero_negative(FPU registers[reg]); + } + else { + make_zero_positive(FPU registers[reg]); + } + } + else { + if( fl_dest.negative && !fl_source.negative || + !fl_dest.negative && fl_source.negative) { + make_inf_negative(FPU registers[reg]); + } + else { + make_inf_positive(FPU registers[reg]); + } + } + make_fpsr(FPU registers[reg]); + break; + default: + // Continue decode-execute 6888x instructions below + goto process_6888x_instructions; + } + fpu_debug(("END m68k_getpc()=%X\n",m68k_getpc())); + dump_registers( "END "); + return; + } + + process_6888x_instructions: +#endif + switch (extra & 0x7f) { + case 0x00: /* FMOVE */ + fpu_debug(("FMOVE %.04f\n",(double)src)); + FPU registers[reg] = src; + // -> reg DOES affect the status reg + make_fpsr(FPU registers[reg]); + break; + case 0x01: /* FINT */ + fpu_debug(("FINT %.04f\n",(double)src)); + // FPU registers[reg] = (int) (src + 0.5); + // FIXME: use native rounding mode flags + switch (get_fpcr() & 0x30) { + case FPCR_ROUND_ZERO: + FPU registers[reg] = round_to_zero(src); + break; + case FPCR_ROUND_MINF: + FPU registers[reg] = floor(src); + break; + case FPCR_ROUND_NEAR: + FPU registers[reg] = round_to_nearest(src); + break; + case FPCR_ROUND_PINF: + FPU registers[reg] = ceil(src); + break; + } + make_fpsr(FPU 
registers[reg]); + break; + case 0x02: /* FSINH */ + fpu_debug(("FSINH %.04f\n",(double)src)); + FPU registers[reg] = sinh (src); + make_fpsr(FPU registers[reg]); + break; + case 0x03: /* FINTRZ */ + fpu_debug(("FINTRZ %.04f\n",(double)src)); + // FPU registers[reg] = (int) src; + FPU registers[reg] = round_to_zero(src); + make_fpsr(FPU registers[reg]); + break; + case 0x04: /* FSQRT */ + fpu_debug(("FSQRT %.04f\n",(double)src)); + FPU registers[reg] = sqrt (src); + make_fpsr(FPU registers[reg]); + break; + case 0x06: /* FLOGNP1 */ + fpu_debug(("FLOGNP1 %.04f\n",(double)src)); + FPU registers[reg] = log (src + 1.0); + make_fpsr(FPU registers[reg]); + break; + case 0x08: /* FETOXM1 */ + fpu_debug(("FETOXM1 %.04f\n",(double)src)); + FPU registers[reg] = exp (src) - 1.0; + make_fpsr(FPU registers[reg]); + break; + case 0x09: /* FTANH */ + fpu_debug(("FTANH %.04f\n",(double)src)); + FPU registers[reg] = tanh (src); + make_fpsr(FPU registers[reg]); + break; + case 0x0a: /* FATAN */ + fpu_debug(("FATAN %.04f\n",(double)src)); + FPU registers[reg] = atan (src); + make_fpsr(FPU registers[reg]); + break; + case 0x0c: /* FASIN */ + fpu_debug(("FASIN %.04f\n",(double)src)); + FPU registers[reg] = asin (src); + make_fpsr(FPU registers[reg]); + break; + case 0x0d: /* FATANH */ + fpu_debug(("FATANH %.04f\n",(double)src)); +#if HAVE_ATANH + FPU registers[reg] = atanh (src); +#else + /* The BeBox doesn't have atanh, and it isn't in the HPUX libm either */ + FPU registers[reg] = log ((1 + src) / (1 - src)) / 2; +#endif + make_fpsr(FPU registers[reg]); + break; + case 0x0e: /* FSIN */ + fpu_debug(("FSIN %.04f\n",(double)src)); + FPU registers[reg] = sin (src); + make_fpsr(FPU registers[reg]); + break; + case 0x0f: /* FTAN */ + fpu_debug(("FTAN %.04f\n",(double)src)); + FPU registers[reg] = tan (src); + make_fpsr(FPU registers[reg]); + break; + case 0x10: /* FETOX */ + fpu_debug(("FETOX %.04f\n",(double)src)); + FPU registers[reg] = exp (src); + make_fpsr(FPU registers[reg]); + 
break; + case 0x11: /* FTWOTOX */ + fpu_debug(("FTWOTOX %.04f\n",(double)src)); + FPU registers[reg] = pow(2.0, src); + make_fpsr(FPU registers[reg]); + break; + case 0x12: /* FTENTOX */ + fpu_debug(("FTENTOX %.04f\n",(double)src)); + FPU registers[reg] = pow(10.0, src); + make_fpsr(FPU registers[reg]); + break; + case 0x14: /* FLOGN */ + fpu_debug(("FLOGN %.04f\n",(double)src)); + FPU registers[reg] = log (src); + make_fpsr(FPU registers[reg]); + break; + case 0x15: /* FLOG10 */ + fpu_debug(("FLOG10 %.04f\n",(double)src)); + FPU registers[reg] = log10 (src); + make_fpsr(FPU registers[reg]); + break; + case 0x16: /* FLOG2 */ + fpu_debug(("FLOG2 %.04f\n",(double)src)); + FPU registers[reg] = log (src) / log (2.0); + make_fpsr(FPU registers[reg]); + break; + case 0x18: /* FABS */ + case 0x58: /* single precision rounding */ + case 0x5C: /* double precision rounding */ + fpu_debug(("FABS %.04f\n",(double)src)); + FPU registers[reg] = src < 0 ? -src : src; + make_fpsr(FPU registers[reg]); + break; + case 0x19: /* FCOSH */ + fpu_debug(("FCOSH %.04f\n",(double)src)); + FPU registers[reg] = cosh(src); + make_fpsr(FPU registers[reg]); + break; + case 0x1a: /* FNEG */ + fpu_debug(("FNEG %.04f\n",(double)src)); + FPU registers[reg] = -src; + make_fpsr(FPU registers[reg]); + break; + case 0x1c: /* FACOS */ + fpu_debug(("FACOS %.04f\n",(double)src)); + FPU registers[reg] = acos(src); + make_fpsr(FPU registers[reg]); + break; + case 0x1d: /* FCOS */ + fpu_debug(("FCOS %.04f\n",(double)src)); + FPU registers[reg] = cos(src); + make_fpsr(FPU registers[reg]); + break; + case 0x1e: /* FGETEXP */ + fpu_debug(("FGETEXP %.04f\n",(double)src)); +#if FPU_HAVE_IEEE_DOUBLE + if( isinf(src) ) { + make_nan( FPU registers[reg] ); + } + else { + FPU registers[reg] = fast_fgetexp( src ); + } +#else + if(src == 0) { + FPU registers[reg] = (fpu_register)0; + } + else { + int expon; + frexp (src, &expon); + FPU registers[reg] = (fpu_register) (expon - 1); + } +#endif + make_fpsr(FPU 
registers[reg]); + break; + case 0x1f: /* FGETMAN */ + fpu_debug(("FGETMAN %.04f\n",(double)src)); +#if FPU_HAVE_IEEE_DOUBLE + if( src == 0 ) { + FPU registers[reg] = 0; + } + else if( isinf(src) ) { + make_nan( FPU registers[reg] ); + } + else { + FPU registers[reg] = src; + fast_remove_exponent( FPU registers[reg] ); + } +#else + { + int expon; + FPU registers[reg] = frexp (src, &expon) * 2.0; + } +#endif + make_fpsr(FPU registers[reg]); + break; + case 0x20: /* FDIV */ + fpu_debug(("FDIV %.04f\n",(double)src)); + FPU registers[reg] /= src; + make_fpsr(FPU registers[reg]); + break; + case 0x21: /* FMOD */ + fpu_debug(("FMOD %.04f\n",(double)src)); + // FPU registers[reg] = FPU registers[reg] - (fpu_register) ((int) (FPU registers[reg] / src)) * src; + { + fpu_register quot = round_to_zero(FPU registers[reg] / src); +#if FPU_HAVE_IEEE_DOUBLE + uae_u32 sign = get_quotient_sign(FPU registers[reg],src); +#endif + FPU registers[reg] = FPU registers[reg] - quot * src; + make_fpsr(FPU registers[reg]); +#if FPU_HAVE_IEEE_DOUBLE + make_quotient(quot, sign); +#endif + } + break; + case 0x22: /* FADD */ + case 0x62: /* single */ + case 0x66: /* double */ + fpu_debug(("FADD %.04f\n",(double)src)); + FPU registers[reg] += src; + make_fpsr(FPU registers[reg]); + break; + case 0x23: /* FMUL */ + fpu_debug(("FMUL %.04f\n",(double)src)); +#if FPU_HAVE_IEEE_DOUBLE + get_dest_flags(FPU registers[reg]); + get_source_flags(src); + if(fl_dest.in_range && fl_source.in_range) { + FPU registers[reg] *= src; + } + else if (fl_dest.nan || fl_source.nan || + (fl_dest.zero && fl_source.infinity) || + (fl_dest.infinity && fl_source.zero) ) { + make_nan( FPU registers[reg] ); + } + else if (fl_dest.zero || fl_source.zero ) { + if (( fl_dest.negative && !fl_source.negative) || + (!fl_dest.negative && fl_source.negative)) { + make_zero_negative(FPU registers[reg]); + } + else { + make_zero_positive(FPU registers[reg]); + } + } + else { + if(( fl_dest.negative && !fl_source.negative) || + 
(!fl_dest.negative && fl_source.negative)) { + make_inf_negative(FPU registers[reg]); + } + else { + make_inf_positive(FPU registers[reg]); + } + } +#else + fpu_debug(("FMUL %.04f\n",(double)src)); + FPU registers[reg] *= src; +#endif + make_fpsr(FPU registers[reg]); + break; + case 0x24: /* FSGLDIV */ + fpu_debug(("FSGLDIV %.04f\n",(double)src)); + // TODO: round to float. + FPU registers[reg] /= src; + make_fpsr(FPU registers[reg]); + break; + case 0x25: /* FREM */ + fpu_debug(("FREM %.04f\n",(double)src)); + // FPU registers[reg] = FPU registers[reg] - (double) ((int) (FPU registers[reg] / src + 0.5)) * src; + { + fpu_register quot = round_to_nearest(FPU registers[reg] / src); +#if FPU_HAVE_IEEE_DOUBLE + uae_u32 sign = get_quotient_sign(FPU registers[reg],src); +#endif + FPU registers[reg] = FPU registers[reg] - quot * src; + make_fpsr(FPU registers[reg]); +#if FPU_HAVE_IEEE_DOUBLE + make_quotient(quot,sign); +#endif + } + break; + + case 0x26: /* FSCALE */ + fpu_debug(("FSCALE %.04f\n",(double)src)); + + // TODO: + // Overflow, underflow + +#if FPU_HAVE_IEEE_DOUBLE + if( isinf(FPU registers[reg]) ) { + make_nan( FPU registers[reg] ); + } + else { + // When the absolute value of the source operand is >= 2^14, + // an overflow or underflow always results. + // Here (int) cast is okay. 
+ fast_scale( FPU registers[reg], (int)round_to_zero(src) ); + } +#else + if (src != 0) { // Manual says: src==0 -> FPn + FPU registers[reg] *= exp (log (2.0) * src); + } +#endif + make_fpsr(FPU registers[reg]); + break; + case 0x27: /* FSGLMUL */ + fpu_debug(("FSGLMUL %.04f\n",(double)src)); + FPU registers[reg] *= src; + make_fpsr(FPU registers[reg]); + break; + case 0x28: /* FSUB */ + fpu_debug(("FSUB %.04f\n",(double)src)); + FPU registers[reg] -= src; + make_fpsr(FPU registers[reg]); + break; + case 0x30: /* FSINCOS */ + case 0x31: + case 0x32: + case 0x33: + case 0x34: + case 0x35: + case 0x36: + case 0x37: + fpu_debug(("FSINCOS %.04f\n",(double)src)); + // Cosine must be calculated first if same register + FPU registers[extra & 7] = cos(src); + FPU registers[reg] = sin (src); + // Set FPU fpsr according to the sine result + make_fpsr(FPU registers[reg]); + break; + case 0x38: /* FCMP */ + fpu_debug(("FCMP %.04f\n",(double)src)); + + // The infinity bit is always cleared by the FCMP + // instruction since it is not used by any of the + // conditional predicate equations. + +#if FPU_HAVE_IEEE_DOUBLE + if( isinf(src) ) { + if( isneg(src) ) { + // negative infinity + if( isinf(FPU registers[reg]) && isneg(FPU registers[reg]) ) { + // Zero, Negative + FPU fpsr.condition_codes = NATIVE_FFLAG_ZERO | NATIVE_FFLAG_NEGATIVE; + fpu_debug(("-INF cmp -INF -> NZ\n")); + } + else { + // None + FPU fpsr.condition_codes = 0; + fpu_debug(("x cmp -INF -> None\n")); + } + } + else { + // positive infinity + if( isinf(FPU registers[reg]) && !isneg(FPU registers[reg]) ) { + // Zero + FPU fpsr.condition_codes = NATIVE_FFLAG_ZERO; + fpu_debug(("+INF cmp +INF -> Z\n")); + } + else { + // Negative + FPU fpsr.condition_codes = NATIVE_FFLAG_NEGATIVE; + fpu_debug(("X cmp +INF -> N\n")); + } + } + } + else { + fpu_register tmp = FPU registers[reg] - src; + FPU fpsr.condition_codes + = (iszero(tmp) ? NATIVE_FFLAG_ZERO : 0) + | (isneg(tmp) ? 
NATIVE_FFLAG_NEGATIVE : 0) + ; + } +#else + { + fpu_register tmp = FPU registers[reg] - src; + make_fpsr(tmp); + } +#endif + break; + case 0x3a: /* FTST */ + fpu_debug(("FTST %.04f\n",(double)src)); + // make_fpsr(FPU registers[reg]); + make_fpsr(src); + break; + default: + fpu_debug(("ILLEGAL F OP %X\n",opcode)); + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + break; + } + fpu_debug(("END m68k_getpc()=%X\n",m68k_getpc())); + dump_registers( "END "); + return; + } + + fpu_debug(("ILLEGAL F OP 2 %X\n",opcode)); + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); +} + + +void fpu_set_fpsr(uae_u32 new_fpsr) +{ + set_fpsr(new_fpsr); +} + +uae_u32 fpu_get_fpsr(void) +{ + return get_fpsr(); +} + +void fpu_set_fpcr(uae_u32 new_fpcr) +{ + set_fpcr(new_fpcr); +} + +uae_u32 fpu_get_fpcr(void) +{ + return get_fpcr(); +} + +/* -------------------------- Initialization -------------------------- */ + +void FFPU fpu_init (bool integral_68040) +{ + fpu_debug(("fpu_init\n")); + + static bool initialized_lookup_tables = false; + if (!initialized_lookup_tables) { + fpu_init_native_fflags(); + fpu_init_native_exceptions(); + fpu_init_native_accrued_exceptions(); + initialized_lookup_tables = true; + } + + FPU is_integral = integral_68040; + set_fpcr(0); + set_fpsr(0); + FPU instruction_address = 0; +} + +void FFPU fpu_exit (void) +{ + fpu_debug(("fpu_exit\n")); +} + +void FFPU fpu_reset (void) +{ + fpu_debug(("fpu_reset\n")); + fpu_exit(); + fpu_init(FPU is_integral); +} diff --git a/BasiliskII/src/uae_cpu/fpu/fpu_uae.h b/BasiliskII/src/uae_cpu/fpu/fpu_uae.h new file mode 100644 index 00000000..d8930e32 --- /dev/null +++ b/BasiliskII/src/uae_cpu/fpu/fpu_uae.h @@ -0,0 +1,217 @@ +/* + * fpu/fpu_uae.h - Extra Definitions for the old UAE FPU core + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + 
* TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * MC68881/68040 fpu emulation + * + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef FPU_UAE_H +#define FPU_UAE_H + +// Only define if you have IEEE 64 bit doubles. +#define FPU_HAVE_IEEE_DOUBLE 1 + +/* NOTE: this file shall be included from fpu/fpu_uae.cpp */ +#undef PUBLIC +#define PUBLIC extern + +#undef PRIVATE +#define PRIVATE static + +#undef FFPU +#define FFPU /**/ + +#undef FPU +#define FPU fpu. + +enum { +#ifdef WORDS_BIGENDIAN + FHI = 0, + FLO = 1 +#else + FHI = 1, + FLO = 0 +#endif +}; + +// Floating-point rounding support +PRIVATE inline fpu_register round_to_zero(fpu_register const & x); +PRIVATE inline fpu_register round_to_nearest(fpu_register const & x); + +#if FPU_HAVE_IEEE_DOUBLE + +// Lauri-- full words to avoid partial register stalls. 
+struct double_flags { + uae_u32 in_range; + uae_u32 zero; + uae_u32 infinity; + uae_u32 nan; + uae_u32 negative; +}; +PRIVATE double_flags fl_source; +PRIVATE double_flags fl_dest; +PRIVATE inline void FFPU get_dest_flags(fpu_register const & r); +PRIVATE inline void FFPU get_source_flags(fpu_register const & r); + +PRIVATE inline bool FFPU do_isnan(fpu_register const & r); +PRIVATE inline bool FFPU do_isinf(fpu_register const & r); +PRIVATE inline bool FFPU do_isneg(fpu_register const & r); +PRIVATE inline bool FFPU do_iszero(fpu_register const & r); + +PRIVATE inline void FFPU make_nan(fpu_register & r); +PRIVATE inline void FFPU make_zero_positive(fpu_register & r); +PRIVATE inline void FFPU make_zero_negative(fpu_register & r); +PRIVATE inline void FFPU make_inf_positive(fpu_register & r); +PRIVATE inline void FFPU make_inf_negative(fpu_register & r); + +PRIVATE inline void FFPU fast_scale(fpu_register & r, int add); +PRIVATE inline fpu_register FFPU fast_fgetexp(fpu_register const & r); + +// May be optimized for particular processors +#ifndef FPU_USE_NATIVE_FLAGS +PRIVATE inline void FFPU make_fpsr(fpu_register const & r); +#endif + +// Normalize to range 1..2 +PRIVATE inline void FFPU fast_remove_exponent(fpu_register & r); + +// The sign of the quotient is the exclusive-OR of the sign bits +// of the source and destination operands. +PRIVATE inline uae_u32 FFPU get_quotient_sign( + fpu_register const & ra, fpu_register const & rb +); + +// Quotient Byte is loaded with the sign and least significant +// seven bits of the quotient. 
+PRIVATE inline void FFPU make_quotient( + fpu_register const & quotient, uae_u32 sign +); + +// to_single +PRIVATE inline fpu_register FFPU make_single( + uae_u32 value +); + +// from_single +PRIVATE inline uae_u32 FFPU extract_single( + fpu_register const & src +); + +// to_exten +PRIVATE inline fpu_register FFPU make_extended( + uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3 +); + +/* + Would be so much easier with full size floats :( + ... this is so vague. +*/ +// to_exten_no_normalize +PRIVATE inline void FFPU make_extended_no_normalize( + uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3, fpu_register & result +); + +// from_exten +PRIVATE inline void FFPU extract_extended(fpu_register const & src, + uae_u32 * wrd1, uae_u32 * wrd2, uae_u32 * wrd3 +); + +// to_double +PRIVATE inline fpu_register FFPU make_double( + uae_u32 wrd1, uae_u32 wrd2 +); + +// from_double +PRIVATE inline void FFPU extract_double(fpu_register const & src, + uae_u32 * wrd1, uae_u32 * wrd2 +); + +#else /* !FPU_HAVE_IEEE_DOUBLE */ + +// FIXME: may be optimized for particular processors +#ifndef FPU_USE_NATIVE_FLAGS +PRIVATE inline void FFPU make_fpsr(fpu_register const & r); +#endif + +// to_single +PRIVATE inline fpu_register make_single( + uae_u32 value +); + +// from_single +PRIVATE inline uae_u32 FFPU extract_single( + fpu_register const & src +); + +// to exten +PRIVATE inline fpu_register FFPU make_extended( + uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3 +); + +// from_exten +PRIVATE inline void FFPU extract_extended( + fpu_register const & src, uae_u32 * wrd1, uae_u32 * wrd2, uae_u32 * wrd3 +); + +// to_double +PRIVATE inline fpu_register FFPU make_double( + uae_u32 wrd1, uae_u32 wrd2 +); + +// from_double +PRIVATE inline void FFPU extract_double( + fpu_register const & src, uae_u32 * wrd1, uae_u32 * wrd2 +); + +#endif /* FPU_HAVE_IEEE_DOUBLE */ + +PRIVATE inline fpu_register FFPU make_packed( + uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3 +); + +PRIVATE inline void FFPU extract_packed( + fpu_register 
const & src, uae_u32 * wrd1, uae_u32 * wrd2, uae_u32 * wrd3 +); + +PRIVATE inline int FFPU get_fp_value( + uae_u32 opcode, uae_u16 extra, fpu_register & src +); + +PRIVATE inline int FFPU put_fp_value( + uae_u32 opcode, uae_u16 extra, fpu_register const & value +); + +PRIVATE inline int FFPU get_fp_ad( + uae_u32 opcode, uae_u32 * ad +); + +PRIVATE inline int FFPU fpp_cond( + int condition +); + +#endif /* FPU_UAE_H */ diff --git a/BasiliskII/src/uae_cpu/fpu/fpu_x86.cpp b/BasiliskII/src/uae_cpu/fpu/fpu_x86.cpp new file mode 100644 index 00000000..a4c6af2d --- /dev/null +++ b/BasiliskII/src/uae_cpu/fpu/fpu_x86.cpp @@ -0,0 +1,6791 @@ +/* + * fpu/fpu_x86.cpp - 68881/68040 fpu code for x86/Windows an Linux/x86. + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * MC68881/68040 fpu emulation + * + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + * Interface + * Almost the same as original. Please see the comments in "fpu.h". + * + * + * Why assembly? + * The reason is not really speed, but to get infinities, + * NANs and flags finally working. + * + * + * How to maintain Mac and x86 FPU flags -- plan B + * + * regs.piar is not updated. + * + * regs.FPU fpcr always contains the real 68881/68040 control word. + * + * regs.FPU fpsr is not kept up-to-date, for efficiency reasons. + * Most of the FPU commands update this in a way or another, but it is not + * read nearly that often. Therefore, three host-specific words hold the + * status byte and exception byte ("x86_status_word"), accrued exception + * byte ("x86_status_word_accrued") and the quotient byte ("FPU fpsr.quotient"), + * as explained below. + * + * CONDITION CODE - QUOTIENT - EXCEPTION STATUS - ACCRUED EXCEPTION + * CONDITION CODE (N,Z,I,NAN) + * - updated after each opcode, if needed. + * - x86 assembly opcodes call FXAM and store the status word to + * "x86_status_word". + * - When regs.FPU fpsr is actually used, the value of "x86_status_word" + * is translated. + * QUOTIENT BYTE + * - Updated by frem, fmod, frestore(null frame) + * - Stored in "FPU fpsr.quotient" in correct bit position, combined when + * regs.FPU fpsr is actually used. + * EXCEPTION STATUS (BSUN,SNAN,OPERR,OVFL,UNFL,DZ,INEX2,INEX1) + * - updated after each opcode, if needed. + * - Saved in x86 form in "x86_status_word". + * - When regs.FPU fpsr is actually used, the value of "x86_status_word" + * is translated. + * - Only fcc_op can set BSUN + * ACCRUED EXCEPTION (ACCR_IOP,ACCR_OVFL,ACCR_UNFL,ACCR_DZ,ACCR_INEX) + * - updated after each opcode, if needed. + * - Logically OR'ed in x86 form to "x86_status_word_accrued". 
+ * - When regs.FPU fpsr is actually used, the value of + * "x86_status_word_accrued" is translated. + * + * When "x86_status_word" and "x86_status_word_accrued" are stored, + * all pending x86 FPU exceptions are cleared, if there are any. + * + * Writing to "regs.FPU fpsr" reverse-maps to x86 status/exception values and + * stores the values in "x86_status_word", "x86_status_word_accrued" + * and "FPU fpsr.quotient". + * + * So, "x86_status_word" and "x86_status_word_accrued" are not in + * correct bit positions and have x86 values, but "FPU fpsr.quotient" is at + * correct position. + * + * Note that it does not matter that the reverse-mapping is not exact + * (both SW_IE and SW_DE are mapped to ACCR_IOP, but ACCR_IOP maps to + * SW_IE only), the MacOS always sees the correct exception bits. + * + * Also note the usage of the fake BSUN flag SW_FAKE_BSUN. If you change + * the x86 FPU code, you must make sure that you don't generate any FPU + * stack faults. + * + * + * x86 co-processor initialization: + * + * Bit Code Use + * 0 IM Invalid operation exception mask 1 Disabled + * 1 DM Denormalized operand exception mask 1 Disabled + * 2 ZM Zerodivide exception mask 1 Disabled + * 3 OM Overflow exception mask 1 Disabled + * 4 UM Underflow exception mask 1 Disabled + * 5 PM Precision exception mask 1 Disabled + * 6 - - - - + * 7 IEM Interrupt enable mask 0 Enabled + * 8 PC Precision control\ 1 - 64 bits + * 9 PC Precision control/ 1 / + * 10 RC Rounding control\ 0 - Nearest even + * 11 RC Rounding control/ 0 / + * 12 IC Infinity control 1 Affine + * 13 - - - - + * 14 - - - - + * 15 - - - - + * + * + * TODO: + * - Exceptions are not implemented. + * - All tbyte variables should be aligned to 16-byte boundaries. + * (for best efficiency). + * - FTRAPcc code looks like broken. + * - If USE_3_BIT_QUOTIENT is 0, exceptions should be checked after + * float -> int rounding (frem,fmod). + * - The speed can be greatly improved. 
Do this only after you are sure + * that there are no major bugs. + * - Support for big-endian byte order (but all assembly code needs to + * be rewritten anyway) + * I have some non-portable code like *((uae_u16 *)&m68k_dreg(regs, reg)) = newv; + * Sorry about that, you need to change these. I could do it myself, but better + * not, I would have no way to test them out. + * I tried to mark all spots with a comment TODO_BIGENDIAN. + * - to_double() may need renormalization code. Or then again, maybe not. + * - Signaling NANs should be handled better. The current mapping of + * signaling nan exception to denormalized operand exception is only + * based on the idea that the (possible) handler sees that "something + * seriously wrong" and takes the same action. Should not really get (m)any + * of those since normalization is handled on to_exten() + * + */ + +# include +# include + +#include "sysdeps.h" +#include "memory.h" +#include "readcpu.h" +#include "newcpu.h" +#define FPU_IMPLEMENTATION +#include "fpu/fpu.h" +#include "fpu/fpu_x86.h" +#include "fpu/fpu_x86_asm.h" + +/* Global FPU context */ +fpu_t fpu; + +/* -------------------------------------------------------------------------- */ +/* --- Native Support --- */ +/* -------------------------------------------------------------------------- */ + +#include "fpu/flags.h" +#include "fpu/exceptions.h" +#include "fpu/rounding.h" +#include "fpu/impl.h" + +#include "fpu/flags.cpp" +#include "fpu/exceptions.cpp" +#include "fpu/rounding.cpp" + +/* -------------------------------------------------------------------------- */ +/* --- Scopes Definition --- */ +/* -------------------------------------------------------------------------- */ + +#undef PUBLIC +#define PUBLIC /**/ + +#undef PRIVATE +#define PRIVATE static + +#undef FFPU +#define FFPU /**/ + +#undef FPU +#define FPU fpu. 
+ +/* ---------------------------- Compatibility ---------------------------- */ + +#define BYTE uint8 +#define WORD uint16 +#define DWORD uint32 +#define min(a, b) (((a) < (b)) ? (a) : (b)) + +/* ---------------------------- Configuration ---------------------------- */ + +/* +If USE_3_BIT_QUOTIENT is set to 1, FREM and FMOD use a faster version +with only 3 quotient bits (those provided by the x86 FPU). If set to 0, +they calculate the same 7 bits that m68k does. It seems (as for now) that +3 bits suffice for all Mac programs I have tried. + +If you decide that you need all 7 bits (USE_3_BIT_QUOTIENT is 0), +consider checking the host exception flags after FISTP (search for +"TODO:Quotient". The result may be too large to fit into a dword. +*/ +/* +gb-- I only tested the following configurations: + USE_3_BIT_QUOTIENT 1 -- still changes to apply if no 3-bit quotient + FPU_DEBUG 1 or 0 + USE_CONSISTENCY_CHECKING 0 + I3_ON_ILLEGAL_FPU_OP 0 -- and this won't change + I3_ON_FTRAPCC 0 -- and this won't change +*/ +#define USE_3_BIT_QUOTIENT 1 + +//#define FPU_DEBUG 0 -- now defined in "fpu/fpu.h" +#define USE_CONSISTENCY_CHECKING 0 + +#define I3_ON_ILLEGAL_FPU_OP 0 +#define I3_ON_FTRAPCC 0 + +/* ---------------------------- Debugging ---------------------------- */ + +PUBLIC void FFPU fpu_dump_registers(void) +{ + for (int i = 0; i < 8; i++){ + printf ("FP%d: %g ", i, fpu_get_register(i)); + if ((i & 3) == 3) + printf ("\n"); + } +} + +PUBLIC void FFPU fpu_dump_flags(void) +{ + printf ("N=%d Z=%d I=%d NAN=%d\n", + (get_fpsr() & FPSR_CCB_NEGATIVE) != 0, + (get_fpsr() & FPSR_CCB_ZERO)!= 0, + (get_fpsr() & FPSR_CCB_INFINITY) != 0, + (get_fpsr() & FPSR_CCB_NAN) != 0); +} + +#include "debug.h" + +#if FPU_DEBUG + +PRIVATE void FFPU dump_first_bytes_buf(char *b, uae_u8* buf, uae_s32 actual) +{ + char bb[10]; + int32 i, bytes = min(actual,100); + + *b = 0; + for (i=0; i= 10) _ix = 0; + + sprintf( _s[_ix], "%.04f", (float)f ); + return( _s[_ix] ); +} + +PUBLIC void FFPU 
dump_registers(const char *s) +{ + char b[512]; + + sprintf( + b, + "%s: %s, %s, %s, %s, %s, %s, %s, %s\r\n", + s, + etos(FPU registers[0]), + etos(FPU registers[1]), + etos(FPU registers[2]), + etos(FPU registers[3]), + etos(FPU registers[4]), + etos(FPU registers[5]), + etos(FPU registers[6]), + etos(FPU registers[7]) + ); + D(bug((char*)b)); +} + +#else + +PUBLIC void FFPU dump_registers(const char *) +{ +} + +PUBLIC void FFPU dump_first_bytes(uae_u8 *, uae_s32) +{ +} + +#endif + + +/* ---------------------------- FPU consistency ---------------------------- */ + +#if USE_CONSISTENCY_CHECKING +PRIVATE void FFPU FPU_CONSISTENCY_CHECK_START(void) +{ +/* _asm { + FNSTSW checked_sw_atstart + } */ + __asm__ __volatile__("fnstsw %0" : "=m" (checked_sw_atstart)); +} + +PRIVATE void FFPU FPU_CONSISTENCY_CHECK_STOP(const char *name) +{ + uae_u16 checked_sw_atend; +// _asm FNSTSW checked_sw_atend + __asm__ __volatile__("fnstsw %0" : "=m" (checked_sw_attend)); + char msg[256]; + + // Check for FPU stack overflows/underflows. + if( (checked_sw_atend & 0x3800) != (checked_sw_atstart & 0x3800) ) { + wsprintf( + msg, + "FPU stack leak at %s, %X, %X\r\n", + name, + (int)(checked_sw_atstart & 0x3800) >> 11, + (int)(checked_sw_atend & 0x3800) >> 11 + ); + OutputDebugString(msg); + } + + // Observe status mapping. 
+ /* + if(checked_sw_atstart != 0x400 || checked_sw_atend != 0x400) { + wsprintf( + msg, "Op %s, x86_status_word before=%X, x86_status_word after=%X\r\n", + name, (int)checked_sw_atstart, (int)checked_sw_atend + ); + OutputDebugString(msg); + } + */ +} +#else +PRIVATE void FFPU FPU_CONSISTENCY_CHECK_START(void) +{ +} + +PRIVATE void FFPU FPU_CONSISTENCY_CHECK_STOP(const char *) +{ +} +#endif + + +/* ---------------------------- Status byte ---------------------------- */ + +// Map x86 FXAM codes -> m68k fpu status byte +#define SW_Z_I_NAN_MASK (SW_C0|SW_C2|SW_C3) +#define SW_Z (SW_C3) +#define SW_I (SW_C0|SW_C2) +#define SW_NAN (SW_C0) +#define SW_FINITE (SW_C2) +#define SW_EMPTY_REGISTER (SW_C0|SW_C3) +#define SW_DENORMAL (SW_C2|SW_C3) +#define SW_UNSUPPORTED (0) +#define SW_N (SW_C1) + +// Initial state after boot, reset and frestore(null frame) +#define SW_INITIAL SW_FINITE + + +/* ---------------------------- Status functions ---------------------------- */ + +PRIVATE void inline FFPU SET_BSUN_ON_NAN () +{ + if( (x86_status_word & (SW_Z_I_NAN_MASK)) == SW_NAN ) { + x86_status_word |= SW_FAKE_BSUN; + x86_status_word_accrued |= SW_IE; + } +} + +PRIVATE void inline FFPU build_ex_status () +{ + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word_accrued |= x86_status_word; + } +} + +// TODO_BIGENDIAN; all of these. +/* ---------------------------- Type functions ---------------------------- */ + +/* +When the FPU creates a NAN, the NAN always contains the same bit pattern +in the mantissa. All bits of the mantissa are ones for any precision. +When the user creates a NAN, any nonzero bit pattern can be stored in the mantissa. +*/ +PRIVATE inline void FFPU MAKE_NAN (fpu_register & f) +{ + // Make it non-signaling. + uae_u8 * p = (uae_u8 *) &f; + memset( p, 0xFF, sizeof(fpu_register) - 1 ); + p[9] = 0x7F; +} + +/* +For single- and double-precision infinities the fraction is a zero. 
+For extended-precision infinities, the mantissa�s MSB, the explicit +integer bit, can be either one or zero. +*/ +PRIVATE inline uae_u32 FFPU IS_INFINITY (fpu_register const & f) +{ + uae_u8 * p = (uae_u8 *) &f; + if( ((p[9] & 0x7F) == 0x7F) && p[8] == 0xFF ) { + if ((*((uae_u32 *)&p[0]) == 0) && + ((*((uae_u32 *)&p[4]) & 0x7FFFFFFF) == 0)) + return(1); + } + return(0); +} + +PRIVATE inline uae_u32 FFPU IS_NAN (fpu_register const & f) +{ + uae_u8 * p = (uae_u8 *) &f; + if( ((p[9] & 0x7F) == 0x7F) && p[8] == 0xFF ) { + if ((*((uae_u32 *)&p[0]) == 0) && + ((*((uae_u32 *)&p[4]) & 0x7FFFFFFF) != 0)) + return(1); + } + return(0); +} + +PRIVATE inline uae_u32 FFPU IS_ZERO (fpu_register const & f) +{ + uae_u8 * p = (uae_u8 *) &f; + return *((uae_u32 *)p) == 0 && + *((uae_u32 *)&p[4]) == 0 && + ( *((uae_u16 *)&p[8]) & 0x7FFF ) == 0; +} + +PRIVATE inline void FFPU MAKE_INF_POSITIVE (fpu_register & f) +{ + uae_u8 * p = (uae_u8 *) &f; + memset( p, 0, sizeof(fpu_register)-2 ); + *((uae_u16 *)&p[8]) = 0x7FFF; +} + +PRIVATE inline void FFPU MAKE_INF_NEGATIVE (fpu_register & f) +{ + uae_u8 * p = (uae_u8 *) &f; + memset( p, 0, sizeof(fpu_register)-2 ); + *((uae_u16 *)&p[8]) = 0xFFFF; +} + +PRIVATE inline void FFPU MAKE_ZERO_POSITIVE (fpu_register & f) +{ + uae_u32 * const p = (uae_u32 *) &f; + memset( p, 0, sizeof(fpu_register) ); +} + +PRIVATE inline void FFPU MAKE_ZERO_NEGATIVE (fpu_register & f) +{ + uae_u32 * const p = (uae_u32 *) &f; + memset( p, 0, sizeof(fpu_register) ); + *((uae_u32 *)&p[4]) = 0x80000000; +} + +PRIVATE inline uae_u32 FFPU IS_NEGATIVE (fpu_register const & f) +{ + uae_u8 * p = (uae_u8 *) &f; + return( (p[9] & 0x80) != 0 ); +} + + +/* ---------------------------- Conversions ---------------------------- */ + +PRIVATE void FFPU signed_to_extended ( uae_s32 x, fpu_register & f ) +{ + FPU_CONSISTENCY_CHECK_START(); + +/* _asm { + MOV ESI, [f] + FILD DWORD PTR [x] + FSTP TBYTE PTR [ESI] + } */ + + __asm__ __volatile__("fildl %1\n\tfstpt %0" : "=m" (f) : "m" 
(x)); + D(bug("signed_to_extended (%X) = %s\r\n",(int)x,etos(f))); + FPU_CONSISTENCY_CHECK_STOP("signed_to_extended"); +} + +PRIVATE uae_s32 FFPU extended_to_signed_32 ( fpu_register const & f ) +{ + FPU_CONSISTENCY_CHECK_START(); + volatile uae_s32 tmp; + volatile WORD sw_temp; + +/* _asm { + MOV EDI, [f] + FLD TBYTE PTR [EDI] + FISTP DWORD PTR tmp + FNSTSW sw_temp + } */ + + __asm__ __volatile__( + "fldt %2\n" + "fistpl %0\n" + "fnstsw %1\n" + : "=m" (tmp), "=m" (sw_temp) + : "m" (f) + ); + + if(sw_temp & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + if(sw_temp & (SW_OE|SW_UE|SW_DE|SW_IE)) { // Map SW_OE to OPERR. + x86_status_word |= SW_IE; + x86_status_word_accrued |= SW_IE; + // Setting the value to zero might not be the right way to go, + // but I'll leave it like this for now. + tmp = 0; + } + if(sw_temp & SW_PE) { + x86_status_word |= SW_PE; + x86_status_word_accrued |= SW_PE; + } + } + + D(bug("extended_to_signed_32 (%s) = %X\r\n",etos(f),(int)tmp)); + FPU_CONSISTENCY_CHECK_STOP("extended_to_signed_32"); + return tmp; +} + +PRIVATE uae_s16 FFPU extended_to_signed_16 ( fpu_register const & f ) +{ + FPU_CONSISTENCY_CHECK_START(); + volatile uae_s16 tmp; + volatile WORD sw_temp; + +/* _asm { + MOV EDI, [f] + FLD TBYTE PTR [EDI] + FISTP WORD PTR tmp + FNSTSW sw_temp + } */ + + __asm__ __volatile__( + "fldt %2\n" + "fistp %0\n" + "fnstsw %1\n" + : "=m" (tmp), "=m" (sw_temp) + : "m" (f) + ); + + if(sw_temp & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + if(sw_temp & (SW_OE|SW_UE|SW_DE|SW_IE)) { // Map SW_OE to OPERR. 
+ x86_status_word |= SW_IE; + x86_status_word_accrued |= SW_IE; + tmp = 0; + } + if(sw_temp & SW_PE) { + x86_status_word |= SW_PE; + x86_status_word_accrued |= SW_PE; + } + } + + D(bug("extended_to_signed_16 (%s) = %X\r\n",etos(f),(int)tmp)); + FPU_CONSISTENCY_CHECK_STOP("extended_to_signed_16"); + return tmp; +} + +PRIVATE uae_s8 FFPU extended_to_signed_8 ( fpu_register const & f ) +{ + FPU_CONSISTENCY_CHECK_START(); + volatile uae_s16 tmp; + volatile WORD sw_temp; + +/* _asm { + MOV EDI, [f] + FLD TBYTE PTR [EDI] + FISTP WORD PTR tmp + FNSTSW sw_temp + } */ + + __asm__ __volatile__( + "fldt %2\n" + "fistp %0\n" + "fnstsw %1\n" + : "=m" (tmp), "=m" (sw_temp) + : "m" (f) + ); + + if(sw_temp & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + if(sw_temp & (SW_OE|SW_UE|SW_DE|SW_IE)) { // Map SW_OE to OPERR. + x86_status_word |= SW_IE; + x86_status_word_accrued |= SW_IE; + tmp = 0; + } + if(sw_temp & SW_PE) { + x86_status_word |= SW_PE; + x86_status_word_accrued |= SW_PE; + } + } + + if(tmp > 127 || tmp < -128) { // OPERR + x86_status_word |= SW_IE; + x86_status_word_accrued |= SW_IE; + } + + D(bug("extended_to_signed_8 (%s) = %X\r\n",etos(f),(int)tmp)); + FPU_CONSISTENCY_CHECK_STOP("extended_to_signed_8"); + return (uae_s8)tmp; +} + +PRIVATE void FFPU double_to_extended ( double x, fpu_register & f ) +{ + FPU_CONSISTENCY_CHECK_START(); + +/* _asm { + MOV EDI, [f] + FLD QWORD PTR [x] + FSTP TBYTE PTR [EDI] + } */ + + __asm__ __volatile__( + "fldl %1\n" + "fstpt %0\n" + : "=m" (f) + : "m" (x) + ); + + FPU_CONSISTENCY_CHECK_STOP("double_to_extended"); +} + +PRIVATE fpu_double FFPU extended_to_double( fpu_register const & f ) +{ + FPU_CONSISTENCY_CHECK_START(); + double result; + +/* _asm { + MOV ESI, [f] + FLD TBYTE PTR [ESI] + FSTP QWORD PTR result + } */ + + __asm__ __volatile__( + "fldt %1\n" + "fstpl %0\n" + : "=m" (result) + : "m" (f) + ); + + FPU_CONSISTENCY_CHECK_STOP("extended_to_double"); + return result; +} + +PRIVATE void FFPU to_single 
( uae_u32 src, fpu_register & f ) +{ + FPU_CONSISTENCY_CHECK_START(); +/* _asm { + MOV ESI, [f] + FLD DWORD PTR src + FSTP TBYTE PTR [ESI] + } */ + + __asm__ __volatile__( + "flds %1\n" + "fstpt %0\n" + : "=m" (f) + : "m" (src) + ); + + D(bug("to_single (%X) = %s\r\n",src,etos(f))); + FPU_CONSISTENCY_CHECK_STOP("to_single"); +} + +// TODO_BIGENDIAN +PRIVATE void FFPU to_exten_no_normalize ( uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3, fpu_register & f ) +{ + FPU_CONSISTENCY_CHECK_START(); + uae_u32 *p = (uae_u32 *)&f; + + uae_u32 sign = (wrd1 & 0x80000000) >> 16; + uae_u32 exp = (wrd1 >> 16) & 0x7fff; + p[0] = wrd3; + p[1] = wrd2; + *((uae_u16 *)&p[2]) = (uae_u16)(sign | exp); + + D(bug("to_exten_no_normalize (%X,%X,%X) = %s\r\n",wrd1,wrd2,wrd3,etos(f))); + FPU_CONSISTENCY_CHECK_STOP("to_exten_no_normalize"); +} + +PRIVATE void FFPU to_exten ( uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3, fpu_register & f ) +{ + FPU_CONSISTENCY_CHECK_START(); + uae_u32 *p = (uae_u32 *)&f; + + uae_u32 sign = (wrd1 & 0x80000000) >> 16; + uae_u32 exp = (wrd1 >> 16) & 0x7fff; + + // The explicit integer bit is not set, must normalize. + // Don't do it for zeroes, infinities or nans. + if( (wrd2 & 0x80000000) == 0 && exp != 0 && exp != 0x7FFF ) { + D(bug("to_exten denormalized mantissa (%X,%X,%X)\r\n",wrd1,wrd2,wrd3)); + if( wrd2 | wrd3 ) { + // mantissa, not fraction. + uae_u64 man = ((uae_u64)wrd2 << 32) | wrd3; + while( exp > 0 && (man & UVAL64(0x8000000000000000)) == 0 ) { + man <<= 1; + exp--; + } + wrd2 = (uae_u32)( man >> 32 ); + wrd3 = (uae_u32)( man & 0xFFFFFFFF ); + if( exp == 0 || (wrd2 & 0x80000000) == 0 ) { + // underflow + wrd2 = wrd3 = exp = 0; + sign = 0; + } + } else { + if(exp != 0x7FFF && exp != 0) { + // Make a non-signaling nan. 
+ exp = 0x7FFF; + sign = 0; + wrd2 = 0x80000000; + } + } + } + + p[0] = wrd3; + p[1] = wrd2; + *((uae_u16 *)&p[2]) = (uae_u16)(sign | exp); + + D(bug("to_exten (%X,%X,%X) = %s\r\n",wrd1,wrd2,wrd3,etos(f))); + FPU_CONSISTENCY_CHECK_STOP("to_exten"); +} + +PRIVATE void FFPU to_double ( uae_u32 wrd1, uae_u32 wrd2, fpu_register & f ) +{ + FPU_CONSISTENCY_CHECK_START(); + + // gb-- make GCC happy + union { + uae_u64 q; + uae_u32 l[2]; + } src; + + // Should renormalize if needed. I'm not sure that x86 and m68k FPU's + // do it the sama way. This should be extremely rare however. + // to_exten() is often called with denormalized values. + + src.l[0] = wrd2; + src.l[1] = wrd1; + +/* _asm { + FLD QWORD PTR src + MOV EDI, [f] + FSTP TBYTE PTR [EDI] + } */ + + __asm__ __volatile__( + "fldl %1\n" + "fstpt %0\n" + : "=m" (f) + : "m" (src.q) + ); + + D(bug("to_double (%X,%X) = %s\r\n",wrd1,wrd2,etos(f))); + FPU_CONSISTENCY_CHECK_STOP("to_double"); +} + +PRIVATE uae_u32 FFPU from_single ( fpu_register const & f ) +{ + FPU_CONSISTENCY_CHECK_START(); + volatile uae_u32 dest; + volatile WORD sw_temp; + +/* _asm { + MOV EDI, [f] + FLD TBYTE PTR [EDI] + FSTP DWORD PTR dest + FNSTSW sw_temp + } */ + + __asm__ __volatile__( + "fldt %2\n" + "fstps %0\n" + "fnstsw %1\n" + : "=m" (dest), "=m" (sw_temp) + : "m" (f) + ); + + sw_temp &= SW_EXCEPTION_MASK; + if(sw_temp) { +// _asm FNCLEX + asm("fnclex"); + x86_status_word = (x86_status_word & ~SW_EXCEPTION_MASK) | sw_temp; + x86_status_word_accrued |= sw_temp; + } + + D(bug("from_single (%s) = %X\r\n",etos(f),dest)); + FPU_CONSISTENCY_CHECK_STOP("from_single"); + return dest; +} + +// TODO_BIGENDIAN +PRIVATE void FFPU from_exten ( fpu_register const & f, uae_u32 *wrd1, uae_u32 *wrd2, uae_u32 *wrd3 ) +{ + FPU_CONSISTENCY_CHECK_START(); + uae_u32 *p = (uae_u32 *)&f; + *wrd3 = p[0]; + *wrd2 = p[1]; + *wrd1 = ( (uae_u32)*((uae_u16 *)&p[2]) ) << 16; + + D(bug("from_exten (%s) = %X,%X,%X\r\n",etos(f),*wrd1,*wrd2,*wrd3)); + 
FPU_CONSISTENCY_CHECK_STOP("from_exten"); +} + +PRIVATE void FFPU from_double ( fpu_register const & f, uae_u32 *wrd1, uae_u32 *wrd2 ) +{ + FPU_CONSISTENCY_CHECK_START(); + volatile uae_u32 dest[2]; + volatile WORD sw_temp; + +/* _asm { + MOV EDI, [f] + FLD TBYTE PTR [EDI] + FSTP QWORD PTR dest + FNSTSW sw_temp + } */ + + __asm__ __volatile__( + "fldt %2\n" + "fstpl %0\n" + "fnstsw %1\n" + : "=m" (dest), "=m" (sw_temp) + : "m" (f) + ); + + sw_temp &= SW_EXCEPTION_MASK; + if(sw_temp) { +// _asm FNCLEX + asm("fnclex"); + x86_status_word = (x86_status_word & ~SW_EXCEPTION_MASK) | sw_temp; + x86_status_word_accrued |= sw_temp; + } + + // TODO: There is a partial memory stall, nothing happens until FSTP retires. + // On PIII, could use MMX move w/o any penalty. + *wrd2 = dest[0]; + *wrd1 = dest[1]; + + D(bug("from_double (%s) = %X,%X\r\n",etos(f),dest[1],dest[0])); + FPU_CONSISTENCY_CHECK_STOP("from_double"); +} + +PRIVATE void FFPU do_fmove ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); +/* _asm { + MOV ESI, [src] + MOV EDI, [dest] + FLD TBYTE PTR [ESI] + FXAM + FNSTSW x86_status_word + FSTP TBYTE PTR [EDI] + } */ + + __asm__ __volatile__( + "fldt %2\n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "=m" (dest) + : "m" (src) + ); + FPU_CONSISTENCY_CHECK_STOP("do_fmove"); +} + +PRIVATE void FFPU do_fsmove ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + __asm__ __volatile__( + "fldt %2\n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "=m" (dest) + : "m" (src) + ); + FPU_CONSISTENCY_CHECK_STOP("do_fsmove"); +} + +PRIVATE void FFPU do_fdmove ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + __asm__ __volatile__( + "fldt %2\n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "=m" (dest) + : "m" (src) + ); + FPU_CONSISTENCY_CHECK_STOP("do_fdmove"); +} + +/* +PRIVATE void FFPU 
do_fmove_no_status ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + _asm { + MOV ESI, [src] + MOV EDI, [dest] + FLD TBYTE PTR [ESI] + FSTP TBYTE PTR [EDI] + } + FPU_CONSISTENCY_CHECK_STOP("do_fmove_no_status"); +} +*/ + + +/* ---------------------------- Operations ---------------------------- */ + +PRIVATE void FFPU do_fint ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); +/* _asm { + MOV ESI, [src] + MOV EDI, [dest] + FLD TBYTE PTR [ESI] + FRNDINT + FXAM + FNSTSW x86_status_word + FSTP TBYTE PTR [EDI] + } */ + __asm__ __volatile__( + "fldt %2\n" + "frndint\n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "=m" (dest) + : "m" (src) + ); + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~(SW_EXCEPTION_MASK - SW_PE); + x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_fint"); +} + +PRIVATE void FFPU do_fintrz ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + WORD cw_temp; + +/* _asm { + MOV ESI, [src] + MOV EDI, [dest] + FSTCW cw_temp + and cw_temp, ~X86_ROUNDING_MODE + or cw_temp, CW_RC_ZERO + FLDCW cw_temp + FLD TBYTE PTR [ESI] + FRNDINT + FXAM + FNSTSW x86_status_word + FLDCW x86_control_word + FSTP TBYTE PTR [EDI] + } */ + + __asm__ __volatile__( + "fstcw %0\n" + "andl $(~X86_ROUNDING_MODE), %0\n" + "orl $CW_RC_ZERO, %0\n" + "fldcw %0\n" + "fldt %3\n" + "frndint\n" + "fxam \n" + "fnstsw %1\n" + "fldcw %4\n" + "fstpt %2\n" + : "+m" (cw_temp), "=m" (x86_status_word), "=m" (dest) + : "m" (src), "m" (x86_control_word) + ); + + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~(SW_EXCEPTION_MASK - SW_PE); + x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_fintrz"); +} + +PRIVATE void FFPU do_fsqrt ( fpu_register & dest, fpu_register const & 
src ) +{ + FPU_CONSISTENCY_CHECK_START(); +/* _asm { + MOV ESI, [src] + MOV EDI, [dest] + FLD TBYTE PTR [ESI] + FSQRT + FXAM + FNSTSW x86_status_word + FSTP TBYTE PTR [EDI] + } */ + + __asm__ __volatile__( + "fldt %2\n" + "fsqrt \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "=m" (dest) + : "m" (src) + ); + + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~(SW_EXCEPTION_MASK - SW_IE - SW_PE); + x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_fsqrt"); +} + +PRIVATE void FFPU do_fssqrt ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + __asm__ __volatile__( + "fldt %2\n" + "fsqrt \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "=m" (dest) + : "m" (src) + ); + + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~(SW_EXCEPTION_MASK - SW_IE - SW_PE); + x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_fssqrt"); +} + +PRIVATE void FFPU do_fdsqrt ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + __asm__ __volatile__( + "fldt %2\n" + "fsqrt \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "=m" (dest) + : "m" (src) + ); + + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~(SW_EXCEPTION_MASK - SW_IE - SW_PE); + x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_fdsqrt"); +} + +PRIVATE void FFPU do_ftst ( fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); +/* _asm { + MOV ESI, [src] + FLD TBYTE PTR [ESI] + FXAM + FNSTSW x86_status_word + FSTP ST(0) + } */ + + __asm__ __volatile__( + "fldt %1\n" + "fxam \n" + "fnstsw %0\n" + "fstp %%st(0)\n" + : "=m" (x86_status_word) + : "m" (src) + ); + + if(x86_status_word & SW_EXCEPTION_MASK) 
{ +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~SW_EXCEPTION_MASK; + } + FPU_CONSISTENCY_CHECK_STOP("do_ftst"); +} + +// These functions are calculated in 53 bits accuracy only. +// Exception checking is not complete. +PRIVATE void FFPU do_fsinh ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + double x, y; + x = extended_to_double( src ); + y = sinh(x); + double_to_extended( y, dest ); + do_ftst( dest ); + FPU_CONSISTENCY_CHECK_STOP("do_fsinh"); +} + +PRIVATE void FFPU do_flognp1 ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + double x, y; + x = extended_to_double( src ); + y = log (x + 1.0); + double_to_extended( y, dest ); + do_ftst( dest ); + FPU_CONSISTENCY_CHECK_STOP("do_flognp1"); +} + +PRIVATE void FFPU do_fetoxm1 ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + double x, y; + x = extended_to_double( src ); + y = exp (x) - 1.0; + double_to_extended( y, dest ); + do_ftst( dest ); + FPU_CONSISTENCY_CHECK_STOP("do_fetoxm1"); +} + +PRIVATE void FFPU do_ftanh ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + double x, y; + x = extended_to_double( src ); + y = tanh (x); + double_to_extended( y, dest ); + do_ftst( dest ); + FPU_CONSISTENCY_CHECK_STOP("do_ftanh"); +} + +PRIVATE void FFPU do_fatan ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + double x, y; + x = extended_to_double( src ); + y = atan (x); + double_to_extended( y, dest ); + do_ftst( dest ); + FPU_CONSISTENCY_CHECK_STOP("do_fatan"); +} + +PRIVATE void FFPU do_fasin ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + double x, y; + x = extended_to_double( src ); + y = asin (x); + double_to_extended( y, dest ); + do_ftst( dest ); + FPU_CONSISTENCY_CHECK_STOP("do_fasin"); +} + +PRIVATE void FFPU do_fatanh ( fpu_register & dest, fpu_register const & src 
) +{ + FPU_CONSISTENCY_CHECK_START(); + double x, y; + x = extended_to_double( src ); + y = log ((1 + x) / (1 - x)) / 2; + double_to_extended( y, dest ); + do_ftst( dest ); + FPU_CONSISTENCY_CHECK_STOP("do_fatanh"); +} + +PRIVATE void FFPU do_fetox ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + double x, y; + x = extended_to_double( src ); + y = exp (x); + double_to_extended( y, dest ); + do_ftst( dest ); + FPU_CONSISTENCY_CHECK_STOP("do_fetox"); +} + +PRIVATE void FFPU do_ftwotox ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + double x, y; + x = extended_to_double( src ); + y = pow(2.0, x); + double_to_extended( y, dest ); + do_ftst( dest ); + FPU_CONSISTENCY_CHECK_STOP("do_ftwotox"); +} + +PRIVATE void FFPU do_ftentox ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + double x, y; + x = extended_to_double( src ); + y = pow(10.0, x); + double_to_extended( y, dest ); + do_ftst( dest ); + FPU_CONSISTENCY_CHECK_STOP("do_ftentox"); +} + +PRIVATE void FFPU do_flogn ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + double x, y; + x = extended_to_double( src ); + y = log (x); + double_to_extended( y, dest ); + do_ftst( dest ); + FPU_CONSISTENCY_CHECK_STOP("do_flogn"); +} + +PRIVATE void FFPU do_flog10 ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + double x, y; + x = extended_to_double( src ); + y = log10 (x); + double_to_extended( y, dest ); + do_ftst( dest ); + FPU_CONSISTENCY_CHECK_STOP("do_flog10"); +} + +PRIVATE void FFPU do_flog2 ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + double x, y; + x = extended_to_double( src ); + y = log (x) / log (2.0); + double_to_extended( y, dest ); + do_ftst( dest ); + FPU_CONSISTENCY_CHECK_STOP("do_flog2"); +} + +PRIVATE void FFPU do_facos ( fpu_register & dest, fpu_register const & src ) +{ + 
FPU_CONSISTENCY_CHECK_START(); + double x, y; + x = extended_to_double( src ); + y = acos(x); + double_to_extended( y, dest ); + do_ftst( dest ); + FPU_CONSISTENCY_CHECK_STOP("do_facos"); +} + +PRIVATE void FFPU do_fcosh ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + double x, y; + x = extended_to_double( src ); + y = cosh(x); + double_to_extended( y, dest ); + do_ftst( dest ); + FPU_CONSISTENCY_CHECK_STOP("do_fcosh"); +} + +PRIVATE void FFPU do_fsin ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); +/* _asm { + MOV ESI, [src] + MOV EDI, [dest] + FLD TBYTE PTR [ESI] + FSIN + FXAM + FNSTSW x86_status_word + FSTP TBYTE PTR [EDI] + } */ + __asm__ __volatile__( + "fldt %2\n" + "fsin \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "=m" (dest) + : "m" (src) + ); + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~(SW_EXCEPTION_MASK - SW_IE - SW_PE); + x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_fsin"); +} + +// TODO: Should check for out-of-range condition (partial tangent) +PRIVATE void FFPU do_ftan ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); +/* _asm { + MOV ESI, [src] + MOV EDI, [dest] + FLD TBYTE PTR [ESI] + FPTAN + FSTP ST(0) ; pop 1.0 (the 8087/287 compatibility thing) + FXAM + FNSTSW x86_status_word + FSTP TBYTE PTR [EDI] + } */ + __asm__ __volatile__( + "fldt %2\n" + "fptan \n" + "fstp %%st(0)\n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "=m" (dest) + : "m" (src) + ); + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~(SW_EXCEPTION_MASK - SW_IE - SW_PE - SW_UE); + x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_ftan"); +} + +PRIVATE void FFPU do_fabs ( fpu_register & dest, fpu_register const & 
src ) +{ + FPU_CONSISTENCY_CHECK_START(); +/* _asm { + MOV ESI, [src] + MOV EDI, [dest] + FLD TBYTE PTR [ESI] + FABS + FXAM + FNSTSW x86_status_word + FSTP TBYTE PTR [EDI] + } */ + __asm__ __volatile__( + "fldt %2\n" + "fabs \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "=m" (dest) + : "m" (src) + ); + // x86 fabs should not rise any exceptions (except stack underflow) + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~SW_EXCEPTION_MASK; + } + FPU_CONSISTENCY_CHECK_STOP("do_fabs"); +} + +PRIVATE void FFPU do_fsabs ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + __asm__ __volatile__( + "fldt %2\n" + "fabs \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "=m" (dest) + : "m" (src) + ); + // x86 fabs should not rise any exceptions (except stack underflow) + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~SW_EXCEPTION_MASK; + } + FPU_CONSISTENCY_CHECK_STOP("do_fsabs"); +} + +PRIVATE void FFPU do_fdabs ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + __asm__ __volatile__( + "fldt %2\n" + "fabs \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "=m" (dest) + : "m" (src) + ); + // x86 fabs should not rise any exceptions (except stack underflow) + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~SW_EXCEPTION_MASK; + } + FPU_CONSISTENCY_CHECK_STOP("do_fdabs"); +} + +PRIVATE void FFPU do_fneg ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); +/* _asm { + MOV ESI, [src] + MOV EDI, [dest] + FLD TBYTE PTR [ESI] + FCHS + FXAM + FNSTSW x86_status_word + FSTP TBYTE PTR [EDI] + } */ + __asm__ __volatile__( + "fldt %2\n" + "fchs \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" 
(x86_status_word), "=m" (dest) + : "m" (src) + ); + // x86 fchs should not rise any exceptions (except stack underflow) + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~SW_EXCEPTION_MASK; + } + FPU_CONSISTENCY_CHECK_STOP("do_fneg"); +} + +PRIVATE void FFPU do_fsneg ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + __asm__ __volatile__( + "fldt %2\n" + "fchs \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "=m" (dest) + : "m" (src) + ); + // x86 fchs should not rise any exceptions (except stack underflow) + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~SW_EXCEPTION_MASK; + } + FPU_CONSISTENCY_CHECK_STOP("do_fsneg"); +} + +PRIVATE void FFPU do_fdneg ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + __asm__ __volatile__( + "fldt %2\n" + "fchs \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "=m" (dest) + : "m" (src) + ); + // x86 fchs should not rise any exceptions (except stack underflow) + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~SW_EXCEPTION_MASK; + } + FPU_CONSISTENCY_CHECK_STOP("do_fdneg"); +} + +PRIVATE void FFPU do_fcos ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); +/* _asm { + MOV ESI, [src] + MOV EDI, [dest] + FLD TBYTE PTR [ESI] + FCOS + FXAM + FNSTSW x86_status_word + FSTP TBYTE PTR [EDI] + } */ + __asm__ __volatile__( + "fldt %2\n" + "fcos \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "=m" (dest) + : "m" (src) + ); + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~(SW_EXCEPTION_MASK - SW_IE - SW_PE); + x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_fcos"); +} + 
+PRIVATE void FFPU do_fgetexp ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); +/* _asm { + MOV ESI, [src] + MOV EDI, [dest] + FLD TBYTE PTR [ESI] + FXTRACT + FSTP ST(0) ; pop mantissa + FXAM + FNSTSW x86_status_word + FSTP TBYTE PTR [EDI] + } */ + __asm__ __volatile__( + "fldt %2\n" + "fxtract\n" + "fstp %%st(0)\n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "=m" (dest) + : "m" (src) + ); + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~SW_EXCEPTION_MASK; + } + FPU_CONSISTENCY_CHECK_STOP("do_fgetexp"); +} + +PRIVATE void FFPU do_fgetman ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); +/* _asm { + MOV ESI, [src] + MOV EDI, [dest] + FLD TBYTE PTR [ESI] + FXTRACT + FXAM + FNSTSW x86_status_word + FSTP TBYTE PTR [EDI] + FSTP ST(0) ; pop exponent + } */ + __asm__ __volatile__( + "fldt %2\n" + "fxtract\n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + "fstp %%st(0)\n" + : "=m" (x86_status_word), "=m" (dest) + : "m" (src) + ); + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~SW_EXCEPTION_MASK; + } + FPU_CONSISTENCY_CHECK_STOP("do_fgetman"); +} + +PRIVATE void FFPU do_fdiv ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); +/* _asm { + MOV ESI, [src] + MOV EDI, [dest] + FLD TBYTE PTR [ESI] + FLD TBYTE PTR [EDI] + FDIV ST(0),ST(1) + FXAM + FNSTSW x86_status_word + FSTP TBYTE PTR [EDI] + FSTP ST(0) + } */ + __asm__ __volatile__( + "fldt %2\n" + "fldt %1\n" + "fdiv %%st(1), %%st(0)\n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + "fstp %%st(0)\n" + : "=m" (x86_status_word), "+m" (dest) + : "m" (src) + ); + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_fdiv"); +} + +PRIVATE void FFPU 
do_fsdiv ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + __asm__ __volatile__( + "fldt %2\n" + "fldt %1\n" + "fdiv %%st(1), %%st(0)\n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + "fstp %%st(0)\n" + : "=m" (x86_status_word), "+m" (dest) + : "m" (src) + ); + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_fsdiv"); +} + +PRIVATE void FFPU do_fddiv ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + __asm__ __volatile__( + "fldt %2\n" + "fldt %1\n" + "fdiv %%st(1), %%st(0)\n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + "fstp %%st(0)\n" + : "=m" (x86_status_word), "+m" (dest) + : "m" (src) + ); + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_fddiv"); +} + +// The sign of the quotient is the exclusive-OR of the sign bits +// of the source and destination operands. +// Quotient Byte is loaded with the sign and least significant +// seven bits of the quotient. 
+ +PRIVATE void FFPU do_fmod ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + + volatile uint16 status; + uae_u32 quot; +#if !USE_3_BIT_QUOTIENT + WORD cw_temp; +#endif + + uae_u8 * dest_p = (uae_u8 *)&dest; + uae_u8 * src_p = (uae_u8 *)&src; + uae_u32 sign = (dest_p[9] ^ src_p[9]) & 0x80; + +/* _asm { + MOV ESI, [src] + MOV EDI, [dest] + +#if !USE_3_BIT_QUOTIENT + MOV CX, x86_control_word + AND CX, ~X86_ROUNDING_MODE + OR CX, CW_RC_ZERO + MOV cw_temp, CX + FLDCW cw_temp + + FLD TBYTE PTR [ESI] + FLD TBYTE PTR [EDI] + FDIV ST(0),ST(1) + FABS + FISTP DWORD PTR quot + FSTP ST(0) + FLDCW x86_control_word + // TODO:Quotient + // Should clear any possible exceptions here +#endif + + FLD TBYTE PTR [ESI] + FLD TBYTE PTR [EDI] + +// loop until the remainder is not partial any more. +partial_loop: + FPREM + FNSTSW status + TEST status, SW_C2 + JNE partial_loop + + + FXAM + FNSTSW x86_status_word + + FSTP TBYTE PTR [EDI] + FSTP ST(0) + } */ + +#if !USE_3_BIT_QUOTIENT + + __asm__ __volatile__( + "movl %6, %%ecx\n" // %6: x86_control_word (read) + "andl $(~X86_ROUNDING_MODE), %%ecx\n" + "orl $CW_RC_ZERO, %%ecx\n" + "movl %%ecx, %0\n" // %0: cw_temp (read/write) + "fldcw %0\n" + "fldt %5\n" + "fldt %4\n" + "fdiv %%st(1), %%st(0)\n" + "fabs \n" + "fistpl %1\n" // %1: quot (read/write) + "fstp %%st(0)\n" + "fldcw %6\n" + "fldt %5\n" + "fldt %4\n" + "0:\n" // partial_loop + "fprem \n" + "fnstsw %2\n" // %2: status (read/write) + "testl $SW_C2, %2\n" + "jne 0b\n" + "fxam \n" + "fnstsw %3\n" // %3: x86_status_word (write) + "fstpt %4\n" + "fstp %%st(0)\n" + : "+m" (cw_temp), "+m" (quot), "+m" (status), "=m" (x86_status_word), "+m" (dest) + : "m" (src), "m" (x86_control_word) + : "ecx" + ); + +#else + + __asm__ __volatile__( + "fldt %3\n" + "fldt %2\n" + "0:\n" // partial_loop + "fprem \n" + "fnstsw %0\n" // %0: status (read/write) + "testl $SW_C2, %0\n" + "jne 0b\n" + "fxam \n" + "fnstsw %1\n" // %1: x86_status_word (write) + "fstpt %2\n" + 
"fstp %%st(0)\n" + : "+m" (status), "=m" (x86_status_word), "+m" (dest) + : "m" (src) + ); + +#endif + + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~(SW_EXCEPTION_MASK - SW_IE - SW_UE); + x86_status_word_accrued |= x86_status_word; + } + +#if USE_3_BIT_QUOTIENT + // SW_C1 Set to least significant bit of quotient (Q0). + // SW_C3 Set to bit 1 (Q1) of the quotient. + // SW_C0 Set to bit 2 (Q2) of the quotient. + quot = ((status & SW_C0) >> 6) | ((status & SW_C3) >> 13) | ((status & SW_C1) >> 9); + FPU fpsr.quotient = (sign | quot) << 16; +#else + FPU fpsr.quotient = (sign | (quot&0x7F)) << 16; +#endif + + FPU_CONSISTENCY_CHECK_STOP("do_fmod"); +} + +PRIVATE void FFPU do_frem ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + + volatile uint16 status; + uae_u32 quot; +#if !USE_3_BIT_QUOTIENT + WORD cw_temp; +#endif + + uae_u8 * dest_p = (uae_u8 *)&dest; + uae_u8 * src_p = (uae_u8 *)&src; + uae_u32 sign = (dest_p[9] ^ src_p[9]) & 0x80; + +/* _asm { + MOV ESI, [src] + MOV EDI, [dest] + +#if !USE_3_BIT_QUOTIENT + MOV CX, x86_control_word + AND CX, ~X86_ROUNDING_MODE + OR CX, CW_RC_NEAR + MOV cw_temp, CX + FLDCW cw_temp + + FLD TBYTE PTR [ESI] + FLD TBYTE PTR [EDI] + FDIV ST(0),ST(1) + FABS + FISTP DWORD PTR quot + FSTP ST(0) + FLDCW x86_control_word + // TODO:Quotient + // Should clear any possible exceptions here +#endif + + FLD TBYTE PTR [ESI] + FLD TBYTE PTR [EDI] + +// loop until the remainder is not partial any more. 
+partial_loop: + FPREM1 + FNSTSW status + TEST status, SW_C2 + JNE partial_loop + + FXAM + FNSTSW x86_status_word + FSTP TBYTE PTR [EDI] + FSTP ST(0) + } */ + +#if !USE_3_BIT_QUOTIENT + + __asm__ __volatile__( + "movl %6, %%ecx\n" // %6: x86_control_word (read) + "andl $(~X86_ROUNDING_MODE), %%ecx\n" + "orl $CW_RC_NEAR, %%ecx\n" + "movl %%ecx, %0\n" // %0: cw_temp (read/write) + "fldcw %0\n" + "fldt %5\n" + "fldt %4\n" + "fdiv %%st(1), %%st(0)\n" + "fabs \n" + "fistpl %1\n" // %1: quot (read/write) + "fstp %%st(0)\n" + "fldcw %6\n" + "fldt %5\n" + "fldt %4\n" + "0:\n" // partial_loop + "fprem1 \n" + "fnstsw %2\n" // %2: status (read/write) + "testl $SW_C2, %2\n" + "jne 0b\n" + "fxam \n" + "fnstsw %3\n" // %3: x86_status_word (write) + "fstpt %4\n" + "fstp %%st(0)\n" + : "+m" (cw_temp), "+m" (quot), "+m" (status), "=m" (x86_status_word), "+m" (dest) + : "m" (src), "m" (x86_control_word) + : "ecx" + ); + +#else + + __asm__ __volatile__( + "fldt %3\n" + "fldt %2\n" + "0:\n" // partial_loop + "fprem1 \n" + "fnstsw %0\n" // %0: status (read/write) + "testl $SW_C2, %0\n" + "jne 0b\n" + "fxam \n" + "fnstsw %1\n" // %1: x86_status_word (write) + "fstpt %2\n" + "fstp %%st(0)\n" + : "+m" (status), "=m" (x86_status_word), "+m" (dest) + : "m" (src) + ); + +#endif + + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~(SW_EXCEPTION_MASK - SW_IE - SW_UE); + x86_status_word_accrued |= x86_status_word; + } + +#if USE_3_BIT_QUOTIENT + // SW_C1 Set to least significant bit of quotient (Q0). + // SW_C3 Set to bit 1 (Q1) of the quotient. + // SW_C0 Set to bit 2 (Q2) of the quotient. + quot = ((status & SW_C0) >> 6) | ((status & SW_C3) >> 13) | ((status & SW_C1) >> 9); + FPU fpsr.quotient = (sign | quot) << 16; +#else + FPU fpsr.quotient = (sign | (quot&0x7F)) << 16; +#endif + + FPU_CONSISTENCY_CHECK_STOP("do_frem"); +} + +// Faster versions. The current rounding mode is already correct. 
+#if !USE_3_BIT_QUOTIENT +PRIVATE void FFPU do_fmod_dont_set_cw ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + + volatile uint16 status; + uae_u32 quot; + + uae_u8 * dest_p = (uae_u8 *)&dest; + uae_u8 * src_p = (uae_u8 *)&src; + uae_u32 sign = (dest_p[9] ^ src_p[9]) & 0x80; + + _asm { + MOV ESI, [src] + MOV EDI, [dest] + + FLD TBYTE PTR [ESI] + FLD TBYTE PTR [EDI] + FDIV ST(0),ST(1) + FABS + FISTP DWORD PTR quot + FSTP ST(0) + // TODO:Quotient + // Should clear any possible exceptions here + + FLD TBYTE PTR [ESI] + FLD TBYTE PTR [EDI] + +// loop until the remainder is not partial any more. +partial_loop: + FPREM + FNSTSW status + TEST status, SW_C2 + JNE partial_loop + + FXAM + FNSTSW x86_status_word + + FSTP TBYTE PTR [EDI] + FSTP ST(0) + } + if(x86_status_word & SW_EXCEPTION_MASK) { + _asm FNCLEX + x86_status_word &= ~(SW_EXCEPTION_MASK - SW_IE - SW_UE); + x86_status_word_accrued |= x86_status_word; + } + FPU fpsr.quotient = (sign | (quot&0x7F)) << 16; + FPU_CONSISTENCY_CHECK_STOP("do_fmod_dont_set_cw"); +} + +PRIVATE void FFPU do_frem_dont_set_cw ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + + volatile uint16 status; + uae_u32 quot; + + uae_u8 * dest_p = (uae_u8 *)&dest; + uae_u8 * src_p = (uae_u8 *)&src; + uae_u32 sign = (dest_p[9] ^ src_p[9]) & 0x80; + + _asm { + MOV ESI, [src] + MOV EDI, [dest] + + FLD TBYTE PTR [ESI] + FLD TBYTE PTR [EDI] + FDIV ST(0),ST(1) + FABS + FISTP DWORD PTR quot + FSTP ST(0) + // TODO:Quotient + // Should clear any possible exceptions here + + FLD TBYTE PTR [ESI] + FLD TBYTE PTR [EDI] + +// loop until the remainder is not partial any more. 
+partial_loop: + FPREM1 + FNSTSW status + TEST status, SW_C2 + JNE partial_loop + + FXAM + FNSTSW x86_status_word + FSTP TBYTE PTR [EDI] + FSTP ST(0) + } + if(x86_status_word & SW_EXCEPTION_MASK) { + _asm FNCLEX + x86_status_word &= ~(SW_EXCEPTION_MASK - SW_IE - SW_UE); + x86_status_word_accrued |= x86_status_word; + } + FPU fpsr.quotient = (sign | (quot&0x7F)) << 16; + FPU_CONSISTENCY_CHECK_STOP("do_frem_dont_set_cw"); +} +#endif //USE_3_BIT_QUOTIENT + +PRIVATE void FFPU do_fadd ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); +/* _asm { + MOV ESI, [src] + MOV EDI, [dest] + FLD TBYTE PTR [ESI] + FLD TBYTE PTR [EDI] + FADD + FXAM + FNSTSW x86_status_word + FSTP TBYTE PTR [EDI] + } */ + __asm__ __volatile__( + "fldt %2\n" + "fldt %1\n" + "fadd \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "+m" (dest) + : "m" (src) + ); + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~(SW_EXCEPTION_MASK - SW_IE - SW_UE - SW_OE - SW_PE); + x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_fadd"); +} + +PRIVATE void FFPU do_fsadd ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + __asm__ __volatile__( + "fldt %2\n" + "fldt %1\n" + "fadd \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "+m" (dest) + : "m" (src) + ); + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + x86_status_word &= ~(SW_EXCEPTION_MASK - SW_IE - SW_UE - SW_OE - SW_PE); + x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_fsadd"); +} + +PRIVATE void FFPU do_fdadd ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + __asm__ __volatile__( + "fldt %2\n" + "fldt %1\n" + "fadd \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "+m" (dest) + : "m" (src) + ); + if(x86_status_word & SW_EXCEPTION_MASK) { 
+// _asm FNCLEX + x86_status_word &= ~(SW_EXCEPTION_MASK - SW_IE - SW_UE - SW_OE - SW_PE); + x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_fdadd"); +} + +PRIVATE void FFPU do_fmul ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); +/* _asm { + MOV ESI, [src] + MOV EDI, [dest] + FLD TBYTE PTR [ESI] + FLD TBYTE PTR [EDI] + FMUL + FXAM + FNSTSW x86_status_word + FSTP TBYTE PTR [EDI] + } */ + __asm__ __volatile__( + "fldt %2\n" + "fldt %1\n" + "fmul \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "+m" (dest) + : "m" (src) + ); + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_fmul"); +} + +PRIVATE void FFPU do_fsmul ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + __asm__ __volatile__( + "fldt %2\n" + "fldt %1\n" + "fmul \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "+m" (dest) + : "m" (src) + ); + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_fsmul"); +} + +PRIVATE void FFPU do_fdmul ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + __asm__ __volatile__( + "fldt %2\n" + "fldt %1\n" + "fmul \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "+m" (dest) + : "m" (src) + ); + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_fdmul"); +} + +PRIVATE void FFPU do_fsgldiv ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + WORD cw_temp; +/* _asm { + FSTCW cw_temp + and cw_temp, ~X86_ROUNDING_PRECISION + or cw_temp, PRECISION_CONTROL_SINGLE + 
FLDCW cw_temp + + MOV ESI, [src] + MOV EDI, [dest] + FLD TBYTE PTR [ESI] + FLD TBYTE PTR [EDI] + FDIV ST(0),ST(1) + FXAM + FNSTSW x86_status_word + FSTP TBYTE PTR [EDI] + FSTP ST(0) + FLDCW x86_control_word + } */ + __asm__ __volatile__( + "fstcw %0\n" + "andl $(~X86_ROUNDING_PRECISION), %0\n" + "orl $PRECISION_CONTROL_SINGLE, %0\n" + "fldcw %0\n" + "fldt %3\n" + "fldt %2\n" + "fdiv %%st(1), %%st(0)\n" + "fxam \n" + "fnstsw %1\n" + "fstpt %2\n" + "fstp %%st(0)\n" + "fldcw %4\n" + : "+m" (cw_temp), "=m" (x86_status_word), "+m" (dest) + : "m" (src), "m" (x86_control_word) + ); + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_fsgldiv"); +} + +PRIVATE void FFPU do_fscale ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); +/* _asm { + MOV ESI, [src] + MOV EDI, [dest] + FLD TBYTE PTR [ESI] + FLD TBYTE PTR [EDI] + FSCALE + FXAM + FNSTSW x86_status_word + FSTP TBYTE PTR [EDI] + FSTP ST(0) + } */ + __asm__ __volatile__( + "fldt %2\n" + "fldt %1\n" + "fscale \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + "fstp %%st(0)\n" + : "=m" (x86_status_word), "+m" (dest) + : "m" (src) + ); + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~(SW_EXCEPTION_MASK - SW_UE - SW_OE); + x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_fscale"); +} + +PRIVATE void FFPU do_fsglmul ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + WORD cw_temp; + +/* _asm { + FSTCW cw_temp + and cw_temp, ~X86_ROUNDING_PRECISION + or cw_temp, PRECISION_CONTROL_SINGLE + FLDCW cw_temp + + MOV ESI, [src] + MOV EDI, [dest] + FLD TBYTE PTR [ESI] + FLD TBYTE PTR [EDI] + FMUL + FXAM + FNSTSW x86_status_word + FSTP TBYTE PTR [EDI] + + FLDCW x86_control_word + } */ + __asm__ __volatile__( + "fstcw %0\n" + "andl 
$(~X86_ROUNDING_PRECISION), %0\n" + "orl $PRECISION_CONTROL_SINGLE, %0\n" + "fldcw %0\n" + "fldt %3\n" + "fldt %2\n" + "fmul \n" + "fxam \n" + "fnstsw %1\n" + "fstpt %2\n" + "fldcw %4\n" + : "+m" (cw_temp), "=m" (x86_status_word), "+m" (dest) + : "m" (src), "m" (x86_status_word) + ); + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_fsglmul"); +} + +PRIVATE void FFPU do_fsub ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); +/* _asm { + MOV ESI, [src] + MOV EDI, [dest] + FLD TBYTE PTR [ESI] + FLD TBYTE PTR [EDI] + FSUB ST(0),ST(1) + FXAM + FNSTSW x86_status_word + FSTP TBYTE PTR [EDI] + FSTP ST(0) + } */ + __asm__ __volatile__( + "fldt %2\n" + "fldt %1\n" + "fsub %%st(1), %%st(0)\n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + "fstp %%st(0)\n" + : "=m" (x86_status_word), "+m" (dest) + : "m" (src) + ); + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~(SW_EXCEPTION_MASK - SW_IE - SW_UE - SW_OE - SW_PE); + x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_fsub"); +} + +PRIVATE void FFPU do_fssub ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + __asm__ __volatile__( + "fldt %2\n" + "fldt %1\n" + "fsub %%st(1), %%st(0)\n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + "fstp %%st(0)\n" + : "=m" (x86_status_word), "+m" (dest) + : "m" (src) + ); + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~(SW_EXCEPTION_MASK - SW_IE - SW_UE - SW_OE - SW_PE); + x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_fssub"); +} + +PRIVATE void FFPU do_fdsub ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); + __asm__ __volatile__( + "fldt %2\n" + "fldt %1\n" + "fsub 
%%st(1), %%st(0)\n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + "fstp %%st(0)\n" + : "=m" (x86_status_word), "+m" (dest) + : "m" (src) + ); + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~(SW_EXCEPTION_MASK - SW_IE - SW_UE - SW_OE - SW_PE); + x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_fdsub"); +} + +PRIVATE void FFPU do_fsincos ( fpu_register & dest_sin, fpu_register & dest_cos, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); +/* _asm { + MOV ESI, [src] + MOV EDI, [dest_cos] + FLD TBYTE PTR [ESI] + FSINCOS + FSTP TBYTE PTR [EDI] + FXAM + MOV EDI, [dest_sin] + FNSTSW x86_status_word + FSTP TBYTE PTR [EDI] + FSTP ST(0) + } */ + __asm__ __volatile__( + "fldt %3\n" + "fsincos\n" + "fstpt %1\n" + "fxam \n" + "fnstsw %0\n" + "fstpt %2\n" + "fstp %%st(0)\n" + : "=m" (x86_status_word), "=m" (dest_cos), "=m" (dest_sin) + : "m" (src) + ); + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~(SW_EXCEPTION_MASK - SW_IE - SW_UE - SW_PE); + x86_status_word_accrued |= x86_status_word; + } + FPU_CONSISTENCY_CHECK_STOP("do_fsincos"); +} + +PRIVATE void FFPU do_fcmp ( fpu_register & dest, fpu_register const & src ) +{ + FPU_CONSISTENCY_CHECK_START(); +/* _asm { + MOV ESI, [src] + MOV EDI, [dest] + FLD TBYTE PTR [ESI] + FLD TBYTE PTR [EDI] + FSUB ST(0),ST(1) + FXAM + FNSTSW x86_status_word + FSTP ST(0) + FSTP ST(0) + } */ + __asm__ __volatile__( + "fldt %2\n" + "fldt %1\n" + "fsub %%st(1), %%st(0)\n" + "fxam \n" + "fnstsw %0\n" + "fstp %%st(0)\n" + "fstp %%st(0)\n" + : "=m" (x86_status_word) + : "m" (dest), "m" (src) + ); + if(x86_status_word & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + x86_status_word &= ~SW_EXCEPTION_MASK; + } + FPU_CONSISTENCY_CHECK_STOP("do_fcmp"); +} + +// More or less original. Should be reviewed. 
+PRIVATE fpu_double FFPU to_pack(uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3) +{ + FPU_CONSISTENCY_CHECK_START(); + + double d; + char *cp; + char str[100]; + + cp = str; + if (wrd1 & 0x80000000) + *cp++ = '-'; + *cp++ = (char)((wrd1 & 0xf) + '0'); + *cp++ = '.'; + *cp++ = (char)(((wrd2 >> 28) & 0xf) + '0'); + *cp++ = (char)(((wrd2 >> 24) & 0xf) + '0'); + *cp++ = (char)(((wrd2 >> 20) & 0xf) + '0'); + *cp++ = (char)(((wrd2 >> 16) & 0xf) + '0'); + *cp++ = (char)(((wrd2 >> 12) & 0xf) + '0'); + *cp++ = (char)(((wrd2 >> 8) & 0xf) + '0'); + *cp++ = (char)(((wrd2 >> 4) & 0xf) + '0'); + *cp++ = (char)(((wrd2 >> 0) & 0xf) + '0'); + *cp++ = (char)(((wrd3 >> 28) & 0xf) + '0'); + *cp++ = (char)(((wrd3 >> 24) & 0xf) + '0'); + *cp++ = (char)(((wrd3 >> 20) & 0xf) + '0'); + *cp++ = (char)(((wrd3 >> 16) & 0xf) + '0'); + *cp++ = (char)(((wrd3 >> 12) & 0xf) + '0'); + *cp++ = (char)(((wrd3 >> 8) & 0xf) + '0'); + *cp++ = (char)(((wrd3 >> 4) & 0xf) + '0'); + *cp++ = (char)(((wrd3 >> 0) & 0xf) + '0'); + *cp++ = 'E'; + if (wrd1 & 0x40000000) + *cp++ = '-'; + *cp++ = (char)(((wrd1 >> 24) & 0xf) + '0'); + *cp++ = (char)(((wrd1 >> 20) & 0xf) + '0'); + *cp++ = (char)(((wrd1 >> 16) & 0xf) + '0'); + *cp = 0; + sscanf(str, "%le", &d); + + D(bug("to_pack str = %s\r\n",str)); + + D(bug("to_pack(%X,%X,%X) = %.04f\r\n",wrd1,wrd2,wrd3,(float)d)); + + FPU_CONSISTENCY_CHECK_STOP("to_pack"); + + return d; +} + +// More or less original. Should be reviewed. 
+PRIVATE void FFPU from_pack (fpu_double src, uae_u32 * wrd1, uae_u32 * wrd2, uae_u32 * wrd3) +{ + FPU_CONSISTENCY_CHECK_START(); + + int i; + int t; + char *cp; + char str[100]; + int exponent_digit_count = 0; + + sprintf(str, "%.16e", src); + + D(bug("from_pack(%.04f,%s)\r\n",(float)src,str)); + + cp = str; + *wrd1 = *wrd2 = *wrd3 = 0; + if (*cp == '-') { + cp++; + *wrd1 = 0x80000000; + } + if (*cp == '+') + cp++; + *wrd1 |= (*cp++ - '0'); + if (*cp == '.') + cp++; + for (i = 0; i < 8; i++) { + *wrd2 <<= 4; + if (*cp >= '0' && *cp <= '9') + *wrd2 |= *cp++ - '0'; + } + for (i = 0; i < 8; i++) { + *wrd3 <<= 4; + if (*cp >= '0' && *cp <= '9') + *wrd3 |= *cp++ - '0'; + } + if (*cp == 'e' || *cp == 'E') { + cp++; + if (*cp == '-') { + cp++; + *wrd1 |= 0x40000000; + } + if (*cp == '+') + cp++; + t = 0; + for (i = 0; i < 3; i++) { + if (*cp >= '0' && *cp <= '9') { + t = (t << 4) | (*cp++ - '0'); + exponent_digit_count++; + } + } + *wrd1 |= t << 16; + } + + D(bug("from_pack(%.04f) = %X,%X,%X\r\n",(float)src,*wrd1,*wrd2,*wrd3)); + + WORD sw_temp; +// _asm FNSTSW sw_temp + __asm__ __volatile__("fnstsw %0" : "=m" (sw_temp)); + if(sw_temp & SW_EXCEPTION_MASK) { +// _asm FNCLEX + __asm__ __volatile__("fnclex"); + if(sw_temp & SW_PE) { + x86_status_word |= SW_PE; + x86_status_word_accrued |= SW_PE; + } + } + + /* + OPERR is set if the k-factor > + 17 or the magnitude of + the decimal exponent exceeds three digits; + cleared otherwise. 
+ */ + if(exponent_digit_count > 3) { + x86_status_word |= SW_IE; + x86_status_word_accrued |= SW_IE; + } + + FPU_CONSISTENCY_CHECK_STOP("from_pack"); +} + +PRIVATE int FFPU get_fp_value (uae_u32 opcode, uae_u32 extra, fpu_register & src) +{ + static const int sz1[8] = {4, 4, 12, 12, 2, 8, 1, 0}; + static const int sz2[8] = {4, 4, 12, 12, 2, 8, 2, 0}; + + // D(bug("get_fp_value(%X,%X)\r\n",(int)opcode,(int)extra)); + // dump_first_bytes( regs.pc_p-4, 16 ); + + if ((extra & 0x4000) == 0) { + memcpy( &src, &FPU registers[(extra >> 10) & 7], sizeof(fpu_register) ); +// do_fmove_no_status( src, FPU registers[(extra >> 10) & 7] ); + return 1; + } + + int mode = (opcode >> 3) & 7; + int reg = opcode & 7; + int size = (extra >> 10) & 7; + uae_u32 ad = 0; + + // D(bug("get_fp_value mode=%d, reg=%d, size=%d\r\n",(int)mode,(int)reg,(int)size)); + + switch ((uae_u8)mode) { + case 0: + switch ((uae_u8)size) { + case 6: + signed_to_extended( (uae_s32)(uae_s8) m68k_dreg (regs, reg), src ); + break; + case 4: + signed_to_extended( (uae_s32)(uae_s16) m68k_dreg (regs, reg), src ); + break; + case 0: + signed_to_extended( (uae_s32) m68k_dreg (regs, reg), src ); + break; + case 1: + to_single( m68k_dreg (regs, reg), src ); + break; + default: + return 0; + } + return 1; + case 1: + return 0; + case 2: + ad = m68k_areg (regs, reg); + break; + case 3: + ad = m68k_areg (regs, reg); + break; + case 4: + ad = m68k_areg (regs, reg) - (reg == 7 ? 
sz2[size] : sz1[size]); + break; + case 5: + ad = m68k_areg (regs, reg) + (uae_s32) (uae_s16) next_iword(); + break; + case 6: + ad = get_disp_ea_020 (m68k_areg (regs, reg), next_iword()); + break; + case 7: + switch ((uae_u8)reg) { + case 0: + ad = (uae_s32) (uae_s16) next_iword(); + break; + case 1: + ad = next_ilong(); + break; + case 2: + ad = m68k_getpc (); + ad += (uae_s32) (uae_s16) next_iword(); + break; + case 3: { + uaecptr tmppc = m68k_getpc (); + uae_u16 tmp = (uae_u16)next_iword(); + ad = get_disp_ea_020 (tmppc, tmp); + } + break; + case 4: + ad = m68k_getpc (); + m68k_setpc (ad + sz2[size]); + + /* + +0000 000004 FSCALE.B #$01,FP2 | F23C 5926 0001 + F23C 1111001000111100 + 5926 0101100100100110 + 0001 0000000000000001 + mode = 7 + reg = 4 + size = 6 + */ + // Immediate addressing mode && Operation Length == Byte -> + // Use the low-order byte of the extension word. + + if(size == 6) ad++; + + // May be faster on a PII(I), sz2[size] is already in register + // ad += sz2[size] - sz1[size]; + + break; + default: + return 0; + } + } + + switch ((uae_u8)size) { + case 0: + signed_to_extended( (uae_s32) get_long (ad), src ); + break; + case 1: + to_single( get_long (ad), src ); + break; + + case 2:{ + uae_u32 wrd1, wrd2, wrd3; + wrd1 = get_long (ad); + ad += 4; + wrd2 = get_long (ad); + ad += 4; + wrd3 = get_long (ad); + to_exten( wrd1, wrd2, wrd3, src ); + } + break; + case 3:{ + uae_u32 wrd1, wrd2, wrd3; + wrd1 = get_long (ad); + ad += 4; + wrd2 = get_long (ad); + ad += 4; + wrd3 = get_long (ad); + double_to_extended( to_pack(wrd1, wrd2, wrd3), src ); + } + break; + case 4: + signed_to_extended( (uae_s32)(uae_s16) get_word(ad), src ); + break; + case 5:{ + uae_u32 wrd1, wrd2; + wrd1 = get_long (ad); + ad += 4; + wrd2 = get_long (ad); + to_double(wrd1, wrd2, src); + } + break; + case 6: + signed_to_extended( (uae_s32)(uae_s8) get_byte(ad), src ); + break; + default: + return 0; + } + + switch (mode) { + case 3: + m68k_areg (regs, reg) += reg == 7 ? 
sz2[size] : sz1[size]; + break; + case 4: + m68k_areg (regs, reg) -= reg == 7 ? sz2[size] : sz1[size]; + break; + } + + // D(bug("get_fp_value result = %.04f\r\n",(float)src)); + + return 1; +} + +PRIVATE int FFPU put_fp_value (fpu_register const & value, uae_u32 opcode, uae_u32 extra) +{ + static const int sz1[8] = {4, 4, 12, 12, 2, 8, 1, 0}; + static const int sz2[8] = {4, 4, 12, 12, 2, 8, 2, 0}; + + // D(bug("put_fp_value(%.04f,%X,%X)\r\n",(float)value,(int)opcode,(int)extra)); + + if ((extra & 0x4000) == 0) { + int dest_reg = (extra >> 10) & 7; + do_fmove( FPU registers[dest_reg], value ); + build_ex_status(); + return 1; + } + + int mode = (opcode >> 3) & 7; + int reg = opcode & 7; + int size = (extra >> 10) & 7; + uae_u32 ad = 0xffffffff; + + // Clear exception status + x86_status_word &= ~SW_EXCEPTION_MASK; + + switch ((uae_u8)mode) { + case 0: + switch ((uae_u8)size) { + case 6: + *((uae_u8 *)&m68k_dreg(regs, reg)) = extended_to_signed_8(value); + break; + case 4: + // TODO_BIGENDIAN + *((uae_u16 *)&m68k_dreg(regs, reg)) = extended_to_signed_16(value); + break; + case 0: + m68k_dreg (regs, reg) = extended_to_signed_32(value); + break; + case 1: + m68k_dreg (regs, reg) = from_single(value); + break; + default: + return 0; + } + return 1; + case 1: + return 0; + case 2: + ad = m68k_areg (regs, reg); + break; + case 3: + ad = m68k_areg (regs, reg); + m68k_areg (regs, reg) += reg == 7 ? sz2[size] : sz1[size]; + break; + case 4: + m68k_areg (regs, reg) -= reg == 7 ? 
sz2[size] : sz1[size]; + ad = m68k_areg (regs, reg); + break; + case 5: + ad = m68k_areg (regs, reg) + (uae_s32) (uae_s16) next_iword(); + break; + case 6: + ad = get_disp_ea_020 (m68k_areg (regs, reg), next_iword()); + break; + case 7: + switch ((uae_u8)reg) { + case 0: + ad = (uae_s32) (uae_s16) next_iword(); + break; + case 1: + ad = next_ilong(); + break; + case 2: + ad = m68k_getpc (); + ad += (uae_s32) (uae_s16) next_iword(); + break; + case 3: { + uaecptr tmppc = m68k_getpc (); + uae_u16 tmp = (uae_u16)next_iword(); + ad = get_disp_ea_020 (tmppc, tmp); + } + break; + case 4: + ad = m68k_getpc (); + m68k_setpc (ad + sz2[size]); + break; + default: + return 0; + } + } + switch ((uae_u8)size) { + case 0: + put_long (ad, (uae_s32) extended_to_signed_32(value)); + break; + case 1: + put_long (ad, from_single(value)); + break; + case 2: { + uae_u32 wrd1, wrd2, wrd3; + from_exten(value, &wrd1, &wrd2, &wrd3); + + x86_status_word &= ~SW_EXCEPTION_MASK; + if(wrd3) { // TODO: not correct! Just a "smart" guess. 
+ x86_status_word |= SW_PE; + x86_status_word_accrued |= SW_PE; + } + + put_long (ad, wrd1); + ad += 4; + put_long (ad, wrd2); + ad += 4; + put_long (ad, wrd3); + } + break; + case 3: { + uae_u32 wrd1, wrd2, wrd3; + from_pack(extended_to_double(value), &wrd1, &wrd2, &wrd3); + put_long (ad, wrd1); + ad += 4; + put_long (ad, wrd2); + ad += 4; + put_long (ad, wrd3); + } + break; + case 4: + put_word(ad, extended_to_signed_16(value)); + break; + case 5:{ + uae_u32 wrd1, wrd2; + from_double(value, &wrd1, &wrd2); + put_long (ad, wrd1); + ad += 4; + put_long (ad, wrd2); + } + break; + case 6: + put_byte(ad, extended_to_signed_8(value)); + + break; + default: + return 0; + } + return 1; +} + +PRIVATE int FFPU get_fp_ad(uae_u32 opcode, uae_u32 * ad) +{ + int mode = (opcode >> 3) & 7; + int reg = opcode & 7; + switch ( (uae_u8)mode ) { + case 0: + case 1: + if( (opcode & 0xFF00) == 0xF300 ) { + // fsave, frestore + m68k_setpc (m68k_getpc () - 2); + } else { + m68k_setpc (m68k_getpc () - 4); + } + op_illg (opcode); + dump_registers( "END "); + return 0; + case 2: + *ad = m68k_areg (regs, reg); + break; + case 3: + *ad = m68k_areg (regs, reg); + break; + case 4: + *ad = m68k_areg (regs, reg); + break; + case 5: + *ad = m68k_areg (regs, reg) + (uae_s32) (uae_s16) next_iword(); + break; + case 6: + *ad = get_disp_ea_020 (m68k_areg (regs, reg), next_iword()); + break; + case 7: + switch ( (uae_u8)reg ) { + case 0: + *ad = (uae_s32) (uae_s16) next_iword(); + break; + case 1: + *ad = next_ilong(); + break; + case 2: + *ad = m68k_getpc (); + *ad += (uae_s32) (uae_s16) next_iword(); + break; + case 3: { + uaecptr tmppc = m68k_getpc (); + uae_u16 tmp = (uae_u16)next_iword(); + *ad = get_disp_ea_020 (tmppc, tmp); + } + break; + default: + if( (opcode & 0xFF00) == 0xF300 ) { + // fsave, frestore + m68k_setpc (m68k_getpc () - 2); + } else { + m68k_setpc (m68k_getpc () - 4); + } + op_illg (opcode); + dump_registers( "END "); + return 0; + } + } + return 1; +} + +#if FPU_DEBUG +#define 
CONDRET(s,x) D(bug("fpp_cond %s = %d\r\n",s,(uint32)(x))); return (x) +#else +#define CONDRET(s,x) return (x) +#endif + +PRIVATE int FFPU fpp_cond(uae_u32 opcode, int condition) +{ + +#define N (x86_status_word & SW_N) +#define Z ((x86_status_word & (SW_Z_I_NAN_MASK)) == SW_Z) +#define I ((x86_status_word & (SW_Z_I_NAN_MASK)) == (SW_I)) +#define NotANumber ((x86_status_word & (SW_Z_I_NAN_MASK)) == SW_NAN) + + switch (condition & 0x1f) { + // Common Tests, no BSUN + case 0x01: + CONDRET("Equal",Z); + case 0x0e: + CONDRET("Not Equal",!Z); + + // IEEE Nonaware Tests, BSUN + case 0x12: + SET_BSUN_ON_NAN(); + CONDRET("Greater Than",!(NotANumber || Z || N)); + case 0x1d: + SET_BSUN_ON_NAN(); + CONDRET("Not Greater Than",NotANumber || Z || N); + case 0x13: + SET_BSUN_ON_NAN(); + CONDRET("Greater Than or Equal",Z || !(NotANumber || N)); + case 0x1c: + SET_BSUN_ON_NAN(); + CONDRET("Not Greater Than or Equal",!Z && (NotANumber || N)); + case 0x14: + SET_BSUN_ON_NAN(); + CONDRET("Less Than",N && !(NotANumber || Z)); + case 0x1b: + SET_BSUN_ON_NAN(); + CONDRET("Not Less Than",NotANumber || Z || !N); + case 0x15: + SET_BSUN_ON_NAN(); + CONDRET("Less Than or Equal",Z || (N && !NotANumber)); + case 0x1a: + SET_BSUN_ON_NAN(); + CONDRET("Not Less Than or Equal",NotANumber || !(N || Z)); + case 0x16: + SET_BSUN_ON_NAN(); + CONDRET("Greater or Less Than",!(NotANumber || Z)); + case 0x19: + SET_BSUN_ON_NAN(); + CONDRET("Not Greater or Less Than",NotANumber || Z); + case 0x17: + CONDRET("Greater, Less or Equal",!NotANumber); + case 0x18: + SET_BSUN_ON_NAN(); + CONDRET("Not Greater, Less or Equal",NotANumber); + + // IEEE Aware Tests, no BSUN + case 0x02: + CONDRET("Ordered Greater Than",!(NotANumber || Z || N)); + case 0x0d: + CONDRET("Unordered or Less or Equal",NotANumber || Z || N); + case 0x03: + CONDRET("Ordered Greater Than or Equal",Z || !(NotANumber || N)); + case 0x0c: + CONDRET("Unordered or Less Than",NotANumber || (N && !Z)); + case 0x04: + CONDRET("Ordered Less Than",N && 
!(NotANumber || Z)); + case 0x0b: + CONDRET("Unordered or Greater or Equal",NotANumber || Z || !N); + case 0x05: + CONDRET("Ordered Less Than or Equal",Z || (N && !NotANumber)); + case 0x0a: + CONDRET("Unordered or Greater Than",NotANumber || !(N || Z)); + case 0x06: + CONDRET("Ordered Greater or Less Than",!(NotANumber || Z)); + case 0x09: + CONDRET("Unordered or Equal",NotANumber || Z); + case 0x07: + CONDRET("Ordered",!NotANumber); + case 0x08: + CONDRET("Unordered",NotANumber); + + // Miscellaneous Tests, no BSUN + case 0x00: + CONDRET("False",0); + case 0x0f: + CONDRET("True",1); + + // Miscellaneous Tests, BSUN + case 0x10: + SET_BSUN_ON_NAN(); + CONDRET("Signaling False",0); + case 0x1f: + SET_BSUN_ON_NAN(); + CONDRET("Signaling True",1); + case 0x11: + SET_BSUN_ON_NAN(); + CONDRET("Signaling Equal",Z); + case 0x1e: + SET_BSUN_ON_NAN(); + CONDRET("Signaling Not Equal",!Z); + } + CONDRET("",-1); + +#undef N +#undef Z +#undef I +#undef NotANumber + +} + +PUBLIC void REGPARAM2 FFPU fpuop_dbcc(uae_u32 opcode, uae_u32 extra) +{ + uaecptr pc = (uae_u32) m68k_getpc (); + uae_s32 disp = (uae_s32) (uae_s16) next_iword(); + int cc; + + D(bug("fdbcc_opp %X, %X at %08lx\r\n", (uae_u32)opcode, (uae_u32)extra, m68k_getpc ())); + + cc = fpp_cond(opcode, extra & 0x3f); + if (cc < 0) { + m68k_setpc (pc - 4); + op_illg (opcode); + } else if (!cc) { + int reg = opcode & 0x7; + + // TODO_BIGENDIAN + uae_u16 newv = (uae_u16)(m68k_dreg (regs, reg) & 0xffff) - 1; + *((uae_u16 *)&m68k_dreg(regs, reg)) = newv; + + if (newv != 0xffff) + m68k_setpc (pc + disp); + } +} + +PUBLIC void REGPARAM2 FFPU fpuop_scc(uae_u32 opcode, uae_u32 extra) +{ + uae_u32 ad; + int cc; + + D(bug("fscc_opp %X, %X at %08lx\r\n", (uae_u32)opcode, (uae_u32)extra, m68k_getpc ())); + + cc = fpp_cond(opcode, extra & 0x3f); + if (cc < 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + } else if ((opcode & 0x38) == 0) { + // TODO_BIGENDIAN + m68k_dreg (regs, opcode & 7) = (m68k_dreg (regs, opcode & 7) & 
~0xff) | + (cc ? 0xff : 0x00); + } else { + if (get_fp_ad(opcode, &ad)) { + put_byte(ad, cc ? 0xff : 0x00); + } + } +} + +PUBLIC void REGPARAM2 FFPU fpuop_trapcc(uae_u32 opcode, uaecptr oldpc, uae_u32 extra) +{ + int cc; + + D(bug("ftrapcc_opp %X, %X at %08lx\r\n", (uae_u32)opcode, (uae_u32)extra, m68k_getpc ())); + +#if I3_ON_FTRAPCC +#error "FIXME: _asm int 3" + _asm int 3 +#endif + + // This must be broken. + cc = fpp_cond(opcode, extra & 0x3f); + + if (cc < 0) { + m68k_setpc (oldpc); + op_illg (opcode); + } else if (cc) + Exception(7, oldpc - 2); +} + +// NOTE that we get here also when there is a FNOP (nontrapping false, displ 0) +PUBLIC void REGPARAM2 FFPU fpuop_bcc(uae_u32 opcode, uaecptr pc, uae_u32 extra) +{ + int cc; + + D(bug("fbcc_opp %X, %X at %08lx, jumpto=%X\r\n", (uae_u32)opcode, (uae_u32)extra, m68k_getpc (), extra )); + + cc = fpp_cond(opcode, opcode & 0x3f); + if (cc < 0) { + m68k_setpc (pc); + op_illg (opcode); + } else if (cc) { + if ((opcode & 0x40) == 0) + extra = (uae_s32) (uae_s16) extra; + m68k_setpc (pc + extra); + } +} + +// FSAVE has no post-increment +// 0x1f180000 == IDLE state frame, coprocessor version number 1F +PUBLIC void REGPARAM2 FFPU fpuop_save(uae_u32 opcode) +{ + uae_u32 ad; + int incr = (opcode & 0x38) == 0x20 ? -1 : 1; + int i; + + D(bug("fsave_opp at %08lx\r\n", m68k_getpc ())); + + if (get_fp_ad(opcode, &ad)) { + if (FPU is_integral) { + // Put 4 byte 68040 IDLE frame. + if (incr < 0) { + ad -= 4; + put_long (ad, 0x41000000); + } else { + put_long (ad, 0x41000000); + ad += 4; + } + } else { + // Put 28 byte 68881 IDLE frame. + if (incr < 0) { + D(bug("fsave_opp pre-decrement\r\n")); + ad -= 4; + // What's this? Some BIU flags, or (incorrectly placed) command/condition? 
+ put_long (ad, 0x70000000);
+ for (i = 0; i < 5; i++) {
+ ad -= 4;
+ put_long (ad, 0x00000000);
+ }
+ ad -= 4;
+ put_long (ad, 0x1f180000); // IDLE, vers 1f
+ } else {
+ put_long (ad, 0x1f180000); // IDLE, vers 1f
+ ad += 4;
+ for (i = 0; i < 5; i++) {
+ put_long (ad, 0x00000000);
+ ad += 4;
+ }
+ // What's this? Some BIU flags, or (incorrectly placed) command/condition?
+ put_long (ad, 0x70000000);
+ ad += 4;
+ }
+ }
+ if ((opcode & 0x38) == 0x18) {
+ m68k_areg (regs, opcode & 7) = ad; // Never executed on a 68881
+ D(bug("PROBLEM: fsave_opp post-increment\r\n"));
+ }
+ if ((opcode & 0x38) == 0x20) {
+ m68k_areg (regs, opcode & 7) = ad;
+ D(bug("fsave_opp pre-decrement %X -> A%d\r\n",ad,opcode & 7));
+ }
+ }
+}
+
+PRIVATE void FFPU do_null_frestore ()
+{
+ // A null-restore operation sets FP7-FP0 positive, nonsignaling NANs.
+ for( int i=0; i<8; i++ ) {
+ MAKE_NAN( FPU registers[i] );
+ }
+
+ FPU instruction_address = 0;
+ set_fpcr(0);
+ set_fpsr(0);
+
+ x86_status_word = SW_INITIAL;
+ x86_status_word_accrued = 0;
+ FPU fpsr.quotient = 0;
+
+ x86_control_word = CW_INITIAL;
+/* _asm FLDCW x86_control_word
+ _asm FNCLEX */
+ __asm__ __volatile__("fldcw %0\n\tfnclex" : : "m" (x86_control_word));
+}
+
+// FRESTORE has no pre-decrement
+PUBLIC void REGPARAM2 FFPU fpuop_restore(uae_u32 opcode)
+{
+ uae_u32 ad;
+ uae_u32 d;
+ int incr = (opcode & 0x38) == 0x20 ? -1 : 1;
+
+ D(bug("frestore_opp at %08lx\r\n", m68k_getpc ()));
+
+ if (get_fp_ad(opcode, &ad)) {
+ if (FPU is_integral) {
+ // 68040
+ if (incr < 0) {
+ D(bug("PROBLEM: frestore_opp incr < 0\r\n"));
+ // this may be wrong, but it's never called.
+ ad -= 4; + d = get_long (ad); + if ((d & 0xff000000) == 0) { // NULL + D(bug("frestore_opp found NULL frame at %X\r\n",ad-4)); + do_null_frestore(); + } else if ((d & 0x00ff0000) == 0) { // IDLE + D(bug("frestore_opp found IDLE frame at %X\r\n",ad-4)); + } else if ((d & 0x00ff0000) == 0x00300000) { // UNIMP + D(bug("PROBLEM: frestore_opp found UNIMP frame at %X\r\n",ad-4)); + ad -= 44; + } else if ((d & 0x00ff0000) == 0x00600000) { // BUSY + D(bug("PROBLEM: frestore_opp found BUSY frame at %X\r\n",ad-4)); + ad -= 92; + } else { + D(bug("PROBLEM: frestore_opp did not find a frame at %X, d=%X\r\n",ad-4,d)); + } + } else { + d = get_long (ad); + D(bug("frestore_opp frame at %X = %X\r\n",ad,d)); + ad += 4; + if ((d & 0xff000000) == 0) { // NULL + D(bug("frestore_opp found NULL frame at %X\r\n",ad-4)); + do_null_frestore(); + } else if ((d & 0x00ff0000) == 0) { // IDLE + D(bug("frestore_opp found IDLE frame at %X\r\n",ad-4)); + } else if ((d & 0x00ff0000) == 0x00300000) { // UNIMP + D(bug("PROBLEM: frestore_opp found UNIMP frame at %X\r\n",ad-4)); + ad += 44; + } else if ((d & 0x00ff0000) == 0x00600000) { // BUSY + D(bug("PROBLEM: frestore_opp found BUSY frame at %X\r\n",ad-4)); + ad += 92; + } else { + D(bug("PROBLEM: frestore_opp did not find a frame at %X, d=%X\r\n",ad-4,d)); + } + } + } else { + // 68881 + if (incr < 0) { + D(bug("PROBLEM: frestore_opp incr < 0\r\n")); + // this may be wrong, but it's never called. 
+ ad -= 4; + d = get_long (ad); + if ((d & 0xff000000) == 0) { // NULL + do_null_frestore(); + } else if ((d & 0x00ff0000) == 0x00180000) { + ad -= 6 * 4; + } else if ((d & 0x00ff0000) == 0x00380000) { + ad -= 14 * 4; + } else if ((d & 0x00ff0000) == 0x00b40000) { + ad -= 45 * 4; + } + } else { + d = get_long (ad); + D(bug("frestore_opp frame at %X = %X\r\n",ad,d)); + ad += 4; + if ((d & 0xff000000) == 0) { // NULL + D(bug("frestore_opp found NULL frame at %X\r\n",ad-4)); + do_null_frestore(); + } else if ((d & 0x00ff0000) == 0x00180000) { // IDLE + D(bug("frestore_opp found IDLE frame at %X\r\n",ad-4)); + ad += 6 * 4; + } else if ((d & 0x00ff0000) == 0x00380000) {// UNIMP? shouldn't it be 3C? + ad += 14 * 4; + D(bug("PROBLEM: frestore_opp found UNIMP? frame at %X\r\n",ad-4)); + } else if ((d & 0x00ff0000) == 0x00b40000) {// BUSY + D(bug("PROBLEM: frestore_opp found BUSY frame at %X\r\n",ad-4)); + ad += 45 * 4; + } else { + D(bug("PROBLEM: frestore_opp did not find a frame at %X, d=%X\r\n",ad-4,d)); + } + } + } + + if ((opcode & 0x38) == 0x18) { + m68k_areg (regs, opcode & 7) = ad; + D(bug("frestore_opp post-increment %X -> A%d\r\n",ad,opcode & 7)); + } + if ((opcode & 0x38) == 0x20) { + m68k_areg (regs, opcode & 7) = ad; // Never executed on a 68881 + D(bug("PROBLEM: frestore_opp pre-decrement\r\n")); + } + } +} + + +/* ---------------------------- Old-style interface ---------------------------- */ + +// #ifndef OPTIMIZED_8BIT_MEMORY_ACCESS +PUBLIC void REGPARAM2 FFPU fpuop_arithmetic(uae_u32 opcode, uae_u32 extra) +{ + uae_u32 mask = (extra & 0xFC7F) | ((opcode & 0x0038) << 4); + (*fpufunctbl[mask])(opcode,extra); +} +// #endif + + +/* ---------------------------- Illegal ---------------------------- */ + +PRIVATE void REGPARAM2 FFPU fpuop_illg( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("ILLEGAL F OP 2 %X\r\n",opcode)); + +#if I3_ON_ILLEGAL_FPU_OP +#error "FIXME: asm int 3" + _asm int 3 +#endif + + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + 
dump_registers( "END "); +} + + +/* ---------------------------- FPP -> ---------------------------- */ + +PRIVATE void REGPARAM2 FFPU fpuop_fmove_2_ea( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVE -> \r\n")); + + if (put_fp_value (FPU registers[(extra >> 7) & 7], opcode, extra) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + } + + /* + Needed (among other things) by some Pack5/Elems68k transcendental + functions, they require the ACCR_INEX flag after a "MOVE.D, Dreg". + However, now put_fp_value() is responsible of clearing the exceptions + and merging statuses. + */ + + /* + WORD sw_temp; + _asm FNSTSW sw_temp + if(sw_temp & SW_PE) { + _asm FNCLEX + x86_status_word |= SW_PE; + x86_status_word_accrued |= SW_PE; + } + */ + + dump_registers( "END "); +} + + +/* ---------------------------- CONTROL REGS -> Dreg ---------------------------- */ + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_none_2_Dreg( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVEM control(none) -> D%d\r\n", opcode & 7)); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpiar_2_Dreg( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVEM FPU instruction_address (%X) -> D%d\r\n", FPU instruction_address, opcode & 7)); + m68k_dreg (regs, opcode & 7) = FPU instruction_address; + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpsr_2_Dreg( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVEM regs.FPU fpsr (%X) -> D%d\r\n", get_fpsr(), opcode & 7)); + m68k_dreg (regs, opcode & 7) = get_fpsr(); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_2_Dreg( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVEM regs.FPU fpcr (%X) -> D%d\r\n", get_fpcr(), opcode & 7)); + m68k_dreg (regs, opcode & 7) = get_fpcr(); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpsr_fpiar_2_Dreg( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVEM regs.FPU fpsr (%X) -> D%d\r\n", get_fpsr(), opcode & 7)); + m68k_dreg 
(regs, opcode & 7) = get_fpsr(); + D(bug("FMOVEM FPU instruction_address (%X) -> D%d\r\n", FPU instruction_address, opcode & 7)); + m68k_dreg (regs, opcode & 7) = FPU instruction_address; + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_fpiar_2_Dreg( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVEM regs.FPU fpcr (%X) -> D%d\r\n", get_fpcr(), opcode & 7)); + m68k_dreg (regs, opcode & 7) = get_fpcr(); + D(bug("FMOVEM FPU instruction_address (%X) -> D%d\r\n", FPU instruction_address, opcode & 7)); + m68k_dreg (regs, opcode & 7) = FPU instruction_address; + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_fpsr_2_Dreg( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVEM regs.FPU fpcr (%X) -> D%d\r\n", get_fpcr(), opcode & 7)); + m68k_dreg (regs, opcode & 7) = get_fpcr(); + D(bug("FMOVEM regs.FPU fpsr (%X) -> D%d\r\n", get_fpsr(), opcode & 7)); + m68k_dreg (regs, opcode & 7) = get_fpsr(); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_fpsr_fpiar_2_Dreg( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVEM regs.FPU fpcr (%X) -> D%d\r\n", get_fpcr(), opcode & 7)); + m68k_dreg (regs, opcode & 7) = get_fpcr(); + D(bug("FMOVEM regs.FPU fpsr (%X) -> D%d\r\n", get_fpsr(), opcode & 7)); + m68k_dreg (regs, opcode & 7) = get_fpsr(); + D(bug("FMOVEM FPU instruction_address (%X) -> D%d\r\n", FPU instruction_address, opcode & 7)); + m68k_dreg (regs, opcode & 7) = FPU instruction_address; + dump_registers( "END "); +} + + +/* ---------------------------- Dreg -> CONTROL REGS ---------------------------- */ + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Dreg_2_none( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVEM D%d -> control(none)\r\n", opcode & 7)); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Dreg_2_fpiar( uae_u32 opcode, uae_u32 extra ) +{ + FPU instruction_address = m68k_dreg (regs, opcode & 7); + D(bug("FMOVEM D%d (%X) -> FPU instruction_address\r\n", opcode & 
7, FPU instruction_address)); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Dreg_2_fpsr( uae_u32 opcode, uae_u32 extra ) +{ + set_fpsr( m68k_dreg (regs, opcode & 7) ); + D(bug("FMOVEM D%d (%X) -> regs.FPU fpsr\r\n", opcode & 7, get_fpsr())); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Dreg_2_fpsr_fpiar( uae_u32 opcode, uae_u32 extra ) +{ + set_fpsr( m68k_dreg (regs, opcode & 7) ); + D(bug("FMOVEM D%d (%X) -> regs.FPU fpsr\r\n", opcode & 7, get_fpsr())); + FPU instruction_address = m68k_dreg (regs, opcode & 7); + D(bug("FMOVEM D%d (%X) -> FPU instruction_address\r\n", opcode & 7, FPU instruction_address)); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Dreg_2_fpcr( uae_u32 opcode, uae_u32 extra ) +{ + set_fpcr( m68k_dreg (regs, opcode & 7) ); + D(bug("FMOVEM D%d (%X) -> regs.FPU fpcr\r\n", opcode & 7, get_fpcr())); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Dreg_2_fpcr_fpiar( uae_u32 opcode, uae_u32 extra ) +{ + set_fpcr( m68k_dreg (regs, opcode & 7) ); + D(bug("FMOVEM D%d (%X) -> regs.FPU fpcr\r\n", opcode & 7, get_fpcr())); + FPU instruction_address = m68k_dreg (regs, opcode & 7); + D(bug("FMOVEM D%d (%X) -> FPU instruction_address\r\n", opcode & 7, FPU instruction_address)); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Dreg_2_fpcr_fpsr( uae_u32 opcode, uae_u32 extra ) +{ + set_fpcr( m68k_dreg (regs, opcode & 7) ); + D(bug("FMOVEM D%d (%X) -> regs.FPU fpcr\r\n", opcode & 7, get_fpcr())); + set_fpsr( m68k_dreg (regs, opcode & 7) ); + D(bug("FMOVEM D%d (%X) -> regs.FPU fpsr\r\n", opcode & 7, get_fpsr())); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Dreg_2_fpcr_fpsr_fpiar( uae_u32 opcode, uae_u32 extra ) +{ + set_fpcr( m68k_dreg (regs, opcode & 7) ); + D(bug("FMOVEM D%d (%X) -> regs.FPU fpcr\r\n", opcode & 7, get_fpcr())); + set_fpsr( m68k_dreg (regs, opcode & 7) ); + D(bug("FMOVEM D%d (%X) -> 
regs.FPU fpsr\r\n", opcode & 7, get_fpsr())); + FPU instruction_address = m68k_dreg (regs, opcode & 7); + D(bug("FMOVEM D%d (%X) -> FPU instruction_address\r\n", opcode & 7, FPU instruction_address)); + dump_registers( "END "); +} + + +/* ---------------------------- CONTROL REGS -> Areg ---------------------------- */ + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_none_2_Areg( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVEM control(none) -> A%d\r\n", opcode & 7)); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpiar_2_Areg( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVEM FPU instruction_address (%X) -> A%d\r\n", FPU instruction_address, opcode & 7)); + m68k_areg (regs, opcode & 7) = FPU instruction_address; + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpsr_2_Areg( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVEM regs.FPU fpsr (%X) -> A%d\r\n", get_fpsr(), opcode & 7)); + m68k_areg (regs, opcode & 7) = get_fpsr(); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_2_Areg( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVEM regs.FPU fpcr (%X) -> A%d\r\n", get_fpcr(), opcode & 7)); + m68k_areg (regs, opcode & 7) = get_fpcr(); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpsr_fpiar_2_Areg( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVEM regs.FPU fpsr (%X) -> A%d\r\n", get_fpsr(), opcode & 7)); + m68k_areg (regs, opcode & 7) = get_fpsr(); + D(bug("FMOVEM FPU instruction_address (%X) -> A%d\r\n", FPU instruction_address, opcode & 7)); + m68k_areg (regs, opcode & 7) = FPU instruction_address; + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_fpiar_2_Areg( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVEM regs.FPU fpcr (%X) -> A%d\r\n", get_fpcr(), opcode & 7)); + m68k_areg (regs, opcode & 7) = get_fpcr(); + D(bug("FMOVEM FPU instruction_address (%X) -> A%d\r\n", FPU instruction_address, opcode & 7)); + m68k_areg (regs, 
opcode & 7) = FPU instruction_address; + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_fpsr_2_Areg( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVEM regs.FPU fpcr (%X) -> A%d\r\n", get_fpcr(), opcode & 7)); + m68k_areg (regs, opcode & 7) = get_fpcr(); + D(bug("FMOVEM regs.FPU fpsr (%X) -> A%d\r\n", get_fpsr(), opcode & 7)); + m68k_areg (regs, opcode & 7) = get_fpsr(); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_fpsr_fpiar_2_Areg( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVEM regs.FPU fpcr (%X) -> A%d\r\n", get_fpcr(), opcode & 7)); + m68k_areg (regs, opcode & 7) = get_fpcr(); + D(bug("FMOVEM regs.FPU fpsr (%X) -> A%d\r\n", get_fpsr(), opcode & 7)); + m68k_areg (regs, opcode & 7) = get_fpsr(); + D(bug("FMOVEM FPU instruction_address (%X) -> A%d\r\n", FPU instruction_address, opcode & 7)); + m68k_areg (regs, opcode & 7) = FPU instruction_address; + dump_registers( "END "); +} + + +/* ---------------------------- Areg -> CONTROL REGS ---------------------------- */ + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Areg_2_none( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVEM A%d -> control(none)\r\n", opcode & 7)); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Areg_2_fpiar( uae_u32 opcode, uae_u32 extra ) +{ + FPU instruction_address = m68k_areg (regs, opcode & 7); + D(bug("FMOVEM A%d (%X) -> FPU instruction_address\r\n", opcode & 7, FPU instruction_address)); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Areg_2_fpsr( uae_u32 opcode, uae_u32 extra ) +{ + set_fpsr( m68k_areg (regs, opcode & 7) ); + D(bug("FMOVEM A%d (%X) -> regs.FPU fpsr\r\n", opcode & 7, get_fpsr())); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Areg_2_fpsr_fpiar( uae_u32 opcode, uae_u32 extra ) +{ + set_fpsr( m68k_areg (regs, opcode & 7) ); + D(bug("FMOVEM A%d (%X) -> regs.FPU fpsr\r\n", opcode & 7, get_fpsr())); + FPU instruction_address = 
m68k_areg (regs, opcode & 7); + D(bug("FMOVEM A%d (%X) -> FPU instruction_address\r\n", opcode & 7, FPU instruction_address)); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Areg_2_fpcr( uae_u32 opcode, uae_u32 extra ) +{ + set_fpcr( m68k_areg (regs, opcode & 7) ); + D(bug("FMOVEM A%d (%X) -> regs.FPU fpcr\r\n", opcode & 7, get_fpcr())); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Areg_2_fpcr_fpiar( uae_u32 opcode, uae_u32 extra ) +{ + set_fpcr( m68k_areg (regs, opcode & 7) ); + D(bug("FMOVEM A%d (%X) -> regs.FPU fpcr\r\n", opcode & 7, get_fpcr())); + FPU instruction_address = m68k_areg (regs, opcode & 7); + D(bug("FMOVEM A%d (%X) -> FPU instruction_address\r\n", opcode & 7, FPU instruction_address)); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Areg_2_fpcr_fpsr( uae_u32 opcode, uae_u32 extra ) +{ + set_fpcr( m68k_areg (regs, opcode & 7) ); + D(bug("FMOVEM A%d (%X) -> regs.FPU fpcr\r\n", opcode & 7, get_fpcr())); + set_fpsr( m68k_areg (regs, opcode & 7) ); + D(bug("FMOVEM A%d (%X) -> regs.FPU fpsr\r\n", opcode & 7, get_fpsr())); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Areg_2_fpcr_fpsr_fpiar( uae_u32 opcode, uae_u32 extra ) +{ + set_fpcr( m68k_areg (regs, opcode & 7) ); + D(bug("FMOVEM A%d (%X) -> regs.FPU fpcr\r\n", opcode & 7, get_fpcr())); + set_fpsr( m68k_areg (regs, opcode & 7) ); + D(bug("FMOVEM A%d (%X) -> regs.FPU fpsr\r\n", opcode & 7, get_fpsr())); + FPU instruction_address = m68k_areg (regs, opcode & 7); + D(bug("FMOVEM A%d (%X) -> FPU instruction_address\r\n", opcode & 7, FPU instruction_address)); + dump_registers( "END "); +} + + +/* ---------------------------- CONTROL REGS -> --MEMORY---------------------------- */ + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_none_2_Mem_predecrement( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVEM Control regs (none) -> mem\r\n" )); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU 
fpuop_fmovem_fpiar_2_Mem_predecrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + ad -= 4; + put_long (ad, FPU instruction_address); + D(bug("FMOVEM FPU instruction_address (%X) -> mem %X\r\n", FPU instruction_address, ad )); + m68k_areg (regs, opcode & 7) = ad; + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpsr_2_Mem_predecrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + ad -= 4; + put_long (ad, get_fpsr()); + D(bug("FMOVEM regs.FPU fpsr (%X) -> mem %X\r\n", get_fpsr(), ad )); + m68k_areg (regs, opcode & 7) = ad; + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpsr_fpiar_2_Mem_predecrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + ad -= 8; + put_long (ad, get_fpsr()); + D(bug("FMOVEM regs.FPU fpsr (%X) -> mem %X\r\n", get_fpsr(), ad )); + put_long (ad+4, FPU instruction_address); + D(bug("FMOVEM FPU instruction_address (%X) -> mem %X\r\n", FPU instruction_address, ad+4 )); + m68k_areg (regs, opcode & 7) = ad; + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_2_Mem_predecrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + ad -= 4; + put_long (ad, get_fpcr()); + D(bug("FMOVEM regs.FPU fpcr (%X) -> mem %X\r\n", get_fpcr(), ad )); + m68k_areg (regs, opcode & 7) = ad; + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_fpiar_2_Mem_predecrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + ad -= 8; + put_long (ad, get_fpcr()); + D(bug("FMOVEM regs.FPU fpcr (%X) -> mem %X\r\n", get_fpcr(), ad )); + put_long (ad+4, FPU instruction_address); + D(bug("FMOVEM FPU instruction_address (%X) -> mem %X\r\n", FPU instruction_address, ad+4 )); + m68k_areg (regs, opcode & 7) = ad; + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU 
fpuop_fmovem_fpcr_fpsr_2_Mem_predecrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + ad -= 8; + put_long (ad, get_fpcr()); + D(bug("FMOVEM regs.FPU fpcr (%X) -> mem %X\r\n", get_fpcr(), ad )); + put_long (ad+4, get_fpsr()); + D(bug("FMOVEM regs.FPU fpsr (%X) -> mem %X\r\n", get_fpsr(), ad+4 )); + m68k_areg (regs, opcode & 7) = ad; + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_fpsr_fpiar_2_Mem_predecrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + ad -= 12; + put_long (ad, get_fpcr()); + D(bug("FMOVEM regs.FPU fpcr (%X) -> mem %X\r\n", get_fpcr(), ad )); + put_long (ad+4, get_fpsr()); + D(bug("FMOVEM regs.FPU fpsr (%X) -> mem %X\r\n", get_fpsr(), ad+4 )); + put_long (ad+8, FPU instruction_address); + D(bug("FMOVEM FPU instruction_address (%X) -> mem %X\r\n", FPU instruction_address, ad+8 )); + m68k_areg (regs, opcode & 7) = ad; + dump_registers( "END "); + } +} + + +/* ---------------------------- CONTROL REGS -> MEMORY++ ---------------------------- */ + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_none_2_Mem_postincrement( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVEM Control regs (none) -> mem\r\n" )); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpiar_2_Mem_postincrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + put_long (ad, FPU instruction_address); + D(bug("FMOVEM FPU instruction_address (%X) -> mem %X\r\n", FPU instruction_address, ad )); + m68k_areg (regs, opcode & 7) = ad+4; + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpsr_2_Mem_postincrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + put_long (ad, get_fpsr()); + D(bug("FMOVEM regs.FPU fpsr (%X) -> mem %X\r\n", get_fpsr(), ad )); + m68k_areg (regs, opcode & 7) = ad+4; + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU 
fpuop_fmovem_fpsr_fpiar_2_Mem_postincrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + put_long (ad, get_fpsr()); + D(bug("FMOVEM regs.FPU fpsr (%X) -> mem %X\r\n", get_fpsr(), ad )); + put_long (ad+4, FPU instruction_address); + D(bug("FMOVEM FPU instruction_address (%X) -> mem %X\r\n", FPU instruction_address, ad+4 )); + m68k_areg (regs, opcode & 7) = ad+8; + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_2_Mem_postincrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + put_long (ad, get_fpcr()); + D(bug("FMOVEM regs.FPU fpcr (%X) -> mem %X\r\n", get_fpcr(), ad )); + m68k_areg (regs, opcode & 7) = ad+4; + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_fpiar_2_Mem_postincrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + put_long (ad, get_fpcr()); + D(bug("FMOVEM regs.FPU fpcr (%X) -> mem %X\r\n", get_fpcr(), ad )); + put_long (ad+4, FPU instruction_address); + D(bug("FMOVEM FPU instruction_address (%X) -> mem %X\r\n", FPU instruction_address, ad+4 )); + m68k_areg (regs, opcode & 7) = ad+8; + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_fpsr_2_Mem_postincrement( uae_u32 opcode, uae_u32 extra ) +{ + dump_registers( "END "); + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + put_long (ad, get_fpcr()); + D(bug("FMOVEM regs.FPU fpcr (%X) -> mem %X\r\n", get_fpcr(), ad )); + put_long (ad+4, get_fpsr()); + D(bug("FMOVEM regs.FPU fpsr (%X) -> mem %X\r\n", get_fpsr(), ad+4 )); + m68k_areg (regs, opcode & 7) = ad+8; + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_fpsr_fpiar_2_Mem_postincrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + put_long (ad, get_fpcr()); + D(bug("FMOVEM regs.FPU fpcr (%X) -> mem %X\r\n", get_fpcr(), ad )); + put_long (ad+4, get_fpsr()); + D(bug("FMOVEM regs.FPU fpsr (%X) -> mem 
%X\r\n", get_fpsr(), ad+4 )); + put_long (ad+8, FPU instruction_address); + D(bug("FMOVEM FPU instruction_address (%X) -> mem %X\r\n", FPU instruction_address, ad+8 )); + m68k_areg (regs, opcode & 7) = ad+12; + dump_registers( "END "); + } +} + + +/* ---------------------------- CONTROL REGS -> MEMORY ---------------------------- */ + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_none_2_Mem( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVEM Control regs (none) -> mem\r\n" )); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpiar_2_Mem( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + put_long (ad, FPU instruction_address); + D(bug("FMOVEM FPU instruction_address (%X) -> mem %X\r\n", FPU instruction_address, ad )); + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpsr_2_Mem( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + put_long (ad, get_fpsr()); + D(bug("FMOVEM regs.FPU fpsr (%X) -> mem %X\r\n", get_fpsr(), ad )); + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpsr_fpiar_2_Mem( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + put_long (ad, get_fpsr()); + D(bug("FMOVEM regs.FPU fpsr (%X) -> mem %X\r\n", get_fpsr(), ad )); + put_long (ad+4, FPU instruction_address); + D(bug("FMOVEM FPU instruction_address (%X) -> mem %X\r\n", FPU instruction_address, ad+4 )); + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_2_Mem( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + put_long (ad, get_fpcr()); + D(bug("FMOVEM regs.FPU fpcr (%X) -> mem %X\r\n", get_fpcr(), ad )); + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_fpiar_2_Mem( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + put_long (ad, get_fpcr()); + D(bug("FMOVEM regs.FPU fpcr (%X) -> mem %X\r\n", 
get_fpcr(), ad )); + put_long (ad+4, FPU instruction_address); + D(bug("FMOVEM FPU instruction_address (%X) -> mem %X\r\n", FPU instruction_address, ad+4 )); + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_fpsr_2_Mem( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + put_long (ad, get_fpcr()); + D(bug("FMOVEM regs.FPU fpcr (%X) -> mem %X\r\n", get_fpcr(), ad )); + put_long (ad+4, get_fpsr()); + D(bug("FMOVEM regs.FPU fpsr (%X) -> mem %X\r\n", get_fpsr(), ad+4 )); + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_fpsr_fpiar_2_Mem( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + put_long (ad, get_fpcr()); + D(bug("FMOVEM regs.FPU fpcr (%X) -> mem %X\r\n", get_fpcr(), ad )); + put_long (ad+4, get_fpsr()); + D(bug("FMOVEM regs.FPU fpsr (%X) -> mem %X\r\n", get_fpsr(), ad+4 )); + put_long (ad+8, FPU instruction_address); + D(bug("FMOVEM FPU instruction_address (%X) -> mem %X\r\n", FPU instruction_address, ad+8 )); + dump_registers( "END "); + } +} + + +/* ---------------------------- --MEMORY -> CONTROL REGS ---------------------------- */ + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_none_predecrement( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVEM --Mem -> control(none)\r\n")); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpiar_predecrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + ad -= 4; + FPU instruction_address = get_long (ad); + D(bug("FMOVEM mem %X (%X) -> FPU instruction_address\r\n", ad, FPU instruction_address )); + m68k_areg (regs, opcode & 7) = ad; + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpsr_predecrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + ad -= 4; + set_fpsr( get_long (ad) ); + D(bug("FMOVEM mem %X (%X) -> regs.FPU fpsr\r\n", ad, get_fpsr() )); + 
m68k_areg (regs, opcode & 7) = ad; + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpsr_fpiar_predecrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + ad -= 8; + set_fpsr( get_long (ad) ); + D(bug("FMOVEM mem %X (%X) -> regs.FPU fpsr\r\n", ad, get_fpsr() )); + FPU instruction_address = get_long (ad+4); + D(bug("FMOVEM mem %X (%X) -> FPU instruction_address\r\n", ad+4, FPU instruction_address )); + m68k_areg (regs, opcode & 7) = ad; + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpcr_predecrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + ad -= 4; + set_fpcr( get_long (ad) ); + D(bug("FMOVEM mem %X (%X) -> regs.FPU fpcr\r\n", ad, get_fpcr() )); + m68k_areg (regs, opcode & 7) = ad; + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpcr_fpiar_predecrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + ad -= 8; + set_fpcr( get_long (ad) ); + D(bug("FMOVEM mem %X (%X) -> regs.FPU fpcr\r\n", ad, get_fpcr() )); + FPU instruction_address = get_long (ad+4); + D(bug("FMOVEM mem %X (%X) -> FPU instruction_address\r\n", ad+4, FPU instruction_address )); + m68k_areg (regs, opcode & 7) = ad; + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpcr_fpsr_predecrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + ad -= 8; + set_fpcr( get_long (ad) ); + D(bug("FMOVEM mem %X (%X) -> regs.FPU fpcr\r\n", ad, get_fpcr() )); + set_fpsr( get_long (ad+4) ); + D(bug("FMOVEM mem %X (%X) -> regs.FPU fpsr\r\n", ad+4, get_fpsr() )); + m68k_areg (regs, opcode & 7) = ad; + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpcr_fpsr_fpiar_predecrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + ad -= 12; + set_fpcr( get_long (ad) ); + 
D(bug("FMOVEM mem %X (%X) -> regs.FPU fpcr\r\n", ad, get_fpcr() )); + set_fpsr( get_long (ad+4) ); + D(bug("FMOVEM mem %X (%X) -> regs.FPU fpsr\r\n", ad+4, get_fpsr() )); + FPU instruction_address = get_long (ad+8); + D(bug("FMOVEM mem %X (%X) -> FPU instruction_address\r\n", ad+8, FPU instruction_address )); + m68k_areg (regs, opcode & 7) = ad; + dump_registers( "END "); + } +} + + +/* ---------------------------- CONTROL REGS -> MEMORY++ ---------------------------- */ + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_none_postincrement( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVEM Mem++ -> control(none)\r\n")); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpiar_postincrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + FPU instruction_address = get_long (ad); + D(bug("FMOVEM mem %X (%X) -> FPU instruction_address\r\n", ad, FPU instruction_address )); + m68k_areg (regs, opcode & 7) = ad+4; + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpsr_postincrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + set_fpsr( get_long (ad) ); + D(bug("FMOVEM mem %X (%X) -> regs.FPU fpsr\r\n", ad, get_fpsr() )); + m68k_areg (regs, opcode & 7) = ad+4; + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpsr_fpiar_postincrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + set_fpsr( get_long (ad) ); + D(bug("FMOVEM mem %X (%X) -> regs.FPU fpsr\r\n", ad, get_fpsr() )); + FPU instruction_address = get_long (ad+4); + D(bug("FMOVEM mem %X (%X) -> FPU instruction_address\r\n", ad+4, FPU instruction_address )); + m68k_areg (regs, opcode & 7) = ad+8; + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpcr_postincrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + set_fpcr( get_long (ad) ); + 
D(bug("FMOVEM mem %X (%X) -> regs.FPU fpcr\r\n", ad, get_fpcr() )); + m68k_areg (regs, opcode & 7) = ad+4; + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpcr_fpiar_postincrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + set_fpcr( get_long (ad) ); + D(bug("FMOVEM mem %X (%X) -> regs.FPU fpcr\r\n", ad, get_fpcr() )); + FPU instruction_address = get_long (ad+4); + D(bug("FMOVEM mem %X (%X) -> FPU instruction_address\r\n", ad+4, FPU instruction_address )); + m68k_areg (regs, opcode & 7) = ad+8; + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpcr_fpsr_postincrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + set_fpcr( get_long (ad) ); + D(bug("FMOVEM mem %X (%X) -> regs.FPU fpcr\r\n", ad, get_fpcr() )); + set_fpsr( get_long (ad+4) ); + D(bug("FMOVEM mem %X (%X) -> regs.FPU fpsr\r\n", ad+4, get_fpsr() )); + m68k_areg (regs, opcode & 7) = ad+8; + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpcr_fpsr_fpiar_postincrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + set_fpcr( get_long (ad) ); + D(bug("FMOVEM mem %X (%X) -> regs.FPU fpcr\r\n", ad, get_fpcr() )); + set_fpsr( get_long (ad+4) ); + D(bug("FMOVEM mem %X (%X) -> regs.FPU fpsr\r\n", ad+4, get_fpsr() )); + FPU instruction_address = get_long (ad+8); + D(bug("FMOVEM mem %X (%X) -> FPU instruction_address\r\n", ad+8, FPU instruction_address )); + m68k_areg (regs, opcode & 7) = ad+12; + dump_registers( "END "); + } +} + + +/* ---------------------------- MEMORY -> CONTROL REGS ---------------------------- */ +/* ---------------------------- and ---------------------------- */ +/* ---------------------------- IMMEDIATE -> CONTROL REGS ---------------------------- */ + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_none_2_Mem( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVEM Mem -> 
control(none)\r\n")); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpiar_2_Mem( uae_u32 opcode, uae_u32 extra ) +{ + if ((opcode & 0x3f) == 0x3c) { + FPU instruction_address = next_ilong(); + D(bug("FMOVEM #<%X> -> FPU instruction_address\r\n", FPU instruction_address)); + } else { + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + FPU instruction_address = get_long (ad); + D(bug("FMOVEM mem %X (%X) -> FPU instruction_address\r\n", ad, FPU instruction_address )); + } + } + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpsr_2_Mem( uae_u32 opcode, uae_u32 extra ) +{ + if ((opcode & 0x3f) == 0x3c) { + set_fpsr( next_ilong() ); + D(bug("FMOVEM #<%X> -> regs.FPU fpsr\r\n", get_fpsr())); + } else { + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + set_fpsr( get_long (ad) ); + D(bug("FMOVEM mem %X (%X) -> regs.FPU fpsr\r\n", ad, get_fpsr() )); + } + } + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpsr_fpiar_2_Mem( uae_u32 opcode, uae_u32 extra ) +{ + if ((opcode & 0x3f) == 0x3c) { + set_fpsr( next_ilong() ); + D(bug("FMOVEM #<%X> -> regs.FPU fpsr\r\n", get_fpsr())); + FPU instruction_address = next_ilong(); + D(bug("FMOVEM #<%X> -> FPU instruction_address\r\n", FPU instruction_address)); + } else { + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + set_fpsr( get_long (ad) ); + D(bug("FMOVEM mem %X (%X) -> regs.FPU fpsr\r\n", ad, get_fpsr() )); + FPU instruction_address = get_long (ad+4); + D(bug("FMOVEM mem %X (%X) -> FPU instruction_address\r\n", ad+4, FPU instruction_address )); + } + } + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpcr_2_Mem( uae_u32 opcode, uae_u32 extra ) +{ + if ((opcode & 0x3f) == 0x3c) { + set_fpcr( next_ilong() ); + D(bug("FMOVEM #<%X> -> regs.FPU fpcr\r\n", get_fpcr())); + } else { + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + set_fpcr( get_long (ad) ); + D(bug("FMOVEM mem %X (%X) -> regs.FPU fpcr\r\n", ad, get_fpcr() 
)); + } + } + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpcr_fpiar_2_Mem( uae_u32 opcode, uae_u32 extra ) +{ + if ((opcode & 0x3f) == 0x3c) { + set_fpcr( next_ilong() ); + D(bug("FMOVEM #<%X> -> regs.FPU fpcr\r\n", get_fpcr())); + FPU instruction_address = next_ilong(); + D(bug("FMOVEM #<%X> -> FPU instruction_address\r\n", FPU instruction_address)); + } else { + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + set_fpcr( get_long (ad) ); + D(bug("FMOVEM mem %X (%X) -> regs.FPU fpcr\r\n", ad, get_fpcr() )); + FPU instruction_address = get_long (ad+4); + D(bug("FMOVEM mem %X (%X) -> FPU instruction_address\r\n", ad+4, FPU instruction_address )); + } + } + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpcr_fpsr_2_Mem( uae_u32 opcode, uae_u32 extra ) +{ + if ((opcode & 0x3f) == 0x3c) { + set_fpcr( next_ilong() ); + D(bug("FMOVEM #<%X> -> regs.FPU fpcr\r\n", get_fpcr())); + set_fpsr( next_ilong() ); + D(bug("FMOVEM #<%X> -> regs.FPU fpsr\r\n", get_fpsr())); + } else { + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + set_fpcr( get_long (ad) ); + D(bug("FMOVEM mem %X (%X) -> regs.FPU fpcr\r\n", ad, get_fpcr() )); + set_fpsr( get_long (ad+4) ); + D(bug("FMOVEM mem %X (%X) -> regs.FPU fpsr\r\n", ad+4, get_fpsr() )); + } + } + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpcr_fpsr_fpiar_2_Mem( uae_u32 opcode, uae_u32 extra ) +{ + if ((opcode & 0x3f) == 0x3c) { + set_fpcr( next_ilong() ); + D(bug("FMOVEM #<%X> -> regs.FPU fpcr\r\n", get_fpcr())); + set_fpsr( next_ilong() ); + D(bug("FMOVEM #<%X> -> regs.FPU fpsr\r\n", get_fpsr())); + FPU instruction_address = next_ilong(); + D(bug("FMOVEM #<%X> -> FPU instruction_address\r\n", FPU instruction_address)); + } else { + uae_u32 ad; + if (get_fp_ad(opcode, &ad)) { + set_fpcr( get_long (ad) ); + D(bug("FMOVEM mem %X (%X) -> regs.FPU fpcr\r\n", ad, get_fpcr() )); + set_fpsr( get_long (ad+4) ); + D(bug("FMOVEM mem %X (%X) -> regs.FPU 
fpsr\r\n", ad+4, get_fpsr() )); + FPU instruction_address = get_long (ad+8); + D(bug("FMOVEM mem %X (%X) -> FPU instruction_address\r\n", ad+8, FPU instruction_address )); + } + } + dump_registers( "END "); +} + + +/* ---------------------------- FMOVEM MEMORY -> FPP ---------------------------- */ + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpp_static_pred_postincrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad, list = extra & 0xff; + D(bug("FMOVEM memory->FPP\r\n")); + if (get_fp_ad(opcode, &ad)) { + for( int reg=7; reg>=0; reg-- ) { + uae_u32 wrd1, wrd2, wrd3; + if( list & 0x80 ) { + ad -= 4; + wrd3 = get_long (ad); + ad -= 4; + wrd2 = get_long (ad); + ad -= 4; + wrd1 = get_long (ad); + to_exten_no_normalize (wrd1, wrd2, wrd3,FPU registers[reg]); + } + list <<= 1; + } + m68k_areg (regs, opcode & 7) = ad; + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpp_static_pred_predecrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad, list = extra & 0xff; + D(bug("FMOVEM memory->FPP\r\n")); + if (get_fp_ad(opcode, &ad)) { + for( int reg=7; reg>=0; reg-- ) { + uae_u32 wrd1, wrd2, wrd3; + if( list & 0x80 ) { + ad -= 4; + wrd3 = get_long (ad); + ad -= 4; + wrd2 = get_long (ad); + ad -= 4; + wrd1 = get_long (ad); + to_exten_no_normalize (wrd1, wrd2, wrd3,FPU registers[reg]); + } + list <<= 1; + } + m68k_areg (regs, opcode & 7) = ad; + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpp_static_pred( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad, list = extra & 0xff; + D(bug("FMOVEM memory->FPP\r\n")); + if (get_fp_ad(opcode, &ad)) { + for( int reg=7; reg>=0; reg-- ) { + uae_u32 wrd1, wrd2, wrd3; + if( list & 0x80 ) { + ad -= 4; + wrd3 = get_long (ad); + ad -= 4; + wrd2 = get_long (ad); + ad -= 4; + wrd1 = get_long (ad); + to_exten_no_normalize (wrd1, wrd2, wrd3,FPU registers[reg]); + } + list <<= 1; + } + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU 
fpuop_fmovem_Mem_2_fpp_dynamic_pred_postincrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad, list = m68k_dreg (regs, (extra >> 4) & 3) & 0xff; + D(bug("FMOVEM memory->FPP\r\n")); + if (get_fp_ad(opcode, &ad)) { + for( int reg=7; reg>=0; reg-- ) { + uae_u32 wrd1, wrd2, wrd3; + if( list & 0x80 ) { + ad -= 4; + wrd3 = get_long (ad); + ad -= 4; + wrd2 = get_long (ad); + ad -= 4; + wrd1 = get_long (ad); + to_exten_no_normalize (wrd1, wrd2, wrd3,FPU registers[reg]); + } + list <<= 1; + } + m68k_areg (regs, opcode & 7) = ad; + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpp_dynamic_pred_predecrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad, list = m68k_dreg (regs, (extra >> 4) & 3) & 0xff; + D(bug("FMOVEM memory->FPP\r\n")); + if (get_fp_ad(opcode, &ad)) { + for( int reg=7; reg>=0; reg-- ) { + uae_u32 wrd1, wrd2, wrd3; + if( list & 0x80 ) { + ad -= 4; + wrd3 = get_long (ad); + ad -= 4; + wrd2 = get_long (ad); + ad -= 4; + wrd1 = get_long (ad); + to_exten_no_normalize (wrd1, wrd2, wrd3,FPU registers[reg]); + } + list <<= 1; + } + m68k_areg (regs, opcode & 7) = ad; + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpp_dynamic_pred( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad, list = m68k_dreg (regs, (extra >> 4) & 3) & 0xff; + D(bug("FMOVEM memory->FPP\r\n")); + if (get_fp_ad(opcode, &ad)) { + for( int reg=7; reg>=0; reg-- ) { + uae_u32 wrd1, wrd2, wrd3; + if( list & 0x80 ) { + ad -= 4; + wrd3 = get_long (ad); + ad -= 4; + wrd2 = get_long (ad); + ad -= 4; + wrd1 = get_long (ad); + to_exten_no_normalize (wrd1, wrd2, wrd3,FPU registers[reg]); + } + list <<= 1; + } + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpp_static_postinc_postincrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad, list = extra & 0xff; + D(bug("FMOVEM memory->FPP\r\n")); + if (get_fp_ad(opcode, &ad)) { + for( int reg=0; reg<8; reg++ ) { + uae_u32 wrd1, wrd2, wrd3; + if( 
list & 0x80 ) { + wrd1 = get_long (ad); + ad += 4; + wrd2 = get_long (ad); + ad += 4; + wrd3 = get_long (ad); + ad += 4; + to_exten_no_normalize (wrd1, wrd2, wrd3,FPU registers[reg]); + } + list <<= 1; + } + m68k_areg (regs, opcode & 7) = ad; + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpp_static_postinc_predecrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad, list = extra & 0xff; + D(bug("FMOVEM memory->FPP\r\n")); + if (get_fp_ad(opcode, &ad)) { + for( int reg=0; reg<8; reg++ ) { + uae_u32 wrd1, wrd2, wrd3; + if( list & 0x80 ) { + wrd1 = get_long (ad); + ad += 4; + wrd2 = get_long (ad); + ad += 4; + wrd3 = get_long (ad); + ad += 4; + to_exten_no_normalize (wrd1, wrd2, wrd3,FPU registers[reg]); + } + list <<= 1; + } + m68k_areg (regs, opcode & 7) = ad; + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpp_static_postinc( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad, list = extra & 0xff; + D(bug("FMOVEM memory->FPP\r\n")); + if (get_fp_ad(opcode, &ad)) { + for( int reg=0; reg<8; reg++ ) { + uae_u32 wrd1, wrd2, wrd3; + if( list & 0x80 ) { + wrd1 = get_long (ad); + ad += 4; + wrd2 = get_long (ad); + ad += 4; + wrd3 = get_long (ad); + ad += 4; + to_exten_no_normalize (wrd1, wrd2, wrd3,FPU registers[reg]); + } + list <<= 1; + } + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpp_dynamic_postinc_postincrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad, list = m68k_dreg (regs, (extra >> 4) & 3) & 0xff; + D(bug("FMOVEM memory->FPP\r\n")); + if (get_fp_ad(opcode, &ad)) { + for( int reg=0; reg<8; reg++ ) { + uae_u32 wrd1, wrd2, wrd3; + if( list & 0x80 ) { + wrd1 = get_long (ad); + ad += 4; + wrd2 = get_long (ad); + ad += 4; + wrd3 = get_long (ad); + ad += 4; + to_exten_no_normalize (wrd1, wrd2, wrd3,FPU registers[reg]); + } + list <<= 1; + } + m68k_areg (regs, opcode & 7) = ad; + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU 
fpuop_fmovem_Mem_2_fpp_dynamic_postinc_predecrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad, list = m68k_dreg (regs, (extra >> 4) & 3) & 0xff; + D(bug("FMOVEM memory->FPP\r\n")); + if (get_fp_ad(opcode, &ad)) { + for( int reg=0; reg<8; reg++ ) { + uae_u32 wrd1, wrd2, wrd3; + if( list & 0x80 ) { + wrd1 = get_long (ad); + ad += 4; + wrd2 = get_long (ad); + ad += 4; + wrd3 = get_long (ad); + ad += 4; + to_exten_no_normalize (wrd1, wrd2, wrd3,FPU registers[reg]); + } + list <<= 1; + } + m68k_areg (regs, opcode & 7) = ad; + dump_registers( "END "); + } +} + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpp_dynamic_postinc( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad, list = m68k_dreg (regs, (extra >> 4) & 3) & 0xff; + D(bug("FMOVEM memory->FPP\r\n")); + if (get_fp_ad(opcode, &ad)) { + for( int reg=0; reg<8; reg++ ) { + uae_u32 wrd1, wrd2, wrd3; + if( list & 0x80 ) { + wrd1 = get_long (ad); + ad += 4; + wrd2 = get_long (ad); + ad += 4; + wrd3 = get_long (ad); + ad += 4; + to_exten_no_normalize (wrd1, wrd2, wrd3,FPU registers[reg]); + } + list <<= 1; + } + dump_registers( "END "); + } +} + + +/* ---------------------------- FPP -> FMOVEM MEMORY ---------------------------- */ + +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpp_2_Mem_static_pred_postincrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad, list = extra & 0xff; + D(bug("FMOVEM FPP->memory\r\n")); + if (get_fp_ad(opcode, &ad)) { + for( int reg=7; reg>=0; reg-- ) { + uae_u32 wrd1, wrd2, wrd3; + if( list & 0x80 ) { + from_exten(FPU registers[reg],&wrd1, &wrd2, &wrd3); + ad -= 4; + put_long (ad, wrd3); + ad -= 4; + put_long (ad, wrd2); + ad -= 4; + put_long (ad, wrd1); + } + list <<= 1; + } + m68k_areg (regs, opcode & 7) = ad; + dump_registers( "END "); + } +} +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpp_2_Mem_static_pred_predecrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad, list = extra & 0xff; + D(bug("FMOVEM FPP->memory\r\n")); + if (get_fp_ad(opcode, &ad)) { + for( int reg=7; reg>=0; 
reg-- ) { + uae_u32 wrd1, wrd2, wrd3; + if( list & 0x80 ) { + from_exten(FPU registers[reg],&wrd1, &wrd2, &wrd3); + ad -= 4; + put_long (ad, wrd3); + ad -= 4; + put_long (ad, wrd2); + ad -= 4; + put_long (ad, wrd1); + } + list <<= 1; + } + m68k_areg (regs, opcode & 7) = ad; + dump_registers( "END "); + } +} +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpp_2_Mem_static_pred( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad, list = extra & 0xff; + D(bug("FMOVEM FPP->memory\r\n")); + if (get_fp_ad(opcode, &ad)) { + for( int reg=7; reg>=0; reg-- ) { + uae_u32 wrd1, wrd2, wrd3; + if( list & 0x80 ) { + from_exten(FPU registers[reg],&wrd1, &wrd2, &wrd3); + ad -= 4; + put_long (ad, wrd3); + ad -= 4; + put_long (ad, wrd2); + ad -= 4; + put_long (ad, wrd1); + } + list <<= 1; + } + dump_registers( "END "); + } +} +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpp_2_Mem_dynamic_pred_postincrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad, list = m68k_dreg (regs, (extra >> 4) & 3) & 0xff; + D(bug("FMOVEM FPP->memory\r\n")); + if (get_fp_ad(opcode, &ad)) { + for( int reg=7; reg>=0; reg-- ) { + uae_u32 wrd1, wrd2, wrd3; + if( list & 0x80 ) { + from_exten(FPU registers[reg],&wrd1, &wrd2, &wrd3); + ad -= 4; + put_long (ad, wrd3); + ad -= 4; + put_long (ad, wrd2); + ad -= 4; + put_long (ad, wrd1); + } + list <<= 1; + } + m68k_areg (regs, opcode & 7) = ad; + dump_registers( "END "); + } +} +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpp_2_Mem_dynamic_pred_predecrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad, list = m68k_dreg (regs, (extra >> 4) & 3) & 0xff; + D(bug("FMOVEM FPP->memory\r\n")); + if (get_fp_ad(opcode, &ad)) { + for( int reg=7; reg>=0; reg-- ) { + uae_u32 wrd1, wrd2, wrd3; + if( list & 0x80 ) { + from_exten(FPU registers[reg],&wrd1, &wrd2, &wrd3); + ad -= 4; + put_long (ad, wrd3); + ad -= 4; + put_long (ad, wrd2); + ad -= 4; + put_long (ad, wrd1); + } + list <<= 1; + } + m68k_areg (regs, opcode & 7) = ad; + dump_registers( "END "); + } +} +PRIVATE void REGPARAM2 
FFPU fpuop_fmovem_fpp_2_Mem_dynamic_pred( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad, list = m68k_dreg (regs, (extra >> 4) & 3) & 0xff; + D(bug("FMOVEM FPP->memory\r\n")); + if (get_fp_ad(opcode, &ad)) { + for( int reg=7; reg>=0; reg-- ) { + uae_u32 wrd1, wrd2, wrd3; + if( list & 0x80 ) { + from_exten(FPU registers[reg],&wrd1, &wrd2, &wrd3); + ad -= 4; + put_long (ad, wrd3); + ad -= 4; + put_long (ad, wrd2); + ad -= 4; + put_long (ad, wrd1); + } + list <<= 1; + } + dump_registers( "END "); + } +} +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpp_2_Mem_static_postinc_postincrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad, list = extra & 0xff; + D(bug("FMOVEM FPP->memory\r\n")); + if (get_fp_ad(opcode, &ad)) { + for( int reg=0; reg<8; reg++ ) { + uae_u32 wrd1, wrd2, wrd3; + if( list & 0x80 ) { + from_exten(FPU registers[reg],&wrd1, &wrd2, &wrd3); + put_long (ad, wrd1); + ad += 4; + put_long (ad, wrd2); + ad += 4; + put_long (ad, wrd3); + ad += 4; + } + list <<= 1; + } + m68k_areg (regs, opcode & 7) = ad; + dump_registers( "END "); + } +} +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpp_2_Mem_static_postinc_predecrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad, list = extra & 0xff; + D(bug("FMOVEM FPP->memory\r\n")); + if (get_fp_ad(opcode, &ad)) { + for( int reg=0; reg<8; reg++ ) { + uae_u32 wrd1, wrd2, wrd3; + if( list & 0x80 ) { + from_exten(FPU registers[reg],&wrd1, &wrd2, &wrd3); + put_long (ad, wrd1); + ad += 4; + put_long (ad, wrd2); + ad += 4; + put_long (ad, wrd3); + ad += 4; + } + list <<= 1; + } + m68k_areg (regs, opcode & 7) = ad; + dump_registers( "END "); + } +} +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpp_2_Mem_static_postinc( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad, list = extra & 0xff; + D(bug("FMOVEM FPP->memory\r\n")); + if (get_fp_ad(opcode, &ad)) { + for( int reg=0; reg<8; reg++ ) { + uae_u32 wrd1, wrd2, wrd3; + if( list & 0x80 ) { + from_exten(FPU registers[reg],&wrd1, &wrd2, &wrd3); + put_long (ad, wrd1); + ad += 4; + 
put_long (ad, wrd2); + ad += 4; + put_long (ad, wrd3); + ad += 4; + } + list <<= 1; + } + dump_registers( "END "); + } +} +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpp_2_Mem_dynamic_postinc_postincrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad, list = m68k_dreg (regs, (extra >> 4) & 3) & 0xff; + D(bug("FMOVEM FPP->memory\r\n")); + if (get_fp_ad(opcode, &ad)) { + for( int reg=0; reg<8; reg++ ) { + uae_u32 wrd1, wrd2, wrd3; + if( list & 0x80 ) { + from_exten(FPU registers[reg],&wrd1, &wrd2, &wrd3); + put_long (ad, wrd1); + ad += 4; + put_long (ad, wrd2); + ad += 4; + put_long (ad, wrd3); + ad += 4; + } + list <<= 1; + } + m68k_areg (regs, opcode & 7) = ad; + dump_registers( "END "); + } +} +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpp_2_Mem_dynamic_postinc_predecrement( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad, list = m68k_dreg (regs, (extra >> 4) & 3) & 0xff; + D(bug("FMOVEM FPP->memory\r\n")); + if (get_fp_ad(opcode, &ad)) { + for( int reg=0; reg<8; reg++ ) { + uae_u32 wrd1, wrd2, wrd3; + if( list & 0x80 ) { + from_exten(FPU registers[reg],&wrd1, &wrd2, &wrd3); + put_long (ad, wrd1); + ad += 4; + put_long (ad, wrd2); + ad += 4; + put_long (ad, wrd3); + ad += 4; + } + list <<= 1; + } + m68k_areg (regs, opcode & 7) = ad; + dump_registers( "END "); + } +} +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpp_2_Mem_dynamic_postinc( uae_u32 opcode, uae_u32 extra ) +{ + uae_u32 ad, list = m68k_dreg (regs, (extra >> 4) & 3) & 0xff; + D(bug("FMOVEM FPP->memory\r\n")); + if (get_fp_ad(opcode, &ad)) { + for( int reg=0; reg<8; reg++ ) { + uae_u32 wrd1, wrd2, wrd3; + if( list & 0x80 ) { + from_exten(FPU registers[reg],&wrd1, &wrd2, &wrd3); + put_long (ad, wrd1); + ad += 4; + put_long (ad, wrd2); + ad += 4; + put_long (ad, wrd3); + ad += 4; + } + list <<= 1; + } + dump_registers( "END "); + } +} + + +/* ---------------------------- FMOVEM CONSTANT ROM -> FPP ---------------------------- */ + +PRIVATE void REGPARAM2 FFPU fpuop_do_fldpi( uae_u32 opcode, uae_u32 extra ) +{ 
+ D(bug("FMOVECR memory->FPP FP const: Pi\r\n")); + memcpy( &FPU registers[(extra>>7) & 7], &const_pi, sizeof(fpu_register) ); + x86_status_word = SW_FINITE | FPSR_EXCEPTION_INEX2; + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fldlg2( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVECR memory->FPP FP const: Log 10 (2)\r\n")); + memcpy( &FPU registers[(extra>>7) & 7], &const_lg2, sizeof(fpu_register) ); + x86_status_word = SW_FINITE | FPSR_EXCEPTION_INEX2; + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_load_const_e( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVECR memory->FPP FP const: e\r\n")); + memcpy( &FPU registers[(extra>>7) & 7], &const_e, sizeof(fpu_register) ); + x86_status_word = SW_FINITE | FPSR_EXCEPTION_INEX2; + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fldl2e( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVECR memory->FPP FP const: Log 2 (e)\r\n")); + memcpy( &FPU registers[(extra>>7) & 7], &const_l2e, sizeof(fpu_register) ); + x86_status_word = SW_FINITE | FPSR_EXCEPTION_INEX2; + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_load_const_log_10_e( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVECR memory->FPP FP const: Log 10 (e)\r\n")); + memcpy( &FPU registers[(extra>>7) & 7], &const_log_10_e, sizeof(fpu_register) ); + x86_status_word = SW_FINITE | FPSR_EXCEPTION_INEX2; + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fldz( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVECR memory->FPP FP const: zero\r\n")); + memcpy( &FPU registers[(extra>>7) & 7], &const_z, sizeof(fpu_register) ); + x86_status_word = SW_Z; + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fldln2( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVECR memory->FPP FP const: ln(2)\r\n")); + memcpy( &FPU registers[(extra>>7) & 7], &const_ln2, sizeof(fpu_register) ); + x86_status_word = SW_FINITE | FPSR_EXCEPTION_INEX2; + dump_registers( "END "); +} + 
+PRIVATE void REGPARAM2 FFPU fpuop_do_load_const_ln_10( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVECR memory->FPP FP const: ln(10)\r\n")); + memcpy( &FPU registers[(extra>>7) & 7], &const_ln_10, sizeof(fpu_register) ); + x86_status_word = SW_FINITE | FPSR_EXCEPTION_INEX2; + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fld1( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVECR memory->FPP FP const: 1.0e0\r\n")); + memcpy( &FPU registers[(extra>>7) & 7], &const_1, sizeof(fpu_register) ); + x86_status_word = SW_FINITE; + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_load_const_1e1( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVECR memory->FPP FP const: 1.0e1\r\n")); + memcpy( &FPU registers[(extra>>7) & 7], &const_1e1, sizeof(fpu_register) ); + x86_status_word = SW_FINITE; + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_load_const_1e2( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVECR memory->FPP FP const: 1.0e2\r\n")); + memcpy( &FPU registers[(extra>>7) & 7], &const_1e2, sizeof(fpu_register) ); + x86_status_word = SW_FINITE; + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_load_const_1e4( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVECR memory->FPP FP const: 1.0e4\r\n")); + memcpy( &FPU registers[(extra>>7) & 7], &const_1e4, sizeof(fpu_register) ); + x86_status_word = SW_FINITE; + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_load_const_1e8( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVECR memory->FPP FP const: 1.0e8\r\n")); + memcpy( &FPU registers[(extra>>7) & 7], &const_1e8, sizeof(fpu_register) ); + x86_status_word = SW_FINITE | FPSR_EXCEPTION_INEX2; // Is it really FPSR_EXCEPTION_INEX2? 
+ dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_load_const_1e16( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVECR memory->FPP FP const: 1.0e16\r\n")); + memcpy( &FPU registers[(extra>>7) & 7], &const_1e16, sizeof(fpu_register) ); + x86_status_word = SW_FINITE | FPSR_EXCEPTION_INEX2; + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_load_const_1e32( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVECR memory->FPP FP const: 1.0e32\r\n")); + memcpy( &FPU registers[(extra>>7) & 7], &const_1e32, sizeof(fpu_register) ); + x86_status_word = SW_FINITE | FPSR_EXCEPTION_INEX2; + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_load_const_1e64( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVECR memory->FPP FP const: 1.0e64\r\n")); + memcpy( &FPU registers[(extra>>7) & 7], &const_1e64, sizeof(fpu_register) ); + x86_status_word = SW_FINITE | FPSR_EXCEPTION_INEX2; + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_load_const_1e128( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVECR memory->FPP FP const: 1.0e128\r\n")); + memcpy( &FPU registers[(extra>>7) & 7], &const_1e128, sizeof(fpu_register) ); + x86_status_word = SW_FINITE | FPSR_EXCEPTION_INEX2; + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_load_const_1e256( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVECR memory->FPP FP const: 1.0e256\r\n")); + memcpy( &FPU registers[(extra>>7) & 7], &const_1e256, sizeof(fpu_register) ); + x86_status_word = SW_FINITE | FPSR_EXCEPTION_INEX2; + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_load_const_1e512( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVECR memory->FPP FP const: 1.0e512\r\n")); + memcpy( &FPU registers[(extra>>7) & 7], &const_1e512, sizeof(fpu_register) ); + x86_status_word = SW_FINITE | FPSR_EXCEPTION_INEX2; + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_load_const_1e1024( uae_u32 opcode, uae_u32 extra ) +{ + 
D(bug("FMOVECR memory->FPP FP const: 1.0e1024\r\n")); + memcpy( &FPU registers[(extra>>7) & 7], &const_1e1024, sizeof(fpu_register) ); + x86_status_word = SW_FINITE | FPSR_EXCEPTION_INEX2; + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_load_const_1e2048( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVECR memory->FPP FP const: 1.0e2048\r\n")); + memcpy( &FPU registers[(extra>>7) & 7], &const_1e2048, sizeof(fpu_register) ); + x86_status_word = SW_FINITE | FPSR_EXCEPTION_INEX2; + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_load_const_1e4096( uae_u32 opcode, uae_u32 extra ) +{ + D(bug("FMOVECR memory->FPP FP const: 1.0e4096\r\n")); + memcpy( &FPU registers[(extra>>7) & 7], &const_1e4096, sizeof(fpu_register) ); + x86_status_word = SW_FINITE | FPSR_EXCEPTION_INEX2; + dump_registers( "END "); +} + + +/* -------------------------- 040 ALU -------------------------- */ +PRIVATE void REGPARAM2 FFPU fpuop_do_fsmove( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FSMOVE %s\r\n",etos(src))); + do_fsmove( FPU registers[reg], src ); + build_ex_status(); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fdmove( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FDMOVE %s\r\n",etos(src))); + do_fdmove( FPU registers[reg], src ); + build_ex_status(); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fssqrt( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; 
+ } + D(bug("FSSQRT %s\r\n",etos(src))); + do_fssqrt( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fdsqrt( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FDSQRT %s\r\n",etos(src))); + do_fdsqrt( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fsabs( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FSABS %s\r\n",etos(src))); + do_fsabs( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fdabs( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FDABS %s\r\n",etos(src))); + do_fdabs( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fsneg( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FSNEG %s\r\n",etos(src))); + do_fsneg( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fdneg( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FDNEG %s\r\n",etos(src))); + do_fdneg( FPU registers[reg], src ); + 
dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fsdiv( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FSDIV %s\r\n",etos(src))); + do_fsdiv( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fddiv( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FDDIV %s\r\n",etos(src))); + do_fddiv( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fsadd( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FSADD %s\r\n",etos(src))); + do_fsadd( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fdadd( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FDADD %s\r\n",etos(src))); + do_fdadd( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fssub( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FSSUB %s\r\n",etos(src))); + do_fssub( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fdsub( uae_u32 opcode, uae_u32 
extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FDSUB %s\r\n",etos(src))); + do_fdsub( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fsmul( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FSMUL %s\r\n",etos(src))); + do_fsmul( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fdmul( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FDMUL %s\r\n",etos(src))); + do_fdmul( FPU registers[reg], src ); + dump_registers( "END "); +} + +/* ---------------------------- ALU ---------------------------- */ + +PRIVATE void REGPARAM2 FFPU fpuop_do_fmove( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FMOVE %s\r\n",etos(src))); + do_fmove( FPU registers[reg], src ); + build_ex_status(); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fint( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FINT %s, opcode=%X, extra=%X, ta %X\r\n",etos(src),opcode,extra,m68k_getpc())); + do_fint( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 
FFPU fpuop_do_fsinh( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FSINH %s\r\n",etos(src))); + do_fsinh( FPU registers[reg], src ); + build_ex_status(); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fintrz( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FINTRZ %s\r\n",etos(src))); + do_fintrz( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fsqrt( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FSQRT %s\r\n",etos(src))); + do_fsqrt( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_flognp1( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FLOGNP1 %s\r\n",etos(src))); + do_flognp1( FPU registers[reg], src ); + build_ex_status(); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fetoxm1( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FETOXM1 %s\r\n",etos(src))); + do_fetoxm1( FPU registers[reg], src ); + build_ex_status(); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_ftanh( 
uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FTANH %s\r\n",etos(src))); + do_ftanh( FPU registers[reg], src ); + build_ex_status(); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fatan( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FATAN %s\r\n",etos(src))); + do_fatan( FPU registers[reg], src ); + build_ex_status(); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fasin( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FASIN %s\r\n",etos(src))); + do_fasin( FPU registers[reg], src ); + build_ex_status(); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fatanh( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FATANH %s\r\n",etos(src))); + do_fatanh( FPU registers[reg], src ); + build_ex_status(); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fsin( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FSIN %s\r\n",etos(src))); + do_fsin( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_ftan( uae_u32 opcode, 
uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FTAN %s\r\n",etos(src))); + do_ftan( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fetox( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FETOX %s\r\n",etos(src))); + do_fetox( FPU registers[reg], src ); + build_ex_status(); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_ftwotox( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FTWOTOX %s\r\n",etos(src))); + do_ftwotox( FPU registers[reg], src ); + build_ex_status(); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_ftentox( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FTENTOX %s\r\n",etos(src))); + do_ftentox( FPU registers[reg], src ); + build_ex_status(); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_flogn( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FLOGN %s\r\n",etos(src))); + do_flogn( FPU registers[reg], src ); + build_ex_status(); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_flog10( uae_u32 opcode, 
uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FLOG10 %s\r\n",etos(src))); + do_flog10( FPU registers[reg], src ); + build_ex_status(); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_flog2( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FLOG2 %s\r\n",etos(src))); + do_flog2( FPU registers[reg], src ); + build_ex_status(); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fabs( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FABS %s\r\n",etos(src))); + do_fabs( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fcosh( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FCOSH %s\r\n",etos(src))); + do_fcosh( FPU registers[reg], src ); + build_ex_status(); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fneg( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FNEG %s\r\n",etos(src))); + do_fneg( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_facos( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 
7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FACOS %s\r\n",etos(src))); + do_facos( FPU registers[reg], src ); + build_ex_status(); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fcos( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FCOS %s\r\n",etos(src))); + do_fcos( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fgetexp( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FGETEXP %s\r\n",etos(src))); + + if( IS_INFINITY(src) ) { + MAKE_NAN( FPU registers[reg] ); + do_ftst( FPU registers[reg] ); + x86_status_word |= SW_IE; + } else { + do_fgetexp( FPU registers[reg], src ); + } + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fgetman( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FGETMAN %s\r\n",etos(src))); + if( IS_INFINITY(src) ) { + MAKE_NAN( FPU registers[reg] ); + do_ftst( FPU registers[reg] ); + x86_status_word |= SW_IE; + } else { + do_fgetman( FPU registers[reg], src ); + } + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fdiv( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + 
D(bug("FDIV %s\r\n",etos(src))); + do_fdiv( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fmod( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FMOD %s\r\n",etos(src))); + +#if USE_3_BIT_QUOTIENT + do_fmod( FPU registers[reg], src ); +#else + if( (x86_control_word & X86_ROUNDING_MODE) == CW_RC_ZERO ) { + do_fmod_dont_set_cw( FPU registers[reg], src ); + } else { + do_fmod( FPU registers[reg], src ); + } +#endif + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_frem( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FREM %s\r\n",etos(src))); +#if USE_3_BIT_QUOTIENT + do_frem( FPU registers[reg], src ); +#else + if( (x86_control_word & X86_ROUNDING_MODE) == CW_RC_NEAR ) { + do_frem_dont_set_cw( FPU registers[reg], src ); + } else { + do_frem( FPU registers[reg], src ); + } +#endif + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fadd( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FADD %s\r\n",etos(src))); + do_fadd( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fmul( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FMUL %s\r\n",etos(src))); + do_fmul( FPU registers[reg], src ); + 
dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fsgldiv( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FSGLDIV %s\r\n",etos(src))); + do_fsgldiv( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fscale( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FSCALE %s, opcode=%X, extra=%X, ta %X\r\n",etos(src),opcode,extra,m68k_getpc())); + if( IS_INFINITY(FPU registers[reg]) ) { + MAKE_NAN( FPU registers[reg] ); + do_ftst( FPU registers[reg] ); + x86_status_word |= SW_IE; + } else { + // When the absolute value of the source operand is >= 2^14, + // an overflow or underflow always results. 
+ do_fscale( FPU registers[reg], src ); + } + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fsglmul( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FSGLMUL %s\r\n",etos(src))); + do_fsglmul( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fsub( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FSUB %s\r\n",etos(src))); + do_fsub( FPU registers[reg], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fsincos( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FSINCOS %s\r\n",etos(src))); + do_fsincos( FPU registers[reg], FPU registers[extra & 7], src ); + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_fcmp( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FCMP %s\r\n",etos(src))); + + if( IS_INFINITY(src) ) { + if( IS_NEGATIVE(src) ) { + if( IS_INFINITY(FPU registers[reg]) && IS_NEGATIVE(FPU registers[reg]) ) { + x86_status_word = SW_Z | SW_N; + D(bug("-INF FCMP -INF -> NZ\r\n")); + } else { + x86_status_word = SW_FINITE; + D(bug("X FCMP -INF -> None\r\n")); + } + } else { + if( IS_INFINITY(FPU registers[reg]) && !IS_NEGATIVE(FPU registers[reg]) ) { + x86_status_word = SW_Z; + D(bug("+INF FCMP +INF -> Z\r\n")); + 
} else { + x86_status_word = SW_N; + D(bug("X FCMP +INF -> N\r\n")); + } + } + } else if( IS_INFINITY(FPU registers[reg]) ) { + if( IS_NEGATIVE(FPU registers[reg]) ) { + x86_status_word = SW_N; + D(bug("-INF FCMP X -> Negative\r\n")); + } else { + x86_status_word = SW_FINITE; + D(bug("+INF FCMP X -> None\r\n")); + } + } else { + do_fcmp( FPU registers[reg], src ); + } + + dump_registers( "END "); +} + +PRIVATE void REGPARAM2 FFPU fpuop_do_ftst( uae_u32 opcode, uae_u32 extra ) +{ + int reg = (extra >> 7) & 7; + fpu_register src; + if (get_fp_value (opcode, extra, src) == 0) { + m68k_setpc (m68k_getpc () - 4); + op_illg (opcode); + dump_registers( "END "); + return; + } + D(bug("FTST %s\r\n",etos(src))); + do_ftst( src ); + build_ex_status(); + dump_registers( "END "); +} + + + +/* ---------------------------- SETUP TABLES ---------------------------- */ + +PRIVATE void FFPU build_fpp_opp_lookup_table () +{ + for( uae_u32 opcode=0; opcode<=0x38; opcode+=8 ) { + for( uae_u32 extra=0; extra<65536; extra++ ) { + uae_u32 mask = (extra & 0xFC7F) | ((opcode & 0x0038) << 4); + fpufunctbl[mask] = & FFPU fpuop_illg; + + switch ((extra >> 13) & 0x7) { + case 3: + fpufunctbl[mask] = & FFPU fpuop_fmove_2_ea; + break; + case 4: + case 5: + if ((opcode & 0x38) == 0) { + if (extra & 0x2000) { // dr bit + switch( extra & 0x1C00 ) { + case 0x0000: + fpufunctbl[mask] = & FFPU fpuop_fmovem_none_2_Dreg; + break; + case 0x0400: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpiar_2_Dreg; + break; + case 0x0800: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpsr_2_Dreg; + break; + case 0x0C00: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpsr_fpiar_2_Dreg; + break; + case 0x1000: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpcr_2_Dreg; + break; + case 0x1400: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpcr_fpiar_2_Dreg; + break; + case 0x1800: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpcr_fpsr_2_Dreg; + break; + case 0x1C00: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpcr_fpsr_fpiar_2_Dreg; + break; + } + } 
else { + switch( extra & 0x1C00 ) { + case 0x0000: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Dreg_2_none; + break; + case 0x0400: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Dreg_2_fpiar; + break; + case 0x0800: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Dreg_2_fpsr; + break; + case 0x0C00: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Dreg_2_fpsr_fpiar; + break; + case 0x1000: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Dreg_2_fpcr; + break; + case 0x1400: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Dreg_2_fpcr_fpiar; + break; + case 0x1800: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Dreg_2_fpcr_fpsr; + break; + case 0x1C00: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Dreg_2_fpcr_fpsr_fpiar; + break; + } + } + } else if ((opcode & 0x38) == 8) { + if (extra & 0x2000) { // dr bit + switch( extra & 0x1C00 ) { + case 0x0000: + fpufunctbl[mask] = & FFPU fpuop_fmovem_none_2_Areg; + break; + case 0x0400: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpiar_2_Areg; + break; + case 0x0800: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpsr_2_Areg; + break; + case 0x0C00: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpsr_fpiar_2_Areg; + break; + case 0x1000: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpcr_2_Areg; + break; + case 0x1400: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpcr_fpiar_2_Areg; + break; + case 0x1800: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpcr_fpsr_2_Areg; + break; + case 0x1C00: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpcr_fpsr_fpiar_2_Areg; + break; + } + } else { + switch( extra & 0x1C00 ) { + case 0x0000: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Areg_2_none; + break; + case 0x0400: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Areg_2_fpiar; + break; + case 0x0800: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Areg_2_fpsr; + break; + case 0x0C00: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Areg_2_fpsr_fpiar; + break; + case 0x1000: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Areg_2_fpcr; + break; + case 0x1400: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Areg_2_fpcr_fpiar; + break; + case 0x1800: + 
fpufunctbl[mask] = & FFPU fpuop_fmovem_Areg_2_fpcr_fpsr; + break; + case 0x1C00: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Areg_2_fpcr_fpsr_fpiar; + break; + } + } + } else if (extra & 0x2000) { + if ((opcode & 0x38) == 0x20) { + switch( extra & 0x1C00 ) { + case 0x0000: + fpufunctbl[mask] = & FFPU fpuop_fmovem_none_2_Mem_predecrement; + break; + case 0x0400: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpiar_2_Mem_predecrement; + break; + case 0x0800: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpsr_2_Mem_predecrement; + break; + case 0x0C00: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpsr_fpiar_2_Mem_predecrement; + break; + case 0x1000: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpcr_2_Mem_predecrement; + break; + case 0x1400: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpcr_fpiar_2_Mem_predecrement; + break; + case 0x1800: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpcr_fpsr_2_Mem_predecrement; + break; + case 0x1C00: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpcr_fpsr_fpiar_2_Mem_predecrement; + break; + } + } else if ((opcode & 0x38) == 0x18) { + switch( extra & 0x1C00 ) { + case 0x0000: + fpufunctbl[mask] = & FFPU fpuop_fmovem_none_2_Mem_postincrement; + break; + case 0x0400: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpiar_2_Mem_postincrement; + break; + case 0x0800: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpsr_2_Mem_postincrement; + break; + case 0x0C00: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpsr_fpiar_2_Mem_postincrement; + break; + case 0x1000: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpcr_2_Mem_postincrement; + break; + case 0x1400: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpcr_fpiar_2_Mem_postincrement; + break; + case 0x1800: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpcr_fpsr_2_Mem_postincrement; + break; + case 0x1C00: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpcr_fpsr_fpiar_2_Mem_postincrement; + break; + } + } else { + switch( extra & 0x1C00 ) { + case 0x0000: + fpufunctbl[mask] = & FFPU fpuop_fmovem_none_2_Mem; + break; + case 0x0400: + fpufunctbl[mask] = & FFPU 
fpuop_fmovem_fpiar_2_Mem; + break; + case 0x0800: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpsr_2_Mem; + break; + case 0x0C00: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpsr_fpiar_2_Mem; + break; + case 0x1000: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpcr_2_Mem; + break; + case 0x1400: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpcr_fpiar_2_Mem; + break; + case 0x1800: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpcr_fpsr_2_Mem; + break; + case 0x1C00: + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpcr_fpsr_fpiar_2_Mem; + break; + } + } + } else { + if ((opcode & 0x38) == 0x20) { + switch( extra & 0x1C00 ) { + case 0x0000: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_none_predecrement; + break; + case 0x0400: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_fpiar_predecrement; + break; + case 0x0800: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_fpsr_predecrement; + break; + case 0x0C00: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_fpsr_fpiar_predecrement; + break; + case 0x1000: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_fpcr_predecrement; + break; + case 0x1400: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_fpcr_fpiar_predecrement; + break; + case 0x1800: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_fpcr_fpsr_predecrement; + break; + case 0x1C00: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_fpcr_fpsr_fpiar_predecrement; + break; + } + } else if ((opcode & 0x38) == 0x18) { + switch( extra & 0x1C00 ) { + case 0x0000: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_none_postincrement; + break; + case 0x0400: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_fpiar_postincrement; + break; + case 0x0800: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_fpsr_postincrement; + break; + case 0x0C00: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_fpsr_fpiar_postincrement; + break; + case 0x1000: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_fpcr_postincrement; + break; + case 0x1400: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_fpcr_fpiar_postincrement; + break; + case 
0x1800: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_fpcr_fpsr_postincrement; + break; + case 0x1C00: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_fpcr_fpsr_fpiar_postincrement; + break; + } + } else { + switch( extra & 0x1C00 ) { + case 0x0000: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_none_2_Mem; + break; + case 0x0400: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_fpiar_2_Mem; + break; + case 0x0800: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_fpsr_2_Mem; + break; + case 0x0C00: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_fpsr_fpiar_2_Mem; + break; + case 0x1000: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_fpcr_2_Mem; + break; + case 0x1400: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_fpcr_fpiar_2_Mem; + break; + case 0x1800: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_fpcr_fpsr_2_Mem; + break; + case 0x1C00: + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_fpcr_fpsr_fpiar_2_Mem; + break; + } + } + break; + case 6: + switch ((extra >> 11) & 3) { + case 0: /* static pred */ + if ((opcode & 0x38) == 0x18) // post-increment? + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_fpp_static_pred_postincrement; + else if ((opcode & 0x38) == 0x20) // pre-decrement? + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_fpp_static_pred_predecrement; + else + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_fpp_static_pred; + break; + case 1: /* dynamic pred */ + if ((opcode & 0x38) == 0x18) // post-increment? + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_fpp_dynamic_pred_postincrement; + else if ((opcode & 0x38) == 0x20) // pre-decrement? + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_fpp_dynamic_pred_predecrement; + else + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_fpp_dynamic_pred; + break; + case 2: /* static postinc */ + if ((opcode & 0x38) == 0x18) // post-increment? + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_fpp_static_postinc_postincrement; + else if ((opcode & 0x38) == 0x20) // pre-decrement? 
+ fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_fpp_static_postinc_predecrement; + else + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_fpp_static_postinc; + break; + case 3: /* dynamic postinc */ + if ((opcode & 0x38) == 0x18) // post-increment? + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_fpp_dynamic_postinc_postincrement; + else if ((opcode & 0x38) == 0x20) // pre-decrement? + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_fpp_dynamic_postinc_predecrement; + else + fpufunctbl[mask] = & FFPU fpuop_fmovem_Mem_2_fpp_dynamic_postinc; + break; + } + break; + case 7: + switch ((extra >> 11) & 3) { + case 0: /* static pred */ + if ((opcode & 0x38) == 0x18) // post-increment? + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpp_2_Mem_static_pred_postincrement; + else if ((opcode & 0x38) == 0x20) // pre-decrement? + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpp_2_Mem_static_pred_predecrement; + else + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpp_2_Mem_static_pred; + break; + case 1: /* dynamic pred */ + if ((opcode & 0x38) == 0x18) // post-increment? + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpp_2_Mem_dynamic_pred_postincrement; + else if ((opcode & 0x38) == 0x20) // pre-decrement? + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpp_2_Mem_dynamic_pred_predecrement; + else + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpp_2_Mem_dynamic_pred; + break; + case 2: /* static postinc */ + if ((opcode & 0x38) == 0x18) // post-increment? + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpp_2_Mem_static_postinc_postincrement; + else if ((opcode & 0x38) == 0x20) // pre-decrement? + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpp_2_Mem_static_postinc_predecrement; + else + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpp_2_Mem_static_postinc; + break; + case 3: /* dynamic postinc */ + if ((opcode & 0x38) == 0x18) // post-increment? + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpp_2_Mem_dynamic_postinc_postincrement; + else if ((opcode & 0x38) == 0x20) // pre-decrement? 
+ fpufunctbl[mask] = & FFPU fpuop_fmovem_fpp_2_Mem_dynamic_postinc_predecrement; + else + fpufunctbl[mask] = & FFPU fpuop_fmovem_fpp_2_Mem_dynamic_postinc; + break; + } + break; + case 0: + case 2: + if ((extra & 0xfc00) == 0x5c00) { + switch (extra & 0x7f) { + case 0x00: + fpufunctbl[mask] = & FFPU fpuop_do_fldpi; + break; + case 0x0b: + fpufunctbl[mask] = & FFPU fpuop_do_fldlg2; + break; + case 0x0c: + fpufunctbl[mask] = & FFPU fpuop_do_load_const_e; + break; + case 0x0d: + fpufunctbl[mask] = & FFPU fpuop_do_fldl2e; + break; + case 0x0e: + fpufunctbl[mask] = & FFPU fpuop_do_load_const_log_10_e; + break; + case 0x0f: + fpufunctbl[mask] = & FFPU fpuop_do_fldz; + break; + case 0x30: + fpufunctbl[mask] = & FFPU fpuop_do_fldln2; + break; + case 0x31: + fpufunctbl[mask] = & FFPU fpuop_do_load_const_ln_10; + break; + case 0x32: + fpufunctbl[mask] = & FFPU fpuop_do_fld1; + break; + case 0x33: + fpufunctbl[mask] = & FFPU fpuop_do_load_const_1e1; + break; + case 0x34: + fpufunctbl[mask] = & FFPU fpuop_do_load_const_1e2; + break; + case 0x35: + fpufunctbl[mask] = & FFPU fpuop_do_load_const_1e4; + break; + case 0x36: + fpufunctbl[mask] = & FFPU fpuop_do_load_const_1e8; + break; + case 0x37: + fpufunctbl[mask] = & FFPU fpuop_do_load_const_1e16; + break; + case 0x38: + fpufunctbl[mask] = & FFPU fpuop_do_load_const_1e32; + break; + case 0x39: + fpufunctbl[mask] = & FFPU fpuop_do_load_const_1e64; + break; + case 0x3a: + fpufunctbl[mask] = & FFPU fpuop_do_load_const_1e128; + break; + case 0x3b: + fpufunctbl[mask] = & FFPU fpuop_do_load_const_1e256; + break; + case 0x3c: + fpufunctbl[mask] = & FFPU fpuop_do_load_const_1e512; + break; + case 0x3d: + fpufunctbl[mask] = & FFPU fpuop_do_load_const_1e1024; + break; + case 0x3e: + fpufunctbl[mask] = & FFPU fpuop_do_load_const_1e2048; + break; + case 0x3f: + fpufunctbl[mask] = & FFPU fpuop_do_load_const_1e4096; + break; + } + break; + } + + if (FPU is_integral) { + switch (extra & 0x7f) { + case 0x40: + fpufunctbl[mask] = & FFPU 
fpuop_do_fsmove; + break; + case 0x44: + fpufunctbl[mask] = & FFPU fpuop_do_fdmove; + break; + case 0x41: + fpufunctbl[mask] = & FFPU fpuop_do_fssqrt; + break; + case 0x45: + fpufunctbl[mask] = & FFPU fpuop_do_fdsqrt; + break; + case 0x58: + fpufunctbl[mask] = & FFPU fpuop_do_fsabs; + break; + case 0x5c: + fpufunctbl[mask] = & FFPU fpuop_do_fdabs; + break; + case 0x5a: + fpufunctbl[mask] = & FFPU fpuop_do_fsneg; + break; + case 0x5e: + fpufunctbl[mask] = & FFPU fpuop_do_fdneg; + break; + case 0x60: + fpufunctbl[mask] = & FFPU fpuop_do_fsdiv; + break; + case 0x64: + fpufunctbl[mask] = & FFPU fpuop_do_fddiv; + break; + case 0x62: + fpufunctbl[mask] = & FFPU fpuop_do_fsadd; + break; + case 0x66: + fpufunctbl[mask] = & FFPU fpuop_do_fdadd; + break; + case 0x68: + fpufunctbl[mask] = & FFPU fpuop_do_fssub; + break; + case 0x6c: + fpufunctbl[mask] = & FFPU fpuop_do_fdsub; + break; + case 0x63: + fpufunctbl[mask] = & FFPU fpuop_do_fsmul; + break; + case 0x67: + fpufunctbl[mask] = & FFPU fpuop_do_fdmul; + break; + default: + break; + } + } + + switch (extra & 0x7f) { + case 0x00: + fpufunctbl[mask] = & FFPU fpuop_do_fmove; + break; + case 0x01: + fpufunctbl[mask] = & FFPU fpuop_do_fint; + break; + case 0x02: + fpufunctbl[mask] = & FFPU fpuop_do_fsinh; + break; + case 0x03: + fpufunctbl[mask] = & FFPU fpuop_do_fintrz; + break; + case 0x04: + fpufunctbl[mask] = & FFPU fpuop_do_fsqrt; + break; + case 0x06: + fpufunctbl[mask] = & FFPU fpuop_do_flognp1; + break; + case 0x08: + fpufunctbl[mask] = & FFPU fpuop_do_fetoxm1; + break; + case 0x09: + fpufunctbl[mask] = & FFPU fpuop_do_ftanh; + break; + case 0x0a: + fpufunctbl[mask] = & FFPU fpuop_do_fatan; + break; + case 0x0c: + fpufunctbl[mask] = & FFPU fpuop_do_fasin; + break; + case 0x0d: + fpufunctbl[mask] = & FFPU fpuop_do_fatanh; + break; + case 0x0e: + fpufunctbl[mask] = & FFPU fpuop_do_fsin; + break; + case 0x0f: + fpufunctbl[mask] = & FFPU fpuop_do_ftan; + break; + case 0x10: + fpufunctbl[mask] = & FFPU fpuop_do_fetox; + 
break; + case 0x11: + fpufunctbl[mask] = & FFPU fpuop_do_ftwotox; + break; + case 0x12: + fpufunctbl[mask] = & FFPU fpuop_do_ftentox; + break; + case 0x14: + fpufunctbl[mask] = & FFPU fpuop_do_flogn; + break; + case 0x15: + fpufunctbl[mask] = & FFPU fpuop_do_flog10; + break; + case 0x16: + fpufunctbl[mask] = & FFPU fpuop_do_flog2; + break; + case 0x18: + fpufunctbl[mask] = & FFPU fpuop_do_fabs; + break; + case 0x19: + fpufunctbl[mask] = & FFPU fpuop_do_fcosh; + break; + case 0x1a: + fpufunctbl[mask] = & FFPU fpuop_do_fneg; + break; + case 0x1c: + fpufunctbl[mask] = & FFPU fpuop_do_facos; + break; + case 0x1d: + fpufunctbl[mask] = & FFPU fpuop_do_fcos; + break; + case 0x1e: + fpufunctbl[mask] = & FFPU fpuop_do_fgetexp; + break; + case 0x1f: + fpufunctbl[mask] = & FFPU fpuop_do_fgetman; + break; + case 0x20: + fpufunctbl[mask] = & FFPU fpuop_do_fdiv; + break; + case 0x21: + fpufunctbl[mask] = & FFPU fpuop_do_fmod; + break; + case 0x22: + fpufunctbl[mask] = & FFPU fpuop_do_fadd; + break; + case 0x23: + fpufunctbl[mask] = & FFPU fpuop_do_fmul; + break; + case 0x24: + fpufunctbl[mask] = & FFPU fpuop_do_fsgldiv; + break; + case 0x25: + fpufunctbl[mask] = & FFPU fpuop_do_frem; + break; + case 0x26: + fpufunctbl[mask] = & FFPU fpuop_do_fscale; + break; + case 0x27: + fpufunctbl[mask] = & FFPU fpuop_do_fsglmul; + break; + case 0x28: + fpufunctbl[mask] = & FFPU fpuop_do_fsub; + break; + case 0x30: + case 0x31: + case 0x32: + case 0x33: + case 0x34: + case 0x35: + case 0x36: + case 0x37: + fpufunctbl[mask] = & FFPU fpuop_do_fsincos; + break; + case 0x38: + fpufunctbl[mask] = & FFPU fpuop_do_fcmp; + break; + case 0x3a: + fpufunctbl[mask] = & FFPU fpuop_do_ftst; + break; + } + } + } + } + } +} + +/* ---------------------------- CONSTANTS ---------------------------- */ + +PRIVATE void FFPU set_constant ( fpu_register & f, char *name, double value, uae_s32 mult ) +{ + FPU_CONSISTENCY_CHECK_START(); + if(mult == 1) { +/* _asm { + MOV ESI, [f] + FLD QWORD PTR [value] + FSTP TBYTE 
PTR [ESI] + } */ + __asm__ __volatile__( + "fldl %1\n" + "fstpt %0\n" + : "=m" (f) + : "m" (value) + ); + } else { +/* _asm { + MOV ESI, [f] + FILD DWORD PTR [mult] + FLD QWORD PTR [value] + FMUL + FSTP TBYTE PTR [ESI] + } */ + __asm__ __volatile__( + "fildl %2\n" + "fldl %1\n" + "fmul \n" + "fstpt %0\n" + : "=m" (f) + : "m" (value), "m" (mult) + ); + } + D(bug("set_constant (%s,%.04f) = %s\r\n",name,(float)value,etos(f))); + FPU_CONSISTENCY_CHECK_STOP( mult==1 ? "set_constant(mult==1)" : "set_constant(mult>1)" ); +} + +PRIVATE void FFPU do_fldpi ( fpu_register & dest ) +{ + FPU_CONSISTENCY_CHECK_START(); +/* _asm { + FLDPI + FXAM + FNSTSW x86_status_word + MOV EDI, [dest] + FSTP TBYTE PTR [EDI] + } */ + __asm__ __volatile__( + "fldpi \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "=m" (dest) + ); + FPU_CONSISTENCY_CHECK_STOP("do_fldpi"); +} + +PRIVATE void FFPU do_fldlg2 ( fpu_register & dest ) +{ + FPU_CONSISTENCY_CHECK_START(); +/* _asm { + FLDLG2 + FXAM + FNSTSW x86_status_word + MOV EDI, [dest] + FSTP TBYTE PTR [EDI] + } */ + __asm__ __volatile__( + "fldlg2 \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "=m" (dest) + ); + FPU_CONSISTENCY_CHECK_STOP("do_fldlg2"); +} + +PRIVATE void FFPU do_fldl2e ( fpu_register & dest ) +{ + FPU_CONSISTENCY_CHECK_START(); +/* _asm { + FLDL2E + FXAM + FNSTSW x86_status_word + MOV EDI, [dest] + FSTP TBYTE PTR [EDI] + } */ + __asm__ __volatile__( + "fldl2e \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "=m" (dest) + ); + FPU_CONSISTENCY_CHECK_STOP("do_fldl2e"); +} + +PRIVATE void FFPU do_fldz ( fpu_register & dest ) +{ + FPU_CONSISTENCY_CHECK_START(); +/* _asm { + FLDZ + FXAM + FNSTSW x86_status_word + MOV EDI, [dest] + FSTP TBYTE PTR [EDI] + } */ + __asm__ __volatile__( + "fldz \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "=m" (dest) + ); + FPU_CONSISTENCY_CHECK_STOP("do_fldz"); +} + +PRIVATE void FFPU do_fldln2 ( 
fpu_register & dest ) +{ + FPU_CONSISTENCY_CHECK_START(); +/* _asm { + FLDLN2 + FXAM + FNSTSW x86_status_word + MOV EDI, [dest] + FSTP TBYTE PTR [EDI] + } */ + __asm__ __volatile__( + "fldln2 \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "=m" (dest) + ); + FPU_CONSISTENCY_CHECK_STOP("do_fldln2"); +} + +PRIVATE void FFPU do_fld1 ( fpu_register & dest ) +{ + FPU_CONSISTENCY_CHECK_START(); +/* _asm { + FLD1 + FXAM + FNSTSW x86_status_word + MOV EDI, [dest] + FSTP TBYTE PTR [EDI] + } */ + __asm__ __volatile__( + "fld1 \n" + "fxam \n" + "fnstsw %0\n" + "fstpt %1\n" + : "=m" (x86_status_word), "=m" (dest) + ); + FPU_CONSISTENCY_CHECK_STOP("do_fld1"); +} + + +void fpu_set_fpsr(uae_u32 new_fpsr) +{ + set_fpsr(new_fpsr); +} + +uae_u32 fpu_get_fpsr(void) +{ + return get_fpsr(); +} + +void fpu_set_fpcr(uae_u32 new_fpcr) +{ + set_fpcr(new_fpcr); +} + +uae_u32 fpu_get_fpcr(void) +{ + return get_fpcr(); +} + +/* ---------------------------- MAIN INIT ---------------------------- */ + +#ifdef HAVE_SIGACTION +// Mega hackaround-that-happens-to-work: the following way to handle +// SIGFPE just happens to make the "fsave" below in fpu_init() *NOT* +// to abort with a floating point exception. However, we never +// actually reach sigfpe_handler(). 
+static void sigfpe_handler(int code, siginfo_t *sip, void *) +{ + if (code == SIGFPE && sip->si_code == FPE_FLTINV) { + fprintf(stderr, "Invalid floating point operation\n"); + abort(); + } +} +#endif + +PUBLIC void FFPU fpu_init( bool integral_68040 ) +{ + static bool done_first_time_initialization = false; + if (!done_first_time_initialization) { + fpu_init_native_fflags(); + fpu_init_native_exceptions(); + fpu_init_native_accrued_exceptions(); +#ifdef HAVE_SIGACTION + struct sigaction fpe_sa; + sigemptyset(&fpe_sa.sa_mask); + fpe_sa.sa_sigaction = sigfpe_handler; + fpe_sa.sa_flags = SA_SIGINFO; + sigaction(SIGFPE, &fpe_sa, 0); +#endif + done_first_time_initialization = true; + } + + __asm__ __volatile__("fsave %0" : "=m" (m_fpu_state_original)); + + FPU is_integral = integral_68040; + FPU instruction_address = 0; + set_fpcr(0); + set_fpsr(0); + + x86_control_word = CW_INITIAL; + x86_status_word = SW_INITIAL; + x86_status_word_accrued = 0; + FPU fpsr.quotient = 0; + + for( int i=0; i<8; i++ ) { + MAKE_NAN( FPU registers[i] ); + } + + build_fpp_opp_lookup_table(); + +/* _asm { + FNINIT + FLDCW x86_control_word + } */ + __asm__ __volatile__("fninit\nfldcw %0" : : "m" (x86_control_word)); + + do_fldpi( const_pi ); + do_fldlg2( const_lg2 ); + do_fldl2e( const_l2e ); + do_fldz( const_z ); + do_fldln2( const_ln2 ); + do_fld1( const_1 ); + + set_constant( const_e, "e", exp (1.0), 1 ); + set_constant( const_log_10_e, "Log 10 (e)", log (exp (1.0)) / log (10.0), 1 ); + set_constant( const_ln_10, "ln(10)", log (10.0), 1 ); + set_constant( const_1e1, "1.0e1", 1.0e1, 1 ); + set_constant( const_1e2, "1.0e2", 1.0e2, 1 ); + set_constant( const_1e4, "1.0e4", 1.0e4, 1 ); + set_constant( const_1e8, "1.0e8", 1.0e8, 1 ); + set_constant( const_1e16, "1.0e16", 1.0e16, 1 ); + set_constant( const_1e32, "1.0e32", 1.0e32, 1 ); + set_constant( const_1e64, "1.0e64", 1.0e64, 1 ) ; + set_constant( const_1e128, "1.0e128", 1.0e128, 1 ); + set_constant( const_1e256, "1.0e256", 1.0e256, 1 ); + 
set_constant( const_1e512, "1.0e512", 1.0e256, 10 ); + set_constant( const_1e1024, "1.0e1024", 1.0e256, 100 ); + set_constant( const_1e2048, "1.0e2048", 1.0e256, 1000 ); + set_constant( const_1e4096, "1.0e4096", 1.0e256, 10000 ); + + // Just in case. +/* _asm { + FNINIT + FLDCW x86_control_word + } */ + __asm__ __volatile__("fninit\nfldcw %0" : : "m" (x86_control_word)); +} + +PUBLIC void FFPU fpu_exit( void ) +{ + __asm__ __volatile__("frstor %0" : : "m" (m_fpu_state_original)); +} + +PUBLIC void FFPU fpu_reset( void ) +{ + fpu_exit(); + fpu_init(FPU is_integral); +} diff --git a/BasiliskII/src/uae_cpu/fpu/fpu_x86.h b/BasiliskII/src/uae_cpu/fpu/fpu_x86.h new file mode 100644 index 00000000..52a2f310 --- /dev/null +++ b/BasiliskII/src/uae_cpu/fpu/fpu_x86.h @@ -0,0 +1,384 @@ +/* + * fpu/fpu_x86.h - Extra Definitions for the X86 assembly FPU core + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * MC68881/68040 fpu emulation + * + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef FPU_X86_H +#define FPU_X86_H + +/* NOTE: this file shall be included from fpu/fpu_x86.cpp */ +#undef PUBLIC +#define PUBLIC extern + +#undef PRIVATE +#define PRIVATE static + +#undef FFPU +#define FFPU /**/ + +#undef FPU +#define FPU fpu. + +// Status word +PRIVATE uae_u32 x86_status_word; +PRIVATE uae_u32 x86_status_word_accrued; + +// FPU jump table +typedef void REGPARAM2 ( *fpuop_func )( uae_u32, uae_u32 ); +PRIVATE fpuop_func fpufunctbl[65536]; + +// FPU consistency +PRIVATE uae_u32 checked_sw_atstart; + +// FMOVECR constants supported by the x86 FPU +PRIVATE fpu_register const_pi; +PRIVATE fpu_register const_lg2; +PRIVATE fpu_register const_l2e; +PRIVATE fpu_register const_z; +PRIVATE fpu_register const_ln2; +PRIVATE fpu_register const_1; + +// FMOVECR constants not supported by x86 FPU +PRIVATE fpu_register const_e; +PRIVATE fpu_register const_log_10_e; +PRIVATE fpu_register const_ln_10; +PRIVATE fpu_register const_1e1; +PRIVATE fpu_register const_1e2; +PRIVATE fpu_register const_1e4; +PRIVATE fpu_register const_1e8; +PRIVATE fpu_register const_1e16; +PRIVATE fpu_register const_1e32; +PRIVATE fpu_register const_1e64; +PRIVATE fpu_register const_1e128; +PRIVATE fpu_register const_1e256; +PRIVATE fpu_register const_1e512; +PRIVATE fpu_register const_1e1024; +PRIVATE fpu_register const_1e2048; +PRIVATE fpu_register const_1e4096; + +// Saved host FPU state +PRIVATE uae_u8 m_fpu_state_original[108]; // 90/94/108 + +/* -------------------------------------------------------------------------- */ +/* --- Methods --- */ +/* -------------------------------------------------------------------------- */ + +// Debug support functions +PRIVATE void FFPU dump_first_bytes_buf(char *b, uae_u8* buf, uae_s32 actual); +PRIVATE char * FFPU etos(fpu_register
const & e) REGPARAM; + +// FPU consistency +PRIVATE void FFPU FPU_CONSISTENCY_CHECK_START(void); +PRIVATE void FFPU FPU_CONSISTENCY_CHECK_STOP(const char *name); + +// Get special floating-point value class +PRIVATE inline uae_u32 FFPU IS_INFINITY (fpu_register const & f); +PRIVATE inline uae_u32 FFPU IS_NAN (fpu_register const & f); +PRIVATE inline uae_u32 FFPU IS_ZERO (fpu_register const & f); +PRIVATE inline uae_u32 FFPU IS_NEGATIVE (fpu_register const & f); + +// Make a special floating-point value +PRIVATE inline void FFPU MAKE_NAN (fpu_register & f); +PRIVATE inline void FFPU MAKE_INF_POSITIVE (fpu_register & f); +PRIVATE inline void FFPU MAKE_INF_NEGATIVE (fpu_register & f); +PRIVATE inline void FFPU MAKE_ZERO_POSITIVE (fpu_register & f); +PRIVATE inline void FFPU MAKE_ZERO_NEGATIVE (fpu_register & f); + +// Conversion from extended floating-point values +PRIVATE uae_s32 FFPU extended_to_signed_32 ( fpu_register const & f ) REGPARAM; +PRIVATE uae_s16 FFPU extended_to_signed_16 ( fpu_register const & f ) REGPARAM; +PRIVATE uae_s8 FFPU extended_to_signed_8 ( fpu_register const & f ) REGPARAM; +PRIVATE fpu_double FFPU extended_to_double( fpu_register const & f ) REGPARAM; +PRIVATE uae_u32 FFPU from_single ( fpu_register const & f ) REGPARAM; +PRIVATE void FFPU from_exten ( fpu_register const & f, uae_u32 *wrd1, uae_u32 *wrd2, uae_u32 *wrd3 ) REGPARAM; +PRIVATE void FFPU from_double ( fpu_register const & f, uae_u32 *wrd1, uae_u32 *wrd2 ) REGPARAM; +PRIVATE void FFPU from_pack (fpu_double src, uae_u32 * wrd1, uae_u32 * wrd2, uae_u32 * wrd3) REGPARAM; + +// Conversion to extended floating-point values +PRIVATE void FFPU signed_to_extended ( uae_s32 x, fpu_register & f ) REGPARAM; +PRIVATE void FFPU double_to_extended ( double x, fpu_register & f ) REGPARAM; +PRIVATE void FFPU to_single ( uae_u32 src, fpu_register & f ) REGPARAM; +PRIVATE void FFPU to_exten_no_normalize ( uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3, fpu_register & f ) REGPARAM; +PRIVATE void FFPU 
to_exten ( uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3, fpu_register & f ) REGPARAM; +PRIVATE void FFPU to_double ( uae_u32 wrd1, uae_u32 wrd2, fpu_register & f ) REGPARAM; +PRIVATE fpu_double FFPU to_pack(uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3) REGPARAM; + +// Atomic floating-point arithmetic operations +PRIVATE void FFPU do_fmove ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_fmove_no_status ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_fint ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_fintrz ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_fsqrt ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_ftst ( fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_fsinh ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_flognp1 ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_fetoxm1 ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_ftanh ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_fatan ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_fasin ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_fatanh ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_fetox ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_ftwotox ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_ftentox ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_flogn ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_flog10 ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_flog2 ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU 
do_facos ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_fcosh ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_fsin ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_ftan ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_fabs ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_fneg ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_fcos ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_fgetexp ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_fgetman ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_fdiv ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_fmod ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_frem ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_fmod_dont_set_cw ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_frem_dont_set_cw ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_fadd ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_fmul ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_fsgldiv ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_fscale ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_fsglmul ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_fsub ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_fsincos ( fpu_register & dest_sin, fpu_register & dest_cos, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_fcmp ( fpu_register & dest, fpu_register const & src ) REGPARAM; +PRIVATE void FFPU do_fldpi ( 
fpu_register & dest ) REGPARAM; +PRIVATE void FFPU do_fldlg2 ( fpu_register & dest ) REGPARAM; +PRIVATE void FFPU do_fldl2e ( fpu_register & dest ) REGPARAM; +PRIVATE void FFPU do_fldz ( fpu_register & dest ) REGPARAM; +PRIVATE void FFPU do_fldln2 ( fpu_register & dest ) REGPARAM; +PRIVATE void FFPU do_fld1 ( fpu_register & dest ) REGPARAM; + +// Instructions handlers +PRIVATE void REGPARAM2 FFPU fpuop_illg( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmove_2_ea( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_none_2_Dreg( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpiar_2_Dreg( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpsr_2_Dreg( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_2_Dreg( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpsr_fpiar_2_Dreg( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_fpiar_2_Dreg( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_fpsr_2_Dreg( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_fpsr_fpiar_2_Dreg( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Dreg_2_none( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Dreg_2_fpiar( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Dreg_2_fpsr( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Dreg_2_fpsr_fpiar( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Dreg_2_fpcr( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Dreg_2_fpcr_fpiar( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Dreg_2_fpcr_fpsr( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Dreg_2_fpcr_fpsr_fpiar( uae_u32 opcode, uae_u32 extra ); +PRIVATE 
void REGPARAM2 FFPU fpuop_fmovem_none_2_Areg( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpiar_2_Areg( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpsr_2_Areg( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_2_Areg( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpsr_fpiar_2_Areg( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_fpiar_2_Areg( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_fpsr_2_Areg( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_fpsr_fpiar_2_Areg( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Areg_2_none( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Areg_2_fpiar( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Areg_2_fpsr( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Areg_2_fpsr_fpiar( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Areg_2_fpcr( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Areg_2_fpcr_fpiar( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Areg_2_fpcr_fpsr( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Areg_2_fpcr_fpsr_fpiar( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_none_2_Mem_predecrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpiar_2_Mem_predecrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpsr_2_Mem_predecrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpsr_fpiar_2_Mem_predecrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_2_Mem_predecrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU 
fpuop_fmovem_fpcr_fpiar_2_Mem_predecrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_fpsr_2_Mem_predecrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_fpsr_fpiar_2_Mem_predecrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_none_2_Mem_postincrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpiar_2_Mem_postincrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpsr_2_Mem_postincrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpsr_fpiar_2_Mem_postincrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_2_Mem_postincrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_fpiar_2_Mem_postincrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_fpsr_2_Mem_postincrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_fpsr_fpiar_2_Mem_postincrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_none_2_Mem( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpiar_2_Mem( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpsr_2_Mem( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpsr_fpiar_2_Mem( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_2_Mem( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_fpiar_2_Mem( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_fpsr_2_Mem( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpcr_fpsr_fpiar_2_Mem( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_none_predecrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU 
fpuop_fmovem_Mem_2_fpiar_predecrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpsr_predecrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpsr_fpiar_predecrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpcr_predecrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpcr_fpiar_predecrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpcr_fpsr_predecrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpcr_fpsr_fpiar_predecrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_none_postincrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpiar_postincrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpsr_postincrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpsr_fpiar_postincrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpcr_postincrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpcr_fpiar_postincrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpcr_fpsr_postincrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpcr_fpsr_fpiar_postincrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_none_2_Mem( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpiar_2_Mem( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpsr_2_Mem( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpsr_fpiar_2_Mem( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpcr_2_Mem( uae_u32 opcode, uae_u32 
extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpcr_fpiar_2_Mem( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpcr_fpsr_2_Mem( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpcr_fpsr_fpiar_2_Mem( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpp_static_pred_postincrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpp_static_pred_predecrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpp_static_pred( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpp_dynamic_pred_postincrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpp_dynamic_pred_predecrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpp_dynamic_pred( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpp_static_postinc_postincrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpp_static_postinc_predecrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpp_static_postinc( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpp_dynamic_postinc_postincrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpp_dynamic_postinc_predecrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_Mem_2_fpp_dynamic_postinc( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpp_2_Mem_static_pred_postincrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpp_2_Mem_static_pred_predecrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpp_2_Mem_static_pred( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU 
fpuop_fmovem_fpp_2_Mem_dynamic_pred_postincrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpp_2_Mem_dynamic_pred_predecrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpp_2_Mem_dynamic_pred( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpp_2_Mem_static_postinc_postincrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpp_2_Mem_static_postinc_predecrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpp_2_Mem_static_postinc( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpp_2_Mem_dynamic_postinc_postincrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpp_2_Mem_dynamic_postinc_predecrement( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_fmovem_fpp_2_Mem_dynamic_postinc( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fldpi( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fldlg2( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_load_const_e( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fldl2e( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_load_const_log_10_e( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fldz( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fldln2( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_load_const_ln_10( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fld1( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_load_const_1e1( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_load_const_1e2( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_load_const_1e4( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU 
fpuop_do_load_const_1e8( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_load_const_1e16( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_load_const_1e32( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_load_const_1e64( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_load_const_1e128( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_load_const_1e256( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_load_const_1e512( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_load_const_1e1024( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_load_const_1e2048( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_load_const_1e4096( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fmove( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fint( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fsinh( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fintrz( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fsqrt( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_flognp1( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fetoxm1( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_ftanh( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fatan( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fasin( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fatanh( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fsin( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_ftan( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fetox( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU 
fpuop_do_ftwotox( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_ftentox( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_flogn( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_flog10( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_flog2( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fabs( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fcosh( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fneg( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_facos( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fcos( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fgetexp( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fgetman( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fdiv( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fmod( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_frem( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fadd( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fmul( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fsgldiv( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fscale( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fsglmul( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fsub( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fsincos( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fcmp( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_ftst( uae_u32 opcode, uae_u32 extra ); + +// 040 +PRIVATE void REGPARAM2 FFPU fpuop_do_fsmove( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fdmove( uae_u32 opcode, 
uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fssqrt( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fdsqrt( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fsabs( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fdabs( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fsneg( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fdneg( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fsdiv( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fddiv( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fsadd( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fdadd( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fssub( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fdsub( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fsmul( uae_u32 opcode, uae_u32 extra ); +PRIVATE void REGPARAM2 FFPU fpuop_do_fdmul( uae_u32 opcode, uae_u32 extra ); + +// Get & Put floating-point values +PRIVATE int FFPU get_fp_value (uae_u32 opcode, uae_u32 extra, fpu_register & src) REGPARAM; +PRIVATE int FFPU put_fp_value (fpu_register const & value, uae_u32 opcode, uae_u32 extra) REGPARAM; +PRIVATE int FFPU get_fp_ad(uae_u32 opcode, uae_u32 * ad) REGPARAM; + +// Floating-point condition-based instruction handlers +PRIVATE int FFPU fpp_cond(uae_u32 opcode, int condition) REGPARAM; + +// Misc functions +PRIVATE void inline FFPU set_host_fpu_control_word (); +PRIVATE void inline FFPU SET_BSUN_ON_NAN (); +PRIVATE void inline FFPU build_ex_status (); +PRIVATE void FFPU do_null_frestore (); +PRIVATE void FFPU build_fpp_opp_lookup_table (); +PRIVATE void FFPU set_constant ( fpu_register & f, char *name, double value, uae_s32 mult ); + +#endif /* FPU_X86_H */ diff --git a/BasiliskII/src/uae_cpu/fpu/fpu_x86_asm.h 
b/BasiliskII/src/uae_cpu/fpu/fpu_x86_asm.h new file mode 100644 index 00000000..6e5a3766 --- /dev/null +++ b/BasiliskII/src/uae_cpu/fpu/fpu_x86_asm.h @@ -0,0 +1,104 @@ +/* + * fpu/fpu_x86_asm.h - Extra Definitions for the X86 assembly FPU core + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * MC68881/68040 fpu emulation + * + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#define DEFINE_X86_MACRO(name, value) \ + asm(".local " #name "\n\t" #name " = " #value) + +DEFINE_X86_MACRO(BSUN, 0x00008000); +DEFINE_X86_MACRO(SNAN, 0x00004000); +DEFINE_X86_MACRO(OPERR, 0x00002000); +DEFINE_X86_MACRO(OVFL, 0x00001000); +DEFINE_X86_MACRO(UNFL, 0x00000800); +DEFINE_X86_MACRO(DZ, 0x00000400); +DEFINE_X86_MACRO(INEX2, 0x00000200); +DEFINE_X86_MACRO(INEX1, 0x00000100); +DEFINE_X86_MACRO(ACCR_IOP, 0x80); +DEFINE_X86_MACRO(ACCR_OVFL, 0x40); +DEFINE_X86_MACRO(ACCR_UNFL, 0x20); +DEFINE_X86_MACRO(ACCR_DZ, 0x10); +DEFINE_X86_MACRO(ACCR_INEX, 0x08); +DEFINE_X86_MACRO(ROUND_CONTROL_MASK, 0x30); +DEFINE_X86_MACRO(ROUND_TO_NEAREST, 0); +DEFINE_X86_MACRO(ROUND_TO_ZERO, 0x10); +DEFINE_X86_MACRO(ROUND_TO_NEGATIVE_INFINITY, 0x20); +DEFINE_X86_MACRO(ROUND_TO_POSITIVE_INFINITY, 0x30); +DEFINE_X86_MACRO(PRECISION_CONTROL_MASK, 0xC0); +DEFINE_X86_MACRO(PRECISION_CONTROL_EXTENDED, 0); +DEFINE_X86_MACRO(PRECISION_CONTROL_DOUBLE, 0x80); +DEFINE_X86_MACRO(PRECISION_CONTROL_SINGLE, 0x40); +DEFINE_X86_MACRO(PRECISION_CONTROL_UNDEFINED, 0xC0); +DEFINE_X86_MACRO(CW_RESET, 0x0040); +DEFINE_X86_MACRO(CW_FINIT, 0x037F); +DEFINE_X86_MACRO(SW_RESET, 0x0000); +DEFINE_X86_MACRO(SW_FINIT, 0x0000); +DEFINE_X86_MACRO(TW_RESET, 0x5555); +DEFINE_X86_MACRO(TW_FINIT, 0x0FFF); +DEFINE_X86_MACRO(CW_X, 0x1000); +DEFINE_X86_MACRO(CW_RC_ZERO, 0x0C00); +DEFINE_X86_MACRO(CW_RC_UP, 0x0800); +DEFINE_X86_MACRO(CW_RC_DOWN, 0x0400); +DEFINE_X86_MACRO(CW_RC_NEAR, 0x0000); +DEFINE_X86_MACRO(CW_PC_EXTENDED, 0x0300); +DEFINE_X86_MACRO(CW_PC_DOUBLE, 0x0200); +DEFINE_X86_MACRO(CW_PC_RESERVED, 0x0100); +DEFINE_X86_MACRO(CW_PC_SINGLE, 0x0000); +DEFINE_X86_MACRO(CW_PM, 0x0020); +DEFINE_X86_MACRO(CW_UM, 0x0010); +DEFINE_X86_MACRO(CW_OM, 0x0008); +DEFINE_X86_MACRO(CW_ZM, 0x0004); 
+DEFINE_X86_MACRO(CW_DM, 0x0002); +DEFINE_X86_MACRO(CW_IM, 0x0001); +DEFINE_X86_MACRO(SW_B, 0x8000); +DEFINE_X86_MACRO(SW_C3, 0x4000); +DEFINE_X86_MACRO(SW_TOP_7, 0x3800); +DEFINE_X86_MACRO(SW_TOP_6, 0x3000); +DEFINE_X86_MACRO(SW_TOP_5, 0x2800); +DEFINE_X86_MACRO(SW_TOP_4, 0x2000); +DEFINE_X86_MACRO(SW_TOP_3, 0x1800); +DEFINE_X86_MACRO(SW_TOP_2, 0x1000); +DEFINE_X86_MACRO(SW_TOP_1, 0x0800); +DEFINE_X86_MACRO(SW_TOP_0, 0x0000); +DEFINE_X86_MACRO(SW_C2, 0x0400); +DEFINE_X86_MACRO(SW_C1, 0x0200); +DEFINE_X86_MACRO(SW_C0, 0x0100); +DEFINE_X86_MACRO(SW_ES, 0x0080); +DEFINE_X86_MACRO(SW_SF, 0x0040); +DEFINE_X86_MACRO(SW_PE, 0x0020); +DEFINE_X86_MACRO(SW_UE, 0x0010); +DEFINE_X86_MACRO(SW_OE, 0x0008); +DEFINE_X86_MACRO(SW_ZE, 0x0004); +DEFINE_X86_MACRO(SW_DE, 0x0002); +DEFINE_X86_MACRO(SW_IE, 0x0001); +DEFINE_X86_MACRO(X86_ROUNDING_MODE, 0x0C00); +DEFINE_X86_MACRO(X86_ROUNDING_PRECISION, 0x0300); + +#undef DEFINE_X86_MACRO diff --git a/BasiliskII/src/uae_cpu/fpu/impl.h b/BasiliskII/src/uae_cpu/fpu/impl.h new file mode 100644 index 00000000..af7946a3 --- /dev/null +++ b/BasiliskII/src/uae_cpu/fpu/impl.h @@ -0,0 +1,159 @@ +/* + * fpu/impl.h - extra functions and inline implementations + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. 
+ * + * MC68881/68040 fpu emulation + * + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef FPU_IMPL_H +#define FPU_IMPL_H + +/* NOTE: this file shall be included from fpu/core.h */ +#undef PUBLIC +#define PUBLIC /**/ + +#undef PRIVATE +#define PRIVATE /**/ + +#undef FFPU +#define FFPU /**/ + +#undef FPU +#define FPU fpu. 
+ +/* -------------------------------------------------------------------------- */ +/* --- X86 assembly fpu specific methods --- */ +/* -------------------------------------------------------------------------- */ + +#ifdef FPU_X86 + +/* Return the floating-point status register in m68k format */ +static inline uae_u32 FFPU get_fpsr(void) +{ + return to_m68k_fpcond[(x86_status_word & 0x4700) >> 8] + | FPU fpsr.quotient + | exception_host2mac[x86_status_word & (SW_FAKE_BSUN|SW_PE|SW_UE|SW_OE|SW_ZE|SW_DE|SW_IE)] + | accrued_exception_host2mac[x86_status_word_accrued & (SW_PE|SW_UE|SW_OE|SW_ZE|SW_DE|SW_IE)] + ; +} + +/* Set the floating-point status register from an m68k format */ +static inline void FFPU set_fpsr(uae_u32 new_fpsr) +{ + x86_status_word = to_host_fpcond[(new_fpsr & FPSR_CCB) >> 24 ] + | exception_mac2host[(new_fpsr & FPSR_EXCEPTION_STATUS) >> 8]; + x86_status_word_accrued = accrued_exception_mac2host[(new_fpsr & FPSR_ACCRUED_EXCEPTION) >> 3]; +} + +#endif + +/* -------------------------------------------------------------------------- */ +/* --- Original UAE and IEEE FPU core methods --- */ +/* -------------------------------------------------------------------------- */ + +#ifndef FPU_X86 + +/* Return the floating-point status register in m68k format */ +static inline uae_u32 FFPU get_fpsr(void) +{ + uae_u32 condition_codes = get_fpccr(); + uae_u32 exception_status = get_exception_status(); + uae_u32 accrued_exception = get_accrued_exception(); + uae_u32 quotient = FPU fpsr.quotient; + return (condition_codes | quotient | exception_status | accrued_exception); +} + +/* Set the floating-point status register from an m68k format */ +static inline void FFPU set_fpsr(uae_u32 new_fpsr) +{ + set_fpccr ( new_fpsr & FPSR_CCB ); + set_exception_status ( new_fpsr & FPSR_EXCEPTION_STATUS ); + set_accrued_exception ( new_fpsr & FPSR_ACCRUED_EXCEPTION ); + FPU fpsr.quotient = new_fpsr & FPSR_QUOTIENT; +} + +#endif + +/* 
-------------------------------------------------------------------------- */ +/* --- Common routines for control word --- */ +/* -------------------------------------------------------------------------- */ + +/* Return the floating-point control register in m68k format */ +static inline uae_u32 FFPU get_fpcr(void) +{ + uae_u32 rounding_precision = get_rounding_precision(); + uae_u32 rounding_mode = get_rounding_mode(); + uae_u32 exception_enable = FPU fpcr.exception_enable; + return (rounding_precision | rounding_mode | exception_enable); +} + +/* Set the floating-point control register from an m68k format */ +static inline void FFPU set_fpcr(uae_u32 new_fpcr) +{ + set_rounding_precision ( new_fpcr & FPCR_ROUNDING_PRECISION); + set_rounding_mode ( new_fpcr & FPCR_ROUNDING_MODE ); + set_host_control_word(); + FPU fpcr.exception_enable = new_fpcr & FPCR_EXCEPTION_ENABLE; +} + +/* -------------------------------------------------------------------------- */ +/* --- Specific part to X86 assembly FPU --- */ +/* -------------------------------------------------------------------------- */ + +#ifdef FPU_X86 + +/* Retrieve a floating-point register value and convert it to double precision */ +static inline double FFPU fpu_get_register(int r) +{ + double f; + __asm__ __volatile__("fldt %1\n\tfstpl %0" : "=m" (f) : "m" (FPU registers[r])); + return f; +} + +#endif + +/* -------------------------------------------------------------------------- */ +/* --- Specific to original UAE or new IEEE-based FPU core --- */ +/* -------------------------------------------------------------------------- */ + +#if defined(FPU_UAE) || defined(FPU_IEEE) + +/* Retrieve a floating-point register value and convert it to double precision */ +static inline double FFPU fpu_get_register(int r) +{ + return FPU registers[r]; +} + +#endif + +#endif /* FPU_IMPL_H */ diff --git a/BasiliskII/src/uae_cpu/fpu/mathlib.cpp b/BasiliskII/src/uae_cpu/fpu/mathlib.cpp new file mode 100644 index 
00000000..46d43c95 --- /dev/null +++ b/BasiliskII/src/uae_cpu/fpu/mathlib.cpp @@ -0,0 +1,105 @@ +/* + * fpu/mathlib.cpp - Floating-point math support library + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * MC68881/68040 fpu emulation + * + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* NOTE: this file shall be included only from fpu/fpu_*.cpp */ +#undef PRIVATE +#define PRIVATE static + +#undef PUBLIC +#define PUBLIC /**/ + +#undef FFPU +#define FFPU /**/ + +#undef FPU +#define FPU fpu. + +#if defined(FPU_IEEE) && defined(USE_X87_ASSEMBLY) + +PRIVATE fpu_extended fp_do_pow(fpu_extended x, fpu_extended y) +{ + fpu_extended value, exponent; + uae_s64 p = (uae_s64)y; + + if (x == 0.0) { + if (y > 0.0) + return (y == (double) p && (p & 1) != 0 ? x : 0.0); + else if (y < 0.0) + return (y == (double) p && (-p & 1) != 0 ? 
1.0 / x : 1.0 / fp_fabs (x)); + } + + if (y == (double) p) { + fpu_extended r = 1.0; + if (p == 0) + return 1.0; + if (p < 0) { + p = -p; + x = 1.0 / x; + } + while (1) { + if (p & 1) + r *= x; + p >>= 1; + if (p == 0) + return r; + x *= x; + } + } + + __asm__ __volatile__("fyl2x" : "=t" (value) : "0" (x), "u" (1.0) : "st(1)"); + __asm__ __volatile__("fmul %%st(1) # y * log2(x)\n\t" + "fst %%st(1)\n\t" + "frndint # int(y * log2(x))\n\t" + "fxch\n\t" + "fsub %%st(1) # fract(y * log2(x))\n\t" + "f2xm1 # 2^(fract(y * log2(x))) - 1\n\t" + : "=t" (value), "=u" (exponent) : "0" (y), "1" (value)); + value += 1.0; + __asm__ __volatile__("fscale" : "=t" (value) : "0" (value), "u" (exponent)); + return value; +} + +PRIVATE fpu_extended fp_do_log1p(fpu_extended x) +{ + // TODO: handle NaN and +inf/-inf + fpu_extended value; + // The fyl2xp1 can only be used for values in + // -1 + sqrt(2) / 2 <= x <= 1 - sqrt(2) / 2 + // 0.29 is a safe value. + if (fp_fabs(x) <= 0.29) + __asm__ __volatile__("fldln2; fxch; fyl2xp1" : "=t" (value) : "0" (x)); + else + __asm__ __volatile__("fldln2; fxch; fyl2x" : "=t" (value) : "0" (x + 1.0)); + return value; +} + +#endif diff --git a/BasiliskII/src/uae_cpu/fpu/mathlib.h b/BasiliskII/src/uae_cpu/fpu/mathlib.h new file mode 100644 index 00000000..c9a1951c --- /dev/null +++ b/BasiliskII/src/uae_cpu/fpu/mathlib.h @@ -0,0 +1,1185 @@ +/* + * fpu/mathlib.h - Floating-point math support library + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. 
+ * + * MC68881/68040 fpu emulation + * + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef FPU_MATHLIB_H +#define FPU_MATHLIB_H + +/* NOTE: this file shall be included only from fpu/fpu_*.cpp */ +#undef PUBLIC +#define PUBLIC extern + +#undef PRIVATE +#define PRIVATE static + +#undef FFPU +#define FFPU /**/ + +#undef FPU +#define FPU fpu. + +// Define the following macro if branches are expensive. If so, +// integer-based isnan() and isinf() functions are implemented. +// TODO: move to Makefile.in +#define BRANCHES_ARE_EXPENSIVE 1 + +// Use ISO C99 extended-precision math functions (glibc 2.1+) +#define FPU_USE_ISO_C99 1 + +// NOTE: this is irrelevant on Win32 platforms since the MS libraries +// don't support extended-precision floating-point computations +#ifdef WIN32 +#undef FPU_USE_ISO_C99 +#endif + +// Use faster implementation of math functions, but this could cause +// some incorrect results (?) 
+// TODO: actually implement the slower but safer versions +#define FPU_FAST_MATH 1 + +#if defined(FPU_USE_ISO_C99) +// NOTE: no prior shall be included at this point +#define __USE_ISOC99 1 // for glibc 2.2.X and newer +#define __USE_ISOC9X 1 // for glibc 2.1.X +#include +#else +#include +using namespace std; +#endif + +/* -------------------------------------------------------------------------- */ +/* --- Floating-point register types --- */ +/* -------------------------------------------------------------------------- */ + +// Single : S 8*E 23*F +#define FP_SINGLE_EXP_MAX 0xff +#define FP_SINGLE_EXP_BIAS 0x7f + +// Double : S 11*E 52*F +#define FP_DOUBLE_EXP_MAX 0x7ff +#define FP_DOUBLE_EXP_BIAS 0x3ff + +// Extended : S 15*E 64*F +#define FP_EXTENDED_EXP_MAX 0x7fff +#define FP_EXTENDED_EXP_BIAS 0x3fff + +// Zeroes : E = 0 & F = 0 +// Infinities : E = MAX & F = 0 +// Not-A-Number : E = MAX & F # 0 + +/* -------------------------------------------------------------------------- */ +/* --- Floating-point type shapes (IEEE-compliant) --- */ +/* -------------------------------------------------------------------------- */ + +// Taken from glibc 2.2.x: ieee754.h + +// IEEE-754 float format +union fpu_single_shape { + + fpu_single value; + + /* This is the IEEE 754 single-precision format. */ + struct { +#ifdef WORDS_BIGENDIAN + unsigned int negative:1; + unsigned int exponent:8; + unsigned int mantissa:23; +#else + unsigned int mantissa:23; + unsigned int exponent:8; + unsigned int negative:1; +#endif + } ieee; + + /* This format makes it easier to see if a NaN is a signalling NaN. 
*/ + struct { +#ifdef WORDS_BIGENDIAN + unsigned int negative:1; + unsigned int exponent:8; + unsigned int quiet_nan:1; + unsigned int mantissa:22; +#else + unsigned int mantissa:22; + unsigned int quiet_nan:1; + unsigned int exponent:8; + unsigned int negative:1; +#endif + } ieee_nan; +}; + +// IEEE-754 double format +union fpu_double_shape { + fpu_double value; + + /* This is the IEEE 754 double-precision format. */ + struct { +#ifdef WORDS_BIGENDIAN + unsigned int negative:1; + unsigned int exponent:11; + /* Together these comprise the mantissa. */ + unsigned int mantissa0:20; + unsigned int mantissa1:32; +#else +# if defined(HOST_FLOAT_WORDS_BIG_ENDIAN) && HOST_FLOAT_WORDS_BIG_ENDIAN + unsigned int mantissa0:20; + unsigned int exponent:11; + unsigned int negative:1; + unsigned int mantissa1:32; +# else + /* Together these comprise the mantissa. */ + unsigned int mantissa1:32; + unsigned int mantissa0:20; + unsigned int exponent:11; + unsigned int negative:1; +# endif +#endif + } ieee; + + /* This format makes it easier to see if a NaN is a signalling NaN. */ + struct { +#ifdef WORDS_BIGENDIAN + unsigned int negative:1; + unsigned int exponent:11; + unsigned int quiet_nan:1; + /* Together these comprise the mantissa. */ + unsigned int mantissa0:19; + unsigned int mantissa1:32; +#else +# if defined(HOST_FLOAT_WORDS_BIG_ENDIAN) && HOST_FLOAT_WORDS_BIG_ENDIAN + unsigned int mantissa0:19; + unsigned int quiet_nan:1; + unsigned int exponent:11; + unsigned int negative:1; + unsigned int mantissa1:32; +# else + /* Together these comprise the mantissa. 
*/ + unsigned int mantissa1:32; + unsigned int mantissa0:19; + unsigned int quiet_nan:1; + unsigned int exponent:11; + unsigned int negative:1; +# endif +#endif + } ieee_nan; + + /* This format is used to extract the sign_exponent and mantissa parts only */ + struct { +#if defined(HOST_FLOAT_WORDS_BIG_ENDIAN) && HOST_FLOAT_WORDS_BIG_ENDIAN + unsigned int msw:32; + unsigned int lsw:32; +#else + unsigned int lsw:32; + unsigned int msw:32; +#endif + } parts; +}; + +#ifdef USE_LONG_DOUBLE +// IEEE-854 long double format +union fpu_extended_shape { + fpu_extended value; + + /* This is the IEEE 854 double-extended-precision format. */ + struct { +#ifdef WORDS_BIGENDIAN + unsigned int negative:1; + unsigned int exponent:15; + unsigned int empty:16; + unsigned int mantissa0:32; + unsigned int mantissa1:32; +#else +# if defined(HOST_FLOAT_WORDS_BIG_ENDIAN) && HOST_FLOAT_WORDS_BIG_ENDIAN + unsigned int exponent:15; + unsigned int negative:1; + unsigned int empty:16; + unsigned int mantissa0:32; + unsigned int mantissa1:32; +# else + unsigned int mantissa1:32; + unsigned int mantissa0:32; + unsigned int exponent:15; + unsigned int negative:1; + unsigned int empty:16; +# endif +#endif + } ieee; + + /* This is for NaNs in the IEEE 854 double-extended-precision format. 
*/ + struct { +#ifdef WORDS_BIGENDIAN + unsigned int negative:1; + unsigned int exponent:15; + unsigned int empty:16; + unsigned int one:1; + unsigned int quiet_nan:1; + unsigned int mantissa0:30; + unsigned int mantissa1:32; +#else +# if defined(HOST_FLOAT_WORDS_BIG_ENDIAN) && HOST_FLOAT_WORDS_BIG_ENDIAN + unsigned int exponent:15; + unsigned int negative:1; + unsigned int empty:16; + unsigned int mantissa0:30; + unsigned int quiet_nan:1; + unsigned int one:1; + unsigned int mantissa1:32; +# else + unsigned int mantissa1:32; + unsigned int mantissa0:30; + unsigned int quiet_nan:1; + unsigned int one:1; + unsigned int exponent:15; + unsigned int negative:1; + unsigned int empty:16; +# endif +#endif + } ieee_nan; + + /* This format is used to extract the sign_exponent and mantissa parts only */ + struct { +#if defined(HOST_FLOAT_WORDS_BIG_ENDIAN) && HOST_FLOAT_WORDS_BIG_ENDIAN + unsigned int sign_exponent:16; + unsigned int empty:16; + unsigned int msw:32; + unsigned int lsw:32; +#else + unsigned int lsw:32; + unsigned int msw:32; + unsigned int sign_exponent:16; + unsigned int empty:16; +#endif + } parts; +}; +#endif + +#ifdef USE_QUAD_DOUBLE +// IEEE-854 quad double format +union fpu_extended_shape { + fpu_extended value; + + /* This is the IEEE 854 quad-precision format. */ + struct { +#ifdef WORDS_BIGENDIAN + unsigned int negative:1; + unsigned int exponent:15; + unsigned int mantissa0:16; + unsigned int mantissa1:32; + unsigned int mantissa2:32; + unsigned int mantissa3:32; +#else + unsigned int mantissa3:32; + unsigned int mantissa2:32; + unsigned int mantissa1:32; + unsigned int mantissa0:16; + unsigned int exponent:15; + unsigned int negative:1; +#endif + } ieee; + + /* This is for NaNs in the IEEE 854 quad-precision format. 
*/ + struct { +#ifdef WORDS_BIGENDIAN + unsigned int negative:1; + unsigned int exponent:15; + unsigned int quiet_nan:1; + unsigned int mantissa0:15; + unsigned int mantissa1:32; + unsigned int mantissa2:32; + unsigned int mantissa3:32; +#else + unsigned int mantissa3:32; + unsigned int mantissa2:32; + unsigned int mantissa1:32; + unsigned int mantissa0:15; + unsigned int quiet_nan:1; + unsigned int exponent:15; + unsigned int negative:1; +#endif + } ieee_nan; + + /* This format is used to extract the sign_exponent and mantissa parts only */ +#if defined(HOST_FLOAT_WORDS_BIG_ENDIAN) && HOST_FLOAT_WORDS_BIG_ENDIAN + struct { + uae_u64 msw; + uae_u64 lsw; + } parts64; + struct { + uae_u32 w0; + uae_u32 w1; + uae_u32 w2; + uae_u32 w3; + } parts32; +#else + struct { + uae_u64 lsw; + uae_u64 msw; + } parts64; + struct { + uae_u32 w3; + uae_u32 w2; + uae_u32 w1; + uae_u32 w0; + } parts32; +#endif +}; +#endif + +// Declare a shape of the requested FP type +#define fp_declare_init_shape(psvar, ftype) \ + fpu_ ## ftype ## _shape psvar + +/* -------------------------------------------------------------------------- */ +/* --- Extra Math Functions --- */ +/* --- (most of them had to be defined before including ) --- */ +/* -------------------------------------------------------------------------- */ + +#undef isnan +#if 0 && defined(HAVE_ISNANL) +# define isnan(x) isnanl((x)) +#else +# define isnan(x) fp_do_isnan((x)) +#endif + +PRIVATE inline bool FFPU fp_do_isnan(fpu_register const & r) +{ +#ifdef BRANCHES_ARE_EXPENSIVE +#if !defined(USE_LONG_DOUBLE) + fp_declare_init_shape(sxp, double); + sxp.value = r; + uae_s32 hx = sxp.parts.msw; + uae_s32 lx = sxp.parts.lsw; + hx &= 0x7fffffff; + hx |= (uae_u32)(lx | (-lx)) >> 31; + hx = 0x7ff00000 - hx; + return (int)(((uae_u32)hx) >> 31); +#elif defined(USE_QUAD_DOUBLE) + fp_declare_init_shape(sxp, extended); + sxp.value = r; + uae_s64 hx = sxp.parts64.msw; + uae_s64 lx = sxp.parts64.lsw; + hx &= 0x7fffffffffffffffLL; + hx |= 
(uae_u64)(lx | (-lx)) >> 63; + hx = 0x7fff000000000000LL - hx; + return (int)((uae_u64)hx >> 63); +#else + fp_declare_init_shape(sxp, extended); + sxp.value = r; + uae_s32 se = sxp.parts.sign_exponent; + uae_s32 hx = sxp.parts.msw; + uae_s32 lx = sxp.parts.lsw; + se = (se & 0x7fff) << 1; + lx |= hx & 0x7fffffff; + se |= (uae_u32)(lx | (-lx)) >> 31; + se = 0xfffe - se; + return (int)(((uae_u32)(se)) >> 31); +#endif +#else +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) + fp_declare_init_shape(sxp, extended); + sxp.value = r; + return (sxp.ieee_nan.exponent == FP_EXTENDED_EXP_MAX) +#else + fp_declare_init_shape(sxp, double); + sxp.value = r; + return (sxp.ieee_nan.exponent == FP_DOUBLE_EXP_MAX) +#endif + && (sxp.ieee_nan.mantissa0 != 0) + && (sxp.ieee_nan.mantissa1 != 0) +#ifdef USE_QUAD_DOUBLE + && (sxp.ieee_nan.mantissa2 != 0) + && (sxp.ieee_nan.mantissa3 != 0) +#endif + ; +#endif +} + +#undef isinf +#if 0 && defined(HAVE_ISINFL) +# define isinf(x) isinfl((x)) +#else +# define isinf(x) fp_do_isinf((x)) +#endif + +PRIVATE inline bool FFPU fp_do_isinf(fpu_register const & r) +{ +#ifdef BRANCHES_ARE_EXPENSIVE +#if !defined(USE_LONG_DOUBLE) + fp_declare_init_shape(sxp, double); + sxp.value = r; + uae_s32 hx = sxp.parts.msw; + uae_s32 lx = sxp.parts.lsw; + lx |= (hx & 0x7fffffff) ^ 0x7ff00000; + lx |= -lx; + return ~(lx >> 31) & (hx >> 30); +#elif defined(USE_QUAD_DOUBLE) + fp_declare_init_shape(sxp, extended); + sxp.value = r; + uae_s64 hx = sxp.parts64.msw; + uae_s64 lx = sxp.parts64.lsw; + lx |= (hx & 0x7fffffffffffffffLL) ^ 0x7fff000000000000LL; + lx |= -lx; + return ~(lx >> 63) & (hx >> 62); +#else + fp_declare_init_shape(sxp, extended); + sxp.value = r; + uae_s32 se = sxp.parts.sign_exponent; + uae_s32 hx = sxp.parts.msw; + uae_s32 lx = sxp.parts.lsw; + /* This additional ^ 0x80000000 is necessary because in Intel's + internal representation of the implicit one is explicit. + NOTE: anyway, this is equivalent to & 0x7fffffff in that case. 
*/ +#ifdef CPU_i386 + lx |= (hx ^ 0x80000000) | ((se & 0x7fff) ^ 0x7fff); +#else + lx |= (hx & 0x7fffffff) | ((se & 0x7fff) ^ 0x7fff); +#endif + lx |= -lx; + se &= 0x8000; + return ~(lx >> 31) & (1 - (se >> 14)); +#endif +#else +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) + fp_declare_init_shape(sxp, extended); + sxp.value = r; + return (sxp.ieee_nan.exponent == FP_EXTENDED_EXP_MAX) +#else + fp_declare_init_shape(sxp, double); + sxp.value = r; + return (sxp.ieee_nan.exponent == FP_DOUBLE_EXP_MAX) +#endif + && (sxp.ieee_nan.mantissa0 == 0) + && (sxp.ieee_nan.mantissa1 == 0) +#ifdef USE_QUAD_DOUBLE + && (sxp.ieee_nan.mantissa2 == 0) + && (sxp.ieee_nan.mantissa3 == 0) +#endif + ; +#endif +} + +#undef isneg +#define isneg(x) fp_do_isneg((x)) + +PRIVATE inline bool FFPU fp_do_isneg(fpu_register const & r) +{ +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) + fp_declare_init_shape(sxp, extended); +#else + fp_declare_init_shape(sxp, double); +#endif + sxp.value = r; + return sxp.ieee.negative; +} + +#undef iszero +#define iszero(x) fp_do_iszero((x)) + +PRIVATE inline bool FFPU fp_do_iszero(fpu_register const & r) +{ + // TODO: BRANCHES_ARE_EXPENSIVE +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) + fp_declare_init_shape(sxp, extended); +#else + fp_declare_init_shape(sxp, double); +#endif + sxp.value = r; + return (sxp.ieee.exponent == 0) + && (sxp.ieee.mantissa0 == 0) + && (sxp.ieee.mantissa1 == 0) +#ifdef USE_QUAD_DOUBLE + && (sxp.ieee.mantissa2 == 0) + && (sxp.ieee.mantissa3 == 0) +#endif + ; +} + +PRIVATE inline void FFPU get_dest_flags(fpu_register const & r) +{ + fl_dest.negative = isneg(r); + fl_dest.zero = iszero(r); + fl_dest.infinity = isinf(r); + fl_dest.nan = isnan(r); + fl_dest.in_range = !fl_dest.zero && !fl_dest.infinity && !fl_dest.nan; +} + +PRIVATE inline void FFPU get_source_flags(fpu_register const & r) +{ + fl_source.negative = isneg(r); + fl_source.zero = iszero(r); + fl_source.infinity = isinf(r); + fl_source.nan = 
isnan(r); + fl_source.in_range = !fl_source.zero && !fl_source.infinity && !fl_source.nan; +} + +PRIVATE inline void FFPU make_nan(fpu_register & r) +{ +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) + fp_declare_init_shape(sxp, extended); + sxp.ieee.exponent = FP_EXTENDED_EXP_MAX; + sxp.ieee.mantissa0 = 0xffffffff; +#else + fp_declare_init_shape(sxp, double); + sxp.ieee.exponent = FP_DOUBLE_EXP_MAX; + sxp.ieee.mantissa0 = 0xfffff; +#endif + sxp.ieee.mantissa1 = 0xffffffff; +#ifdef USE_QUAD_DOUBLE + sxp.ieee.mantissa2 = 0xffffffff; + sxp.ieee.mantissa3 = 0xffffffff; +#endif + r = sxp.value; +} + +PRIVATE inline void FFPU make_zero_positive(fpu_register & r) +{ +#if 1 + r = +0.0; +#else +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) + fp_declare_init_shape(sxp, extended); +#else + fp_declare_init_shape(sxp, double); +#endif + sxp.ieee.negative = 0; + sxp.ieee.exponent = 0; + sxp.ieee.mantissa0 = 0; + sxp.ieee.mantissa1 = 0; +#ifdef USE_QUAD_DOUBLE + sxp.ieee.mantissa2 = 0; + sxp.ieee.mantissa3 = 0; +#endif + r = sxp.value; +#endif +} + +PRIVATE inline void FFPU make_zero_negative(fpu_register & r) +{ +#if 1 + r = -0.0; +#else +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) + fp_declare_init_shape(sxp, extended); +#else + fp_declare_init_shape(sxp, double); +#endif + sxp.ieee.negative = 1; + sxp.ieee.exponent = 0; + sxp.ieee.mantissa0 = 0; + sxp.ieee.mantissa1 = 0; +#ifdef USE_QUAD_DOUBLE + sxp.ieee.mantissa2 = 0; + sxp.ieee.mantissa3 = 0; +#endif + r = sxp.value; +#endif +} + +PRIVATE inline void FFPU make_inf_positive(fpu_register & r) +{ +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) + fp_declare_init_shape(sxp, extended); + sxp.ieee_nan.exponent = FP_EXTENDED_EXP_MAX; +#else + fp_declare_init_shape(sxp, double); + sxp.ieee_nan.exponent = FP_DOUBLE_EXP_MAX; +#endif + sxp.ieee_nan.negative = 0; + sxp.ieee_nan.mantissa0 = 0; + sxp.ieee_nan.mantissa1 = 0; +#ifdef USE_QUAD_DOUBLE + sxp.ieee_nan.mantissa2 = 0; + 
sxp.ieee_nan.mantissa3 = 0; +#endif + r = sxp.value; +} + +PRIVATE inline void FFPU make_inf_negative(fpu_register & r) +{ +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) + fp_declare_init_shape(sxp, extended); + sxp.ieee_nan.exponent = FP_EXTENDED_EXP_MAX; +#else + fp_declare_init_shape(sxp, double); + sxp.ieee_nan.exponent = FP_DOUBLE_EXP_MAX; +#endif + sxp.ieee_nan.negative = 1; + sxp.ieee_nan.mantissa0 = 0; + sxp.ieee_nan.mantissa1 = 0; +#ifdef USE_QUAD_DOUBLE + sxp.ieee_nan.mantissa2 = 0; + sxp.ieee_nan.mantissa3 = 0; +#endif + r = sxp.value; +} + +PRIVATE inline fpu_register FFPU fast_fgetexp(fpu_register const & r) +{ +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) + fp_declare_init_shape(sxp, extended); + sxp.value = r; + return ((int) sxp.ieee.exponent - FP_EXTENDED_EXP_BIAS); +#else + fp_declare_init_shape(sxp, double); + sxp.value = r; + return ((int) sxp.ieee.exponent - FP_DOUBLE_EXP_BIAS); +#endif +} + +// Normalize to range 1..2 +PRIVATE inline void FFPU fast_remove_exponent(fpu_register & r) +{ +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) + fp_declare_init_shape(sxp, extended); + sxp.value = r; + sxp.ieee.exponent = FP_EXTENDED_EXP_BIAS; +#else + fp_declare_init_shape(sxp, double); + sxp.value = r; + sxp.ieee.exponent = FP_DOUBLE_EXP_BIAS; +#endif + r = sxp.value; +} + +// The sign of the quotient is the exclusive-OR of the sign bits +// of the source and destination operands. +PRIVATE inline uae_u32 FFPU get_quotient_sign(fpu_register const & ra, fpu_register const & rb) +{ +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) + fp_declare_init_shape(sap, extended); + fp_declare_init_shape(sbp, extended); +#else + fp_declare_init_shape(sap, double); + fp_declare_init_shape(sbp, double); +#endif + sap.value = ra; + sbp.value = rb; + return ((sap.ieee.negative ^ sbp.ieee.negative) ? 
FPSR_QUOTIENT_SIGN : 0); +} + +/* -------------------------------------------------------------------------- */ +/* --- Math functions --- */ +/* -------------------------------------------------------------------------- */ + +#if defined(FPU_USE_ISO_C99) && (defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE)) +# ifdef HAVE_LOGL +# define fp_log logl +# endif +# ifdef HAVE_LOG10L +# define fp_log10 log10l +# endif +# ifdef HAVE_EXPL +# define fp_exp expl +# endif +# ifdef HAVE_POWL +# define fp_pow powl +# endif +# ifdef HAVE_FABSL +# define fp_fabs fabsl +# endif +# ifdef HAVE_SQRTL +# define fp_sqrt sqrtl +# endif +# ifdef HAVE_SINL +# define fp_sin sinl +# endif +# ifdef HAVE_COSL +# define fp_cos cosl +# endif +# ifdef HAVE_TANL +# define fp_tan tanl +# endif +# ifdef HAVE_SINHL +# define fp_sinh sinhl +# endif +# ifdef HAVE_COSHL +# define fp_cosh coshl +# endif +# ifdef HAVE_TANHL +# define fp_tanh tanhl +# endif +# ifdef HAVE_ASINL +# define fp_asin asinl +# endif +# ifdef HAVE_ACOSL +# define fp_acos acosl +# endif +# ifdef HAVE_ATANL +# define fp_atan atanl +# endif +# ifdef HAVE_ASINHL +# define fp_asinh asinhl +# endif +# ifdef HAVE_ACOSHL +# define fp_acosh acoshl +# endif +# ifdef HAVE_ATANHL +# define fp_atanh atanhl +# endif +# ifdef HAVE_FLOORL +# define fp_floor floorl +# endif +# ifdef HAVE_CEILL +# define fp_ceil ceill +# endif +#endif + +#ifndef fp_log +# define fp_log log +#endif +#ifndef fp_log10 +# define fp_log10 log10 +#endif +#ifndef fp_exp +# define fp_exp exp +#endif +#ifndef fp_pow +# define fp_pow pow +#endif +#ifndef fp_fabs +# define fp_fabs fabs +#endif +#ifndef fp_sqrt +# define fp_sqrt sqrt +#endif +#ifndef fp_sin +# define fp_sin sin +#endif +#ifndef fp_cos +# define fp_cos cos +#endif +#ifndef fp_tan +# define fp_tan tan +#endif +#ifndef fp_sinh +# define fp_sinh sinh +#endif +#ifndef fp_cosh +# define fp_cosh cosh +#endif +#ifndef fp_tanh +# define fp_tanh tanh +#endif +#ifndef fp_asin +# define fp_asin asin +#endif +#ifndef 
fp_acos +# define fp_acos acos +#endif +#ifndef fp_atan +# define fp_atan atan +#endif +#ifndef fp_asinh +# define fp_asinh asinh +#endif +#ifndef fp_acosh +# define fp_acosh acosh +#endif +#ifndef fp_atanh +# define fp_atanh atanh +#endif +#ifndef fp_floor +# define fp_floor floor +#endif +#ifndef fp_ceil +# define fp_ceil ceil +#endif + +#if defined(FPU_IEEE) && defined(USE_X87_ASSEMBLY) +// Assembly optimized support functions. Taken from glibc 2.2.2 + +#undef fp_log +#define fp_log fp_do_log + +#ifndef FPU_FAST_MATH +// FIXME: unimplemented +PRIVATE fpu_extended fp_do_log(fpu_extended x); +#else +PRIVATE inline fpu_extended fp_do_log(fpu_extended x) +{ + fpu_extended value; + __asm__ __volatile__("fldln2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)"); + return value; +} +#endif + +#undef fp_log10 +#define fp_log10 fp_do_log10 + +#ifndef FPU_FAST_MATH +// FIXME: unimplemented +PRIVATE fpu_extended fp_do_log10(fpu_extended x); +#else +PRIVATE inline fpu_extended fp_do_log10(fpu_extended x) +{ + fpu_extended value; + __asm__ __volatile__("fldlg2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)"); + return value; +} +#endif + +#undef fp_exp +#define fp_exp fp_do_exp + +#ifndef FPU_FAST_MATH +// FIXME: unimplemented +PRIVATE fpu_extended fp_do_exp(fpu_extended x); +#else +PRIVATE inline fpu_extended fp_do_exp(fpu_extended x) +{ + fpu_extended value, exponent; + if (isinf(x)) + { + if(isneg(x)) + return 0.; + else + return x; + } + __asm__ __volatile__("fldl2e # e^x = 2^(x * log2(e))\n\t" + "fmul %%st(1) # x * log2(e)\n\t" + "fst %%st(1)\n\t" + "frndint # int(x * log2(e))\n\t" + "fxch\n\t" + "fsub %%st(1) # fract(x * log2(e))\n\t" + "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t" + : "=t" (value), "=u" (exponent) : "0" (x)); + value += 1.0; + __asm__ __volatile__("fscale" : "=t" (value) : "0" (value), "u" (exponent)); + return value; +} +#endif + +#undef fp_pow +#define fp_pow fp_do_pow + +PRIVATE fpu_extended fp_do_pow(fpu_extended x, fpu_extended y); + +#undef fp_fabs 
+#define fp_fabs fp_do_fabs + +PRIVATE inline fpu_extended fp_do_fabs(fpu_extended x) +{ + fpu_extended value; + __asm__ __volatile__("fabs" : "=t" (value) : "0" (x)); + return value; +} + +#undef fp_sqrt +#define fp_sqrt fp_do_sqrt + +PRIVATE inline fpu_extended fp_do_sqrt(fpu_extended x) +{ + fpu_extended value; + __asm__ __volatile__("fsqrt" : "=t" (value) : "0" (x)); + return value; +} + +#ifndef ACCURATE_SIN_COS_TAN +#undef fp_sin +#define fp_sin fp_do_sin + +PRIVATE inline fpu_extended fp_do_sin(fpu_extended x) +{ + fpu_extended value; + __asm__ __volatile__("fsin" : "=t" (value) : "0" (x)); + return value; +} + +#undef fp_cos +#define fp_cos fp_do_cos + +PRIVATE inline fpu_extended fp_do_cos(fpu_extended x) +{ + fpu_extended value; + __asm__ __volatile__("fcos" : "=t" (value) : "0" (x)); + return value; +} + +#undef fp_tan +#define fp_tan fp_do_tan + +PRIVATE inline fpu_extended fp_do_tan(fpu_extended x) +{ + fpu_extended value, value2; + __asm__ __volatile__("fptan" : "=t" (value2), "=u" (value) : "0" (x)); + return value; +} +#endif /* ACCURATE_SIN_COS_TAN */ + +#undef fp_expm1 +#define fp_expm1 fp_do_expm1 + +// Returns: exp(X) - 1.0 +PRIVATE inline fpu_extended fp_do_expm1(fpu_extended x) +{ + fpu_extended value, exponent, temp, temp2; + if (isinf(x)) + { + if(isneg(x)) + return -1.; + else + return x; + } + __asm__ __volatile__("fldl2e # e^x - 1 = 2^(x * log2(e)) - 1\n\t" + "fmul %%st(1) # x * log2(e)\n\t" + "fst %%st(1)\n\t" + "frndint # int(x * log2(e))\n\t" + "fxch\n\t" + "fsub %%st(1) # fract(x * log2(e))\n\t" + "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t" + "fscale # 2^(x * log2(e)) - 2^(int(x * log2(e)))\n\t" + : "=t" (value), "=u" (exponent) : "0" (x)); + __asm__ __volatile__("fld1 \n\t" + "fscale \n\t" + : "=t" (temp), "=u" (temp2) : "0" (exponent)); + temp -= 1.0; + return temp + value ? 
temp + value : x; +} + +#undef fp_sgn1 +#define fp_sgn1 fp_do_sgn1 + +PRIVATE inline fpu_extended fp_do_sgn1(fpu_extended x) +{ +#if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE) + fp_declare_init_shape(sxp, extended); + sxp.value = x; + sxp.ieee_nan.exponent = FP_EXTENDED_EXP_MAX>>1; + sxp.ieee_nan.one = 1; +#else + fp_declare_init_shape(sxp, double); + sxp.value = x; + sxp.ieee_nan.exponent = FP_DOUBLE_EXP_MAX>>1; +#endif + sxp.ieee_nan.quiet_nan = 0; + sxp.ieee_nan.mantissa0 = 0; + sxp.ieee_nan.mantissa1 = 0; + x = sxp.value; + return x; +} + +#undef fp_sinh +#define fp_sinh fp_do_sinh + +#ifndef FPU_FAST_MATH +// FIXME: unimplemented +PRIVATE fpu_extended fp_do_sinh(fpu_extended x); +#else +PRIVATE inline fpu_extended fp_do_sinh(fpu_extended x) +{ + if (isinf(x)) return x; + fpu_extended exm1 = fp_expm1(fp_fabs(x)); + return 0.5 * (exm1 / (exm1 + 1.0) + exm1) * fp_sgn1(x); +} +#endif + +#undef fp_cosh +#define fp_cosh fp_do_cosh + +#ifndef FPU_FAST_MATH +// FIXME: unimplemented +PRIVATE fpu_extended fp_do_cosh(fpu_extended x); +#else +PRIVATE inline fpu_extended fp_do_cosh(fpu_extended x) +{ + fpu_extended ex = fp_exp(x); + return 0.5 * (ex + 1.0 / ex); +} +#endif + +#undef fp_tanh +#define fp_tanh fp_do_tanh + +#ifndef FPU_FAST_MATH +// FIXME: unimplemented +PRIVATE fpu_extended fp_do_tanh(fpu_extended x); +#else +PRIVATE inline fpu_extended fp_do_tanh(fpu_extended x) +{ + fpu_extended exm1 = fp_expm1(-fp_fabs(x + x)); + return exm1 / (exm1 + 2.0) * fp_sgn1(-x); +} +#endif + +#undef fp_atan2 +#define fp_atan2 fp_do_atan2 + +PRIVATE inline fpu_extended fp_do_atan2(fpu_extended y, fpu_extended x) +{ + fpu_extended value; + __asm__ __volatile__("fpatan" : "=t" (value) : "0" (x), "u" (y) : "st(1)"); + return value; +} + +#undef fp_asin +#define fp_asin fp_do_asin + +PRIVATE inline fpu_extended fp_do_asin(fpu_extended x) +{ + return fp_atan2(x, fp_sqrt(1.0 - x * x)); +} + +#undef fp_acos +#define fp_acos fp_do_acos + +PRIVATE inline fpu_extended 
fp_do_acos(fpu_extended x) +{ + return fp_atan2(fp_sqrt(1.0 - x * x), x); +} + +#undef fp_atan +#define fp_atan fp_do_atan + +PRIVATE inline fpu_extended fp_do_atan(fpu_extended x) +{ + fpu_extended value; + __asm__ __volatile__("fld1; fpatan" : "=t" (value) : "0" (x) : "st(1)"); + return value; +} + +#undef fp_log1p +#define fp_log1p fp_do_log1p + +// Returns: ln(1.0 + X) +PRIVATE fpu_extended fp_do_log1p(fpu_extended x); + +#undef fp_asinh +#define fp_asinh fp_do_asinh + +PRIVATE inline fpu_extended fp_do_asinh(fpu_extended x) +{ + fpu_extended y = fp_fabs(x); + return (fp_log1p(y * y / (fp_sqrt(y * y + 1.0) + 1.0) + y) * fp_sgn1(x)); +} + +#undef fp_acosh +#define fp_acosh fp_do_acosh + +PRIVATE inline fpu_extended fp_do_acosh(fpu_extended x) +{ + return fp_log(x + fp_sqrt(x - 1.0) * fp_sqrt(x + 1.0)); +} + +#undef fp_atanh +#define fp_atanh fp_do_atanh + +PRIVATE inline fpu_extended fp_do_atanh(fpu_extended x) +{ + fpu_extended y = fp_fabs(x); + return -0.5 * fp_log1p(-(y + y) / (1.0 + y)) * fp_sgn1(x); +} + + +/* + * LLVM 2.9 crashes on first definition, + * clang with LLVM 3.x crashes on 2nd definition... 
sigh + */ +#if defined(__clang__) || !defined(__llvm__) +#define DEFINE_ROUND_FUNC(rounding_mode_str, rounding_mode) \ +PRIVATE inline fpu_extended fp_do_round_to_ ## rounding_mode_str(fpu_extended __x) \ +{ \ + register long double __value; \ + register int __ignore; \ + volatile unsigned short __cw; \ + volatile unsigned short __cwtmp; \ + __asm __volatile ("fnstcw %3\n\t" \ + "movzwl %3, %1\n\t" \ + "andl $0xf3ff, %1\n\t" \ + "orl %5, %1\n\t" \ + "movw %w1, %2\n\t" \ + "fldcw %2\n\t" \ + "frndint\n\t" \ + "fldcw %3" \ + : "=t" (__value), "=&q" (__ignore), "=m" (__cwtmp), \ + "=m" (__cw) \ + : "0" (__x), "i"(rounding_mode)); \ + return __value; \ +} +#else +#define DEFINE_ROUND_FUNC(rounding_mode_str, rounding_mode) \ +PRIVATE inline fpu_extended fp_do_round_to_ ## rounding_mode_str(fpu_extended x) \ +{ \ + volatile unsigned short cw; \ + __asm__ __volatile__("fnstcw %0" : "=m" (cw)); \ + volatile unsigned short cw_temp = (cw & 0xf3ff) | (rounding_mode); \ + __asm__ __volatile__("fldcw %0" : : "m" (cw_temp)); \ + fpu_extended value; \ + __asm__ __volatile__("frndint" : "=t" (value) : "0" (x)); \ + __asm__ __volatile__("fldcw %0" : : "m" (cw)); \ + return value; \ +} +#endif + +#undef fp_round_to_minus_infinity +#define fp_round_to_minus_infinity fp_do_round_to_minus_infinity + +DEFINE_ROUND_FUNC(minus_infinity, 0x400) + +#undef fp_round_to_plus_infinity +#define fp_round_to_plus_infinity fp_do_round_to_plus_infinity + +DEFINE_ROUND_FUNC(plus_infinity, 0x800) + +#undef fp_round_to_zero +#define fp_round_to_zero fp_do_round_to_zero + +DEFINE_ROUND_FUNC(zero, 0xc00) + +#undef fp_round_to_nearest +#define fp_round_to_nearest fp_do_round_to_nearest + +DEFINE_ROUND_FUNC(nearest, 0x000) + +#undef fp_ceil +#define fp_ceil fp_do_round_to_plus_infinity + +#undef fp_floor +#define fp_floor fp_do_round_to_minus_infinity + + +#endif /* USE_X87_ASSEMBLY */ + +#ifndef fp_round_to_minus_infinity +#define fp_round_to_minus_infinity(x) fp_floor(x) +#endif + +#ifndef 
fp_round_to_plus_infinity +#define fp_round_to_plus_infinity(x) fp_ceil(x) +#endif + +#ifndef fp_round_to_zero +#define fp_round_to_zero(x) ((int)(x)) +#endif + +#ifndef fp_round_to_nearest +#define fp_round_to_nearest(x) ((int)((x) + 0.5)) +#endif + +#endif /* FPU_MATHLIB_H */ diff --git a/BasiliskII/src/uae_cpu/fpu/rounding.cpp b/BasiliskII/src/uae_cpu/fpu/rounding.cpp new file mode 100644 index 00000000..9942d4e8 --- /dev/null +++ b/BasiliskII/src/uae_cpu/fpu/rounding.cpp @@ -0,0 +1,69 @@ +/* + * fpu/rounding.cpp - system-dependant FPU rounding mode and precision + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * MC68881/68040 fpu emulation + * + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#undef PRIVATE +#define PRIVATE /**/ + +#undef PUBLIC +#define PUBLIC /**/ + +#undef FFPU +#define FFPU /**/ + +#undef FPU +#define FPU fpu. 
+ +/* -------------------------------------------------------------------------- */ +/* --- Native X86 Rounding Mode --- */ +/* -------------------------------------------------------------------------- */ + +#ifdef FPU_USE_X86_ROUNDING_MODE +const uae_u32 FFPU x86_control_word_rm_mac2host[] = { + CW_RC_NEAR, + CW_RC_ZERO, + CW_RC_DOWN, + CW_RC_UP +}; +#endif + +/* -------------------------------------------------------------------------- */ +/* --- Native X86 Rounding Precision --- */ +/* -------------------------------------------------------------------------- */ + +#ifdef FPU_USE_X86_ROUNDING_PRECISION +const uae_u32 FFPU x86_control_word_rp_mac2host[] = { + CW_PC_EXTENDED, + CW_PC_SINGLE, + CW_PC_DOUBLE, + CW_PC_RESERVED +}; +#endif diff --git a/BasiliskII/src/uae_cpu/fpu/rounding.h b/BasiliskII/src/uae_cpu/fpu/rounding.h new file mode 100644 index 00000000..60c4baff --- /dev/null +++ b/BasiliskII/src/uae_cpu/fpu/rounding.h @@ -0,0 +1,159 @@ +/* + * fpu/rounding.h - system-dependant FPU rounding mode and precision + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * MC68881/68040 fpu emulation + * + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef FPU_ROUNDING_H +#define FPU_ROUNDING_H + +/* NOTE: this file shall be included from fpu/fpu_*.cpp */ +#undef PUBLIC +#define PUBLIC extern + +#undef PRIVATE +#define PRIVATE static + +#undef FFPU +#define FFPU /**/ + +#undef FPU +#define FPU fpu. + +/* Defaults to generic rounding mode and precision handling */ +#define FPU_USE_GENERIC_ROUNDING_MODE +#define FPU_USE_GENERIC_ROUNDING_PRECISION + +/* -------------------------------------------------------------------------- */ +/* --- Selection of floating-point rounding mode and precision --- */ +/* -------------------------------------------------------------------------- */ + +/* Optimized i386 fpu core must use native rounding mode */ +#if defined(FPU_X86) && defined(USE_X87_ASSEMBLY) +# undef FPU_USE_GENERIC_ROUNDING_MODE +# define FPU_USE_X86_ROUNDING_MODE +#endif + +/* Optimized i386 fpu core must use native rounding precision */ +#if defined(FPU_X86) && defined(USE_X87_ASSEMBLY) +# undef FPU_USE_GENERIC_ROUNDING_PRECISION +# define FPU_USE_X86_ROUNDING_PRECISION +#endif + +#if 0 // gb-- FIXME: that doesn't work +/* IEEE-based fpu core can have native rounding mode on i386 */ +#if defined(FPU_IEEE) && defined(USE_X87_ASSEMBLY) +# undef FPU_USE_GENERIC_ROUNDING_MODE +# define FPU_USE_X86_ROUNDING_MODE +#endif + +/* IEEE-based fpu core can have native rounding precision on i386 */ +#if defined(FPU_IEEE) && defined(USE_X87_ASSEMBLY) +# undef FPU_USE_GENERIC_ROUNDING_PRECISION +# define FPU_USE_X86_ROUNDING_PRECISION +#endif +#endif + +/* 
-------------------------------------------------------------------------- */ +/* --- Sanity checks --- */ +/* -------------------------------------------------------------------------- */ + +/* X86 rounding mode and precision work together */ +#if defined(FPU_USE_X86_ROUNDING_MODE) && defined(FPU_USE_X86_ROUNDING_PRECISION) +# define FPU_USE_X86_ROUNDING +# define CW_INITIAL (CW_RESET|CW_X|CW_PC_EXTENDED|CW_RC_NEAR|CW_PM|CW_UM|CW_OM|CW_ZM|CW_DM|CW_IM) + PRIVATE uae_u32 x86_control_word; +#endif + +/* Control word -- rounding mode */ +#ifdef FPU_USE_X86_ROUNDING_MODE +PUBLIC const uae_u32 x86_control_word_rm_mac2host[]; +#endif + +/* Control word -- rounding precision */ +#ifdef FPU_USE_X86_ROUNDING_PRECISION +PUBLIC const uae_u32 x86_control_word_rp_mac2host[]; +#endif + +#if defined(FPU_USE_X86_ROUNDING_MODE) && defined(FPU_USE_X86_ROUNDING_PRECISION) +/* Set host control word for rounding mode and rounding precision */ +PRIVATE inline void set_host_control_word(void) +{ + /* + Exception enable byte is ignored, but the same value is returned + that was previously set. 
+ */ + x86_control_word + = (x86_control_word & ~(X86_ROUNDING_MODE|X86_ROUNDING_PRECISION)) + | x86_control_word_rm_mac2host[(FPU fpcr.rounding_mode & FPCR_ROUNDING_MODE) >> 4] + | x86_control_word_rp_mac2host[(FPU fpcr.rounding_precision & FPCR_ROUNDING_PRECISION) >> 6] + ; + __asm__ __volatile__("fldcw %0" : : "m" (x86_control_word)); +} +#endif + +/* -------------------------------------------------------------------------- */ +/* --- Generic rounding mode and precision --- */ +/* -------------------------------------------------------------------------- */ + +#if defined(FPU_USE_GENERIC_ROUNDING_MODE) && defined(FPU_USE_GENERIC_ROUNDING_PRECISION) +/* Set host control word for rounding mode and rounding precision */ +PRIVATE inline void set_host_control_word(void) + { } +#endif + +/* -------------------------------------------------------------------------- */ +/* --- Common rounding mode and precision --- */ +/* -------------------------------------------------------------------------- */ + +#if defined(FPU_USE_GENERIC_ROUNDING_MODE) || defined(FPU_USE_X86_ROUNDING_MODE) + +/* Return the current rounding mode in m68k format */ +static inline uae_u32 FFPU get_rounding_mode(void) + { return FPU fpcr.rounding_mode; } + +/* Convert and set to native rounding mode */ +static inline void FFPU set_rounding_mode(uae_u32 new_rounding_mode) + { FPU fpcr.rounding_mode = new_rounding_mode; } + +#endif + +#if defined(FPU_USE_GENERIC_ROUNDING_PRECISION) || defined(FPU_USE_X86_ROUNDING_PRECISION) + +/* Return the current rounding precision in m68k format */ +static inline uae_u32 FFPU get_rounding_precision(void) + { return FPU fpcr.rounding_precision; } + +/* Convert and set to native rounding precision */ +static inline void FFPU set_rounding_precision(uae_u32 new_rounding_precision) + { FPU fpcr.rounding_precision = new_rounding_precision; } + +#endif + +#endif /* FPU_ROUNDING_H */ diff --git a/BasiliskII/src/uae_cpu/fpu/types.h b/BasiliskII/src/uae_cpu/fpu/types.h new 
file mode 100644 index 00000000..afd3ab28 --- /dev/null +++ b/BasiliskII/src/uae_cpu/fpu/types.h @@ -0,0 +1,181 @@ +/* + * fpu/types.h - basic types for fpu registers + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * MC68881/68040 fpu emulation + * + * Original UAE FPU, copyright 1996 Herman ten Brugge + * Rewrite for x86, copyright 1999-2001 Lauri Pesonen + * New framework, copyright 2000-2001 Gwenole Beauchesne + * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef FPU_TYPES_H +#define FPU_TYPES_H + +#include "sysdeps.h" + +/* Default behavior is *not* to use long doubles */ +#undef USE_LONG_DOUBLE +#undef USE_QUAD_DOUBLE + +/* -------------------------------------------------------------------------- */ +/* --- Original UAE fpu core --- */ +/* -------------------------------------------------------------------------- */ + +#if defined(FPU_UAE) + +/* 4-byte floats */ +#if SIZEOF_FLOAT == 4 +typedef float uae_f32; +#elif SIZEOF_DOUBLE == 4 +typedef double uae_f32; +#else +#error "No 4 byte float type, you lose." 
+#endif + +/* 8-byte floats */ +#if SIZEOF_DOUBLE == 8 +typedef double uae_f64; +#elif SIZEOF_LONG_DOUBLE == 8 +typedef long double uae_f64; +#else +#error "No 8 byte float type, you lose." +#endif + +/* Original UAE FPU registers are only 8 bytes long */ +typedef uae_f64 fpu_register; +typedef fpu_register fpu_extended; +typedef uae_f64 fpu_double; +typedef uae_f32 fpu_single; + +/* -------------------------------------------------------------------------- */ +/* --- Optimized core for x86 --- */ +/* -------------------------------------------------------------------------- */ + +#elif defined(FPU_X86) + +/* 4-byte floats */ +#if SIZEOF_FLOAT == 4 +typedef float uae_f32; +#elif SIZEOF_DOUBLE == 4 +typedef double uae_f32; +#else +#error "No 4 byte float type, you lose." +#endif + +/* 8-byte floats */ +#if SIZEOF_DOUBLE == 8 +typedef float uae_f64; +#elif SIZEOF_LONG_DOUBLE == 8 +typedef double uae_f64; +#else +#error "No 8 byte float type, you lose." +#endif + +/* At least 10-byte floats are required */ +#if SIZEOF_LONG_DOUBLE >= 10 +typedef long double fpu_register; +#else +#error "No float type at least 10 bytes long, you lose." +#endif + +/* X86 FPU has a custom register type that maps to a native X86 register */ +typedef fpu_register fpu_extended; +typedef uae_f64 fpu_double; +typedef uae_f32 fpu_single; + +/* -------------------------------------------------------------------------- */ +/* --- C99 implementation --- */ +/* -------------------------------------------------------------------------- */ + +#elif defined(FPU_IEEE) + +#if HOST_FLOAT_FORMAT != IEEE_FLOAT_FORMAT +#error "No IEEE float format, you lose." +#endif + +/* 4-byte floats */ +#if SIZEOF_FLOAT == 4 +typedef float uae_f32; +#elif SIZEOF_DOUBLE == 4 +typedef double uae_f32; +#else +#error "No 4 byte float type, you lose." 
+#endif + +/* 8-byte floats */ +#if SIZEOF_DOUBLE == 8 +typedef double uae_f64; +#elif SIZEOF_LONG_DOUBLE == 8 +typedef long double uae_f64; +#else +#error "No 8 byte float type, you lose." +#endif + +/* 12-byte or 16-byte floats */ +#if SIZEOF_LONG_DOUBLE == 12 +typedef long double uae_f96; +typedef uae_f96 fpu_register; +#define USE_LONG_DOUBLE 1 +#elif SIZEOF_LONG_DOUBLE == 16 && (defined(CPU_i386) || defined(CPU_x86_64) || defined(CPU_ia64)) +/* Long doubles on x86-64 are really held in old x87 FPU stack. */ +typedef long double uae_f128; +typedef uae_f128 fpu_register; +#define USE_LONG_DOUBLE 1 +#elif 0 +/* Disable for now and probably for good as (i) the emulator + implementation is not correct, (ii) I don't know of any CPU which + handles this kind of format *natively* with conformance to IEEE. */ +typedef long double uae_f128; +typedef uae_f128 fpu_register; +#define USE_QUAD_DOUBLE 1 +#else +typedef uae_f64 fpu_register; +#endif + +/* We need all those floating-point types */ +typedef fpu_register fpu_extended; +typedef uae_f64 fpu_double; +typedef uae_f32 fpu_single; + +#elif defined(FPU_MPFR) + +#include + +struct fpu_register { + mpfr_t f; + uae_u64 nan_bits; + int nan_sign; + operator long double (); + fpu_register &operator=(long double); +}; + +#endif + +union fpu_register_parts { + fpu_register val; + uae_u32 parts[sizeof(fpu_register) / 4]; +}; + +#endif /* FPU_TYPES_H */ diff --git a/BasiliskII/src/uae_cpu/gencpu.c b/BasiliskII/src/uae_cpu/gencpu.c index 8e2502a3..8db74001 100644 --- a/BasiliskII/src/uae_cpu/gencpu.c +++ b/BasiliskII/src/uae_cpu/gencpu.c @@ -1,3 +1,27 @@ +/* + * gencpu.c - m68k emulation generator + * + * Copyright (c) 2009 ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. 
+ * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ /* * UAE - The Un*x Amiga Emulator * @@ -16,22 +40,24 @@ * Copyright 1995, 1996 Bernd Schmidt */ -#include -#include -#include -#include +#define CC_FOR_BUILD 1 #include "sysdeps.h" #include "readcpu.h" -#if defined(SPARC_V8_ASSEMBLY) || defined(SPARC_V9_ASSEMBLY) -#define SPARC_ASSEMBLY 0 -#endif +#include +#include +#include +#include +#include +#undef abort #define BOOL_TYPE "int" +#define VERIFY_MMU_GENAMODE 0 static FILE *headerfile; static FILE *stblfile; +static FILE *functblfile; static int using_prefetch; static int using_exception_3; @@ -47,6 +73,23 @@ static int *opcode_next_clev; static int *opcode_last_postfix; static unsigned long *counts; +#define GENA_GETV_NO_FETCH 0 +#define GENA_GETV_FETCH 1 +#define GENA_GETV_FETCH_ALIGN 2 +#define GENA_MOVEM_DO_INC 0 +#define GENA_MOVEM_NO_INC 1 +#define GENA_MOVEM_MOVE16 2 + +#define XLATE_LOG 0 +#define XLATE_PHYS 1 +#define XLATE_SFC 2 +#define XLATE_DFC 3 +static char * mem_prefix[4] = { "", "phys_", "sfc_", "dfc_" }; + +/* Define the minimal 680x0 where NV flags are not affected by xBCD instructions. 
*/ +#define xBCD_KEEPS_N_FLAG 4 +#define xBCD_KEEPS_V_FLAG 3 + static void read_counts (void) { FILE *file; @@ -57,7 +100,8 @@ static void read_counts (void) file = fopen ("frequent.68k", "r"); if (file) { - fscanf (file, "Total: %lu\n", &total); + int c = fscanf (file, "Total: %lu\n", &total); + assert(c == 1); while (fscanf (file, "%lx: %lu %s\n", &opcode, &count, name) == 3) { opcode_next_clev[nr] = 4; opcode_last_postfix[nr] = -1; @@ -88,7 +132,6 @@ static int need_endlabel; static int n_braces = 0; static int m68k_pc_offset = 0; -static int insn_n_cycles; static void start_brace (void) { @@ -141,9 +184,8 @@ static const char *gen_nextilong (void) { static char buffer[80]; int r = m68k_pc_offset; - m68k_pc_offset += 4; - insn_n_cycles += 4; + m68k_pc_offset += 4; if (using_prefetch) sprintf (buffer, "get_ilong_prefetch(%d)", r); @@ -156,9 +198,8 @@ static const char *gen_nextiword (void) { static char buffer[80]; int r = m68k_pc_offset; - m68k_pc_offset += 2; - insn_n_cycles += 2; + m68k_pc_offset += 2; if (using_prefetch) sprintf (buffer, "get_iword_prefetch(%d)", r); @@ -173,8 +214,6 @@ static const char *gen_nextibyte (void) int r = m68k_pc_offset; m68k_pc_offset += 2; - insn_n_cycles += 2; - if (using_prefetch) sprintf (buffer, "get_ibyte_prefetch(%d)", r); else @@ -196,9 +235,22 @@ static void fill_prefetch_2 (void) static void swap_opcode (void) { - printf ("#ifdef HAVE_GET_WORD_UNSWAPPED\n"); - printf ("\topcode = ((opcode << 8) & 0xFF00) | ((opcode >> 8) & 0xFF);\n"); - printf ("#endif\n"); + printf("#if defined(HAVE_GET_WORD_UNSWAPPED) && !defined(FULLMMU)\n"); + printf ("\topcode = do_byteswap_16(opcode);\n"); + printf("#endif\n"); +} + +static void real_opcode (int *have) +{ + if (!*have) + { + printf("#if defined(HAVE_GET_WORD_UNSWAPPED) && !defined(FULLMMU)\n"); + printf ("\tuae_u32 real_opcode = do_byteswap_16(opcode);\n"); + printf("#else\n"); + printf ("\tuae_u32 real_opcode = opcode;\n"); + printf("#endif\n"); + *have = 1; + } } static void 
sync_m68k_pc (void) @@ -220,32 +272,49 @@ static void sync_m68k_pc (void) m68k_pc_offset = 0; } -/* getv == 1: fetch data; getv != 0: check for odd address. If movem != 0, - * the calling routine handles Apdi and Aipi modes. */ -static void genamode (amodes mode, char *reg, wordsizes size, char *name, int getv, int movem) +static void gen_set_fault_pc (void) { + sync_m68k_pc(); + printf ("regs.fault_pc = m68k_getpc ();\n"); + m68k_pc_offset = 0; +} + +/* getv == 1: fetch data; getv != 0: check for odd address. If movem != 0, + * the calling routine handles Apdi and Aipi modes. + * gb-- movem == 2 means the same thing but for a MOVE16 instruction */ + +/* fixup indicates if we want to fix up adress registers in pre decrement + * or post increment mode now (0) or later (1). A value of 2 will then be + * used to do the actual fix up. This allows to do all memory readings + * before any register is modified, and so to rerun operation without + * side effect in case a bus fault is generated by any memory access. 
+ * XJ - 2006/11/13 */ +static void genamode2 (amodes mode, char *reg, wordsizes size, char *name, int getv, int movem, int xlateflag, int fixup) +{ + if (fixup != 2) + { start_brace (); switch (mode) { case Dreg: if (movem) abort (); - if (getv == 1) + if (getv == GENA_GETV_FETCH) switch (size) { case sz_byte: -#if defined(AMIGA) && !defined(WARPUP) + printf("\n#if defined(AMIGA) && !defined(WARPUP)\n"); /* sam: I don't know why gcc.2.7.2.1 produces a code worse */ /* if it is not done like that: */ printf ("\tuae_s8 %s = ((uae_u8*)&m68k_dreg(regs, %s))[3];\n", name, reg); -#else + printf("#else\n"); printf ("\tuae_s8 %s = m68k_dreg(regs, %s);\n", name, reg); -#endif + printf("#endif\n"); break; case sz_word: -#if defined(AMIGA) && !defined(WARPUP) + printf("\n#if defined(AMIGA) && !defined(WARPUP)\n"); printf ("\tuae_s16 %s = ((uae_s16*)&m68k_dreg(regs, %s))[1];\n", name, reg); -#else + printf("#else\n"); printf ("\tuae_s16 %s = m68k_dreg(regs, %s);\n", name, reg); -#endif + printf("#endif\n"); break; case sz_long: printf ("\tuae_s32 %s = m68k_dreg(regs, %s);\n", name, reg); @@ -257,7 +326,7 @@ static void genamode (amodes mode, char *reg, wordsizes size, char *name, int ge case Areg: if (movem) abort (); - if (getv == 1) + if (getv == GENA_GETV_FETCH) switch (size) { case sz_word: printf ("\tuae_s16 %s = m68k_areg(regs, %s);\n", name, reg); @@ -284,10 +353,16 @@ static void genamode (amodes mode, char *reg, wordsizes size, char *name, int ge printf ("\tuaecptr %sa = m68k_areg(regs, %s) - areg_byteinc[%s];\n", name, reg, reg); break; case sz_word: - printf ("\tuaecptr %sa = m68k_areg(regs, %s) - %d;\n", name, reg, movem ? 0 : 2); + if (movem) + printf ("\tuaecptr %sa = m68k_areg(regs, %s);\n", name, reg); + else + printf ("\tuaecptr %sa = m68k_areg(regs, %s) - 2;\n", name, reg); break; case sz_long: - printf ("\tuaecptr %sa = m68k_areg(regs, %s) - %d;\n", name, reg, movem ? 
0 : 4); + if (movem) + printf ("\tuaecptr %sa = m68k_areg(regs, %s);\n", name, reg); + else + printf ("\tuaecptr %sa = m68k_areg(regs, %s) - 4;\n", name, reg); break; default: abort (); @@ -332,7 +407,7 @@ static void genamode (amodes mode, char *reg, wordsizes size, char *name, int ge printf ("\tuaecptr %sa = %s;\n", name, gen_nextilong ()); break; case imm: - if (getv != 1) + if (getv != GENA_GETV_FETCH) abort (); switch (size) { case sz_byte: @@ -349,22 +424,22 @@ static void genamode (amodes mode, char *reg, wordsizes size, char *name, int ge } return; case imm0: - if (getv != 1) + if (getv != GENA_GETV_FETCH) abort (); printf ("\tuae_s8 %s = %s;\n", name, gen_nextibyte ()); return; case imm1: - if (getv != 1) + if (getv != GENA_GETV_FETCH) abort (); printf ("\tuae_s16 %s = %s;\n", name, gen_nextiword ()); return; case imm2: - if (getv != 1) + if (getv != GENA_GETV_FETCH) abort (); printf ("\tuae_s32 %s = %s;\n", name, gen_nextilong ()); return; case immi: - if (getv != 1) + if (getv != GENA_GETV_FETCH) abort (); printf ("\tuae_u32 %s = %s;\n", name, reg); return; @@ -375,7 +450,7 @@ static void genamode (amodes mode, char *reg, wordsizes size, char *name, int ge /* We get here for all non-reg non-immediate addressing modes to * actually fetch the value. 
*/ - if (using_exception_3 && getv != 0 && size != sz_byte) { + if (using_exception_3 && getv != GENA_GETV_NO_FETCH && size != sz_byte) { printf ("\tif ((%sa & 1) != 0) {\n", name); printf ("\t\tlast_fault_for_exception_3 = %sa;\n", name); printf ("\t\tlast_op_for_exception_3 = opcode;\n"); @@ -387,20 +462,29 @@ static void genamode (amodes mode, char *reg, wordsizes size, char *name, int ge start_brace (); } - if (getv == 1) { + if (getv == GENA_GETV_FETCH) { switch (size) { - case sz_byte: insn_n_cycles += 2; break; - case sz_word: insn_n_cycles += 2; break; - case sz_long: insn_n_cycles += 4; break; + case sz_byte: break; + case sz_word: break; + case sz_long: break; default: abort (); } start_brace (); + printf("\n#ifdef FULLMMU\n"); switch (size) { - case sz_byte: printf ("\tuae_s8 %s = get_byte(%sa);\n", name, name); break; - case sz_word: printf ("\tuae_s16 %s = get_word(%sa);\n", name, name); break; - case sz_long: printf ("\tuae_s32 %s = get_long(%sa);\n", name, name); break; + case sz_byte: printf ("\tuae_s8 %s = %sget_byte(%sa);\n", name, mem_prefix[xlateflag], name); break; + case sz_word: printf ("\tuae_s16 %s = %sget_word(%sa);\n", name, mem_prefix[xlateflag], name); break; + case sz_long: printf ("\tuae_s32 %s = %sget_long(%sa);\n", name, mem_prefix[xlateflag], name); break; default: abort (); } + printf("#else\n"); + switch (size) { + case sz_byte: printf ("\tuae_s8 %s = phys_get_byte(%sa);\n", name, name); break; + case sz_word: printf ("\tuae_s16 %s = phys_get_word(%sa);\n", name, name); break; + case sz_long: printf ("\tuae_s32 %s = phys_get_long(%sa);\n", name, name); break; + default: abort (); + } + printf("#endif\n"); } /* We now might have to fix up the register for pre-dec or post-inc @@ -408,6 +492,12 @@ static void genamode (amodes mode, char *reg, wordsizes size, char *name, int ge if (!movem) switch (mode) { case Aipi: + if (fixup == 1) + { + printf ("\tfixup.flag = 1;\n"); + printf ("\tfixup.reg = %s;\n", reg); + printf ("\tfixup.value 
= m68k_areg(regs, %s);\n", reg); + } switch (size) { case sz_byte: printf ("\tm68k_areg(regs, %s) += areg_byteinc[%s];\n", reg, reg); @@ -423,14 +513,39 @@ static void genamode (amodes mode, char *reg, wordsizes size, char *name, int ge } break; case Apdi: + if (fixup == 1) + { + printf ("\tfixup.flag = 1;\n"); + printf ("\tfixup.reg = %s;\n", reg); + printf ("\tfixup.value = m68k_areg(regs, %s);\n", reg); + } printf ("\tm68k_areg (regs, %s) = %sa;\n", reg, name); break; default: break; } + + } + else /* (fixup != 2) */ + { + if (!movem) + switch (mode) { + case Aipi: + case Apdi: + printf ("\tfixup.flag = 0;\n"); + break; + default: + break; + } + } } -static void genastore (char *from, amodes mode, char *reg, wordsizes size, char *to) +static void genamode (amodes mode, char *reg, wordsizes size, char *name, int getv, int movem, int xlateflag) +{ + genamode2 (mode, reg, size, name, getv, movem, xlateflag, 0); +} + +static void genastore (char *from, amodes mode, char *reg, wordsizes size, char *to, int xlateflag) { switch (mode) { case Dreg: @@ -470,28 +585,32 @@ static void genastore (char *from, amodes mode, char *reg, wordsizes size, char case absl: case PC16: case PC8r: - if (using_prefetch) - sync_m68k_pc (); + gen_set_fault_pc (); + printf("#ifdef FULLMMU\n"); switch (size) { case sz_byte: - insn_n_cycles += 2; + printf ("\t%sput_byte(%sa,%s);\n", mem_prefix[xlateflag], to, from); + printf("#else\n"); printf ("\tput_byte(%sa,%s);\n", to, from); break; case sz_word: - insn_n_cycles += 2; if (cpu_level < 2 && (mode == PC16 || mode == PC8r)) abort (); + printf ("\t%sput_word(%sa,%s);\n", mem_prefix[xlateflag], to, from); + printf("#else\n"); printf ("\tput_word(%sa,%s);\n", to, from); break; case sz_long: - insn_n_cycles += 4; if (cpu_level < 2 && (mode == PC16 || mode == PC8r)) abort (); + printf ("\t%sput_long(%sa,%s);\n", mem_prefix[xlateflag], to, from); + printf("#else\n"); printf ("\tput_long(%sa,%s);\n", to, from); break; default: abort (); } + 
printf("#endif\n"); break; case imm: case imm0: @@ -507,23 +626,33 @@ static void genastore (char *from, amodes mode, char *reg, wordsizes size, char static void genmovemel (uae_u16 opcode) { - char getcode[100]; + char getcode1[100]; + char getcode2[100]; int size = table68k[opcode].size == sz_long ? 4 : 2; - + if (table68k[opcode].size == sz_long) { - strcpy (getcode, "get_long(srca)"); + strcpy (getcode1, ""); + strcpy (getcode2, "get_long(srca)"); } else { - strcpy (getcode, "(uae_s32)(uae_s16)get_word(srca)"); + strcpy (getcode1, "(uae_s32)(uae_s16)"); + strcpy (getcode2, "get_word(srca)"); } printf ("\tuae_u16 mask = %s;\n", gen_nextiword ()); printf ("\tunsigned int dmask = mask & 0xff, amask = (mask >> 8) & 0xff;\n"); - genamode (table68k[opcode].dmode, "dstreg", table68k[opcode].size, "src", 2, 1); + genamode (table68k[opcode].dmode, "dstreg", table68k[opcode].size, "src", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_NO_INC, XLATE_LOG); start_brace (); - printf ("\twhile (dmask) { m68k_dreg(regs, movem_index1[dmask]) = %s; srca += %d; dmask = movem_next[dmask]; }\n", - getcode, size); - printf ("\twhile (amask) { m68k_areg(regs, movem_index1[amask]) = %s; srca += %d; amask = movem_next[amask]; }\n", - getcode, size); + printf("\n#ifdef FULLMMU\n"); + printf ("\twhile (dmask) { m68k_dreg(regs, movem_index1[dmask]) = %s%s; srca += %d; dmask = movem_next[dmask]; }\n", + getcode1, getcode2, size); + printf ("\twhile (amask) { m68k_areg(regs, movem_index1[amask]) = %s%s; srca += %d; amask = movem_next[amask]; }\n", + getcode1, getcode2, size); + printf("#else\n"); + printf ("\twhile (dmask) { m68k_dreg(regs, movem_index1[dmask]) = %sphys_%s; srca += %d; dmask = movem_next[dmask]; }\n", + getcode1, getcode2, size); + printf ("\twhile (amask) { m68k_areg(regs, movem_index1[amask]) = %sphys_%s; srca += %d; amask = movem_next[amask]; }\n", + getcode1, getcode2, size); + printf("#endif\n"); if (table68k[opcode].dmode == Aipi) printf ("\tm68k_areg(regs, dstreg) = srca;\n"); @@ 
-533,6 +662,7 @@ static void genmovemle (uae_u16 opcode) { char putcode[100]; int size = table68k[opcode].size == sz_long ? 4 : 2; + if (table68k[opcode].size == sz_long) { strcpy (putcode, "put_long(srca,"); } else { @@ -540,24 +670,38 @@ static void genmovemle (uae_u16 opcode) } printf ("\tuae_u16 mask = %s;\n", gen_nextiword ()); - genamode (table68k[opcode].dmode, "dstreg", table68k[opcode].size, "src", 2, 1); - if (using_prefetch) - sync_m68k_pc (); + genamode (table68k[opcode].dmode, "dstreg", table68k[opcode].size, "src", + GENA_GETV_FETCH_ALIGN, GENA_MOVEM_NO_INC, XLATE_LOG); + sync_m68k_pc (); start_brace (); if (table68k[opcode].dmode == Apdi) { printf ("\tuae_u16 amask = mask & 0xff, dmask = (mask >> 8) & 0xff;\n"); + printf("#ifdef FULLMMU\n"); printf ("\twhile (amask) { srca -= %d; %s m68k_areg(regs, movem_index2[amask])); amask = movem_next[amask]; }\n", size, putcode); printf ("\twhile (dmask) { srca -= %d; %s m68k_dreg(regs, movem_index2[dmask])); dmask = movem_next[dmask]; }\n", size, putcode); + printf("#else\n"); + printf ("\twhile (amask) { srca -= %d; phys_%s m68k_areg(regs, movem_index2[amask])); amask = movem_next[amask]; }\n", + size, putcode); + printf ("\twhile (dmask) { srca -= %d; phys_%s m68k_dreg(regs, movem_index2[dmask])); dmask = movem_next[dmask]; }\n", + size, putcode); + printf("#endif\n"); printf ("\tm68k_areg(regs, dstreg) = srca;\n"); } else { printf ("\tuae_u16 dmask = mask & 0xff, amask = (mask >> 8) & 0xff;\n"); + printf("#ifdef FULLMMU\n"); printf ("\twhile (dmask) { %s m68k_dreg(regs, movem_index1[dmask])); srca += %d; dmask = movem_next[dmask]; }\n", putcode, size); printf ("\twhile (amask) { %s m68k_areg(regs, movem_index1[amask])); srca += %d; amask = movem_next[amask]; }\n", putcode, size); + printf("#else\n"); + printf ("\twhile (dmask) { phys_%s m68k_dreg(regs, movem_index1[dmask])); srca += %d; dmask = movem_next[dmask]; }\n", + putcode, size); + printf ("\twhile (amask) { phys_%s m68k_areg(regs, 
movem_index1[amask])); srca += %d; amask = movem_next[amask]; }\n", + putcode, size); + printf("#endif\n"); } } @@ -567,7 +711,7 @@ static void duplicate_carry (void) } typedef enum { - flag_logical_noclobber, flag_logical, flag_add, flag_sub, flag_cmp, flag_addx, flag_subx, flag_zn, + flag_logical_noclobber, flag_logical, flag_add, flag_sub, flag_cmp, flag_addx, flag_subx, flag_z, flag_zn, flag_av, flag_sv } flagtypes; @@ -621,6 +765,7 @@ static void genflags_normal (flagtypes type, wordsizes size, char *value, char * switch (type) { case flag_logical_noclobber: case flag_logical: + case flag_z: case flag_zn: case flag_av: case flag_sv: @@ -629,12 +774,10 @@ static void genflags_normal (flagtypes type, wordsizes size, char *value, char * break; case flag_add: - start_brace (); printf ("uae_u32 %s = %s + %s;\n", value, dstr, sstr); break; case flag_sub: case flag_cmp: - start_brace (); printf ("uae_u32 %s = %s - %s;\n", value, dstr, sstr); break; } @@ -642,6 +785,7 @@ static void genflags_normal (flagtypes type, wordsizes size, char *value, char * switch (type) { case flag_logical_noclobber: case flag_logical: + case flag_z: case flag_zn: break; @@ -652,7 +796,6 @@ static void genflags_normal (flagtypes type, wordsizes size, char *value, char * case flag_cmp: case flag_av: case flag_sv: - start_brace (); printf ("\t" BOOL_TYPE " flgs = %s < 0;\n", sstr); printf ("\t" BOOL_TYPE " flgo = %s < 0;\n", dstr); printf ("\t" BOOL_TYPE " flgn = %s < 0;\n", vstr); @@ -675,6 +818,9 @@ static void genflags_normal (flagtypes type, wordsizes size, char *value, char * case flag_sv: printf ("\tSET_VFLG ((flgs ^ flgo) & (flgn ^ flgo));\n"); break; + case flag_z: + printf ("\tSET_ZFLG (GET_ZFLG & (%s == 0));\n", vstr); + break; case flag_zn: printf ("\tSET_ZFLG (GET_ZFLG & (%s == 0));\n", vstr); printf ("\tSET_NFLG (%s < 0);\n", vstr); @@ -714,163 +860,18 @@ static void genflags_normal (flagtypes type, wordsizes size, char *value, char * static void genflags (flagtypes type, 
wordsizes size, char *value, char *src, char *dst) { -#ifdef SPARC_V8_ASSEMBLY - switch(type) - { - case flag_add: - start_brace(); - printf("\tuae_u32 %s;\n", value); - switch(size) - { - case sz_byte: - printf("\t%s = sparc_v8_flag_add_8(®flags, (uae_u32)(%s), (uae_u32)(%s));\n", value, src, dst); - break; - case sz_word: - printf("\t%s = sparc_v8_flag_add_16(®flags, (uae_u32)(%s), (uae_u32)(%s));\n", value, src, dst); - break; - case sz_long: - printf("\t%s = sparc_v8_flag_add_32(®flags, (uae_u32)(%s), (uae_u32)(%s));\n", value, src, dst); - break; - } - return; - - case flag_sub: - start_brace(); - printf("\tuae_u32 %s;\n", value); - switch(size) - { - case sz_byte: - printf("\t%s = sparc_v8_flag_sub_8(®flags, (uae_u32)(%s), (uae_u32)(%s));\n", value, src, dst); - break; - case sz_word: - printf("\t%s = sparc_v8_flag_sub_16(®flags, (uae_u32)(%s), (uae_u32)(%s));\n", value, src, dst); - break; - case sz_long: - printf("\t%s = sparc_v8_flag_sub_32(®flags, (uae_u32)(%s), (uae_u32)(%s));\n", value, src, dst); - break; - } - return; - - case flag_cmp: - switch(size) - { - case sz_byte: -// printf("\tsparc_v8_flag_cmp_8(®flags, (uae_u32)(%s), (uae_u32)(%s));\n", src, dst); - break; - case sz_word: -// printf("\tsparc_v8_flag_cmp_16(®flags, (uae_u32)(%s), (uae_u32)(%s));\n", src, dst); - break; - case sz_long: -#if 1 - printf("\tsparc_v8_flag_cmp_32(®flags, (uae_u32)(%s), (uae_u32)(%s));\n", src, dst); - return; -#endif - break; - } -// return; - break; - } -#elif defined(SPARC_V9_ASSEMBLY) - switch(type) - { - case flag_add: - start_brace(); - printf("\tuae_u32 %s;\n", value); - switch(size) - { - case sz_byte: - printf("\t%s = sparc_v9_flag_add_8(®flags, (uae_u32)(%s), (uae_u32)(%s));\n", value, src, dst); - break; - case sz_word: - printf("\t%s = sparc_v9_flag_add_16(®flags, (uae_u32)(%s), (uae_u32)(%s));\n", value, src, dst); - break; - case sz_long: - printf("\t%s = sparc_v9_flag_add_32(®flags, (uae_u32)(%s), (uae_u32)(%s));\n", value, src, dst); - break; - } - 
return; - - case flag_sub: - start_brace(); - printf("\tuae_u32 %s;\n", value); - switch(size) - { - case sz_byte: - printf("\t%s = sparc_v9_flag_sub_8(®flags, (uae_u32)(%s), (uae_u32)(%s));\n", value, src, dst); - break; - case sz_word: - printf("\t%s = sparc_v9_flag_sub_16(®flags, (uae_u32)(%s), (uae_u32)(%s));\n", value, src, dst); - break; - case sz_long: - printf("\t%s = sparc_v9_flag_sub_32(®flags, (uae_u32)(%s), (uae_u32)(%s));\n", value, src, dst); - break; - } - return; - - case flag_cmp: - switch(size) - { - case sz_byte: - printf("\tsparc_v9_flag_cmp_8(®flags, (uae_u32)(%s), (uae_u32)(%s));\n", src, dst); - break; - case sz_word: - printf("\tsparc_v9_flag_cmp_16(®flags, (uae_u32)(%s), (uae_u32)(%s));\n", src, dst); - break; - case sz_long: - printf("\tsparc_v9_flag_cmp_32(®flags, (uae_u32)(%s), (uae_u32)(%s));\n", src, dst); - break; - } - return; - - case flag_logical: - if (strcmp(value, "0") == 0) { - printf("\tregflags.nzvc = 0x04;\n"); - } else { - switch(size) { - case sz_byte: - printf("\tsparc_v9_flag_test_8(®flags, (uae_u32)(%s));\n", value); - break; - case sz_word: - printf("\tsparc_v9_flag_test_16(®flags, (uae_u32)(%s));\n", value); - break; - case sz_long: - printf("\tsparc_v9_flag_test_32(®flags, (uae_u32)(%s));\n", value); - break; - } - } - return; - -#if 0 - case flag_logical_noclobber: - printf("\t{uae_u32 old_flags = regflags.nzvc & ~0x0C;\n"); - if (strcmp(value, "0") == 0) { - printf("\tregflags.nzvc = old_flags | 0x04;\n"); - } else { - switch(size) { - case sz_byte: - printf("\tsparc_v9_flag_test_8(®flags, (uae_u32)(%s));\n", value); - break; - case sz_word: - printf("\tsparc_v9_flag_test_16(®flags, (uae_u32)(%s));\n", value); - break; - case sz_long: - printf("\tsparc_v9_flag_test_32(®flags, (uae_u32)(%s));\n", value); - break; - } - printf("\tregflags.nzvc |= old_flags;\n"); - } - printf("\t}\n"); - return; -#endif - } -#elif defined(X86_ASSEMBLY) + /* Temporarily deleted 68k/ARM flag optimizations. 
I'd prefer to have + them in the appropriate m68k.h files and use just one copy of this + code here. The API can be changed if necessary. */ + int done = 0; + + start_brace (); + printf("\n#ifdef OPTIMIZED_FLAGS\n"); switch (type) { case flag_add: case flag_sub: - start_brace (); printf ("\tuae_u32 %s;\n", value); break; - default: break; } @@ -878,233 +879,71 @@ static void genflags (flagtypes type, wordsizes size, char *value, char *src, ch /* At least some of those casts are fairly important! */ switch (type) { case flag_logical_noclobber: - printf ("\t{uae_u32 oldcznv = regflags.cznv & ~0xC0;\n"); + printf ("\t{uae_u32 oldcznv = GET_CZNV & ~(FLAGVAL_Z | FLAGVAL_N);\n"); if (strcmp (value, "0") == 0) { - printf ("\tregflags.cznv = olcznv | 64;\n"); + printf ("\tSET_CZNV (olcznv | FLAGVAL_Z);\n"); } else { switch (size) { - case sz_byte: printf ("\tx86_flag_testb ((uae_s8)(%s));\n", value); break; - case sz_word: printf ("\tx86_flag_testw ((uae_s16)(%s));\n", value); break; - case sz_long: printf ("\tx86_flag_testl ((uae_s32)(%s));\n", value); break; + case sz_byte: printf ("\toptflag_testb ((uae_s8)(%s));\n", value); break; + case sz_word: printf ("\toptflag_testw ((uae_s16)(%s));\n", value); break; + case sz_long: printf ("\toptflag_testl ((uae_s32)(%s));\n", value); break; } - printf ("\tregflags.cznv |= oldcznv;\n"); + printf ("\tIOR_CZNV (oldcznv);\n"); } printf ("\t}\n"); - return; + done = 1; + break; + case flag_logical: if (strcmp (value, "0") == 0) { - printf ("\tregflags.cznv = 64;\n"); + printf ("\tSET_CZNV (FLAGVAL_Z);\n"); } else { switch (size) { - case sz_byte: printf ("\tx86_flag_testb ((uae_s8)(%s));\n", value); break; - case sz_word: printf ("\tx86_flag_testw ((uae_s16)(%s));\n", value); break; - case sz_long: printf ("\tx86_flag_testl ((uae_s32)(%s));\n", value); break; + case sz_byte: printf ("\toptflag_testb ((uae_s8)(%s));\n", value); break; + case sz_word: printf ("\toptflag_testw ((uae_s16)(%s));\n", value); break; + case sz_long: printf 
("\toptflag_testl ((uae_s32)(%s));\n", value); break; } } - return; + done = 1; + break; case flag_add: switch (size) { - case sz_byte: printf ("\tx86_flag_addb (%s, (uae_s8)(%s), (uae_s8)(%s));\n", value, src, dst); break; - case sz_word: printf ("\tx86_flag_addw (%s, (uae_s16)(%s), (uae_s16)(%s));\n", value, src, dst); break; - case sz_long: printf ("\tx86_flag_addl (%s, (uae_s32)(%s), (uae_s32)(%s));\n", value, src, dst); break; + case sz_byte: printf ("\toptflag_addb (%s, (uae_s8)(%s), (uae_s8)(%s));\n", value, src, dst); break; + case sz_word: printf ("\toptflag_addw (%s, (uae_s16)(%s), (uae_s16)(%s));\n", value, src, dst); break; + case sz_long: printf ("\toptflag_addl (%s, (uae_s32)(%s), (uae_s32)(%s));\n", value, src, dst); break; } - return; + done = 1; + break; case flag_sub: switch (size) { - case sz_byte: printf ("\tx86_flag_subb (%s, (uae_s8)(%s), (uae_s8)(%s));\n", value, src, dst); break; - case sz_word: printf ("\tx86_flag_subw (%s, (uae_s16)(%s), (uae_s16)(%s));\n", value, src, dst); break; - case sz_long: printf ("\tx86_flag_subl (%s, (uae_s32)(%s), (uae_s32)(%s));\n", value, src, dst); break; + case sz_byte: printf ("\toptflag_subb (%s, (uae_s8)(%s), (uae_s8)(%s));\n", value, src, dst); break; + case sz_word: printf ("\toptflag_subw (%s, (uae_s16)(%s), (uae_s16)(%s));\n", value, src, dst); break; + case sz_long: printf ("\toptflag_subl (%s, (uae_s32)(%s), (uae_s32)(%s));\n", value, src, dst); break; } - return; + done = 1; + break; case flag_cmp: switch (size) { - case sz_byte: printf ("\tx86_flag_cmpb ((uae_s8)(%s), (uae_s8)(%s));\n", src, dst); break; - case sz_word: printf ("\tx86_flag_cmpw ((uae_s16)(%s), (uae_s16)(%s));\n", src, dst); break; - case sz_long: printf ("\tx86_flag_cmpl ((uae_s32)(%s), (uae_s32)(%s));\n", src, dst); break; + case sz_byte: printf ("\toptflag_cmpb ((uae_s8)(%s), (uae_s8)(%s));\n", src, dst); break; + case sz_word: printf ("\toptflag_cmpw ((uae_s16)(%s), (uae_s16)(%s));\n", src, dst); break; + case sz_long: printf 
("\toptflag_cmpl ((uae_s32)(%s), (uae_s32)(%s));\n", src, dst); break; } - return; + done = 1; + break; default: break; } -#elif defined(M68K_FLAG_OPT) - /* sam: here I'm cloning what X86_ASSEMBLY does */ -#define EXT(size) (size==sz_byte?"b":(size==sz_word?"w":"l")) -#define CAST(size) (size==sz_byte?"uae_s8":(size==sz_word?"uae_s16":"uae_s32")) - switch (type) { - case flag_add: - case flag_sub: - start_brace (); - printf ("\tuae_u32 %s;\n", value); - break; - - default: - break; - } - - switch (type) { - case flag_logical: - if (strcmp (value, "0") == 0) { - printf ("\t*(uae_u16 *)®flags = 4;\n"); /* Z = 1 */ - } else { - printf ("\tm68k_flag_tst (%s, (%s)(%s));\n", - EXT (size), CAST (size), value); - } - return; - - case flag_add: - printf ("\t{uae_u16 ccr;\n"); - printf ("\tm68k_flag_add (%s, (%s)%s, (%s)(%s), (%s)(%s));\n", - EXT (size), CAST (size), value, CAST (size), src, CAST (size), dst); - printf ("\t((uae_u16*)®flags)[1]=((uae_u16*)®flags)[0]=ccr;}\n"); - return; - - case flag_sub: - printf ("\t{uae_u16 ccr;\n"); - printf ("\tm68k_flag_sub (%s, (%s)%s, (%s)(%s), (%s)(%s));\n", - EXT (size), CAST (size), value, CAST (size), src, CAST (size), dst); - printf ("\t((uae_u16*)®flags)[1]=((uae_u16*)®flags)[0]=ccr;}\n"); - return; - - case flag_cmp: - printf ("\tm68k_flag_cmp (%s, (%s)(%s), (%s)(%s));\n", - EXT (size), CAST (size), src, CAST (size), dst); - return; - - default: - break; - } -#elif defined(ACORN_FLAG_OPT) && defined(__GNUC_MINOR__) -/* - * This is new. Might be quite buggy. 
- */ - switch (type) { - case flag_av: - case flag_sv: - case flag_zn: - case flag_addx: - case flag_subx: - break; - - case flag_logical: - if (strcmp (value, "0") == 0) { - /* v=c=n=0 z=1 */ - printf ("\t*(ULONG*)®flags = 0x40000000;\n"); - return; - } else { - start_brace (); - switch (size) { - case sz_byte: - printf ("\tUBYTE ccr;\n"); - printf ("\tULONG shift;\n"); - printf ("\t__asm__(\"mov %%2,%%1,lsl#24\n\ttst %%2,%%2\n\tmov %%0,r15,lsr#24\n\tbic %%0,%%0,#0x30\"\n" - "\t: \"=r\" (ccr) : \"r\" (%s), \"r\" (shift) : \"cc\" );\n", value); - printf ("\t*((UBYTE*)®flags+3) = ccr;\n"); - return; - case sz_word: - printf ("\tUBYTE ccr;\n"); - printf ("\tULONG shift;\n"); - printf ("\t__asm__(\"mov %%2,%%1,lsl#16\n\ttst %%2,%%2\n\tmov %%0,r15,lsr#24\n\tbic %%0,%%0,#0x30\"\n" - "\t: \"=r\" (ccr) : \"r\" ((WORD)%s), \"r\" (shift) : \"cc\" );\n", value); - printf ("\t*((UBYTE*)®flags+3) = ccr;\n"); - return; - case sz_long: - printf ("\tUBYTE ccr;\n"); - printf ("\t__asm__(\"tst %%1,%%1\n\tmov %%0,r15,lsr#24\n\tbic %%0,%%0,#0x30\"\n" - "\t: \"=r\" (ccr) : \"r\" ((LONG)%s) : \"cc\" );\n", value); - printf ("\t*((UBYTE*)®flags+3) = ccr;\n"); - return; - } - } - break; - case flag_add: - if (strcmp (dst, "0") == 0) { - printf ("/* Error! Hier muss Peter noch was machen !!! 
(ADD-Flags) */"); - } else { - start_brace (); - switch (size) { - case sz_byte: - printf ("\tULONG ccr, shift, %s;\n", value); - printf ("\t__asm__(\"mov %%4,%%3,lsl#24\n\tadds %%0,%%4,%%2,lsl#24\n\tmov %%0,%%0,asr#24\n\tmov %%1,r15\n\torr %%1,%%1,%%1,lsr#29\"\n" - "\t: \"=r\" (%s), \"=r\" (ccr) : \"r\" (%s), \"r\" (%s), \"r\" (shift) : \"cc\" );\n", value, src, dst); - printf ("\t*(ULONG*)®flags = ccr;\n"); - return; - case sz_word: - printf ("\tULONG ccr, shift, %s;\n", value); - printf ("\t__asm__(\"mov %%4,%%3,lsl#16\n\tadds %%0,%%4,%%2,lsl#16\n\tmov %%0,%%0,asr#16\n\tmov %%1,r15\n\torr %%1,%%1,%%1,lsr#29\"\n" - "\t: \"=r\" (%s), \"=r\" (ccr) : \"r\" ((WORD)%s), \"r\" ((WORD)%s), \"r\" (shift) : \"cc\" );\n", value, src, dst); - printf ("\t*(ULONG*)®flags = ccr;\n"); - return; - case sz_long: - printf ("\tULONG ccr, %s;\n", value); - printf ("\t__asm__(\"adds %%0,%%3,%%2\n\tmov %%1,r15\n\torr %%1,%%1,%%1,lsr#29\"\n" - "\t: \"=r\" (%s), \"=r\" (ccr) : \"r\" ((LONG)%s), \"r\" ((LONG)%s) : \"cc\" );\n", value, src, dst); - printf ("\t*(ULONG*)®flags = ccr;\n"); - return; - } - } - break; - case flag_sub: - if (strcmp (dst, "0") == 0) { - printf ("/* Error! Hier muss Peter noch was machen !!! 
(SUB-Flags) */"); - } else { - start_brace (); - switch (size) { - case sz_byte: - printf ("\tULONG ccr, shift, %s;\n", value); - printf ("\t__asm__(\"mov %%4,%%3,lsl#24\n\tsubs %%0,%%4,%%2,lsl#24\n\tmov %%0,%%0,asr#24\n\tmov %%1,r15\n\teor %%1,%%1,#0x20000000\n\torr %%1,%%1,%%1,lsr#29\"\n" - "\t: \"=r\" (%s), \"=r\" (ccr) : \"r\" (%s), \"r\" (%s), \"r\" (shift) : \"cc\" );\n", value, src, dst); - printf ("\t*(ULONG*)®flags = ccr;\n"); - return; - case sz_word: - printf ("\tULONG ccr, shift, %s;\n", value); - printf ("\t__asm__(\"mov %%4,%%3,lsl#16\n\tsubs %%0,%%4,%%2,lsl#16\n\tmov %%0,%%0,asr#16\n\tmov %%1,r15\n\teor %%1,%%1,#0x20000000\n\torr %%1,%%1,%%1,lsr#29\"\n" - "\t: \"=r\" (%s), \"=r\" (ccr) : \"r\" ((WORD)%s), \"r\" ((WORD)%s), \"r\" (shift) : \"cc\" );\n", value, src, dst); - printf ("\t*(ULONG*)®flags = ccr;\n"); - return; - case sz_long: - printf ("\tULONG ccr, %s;\n", value); - printf ("\t__asm__(\"subs %%0,%%3,%%2\n\tmov %%1,r15\n\teor %%1,%%1,#0x20000000\n\torr %%1,%%1,%%1,lsr#29\"\n" - "\t: \"=r\" (%s), \"=r\" (ccr) : \"r\" ((LONG)%s), \"r\" ((LONG)%s) : \"cc\" );\n", value, src, dst); - printf ("\t*(ULONG*)®flags = ccr;\n"); - return; - } - } - break; - case flag_cmp: - if (strcmp (dst, "0") == 0) { - printf ("/*Error! Hier muss Peter noch was machen !!! 
(CMP-Flags)*/"); - } else { - start_brace (); - switch (size) { - case sz_byte: - printf ("\tULONG shift, ccr;\n"); - printf ("\t__asm__(\"mov %%3,%%2,lsl#24\n\tcmp %%3,%%1,lsl#24\n\tmov %%0,r15,lsr#24\n\teor %%0,%%0,#0x20\"\n" - "\t: \"=r\" (ccr) : \"r\" (%s), \"r\" (%s), \"r\" (shift) : \"cc\" );\n", src, dst); - printf ("\t*((UBYTE*)®flags+3) = ccr;\n"); - return; - case sz_word: - printf ("\tULONG shift, ccr;\n"); - printf ("\t__asm__(\"mov %%3,%%2,lsl#16\n\tcmp %%3,%%1,lsl#16\n\tmov %%0,r15,lsr#24\n\teor %%0,%%0,#0x20\"\n" - "\t: \"=r\" (ccr) : \"r\" ((WORD)%s), \"r\" ((WORD)%s), \"r\" (shift) : \"cc\" );\n", src, dst); - printf ("\t*((UBYTE*)®flags+3) = ccr;\n"); - return; - case sz_long: - printf ("\tULONG ccr;\n"); - printf ("\t__asm__(\"cmp %%2,%%1\n\tmov %%0,r15,lsr#24\n\teor %%0,%%0,#0x20\"\n" - "\t: \"=r\" (ccr) : \"r\" ((LONG)%s), \"r\" ((LONG)%s) : \"cc\" );\n", src, dst); - printf ("\t*((UBYTE*)®flags+3) = ccr;\n"); - /*printf ("\tprintf (\"%%08x %%08x %%08x\\n\", %s, %s, *((ULONG*)®flags));\n", src, dst); */ - return; - } - } - break; - } -#endif + if (done) + printf("#else\n"); + else + printf("#endif\n"); genflags_normal (type, size, value, src, dst); + if (done) + printf("#endif\n"); } static void force_range_for_rox (const char *var, wordsizes size) @@ -1132,7 +971,7 @@ static const char *cmask (wordsizes size) case sz_byte: return "0x80"; case sz_word: return "0x8000"; case sz_long: return "0x80000000"; - default: abort (); + default: abort (); return NULL; } } @@ -1144,11 +983,10 @@ static int source_is_imm1_8 (struct instr *i) static void gen_opcode (unsigned long int opcode) { struct instr *curi = table68k + opcode; - insn_n_cycles = 2; start_brace (); #if 0 - printf ("uae_u8 *m68k_pc = regs.pc_p;\n"); + printf ("uae_u8 *m68k_pc = m68k_getpc();\n"); #endif m68k_pc_offset = 2; switch (curi->plev) { @@ -1178,16 +1016,16 @@ static void gen_opcode (unsigned long int opcode) case i_OR: case i_AND: case i_EOR: - genamode (curi->smode, "srcreg", 
curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); printf ("\tsrc %c= dst;\n", curi->mnemo == i_OR ? '|' : curi->mnemo == i_AND ? '&' : '^'); genflags (flag_logical, curi->size, "src", "", ""); - genastore ("src", curi->dmode, "dstreg", curi->size, "dst"); + genastore ("src", curi->dmode, "dstreg", curi->size, "dst", XLATE_LOG); break; case i_ORSR: case i_EORSR: printf ("\tMakeSR();\n"); - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); if (curi->size == sz_byte) { printf ("\tsrc &= 0xFF;\n"); } @@ -1196,7 +1034,7 @@ static void gen_opcode (unsigned long int opcode) break; case i_ANDSR: printf ("\tMakeSR();\n"); - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); if (curi->size == sz_byte) { printf ("\tsrc |= 0xFF00;\n"); } @@ -1204,134 +1042,177 @@ static void gen_opcode (unsigned long int opcode) printf ("\tMakeFromSR();\n"); break; case i_SUB: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); genflags (flag_sub, curi->size, "newv", "src", "dst"); - genastore ("newv", curi->dmode, "dstreg", curi->size, "dst"); + genastore ("newv", curi->dmode, "dstreg", curi->size, "dst", XLATE_LOG); break; case i_SUBA: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", sz_long, "dst", 1, 0); + 
genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", sz_long, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); printf ("\tuae_u32 newv = dst - src;\n"); - genastore ("newv", curi->dmode, "dstreg", sz_long, "dst"); + genastore ("newv", curi->dmode, "dstreg", sz_long, "dst", XLATE_LOG); break; case i_SUBX: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode2 (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG, 1); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode2 (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG, 2); start_brace (); printf ("\tuae_u32 newv = dst - src - (GET_XFLG ? 1 : 0);\n"); genflags (flag_subx, curi->size, "newv", "src", "dst"); genflags (flag_zn, curi->size, "newv", "", ""); - genastore ("newv", curi->dmode, "dstreg", curi->size, "dst"); + genastore ("newv", curi->dmode, "dstreg", curi->size, "dst", XLATE_LOG); break; case i_SBCD: - /* Let's hope this works... */ - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode2 (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG, 1); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode2 (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG, 2); start_brace (); printf ("\tuae_u16 newv_lo = (dst & 0xF) - (src & 0xF) - (GET_XFLG ? 
1 : 0);\n"); printf ("\tuae_u16 newv_hi = (dst & 0xF0) - (src & 0xF0);\n"); - printf ("\tuae_u16 newv;\n"); - printf ("\tint cflg;\n"); - printf ("\tif (newv_lo > 9) { newv_lo-=6; newv_hi-=0x10; }\n"); - printf ("\tnewv = newv_hi + (newv_lo & 0xF);"); - printf ("\tSET_CFLG (cflg = (newv_hi & 0x1F0) > 0x90);\n"); + printf ("\tuae_u16 newv, tmp_newv;\n"); + printf ("\tint bcd = 0;\n"); + printf ("\tnewv = tmp_newv = newv_hi + newv_lo;\n"); + printf ("\tif (newv_lo & 0xF0) { newv -= 6; bcd = 6; };\n"); + printf ("\tif ((((dst & 0xFF) - (src & 0xFF) - (GET_XFLG ? 1 : 0)) & 0x100) > 0xFF) { newv -= 0x60; }\n"); + printf ("\tSET_CFLG ((((dst & 0xFF) - (src & 0xFF) - bcd - (GET_XFLG ? 1 : 0)) & 0x300) > 0xFF);\n"); duplicate_carry (); - printf ("\tif (cflg) newv -= 0x60;\n"); - genflags (flag_zn, curi->size, "newv", "", ""); - genflags (flag_sv, curi->size, "newv", "src", "dst"); - genastore ("newv", curi->dmode, "dstreg", curi->size, "dst"); + /* Manual says bits NV are undefined though a real 68030 doesn't change V and 68040/060 don't change both */ + if (cpu_level >= xBCD_KEEPS_N_FLAG) { + if (next_cpu_level < xBCD_KEEPS_N_FLAG) + next_cpu_level = xBCD_KEEPS_N_FLAG - 1; + genflags (flag_z, curi->size, "newv", "", ""); + } else { + genflags (flag_zn, curi->size, "newv", "", ""); + } + if (cpu_level >= xBCD_KEEPS_V_FLAG) { + if (next_cpu_level < xBCD_KEEPS_V_FLAG) + next_cpu_level = xBCD_KEEPS_V_FLAG - 1; + } else { + printf ("\tSET_VFLG ((tmp_newv & 0x80) != 0 && (newv & 0x80) == 0);\n"); + } + genastore ("newv", curi->dmode, "dstreg", curi->size, "dst", XLATE_LOG); break; case i_ADD: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); genflags (flag_add, curi->size, "newv", "src", 
"dst"); - genastore ("newv", curi->dmode, "dstreg", curi->size, "dst"); + genastore ("newv", curi->dmode, "dstreg", curi->size, "dst", XLATE_LOG); break; case i_ADDA: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", sz_long, "dst", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", sz_long, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); printf ("\tuae_u32 newv = dst + src;\n"); - genastore ("newv", curi->dmode, "dstreg", sz_long, "dst"); + genastore ("newv", curi->dmode, "dstreg", sz_long, "dst", XLATE_LOG); break; case i_ADDX: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode2 (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG, 1); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode2 (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG, 2); start_brace (); printf ("\tuae_u32 newv = dst + src + (GET_XFLG ? 1 : 0);\n"); genflags (flag_addx, curi->size, "newv", "src", "dst"); genflags (flag_zn, curi->size, "newv", "", ""); - genastore ("newv", curi->dmode, "dstreg", curi->size, "dst"); + genastore ("newv", curi->dmode, "dstreg", curi->size, "dst", XLATE_LOG); break; case i_ABCD: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode2 (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG, 1); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode2 (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG, 2); start_brace (); printf ("\tuae_u16 newv_lo = (src & 0xF) + (dst & 0xF) + (GET_XFLG ? 
1 : 0);\n"); printf ("\tuae_u16 newv_hi = (src & 0xF0) + (dst & 0xF0);\n"); - printf ("\tuae_u16 newv;\n"); + printf ("\tuae_u16 newv, tmp_newv;\n"); printf ("\tint cflg;\n"); - printf ("\tif (newv_lo > 9) { newv_lo +=6; }\n"); - printf ("\tnewv = newv_hi + newv_lo;"); - printf ("\tSET_CFLG (cflg = (newv & 0x1F0) > 0x90);\n"); - duplicate_carry (); + printf ("\tnewv = tmp_newv = newv_hi + newv_lo;\n"); + printf ("\tif (newv_lo > 9) { newv += 6; }\n"); + printf ("\tcflg = (newv & 0x3F0) > 0x90;\n"); printf ("\tif (cflg) newv += 0x60;\n"); - genflags (flag_zn, curi->size, "newv", "", ""); - genflags (flag_sv, curi->size, "newv", "src", "dst"); - genastore ("newv", curi->dmode, "dstreg", curi->size, "dst"); + printf ("\tSET_CFLG (cflg);\n"); + duplicate_carry (); + /* Manual says bits NV are undefined though a real 68030 doesn't change V and 68040/060 don't change both */ + if (cpu_level >= xBCD_KEEPS_N_FLAG) { + if (next_cpu_level < xBCD_KEEPS_N_FLAG) + next_cpu_level = xBCD_KEEPS_N_FLAG - 1; + genflags (flag_z, curi->size, "newv", "", ""); + } else { + genflags (flag_zn, curi->size, "newv", "", ""); + } + if (cpu_level >= xBCD_KEEPS_V_FLAG) { + if (next_cpu_level < xBCD_KEEPS_V_FLAG) + next_cpu_level = xBCD_KEEPS_V_FLAG - 1; + } else { + printf ("\tSET_VFLG ((tmp_newv & 0x80) == 0 && (newv & 0x80) != 0);\n"); + } + genastore ("newv", curi->dmode, "dstreg", curi->size, "dst", XLATE_LOG); break; case i_NEG: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); genflags (flag_sub, curi->size, "dst", "src", "0"); - genastore ("dst", curi->smode, "srcreg", curi->size, "src"); + genastore ("dst", curi->smode, "srcreg", curi->size, "src", XLATE_LOG); break; case i_NEGX: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); 
printf ("\tuae_u32 newv = 0 - src - (GET_XFLG ? 1 : 0);\n"); genflags (flag_subx, curi->size, "newv", "src", "0"); genflags (flag_zn, curi->size, "newv", "", ""); - genastore ("newv", curi->smode, "srcreg", curi->size, "src"); + genastore ("newv", curi->smode, "srcreg", curi->size, "src", XLATE_LOG); break; case i_NBCD: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); printf ("\tuae_u16 newv_lo = - (src & 0xF) - (GET_XFLG ? 1 : 0);\n"); printf ("\tuae_u16 newv_hi = - (src & 0xF0);\n"); printf ("\tuae_u16 newv;\n"); - printf ("\tint cflg;\n"); - printf ("\tif (newv_lo > 9) { newv_lo-=6; newv_hi-=0x10; }\n"); - printf ("\tnewv = newv_hi + (newv_lo & 0xF);"); - printf ("\tSET_CFLG (cflg = (newv_hi & 0x1F0) > 0x90);\n"); - duplicate_carry(); + printf ("\tint cflg, tmp_newv;\n"); + printf ("\ttmp_newv = newv_hi + newv_lo;\n"); + printf ("\tif (newv_lo > 9) { newv_lo -= 6; }\n"); + printf ("\tnewv = newv_hi + newv_lo;\n"); + printf ("\tcflg = (newv & 0x1F0) > 0x90;\n"); printf ("\tif (cflg) newv -= 0x60;\n"); - genflags (flag_zn, curi->size, "newv", "", ""); - genastore ("newv", curi->smode, "srcreg", curi->size, "src"); + printf ("\tSET_CFLG (cflg);\n"); + duplicate_carry(); + /* Manual says bits NV are undefined though a real 68030 doesn't change V and 68040/060 don't change both */ + if (cpu_level >= xBCD_KEEPS_N_FLAG) { + if (next_cpu_level < xBCD_KEEPS_N_FLAG) + next_cpu_level = xBCD_KEEPS_N_FLAG - 1; + genflags (flag_z, curi->size, "newv", "", ""); + } else { + genflags (flag_zn, curi->size, "newv", "", ""); + } + if (cpu_level >= xBCD_KEEPS_V_FLAG) { + if (next_cpu_level < xBCD_KEEPS_V_FLAG) + next_cpu_level = xBCD_KEEPS_V_FLAG - 1; + } else { + printf ("\tSET_VFLG ((tmp_newv & 0x80) != 0 && (newv & 0x80) == 0);\n"); + } + genastore ("newv", curi->smode, "srcreg", curi->size, "src", XLATE_LOG); break; case i_CLR: - genamode 
(curi->smode, "srcreg", curi->size, "src", 2, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG); genflags (flag_logical, curi->size, "0", "", ""); - genastore ("0", curi->smode, "srcreg", curi->size, "src"); + genastore ("0", curi->smode, "srcreg", curi->size, "src", XLATE_LOG); break; case i_NOT: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); printf ("\tuae_u32 dst = ~src;\n"); genflags (flag_logical, curi->size, "dst", "", ""); - genastore ("dst", curi->smode, "srcreg", curi->size, "src"); + genastore ("dst", curi->smode, "srcreg", curi->size, "src", XLATE_LOG); break; case i_TST: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); genflags (flag_logical, curi->size, "src", "", ""); break; case i_BTST: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); if (curi->size == sz_byte) printf ("\tsrc &= 7;\n"); else @@ -1339,55 +1220,55 @@ static void gen_opcode (unsigned long int opcode) printf ("\tSET_ZFLG (1 ^ ((dst >> src) & 1));\n"); break; case i_BCHG: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); if (curi->size == sz_byte) printf ("\tsrc &= 7;\n"); else printf ("\tsrc &= 31;\n"); printf ("\tdst ^= (1 << src);\n"); - 
printf ("\tSET_ZFLG ((dst & (1 << src)) >> src);\n"); - genastore ("dst", curi->dmode, "dstreg", curi->size, "dst"); + printf ("\tSET_ZFLG (((uae_u32)dst & (1 << src)) >> src);\n"); + genastore ("dst", curi->dmode, "dstreg", curi->size, "dst", XLATE_LOG); break; case i_BCLR: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); if (curi->size == sz_byte) printf ("\tsrc &= 7;\n"); else printf ("\tsrc &= 31;\n"); printf ("\tSET_ZFLG (1 ^ ((dst >> src) & 1));\n"); printf ("\tdst &= ~(1 << src);\n"); - genastore ("dst", curi->dmode, "dstreg", curi->size, "dst"); + genastore ("dst", curi->dmode, "dstreg", curi->size, "dst", XLATE_LOG); break; case i_BSET: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); if (curi->size == sz_byte) printf ("\tsrc &= 7;\n"); else printf ("\tsrc &= 31;\n"); printf ("\tSET_ZFLG (1 ^ ((dst >> src) & 1));\n"); printf ("\tdst |= (1 << src);\n"); - genastore ("dst", curi->dmode, "dstreg", curi->size, "dst"); + genastore ("dst", curi->dmode, "dstreg", curi->size, "dst", XLATE_LOG); break; case i_CMPM: case i_CMP: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); genflags (flag_cmp, curi->size, "newv", "src", "dst"); 
break; case i_CMPA: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", sz_long, "dst", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", sz_long, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); genflags (flag_cmp, sz_long, "newv", "src", "dst"); break; /* The next two are coded a little unconventional, but they are doing * weird things... */ case i_MVPRM: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); printf ("\tuaecptr memp = m68k_areg(regs, dstreg) + (uae_s32)(uae_s16)%s;\n", gen_nextiword ()); if (curi->size == sz_word) { @@ -1399,41 +1280,45 @@ static void gen_opcode (unsigned long int opcode) break; case i_MVPMR: printf ("\tuaecptr memp = m68k_areg(regs, srcreg) + (uae_s32)(uae_s16)%s;\n", gen_nextiword ()); - genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG); if (curi->size == sz_word) { - printf ("\tuae_u16 val = (get_byte(memp) << 8) + get_byte(memp + 2);\n"); + printf ("\tuae_u16 val = get_byte(memp) << 8;\n"); + printf ("\t val |= get_byte(memp + 2);\n"); } else { - printf ("\tuae_u32 val = (get_byte(memp) << 24) + (get_byte(memp + 2) << 16)\n"); - printf (" + (get_byte(memp + 4) << 8) + get_byte(memp + 6);\n"); + printf ("\tuae_u32 val = get_byte(memp) << 24;\n"); + printf ("\t val |= get_byte(memp + 2) << 16;\n"); + printf ("\t val |= get_byte(memp + 4) << 8;\n"); + printf ("\t val |= get_byte(memp + 6);\n"); } - genastore ("val", curi->dmode, "dstreg", curi->size, "dst"); + genastore ("val", curi->dmode, "dstreg", curi->size, "dst", XLATE_LOG); break; case i_MOVE: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", 
curi->size, "dst", 2, 0); + genamode2 (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG, 1); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode2 (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG, 2); genflags (flag_logical, curi->size, "src", "", ""); - genastore ("src", curi->dmode, "dstreg", curi->size, "dst"); + genastore ("src", curi->dmode, "dstreg", curi->size, "dst", XLATE_LOG); break; case i_MOVEA: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG); if (curi->size == sz_word) { printf ("\tuae_u32 val = (uae_s32)(uae_s16)src;\n"); } else { printf ("\tuae_u32 val = src;\n"); } - genastore ("val", curi->dmode, "dstreg", sz_long, "dst"); + genastore ("val", curi->dmode, "dstreg", sz_long, "dst", XLATE_LOG); break; case i_MVSR2: - genamode (curi->smode, "srcreg", sz_word, "src", 2, 0); + genamode (curi->smode, "srcreg", sz_word, "src", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG); printf ("\tMakeSR();\n"); if (curi->size == sz_byte) - genastore ("regs.sr & 0xff", curi->smode, "srcreg", sz_word, "src"); + genastore ("regs.sr & 0xff", curi->smode, "srcreg", sz_word, "src", XLATE_LOG); else - genastore ("regs.sr", curi->smode, "srcreg", sz_word, "src"); + genastore ("regs.sr", curi->smode, "srcreg", sz_word, "src", XLATE_LOG); break; case i_MV2SR: - genamode (curi->smode, "srcreg", sz_word, "src", 1, 0); + genamode (curi->smode, "srcreg", sz_word, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); if (curi->size == sz_byte) printf ("\tMakeSR();\n\tregs.sr &= 0xFF00;\n\tregs.sr |= src & 0xFF;\n"); else { @@ -1442,31 +1327,31 @@ static 
void gen_opcode (unsigned long int opcode) printf ("\tMakeFromSR();\n"); break; case i_SWAP: - genamode (curi->smode, "srcreg", sz_long, "src", 1, 0); + genamode (curi->smode, "srcreg", sz_long, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); printf ("\tuae_u32 dst = ((src >> 16)&0xFFFF) | ((src&0xFFFF)<<16);\n"); genflags (flag_logical, sz_long, "dst", "", ""); - genastore ("dst", curi->smode, "srcreg", sz_long, "src"); + genastore ("dst", curi->smode, "srcreg", sz_long, "src", XLATE_LOG); break; case i_EXG: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); - genastore ("dst", curi->smode, "srcreg", curi->size, "src"); - genastore ("src", curi->dmode, "dstreg", curi->size, "dst"); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genastore ("dst", curi->smode, "srcreg", curi->size, "src", XLATE_LOG); + genastore ("src", curi->dmode, "dstreg", curi->size, "dst", XLATE_LOG); break; case i_EXT: - genamode (curi->smode, "srcreg", sz_long, "src", 1, 0); + genamode (curi->smode, "srcreg", sz_long, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { - case sz_byte: printf ("\tuae_u32 dst = (uae_s32)(uae_s8)src;\n"); break; - case sz_word: printf ("\tuae_u16 dst = (uae_s16)(uae_s8)src;\n"); break; - case sz_long: printf ("\tuae_u32 dst = (uae_s32)(uae_s16)src;\n"); break; - default: abort (); + case sz_byte: printf ("\tuae_u32 dst = (uae_s32)(uae_s8)src;\n"); break; + case sz_word: printf ("\tuae_u16 dst = (uae_s16)(uae_s8)src;\n"); break; + case sz_long: printf ("\tuae_u32 dst = (uae_s32)(uae_s16)src;\n"); break; + default: abort (); } genflags (flag_logical, curi->size == sz_word ? sz_word : sz_long, "dst", "", ""); genastore ("dst", curi->smode, "srcreg", - curi->size == sz_word ? 
sz_word : sz_long, "src"); + curi->size == sz_word ? sz_word : sz_long, "src", XLATE_LOG); break; case i_MVMEL: genmovemel (opcode); @@ -1475,33 +1360,51 @@ static void gen_opcode (unsigned long int opcode) genmovemle (opcode); break; case i_TRAP: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - sync_m68k_pc (); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + gen_set_fault_pc (); printf ("\tException(src+32,0);\n"); - m68k_pc_offset = 0; break; case i_MVR2USP: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); printf ("\tregs.usp = src;\n"); break; case i_MVUSP2R: - genamode (curi->smode, "srcreg", curi->size, "src", 2, 0); - genastore ("regs.usp", curi->smode, "srcreg", curi->size, "src"); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG); + genastore ("regs.usp", curi->smode, "srcreg", curi->size, "src", XLATE_LOG); break; case i_RESET: + printf ("\tAtariReset();\n"); break; case i_NOP: break; case i_STOP: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - printf ("\tregs.sr = src;\n"); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + /* + * STOP undocumented features: + * if SR is not set: + * 68000 (68010?): Update SR, increase PC and then cause privilege violation exception (handled in newcpu) + * 68000 (68010?): Traced STOP also runs 4 cycles faster. 
+ * 68020 68030: STOP works normally + * 68040 68060: Immediate privilege violation exception + */ + printf ("\tuae_u16 sr = src;\n"); + if (cpu_level >= 4) { + printf("\tif (!(sr & 0x2000)) {\n"); + printf ("m68k_incpc(%d);\n", m68k_pc_offset); + printf("\t\tException(8,0); goto %s;\n", endlabelstr); + printf("\t}\n"); + } + printf("\tregs.sr = sr;\n"); printf ("\tMakeFromSR();\n"); printf ("\tm68k_setstopped(1);\n"); + sync_m68k_pc (); + /* STOP does not prefetch anything */ + /* did_prefetch = -1; */ break; case i_RTE: if (cpu_level == 0) { - genamode (Aipi, "7", sz_word, "sr", 1, 0); - genamode (Aipi, "7", sz_long, "pc", 1, 0); + genamode (Aipi, "7", sz_word, "sr", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (Aipi, "7", sz_long, "pc", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); printf ("\tregs.sr = sr; m68k_setpc_rte(pc);\n"); fill_prefetch_0 (); printf ("\tMakeFromSR();\n"); @@ -1510,14 +1413,14 @@ static void gen_opcode (unsigned long int opcode) if (next_cpu_level < 0) next_cpu_level = 0; printf ("\tuae_u16 newsr; uae_u32 newpc; for (;;) {\n"); - genamode (Aipi, "7", sz_word, "sr", 1, 0); - genamode (Aipi, "7", sz_long, "pc", 1, 0); - genamode (Aipi, "7", sz_word, "format", 1, 0); + genamode (Aipi, "7", sz_word, "sr", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (Aipi, "7", sz_long, "pc", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (Aipi, "7", sz_word, "format", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); printf ("\tnewsr = sr; newpc = pc;\n"); printf ("\tif ((format & 0xF000) == 0x0000) { break; }\n"); printf ("\telse if ((format & 0xF000) == 0x1000) { ; }\n"); printf ("\telse if ((format & 0xF000) == 0x2000) { m68k_areg(regs, 7) += 4; break; }\n"); - printf ("\telse if ((format & 0xF000) == 0x3000) { m68k_areg(regs, 7) += 4; break; }\n"); +// printf ("\telse if ((format & 0xF000) == 0x3000) { m68k_areg(regs, 7) += 4; break; }\n"); printf ("\telse if ((format & 0xF000) == 0x7000) { m68k_areg(regs, 7) += 
52; break; }\n"); printf ("\telse if ((format & 0xF000) == 0x8000) { m68k_areg(regs, 7) += 50; break; }\n"); printf ("\telse if ((format & 0xF000) == 0x9000) { m68k_areg(regs, 7) += 12; break; }\n"); @@ -1535,8 +1438,8 @@ static void gen_opcode (unsigned long int opcode) m68k_pc_offset = 0; break; case i_RTD: - genamode (Aipi, "7", sz_long, "pc", 1, 0); - genamode (curi->smode, "srcreg", curi->size, "offs", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "offs", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (Aipi, "7", sz_long, "pc", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); printf ("\tm68k_areg(regs, 7) += offs;\n"); printf ("\tm68k_setpc_rte(pc);\n"); fill_prefetch_0 (); @@ -1544,18 +1447,18 @@ static void gen_opcode (unsigned long int opcode) m68k_pc_offset = 0; break; case i_LINK: - genamode (Apdi, "7", sz_long, "old", 2, 0); - genamode (curi->smode, "srcreg", sz_long, "src", 1, 0); - genastore ("src", Apdi, "7", sz_long, "old"); - genastore ("m68k_areg(regs, 7)", curi->smode, "srcreg", sz_long, "src"); - genamode (curi->dmode, "dstreg", curi->size, "offs", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "offs", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (Apdi, "7", sz_long, "old", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->smode, "srcreg", sz_long, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genastore ("m68k_areg(regs, 7)", curi->smode, "srcreg", sz_long, "src", XLATE_LOG); printf ("\tm68k_areg(regs, 7) += offs;\n"); + genastore ("src", Apdi, "7", sz_long, "old", XLATE_LOG); break; case i_UNLK: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); printf ("\tm68k_areg(regs, 7) = src;\n"); - genamode (Aipi, "7", sz_long, "old", 1, 0); - genastore ("old", curi->smode, "srcreg", curi->size, "src"); + genamode (Aipi, "7", sz_long, "old", GENA_GETV_FETCH, 
GENA_MOVEM_DO_INC, XLATE_LOG); + genastore ("old", curi->smode, "srcreg", curi->size, "src", XLATE_LOG); break; case i_RTS: printf ("\tm68k_do_rts();\n"); @@ -1563,14 +1466,16 @@ static void gen_opcode (unsigned long int opcode) m68k_pc_offset = 0; break; case i_TRAPV: + printf ("\tuaecptr oldpc = m68k_getpc();\n"); sync_m68k_pc (); - printf ("\tif (GET_VFLG) { Exception(7,m68k_getpc()); goto %s; }\n", endlabelstr); + printf ("\tif (GET_VFLG) { Exception(7,oldpc); goto %s; }\n", endlabelstr); need_endlabel = 1; break; case i_RTR: printf ("\tMakeSR();\n"); - genamode (Aipi, "7", sz_word, "sr", 1, 0); - genamode (Aipi, "7", sz_long, "pc", 1, 0); + genamode2 (Aipi, "7", sz_word, "sr", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG, 1); + genamode (Aipi, "7", sz_long, "pc", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode2 (Aipi, "7", sz_word, "sr", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG, 2); printf ("\tregs.sr &= 0xFF00; sr &= 0xFF;\n"); printf ("\tregs.sr |= sr; m68k_setpc(pc);\n"); fill_prefetch_0 (); @@ -1578,19 +1483,19 @@ static void gen_opcode (unsigned long int opcode) m68k_pc_offset = 0; break; case i_JSR: - genamode (curi->smode, "srcreg", curi->size, "src", 0, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_NO_FETCH, GENA_MOVEM_DO_INC, XLATE_PHYS); printf ("\tm68k_do_jsr(m68k_getpc() + %d, srca);\n", m68k_pc_offset); fill_prefetch_0 (); m68k_pc_offset = 0; break; case i_JMP: - genamode (curi->smode, "srcreg", curi->size, "src", 0, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_NO_FETCH, GENA_MOVEM_DO_INC, XLATE_PHYS); printf ("\tm68k_setpc(srca);\n"); fill_prefetch_0 (); m68k_pc_offset = 0; break; case i_BSR: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_PHYS); printf ("\tuae_s32 s = (uae_s32)src + 2;\n"); if (using_exception_3) { printf ("\tif (src & 1) {\n"); @@ -1618,8 +1523,8 @@ static 
void gen_opcode (unsigned long int opcode) next_cpu_level = 1; } } - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - printf ("\tif (!cctrue(%d)) goto didnt_jump;\n", curi->cc); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_PHYS); + printf ("\tif (!cctrue(%d)) goto didnt_jump_%lx;\n", curi->cc, opcode); if (using_exception_3) { printf ("\tif (src & 1) {\n"); printf ("\t\tlast_addr_for_exception_3 = m68k_getpc() + 2;\n"); @@ -1630,26 +1535,26 @@ static void gen_opcode (unsigned long int opcode) } printf ("\tm68k_incpc ((uae_s32)src + 2);\n"); fill_prefetch_0 (); - printf ("\tgoto %s;\n", endlabelstr); - printf ("didnt_jump:;\n"); + printf ("return;\n"); + printf ("didnt_jump_%lx:;\n", opcode); need_endlabel = 1; break; case i_LEA: - genamode (curi->smode, "srcreg", curi->size, "src", 0, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0); - genastore ("srca", curi->dmode, "dstreg", curi->size, "dst"); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_NO_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG); + genastore ("srca", curi->dmode, "dstreg", curi->size, "dst", XLATE_LOG); break; case i_PEA: - genamode (curi->smode, "srcreg", curi->size, "src", 0, 0); - genamode (Apdi, "7", sz_long, "dst", 2, 0); - genastore ("srca", Apdi, "7", sz_long, "dst"); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_NO_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (Apdi, "7", sz_long, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG); + genastore ("srca", Apdi, "7", sz_long, "dst", XLATE_LOG); break; case i_DBcc: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "offs", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", 
curi->size, "offs", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); printf ("\tif (!cctrue(%d)) {\n", curi->cc); - genastore ("(src-1)", curi->smode, "srcreg", curi->size, "src"); + genastore ("(src-1)", curi->smode, "srcreg", curi->size, "src", XLATE_LOG); printf ("\t\tif (src) {\n"); if (using_exception_3) { @@ -1662,22 +1567,25 @@ static void gen_opcode (unsigned long int opcode) } printf ("\t\t\tm68k_incpc((uae_s32)offs + 2);\n"); fill_prefetch_0 (); - printf ("\t\tgoto %s;\n", endlabelstr); + printf ("return;\n"); printf ("\t\t}\n"); printf ("\t}\n"); need_endlabel = 1; break; case i_Scc: - genamode (curi->smode, "srcreg", curi->size, "src", 2, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); printf ("\tint val = cctrue(%d) ? 0xff : 0;\n", curi->cc); - genastore ("val", curi->smode, "srcreg", curi->size, "src"); + genastore ("val", curi->smode, "srcreg", curi->size, "src", XLATE_LOG); break; case i_DIVU: printf ("\tuaecptr oldpc = m68k_getpc();\n"); - genamode (curi->smode, "srcreg", sz_word, "src", 1, 0); - genamode (curi->dmode, "dstreg", sz_long, "dst", 1, 0); - printf ("\tif(src == 0) { Exception(5,oldpc); goto %s; } else {\n", endlabelstr); + genamode (curi->smode, "srcreg", sz_word, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", sz_long, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + sync_m68k_pc (); + /* Clear V flag when dividing by zero - Alcatraz Odyssey demo depends + * on this (actually, it's doing a DIVS). */ + printf ("\tif (src == 0) { SET_VFLG (0); Exception (5, oldpc); goto %s; } else {\n", endlabelstr); printf ("\tuae_u32 newv = (uae_u32)dst / (uae_u32)(uae_u16)src;\n"); printf ("\tuae_u32 rem = (uae_u32)dst %% (uae_u32)(uae_u16)src;\n"); /* The N flag appears to be set each time there is an overflow. 
@@ -1685,51 +1593,48 @@ static void gen_opcode (unsigned long int opcode) printf ("\tif (newv > 0xffff) { SET_VFLG (1); SET_NFLG (1); SET_CFLG (0); } else\n\t{\n"); genflags (flag_logical, sz_word, "newv", "", ""); printf ("\tnewv = (newv & 0xffff) | ((uae_u32)rem << 16);\n"); - genastore ("newv", curi->dmode, "dstreg", sz_long, "dst"); + genastore ("newv", curi->dmode, "dstreg", sz_long, "dst", XLATE_LOG); printf ("\t}\n"); printf ("\t}\n"); - insn_n_cycles += 68; need_endlabel = 1; break; case i_DIVS: printf ("\tuaecptr oldpc = m68k_getpc();\n"); - genamode (curi->smode, "srcreg", sz_word, "src", 1, 0); - genamode (curi->dmode, "dstreg", sz_long, "dst", 1, 0); - printf ("\tif(src == 0) { Exception(5,oldpc); goto %s; } else {\n", endlabelstr); + genamode (curi->smode, "srcreg", sz_word, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", sz_long, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + sync_m68k_pc (); + printf ("\tif (src == 0) { SET_VFLG (0); Exception(5,oldpc); goto %s; } else {\n", endlabelstr); printf ("\tuae_s32 newv = (uae_s32)dst / (uae_s32)(uae_s16)src;\n"); printf ("\tuae_u16 rem = (uae_s32)dst %% (uae_s32)(uae_s16)src;\n"); printf ("\tif ((newv & 0xffff8000) != 0 && (newv & 0xffff8000) != 0xffff8000) { SET_VFLG (1); SET_NFLG (1); SET_CFLG (0); } else\n\t{\n"); printf ("\tif (((uae_s16)rem < 0) != ((uae_s32)dst < 0)) rem = -rem;\n"); genflags (flag_logical, sz_word, "newv", "", ""); printf ("\tnewv = (newv & 0xffff) | ((uae_u32)rem << 16);\n"); - genastore ("newv", curi->dmode, "dstreg", sz_long, "dst"); + genastore ("newv", curi->dmode, "dstreg", sz_long, "dst", XLATE_LOG); printf ("\t}\n"); printf ("\t}\n"); - insn_n_cycles += 72; need_endlabel = 1; break; case i_MULU: - genamode (curi->smode, "srcreg", sz_word, "src", 1, 0); - genamode (curi->dmode, "dstreg", sz_word, "dst", 1, 0); + genamode (curi->smode, "srcreg", sz_word, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode 
(curi->dmode, "dstreg", sz_word, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); printf ("\tuae_u32 newv = (uae_u32)(uae_u16)dst * (uae_u32)(uae_u16)src;\n"); genflags (flag_logical, sz_long, "newv", "", ""); - genastore ("newv", curi->dmode, "dstreg", sz_long, "dst"); - insn_n_cycles += 32; + genastore ("newv", curi->dmode, "dstreg", sz_long, "dst", XLATE_LOG); break; case i_MULS: - genamode (curi->smode, "srcreg", sz_word, "src", 1, 0); - genamode (curi->dmode, "dstreg", sz_word, "dst", 1, 0); + genamode (curi->smode, "srcreg", sz_word, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", sz_word, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); printf ("\tuae_u32 newv = (uae_s32)(uae_s16)dst * (uae_s32)(uae_s16)src;\n"); genflags (flag_logical, sz_long, "newv", "", ""); - genastore ("newv", curi->dmode, "dstreg", sz_long, "dst"); - insn_n_cycles += 32; + genastore ("newv", curi->dmode, "dstreg", sz_long, "dst", XLATE_LOG); break; case i_CHK: printf ("\tuaecptr oldpc = m68k_getpc();\n"); - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); printf ("\tif ((uae_s32)dst < 0) { SET_NFLG (1); Exception(6,oldpc); goto %s; }\n", endlabelstr); printf ("\telse if (dst > src) { SET_NFLG (0); Exception(6,oldpc); goto %s; }\n", endlabelstr); need_endlabel = 1; @@ -1737,8 +1642,8 @@ static void gen_opcode (unsigned long int opcode) case i_CHK2: printf ("\tuaecptr oldpc = m68k_getpc();\n"); - genamode (curi->smode, "srcreg", curi->size, "extra", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0); + genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode 
(curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG); printf ("\t{uae_s32 upper,lower,reg = regs.regs[(extra >> 12) & 15];\n"); switch (curi->size) { case sz_byte: @@ -1756,14 +1661,14 @@ static void gen_opcode (unsigned long int opcode) abort (); } printf ("\tSET_ZFLG (upper == reg || lower == reg);\n"); - printf ("\tSET_CFLG (lower <= upper ? reg < lower || reg > upper : reg > upper || reg < lower);\n"); + printf ("\tSET_CFLG_ALWAYS (lower <= upper ? reg < lower || reg > upper : reg > upper || reg < lower);\n"); printf ("\tif ((extra & 0x800) && GET_CFLG) { Exception(6,oldpc); goto %s; }\n}\n", endlabelstr); need_endlabel = 1; break; case i_ASR: - genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break; @@ -1792,11 +1697,11 @@ static void gen_opcode (unsigned long int opcode) printf ("\t\tval &= %s;\n", bit_mask (curi->size)); printf ("\t}\n"); genflags (flag_logical_noclobber, curi->size, "val", "", ""); - genastore ("val", curi->dmode, "dstreg", curi->size, "data"); + genastore ("val", curi->dmode, "dstreg", curi->size, "data", XLATE_LOG); break; case i_ASL: - genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break; @@ -1828,11 +1733,11 @@ static void gen_opcode (unsigned long int opcode) 
printf ("\t\tval &= %s;\n", bit_mask (curi->size)); printf ("\t}\n"); genflags (flag_logical_noclobber, curi->size, "val", "", ""); - genastore ("val", curi->dmode, "dstreg", curi->size, "data"); + genastore ("val", curi->dmode, "dstreg", curi->size, "data", XLATE_LOG); break; case i_LSR: - genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break; @@ -1857,11 +1762,11 @@ static void gen_opcode (unsigned long int opcode) printf ("\t\tval >>= 1;\n"); printf ("\t}\n"); genflags (flag_logical_noclobber, curi->size, "val", "", ""); - genastore ("val", curi->dmode, "dstreg", curi->size, "data"); + genastore ("val", curi->dmode, "dstreg", curi->size, "data", XLATE_LOG); break; case i_LSL: - genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break; @@ -1887,11 +1792,11 @@ static void gen_opcode (unsigned long int opcode) printf ("\tval &= %s;\n", bit_mask (curi->size)); printf ("\t}\n"); genflags (flag_logical_noclobber, curi->size, "val", "", ""); - genastore ("val", curi->dmode, "dstreg", curi->size, "data"); + genastore ("val", curi->dmode, "dstreg", curi->size, "data", XLATE_LOG); break; case i_ROL: - genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0); + genamode 
(curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break; @@ -1914,11 +1819,11 @@ static void gen_opcode (unsigned long int opcode) printf ("\tSET_CFLG (val & 1);\n"); printf ("}\n"); genflags (flag_logical_noclobber, curi->size, "val", "", ""); - genastore ("val", curi->dmode, "dstreg", curi->size, "data"); + genastore ("val", curi->dmode, "dstreg", curi->size, "data", XLATE_LOG); break; case i_ROR: - genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break; @@ -1941,11 +1846,11 @@ static void gen_opcode (unsigned long int opcode) printf ("\tSET_CFLG ((val & %s) >> %d);\n", cmask (curi->size), bit_size (curi->size) - 1); printf ("\t}\n"); genflags (flag_logical_noclobber, curi->size, "val", "", ""); - genastore ("val", curi->dmode, "dstreg", curi->size, "data"); + genastore ("val", curi->dmode, "dstreg", curi->size, "data", XLATE_LOG); break; case i_ROXL: - genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break; @@ -1955,12 +1860,12 @@ static void gen_opcode (unsigned long int opcode) } printf 
("\tcnt &= 63;\n"); printf ("\tCLEAR_CZNV;\n"); - if (! source_is_imm1_8 (curi)) - force_range_for_rox ("cnt", curi->size); if (source_is_imm1_8 (curi)) printf ("{"); - else + else { + force_range_for_rox ("cnt", curi->size); printf ("\tif (cnt > 0) {\n"); + } printf ("\tcnt--;\n"); printf ("\t{\n\tuae_u32 carry;\n"); printf ("\tuae_u32 loval = val >> (%d - cnt);\n", bit_size (curi->size) - 1); @@ -1971,11 +1876,11 @@ static void gen_opcode (unsigned long int opcode) printf ("\t} }\n"); printf ("\tSET_CFLG (GET_XFLG);\n"); genflags (flag_logical_noclobber, curi->size, "val", "", ""); - genastore ("val", curi->dmode, "dstreg", curi->size, "data"); + genastore ("val", curi->dmode, "dstreg", curi->size, "data", XLATE_LOG); break; case i_ROXR: - genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break; @@ -1985,12 +1890,12 @@ static void gen_opcode (unsigned long int opcode) } printf ("\tcnt &= 63;\n"); printf ("\tCLEAR_CZNV;\n"); - if (! 
source_is_imm1_8 (curi)) - force_range_for_rox ("cnt", curi->size); if (source_is_imm1_8 (curi)) printf ("{"); - else + else { + force_range_for_rox ("cnt", curi->size); printf ("\tif (cnt > 0) {\n"); + } printf ("\tcnt--;\n"); printf ("\t{\n\tuae_u32 carry;\n"); printf ("\tuae_u32 hival = (val << 1) | GET_XFLG;\n"); @@ -2004,10 +1909,10 @@ static void gen_opcode (unsigned long int opcode) printf ("\t} }\n"); printf ("\tSET_CFLG (GET_XFLG);\n"); genflags (flag_logical_noclobber, curi->size, "val", "", ""); - genastore ("val", curi->dmode, "dstreg", curi->size, "data"); + genastore ("val", curi->dmode, "dstreg", curi->size, "data", XLATE_LOG); break; case i_ASRW: - genamode (curi->smode, "srcreg", curi->size, "data", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break; @@ -2021,10 +1926,10 @@ static void gen_opcode (unsigned long int opcode) genflags (flag_logical, curi->size, "val", "", ""); printf ("\tSET_CFLG (cflg);\n"); duplicate_carry (); - genastore ("val", curi->smode, "srcreg", curi->size, "data"); + genastore ("val", curi->smode, "srcreg", curi->size, "data", XLATE_LOG); break; case i_ASLW: - genamode (curi->smode, "srcreg", curi->size, "data", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break; @@ -2041,10 +1946,10 @@ static void gen_opcode (unsigned long int opcode) duplicate_carry (); printf ("\tSET_VFLG (GET_VFLG | (sign2 != sign));\n"); - genastore ("val", curi->smode, "srcreg", curi->size, "data"); + genastore ("val", curi->smode, "srcreg", curi->size, "data", XLATE_LOG); break; case i_LSRW: - genamode (curi->smode, "srcreg", curi->size, "data", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "data", GENA_GETV_FETCH, 
GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break; @@ -2057,10 +1962,10 @@ static void gen_opcode (unsigned long int opcode) genflags (flag_logical, curi->size, "val", "", ""); printf ("SET_CFLG (carry);\n"); duplicate_carry (); - genastore ("val", curi->smode, "srcreg", curi->size, "data"); + genastore ("val", curi->smode, "srcreg", curi->size, "data", XLATE_LOG); break; case i_LSLW: - genamode (curi->smode, "srcreg", curi->size, "data", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { case sz_byte: printf ("\tuae_u8 val = data;\n"); break; @@ -2073,10 +1978,10 @@ static void gen_opcode (unsigned long int opcode) genflags (flag_logical, curi->size, "val", "", ""); printf ("SET_CFLG (carry >> %d);\n", bit_size (curi->size) - 1); duplicate_carry (); - genastore ("val", curi->smode, "srcreg", curi->size, "data"); + genastore ("val", curi->smode, "srcreg", curi->size, "data", XLATE_LOG); break; case i_ROLW: - genamode (curi->smode, "srcreg", curi->size, "data", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { case sz_byte: printf ("\tuae_u8 val = data;\n"); break; @@ -2089,10 +1994,10 @@ static void gen_opcode (unsigned long int opcode) printf ("\tif (carry) val |= 1;\n"); genflags (flag_logical, curi->size, "val", "", ""); printf ("SET_CFLG (carry >> %d);\n", bit_size (curi->size) - 1); - genastore ("val", curi->smode, "srcreg", curi->size, "data"); + genastore ("val", curi->smode, "srcreg", curi->size, "data", XLATE_LOG); break; case i_RORW: - genamode (curi->smode, "srcreg", curi->size, "data", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { case sz_byte: printf ("\tuae_u8 val = data;\n"); 
break; @@ -2105,10 +2010,10 @@ static void gen_opcode (unsigned long int opcode) printf ("\tif (carry) val |= %s;\n", cmask (curi->size)); genflags (flag_logical, curi->size, "val", "", ""); printf ("SET_CFLG (carry);\n"); - genastore ("val", curi->smode, "srcreg", curi->size, "data"); + genastore ("val", curi->smode, "srcreg", curi->size, "data", XLATE_LOG); break; case i_ROXLW: - genamode (curi->smode, "srcreg", curi->size, "data", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { case sz_byte: printf ("\tuae_u8 val = data;\n"); break; @@ -2122,10 +2027,10 @@ static void gen_opcode (unsigned long int opcode) genflags (flag_logical, curi->size, "val", "", ""); printf ("SET_CFLG (carry >> %d);\n", bit_size (curi->size) - 1); duplicate_carry (); - genastore ("val", curi->smode, "srcreg", curi->size, "data"); + genastore ("val", curi->smode, "srcreg", curi->size, "data", XLATE_LOG); break; case i_ROXRW: - genamode (curi->smode, "srcreg", curi->size, "data", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); switch (curi->size) { case sz_byte: printf ("\tuae_u8 val = data;\n"); break; @@ -2139,103 +2044,129 @@ static void gen_opcode (unsigned long int opcode) genflags (flag_logical, curi->size, "val", "", ""); printf ("SET_CFLG (carry);\n"); duplicate_carry (); - genastore ("val", curi->smode, "srcreg", curi->size, "data"); + genastore ("val", curi->smode, "srcreg", curi->size, "data", XLATE_LOG); break; case i_MOVEC2: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); printf ("\tint regno = (src >> 12) & 15;\n"); printf ("\tuae_u32 *regp = regs.regs + regno;\n"); - printf ("\tm68k_movec2(src & 0xFFF, regp);\n"); + printf ("\tif (!m68k_movec2(src & 0xFFF, regp)) goto 
%s;\n", endlabelstr); break; case i_MOVE2C: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); printf ("\tint regno = (src >> 12) & 15;\n"); printf ("\tuae_u32 *regp = regs.regs + regno;\n"); - printf ("\tm68k_move2c(src & 0xFFF, regp);\n"); + printf ("\tif (!m68k_move2c(src & 0xFFF, regp)) goto %s;\n", endlabelstr); break; case i_CAS: { int old_brace_level; - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); printf ("\tint ru = (src >> 6) & 7;\n"); printf ("\tint rc = src & 7;\n"); genflags (flag_cmp, curi->size, "newv", "m68k_dreg(regs, rc)", "dst"); + sync_m68k_pc (); printf ("\tif (GET_ZFLG)"); old_brace_level = n_braces; start_brace (); - genastore ("(m68k_dreg(regs, ru))", curi->dmode, "dstreg", curi->size, "dst"); + genastore ("(m68k_dreg(regs, ru))", curi->dmode, "dstreg", curi->size, "dst", XLATE_LOG); pop_braces (old_brace_level); printf ("else"); start_brace (); - printf ("m68k_dreg(regs, rc) = dst;\n"); + switch (curi->size) { + case sz_byte: + printf ("\tm68k_dreg(regs, rc) = (m68k_dreg(regs, rc) & ~0xff) | (dst & 0xff);\n"); + break; + case sz_word: + printf ("\tm68k_dreg(regs, rc) = (m68k_dreg(regs, rc) & ~0xffff) | (dst & 0xffff);\n"); + break; + default: + printf ("\tm68k_dreg(regs, rc) = dst;\n"); + break; + } pop_braces (old_brace_level); } break; case i_CAS2: - genamode (curi->smode, "srcreg", curi->size, "extra", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); printf ("\tuae_u32 rn1 = regs.regs[(extra >> 28) & 15];\n"); printf ("\tuae_u32 rn2 = regs.regs[(extra >> 12) & 
15];\n"); if (curi->size == sz_word) { int old_brace_level = n_braces; + printf ("\tuae_u32 rc1 = (extra >> 16) & 7;\n"); + printf ("\tuae_u32 rc2 = extra & 7;\n"); printf ("\tuae_u16 dst1 = get_word(rn1), dst2 = get_word(rn2);\n"); - genflags (flag_cmp, curi->size, "newv", "m68k_dreg(regs, (extra >> 16) & 7)", "dst1"); + genflags (flag_cmp, curi->size, "newv", "m68k_dreg(regs, rc1)", "dst1"); printf ("\tif (GET_ZFLG) {\n"); - genflags (flag_cmp, curi->size, "newv", "m68k_dreg(regs, extra & 7)", "dst2"); + genflags (flag_cmp, curi->size, "newv", "m68k_dreg(regs, rc2)", "dst2"); printf ("\tif (GET_ZFLG) {\n"); printf ("\tput_word(rn1, m68k_dreg(regs, (extra >> 22) & 7));\n"); - printf ("\tput_word(rn1, m68k_dreg(regs, (extra >> 6) & 7));\n"); + printf ("\tput_word(rn2, m68k_dreg(regs, (extra >> 6) & 7));\n"); printf ("\t}}\n"); pop_braces (old_brace_level); printf ("\tif (! GET_ZFLG) {\n"); - printf ("\tm68k_dreg(regs, (extra >> 22) & 7) = (m68k_dreg(regs, (extra >> 22) & 7) & ~0xffff) | (dst1 & 0xffff);\n"); - printf ("\tm68k_dreg(regs, (extra >> 6) & 7) = (m68k_dreg(regs, (extra >> 6) & 7) & ~0xffff) | (dst2 & 0xffff);\n"); + printf ("\tm68k_dreg(regs, rc2) = (m68k_dreg(regs, rc2) & ~0xffff) | (dst2 & 0xffff);\n"); + printf ("\tm68k_dreg(regs, rc1) = (m68k_dreg(regs, rc1) & ~0xffff) | (dst1 & 0xffff);\n"); printf ("\t}\n"); } else { int old_brace_level = n_braces; + printf ("\tuae_u32 rc1 = (extra >> 16) & 7;\n"); + printf ("\tuae_u32 rc2 = extra & 7;\n"); printf ("\tuae_u32 dst1 = get_long(rn1), dst2 = get_long(rn2);\n"); - genflags (flag_cmp, curi->size, "newv", "m68k_dreg(regs, (extra >> 16) & 7)", "dst1"); + genflags (flag_cmp, curi->size, "newv", "m68k_dreg(regs, rc1)", "dst1"); printf ("\tif (GET_ZFLG) {\n"); - genflags (flag_cmp, curi->size, "newv", "m68k_dreg(regs, extra & 7)", "dst2"); + genflags (flag_cmp, curi->size, "newv", "m68k_dreg(regs, rc2)", "dst2"); printf ("\tif (GET_ZFLG) {\n"); printf ("\tput_long(rn1, m68k_dreg(regs, (extra >> 22) & 
7));\n"); - printf ("\tput_long(rn1, m68k_dreg(regs, (extra >> 6) & 7));\n"); + printf ("\tput_long(rn2, m68k_dreg(regs, (extra >> 6) & 7));\n"); printf ("\t}}\n"); pop_braces (old_brace_level); printf ("\tif (! GET_ZFLG) {\n"); - printf ("\tm68k_dreg(regs, (extra >> 22) & 7) = dst1;\n"); - printf ("\tm68k_dreg(regs, (extra >> 6) & 7) = dst2;\n"); + printf ("\tm68k_dreg(regs, rc2) = dst2;\n"); + printf ("\tm68k_dreg(regs, rc1) = dst1;\n"); printf ("\t}\n"); } break; - case i_MOVES: /* ignore DFC and SFC because we have no MMU */ + case i_MOVES: { - int old_brace_level; - genamode (curi->smode, "srcreg", curi->size, "extra", 1, 0); - printf ("\tif (extra & 0x800)\n"); - old_brace_level = n_braces; - start_brace (); - printf ("\tuae_u32 src = regs.regs[(extra >> 12) & 15];\n"); - genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0); - genastore ("src", curi->dmode, "dstreg", curi->size, "dst"); - pop_braces (old_brace_level); - printf ("else"); - start_brace (); - genamode (curi->dmode, "dstreg", curi->size, "src", 1, 0); - printf ("\tif (extra & 0x8000) {\n"); - switch (curi->size) { - case sz_byte: printf ("\tm68k_areg(regs, (extra >> 12) & 7) = (uae_s32)(uae_s8)src;\n"); break; - case sz_word: printf ("\tm68k_areg(regs, (extra >> 12) & 7) = (uae_s32)(uae_s16)src;\n"); break; - case sz_long: printf ("\tm68k_areg(regs, (extra >> 12) & 7) = src;\n"); break; - default: abort (); - } - printf ("\t} else {\n"); - genastore ("src", Dreg, "(extra >> 12) & 7", curi->size, ""); - printf ("\t}\n"); - pop_braces (old_brace_level); + int old_brace_level; + + genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + start_brace(); + printf ("\tif (extra & 0x0800)\n"); /* from reg to ea */ + { + int old_m68k_pc_offset = m68k_pc_offset; + /* use DFC */ + old_brace_level = n_braces; + start_brace (); + printf ("\tuae_u32 src = regs.regs[(extra >> 12) & 15];\n"); + genamode (curi->dmode, "dstreg", curi->size, "dst", 
GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_DFC); + genastore ("src", curi->dmode, "dstreg", curi->size, "dst", XLATE_DFC); + pop_braces (old_brace_level); + m68k_pc_offset = old_m68k_pc_offset; + } + printf ("else"); /* from ea to reg */ + { + /* use SFC */ + start_brace (); + genamode (curi->dmode, "dstreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_SFC); + printf ("\tif (extra & 0x8000) {\n"); /* address/data */ + switch (curi->size) { + case sz_byte: printf ("\tm68k_areg(regs, (extra >> 12) & 7) = (uae_s32)(uae_s8)src;\n"); break; + case sz_word: printf ("\tm68k_areg(regs, (extra >> 12) & 7) = (uae_s32)(uae_s16)src;\n"); break; + case sz_long: printf ("\tm68k_areg(regs, (extra >> 12) & 7) = src;\n"); break; + default: abort (); + } + printf ("\t} else {\n"); + genastore ("src", Dreg, "(extra >> 12) & 7", curi->size, "", XLATE_LOG); + printf ("\t}\n"); + sync_m68k_pc(); + pop_braces (old_brace_level); + } } break; case i_BKPT: /* only needed for hardware emulators */ @@ -2251,23 +2182,23 @@ static void gen_opcode (unsigned long int opcode) printf ("\top_illg(opcode);\n"); break; case i_TRAPcc: + printf ("\tuaecptr oldpc = m68k_getpc();\n"); if (curi->smode != am_unknown && curi->smode != am_illg) - genamode (curi->smode, "srcreg", curi->size, "dummy", 1, 0); - printf ("\tif (cctrue(%d)) { Exception(7,m68k_getpc()); goto %s; }\n", curi->cc, endlabelstr); + genamode (curi->smode, "srcreg", curi->size, "dummy", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + sync_m68k_pc (); + printf ("\tif (cctrue(%d)) { Exception(7,oldpc); goto %s; }\n", curi->cc, endlabelstr); need_endlabel = 1; break; case i_DIVL: - sync_m68k_pc (); - start_brace (); printf ("\tuaecptr oldpc = m68k_getpc();\n"); - genamode (curi->smode, "srcreg", curi->size, "extra", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", 
curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); sync_m68k_pc (); printf ("\tm68k_divl(opcode, dst, extra, oldpc);\n"); break; case i_MULL: - genamode (curi->smode, "srcreg", curi->size, "extra", 1, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); sync_m68k_pc (); printf ("\tm68k_mull(opcode, dst, extra);\n"); break; @@ -2279,34 +2210,37 @@ static void gen_opcode (unsigned long int opcode) case i_BFFFO: case i_BFSET: case i_BFINS: - genamode (curi->smode, "srcreg", curi->size, "extra", 1, 0); - genamode (curi->dmode, "dstreg", sz_long, "dst", 2, 0); + genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); + genamode (curi->dmode, "dstreg", sz_long, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG); start_brace (); + printf ("\tuae_u32 bdata[2];"); printf ("\tuae_s32 offset = extra & 0x800 ? m68k_dreg(regs, (extra >> 6) & 7) : (extra >> 6) & 0x1f;\n"); printf ("\tint width = (((extra & 0x20 ? m68k_dreg(regs, extra & 7) : extra) -1) & 0x1f) +1;\n"); if (curi->dmode == Dreg) { - printf ("\tuae_u32 tmp = m68k_dreg(regs, dstreg) << (offset & 0x1f);\n"); + printf ("\tuae_u32 tmp = m68k_dreg(regs, dstreg);\n"); + printf ("\toffset &= 0x1f;\n"); + printf ("\ttmp = (tmp << offset) | (tmp >> (32 - offset));\n"); + printf ("\tbdata[0] = tmp & ((1 << (32 - width)) - 1);\n"); } else { - printf ("\tuae_u32 tmp,bf0,bf1;\n"); - printf ("\tdsta += (offset >> 3) | (offset & 0x80000000 ? 
~0x1fffffff : 0);\n"); - printf ("\tbf0 = get_long(dsta);bf1 = get_byte(dsta+4) & 0xff;\n"); - printf ("\ttmp = (bf0 << (offset & 7)) | (bf1 >> (8 - (offset & 7)));\n"); + printf ("\tuae_u32 tmp;\n"); + printf ("\tdsta += offset >> 3;\n"); + printf ("\ttmp = get_bitfield(dsta, bdata, offset, width);\n"); } - printf ("\ttmp >>= (32 - width);\n"); - printf ("\tSET_NFLG (tmp & (1 << (width-1)) ? 1 : 0);\n"); + printf ("\tSET_NFLG_ALWAYS (((uae_s32)tmp) < 0 ? 1 : 0);\n"); + if (curi->mnemo == i_BFEXTS) + printf ("\ttmp = (uae_s32)tmp >> (32 - width);\n"); + else + printf ("\ttmp >>= (32 - width);\n"); printf ("\tSET_ZFLG (tmp == 0); SET_VFLG (0); SET_CFLG (0);\n"); switch (curi->mnemo) { case i_BFTST: break; case i_BFEXTU: + case i_BFEXTS: printf ("\tm68k_dreg(regs, (extra >> 12) & 7) = tmp;\n"); break; case i_BFCHG: - printf ("\ttmp = ~tmp;\n"); - break; - case i_BFEXTS: - printf ("\tif (GET_NFLG) tmp |= width == 32 ? 0 : (-1 << width);\n"); - printf ("\tm68k_dreg(regs, (extra >> 12) & 7) = tmp;\n"); + printf ("\ttmp = tmp ^ (0xffffffffu >> (32 - width));\n"); break; case i_BFCLR: printf ("\ttmp = 0;\n"); @@ -2317,10 +2251,13 @@ static void gen_opcode (unsigned long int opcode) printf ("\tm68k_dreg(regs, (extra >> 12) & 7) = offset;\n"); break; case i_BFSET: - printf ("\ttmp = 0xffffffff;\n"); + printf ("\ttmp = 0xffffffffu >> (32 - width);\n"); break; case i_BFINS: printf ("\ttmp = m68k_dreg(regs, (extra >> 12) & 7);\n"); + printf ("\ttmp = tmp & (0xffffffffu >> (32 - width));\n"); + printf ("\tSET_NFLG_ALWAYS (tmp & (1 << (width - 1)) ? 
1 : 0);\n"); + printf ("\tSET_ZFLG (tmp == 0);\n"); break; default: break; @@ -2328,26 +2265,12 @@ static void gen_opcode (unsigned long int opcode) if (curi->mnemo == i_BFCHG || curi->mnemo == i_BFCLR || curi->mnemo == i_BFSET - || curi->mnemo == i_BFINS) - { - printf ("\ttmp <<= (32 - width);\n"); + || curi->mnemo == i_BFINS) { if (curi->dmode == Dreg) { - printf ("\tm68k_dreg(regs, dstreg) = (m68k_dreg(regs, dstreg) & ((offset & 0x1f) == 0 ? 0 :\n"); - printf ("\t\t(0xffffffff << (32 - (offset & 0x1f))))) |\n"); - printf ("\t\t(tmp >> (offset & 0x1f)) |\n"); - printf ("\t\t(((offset & 0x1f) + width) >= 32 ? 0 :\n"); - printf (" (m68k_dreg(regs, dstreg) & ((uae_u32)0xffffffff >> ((offset & 0x1f) + width))));\n"); + printf ("\ttmp = bdata[0] | (tmp << (32 - width));\n"); + printf ("\tm68k_dreg(regs, dstreg) = (tmp >> offset) | (tmp << (32 - offset));\n"); } else { - printf ("\tbf0 = (bf0 & (0xff000000 << (8 - (offset & 7)))) |\n"); - printf ("\t\t(tmp >> (offset & 7)) |\n"); - printf ("\t\t(((offset & 7) + width) >= 32 ? 
0 :\n"); - printf ("\t\t (bf0 & ((uae_u32)0xffffffff >> ((offset & 7) + width))));\n"); - printf ("\tput_long(dsta,bf0 );\n"); - printf ("\tif (((offset & 7) + width) > 32) {\n"); - printf ("\t\tbf1 = (bf1 & (0xff >> (width - 32 + (offset & 7)))) |\n"); - printf ("\t\t\t(tmp << (8 - (offset & 7)));\n"); - printf ("\t\tput_byte(dsta+4,bf1);\n"); - printf ("\t}\n"); + printf ("\tput_bitfield(dsta, bdata, tmp, offset, width);\n"); } } break; @@ -2357,11 +2280,11 @@ static void gen_opcode (unsigned long int opcode) printf ("\tm68k_dreg(regs, dstreg) = (m68k_dreg(regs, dstreg) & 0xffffff00) | ((val >> 4) & 0xf0) | (val & 0xf);\n"); } else { printf ("\tuae_u16 val;\n"); - printf ("\tm68k_areg(regs, srcreg) -= areg_byteinc[srcreg];\n"); - printf ("\tval = (uae_u16)get_byte(m68k_areg(regs, srcreg));\n"); - printf ("\tm68k_areg(regs, srcreg) -= areg_byteinc[srcreg];\n"); - printf ("\tval = (val | ((uae_u16)get_byte(m68k_areg(regs, srcreg)) << 8)) + %s;\n", gen_nextiword ()); + printf ("\tval = (uae_u16)get_byte(m68k_areg(regs, srcreg) - areg_byteinc[srcreg]);\n"); + printf ("\tval = (val | ((uae_u16)get_byte(m68k_areg(regs, srcreg) - 2 * areg_byteinc[srcreg]) << 8)) + %s;\n", gen_nextiword ()); + printf ("\tm68k_areg(regs, srcreg) -= 2;\n"); printf ("\tm68k_areg(regs, dstreg) -= areg_byteinc[dstreg];\n"); + gen_set_fault_pc (); printf ("\tput_byte(m68k_areg(regs, dstreg),((val >> 4) & 0xf0) | (val & 0xf));\n"); } break; @@ -2372,92 +2295,167 @@ static void gen_opcode (unsigned long int opcode) printf ("\tm68k_dreg(regs, dstreg) = (m68k_dreg(regs, dstreg) & 0xffff0000) | (val & 0xffff);\n"); } else { printf ("\tuae_u16 val;\n"); - printf ("\tm68k_areg(regs, srcreg) -= areg_byteinc[srcreg];\n"); - printf ("\tval = (uae_u16)get_byte(m68k_areg(regs, srcreg));\n"); + printf ("\tval = (uae_u16)get_byte(m68k_areg(regs, srcreg) - areg_byteinc[srcreg]);\n"); printf ("\tval = (((val << 4) & 0xf00) | (val & 0xf)) + %s;\n", gen_nextiword ()); - printf ("\tm68k_areg(regs, dstreg) -= 
areg_byteinc[dstreg];\n"); - printf ("\tput_byte(m68k_areg(regs, dstreg),val);\n"); - printf ("\tm68k_areg(regs, dstreg) -= areg_byteinc[dstreg];\n"); - printf ("\tput_byte(m68k_areg(regs, dstreg),val >> 8);\n"); + printf ("\tm68k_areg(regs, srcreg) -= areg_byteinc[srcreg];\n"); + printf ("\tm68k_areg(regs, dstreg) -= 2;\n"); + gen_set_fault_pc (); + printf ("\tput_word(m68k_areg(regs, dstreg), val);\n"); } break; case i_TAS: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); genflags (flag_logical, curi->size, "src", "", ""); printf ("\tsrc |= 0x80;\n"); - genastore ("src", curi->smode, "srcreg", curi->size, "src"); + genastore ("src", curi->smode, "srcreg", curi->size, "src", XLATE_LOG); break; case i_FPP: - genamode (curi->smode, "srcreg", curi->size, "extra", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); sync_m68k_pc (); swap_opcode (); - printf ("\tfpp_opp(opcode,extra);\n"); + printf ("\tfpuop_arithmetic(opcode, extra);\n"); break; case i_FDBcc: - genamode (curi->smode, "srcreg", curi->size, "extra", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); sync_m68k_pc (); swap_opcode (); - printf ("\tfdbcc_opp(opcode,extra);\n"); + printf ("\tfpuop_dbcc(opcode, extra);\n"); break; case i_FScc: - genamode (curi->smode, "srcreg", curi->size, "extra", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); sync_m68k_pc (); swap_opcode (); - printf ("\tfscc_opp(opcode,extra);\n"); + printf ("\tfpuop_scc(opcode, extra);\n"); break; case i_FTRAPcc: sync_m68k_pc (); start_brace (); printf ("\tuaecptr oldpc = m68k_getpc();\n"); + printf ("\tuae_u16 extra = %s;\n", gen_nextiword()); if (curi->smode != am_unknown && curi->smode != am_illg) - genamode (curi->smode, "srcreg", curi->size, 
"dummy", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "dummy", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); sync_m68k_pc (); swap_opcode (); - printf ("\tftrapcc_opp(opcode,oldpc);\n"); + printf ("\tfpuop_trapcc(opcode, oldpc, extra);\n"); break; case i_FBcc: sync_m68k_pc (); start_brace (); printf ("\tuaecptr pc = m68k_getpc();\n"); - genamode (curi->dmode, "srcreg", curi->size, "extra", 1, 0); + genamode (curi->dmode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); sync_m68k_pc (); swap_opcode (); - printf ("\tfbcc_opp(opcode,pc,extra);\n"); + printf ("\tfpuop_bcc(opcode, pc, extra);\n"); break; case i_FSAVE: sync_m68k_pc (); swap_opcode (); - printf ("\tfsave_opp(opcode);\n"); + printf ("\tfpuop_save(opcode);\n"); break; case i_FRESTORE: sync_m68k_pc (); swap_opcode (); - printf ("\tfrestore_opp(opcode);\n"); + printf ("\tfpuop_restore(opcode);\n"); break; case i_CINVL: + printf ("\tflush_internals();\n"); + printf("#ifdef USE_JIT\n"); + printf ("\tif (opcode&0x80)\n" + "\t\tflush_icache(31);\n"); + printf("#endif\n"); + break; case i_CINVP: + printf ("\tflush_internals();\n"); + printf("#ifdef USE_JIT\n"); + printf ("\tif (opcode&0x80)\n" + "\t\tflush_icache(32);\n"); + printf("#endif\n"); + break; case i_CINVA: + printf ("\tflush_internals();\n"); + printf("#ifdef USE_JIT\n"); + printf ("\tif (opcode&0x80)\n" + "\t\tflush_icache(33);\n"); + printf("#endif\n"); + break; case i_CPUSHL: + printf ("\tflush_internals();\n"); + printf("#ifdef USE_JIT\n"); + printf ("\tif (opcode&0x80)\n" + "\t\tflush_icache(41);\n"); + printf("#endif\n"); + break; case i_CPUSHP: + printf ("\tflush_internals();\n"); + printf("#ifdef USE_JIT\n"); + printf ("\tif (opcode&0x80)\n" + "\t\tflush_icache(42);\n"); + printf("#endif\n"); + break; case i_CPUSHA: + printf ("\tflush_internals();\n"); + printf("#ifdef USE_JIT\n"); + printf ("\tif (opcode&0x80)\n" + "\t\tflush_icache(43);\n"); + printf("#endif\n"); break; case i_MOVE16: - printf 
("\tuaecptr mems = m68k_areg(regs, srcreg) & ~15, memd;\n"); - printf ("\tdstreg = (%s >> 12) & 7;\n", gen_nextiword()); - printf ("\tmemd = m68k_areg(regs, dstreg) & ~15;\n"); - printf ("\tput_long(memd, get_long(mems));\n"); - printf ("\tput_long(memd+4, get_long(mems+4));\n"); - printf ("\tput_long(memd+8, get_long(mems+8));\n"); - printf ("\tput_long(memd+12, get_long(mems+12));\n"); - printf ("\tm68k_areg(regs, srcreg) += 16;\n"); - printf ("\tm68k_areg(regs, dstreg) += 16;\n"); - break; + if ((opcode & 0xfff8) == 0xf620) { + /* MOVE16 (Ax)+,(Ay)+ */ + printf ("\tuaecptr mems = m68k_areg(regs, srcreg) & ~15, memd;\n"); + printf ("\tdstreg = (%s >> 12) & 7;\n", gen_nextiword()); + printf ("\tmemd = m68k_areg(regs, dstreg) & ~15;\n"); + printf ("\tput_long(memd, get_long(mems));\n"); + printf ("\tput_long(memd+4, get_long(mems+4));\n"); + printf ("\tput_long(memd+8, get_long(mems+8));\n"); + printf ("\tput_long(memd+12, get_long(mems+12));\n"); + printf ("\tif (srcreg != dstreg)\n"); + printf ("\tm68k_areg(regs, srcreg) += 16;\n"); + printf ("\tm68k_areg(regs, dstreg) += 16;\n"); + } else { + /* Other variants */ + genamode (curi->smode, "srcreg", curi->size, "mems", GENA_GETV_NO_FETCH, GENA_MOVEM_MOVE16, XLATE_LOG); + genamode (curi->dmode, "dstreg", curi->size, "memd", GENA_GETV_NO_FETCH, GENA_MOVEM_MOVE16, XLATE_LOG); + printf ("\tmemsa &= ~15;\n"); + printf ("\tmemda &= ~15;\n"); + printf ("\tput_long(memda, get_long(memsa));\n"); + printf ("\tput_long(memda+4, get_long(memsa+4));\n"); + printf ("\tput_long(memda+8, get_long(memsa+8));\n"); + printf ("\tput_long(memda+12, get_long(memsa+12));\n"); + if ((opcode & 0xfff8) == 0xf600) + printf ("\tm68k_areg(regs, srcreg) += 16;\n"); + else if ((opcode & 0xfff8) == 0xf608) + printf ("\tm68k_areg(regs, dstreg) += 16;\n"); + } + break; case i_MMUOP: - genamode (curi->smode, "srcreg", curi->size, "extra", 1, 0); + genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG); 
sync_m68k_pc (); swap_opcode (); printf ("\tmmu_op(opcode,extra);\n"); break; + + case i_EMULOP_RETURN: + printf ("\tm68k_emulop_return();\n"); + m68k_pc_offset = 0; + break; + + case i_EMULOP: + printf ("\n"); + swap_opcode (); + printf ("\tm68k_emulop(opcode);\n"); + break; + + case i_NATFEAT_ID: + printf ("\n"); + printf ("\tm68k_natfeat_id();\n"); + break; + + case i_NATFEAT_CALL: + printf ("\n"); + printf ("\tm68k_natfeat_call();\n"); + break; + default: abort (); break; @@ -2473,17 +2471,43 @@ static void generate_includes (FILE * f) fprintf (f, "#include \"memory.h\"\n"); fprintf (f, "#include \"readcpu.h\"\n"); fprintf (f, "#include \"newcpu.h\"\n"); + fprintf (f, "#ifdef USE_JIT\n"); + fprintf (f, "#include \"compiler/compemu.h\"\n"); + fprintf (f, "#endif\n"); + fprintf (f, "#include \"fpu/fpu.h\"\n"); fprintf (f, "#include \"cputbl.h\"\n"); + fprintf (f, "#include \"cpu_emulation.h\"\n"); + fprintf (f, "#include \"debug.h\"\n"); + + fprintf (f, "#define SET_CFLG_ALWAYS(x) SET_CFLG(x)\n"); + fprintf (f, "#define SET_NFLG_ALWAYS(x) SET_NFLG(x)\n"); + fprintf (f, "#define CPUFUNC_FF(x) x##_ff\n"); + fprintf (f, "#define CPUFUNC_NF(x) x##_nf\n"); + fprintf (f, "#define CPUFUNC(x) CPUFUNC_FF(x)\n"); + + fprintf (f, "#ifdef NOFLAGS\n"); + fprintf (f, "# include \"noflags.h\"\n"); + fprintf (f, "#endif\n"); } static int postfix; +struct gencputbl { + char handler[80]; + uae_u16 specific; + uae_u16 opcode; + int namei; +}; +struct gencputbl cpustbl[65536]; +static int n_cpustbl; + static void generate_one_opcode (int rp) { int i; uae_u16 smsk, dmsk; - long int opcode = opcode_map[rp]; - + int opcode = opcode_map[rp]; + int have_realopcode = 0; + if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level) return; @@ -2497,13 +2521,37 @@ static void generate_one_opcode (int rp) return; if (opcode_next_clev[rp] != cpu_level) { - fprintf (stblfile, "{ op_%lx_%d, 0, %ld }, /* %s */\n", opcode, opcode_last_postfix[rp], - opcode, lookuptab[i].name); + 
sprintf(cpustbl[n_cpustbl].handler, "CPUFUNC(op_%x_%d)", opcode, opcode_last_postfix[rp]); + cpustbl[n_cpustbl].specific = 0; + cpustbl[n_cpustbl].opcode = opcode; + cpustbl[n_cpustbl].namei = i; + fprintf (stblfile, "{ %s, %d, %d }, /* %s */\n", cpustbl[n_cpustbl].handler, cpustbl[n_cpustbl].specific, opcode, lookuptab[i].name); + n_cpustbl++; return; } - fprintf (stblfile, "{ op_%lx_%d, 0, %ld }, /* %s */\n", opcode, postfix, opcode, lookuptab[i].name); - fprintf (headerfile, "extern cpuop_func op_%lx_%d;\n", opcode, postfix); - printf ("void REGPARAM2 op_%lx_%d(uae_u32 opcode) /* %s */\n{\n", opcode, postfix, lookuptab[i].name); + + if (table68k[opcode].flagdead == 0) + /* force to the "ff" variant since the instruction doesn't set at all the condition codes */ + sprintf (cpustbl[n_cpustbl].handler, "CPUFUNC_FF(op_%x_%d)", opcode, postfix); + else + sprintf (cpustbl[n_cpustbl].handler, "CPUFUNC(op_%x_%d)", opcode, postfix); + cpustbl[n_cpustbl].specific = 0; + cpustbl[n_cpustbl].opcode = opcode; + cpustbl[n_cpustbl].namei = i; + fprintf (stblfile, "{ %s, %d, %d }, /* %s */\n", cpustbl[n_cpustbl].handler, cpustbl[n_cpustbl].specific, opcode, lookuptab[i].name); + n_cpustbl++; + + fprintf (headerfile, "extern cpuop_func op_%x_%d_nf;\n", opcode, postfix); + fprintf (headerfile, "extern cpuop_func op_%x_%d_ff;\n", opcode, postfix); + + printf ("void REGPARAM2 CPUFUNC(op_%x_%d)(uae_u32 opcode) /* %s */\n{\n", opcode, postfix, lookuptab[i].name); + printf ("\tcpuop_begin();\n"); + /* gb-- The "nf" variant for an instruction that doesn't set the condition + codes at all is the same as the "ff" variant, so we don't need the "nf" + variant to be compiled since it is mapped to the "ff" variant in the + smalltbl. 
*/ + if (table68k[opcode].flagdead == 0) + printf ("#ifndef NOFLAGS\n"); switch (table68k[opcode].stype) { case 0: smsk = 7; break; @@ -2512,6 +2560,8 @@ static void generate_one_opcode (int rp) case 3: smsk = 7; break; case 4: smsk = 7; break; case 5: smsk = 63; break; + case 6: smsk = 255; break; + case 7: smsk = 3; break; default: abort (); } dmsk = 7; @@ -2521,7 +2571,12 @@ static void generate_one_opcode (int rp) && table68k[opcode].smode != imm && table68k[opcode].smode != imm0 && table68k[opcode].smode != imm1 && table68k[opcode].smode != imm2 && table68k[opcode].smode != absw && table68k[opcode].smode != absl - && table68k[opcode].smode != PC8r && table68k[opcode].smode != PC16) + && table68k[opcode].smode != PC8r && table68k[opcode].smode != PC16 + /* gb-- We don't want to fetch the EmulOp code since the EmulOp() + routine uses the whole opcode value. Maybe all the EmulOps + could be expanded out but I don't think it is an improvement */ + && table68k[opcode].stype != 6 + ) { if (table68k[opcode].spos == -1) { if (((int) table68k[opcode].sreg) >= 128) @@ -2537,38 +2592,17 @@ static void generate_one_opcode (int rp) if (pos < 8 && (smsk >> (8 - pos)) != 0) abort (); #endif - printf ("#ifdef HAVE_GET_WORD_UNSWAPPED\n"); - - if (pos < 8 && (smsk >> (8 - pos)) != 0) - sprintf (source, "(((opcode >> %d) | (opcode << %d)) & %d)", - pos ^ 8, 8 - pos, dmsk); - else if (pos != 8) - sprintf (source, "((opcode >> %d) & %d)", pos ^ 8, smsk); - else - sprintf (source, "(opcode & %d)", smsk); - - if (table68k[opcode].stype == 3) - printf ("\tuae_u32 srcreg = imm8_table[%s];\n", source); - else if (table68k[opcode].stype == 1) - printf ("\tuae_u32 srcreg = (uae_s32)(uae_s8)%s;\n", source); - else - printf ("\tuae_u32 srcreg = %s;\n", source); - - printf ("#else\n"); - + real_opcode(&have_realopcode); if (pos) - sprintf (source, "((opcode >> %d) & %d)", pos, smsk); + sprintf (source, "((real_opcode >> %d) & %d)", pos, smsk); else - sprintf (source, "(opcode & %d)", smsk); 
- + sprintf (source, "(real_opcode & %d)", smsk); if (table68k[opcode].stype == 3) printf ("\tuae_u32 srcreg = imm8_table[%s];\n", source); else if (table68k[opcode].stype == 1) printf ("\tuae_u32 srcreg = (uae_s32)(uae_s8)%s;\n", source); else printf ("\tuae_u32 srcreg = %s;\n", source); - - printf ("#endif\n"); } } if (table68k[opcode].duse @@ -2588,27 +2622,13 @@ static void generate_one_opcode (int rp) /* Check that we can do the little endian optimization safely. */ if (pos < 8 && (dmsk >> (8 - pos)) != 0) abort (); -#endif - printf ("#ifdef HAVE_GET_WORD_UNSWAPPED\n"); - - if (pos < 8 && (dmsk >> (8 - pos)) != 0) - printf ("\tuae_u32 dstreg = ((opcode >> %d) | (opcode << %d)) & %d;\n", - pos ^ 8, 8 - pos, dmsk); - else if (pos != 8) - printf ("\tuae_u32 dstreg = (opcode >> %d) & %d;\n", - pos ^ 8, dmsk); - else - printf ("\tuae_u32 dstreg = opcode & %d;\n", dmsk); - - printf ("#else\n"); - +#endif + real_opcode(&have_realopcode); if (pos) - printf ("\tuae_u32 dstreg = (opcode >> %d) & %d;\n", + printf ("\tuae_u32 dstreg = (real_opcode >> %d) & %d;\n", pos, dmsk); else - printf ("\tuae_u32 dstreg = opcode & %d;\n", dmsk); - - printf ("#endif\n"); + printf ("\tuae_u32 dstreg = real_opcode & %d;\n", dmsk); } } need_endlabel = 0; @@ -2617,6 +2637,9 @@ static void generate_one_opcode (int rp) gen_opcode (opcode); if (need_endlabel) printf ("%s: ;\n", endlabelstr); + if (table68k[opcode].flagdead == 0) + printf ("\n#endif\n"); + printf ("\tcpuop_end();\n"); printf ("}\n"); opcode_next_clev[rp] = next_cpu_level; opcode_last_postfix[rp] = postfix; @@ -2628,24 +2651,18 @@ static void generate_func (void) using_prefetch = 0; using_exception_3 = 0; - for (i = 0; i < 6; i++) { + + for (i = 0; i < 1; i++) { cpu_level = 4 - i; - if (i == 5) { - cpu_level = 0; - using_prefetch = 1; - using_exception_3 = 1; - for (rp = 0; rp < nr_cpuop_funcs; rp++) - opcode_next_clev[rp] = 0; - } postfix = i; - fprintf (stblfile, "struct cputbl op_smalltbl_%d[] = {\n", postfix); + fprintf 
(stblfile, "const struct cputbl CPUFUNC(op_smalltbl_%d)[] = {\n", postfix); /* sam: this is for people with low memory (eg. me :)) */ printf ("\n" - "#if !defined(PART_1) && !defined(PART_2) && " - "!defined(PART_3) && !defined(PART_4) && " - "!defined(PART_5) && !defined(PART_6) && " - "!defined(PART_7) && !defined(PART_8)" + "#if !defined(PART_1) && !defined(PART_2) && " + "!defined(PART_3) && !defined(PART_4) && " + "!defined(PART_5) && !defined(PART_6) && " + "!defined(PART_7) && !defined(PART_8)" "\n" "#define PART_1 1\n" "#define PART_2 1\n" @@ -2656,8 +2673,8 @@ static void generate_func (void) "#define PART_7 1\n" "#define PART_8 1\n" "#endif\n\n"); - rp = 0; + n_cpustbl = 0; for(j=1;j<=8;++j) { int k = (j*nr_cpuop_funcs)/8; printf ("#ifdef PART_%d\n",j); @@ -2665,12 +2682,88 @@ static void generate_func (void) generate_one_opcode (rp); printf ("#endif\n\n"); } - fprintf (stblfile, "{ 0, 0, 0 }};\n"); } } -int main (int argc, char **argv) +static struct { + const char *handler; + const char *name; +} cpufunctbl[65536]; +static char const op_illg_1[] = "op_illg_1"; +static char const illegal[] = "ILLEGAL"; + +static void generate_functbl (void) +{ + int i; + unsigned int opcode; + int cpu_level = 4; + struct gencputbl *tbl = cpustbl; + + for (opcode = 0; opcode < 65536; opcode++) + { + cpufunctbl[opcode].handler = op_illg_1; + cpufunctbl[opcode].name = illegal; + } + for (i = 0; i < n_cpustbl; i++) + { + if (! 
tbl[i].specific) + { + cpufunctbl[tbl[i].opcode].handler = tbl[i].handler; + cpufunctbl[tbl[i].opcode].name = lookuptab[tbl[i].namei].name; + } + } + for (opcode = 0; opcode < 65536; opcode++) + { + const char *f; + + if (table68k[opcode].mnemo == i_ILLG || (unsigned)table68k[opcode].clev > (unsigned)cpu_level) + continue; + + if (table68k[opcode].handler != -1) + { + f = cpufunctbl[table68k[opcode].handler].handler; + if (f == op_illg_1) + abort(); + cpufunctbl[opcode].handler = f; + cpufunctbl[opcode].name = cpufunctbl[table68k[opcode].handler].name; + } + } + for (i = 0; i < n_cpustbl; i++) + { + if (tbl[i].specific) + { + cpufunctbl[tbl[i].opcode].handler = tbl[i].handler; + cpufunctbl[tbl[i].opcode].name = lookuptab[tbl[i].namei].name; + } + } + + fprintf(functblfile, "\n"); + fprintf(functblfile, "cpuop_func *cpufunctbl[65536] = {\n"); + fprintf(functblfile, "#if !defined(HAVE_GET_WORD_UNSWAPPED) || defined(FULLMMU)\n"); + for (opcode = 0; opcode < 65536; opcode++) + { + fprintf(functblfile, "\t%s%s /* %s */\n", cpufunctbl[opcode].handler, opcode < 65535 ? "," : "", cpufunctbl[opcode].name); + } + fprintf(functblfile, "#else\n"); + for (opcode = 0; opcode < 65536; opcode++) + { + unsigned int map = do_byteswap_16(opcode); + fprintf(functblfile, "\t%s%s /* %s */\n", cpufunctbl[map].handler, opcode < 65535 ? "," : "", cpufunctbl[map].name); + } + fprintf(functblfile, "#endif\n"); + fprintf(functblfile, "};\n"); +} + +#if (defined(OS_cygwin) || defined(OS_mingw)) && defined(EXTENDED_SIGSEGV) +void cygwin_mingw_abort() +{ +#undef abort + abort(); +} +#endif + +int main () { read_table68k (); do_merges (); @@ -2685,15 +2778,28 @@ int main (int argc, char **argv) * cputbl.h that way), but cpuopti can't cope. That could be fixed, but * I don't dare to touch the 68k version. 
*/ - headerfile = fopen ("cputbl.h", "wb"); - stblfile = fopen ("cpustbl.cpp", "wb"); - freopen ("cpuemu.cpp", "wb", stdout); + if ((headerfile = fopen ("cputbl.h", "wb")) == NULL) + abort(); + if ((stblfile = fopen ("cpustbl.cpp", "wb")) == NULL) + abort(); + if ((functblfile = fopen ("cpufunctbl.cpp", "wb")) == NULL) + abort(); + if (freopen ("cpuemu.cpp", "wb", stdout) == NULL) + abort(); generate_includes (stdout); + fprintf(stdout, "#ifdef HAVE_CFLAG_NO_REDZONE\n"); + fprintf(stdout, "#ifndef NOFLAGS\n"); + fprintf(stdout, "#pragma GCC option \"-mno-red-zone\"\n"); + fprintf(stdout, "#endif\n"); + fprintf(stdout, "#endif\n"); generate_includes (stblfile); - + generate_includes (functblfile); generate_func (); - + generate_functbl (); free (table68k); + fclose(headerfile); + fclose(stblfile); + fclose(functblfile); return 0; } diff --git a/BasiliskII/src/uae_cpu/m68k.h b/BasiliskII/src/uae_cpu/m68k.h index f1ff6977..dc79136b 100644 --- a/BasiliskII/src/uae_cpu/m68k.h +++ b/BasiliskII/src/uae_cpu/m68k.h @@ -1,36 +1,85 @@ +/* + * m68k.h - machine dependent bits + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ /* * UAE - The Un*x Amiga Emulator * * MC68000 emulation - machine dependent bits * * Copyright 1996 Bernd Schmidt + * */ -#if defined(__i386__) && defined(X86_ASSEMBLY) +#ifndef M68K_FLAGS_H +#define M68K_FLAGS_H +#ifdef OPTIMIZED_FLAGS + +#if (defined(CPU_i386) && defined(X86_ASSEMBLY)) || (defined(CPU_x86_64) && defined(X86_64_ASSEMBLY)) + +# include + +#ifndef SAHF_SETO_PROFITABLE + +/* PUSH/POP instructions are naturally 64-bit sized on x86-64, thus + unsigned long hereunder is either 64-bit or 32-bit wide depending + on the target. */ struct flag_struct { - unsigned int cznv; - unsigned int x; +#if defined(CPU_x86_64) + uint64 cznv; + uint64 x; +#else + uint32 cznv; + uint32 x; +#endif }; -#define SET_ZFLG(y) (regflags.cznv = (regflags.cznv & ~0x40) | (((y) & 1) << 6)) -#define SET_CFLG(y) (regflags.cznv = (regflags.cznv & ~1) | ((y) & 1)) -#define SET_VFLG(y) (regflags.cznv = (regflags.cznv & ~0x800) | (((y) & 1) << 11)) -#define SET_NFLG(y) (regflags.cznv = (regflags.cznv & ~0x80) | (((y) & 1) << 7)) -#define SET_XFLG(y) (regflags.x = (y)) +#define FLAGVAL_Z 0x40 +#define FLAGVAL_N 0x80 -#define GET_ZFLG ((regflags.cznv >> 6) & 1) -#define GET_CFLG (regflags.cznv & 1) -#define GET_VFLG ((regflags.cznv >> 11) & 1) -#define GET_NFLG ((regflags.cznv >> 7) & 1) -#define GET_XFLG (regflags.x & 1) +#define SET_ZFLG(y) (regflags.cznv = (((uae_u32)regflags.cznv) & ~0x40) | (((y) & 1) << 6)) +#define SET_CFLG(y) (regflags.cznv = (((uae_u32)regflags.cznv) & ~1) | ((y) & 1)) +#define SET_VFLG(y) (regflags.cznv = (((uae_u32)regflags.cznv) & ~0x800) | (((y) & 1) << 11)) +#define SET_NFLG(y) (regflags.cznv = (((uae_u32)regflags.cznv) & ~0x80) | (((y) & 1) << 7)) +#define SET_XFLG(y) (regflags.x = (y)) -#define CLEAR_CZNV (regflags.cznv = 0) -#define COPY_CARRY 
(regflags.x = regflags.cznv) +#define GET_ZFLG ((regflags.cznv >> 6) & 1) +#define GET_CFLG (regflags.cznv & 1) +#define GET_VFLG ((regflags.cznv >> 11) & 1) +#define GET_NFLG ((regflags.cznv >> 7) & 1) +#define GET_XFLG (regflags.x & 1) + +#define CLEAR_CZNV (regflags.cznv = 0) +#define GET_CZNV (regflags.cznv) +#define IOR_CZNV(X) (regflags.cznv |= (X)) +#define SET_CZNV(X) (regflags.cznv = (X)) + +#define COPY_CARRY (regflags.x = regflags.cznv) extern struct flag_struct regflags __asm__ ("regflags"); -static __inline__ int cctrue(int cc) +static inline int cctrue(int cc) { uae_u32 cznv = regflags.cznv; switch(cc){ @@ -58,91 +107,553 @@ static __inline__ int cctrue(int cc) return 0; } -#define x86_flag_testl(v) \ - __asm__ __volatile__ ("testl %1,%1\n\t" \ - "pushfl\n\t" \ - "popl %0\n\t" \ - : "=r" (regflags.cznv) : "r" (v) : "cc") +#define optflag_testl(v) \ + __asm__ __volatile__ ("andl %1,%1\n\t" \ + "pushf\n\t" \ + "pop %0\n\t" \ + : "=rm" (regflags.cznv) : "r" (v) : "memory", "cc") -#define x86_flag_testw(v) \ - __asm__ __volatile__ ("testw %w1,%w1\n\t" \ - "pushfl\n\t" \ - "popl %0\n\t" \ - : "=r" (regflags.cznv) : "r" (v) : "cc") +#define optflag_testw(v) \ + __asm__ __volatile__ ("andw %w1,%w1\n\t" \ + "pushf\n\t" \ + "pop %0\n\t" \ + : "=rm" (regflags.cznv) : "r" (v) : "memory", "cc") -#define x86_flag_testb(v) \ - __asm__ __volatile__ ("testb %b1,%b1\n\t" \ - "pushfl\n\t" \ - "popl %0\n\t" \ - : "=r" (regflags.cznv) : "q" (v) : "cc") +#define optflag_testb(v) \ + __asm__ __volatile__ ("andb %b1,%b1\n\t" \ + "pushf\n\t" \ + "pop %0\n\t" \ + : "=rm" (regflags.cznv) : "q" (v) : "memory", "cc") -#define x86_flag_addl(v, s, d) do { \ +#define optflag_addl(v, s, d) do { \ __asm__ __volatile__ ("addl %k2,%k1\n\t" \ - "pushfl\n\t" \ - "popl %0\n\t" \ - : "=r" (regflags.cznv), "=r" (v) : "rmi" (s), "1" (d) : "cc"); \ + "pushf\n\t" \ + "pop %0\n\t" \ + : "=rm" (regflags.cznv), "=r" (v) : "rmi" (s), "1" (d) : "memory", "cc"); \ COPY_CARRY; \ } while (0) -#define 
x86_flag_addw(v, s, d) do { \ +#define optflag_addw(v, s, d) do { \ __asm__ __volatile__ ("addw %w2,%w1\n\t" \ - "pushfl\n\t" \ - "popl %0\n\t" \ - : "=r" (regflags.cznv), "=r" (v) : "rmi" (s), "1" (d) : "cc"); \ + "pushf\n\t" \ + "pop %0\n\t" \ + : "=rm" (regflags.cznv), "=r" (v) : "rmi" (s), "1" (d) : "memory", "cc"); \ COPY_CARRY; \ } while (0) -#define x86_flag_addb(v, s, d) do { \ +#define optflag_addb(v, s, d) do { \ __asm__ __volatile__ ("addb %b2,%b1\n\t" \ - "pushfl\n\t" \ - "popl %0\n\t" \ + "pushf\n\t" \ + "pop %0\n\t" \ : "=r" (regflags.cznv), "=q" (v) : "qmi" (s), "1" (d) : "cc"); \ COPY_CARRY; \ } while (0) -#define x86_flag_subl(v, s, d) do { \ +#define optflag_subl(v, s, d) do { \ __asm__ __volatile__ ("subl %k2,%k1\n\t" \ - "pushfl\n\t" \ - "popl %0\n\t" \ - : "=r" (regflags.cznv), "=r" (v) : "rmi" (s), "1" (d) : "cc"); \ + "pushf\n\t" \ + "pop %0\n\t" \ + : "=rm" (regflags.cznv), "=r" (v) : "rmi" (s), "1" (d) : "memory", "cc"); \ COPY_CARRY; \ } while (0) -#define x86_flag_subw(v, s, d) do { \ +#define optflag_subw(v, s, d) do { \ __asm__ __volatile__ ("subw %w2,%w1\n\t" \ - "pushfl\n\t" \ - "popl %0\n\t" \ - : "=r" (regflags.cznv), "=r" (v) : "rmi" (s), "1" (d) : "cc"); \ + "pushf\n\t" \ + "pop %0\n\t" \ + : "=rm" (regflags.cznv), "=r" (v) : "rmi" (s), "1" (d) : "memory", "cc"); \ COPY_CARRY; \ } while (0) -#define x86_flag_subb(v, s, d) do { \ +#define optflag_subb(v, s, d) do { \ __asm__ __volatile__ ("subb %b2,%b1\n\t" \ - "pushfl\n\t" \ - "popl %0\n\t" \ - : "=r" (regflags.cznv), "=q" (v) : "qmi" (s), "1" (d) : "cc"); \ + "pushf\n\t" \ + "pop %0\n\t" \ + : "=rm" (regflags.cznv), "=q" (v) : "qmi" (s), "1" (d) : "memory", "cc"); \ COPY_CARRY; \ } while (0) -#define x86_flag_cmpl(s, d) \ +#define optflag_cmpl(s, d) \ __asm__ __volatile__ ("cmpl %k1,%k2\n\t" \ - "pushfl\n\t" \ - "popl %0\n\t" \ - : "=r" (regflags.cznv) : "rmi" (s), "r" (d) : "cc") + "pushf\n\t" \ + "pop %0\n\t" \ + : "=rm" (regflags.cznv) : "rmi" (s), "r" (d) : "memory", "cc") 
-#define x86_flag_cmpw(s, d) \ +#define optflag_cmpw(s, d) \ __asm__ __volatile__ ("cmpw %w1,%w2\n\t" \ - "pushfl\n\t" \ - "popl %0\n\t" \ - : "=r" (regflags.cznv) : "rmi" (s), "r" (d) : "cc") + "pushf\n\t" \ + "pop %0\n\t" \ + : "=rm" (regflags.cznv) : "rmi" (s), "r" (d) : "memory", "cc") -#define x86_flag_cmpb(s, d) \ +#define optflag_cmpb(s, d) \ __asm__ __volatile__ ("cmpb %b1,%b2\n\t" \ - "pushfl\n\t" \ - "popl %0\n\t" \ - : "=r" (regflags.cznv) : "qmi" (s), "q" (d) : "cc") + "pushf\n\t" \ + "pop %0\n\t" \ + : "=rm" (regflags.cznv) : "qmi" (s), "q" (d) : "memory", "cc") -#elif defined(__sparc__) && (defined(SPARC_V8_ASSEMBLY) || defined(SPARC_V9_ASSEMBLY)) +#else + +struct flag_struct { + uae_u32 cznv; + uae_u32 x; +}; + +#define FLAGVAL_Z 0x4000 +#define FLAGVAL_N 0x8000 + +#define SET_ZFLG(y) (regflags.cznv = (regflags.cznv & ~0x4000) | (((y) & 1) << 14)) +#define SET_CFLG(y) (regflags.cznv = (regflags.cznv & ~0x100) | (((y) & 1) << 8)) +#define SET_VFLG(y) (regflags.cznv = (regflags.cznv & ~0x1) | (((y) & 1))) +#define SET_NFLG(y) (regflags.cznv = (regflags.cznv & ~0x8000) | (((y) & 1) << 15)) +#define SET_XFLG(y) (regflags.x = (y)) + +#define GET_ZFLG ((regflags.cznv >> 14) & 1) +#define GET_CFLG ((regflags.cznv >> 8) & 1) +#define GET_VFLG ((regflags.cznv >> 0) & 1) +#define GET_NFLG ((regflags.cznv >> 15) & 1) +#define GET_XFLG (regflags.x & 1) + +#define CLEAR_CZNV (regflags.cznv = 0) +#define GET_CZNV (regflags.cznv) +#define IOR_CZNV(X) (regflags.cznv |= (X)) +#define SET_CZNV(X) (regflags.cznv = (X)) + +#define COPY_CARRY (regflags.x = (regflags.cznv)>>8) + +extern struct flag_struct regflags __asm__ ("regflags"); + +static inline int cctrue(int cc) +{ + uae_u32 cznv = regflags.cznv; + switch(cc){ + case 0: return 1; /* T */ + case 1: return 0; /* F */ + case 2: return (cznv & 0x4100) == 0; /* !GET_CFLG && !GET_ZFLG; HI */ + case 3: return (cznv & 0x4100) != 0; /* GET_CFLG || GET_ZFLG; LS */ + case 4: return (cznv & 0x100) == 0; /* !GET_CFLG; CC */ + 
case 5: return (cznv & 0x100) != 0; /* GET_CFLG; CS */ + case 6: return (cznv & 0x4000) == 0; /* !GET_ZFLG; NE */ + case 7: return (cznv & 0x4000) != 0; /* GET_ZFLG; EQ */ + case 8: return (cznv & 0x01) == 0; /* !GET_VFLG; VC */ + case 9: return (cznv & 0x01) != 0; /* GET_VFLG; VS */ + case 10:return (cznv & 0x8000) == 0; /* !GET_NFLG; PL */ + case 11:return (cznv & 0x8000) != 0; /* GET_NFLG; MI */ + case 12:return (((cznv << 15) ^ cznv) & 0x8000) == 0; /* GET_NFLG == GET_VFLG; GE */ + case 13:return (((cznv << 15) ^ cznv) & 0x8000) != 0;/* GET_NFLG != GET_VFLG; LT */ + case 14: + cznv &= 0xc001; + return (((cznv << 15) ^ cznv) & 0xc000) == 0; /* !GET_ZFLG && (GET_NFLG == GET_VFLG); GT */ + case 15: + cznv &= 0xc001; + return (((cznv << 15) ^ cznv) & 0xc000) != 0; /* GET_ZFLG || (GET_NFLG != GET_VFLG); LE */ + } + abort(); + return 0; +} + +/* Manually emit LAHF instruction so that 64-bit assemblers can grok it */ +#if defined CPU_x86_64 && defined __GNUC__ +#define ASM_LAHF ".byte 0x9f" +#else +#define ASM_LAHF "lahf" +#endif + +/* Is there any way to do this without declaring *all* memory clobbered? + I.e. any way to tell gcc that some byte-sized value is in %al? 
*/ +#define optflag_testl(v) \ + __asm__ __volatile__ ("andl %0,%0\n\t" \ + ASM_LAHF "\n\t" \ + "seto %%al\n\t" \ + "movb %%al,regflags\n\t" \ + "movb %%ah,regflags+1\n\t" \ + : : "r" (v) : "%eax","cc","memory") + +#define optflag_testw(v) \ + __asm__ __volatile__ ("andw %w0,%w0\n\t" \ + ASM_LAHF "\n\t" \ + "seto %%al\n\t" \ + "movb %%al,regflags\n\t" \ + "movb %%ah,regflags+1\n\t" \ + : : "r" (v) : "%eax","cc","memory") + +#define optflag_testb(v) \ + __asm__ __volatile__ ("andb %b0,%b0\n\t" \ + ASM_LAHF "\n\t" \ + "seto %%al\n\t" \ + "movb %%al,regflags\n\t" \ + "movb %%ah,regflags+1\n\t" \ + : : "q" (v) : "%eax","cc","memory") + +#define optflag_addl(v, s, d) do { \ + __asm__ __volatile__ ("addl %k1,%k0\n\t" \ + ASM_LAHF "\n\t" \ + "seto %%al\n\t" \ + "movb %%al,regflags\n\t" \ + "movb %%ah,regflags+1\n\t" \ + : "=r" (v) : "rmi" (s), "0" (d) : "%eax","cc","memory"); \ + COPY_CARRY; \ + } while (0) + +#define optflag_addw(v, s, d) do { \ + __asm__ __volatile__ ("addw %w1,%w0\n\t" \ + ASM_LAHF "\n\t" \ + "seto %%al\n\t" \ + "movb %%al,regflags\n\t" \ + "movb %%ah,regflags+1\n\t" \ + : "=r" (v) : "rmi" (s), "0" (d) : "%eax","cc","memory"); \ + COPY_CARRY; \ + } while (0) + +#define optflag_addb(v, s, d) do { \ + __asm__ __volatile__ ("addb %b1,%b0\n\t" \ + ASM_LAHF "\n\t" \ + "seto %%al\n\t" \ + "movb %%al,regflags\n\t" \ + "movb %%ah,regflags+1\n\t" \ + : "=q" (v) : "qmi" (s), "0" (d) : "%eax","cc","memory"); \ + COPY_CARRY; \ + } while (0) + +#define optflag_subl(v, s, d) do { \ + __asm__ __volatile__ ("subl %k1,%k0\n\t" \ + ASM_LAHF "\n\t" \ + "seto %%al\n\t" \ + "movb %%al,regflags\n\t" \ + "movb %%ah,regflags+1\n\t" \ + : "=r" (v) : "rmi" (s), "0" (d) : "%eax","cc","memory"); \ + COPY_CARRY; \ + } while (0) + +#define optflag_subw(v, s, d) do { \ + __asm__ __volatile__ ("subw %w1,%w0\n\t" \ + ASM_LAHF "\n\t" \ + "seto %%al\n\t" \ + "movb %%al,regflags\n\t" \ + "movb %%ah,regflags+1\n\t" \ + : "=r" (v) : "rmi" (s), "0" (d) : "%eax","cc","memory"); \ + 
COPY_CARRY; \ + } while (0) + +#define optflag_subb(v, s, d) do { \ + __asm__ __volatile__ ("subb %b1,%b0\n\t" \ + ASM_LAHF "\n\t" \ + "seto %%al\n\t" \ + "movb %%al,regflags\n\t" \ + "movb %%ah,regflags+1\n\t" \ + : "=q" (v) : "qmi" (s), "0" (d) : "%eax","cc","memory"); \ + COPY_CARRY; \ + } while (0) + +#define optflag_cmpl(s, d) \ + __asm__ __volatile__ ("cmpl %k0,%k1\n\t" \ + ASM_LAHF "\n\t" \ + "seto %%al\n\t" \ + "movb %%al,regflags\n\t" \ + "movb %%ah,regflags+1\n\t" \ + : : "rmi" (s), "r" (d) : "%eax","cc","memory") + +#define optflag_cmpw(s, d) \ + __asm__ __volatile__ ("cmpw %w0,%w1\n\t" \ + ASM_LAHF "\n\t" \ + "seto %%al\n\t" \ + "movb %%al,regflags\n\t" \ + "movb %%ah,regflags+1\n\t" \ + : : "rmi" (s), "r" (d) : "%eax","cc","memory") + +#define optflag_cmpb(s, d) \ + __asm__ __volatile__ ("cmpb %b0,%b1\n\t" \ + ASM_LAHF "\n\t" \ + "seto %%al\n\t" \ + "movb %%al,regflags\n\t" \ + "movb %%ah,regflags+1\n\t" \ + : : "qmi" (s), "q" (d) : "%eax","cc","memory") + +#endif + +#elif defined(CPU_arm) && defined(ARM_ASSEMBLY) + +struct flag_struct { + uae_u32 nzcv; + uae_u32 x; +}; + +#define FLAGVAL_Q 0x08000000 +#define FLAGVAL_V 0x10000000 +#define FLAGVAL_C 0x20000000 +#define FLAGVAL_Z 0x40000000 +#define FLAGVAL_N 0x80000000 + +#define SET_NFLG(y) (regflags.nzcv = (regflags.nzcv & ~0x80000000) | (((y) & 1) << 31)) +#define SET_ZFLG(y) (regflags.nzcv = (regflags.nzcv & ~0x40000000) | (((y) & 1) << 30)) +#define SET_CFLG(y) (regflags.nzcv = (regflags.nzcv & ~0x20000000) | (((y) & 1) << 29)) +#define SET_VFLG(y) (regflags.nzcv = (regflags.nzcv & ~0x10000000) | (((y) & 1) << 28)) +#define SET_XFLG(y) (regflags.x = (y)) + +#define GET_NFLG ((regflags.nzcv >> 31) & 1) +#define GET_ZFLG ((regflags.nzcv >> 30) & 1) +#define GET_CFLG ((regflags.nzcv >> 29) & 1) +#define GET_VFLG ((regflags.nzcv >> 28) & 1) +#define GET_XFLG (regflags.x & 1) + +#define CLEAR_CZNV (regflags.nzcv = 0) +#define GET_CZNV (regflags.nzcv) +#define IOR_CZNV(X) (regflags.nzcv |= (X)) +#define 
SET_CZNV(X) (regflags.nzcv = (X)) + +#define COPY_CARRY (regflags.x = (regflags.nzcv)>>29) + +extern struct flag_struct regflags __asm__ ("regflags"); + +static inline int cctrue(int cc) +{ + unsigned int nzcv = regflags.nzcv; + switch(cc){ + case 0: return 1; /* T */ + case 1: return 0; /* F */ + case 2: return (nzcv & 0x60000000) == 0; /* !GET_CFLG && !GET_ZFLG; HI */ + case 3: return (nzcv & 0x60000000) != 0; /* GET_CFLG || GET_ZFLG; LS */ + case 4: return (nzcv & 0x20000000) == 0; /* !GET_CFLG; CC */ + case 5: return (nzcv & 0x20000000) != 0; /* GET_CFLG; CS */ + case 6: return (nzcv & 0x40000000) == 0; /* !GET_ZFLG; NE */ + case 7: return (nzcv & 0x40000000) != 0; /* GET_ZFLG; EQ */ + case 8: return (nzcv & 0x10000000) == 0; /* !GET_VFLG; VC */ + case 9: return (nzcv & 0x10000000) != 0; /* GET_VFLG; VS */ + case 10:return (nzcv & 0x80000000) == 0; /* !GET_NFLG; PL */ + case 11:return (nzcv & 0x80000000) != 0; /* GET_NFLG; MI */ + case 12:return (((nzcv << 3) ^ nzcv) & 0x80000000) == 0; /* GET_NFLG == GET_VFLG; GE */ + case 13:return (((nzcv << 3) ^ nzcv) & 0x80000000) != 0; /* GET_NFLG != GET_VFLG; LT */ + case 14: + nzcv &= 0xd0000000; + return (((nzcv << 3) ^ nzcv) & 0xc0000000) == 0; /* !GET_ZFLG && (GET_NFLG == GET_VFLG); GT */ + case 15: + nzcv &= 0xd0000000; + return (((nzcv << 3) ^ nzcv) & 0xc0000000) != 0; /* GET_ZFLG || (GET_NFLG != GET_VFLG); LE */ + } + return 0; +} + +#define optflag_testl(v) do {\ + __asm__ __volatile__ ("tst %[rv],%[rv]\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + "bic %[nzcv],#0x30000000\n\t" \ + : [nzcv] "=r" (regflags.nzcv) \ + : [rv] "r" (v) \ + : "cc"); \ + } while(0) + +#define optflag_addl(v, s, d) do { \ + __asm__ __volatile__ ("adds %[rv],%[rd],%[rs]\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + : [nzcv] "=r" (regflags.nzcv), [rv] "=r" (v) \ + : [rs] "ri" (s), [rd] "1" (d) \ + : "cc"); \ + COPY_CARRY; \ + } while(0) + +#define optflag_subl(v, s, d) do { \ + __asm__ __volatile__ ("subs %[rv],%[rd],%[rs]\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + 
"eor %[nzcv],#0x20000000\n\t" \ + : [nzcv] "=r" (regflags.nzcv), [rv] "=r" (v) \ + : [rs] "ri" (s), [rd] "1" (d) \ + : "cc"); \ + COPY_CARRY; \ + } while(0) + +#define optflag_cmpl(s, d) do { \ + __asm__ __volatile__ ("cmp %[rd],%[rs]\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + "eor %[nzcv],#0x20000000\n\t" \ + : [nzcv] "=r" (regflags.nzcv) \ + : [rs] "ri" (s), [rd] "0" (d) \ + : "cc"); \ + } while(0) + +#if defined(ARMV6_ASSEMBLY) + +// #pragma message "ARM/v6 Assembly optimized flags" + +#define optflag_testw(v) do { \ + __asm__ __volatile__ ("sxth %[rv],%[rv]\n\t" \ + "tst %[rv],%[rv]\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + "bic %[nzcv],#0x30000000\n\t" \ + : [nzcv] "=r" (regflags.nzcv) \ + : [rv] "0" (v) \ + : "cc"); \ + }while(0) + +#define optflag_testb(v) do {\ + __asm__ __volatile__ ("sxtb %[rv],%[rv]\n\t" \ + "tst %[rv],%[rv]\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + "bic %[nzcv],#0x30000000\n\t" \ + : [nzcv] "=r" (regflags.nzcv) \ + : [rv] "0" (v) \ + : "cc"); \ + }while(0) + +#define optflag_addw(v, s, d) do { \ + __asm__ __volatile__ ("sxth %[rd],%[rd]\n\t" \ + "sxth %[rs],%[rs]\n\t" \ + "adds %[rd],%[rd],%[rs]\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + : [nzcv] "=r" (regflags.nzcv), [rv] "=r" (v) \ + : [rs] "ri" (s), [rd] "1" (d) \ + : "cc"); \ + COPY_CARRY; \ + } while(0) + +#define optflag_addb(v, s, d) do { \ + __asm__ __volatile__ ("sxtb %[rd],%[rd]\n\t" \ + "sxtb %[rs],%[rs]\n\t" \ + "adds %[rd],%[rd],%[rs]\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + : [nzcv] "=r" (regflags.nzcv), [rv] "=r" (v) \ + : [rs] "ri" (s), [rd] "1" (d) \ + : "cc"); \ + COPY_CARRY; \ + } while(0) + +#define optflag_subw(v, s, d) do { \ + __asm__ __volatile__ ("sxth %[rd],%[rd]\n\t" \ + "sxth %[rs],%[rs]\n\t" \ + "subs %[rd],%[rd],%[rs]\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + "eor %[nzcv],#0x20000000\n\t" \ + : [nzcv] "=r" (regflags.nzcv), [rv] "=r" (v) \ + : [rs] "ri" (s), [rd] "1" (d) \ + : "cc"); \ + COPY_CARRY; \ + } while(0) + +#define optflag_subb(v, s, d) do { \ + __asm__ __volatile__ ("sxtb 
%[rd],%[rd]\n\t" \ + "sxtb %[rs],%[rs]\n\t" \ + "subs %[rd],%[rd],%[rs]\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + "eor %[nzcv],#0x20000000\n\t" \ + : [nzcv] "=r" (regflags.nzcv), [rv] "=r" (v) \ + : [rs] "ri" (s), [rd] "1" (d) \ + : "cc"); \ + COPY_CARRY; \ + } while(0) + +#define optflag_cmpw(s, d) do { \ + __asm__ __volatile__ ("sxth %[rd],%[rd]\n\t" \ + "sxth %[rs],%[rs]\n\t" \ + "cmp %[rd],%[rs]\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + "eor %[nzcv],#0x20000000\n\t" \ + : [nzcv] "=r" (regflags.nzcv) \ + : [rs] "ri" (s), [rd] "0" (d) \ + : "cc"); \ + } while(0) + +#define optflag_cmpb(s, d) do { \ + __asm__ __volatile__ ("sxtb %[rd],%[rd]\n\t" \ + "sxtb %[rs],%[rs]\n\t" \ + "cmp %[rd],%[rs]\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + "eor %[nzcv],#0x20000000\n\t" \ + : [nzcv] "=r" (regflags.nzcv) \ + : [rs] "ri" (s), [rd] "0" (d) \ + : "cc"); \ + } while(0) + +#else + +// #pragma message "ARM/generic Assembly optimized flags" + +#define optflag_testw(v) do { \ + __asm__ __volatile__ ("lsl %[rv],%[rv],#16\n\t" \ + "tst %[rv],%[rv]\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + "bic %[nzcv],#0x30000000\n\t" \ + : [nzcv] "=r" (regflags.nzcv) \ + : [rv] "0" (v) \ + : "cc"); \ + }while(0) + +#define optflag_testb(v) do {\ + __asm__ __volatile__ ("lsl %[rv],%[rv],#24\n\t" \ + "tst %[rv],%[rv]\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + "bic %[nzcv],#0x30000000\n\t" \ + : [nzcv] "=r" (regflags.nzcv) \ + : [rv] "0" (v) \ + : "cc"); \ + }while(0) + +#define optflag_addw(v, s, d) do { \ + __asm__ __volatile__ ("lsl %[rd],%[rd],#16\n\t" \ + "adds %[rd],%[rd],%[rs],lsl #16\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + "lsr %[rv],%[rd],#16\n\t" \ + : [nzcv] "=r" (regflags.nzcv), [rv] "=r" (v) \ + : [rs] "ri" (s), [rd] "1" (d) \ + : "cc"); \ + COPY_CARRY; \ + } while(0) + +#define optflag_addb(v, s, d) do { \ + __asm__ __volatile__ ("lsl %[rd],%[rd],#24\n\t" \ + "adds %[rd],%[rd],%[rs],lsl #24\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + "lsr %[rv],%[rd],#24\n\t" \ + : [nzcv] "=r" (regflags.nzcv), [rv] "=r" (v) \ + : [rs] "ri" (s), 
[rd] "1" (d) \ + : "cc"); \ + COPY_CARRY; \ + } while(0) + +#define optflag_subw(v, s, d) do { \ + __asm__ __volatile__ ("lsl %[rd],%[rd],#16\n\t" \ + "subs %[rd],%[rd],%[rs],lsl #16\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + "eor %[nzcv],#0x20000000\n\t" \ + "lsr %[rv],%[rd],#16\n\t" \ + : [nzcv] "=r" (regflags.nzcv), [rv] "=r" (v) \ + : [rs] "ri" (s), [rd] "1" (d) \ + : "cc"); \ + COPY_CARRY; \ + } while(0) + +#define optflag_subb(v, s, d) do { \ + __asm__ __volatile__ ("lsl %[rd],%[rd],#24\n\t" \ + "subs %[rd],%[rd],%[rs],lsl #24\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + "eor %[nzcv],#0x20000000\n\t" \ + "lsr %[rv],%[rd],#24\n\t" \ + : [nzcv] "=r" (regflags.nzcv), [rv] "=r" (v) \ + : [rs] "ri" (s), [rd] "1" (d) \ + : "cc"); \ + COPY_CARRY; \ + } while(0) + +#define optflag_cmpw(s, d) do { \ + __asm__ __volatile__ ("lsl %[rd],%[rd],#16\n\t" \ + "cmp %[rd],%[rs],lsl #16\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + "eor %[nzcv],#0x20000000\n\t" \ + : [nzcv] "=r" (regflags.nzcv) \ + : [rs] "ri" (s), [rd] "0" (d) \ + : "cc"); \ + } while(0) + +#define optflag_cmpb(s, d) do { \ + __asm__ __volatile__ ("lsl %[rd],%[rd],#24\n\t" \ + "cmp %[rd],%[rs],lsl #24\n\t" \ + "mrs %[nzcv],cpsr\n\t" \ + "eor %[nzcv],#0x20000000\n\t" \ + : [nzcv] "=r" (regflags.nzcv) \ + : [rs] "ri" (s), [rd] "0" (d) \ + : "cc"); \ + } while(0) + +#endif + +#elif defined(CPU_sparc) && (defined(SPARC_V8_ASSEMBLY) || defined(SPARC_V9_ASSEMBLY)) struct flag_struct { unsigned char nzvc; @@ -151,22 +662,29 @@ struct flag_struct { extern struct flag_struct regflags; -#define SET_ZFLG(y) (regflags.nzvc = (regflags.nzvc & ~0x04) | (((y) & 1) << 2)) -#define SET_CFLG(y) (regflags.nzvc = (regflags.nzvc & ~1) | ((y) & 1)) -#define SET_VFLG(y) (regflags.nzvc = (regflags.nzvc & ~0x02) | (((y) & 1) << 1)) -#define SET_NFLG(y) (regflags.nzvc = (regflags.nzvc & ~0x08) | (((y) & 1) << 3)) -#define SET_XFLG(y) (regflags.x = (y)) +#define FLAGVAL_Z 0x04 +#define FLAGVAL_N 0x08 -#define GET_ZFLG ((regflags.nzvc >> 2) & 1) -#define GET_CFLG 
(regflags.nzvc & 1) -#define GET_VFLG ((regflags.nzvc >> 1) & 1) -#define GET_NFLG ((regflags.nzvc >> 3) & 1) -#define GET_XFLG (regflags.x & 1) +#define SET_ZFLG(y) (regflags.nzvc = (regflags.nzvc & ~0x04) | (((y) & 1) << 2)) +#define SET_CFLG(y) (regflags.nzvc = (regflags.nzvc & ~1) | ((y) & 1)) +#define SET_VFLG(y) (regflags.nzvc = (regflags.nzvc & ~0x02) | (((y) & 1) << 1)) +#define SET_NFLG(y) (regflags.nzvc = (regflags.nzvc & ~0x08) | (((y) & 1) << 3)) +#define SET_XFLG(y) (regflags.x = (y)) + +#define GET_ZFLG ((regflags.nzvc >> 2) & 1) +#define GET_CFLG (regflags.nzvc & 1) +#define GET_VFLG ((regflags.nzvc >> 1) & 1) +#define GET_NFLG ((regflags.nzvc >> 3) & 1) +#define GET_XFLG (regflags.x & 1) + +#define CLEAR_CZNV (regflags.nzvc = 0) +#define GET_CZNV (regflags.nzvc) +#define IOR_CZNV(X) (regflags.nzvc |= (X)) +#define SET_CZNV(X) (regflags.nzvc = (X)) -#define CLEAR_CZNV (regflags.nzvc = 0) #define COPY_CARRY (regflags.x = regflags.nzvc) -static __inline__ int cctrue(int cc) +static inline int cctrue(int cc) { uae_u32 nzvc = regflags.nzvc; switch(cc){ @@ -787,6 +1305,8 @@ static inline uae_u32 sparc_v9_flag_addx_32(flag_struct *flags, uae_u32 src, uae #endif /* SPARC_V9_ASSEMBLY */ +#endif + #else struct flag_struct { @@ -805,7 +1325,28 @@ extern struct flag_struct regflags; #define VFLG (regflags.v) #define XFLG (regflags.x) -static __inline__ int cctrue(const int cc) +#define SET_CFLG(x) (CFLG = (x)) +#define SET_NFLG(x) (NFLG = (x)) +#define SET_VFLG(x) (VFLG = (x)) +#define SET_ZFLG(x) (ZFLG = (x)) +#define SET_XFLG(x) (XFLG = (x)) + +#define GET_CFLG CFLG +#define GET_NFLG NFLG +#define GET_VFLG VFLG +#define GET_ZFLG ZFLG +#define GET_XFLG XFLG + +#define CLEAR_CZNV do { \ + SET_CFLG (0); \ + SET_ZFLG (0); \ + SET_NFLG (0); \ + SET_VFLG (0); \ +} while (0) + +#define COPY_CARRY (SET_XFLG (GET_CFLG)) + +static inline int cctrue(const int cc) { switch(cc){ case 0: return 1; /* T */ @@ -828,4 +1369,6 @@ static __inline__ int cctrue(const int cc) 
return 0; } -#endif +#endif /* OPTIMIZED_FLAGS */ + +#endif /* M68K_FLAGS_H */ diff --git a/BasiliskII/src/uae_cpu/memory-uae.h b/BasiliskII/src/uae_cpu/memory-uae.h new file mode 100644 index 00000000..c93aeb37 --- /dev/null +++ b/BasiliskII/src/uae_cpu/memory-uae.h @@ -0,0 +1,606 @@ +/* + * memory.h - memory management + * + * Copyright (c) 2001-2006 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + /* + * UAE - The Un*x Amiga Emulator + * + * memory management + * + * Copyright 1995 Bernd Schmidt + */ + +#ifndef UAE_MEMORY_H +#define UAE_MEMORY_H + +#include "sysdeps.h" +#include "string.h" +#include "hardware.h" +#include "parameters.h" +#include "registers.h" +#include "cpummu.h" +#include "readcpu.h" + +# include <SDL_endian.h> + +// newcpu.h +extern void Exception (int, uaecptr); +#ifdef EXCEPTIONS_VIA_LONGJMP + extern JMP_BUF excep_env; + #define SAVE_EXCEPTION \ + JMP_BUF excep_env_old; \ + memcpy(excep_env_old, excep_env, sizeof(JMP_BUF)) + #define RESTORE_EXCEPTION \ + memcpy(excep_env, excep_env_old, sizeof(JMP_BUF)) + #define TRY(var) int var = SETJMP(excep_env); if (!var) + #define CATCH(var) else + #define THROW(n) LONGJMP(excep_env, n) + #define THROW_AGAIN(var) LONGJMP(excep_env, var) + #define VOLATILE volatile +#else + struct m68k_exception { + int prb; + m68k_exception (int exc) : prb (exc) {} + operator int() { return prb; } + }; + #define SAVE_EXCEPTION + #define RESTORE_EXCEPTION + #define TRY(var) try + #define CATCH(var) catch(m68k_exception var) + #define THROW(n) throw m68k_exception(n) + #define THROW_AGAIN(var) throw + #define VOLATILE +#endif /* EXCEPTIONS_VIA_LONGJMP */ +extern int in_exception_2; + +#define STRAM_END 0x0e00000UL // should be replaced by global ROMBase as soon as ROMBase will be a constant +#define ROM_END 0x0e80000UL // should be replaced by ROMBase + RealROMSize if we are going to work with larger TOS ROMs than 512 kilobytes +#define FastRAM_BEGIN 0x1000000UL // should be replaced by global FastRAMBase as soon as FastRAMBase will be a constant +#ifdef FixedSizeFastRAM +#define FastRAM_SIZE (FixedSizeFastRAM * 1024 * 1024) +#else +#define FastRAM_SIZE FastRAMSize +#endif + +#ifdef FIXED_VIDEORAM +#define 
ARANYMVRAMSTART 0xf0000000UL +#endif + +#define ARANYMVRAMSIZE 0x00100000 // should be a variable to protect VGA card offscreen memory + +#ifdef FIXED_VIDEORAM +extern uintptr VMEMBaseDiff; +#else +extern uae_u32 VideoRAMBase; +#endif + +#ifdef ARAM_PAGE_CHECK +extern uaecptr pc_page, read_page, write_page; +extern uintptr pc_offset, read_offset, write_offset; +# ifdef PROTECT2K +# define ARAM_PAGE_MASK 0x7ff +# else +# ifdef FULLMMU +# define ARAM_PAGE_MASK 0xfff +# else +# define ARAM_PAGE_MASK 0xfffff +# endif +# endif +#endif + +extern uintptr MEMBaseDiff; +extern uintptr ROMBaseDiff; +extern uintptr FastRAMBaseDiff; +# define InitMEMBaseDiff(va, ra) (MEMBaseDiff = (uintptr)(va) - (uintptr)(ra)) +# define InitROMBaseDiff(va, ra) (ROMBaseDiff = (uintptr)(va) - (uintptr)(ra)) +# define InitFastRAMBaseDiff(va, ra) (FastRAMBaseDiff = (uintptr)(va) - (uintptr)(ra)) + +#ifdef FIXED_VIDEORAM +#define InitVMEMBaseDiff(va, ra) (VMEMBaseDiff = (uintptr)(va) - (uintptr)(ra)) +#else +#define InitVMEMBaseDiff(va, ra) (ra = (uintptr)(va) + MEMBaseDiff) +#endif + +extern "C" void breakpt(void); + + +static inline uae_u64 do_get_mem_quad(uae_u64 *a) {return SDL_SwapBE64(*a);} +static inline void do_put_mem_quad(uae_u64 *a, uae_u64 v) {*a = SDL_SwapBE64(v);} + + +#ifndef NOCHECKBOUNDARY +static ALWAYS_INLINE bool test_ram_boundary(uaecptr addr, int size, bool super, bool write) +{ + if (addr <= (FastRAM_BEGIN + FastRAM_SIZE - size)) { +#ifdef PROTECT2K + // protect first 2kB of RAM - access in supervisor mode only + if (!super && addr < 0x00000800UL) + return false; +#endif + // check for write access to protected areas: + // - first two longwords of ST-RAM are non-writable (ROM shadow) + // - non-writable area between end of ST-RAM and begin of FastRAM + if (!write || addr >= FastRAM_BEGIN || (addr >= 8 && addr <= (STRAM_END - size))) + return true; + } +#ifdef FIXED_VIDEORAM + return addr >= ARANYMVRAMSTART && addr <= (ARANYMVRAMSTART + ARANYMVRAMSIZE - size); +#else + return 
addr >= VideoRAMBase && addr <= (VideoRAMBase + ARANYMVRAMSIZE - size); +#endif +} +/* + * "size" is the size of the memory access (byte = 1, word = 2, long = 4) + */ +static ALWAYS_INLINE void check_ram_boundary(uaecptr addr, int size, bool write) +{ + if (test_ram_boundary(addr, size, regs.s, write)) + return; + + // D(bug("BUS ERROR %s at $%x\n", (write ? "writing" : "reading"), addr)); + regs.mmu_fault_addr = addr; + regs.mmu_ssw = ((size & 3) << 5) | (write ? 0 : (1 << 8)); + breakpt(); + THROW(2); +} + +#else +static inline bool test_ram_boundary(uaecptr, int, bool, bool) { return 1; } +static inline void check_ram_boundary(uaecptr, int, bool) { } +#endif + +#ifdef FIXED_VIDEORAM +# define do_get_real_address(a) ((uae_u8 *)(((uaecptr)(a) < ARANYMVRAMSTART) ? ((uaecptr)(a) + MEMBaseDiff) : ((uaecptr)(a) + VMEMBaseDiff))) +#else +# define do_get_real_address(a) ((uae_u8 *)((uintptr)(a) + MEMBaseDiff)) +#endif + +static inline uae_u8 *phys_get_real_address(uaecptr addr) +{ + return do_get_real_address(addr); +} + +#ifndef NOCHECKBOUNDARY +static inline bool phys_valid_address(uaecptr addr, bool write, int sz) +{ + return test_ram_boundary(addr, sz, regs.s, write); +} +#else +static inline bool phys_valid_address(uaecptr, bool, int) { return true; } +#endif + +static inline uae_u64 phys_get_quad(uaecptr addr) +{ +#ifdef ARAM_PAGE_CHECK + if (((addr ^ read_page) <= ARAM_PAGE_MASK)) + return do_get_mem_quad((uae_u64*)(addr + read_offset)); +#endif +#ifndef HW_SIGSEGV + addr = addr < 0xff000000 ? 
addr : addr & 0x00ffffff; + if ((addr & 0xfff00000) == 0x00f00000) return HWget_l(addr); /* TODO: must be HWget_q */ +#endif + check_ram_boundary(addr, 8, false); + uae_u64 * const m = (uae_u64 *)phys_get_real_address(addr); +#ifdef ARAM_PAGE_CHECK + read_page = addr; + read_offset = (uintptr)m - (uintptr)addr; +#endif + return do_get_mem_quad(m); +} + +static inline uae_u32 phys_get_long(uaecptr addr) +{ +#ifdef ARAM_PAGE_CHECK + if (((addr ^ read_page) <= ARAM_PAGE_MASK)) + return do_get_mem_long((uae_u32*)(addr + read_offset)); +#endif +#ifndef HW_SIGSEGV + addr = addr < 0xff000000 ? addr : addr & 0x00ffffff; + if ((addr & 0xfff00000) == 0x00f00000) return HWget_l(addr); +#endif + check_ram_boundary(addr, 4, false); + uae_u32 * const m = (uae_u32 *)phys_get_real_address(addr); +#ifdef ARAM_PAGE_CHECK + read_page = addr; + read_offset = (uintptr)m - (uintptr)addr; +#endif + return do_get_mem_long(m); +} + +static inline uae_u32 phys_get_word(uaecptr addr) +{ +#ifdef ARAM_PAGE_CHECK + if (((addr ^ read_page) <= ARAM_PAGE_MASK)) + return do_get_mem_word((uae_u16*)(addr + read_offset)); +#endif +#ifndef HW_SIGSEGV + addr = addr < 0xff000000 ? addr : addr & 0x00ffffff; + if ((addr & 0xfff00000) == 0x00f00000) return HWget_w(addr); +#endif + check_ram_boundary(addr, 2, false); + uae_u16 * const m = (uae_u16 *)phys_get_real_address(addr); +#ifdef ARAM_PAGE_CHECK + read_page = addr; + read_offset = (uintptr)m - (uintptr)addr; +#endif + return do_get_mem_word(m); +} + +static inline uae_u32 phys_get_byte(uaecptr addr) +{ +#ifdef ARAM_PAGE_CHECK + if (((addr ^ read_page) <= ARAM_PAGE_MASK)) + return do_get_mem_byte((uae_u8*)(addr + read_offset)); +#endif +#ifndef HW_SIGSEGV + addr = addr < 0xff000000 ? 
addr : addr & 0x00ffffff; + if ((addr & 0xfff00000) == 0x00f00000) return HWget_b(addr); +#endif + check_ram_boundary(addr, 1, false); + uae_u8 * const m = (uae_u8 *)phys_get_real_address(addr); +#ifdef ARAM_PAGE_CHECK + read_page = addr; + read_offset = (uintptr)m - (uintptr)addr; +#endif + return do_get_mem_byte(m); +} + +static inline void phys_put_quad(uaecptr addr, uae_u64 l) +{ +#ifdef ARAM_PAGE_CHECK + if (((addr ^ write_page) <= ARAM_PAGE_MASK)) { + do_put_mem_quad((uae_u64*)(addr + write_offset), l); + return; + } +#endif +#ifndef HW_SIGSEGV + addr = addr < 0xff000000 ? addr : addr & 0x00ffffff; + if ((addr & 0xfff00000) == 0x00f00000) { + HWput_l(addr, l); /* TODO: must be HWput_q */ + return; + } +#endif + check_ram_boundary(addr, 8, true); + uae_u64 * const m = (uae_u64 *)phys_get_real_address(addr); +#ifdef ARAM_PAGE_CHECK + write_page = addr; + write_offset = (uintptr)m - (uintptr)addr; +#endif + do_put_mem_quad(m, l); +} + +static inline void phys_put_long(uaecptr addr, uae_u32 l) +{ +#ifdef ARAM_PAGE_CHECK + if (((addr ^ write_page) <= ARAM_PAGE_MASK)) { + do_put_mem_long((uae_u32*)(addr + write_offset), l); + return; + } +#endif +#ifndef HW_SIGSEGV + addr = addr < 0xff000000 ? addr : addr & 0x00ffffff; + if ((addr & 0xfff00000) == 0x00f00000) { + HWput_l(addr, l); + return; + } +#endif + check_ram_boundary(addr, 4, true); + uae_u32 * const m = (uae_u32 *)phys_get_real_address(addr); +#ifdef ARAM_PAGE_CHECK + write_page = addr; + write_offset = (uintptr)m - (uintptr)addr; +#endif + do_put_mem_long(m, l); +} + +static inline void phys_put_word(uaecptr addr, uae_u32 w) +{ +#ifdef ARAM_PAGE_CHECK + if (((addr ^ write_page) <= ARAM_PAGE_MASK)) { + do_put_mem_word((uae_u16*)(addr + write_offset), w); + return; + } +#endif +#ifndef HW_SIGSEGV + addr = addr < 0xff000000 ? 
addr : addr & 0x00ffffff; + if ((addr & 0xfff00000) == 0x00f00000) { + HWput_w(addr, w); + return; + } +#endif + check_ram_boundary(addr, 2, true); + uae_u16 * const m = (uae_u16 *)phys_get_real_address(addr); +#ifdef ARAM_PAGE_CHECK + write_page = addr; + write_offset = (uintptr)m - (uintptr)addr; +#endif + do_put_mem_word(m, w); +} + +static inline void phys_put_byte(uaecptr addr, uae_u32 b) +{ +#ifdef ARAM_PAGE_CHECK + if (((addr ^ write_page) <= ARAM_PAGE_MASK)) { + do_put_mem_byte((uae_u8*)(addr + write_offset), b); + return; + } +#endif +#ifndef HW_SIGSEGV + addr = addr < 0xff000000 ? addr : addr & 0x00ffffff; + if ((addr & 0xfff00000) == 0x00f00000) { + HWput_b(addr, b); + return; + } +#endif + check_ram_boundary(addr, 1, true); + uae_u8 * const m = (uae_u8 *)phys_get_real_address(addr); +#ifdef ARAM_PAGE_CHECK + write_page = addr; + write_offset = (uintptr)m - (uintptr)addr; +#endif + do_put_mem_byte(m, b); +} + +#ifdef FULLMMU +static ALWAYS_INLINE bool is_unaligned(uaecptr addr, int size) +{ + return unlikely((addr & (size - 1)) && (addr ^ (addr + size - 1)) & 0x1000); +} + +static ALWAYS_INLINE uae_u8 *mmu_get_real_address(uaecptr addr, struct mmu_atc_line *cl) +{ + return do_get_real_address(cl->phys + addr); +} + +static ALWAYS_INLINE uae_u32 mmu_get_quad(uaecptr addr, int data) +{ + struct mmu_atc_line *cl; + + if (likely(mmu_lookup(addr, data, 0, &cl))) + return do_get_mem_quad((uae_u64 *)mmu_get_real_address(addr, cl)); + return mmu_get_quad_slow(addr, regs.s, data, cl); +} + +static ALWAYS_INLINE uae_u64 get_quad(uaecptr addr) +{ + return mmu_get_quad(addr, 1); +} + +static ALWAYS_INLINE uae_u32 mmu_get_long(uaecptr addr, int data, int size) +{ + struct mmu_atc_line *cl; + + if (likely(mmu_lookup(addr, data, 0, &cl))) + return do_get_mem_long((uae_u32 *)mmu_get_real_address(addr, cl)); + return mmu_get_long_slow(addr, regs.s, data, size, cl); +} + +static ALWAYS_INLINE uae_u32 get_long(uaecptr addr) +{ + if (unlikely(is_unaligned(addr, 4))) + 
return mmu_get_long_unaligned(addr, 1); + return mmu_get_long(addr, 1, sz_long); +} + +static ALWAYS_INLINE uae_u16 mmu_get_word(uaecptr addr, int data, int size) +{ + struct mmu_atc_line *cl; + + if (likely(mmu_lookup(addr, data, 0, &cl))) + return do_get_mem_word((uae_u16 *)mmu_get_real_address(addr, cl)); + return mmu_get_word_slow(addr, regs.s, data, size, cl); +} + +static ALWAYS_INLINE uae_u16 get_word(uaecptr addr) +{ + if (unlikely(is_unaligned(addr, 2))) + return mmu_get_word_unaligned(addr, 1); + return mmu_get_word(addr, 1, sz_word); +} + +static ALWAYS_INLINE uae_u8 mmu_get_byte(uaecptr addr, int data, int size) +{ + struct mmu_atc_line *cl; + + if (likely(mmu_lookup(addr, data, 0, &cl))) + return do_get_mem_byte((uae_u8 *)mmu_get_real_address(addr, cl)); + return mmu_get_byte_slow(addr, regs.s, data, size, cl); +} + +static ALWAYS_INLINE uae_u8 get_byte(uaecptr addr) +{ + return mmu_get_byte(addr, 1, sz_byte); +} + +static ALWAYS_INLINE void mmu_put_quad(uaecptr addr, uae_u64 val, int data) +{ + struct mmu_atc_line *cl; + + if (likely(mmu_lookup(addr, data, 1, &cl))) + do_put_mem_quad((uae_u64 *)mmu_get_real_address(addr, cl), val); + else + mmu_put_quad_slow(addr, val, regs.s, data, cl); +} + +static ALWAYS_INLINE void put_quad(uaecptr addr, uae_u32 val) +{ + mmu_put_quad(addr, val, 1); +} + +static ALWAYS_INLINE void mmu_put_long(uaecptr addr, uae_u32 val, int data, int size) +{ + struct mmu_atc_line *cl; + + if (likely(mmu_lookup(addr, data, 1, &cl))) + do_put_mem_long((uae_u32 *)mmu_get_real_address(addr, cl), val); + else + mmu_put_long_slow(addr, val, regs.s, data, size, cl); +} + +static ALWAYS_INLINE void put_long(uaecptr addr, uae_u32 val) +{ + if (unlikely(is_unaligned(addr, 4))) + mmu_put_long_unaligned(addr, val, 1); + else + mmu_put_long(addr, val, 1, sz_long); +} + +static ALWAYS_INLINE void mmu_put_word(uaecptr addr, uae_u16 val, int data, int size) +{ + struct mmu_atc_line *cl; + + if (likely(mmu_lookup(addr, data, 1, &cl))) + 
do_put_mem_word((uae_u16 *)mmu_get_real_address(addr, cl), val); + else + mmu_put_word_slow(addr, val, regs.s, data, size, cl); +} + +static ALWAYS_INLINE void put_word(uaecptr addr, uae_u16 val) +{ + if (unlikely(is_unaligned(addr, 2))) + mmu_put_word_unaligned(addr, val, 1); + else + mmu_put_word(addr, val, 1, sz_word); +} + +static ALWAYS_INLINE void mmu_put_byte(uaecptr addr, uae_u8 val, int data, int size) +{ + struct mmu_atc_line *cl; + + if (likely(mmu_lookup(addr, data, 1, &cl))) + do_put_mem_byte((uae_u8 *)mmu_get_real_address(addr, cl), val); + else + mmu_put_byte_slow(addr, val, regs.s, data, size, cl); +} + +static ALWAYS_INLINE void put_byte(uaecptr addr, uae_u8 val) +{ + mmu_put_byte(addr, val, 1, sz_byte); +} + +static inline uae_u8 *get_real_address(uaecptr addr, int write, int sz) +{ + (void)sz; + return phys_get_real_address(mmu_translate(addr, regs.s, 1, write)); +} + +static ALWAYS_INLINE uae_u32 mmu_get_user_long(uaecptr addr, int super, int data, int size) +{ + struct mmu_atc_line *cl; + + if (likely(mmu_user_lookup(addr, super, data, 0, &cl))) + return do_get_mem_long((uae_u32 *)mmu_get_real_address(addr, cl)); + return mmu_get_long_slow(addr, super, data, size, cl); +} + +static ALWAYS_INLINE uae_u16 mmu_get_user_word(uaecptr addr, int super, int data, int size) +{ + struct mmu_atc_line *cl; + + if (likely(mmu_user_lookup(addr, super, data, 0, &cl))) + return do_get_mem_word((uae_u16 *)mmu_get_real_address(addr, cl)); + return mmu_get_word_slow(addr, super, data, size, cl); +} + +static ALWAYS_INLINE uae_u8 mmu_get_user_byte(uaecptr addr, int super, int data, int size) +{ + struct mmu_atc_line *cl; + + if (likely(mmu_user_lookup(addr, super, data, 0, &cl))) + return do_get_mem_byte((uae_u8 *)mmu_get_real_address(addr, cl)); + return mmu_get_byte_slow(addr, super, data, size, cl); +} + +static ALWAYS_INLINE void mmu_put_user_long(uaecptr addr, uae_u32 val, int super, int data, int size) +{ + struct mmu_atc_line *cl; + + if 
(likely(mmu_user_lookup(addr, super, data, 1, &cl))) + do_put_mem_long((uae_u32 *)mmu_get_real_address(addr, cl), val); + else + mmu_put_long_slow(addr, val, super, data, size, cl); +} + +static ALWAYS_INLINE void mmu_put_user_word(uaecptr addr, uae_u16 val, int super, int data, int size) +{ + struct mmu_atc_line *cl; + + if (likely(mmu_user_lookup(addr, super, data, 1, &cl))) + do_put_mem_word((uae_u16 *)mmu_get_real_address(addr, cl), val); + else + mmu_put_word_slow(addr, val, super, data, size, cl); +} + +static ALWAYS_INLINE void mmu_put_user_byte(uaecptr addr, uae_u8 val, int super, int data, int size) +{ + struct mmu_atc_line *cl; + + if (likely(mmu_user_lookup(addr, super, data, 1, &cl))) + do_put_mem_byte((uae_u8 *)mmu_get_real_address(addr, cl), val); + else + mmu_put_byte_slow(addr, val, super, data, size, cl); +} + +static inline bool valid_address(uaecptr addr, bool write, int sz) +{ + SAVE_EXCEPTION; + TRY(prb) { + (void)sz; + check_ram_boundary(mmu_translate(addr, regs.s, 1, (write ? 
1 : 0)), sz, write); + RESTORE_EXCEPTION; + return true; + } + CATCH(prb) { + RESTORE_EXCEPTION; + return false; + } +} + +#else + +# define get_quad(a) phys_get_quad(a) +# define get_long(a) phys_get_long(a) +# define get_word(a) phys_get_word(a) +# define get_byte(a) phys_get_byte(a) +# define put_quad(a,b) phys_put_quad(a,b) +# define put_long(a,b) phys_put_long(a,b) +# define put_word(a,b) phys_put_word(a,b) +# define put_byte(a,b) phys_put_byte(a,b) +# define get_real_address(a,w,s) phys_get_real_address(a) + +#define valid_address(a,w,s) phys_valid_address(a,w,s) +#endif + +static inline void flush_internals() { +#ifdef ARAM_PAGE_CHECK + pc_page = 0xeeeeeeee; + read_page = 0xeeeeeeee; + write_page = 0xeeeeeeee; +#endif +} + +#endif /* MEMORY_H */ + +/* +vim:ts=4:sw=4: +*/ diff --git a/BasiliskII/src/uae_cpu/memory.cpp b/BasiliskII/src/uae_cpu/memory.cpp new file mode 100644 index 00000000..e56f993d --- /dev/null +++ b/BasiliskII/src/uae_cpu/memory.cpp @@ -0,0 +1,59 @@ +/* + * memory.cpp - memory management + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + /* + * UAE - The Un*x Amiga Emulator + * + * Memory management + * + * (c) 1995 Bernd Schmidt + */ + +#include "sysdeps.h" + +#include "memory.h" +#define DEBUG 0 +#include "debug.h" + +#ifdef ARAM_PAGE_CHECK +uaecptr pc_page = 0xeeeeeeee; +uintptr pc_offset = 0; +uaecptr read_page = 0xeeeeeeee; +uintptr read_offset = 0; +uaecptr write_page = 0xeeeeeeee; +uintptr write_offset = 0; +#endif + +extern "C" void breakpt(void) +{ + // bug("bus err: pc=%08x, sp=%08x, addr=%08x", m68k_getpc(), regs.regs[15], regs.mmu_fault_addr); +} + +#if !KNOWN_ALLOC && !NORMAL_ADDRESSING +// This part need rewrite for ARAnyM !! +// It can be taken from hatari. + +#error Not prepared for your platform, maybe you need memory banks from hatari + +#endif /* !KNOWN_ALLOC && !NORMAL_ADDRESSING */ diff --git a/BasiliskII/src/uae_cpu/memory.h b/BasiliskII/src/uae_cpu/memory.h index 670c2ee7..f7bab41d 100644 --- a/BasiliskII/src/uae_cpu/memory.h +++ b/BasiliskII/src/uae_cpu/memory.h @@ -27,6 +27,34 @@ extern uintptr MEMBaseDiff; #endif +extern void Exception (int, uaecptr); +#ifdef EXCEPTIONS_VIA_LONGJMP + extern JMP_BUF excep_env; + #define SAVE_EXCEPTION \ + JMP_BUF excep_env_old; \ + memcpy(excep_env_old, excep_env, sizeof(JMP_BUF)) + #define RESTORE_EXCEPTION \ + memcpy(excep_env, excep_env_old, sizeof(JMP_BUF)) + #define TRY(var) int var = SETJMP(excep_env); if (!var) + #define CATCH(var) else + #define THROW(n) LONGJMP(excep_env, n) + #define THROW_AGAIN(var) LONGJMP(excep_env, var) + #define VOLATILE volatile +#else + struct m68k_exception { + int prb; + m68k_exception (int exc) : prb (exc) {} + operator int() { return prb; } + }; + #define SAVE_EXCEPTION + #define RESTORE_EXCEPTION + #define TRY(var) try + #define CATCH(var) catch(m68k_exception var) + #define THROW(n) throw 
m68k_exception(n) + #define THROW_AGAIN(var) throw + #define VOLATILE +#endif /* EXCEPTIONS_VIA_LONGJMP */ + #if DIRECT_ADDRESSING static __inline__ uae_u8 *do_get_real_address(uaecptr addr) { @@ -41,40 +69,57 @@ static __inline__ uae_u32 get_long(uaecptr addr) uae_u32 * const m = (uae_u32 *)do_get_real_address(addr); return do_get_mem_long(m); } +#define phys_get_long get_long static __inline__ uae_u32 get_word(uaecptr addr) { uae_u16 * const m = (uae_u16 *)do_get_real_address(addr); return do_get_mem_word(m); } +#define phys_get_word get_word static __inline__ uae_u32 get_byte(uaecptr addr) { uae_u8 * const m = (uae_u8 *)do_get_real_address(addr); return do_get_mem_byte(m); } +#define phys_get_byte get_byte static __inline__ void put_long(uaecptr addr, uae_u32 l) { uae_u32 * const m = (uae_u32 *)do_get_real_address(addr); do_put_mem_long(m, l); } +#define phys_put_long put_long static __inline__ void put_word(uaecptr addr, uae_u32 w) { uae_u16 * const m = (uae_u16 *)do_get_real_address(addr); do_put_mem_word(m, w); } +#define phys_put_word put_word static __inline__ void put_byte(uaecptr addr, uae_u32 b) { uae_u8 * const m = (uae_u8 *)do_get_real_address(addr); do_put_mem_byte(m, b); } +#define phys_put_byte put_byte static __inline__ uae_u8 *get_real_address(uaecptr addr) { return do_get_real_address(addr); } +static inline uae_u8 *get_real_address(uaecptr addr, int write, int sz) +{ + return do_get_real_address(addr); +} +static inline uae_u8 *phys_get_real_address(uaecptr addr) +{ + return do_get_real_address(addr); +} static __inline__ uae_u32 get_virtual_address(uae_u8 *addr) { return do_get_virtual_address(addr); } #endif /* DIRECT_ADDRESSING */ +static __inline__ void check_ram_boundary(uaecptr addr, int size, bool write) {} +static inline void flush_internals() {} + #endif /* MEMORY_H */ diff --git a/BasiliskII/src/uae_cpu/newcpu.cpp b/BasiliskII/src/uae_cpu/newcpu.cpp index 4adf0cc8..ac1505f5 100644 --- a/BasiliskII/src/uae_cpu/newcpu.cpp +++ 
b/BasiliskII/src/uae_cpu/newcpu.cpp @@ -1,3 +1,28 @@ +/* + * newcpu.cpp - CPU emulation + * + * Copyright (c) 2010 ARAnyM dev team (see AUTHORS) + * + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ /* * UAE - The Un*x Amiga Emulator * @@ -6,27 +31,44 @@ * (c) 1995 Bernd Schmidt */ -#include -#include -#include - #include "sysdeps.h" +#include #include "cpu_emulation.h" #include "main.h" #include "emul_op.h" - -extern int intlev(void); // From baisilisk_glue.cpp - #include "m68k.h" #include "memory.h" #include "readcpu.h" #include "newcpu.h" +#ifdef USE_JIT +# include "compiler/compemu.h" +#endif +#include "fpu/fpu.h" +#include "cpummu.h" +// #include "natfeats.h" +// #include "disasm-glue.h" + +#include + +#define DEBUG 0 +#include "debug.h" + +#define SANITY_CHECK_ATC 1 + +struct fixup fixup = {0, 0, 0}; int quit_program = 0; -int debugging = 0; + +// For instruction $7139 +bool cpu_debugging = false; + struct flag_struct regflags; +/* LongJump buffers */ +#ifdef EXCEPTIONS_VIA_LONGJMP +JMP_BUF excep_env; +#endif /* Opcode of faulting instruction */ uae_u16 last_op_for_exception_3; /* PC at fault time */ @@ 
-41,128 +83,132 @@ int movem_index1[256]; int movem_index2[256]; int movem_next[256]; -int fpp_movem_index1[256]; -int fpp_movem_index2[256]; -int fpp_movem_next[256]; +#ifdef FLIGHT_RECORDER -cpuop_func *cpufunctbl[65536]; +// feel free to edit the following defines to customize the dump +#define FRLOG_HOTKEY 1 /* 1 = dump only when hotkey is held down */ +#define FRLOG_ALL 1 /* 1 = dump continuously to ever growing log */ +#define FRLOG_IRQ 0 /* 1 = dump also CPU in interrupts */ +#define FRLOG_REGS 0 /* 1 = dump also all data/address registers */ +#define FRLOG_SIZE 8192 /* this many instructions in single dump */ -#define COUNT_INSTRS 0 +struct rec_step { + uae_u32 d[8]; + uae_u32 a[8]; + uae_u32 pc; + uae_u16 sr; + uae_u32 usp; + uae_u32 msp; + uae_u32 isp; + uae_u16 instr; +}; -#if COUNT_INSTRS -static unsigned long int instrcount[65536]; -static uae_u16 opcodenums[65536]; +bool cpu_flight_recorder_active = false; -static int compfn (const void *el1, const void *el2) -{ - return instrcount[*(const uae_u16 *)el1] < instrcount[*(const uae_u16 *)el2]; -} - -static char *icountfilename (void) -{ - char *name = getenv ("INSNCOUNT"); - if (name) - return name; - return COUNT_INSTRS == 2 ? 
"frequent.68k" : "insncount"; -} - -void dump_counts (void) -{ - FILE *f = fopen (icountfilename (), "w"); - unsigned long int total; - int i; - - write_log ("Writing instruction count file...\n"); - for (i = 0; i < 65536; i++) { - opcodenums[i] = i; - total += instrcount[i]; - } - qsort (opcodenums, 65536, sizeof(uae_u16), compfn); - - fprintf (f, "Total: %lu\n", total); - for (i=0; i < 65536; i++) { - unsigned long int cnt = instrcount[opcodenums[i]]; - struct instr *dp; - struct mnemolookup *lookup; - if (!cnt) - break; - dp = table68k + opcodenums[i]; - for (lookup = lookuptab;lookup->mnemo != dp->mnemo; lookup++) - ; - fprintf (f, "%04x: %lu %s\n", opcodenums[i], cnt, lookup->name); - } - fclose (f); -} +#if FRLOG_ALL +const int LOG_SIZE = 10; #else -void dump_counts (void) -{ -} +const int LOG_SIZE = FRLOG_SIZE; #endif +static rec_step frlog[LOG_SIZE]; +static int log_ptr = -1; // First time initialization + +static const char *log_filename(void) +{ + const char *name = getenv("M68K_LOG_FILE"); + return name ? 
name : "log.68k"; +} + +void dump_flight_recorder(void) +{ +#if FRLOG_ALL + FILE *f = fopen(log_filename(), "a"); +#else + FILE *f = fopen(log_filename(), "w"); +#endif + if (f == NULL) + return; + for (int i = 0; i < LOG_SIZE; i++) { + int j = (i + log_ptr) % LOG_SIZE; + fprintf(f, "pc %08x instr %04x sr %04x usp %08x msp %08x isp %08x\n", frlog[j].pc, frlog[j].instr, frlog[j].sr, frlog[j].usp, frlog[j].msp, frlog[j].isp); + // adding a simple opcode -> assembler conversion table would help +#if FRLOG_REGS + fprintf(f, "d0 %08x d1 %08x d2 %08x d3 %08x\n", frlog[j].d[0], frlog[j].d[1], frlog[j].d[2], frlog[j].d[3]); + fprintf(f, "d4 %08x d5 %08x d6 %08x d7 %08x\n", frlog[j].d[4], frlog[j].d[5], frlog[j].d[6], frlog[j].d[7]); + fprintf(f, "a0 %08x a1 %08x a2 %08x a3 %08x\n", frlog[j].a[0], frlog[j].a[1], frlog[j].a[2], frlog[j].a[3]); + fprintf(f, "a4 %08x a5 %08x a6 %08x a7 %08x\n", frlog[j].a[4], frlog[j].a[5], frlog[j].a[6], frlog[j].a[7]); +#endif + } + fclose(f); +} + +void m68k_record_step(uaecptr pc, int opcode) +{ + static bool last_state = false; + +#if FRLOG_HOTKEY + if (! cpu_flight_recorder_active) { + if (last_state) { + // dump log out + dump_flight_recorder(); + + // remember last state + last_state = false; + } + return; + } +#endif + + if (! last_state) { + // reset old log + log_ptr = 0; + memset(frlog, 0, sizeof(frlog)); + // remember last state + last_state = true; + } + +#if FRLOG_REGS + for (int i = 0; i < 8; i++) { + frlog[log_ptr].d[i] = m68k_dreg(regs, i); + frlog[log_ptr].a[i] = m68k_areg(regs, i); + } +#endif + frlog[log_ptr].pc = pc; + + MakeSR(); +#if ! FRLOG_IRQ + // is CPU in interrupt handler? Quit if should not be logged. 
+ if (regs.s && !regs.m) return; +#endif + frlog[log_ptr].sr = regs.sr; + frlog[log_ptr].usp = regs.usp; + frlog[log_ptr].msp = regs.msp; + frlog[log_ptr].isp = regs.isp; + frlog[log_ptr].instr = opcode; + + log_ptr = (log_ptr + 1) % LOG_SIZE; +#if FRLOG_ALL + if (log_ptr == 0) dump_flight_recorder(); +#endif +} +#endif /* FLIGHT_RECORDER */ int broken_in; -static __inline__ unsigned int cft_map (unsigned int f) +static inline unsigned int cft_map (unsigned int f) { -#ifndef HAVE_GET_WORD_UNSWAPPED +#if !defined(HAVE_GET_WORD_UNSWAPPED) || defined(FULLMMU) return f; #else - return ((f >> 8) & 255) | ((f & 255) << 8); + return do_byteswap_16(f); #endif } -static void REGPARAM2 op_illg_1 (uae_u32 opcode) REGPARAM; - -static void REGPARAM2 op_illg_1 (uae_u32 opcode) +void REGPARAM2 op_illg_1 (uae_u32 opcode) { op_illg (cft_map (opcode)); } -static void build_cpufunctbl (void) -{ - int i; - unsigned long opcode; - int cpu_level = 0; // 68000 (default) - if (CPUType == 4) - cpu_level = 4; // 68040 with FPU - else { - if (FPUType) - cpu_level = 3; // 68020 with FPU - else if (CPUType >= 2) - cpu_level = 2; // 68020 - else if (CPUType == 1) - cpu_level = 1; - } - struct cputbl *tbl = ( - cpu_level == 4 ? op_smalltbl_0 - : cpu_level == 3 ? op_smalltbl_1 - : cpu_level == 2 ? op_smalltbl_2 - : cpu_level == 1 ? op_smalltbl_3 - : op_smalltbl_4); - - for (opcode = 0; opcode < 65536; opcode++) - cpufunctbl[cft_map (opcode)] = op_illg_1; - for (i = 0; tbl[i].handler != NULL; i++) { - if (! 
tbl[i].specific) - cpufunctbl[cft_map (tbl[i].opcode)] = tbl[i].handler; - } - for (opcode = 0; opcode < 65536; opcode++) { - cpuop_func *f; - - if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level) - continue; - - if (table68k[opcode].handler != -1) { - f = cpufunctbl[cft_map (table68k[opcode].handler)]; - if (f == op_illg_1) - abort(); - cpufunctbl[cft_map (opcode)] = f; - } - } - for (i = 0; tbl[i].handler != NULL; i++) { - if (tbl[i].specific) - cpufunctbl[cft_map (tbl[i].opcode)] = tbl[i].handler; - } -} void init_m68k (void) { @@ -177,343 +223,123 @@ void init_m68k (void) movem_index2[i] = 7-j; movem_next[i] = i & (~(1 << j)); } - for (i = 0 ; i < 256 ; i++) { - int j; - for (j = 7 ; j >= 0 ; j--) { - if (i & (1 << j)) break; - } - fpp_movem_index1[i] = 7-j; - fpp_movem_index2[i] = j; - fpp_movem_next[i] = i & (~(1 << j)); - } -#if COUNT_INSTRS - { - FILE *f = fopen (icountfilename (), "r"); - memset (instrcount, 0, sizeof instrcount); - if (f) { - uae_u32 opcode, count, total; - char name[20]; - write_log ("Reading instruction count file...\n"); - fscanf (f, "Total: %lu\n", &total); - while (fscanf (f, "%lx: %lu %s\n", &opcode, &count, name) == 3) { - instrcount[opcode] = count; - } - fclose(f); - } - } -#endif +#ifdef USE_JIT + /* still needed by build_comp(); FIXME */ read_table68k (); do_merges (); +#endif + fpu_init (CPUType == 4); +} - build_cpufunctbl (); +void exit_m68k (void) +{ + fpu_exit (); + + free(table68k); + table68k = NULL; } struct regstruct regs, lastint_regs; -static struct regstruct regs_backup[16]; -static int backup_pointer = 0; -static long int m68kpc_offset; +// MJ static struct regstruct regs_backup[16]; +// MJ static int backup_pointer = 0; int lastint_no; -#define get_ibyte_1(o) get_byte(regs.pc + (regs.pc_p - regs.pc_oldp) + (o) + 1) -#define get_iword_1(o) get_word(regs.pc + (regs.pc_p - regs.pc_oldp) + (o)) -#define get_ilong_1(o) get_long(regs.pc + (regs.pc_p - regs.pc_oldp) + (o)) -uae_s32 ShowEA (int reg, 
amodes mode, wordsizes size, char *buf) +#ifdef FULLMMU +static inline uae_u8 get_ibyte_1(uae_u32 o) { - uae_u16 dp; - uae_s8 disp8; - uae_s16 disp16; - int r; - uae_u32 dispreg; - uaecptr addr; - uae_s32 offset = 0; - char buffer[80]; + return get_ibyte(o); +} +static inline uae_u16 get_iword_1(uae_u32 o) +{ + return get_iword(o); +} +static inline uae_u32 get_ilong_1(uae_u32 o) +{ + return get_ilong(o); +} +#else +# define get_ibyte_1(o) get_byte(m68k_getpc() + (o) + 1) +# define get_iword_1(o) get_word(m68k_getpc() + (o)) +# define get_ilong_1(o) get_long(m68k_getpc() + (o)) +#endif - switch (mode){ - case Dreg: - sprintf (buffer,"D%d", reg); - break; - case Areg: - sprintf (buffer,"A%d", reg); - break; - case Aind: - sprintf (buffer,"(A%d)", reg); - break; - case Aipi: - sprintf (buffer,"(A%d)+", reg); - break; - case Apdi: - sprintf (buffer,"-(A%d)", reg); - break; - case Ad16: - disp16 = get_iword_1 (m68kpc_offset); m68kpc_offset += 2; - addr = m68k_areg(regs,reg) + (uae_s16)disp16; - sprintf (buffer,"(A%d,$%04x) == $%08lx", reg, disp16 & 0xffff, - (unsigned long)addr); - break; - case Ad8r: - dp = get_iword_1 (m68kpc_offset); m68kpc_offset += 2; - disp8 = dp & 0xFF; - r = (dp & 0x7000) >> 12; - dispreg = dp & 0x8000 ? 
m68k_areg(regs,r) : m68k_dreg(regs,r); - if (!(dp & 0x800)) dispreg = (uae_s32)(uae_s16)(dispreg); - dispreg <<= (dp >> 9) & 3; +/* + * extract bitfield data from memory and return it in the MSBs + * bdata caches the unmodified data for put_bitfield() + */ +uae_u32 get_bitfield(uae_u32 src, uae_u32 bdata[2], uae_s32 offset, int width) +{ + uae_u32 tmp, res, mask; - if (dp & 0x100) { - uae_s32 outer = 0, disp = 0; - uae_s32 base = m68k_areg(regs,reg); - char name[10]; - sprintf (name,"A%d, ",reg); - if (dp & 0x80) { base = 0; name[0] = 0; } - if (dp & 0x40) dispreg = 0; - if ((dp & 0x30) == 0x20) { disp = (uae_s32)(uae_s16)get_iword_1 (m68kpc_offset); m68kpc_offset += 2; } - if ((dp & 0x30) == 0x30) { disp = get_ilong_1 (m68kpc_offset); m68kpc_offset += 4; } - base += disp; - - if ((dp & 0x3) == 0x2) { outer = (uae_s32)(uae_s16)get_iword_1 (m68kpc_offset); m68kpc_offset += 2; } - if ((dp & 0x3) == 0x3) { outer = get_ilong_1 (m68kpc_offset); m68kpc_offset += 4; } - - if (!(dp & 4)) base += dispreg; - if (dp & 3) base = get_long (base); - if (dp & 4) base += dispreg; - - addr = base + outer; - sprintf (buffer,"(%s%c%d.%c*%d+%ld)+%ld == $%08lx", name, - dp & 0x8000 ? 'A' : 'D', (int)r, dp & 0x800 ? 'L' : 'W', - 1 << ((dp >> 9) & 3), - disp,outer, - (unsigned long)addr); - } else { - addr = m68k_areg(regs,reg) + (uae_s32)((uae_s8)disp8) + dispreg; - sprintf (buffer,"(A%d, %c%d.%c*%d, $%02x) == $%08lx", reg, - dp & 0x8000 ? 'A' : 'D', (int)r, dp & 0x800 ? 
'L' : 'W', - 1 << ((dp >> 9) & 3), disp8, - (unsigned long)addr); + offset &= 7; + mask = 0xffffffffu << (32 - width); + switch ((offset + width + 7) >> 3) { + case 1: + tmp = get_byte(src); + res = tmp << (24 + offset); + bdata[0] = tmp & ~(mask >> (24 + offset)); + break; + case 2: + tmp = get_word(src); + res = tmp << (16 + offset); + bdata[0] = tmp & ~(mask >> (16 + offset)); + break; + case 3: + tmp = get_word(src); + res = tmp << (16 + offset); + bdata[0] = tmp & ~(mask >> (16 + offset)); + tmp = get_byte(src + 2); + res |= tmp << (8 + offset); + bdata[1] = tmp & ~(mask >> (8 + offset)); + break; + case 4: + tmp = get_long(src); + res = tmp << offset; + bdata[0] = tmp & ~(mask >> offset); + break; + case 5: + tmp = get_long(src); + res = tmp << offset; + bdata[0] = tmp & ~(mask >> offset); + tmp = get_byte(src + 4); + res |= tmp >> (8 - offset); + bdata[1] = tmp & ~(mask << (8 - offset)); + break; + default: + /* Panic? */ + res = 0; + break; } - break; - case PC16: - addr = m68k_getpc () + m68kpc_offset; - disp16 = get_iword_1 (m68kpc_offset); m68kpc_offset += 2; - addr += (uae_s16)disp16; - sprintf (buffer,"(PC,$%04x) == $%08lx", disp16 & 0xffff,(unsigned long)addr); - break; - case PC8r: - addr = m68k_getpc () + m68kpc_offset; - dp = get_iword_1 (m68kpc_offset); m68kpc_offset += 2; - disp8 = dp & 0xFF; - r = (dp & 0x7000) >> 12; - dispreg = dp & 0x8000 ? 
m68k_areg(regs,r) : m68k_dreg(regs,r); - if (!(dp & 0x800)) dispreg = (uae_s32)(uae_s16)(dispreg); - dispreg <<= (dp >> 9) & 3; - - if (dp & 0x100) { - uae_s32 outer = 0,disp = 0; - uae_s32 base = addr; - char name[10]; - sprintf (name,"PC, "); - if (dp & 0x80) { base = 0; name[0] = 0; } - if (dp & 0x40) dispreg = 0; - if ((dp & 0x30) == 0x20) { disp = (uae_s32)(uae_s16)get_iword_1 (m68kpc_offset); m68kpc_offset += 2; } - if ((dp & 0x30) == 0x30) { disp = get_ilong_1 (m68kpc_offset); m68kpc_offset += 4; } - base += disp; - - if ((dp & 0x3) == 0x2) { outer = (uae_s32)(uae_s16)get_iword_1 (m68kpc_offset); m68kpc_offset += 2; } - if ((dp & 0x3) == 0x3) { outer = get_ilong_1 (m68kpc_offset); m68kpc_offset += 4; } - - if (!(dp & 4)) base += dispreg; - if (dp & 3) base = get_long (base); - if (dp & 4) base += dispreg; - - addr = base + outer; - sprintf (buffer,"(%s%c%d.%c*%d+%ld)+%ld == $%08lx", name, - dp & 0x8000 ? 'A' : 'D', (int)r, dp & 0x800 ? 'L' : 'W', - 1 << ((dp >> 9) & 3), - disp,outer, - (unsigned long)addr); - } else { - addr += (uae_s32)((uae_s8)disp8) + dispreg; - sprintf (buffer,"(PC, %c%d.%c*%d, $%02x) == $%08lx", dp & 0x8000 ? 'A' : 'D', - (int)r, dp & 0x800 ? 
'L' : 'W', 1 << ((dp >> 9) & 3), - disp8, (unsigned long)addr); - } - break; - case absw: - sprintf (buffer,"$%08lx", (unsigned long)(uae_s32)(uae_s16)get_iword_1 (m68kpc_offset)); - m68kpc_offset += 2; - break; - case absl: - sprintf (buffer,"$%08lx", (unsigned long)get_ilong_1 (m68kpc_offset)); - m68kpc_offset += 4; - break; - case imm: - switch (size){ - case sz_byte: - sprintf (buffer,"#$%02x", (unsigned int)(get_iword_1 (m68kpc_offset) & 0xff)); - m68kpc_offset += 2; - break; - case sz_word: - sprintf (buffer,"#$%04x", (unsigned int)(get_iword_1 (m68kpc_offset) & 0xffff)); - m68kpc_offset += 2; - break; - case sz_long: - sprintf (buffer,"#$%08lx", (unsigned long)(get_ilong_1 (m68kpc_offset))); - m68kpc_offset += 4; - break; - default: - break; - } - break; - case imm0: - offset = (uae_s32)(uae_s8)get_iword_1 (m68kpc_offset); - m68kpc_offset += 2; - sprintf (buffer,"#$%02x", (unsigned int)(offset & 0xff)); - break; - case imm1: - offset = (uae_s32)(uae_s16)get_iword_1 (m68kpc_offset); - m68kpc_offset += 2; - sprintf (buffer,"#$%04x", (unsigned int)(offset & 0xffff)); - break; - case imm2: - offset = (uae_s32)get_ilong_1 (m68kpc_offset); - m68kpc_offset += 4; - sprintf (buffer,"#$%08lx", (unsigned long)offset); - break; - case immi: - offset = (uae_s32)(uae_s8)(reg & 0xff); - sprintf (buffer,"#$%08lx", (unsigned long)offset); - break; - default: - break; - } - if (buf == 0) - printf ("%s", buffer); - else - strcat (buf, buffer); - return offset; + return res; } -/* The plan is that this will take over the job of exception 3 handling - - * the CPU emulation functions will just do a longjmp to m68k_go whenever - * they hit an odd address. */ -static int verify_ea (int reg, amodes mode, wordsizes size, uae_u32 *val) +/* + * write bitfield data (in the LSBs) back to memory, upper bits + * must be cleared already. 
+ */ +void put_bitfield(uae_u32 dst, uae_u32 bdata[2], uae_u32 val, uae_s32 offset, int width) { - uae_u16 dp; - uae_s8 disp8; - uae_s16 disp16; - int r; - uae_u32 dispreg; - uaecptr addr; - uae_s32 offset = 0; - - switch (mode){ - case Dreg: - *val = m68k_dreg (regs, reg); - return 1; - case Areg: - *val = m68k_areg (regs, reg); - return 1; - - case Aind: - case Aipi: - addr = m68k_areg (regs, reg); - break; - case Apdi: - addr = m68k_areg (regs, reg); - break; - case Ad16: - disp16 = get_iword_1 (m68kpc_offset); m68kpc_offset += 2; - addr = m68k_areg(regs,reg) + (uae_s16)disp16; - break; - case Ad8r: - addr = m68k_areg (regs, reg); - d8r_common: - dp = get_iword_1 (m68kpc_offset); m68kpc_offset += 2; - disp8 = dp & 0xFF; - r = (dp & 0x7000) >> 12; - dispreg = dp & 0x8000 ? m68k_areg(regs,r) : m68k_dreg(regs,r); - if (!(dp & 0x800)) dispreg = (uae_s32)(uae_s16)(dispreg); - dispreg <<= (dp >> 9) & 3; - - if (dp & 0x100) { - uae_s32 outer = 0, disp = 0; - uae_s32 base = addr; - if (dp & 0x80) base = 0; - if (dp & 0x40) dispreg = 0; - if ((dp & 0x30) == 0x20) { disp = (uae_s32)(uae_s16)get_iword_1 (m68kpc_offset); m68kpc_offset += 2; } - if ((dp & 0x30) == 0x30) { disp = get_ilong_1 (m68kpc_offset); m68kpc_offset += 4; } - base += disp; - - if ((dp & 0x3) == 0x2) { outer = (uae_s32)(uae_s16)get_iword_1 (m68kpc_offset); m68kpc_offset += 2; } - if ((dp & 0x3) == 0x3) { outer = get_ilong_1 (m68kpc_offset); m68kpc_offset += 4; } - - if (!(dp & 4)) base += dispreg; - if (dp & 3) base = get_long (base); - if (dp & 4) base += dispreg; - - addr = base + outer; - } else { - addr += (uae_s32)((uae_s8)disp8) + dispreg; + offset = (offset & 7) + width; + switch ((offset + 7) >> 3) { + case 1: + put_byte(dst, bdata[0] | (val << (8 - offset))); + break; + case 2: + put_word(dst, bdata[0] | (val << (16 - offset))); + break; + case 3: + put_word(dst, bdata[0] | (val >> (offset - 16))); + put_byte(dst + 2, bdata[1] | (val << (24 - offset))); + break; + case 4: + put_long(dst, 
bdata[0] | (val << (32 - offset))); + break; + case 5: + put_long(dst, bdata[0] | (val >> (offset - 32))); + put_byte(dst + 4, bdata[1] | (val << (40 - offset))); + break; } - break; - case PC16: - addr = m68k_getpc () + m68kpc_offset; - disp16 = get_iword_1 (m68kpc_offset); m68kpc_offset += 2; - addr += (uae_s16)disp16; - break; - case PC8r: - addr = m68k_getpc () + m68kpc_offset; - goto d8r_common; - case absw: - addr = (uae_s32)(uae_s16)get_iword_1 (m68kpc_offset); - m68kpc_offset += 2; - break; - case absl: - addr = get_ilong_1 (m68kpc_offset); - m68kpc_offset += 4; - break; - case imm: - switch (size){ - case sz_byte: - *val = get_iword_1 (m68kpc_offset) & 0xff; - m68kpc_offset += 2; - break; - case sz_word: - *val = get_iword_1 (m68kpc_offset) & 0xffff; - m68kpc_offset += 2; - break; - case sz_long: - *val = get_ilong_1 (m68kpc_offset); - m68kpc_offset += 4; - break; - default: - break; - } - return 1; - case imm0: - *val = (uae_s32)(uae_s8)get_iword_1 (m68kpc_offset); - m68kpc_offset += 2; - return 1; - case imm1: - *val = (uae_s32)(uae_s16)get_iword_1 (m68kpc_offset); - m68kpc_offset += 2; - return 1; - case imm2: - *val = get_ilong_1 (m68kpc_offset); - m68kpc_offset += 4; - return 1; - case immi: - *val = (uae_s32)(uae_s8)(reg & 0xff); - return 1; - default: - addr = 0; - break; - } - if ((addr & 1) == 0) - return 1; - - last_addr_for_exception_3 = m68k_getpc () + m68kpc_offset; - last_fault_for_exception_3 = addr; - return 0; } uae_u32 get_disp_ea_020 (uae_u32 base, uae_u32 dp) @@ -595,6 +421,7 @@ void MakeFromSR (void) regs.t1 = (regs.sr >> 15) & 1; regs.t0 = (regs.sr >> 14) & 1; regs.s = (regs.sr >> 13) & 1; + mmu_set_super(regs.s); regs.m = (regs.sr >> 12) & 1; regs.intmask = (regs.sr >> 8) & 7; SET_XFLG ((regs.sr >> 4) & 1); @@ -602,7 +429,6 @@ void MakeFromSR (void) SET_ZFLG ((regs.sr >> 2) & 1); SET_VFLG ((regs.sr >> 1) & 1); SET_CFLG (regs.sr & 1); - if (CPUType >= 2) { if (olds != regs.s) { if (olds) { if (oldm) @@ -623,89 +449,149 @@ void 
MakeFromSR (void) m68k_areg(regs, 7) = regs.msp; } } - } else { - if (olds != regs.s) { - if (olds) { - regs.isp = m68k_areg(regs, 7); - m68k_areg(regs, 7) = regs.usp; - } else { - regs.usp = m68k_areg(regs, 7); - m68k_areg(regs, 7) = regs.isp; - } - } + + SPCFLAGS_SET( SPCFLAG_INT ); + if (regs.t1 || regs.t0) + SPCFLAGS_SET( SPCFLAG_TRACE ); + else + SPCFLAGS_CLEAR( SPCFLAG_TRACE ); +} + +/* for building exception frames */ +static inline void exc_push_word(uae_u16 w) +{ + m68k_areg(regs, 7) -= 2; + put_word(m68k_areg(regs, 7), w); +} +static inline void exc_push_long(uae_u32 l) +{ + m68k_areg(regs, 7) -= 4; + put_long (m68k_areg(regs, 7), l); +} + +static inline void exc_make_frame( + int format, + uae_u16 sr, + uae_u32 currpc, + int nr, + uae_u32 x0, + uae_u32 x1 +) +{ + switch(format) { + case 4: + exc_push_long(x1); + exc_push_long(x0); + break; + case 3: + case 2: + exc_push_long(x0); + break; } - regs.spcflags |= SPCFLAG_INT; - if (regs.t1 || regs.t0) - regs.spcflags |= SPCFLAG_TRACE; - else - regs.spcflags &= ~(SPCFLAG_TRACE | SPCFLAG_DOTRACE); + exc_push_word((format << 12) + (nr * 4)); /* format | vector */ + exc_push_long(currpc); + exc_push_word(sr); } +#ifdef EXCEPTIONS_VIA_LONGJMP +static int building_bus_fault_stack_frame=0; +#endif + void Exception(int nr, uaecptr oldpc) { + uae_u32 currpc = m68k_getpc (); MakeSR(); + + if (fixup.flag) + { + m68k_areg(regs, fixup.reg) = fixup.value; + fixup.flag = 0; + } + if (!regs.s) { regs.usp = m68k_areg(regs, 7); - if (CPUType >= 2) - m68k_areg(regs, 7) = regs.m ? regs.msp : regs.isp; - else - m68k_areg(regs, 7) = regs.isp; + m68k_areg(regs, 7) = regs.m ? regs.msp : regs.isp; regs.s = 1; + mmu_set_super(1); } - if (CPUType > 0) { - if (nr == 2 || nr == 3) { - int i; - /* @@@ this is probably wrong (?) 
*/ - for (i = 0 ; i < 12 ; i++) { - m68k_areg(regs, 7) -= 2; - put_word (m68k_areg(regs, 7), 0); - } - m68k_areg(regs, 7) -= 2; - put_word (m68k_areg(regs, 7), 0xa000 + nr * 4); - } else if (nr ==5 || nr == 6 || nr == 7 || nr == 9) { - m68k_areg(regs, 7) -= 4; - put_long (m68k_areg(regs, 7), oldpc); - m68k_areg(regs, 7) -= 2; - put_word (m68k_areg(regs, 7), 0x2000 + nr * 4); - } else if (regs.m && nr >= 24 && nr < 32) { - m68k_areg(regs, 7) -= 2; - put_word (m68k_areg(regs, 7), nr * 4); - m68k_areg(regs, 7) -= 4; - put_long (m68k_areg(regs, 7), m68k_getpc ()); - m68k_areg(regs, 7) -= 2; - put_word (m68k_areg(regs, 7), regs.sr); - regs.sr |= (1 << 13); - regs.msp = m68k_areg(regs, 7); - m68k_areg(regs, 7) = regs.isp; - m68k_areg(regs, 7) -= 2; - put_word (m68k_areg(regs, 7), 0x1000 + nr * 4); - } else { - m68k_areg(regs, 7) -= 2; - put_word (m68k_areg(regs, 7), nr * 4); + + if (nr == 2) { + /* BUS ERROR handler begins */ +#ifdef ENABLE_EPSLIMITER + check_eps_limit(currpc); +#endif + // panicbug("Exception Nr. 
%d CPC: %08lx NPC: %08lx SP=%08lx Addr: %08lx", nr, currpc, get_long (regs.vbr + 4*nr), m68k_areg(regs, 7), regs.mmu_fault_addr); + +#ifdef EXCEPTIONS_VIA_LONGJMP + if (!building_bus_fault_stack_frame) +#else + try +#endif + { +#ifdef EXCEPTIONS_VIA_LONGJMP + building_bus_fault_stack_frame= 1; +#endif + /* 68040 */ + exc_push_long(0); /* PD3 */ + exc_push_long(0); /* PD2 */ + exc_push_long(0); /* PD1 */ + exc_push_long(0); /* PD0/WB1D */ + exc_push_long(0); /* WB1A */ + exc_push_long(0); /* WB2D */ + exc_push_long(0); /* WB2A */ + exc_push_long(regs.wb3_data); /* WB3D */ + exc_push_long(regs.mmu_fault_addr); /* WB3A */ + exc_push_long(regs.mmu_fault_addr); + exc_push_word(0); /* WB1S */ + exc_push_word(0); /* WB2S */ + exc_push_word(regs.wb3_status); /* WB3S */ + regs.wb3_status = 0; + exc_push_word(regs.mmu_ssw); + exc_push_long(regs.mmu_fault_addr); /* EA */ + exc_make_frame(7, regs.sr, regs.fault_pc, 2, 0, 0); + } +#ifdef EXCEPTIONS_VIA_LONGJMP + else +#else + catch (m68k_exception) +#endif + { + report_double_bus_error(); +#ifdef EXCEPTIONS_VIA_LONGJMP + building_bus_fault_stack_frame= 0; +#endif + return; + } + +#ifdef EXCEPTIONS_VIA_LONGJMP + building_bus_fault_stack_frame= 0; +#endif + /* end of BUS ERROR handler */ + } else if (nr == 3) { + exc_make_frame(2, regs.sr, last_addr_for_exception_3, nr, + last_fault_for_exception_3 & 0xfffffffe, 0); + } else if (nr ==5 || nr == 6 || nr == 7 || nr == 9) { + /* div by zero, CHK, TRAP or TRACE */ + exc_make_frame(2, regs.sr, currpc, nr, oldpc, 0); + } else if (regs.m && nr >= 24 && nr < 32) { + /* interrupts! */ + exc_make_frame(0, regs.sr, currpc, nr, 0, 0); + regs.sr |= (1 << 13); + regs.msp = m68k_areg(regs, 7); + m68k_areg(regs, 7) = regs.isp; + + exc_make_frame(1, /* throwaway */ + regs.sr, currpc, nr, 0, 0); } else { - if (nr == 2 || nr == 3) { - m68k_areg(regs, 7) -= 12; - /* ??????? 
*/ - if (nr == 3) { - put_long (m68k_areg(regs, 7), last_fault_for_exception_3); - put_word (m68k_areg(regs, 7)+4, last_op_for_exception_3); - put_long (m68k_areg(regs, 7)+8, last_addr_for_exception_3); - } - write_log ("Exception!\n"); - goto kludge_me_do; - } + exc_make_frame(0, regs.sr, currpc, nr, 0, 0); } - m68k_areg(regs, 7) -= 4; - put_long (m68k_areg(regs, 7), m68k_getpc ()); -kludge_me_do: - m68k_areg(regs, 7) -= 2; - put_word (m68k_areg(regs, 7), regs.sr); m68k_setpc (get_long (regs.vbr + 4*nr)); + SPCFLAGS_SET( SPCFLAG_JIT_END_COMPILE ); fill_prefetch_0 (); regs.t1 = regs.t0 = regs.m = 0; - regs.spcflags &= ~(SPCFLAG_TRACE | SPCFLAG_DOTRACE); + SPCFLAGS_CLEAR(SPCFLAG_TRACE | SPCFLAG_DOTRACE); } static void Interrupt(int nr) @@ -716,62 +602,90 @@ static void Interrupt(int nr) Exception(nr+24, 0); regs.intmask = nr; - regs.spcflags |= SPCFLAG_INT; + SPCFLAGS_SET( SPCFLAG_INT ); } -static int caar, cacr, tc, itt0, itt1, dtt0, dtt1; - -void m68k_move2c (int regno, uae_u32 *regp) +static void SCCInterrupt(int nr) +{ + // fprintf(stderr, "CPU: in SCCInterrupt\n"); + lastint_regs = regs; + lastint_no = 5;// ex 5 + Exception(nr, 0); + + regs.intmask = 5;// ex 5 +} + +static void MFPInterrupt(int nr) +{ + // fprintf(stderr, "CPU: in MFPInterrupt\n"); + lastint_regs = regs; + lastint_no = 6; + Exception(nr, 0); + + regs.intmask = 6; +} + +int m68k_move2c (int regno, uae_u32 *regp) { - if (CPUType == 1 && (regno & 0x7FF) > 1) - op_illg (0x4E7B); - else switch (regno) { case 0: regs.sfc = *regp & 7; break; case 1: regs.dfc = *regp & 7; break; - case 2: cacr = *regp & 0x3; break; /* ignore C and CE */ - case 3: tc = *regp & 0xc000; break; - case 4: itt0 = *regp & 0xffffe364; break; - case 5: itt1 = *regp & 0xffffe364; break; - case 6: dtt0 = *regp & 0xffffe364; break; - case 7: dtt1 = *regp & 0xffffe364; break; + case 2: regs.cacr = *regp & 0x80008000; +#ifdef USE_JIT + set_cache_state(regs.cacr & 0x8000); + if (*regp & 0x08) { /* Just to be on the safe side */ + 
flush_icache(2); + } +#endif + break; + case 3: mmu_set_tc(*regp & 0xc000); break; + case 4: + case 5: + case 6: + case 7: mmu_set_ttr(regno, *regp & 0xffffe364); break; case 0x800: regs.usp = *regp; break; case 0x801: regs.vbr = *regp; break; - case 0x802: caar = *regp &0xfc; break; + case 0x802: regs.caar = *regp & 0xfc; break; case 0x803: regs.msp = *regp; if (regs.m == 1) m68k_areg(regs, 7) = regs.msp; break; case 0x804: regs.isp = *regp; if (regs.m == 0) m68k_areg(regs, 7) = regs.isp; break; + case 0x805: mmu_set_mmusr(*regp); break; + case 0x806: regs.urp = *regp & MMU_ROOT_PTR_ADDR_MASK; break; + case 0x807: regs.srp = *regp & MMU_ROOT_PTR_ADDR_MASK; break; default: op_illg (0x4E7B); - break; + return 0; } + return 1; } -void m68k_movec2 (int regno, uae_u32 *regp) +int m68k_movec2 (int regno, uae_u32 *regp) { - if (CPUType == 1 && (regno & 0x7FF) > 1) - op_illg (0x4E7A); - else switch (regno) { case 0: *regp = regs.sfc; break; case 1: *regp = regs.dfc; break; - case 2: *regp = cacr; break; - case 3: *regp = tc; break; - case 4: *regp = itt0; break; - case 5: *regp = itt1; break; - case 6: *regp = dtt0; break; - case 7: *regp = dtt1; break; + case 2: *regp = regs.cacr; break; + case 3: *regp = regs.tc; break; + case 4: *regp = regs.itt0; break; + case 5: *regp = regs.itt1; break; + case 6: *regp = regs.dtt0; break; + case 7: *regp = regs.dtt1; break; case 0x800: *regp = regs.usp; break; case 0x801: *regp = regs.vbr; break; - case 0x802: *regp = caar; break; + case 0x802: *regp = regs.caar; break; case 0x803: *regp = regs.m == 1 ? m68k_areg(regs, 7) : regs.msp; break; case 0x804: *regp = regs.m == 0 ? 
m68k_areg(regs, 7) : regs.isp; break; + case 0x805: *regp = regs.mmusr; break; + case 0x806: *regp = regs.urp; break; + case 0x807: *regp = regs.srp; break; default: op_illg (0x4E7A); - break; + return 0; } + return 1; } -static __inline__ int +#if !defined(uae_s64) +static inline int div_unsigned(uae_u32 src_hi, uae_u32 src_lo, uae_u32 div, uae_u32 *quot, uae_u32 *rem) { uae_u32 q = 0, cbit = 0; @@ -795,8 +709,9 @@ div_unsigned(uae_u32 src_hi, uae_u32 src_lo, uae_u32 div, uae_u32 *quot, uae_u32 *rem = src_hi; return 0; } +#endif -void m68k_divl (uae_u32 opcode, uae_u32 src, uae_u16 extra, uaecptr oldpc) +void m68k_divl (uae_u32 /*opcode*/, uae_u32 src, uae_u16 extra, uaecptr oldpc) { #if defined(uae_s64) if (src == 0) { @@ -917,7 +832,8 @@ void m68k_divl (uae_u32 opcode, uae_u32 src, uae_u16 extra, uaecptr oldpc) #endif } -static __inline__ void +#if !defined(uae_s64) +static inline void mul_unsigned(uae_u32 src1, uae_u32 src2, uae_u32 *dst_hi, uae_u32 *dst_lo) { uae_u32 r0 = (src1 & 0xffff) * (src2 & 0xffff); @@ -935,8 +851,9 @@ mul_unsigned(uae_u32 src1, uae_u32 src2, uae_u32 *dst_hi, uae_u32 *dst_lo) *dst_lo = lo; *dst_hi = r3; } +#endif -void m68k_mull (uae_u32 opcode, uae_u32 src, uae_u16 extra) +void m68k_mull (uae_u32 /*opcode*/, uae_u32 src, uae_u16 extra) { #if defined(uae_s64) if (extra & 0x800) { @@ -1022,16 +939,16 @@ void m68k_mull (uae_u32 opcode, uae_u32 src, uae_u16 extra) } #endif } -static char* ccnames[] = -{ "T ","F ","HI","LS","CC","CS","NE","EQ", - "VC","VS","PL","MI","GE","LT","GT","LE" }; + +// If value is greater than zero, this means we are still processing an EmulOp +// because the counter is incremented only in m68k_execute(), i.e. 
interpretive +// execution only +#ifdef USE_JIT +static int m68k_execute_depth = 0; +#endif void m68k_reset (void) { - m68k_areg (regs, 7) = 0x2000; - m68k_setpc (ROMBaseMac + 0x2a); - fill_prefetch_0 (); - regs.kick_mask = 0xF80000; regs.s = 1; regs.m = 0; regs.stopped = 0; @@ -1042,76 +959,242 @@ void m68k_reset (void) SET_CFLG (0); SET_VFLG (0); SET_NFLG (0); - regs.spcflags = 0; + SPCFLAGS_INIT( 0 ); regs.intmask = 7; regs.vbr = regs.sfc = regs.dfc = 0; - regs.fpcr = regs.fpsr = regs.fpiar = 0; + + // need to ensure the following order of initialization is correct + // (it is definitely better than what it was before this commit + // since it was reading from 0x00000000 in User mode and with active MMU) + mmu_set_tc(regs.tc & ~0x8000); /* disable mmu */ + m68k_areg (regs, 7) = phys_get_long(0x00000000); + m68k_setpc (phys_get_long(0x00000004)); + fill_prefetch_0 (); + + /* gb-- moved into {fpp,fpu_x86}.cpp::fpu_init() + regs.fpcr = regs.fpsr = regs.fpiar = 0; */ + fpu_reset(); + // MMU + mmu_reset(); + mmu_set_super(1); + // Cache + regs.cacr = 0; + regs.caar = 0; +#ifdef FLIGHT_RECORDER + log_ptr = 0; + memset(frlog, 0, sizeof(frlog)); +#endif +} + +void m68k_emulop_return(void) +{ + SPCFLAGS_SET( SPCFLAG_BRK ); + quit_program = 1; +} + +static void save_regs(struct M68kRegisters &r) +{ + int i; + + for (i=0; i<8; i++) { + r.d[i] = m68k_dreg(regs, i); + r.a[i] = m68k_areg(regs, i); + } + r.pc = m68k_getpc(); + MakeSR(); + r.sr = regs.sr; + r.isp = regs.isp; + r.usp = regs.usp; + r.msp = regs.msp; + if ((r.sr & 0x2000) == 0) + r.usp = r.a[7]; + else if ((r.sr & 0x1000) != 0) + r.msp = r.a[7]; + else + r.isp = r.a[7]; +} + +static void restore_regs(struct M68kRegisters &r) +{ + int i; + + for (i=0; i<8; i++) { + m68k_dreg(regs, i) = r.d[i]; + m68k_areg(regs, i) = r.a[i]; + } + regs.isp = r.isp; + regs.usp = r.usp; + regs.msp = r.msp; + regs.sr = r.sr; + MakeFromSR(); +} + +void m68k_emulop(uae_u32 opcode) +{ + struct M68kRegisters r; + save_regs(r); + 
EmulOp(opcode, &r); + restore_regs(r); +} + +// void m68k_natfeat_id(void) +// { +// struct M68kRegisters r; + +// /* is it really necessary to save all registers? */ +// save_regs(r); + +// memptr stack = r.a[7] + 4; /* skip return address */ +// r.d[0] = nf_get_id(stack); + +// restore_regs(r); +// } + +// void m68k_natfeat_call(void) +// { +// struct M68kRegisters r; + +// /* is it really necessary to save all registers? */ +// save_regs(r); + +// memptr stack = r.a[7] + 4; /* skip return address */ +// bool isSupervisorMode = ((r.sr & 0x2000) == 0x2000); +// r.d[0] = nf_call(stack, isSupervisorMode); + +// restore_regs(r); +// } + +static int m68k_call(uae_u32 pc) +{ + VOLATILE int exc = 0; + m68k_setpc(pc); + TRY(prb) { +#ifdef USE_JIT + if (bx_options.jit.jit) { + exec_nostats(); + // m68k_do_compile_execute(); + // The above call to m68k_do_compile_execute fails with BadAccess in sigsegv_handler (MAC, if it is executed after the first compile_block) + // (NULL pointer to addr_instr). + // Call exec_nostats avoids calling compile_block, because stack modification is only temporary + // which will fill up compile cache with BOGUS data. + // we can call exec_nostats directly, do our code, and return back here. 
+ } + else +#endif + m68k_do_execute(); + } + CATCH(prb) { + exc = int(prb); + } + return exc; +} + +static uae_u32 m68k_alloca(int size) +{ + uae_u32 sp = (m68k_areg(regs, 7) - size) & ~1; + m68k_areg(regs, 7) = sp; + if ((regs.sr & 0x2000) == 0) + regs.usp = sp; + else if ((regs.sr & 0x1000) != 0) + regs.msp = sp; + else + regs.isp = sp; + return sp; +} + +// uae_u32 linea68000(volatile uae_u16 opcode) +// { +// sigjmp_buf jmp; +// struct M68kRegisters r; +// volatile uae_u32 abase = 0; + +// SAVE_EXCEPTION; +// save_regs(r); + +// const int sz = 8 + sizeof(void *); +// volatile uae_u32 sp = 0; +// uae_u32 backup[(sz + 3) / 4]; + +// if (sigsetjmp(jmp, 1) == 0) +// { +// void *p = jmp; +// uae_u8 *sp_p; +// int exc; + +// sp = m68k_alloca(sz); +// memcpy(backup, phys_get_real_address(sp), sz); + +// WriteHWMemInt16(sp, opcode); +// WriteHWMemInt16(sp + 2, 0xa0ff); +// WriteHWMemInt32(sp + 4, 13); +// sp_p = phys_get_real_address(sp + 8); +// *((void **)sp_p) = p; +// if ((exc = m68k_call(sp)) != 0) +// { +// panicbug("exception %d in LINEA", exc); +// m68k_dreg(regs, 0) = 0; +// } +// } else +// { +// abase = m68k_dreg(regs, 0); +// } + +// if (sp) { +// memcpy(phys_get_real_address(sp), backup, sz); +// } +// restore_regs(r); +// m68k_setpc(r.pc); +// RESTORE_EXCEPTION; +// return abase; +// } + + +static void rts68000() +{ + uae_u32 SP = m68k_getpc() + 6; + sigjmp_buf *p; + uae_u8 *sp_p = phys_get_real_address(SP); + + p = (sigjmp_buf *)(*((void **)sp_p)); + SP += sizeof(void *); + m68k_areg(regs, 7) = SP; + siglongjmp(*p, 1); } void REGPARAM2 op_illg (uae_u32 opcode) { - uaecptr pc = m68k_getpc (); + uaecptr pc = m68k_getpc (); - - if ((opcode & 0xFF00) == 0x7100) { - struct M68kRegisters r; - int i; - - // Return from Execute68k()? 
- if (opcode == M68K_EXEC_RETURN) { - regs.spcflags |= SPCFLAG_BRK; - quit_program = 1; - return; - } - - // Call EMUL_OP opcode - for (i=0; i<8; i++) { - r.d[i] = m68k_dreg(regs, i); - r.a[i] = m68k_areg(regs, i); - } - MakeSR(); - r.sr = regs.sr; - EmulOp(opcode, &r); - for (i=0; i<8; i++) { - m68k_dreg(regs, i) = r.d[i]; - m68k_areg(regs, i) = r.a[i]; - } - regs.sr = r.sr; - MakeFromSR(); - m68k_incpc(2); - fill_prefetch_0 (); + if ((opcode & 0xF000) == 0xA000) { + // if (opcode == 0xa0ff) + // { + // uae_u32 call = ReadHWMemInt32(pc + 2); + // switch (call) + // { + // case 13: + // rts68000(); + // return; + // } + // m68k_setpc(pc + 6); + // } + Exception(0xA,0); return; } - if ((opcode & 0xF000) == 0xA000) { - Exception(0xA,0); + if ((opcode & 0xF000) == 0xF000) { + Exception(0xB,0); + return; + } + + D(bug("Illegal instruction: %04x at %08x", opcode, pc)); +#if defined(USE_JIT) && defined(JIT_DEBUG) + compiler_dumpstate(); +#endif + + Exception (4,0); return; - } - -// write_log ("Illegal instruction: %04x at %08lx\n", opcode, pc); - - if ((opcode & 0xF000) == 0xF000) { - Exception(0xB,0); - return; - } - - write_log ("Illegal instruction: %04x at %08lx\n", opcode, pc); - - Exception (4,0); } -void mmu_op(uae_u32 opcode, uae_u16 extra) -{ - if ((extra & 0xB000) == 0) { /* PMOVE instruction */ - - } else if ((extra & 0xF000) == 0x2000) { /* PLOAD instruction */ - } else if ((extra & 0xF000) == 0x8000) { /* PTEST instruction */ - } else - op_illg (opcode); -} - -static int n_insns = 0, n_spcinsns = 0; - static uaecptr last_trace_ad = 0; static void do_trace (void) @@ -1123,7 +1206,7 @@ static void do_trace (void) /* We can afford this to be inefficient... 
*/ m68k_setpc (m68k_getpc ()); fill_prefetch_0 (); - opcode = get_word (regs.pc); + opcode = get_word(m68k_getpc()); if (opcode == 0x4e72 /* RTE */ || opcode == 0x4e74 /* RTD */ || opcode == 0x4e75 /* RTS */ @@ -1139,216 +1222,362 @@ static void do_trace (void) && (uae_s16)m68k_dreg(regs, opcode & 7) != 0)) { last_trace_ad = m68k_getpc (); - regs.spcflags &= ~SPCFLAG_TRACE; - regs.spcflags |= SPCFLAG_DOTRACE; + SPCFLAGS_CLEAR( SPCFLAG_TRACE ); + SPCFLAGS_SET( SPCFLAG_DOTRACE ); } } else if (regs.t1) { last_trace_ad = m68k_getpc (); - regs.spcflags &= ~SPCFLAG_TRACE; - regs.spcflags |= SPCFLAG_DOTRACE; + SPCFLAGS_CLEAR( SPCFLAG_TRACE ); + SPCFLAGS_SET( SPCFLAG_DOTRACE ); } } +// #define SERVE_VBL_MFP(resetStop) \ +// { \ +// if (SPCFLAGS_TEST( SPCFLAG_INT3|SPCFLAG_VBL|SPCFLAG_INT5|SPCFLAG_SCC|SPCFLAG_MFP )) { \ +// if (SPCFLAGS_TEST( SPCFLAG_INT3 )) { \ +// if (3 > regs.intmask) { \ +// Interrupt(3); \ +// regs.stopped = 0; \ +// SPCFLAGS_CLEAR( SPCFLAG_INT3 ); \ +// if (resetStop) \ +// SPCFLAGS_CLEAR( SPCFLAG_STOP ); \ +// } \ +// } \ +// if (SPCFLAGS_TEST( SPCFLAG_VBL )) { \ +// if (4 > regs.intmask) { \ +// Interrupt(4); \ +// regs.stopped = 0; \ +// SPCFLAGS_CLEAR( SPCFLAG_VBL ); \ +// if (resetStop) \ +// SPCFLAGS_CLEAR( SPCFLAG_STOP ); \ +// } \ +// } \ +// if (SPCFLAGS_TEST( SPCFLAG_INT5 )) { \ +// if (5 > regs.intmask) { \ +// Interrupt(5); \ +// regs.stopped = 0; \ +// SPCFLAGS_CLEAR( SPCFLAG_INT5 ); \ +// if (resetStop) \ +// SPCFLAGS_CLEAR( SPCFLAG_STOP ); \ +// } \ +// } \ +// if (SPCFLAGS_TEST( SPCFLAG_SCC )) { \ +// if (5 > regs.intmask) { \ +// int vector_number=SCCdoInterrupt(); \ +// if(vector_number){ \ +// SCCInterrupt(vector_number); \ +// regs.stopped = 0; \ +// SPCFLAGS_CLEAR( SPCFLAG_SCC); \ +// if (resetStop) \ +// SPCFLAGS_CLEAR( SPCFLAG_STOP ); \ +// } \ +// else \ +// SPCFLAGS_CLEAR( SPCFLAG_SCC ); \ +// } \ +// } \ +// if (SPCFLAGS_TEST( SPCFLAG_MFP )) { \ +// if (6 > regs.intmask) { \ +// int vector_number = MFPdoInterrupt(); \ +// if 
(vector_number) { \ +// MFPInterrupt(vector_number); \ +// regs.stopped = 0; \ +// if (resetStop) \ +// SPCFLAGS_CLEAR( SPCFLAG_STOP ); \ +// } \ +// else \ +// SPCFLAGS_CLEAR( SPCFLAG_MFP ); \ +// } \ +// } \ +// } \ +// } -static int do_specialties (void) +// #define SERVE_INTERNAL_IRQ() \ +// { \ +// if (SPCFLAGS_TEST( SPCFLAG_INTERNAL_IRQ )) { \ +// SPCFLAGS_CLEAR( SPCFLAG_INTERNAL_IRQ ); \ +// invoke200HzInterrupt(); \ +// } \ +// } + +int m68k_do_specialties(void) { - /*n_spcinsns++;*/ - if (regs.spcflags & SPCFLAG_DOTRACE) { - Exception (9,last_trace_ad); - } - while (regs.spcflags & SPCFLAG_STOP) { - if (regs.spcflags & (SPCFLAG_INT | SPCFLAG_DOINT)){ - int intr = intlev (); - regs.spcflags &= ~(SPCFLAG_INT | SPCFLAG_DOINT); - if (intr != -1 && intr > regs.intmask) { - Interrupt (intr); - regs.stopped = 0; - regs.spcflags &= ~SPCFLAG_STOP; - } - } - } - if (regs.spcflags & SPCFLAG_TRACE) - do_trace (); + // SERVE_INTERNAL_IRQ(); +#ifdef USE_JIT + // Block was compiled + SPCFLAGS_CLEAR( SPCFLAG_JIT_END_COMPILE ); - if (regs.spcflags & SPCFLAG_DOINT) { - int intr = intlev (); - regs.spcflags &= ~SPCFLAG_DOINT; - if (intr != -1 && intr > regs.intmask) { - Interrupt (intr); - regs.stopped = 0; + // Retain the request to get out of compiled code until + // we reached the toplevel execution, i.e. the one that + // can compile then run compiled code. 
This also means + // we processed all (nested) EmulOps + if ((m68k_execute_depth == 0) && SPCFLAGS_TEST( SPCFLAG_JIT_EXEC_RETURN )) + SPCFLAGS_CLEAR( SPCFLAG_JIT_EXEC_RETURN ); +#endif + /*n_spcinsns++;*/ + if (SPCFLAGS_TEST( SPCFLAG_DOTRACE )) { + Exception (9,last_trace_ad); } - } - if (regs.spcflags & SPCFLAG_INT) { - regs.spcflags &= ~SPCFLAG_INT; - regs.spcflags |= SPCFLAG_DOINT; - } - if (regs.spcflags & (SPCFLAG_BRK | SPCFLAG_MODE_CHANGE)) { - regs.spcflags &= ~(SPCFLAG_BRK | SPCFLAG_MODE_CHANGE); - return 1; - } - return 0; -} +#if 0 /* not for ARAnyM; emulating 040 only */ + if ((regs.spcflags & SPCFLAG_STOP) && regs.s == 0 && currprefs.cpu_model <= 68010) { + // 68000/68010 undocumented special case: + // if STOP clears S-bit and T was not set: + // cause privilege violation exception, PC pointing to following instruction. + // If T was set before STOP: STOP works as documented. + m68k_unset_stop(); + Exception(8, 0); + } +#endif + while (SPCFLAGS_TEST( SPCFLAG_STOP )) { + //TODO: Check + if ((regs.sr & 0x700) == 0x700) + { + // panicbug("STOPed with interrupts disabled, exiting; pc=$%08x", m68k_getpc()); + m68k_dumpstate (stderr, NULL); +#if 0 + quit_program = 1; +#endif +#ifdef FULL_HISTORY + ndebug::showHistory(20, false); + m68k_dumpstate (stderr, NULL); +#endif + return 1; + } + if (SPCFLAGS_TEST( SPCFLAG_INT | SPCFLAG_DOINT )){ + SPCFLAGS_CLEAR( SPCFLAG_INT | SPCFLAG_DOINT ); + int intr = intlev (); + if (intr != -1 && intr > regs.intmask) { + Interrupt (intr); + regs.stopped = 0; + SPCFLAGS_CLEAR( SPCFLAG_STOP ); + } + } -static void m68k_run_1 (void) -{ - for (;;) { - uae_u32 opcode = GET_OPCODE; - (*cpufunctbl[opcode])(opcode); - if (regs.spcflags) { - if (do_specialties()) - return; + // SERVE_INTERNAL_IRQ(); + // SERVE_VBL_MFP(true); + if (SPCFLAGS_TEST( SPCFLAG_BRK )) + break; + } + if (SPCFLAGS_TEST( SPCFLAG_TRACE )) + do_trace (); + + // SERVE_VBL_MFP(false); + + if (SPCFLAGS_TEST( SPCFLAG_DOINT )) { + SPCFLAGS_CLEAR( SPCFLAG_DOINT ); + int 
intr = intlev (); + if (intr != -1 && intr > regs.intmask) { + Interrupt (intr); + regs.stopped = 0; } } + + if (SPCFLAGS_TEST( SPCFLAG_INT )) { + SPCFLAGS_CLEAR( SPCFLAG_INT ); + SPCFLAGS_SET( SPCFLAG_DOINT ); + } + + if (SPCFLAGS_TEST( SPCFLAG_BRK /*| SPCFLAG_MODE_CHANGE*/ )) { + SPCFLAGS_CLEAR( SPCFLAG_BRK /*| SPCFLAG_MODE_CHANGE*/ ); + return 1; + } + + return 0; } -#define m68k_run1 m68k_run_1 - -int in_m68k_go = 0; - -void m68k_go (int may_quit) +void m68k_do_execute (void) { -// m68k_go() must be reentrant for Execute68k() and Execute68kTrap() to work -/* - if (in_m68k_go || !may_quit) { - write_log("Bug! m68k_go is not reentrant.\n"); - abort(); - } -*/ - in_m68k_go++; + uae_u32 pc; + uae_u32 opcode; for (;;) { - if (quit_program > 0) { - if (quit_program == 1) - break; - quit_program = 0; - m68k_reset (); + regs.fault_pc = pc = m68k_getpc(); +#ifdef FULL_HISTORY +#ifdef NEED_TO_DEBUG_BADLY + history[lasthist] = regs; + historyf[lasthist] = regflags; +#else + history[lasthist] = m68k_getpc(); +#endif + if (++lasthist == MAX_HIST) lasthist = 0; + if (lasthist == firsthist) { + if (++firsthist == MAX_HIST) firsthist = 0; } - m68k_run1(); - } - if (debugging) { - uaecptr nextpc; - m68k_dumpstate(&nextpc); - exit(1); +#endif + +#ifndef FULLMMU +#ifdef ARAM_PAGE_CHECK + if (((pc ^ pc_page) > ARAM_PAGE_MASK)) { + check_ram_boundary(pc, 2, false); + pc_page = pc; + pc_offset = (uintptr)get_real_address(pc, 0, sz_word) - pc; } - in_m68k_go--; -} +#else + check_ram_boundary(pc, 2, false); +#endif +#endif + opcode = GET_OPCODE; +#ifdef FLIGHT_RECORDER + m68k_record_step(m68k_getpc(), opcode); +#endif + (*cpufunctbl[opcode])(opcode); + cpu_check_ticks(); + regs.fault_pc = m68k_getpc(); -static void m68k_verify (uaecptr addr, uaecptr *nextpc) -{ - uae_u32 opcode, val; - struct instr *dp; - - opcode = get_iword_1(0); - last_op_for_exception_3 = opcode; - m68kpc_offset = 2; - - if (cpufunctbl[cft_map (opcode)] == op_illg_1) { - opcode = 0x4AFC; - } - dp = table68k + 
opcode; - - if (dp->suse) { - if (!verify_ea (dp->sreg, (amodes)dp->smode, (wordsizes)dp->size, &val)) { - Exception (3, 0); - return; - } - } - if (dp->duse) { - if (!verify_ea (dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, &val)) { - Exception (3, 0); - return; + if (SPCFLAGS_TEST(SPCFLAG_ALL_BUT_EXEC_RETURN)) { + if (m68k_do_specialties()) + return; } } } -void m68k_disasm (uaecptr addr, uaecptr *nextpc, int cnt) +void m68k_execute (void) { - uaecptr newpc = 0; - m68kpc_offset = addr - m68k_getpc (); +#ifdef USE_JIT + m68k_execute_depth++; +#endif +#ifdef DEBUGGER + VOLATILE bool after_exception = false; +#endif + +setjmpagain: + TRY(prb) { + for (;;) { + if (quit_program > 0) { + if (quit_program == 1) { +#ifdef FLIGHT_RECORDER + dump_flight_recorder(); +#endif + break; + } + quit_program = 0; + m68k_reset (); + } +#ifdef DEBUGGER + if (debugging && !after_exception) debug(); + after_exception = false; +#endif + m68k_do_execute(); + } + } + CATCH(prb) { + Exception(prb, 0); +#ifdef DEBUGGER + after_exception = true; +#endif + goto setjmpagain; + } + +#ifdef USE_JIT + m68k_execute_depth--; +#endif +} + +void m68k_disasm (FILE *f, uaecptr addr, uaecptr *nextpc, int cnt) +{ +#ifdef HAVE_DISASM_M68K + char buf[256]; + int size; + + disasm_info.memory_vma = addr; while (cnt-- > 0) { - char instrname[20],*ccpt; - int opwords; - uae_u32 opcode; - struct mnemolookup *lookup; - struct instr *dp; - printf ("%08lx: ", m68k_getpc () + m68kpc_offset); - for (opwords = 0; opwords < 5; opwords++){ - printf ("%04x ", get_iword_1 (m68kpc_offset + opwords*2)); + size = m68k_disasm_to_buf(&disasm_info, buf); + fprintf(f, "%s\n", buf); + if (size < 0) + break; } - opcode = get_iword_1 (m68kpc_offset); - m68kpc_offset += 2; - if (cpufunctbl[cft_map (opcode)] == op_illg_1) { - opcode = 0x4AFC; - } - dp = table68k + opcode; - for (lookup = lookuptab;lookup->mnemo != dp->mnemo; lookup++) - ; + if (nextpc) + *nextpc = disasm_info.memory_vma; +#else + if (nextpc) + *nextpc = addr; + 
(void) f; + (void) cnt; +#endif +} - strcpy (instrname, lookup->name); - ccpt = strstr (instrname, "cc"); - if (ccpt != 0) { - strncpy (ccpt, ccnames[dp->cc], 2); - } - printf ("%s", instrname); - switch (dp->size){ - case sz_byte: printf (".B "); break; - case sz_word: printf (".W "); break; - case sz_long: printf (".L "); break; - default: printf (" "); break; - } +#ifdef DEBUGGER +void newm68k_disasm(FILE *f, uaecptr addr, uaecptr *nextpc, unsigned int cnt) +{ +#ifdef HAVE_DISASM_M68K + char buf[256]; - if (dp->suse) { - newpc = m68k_getpc () + m68kpc_offset; - newpc += ShowEA (dp->sreg, (amodes)dp->smode, (wordsizes)dp->size, 0); - } - if (dp->suse && dp->duse) - printf (","); - if (dp->duse) { - newpc = m68k_getpc () + m68kpc_offset; - newpc += ShowEA (dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 0); - } - if (ccpt != 0) { - if (cctrue(dp->cc)) - printf (" == %08lx (TRUE)", newpc); - else - printf (" == %08lx (FALSE)", newpc); - } else if ((opcode & 0xff00) == 0x6100) /* BSR */ - printf (" == %08lx", newpc); - printf ("\n"); + disasm_info.memory_vma = addr; + if (cnt == 0) { + m68k_disasm_to_buf(&disasm_info, buf); + } else { + while (cnt-- > 0) { + m68k_disasm_to_buf(&disasm_info, buf); + fprintf(f, "%s\n", buf); + } } if (nextpc) - *nextpc = m68k_getpc () + m68kpc_offset; + *nextpc = disasm_info.memory_vma; +#else + if (nextpc) + *nextpc = addr; + (void) cnt; +#endif } -void m68k_dumpstate (uaecptr *nextpc) +#endif /* DEBUGGER */ + +#ifdef FULL_HISTORY +void showDisasm(uaecptr addr) { +#ifdef HAVE_DISASM_M68K + char buf[256]; + + disasm_info.memory_vma = addr; + m68k_disasm_to_buf(&disasm_info, buf); + bug("%s", buf); +#else + (void) addr; +#endif +} +#endif /* FULL_HISTORY */ + +void m68k_dumpstate (FILE *out, uaecptr *nextpc) { int i; for (i = 0; i < 8; i++){ - printf ("D%d: %08lx ", i, m68k_dreg(regs, i)); - if ((i & 3) == 3) printf ("\n"); + fprintf (out, "D%d: %08lx ", i, (unsigned long)m68k_dreg(regs, i)); + if ((i & 3) == 3) fprintf (out, "\n"); 
} for (i = 0; i < 8; i++){ - printf ("A%d: %08lx ", i, m68k_areg(regs, i)); - if ((i & 3) == 3) printf ("\n"); + fprintf (out, "A%d: %08lx ", i, (unsigned long)m68k_areg(regs, i)); + if ((i & 3) == 3) fprintf (out, "\n"); } if (regs.s == 0) regs.usp = m68k_areg(regs, 7); if (regs.s && regs.m) regs.msp = m68k_areg(regs, 7); if (regs.s && regs.m == 0) regs.isp = m68k_areg(regs, 7); - printf ("USP=%08lx ISP=%08lx MSP=%08lx VBR=%08lx\n", - regs.usp,regs.isp,regs.msp,regs.vbr); - printf ("T=%d%d S=%d M=%d X=%d N=%d Z=%d V=%d C=%d IMASK=%d\n", + fprintf (out, "USP=%08lx ISP=%08lx MSP=%08lx VBR=%08lx\n", + (unsigned long)regs.usp, (unsigned long)regs.isp, + (unsigned long)regs.msp, (unsigned long)regs.vbr); + fprintf (out, "T=%d%d S=%d M=%d X=%d N=%d Z=%d V=%d C=%d IMASK=%d TCE=%d TCP=%d\n", regs.t1, regs.t0, regs.s, regs.m, - GET_XFLG, GET_NFLG, GET_ZFLG, GET_VFLG, GET_CFLG, regs.intmask); + (int)GET_XFLG, (int)GET_NFLG, (int)GET_ZFLG, (int)GET_VFLG, (int)GET_CFLG, regs.intmask, + regs.mmu_enabled, regs.mmu_pagesize_8k); + fprintf (out, "CACR=%08lx CAAR=%08lx URP=%08lx SRP=%08lx\n", + (unsigned long)regs.cacr, + (unsigned long)regs.caar, + (unsigned long)regs.urp, + (unsigned long)regs.srp); + fprintf (out, "DTT0=%08lx DTT1=%08lx ITT0=%08lx ITT1=%08lx\n", + (unsigned long)regs.dtt0, + (unsigned long)regs.dtt1, + (unsigned long)regs.itt0, + (unsigned long)regs.itt1); for (i = 0; i < 8; i++){ - printf ("FP%d: %g ", i, regs.fp[i]); - if ((i & 3) == 3) printf ("\n"); + fprintf (out, "FP%d: %g ", i, (double)fpu.registers[i]); + if ((i & 3) == 3) fprintf (out, "\n"); } - printf ("N=%d Z=%d I=%d NAN=%d\n", +#if 0 + fprintf (out, "N=%d Z=%d I=%d NAN=%d\n", (regs.fpsr & 0x8000000) != 0, (regs.fpsr & 0x4000000) != 0, (regs.fpsr & 0x2000000) != 0, (regs.fpsr & 0x1000000) != 0); - - m68k_disasm(m68k_getpc (), nextpc, 1); +#endif + m68k_disasm(out, m68k_getpc (), nextpc, 1); if (nextpc) - printf ("next PC: %08lx\n", *nextpc); + fprintf (out, "next PC: %08lx\n", (unsigned 
long)*nextpc); } diff --git a/BasiliskII/src/uae_cpu/newcpu.h b/BasiliskII/src/uae_cpu/newcpu.h index dc174a78..13a51b82 100644 --- a/BasiliskII/src/uae_cpu/newcpu.h +++ b/BasiliskII/src/uae_cpu/newcpu.h @@ -1,3 +1,27 @@ +/* + * newcpu.h - CPU emulation + * + * Copyright (c) 2009 ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * ARAnyM is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ARAnyM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with ARAnyM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ /* * UAE - The Un*x Amiga Emulator * @@ -6,41 +30,22 @@ * Copyright 1995 Bernd Schmidt */ -#define SPCFLAG_STOP 2 -#define SPCFLAG_DISK 4 -#define SPCFLAG_INT 8 -#define SPCFLAG_BRK 16 -#define SPCFLAG_EXTRA_CYCLES 32 -#define SPCFLAG_TRACE 64 -#define SPCFLAG_DOTRACE 128 -#define SPCFLAG_DOINT 256 -#define SPCFLAG_BLTNASTY 512 -#define SPCFLAG_EXEC 1024 -#define SPCFLAG_MODE_CHANGE 8192 +#ifndef NEWCPU_H +#define NEWCPU_H -#ifndef SET_CFLG +#include "sysdeps.h" +#include "registers.h" +#include "spcflags.h" +#include "m68k.h" +#include "memory.h" -#define SET_CFLG(x) (CFLG = (x)) -#define SET_NFLG(x) (NFLG = (x)) -#define SET_VFLG(x) (VFLG = (x)) -#define SET_ZFLG(x) (ZFLG = (x)) -#define SET_XFLG(x) (XFLG = (x)) +# include -#define GET_CFLG CFLG -#define GET_NFLG NFLG -#define GET_VFLG VFLG -#define GET_ZFLG ZFLG -#define GET_XFLG XFLG - -#define CLEAR_CZNV do { \ - SET_CFLG (0); \ - SET_ZFLG (0); \ - SET_NFLG (0); \ - SET_VFLG (0); \ -} while (0) - -#define COPY_CARRY (SET_XFLG (GET_CFLG)) -#endif +extern struct fixup { + int flag; + uae_u32 reg; + uaecptr value; +}fixup; extern int areg_byteinc[]; extern int imm8_table[]; @@ -49,112 +54,126 @@ extern int movem_index1[256]; extern int movem_index2[256]; extern int movem_next[256]; -extern int fpp_movem_index1[256]; -extern int fpp_movem_index2[256]; -extern int fpp_movem_next[256]; - extern int broken_in; +#ifdef X86_ASSEMBLY +/* This hack seems to force all register saves (pushl %reg) to be moved to the + begining of the function, thus making it possible to cpuopti to remove them + since m68k_run_1 will save those registers before calling the instruction + handler */ +# define cpuop_tag(tag) __asm__ __volatile__ ( "#cpuop_" tag ) +#else +# define cpuop_tag(tag) ; +#endif + +#define cpuop_begin() do { 
cpuop_tag("begin"); } while (0) +#define cpuop_end() do { cpuop_tag("end"); } while (0) + typedef void REGPARAM2 cpuop_func (uae_u32) REGPARAM; struct cputbl { cpuop_func *handler; - int specific; + uae_u16 specific; uae_u16 opcode; }; +extern cpuop_func *cpufunctbl[65536]; + +#ifdef USE_JIT +typedef void compop_func (uae_u32) REGPARAM; + +struct comptbl { + compop_func *handler; + uae_u32 opcode; + uae_u32 specific; +#define COMP_OPCODE_ISJUMP 0x0001 +#define COMP_OPCODE_LONG_OPCODE 0x0002 +#define COMP_OPCODE_CMOV 0x0004 +#define COMP_OPCODE_ISADDX 0x0008 +#define COMP_OPCODE_ISCJUMP 0x0010 +#define COMP_OPCODE_USES_FPU 0x0020 +}; +#endif + extern void REGPARAM2 op_illg (uae_u32) REGPARAM; -typedef char flagtype; - -extern struct regstruct -{ - uae_u32 regs[16]; - uaecptr usp,isp,msp; - uae_u16 sr; - flagtype t1; - flagtype t0; - flagtype s; - flagtype m; - flagtype x; - flagtype stopped; - int intmask; - - uae_u32 pc; - uae_u8 *pc_p; - uae_u8 *pc_oldp; - - uae_u32 vbr,sfc,dfc; - - double fp[8]; - uae_u32 fpcr,fpsr,fpiar; - - uae_u32 spcflags; - uae_u32 kick_mask; - - /* Fellow sources say this is 4 longwords. That's impossible. It needs - * to be at least a longword. The HRM has some cryptic comment about two - * instructions being on the same longword boundary. - * The way this is implemented now seems like a good compromise. 
- */ - uae_u32 prefetch; -} regs, lastint_regs; - #define m68k_dreg(r,num) ((r).regs[(num)]) #define m68k_areg(r,num) (((r).regs + 8)[(num)]) -#define get_ibyte(o) do_get_mem_byte((uae_u8 *)(regs.pc_p + (o) + 1)) -#define get_iword(o) do_get_mem_word((uae_u16 *)(regs.pc_p + (o))) -#define get_ilong(o) do_get_mem_long((uae_u32 *)(regs.pc_p + (o))) +#ifdef FULLMMU +static ALWAYS_INLINE uae_u8 get_ibyte(uae_u32 o) +{ + return mmu_get_byte(m68k_getpc() + o + 1, 0, sz_byte); +} +static ALWAYS_INLINE uae_u16 get_iword(uae_u32 o) +{ + return mmu_get_word(m68k_getpc() + o, 0, sz_word); +} +static ALWAYS_INLINE uae_u32 get_ilong(uae_u32 o) +{ + uaecptr addr = m68k_getpc() + o; + + if (unlikely(is_unaligned(addr, 4))) + return mmu_get_long_unaligned(addr, 0); + return mmu_get_long(addr, 0, sz_long); +} -#ifdef HAVE_GET_WORD_UNSWAPPED -#define GET_OPCODE (do_get_mem_word_unswapped (regs.pc_p)) #else -#define GET_OPCODE (get_iword (0)) +#define get_ibyte(o) do_get_mem_byte((uae_u8 *)(get_real_address(m68k_getpc(), 0, sz_byte) + (o) + 1)) +#define get_iword(o) do_get_mem_word((uae_u16 *)(get_real_address(m68k_getpc(), 0, sz_word) + (o))) +#define get_ilong(o) do_get_mem_long((uae_u32 *)(get_real_address(m68k_getpc(), 0, sz_long) + (o))) #endif -static __inline__ uae_u32 get_ibyte_prefetch (uae_s32 o) +#if 0 +static inline uae_u32 get_ibyte_prefetch (uae_s32 o) { if (o > 3 || o < 0) - return do_get_mem_byte((uae_u8 *)(regs.pc_p + o + 1)); + return do_get_mem_byte((uae_u8 *)(do_get_real_address(regs.pcp, false, false) + o + 1)); return do_get_mem_byte((uae_u8 *)(((uae_u8 *)®s.prefetch) + o + 1)); } -static __inline__ uae_u32 get_iword_prefetch (uae_s32 o) +static inline uae_u32 get_iword_prefetch (uae_s32 o) { if (o > 3 || o < 0) - return do_get_mem_word((uae_u16 *)(regs.pc_p + o)); + return do_get_mem_word((uae_u16 *)(do_get_real_address(regs.pcp, false, false) + o)); return do_get_mem_word((uae_u16 *)(((uae_u8 *)®s.prefetch) + o)); } -static __inline__ uae_u32 
get_ilong_prefetch (uae_s32 o) +static inline uae_u32 get_ilong_prefetch (uae_s32 o) { if (o > 3 || o < 0) - return do_get_mem_long((uae_u32 *)(regs.pc_p + o)); + return do_get_mem_long((uae_u32 *)(do_get_real_address(regs.pcp, false, false) + o)); if (o == 0) return do_get_mem_long(®s.prefetch); - return (do_get_mem_word (((uae_u16 *)®s.prefetch) + 1) << 16) | do_get_mem_word ((uae_u16 *)(regs.pc_p + 4)); + return (do_get_mem_word (((uae_u16 *)®s.prefetch) + 1) << 16) | do_get_mem_word ((uae_u16 *)(do_get_real_address(regs.pcp, false, false) + 4)); } +#endif +#ifdef FULLMMU +#define m68k_incpc(o) (regs.pc += (o)) +#else #define m68k_incpc(o) (regs.pc_p += (o)) +#endif -static __inline__ void fill_prefetch_0 (void) +static inline void fill_prefetch_0 (void) { +#if USE_PREFETCH_BUFFER uae_u32 r; #ifdef UNALIGNED_PROFITABLE - r = *(uae_u32 *)regs.pc_p; + r = *(uae_u32 *)do_get_real_address(m68k_getpc(), false, false); regs.prefetch = r; #else - r = do_get_mem_long ((uae_u32 *)regs.pc_p); + r = do_get_mem_long ((uae_u32 *)do_get_real_address(m68k_getpc(), false, false)); do_put_mem_long (®s.prefetch, r); #endif +#endif } #if 0 -static __inline__ void fill_prefetch_2 (void) +static inline void fill_prefetch_2 (void) { uae_u32 r = do_get_mem_long (®s.prefetch) << 16; - uae_u32 r2 = do_get_mem_word (((uae_u16 *)regs.pc_p) + 1); + uae_u32 r2 = do_get_mem_word (((uae_u16 *)do_get_real_address(regs.pcp, false, false)) + 1); r |= r2; do_put_mem_long (®s.prefetch, r); } @@ -164,103 +183,114 @@ static __inline__ void fill_prefetch_2 (void) /* These are only used by the 68020/68881 code, and therefore don't * need to handle prefetch. 
*/ -static __inline__ uae_u32 next_ibyte (void) +static inline uae_u32 next_ibyte (void) { uae_u32 r = get_ibyte (0); m68k_incpc (2); return r; } -static __inline__ uae_u32 next_iword (void) +static inline uae_u32 next_iword (void) { uae_u32 r = get_iword (0); m68k_incpc (2); return r; } -static __inline__ uae_u32 next_ilong (void) +static inline uae_u32 next_ilong (void) { uae_u32 r = get_ilong (0); m68k_incpc (4); return r; } -static __inline__ void m68k_setpc (uaecptr newpc) +static inline void m68k_setpc (uaecptr newpc) { - regs.pc_p = regs.pc_oldp = get_real_address(newpc); - regs.pc = newpc; -} - -static __inline__ uaecptr m68k_getpc (void) -{ - return regs.pc + ((char *)regs.pc_p - (char *)regs.pc_oldp); -} - -static __inline__ uaecptr m68k_getpc_p (uae_u8 *p) -{ - return regs.pc + ((char *)p - (char *)regs.pc_oldp); -} - -static __inline__ void m68k_do_rts(void) -{ - m68k_setpc(get_long(m68k_areg(regs, 7))); - m68k_areg(regs, 7) += 4; -} - -static __inline__ void m68k_do_bsr(uaecptr oldpc, uae_s32 offset) -{ - m68k_areg(regs, 7) -= 4; - put_long(m68k_areg(regs, 7), oldpc); - m68k_incpc(offset); -} - -static __inline__ void m68k_do_jsr(uaecptr oldpc, uaecptr dest) -{ - m68k_areg(regs, 7) -= 4; - put_long(m68k_areg(regs, 7), oldpc); - m68k_setpc(dest); +#ifndef FULLMMU + regs.pc_p = regs.pc_oldp = get_real_address(newpc, 0, sz_word); +#endif + regs.fault_pc = regs.pc = newpc; } #define m68k_setpc_fast m68k_setpc #define m68k_setpc_bcc m68k_setpc #define m68k_setpc_rte m68k_setpc -static __inline__ void m68k_setstopped (int stop) +static inline void m68k_do_rts(void) { - regs.stopped = stop; - if (stop) - regs.spcflags |= SPCFLAG_STOP; + m68k_setpc(get_long(m68k_areg(regs, 7))); + m68k_areg(regs, 7) += 4; +} + +static inline void m68k_do_bsr(uaecptr oldpc, uae_s32 offset) +{ + put_long(m68k_areg(regs, 7) - 4, oldpc); + m68k_areg(regs, 7) -= 4; + m68k_incpc(offset); +} + +static inline void m68k_do_jsr(uaecptr oldpc, uaecptr dest) +{ + put_long(m68k_areg(regs, 
7) - 4, oldpc); + m68k_areg(regs, 7) -= 4; + m68k_setpc(dest); } -extern uae_u32 get_disp_ea_020 (uae_u32 base, uae_u32 dp); -extern uae_u32 get_disp_ea_000 (uae_u32 base, uae_u32 dp); +static inline void m68k_setstopped (int stop) +{ + regs.stopped = stop; + /* A traced STOP instruction drops through immediately without + actually stopping. */ + if (stop && !( SPCFLAGS_TEST( SPCFLAG_DOTRACE ))) + SPCFLAGS_SET( SPCFLAG_STOP ); +} + +#ifdef FULLMMU +# define GET_OPCODE (get_iword (0)) +#elif defined ARAM_PAGE_CHECK +# ifdef HAVE_GET_WORD_UNSWAPPED +# define GET_OPCODE (do_get_mem_word_unswapped((uae_u16*)(pc + pc_offset))); +# else +# define GET_OPCODE (do_get_mem_word((uae_u16*)(pc + pc_offset))); +# endif +#else +# ifdef HAVE_GET_WORD_UNSWAPPED +# define GET_OPCODE (do_get_mem_word_unswapped ((uae_u16*)get_real_address(m68k_getpc(), 0, sz_word))) +# else +# define GET_OPCODE (get_iword (0)) +# endif +#endif + +extern REGPARAM uae_u32 get_disp_ea_020 (uae_u32 base, uae_u32 dp); +extern REGPARAM uae_u32 get_disp_ea_000 (uae_u32 base, uae_u32 dp); +extern REGPARAM uae_u32 get_bitfield(uae_u32 src, uae_u32 bdata[2], uae_s32 offset, int width); +extern REGPARAM void put_bitfield(uae_u32 dst, uae_u32 bdata[2], uae_u32 val, uae_s32 offset, int width); + -extern uae_s32 ShowEA (int reg, amodes mode, wordsizes size, char *buf); extern void MakeSR (void); extern void MakeFromSR (void); extern void Exception (int, uaecptr); extern void dump_counts (void); -extern void m68k_move2c (int, uae_u32 *); -extern void m68k_movec2 (int, uae_u32 *); +extern int m68k_move2c (int, uae_u32 *); +extern int m68k_movec2 (int, uae_u32 *); extern void m68k_divl (uae_u32, uae_u32, uae_u16, uaecptr); extern void m68k_mull (uae_u32, uae_u32, uae_u16); +extern void m68k_emulop (uae_u32); +extern void m68k_emulop_return (void); +extern void m68k_natfeat_id(void); +extern void m68k_natfeat_call(void); extern void init_m68k (void); -extern void m68k_go (int); -extern void m68k_dumpstate (uaecptr *); 
-extern void m68k_disasm (uaecptr, uaecptr *, int); +extern void exit_m68k (void); +extern void m68k_dumpstate (FILE *, uaecptr *); +extern void m68k_disasm (FILE *, uaecptr, uaecptr *, int); +extern void newm68k_disasm(FILE *, uaecptr, uaecptr *, unsigned int); +extern void showDisasm(uaecptr); extern void m68k_reset (void); extern void m68k_enter_debugger(void); - -extern void mmu_op (uae_u32, uae_u16); - -extern void fpp_opp (uae_u32, uae_u16); -extern void fdbcc_opp (uae_u32, uae_u16); -extern void fscc_opp (uae_u32, uae_u16); -extern void ftrapcc_opp (uae_u32,uaecptr); -extern void fbcc_opp (uae_u32, uaecptr, uae_u32); -extern void fsave_opp (uae_u32); -extern void frestore_opp (uae_u32); +extern int m68k_do_specialties(void); +extern void m68k_instr_set(void); +uae_u32 linea68000(uae_u16 opcode); /* Opcode of faulting instruction */ extern uae_u16 last_op_for_exception_3; @@ -271,16 +301,34 @@ extern uaecptr last_fault_for_exception_3; #define CPU_OP_NAME(a) op ## a -/* 68020 + 68881 */ -extern struct cputbl op_smalltbl_0[]; -/* 68020 */ -extern struct cputbl op_smalltbl_1[]; -/* 68010 */ -extern struct cputbl op_smalltbl_2[]; -/* 68000 */ -extern struct cputbl op_smalltbl_3[]; -/* 68000 slow but compatible. 
*/ -extern struct cputbl op_smalltbl_4[]; +/* 68040+ 68881 */ +extern const struct cputbl op_smalltbl_0_ff[]; +extern const struct cputbl op_smalltbl_0_nf[]; -extern cpuop_func *cpufunctbl[65536]; +#ifdef FLIGHT_RECORDER +extern void m68k_record_step(uaecptr, int); +#endif +extern void m68k_do_execute(void); +extern void m68k_execute(void); +#ifdef USE_JIT +extern void m68k_compile_execute(void); +extern void m68k_do_compile_execute(void); +#endif +#ifdef USE_CPU_EMUL_SERVICES +extern int32 emulated_ticks; +extern void cpu_do_check_ticks(void); + +static inline void cpu_check_ticks(void) +{ + if (--emulated_ticks <= 0) + cpu_do_check_ticks(); +} +#else +#define cpu_check_ticks() +#define cpu_do_check_ticks() +#endif + +cpuop_func op_illg_1; + +#endif /* NEWCPU_H */ diff --git a/BasiliskII/src/uae_cpu/noflags.h b/BasiliskII/src/uae_cpu/noflags.h new file mode 100644 index 00000000..d680b200 --- /dev/null +++ b/BasiliskII/src/uae_cpu/noflags.h @@ -0,0 +1,142 @@ +#ifndef NOFLAGS_H +#define NOFLAGS_H + +/* Undefine everything that will *set* flags. Note: Leave *reading* + flags alone ;-). We assume that nobody does something like + SET_ZFLG(a=b+c), i.e. expect side effects of the macros. That would + be a stupid thing to do when using macros. +*/ + +/* Gwenole Beauchesne pointed out that CAS and CAS2 use flag_cmp to set + flags that are then used internally, and that thus the noflags versions + of those instructions were broken. Oops! + Easy fix: Leave flag_cmp alone. It is only used by CMP* and CAS* + instructions. For CAS*, noflags is a bad idea. For CMP*, which has + setting flags as its only function, the noflags version is kinda pointless, + anyway. + Note that this will only work while using the optflag_* routines --- + as we do on all (one ;-) platforms that will ever use the noflags + versions, anyway. + However, if you try to compile without optimized flags, the "SET_ZFLAG" + macro will be left unchanged, to make CAS and CAS2 work right. 
Of course, + this is contrary to the whole idea of noflags, but better be right than + be fast. + + Another problem exists with one of the bitfield operations. Once again, + one of the operations sets a flag, and looks at it later. And the CHK2 + instruction does so as well. For those, a different solution is possible. + the *_ALWAYS versions of the SET_?FLG macros shall remain untouched by + the redefinitions in this file. + Unfortunately, they are defined in terms of the macros we *do* redefine. + So here comes a bit of trickery.... +*/ +#define NOFLAGS_CMP 0 + +#undef SET_NFLG_ALWAYS +static inline void SET_NFLG_ALWAYS(uae_u32 x) +{ + SET_NFLG(x); /* This has not yet been redefined */ +} + +#undef SET_CFLG_ALWAYS +static inline void SET_CFLG_ALWAYS(uae_u32 x) +{ + SET_CFLG(x); /* This has not yet been redefined */ +} + +#undef CPUFUNC +#define CPUFUNC(x) x##_nf + +#ifndef OPTIMIZED_FLAGS +#undef SET_ZFLG +#define SET_ZFLG(y) do {uae_u32 dummy=(y); } while (0) +#endif + +#undef SET_CFLG +#define SET_CFLG(y) do {uae_u32 dummy=(y); } while (0) +#undef SET_VFLG +#define SET_VFLG(y) do {uae_u32 dummy=(y); } while (0) +#undef SET_NFLG +#define SET_NFLG(y) do {uae_u32 dummy=(y); } while (0) +#undef SET_XFLG +#define SET_XFLG(y) do {uae_u32 dummy=(y); } while (0) + +#undef CLEAR_CZNV +#define CLEAR_CZNV +#undef IOR_CZNV +#define IOR_CZNV(y) do {uae_u32 dummy=(y); } while (0) +#undef SET_CZNV +#define SET_CZNV(y) do {uae_u32 dummy=(y); } while (0) +#undef COPY_CARRY +#define COPY_CARRY + +#ifdef optflag_testl +#undef optflag_testl +#endif + +#ifdef optflag_testw +#undef optflag_testw +#endif + +#ifdef optflag_testb +#undef optflag_testb +#endif + +#ifdef optflag_addl +#undef optflag_addl +#endif + +#ifdef optflag_addw +#undef optflag_addw +#endif + +#ifdef optflag_addb +#undef optflag_addb +#endif + +#ifdef optflag_subl +#undef optflag_subl +#endif + +#ifdef optflag_subw +#undef optflag_subw +#endif + +#ifdef optflag_subb +#undef optflag_subb +#endif + +#if NOFLAGS_CMP 
+#ifdef optflag_cmpl +#undef optflag_cmpl +#endif + +#ifdef optflag_cmpw +#undef optflag_cmpw +#endif + +#ifdef optflag_cmpb +#undef optflag_cmpb +#endif +#endif + +#define optflag_testl(v) do { } while (0) +#define optflag_testw(v) do { } while (0) +#define optflag_testb(v) do { } while (0) + +#define optflag_addl(v, s, d) (v = (uae_s32)(d) + (uae_s32)(s)) +#define optflag_addw(v, s, d) (v = (uae_s16)(d) + (uae_s16)(s)) +#define optflag_addb(v, s, d) (v = (uae_s8)(d) + (uae_s8)(s)) + +#define optflag_subl(v, s, d) (v = (uae_s32)(d) - (uae_s32)(s)) +#define optflag_subw(v, s, d) (v = (uae_s16)(d) - (uae_s16)(s)) +#define optflag_subb(v, s, d) (v = (uae_s8)(d) - (uae_s8)(s)) + +#if NOFLAGS_CMP +/* These are just for completeness sake */ +#define optflag_cmpl(s, d) do { } while (0) +#define optflag_cmpw(s, d) do { } while (0) +#define optflag_cmpb(s, d) do { } while (0) +#endif + +#endif diff --git a/BasiliskII/src/uae_cpu/readcpu.cpp b/BasiliskII/src/uae_cpu/readcpu.cpp index abb3faae..1c385b98 100644 --- a/BasiliskII/src/uae_cpu/readcpu.cpp +++ b/BasiliskII/src/uae_cpu/readcpu.cpp @@ -1,3 +1,4 @@ +/* 2002 MJ */ /* * UAE - The Un*x Amiga Emulator * @@ -6,14 +7,21 @@ * Copyright 1995,1996 Bernd Schmidt */ -#include -#include -#include -#include - #include "sysdeps.h" #include "readcpu.h" +#include +#include +#include +#include + +using std::strncmp; +using std::abort; +using std::fprintf; +using std::strcmp; +using std::strlen; +using std::malloc; + int nr_cpuop_funcs; struct mnemolookup lookuptab[] = { @@ -139,13 +147,20 @@ struct mnemolookup lookuptab[] = { { i_CPUSHA, "CPUSHA" }, { i_MOVE16, "MOVE16" }, + { i_EMULOP_RETURN, "EMULOP_RETURN" }, + { i_EMULOP, "EMULOP" }, + { i_MMUOP, "MMUOP" }, + + {i_NATFEAT_ID, "NATFEAT_ID" }, + {i_NATFEAT_CALL, "NATFEAT_CALL" }, + { i_ILLG, "" }, }; struct instr *table68k; -static __inline__ amodes mode_from_str (const char *str) +static inline amodes mode_from_str (const char *str) { if (strncmp (str, "Dreg", 4) == 0) return 
Dreg; if (strncmp (str, "Areg", 4) == 0) return Areg; @@ -163,7 +178,7 @@ static __inline__ amodes mode_from_str (const char *str) return (amodes)0; } -static __inline__ amodes mode_from_mr (int mode, int reg) +static inline amodes mode_from_mr (int mode, int reg) { switch (mode) { case 0: return Dreg; @@ -195,12 +210,31 @@ static void build_insn (int insn) int variants; struct instr_def id; const char *opcstr; - int i; + int i, n; int flaglive = 0, flagdead = 0; + int cflow = 0; id = defs68k[insn]; + // Control flow information + cflow = id.cflow; + + // Mask of flags set/used + unsigned char flags_set(0), flags_used(0); + + for (i = 0, n = 4; i < 5; i++, n--) { + switch (id.flaginfo[i].flagset) { + case fa_unset: case fa_isjmp: break; + default: flags_set |= (1 << n); + } + + switch (id.flaginfo[i].flaguse) { + case fu_unused: case fu_isjmp: break; + default: flags_used |= (1 << n); + } + } + for (i = 0; i < 5; i++) { switch (id.flaginfo[i].flagset){ case fa_unset: break; @@ -236,7 +270,7 @@ static void build_insn (int insn) int pos = 0; int mnp = 0; int bitno = 0; - char mnemonic[10]; + char mnemonic[64]; wordsizes sz = sz_long; int srcgather = 0, dstgather = 0; @@ -274,6 +308,9 @@ static void build_insn (int insn) if (bitcnt[bitI] && (bitval[bitI] == 0x00 || bitval[bitI] == 0xff)) continue; + if (bitcnt[bitE] && (bitval[bitE] == 0x00)) + continue; + /* bitI and bitC get copied to biti and bitc */ if (bitcnt[bitI]) { bitval[biti] = bitval[bitI]; bitpos[biti] = bitpos[bitI]; @@ -311,6 +348,11 @@ static void build_insn (int insn) } } mnp++; + if ((unsigned)mnp >= (sizeof(mnemonic)-1)) { + mnemonic[sizeof(mnemonic)-1] = '\0'; + fprintf(stderr, "WTF!!! 
Instruction '%s' overflow\n", mnemonic); + abort(); + } } pos++; } @@ -339,6 +381,7 @@ static void build_insn (int insn) case 'A': srcmode = Areg; switch (opcstr[pos++]) { + case 'l': srcmode = absl; break; case 'r': srcreg = bitval[bitr]; srcgather = 1; srcpos = bitpos[bitr]; break; case 'R': srcreg = bitval[bitR]; srcgather = 1; srcpos = bitpos[bitR]; break; default: abort(); @@ -348,6 +391,11 @@ static void build_insn (int insn) case 'P': srcmode = Aipi; pos++; break; } break; +#if 0 + case 'L': + srcmode = absl; + break; +#endif case '#': switch (opcstr[pos++]) { case 'z': srcmode = imm; break; @@ -393,6 +441,22 @@ static void build_insn (int insn) srcpos = bitpos[bitK]; } break; + case 'E': srcmode = immi; srcreg = bitval[bitE]; + if (CPU_EMU_SIZE < 5) { // gb-- what is CPU_EMU_SIZE used for ?? + /* 1..255 */ + srcgather = 1; + srctype = 6; + srcpos = bitpos[bitE]; + } + break; + case 'p': srcmode = immi; srcreg = bitval[bitp]; + if (CPU_EMU_SIZE < 5) { // gb-- what is CPU_EMU_SIZE used for ?? 
+ /* 0..3 */ + srcgather = 1; + srctype = 7; + srcpos = bitpos[bitp]; + } + break; default: abort(); } break; @@ -517,12 +581,16 @@ static void build_insn (int insn) case 'R': destreg = bitval[bitR]; dstgather = 1; dstpos = bitpos[bitR]; break; default: abort(); } + if (dstpos < 0 || dstpos >= 32) + abort(); break; case 'A': destmode = Areg; switch (opcstr[pos++]) { + case 'l': destmode = absl; break; case 'r': destreg = bitval[bitr]; dstgather = 1; dstpos = bitpos[bitr]; break; case 'R': destreg = bitval[bitR]; dstgather = 1; dstpos = bitpos[bitR]; break; + case 'x': destreg = 0; dstgather = 0; dstpos = 0; break; default: abort(); } switch (opcstr[pos]) { @@ -530,6 +598,11 @@ static void build_insn (int insn) case 'P': destmode = Aipi; pos++; break; } break; +#if 0 + case 'L': + destmode = absl; + break; +#endif case '#': switch (opcstr[pos++]) { case 'z': destmode = imm; break; @@ -700,8 +773,44 @@ static void build_insn (int insn) table68k[opc].flaginfo[i].flaguse = id.flaginfo[i].flaguse; } #endif + + // Fix flags used information for Scc, Bcc, TRAPcc, DBcc instructions + if ( table68k[opc].mnemo == i_Scc + || table68k[opc].mnemo == i_Bcc + || table68k[opc].mnemo == i_DBcc + || table68k[opc].mnemo == i_TRAPcc + ) { + switch (table68k[opc].cc) { + // CC mask: XNZVC + // 8421 + case 0: flags_used = 0x00; break; /* T */ + case 1: flags_used = 0x00; break; /* F */ + case 2: flags_used = 0x05; break; /* HI */ + case 3: flags_used = 0x05; break; /* LS */ + case 4: flags_used = 0x01; break; /* CC */ + case 5: flags_used = 0x01; break; /* CS */ + case 6: flags_used = 0x04; break; /* NE */ + case 7: flags_used = 0x04; break; /* EQ */ + case 8: flags_used = 0x02; break; /* VC */ + case 9: flags_used = 0x02; break; /* VS */ + case 10:flags_used = 0x08; break; /* PL */ + case 11:flags_used = 0x08; break; /* MI */ + case 12:flags_used = 0x0A; break; /* GE */ + case 13:flags_used = 0x0A; break; /* LT */ + case 14:flags_used = 0x0E; break; /* GT */ + case 15:flags_used = 
0x0E; break; /* LE */ + } + } + +#if 1 + /* gb-- flagdead and flaglive would not have correct information */ + table68k[opc].flagdead = flags_set; + table68k[opc].flaglive = flags_used; +#else table68k[opc].flagdead = flagdead; table68k[opc].flaglive = flaglive; +#endif + table68k[opc].cflow = cflow; nomatch: /* FOO! */; } @@ -722,7 +831,7 @@ void read_table68k (void) } } -static int mismatch; +static int readcpu_mismatch; static void handle_merges (long int opcode) { @@ -747,6 +856,10 @@ static void handle_merges (long int opcode) smsk = 7; sbitdst = 8; break; case 5: smsk = 63; sbitdst = 64; break; + case 6: + smsk = 255; sbitdst = 256; break; + case 7: + smsk = 3; sbitdst = 4; break; default: smsk = 0; sbitdst = 0; abort(); @@ -775,20 +888,20 @@ static void handle_merges (long int opcode) || table68k[code].suse != table68k[opcode].suse || table68k[code].duse != table68k[opcode].duse) { - mismatch++; continue; + readcpu_mismatch++; continue; } if (table68k[opcode].suse && (table68k[opcode].spos != table68k[code].spos || table68k[opcode].smode != table68k[code].smode || table68k[opcode].stype != table68k[code].stype)) { - mismatch++; continue; + readcpu_mismatch++; continue; } if (table68k[opcode].duse && (table68k[opcode].dpos != table68k[code].dpos || table68k[opcode].dmode != table68k[code].dmode)) { - mismatch++; continue; + readcpu_mismatch++; continue; } if (code != opcode) @@ -801,7 +914,7 @@ void do_merges (void) { long int opcode; int nr = 0; - mismatch = 0; + readcpu_mismatch = 0; for (opcode = 0; opcode < 65536; opcode++) { if (table68k[opcode].handler != -1 || table68k[opcode].mnemo == i_ILLG) continue; @@ -813,5 +926,5 @@ void do_merges (void) int get_no_mismatches (void) { - return mismatch; + return readcpu_mismatch; } diff --git a/BasiliskII/src/uae_cpu/readcpu.h b/BasiliskII/src/uae_cpu/readcpu.h index 4f225f13..7855ecc7 100644 --- a/BasiliskII/src/uae_cpu/readcpu.h +++ b/BasiliskII/src/uae_cpu/readcpu.h @@ -1,13 +1,17 @@ +/* 2002 MJ */ +#ifndef 
READCPU_H +#define READCPU_H + #ifdef __cplusplus extern "C" { #endif -ENUMDECL { +typedef enum { Dreg, Areg, Aind, Aipi, Apdi, Ad16, Ad8r, absw, absl, PC16, PC8r, imm, imm0, imm1, imm2, immi, am_unknown, am_illg -} ENUMNAME (amodes); +} amodes; -ENUMDECL { +typedef enum { i_ILLG, i_OR, i_AND, i_EOR, i_ORSR, i_ANDSR, i_EORSR, @@ -32,30 +36,42 @@ ENUMDECL { i_PACK, i_UNPK, i_TAS, i_BKPT, i_CALLM, i_RTM, i_TRAPcc, i_MOVES, i_FPP, i_FDBcc, i_FScc, i_FTRAPcc, i_FBcc, i_FSAVE, i_FRESTORE, i_CINVL, i_CINVP, i_CINVA, i_CPUSHL, i_CPUSHP, i_CPUSHA, i_MOVE16, - i_MMUOP -} ENUMNAME (instrmnem); + i_MMUOP, i_EMULOP_RETURN, i_EMULOP, i_NATFEAT_ID, i_NATFEAT_CALL +} instrmnem; extern struct mnemolookup { instrmnem mnemo; const char *name; } lookuptab[]; -ENUMDECL { +typedef enum { sz_byte, sz_word, sz_long -} ENUMNAME (wordsizes); +} wordsizes; -ENUMDECL { - fa_set, fa_unset, fa_zero, fa_one, fa_dontcare, fa_unknown, fa_isjmp -} ENUMNAME (flagaffect); +typedef enum { + fa_set, fa_unset, fa_zero, fa_one, fa_dontcare, fa_unknown, fa_isjmp, + fa_isbranch +} flagaffect; -ENUMDECL { +typedef enum { fu_used, fu_unused, fu_maybecc, fu_unknown, fu_isjmp -} ENUMNAME (flaguse); +} flaguse; -ENUMDECL { +typedef enum { + fl_normal = 0, + fl_branch = 1, + fl_jump = 2, + fl_return = 3, + fl_trap = 4, + fl_const_jump = 8, + /* Instructions that can trap don't mark the end of a block */ + fl_end_block = 3 +} cflow_t; + +typedef enum { bit0, bit1, bitc, bitC, bitf, biti, bitI, bitj, bitJ, bitk, bitK, - bits, bitS, bitd, bitD, bitr, bitR, bitz, lastbit -} ENUMNAME (bitvals); + bits, bitS, bitd, bitD, bitr, bitR, bitz, bitE, bitp, lastbit +} bitvals; struct instr_def { unsigned int bits; @@ -68,6 +84,7 @@ struct instr_def { unsigned int flaguse:3; unsigned int flagset:3; } flaginfo[5]; + unsigned char cflow; unsigned char sduse; const char *opcstr; }; @@ -86,22 +103,16 @@ extern struct instr { unsigned int mnemo:8; unsigned int cc:4; unsigned int plev:2; -#ifdef sgi wordsizes size:2; amodes 
smode:5; unsigned int stype:3; amodes dmode:5; -#else - unsigned int size:2; - unsigned int smode:5; - unsigned int stype:3; - unsigned int dmode:5; -#endif unsigned int suse:1; unsigned int duse:1; unsigned int unused1:1; unsigned int clev:3; - unsigned int unused2:5; + unsigned int cflow:3; + unsigned int unused2:2; } *table68k; extern void read_table68k (void); @@ -112,3 +123,5 @@ extern int nr_cpuop_funcs; #ifdef __cplusplus } #endif + +#endif diff --git a/BasiliskII/src/uae_cpu/readcpua.cpp b/BasiliskII/src/uae_cpu/readcpua.cpp new file mode 100644 index 00000000..521c241f --- /dev/null +++ b/BasiliskII/src/uae_cpu/readcpua.cpp @@ -0,0 +1,5 @@ +/* + * readcpu.cpp must be compiled twice, once for the generator program + * and once for the actual executable + */ +#include "readcpu.cpp" diff --git a/BasiliskII/src/uae_cpu/registers.h b/BasiliskII/src/uae_cpu/registers.h new file mode 100644 index 00000000..f7daef1f --- /dev/null +++ b/BasiliskII/src/uae_cpu/registers.h @@ -0,0 +1,116 @@ +/* 2001 MJ */ + +#ifndef REGISTERS_H +#define REGISTERS_H + +#include "sysdeps.h" +#include "spcflags.h" +typedef char flagtype; + + +struct xttrx { + uae_u32 log_addr_base : 8; + uae_u32 log_addr_mask : 8; + uae_u32 enable : 1; + uae_u32 s_field : 2; + uae_u32 : 3; + uae_u32 usr1 : 1; + uae_u32 usr0 : 1; + uae_u32 : 1; + uae_u32 cmode : 2; + uae_u32 : 2; + uae_u32 write : 1; + uae_u32 : 2; +}; + +struct mmusr_t { + uae_u32 phys_addr : 20; + uae_u32 bus_err : 1; + uae_u32 global : 1; + uae_u32 usr1 : 1; + uae_u32 usr0 : 1; + uae_u32 super : 1; + uae_u32 cmode : 2; + uae_u32 modif : 1; + uae_u32 : 1; + uae_u32 write : 1; + uae_u32 ttrhit : 1; + uae_u32 resident : 1; +}; + +struct log_addr4 { + uae_u32 rif : 7; + uae_u32 pif : 7; + uae_u32 paif : 6; + uae_u32 poff : 12; +}; + +struct log_addr8 { + uae_u32 rif : 7; + uae_u32 pif : 7; + uae_u32 paif : 5; + uae_u32 poff : 13; +}; + +extern struct regstruct +{ + uae_u32 regs[16]; + uaecptr usp,isp,msp; + uae_u16 sr; + flagtype t1; + 
flagtype t0; + flagtype s; + flagtype m; + flagtype x; + flagtype stopped; + int intmask; + + uae_u32 pc; + uae_u32 fault_pc; + uae_u8 *pc_p; + uae_u8 *pc_oldp; + + uae_u32 vbr,sfc,dfc; + + volatile uae_u32 spcflags; + +#if 0 + uae_u32 kick_mask; + + /* Fellow sources say this is 4 longwords. That's impossible. It needs + * to be at least a longword. The HRM has some cryptic comment about two + * instructions being on the same longword boundary. + * The way this is implemented now seems like a good compromise. + */ + uae_u32 prefetch; +#endif + + /* MMU reg*/ + uae_u32 urp,srp; + uae_u32 tc; + + int mmu_enabled; /* flagtype tce; */ + int mmu_pagesize_8k; /* flagtype tcp; */ + + uae_u32 dtt0,dtt1,itt0,itt1; + uae_u32 mmusr; + + uae_u32 mmu_fslw, mmu_fault_addr; + uae_u16 mmu_ssw; + uae_u32 wb3_data; + uae_u16 wb3_status; + + /* Cache reg*/ + uae_u32 cacr,caar; +} regs, lastint_regs; + +static inline uaecptr m68k_getpc (void) +{ +#ifdef FULLMMU + return regs.pc; +#else + return regs.pc + ((char *)regs.pc_p - (char *)regs.pc_oldp); +#endif +} + +#endif diff --git a/BasiliskII/src/uae_cpu/spcflags.h b/BasiliskII/src/uae_cpu/spcflags.h new file mode 100644 index 00000000..b2084372 --- /dev/null +++ b/BasiliskII/src/uae_cpu/spcflags.h @@ -0,0 +1,104 @@ + /* + * UAE - The Un*x Amiga Emulator + * + * MC68000 emulation + * + * Copyright 1995 Bernd Schmidt + */ + +#ifndef SPCFLAGS_H +#define SPCFLAGS_H + +typedef uae_u32 spcflags_t; + +enum { + SPCFLAG_STOP = 0x01, + SPCFLAG_INT = 0x02, + SPCFLAG_BRK = 0x04, + SPCFLAG_TRACE = 0x08, + SPCFLAG_DOTRACE = 0x10, + SPCFLAG_DOINT = 0x20, +#ifdef USE_JIT + SPCFLAG_JIT_END_COMPILE = 0x40, + SPCFLAG_JIT_EXEC_RETURN = 0x80, +#else + SPCFLAG_JIT_END_COMPILE = 0, + SPCFLAG_JIT_EXEC_RETURN = 0, +#endif + SPCFLAG_VBL = 0x100, + SPCFLAG_MFP = 0x200, + SPCFLAG_INT3 = 0x800, + SPCFLAG_INT5 = 0x1000, + SPCFLAG_SCC = 0x2000, +// SPCFLAG_MODE_CHANGE = 0x4000, + SPCFLAG_ALL = SPCFLAG_STOP + | SPCFLAG_INT + | SPCFLAG_BRK + | SPCFLAG_TRACE + | 
SPCFLAG_DOTRACE + | SPCFLAG_DOINT + | SPCFLAG_JIT_END_COMPILE + | SPCFLAG_JIT_EXEC_RETURN + | SPCFLAG_INT3 + | SPCFLAG_VBL + | SPCFLAG_INT5 + | SPCFLAG_SCC + | SPCFLAG_MFP + , + + SPCFLAG_ALL_BUT_EXEC_RETURN = SPCFLAG_ALL & ~SPCFLAG_JIT_EXEC_RETURN + +}; + +#define SPCFLAGS_TEST(m) \ + ((regs.spcflags & (m)) != 0) + +/* Macro only used in m68k_reset() */ +#define SPCFLAGS_INIT(m) do { \ + regs.spcflags = (m); \ +} while (0) + +#if !(ENABLE_EXCLUSIVE_SPCFLAGS) + +#define SPCFLAGS_SET(m) do { \ + regs.spcflags |= (m); \ +} while (0) + +#define SPCFLAGS_CLEAR(m) do { \ + regs.spcflags &= ~(m); \ +} while (0) + +#elif defined(X86_ASSEMBLY) + +#define HAVE_HARDWARE_LOCKS + +#define SPCFLAGS_SET(m) do { \ + __asm__ __volatile__("lock\n\torl %1,%0" : "=m" (regs.spcflags) : "i" ((m))); \ +} while (0) + +#define SPCFLAGS_CLEAR(m) do { \ + __asm__ __volatile__("lock\n\tandl %1,%0" : "=m" (regs.spcflags) : "i" (~(m))); \ +} while (0) + +#else + +#undef HAVE_HARDWARE_LOCKS + +#include "main.h" +extern B2_mutex *spcflags_lock; + +#define SPCFLAGS_SET(m) do { \ + B2_lock_mutex(spcflags_lock); \ + regs.spcflags |= (m); \ + B2_unlock_mutex(spcflags_lock); \ +} while (0) + +#define SPCFLAGS_CLEAR(m) do { \ + B2_lock_mutex(spcflags_lock); \ + regs.spcflags &= ~(m); \ + B2_unlock_mutex(spcflags_lock); \ +} while (0) + +#endif + +#endif /* SPCFLAGS_H */ diff --git a/BasiliskII/src/uae_cpu/table68k b/BasiliskII/src/uae_cpu/table68k index 54f7c5f3..4445cb50 100644 --- a/BasiliskII/src/uae_cpu/table68k +++ b/BasiliskII/src/uae_cpu/table68k @@ -4,11 +4,13 @@ % C: condition codes, except F % f: direction % i: immediate +% E: immediate, except 00 (for EmulOp instructions) % I: immediate, except 00 and ff % j: immediate 1..8 % J: immediate 0..15 % k: immediate 0..7 % K: immediate 0..63 +% p: immediate 0..3 (CINV and CPUSH: cache field) % s: source mode % S: source reg % d: dest mode @@ -24,15 +26,17 @@ % % Arp: --> -(Ar) % ArP: --> (Ar)+ +% L: --> (xxx.L) % -% Fields on a line: -% 16 chars 
bitpattern : -% CPU level / privilege level : +% Fields on a line: +% 16 chars bitpattern : +% CPU level / privilege level : % CPU level 0: 68000 % 1: 68010 % 2: 68020 % 3: 68020/68881 % 4: 68040 +% 5: 68060 % privilege level 0: not privileged % 1: unprivileged only on 68000 (check regs.s) % 2: privileged (check regs.s) @@ -43,10 +47,19 @@ % 0 means flag reset % 1 means flag set % ? means programmer was too lazy to check or instruction may trap -% + means instruction is conditional branch -% everything else means flag set/used -% / means instruction is unconditional branch/call +% + means instruction is conditional branch (ignored, only for sync) +% / means instruction is unconditional branch/call (ignored, only for sync) % x means flag is unknown and well-behaved programs shouldn't check it +% everything else means flag set/used +% +% Control flow +% two letters, combination of +% - nothing +% T the instruction may trap or cause an exception +% B branch instruction +% J jump instruction +% R return instruction +% % srcaddr status destaddr status : % bitmasks of % 1 means fetched @@ -56,197 +69,218 @@ % instruction % -0000 0000 0011 1100:00:XNZVC:XNZVC:10: ORSR.B #1 -0000 0000 0111 1100:02:?????:?????:10: ORSR.W #1 -0000 0zz0 11ss sSSS:20:?????:?????:11: CHK2.z #1,s[!Dreg,Areg,Aipi,Apdi,Immd] -0000 0000 zzdd dDDD:00:-NZ00:-----:13: OR.z #z,d[!Areg] -0000 0010 0011 1100:00:XNZVC:XNZVC:10: ANDSR.B #1 -0000 0010 0111 1100:02:?????:?????:10: ANDSR.W #1 -0000 0010 zzdd dDDD:00:-NZ00:-----:13: AND.z #z,d[!Areg] -0000 0100 zzdd dDDD:00:XNZVC:-----:13: SUB.z #z,d[!Areg] -0000 0110 zzdd dDDD:00:XNZVC:-----:13: ADD.z #z,d[!Areg] -0000 0110 11ss sSSS:20:?????:?????:10: CALLM s[!Dreg,Areg,Aipi,Apdi,Immd] -0000 0110 11ss sSSS:20:?????:?????:10: RTM s[Dreg,Areg] -0000 1000 00ss sSSS:00:--Z--:-----:11: BTST #1,s[!Areg] -0000 1000 01ss sSSS:00:--Z--:-----:13: BCHG #1,s[!Areg,Immd] -0000 1000 10ss sSSS:00:--Z--:-----:13: BCLR #1,s[!Areg,Immd] -0000 1000 11ss sSSS:00:--Z--:-----:13:
BSET #1,s[!Areg,Immd] -0000 1010 0011 1100:00:XNZVC:XNZVC:10: EORSR.B #1 -0000 1010 0111 1100:02:?????:?????:10: EORSR.W #1 -0000 1010 zzdd dDDD:00:-NZ00:-----:13: EOR.z #z,d[!Areg] -0000 1100 zzss sSSS:00:-NZVC:-----:11: CMP.z #z,s[!Areg,Immd] +0000 0000 0011 1100:00:XNZVC:XNZVC:--:10: ORSR.B #1 +0000 0000 0111 1100:02:XNZVC:XNZVC:T-:10: ORSR.W #1 +0000 0zz0 11ss sSSS:20:-?Z?C:-----:T-:11: CHK2.z #1,s[!Dreg,Areg,Aipi,Apdi,Immd] +0000 0000 zzdd dDDD:00:-NZ00:-----:--:13: OR.z #z,d[!Areg] +0000 0010 0011 1100:00:XNZVC:XNZVC:--:10: ANDSR.B #1 +0000 0010 0111 1100:02:XNZVC:XNZVC:T-:10: ANDSR.W #1 +0000 0010 zzdd dDDD:00:-NZ00:-----:--:13: AND.z #z,d[!Areg] +0000 0100 zzdd dDDD:00:XNZVC:-----:--:13: SUB.z #z,d[!Areg] +0000 0110 zzdd dDDD:00:XNZVC:-----:--:13: ADD.z #z,d[!Areg] +0000 0110 11ss sSSS:20:-----:XNZVC:--:10: CALLM s[!Dreg,Areg,Aipi,Apdi,Immd] +0000 0110 11ss sSSS:20:XNZVC:-----:-R:10: RTM s[Dreg,Areg] +0000 1000 00ss sSSS:00:--Z--:-----:--:11: BTST #1,s[!Areg] +0000 1000 01ss sSSS:00:--Z--:-----:--:13: BCHG #1,s[!Areg,Immd] +0000 1000 10ss sSSS:00:--Z--:-----:--:13: BCLR #1,s[!Areg,Immd] +0000 1000 11ss sSSS:00:--Z--:-----:--:13: BSET #1,s[!Areg,Immd] +0000 1010 0011 1100:00:XNZVC:XNZVC:--:10: EORSR.B #1 +0000 1010 0111 1100:02:XNZVC:XNZVC:T-:10: EORSR.W #1 +0000 1010 zzdd dDDD:00:-NZ00:-----:--:13: EOR.z #z,d[!Areg] +0000 1100 zzss sSSS:00:-NZVC:-----:--:11: CMP.z #z,s[!Areg,Immd] -0000 1010 11ss sSSS:20:?????:?????:13: CAS.B #1,s[!Dreg,Areg,Immd,PC8r,PC16] -0000 1100 11ss sSSS:20:?????:?????:13: CAS.W #1,s[!Dreg,Areg,Immd,PC8r,PC16] -0000 1100 1111 1100:20:?????:?????:10: CAS2.W #2 -0000 1110 zzss sSSS:22:?????:?????:13: MOVES.z #1,s[!Dreg,Areg,Immd,PC8r,PC16] -0000 1110 11ss sSSS:20:?????:?????:13: CAS.L #1,s[!Dreg,Areg,Immd,PC8r,PC16] -0000 1110 1111 1100:20:?????:?????:10: CAS2.L #2 +0000 1010 11ss sSSS:20:-NZVC:-----:--:13: CAS.B #1,s[!Dreg,Areg,Immd,PC8r,PC16] +0000 1100 11ss sSSS:20:-NZVC:-----:--:13: CAS.W #1,s[!Dreg,Areg,Immd,PC8r,PC16] +0000 1100 
1111 1100:20:-NZVC:-----:--:10: CAS2.W #2 +0000 1110 zzss sSSS:22:-----:-----:T-:13: MOVES.z #1,s[!Dreg,Areg,Immd,PC8r,PC16] +0000 1110 11ss sSSS:20:-NZVC:-----:--:13: CAS.L #1,s[!Dreg,Areg,Immd,PC8r,PC16] +0000 1110 1111 1100:20:-NZVC:-----:--:10: CAS2.L #2 -0000 rrr1 00dd dDDD:00:-----:-----:12: MVPMR.W d[Areg-Ad16],Dr -0000 rrr1 01dd dDDD:00:-----:-----:12: MVPMR.L d[Areg-Ad16],Dr -0000 rrr1 10dd dDDD:00:-----:-----:12: MVPRM.W Dr,d[Areg-Ad16] -0000 rrr1 11dd dDDD:00:-----:-----:12: MVPRM.L Dr,d[Areg-Ad16] -0000 rrr1 00ss sSSS:00:--Z--:-----:11: BTST Dr,s[!Areg] -0000 rrr1 01ss sSSS:00:--Z--:-----:13: BCHG Dr,s[!Areg,Immd] -0000 rrr1 10ss sSSS:00:--Z--:-----:13: BCLR Dr,s[!Areg,Immd] -0000 rrr1 11ss sSSS:00:--Z--:-----:13: BSET Dr,s[!Areg,Immd] +0000 rrr1 00dd dDDD:00:-----:-----:--:12: MVPMR.W d[Areg-Ad16],Dr +0000 rrr1 01dd dDDD:00:-----:-----:--:12: MVPMR.L d[Areg-Ad16],Dr +0000 rrr1 10dd dDDD:00:-----:-----:--:12: MVPRM.W Dr,d[Areg-Ad16] +0000 rrr1 11dd dDDD:00:-----:-----:--:12: MVPRM.L Dr,d[Areg-Ad16] +0000 rrr1 00ss sSSS:00:--Z--:-----:--:11: BTST Dr,s[!Areg] +0000 rrr1 01ss sSSS:00:--Z--:-----:--:13: BCHG Dr,s[!Areg,Immd] +0000 rrr1 10ss sSSS:00:--Z--:-----:--:13: BCLR Dr,s[!Areg,Immd] +0000 rrr1 11ss sSSS:00:--Z--:-----:--:13: BSET Dr,s[!Areg,Immd] -0001 DDDd ddss sSSS:00:-NZ00:-----:12: MOVE.B s,d[!Areg] -0010 DDDd ddss sSSS:00:-----:-----:12: MOVEA.L s,d[Areg] -0010 DDDd ddss sSSS:00:-NZ00:-----:12: MOVE.L s,d[!Areg] -0011 DDDd ddss sSSS:00:-----:-----:12: MOVEA.W s,d[Areg] -0011 DDDd ddss sSSS:00:-NZ00:-----:12: MOVE.W s,d[!Areg] +0001 DDDd ddss sSSS:00:-NZ00:-----:--:12: MOVE.B s,d[!Areg] +0010 DDDd ddss sSSS:00:-----:-----:--:12: MOVEA.L s,d[Areg] +0010 DDDd ddss sSSS:00:-NZ00:-----:--:12: MOVE.L s,d[!Areg] +0011 DDDd ddss sSSS:00:-----:-----:--:12: MOVEA.W s,d[Areg] +0011 DDDd ddss sSSS:00:-NZ00:-----:--:12: MOVE.W s,d[!Areg] -0100 0000 zzdd dDDD:00:XxZxC:-----:30: NEGX.z d[!Areg] -0100 0000 11dd dDDD:01:?????:?????:10: MVSR2.W d[!Areg] -0100 0010 
zzdd dDDD:00:-0100:-----:20: CLR.z d[!Areg] -0100 0010 11dd dDDD:10:?????:?????:10: MVSR2.B d[!Areg] -0100 0100 zzdd dDDD:00:XNZVC:-----:30: NEG.z d[!Areg] -0100 0100 11ss sSSS:00:XNZVC:-----:10: MV2SR.B s[!Areg] -0100 0110 zzdd dDDD:00:-NZ00:-----:30: NOT.z d[!Areg] -0100 0110 11ss sSSS:02:?????:?????:10: MV2SR.W s[!Areg] -0100 1000 0000 1rrr:20:-----:-----:31: LINK.L Ar,#2 -0100 1000 00dd dDDD:00:X?Z?C:X-Z--:30: NBCD.B d[!Areg] -0100 1000 0100 1kkk:20:?????:?????:10: BKPT #k -0100 1000 01ss sSSS:00:-NZ00:-----:30: SWAP.W s[Dreg] -0100 1000 01ss sSSS:00:-----:-----:00: PEA.L s[!Dreg,Areg,Aipi,Apdi,Immd] -0100 1000 10dd dDDD:00:-NZ00:-----:30: EXT.W d[Dreg] -0100 1000 10dd dDDD:00:-----:-----:02: MVMLE.W #1,d[!Dreg,Areg,Aipi] -0100 1000 11dd dDDD:00:-NZ00:-----:30: EXT.L d[Dreg] -0100 1000 11dd dDDD:00:-----:-----:02: MVMLE.L #1,d[!Dreg,Areg,Aipi] -0100 1001 11dd dDDD:00:-NZ00:-----:30: EXT.B d[Dreg] -0100 1010 zzss sSSS:00:-NZ00:-----:10: TST.z s -0100 1010 11dd dDDD:00:?????:?????:30: TAS.B d[!Areg] -0100 1010 1111 1100:00:?????:?????:00: ILLEGAL -0100 1100 00ss sSSS:20:-NZVC:-----:13: MULL.L #1,s[!Areg] -0100 1100 01ss sSSS:20:?????:?????:13: DIVL.L #1,s[!Areg] -0100 1100 10ss sSSS:00:-----:-----:01: MVMEL.W #1,s[!Dreg,Areg,Apdi,Immd] -0100 1100 11ss sSSS:00:-----:-----:01: MVMEL.L #1,s[!Dreg,Areg,Apdi,Immd] -0100 1110 0100 JJJJ:00:-----:XNZVC:10: TRAP #J -0100 1110 0101 0rrr:00:-----:-----:31: LINK.W Ar,#1 -0100 1110 0101 1rrr:00:-----:-----:30: UNLK.L Ar -0100 1110 0110 0rrr:02:-----:-----:10: MVR2USP.L Ar -0100 1110 0110 1rrr:02:-----:-----:20: MVUSP2R.L Ar -0100 1110 0111 0000:02:-----:-----:00: RESET -0100 1110 0111 0001:00:-----:-----:00: NOP -0100 1110 0111 0010:02:XNZVC:-----:10: STOP #1 -0100 1110 0111 0011:02:XNZVC:-----:00: RTE -0100 1110 0111 0100:00:?????:?????:10: RTD #1 -0100 1110 0111 0101:00:-----:-----:00: RTS -0100 1110 0111 0110:00:-----:XNZVC:00: TRAPV -0100 1110 0111 0111:00:XNZVC:-----:00: RTR -0100 1110 0111 1010:12:?????:?????:10: MOVEC2 
#1 -0100 1110 0111 1011:12:?????:?????:10: MOVE2C #1 -0100 1110 10ss sSSS:00://///://///:80: JSR.L s[!Dreg,Areg,Aipi,Apdi,Immd] -0100 rrr1 00ss sSSS:00:?????:?????:11: CHK.L s[!Areg],Dr -0100 rrr1 10ss sSSS:00:?????:?????:11: CHK.W s[!Areg],Dr -0100 1110 11ss sSSS:00://///://///:80: JMP.L s[!Dreg,Areg,Aipi,Apdi,Immd] -0100 rrr1 11ss sSSS:00:-----:-----:02: LEA.L s[!Dreg,Areg,Aipi,Apdi,Immd],Ar +0100 0000 zzdd dDDD:00:XNZVC:X-Z--:--:30: NEGX.z d[!Areg] +0100 0000 11dd dDDD:01:-----:XNZVC:T-:10: MVSR2.W d[!Areg] +0100 0010 zzdd dDDD:00:-0100:-----:--:20: CLR.z d[!Areg] +0100 0010 11dd dDDD:10:-----:XNZVC:--:10: MVSR2.B d[!Areg] +0100 0100 zzdd dDDD:00:XNZVC:-----:--:30: NEG.z d[!Areg] +0100 0100 11ss sSSS:00:XNZVC:-----:--:10: MV2SR.B s[!Areg] +0100 0110 zzdd dDDD:00:-NZ00:-----:--:30: NOT.z d[!Areg] +0100 0110 11ss sSSS:02:XNZVC:XNZVC:T-:10: MV2SR.W s[!Areg] +0100 1000 0000 1rrr:20:-----:-----:--:31: LINK.L Ar,#2 +0100 1000 00dd dDDD:00:X?Z?C:X-Z--:--:30: NBCD.B d[!Areg] +0100 1000 0100 1kkk:20:-----:-----:T-:10: BKPT #k +0100 1000 01ss sSSS:00:-NZ00:-----:--:30: SWAP.W s[Dreg] +0100 1000 01ss sSSS:00:-----:-----:--:00: PEA.L s[!Dreg,Areg,Aipi,Apdi,Immd] +0100 1000 10dd dDDD:00:-NZ00:-----:--:30: EXT.W d[Dreg] +0100 1000 10dd dDDD:00:-----:-----:--:02: MVMLE.W #1,d[!Dreg,Areg,Aipi] +0100 1000 11dd dDDD:00:-NZ00:-----:--:30: EXT.L d[Dreg] +0100 1000 11dd dDDD:00:-----:-----:--:02: MVMLE.L #1,d[!Dreg,Areg,Aipi] +0100 1001 11dd dDDD:00:-NZ00:-----:--:30: EXT.B d[Dreg] +0100 1010 zzss sSSS:00:-NZ00:-----:--:10: TST.z s +0100 1010 11dd dDDD:00:-NZ00:-----:--:30: TAS.B d[!Areg] +0100 1010 1111 1100:00:-----:-----:T-:00: ILLEGAL +0100 1100 00ss sSSS:20:-NZVC:-----:--:13: MULL.L #1,s[!Areg] +0100 1100 01ss sSSS:20:-NZV0:-----:T-:13: DIVL.L #1,s[!Areg] +0100 1100 10ss sSSS:00:-----:-----:--:01: MVMEL.W #1,s[!Dreg,Areg,Apdi,Immd] +0100 1100 11ss sSSS:00:-----:-----:--:01: MVMEL.L #1,s[!Dreg,Areg,Apdi,Immd] +0100 1110 0100 JJJJ:00:-----:XNZVC:--:10: TRAP #J +0100 1110 0101 
0rrr:00:-----:-----:--:31: LINK.W Ar,#1 +0100 1110 0101 1rrr:00:-----:-----:--:30: UNLK.L Ar +0100 1110 0110 0rrr:02:-----:-----:T-:10: MVR2USP.L Ar +0100 1110 0110 1rrr:02:-----:-----:T-:20: MVUSP2R.L Ar +0100 1110 0111 0000:02:-----:-----:T-:00: RESET +0100 1110 0111 0001:00:-----:-----:--:00: NOP +0100 1110 0111 0010:02:XNZVC:-----:T-:10: STOP #1 +0100 1110 0111 0011:02:XNZVC:-----:TR:00: RTE +0100 1110 0111 0100:00:-----:-----:-R:10: RTD #1 +0100 1110 0111 0101:00:-----:-----:-R:00: RTS +0100 1110 0111 0110:00:-----:XNZVC:T-:00: TRAPV +0100 1110 0111 0111:00:XNZVC:-----:-R:00: RTR +0100 1110 0111 1010:12:-----:-----:T-:10: MOVEC2 #1 +0100 1110 0111 1011:12:-----:-----:T-:10: MOVE2C #1 +0100 1110 10ss sSSS:00://///://///:-J:80: JSR.L s[!Dreg,Areg,Aipi,Apdi,Immd] +0100 rrr1 00ss sSSS:00:-N???:-----:T-:11: CHK.L s[!Areg],Dr +0100 rrr1 10ss sSSS:00:-N???:-----:T-:11: CHK.W s[!Areg],Dr +0100 1110 11ss sSSS:00://///://///:-J:80: JMP.L s[!Dreg,Areg,Aipi,Apdi,Immd] +0100 rrr1 11ss sSSS:00:-----:-----:--:02: LEA.L s[!Dreg,Areg,Aipi,Apdi,Immd],Ar -0101 jjj0 zzdd dDDD:00:-----:-----:13: ADDA.z #j,d[Areg] -0101 jjj0 zzdd dDDD:00:XNZVC:-----:13: ADD.z #j,d[!Areg] -0101 jjj1 zzdd dDDD:00:-----:-----:13: SUBA.z #j,d[Areg] -0101 jjj1 zzdd dDDD:00:XNZVC:-----:13: SUB.z #j,d[!Areg] -0101 cccc 1100 1rrr:00:-----:+++++:31: DBcc.W Dr,#1 -0101 cccc 11dd dDDD:00:-----:+++++:20: Scc.B d[!Areg] -0101 cccc 1111 1010:20:?????:?????:10: TRAPcc #1 -0101 cccc 1111 1011:20:?????:?????:10: TRAPcc #2 -0101 cccc 1111 1100:20:?????:?????:00: TRAPcc +% This variant of ADDQ is word and long sized only +0101 jjj0 01dd dDDD:00:-----:-----:--:13: ADDA.W #j,d[Areg] +0101 jjj0 10dd dDDD:00:-----:-----:--:13: ADDA.L #j,d[Areg] +0101 jjj0 zzdd dDDD:00:XNZVC:-----:--:13: ADD.z #j,d[!Areg] + +% This variant of SUBQ is word and long sized only +0101 jjj1 01dd dDDD:00:-----:-----:--:13: SUBA.W #j,d[Areg] +0101 jjj1 10dd dDDD:00:-----:-----:--:13: SUBA.L #j,d[Areg] +0101 jjj1 zzdd dDDD:00:XNZVC:-----:--:13: 
SUB.z #j,d[!Areg] + +0101 cccc 1100 1rrr:00:-----:-++++:-B:31: DBcc.W Dr,#1 +0101 cccc 11dd dDDD:00:-----:-++++:--:20: Scc.B d[!Areg] +0101 cccc 1111 1010:20:-----:-????:T-:10: TRAPcc #1 +0101 cccc 1111 1011:20:-----:-????:T-:10: TRAPcc #2 +0101 cccc 1111 1100:20:-----:-????:T-:00: TRAPcc % Bxx.L is 68020 only, but setting the CPU level to 2 would give illegal % instruction exceptions when compiling a 68000 only emulation, which isn't % what we want either. -0110 0001 0000 0000:00://///://///:40: BSR.W #1 -0110 0001 IIII IIII:00://///://///:40: BSR.B #i -0110 0001 1111 1111:00://///://///:40: BSR.L #2 -0110 CCCC 0000 0000:00:-----:+++++:40: Bcc.W #1 -0110 CCCC IIII IIII:00:-----:+++++:40: Bcc.B #i -0110 CCCC 1111 1111:00:-----:+++++:40: Bcc.L #2 +0110 0001 0000 0000:00://///://///:-B:40: BSR.W #1 +0110 0001 IIII IIII:00://///://///:-B:40: BSR.B #i +0110 0001 1111 1111:00://///://///:-B:40: BSR.L #2 +0110 CCCC 0000 0000:00:-----:-++++:-B:40: Bcc.W #1 +0110 CCCC IIII IIII:00:-----:-++++:-B:40: Bcc.B #i +0110 CCCC 1111 1111:00:-----:-++++:-B:40: Bcc.L #2 -0111 rrr0 iiii iiii:00:-NZ00:-----:12: MOVE.L #i,Dr +0111 rrr0 iiii iiii:00:-NZ00:-----:--:12: MOVE.L #i,Dr -1000 rrr0 zzss sSSS:00:-NZ00:-----:13: OR.z s[!Areg],Dr -1000 rrr0 11ss sSSS:00:?????:?????:13: DIVU.W s[!Areg],Dr -1000 rrr1 00dd dDDD:00:XxZxC:X-Z--:13: SBCD.B d[Dreg],Dr -1000 rrr1 00dd dDDD:00:XxZxC:X-Z--:13: SBCD.B d[Areg-Apdi],Arp -1000 rrr1 zzdd dDDD:00:-NZ00:-----:13: OR.z Dr,d[!Areg,Dreg] -1000 rrr1 01dd dDDD:20:?????:?????:12: PACK d[Dreg],Dr -1000 rrr1 01dd dDDD:20:?????:?????:12: PACK d[Areg-Apdi],Arp -1000 rrr1 10dd dDDD:20:?????:?????:12: UNPK d[Dreg],Dr -1000 rrr1 10dd dDDD:20:?????:?????:12: UNPK d[Areg-Apdi],Arp -1000 rrr1 11ss sSSS:00:?????:?????:13: DIVS.W s[!Areg],Dr +1000 rrr0 zzss sSSS:00:-NZ00:-----:--:13: OR.z s[!Areg],Dr +1000 rrr0 11ss sSSS:00:-NZV0:-----:T-:13: DIVU.W s[!Areg],Dr +1000 rrr1 00dd dDDD:00:X?Z?C:X-Z--:--:13: SBCD.B d[Dreg],Dr +1000 rrr1 00dd dDDD:00:X?Z?C:X-Z--:--:13: 
SBCD.B d[Areg-Apdi],Arp +1000 rrr1 zzdd dDDD:00:-NZ00:-----:--:13: OR.z Dr,d[!Areg,Dreg] +1000 rrr1 01dd dDDD:20:-----:-----:--:12: PACK d[Dreg],Dr +1000 rrr1 01dd dDDD:20:-----:-----:--:12: PACK d[Areg-Apdi],Arp +1000 rrr1 10dd dDDD:20:-----:-----:--:12: UNPK d[Dreg],Dr +1000 rrr1 10dd dDDD:20:-----:-----:--:12: UNPK d[Areg-Apdi],Arp +1000 rrr1 11ss sSSS:00:-NZV0:-----:T-:13: DIVS.W s[!Areg],Dr -1001 rrr0 zzss sSSS:00:XNZVC:-----:13: SUB.z s,Dr -1001 rrr0 11ss sSSS:00:-----:-----:13: SUBA.W s,Ar -1001 rrr1 zzdd dDDD:00:XNZVC:X-Z--:13: SUBX.z d[Dreg],Dr -1001 rrr1 zzdd dDDD:00:XNZVC:X-Z--:13: SUBX.z d[Areg-Apdi],Arp -1001 rrr1 zzdd dDDD:00:XNZVC:-----:13: SUB.z Dr,d[!Areg,Dreg] -1001 rrr1 11ss sSSS:00:-----:-----:13: SUBA.L s,Ar +1001 rrr0 zzss sSSS:00:XNZVC:-----:--:13: SUB.z s,Dr +1001 rrr0 11ss sSSS:00:-----:-----:--:13: SUBA.W s,Ar +1001 rrr1 zzdd dDDD:00:XNZVC:X-Z--:--:13: SUBX.z d[Dreg],Dr +1001 rrr1 zzdd dDDD:00:XNZVC:X-Z--:--:13: SUBX.z d[Areg-Apdi],Arp +1001 rrr1 zzdd dDDD:00:XNZVC:-----:--:13: SUB.z Dr,d[!Areg,Dreg] +1001 rrr1 11ss sSSS:00:-----:-----:--:13: SUBA.L s,Ar -1011 rrr0 zzss sSSS:00:-NZVC:-----:11: CMP.z s,Dr -1011 rrr0 11ss sSSS:00:-NZVC:-----:11: CMPA.W s,Ar -1011 rrr1 11ss sSSS:00:-NZVC:-----:11: CMPA.L s,Ar -1011 rrr1 zzdd dDDD:00:-NZVC:-----:11: CMPM.z d[Areg-Aipi],ArP -1011 rrr1 zzdd dDDD:00:-NZ00:-----:13: EOR.z Dr,d[!Areg] +1011 rrr0 zzss sSSS:00:-NZVC:-----:--:11: CMP.z s,Dr +1011 rrr0 11ss sSSS:00:-NZVC:-----:--:11: CMPA.W s,Ar +1011 rrr1 11ss sSSS:00:-NZVC:-----:--:11: CMPA.L s,Ar +1011 rrr1 zzdd dDDD:00:-NZVC:-----:--:11: CMPM.z d[Areg-Aipi],ArP +1011 rrr1 zzdd dDDD:00:-NZ00:-----:--:13: EOR.z Dr,d[!Areg] -1100 rrr0 zzss sSSS:00:-NZ00:-----:13: AND.z s[!Areg],Dr -1100 rrr0 11ss sSSS:00:-NZ00:-----:13: MULU.W s[!Areg],Dr -1100 rrr1 00dd dDDD:00:XxZxC:X-Z--:13: ABCD.B d[Dreg],Dr -1100 rrr1 00dd dDDD:00:XxZxC:X-Z--:13: ABCD.B d[Areg-Apdi],Arp -1100 rrr1 zzdd dDDD:00:-NZ00:-----:13: AND.z Dr,d[!Areg,Dreg] -1100 rrr1 01dd 
dDDD:00:-----:-----:33: EXG.L Dr,d[Dreg] -1100 rrr1 01dd dDDD:00:-----:-----:33: EXG.L Ar,d[Areg] -1100 rrr1 10dd dDDD:00:-----:-----:33: EXG.L Dr,d[Areg] -1100 rrr1 11ss sSSS:00:-NZ00:-----:13: MULS.W s[!Areg],Dr +1100 rrr0 zzss sSSS:00:-NZ00:-----:--:13: AND.z s[!Areg],Dr +1100 rrr0 11ss sSSS:00:-NZ00:-----:--:13: MULU.W s[!Areg],Dr +1100 rrr1 00dd dDDD:00:X?Z?C:X-Z--:--:13: ABCD.B d[Dreg],Dr +1100 rrr1 00dd dDDD:00:X?Z?C:X-Z--:--:13: ABCD.B d[Areg-Apdi],Arp +1100 rrr1 zzdd dDDD:00:-NZ00:-----:--:13: AND.z Dr,d[!Areg,Dreg] +1100 rrr1 01dd dDDD:00:-----:-----:--:33: EXG.L Dr,d[Dreg] +1100 rrr1 01dd dDDD:00:-----:-----:--:33: EXG.L Ar,d[Areg] +1100 rrr1 10dd dDDD:00:-----:-----:--:33: EXG.L Dr,d[Areg] +1100 rrr1 11ss sSSS:00:-NZ00:-----:--:13: MULS.W s[!Areg],Dr -1101 rrr0 zzss sSSS:00:XNZVC:-----:13: ADD.z s,Dr -1101 rrr0 11ss sSSS:00:-----:-----:13: ADDA.W s,Ar -1101 rrr1 zzdd dDDD:00:XNZVC:X-Z--:13: ADDX.z d[Dreg],Dr -1101 rrr1 zzdd dDDD:00:XNZVC:X-Z--:13: ADDX.z d[Areg-Apdi],Arp -1101 rrr1 zzdd dDDD:00:XNZVC:-----:13: ADD.z Dr,d[!Areg,Dreg] -1101 rrr1 11ss sSSS:00:-----:-----:13: ADDA.L s,Ar +1101 rrr0 zzss sSSS:00:XNZVC:-----:--:13: ADD.z s,Dr +1101 rrr0 11ss sSSS:00:-----:-----:--:13: ADDA.W s,Ar +1101 rrr1 zzdd dDDD:00:XNZVC:X-Z--:--:13: ADDX.z d[Dreg],Dr +1101 rrr1 zzdd dDDD:00:XNZVC:X-Z--:--:13: ADDX.z d[Areg-Apdi],Arp +1101 rrr1 zzdd dDDD:00:XNZVC:-----:--:13: ADD.z Dr,d[!Areg,Dreg] +1101 rrr1 11ss sSSS:00:-----:-----:--:13: ADDA.L s,Ar -1110 jjjf zz00 0RRR:00:XNZVC:-----:13: ASf.z #j,DR -1110 jjjf zz00 1RRR:00:XNZ0C:-----:13: LSf.z #j,DR -1110 jjjf zz01 0RRR:00:XNZ0C:X----:13: ROXf.z #j,DR -1110 jjjf zz01 1RRR:00:-NZ0C:-----:13: ROf.z #j,DR -1110 rrrf zz10 0RRR:00:XNZVC:X----:13: ASf.z Dr,DR -1110 rrrf zz10 1RRR:00:XNZ0C:X----:13: LSf.z Dr,DR -1110 rrrf zz11 0RRR:00:XNZ0C:X----:13: ROXf.z Dr,DR -1110 rrrf zz11 1RRR:00:-NZ0C:-----:13: ROf.z Dr,DR -1110 000f 11dd dDDD:00:XNZVC:-----:13: ASfW.W d[!Dreg,Areg] -1110 001f 11dd dDDD:00:XNZ0C:-----:13: LSfW.W 
d[!Dreg,Areg] -1110 010f 11dd dDDD:00:XNZ0C:X----:13: ROXfW.W d[!Dreg,Areg] -1110 011f 11dd dDDD:00:-NZ0C:-----:13: ROfW.W d[!Dreg,Areg] +1110 jjjf zz00 0RRR:00:XNZVC:-----:--:13: ASf.z #j,DR +1110 jjjf zz00 1RRR:00:XNZ0C:-----:--:13: LSf.z #j,DR +1110 jjjf zz01 0RRR:00:XNZ0C:X----:--:13: ROXf.z #j,DR +1110 jjjf zz01 1RRR:00:-NZ0C:-----:--:13: ROf.z #j,DR +1110 rrrf zz10 0RRR:00:XNZVC:X----:--:13: ASf.z Dr,DR +1110 rrrf zz10 1RRR:00:XNZ0C:X----:--:13: LSf.z Dr,DR +1110 rrrf zz11 0RRR:00:XNZ0C:X----:--:13: ROXf.z Dr,DR +1110 rrrf zz11 1RRR:00:-NZ0C:-----:--:13: ROf.z Dr,DR +1110 000f 11dd dDDD:00:XNZVC:-----:--:13: ASfW.W d[!Dreg,Areg] +1110 001f 11dd dDDD:00:XNZ0C:-----:--:13: LSfW.W d[!Dreg,Areg] +1110 010f 11dd dDDD:00:XNZ0C:X----:--:13: ROXfW.W d[!Dreg,Areg] +1110 011f 11dd dDDD:00:-NZ0C:-----:--:13: ROfW.W d[!Dreg,Areg] -1110 1000 11ss sSSS:20:?????:?????:11: BFTST #1,s[!Areg,Apdi,Aipi,Immd] -1110 1001 11ss sSSS:20:?????:?????:11: BFEXTU #1,s[!Areg,Apdi,Aipi,Immd] -1110 1010 11ss sSSS:20:?????:?????:13: BFCHG #1,s[!Areg,Apdi,Aipi,Immd,PC8r,PC16] -1110 1011 11ss sSSS:20:?????:?????:11: BFEXTS #1,s[!Areg,Apdi,Aipi,Immd] -1110 1100 11ss sSSS:20:?????:?????:13: BFCLR #1,s[!Areg,Apdi,Aipi,Immd,PC8r,PC16] -1110 1101 11ss sSSS:20:?????:?????:11: BFFFO #1,s[!Areg,Apdi,Aipi,Immd] -1110 1110 11ss sSSS:20:?????:?????:13: BFSET #1,s[!Areg,Apdi,Aipi,Immd,PC8r,PC16] -1110 1111 11ss sSSS:20:?????:?????:13: BFINS #1,s[!Areg,Apdi,Aipi,Immd,PC8r,PC16] +1110 1000 11ss sSSS:20:-NZ00:-----:--:11: BFTST #1,s[!Areg,Apdi,Aipi,Immd] +1110 1001 11ss sSSS:20:-NZ00:-----:--:11: BFEXTU #1,s[!Areg,Apdi,Aipi,Immd] +1110 1010 11ss sSSS:20:-NZ00:-----:--:13: BFCHG #1,s[!Areg,Apdi,Aipi,Immd,PC8r,PC16] +1110 1011 11ss sSSS:20:-NZ00:-----:--:11: BFEXTS #1,s[!Areg,Apdi,Aipi,Immd] +1110 1100 11ss sSSS:20:-NZ00:-----:--:13: BFCLR #1,s[!Areg,Apdi,Aipi,Immd,PC8r,PC16] +1110 1101 11ss sSSS:20:-NZ00:-----:--:11: BFFFO #1,s[!Areg,Apdi,Aipi,Immd] +1110 1110 11ss sSSS:20:-NZ00:-----:--:13: BFSET 
#1,s[!Areg,Apdi,Aipi,Immd,PC8r,PC16] +1110 1111 11ss sSSS:20:-NZ00:-----:--:13: BFINS #1,s[!Areg,Apdi,Aipi,Immd,PC8r,PC16] % floating point co processor -% TODO: FPU is currently commented out -% 1111 0010 00ss sSSS:30:?????:?????:11: FPP #1,s -% 1111 0010 01ss sSSS:30:?????:?????:11: FDBcc #1,s[Areg-Dreg] -% 1111 0010 01ss sSSS:30:?????:?????:11: FScc #1,s[!Areg,Immd,PC8r,PC16] -% 1111 0010 0111 1010:30:?????:?????:10: FTRAPcc #1 -% 1111 0010 0111 1011:30:?????:?????:10: FTRAPcc #2 -% 1111 0010 0111 1100:30:?????:?????:00: FTRAPcc -% 1111 0010 10KK KKKK:30:?????:?????:11: FBcc #K,#1 -% 1111 0010 11KK KKKK:30:?????:?????:11: FBcc #K,#2 -% 1111 0011 00ss sSSS:32:?????:?????:20: FSAVE s[!Dreg,Areg,Aipi,Immd,PC8r,PC16] -% 1111 0011 01ss sSSS:32:?????:?????:10: FRESTORE s[!Dreg,Areg,Apdi,Immd] +1111 0010 00ss sSSS:30:-----:-----:--:11: FPP #1,s +1111 0010 01ss sSSS:30:-----:-----:-B:11: FDBcc #1,s[Areg-Dreg] +1111 0010 01ss sSSS:30:-----:-----:--:11: FScc #1,s[!Areg,Immd,PC8r,PC16] +1111 0010 0111 1010:30:-----:-----:T-:10: FTRAPcc #1 +1111 0010 0111 1011:30:-----:-----:T-:10: FTRAPcc #2 +1111 0010 0111 1100:30:-----:-----:T-:00: FTRAPcc +1111 0010 10KK KKKK:30:-----:-----:-B:11: FBcc #K,#1 +1111 0010 11KK KKKK:30:-----:-----:-B:11: FBcc #K,#2 +1111 0011 00ss sSSS:32:-----:-----:--:20: FSAVE s[!Dreg,Areg,Aipi,Immd,PC8r,PC16] +1111 0011 01ss sSSS:32:-----:-----:--:10: FRESTORE s[!Dreg,Areg,Apdi,Immd] % 68040 instructions -1111 0100 ii00 1rrr:42:-----:-----:02: CINVL #i,Ar -1111 0100 ii01 0rrr:42:-----:-----:02: CINVP #i,Ar -1111 0100 ii01 1rrr:42:-----:-----:00: CINVA #i -1111 0100 ii10 1rrr:42:-----:-----:02: CPUSHL #i,Ar -1111 0100 ii11 0rrr:42:-----:-----:02: CPUSHP #i,Ar -1111 0100 ii11 1rrr:42:-----:-----:00: CPUSHA #i -1111 0110 0010 0rrr:40:-----:-----:12: MOVE16 ArP,ARP +1111 0100 pp00 1rrr:42:-----:-----:T-:02: CINVL #p,Ar +1111 0100 pp01 0rrr:42:-----:-----:T-:02: CINVP #p,Ar +1111 0100 pp01 1rrr:42:-----:-----:T-:00: CINVA #p +1111 0100 pp10 
1rrr:42:-----:-----:T-:02: CPUSHL #p,Ar +1111 0100 pp11 0rrr:42:-----:-----:T-:02: CPUSHP #p,Ar +1111 0100 pp11 1rrr:42:-----:-----:T-:00: CPUSHA #p +% destination register number is encoded in the following word +1111 0110 0010 0rrr:40:-----:-----:--:12: MOVE16 ArP,AxP +1111 0110 00ss sSSS:40:-----:-----:--:12: MOVE16 s[Dreg-Aipi],Al +1111 0110 00dd dDDD:40:-----:-----:--:12: MOVE16 Al,d[Areg-Aipi] +1111 0110 00ss sSSS:40:-----:-----:--:12: MOVE16 s[Aind],Al +1111 0110 00dd dDDD:40:-----:-----:--:12: MOVE16 Al,d[Aipi-Aind] + +% MMU disabled +% 1111 0101 iiii iSSS:42:?????:?????:T-:11: MMUOP #i,s + +% EmulOp instructions (deprecated, to be removed) +0111 0001 0000 0000:02:-----:-----:-R:00: EMULOP_RETURN +0111 0001 EEEE EEEE:02:-----:-----:-J:10: EMULOP #E + +% NatFea instructions (do I have the srcaddr correct?) disabled +% 0111 0011 0000 0000:00:-----:-----:-J:00: NATFEAT_ID +% 0111 0011 0000 0001:00:-----:-----:-J:00: NATFEAT_CALL