/*
* compiler/compemu_support.cpp - Core dynamic translation engine
*
* Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
*
* Adaptation for Basilisk II and improvements, copyright 2000-2005
* Gwenole Beauchesne
*
* Basilisk II (C) 1997-2008 Christian Bauer
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "sysdeps.h"
#if !REAL_ADDRESSING && !DIRECT_ADDRESSING
#error "Only Real or Direct Addressing is supported with the JIT Compiler"
#endif
#if X86_ASSEMBLY && !SAHF_SETO_PROFITABLE
#error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler"
#endif
/* NOTE: support for AMD64 assumes translation cache and other code
* buffers are allocated into a 32-bit address space because (i) B2/JIT
* code is not 64-bit clean and (ii) it's faster to resolve branches
* that way.
*/
#if !defined(__i386__) && !defined(__x86_64__)
#error "Only IA-32 and X86-64 targets are supported with the JIT Compiler"
#endif
#define USE_MATCH 0
/* kludge for Brian, so he can compile under MSVC++ */
#ifdef _MSC_VER
#define USE_NORMAL_CALLING_CONVENTION 1
#else
#define USE_NORMAL_CALLING_CONVENTION 0
#endif
#ifndef WIN32
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>
#endif
#include <stdlib.h>
#include <fcntl.h>
#include <errno.h>
#include "cpu_emulation.h"
#include "main.h"
#include "prefs.h"
#include "user_strings.h"
#include "vm_alloc.h"
#include "m68k.h"
#include "memory.h"
#include "readcpu.h"
#include "newcpu.h"
#include "comptbl.h"
#include "compiler/compemu.h"
#include "fpu/fpu.h"
#include "fpu/flags.h"
#define DEBUG 1
#include "debug.h"
#ifdef ENABLE_MON
#include "mon.h"
#endif
#ifndef WIN32
#define PROFILE_COMPILE_TIME 1
#define PROFILE_UNTRANSLATED_INSNS 1
#endif
#if defined(__x86_64__) && 0
#define RECORD_REGISTER_USAGE 1
#endif
#ifdef WIN32
#undef write_log
#define write_log dummy_write_log
static void dummy_write_log(const char *, ...) { }
#endif
#if JIT_DEBUG
#undef abort
#define abort() do { \
fprintf(stderr, "Abort in file %s at line %d\n", __FILE__, __LINE__); \
exit(EXIT_FAILURE); \
} while (0)
#endif
#if RECORD_REGISTER_USAGE
static uint64 reg_count[16];
static int reg_count_local[16];
static int reg_count_compare(const void *ap, const void *bp)
{
const int a = *((int *)ap);
const int b = *((int *)bp);
/* sort in descending order of use count; compare explicitly rather
* than subtracting, so the 64-bit difference is never truncated to int */
return (reg_count[a] < reg_count[b]) - (reg_count[a] > reg_count[b]);
}
#endif
#if PROFILE_COMPILE_TIME
#include <time.h>
static uae_u32 compile_count = 0;
static clock_t compile_time = 0;
static clock_t emul_start_time = 0;
static clock_t emul_end_time = 0;
#endif
#if PROFILE_UNTRANSLATED_INSNS
const int untranslated_top_ten = 20;
static uae_u32 raw_cputbl_count[65536] = { 0, };
static uae_u16 opcode_nums[65536];
static int untranslated_compfn(const void *e1, const void *e2)
{
uae_u32 a = raw_cputbl_count[*(const uae_u16 *)e1];
uae_u32 b = raw_cputbl_count[*(const uae_u16 *)e2];
/* sort in descending order of execution count; return a proper
* three-way result, as qsort() requires */
return (a < b) - (a > b);
}
#endif
static compop_func *compfunctbl[65536];
static compop_func *nfcompfunctbl[65536];
static cpuop_func *nfcpufunctbl[65536];
uae_u8* comp_pc_p;
// From newcpu.cpp
extern bool quit_program;
// gb-- Extra data for Basilisk II/JIT
#if JIT_DEBUG
static bool JITDebug = false; // Enable runtime disassemblers through mon?
#else
const bool JITDebug = false; // Don't use JIT debug mode at all
#endif
#if USE_INLINING
static bool follow_const_jumps = true; // Flag: translation through constant jumps
#else
const bool follow_const_jumps = false;
#endif
const uae_u32 MIN_CACHE_SIZE = 1024; // Minimal translation cache size (1 MB)
static uae_u32 cache_size = 0; // Size of total cache allocated for compiled blocks
static uae_u32 current_cache_size = 0; // Cache grows upwards: how much has been consumed already
static bool lazy_flush = true; // Flag: lazy translation cache invalidation
static bool avoid_fpu = true; // Flag: avoid compiling FPU instructions ?
static bool have_cmov = false; // target has CMOV instructions ?
static bool have_lahf_lm = true; // target has LAHF supported in long mode ?
static bool have_rat_stall = true; // target has partial register stalls ?
const bool tune_alignment = true; // Tune code alignments for running CPU ?
const bool tune_nop_fillers = true; // Tune no-op fillers for architecture
static bool setzflg_uses_bsf = false; // setzflg virtual instruction can use native BSF instruction correctly?
static int align_loops = 32; // Align the start of loops
static int align_jumps = 32; // Align the start of jumps
static int optcount[10] = {
10, // How often a block has to be executed before it is translated
0, // How often to use naive translation
0, 0, 0, 0,
-1, -1, -1, -1
};
struct op_properties {
uae_u8 use_flags;
uae_u8 set_flags;
uae_u8 is_addx;
uae_u8 cflow;
};
static op_properties prop[65536];
static inline int end_block(uae_u32 opcode)
{
return (prop[opcode].cflow & fl_end_block);
}
static inline bool is_const_jump(uae_u32 opcode)
{
return (prop[opcode].cflow == fl_const_jump);
}
static inline bool may_trap(uae_u32 opcode)
{
return (prop[opcode].cflow & fl_trap) != 0;
}
static inline unsigned int cft_map (unsigned int f)
{
#ifndef HAVE_GET_WORD_UNSWAPPED
return f;
#else
return ((f >> 8) & 255) | ((f & 255) << 8);
#endif
}
uae_u8* start_pc_p;
uae_u32 start_pc;
uae_u32 current_block_pc_p;
static uintptr current_block_start_target;
uae_u32 needed_flags;
static uintptr next_pc_p;
static uintptr taken_pc_p;
static int branch_cc;
static int redo_current_block;
int segvcount=0;
int soft_flush_count=0;
int hard_flush_count=0;
int checksum_count=0;
static uae_u8* current_compile_p=NULL;
static uae_u8* max_compile_start;
static uae_u8* compiled_code=NULL;
static uae_s32 reg_alloc_run;
const int POPALLSPACE_SIZE = 1024; /* That should be enough space */
static uae_u8* popallspace=NULL;
void* pushall_call_handler=NULL;
static void* popall_do_nothing=NULL;
static void* popall_exec_nostats=NULL;
static void* popall_execute_normal=NULL;
static void* popall_cache_miss=NULL;
static void* popall_recompile_block=NULL;
static void* popall_check_checksum=NULL;
/* The 68k only ever executes from even addresses. So right now, we
* waste half the entries in this array
* UPDATE: We now use those entries to store the start of the linked
* lists that we maintain for each hash result.
*/
cacheline cache_tags[TAGSIZE];
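/* Illustrative sketch (derived from the list functions below): the even
* entry of a pair holds the handler to execute and the odd entry anchors
* the blockinfo chain for that cache line, so a lookup walks the list:
*
*   uae_u32 cl = cacheline(addr);
*   cpuop_func *handler = cache_tags[cl].handler;
*   blockinfo *bi = cache_tags[cl + 1].bi;
*   while (bi && bi->pc_p != addr)
*       bi = bi->next_same_cl;
*/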
int letit=0;
blockinfo* hold_bi[MAX_HOLD_BI];
blockinfo* active;
blockinfo* dormant;
/* 68040 */
extern struct cputbl op_smalltbl_0_nf[];
extern struct comptbl op_smalltbl_0_comp_nf[];
extern struct comptbl op_smalltbl_0_comp_ff[];
/* 68020 + 68881 */
extern struct cputbl op_smalltbl_1_nf[];
/* 68020 */
extern struct cputbl op_smalltbl_2_nf[];
/* 68010 */
extern struct cputbl op_smalltbl_3_nf[];
/* 68000 */
extern struct cputbl op_smalltbl_4_nf[];
/* 68000 slow but compatible. */
extern struct cputbl op_smalltbl_5_nf[];
static void flush_icache_hard(int n);
static void flush_icache_lazy(int n);
static void flush_icache_none(int n);
void (*flush_icache)(int n) = flush_icache_none;
bigstate live;
smallstate empty_ss;
smallstate default_ss;
static int optlev;
static int writereg(int r, int size);
static void unlock2(int r);
static void setlock(int r);
static int readreg_specific(int r, int size, int spec);
static int writereg_specific(int r, int size, int spec);
static void prepare_for_call_1(void);
static void prepare_for_call_2(void);
static void align_target(uae_u32 a);
static uae_s32 nextused[VREGS];
uae_u32 m68k_pc_offset;
/* Some arithmetic operations can be optimized away if the operands
* are known to be constant. But that's only a good idea when the
* side effects they would have on the flags are not important. This
* variable indicates whether we need the side effects or not
*/
uae_u32 needflags=0;
/* Flag handling is complicated.
*
* x86 instructions create flags, which quite often are exactly what we
* want. So at times, the "68k" flags are actually in the x86 flags.
*
* Then again, sometimes we do x86 instructions that clobber the x86
* flags, but don't represent a corresponding m68k instruction. In that
* case, we have to save them.
*
* We used to save them to the stack, but now store them back directly
* into the regflags.cznv of the traditional emulation. Thus some odd
* names.
*
* So flags can be in either of two places (used to be three; boy were
* things complicated back then!); And either place can contain either
* valid flags or invalid trash (and on the stack, there was also the
* option of "nothing at all", now gone). A couple of variables keep
* track of the respective states.
*
* To make things worse, we might or might not be interested in the flags.
* by default, we are, but a call to dont_care_flags can change that
* until the next call to live_flags. If we are not, pretty much whatever
* is in the register and/or the native flags is seen as valid.
*/
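/* Summary (derived from the code below): the two places are the native
* x86 flags and regflags.cznv in memory, tracked by
*
*   live.flags_in_flags       VALID or TRASH -- native flags register
*   live.flags_on_stack       VALID or TRASH -- regflags.cznv (the old "stack")
*   live.flags_are_important  cleared by dont_care_flags()
*
* make_flags_live_internal() reloads the native flags from memory when
* needed; flags_to_stack() saves them back before they are clobbered.
*/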
static __inline__ blockinfo* get_blockinfo(uae_u32 cl)
{
return cache_tags[cl+1].bi;
}
static __inline__ blockinfo* get_blockinfo_addr(void* addr)
{
blockinfo* bi=get_blockinfo(cacheline(addr));
while (bi) {
if (bi->pc_p==addr)
return bi;
bi=bi->next_same_cl;
}
return NULL;
}
/*******************************************************************
* All sorts of list related functions for all of the lists *
*******************************************************************/
static __inline__ void remove_from_cl_list(blockinfo* bi)
{
uae_u32 cl=cacheline(bi->pc_p);
if (bi->prev_same_cl_p)
*(bi->prev_same_cl_p)=bi->next_same_cl;
if (bi->next_same_cl)
bi->next_same_cl->prev_same_cl_p=bi->prev_same_cl_p;
if (cache_tags[cl+1].bi)
cache_tags[cl].handler=cache_tags[cl+1].bi->handler_to_use;
else
cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
}
static __inline__ void remove_from_list(blockinfo* bi)
{
if (bi->prev_p)
*(bi->prev_p)=bi->next;
if (bi->next)
bi->next->prev_p=bi->prev_p;
}
static __inline__ void remove_from_lists(blockinfo* bi)
{
remove_from_list(bi);
remove_from_cl_list(bi);
}
static __inline__ void add_to_cl_list(blockinfo* bi)
{
uae_u32 cl=cacheline(bi->pc_p);
if (cache_tags[cl+1].bi)
cache_tags[cl+1].bi->prev_same_cl_p=&(bi->next_same_cl);
bi->next_same_cl=cache_tags[cl+1].bi;
cache_tags[cl+1].bi=bi;
bi->prev_same_cl_p=&(cache_tags[cl+1].bi);
cache_tags[cl].handler=bi->handler_to_use;
}
static __inline__ void raise_in_cl_list(blockinfo* bi)
{
remove_from_cl_list(bi);
add_to_cl_list(bi);
}
static __inline__ void add_to_active(blockinfo* bi)
{
if (active)
active->prev_p=&(bi->next);
bi->next=active;
active=bi;
bi->prev_p=&active;
}
static __inline__ void add_to_dormant(blockinfo* bi)
{
if (dormant)
dormant->prev_p=&(bi->next);
bi->next=dormant;
dormant=bi;
bi->prev_p=&dormant;
}
static __inline__ void remove_dep(dependency* d)
{
if (d->prev_p)
*(d->prev_p)=d->next;
if (d->next)
d->next->prev_p=d->prev_p;
d->prev_p=NULL;
d->next=NULL;
}
/* This block's code is about to be thrown away, so it no longer
depends on anything else */
static __inline__ void remove_deps(blockinfo* bi)
{
remove_dep(&(bi->dep[0]));
remove_dep(&(bi->dep[1]));
}
static __inline__ void adjust_jmpdep(dependency* d, cpuop_func* a)
{
*(d->jmp_off)=(uintptr)a-((uintptr)d->jmp_off+4);
}
/********************************************************************
* Soft flush handling support functions *
********************************************************************/
static __inline__ void set_dhtu(blockinfo* bi, cpuop_func* dh)
{
//write_log("bi is %p\n",bi);
if (dh!=bi->direct_handler_to_use) {
dependency* x=bi->deplist;
//write_log("bi->deplist=%p\n",bi->deplist);
while (x) {
//write_log("x is %p\n",x);
//write_log("x->next is %p\n",x->next);
//write_log("x->prev_p is %p\n",x->prev_p);
if (x->jmp_off) {
adjust_jmpdep(x,dh);
}
x=x->next;
}
bi->direct_handler_to_use=dh;
}
}
static __inline__ void invalidate_block(blockinfo* bi)
{
int i;
bi->optlevel=0;
bi->count=optcount[0]-1;
bi->handler=NULL;
bi->handler_to_use=(cpuop_func *)popall_execute_normal;
bi->direct_handler=NULL;
set_dhtu(bi,bi->direct_pen);
bi->needed_flags=0xff;
bi->status=BI_INVALID;
for (i=0;i<2;i++) {
bi->dep[i].jmp_off=NULL;
bi->dep[i].target=NULL;
}
remove_deps(bi);
}
static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target)
{
blockinfo* tbi=get_blockinfo_addr((void*)(uintptr)target);
Dif(!tbi) {
write_log("Could not create jmpdep!\n");
abort();
}
bi->dep[i].jmp_off=jmpaddr;
bi->dep[i].source=bi;
bi->dep[i].target=tbi;
bi->dep[i].next=tbi->deplist;
if (bi->dep[i].next)
bi->dep[i].next->prev_p=&(bi->dep[i].next);
bi->dep[i].prev_p=&(tbi->deplist);
tbi->deplist=&(bi->dep[i]);
}
static __inline__ void block_need_recompile(blockinfo * bi)
{
uae_u32 cl = cacheline(bi->pc_p);
set_dhtu(bi, bi->direct_pen);
bi->direct_handler = bi->direct_pen;
bi->handler_to_use = (cpuop_func *)popall_execute_normal;
bi->handler = (cpuop_func *)popall_execute_normal;
if (bi == cache_tags[cl + 1].bi)
cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
bi->status = BI_NEED_RECOMP;
}
static __inline__ void mark_callers_recompile(blockinfo * bi)
{
dependency *x = bi->deplist;
while (x) {
dependency *next = x->next; /* This disappears when we mark for
* recompilation and thus remove the
* blocks from the lists */
if (x->jmp_off) {
blockinfo *cbi = x->source;
Dif(cbi->status == BI_INVALID) {
// write_log("invalid block in dependency list\n"); // FIXME?
// abort();
}
if (cbi->status == BI_ACTIVE || cbi->status == BI_NEED_CHECK) {
block_need_recompile(cbi);
mark_callers_recompile(cbi);
}
else if (cbi->status == BI_COMPILING) {
redo_current_block = 1;
}
else if (cbi->status == BI_NEED_RECOMP) {
/* nothing */
}
else {
//write_log("Status %d in mark_callers\n",cbi->status); // FIXME?
}
}
x = next;
}
}
static __inline__ blockinfo* get_blockinfo_addr_new(void* addr, int setstate)
{
blockinfo* bi=get_blockinfo_addr(addr);
int i;
if (!bi) {
for (i=0;i<MAX_HOLD_BI && !bi;i++) {
if (hold_bi[i]) {
uae_u32 cl=cacheline(addr);
bi=hold_bi[i];
hold_bi[i]=NULL;
bi->pc_p=(uae_u8 *)addr;
invalidate_block(bi);
add_to_active(bi);
add_to_cl_list(bi);
}
}
}
if (!bi) {
write_log("Looking for blockinfo, can't find free one\n");
abort();
}
return bi;
}
static void prepare_block(blockinfo* bi);
/* Management of blockinfos.
A blockinfo struct is allocated whenever a new block has to be
compiled. If the list of free blockinfos is empty, we allocate a new
pool of blockinfos and link the newly created blockinfos together
into the list of free blockinfos. Otherwise, we simply pop a structure
off the free list.
Blockinfos are lazily deallocated, i.e. chained together into the
list of free blockinfos whenever a translation cache flush (hard or
soft) request occurs.
*/
template< class T >
class LazyBlockAllocator
{
enum {
kPoolSize = 1 + 4096 / sizeof(T)
};
struct Pool {
T chunk[kPoolSize];
Pool * next;
};
Pool * mPools;
T * mChunks;
public:
LazyBlockAllocator() : mPools(0), mChunks(0) { }
~LazyBlockAllocator();
T * acquire();
void release(T * const);
};
template< class T >
LazyBlockAllocator<T>::~LazyBlockAllocator()
{
Pool * currentPool = mPools;
while (currentPool) {
Pool * deadPool = currentPool;
currentPool = currentPool->next;
free(deadPool);
}
}
template< class T >
T * LazyBlockAllocator<T>::acquire()
{
if (!mChunks) {
// There is no chunk left, allocate a new pool and link the
// chunks into the free list
Pool * newPool = (Pool *)malloc(sizeof(Pool));
for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) {
chunk->next = mChunks;
mChunks = chunk;
}
newPool->next = mPools;
mPools = newPool;
}
T * chunk = mChunks;
mChunks = chunk->next;
return chunk;
}
template< class T >
void LazyBlockAllocator<T>::release(T * const chunk)
{
chunk->next = mChunks;
mChunks = chunk;
}
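/* Note (implied by acquire()/release() above): T must provide a `next'
* pointer member, since free chunks are chained through it intrusively.
* A minimal usage sketch, mirroring alloc_blockinfo()/free_blockinfo()
* further below:
*
*   blockinfo *bi = BlockInfoAllocator.acquire();
*   ... use bi ...
*   BlockInfoAllocator.release(bi);
*/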
template< class T >
class HardBlockAllocator
{
public:
T * acquire() {
T * data = (T *)current_compile_p;
current_compile_p += sizeof(T);
return data;
}
void release(T * const chunk) {
// Deallocated on invalidation
}
};
#if USE_SEPARATE_BIA
static LazyBlockAllocator<blockinfo> BlockInfoAllocator;
static LazyBlockAllocator<checksum_info> ChecksumInfoAllocator;
#else
static HardBlockAllocator<blockinfo> BlockInfoAllocator;
static HardBlockAllocator<checksum_info> ChecksumInfoAllocator;
#endif
static __inline__ checksum_info *alloc_checksum_info(void)
{
checksum_info *csi = ChecksumInfoAllocator.acquire();
csi->next = NULL;
return csi;
}
static __inline__ void free_checksum_info(checksum_info *csi)
{
csi->next = NULL;
ChecksumInfoAllocator.release(csi);
}
static __inline__ void free_checksum_info_chain(checksum_info *csi)
{
while (csi != NULL) {
checksum_info *csi2 = csi->next;
free_checksum_info(csi);
csi = csi2;
}
}
static __inline__ blockinfo *alloc_blockinfo(void)
{
blockinfo *bi = BlockInfoAllocator.acquire();
#if USE_CHECKSUM_INFO
bi->csi = NULL;
#endif
return bi;
}
static __inline__ void free_blockinfo(blockinfo *bi)
{
#if USE_CHECKSUM_INFO
free_checksum_info_chain(bi->csi);
bi->csi = NULL;
#endif
BlockInfoAllocator.release(bi);
}
static __inline__ void alloc_blockinfos(void)
{
int i;
blockinfo* bi;
for (i=0;i<MAX_HOLD_BI;i++) {
if (hold_bi[i])
return;
bi=hold_bi[i]=alloc_blockinfo();
prepare_block(bi);
}
}
/********************************************************************
* Functions to emit data into memory, and other general support *
********************************************************************/
static uae_u8* target;
static void emit_init(void)
{
}
static __inline__ void emit_byte(uae_u8 x)
{
*target++=x;
}
static __inline__ void emit_word(uae_u16 x)
{
*((uae_u16*)target)=x;
target+=2;
}
static __inline__ void emit_long(uae_u32 x)
{
*((uae_u32*)target)=x;
target+=4;
}
static __inline__ void emit_quad(uae_u64 x)
{
*((uae_u64*)target)=x;
target+=8;
}
static __inline__ void emit_block(const uae_u8 *block, uae_u32 blocklen)
{
memcpy((uae_u8 *)target,block,blocklen);
target+=blocklen;
}
static __inline__ uae_u32 reverse32(uae_u32 v)
{
#if 1
// gb-- We have specialized byteswapping functions, just use them
return do_byteswap_32(v);
#else
return ((v>>24)&0xff) | ((v>>8)&0xff00) | ((v<<8)&0xff0000) | ((v<<24)&0xff000000);
#endif
}
/********************************************************************
* Getting the information about the target CPU *
********************************************************************/
#include "codegen_x86.cpp"
void set_target(uae_u8* t)
{
target=t;
}
static __inline__ uae_u8* get_target_noopt(void)
{
return target;
}
__inline__ uae_u8* get_target(void)
{
return get_target_noopt();
}
/********************************************************************
* Flags status handling. EMIT TIME! *
********************************************************************/
static void bt_l_ri_noclobber(R4 r, IMM i);
static void make_flags_live_internal(void)
{
if (live.flags_in_flags==VALID)
return;
Dif (live.flags_on_stack==TRASH) {
write_log("Want flags, got something on stack, but it is TRASH\n");
abort();
}
if (live.flags_on_stack==VALID) {
int tmp;
tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2);
raw_reg_to_flags(tmp);
unlock2(tmp);
live.flags_in_flags=VALID;
return;
}
write_log("Huh? live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live\n",
live.flags_in_flags,live.flags_on_stack);
abort();
}
static void flags_to_stack(void)
{
if (live.flags_on_stack==VALID)
return;
if (!live.flags_are_important) {
live.flags_on_stack=VALID;
return;
}
Dif (live.flags_in_flags!=VALID)
abort();
else {
int tmp;
tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1);
raw_flags_to_reg(tmp);
unlock2(tmp);
}
live.flags_on_stack=VALID;
}
static __inline__ void clobber_flags(void)
{
if (live.flags_in_flags==VALID && live.flags_on_stack!=VALID)
flags_to_stack();
live.flags_in_flags=TRASH;
}
/* Prepare for leaving the compiled stuff */
static __inline__ void flush_flags(void)
{
flags_to_stack();
return;
}
int touchcnt;
/********************************************************************
* Partial register flushing for optimized calls *
********************************************************************/
struct regusage {
uae_u16 rmask;
uae_u16 wmask;
};
static inline void ru_set(uae_u16 *mask, int reg)
{
#if USE_OPTIMIZED_CALLS
*mask |= 1 << reg;
#endif
}
static inline bool ru_get(const uae_u16 *mask, int reg)
{
#if USE_OPTIMIZED_CALLS
return (*mask & (1 << reg));
#else
/* Default: the instruction reads from & writes to the register */
return true;
#endif
}
static inline void ru_set_read(regusage *ru, int reg)
{
ru_set(&ru->rmask, reg);
}
static inline void ru_set_write(regusage *ru, int reg)
{
ru_set(&ru->wmask, reg);
}
static inline bool ru_read_p(const regusage *ru, int reg)
{
return ru_get(&ru->rmask, reg);
}
static inline bool ru_write_p(const regusage *ru, int reg)
{
return ru_get(&ru->wmask, reg);
}
static void ru_fill_ea(regusage *ru, int reg, amodes mode,
wordsizes size, int write_mode)
{
switch (mode) {
case Areg:
reg += 8;
/* fall through */
case Dreg:
ru_set(write_mode ? &ru->wmask : &ru->rmask, reg);
break;
case Ad16:
/* skip displacement */
m68k_pc_offset += 2;
/* fall through */
case Aind:
case Aipi:
case Apdi:
ru_set_read(ru, reg+8);
break;
case Ad8r:
ru_set_read(ru, reg+8);
/* fall through */
case PC8r: {
uae_u16 dp = comp_get_iword((m68k_pc_offset+=2)-2);
reg = (dp >> 12) & 15;
ru_set_read(ru, reg);
if (dp & 0x100)
m68k_pc_offset += (((dp & 0x30) >> 3) & 7) + ((dp & 3) * 2);
break;
}
case PC16:
case absw:
case imm0:
case imm1:
m68k_pc_offset += 2;
break;
case absl:
case imm2:
m68k_pc_offset += 4;
break;
case immi:
m68k_pc_offset += (size == sz_long) ? 4 : 2;
break;
}
}
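/* Illustrative example (derived from ru_fill_ea() above): rmask/wmask use
* bit n for Dn and bit n+8 for An. Assuming the opcode is not flagged as
* trapping, MOVE.L D1,(A0) reads D1 (bit 1) and A0 (bit 8) and writes
* only to memory:
*
*   regusage ru;
*   ru_fill(&ru, opcode_move_l_d1_a0);   // hypothetical opcode value
*   // now ru.rmask == 0x0102 and ru.wmask == 0x0000
*/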
/* TODO: split into a static initialization part and a dynamic one
(instructions depending on extension words) */
static void ru_fill(regusage *ru, uae_u32 opcode)
{
m68k_pc_offset += 2;
/* Default: no register is used or written to */
ru->rmask = 0;
ru->wmask = 0;
uae_u32 real_opcode = cft_map(opcode);
struct instr *dp = &table68k[real_opcode];
bool rw_dest = true;
bool handled = false;
/* Handle some instructions specifically */
uae_u16 ext;
switch (dp->mnemo) {
case i_BFCHG:
case i_BFCLR:
case i_BFEXTS:
case i_BFEXTU:
case i_BFFFO:
case i_BFINS:
case i_BFSET:
case i_BFTST:
ext = comp_get_iword((m68k_pc_offset+=2)-2);
if (ext & 0x800) ru_set_read(ru, (ext >> 6) & 7);
if (ext & 0x020) ru_set_read(ru, ext & 7);
ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
if (dp->dmode == Dreg)
ru_set_read(ru, dp->dreg);
switch (dp->mnemo) {
case i_BFEXTS:
case i_BFEXTU:
case i_BFFFO:
ru_set_write(ru, (ext >> 12) & 7);
break;
case i_BFINS:
ru_set_read(ru, (ext >> 12) & 7);
/* fall through */
case i_BFCHG:
case i_BFCLR:
case i_BSET:
if (dp->dmode == Dreg)
ru_set_write(ru, dp->dreg);
break;
}
handled = true;
rw_dest = false;
break;
case i_BTST:
rw_dest = false;
break;
case i_CAS:
{
ext = comp_get_iword((m68k_pc_offset+=2)-2);
int Du = ext & 7;
ru_set_read(ru, Du);
int Dc = (ext >> 6) & 7;
ru_set_read(ru, Dc);
ru_set_write(ru, Dc);
break;
}
case i_CAS2:
{
int Dc1, Dc2, Du1, Du2, Rn1, Rn2;
ext = comp_get_iword((m68k_pc_offset+=2)-2);
Rn1 = (ext >> 12) & 15;
Du1 = (ext >> 6) & 7;
Dc1 = ext & 7;
ru_set_read(ru, Rn1);
ru_set_read(ru, Du1);
ru_set_read(ru, Dc1);
ru_set_write(ru, Dc1);
ext = comp_get_iword((m68k_pc_offset+=2)-2);
Rn2 = (ext >> 12) & 15;
Du2 = (ext >> 6) & 7;
Dc2 = ext & 7;
ru_set_read(ru, Rn2);
ru_set_read(ru, Du2);
ru_set_write(ru, Dc2);
break;
}
case i_DIVL: case i_MULL:
m68k_pc_offset += 2;
break;
case i_LEA:
case i_MOVE: case i_MOVEA: case i_MOVE16:
rw_dest = false;
break;
case i_PACK: case i_UNPK:
rw_dest = false;
m68k_pc_offset += 2;
break;
case i_TRAPcc:
m68k_pc_offset += (dp->size == sz_long) ? 4 : 2;
break;
case i_RTR:
/* do nothing, just for coverage debugging */
break;
/* TODO: handle EXG instruction */
}
/* Handle A-Traps better */
if ((real_opcode & 0xf000) == 0xa000) {
handled = true;
}
/* Handle EmulOps better */
if ((real_opcode & 0xff00) == 0x7100) {
handled = true;
ru->rmask = 0xffff;
ru->wmask = 0;
}
if (dp->suse && !handled)
ru_fill_ea(ru, dp->sreg, (amodes)dp->smode, (wordsizes)dp->size, 0);
if (dp->duse && !handled)
ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
if (rw_dest)
ru->rmask |= ru->wmask;
handled = handled || dp->suse || dp->duse;
/* Mark all registers as used/written if the instruction may trap */
if (may_trap(opcode)) {
handled = true;
ru->rmask = 0xffff;
ru->wmask = 0xffff;
}
if (!handled) {
write_log("ru_fill: %04x = { %04x, %04x }\n",
real_opcode, ru->rmask, ru->wmask);
abort();
}
}
/********************************************************************
* register allocation per block logging *
********************************************************************/
static uae_s8 vstate[VREGS];
static uae_s8 vwritten[VREGS];
static uae_s8 nstate[N_REGS];
#define L_UNKNOWN -127
#define L_UNAVAIL -1
#define L_NEEDED -2
#define L_UNNEEDED -3
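/* Summary (derived from the log_* functions below): vstate[] and nstate[]
* start out L_UNKNOWN at log_startblock(). The first read of a virtual
* register marks it L_NEEDED, the first clobber L_UNNEEDED; a native
* register either records which vreg it held on entry or becomes
* L_UNAVAIL once used as a temporary. log_flush() pessimizes whatever
* is still L_UNKNOWN.
*/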
static __inline__ void big_to_small_state(bigstate * b, smallstate * s)
{
int i;
for (i = 0; i < VREGS; i++)
s->virt[i] = vstate[i];
for (i = 0; i < N_REGS; i++)
s->nat[i] = nstate[i];
}
static __inline__ int callers_need_recompile(bigstate * b, smallstate * s)
{
int i;
int reverse = 0;
for (i = 0; i < VREGS; i++) {
if (vstate[i] != L_UNNEEDED && s->virt[i] == L_UNNEEDED)
return 1;
if (vstate[i] == L_UNNEEDED && s->virt[i] != L_UNNEEDED)
reverse++;
}
for (i = 0; i < N_REGS; i++) {
if (nstate[i] >= 0 && nstate[i] != s->nat[i])
return 1;
if (nstate[i] < 0 && s->nat[i] >= 0)
reverse++;
}
if (reverse >= 2 && USE_MATCH)
return 1; /* In this case, it might be worth recompiling the
* callers */
return 0;
}
static __inline__ void log_startblock(void)
{
int i;
for (i = 0; i < VREGS; i++) {
vstate[i] = L_UNKNOWN;
vwritten[i] = 0;
}
for (i = 0; i < N_REGS; i++)
nstate[i] = L_UNKNOWN;
}
/* Using an n-reg for a temp variable */
static __inline__ void log_isused(int n)
{
if (nstate[n] == L_UNKNOWN)
nstate[n] = L_UNAVAIL;
}
static __inline__ void log_visused(int r)
{
if (vstate[r] == L_UNKNOWN)
vstate[r] = L_NEEDED;
}
static __inline__ void do_load_reg(int n, int r)
{
if (r == FLAGTMP)
raw_load_flagreg(n, r);
else if (r == FLAGX)
raw_load_flagx(n, r);
else
raw_mov_l_rm(n, (uintptr) live.state[r].mem);
}
static __inline__ void check_load_reg(int n, int r)
{
raw_mov_l_rm(n, (uintptr) live.state[r].mem);
}
static __inline__ void log_vwrite(int r)
{
vwritten[r] = 1;
}
/* Using an n-reg to hold a v-reg */
static __inline__ void log_isreg(int n, int r)
{
static int count = 0;
if (nstate[n] == L_UNKNOWN && r < 16 && !vwritten[r] && USE_MATCH)
nstate[n] = r;
else {
do_load_reg(n, r);
if (nstate[n] == L_UNKNOWN)
nstate[n] = L_UNAVAIL;
}
if (vstate[r] == L_UNKNOWN)
vstate[r] = L_NEEDED;
}
static __inline__ void log_clobberreg(int r)
{
if (vstate[r] == L_UNKNOWN)
vstate[r] = L_UNNEEDED;
}
/* This ends all possibility of clever register allocation */
static __inline__ void log_flush(void)
{
int i;
for (i = 0; i < VREGS; i++)
if (vstate[i] == L_UNKNOWN)
vstate[i] = L_NEEDED;
for (i = 0; i < N_REGS; i++)
if (nstate[i] == L_UNKNOWN)
nstate[i] = L_UNAVAIL;
}
static __inline__ void log_dump(void)
{
int i;
return; /* early return: the dump below is disabled */
write_log("----------------------\n");
for (i = 0; i < N_REGS; i++) {
switch (nstate[i]) {
case L_UNKNOWN:
write_log("Nat %d : UNKNOWN\n", i);
break;
case L_UNAVAIL:
write_log("Nat %d : UNAVAIL\n", i);
break;
default:
write_log("Nat %d : %d\n", i, nstate[i]);
break;
}
}
for (i = 0; i < VREGS; i++) {
if (vstate[i] == L_UNNEEDED)
write_log("Virt %d: UNNEEDED\n", i);
}
}
/********************************************************************
* register status handling. EMIT TIME! *
********************************************************************/
static __inline__ void set_status(int r, int status)
{
if (status == ISCONST)
log_clobberreg(r);
live.state[r].status=status;
}
static __inline__ int isinreg(int r)
{
return live.state[r].status==CLEAN || live.state[r].status==DIRTY;
}
static __inline__ void adjust_nreg(int r, uae_u32 val)
{
if (!val)
return;
raw_lea_l_brr(r,r,val);
}
static void tomem(int r)
{
int rr=live.state[r].realreg;
if (isinreg(r)) {
if (live.state[r].val && live.nat[rr].nholds==1
&& !live.nat[rr].locked) {
// write_log("RemovingA offset %x from reg %d (%d) at %p\n",
// live.state[r].val,r,rr,target);
adjust_nreg(rr,live.state[r].val);
live.state[r].val=0;
live.state[r].dirtysize=4;
set_status(r,DIRTY);
}
}
if (live.state[r].status==DIRTY) {
switch (live.state[r].dirtysize) {
case 1: raw_mov_b_mr((uintptr)live.state[r].mem,rr); break;
case 2: raw_mov_w_mr((uintptr)live.state[r].mem,rr); break;
case 4: raw_mov_l_mr((uintptr)live.state[r].mem,rr); break;
default: abort();
}
log_vwrite(r);
set_status(r,CLEAN);
live.state[r].dirtysize=0;
}
}
static __inline__ int isconst(int r)
{
return live.state[r].status==ISCONST;
}
int is_const(int r)
{
return isconst(r);
}
static __inline__ void writeback_const(int r)
{
if (!isconst(r))
return;
Dif (live.state[r].needflush==NF_HANDLER) {
write_log("Trying to write back constant NF_HANDLER!\n");
abort();
}
raw_mov_l_mi((uintptr)live.state[r].mem,live.state[r].val);
log_vwrite(r);
live.state[r].val=0;
set_status(r,INMEM);
}
static __inline__ void tomem_c(int r)
{
if (isconst(r)) {
writeback_const(r);
}
else
tomem(r);
}
static void evict(int r)
{
int rr;
if (!isinreg(r))
return;
tomem(r);
rr=live.state[r].realreg;
Dif (live.nat[rr].locked &&
live.nat[rr].nholds==1) {
write_log("register %d in nreg %d is locked!\n",r,live.state[r].realreg);
abort();
}
live.nat[rr].nholds--;
if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */
int topreg=live.nat[rr].holds[live.nat[rr].nholds];
int thisind=live.state[r].realind;
live.nat[rr].holds[thisind]=topreg;
live.state[topreg].realind=thisind;
}
live.state[r].realreg=-1;
set_status(r,INMEM);
}
static __inline__ void free_nreg(int r)
{
int i=live.nat[r].nholds;
while (i) {
int vr;
--i;
vr=live.nat[r].holds[i];
evict(vr);
}
Dif (live.nat[r].nholds!=0) {
write_log("Failed to free nreg %d, nholds is %d\n",r,live.nat[r].nholds);
abort();
}
}
/* Use with care! */
static __inline__ void isclean(int r)
{
if (!isinreg(r))
return;
live.state[r].validsize=4;
live.state[r].dirtysize=0;
live.state[r].val=0;
set_status(r,CLEAN);
}
static __inline__ void disassociate(int r)
{
isclean(r);
evict(r);
}
static __inline__ void set_const(int r, uae_u32 val)
{
disassociate(r);
live.state[r].val=val;
set_status(r,ISCONST);
}
static __inline__ uae_u32 get_offset(int r)
{
return live.state[r].val;
}
static int alloc_reg_hinted(int r, int size, int willclobber, int hint)
{
int bestreg;
uae_s32 when;
int i;
uae_s32 badness=0; /* to shut up gcc */
bestreg=-1;
when=2000000000;
/* XXX use a regalloc_order table? */
for (i=0;i<N_REGS;i++) {
badness=live.nat[i].touched;
if (live.nat[i].nholds==0)
badness=0;
if (i==hint)
badness-=200000000;
if (!live.nat[i].locked && badness<when) {
if ((size==1 && live.nat[i].canbyte) ||
(size==2 && live.nat[i].canword) ||
(size==4)) {
bestreg=i;
when=badness;
if (live.nat[i].nholds==0 && hint<0)
break;
if (i==hint)
break;
}
}
}
Dif (bestreg==-1)
abort();
if (live.nat[bestreg].nholds>0) {
free_nreg(bestreg);
}
if (isinreg(r)) {
int rr=live.state[r].realreg;
/* This will happen if we read a partially dirty register at a
bigger size */
Dif (willclobber || live.state[r].validsize>=size)
abort();
Dif (live.nat[rr].nholds!=1)
abort();
if (size==4 && live.state[r].validsize==2) {
log_isused(bestreg);
log_visused(r);
raw_mov_l_rm(bestreg,(uintptr)live.state[r].mem);
raw_bswap_32(bestreg);
raw_zero_extend_16_rr(rr,rr);
raw_zero_extend_16_rr(bestreg,bestreg);
raw_bswap_32(bestreg);
raw_lea_l_brr_indexed(rr,rr,bestreg,1,0);
live.state[r].validsize=4;
live.nat[rr].touched=touchcnt++;
return rr;
}
if (live.state[r].validsize==1) {
/* Nothing yet */
}
evict(r);
}
if (!willclobber) {
if (live.state[r].status!=UNDEF) {
if (isconst(r)) {
raw_mov_l_ri(bestreg,live.state[r].val);
live.state[r].val=0;
live.state[r].dirtysize=4;
set_status(r,DIRTY);
log_isused(bestreg);
}
else {
log_isreg(bestreg, r); /* This will also load it! */
live.state[r].dirtysize=0;
set_status(r,CLEAN);
}
}
else {
live.state[r].val=0;
live.state[r].dirtysize=0;
set_status(r,CLEAN);
log_isused(bestreg);
}
live.state[r].validsize=4;
}
else { /* this is the easiest way, but not optimal. FIXME! */
/* Now it's trickier, but hopefully still OK */
if (!isconst(r) || size==4) {
live.state[r].validsize=size;
live.state[r].dirtysize=size;
live.state[r].val=0;
set_status(r,DIRTY);
if (size == 4) {
log_clobberreg(r);
log_isused(bestreg);
}
else {
log_visused(r);
log_isused(bestreg);
}
}
else {
if (live.state[r].status!=UNDEF)
raw_mov_l_ri(bestreg,live.state[r].val);
live.state[r].val=0;
live.state[r].validsize=4;
live.state[r].dirtysize=4;
set_status(r,DIRTY);
log_isused(bestreg);
}
}
live.state[r].realreg=bestreg;
live.state[r].realind=live.nat[bestreg].nholds;
live.nat[bestreg].touched=touchcnt++;
live.nat[bestreg].holds[live.nat[bestreg].nholds]=r;
live.nat[bestreg].nholds++;
return bestreg;
}
static int alloc_reg(int r, int size, int willclobber)
{
return alloc_reg_hinted(r,size,willclobber,-1);
}
static void unlock2(int r)
{
Dif (!live.nat[r].locked)
abort();
live.nat[r].locked--;
}
static void setlock(int r)
{
live.nat[r].locked++;
}
static void mov_nregs(int d, int s)
{
int ns=live.nat[s].nholds;
int nd=live.nat[d].nholds;
int i;
if (s==d)
return;
if (nd>0)
free_nreg(d);
log_isused(d);
raw_mov_l_rr(d,s);
for (i=0;i<live.nat[s].nholds;i++) {
int vs=live.nat[s].holds[i];
live.state[vs].realreg=d;
live.state[vs].realind=i;
live.nat[d].holds[i]=vs;
}
live.nat[d].nholds=live.nat[s].nholds;
live.nat[s].nholds=0;
}
static __inline__ void make_exclusive(int r, int size, int spec)
{
reg_status oldstate;
int rr=live.state[r].realreg;
int nr;
int nind;
int ndirt=0;
int i;
if (!isinreg(r))
return;
if (live.nat[rr].nholds==1)
return;
for (i=0;i<live.nat[rr].nholds;i++) {
int vr=live.nat[rr].holds[i];
if (vr!=r &&
(live.state[vr].status==DIRTY || live.state[vr].val))
ndirt++;
}
if (!ndirt && size<live.state[r].validsize && !live.nat[rr].locked) {
/* Everything else is clean, so let's keep this register */
for (i=0;i<live.nat[rr].nholds;i++) {
int vr=live.nat[rr].holds[i];
if (vr!=r) {
evict(vr);
i--; /* Try that index again! */
}
}
Dif (live.nat[rr].nholds!=1) {
write_log("natreg %d holds %d vregs, %d not exclusive\n",
rr,live.nat[rr].nholds,r);
abort();
}
return;
}
/* We have to split the register */
oldstate=live.state[r];
setlock(rr); /* Make sure this doesn't go away */
/* Forget about r being in the register rr */
disassociate(r);
/* Get a new register, that we will clobber completely */
if (oldstate.status==DIRTY) {
/* If dirtysize is <4, we need a register that can handle the
eventual smaller memory store! Thanks to Quake68k for exposing
this detail ;-) */
nr=alloc_reg_hinted(r,oldstate.dirtysize,1,spec);
}
else {
nr=alloc_reg_hinted(r,4,1,spec);
}
nind=live.state[r].realind;
live.state[r]=oldstate; /* Keep all the old state info */
live.state[r].realreg=nr;
live.state[r].realind=nind;
if (size<live.state[r].validsize) {
if (live.state[r].val) {
/* Might as well compensate for the offset now */
raw_lea_l_brr(nr,rr,oldstate.val);
live.state[r].val=0;
live.state[r].dirtysize=4;
set_status(r,DIRTY);
}
else
raw_mov_l_rr(nr,rr); /* Make another copy */
}
unlock2(rr);
}
static __inline__ void add_offset(int r, uae_u32 off)
{
live.state[r].val+=off;
}
static __inline__ void remove_offset(int r, int spec)
{
int rr;
if (isconst(r))
return;
if (live.state[r].val==0)
return;
if (isinreg(r) && live.state[r].validsize<4)
evict(r);
if (!isinreg(r))
alloc_reg_hinted(r,4,0,spec);
Dif (live.state[r].validsize!=4) {
write_log("Validsize=%d in remove_offset\n",live.state[r].validsize);
abort();
}
make_exclusive(r,0,-1);
/* make_exclusive might have done the job already */
if (live.state[r].val==0)
return;
rr=live.state[r].realreg;
if (live.nat[rr].nholds==1) {
//write_log("RemovingB offset %x from reg %d (%d) at %p\n",
// live.state[r].val,r,rr,target);
adjust_nreg(rr,live.state[r].val);
live.state[r].dirtysize=4;
live.state[r].val=0;
set_status(r,DIRTY);
return;
}
write_log("Failed in remove_offset\n");
abort();
}
static __inline__ void remove_all_offsets(void)
{
int i;
for (i=0;i<VREGS;i++)
remove_offset(i,-1);
}
static inline void flush_reg_count(void)
{
#if RECORD_REGISTER_USAGE
for (int r = 0; r < 16; r++)
if (reg_count_local[r])
ADDQim(reg_count_local[r], ((uintptr)reg_count) + (8 * r), X86_NOREG, X86_NOREG, 1);
#endif
}
static inline void record_register(int r)
{
#if RECORD_REGISTER_USAGE
if (r < 16)
reg_count_local[r]++;
#endif
}
static __inline__ int readreg_general(int r, int size, int spec, int can_offset)
{
int n;
int answer=-1;
record_register(r);
if (live.state[r].status==UNDEF) {
write_log("WARNING: Unexpected read of undefined register %d\n",r);
}
if (!can_offset)
remove_offset(r,spec);
if (isinreg(r) && live.state[r].validsize>=size) {
n=live.state[r].realreg;
switch(size) {
case 1:
if (live.nat[n].canbyte || spec>=0) {
answer=n;
}
break;
case 2:
if (live.nat[n].canword || spec>=0) {
answer=n;
}
break;
case 4:
answer=n;
break;
default: abort();
}
if (answer<0)
evict(r);
}
/* either the value was in memory to start with, or it was evicted and
is in memory now */
if (answer<0) {
answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec);
}
if (spec>=0 && spec!=answer) {
/* Too bad */
mov_nregs(spec,answer);
answer=spec;
}
live.nat[answer].locked++;
live.nat[answer].touched=touchcnt++;
return answer;
}
static int readreg(int r, int size)
{
return readreg_general(r,size,-1,0);
}
static int readreg_specific(int r, int size, int spec)
{
return readreg_general(r,size,spec,0);
}
static int readreg_offset(int r, int size)
{
return readreg_general(r,size,-1,1);
}
/* writereg_general(r, size, spec)
*
* INPUT
* - r : mid-layer register
* - size : requested size (1/2/4)
* - spec : -1 to let the allocator find or free up a register, otherwise
* specifies the physical register to use in any case
*
* OUTPUT
* - hard (physical, x86 here) register allocated to virtual register r
*/
static __inline__ int writereg_general(int r, int size, int spec)
{
int n;
int answer=-1;
record_register(r);
if (size<4) {
remove_offset(r,spec);
}
make_exclusive(r,size,spec);
if (isinreg(r)) {
int nvsize=size>live.state[r].validsize?size:live.state[r].validsize;
int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
n=live.state[r].realreg;
Dif (live.nat[n].nholds!=1)
abort();
switch(size) {
case 1:
if (live.nat[n].canbyte || spec>=0) {
live.state[r].dirtysize=ndsize;
live.state[r].validsize=nvsize;
answer=n;
}
break;
case 2:
if (live.nat[n].canword || spec>=0) {
live.state[r].dirtysize=ndsize;
live.state[r].validsize=nvsize;
answer=n;
}
break;
case 4:
live.state[r].dirtysize=ndsize;
live.state[r].validsize=nvsize;
answer=n;
break;
default: abort();
}
if (answer<0)
evict(r);
}
/* either the value was in memory to start with, or it was evicted and
is in memory now */
if (answer<0) {
answer=alloc_reg_hinted(r,size,1,spec);
}
if (spec>=0 && spec!=answer) {
mov_nregs(spec,answer);
answer=spec;
}
if (live.state[r].status==UNDEF)
live.state[r].validsize=4;
live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize;
live.nat[answer].locked++;
live.nat[answer].touched=touchcnt++;
if (size==4) {
live.state[r].val=0;
}
else {
Dif (live.state[r].val) {
write_log("Problem with val\n");
abort();
}
}
set_status(r,DIRTY);
return answer;
}
static int writereg(int r, int size)
{
return writereg_general(r,size,-1);
}
static int writereg_specific(int r, int size, int spec)
{
return writereg_general(r,size,spec);
}
static __inline__ int rmw_general(int r, int wsize, int rsize, int spec)
{
int n;
int answer=-1;
record_register(r);
if (live.state[r].status==UNDEF) {
write_log("WARNING: Unexpected read of undefined register %d\n",r);
}
remove_offset(r,spec);
make_exclusive(r,0,spec);
Dif (wsize<rsize) {
write_log("Cannot handle wsize<rsize in rmw_general()\n");
abort();
}
if (isinreg(r) && live.state[r].validsize>=rsize) {
n=live.state[r].realreg;
Dif (live.nat[n].nholds!=1)
abort();
switch(rsize) {
case 1:
if (live.nat[n].canbyte || spec>=0) {
answer=n;
}
break;
case 2:
if (live.nat[n].canword || spec>=0) {
answer=n;
}
break;
case 4:
answer=n;
break;
default: abort();
}
if (answer<0)
evict(r);
}
/* either the value was in memory to start with, or it was evicted and
is in memory now */
if (answer<0) {
answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec);
}
if (spec>=0 && spec!=answer) {
/* Too bad */
mov_nregs(spec,answer);
answer=spec;
}
if (wsize>live.state[r].dirtysize)
live.state[r].dirtysize=wsize;
if (wsize>live.state[r].validsize)
live.state[r].validsize=wsize;
set_status(r,DIRTY);
live.nat[answer].locked++;
live.nat[answer].touched=touchcnt++;
Dif (live.state[r].val) {
write_log("Problem with val(rmw)\n");
abort();
}
return answer;
}
static int rmw(int r, int wsize, int rsize)
{
return rmw_general(r,wsize,rsize,-1);
}
static int rmw_specific(int r, int wsize, int rsize, int spec)
{
return rmw_general(r,wsize,rsize,spec);
}
/* needed for restoring the carry flag on non-P6 cores */
static void bt_l_ri_noclobber(R4 r, IMM i)
{
int size=4;
if (i<16)
size=2;
r=readreg(r,size);
raw_bt_l_ri(r,i);
unlock2(r);
}
/********************************************************************
* FPU register status handling. EMIT TIME! *
********************************************************************/
static void f_tomem(int r)
{
if (live.fate[r].status==DIRTY) {
#if USE_LONG_DOUBLE
raw_fmov_ext_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
#else
raw_fmov_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
#endif
live.fate[r].status=CLEAN;
}
}
static void f_tomem_drop(int r)
{
if (live.fate[r].status==DIRTY) {
#if USE_LONG_DOUBLE
raw_fmov_ext_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
#else
raw_fmov_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
#endif
live.fate[r].status=INMEM;
}
}
static __inline__ int f_isinreg(int r)
{
return live.fate[r].status==CLEAN || live.fate[r].status==DIRTY;
}
static void f_evict(int r)
{
int rr;
if (!f_isinreg(r))
return;
rr=live.fate[r].realreg;
if (live.fat[rr].nholds==1)
f_tomem_drop(r);
else
f_tomem(r);
Dif (live.fat[rr].locked &&
live.fat[rr].nholds==1) {
write_log("FPU register %d in nreg %d is locked!\n",r,live.fate[r].realreg);
abort();
}
live.fat[rr].nholds--;
if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */
int topreg=live.fat[rr].holds[live.fat[rr].nholds];
int thisind=live.fate[r].realind;
live.fat[rr].holds[thisind]=topreg;
live.fate[topreg].realind=thisind;
}
live.fate[r].status=INMEM;
live.fate[r].realreg=-1;
}
static __inline__ void f_free_nreg(int r)
{
int i=live.fat[r].nholds;
while (i) {
int vr;
--i;
vr=live.fat[r].holds[i];
f_evict(vr);
}
Dif (live.fat[r].nholds!=0) {
write_log("Failed to free nreg %d, nholds is %d\n",r,live.fat[r].nholds);
abort();
}
}
/* Use with care! */
static __inline__ void f_isclean(int r)
{
if (!f_isinreg(r))
return;
live.fate[r].status=CLEAN;
}
static __inline__ void f_disassociate(int r)
{
f_isclean(r);
f_evict(r);
}
static int f_alloc_reg(int r, int willclobber)
{
int bestreg;
uae_s32 when;
int i;
uae_s32 badness;
bestreg=-1;
when=2000000000;
for (i=N_FREGS;i--;) {
badness=live.fat[i].touched;
if (live.fat[i].nholds==0)
badness=0;
if (!live.fat[i].locked && badness<when) {
bestreg=i;
when=badness;
if (live.fat[i].nholds==0)
break;
}
}
Dif (bestreg==-1)
abort();
if (live.fat[bestreg].nholds>0) {
f_free_nreg(bestreg);
}
if (f_isinreg(r)) {
f_evict(r);
}
if (!willclobber) {
if (live.fate[r].status!=UNDEF) {
#if USE_LONG_DOUBLE
raw_fmov_ext_rm(bestreg,(uintptr)live.fate[r].mem);
#else
raw_fmov_rm(bestreg,(uintptr)live.fate[r].mem);
#endif
}
live.fate[r].status=CLEAN;
}
else {
live.fate[r].status=DIRTY;
}
live.fate[r].realreg=bestreg;
live.fate[r].realind=live.fat[bestreg].nholds;
live.fat[bestreg].touched=touchcnt++;
live.fat[bestreg].holds[live.fat[bestreg].nholds]=r;
live.fat[bestreg].nholds++;
return bestreg;
}
static void f_unlock(int r)
{
Dif (!live.fat[r].locked)
abort();
live.fat[r].locked--;
}
static void f_setlock(int r)
{
live.fat[r].locked++;
}
static __inline__ int f_readreg(int r)
{
int n;
int answer=-1;
if (f_isinreg(r)) {
n=live.fate[r].realreg;
answer=n;
}
/* either the value was in memory to start with, or it was evicted and
is in memory now */
if (answer<0)
answer=f_alloc_reg(r,0);
live.fat[answer].locked++;
live.fat[answer].touched=touchcnt++;
return answer;
}
static __inline__ void f_make_exclusive(int r, int clobber)
{
freg_status oldstate;
int rr=live.fate[r].realreg;
int nr;
int nind;
int ndirt=0;
int i;
if (!f_isinreg(r))
return;
if (live.fat[rr].nholds==1)
return;
for (i=0;i<live.fat[rr].nholds;i++) {
int vr=live.fat[rr].holds[i];
if (vr!=r && live.fate[vr].status==DIRTY)
ndirt++;
}
if (!ndirt && !live.fat[rr].locked) {
/* Everything else is clean, so let's keep this register */
for (i=0;i<live.fat[rr].nholds;i++) {
int vr=live.fat[rr].holds[i];
if (vr!=r) {
f_evict(vr);
i--; /* Try that index again! */
}
}
Dif (live.fat[rr].nholds!=1) {
write_log("realreg %d holds %d (",rr,live.fat[rr].nholds);
for (i=0;i<live.fat[rr].nholds;i++) {
write_log(" %d(%d,%d)",live.fat[rr].holds[i],
live.fate[live.fat[rr].holds[i]].realreg,
live.fate[live.fat[rr].holds[i]].realind);
}
write_log("\n");
abort();
}
return;
}
/* We have to split the register */
oldstate=live.fate[r];
f_setlock(rr); /* Make sure this doesn't go away */
/* Forget about r being in the register rr */
f_disassociate(r);
/* Get a new register, that we will clobber completely */
nr=f_alloc_reg(r,1);
nind=live.fate[r].realind;
if (!clobber)
raw_fmov_rr(nr,rr); /* Make another copy */
live.fate[r]=oldstate; /* Keep all the old state info */
live.fate[r].realreg=nr;
live.fate[r].realind=nind;
f_unlock(rr);
}
static __inline__ int f_writereg(int r)
{
int n;
int answer=-1;
f_make_exclusive(r,1);
if (f_isinreg(r)) {
n=live.fate[r].realreg;
answer=n;
}
if (answer<0) {
answer=f_alloc_reg(r,1);
}
live.fate[r].status=DIRTY;
live.fat[answer].locked++;
live.fat[answer].touched=touchcnt++;
return answer;
}
static int f_rmw(int r)
{
int n;
f_make_exclusive(r,0);
if (f_isinreg(r)) {
n=live.fate[r].realreg;
}
else
n=f_alloc_reg(r,0);
live.fate[r].status=DIRTY;
live.fat[n].locked++;
live.fat[n].touched=touchcnt++;
return n;
}
static void fflags_into_flags_internal(uae_u32 tmp)
{
int r;
clobber_flags();
r=f_readreg(FP_RESULT);
if (FFLAG_NREG_CLOBBER_CONDITION) {
int tmp2=tmp;
tmp=writereg_specific(tmp,4,FFLAG_NREG);
raw_fflags_into_flags(r);
unlock2(tmp);
forget_about(tmp2);
}
else
raw_fflags_into_flags(r);
f_unlock(r);
live_flags();
}
/********************************************************************
* CPU functions exposed to gencomp. Both CREATE and EMIT time *
********************************************************************/
/*
* RULES FOR HANDLING REGISTERS:
*
* * In the function headers, order the parameters
* - 1st registers written to
* - 2nd read/modify/write registers
* - 3rd registers read from
* * Before calling raw_*, you must call readreg, writereg or rmw for
* each register
* * The order for this is
* - 1st call remove_offset for all registers written to with size<4
* - 2nd call readreg for all registers read without offset
* - 3rd call rmw for all rmw registers
* - 4th call readreg_offset for all registers that can handle offsets
* - 5th call get_offset for all the registers from the previous step
* - 6th call writereg for all written-to registers
* - 7th call raw_*
* - 8th unlock2 all registers that were locked
*/
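/* Illustrative sketch (not part of the build): the canonical emit-time
* sequence for a read-modify-write op with one plain source register,
* following the rules above (cf. btc_l_rr below):
*
*   s = readreg(s, 4);     // lock the source for reading
*   d = rmw(d, 4, 4);      // lock the destination for read/modify/write
*   raw_btc_l_rr(d, s);    // emit the native instruction
*   unlock2(d);
*   unlock2(s);
*/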
MIDFUNC(0,live_flags,(void))
{
live.flags_on_stack=TRASH;
live.flags_in_flags=VALID;
live.flags_are_important=1;
}
MENDFUNC(0,live_flags,(void))
MIDFUNC(0,dont_care_flags,(void))
{
live.flags_are_important=0;
}
MENDFUNC(0,dont_care_flags,(void))
MIDFUNC(0,duplicate_carry,(void))
{
evict(FLAGX);
make_flags_live_internal();
COMPCALL(setcc_m)((uintptr)live.state[FLAGX].mem,2);
log_vwrite(FLAGX);
}
MENDFUNC(0,duplicate_carry,(void))
MIDFUNC(0,restore_carry,(void))
{
if (!have_rat_stall) { /* Not a P6 core, i.e. no partial stalls */
bt_l_ri_noclobber(FLAGX,0);
}
else { /* Avoid the stall the above creates.
This is slow on non-P6, though.
*/
COMPCALL(rol_b_ri(FLAGX,8));
isclean(FLAGX);
}
}
MENDFUNC(0,restore_carry,(void))
MIDFUNC(0,start_needflags,(void))
{
needflags=1;
}
MENDFUNC(0,start_needflags,(void))
MIDFUNC(0,end_needflags,(void))
{
needflags=0;
}
MENDFUNC(0,end_needflags,(void))
MIDFUNC(0,make_flags_live,(void))
{
make_flags_live_internal();
}
MENDFUNC(0,make_flags_live,(void))
MIDFUNC(1,fflags_into_flags,(W2 tmp))
{
clobber_flags();
fflags_into_flags_internal(tmp);
}
MENDFUNC(1,fflags_into_flags,(W2 tmp))
MIDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
{
int size=4;
if (i<16)
size=2;
CLOBBER_BT;
r=readreg(r,size);
raw_bt_l_ri(r,i);
unlock2(r);
}
MENDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
MIDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
{
CLOBBER_BT;
r=readreg(r,4);
b=readreg(b,4);
raw_bt_l_rr(r,b);
unlock2(r);
unlock2(b);
}
MENDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
MIDFUNC(2,btc_l_ri,(RW4 r, IMM i))
{
int size=4;
if (i<16)
size=2;
CLOBBER_BT;
r=rmw(r,size,size);
raw_btc_l_ri(r,i);
unlock2(r);
}
MENDFUNC(2,btc_l_ri,(RW4 r, IMM i))
MIDFUNC(2,btc_l_rr,(RW4 r, R4 b))
{
CLOBBER_BT;
b=readreg(b,4);
r=rmw(r,4,4);
raw_btc_l_rr(r,b);
unlock2(r);
unlock2(b);
}
MENDFUNC(2,btc_l_rr,(RW4 r, R4 b))
MIDFUNC(2,btr_l_ri,(RW4 r, IMM i))
{
int size=4;
if (i<16)
size=2;
CLOBBER_BT;
r=rmw(r,size,size);
raw_btr_l_ri(r,i);
unlock2(r);
}
MENDFUNC(2,btr_l_ri,(RW4 r, IMM i))
MIDFUNC(2,btr_l_rr,(RW4 r, R4 b))
{
CLOBBER_BT;
b=readreg(b,4);
r=rmw(r,4,4);
raw_btr_l_rr(r,b);
unlock2(r);
unlock2(b);
}
MENDFUNC(2,btr_l_rr,(RW4 r, R4 b))
MIDFUNC(2,bts_l_ri,(RW4 r, IMM i))
{
int size=4;
if (i<16)
size=2;
CLOBBER_BT;
r=rmw(r,size,size);
raw_bts_l_ri(r,i);
unlock2(r);
}
MENDFUNC(2,bts_l_ri,(RW4 r, IMM i))
MIDFUNC(2,bts_l_rr,(RW4 r, R4 b))
{
CLOBBER_BT;
b=readreg(b,4);
r=rmw(r,4,4);
raw_bts_l_rr(r,b);
unlock2(r);
unlock2(b);
}
MENDFUNC(2,bts_l_rr,(RW4 r, R4 b))
MIDFUNC(2,mov_l_rm,(W4 d, IMM s))
{
CLOBBER_MOV;
d=writereg(d,4);
raw_mov_l_rm(d,s);
unlock2(d);
}
MENDFUNC(2,mov_l_rm,(W4 d, IMM s))
MIDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
{
r=readreg(r,4);
raw_call_r(r);
unlock2(r);
}
MENDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
MIDFUNC(2,sub_l_mi,(IMM d, IMM s))
{
CLOBBER_SUB;
raw_sub_l_mi(d,s) ;
}
MENDFUNC(2,sub_l_mi,(IMM d, IMM s))
MIDFUNC(2,mov_l_mi,(IMM d, IMM s))
{
CLOBBER_MOV;
raw_mov_l_mi(d,s) ;
}
MENDFUNC(2,mov_l_mi,(IMM d, IMM s))
MIDFUNC(2,mov_w_mi,(IMM d, IMM s))
{
CLOBBER_MOV;
raw_mov_w_mi(d,s) ;
}
MENDFUNC(2,mov_w_mi,(IMM d, IMM s))
MIDFUNC(2,mov_b_mi,(IMM d, IMM s))
{
CLOBBER_MOV;
raw_mov_b_mi(d,s) ;
}
MENDFUNC(2,mov_b_mi,(IMM d, IMM s))
MIDFUNC(2,rol_b_ri,(RW1 r, IMM i))
{
if (!i && !needflags)
return;
CLOBBER_ROL;
r=rmw(r,1,1);
raw_rol_b_ri(r,i);
unlock2(r);
}
MENDFUNC(2,rol_b_ri,(RW1 r, IMM i))
MIDFUNC(2,rol_w_ri,(RW2 r, IMM i))
{
if (!i && !needflags)
return;
CLOBBER_ROL;
r=rmw(r,2,2);
raw_rol_w_ri(r,i);
unlock2(r);
}
MENDFUNC(2,rol_w_ri,(RW2 r, IMM i))
MIDFUNC(2,rol_l_ri,(RW4 r, IMM i))
{
if (!i && !needflags)
return;
CLOBBER_ROL;
r=rmw(r,4,4);
raw_rol_l_ri(r,i);
unlock2(r);
}
MENDFUNC(2,rol_l_ri,(RW4 r, IMM i))
MIDFUNC(2,rol_l_rr,(RW4 d, R1 r))
{
if (isconst(r)) {
COMPCALL(rol_l_ri)(d,(uae_u8)live.state[r].val);
return;
}
CLOBBER_ROL;
r=readreg_specific(r,1,SHIFTCOUNT_NREG);
d=rmw(d,4,4);
Dif (r!=1) {
write_log("Illegal register %d in raw_rol_b\n",r);
abort();
}
raw_rol_l_rr(d,r) ;
unlock2(r);
unlock2(d);
}
MENDFUNC(2,rol_l_rr,(RW4 d, R1 r))
MIDFUNC(2,rol_w_rr,(RW2 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */
if (isconst(r)) {
COMPCALL(rol_w_ri)(d,(uae_u8)live.state[r].val);
return;
}
CLOBBER_ROL;
r=readreg_specific(r,1,SHIFTCOUNT_NREG);
d=rmw(d,2,2);
Dif (r!=1) {
write_log("Illegal register %d in raw_rol_b\n",r);
abort();
}
raw_rol_w_rr(d,r) ;
unlock2(r);
unlock2(d);
}
MENDFUNC(2,rol_w_rr,(RW2 d, R1 r))
MIDFUNC(2,rol_b_rr,(RW1 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */
if (isconst(r)) {
COMPCALL(rol_b_ri)(d,(uae_u8)live.state[r].val);
return;
}
CLOBBER_ROL;
r=readreg_specific(r,1,SHIFTCOUNT_NREG);
d=rmw(d,1,1);
Dif (r!=1) {
write_log("Illegal register %d in raw_rol_b\n",r);
abort();
}
raw_rol_b_rr(d,r) ;
unlock2(r);
unlock2(d);
}
MENDFUNC(2,rol_b_rr,(RW1 d, R1 r))
MIDFUNC(2,shll_l_rr,(RW4 d, R1 r))
{
if (isconst(r)) {
COMPCALL(shll_l_ri)(d,(uae_u8)live.state[r].val);
return;
}
CLOBBER_SHLL;
r=readreg_specific(r,1,SHIFTCOUNT_NREG);
d=rmw(d,4,4);
Dif (r!=1) {
write_log("Illegal register %d in raw_rol_b\n",r);
abort();
}
raw_shll_l_rr(d,r) ;
unlock2(r);
unlock2(d);
}
MENDFUNC(2,shll_l_rr,(RW4 d, R1 r))
MIDFUNC(2,shll_w_rr,(RW2 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */
if (isconst(r)) {
COMPCALL(shll_w_ri)(d,(uae_u8)live.state[r].val);
return;
}
CLOBBER_SHLL;
r=readreg_specific(r,1,SHIFTCOUNT_NREG);
d=rmw(d,2,2);
Dif (r!=1) {
write_log("Illegal register %d in raw_shll_b\n",r);
abort();
}
raw_shll_w_rr(d,r) ;
unlock2(r);
unlock2(d);
}
MENDFUNC(2,shll_w_rr,(RW2 d, R1 r))
MIDFUNC(2,shll_b_rr,(RW1 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */
if (isconst(r)) {
COMPCALL(shll_b_ri)(d,(uae_u8)live.state[r].val);
return;
}
CLOBBER_SHLL;
r=readreg_specific(r,1,SHIFTCOUNT_NREG);
d=rmw(d,1,1);
Dif (r!=1) {
write_log("Illegal register %d in raw_shll_b\n",r);
abort();
}
raw_shll_b_rr(d,r) ;
unlock2(r);
unlock2(d);
}
MENDFUNC(2,shll_b_rr,(RW1 d, R1 r))
MIDFUNC(2,ror_b_ri,(R1 r, IMM i))
{
if (!i && !needflags)
return;
CLOBBER_ROR;
r=rmw(r,1,1);
raw_ror_b_ri(r,i);
unlock2(r);
}
MENDFUNC(2,ror_b_ri,(R1 r, IMM i))
MIDFUNC(2,ror_w_ri,(R2 r, IMM i))
{
if (!i && !needflags)
return;
CLOBBER_ROR;
r=rmw(r,2,2);
raw_ror_w_ri(r,i);
unlock2(r);
}
MENDFUNC(2,ror_w_ri,(R2 r, IMM i))
MIDFUNC(2,ror_l_ri,(R4 r, IMM i))
{
if (!i && !needflags)
return;
CLOBBER_ROR;
r=rmw(r,4,4);
raw_ror_l_ri(r,i);
unlock2(r);
}
MENDFUNC(2,ror_l_ri,(R4 r, IMM i))
MIDFUNC(2,ror_l_rr,(R4 d, R1 r))
{
if (isconst(r)) {
COMPCALL(ror_l_ri)(d,(uae_u8)live.state[r].val);
return;
}
CLOBBER_ROR;
r=readreg_specific(r,1,SHIFTCOUNT_NREG);
d=rmw(d,4,4);
raw_ror_l_rr(d,r) ;
unlock2(r);
unlock2(d);
}
MENDFUNC(2,ror_l_rr,(R4 d, R1 r))
MIDFUNC(2,ror_w_rr,(R2 d, R1 r))
{
if (isconst(r)) {
COMPCALL(ror_w_ri)(d,(uae_u8)live.state[r].val);
return;
}
CLOBBER_ROR;
r=readreg_specific(r,1,SHIFTCOUNT_NREG);
d=rmw(d,2,2);
raw_ror_w_rr(d,r) ;
unlock2(r);
unlock2(d);
}
MENDFUNC(2,ror_w_rr,(R2 d, R1 r))
MIDFUNC(2,ror_b_rr,(R1 d, R1 r))
{
if (isconst(r)) {
COMPCALL(ror_b_ri)(d,(uae_u8)live.state[r].val);
return;
}
CLOBBER_ROR;
r=readreg_specific(r,1,SHIFTCOUNT_NREG);
d=rmw(d,1,1);
raw_ror_b_rr(d,r) ;
unlock2(r);
unlock2(d);
}
MENDFUNC(2,ror_b_rr,(R1 d, R1 r))
MIDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
{
if (isconst(r)) {
COMPCALL(shrl_l_ri)(d,(uae_u8)live.state[r].val);
return;
}
CLOBBER_SHRL;
r=readreg_specific(r,1,SHIFTCOUNT_NREG);
d=rmw(d,4,4);
Dif (r!=1) {
write_log("Illegal register %d in raw_rol_b\n",r);
abort();
}
raw_shrl_l_rr(d,r) ;
unlock2(r);
unlock2(d);
}
MENDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
MIDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */
if (isconst(r)) {
COMPCALL(shrl_w_ri)(d,(uae_u8)live.state[r].val);
return;
}
CLOBBER_SHRL;
r=readreg_specific(r,1,SHIFTCOUNT_NREG);
d=rmw(d,2,2);
Dif (r!=1) {
write_log("Illegal register %d in raw_shrl_b\n",r);
abort();
}
raw_shrl_w_rr(d,r) ;
unlock2(r);
unlock2(d);
}
MENDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
MIDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */
if (isconst(r)) {
COMPCALL(shrl_b_ri)(d,(uae_u8)live.state[r].val);
return;
}
CLOBBER_SHRL;
r=readreg_specific(r,1,SHIFTCOUNT_NREG);
d=rmw(d,1,1);
Dif (r!=1) {
write_log("Illegal register %d in raw_shrl_b\n",r);
abort();
}
raw_shrl_b_rr(d,r) ;
unlock2(r);
unlock2(d);
}
MENDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
MIDFUNC(2,shll_l_ri,(RW4 r, IMM i))
{
if (!i && !needflags)
return;
if (isconst(r) && !needflags) {
live.state[r].val<<=i;
return;
}
CLOBBER_SHLL;
r=rmw(r,4,4);
raw_shll_l_ri(r,i);
unlock2(r);
}
MENDFUNC(2,shll_l_ri,(RW4 r, IMM i))
MIDFUNC(2,shll_w_ri,(RW2 r, IMM i))
{
if (!i && !needflags)
return;
CLOBBER_SHLL;
r=rmw(r,2,2);
raw_shll_w_ri(r,i);
unlock2(r);
}
MENDFUNC(2,shll_w_ri,(RW2 r, IMM i))
MIDFUNC(2,shll_b_ri,(RW1 r, IMM i))
{
if (!i && !needflags)
return;
CLOBBER_SHLL;
r=rmw(r,1,1);
raw_shll_b_ri(r,i);
unlock2(r);
}
MENDFUNC(2,shll_b_ri,(RW1 r, IMM i))
MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
{
if (!i && !needflags)
return;
if (isconst(r) && !needflags) {
live.state[r].val>>=i;
return;
}
CLOBBER_SHRL;
r=rmw(r,4,4);
raw_shrl_l_ri(r,i);
unlock2(r);
}
MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
{
if (!i && !needflags)
return;
CLOBBER_SHRL;
r=rmw(r,2,2);
raw_shrl_w_ri(r,i);
unlock2(r);
}
MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
{
if (!i && !needflags)
return;
CLOBBER_SHRL;
r=rmw(r,1,1);
raw_shrl_b_ri(r,i);
unlock2(r);
}
MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
MIDFUNC(2,shra_l_ri,(RW4 r, IMM i))
{
if (!i && !needflags)
return;
CLOBBER_SHRA;
r=rmw(r,4,4);
raw_shra_l_ri(r,i);
unlock2(r);
}
MENDFUNC(2,shra_l_ri,(RW4 r, IMM i))
MIDFUNC(2,shra_w_ri,(RW2 r, IMM i))
{
if (!i && !needflags)
return;
CLOBBER_SHRA;
r=rmw(r,2,2);
raw_shra_w_ri(r,i);
unlock2(r);
}
MENDFUNC(2,shra_w_ri,(RW2 r, IMM i))
MIDFUNC(2,shra_b_ri,(RW1 r, IMM i))
{
if (!i && !needflags)
return;
CLOBBER_SHRA;
r=rmw(r,1,1);
raw_shra_b_ri(r,i);
unlock2(r);
}
MENDFUNC(2,shra_b_ri,(RW1 r, IMM i))
MIDFUNC(2,shra_l_rr,(RW4 d, R1 r))
{
if (isconst(r)) {
COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val);
return;
}
CLOBBER_SHRA;
r=readreg_specific(r,1,SHIFTCOUNT_NREG);
d=rmw(d,4,4);
Dif (r!=1) {
write_log("Illegal register %d in raw_shra_l\n",r);
abort();
}
raw_shra_l_rr(d,r);
unlock2(r);
unlock2(d);
}
MENDFUNC(2,shra_l_rr,(RW4 d, R1 r))
MIDFUNC(2,shra_w_rr,(RW2 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */
if (isconst(r)) {
COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val);
return;
}
CLOBBER_SHRA;
r=readreg_specific(r,1,SHIFTCOUNT_NREG);
d=rmw(d,2,2);
Dif (r!=1) {
write_log("Illegal register %d in raw_shra_w\n",r);
abort();
}
raw_shra_w_rr(d,r);
unlock2(r);
unlock2(d);
}
MENDFUNC(2,shra_w_rr,(RW2 d, R1 r))
MIDFUNC(2,shra_b_rr,(RW1 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */
if (isconst(r)) {
COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val);
return;
}
CLOBBER_SHRA;
r=readreg_specific(r,1,SHIFTCOUNT_NREG);
d=rmw(d,1,1);
Dif (r!=1) {
write_log("Illegal register %d in raw_shra_b\n",r);
abort();
}
raw_shra_b_rr(d,r);
unlock2(r);
unlock2(d);
}
MENDFUNC(2,shra_b_rr,(RW1 d, R1 r))
MIDFUNC(2,setcc,(W1 d, IMM cc))
{
CLOBBER_SETCC;
d=writereg(d,1);
raw_setcc(d,cc);
unlock2(d);
}
MENDFUNC(2,setcc,(W1 d, IMM cc))
MIDFUNC(2,setcc_m,(IMM d, IMM cc))
{
CLOBBER_SETCC;
raw_setcc_m(d,cc);
}
MENDFUNC(2,setcc_m,(IMM d, IMM cc))
MIDFUNC(3,cmov_b_rr,(RW1 d, R1 s, IMM cc))
{
if (d==s)
return;
CLOBBER_CMOV;
s=readreg(s,1);
d=rmw(d,1,1);
raw_cmov_b_rr(d,s,cc);
unlock2(s);
unlock2(d);
}
MENDFUNC(3,cmov_b_rr,(RW1 d, R1 s, IMM cc))
MIDFUNC(3,cmov_w_rr,(RW2 d, R2 s, IMM cc))
{
if (d==s)
return;
CLOBBER_CMOV;
s=readreg(s,2);
d=rmw(d,2,2);
raw_cmov_w_rr(d,s,cc);
unlock2(s);
unlock2(d);
}
MENDFUNC(3,cmov_w_rr,(RW2 d, R2 s, IMM cc))
MIDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
{
if (d==s)
return;
CLOBBER_CMOV;
s=readreg(s,4);
d=rmw(d,4,4);
raw_cmov_l_rr(d,s,cc);
unlock2(s);
unlock2(d);
}
MENDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
{
CLOBBER_CMOV;
d=rmw(d,4,4);
raw_cmov_l_rm(d,s,cc);
unlock2(d);
}
MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
MIDFUNC(2,bsf_l_rr,(W4 d, W4 s))
{
CLOBBER_BSF;
s = readreg(s, 4);
d = writereg(d, 4);
raw_bsf_l_rr(d, s);
unlock2(s);
unlock2(d);
}
MENDFUNC(2,bsf_l_rr,(W4 d, W4 s))
/* Set the Z flag depending on the value in s. Note that the
value has to be 0 or -1 (or, more precisely, for non-zero
values, bit 14 must be set)! */
MIDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
{
CLOBBER_BSF;
s=rmw_specific(s,4,4,FLAG_NREG3);
tmp=writereg(tmp,4);
raw_flags_set_zero(s, tmp);
unlock2(tmp);
unlock2(s);
}
MENDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
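/* Illustrative note (added): simulate_bsf() is the fallback that
 * set_zero() further down selects when target_check_bsf() reports
 * that the host's BSF instruction cannot be relied upon to update
 * only the Z flag. The 0/-1 value in s is routed through FLAG_NREG3
 * so raw_flags_set_zero() can fold it into the emulated Z bit
 * without disturbing the other live flags. */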
MIDFUNC(2,imul_32_32,(RW4 d, R4 s))
{
CLOBBER_MUL;
s=readreg(s,4);
d=rmw(d,4,4);
raw_imul_32_32(d,s);
unlock2(s);
unlock2(d);
}
MENDFUNC(2,imul_32_32,(RW4 d, R4 s))
MIDFUNC(2,imul_64_32,(RW4 d, RW4 s))
{
CLOBBER_MUL;
s=rmw_specific(s,4,4,MUL_NREG2);
d=rmw_specific(d,4,4,MUL_NREG1);
raw_imul_64_32(d,s);
unlock2(s);
unlock2(d);
}
MENDFUNC(2,imul_64_32,(RW4 d, RW4 s))
MIDFUNC(2,mul_64_32,(RW4 d, RW4 s))
{
CLOBBER_MUL;
s=rmw_specific(s,4,4,MUL_NREG2);
d=rmw_specific(d,4,4,MUL_NREG1);
raw_mul_64_32(d,s);
unlock2(s);
unlock2(d);
}
MENDFUNC(2,mul_64_32,(RW4 d, RW4 s))
MIDFUNC(2,mul_32_32,(RW4 d, R4 s))
{
CLOBBER_MUL;
s=readreg(s,4);
d=rmw(d,4,4);
raw_mul_32_32(d,s);
unlock2(s);
unlock2(d);
}
MENDFUNC(2,mul_32_32,(RW4 d, R4 s))
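/* Added note: the sign/zero extension helpers below share one
 * pattern. When source and destination are distinct vregs, they are
 * locked separately via readreg()/writereg(); when s==d they must be
 * locked exactly once as a read-modify-write of the wider size,
 * because locking the same vreg twice with different sizes would
 * confuse the allocator (the "isrmw" path). Hypothetical usage:
 *
 *   sign_extend_16_rr(d, s);   // d = (uae_s32)(uae_s16)s
 *   sign_extend_16_rr(r, r);   // in-place, takes the rmw path
 */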
#if SIZEOF_VOID_P == 8
MIDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
{
int isrmw;
if (isconst(s)) {
set_const(d,(uae_s32)live.state[s].val);
return;
}
CLOBBER_SE32;
isrmw=(s==d);
if (!isrmw) {
s=readreg(s,4);
d=writereg(d,4);
}
else { /* If we try to lock this twice, with different sizes, we
are in trouble! */
s=d=rmw(s,4,4);
}
raw_sign_extend_32_rr(d,s);
if (!isrmw) {
unlock2(d);
unlock2(s);
}
else {
unlock2(s);
}
}
MENDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
#endif
MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
{
int isrmw;
if (isconst(s)) {
set_const(d,(uae_s32)(uae_s16)live.state[s].val);
return;
}
CLOBBER_SE16;
isrmw=(s==d);
if (!isrmw) {
s=readreg(s,2);
d=writereg(d,4);
}
else { /* If we try to lock this twice, with different sizes, we
are in trouble! */
s=d=rmw(s,4,2);
}
raw_sign_extend_16_rr(d,s);
if (!isrmw) {
unlock2(d);
unlock2(s);
}
else {
unlock2(s);
}
}
MENDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
MIDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
{
int isrmw;
if (isconst(s)) {
set_const(d,(uae_s32)(uae_s8)live.state[s].val);
return;
}
isrmw=(s==d);
CLOBBER_SE8;
if (!isrmw) {
s=readreg(s,1);
d=writereg(d,4);
}
else { /* If we try to lock this twice, with different sizes, we
are in trouble! */
s=d=rmw(s,4,1);
}
raw_sign_extend_8_rr(d,s);
if (!isrmw) {
unlock2(d);
unlock2(s);
}
else {
unlock2(s);
}
}
MENDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
MIDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
{
int isrmw;
if (isconst(s)) {
set_const(d,(uae_u32)(uae_u16)live.state[s].val);
return;
}
isrmw=(s==d);
CLOBBER_ZE16;
if (!isrmw) {
s=readreg(s,2);
d=writereg(d,4);
}
else { /* If we try to lock this twice, with different sizes, we
are in trouble! */
s=d=rmw(s,4,2);
}
raw_zero_extend_16_rr(d,s);
if (!isrmw) {
unlock2(d);
unlock2(s);
}
else {
unlock2(s);
}
}
MENDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
MIDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
{
int isrmw;
if (isconst(s)) {
set_const(d,(uae_u32)(uae_u8)live.state[s].val);
return;
}
isrmw=(s==d);
CLOBBER_ZE8;
if (!isrmw) {
s=readreg(s,1);
d=writereg(d,4);
}
else { /* If we try to lock this twice, with different sizes, we
are in trouble! */
s=d=rmw(s,4,1);
}
raw_zero_extend_8_rr(d,s);
if (!isrmw) {
unlock2(d);
unlock2(s);
}
else {
unlock2(s);
}
}
MENDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
MIDFUNC(2,mov_b_rr,(W1 d, R1 s))
{
if (d==s)
return;
if (isconst(s)) {
COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val);
return;
}
CLOBBER_MOV;
s=readreg(s,1);
d=writereg(d,1);
raw_mov_b_rr(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,mov_b_rr,(W1 d, R1 s))
MIDFUNC(2,mov_w_rr,(W2 d, R2 s))
{
if (d==s)
return;
if (isconst(s)) {
COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val);
return;
}
CLOBBER_MOV;
s=readreg(s,2);
d=writereg(d,2);
raw_mov_w_rr(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,mov_w_rr,(W2 d, R2 s))
MIDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
{
CLOBBER_MOV;
baser=readreg(baser,4);
index=readreg(index,4);
d=writereg(d,4);
raw_mov_l_rrm_indexed(d,baser,index,factor);
unlock2(d);
unlock2(baser);
unlock2(index);
}
MENDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
MIDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
{
CLOBBER_MOV;
baser=readreg(baser,4);
index=readreg(index,4);
d=writereg(d,2);
raw_mov_w_rrm_indexed(d,baser,index,factor);
unlock2(d);
unlock2(baser);
unlock2(index);
}
MENDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
MIDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
{
CLOBBER_MOV;
baser=readreg(baser,4);
index=readreg(index,4);
d=writereg(d,1);
raw_mov_b_rrm_indexed(d,baser,index,factor);
unlock2(d);
unlock2(baser);
unlock2(index);
}
MENDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
MIDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
{
CLOBBER_MOV;
baser=readreg(baser,4);
index=readreg(index,4);
s=readreg(s,4);
Dif (baser==s || index==s)
abort();
raw_mov_l_mrr_indexed(baser,index,factor,s);
unlock2(s);
unlock2(baser);
unlock2(index);
}
MENDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
MIDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
{
CLOBBER_MOV;
baser=readreg(baser,4);
index=readreg(index,4);
s=readreg(s,2);
raw_mov_w_mrr_indexed(baser,index,factor,s);
unlock2(s);
unlock2(baser);
unlock2(index);
}
MENDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
MIDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
{
CLOBBER_MOV;
s=readreg(s,1);
baser=readreg(baser,4);
index=readreg(index,4);
raw_mov_b_mrr_indexed(baser,index,factor,s);
unlock2(s);
unlock2(baser);
unlock2(index);
}
MENDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
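/* Added note: the *_bmrr_indexed and *_brrm_indexed variants below
 * use readreg_offset()/get_offset() rather than plain readreg().
 * With USE_OFFSET a vreg may really stand for "register + pending
 * constant"; folding those constants into the x86 displacement
 * (base += get_offset(...)) lets a single addressing mode absorb
 * them instead of forcing extra adds to be emitted first. */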
MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
{
int basereg=baser;
int indexreg=index;
CLOBBER_MOV;
s=readreg(s,4);
baser=readreg_offset(baser,4);
index=readreg_offset(index,4);
base+=get_offset(basereg);
base+=factor*get_offset(indexreg);
raw_mov_l_bmrr_indexed(base,baser,index,factor,s);
unlock2(s);
unlock2(baser);
unlock2(index);
}
MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
{
int basereg=baser;
int indexreg=index;
CLOBBER_MOV;
s=readreg(s,2);
baser=readreg_offset(baser,4);
index=readreg_offset(index,4);
base+=get_offset(basereg);
base+=factor*get_offset(indexreg);
raw_mov_w_bmrr_indexed(base,baser,index,factor,s);
unlock2(s);
unlock2(baser);
unlock2(index);
}
MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
{
int basereg=baser;
int indexreg=index;
CLOBBER_MOV;
s=readreg(s,1);
baser=readreg_offset(baser,4);
index=readreg_offset(index,4);
base+=get_offset(basereg);
base+=factor*get_offset(indexreg);
raw_mov_b_bmrr_indexed(base,baser,index,factor,s);
unlock2(s);
unlock2(baser);
unlock2(index);
}
MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
/* Read a long from base+baser+factor*index */
MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
{
int basereg=baser;
int indexreg=index;
CLOBBER_MOV;
baser=readreg_offset(baser,4);
index=readreg_offset(index,4);
base+=get_offset(basereg);
base+=factor*get_offset(indexreg);
d=writereg(d,4);
raw_mov_l_brrm_indexed(d,base,baser,index,factor);
unlock2(d);
unlock2(baser);
unlock2(index);
}
MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
{
int basereg=baser;
int indexreg=index;
CLOBBER_MOV;
remove_offset(d,-1);
baser=readreg_offset(baser,4);
index=readreg_offset(index,4);
base+=get_offset(basereg);
base+=factor*get_offset(indexreg);
d=writereg(d,2);
raw_mov_w_brrm_indexed(d,base,baser,index,factor);
unlock2(d);
unlock2(baser);
unlock2(index);
}
MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
{
int basereg=baser;
int indexreg=index;
CLOBBER_MOV;
remove_offset(d,-1);
baser=readreg_offset(baser,4);
index=readreg_offset(index,4);
base+=get_offset(basereg);
base+=factor*get_offset(indexreg);
d=writereg(d,1);
raw_mov_b_brrm_indexed(d,base,baser,index,factor);
unlock2(d);
unlock2(baser);
unlock2(index);
}
MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
/* Read a long from base+factor*index */
MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
{
int indexreg=index;
if (isconst(index)) {
COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val);
return;
}
CLOBBER_MOV;
index=readreg_offset(index,4);
base+=get_offset(indexreg)*factor;
d=writereg(d,4);
raw_mov_l_rm_indexed(d,base,index,factor);
unlock2(index);
unlock2(d);
}
MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
/* read the long at the address contained in s+offset and store in d */
MIDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
{
if (isconst(s)) {
COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
return;
}
CLOBBER_MOV;
s=readreg(s,4);
d=writereg(d,4);
raw_mov_l_rR(d,s,offset);
unlock2(d);
unlock2(s);
}
MENDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
/* read the word at the address contained in s+offset and store in d */
MIDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
{
if (isconst(s)) {
COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
return;
}
CLOBBER_MOV;
s=readreg(s,4);
d=writereg(d,2);
raw_mov_w_rR(d,s,offset);
unlock2(d);
unlock2(s);
}
MENDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
/* read the byte at the address contained in s+offset and store in d */
MIDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
{
if (isconst(s)) {
COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
return;
}
CLOBBER_MOV;
s=readreg(s,4);
d=writereg(d,1);
raw_mov_b_rR(d,s,offset);
unlock2(d);
unlock2(s);
}
MENDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
/* read the long at the address contained in s+offset and store in d */
MIDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
{
int sreg=s;
if (isconst(s)) {
COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
return;
}
CLOBBER_MOV;
s=readreg_offset(s,4);
offset+=get_offset(sreg);
d=writereg(d,4);
raw_mov_l_brR(d,s,offset);
unlock2(d);
unlock2(s);
}
MENDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
/* read the word at the address contained in s+offset and store in d */
MIDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
{
int sreg=s;
if (isconst(s)) {
COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
return;
}
CLOBBER_MOV;
remove_offset(d,-1);
s=readreg_offset(s,4);
offset+=get_offset(sreg);
d=writereg(d,2);
raw_mov_w_brR(d,s,offset);
unlock2(d);
unlock2(s);
}
MENDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
/* read the byte at the address contained in s+offset and store in d */
MIDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
{
int sreg=s;
if (isconst(s)) {
COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
return;
}
CLOBBER_MOV;
remove_offset(d,-1);
s=readreg_offset(s,4);
offset+=get_offset(sreg);
d=writereg(d,1);
raw_mov_b_brR(d,s,offset);
unlock2(d);
unlock2(s);
}
MENDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
MIDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
{
int dreg=d;
if (isconst(d)) {
COMPCALL(mov_l_mi)(live.state[d].val+offset,i);
return;
}
CLOBBER_MOV;
d=readreg_offset(d,4);
offset+=get_offset(dreg);
raw_mov_l_Ri(d,i,offset);
unlock2(d);
}
MENDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
MIDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
{
int dreg=d;
if (isconst(d)) {
COMPCALL(mov_w_mi)(live.state[d].val+offset,i);
return;
}
CLOBBER_MOV;
d=readreg_offset(d,4);
offset+=get_offset(dreg);
raw_mov_w_Ri(d,i,offset);
unlock2(d);
}
MENDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
MIDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
{
int dreg=d;
if (isconst(d)) {
COMPCALL(mov_b_mi)(live.state[d].val+offset,i);
return;
}
CLOBBER_MOV;
d=readreg_offset(d,4);
offset+=get_offset(dreg);
raw_mov_b_Ri(d,i,offset);
unlock2(d);
}
MENDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
/* Warning! OFFSET is byte sized only! */
MIDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
{
if (isconst(d)) {
COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
return;
}
if (isconst(s)) {
COMPCALL(mov_l_Ri)(d,live.state[s].val,offset);
return;
}
CLOBBER_MOV;
s=readreg(s,4);
d=readreg(d,4);
raw_mov_l_Rr(d,s,offset);
unlock2(d);
unlock2(s);
}
MENDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
MIDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
{
if (isconst(d)) {
COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
return;
}
if (isconst(s)) {
COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset);
return;
}
CLOBBER_MOV;
s=readreg(s,2);
d=readreg(d,4);
raw_mov_w_Rr(d,s,offset);
unlock2(d);
unlock2(s);
}
MENDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
MIDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
{
if (isconst(d)) {
COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
return;
}
if (isconst(s)) {
COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset);
return;
}
CLOBBER_MOV;
s=readreg(s,1);
d=readreg(d,4);
raw_mov_b_Rr(d,s,offset);
unlock2(d);
unlock2(s);
}
MENDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
MIDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
{
if (isconst(s)) {
COMPCALL(mov_l_ri)(d,live.state[s].val+offset);
return;
}
#if USE_OFFSET
if (d==s) {
add_offset(d,offset);
return;
}
#endif
CLOBBER_LEA;
s=readreg(s,4);
d=writereg(d,4);
raw_lea_l_brr(d,s,offset);
unlock2(d);
unlock2(s);
}
MENDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
MIDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
{
if (!offset) {
COMPCALL(lea_l_rr_indexed)(d,s,index,factor);
return;
}
CLOBBER_LEA;
s=readreg(s,4);
index=readreg(index,4);
d=writereg(d,4);
raw_lea_l_brr_indexed(d,s,index,factor,offset);
unlock2(d);
unlock2(index);
unlock2(s);
}
MENDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
MIDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
{
CLOBBER_LEA;
s=readreg(s,4);
index=readreg(index,4);
d=writereg(d,4);
raw_lea_l_rr_indexed(d,s,index,factor);
unlock2(d);
unlock2(index);
unlock2(s);
}
MENDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
/* write the long s to the address contained in d+offset */
MIDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
{
int dreg=d;
if (isconst(d)) {
COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
return;
}
CLOBBER_MOV;
s=readreg(s,4);
d=readreg_offset(d,4);
offset+=get_offset(dreg);
raw_mov_l_bRr(d,s,offset);
unlock2(d);
unlock2(s);
}
MENDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
/* write the word s to the address contained in d+offset */
MIDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
{
int dreg=d;
if (isconst(d)) {
COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
return;
}
CLOBBER_MOV;
s=readreg(s,2);
d=readreg_offset(d,4);
offset+=get_offset(dreg);
raw_mov_w_bRr(d,s,offset);
unlock2(d);
unlock2(s);
}
MENDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
MIDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
{
int dreg=d;
if (isconst(d)) {
COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
return;
}
CLOBBER_MOV;
s=readreg(s,1);
d=readreg_offset(d,4);
offset+=get_offset(dreg);
raw_mov_b_bRr(d,s,offset);
unlock2(d);
unlock2(s);
}
MENDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
MIDFUNC(1,bswap_32,(RW4 r))
{
int reg=r;
if (isconst(r)) {
uae_u32 oldv=live.state[r].val;
live.state[r].val=reverse32(oldv);
return;
}
CLOBBER_SW32;
r=rmw(r,4,4);
raw_bswap_32(r);
unlock2(r);
}
MENDFUNC(1,bswap_32,(RW4 r))
MIDFUNC(1,bswap_16,(RW2 r))
{
if (isconst(r)) {
uae_u32 oldv=live.state[r].val;
live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) |
(oldv&0xffff0000);
return;
}
CLOBBER_SW16;
r=rmw(r,2,2);
raw_bswap_16(r);
unlock2(r);
}
MENDFUNC(1,bswap_16,(RW2 r))
MIDFUNC(2,mov_l_rr,(W4 d, R4 s))
{
int olds;
if (d==s) { /* How pointless! */
return;
}
if (isconst(s)) {
COMPCALL(mov_l_ri)(d,live.state[s].val);
return;
}
olds=s;
disassociate(d);
s=readreg_offset(s,4);
live.state[d].realreg=s;
live.state[d].realind=live.nat[s].nholds;
live.state[d].val=live.state[olds].val;
live.state[d].validsize=4;
live.state[d].dirtysize=4;
set_status(d,DIRTY);
live.nat[s].holds[live.nat[s].nholds]=d;
live.nat[s].nholds++;
log_clobberreg(d);
/* write_log("Added %d to nreg %d(%d), now holds %d regs\n",
d,s,live.state[d].realind,live.nat[s].nholds); */
unlock2(s);
}
MENDFUNC(2,mov_l_rr,(W4 d, R4 s))
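/* Added note: mov_l_rr() above usually emits no host code at all.
 * It records that vreg d is now held by the same native register as
 * s (register aliasing) and bumps nholds on that nreg; an actual
 * copy only materializes later if one of the aliases is modified or
 * evicted. This is a large part of why 68k register-to-register
 * MOVEs are nearly free in translated code. */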
MIDFUNC(2,mov_l_mr,(IMM d, R4 s))
{
if (isconst(s)) {
COMPCALL(mov_l_mi)(d,live.state[s].val);
return;
}
CLOBBER_MOV;
s=readreg(s,4);
raw_mov_l_mr(d,s);
unlock2(s);
}
MENDFUNC(2,mov_l_mr,(IMM d, R4 s))
MIDFUNC(2,mov_w_mr,(IMM d, R2 s))
{
if (isconst(s)) {
COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val);
return;
}
CLOBBER_MOV;
s=readreg(s,2);
raw_mov_w_mr(d,s);
unlock2(s);
}
MENDFUNC(2,mov_w_mr,(IMM d, R2 s))
MIDFUNC(2,mov_w_rm,(W2 d, IMM s))
{
CLOBBER_MOV;
d=writereg(d,2);
raw_mov_w_rm(d,s);
unlock2(d);
}
MENDFUNC(2,mov_w_rm,(W2 d, IMM s))
MIDFUNC(2,mov_b_mr,(IMM d, R1 s))
{
if (isconst(s)) {
COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val);
return;
}
CLOBBER_MOV;
s=readreg(s,1);
raw_mov_b_mr(d,s);
unlock2(s);
}
MENDFUNC(2,mov_b_mr,(IMM d, R1 s))
MIDFUNC(2,mov_b_rm,(W1 d, IMM s))
{
CLOBBER_MOV;
d=writereg(d,1);
raw_mov_b_rm(d,s);
unlock2(d);
}
MENDFUNC(2,mov_b_rm,(W1 d, IMM s))
MIDFUNC(2,mov_l_ri,(W4 d, IMM s))
{
set_const(d,s);
return;
}
MENDFUNC(2,mov_l_ri,(W4 d, IMM s))
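/* Added note: mov_l_ri() likewise generates nothing; set_const()
 * merely marks the vreg ISCONST with the given value, which the
 * isconst() fast paths elsewhere can then fold at translation time.
 * Sketch:
 *
 *   mov_l_ri(r, 4);    // r becomes a compile-time constant
 *   add_l_ri(r, 8);    // folded, live.state[r].val is now 12
 *
 * No host instruction is emitted for either call unless flags are
 * needed. */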
MIDFUNC(2,mov_w_ri,(W2 d, IMM s))
{
CLOBBER_MOV;
d=writereg(d,2);
raw_mov_w_ri(d,s);
unlock2(d);
}
MENDFUNC(2,mov_w_ri,(W2 d, IMM s))
MIDFUNC(2,mov_b_ri,(W1 d, IMM s))
{
CLOBBER_MOV;
d=writereg(d,1);
raw_mov_b_ri(d,s);
unlock2(d);
}
MENDFUNC(2,mov_b_ri,(W1 d, IMM s))
MIDFUNC(2,add_l_mi,(IMM d, IMM s))
{
CLOBBER_ADD;
raw_add_l_mi(d,s);
}
MENDFUNC(2,add_l_mi,(IMM d, IMM s))
MIDFUNC(2,add_w_mi,(IMM d, IMM s))
{
CLOBBER_ADD;
raw_add_w_mi(d,s);
}
MENDFUNC(2,add_w_mi,(IMM d, IMM s))
MIDFUNC(2,add_b_mi,(IMM d, IMM s))
{
CLOBBER_ADD;
raw_add_b_mi(d,s);
}
MENDFUNC(2,add_b_mi,(IMM d, IMM s))
MIDFUNC(2,test_l_ri,(R4 d, IMM i))
{
CLOBBER_TEST;
d=readreg(d,4);
raw_test_l_ri(d,i);
unlock2(d);
}
MENDFUNC(2,test_l_ri,(R4 d, IMM i))
MIDFUNC(2,test_l_rr,(R4 d, R4 s))
{
CLOBBER_TEST;
d=readreg(d,4);
s=readreg(s,4);
raw_test_l_rr(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,test_l_rr,(R4 d, R4 s))
MIDFUNC(2,test_w_rr,(R2 d, R2 s))
{
CLOBBER_TEST;
d=readreg(d,2);
s=readreg(s,2);
raw_test_w_rr(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,test_w_rr,(R2 d, R2 s))
MIDFUNC(2,test_b_rr,(R1 d, R1 s))
{
CLOBBER_TEST;
d=readreg(d,1);
s=readreg(s,1);
raw_test_b_rr(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,test_b_rr,(R1 d, R1 s))
MIDFUNC(2,and_l_ri,(RW4 d, IMM i))
{
if (isconst(d) && !needflags) {
live.state[d].val &= i;
return;
}
CLOBBER_AND;
d=rmw(d,4,4);
raw_and_l_ri(d,i);
unlock2(d);
}
MENDFUNC(2,and_l_ri,(RW4 d, IMM i))
MIDFUNC(2,and_l,(RW4 d, R4 s))
{
CLOBBER_AND;
s=readreg(s,4);
d=rmw(d,4,4);
raw_and_l(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,and_l,(RW4 d, R4 s))
MIDFUNC(2,and_w,(RW2 d, R2 s))
{
CLOBBER_AND;
s=readreg(s,2);
d=rmw(d,2,2);
raw_and_w(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,and_w,(RW2 d, R2 s))
MIDFUNC(2,and_b,(RW1 d, R1 s))
{
CLOBBER_AND;
s=readreg(s,1);
d=rmw(d,1,1);
raw_and_b(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,and_b,(RW1 d, R1 s))
// gb-- used for making an fpcr value in compemu_fpp.cpp
MIDFUNC(2,or_l_rm,(RW4 d, IMM s))
{
CLOBBER_OR;
d=rmw(d,4,4);
raw_or_l_rm(d,s);
unlock2(d);
}
MENDFUNC(2,or_l_rm,(RW4 d, IMM s))
MIDFUNC(2,or_l_ri,(RW4 d, IMM i))
{
if (isconst(d) && !needflags) {
live.state[d].val|=i;
return;
}
CLOBBER_OR;
d=rmw(d,4,4);
raw_or_l_ri(d,i);
unlock2(d);
}
MENDFUNC(2,or_l_ri,(RW4 d, IMM i))
MIDFUNC(2,or_l,(RW4 d, R4 s))
{
if (isconst(d) && isconst(s) && !needflags) {
live.state[d].val|=live.state[s].val;
return;
}
CLOBBER_OR;
s=readreg(s,4);
d=rmw(d,4,4);
raw_or_l(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,or_l,(RW4 d, R4 s))
MIDFUNC(2,or_w,(RW2 d, R2 s))
{
CLOBBER_OR;
s=readreg(s,2);
d=rmw(d,2,2);
raw_or_w(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,or_w,(RW2 d, R2 s))
MIDFUNC(2,or_b,(RW1 d, R1 s))
{
CLOBBER_OR;
s=readreg(s,1);
d=rmw(d,1,1);
raw_or_b(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,or_b,(RW1 d, R1 s))
MIDFUNC(2,adc_l,(RW4 d, R4 s))
{
CLOBBER_ADC;
s=readreg(s,4);
d=rmw(d,4,4);
raw_adc_l(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,adc_l,(RW4 d, R4 s))
MIDFUNC(2,adc_w,(RW2 d, R2 s))
{
CLOBBER_ADC;
s=readreg(s,2);
d=rmw(d,2,2);
raw_adc_w(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,adc_w,(RW2 d, R2 s))
MIDFUNC(2,adc_b,(RW1 d, R1 s))
{
CLOBBER_ADC;
s=readreg(s,1);
d=rmw(d,1,1);
raw_adc_b(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,adc_b,(RW1 d, R1 s))
MIDFUNC(2,add_l,(RW4 d, R4 s))
{
if (isconst(s)) {
COMPCALL(add_l_ri)(d,live.state[s].val);
return;
}
CLOBBER_ADD;
s=readreg(s,4);
d=rmw(d,4,4);
raw_add_l(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,add_l,(RW4 d, R4 s))
MIDFUNC(2,add_w,(RW2 d, R2 s))
{
if (isconst(s)) {
COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val);
return;
}
CLOBBER_ADD;
s=readreg(s,2);
d=rmw(d,2,2);
raw_add_w(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,add_w,(RW2 d, R2 s))
MIDFUNC(2,add_b,(RW1 d, R1 s))
{
if (isconst(s)) {
COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val);
return;
}
CLOBBER_ADD;
s=readreg(s,1);
d=rmw(d,1,1);
raw_add_b(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,add_b,(RW1 d, R1 s))
MIDFUNC(2,sub_l_ri,(RW4 d, IMM i))
{
if (!i && !needflags)
return;
if (isconst(d) && !needflags) {
live.state[d].val-=i;
return;
}
#if USE_OFFSET
if (!needflags) {
add_offset(d,-i);
return;
}
#endif
CLOBBER_SUB;
d=rmw(d,4,4);
raw_sub_l_ri(d,i);
unlock2(d);
}
MENDFUNC(2,sub_l_ri,(RW4 d, IMM i))
MIDFUNC(2,sub_w_ri,(RW2 d, IMM i))
{
if (!i && !needflags)
return;
CLOBBER_SUB;
d=rmw(d,2,2);
raw_sub_w_ri(d,i);
unlock2(d);
}
MENDFUNC(2,sub_w_ri,(RW2 d, IMM i))
MIDFUNC(2,sub_b_ri,(RW1 d, IMM i))
{
if (!i && !needflags)
return;
CLOBBER_SUB;
d=rmw(d,1,1);
raw_sub_b_ri(d,i);
unlock2(d);
}
MENDFUNC(2,sub_b_ri,(RW1 d, IMM i))
MIDFUNC(2,add_l_ri,(RW4 d, IMM i))
{
if (!i && !needflags)
return;
if (isconst(d) && !needflags) {
live.state[d].val+=i;
return;
}
#if USE_OFFSET
if (!needflags) {
add_offset(d,i);
return;
}
#endif
CLOBBER_ADD;
d=rmw(d,4,4);
raw_add_l_ri(d,i);
unlock2(d);
}
MENDFUNC(2,add_l_ri,(RW4 d, IMM i))
MIDFUNC(2,add_w_ri,(RW2 d, IMM i))
{
if (!i && !needflags)
return;
CLOBBER_ADD;
d=rmw(d,2,2);
raw_add_w_ri(d,i);
unlock2(d);
}
MENDFUNC(2,add_w_ri,(RW2 d, IMM i))
MIDFUNC(2,add_b_ri,(RW1 d, IMM i))
{
if (!i && !needflags)
return;
CLOBBER_ADD;
d=rmw(d,1,1);
raw_add_b_ri(d,i);
unlock2(d);
}
MENDFUNC(2,add_b_ri,(RW1 d, IMM i))
MIDFUNC(2,sbb_l,(RW4 d, R4 s))
{
CLOBBER_SBB;
s=readreg(s,4);
d=rmw(d,4,4);
raw_sbb_l(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,sbb_l,(RW4 d, R4 s))
MIDFUNC(2,sbb_w,(RW2 d, R2 s))
{
CLOBBER_SBB;
s=readreg(s,2);
d=rmw(d,2,2);
raw_sbb_w(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,sbb_w,(RW2 d, R2 s))
MIDFUNC(2,sbb_b,(RW1 d, R1 s))
{
CLOBBER_SBB;
s=readreg(s,1);
d=rmw(d,1,1);
raw_sbb_b(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,sbb_b,(RW1 d, R1 s))
MIDFUNC(2,sub_l,(RW4 d, R4 s))
{
if (isconst(s)) {
COMPCALL(sub_l_ri)(d,live.state[s].val);
return;
}
CLOBBER_SUB;
s=readreg(s,4);
d=rmw(d,4,4);
raw_sub_l(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,sub_l,(RW4 d, R4 s))
MIDFUNC(2,sub_w,(RW2 d, R2 s))
{
if (isconst(s)) {
COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val);
return;
}
CLOBBER_SUB;
s=readreg(s,2);
d=rmw(d,2,2);
raw_sub_w(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,sub_w,(RW2 d, R2 s))
MIDFUNC(2,sub_b,(RW1 d, R1 s))
{
if (isconst(s)) {
COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val);
return;
}
CLOBBER_SUB;
s=readreg(s,1);
d=rmw(d,1,1);
raw_sub_b(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,sub_b,(RW1 d, R1 s))
MIDFUNC(2,cmp_l,(R4 d, R4 s))
{
CLOBBER_CMP;
s=readreg(s,4);
d=readreg(d,4);
raw_cmp_l(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,cmp_l,(R4 d, R4 s))
MIDFUNC(2,cmp_l_ri,(R4 r, IMM i))
{
CLOBBER_CMP;
r=readreg(r,4);
raw_cmp_l_ri(r,i);
unlock2(r);
}
MENDFUNC(2,cmp_l_ri,(R4 r, IMM i))
MIDFUNC(2,cmp_w,(R2 d, R2 s))
{
CLOBBER_CMP;
s=readreg(s,2);
d=readreg(d,2);
raw_cmp_w(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,cmp_w,(R2 d, R2 s))
MIDFUNC(2,cmp_b,(R1 d, R1 s))
{
CLOBBER_CMP;
s=readreg(s,1);
d=readreg(d,1);
raw_cmp_b(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,cmp_b,(R1 d, R1 s))
MIDFUNC(2,xor_l,(RW4 d, R4 s))
{
CLOBBER_XOR;
s=readreg(s,4);
d=rmw(d,4,4);
raw_xor_l(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,xor_l,(RW4 d, R4 s))
MIDFUNC(2,xor_w,(RW2 d, R2 s))
{
CLOBBER_XOR;
s=readreg(s,2);
d=rmw(d,2,2);
raw_xor_w(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,xor_w,(RW2 d, R2 s))
MIDFUNC(2,xor_b,(RW1 d, R1 s))
{
CLOBBER_XOR;
s=readreg(s,1);
d=rmw(d,1,1);
raw_xor_b(d,s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,xor_b,(RW1 d, R1 s))
MIDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
{
clobber_flags();
remove_all_offsets();
if (osize==4) {
if (out1!=in1 && out1!=r) {
COMPCALL(forget_about)(out1);
}
}
else {
tomem_c(out1);
}
in1=readreg_specific(in1,isize,REG_PAR1);
r=readreg(r,4);
prepare_for_call_1(); /* This should ensure that there won't be
any need for swapping nregs in prepare_for_call_2
*/
#if USE_NORMAL_CALLING_CONVENTION
raw_push_l_r(in1);
#endif
unlock2(in1);
unlock2(r);
prepare_for_call_2();
raw_call_r(r);
#if USE_NORMAL_CALLING_CONVENTION
raw_inc_sp(4);
#endif
live.nat[REG_RESULT].holds[0]=out1;
live.nat[REG_RESULT].nholds=1;
live.nat[REG_RESULT].touched=touchcnt++;
live.state[out1].realreg=REG_RESULT;
live.state[out1].realind=0;
live.state[out1].val=0;
live.state[out1].validsize=osize;
live.state[out1].dirtysize=osize;
set_status(out1,DIRTY);
}
MENDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
MIDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
{
clobber_flags();
remove_all_offsets();
in1=readreg_specific(in1,isize1,REG_PAR1);
in2=readreg_specific(in2,isize2,REG_PAR2);
r=readreg(r,4);
prepare_for_call_1(); /* This should ensure that there won't be
any need for swapping nregs in prepare_for_call_2
*/
#if USE_NORMAL_CALLING_CONVENTION
raw_push_l_r(in2);
raw_push_l_r(in1);
#endif
unlock2(r);
unlock2(in1);
unlock2(in2);
prepare_for_call_2();
raw_call_r(r);
#if USE_NORMAL_CALLING_CONVENTION
raw_inc_sp(8);
#endif
}
MENDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
/* forget_about() takes a mid-layer register */
MIDFUNC(1,forget_about,(W4 r))
{
if (isinreg(r))
disassociate(r);
live.state[r].val=0;
set_status(r,UNDEF);
}
MENDFUNC(1,forget_about,(W4 r))
MIDFUNC(0,nop,(void))
{
raw_nop();
}
MENDFUNC(0,nop,(void))
MIDFUNC(1,f_forget_about,(FW r))
{
if (f_isinreg(r))
f_disassociate(r);
live.fate[r].status=UNDEF;
}
MENDFUNC(1,f_forget_about,(FW r))
MIDFUNC(1,fmov_pi,(FW r))
{
r=f_writereg(r);
raw_fmov_pi(r);
f_unlock(r);
}
MENDFUNC(1,fmov_pi,(FW r))
MIDFUNC(1,fmov_log10_2,(FW r))
{
r=f_writereg(r);
raw_fmov_log10_2(r);
f_unlock(r);
}
MENDFUNC(1,fmov_log10_2,(FW r))
MIDFUNC(1,fmov_log2_e,(FW r))
{
r=f_writereg(r);
raw_fmov_log2_e(r);
f_unlock(r);
}
MENDFUNC(1,fmov_log2_e,(FW r))
MIDFUNC(1,fmov_loge_2,(FW r))
{
r=f_writereg(r);
raw_fmov_loge_2(r);
f_unlock(r);
}
MENDFUNC(1,fmov_loge_2,(FW r))
MIDFUNC(1,fmov_1,(FW r))
{
r=f_writereg(r);
raw_fmov_1(r);
f_unlock(r);
}
MENDFUNC(1,fmov_1,(FW r))
MIDFUNC(1,fmov_0,(FW r))
{
r=f_writereg(r);
raw_fmov_0(r);
f_unlock(r);
}
MENDFUNC(1,fmov_0,(FW r))
MIDFUNC(2,fmov_rm,(FW r, MEMR m))
{
r=f_writereg(r);
raw_fmov_rm(r,m);
f_unlock(r);
}
MENDFUNC(2,fmov_rm,(FW r, MEMR m))
MIDFUNC(2,fmovi_rm,(FW r, MEMR m))
{
r=f_writereg(r);
raw_fmovi_rm(r,m);
f_unlock(r);
}
MENDFUNC(2,fmovi_rm,(FW r, MEMR m))
MIDFUNC(2,fmovi_mr,(MEMW m, FR r))
{
r=f_readreg(r);
raw_fmovi_mr(m,r);
f_unlock(r);
}
MENDFUNC(2,fmovi_mr,(MEMW m, FR r))
MIDFUNC(2,fmovs_rm,(FW r, MEMR m))
{
r=f_writereg(r);
raw_fmovs_rm(r,m);
f_unlock(r);
}
MENDFUNC(2,fmovs_rm,(FW r, MEMR m))
MIDFUNC(2,fmovs_mr,(MEMW m, FR r))
{
r=f_readreg(r);
raw_fmovs_mr(m,r);
f_unlock(r);
}
MENDFUNC(2,fmovs_mr,(MEMW m, FR r))
MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
{
r=f_readreg(r);
raw_fmov_ext_mr(m,r);
f_unlock(r);
}
MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
MIDFUNC(2,fmov_mr,(MEMW m, FR r))
{
r=f_readreg(r);
raw_fmov_mr(m,r);
f_unlock(r);
}
MENDFUNC(2,fmov_mr,(MEMW m, FR r))
MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
{
r=f_writereg(r);
raw_fmov_ext_rm(r,m);
f_unlock(r);
}
MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
MIDFUNC(2,fmov_rr,(FW d, FR s))
{
if (d==s) { /* How pointless! */
return;
}
#if USE_F_ALIAS
f_disassociate(d);
s=f_readreg(s);
live.fate[d].realreg=s;
live.fate[d].realind=live.fat[s].nholds;
live.fate[d].status=DIRTY;
live.fat[s].holds[live.fat[s].nholds]=d;
live.fat[s].nholds++;
f_unlock(s);
#else
s=f_readreg(s);
d=f_writereg(d);
raw_fmov_rr(d,s);
f_unlock(s);
f_unlock(d);
#endif
}
MENDFUNC(2,fmov_rr,(FW d, FR s))
MIDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
{
index=readreg(index,4);
raw_fldcw_m_indexed(index,base);
unlock2(index);
}
MENDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
MIDFUNC(1,ftst_r,(FR r))
{
r=f_readreg(r);
raw_ftst_r(r);
f_unlock(r);
}
MENDFUNC(1,ftst_r,(FR r))
MIDFUNC(0,dont_care_fflags,(void))
{
f_disassociate(FP_RESULT);
}
MENDFUNC(0,dont_care_fflags,(void))
MIDFUNC(2,fsqrt_rr,(FW d, FR s))
{
s=f_readreg(s);
d=f_writereg(d);
raw_fsqrt_rr(d,s);
f_unlock(s);
f_unlock(d);
}
MENDFUNC(2,fsqrt_rr,(FW d, FR s))
MIDFUNC(2,fabs_rr,(FW d, FR s))
{
s=f_readreg(s);
d=f_writereg(d);
raw_fabs_rr(d,s);
f_unlock(s);
f_unlock(d);
}
MENDFUNC(2,fabs_rr,(FW d, FR s))
MIDFUNC(2,fsin_rr,(FW d, FR s))
{
s=f_readreg(s);
d=f_writereg(d);
raw_fsin_rr(d,s);
f_unlock(s);
f_unlock(d);
}
MENDFUNC(2,fsin_rr,(FW d, FR s))
MIDFUNC(2,fcos_rr,(FW d, FR s))
{
s=f_readreg(s);
d=f_writereg(d);
raw_fcos_rr(d,s);
f_unlock(s);
f_unlock(d);
}
MENDFUNC(2,fcos_rr,(FW d, FR s))
MIDFUNC(2,ftwotox_rr,(FW d, FR s))
{
s=f_readreg(s);
d=f_writereg(d);
raw_ftwotox_rr(d,s);
f_unlock(s);
f_unlock(d);
}
MENDFUNC(2,ftwotox_rr,(FW d, FR s))
MIDFUNC(2,fetox_rr,(FW d, FR s))
{
s=f_readreg(s);
d=f_writereg(d);
raw_fetox_rr(d,s);
f_unlock(s);
f_unlock(d);
}
MENDFUNC(2,fetox_rr,(FW d, FR s))
MIDFUNC(2,frndint_rr,(FW d, FR s))
{
s=f_readreg(s);
d=f_writereg(d);
raw_frndint_rr(d,s);
f_unlock(s);
f_unlock(d);
}
MENDFUNC(2,frndint_rr,(FW d, FR s))
MIDFUNC(2,flog2_rr,(FW d, FR s))
{
s=f_readreg(s);
d=f_writereg(d);
raw_flog2_rr(d,s);
f_unlock(s);
f_unlock(d);
}
MENDFUNC(2,flog2_rr,(FW d, FR s))
MIDFUNC(2,fneg_rr,(FW d, FR s))
{
s=f_readreg(s);
d=f_writereg(d);
raw_fneg_rr(d,s);
f_unlock(s);
f_unlock(d);
}
MENDFUNC(2,fneg_rr,(FW d, FR s))
MIDFUNC(2,fadd_rr,(FRW d, FR s))
{
s=f_readreg(s);
d=f_rmw(d);
raw_fadd_rr(d,s);
f_unlock(s);
f_unlock(d);
}
MENDFUNC(2,fadd_rr,(FRW d, FR s))
MIDFUNC(2,fsub_rr,(FRW d, FR s))
{
s=f_readreg(s);
d=f_rmw(d);
raw_fsub_rr(d,s);
f_unlock(s);
f_unlock(d);
}
MENDFUNC(2,fsub_rr,(FRW d, FR s))
MIDFUNC(2,fcmp_rr,(FR d, FR s))
{
d=f_readreg(d);
s=f_readreg(s);
raw_fcmp_rr(d,s);
f_unlock(s);
f_unlock(d);
}
MENDFUNC(2,fcmp_rr,(FR d, FR s))
MIDFUNC(2,fdiv_rr,(FRW d, FR s))
{
s=f_readreg(s);
d=f_rmw(d);
raw_fdiv_rr(d,s);
f_unlock(s);
f_unlock(d);
}
MENDFUNC(2,fdiv_rr,(FRW d, FR s))
MIDFUNC(2,frem_rr,(FRW d, FR s))
{
s=f_readreg(s);
d=f_rmw(d);
raw_frem_rr(d,s);
f_unlock(s);
f_unlock(d);
}
MENDFUNC(2,frem_rr,(FRW d, FR s))
MIDFUNC(2,frem1_rr,(FRW d, FR s))
{
s=f_readreg(s);
d=f_rmw(d);
raw_frem1_rr(d,s);
f_unlock(s);
f_unlock(d);
}
MENDFUNC(2,frem1_rr,(FRW d, FR s))
MIDFUNC(2,fmul_rr,(FRW d, FR s))
{
s=f_readreg(s);
d=f_rmw(d);
raw_fmul_rr(d,s);
f_unlock(s);
f_unlock(d);
}
MENDFUNC(2,fmul_rr,(FRW d, FR s))
/********************************************************************
* Support functions exposed to gencomp. CREATE time *
********************************************************************/
void set_zero(int r, int tmp)
{
if (setzflg_uses_bsf)
bsf_l_rr(r,r);
else
simulate_bsf(tmp,r);
}
int kill_rodent(int r)
{
return KILLTHERAT &&
have_rat_stall &&
(live.state[r].status==INMEM ||
live.state[r].status==CLEAN ||
live.state[r].status==ISCONST ||
live.state[r].dirtysize==4);
}
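/* Added note (interpretation): kill_rodent() decides whether a
 * partial-register access is worth widening to a full 32-bit one.
 * On P6-class CPUs, writing a byte or word subregister and later
 * reading the full register triggers a partial register stall in
 * the register alias table (the "rat"); if the vreg lives in memory,
 * is clean, constant, or fully dirty anyway, a 32-bit access costs
 * nothing extra and avoids the stall. */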
uae_u32 get_const(int r)
{
Dif (!isconst(r)) {
write_log("Register %d should be constant, but isn't\n",r);
abort();
}
return live.state[r].val;
}
void sync_m68k_pc(void)
{
if (m68k_pc_offset) {
add_l_ri(PC_P,m68k_pc_offset);
comp_pc_p+=m68k_pc_offset;
m68k_pc_offset=0;
}
}
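/* Added note: the emulated PC is maintained lazily. While a block is
 * being translated, m68k_pc_offset accumulates how far decoding has
 * advanced past the value in PC_P; sync_m68k_pc() folds the delta
 * back in (a single add_l_ri, often itself folded away by the
 * constant machinery) only when an exact PC is required, e.g. before
 * an instruction that may trap. */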
/********************************************************************
* Scratch registers management *
********************************************************************/
struct scratch_t {
uae_u32 regs[VREGS];
fpu_register fregs[VFREGS];
};
static scratch_t scratch;
/********************************************************************
* Support functions exposed to newcpu *
********************************************************************/
static inline const char *str_on_off(bool b)
{
return b ? "on" : "off";
}
void compiler_init(void)
{
static bool initialized = false;
if (initialized)
return;
#if JIT_DEBUG
// JIT debug mode ?
JITDebug = PrefsFindBool("jitdebug");
#endif
write_log("<JIT compiler> : enable runtime disassemblers : %s\n", JITDebug ? "yes" : "no");
#ifdef USE_JIT_FPU
// Use JIT compiler for FPU instructions ?
avoid_fpu = !PrefsFindBool("jitfpu");
#else
// JIT FPU is always disabled
avoid_fpu = true;
#endif
write_log("<JIT compiler> : compile FPU instructions : %s\n", !avoid_fpu ? "yes" : "no");
// Get size of the translation cache (in KB)
cache_size = PrefsFindInt32("jitcachesize");
write_log("<JIT compiler> : requested translation cache size : %d KB\n", cache_size);
// Initialize target CPU (check for features, e.g. CMOV, rat stalls)
raw_init_cpu();
setzflg_uses_bsf = target_check_bsf();
write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");
write_log("<JIT compiler> : alignment for loops, jumps are %d, %d\n", align_loops, align_jumps);
// Translation cache flush mechanism
lazy_flush = PrefsFindBool("jitlazyflush");
write_log("<JIT compiler> : lazy translation cache invalidation : %s\n", str_on_off(lazy_flush));
flush_icache = lazy_flush ? flush_icache_lazy : flush_icache_hard;
// Compiler features
write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1));
write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS));
write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET));
#if USE_INLINING
follow_const_jumps = PrefsFindBool("jitinline");
#endif
write_log("<JIT compiler> : translate through constant jumps : %s\n", str_on_off(follow_const_jumps));
write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA));
// Build compiler tables
build_comp();
initialized = true;
#if PROFILE_UNTRANSLATED_INSNS
write_log("<JIT compiler> : gather statistics on untranslated insns count\n");
#endif
#if PROFILE_COMPILE_TIME
write_log("<JIT compiler> : gather statistics on translation time\n");
emul_start_time = clock();
#endif
}
void compiler_exit(void)
{
#if PROFILE_COMPILE_TIME
emul_end_time = clock();
#endif
// Deallocate translation cache
if (compiled_code) {
vm_release(compiled_code, cache_size * 1024);
compiled_code = 0;
}
// Deallocate popallspace
if (popallspace) {
vm_release(popallspace, POPALLSPACE_SIZE);
popallspace = 0;
}
#if PROFILE_COMPILE_TIME
write_log("### Compile Block statistics\n");
write_log("Number of calls to compile_block : %d\n", compile_count);
uae_u32 emul_time = emul_end_time - emul_start_time;
write_log("Total emulation time : %.1f sec\n", double(emul_time)/double(CLOCKS_PER_SEC));
write_log("Total compilation time : %.1f sec (%.1f%%)\n", double(compile_time)/double(CLOCKS_PER_SEC),
100.0*double(compile_time)/double(emul_time));
write_log("\n");
#endif
#if PROFILE_UNTRANSLATED_INSNS
uae_u64 untranslated_count = 0;
for (int i = 0; i < 65536; i++) {
opcode_nums[i] = i;
untranslated_count += raw_cputbl_count[i];
}
write_log("Sorting out untranslated instructions count...\n");
qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn);
write_log("\nRank Opc Count Name\n");
for (int i = 0; i < untranslated_top_ten; i++) {
uae_u32 count = raw_cputbl_count[opcode_nums[i]];
struct instr *dp;
struct mnemolookup *lookup;
if (!count)
break;
dp = table68k + opcode_nums[i];
for (lookup = lookuptab; lookup->mnemo != dp->mnemo; lookup++)
;
write_log("%03d: %04x %10lu %s\n", i, opcode_nums[i], count, lookup->name);
}
#endif
#if RECORD_REGISTER_USAGE
int reg_count_ids[16];
uint64 tot_reg_count = 0;
for (int i = 0; i < 16; i++) {
reg_count_ids[i] = i;
tot_reg_count += reg_count[i];
}
qsort(reg_count_ids, 16, sizeof(int), reg_count_compare);
uint64 cum_reg_count = 0;
for (int i = 0; i < 16; i++) {
int r = reg_count_ids[i];
cum_reg_count += reg_count[r];
printf("%c%d : %16ld %2.1f%% [%2.1f]\n", r < 8 ? 'D' : 'A', r % 8,
reg_count[r],
100.0*double(reg_count[r])/double(tot_reg_count),
100.0*double(cum_reg_count)/double(tot_reg_count));
}
#endif
}
bool compiler_use_jit(void)
{
// Check for the "jit" prefs item
if (!PrefsFindBool("jit"))
return false;
// Don't use JIT if translation cache size is less than MIN_CACHE_SIZE KB
if (PrefsFindInt32("jitcachesize") < MIN_CACHE_SIZE) {
write_log("<JIT compiler> : translation cache size is less than %d KB. Disabling JIT.\n", MIN_CACHE_SIZE);
return false;
}
// Enable JIT for 68020+ emulation only
if (CPUType < 2) {
write_log("<JIT compiler> : JIT is not supported in 680%d0 emulation mode, disabling.\n", CPUType);
return false;
}
return true;
}
void init_comp(void)
{
int i;
uae_s8* cb=can_byte;
uae_s8* cw=can_word;
uae_s8* au=always_used;
#if RECORD_REGISTER_USAGE
for (i=0;i<16;i++)
reg_count_local[i] = 0;
#endif
for (i=0;i<VREGS;i++) {
live.state[i].realreg=-1;
live.state[i].needflush=NF_SCRATCH;
live.state[i].val=0;
set_status(i,UNDEF);
}
for (i=0;i<VFREGS;i++) {
live.fate[i].status=UNDEF;
live.fate[i].realreg=-1;
live.fate[i].needflush=NF_SCRATCH;
}
for (i=0;i<VREGS;i++) {
if (i<16) { /* First 16 registers map to 68k registers */
live.state[i].mem=((uae_u32*)&regs)+i;
live.state[i].needflush=NF_TOMEM;
set_status(i,INMEM);
}
else
live.state[i].mem=scratch.regs+i;
}
live.state[PC_P].mem=(uae_u32*)&(regs.pc_p);
live.state[PC_P].needflush=NF_TOMEM;
set_const(PC_P,(uintptr)comp_pc_p);
live.state[FLAGX].mem=(uae_u32*)&(regflags.x);
live.state[FLAGX].needflush=NF_TOMEM;
set_status(FLAGX,INMEM);
live.state[FLAGTMP].mem=(uae_u32*)&(regflags.cznv);
live.state[FLAGTMP].needflush=NF_TOMEM;
set_status(FLAGTMP,INMEM);
live.state[NEXT_HANDLER].needflush=NF_HANDLER;
set_status(NEXT_HANDLER,UNDEF);
for (i=0;i<VFREGS;i++) {
if (i<8) { /* First 8 registers map to 68k FPU registers */
live.fate[i].mem=(uae_u32*)fpu_register_address(i);
live.fate[i].needflush=NF_TOMEM;
live.fate[i].status=INMEM;
}
else if (i==FP_RESULT) {
live.fate[i].mem=(uae_u32*)(&fpu.result);
live.fate[i].needflush=NF_TOMEM;
live.fate[i].status=INMEM;
}
else
live.fate[i].mem=(uae_u32*)(&scratch.fregs[i]);
}
for (i=0;i<N_REGS;i++) {
live.nat[i].touched=0;
live.nat[i].nholds=0;
live.nat[i].locked=0;
if (*cb==i) {
live.nat[i].canbyte=1; cb++;
} else live.nat[i].canbyte=0;
if (*cw==i) {
live.nat[i].canword=1; cw++;
} else live.nat[i].canword=0;
if (*au==i) {
live.nat[i].locked=1; au++;
}
}
for (i=0;i<N_FREGS;i++) {
live.fat[i].touched=0;
live.fat[i].nholds=0;
live.fat[i].locked=0;
}
touchcnt=1;
m68k_pc_offset=0;
live.flags_in_flags=TRASH;
live.flags_on_stack=VALID;
live.flags_are_important=1;
raw_fp_init();
}
/* Only do this if you really mean it! The next call should be to init!*/
void flush(int save_regs)
{
int i;
log_flush();
flush_flags(); /* low level */
sync_m68k_pc(); /* mid level */
if (save_regs) {
for (i=0;i<VFREGS;i++) {
if (live.fate[i].needflush==NF_SCRATCH ||
live.fate[i].status==CLEAN) {
f_disassociate(i);
}
}
for (i=0;i<VREGS;i++) {
if (live.state[i].needflush==NF_TOMEM) {
switch(live.state[i].status) {
case INMEM:
if (live.state[i].val) {
raw_add_l_mi((uintptr)live.state[i].mem,live.state[i].val);
log_vwrite(i);
live.state[i].val=0;
}
break;
case CLEAN:
case DIRTY:
remove_offset(i,-1); tomem(i); break;
case ISCONST:
if (i!=PC_P)
writeback_const(i);
break;
default: break;
}
Dif (live.state[i].val && i!=PC_P) {
write_log("Register %d still has val %x\n",
i,live.state[i].val);
}
}
}
for (i=0;i<VFREGS;i++) {
if (live.fate[i].needflush==NF_TOMEM &&
live.fate[i].status==DIRTY) {
f_evict(i);
}
}
raw_fp_cleanup_drop();
}
if (needflags) {
write_log("Warning! flush with needflags=1!\n");
}
}
static void flush_keepflags(void)
{
int i;
for (i=0;i<VFREGS;i++) {
if (live.fate[i].needflush==NF_SCRATCH ||
live.fate[i].status==CLEAN) {
f_disassociate(i);
}
}
for (i=0;i<VREGS;i++) {
if (live.state[i].needflush==NF_TOMEM) {
switch(live.state[i].status) {
case INMEM:
/* Can't adjust the offset here --- that needs "add" */
break;
case CLEAN:
case DIRTY:
remove_offset(i,-1); tomem(i); break;
case ISCONST:
if (i!=PC_P)
writeback_const(i);
break;
default: break;
}
}
}
for (i=0;i<VFREGS;i++) {
if (live.fate[i].needflush==NF_TOMEM &&
live.fate[i].status==DIRTY) {
f_evict(i);
}
}
raw_fp_cleanup_drop();
}
void freescratch(void)
{
int i;
for (i=0;i<N_REGS;i++)
if (live.nat[i].locked && i!=4)
write_log("Warning! %d is locked\n",i);
for (i=0;i<VREGS;i++)
if (live.state[i].needflush==NF_SCRATCH) {
forget_about(i);
}
for (i=0;i<VFREGS;i++)
if (live.fate[i].needflush==NF_SCRATCH) {
f_forget_about(i);
}
}
/********************************************************************
* Support functions, internal *
********************************************************************/
static void align_target(uae_u32 a)
{
if (!a)
return;
if (tune_nop_fillers)
raw_emit_nop_filler(a - (((uintptr)target) & (a - 1)));
else {
/* Fill with NOPs --- makes debugging with gdb easier */
while ((uintptr)target&(a-1))
*target++=0x90;
}
}
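/* Added note: align_target(a) assumes a is a power of two. With
 * tune_nop_fillers the pad is emitted as a few multi-byte NOP forms;
 * otherwise single 0x90 bytes are used, which keeps gdb disassembly
 * trivial. It is used below as align_target(align_jumps) before each
 * popall_* entry point so branch targets sit on cache-friendly
 * boundaries. */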
static __inline__ bool isinrom(uintptr addr)
{
return ((addr >= (uintptr)ROMBaseHost) && (addr < (uintptr)ROMBaseHost + ROMSize));
}
static void flush_all(void)
{
int i;
log_flush();
for (i=0;i<VREGS;i++)
if (live.state[i].status==DIRTY) {
if (!call_saved[live.state[i].realreg]) {
tomem(i);
}
}
for (i=0;i<VFREGS;i++)
if (f_isinreg(i))
f_evict(i);
raw_fp_cleanup_drop();
}
/* Make sure all registers that will get clobbered by a call are
safe and sound in memory */
static void prepare_for_call_1(void)
{
flush_all(); /* If there are registers that don't get clobbered,
* we should be a bit more selective here */
}
/* We will call a C routine in a moment. That will clobber all registers,
so we need to disassociate everything */
static void prepare_for_call_2(void)
{
int i;
for (i=0;i<N_REGS;i++)
if (!call_saved[i] && live.nat[i].nholds>0)
free_nreg(i);
for (i=0;i<N_FREGS;i++)
if (live.fat[i].nholds>0)
f_free_nreg(i);
live.flags_in_flags=TRASH; /* Note: We assume we already rescued the
flags at the very start of the call_r
functions! */
}
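/* Added note: a call out to C from translated code is a three-step
 * sequence, as in call_r_11 above:
 *
 *   prepare_for_call_1();  // flush live values to memory
 *   ... read/push argument nregs here ...
 *   prepare_for_call_2();  // free the caller-saved nregs
 *   raw_call_r(r);         // perform the call
 *
 * The split exists so arguments can still be read from their native
 * registers (and pushed, on stack-based conventions) between the two
 * steps. */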
/********************************************************************
* Memory access and related functions, CREATE time *
********************************************************************/
void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond)
{
next_pc_p=not_taken;
taken_pc_p=taken;
branch_cc=cond;
}
static uae_u32 get_handler_address(uae_u32 addr)
{
blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
return (uintptr)&(bi->direct_handler_to_use);
}
static uae_u32 get_handler(uae_u32 addr)
{
blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
return (uintptr)bi->direct_handler_to_use;
}
static void load_handler(int reg, uae_u32 addr)
{
mov_l_rm(reg,get_handler_address(addr));
}
/* This version assumes that it is writing *real* memory, and *will* fail
* if that assumption is wrong! No branches, no second chances, just
* straight go-for-it attitude */
static void writemem_real(int address, int source, int size, int tmp, int clobber)
{
int f=tmp;
if (clobber)
f=source;
switch(size) {
case 1: mov_b_bRr(address,source,MEMBaseDiff); break;
case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,MEMBaseDiff); break;
case 4: mov_l_rr(f,source); bswap_32(f); mov_l_bRr(address,f,MEMBaseDiff); break;
}
forget_about(tmp);
forget_about(f);
}
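/* Added note: 68k memory is big-endian and the x86 host is
 * little-endian, so word and long stores go through a scratch copy
 * plus a bswap. For size==4 the function above boils down to:
 *
 *   mov_l_rr(f, source);                 // keep source intact
 *   bswap_32(f);                         // host -> 68k byte order
 *   mov_l_bRr(address, f, MEMBaseDiff);  // store at host base+addr
 *
 * When the caller passes clobber!=0 it has promised that source may
 * be trashed, so f aliases source and the copy disappears. */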
void writebyte(int address, int source, int tmp)
{
writemem_real(address,source,1,tmp,0);
}
static __inline__ void writeword_general(int address, int source, int tmp,
int clobber)
{
writemem_real(address,source,2,tmp,clobber);
}
void writeword_clobber(int address, int source, int tmp)
{
writeword_general(address,source,tmp,1);
}
void writeword(int address, int source, int tmp)
{
writeword_general(address,source,tmp,0);
}
static __inline__ void writelong_general(int address, int source, int tmp,
int clobber)
{
writemem_real(address,source,4,tmp,clobber);
}
void writelong_clobber(int address, int source, int tmp)
{
writelong_general(address,source,tmp,1);
}
void writelong(int address, int source, int tmp)
{
writelong_general(address,source,tmp,0);
}
/* This version assumes that it is reading *real* memory, and *will* fail
* if that assumption is wrong! No branches, no second chances, just
* straight go-for-it attitude */
static void readmem_real(int address, int dest, int size, int tmp)
{
int f=tmp;
if (size==4 && address!=dest)
f=dest;
switch(size) {
case 1: mov_b_brR(dest,address,MEMBaseDiff); break;
case 2: mov_w_brR(dest,address,MEMBaseDiff); bswap_16(dest); break;
case 4: mov_l_brR(dest,address,MEMBaseDiff); bswap_32(dest); break;
}
forget_about(tmp);
}
void readbyte(int address, int dest, int tmp)
{
readmem_real(address,dest,1,tmp);
}
void readword(int address, int dest, int tmp)
{
readmem_real(address,dest,2,tmp);
}
void readlong(int address, int dest, int tmp)
{
readmem_real(address,dest,4,tmp);
}
void get_n_addr(int address, int dest, int tmp)
{
// convert the virtual 68k address in "address" into a host
// address in "dest"
#if REAL_ADDRESSING
mov_l_rr(dest, address);
#elif DIRECT_ADDRESSING
lea_l_brr(dest,address,MEMBaseDiff);
#endif
forget_about(tmp);
}
void get_n_addr_jmp(int address, int dest, int tmp)
{
/* For this, we need to get the same address as the rest of UAE
would --- otherwise we end up translating everything twice */
get_n_addr(address,dest,tmp);
}
/* base is a register, but dp is an actual value.
target is a register, as is tmp */
void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp)
{
int reg = (dp >> 12) & 15;
int regd_shift=(dp >> 9) & 3;
if (dp & 0x100) {
int ignorebase=(dp&0x80);
int ignorereg=(dp&0x40);
int addbase=0;
int outer=0;
if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4);
if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4);
if ((dp & 0x4) == 0) { /* add regd *before* the get_long */
if (!ignorereg) {
if ((dp & 0x800) == 0)
sign_extend_16_rr(target,reg);
else
mov_l_rr(target,reg);
shll_l_ri(target,regd_shift);
}
else
mov_l_ri(target,0);
/* target is now regd */
if (!ignorebase)
add_l(target,base);
add_l_ri(target,addbase);
if (dp&0x03) readlong(target,target,tmp);
} else { /* do the getlong first, then add regd */
if (!ignorebase) {
mov_l_rr(target,base);
add_l_ri(target,addbase);
}
else
mov_l_ri(target,addbase);
if (dp&0x03) readlong(target,target,tmp);
if (!ignorereg) {
if ((dp & 0x800) == 0)
sign_extend_16_rr(tmp,reg);
else
mov_l_rr(tmp,reg);
shll_l_ri(tmp,regd_shift);
/* tmp is now regd */
add_l(target,tmp);
}
}
add_l_ri(target,outer);
}
else { /* 68000 version */
if ((dp & 0x800) == 0) { /* Sign extend */
sign_extend_16_rr(target,reg);
lea_l_brr_indexed(target,base,target,1<<regd_shift,(uae_s32)((uae_s8)dp));
}
else {
lea_l_brr_indexed(target,base,reg,1<<regd_shift,(uae_s32)((uae_s8)dp));
}
}
forget_about(tmp);
}
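/* Added note: dp is the 68020 extension word. Field layout as
 * decoded above (see the 68020 manual for the authoritative
 * description):
 *
 *   bits 15-12  index register number (reg)
 *   bit  11     index size, 0 = sign-extended word, 1 = long
 *   bits 10-9   index scale (regd_shift, scale = 1 << shift)
 *   bit  8      0 = brief format, 1 = full format
 *   full format only:
 *     bit 7     base suppress       bit 6    index suppress
 *     bits 5-4  base displacement   bits 1-0 outer displacement /
 *               size                         memory indirection
 *     bit 2     post-indexed (add the index after the fetch)
 *
 * The generated code mirrors the CPU's pipeline: build the inner
 * sum, optionally do the memory indirection (readlong), then add
 * the outer displacement. */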
void set_cache_state(int enabled)
{
if (enabled!=letit)
flush_icache_hard(77);
letit=enabled;
}
int get_cache_state(void)
{
return letit;
}
uae_u32 get_jitted_size(void)
{
if (compiled_code)
return current_compile_p-compiled_code;
return 0;
}
const int CODE_ALLOC_MAX_ATTEMPTS = 10;
const int CODE_ALLOC_BOUNDARIES = 128 * 1024; // 128 KB
static uint8 *do_alloc_code(uint32 size, int depth)
{
#if defined(__linux__) && 0
/*
This is a really awful hack that is known to work on Linux at
least.
The trick here is to make sure the allocated cache is near the
code segment, and more precisely in the positive half of a
32-bit address space, i.e. addr < 0x80000000. Actually, it
turned out that a 32-bit binary run on AMD64 yields a cache
allocated around 0xa0000000, thus causing some troubles when
translating addresses from m68k to x86.
*/
static uint8 * code_base = NULL;
if (code_base == NULL) {
uintptr page_size = getpagesize();
uintptr boundaries = CODE_ALLOC_BOUNDARIES;
if (boundaries < page_size)
boundaries = page_size;
code_base = (uint8 *)sbrk(0);
for (int attempts = 0; attempts < CODE_ALLOC_MAX_ATTEMPTS; attempts++) {
if (vm_acquire_fixed(code_base, size) == 0) {
uint8 *code = code_base;
code_base += size;
return code;
}
code_base += boundaries;
}
return NULL;
}
if (vm_acquire_fixed(code_base, size) == 0) {
uint8 *code = code_base;
code_base += size;
return code;
}
if (depth >= CODE_ALLOC_MAX_ATTEMPTS)
return NULL;
return do_alloc_code(size, depth + 1);
#else
uint8 *code = (uint8 *)vm_acquire(size);
return code == VM_MAP_FAILED ? NULL : code;
#endif
}
static inline uint8 *alloc_code(uint32 size)
{
uint8 *ptr = do_alloc_code(size, 0);
/* allocated code must fit in 32-bit boundaries */
assert((uintptr)ptr <= 0xffffffff);
return ptr;
}
void alloc_cache(void)
{
if (compiled_code) {
flush_icache_hard(6);
vm_release(compiled_code, cache_size * 1024);
compiled_code = 0;
}
if (cache_size == 0)
return;
while (!compiled_code && cache_size) {
if ((compiled_code = alloc_code(cache_size * 1024)) == NULL) {
compiled_code = 0;
cache_size /= 2;
}
}
vm_protect(compiled_code, cache_size * 1024, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
if (compiled_code) {
write_log("<JIT compiler> : actual translation cache size : %d KB at 0x%08X\n", cache_size, compiled_code);
max_compile_start = compiled_code + cache_size*1024 - BYTES_PER_INST;
current_compile_p = compiled_code;
current_cache_size = 0;
}
}
extern void op_illg_1 (uae_u32 opcode) REGPARAM;
static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
{
uae_u32 k1 = 0;
uae_u32 k2 = 0;
#if USE_CHECKSUM_INFO
checksum_info *csi = bi->csi;
Dif(!csi) abort();
while (csi) {
uae_s32 len = csi->length;
uintptr tmp = (uintptr)csi->start_p;
#else
uae_s32 len = bi->len;
uintptr tmp = (uintptr)bi->min_pcp;
#endif
uae_u32*pos;
len += (tmp & 3);
tmp &= ~((uintptr)3);
pos = (uae_u32 *)tmp;
if (len >= 0 && len <= MAX_CHECKSUM_LEN) {
while (len > 0) {
k1 += *pos;
k2 ^= *pos;
pos++;
len -= 4;
}
}
#if USE_CHECKSUM_INFO
csi = csi->next;
}
#endif
*c1 = k1;
*c2 = k2;
}
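/* Added note: these checksums are the JIT's self-modifying-code
 * detection. Every translated block remembers two cheap sums (k1
 * additive, k2 xor) over the 68k code it was compiled from;
 * block_check_checksum() below recomputes them for a block in
 * BI_NEED_CHECK state and invalidates the translation when they no
 * longer match. */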
#if 0
static void show_checksum(CSI_TYPE* csi)
{
uae_u32 k1=0;
uae_u32 k2=0;
uae_s32 len=CSI_LENGTH(csi);
uae_u32 tmp=(uintptr)CSI_START(csi);
uae_u32* pos;
len+=(tmp&3);
tmp&=(~3);
pos=(uae_u32*)tmp;
if (len<0 || len>MAX_CHECKSUM_LEN) {
return;
}
else {
while (len>0) {
write_log("%08x ",*pos);
pos++;
len-=4;
}
write_log(" bla\n");
}
}
#endif
int check_for_cache_miss(void)
{
blockinfo* bi=get_blockinfo_addr(regs.pc_p);
if (bi) {
int cl=cacheline(regs.pc_p);
if (bi!=cache_tags[cl+1].bi) {
raise_in_cl_list(bi);
return 1;
}
}
return 0;
}
static void recompile_block(void)
{
/* An existing block's countdown code has expired. We need to make
sure that execute_normal doesn't refuse to recompile due to a
perceived cache miss... */
blockinfo* bi=get_blockinfo_addr(regs.pc_p);
Dif (!bi)
abort();
raise_in_cl_list(bi);
execute_normal();
return;
}
static void cache_miss(void)
{
blockinfo* bi=get_blockinfo_addr(regs.pc_p);
uae_u32 cl=cacheline(regs.pc_p);
blockinfo* bi2=get_blockinfo(cl);
if (!bi) {
execute_normal(); /* Compile this block now */
return;
}
Dif (!bi2 || bi==bi2) {
write_log("Unexplained cache miss %p %p\n",bi,bi2);
abort();
}
raise_in_cl_list(bi);
return;
}
static int called_check_checksum(blockinfo* bi);
static inline int block_check_checksum(blockinfo* bi)
{
uae_u32 c1,c2;
bool isgood;
if (bi->status!=BI_NEED_CHECK)
return 1; /* This block is in a checked state */
checksum_count++;
if (bi->c1 || bi->c2)
calc_checksum(bi,&c1,&c2);
else {
c1=c2=1; /* Make sure it doesn't match */
}
isgood=(c1==bi->c1 && c2==bi->c2);
if (isgood) {
/* This block is still OK. So we reactivate. Of course, that
means we have to move it into the needs-to-be-flushed list */
bi->handler_to_use=bi->handler;
set_dhtu(bi,bi->direct_handler);
bi->status=BI_CHECKING;
isgood=called_check_checksum(bi) != 0;
}
if (isgood) {
/* write_log("reactivate %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
c1,c2,bi->c1,bi->c2);*/
remove_from_list(bi);
add_to_active(bi);
raise_in_cl_list(bi);
bi->status=BI_ACTIVE;
}
else {
/* This block actually changed. We need to invalidate it,
and set it up to be recompiled */
/* write_log("discard %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
c1,c2,bi->c1,bi->c2); */
invalidate_block(bi);
raise_in_cl_list(bi);
}
return isgood;
}
static int called_check_checksum(blockinfo* bi)
{
int isgood=1;
int i;
for (i=0;i<2 && isgood;i++) {
if (bi->dep[i].jmp_off) {
isgood=block_check_checksum(bi->dep[i].target);
}
}
return isgood;
}
static void check_checksum(void)
{
blockinfo* bi=get_blockinfo_addr(regs.pc_p);
uae_u32 cl=cacheline(regs.pc_p);
blockinfo* bi2=get_blockinfo(cl);
/* These are not the droids you are looking for... */
if (!bi) {
/* Whoever is the primary target is in a dormant state, but
calling it was accidental, and we should just compile this
new block */
execute_normal();
return;
}
if (bi!=bi2) {
/* The block was hit accidentally, but it does exist. Cache miss */
cache_miss();
return;
}
if (!block_check_checksum(bi))
execute_normal();
}
static __inline__ void match_states(blockinfo* bi)
{
int i;
smallstate* s=&(bi->env);
if (bi->status==BI_NEED_CHECK) {
block_check_checksum(bi);
}
if (bi->status==BI_ACTIVE ||
bi->status==BI_FINALIZING) { /* Deal with the *promises* the
block makes (about not using
certain vregs) */
for (i=0;i<16;i++) {
if (s->virt[i]==L_UNNEEDED) {
// write_log("unneeded reg %d at %p\n",i,target);
COMPCALL(forget_about)(i); // FIXME
}
}
}
flush(1);
/* And now deal with the *demands* the block makes */
for (i=0;i<N_REGS;i++) {
int v=s->nat[i];
if (v>=0) {
// printf("Loading reg %d into %d at %p\n",v,i,target);
readreg_specific(v,4,i);
// do_load_reg(i,v);
// setlock(i);
}
}
for (i=0;i<N_REGS;i++) {
int v=s->nat[i];
if (v>=0) {
unlock2(i);
}
}
}
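/* Build the shared entry/exit stubs in popallspace: a single prologue
   (pushall_call_handler) that saves the callee-saved registers,
   reserves stack_space bytes and dispatches through cache_tags[], and
   one epilogue per exit helper (do_nothing, execute_normal,
   cache_miss, recompile_block, exec_nostats, check_checksum) that
   undoes the prologue before jumping to the C function. */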
static __inline__ void create_popalls(void)
{
int i,r;
if ((popallspace = alloc_code(POPALLSPACE_SIZE)) == NULL) {
write_log("FATAL: Could not allocate popallspace!\n");
abort();
}
vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_WRITE);
int stack_space = STACK_OFFSET;
for (i=0;i<N_REGS;i++) {
if (need_to_preserve[i])
stack_space += sizeof(void *);
}
stack_space %= STACK_ALIGN;
if (stack_space)
stack_space = STACK_ALIGN - stack_space;
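	/* Illustrative arithmetic (hypothetical values): with STACK_OFFSET == 4,
	   five preserved registers and 4-byte pointers, stack_space starts at
	   4 + 5*4 = 24; if STACK_ALIGN is 16, then 24 % 16 == 8, so an extra
	   16 - 8 == 8 bytes of padding keep block entry 16-byte aligned. */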
current_compile_p=popallspace;
set_target(current_compile_p);
	/* We need to guarantee 16-byte stack alignment on x86 at any point
	   within the JIT generated code. There are multiple possible exit
	   points but only a single entry. A "jmp" is used so that we don't
	   have to generate stack alignment in generated code that has to
	   call external functions (e.g. a generic instruction handler).
	   In summary, JIT generated code is not a leaf function, so we have
	   to deal with stack alignment here. */
align_target(align_jumps);
current_compile_p=get_target();
pushall_call_handler=get_target();
for (i=N_REGS;i--;) {
if (need_to_preserve[i])
raw_push_l_r(i);
}
raw_dec_sp(stack_space);
r=REG_PC_TMP;
raw_mov_l_rm(r,(uintptr)&regs.pc_p);
raw_and_l_ri(r,TAGMASK);
raw_jmp_m_indexed((uintptr)cache_tags,r,SIZEOF_VOID_P);
/* now the exit points */
align_target(align_jumps);
popall_do_nothing=get_target();
raw_inc_sp(stack_space);
for (i=0;i<N_REGS;i++) {
if (need_to_preserve[i])
raw_pop_l_r(i);
}
raw_jmp((uintptr)do_nothing);
align_target(align_jumps);
popall_execute_normal=get_target();
raw_inc_sp(stack_space);
for (i=0;i<N_REGS;i++) {
if (need_to_preserve[i])
raw_pop_l_r(i);
}
raw_jmp((uintptr)execute_normal);
align_target(align_jumps);
popall_cache_miss=get_target();
raw_inc_sp(stack_space);
for (i=0;i<N_REGS;i++) {
if (need_to_preserve[i])
raw_pop_l_r(i);
}
raw_jmp((uintptr)cache_miss);
align_target(align_jumps);
popall_recompile_block=get_target();
raw_inc_sp(stack_space);
for (i=0;i<N_REGS;i++) {
if (need_to_preserve[i])
raw_pop_l_r(i);
}
raw_jmp((uintptr)recompile_block);
align_target(align_jumps);
popall_exec_nostats=get_target();
raw_inc_sp(stack_space);
for (i=0;i<N_REGS;i++) {
if (need_to_preserve[i])
raw_pop_l_r(i);
}
raw_jmp((uintptr)exec_nostats);
align_target(align_jumps);
popall_check_checksum=get_target();
raw_inc_sp(stack_space);
for (i=0;i<N_REGS;i++) {
if (need_to_preserve[i])
raw_pop_l_r(i);
}
raw_jmp((uintptr)check_checksum);
// no need to further write into popallspace
vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_EXECUTE);
}
static __inline__ void reset_lists(void)
{
int i;
for (i=0;i<MAX_HOLD_BI;i++)
hold_bi[i]=NULL;
active=NULL;
dormant=NULL;
}
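/* Give a fresh blockinfo its two per-block trampolines: direct_pen
   reloads regs.pc_p from bi->pc_p and exits through
   popall_execute_normal (block must be recompiled), while direct_pcc
   does the same through popall_check_checksum (block must be
   re-validated first). */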
static void prepare_block(blockinfo* bi)
{
int i;
set_target(current_compile_p);
align_target(align_jumps);
bi->direct_pen=(cpuop_func *)get_target();
raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
raw_mov_l_mr((uintptr)&regs.pc_p,0);
raw_jmp((uintptr)popall_execute_normal);
align_target(align_jumps);
bi->direct_pcc=(cpuop_func *)get_target();
raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
raw_mov_l_mr((uintptr)&regs.pc_p,0);
raw_jmp((uintptr)popall_check_checksum);
current_compile_p=get_target();
bi->deplist=NULL;
for (i=0;i<2;i++) {
bi->dep[i].prev_p=NULL;
bi->dep[i].next=NULL;
}
bi->env=default_ss;
bi->status=BI_INVALID;
bi->havestate=0;
//bi->env=empty_ss;
}
// OPCODE is in big-endian (68k) order; apply cft_map() beforehand if needed.
static inline void reset_compop(int opcode)
{
compfunctbl[opcode] = NULL;
nfcompfunctbl[opcode] = NULL;
}
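/* Parse exactly four hex digits into an opcode value. Illustrative:
   read_opcode("a0bf") yields 0xa0bf, while any non-hex character in
   the first four positions (including a premature NUL) yields -1. */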
static int read_opcode(const char *p)
{
int opcode = 0;
for (int i = 0; i < 4; i++) {
int op = p[i];
switch (op) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
opcode = (opcode << 4) | (op - '0');
break;
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
opcode = (opcode << 4) | ((op - 'a') + 10);
break;
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
opcode = (opcode << 4) | ((op - 'A') + 10);
break;
default:
return -1;
}
}
return opcode;
}
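/* The "jitblacklist" pref is a list of 4-digit hex opcodes or opcode
   ranges separated by ',' or ';'. For example (illustrative values
   only), "f200-f2ff;a07b" would exclude the range 0xf200-0xf2ff and
   the single opcode 0xa07b from compilation. */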
static bool merge_blacklist()
{
const char *blacklist = PrefsFindString("jitblacklist");
if (blacklist) {
const char *p = blacklist;
for (;;) {
if (*p == 0)
return true;
int opcode1 = read_opcode(p);
if (opcode1 < 0)
return false;
p += 4;
int opcode2 = opcode1;
if (*p == '-') {
p++;
opcode2 = read_opcode(p);
if (opcode2 < 0)
return false;
p += 4;
}
			if (*p == 0 || *p == ',' || *p == ';') {
				write_log("<JIT compiler> : blacklist opcodes : %04x-%04x\n", opcode1, opcode2);
				for (int opcode = opcode1; opcode <= opcode2; opcode++)
					reset_compop(cft_map(opcode));
				if (*p == 0)
					return true;
				p++;	/* skip the ',' or ';' separator */
				continue;
			}
			return false;
}
}
return true;
}
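/* Populate the opcode dispatch tables: every opcode starts out
   uncompilable and trapping, then the flag-producing and no-flags
   compile tables are merged in, interpreter fallbacks are recorded in
   nfcpufunctbl[], and finally each opcode inherits the properties of
   its table68k handler entry before the user blacklist is applied. */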
void build_comp(void)
{
int i;
int jumpcount=0;
unsigned long opcode;
struct comptbl* tbl=op_smalltbl_0_comp_ff;
struct comptbl* nftbl=op_smalltbl_0_comp_nf;
int count;
unsigned int cpu_level = 0; // 68000 (default)
if (CPUType == 4)
cpu_level = 4; // 68040 with FPU
else {
if (FPUType)
cpu_level = 3; // 68020 with FPU
else if (CPUType >= 2)
cpu_level = 2; // 68020
else if (CPUType == 1)
cpu_level = 1;
}
struct cputbl *nfctbl = (
cpu_level == 4 ? op_smalltbl_0_nf
: cpu_level == 3 ? op_smalltbl_1_nf
: cpu_level == 2 ? op_smalltbl_2_nf
: cpu_level == 1 ? op_smalltbl_3_nf
: op_smalltbl_4_nf);
write_log ("<JIT compiler> : building compiler function tables\n");
for (opcode = 0; opcode < 65536; opcode++) {
reset_compop(opcode);
nfcpufunctbl[opcode] = op_illg_1;
prop[opcode].use_flags = 0x1f;
prop[opcode].set_flags = 0x1f;
prop[opcode].cflow = fl_trap; // ILLEGAL instructions do trap
}
for (i = 0; tbl[i].opcode < 65536; i++) {
int cflow = table68k[tbl[i].opcode].cflow;
if (follow_const_jumps && (tbl[i].specific & 16))
cflow = fl_const_jump;
else
cflow &= ~fl_const_jump;
prop[cft_map(tbl[i].opcode)].cflow = cflow;
int uses_fpu = tbl[i].specific & 32;
if (uses_fpu && avoid_fpu)
compfunctbl[cft_map(tbl[i].opcode)] = NULL;
else
compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler;
}
	for (i = 0; nftbl[i].opcode < 65536; i++) {
		int uses_fpu = nftbl[i].specific & 32;
if (uses_fpu && avoid_fpu)
nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL;
else
nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler;
nfcpufunctbl[cft_map(nftbl[i].opcode)] = nfctbl[i].handler;
}
for (i = 0; nfctbl[i].handler; i++) {
nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
}
for (opcode = 0; opcode < 65536; opcode++) {
compop_func *f;
compop_func *nff;
cpuop_func *nfcf;
int isaddx,cflow;
if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level)
continue;
if (table68k[opcode].handler != -1) {
f = compfunctbl[cft_map(table68k[opcode].handler)];
nff = nfcompfunctbl[cft_map(table68k[opcode].handler)];
nfcf = nfcpufunctbl[cft_map(table68k[opcode].handler)];
cflow = prop[cft_map(table68k[opcode].handler)].cflow;
isaddx = prop[cft_map(table68k[opcode].handler)].is_addx;
prop[cft_map(opcode)].cflow = cflow;
prop[cft_map(opcode)].is_addx = isaddx;
compfunctbl[cft_map(opcode)] = f;
nfcompfunctbl[cft_map(opcode)] = nff;
Dif (nfcf == op_illg_1)
abort();
nfcpufunctbl[cft_map(opcode)] = nfcf;
}
prop[cft_map(opcode)].set_flags = table68k[opcode].flagdead;
prop[cft_map(opcode)].use_flags = table68k[opcode].flaglive;
/* Unconditional jumps don't evaluate condition codes, so they
* don't actually use any flags themselves */
if (prop[cft_map(opcode)].cflow & fl_const_jump)
prop[cft_map(opcode)].use_flags = 0;
}
	for (i = 0; nfctbl[i].handler != NULL; i++) {
		if (nfctbl[i].specific)
			nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
}
/* Merge in blacklist */
if (!merge_blacklist())
write_log("<JIT compiler> : blacklist merge failure!\n");
count=0;
for (opcode = 0; opcode < 65536; opcode++) {
if (compfunctbl[cft_map(opcode)])
count++;
}
write_log("<JIT compiler> : supposedly %d compileable opcodes!\n",count);
/* Initialise state */
create_popalls();
alloc_cache();
reset_lists();
for (i=0;i<TAGSIZE;i+=2) {
cache_tags[i].handler=(cpuop_func *)popall_execute_normal;
cache_tags[i+1].bi=NULL;
}
#if 0
for (i=0;i<N_REGS;i++) {
empty_ss.nat[i].holds=-1;
empty_ss.nat[i].validsize=0;
empty_ss.nat[i].dirtysize=0;
}
#endif
for (i=0;i<VREGS;i++) {
empty_ss.virt[i]=L_NEEDED;
}
for (i=0;i<N_REGS;i++) {
empty_ss.nat[i]=L_UNKNOWN;
}
default_ss=empty_ss;
}
static void flush_icache_none(int n)
{
/* Nothing to do. */
}
static void flush_icache_hard(int n)
{
blockinfo* bi, *dbi;
hard_flush_count++;
#if 0
write_log("Flush Icache_hard(%d/%x/%p), %u KB\n",
n,regs.pc,regs.pc_p,current_cache_size/1024);
current_cache_size = 0;
#endif
bi=active;
while(bi) {
cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
dbi=bi; bi=bi->next;
free_blockinfo(dbi);
}
bi=dormant;
while(bi) {
cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
dbi=bi; bi=bi->next;
free_blockinfo(dbi);
}
reset_lists();
if (!compiled_code)
return;
current_compile_p=compiled_code;
SPCFLAGS_SET( SPCFLAG_JIT_EXEC_RETURN ); /* To get out of compiled code */
}
/* "Soft flushing" --- instead of actually throwing everything away,
we simply mark everything as "needs to be checked".
*/
static inline void flush_icache_lazy(int n)
{
blockinfo* bi;
blockinfo* bi2;
soft_flush_count++;
if (!active)
return;
bi=active;
while (bi) {
uae_u32 cl=cacheline(bi->pc_p);
if (bi->status==BI_INVALID ||
bi->status==BI_NEED_RECOMP) {
if (bi==cache_tags[cl+1].bi)
cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
bi->handler_to_use=(cpuop_func *)popall_execute_normal;
set_dhtu(bi,bi->direct_pen);
bi->status=BI_INVALID;
}
else {
if (bi==cache_tags[cl+1].bi)
cache_tags[cl].handler=(cpuop_func *)popall_check_checksum;
bi->handler_to_use=(cpuop_func *)popall_check_checksum;
set_dhtu(bi,bi->direct_pcc);
bi->status=BI_NEED_CHECK;
}
bi2=bi;
bi=bi->next;
}
/* bi2 is now the last entry in the active list */
bi2->next=dormant;
if (dormant)
dormant->prev_p=&(bi2->next);
dormant=active;
active->prev_p=&dormant;
active=NULL;
}
void flush_icache_range(uae_u8 *start_p, uae_u32 length)
{
if (!active)
return;
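	/* Invalidate only the blocks whose recorded 68k source range may
	   intersect [start_p, start_p + length): with LAZY_FLUSH_ICACHE_RANGE
	   such blocks are merely demoted to the dormant list in needs-check
	   or invalid state; without it we fall back to a full flush. */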
#if LAZY_FLUSH_ICACHE_RANGE
blockinfo *bi = active;
while (bi) {
#if USE_CHECKSUM_INFO
bool candidate = false;
for (checksum_info *csi = bi->csi; csi; csi = csi->next) {
if (((start_p - csi->start_p) < csi->length) ||
((csi->start_p - start_p) < length)) {
candidate = true;
break;
}
}
#else
// Assume system is consistent and would invalidate the right range
const bool candidate = (bi->pc_p - start_p) < length;
#endif
blockinfo *dbi = bi;
bi = bi->next;
if (candidate) {
uae_u32 cl = cacheline(dbi->pc_p);
if (dbi->status == BI_INVALID || dbi->status == BI_NEED_RECOMP) {
if (dbi == cache_tags[cl+1].bi)
cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
dbi->handler_to_use = (cpuop_func *)popall_execute_normal;
set_dhtu(dbi, dbi->direct_pen);
dbi->status = BI_INVALID;
}
else {
if (dbi == cache_tags[cl+1].bi)
cache_tags[cl].handler = (cpuop_func *)popall_check_checksum;
dbi->handler_to_use = (cpuop_func *)popall_check_checksum;
set_dhtu(dbi, dbi->direct_pcc);
dbi->status = BI_NEED_CHECK;
}
remove_from_list(dbi);
add_to_dormant(dbi);
}
}
return;
#endif
flush_icache(-1);
}
static void catastrophe(void)
{
abort();
}
int failure;
#define TARGET_M68K 0
#define TARGET_POWERPC 1
#define TARGET_X86 2
#define TARGET_X86_64 3
#if defined(i386) || defined(__i386__)
#define TARGET_NATIVE TARGET_X86
#endif
#if defined(powerpc) || defined(__powerpc__)
#define TARGET_NATIVE TARGET_POWERPC
#endif
#if defined(x86_64) || defined(__x86_64__)
#define TARGET_NATIVE TARGET_X86_64
#endif
#ifdef ENABLE_MON
static uae_u32 mon_read_byte_jit(uintptr addr)
{
uae_u8 *m = (uae_u8 *)addr;
return (uintptr)(*m);
}
static void mon_write_byte_jit(uintptr addr, uae_u32 b)
{
uae_u8 *m = (uae_u8 *)addr;
*m = b;
}
#endif
void disasm_block(int target, uint8 * start, size_t length)
{
if (!JITDebug)
return;
#if defined(JIT_DEBUG) && defined(ENABLE_MON)
char disasm_str[200];
	sprintf(disasm_str, "%s $%x $%x",
			target == TARGET_M68K ? "d68" :
			target == TARGET_X86 ? "d86" :
			target == TARGET_X86_64 ? "d8664" :
			target == TARGET_POWERPC ? "d" : "x",
			(unsigned int)(uintptr)start, (unsigned int)(uintptr)(start + length - 1));
uae_u32 (*old_mon_read_byte)(uintptr) = mon_read_byte;
void (*old_mon_write_byte)(uintptr, uae_u32) = mon_write_byte;
mon_read_byte = mon_read_byte_jit;
mon_write_byte = mon_write_byte_jit;
const char *arg[5] = {"mon", "-m", "-r", disasm_str, NULL};
mon(4, arg);
mon_read_byte = old_mon_read_byte;
mon_write_byte = old_mon_write_byte;
#endif
}
static void disasm_native_block(uint8 *start, size_t length)
{
disasm_block(TARGET_NATIVE, start, length);
}
static void disasm_m68k_block(uint8 *start, size_t length)
{
disasm_block(TARGET_M68K, start, length);
}
#ifdef HAVE_GET_WORD_UNSWAPPED
# define DO_GET_OPCODE(a) (do_get_mem_word_unswapped((uae_u16 *)(a)))
#else
# define DO_GET_OPCODE(a) (do_get_mem_word((uae_u16 *)(a)))
#endif
#if JIT_DEBUG
static uae_u8 *last_regs_pc_p = 0;
static uae_u8 *last_compiled_block_addr = 0;
void compiler_dumpstate(void)
{
if (!JITDebug)
return;
write_log("### Host addresses\n");
write_log("MEM_BASE : %x\n", MEMBaseDiff);
write_log("PC_P : %p\n", &regs.pc_p);
write_log("SPCFLAGS : %p\n", &regs.spcflags);
write_log("D0-D7 : %p-%p\n", &regs.regs[0], &regs.regs[7]);
write_log("A0-A7 : %p-%p\n", &regs.regs[8], &regs.regs[15]);
write_log("\n");
write_log("### M68k processor state\n");
m68k_dumpstate(0);
write_log("\n");
write_log("### Block in Mac address space\n");
write_log("M68K block : %p\n",
(void *)(uintptr)get_virtual_address(last_regs_pc_p));
write_log("Native block : %p (%d bytes)\n",
(void *)(uintptr)get_virtual_address(last_compiled_block_addr),
get_blockinfo_addr(last_regs_pc_p)->direct_handler_size);
write_log("\n");
}
#endif
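/* Translate one recorded trace: walk pc_hist backwards to compute the
   per-instruction live-flag sets, emit optional countdown code, then
   either compile each opcode through comptbl[] or fall back to a call
   into the interpreter's cputbl[], and finally wire up the block's
   exits, checksums and list membership. */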
static void compile_block(cpu_history* pc_hist, int blocklen)
{
if (letit && compiled_code) {
#if PROFILE_COMPILE_TIME
compile_count++;
clock_t start_time = clock();
#endif
#if JIT_DEBUG
bool disasm_block = false;
#endif
/* OK, here we need to 'compile' a block */
int i;
int r;
int was_comp=0;
uae_u8 liveflags[MAXRUN+1];
#if USE_CHECKSUM_INFO
bool trace_in_rom = isinrom((uintptr)pc_hist[0].location);
uintptr max_pcp=(uintptr)pc_hist[blocklen - 1].location;
uintptr min_pcp=max_pcp;
#else
uintptr max_pcp=(uintptr)pc_hist[0].location;
uintptr min_pcp=max_pcp;
#endif
uae_u32 cl=cacheline(pc_hist[0].location);
void* specflags=(void*)&regs.spcflags;
blockinfo* bi=NULL;
blockinfo* bi2;
int extra_len=0;
redo_current_block=0;
if (current_compile_p>=max_compile_start)
flush_icache_hard(7);
alloc_blockinfos();
bi=get_blockinfo_addr_new(pc_hist[0].location,0);
bi2=get_blockinfo(cl);
optlev=bi->optlevel;
if (bi->status!=BI_INVALID) {
Dif (bi!=bi2) {
/* I don't think it can happen anymore. Shouldn't, in
any case. So let's make sure... */
write_log("WOOOWOO count=%d, ol=%d %p %p\n",
bi->count,bi->optlevel,bi->handler_to_use,
cache_tags[cl].handler);
abort();
}
Dif (bi->count!=-1 && bi->status!=BI_NEED_RECOMP) {
write_log("bi->count=%d, bi->status=%d\n",bi->count,bi->status);
/* What the heck? We are not supposed to be here! */
abort();
}
}
if (bi->count==-1) {
optlev++;
while (!optcount[optlev])
optlev++;
bi->count=optcount[optlev]-1;
}
current_block_pc_p=(uintptr)pc_hist[0].location;
remove_deps(bi); /* We are about to create new code */
bi->optlevel=optlev;
bi->pc_p=(uae_u8*)pc_hist[0].location;
#if USE_CHECKSUM_INFO
free_checksum_info_chain(bi->csi);
bi->csi = NULL;
#endif
liveflags[blocklen]=0x1f; /* All flags needed afterwards */
i=blocklen;
while (i--) {
uae_u16* currpcp=pc_hist[i].location;
uae_u32 op=DO_GET_OPCODE(currpcp);
#if USE_CHECKSUM_INFO
trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp);
if (follow_const_jumps && is_const_jump(op)) {
checksum_info *csi = alloc_checksum_info();
csi->start_p = (uae_u8 *)min_pcp;
csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
csi->next = bi->csi;
bi->csi = csi;
max_pcp = (uintptr)currpcp;
}
min_pcp = (uintptr)currpcp;
#else
if ((uintptr)currpcp<min_pcp)
min_pcp=(uintptr)currpcp;
if ((uintptr)currpcp>max_pcp)
max_pcp=(uintptr)currpcp;
#endif
liveflags[i]=((liveflags[i+1]&
(~prop[op].set_flags))|
prop[op].use_flags);
if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0)
liveflags[i]&= ~FLAG_Z;
}
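	/* Liveness recurrence: liveflags[i] =
	   (liveflags[i+1] & ~set_flags[i]) | use_flags[i]. For example, if
	   the successor needs only {C,Z} and insn i sets all of {C,Z,N,V}
	   while using {X}, then liveflags[i] = {X}; later,
	   needed_flags = liveflags[i+1] & set_flags[i] decides whether
	   insn i may be compiled with its no-flags handler. */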
#if USE_CHECKSUM_INFO
checksum_info *csi = alloc_checksum_info();
csi->start_p = (uae_u8 *)min_pcp;
csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
csi->next = bi->csi;
bi->csi = csi;
#endif
bi->needed_flags=liveflags[0];
align_target(align_loops);
was_comp=0;
bi->direct_handler=(cpuop_func *)get_target();
set_dhtu(bi,bi->direct_handler);
bi->status=BI_COMPILING;
current_block_start_target=(uintptr)get_target();
log_startblock();
if (bi->count>=0) { /* Need to generate countdown code */
raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
raw_sub_l_mi((uintptr)&(bi->count),1);
raw_jl((uintptr)popall_recompile_block);
}
if (optlev==0) { /* No need to actually translate */
/* Execute normally without keeping stats */
raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
raw_jmp((uintptr)popall_exec_nostats);
}
else {
reg_alloc_run=0;
next_pc_p=0;
taken_pc_p=0;
branch_cc=0;
comp_pc_p=(uae_u8*)pc_hist[0].location;
init_comp();
was_comp=1;
#ifdef USE_CPU_EMUL_SERVICES
raw_sub_l_mi((uintptr)&emulated_ticks,blocklen);
raw_jcc_b_oponly(NATIVE_CC_GT);
uae_s8 *branchadd=(uae_s8*)get_target();
emit_byte(0);
raw_call((uintptr)cpu_do_check_ticks);
*branchadd=(uintptr)get_target()-((uintptr)branchadd+1);
#endif
#if JIT_DEBUG
if (JITDebug) {
raw_mov_l_mi((uintptr)&last_regs_pc_p,(uintptr)pc_hist[0].location);
raw_mov_l_mi((uintptr)&last_compiled_block_addr,current_block_start_target);
}
#endif
for (i=0;i<blocklen &&
get_target_noopt()<max_compile_start;i++) {
cpuop_func **cputbl;
compop_func **comptbl;
uae_u32 opcode=DO_GET_OPCODE(pc_hist[i].location);
needed_flags=(liveflags[i+1] & prop[opcode].set_flags);
if (!needed_flags) {
cputbl=nfcpufunctbl;
comptbl=nfcompfunctbl;
}
else {
cputbl=cpufunctbl;
comptbl=compfunctbl;
}
#if FLIGHT_RECORDER
{
mov_l_ri(S1, get_virtual_address((uae_u8 *)(pc_hist[i].location)) | 1);
clobber_flags();
remove_all_offsets();
int arg = readreg_specific(S1,4,REG_PAR1);
prepare_for_call_1();
unlock2(arg);
prepare_for_call_2();
raw_call((uintptr)m68k_record_step);
}
#endif
failure = 1; // gb-- defaults to failure state
if (comptbl[opcode] && optlev>1) {
failure=0;
if (!was_comp) {
comp_pc_p=(uae_u8*)pc_hist[i].location;
init_comp();
}
was_comp=1;
comptbl[opcode](opcode);
freescratch();
if (!(liveflags[i+1] & FLAG_CZNV)) {
/* We can forget about flags */
dont_care_flags();
}
#if INDIVIDUAL_INST
flush(1);
nop();
flush(1);
was_comp=0;
#endif
}
if (failure) {
if (was_comp) {
flush(1);
was_comp=0;
}
raw_mov_l_ri(REG_PAR1,(uae_u32)opcode);
#if USE_NORMAL_CALLING_CONVENTION
raw_push_l_r(REG_PAR1);
#endif
raw_mov_l_mi((uintptr)&regs.pc_p,
(uintptr)pc_hist[i].location);
raw_call((uintptr)cputbl[opcode]);
#if PROFILE_UNTRANSLATED_INSNS
// raw_cputbl_count[] is indexed with plain opcode (in m68k order)
raw_add_l_mi((uintptr)&raw_cputbl_count[cft_map(opcode)],1);
#endif
#if USE_NORMAL_CALLING_CONVENTION
raw_inc_sp(4);
#endif
if (i < blocklen - 1) {
uae_s8* branchadd;
raw_mov_l_rm(0,(uintptr)specflags);
raw_test_l_rr(0,0);
raw_jz_b_oponly();
branchadd=(uae_s8 *)get_target();
emit_byte(0);
raw_jmp((uintptr)popall_do_nothing);
*branchadd=(uintptr)get_target()-(uintptr)branchadd-1;
}
}
}
#if 1 /* This isn't completely kosher yet; it really needs to be
		 integrated into a general inter-block-dependency scheme */
if (next_pc_p && taken_pc_p &&
was_comp && taken_pc_p==current_block_pc_p) {
blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0);
blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0);
uae_u8 x=bi1->needed_flags;
if (x==0xff || 1) { /* To be on the safe side */
uae_u16* next=(uae_u16*)next_pc_p;
uae_u32 op=DO_GET_OPCODE(next);
x=0x1f;
x&=(~prop[op].set_flags);
x|=prop[op].use_flags;
}
x|=bi2->needed_flags;
if (!(x & FLAG_CZNV)) {
/* We can forget about flags */
dont_care_flags();
extra_len+=2; /* The next instruction now is part of this
block */
}
}
#endif
log_flush();
if (next_pc_p) { /* A branch was registered */
uintptr t1=next_pc_p;
uintptr t2=taken_pc_p;
int cc=branch_cc;
uae_u32* branchadd;
uae_u32* tba;
bigstate tmp;
blockinfo* tbi;
if (taken_pc_p<next_pc_p) {
/* backward branch. Optimize for the "taken" case ---
which means the raw_jcc should fall through when
the 68k branch is taken. */
t1=taken_pc_p;
t2=next_pc_p;
cc=branch_cc^1;
}
tmp=live; /* ouch! This is big... */
raw_jcc_l_oponly(cc);
branchadd=(uae_u32*)get_target();
emit_long(0);
/* predicted outcome */
tbi=get_blockinfo_addr_new((void*)t1,1);
match_states(tbi);
raw_cmp_l_mi((uintptr)specflags,0);
raw_jcc_l_oponly(4);
tba=(uae_u32*)get_target();
emit_long(get_handler(t1)-((uintptr)tba+4));
raw_mov_l_mi((uintptr)&regs.pc_p,t1);
flush_reg_count();
raw_jmp((uintptr)popall_do_nothing);
create_jmpdep(bi,0,tba,t1);
align_target(align_jumps);
/* not-predicted outcome */
*branchadd=(uintptr)get_target()-((uintptr)branchadd+4);
live=tmp; /* Ouch again */
tbi=get_blockinfo_addr_new((void*)t2,1);
match_states(tbi);
//flush(1); /* Can only get here if was_comp==1 */
raw_cmp_l_mi((uintptr)specflags,0);
raw_jcc_l_oponly(4);
tba=(uae_u32*)get_target();
emit_long(get_handler(t2)-((uintptr)tba+4));
raw_mov_l_mi((uintptr)&regs.pc_p,t2);
flush_reg_count();
raw_jmp((uintptr)popall_do_nothing);
create_jmpdep(bi,1,tba,t2);
}
else
{
if (was_comp) {
flush(1);
}
flush_reg_count();
/* Let's find out where next_handler is... */
if (was_comp && isinreg(PC_P)) {
r=live.state[PC_P].realreg;
raw_and_l_ri(r,TAGMASK);
int r2 = (r==0) ? 1 : 0;
raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
raw_cmp_l_mi((uintptr)specflags,0);
raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ);
raw_jmp_r(r2);
}
else if (was_comp && isconst(PC_P)) {
uae_u32 v=live.state[PC_P].val;
uae_u32* tba;
blockinfo* tbi;
tbi=get_blockinfo_addr_new((void*)(uintptr)v,1);
match_states(tbi);
raw_cmp_l_mi((uintptr)specflags,0);
raw_jcc_l_oponly(4);
tba=(uae_u32*)get_target();
emit_long(get_handler(v)-((uintptr)tba+4));
raw_mov_l_mi((uintptr)&regs.pc_p,v);
raw_jmp((uintptr)popall_do_nothing);
create_jmpdep(bi,0,tba,v);
}
else {
r=REG_PC_TMP;
raw_mov_l_rm(r,(uintptr)&regs.pc_p);
raw_and_l_ri(r,TAGMASK);
int r2 = (r==0) ? 1 : 0;
raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
raw_cmp_l_mi((uintptr)specflags,0);
raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ);
raw_jmp_r(r2);
}
}
}
#if USE_MATCH
if (callers_need_recompile(&live,&(bi->env))) {
mark_callers_recompile(bi);
}
big_to_small_state(&live,&(bi->env));
#endif
#if USE_CHECKSUM_INFO
remove_from_list(bi);
if (trace_in_rom) {
// No need to checksum that block trace on cache invalidation
free_checksum_info_chain(bi->csi);
bi->csi = NULL;
add_to_dormant(bi);
}
else {
calc_checksum(bi,&(bi->c1),&(bi->c2));
add_to_active(bi);
}
#else
if (next_pc_p+extra_len>=max_pcp &&
next_pc_p+extra_len<max_pcp+LONGEST_68K_INST)
max_pcp=next_pc_p+extra_len; /* extra_len covers flags magic */
else
max_pcp+=LONGEST_68K_INST;
bi->len=max_pcp-min_pcp;
bi->min_pcp=min_pcp;
remove_from_list(bi);
if (isinrom(min_pcp) && isinrom(max_pcp)) {
add_to_dormant(bi); /* No need to checksum it on cache flush.
Please don't start changing ROMs in
flight! */
}
else {
calc_checksum(bi,&(bi->c1),&(bi->c2));
add_to_active(bi);
}
#endif
current_cache_size += get_target() - (uae_u8 *)current_compile_p;
#if JIT_DEBUG
if (JITDebug)
bi->direct_handler_size = get_target() - (uae_u8 *)current_block_start_target;
if (JITDebug && disasm_block) {
uaecptr block_addr = start_pc + ((char *)pc_hist[0].location - (char *)start_pc_p);
D(bug("M68K block @ 0x%08x (%d insns)\n", block_addr, blocklen));
uae_u32 block_size = ((uae_u8 *)pc_hist[blocklen - 1].location - (uae_u8 *)pc_hist[0].location) + 1;
disasm_m68k_block((uae_u8 *)pc_hist[0].location, block_size);
D(bug("Compiled block @ 0x%08x\n", pc_hist[0].location));
disasm_native_block((uae_u8 *)current_block_start_target, bi->direct_handler_size);
getchar();
}
#endif
log_dump();
align_target(align_jumps);
/* This is the non-direct handler */
bi->handler=
bi->handler_to_use=(cpuop_func *)get_target();
raw_cmp_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
raw_jnz((uintptr)popall_cache_miss);
comp_pc_p=(uae_u8*)pc_hist[0].location;
bi->status=BI_FINALIZING;
init_comp();
match_states(bi);
flush(1);
raw_jmp((uintptr)bi->direct_handler);
current_compile_p=get_target();
raise_in_cl_list(bi);
/* We will flush soon, anyway, so let's do it now */
if (current_compile_p>=max_compile_start)
flush_icache_hard(7);
bi->status=BI_ACTIVE;
if (redo_current_block)
block_need_recompile(bi);
#if PROFILE_COMPILE_TIME
compile_time += (clock() - start_time);
#endif
}
/* Account for compilation time */
cpu_do_check_ticks();
}
void do_nothing(void)
{
/* What did you expect this to do? */
}
void exec_nostats(void)
{
for (;;) {
uae_u32 opcode = GET_OPCODE;
#if FLIGHT_RECORDER
m68k_record_step(m68k_getpc());
#endif
(*cpufunctbl[opcode])(opcode);
cpu_check_ticks();
if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) {
return; /* We will deal with the spcflags in the caller */
}
}
}
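/* Interpret instructions while recording their locations in pc_hist;
   once a block-ending instruction, a special flag or the MAXRUN limit
   is hit, hand the recorded trace to compile_block(). */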
void execute_normal(void)
{
if (!check_for_cache_miss()) {
cpu_history pc_hist[MAXRUN];
int blocklen = 0;
#if REAL_ADDRESSING || DIRECT_ADDRESSING
start_pc_p = regs.pc_p;
start_pc = get_virtual_address(regs.pc_p);
#else
start_pc_p = regs.pc_oldp;
start_pc = regs.pc;
#endif
for (;;) { /* Take note: This is the do-it-normal loop */
pc_hist[blocklen++].location = (uae_u16 *)regs.pc_p;
uae_u32 opcode = GET_OPCODE;
#if FLIGHT_RECORDER
m68k_record_step(m68k_getpc());
#endif
(*cpufunctbl[opcode])(opcode);
cpu_check_ticks();
if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) {
compile_block(pc_hist, blocklen);
return; /* We will deal with the spcflags in the caller */
}
/* No need to check regs.spcflags, because if they were set,
we'd have ended up inside that "if" */
}
}
}
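/* Main JIT dispatch loop: enter translated code through
   pushall_call_handler, which indexes cache_tags[] with
   regs.pc_p & TAGMASK; on return, service any pending spcflags. */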
typedef void (*compiled_handler)(void);
static void m68k_do_compile_execute(void)
{
for (;;) {
((compiled_handler)(pushall_call_handler))();
/* Whenever we return from that, we should check spcflags */
if (SPCFLAGS_TEST(SPCFLAG_ALL)) {
if (m68k_do_specialties ())
return;
}
}
}
void m68k_compile_execute (void)
{
for (;;) {
if (quit_program)
break;
m68k_do_compile_execute();
}
}