/*
 * compiler/compemu_support.cpp - Core dynamic translation engine
 *
 * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
 *
 * Adaptation for Basilisk II and improvements, copyright 2000-2005
 *   Gwenole Beauchesne
 *
 * Basilisk II (C) 1997-2008 Christian Bauer
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include "sysdeps.h"

#if !REAL_ADDRESSING && !DIRECT_ADDRESSING
#error "Only Real or Direct Addressing is supported with the JIT Compiler"
#endif

#if X86_ASSEMBLY && !SAHF_SETO_PROFITABLE
#error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler"
#endif

/* NOTE: support for AMD64 assumes translation cache and other code
 * buffers are allocated into a 32-bit address space because (i) B2/JIT
 * code is not 64-bit clean and (ii) it's faster to resolve branches
 * that way.
 */
#if !defined(__i386__) && !defined(__x86_64__)
#error "Only IA-32 and X86-64 targets are supported with the JIT Compiler"
#endif

#define USE_MATCH 0

/* kludge for Brian, so he can compile under MSVC++ */
#define USE_NORMAL_CALLING_CONVENTION 1 && defined(_MSC_VER)

#ifndef WIN32
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#endif

#include <stdlib.h>
#include <fcntl.h>
#include <errno.h>

#include "cpu_emulation.h"
#include "main.h"
#include "prefs.h"
#include "user_strings.h"
#include "vm_alloc.h"

#include "m68k.h"
#include "memory.h"
#include "readcpu.h"
#include "newcpu.h"
#include "comptbl.h"
#include "compiler/compemu.h"
#include "fpu/fpu.h"
#include "fpu/flags.h"

#define DEBUG 1
#include "debug.h"

#ifdef ENABLE_MON
#include "mon.h"
#endif

#ifndef WIN32
#define PROFILE_COMPILE_TIME		1
#define PROFILE_UNTRANSLATED_INSNS	1
#endif

#if defined(__x86_64__) && 0
#define RECORD_REGISTER_USAGE		1
#endif

#ifdef WIN32
#undef write_log
#define write_log dummy_write_log
static void dummy_write_log(const char *, ...) { }
#endif
#if JIT_DEBUG
#undef abort
#define abort() do { \
    fprintf(stderr, "Abort in file %s at line %d\n", __FILE__, __LINE__); \
    exit(EXIT_FAILURE); \
} while (0)
#endif

#if RECORD_REGISTER_USAGE
static uint64 reg_count[16];
static int reg_count_local[16];

static int reg_count_compare(const void *ap, const void *bp)
{
    const int a = *((int *)ap);
    const int b = *((int *)bp);
    return reg_count[b] - reg_count[a];
}
#endif

#if PROFILE_COMPILE_TIME
#include <time.h>
static uae_u32 compile_count	= 0;
static clock_t compile_time	= 0;
static clock_t emul_start_time	= 0;
static clock_t emul_end_time	= 0;
#endif

#if PROFILE_UNTRANSLATED_INSNS
const int untranslated_top_ten = 20;
static uae_u32 raw_cputbl_count[65536] = { 0, };
static uae_u16 opcode_nums[65536];

static int untranslated_compfn(const void *e1, const void *e2)
{
    return raw_cputbl_count[*(const uae_u16 *)e1] < raw_cputbl_count[*(const uae_u16 *)e2];
}
#endif

static compop_func *compfunctbl[65536];
static compop_func *nfcompfunctbl[65536];
static cpuop_func *nfcpufunctbl[65536];
uae_u8* comp_pc_p;

// From newcpu.cpp
extern bool quit_program;

// gb-- Extra data for Basilisk II/JIT
#if JIT_DEBUG
static bool	JITDebug	= false;	// Enable runtime disassemblers through mon?
#else
const bool	JITDebug	= false;	// Don't use JIT debug mode at all
#endif
#if USE_INLINING
static bool	follow_const_jumps = true;	// Flag: translation through constant jumps
#else
const bool	follow_const_jumps = false;
#endif

const uae_u32	MIN_CACHE_SIZE	= 1024;		// Minimal translation cache size (1 MB)
static uae_u32	cache_size	= 0;		// Size of total cache allocated for compiled blocks
static uae_u32	current_cache_size = 0;		// Cache grows upwards: how much has been consumed already
static bool	lazy_flush	= true;		// Flag: lazy translation cache invalidation
static bool	avoid_fpu	= true;		// Flag: compile FPU instructions?
static bool	have_cmov	= false;	// target has CMOV instructions?
static bool	have_lahf_lm	= true;		// target has LAHF supported in long mode?
static bool	have_rat_stall	= true;		// target has partial register stalls?
const bool	tune_alignment	= true;		// Tune code alignments for running CPU?
const bool	tune_nop_fillers = true;	// Tune no-op fillers for architecture
static bool	setzflg_uses_bsf = false;	// setzflg virtual instruction can use native BSF instruction correctly?
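/* Sketch of how the counters above are consumed (the real report loop
 * runs at shutdown and is not part of this excerpt; the helper name
 * below is illustrative only): opcode numbers are sorted by how often
 * their untranslated handlers ran, using untranslated_compfn(). */
#if 0
static void dump_untranslated_top_ten(void)	/* hypothetical helper name */
{
    int i;
    for (i = 0; i < 65536; i++)
	opcode_nums[i] = i;
    qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn);
    for (i = 0; i < untranslated_top_ten; i++)
	write_log("%03d: %04x - %u\n", i, opcode_nums[i], raw_cputbl_count[opcode_nums[i]]);
}
#endif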
static int	align_loops	= 32;		// Align the start of loops
static int	align_jumps	= 32;		// Align the start of jumps
static int	optcount[10]	= {
    10,		// How often a block has to be executed before it is translated
    0,		// How often to use naive translation
    0,0,0,0,
    -1,-1,-1,-1
};

struct op_properties {
    uae_u8 use_flags;
    uae_u8 set_flags;
    uae_u8 is_addx;
    uae_u8 cflow;
};
static op_properties prop[65536];

static inline int end_block(uae_u32 opcode)
{
    return (prop[opcode].cflow & fl_end_block);
}

static inline bool is_const_jump(uae_u32 opcode)
{
    return (prop[opcode].cflow == fl_const_jump);
}

static inline bool may_trap(uae_u32 opcode)
{
    return (prop[opcode].cflow & fl_trap) != 0;
}

static inline unsigned int cft_map (unsigned int f)
{
#ifndef HAVE_GET_WORD_UNSWAPPED
    return f;
#else
    return ((f >> 8) & 255) | ((f & 255) << 8);
#endif
}

uae_u8* start_pc_p;
uae_u32 start_pc;
uae_u32 current_block_pc_p;
static uintptr current_block_start_target;
uae_u32 needed_flags;
static uintptr next_pc_p;
static uintptr taken_pc_p;
static int branch_cc;
static int redo_current_block;

int segvcount=0;
int soft_flush_count=0;
int hard_flush_count=0;
int checksum_count=0;
static uae_u8* current_compile_p=NULL;
static uae_u8* max_compile_start;
static uae_u8* compiled_code=NULL;
static uae_s32 reg_alloc_run;
const int POPALLSPACE_SIZE = 1024; /* That should be enough space */

static uae_u8* popallspace=NULL;
void* pushall_call_handler=NULL;
static void* popall_do_nothing=NULL;
static void* popall_exec_nostats=NULL;
static void* popall_execute_normal=NULL;
static void* popall_cache_miss=NULL;
static void* popall_recompile_block=NULL;
static void* popall_check_checksum=NULL;

/* The 68k only ever executes from even addresses. So right now, we
 * waste half the entries in this array
 * UPDATE: We now use those entries to store the start of the linked
 * lists that we maintain for each hash result.
 */
cacheline cache_tags[TAGSIZE];
int letit=0;
blockinfo* hold_bi[MAX_HOLD_BI];
blockinfo* active;
blockinfo* dormant;

/* 68040 */
extern struct cputbl op_smalltbl_0_nf[];
extern struct comptbl op_smalltbl_0_comp_nf[];
extern struct comptbl op_smalltbl_0_comp_ff[];

/* 68020 + 68881 */
extern struct cputbl op_smalltbl_1_nf[];

/* 68020 */
extern struct cputbl op_smalltbl_2_nf[];

/* 68010 */
extern struct cputbl op_smalltbl_3_nf[];

/* 68000 */
extern struct cputbl op_smalltbl_4_nf[];

/* 68000 slow but compatible. */
extern struct cputbl op_smalltbl_5_nf[];

static void flush_icache_hard(int n);
static void flush_icache_lazy(int n);
static void flush_icache_none(int n);
void (*flush_icache)(int n) = flush_icache_none;

bigstate live;
smallstate empty_ss;
smallstate default_ss;
static int optlev;

static int writereg(int r, int size);
static void unlock2(int r);
static void setlock(int r);
static int readreg_specific(int r, int size, int spec);
static int writereg_specific(int r, int size, int spec);
static void prepare_for_call_1(void);
static void prepare_for_call_2(void);
static void align_target(uae_u32 a);

static uae_s32 nextused[VREGS];

uae_u32 m68k_pc_offset;

/* Some arithmetic operations can be optimized away if the operands
 * are known to be constant. But that's only a good idea when the
 * side effects they would have on the flags are not important. This
 * variable indicates whether we need the side effects or not
 */
uae_u32 needflags=0;

/* Flag handling is complicated.
 *
 * x86 instructions create flags, which quite often are exactly what we
 * want. So at times, the "68k" flags are actually in the x86 flags.
 *
 * Then again, sometimes we do x86 instructions that clobber the x86
 * flags, but don't represent a corresponding m68k instruction. In that
 * case, we have to save them.
 *
 * We used to save them to the stack, but now store them back directly
 * into the regflags.cznv of the traditional emulation. Thus some odd
 * names.
 *
 * So flags can be in either of two places (used to be three; boy were
 * things complicated back then!); And either place can contain either
 * valid flags or invalid trash (and on the stack, there was also the
 * option of "nothing at all", now gone). A couple of variables keep
 * track of the respective states.
 *
 * To make things worse, we might or might not be interested in the flags.
 * By default, we are, but a call to dont_care_flags can change that
 * until the next call to live_flags. If we are not, pretty much whatever
 * is in the register and/or the native flags is seen as valid.
 */

static __inline__ blockinfo* get_blockinfo(uae_u32 cl)
{
    return cache_tags[cl+1].bi;
}

static __inline__ blockinfo* get_blockinfo_addr(void* addr)
{
    blockinfo* bi=get_blockinfo(cacheline(addr));

    while (bi) {
	if (bi->pc_p==addr)
	    return bi;
	bi=bi->next_same_cl;
    }
    return NULL;
}

/*******************************************************************
 * All sorts of list related functions for all of the lists       *
 *******************************************************************/

static __inline__ void remove_from_cl_list(blockinfo* bi)
{
    uae_u32 cl=cacheline(bi->pc_p);

    if (bi->prev_same_cl_p)
	*(bi->prev_same_cl_p)=bi->next_same_cl;
    if (bi->next_same_cl)
	bi->next_same_cl->prev_same_cl_p=bi->prev_same_cl_p;
    if (cache_tags[cl+1].bi)
	cache_tags[cl].handler=cache_tags[cl+1].bi->handler_to_use;
    else
	cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
}

static __inline__ void remove_from_list(blockinfo* bi)
{
    if (bi->prev_p)
	*(bi->prev_p)=bi->next;
    if (bi->next)
	bi->next->prev_p=bi->prev_p;
}

static __inline__ void remove_from_lists(blockinfo* bi)
{
    remove_from_list(bi);
    remove_from_cl_list(bi);
}

static __inline__ void add_to_cl_list(blockinfo* bi)
{
    uae_u32 cl=cacheline(bi->pc_p);

    if (cache_tags[cl+1].bi)
	cache_tags[cl+1].bi->prev_same_cl_p=&(bi->next_same_cl);
    bi->next_same_cl=cache_tags[cl+1].bi;

    cache_tags[cl+1].bi=bi;
    bi->prev_same_cl_p=&(cache_tags[cl+1].bi);

    cache_tags[cl].handler=bi->handler_to_use;
}

static __inline__ void raise_in_cl_list(blockinfo* bi)
{
    remove_from_cl_list(bi);
    add_to_cl_list(bi);
}

static __inline__ void add_to_active(blockinfo* bi)
{
    if (active)
	active->prev_p=&(bi->next);
    bi->next=active;

    active=bi;
    bi->prev_p=&active;
}

static __inline__ void add_to_dormant(blockinfo* bi)
{
    if (dormant)
	dormant->prev_p=&(bi->next);
    bi->next=dormant;

    dormant=bi;
    bi->prev_p=&dormant;
}

static __inline__ void remove_dep(dependency* d)
{
    if (d->prev_p)
	*(d->prev_p)=d->next;
    if (d->next)
	d->next->prev_p=d->prev_p;
    d->prev_p=NULL;
    d->next=NULL;
}

/* This block's code is about to be thrown away, so it no longer
   depends on anything else */
static __inline__ void remove_deps(blockinfo* bi)
{
    remove_dep(&(bi->dep[0]));
    remove_dep(&(bi->dep[1]));
}

static __inline__ void adjust_jmpdep(dependency* d, cpuop_func* a)
{
    *(d->jmp_off)=(uintptr)a-((uintptr)d->jmp_off+4);
}

/********************************************************************
 * Soft flush handling support functions                            *
 ********************************************************************/

static __inline__ void set_dhtu(blockinfo* bi, cpuop_func* dh)
{
    //write_log("bi is %p\n",bi);
    if (dh!=bi->direct_handler_to_use) {
	dependency*
	x=bi->deplist;
	//write_log("bi->deplist=%p\n",bi->deplist);
	while (x) {
	    //write_log("x is %p\n",x);
	    //write_log("x->next is %p\n",x->next);
	    //write_log("x->prev_p is %p\n",x->prev_p);
	    if (x->jmp_off) {
		adjust_jmpdep(x,dh);
	    }
	    x=x->next;
	}
	bi->direct_handler_to_use=dh;
    }
}

static __inline__ void invalidate_block(blockinfo* bi)
{
    int i;

    bi->optlevel=0;
    bi->count=optcount[0]-1;
    bi->handler=NULL;
    bi->handler_to_use=(cpuop_func *)popall_execute_normal;
    bi->direct_handler=NULL;
    set_dhtu(bi,bi->direct_pen);
    bi->needed_flags=0xff;
    bi->status=BI_INVALID;
    for (i=0;i<2;i++) {
	bi->dep[i].jmp_off=NULL;
	bi->dep[i].target=NULL;
    }
    remove_deps(bi);
}

static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target)
{
    blockinfo* tbi=get_blockinfo_addr((void*)(uintptr)target);

    Dif(!tbi) {
	write_log("Could not create jmpdep!\n");
	abort();
    }
    bi->dep[i].jmp_off=jmpaddr;
    bi->dep[i].source=bi;
    bi->dep[i].target=tbi;
    bi->dep[i].next=tbi->deplist;
    if (bi->dep[i].next)
	bi->dep[i].next->prev_p=&(bi->dep[i].next);
    bi->dep[i].prev_p=&(tbi->deplist);
    tbi->deplist=&(bi->dep[i]);
}

static __inline__ void block_need_recompile(blockinfo * bi)
{
    uae_u32 cl = cacheline(bi->pc_p);

    set_dhtu(bi, bi->direct_pen);
    bi->direct_handler = bi->direct_pen;

    bi->handler_to_use = (cpuop_func *)popall_execute_normal;
    bi->handler = (cpuop_func *)popall_execute_normal;
    if (bi == cache_tags[cl + 1].bi)
	cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
    bi->status = BI_NEED_RECOMP;
}

static __inline__ void mark_callers_recompile(blockinfo * bi)
{
    dependency *x = bi->deplist;

    while (x) {
	dependency *next = x->next;	/* This disappears when we mark for
					 * recompilation and thus remove the
					 * blocks from the lists */
	if (x->jmp_off) {
	    blockinfo *cbi = x->source;

	    Dif(cbi->status == BI_INVALID) {
		// write_log("invalid block in dependency list\n"); // FIXME?
		// abort();
	    }
	    if (cbi->status == BI_ACTIVE || cbi->status == BI_NEED_CHECK) {
		block_need_recompile(cbi);
		mark_callers_recompile(cbi);
	    }
	    else if (cbi->status == BI_COMPILING) {
		redo_current_block = 1;
	    }
	    else if (cbi->status == BI_NEED_RECOMP) {
		/* nothing */
	    }
	    else {
		//write_log("Status %d in mark_callers\n",cbi->status); // FIXME?
	    }
	}
	x = next;
    }
}

static __inline__ blockinfo* get_blockinfo_addr_new(void* addr, int setstate)
{
    blockinfo* bi=get_blockinfo_addr(addr);
    int i;

    if (!bi) {
	for (i=0;i<MAX_HOLD_BI && !bi;i++) {
	    if (hold_bi[i]) {
		uae_u32 cl=cacheline(addr);

		bi=hold_bi[i];
		hold_bi[i]=NULL;
		bi->pc_p=(uae_u8 *)addr;
		invalidate_block(bi);
		add_to_active(bi);
		add_to_cl_list(bi);
	    }
	}
    }
    if (!bi) {
	write_log("Looking for blockinfo, can't find free one\n");
	abort();
    }
    return bi;
}

static void prepare_block(blockinfo* bi);

/* Management of blockinfos.

   A blockinfo struct is allocated whenever a new block has to be
   compiled. If the list of free blockinfos is empty, we allocate a new
   pool of blockinfos and link the newly created blockinfos together
   into the list of free blockinfos. Otherwise, we simply pop a
   structure off the free list.

   Blockinfo are lazily deallocated, i.e. chained together in the list
   of free blockinfos whenever a translation cache flush (hard or soft)
   request occurs.
*/
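/* Sketch of the allocator usage described in the comment above (only
 * the acquire()/release() interface defined below is assumed): */
#if 0
	blockinfo *bi = BlockInfoAllocator.acquire();	/* pop a recycled chunk; grows by one 4 KB pool when empty */
	/* ... compile into the block, link it into the lists ... */
	BlockInfoAllocator.release(bi);			/* chain the chunk back onto the free list */
#endif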
template< class T >
class LazyBlockAllocator
{
    enum {
	kPoolSize = 1 + 4096 / sizeof(T)
    };
    struct Pool {
	T chunk[kPoolSize];
	Pool * next;
    };
    Pool * mPools;
    T * mChunks;
public:
    LazyBlockAllocator() : mPools(0), mChunks(0) { }
    ~LazyBlockAllocator();
    T * acquire();
    void release(T * const);
};

template< class T >
LazyBlockAllocator<T>::~LazyBlockAllocator()
{
    Pool * currentPool = mPools;
    while (currentPool) {
	Pool * deadPool = currentPool;
	currentPool = currentPool->next;
	free(deadPool);
    }
}

template< class T >
T * LazyBlockAllocator<T>::acquire()
{
    if (!mChunks) {
	// There is no chunk left, allocate a new pool and link the
	// chunks into the free list
	Pool * newPool = (Pool *)malloc(sizeof(Pool));
	for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) {
	    chunk->next = mChunks;
	    mChunks = chunk;
	}
	newPool->next = mPools;
	mPools = newPool;
    }
    T * chunk = mChunks;
    mChunks = chunk->next;
    return chunk;
}

template< class T >
void LazyBlockAllocator<T>::release(T * const chunk)
{
    chunk->next = mChunks;
    mChunks = chunk;
}

template< class T >
class HardBlockAllocator
{
public:
    T * acquire() {
	T * data = (T *)current_compile_p;
	current_compile_p += sizeof(T);
	return data;
    }

    void release(T * const chunk) {
	// Deallocated on invalidation
    }
};

#if USE_SEPARATE_BIA
static LazyBlockAllocator<blockinfo> BlockInfoAllocator;
static LazyBlockAllocator<checksum_info> ChecksumInfoAllocator;
#else
static HardBlockAllocator<blockinfo> BlockInfoAllocator;
static HardBlockAllocator<checksum_info> ChecksumInfoAllocator;
#endif

static __inline__ checksum_info *alloc_checksum_info(void)
{
    checksum_info *csi = ChecksumInfoAllocator.acquire();
    csi->next = NULL;
    return csi;
}

static __inline__ void free_checksum_info(checksum_info *csi)
{
    csi->next = NULL;
    ChecksumInfoAllocator.release(csi);
}

static __inline__ void free_checksum_info_chain(checksum_info *csi)
{
    while (csi != NULL) {
	checksum_info *csi2 = csi->next;
	free_checksum_info(csi);
	csi = csi2;
    }
}

static __inline__ blockinfo *alloc_blockinfo(void)
{
    blockinfo *bi = BlockInfoAllocator.acquire();
#if USE_CHECKSUM_INFO
    bi->csi = NULL;
#endif
    return bi;
}

static __inline__ void free_blockinfo(blockinfo *bi)
{
#if USE_CHECKSUM_INFO
    free_checksum_info_chain(bi->csi);
    bi->csi = NULL;
#endif
    BlockInfoAllocator.release(bi);
}

static __inline__ void alloc_blockinfos(void)
{
    int i;
    blockinfo* bi;

    for (i=0;i<MAX_HOLD_BI;i++) {
	if (hold_bi[i])
	    return;
	bi=alloc_blockinfo();
	prepare_block(bi);
	hold_bi[i]=bi;
    }
}

/********************************************************************
 * Functions to emit data into memory, and other general support    *
 ********************************************************************/

static uae_u8* target;

static __inline__ uae_u32 reverse32(uae_u32 v)
{
#if 1
    /* gb-- We have specialized byteswapping functions, just use them */
    return do_byteswap_32(v);
#else
    return ((v>>24)&0xff) | ((v>>8)&0xff00) | ((v<<8)&0xff0000) | ((v<<24)&0xff000000);
#endif
}

/********************************************************************
 * Getting the information about the target CPU                     *
 ********************************************************************/

#include "codegen_x86.cpp"

void set_target(uae_u8* t)
{
    target=t;
}

static __inline__ uae_u8* get_target_noopt(void)
{
    return target;
}

__inline__ uae_u8* get_target(void)
{
    return get_target_noopt();
}

/********************************************************************
 * Flags status handling. EMIT TIME!                                *
 ********************************************************************/

static void bt_l_ri_noclobber(R4 r, IMM i);

static void make_flags_live_internal(void)
{
    if (live.flags_in_flags==VALID)
	return;
    Dif (live.flags_on_stack==TRASH) {
	write_log("Want flags, got something on stack, but it is TRASH\n");
	abort();
    }
    if (live.flags_on_stack==VALID) {
	int tmp;
	tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2);
	raw_reg_to_flags(tmp);
	unlock2(tmp);

	live.flags_in_flags=VALID;
	return;
    }
    write_log("Huh? live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live\n",
	      live.flags_in_flags,live.flags_on_stack);
    abort();
}
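/* Sketch of the discipline implemented by the helpers in this section
 * (the emitted instruction is illustrative only; raw_add_l_ri is used
 * elsewhere in this file): */
#if 0
	clobber_flags();		/* spill the x86 flags to regflags.cznv if they hold the only valid copy */
	raw_add_l_ri(r,1);		/* emit something that overwrites the x86 flags */
	make_flags_live_internal();	/* later: pull the 68k flags back into the real x86 flags */
#endif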
static void flags_to_stack(void)
{
    if (live.flags_on_stack==VALID)
	return;
    if (!live.flags_are_important) {
	live.flags_on_stack=VALID;
	return;
    }
    Dif (live.flags_in_flags!=VALID)
	abort();
    else {
	int tmp;
	tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1);
	raw_flags_to_reg(tmp);
	unlock2(tmp);
    }
    live.flags_on_stack=VALID;
}

static __inline__ void clobber_flags(void)
{
    if (live.flags_in_flags==VALID && live.flags_on_stack!=VALID)
	flags_to_stack();
    live.flags_in_flags=TRASH;
}

/* Prepare for leaving the compiled stuff */
static __inline__ void flush_flags(void)
{
    flags_to_stack();
    return;
}

int touchcnt;

/********************************************************************
 * Partial register flushing for optimized calls                    *
 ********************************************************************/

struct regusage {
    uae_u16 rmask;
    uae_u16 wmask;
};

static inline void ru_set(uae_u16 *mask, int reg)
{
#if USE_OPTIMIZED_CALLS
    *mask |= 1 << reg;
#endif
}

static inline bool ru_get(const uae_u16 *mask, int reg)
{
#if USE_OPTIMIZED_CALLS
    return (*mask & (1 << reg));
#else
    /* Default: instruction reads & writes to register */
    return true;
#endif
}

static inline void ru_set_read(regusage *ru, int reg)
{
    ru_set(&ru->rmask, reg);
}

static inline void ru_set_write(regusage *ru, int reg)
{
    ru_set(&ru->wmask, reg);
}

static inline bool ru_read_p(const regusage *ru, int reg)
{
    return ru_get(&ru->rmask, reg);
}

static inline bool ru_write_p(const regusage *ru, int reg)
{
    return ru_get(&ru->wmask, reg);
}

static void ru_fill_ea(regusage *ru, int reg, amodes mode, wordsizes size, int write_mode)
{
    switch (mode) {
    case Areg:
	reg += 8;
	/* fall through */
    case Dreg:
	ru_set(write_mode ? &ru->wmask : &ru->rmask, reg);
	break;
    case Ad16:
	/* skip displacement */
	m68k_pc_offset += 2;
    case Aind:
    case Aipi:
    case Apdi:
	ru_set_read(ru, reg+8);
	break;
    case Ad8r:
	ru_set_read(ru, reg+8);
	/* fall through */
    case PC8r: {
	uae_u16 dp = comp_get_iword((m68k_pc_offset+=2)-2);
	reg = (dp >> 12) & 15;
	ru_set_read(ru, reg);
	if (dp & 0x100)
	    m68k_pc_offset += (((dp & 0x30) >> 3) & 7) + ((dp & 3) * 2);
	break;
    }
    case PC16:
    case absw:
    case imm0:
    case imm1:
	m68k_pc_offset += 2;
	break;
    case absl:
    case imm2:
	m68k_pc_offset += 4;
	break;
    case immi:
	m68k_pc_offset += (size == sz_long) ? 4 : 2;
	break;
    }
}
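/* Usage sketch for the regusage machinery (register numbering as
 * established by ru_fill_ea: D0-D7 = 0..7, A0-A7 = 8..15; `opcode' is
 * whatever instruction is being scanned): */
#if 0
	regusage ru;
	m68k_pc_offset = 0;
	ru_fill(&ru, opcode);		/* defined just below: fills rmask/wmask */
	if (ru_write_p(&ru, 15)) {
	    /* instruction may write A7, so treat the stack as modified */
	}
#endif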
/* TODO: split into a static initialization part and a dynamic one
   (instructions depending on extension words) */
static void ru_fill(regusage *ru, uae_u32 opcode)
{
    m68k_pc_offset += 2;

    /* Default: no register is used or written to */
    ru->rmask = 0;
    ru->wmask = 0;

    uae_u32 real_opcode = cft_map(opcode);
    struct instr *dp = &table68k[real_opcode];

    bool rw_dest = true;
    bool handled = false;

    /* Handle some instructions specifically */
    uae_u16 ext;
    switch (dp->mnemo) {
    case i_BFCHG:
    case i_BFCLR:
    case i_BFEXTS:
    case i_BFEXTU:
    case i_BFFFO:
    case i_BFINS:
    case i_BFSET:
    case i_BFTST:
	ext = comp_get_iword((m68k_pc_offset+=2)-2);
	if (ext & 0x800) ru_set_read(ru, (ext >> 6) & 7);
	if (ext & 0x020) ru_set_read(ru, ext & 7);
	ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
	if (dp->dmode == Dreg)
	    ru_set_read(ru, dp->dreg);
	switch (dp->mnemo) {
	case i_BFEXTS:
	case i_BFEXTU:
	case i_BFFFO:
	    ru_set_write(ru, (ext >> 12) & 7);
	    break;
	case i_BFINS:
	    ru_set_read(ru, (ext >> 12) & 7);
	    /* fall through */
	case i_BFCHG:
	case i_BFCLR:
	case i_BFSET:
	    if (dp->dmode == Dreg)
		ru_set_write(ru, dp->dreg);
	    break;
	}
	handled = true;
	rw_dest = false;
	break;

    case i_BTST:
	rw_dest = false;
	break;

    case i_CAS:
    {
	ext = comp_get_iword((m68k_pc_offset+=2)-2);
	int Du = ext & 7;
	ru_set_read(ru, Du);
	int Dc = (ext >> 6) & 7;
	ru_set_read(ru, Dc);
	ru_set_write(ru, Dc);
	break;
    }
    case i_CAS2:
    {
	int Dc1, Dc2, Du1, Du2, Rn1, Rn2;
	ext = comp_get_iword((m68k_pc_offset+=2)-2);
	Rn1 = (ext >> 12) & 15;
	Du1 = (ext >> 6) & 7;
	Dc1 = ext & 7;
	ru_set_read(ru, Rn1);
	ru_set_read(ru, Du1);
	ru_set_read(ru, Dc1);
	ru_set_write(ru, Dc1);
	ext = comp_get_iword((m68k_pc_offset+=2)-2);
	Rn2 = (ext >> 12) & 15;
	Du2 = (ext >> 6) & 7;
	Dc2 = ext & 7;
	ru_set_read(ru, Rn2);
	ru_set_read(ru, Du2);
	ru_set_write(ru, Dc2);
	break;
    }
    case i_DIVL: case i_MULL:
	m68k_pc_offset += 2;
	break;
    case i_LEA:
    case i_MOVE: case i_MOVEA: case i_MOVE16:
	rw_dest = false;
	break;
    case i_PACK: case i_UNPK:
	rw_dest = false;
	m68k_pc_offset += 2;
	break;
    case i_TRAPcc:
	m68k_pc_offset += (dp->size == sz_long) ?
4 : 2; break; case i_RTR: /* do nothing, just for coverage debugging */ break; /* TODO: handle EXG instruction */ } /* Handle A-Traps better */ if ((real_opcode & 0xf000) == 0xa000) { handled = true; } /* Handle EmulOps better */ if ((real_opcode & 0xff00) == 0x7100) { handled = true; ru->rmask = 0xffff; ru->wmask = 0; } if (dp->suse && !handled) ru_fill_ea(ru, dp->sreg, (amodes)dp->smode, (wordsizes)dp->size, 0); if (dp->duse && !handled) ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1); if (rw_dest) ru->rmask |= ru->wmask; handled = handled || dp->suse || dp->duse; /* Mark all registers as used/written if the instruction may trap */ if (may_trap(opcode)) { handled = true; ru->rmask = 0xffff; ru->wmask = 0xffff; } if (!handled) { write_log("ru_fill: %04x = { %04x, %04x }\n", real_opcode, ru->rmask, ru->wmask); abort(); } } /******************************************************************** * register allocation per block logging * ********************************************************************/ static uae_s8 vstate[VREGS]; static uae_s8 vwritten[VREGS]; static uae_s8 nstate[N_REGS]; #define L_UNKNOWN -127 #define L_UNAVAIL -1 #define L_NEEDED -2 #define L_UNNEEDED -3 static __inline__ void big_to_small_state(bigstate * b, smallstate * s) { int i; for (i = 0; i < VREGS; i++) s->virt[i] = vstate[i]; for (i = 0; i < N_REGS; i++) s->nat[i] = nstate[i]; } static __inline__ int callers_need_recompile(bigstate * b, smallstate * s) { int i; int reverse = 0; for (i = 0; i < VREGS; i++) { if (vstate[i] != L_UNNEEDED && s->virt[i] == L_UNNEEDED) return 1; if (vstate[i] == L_UNNEEDED && s->virt[i] != L_UNNEEDED) reverse++; } for (i = 0; i < N_REGS; i++) { if (nstate[i] >= 0 && nstate[i] != s->nat[i]) return 1; if (nstate[i] < 0 && s->nat[i] >= 0) reverse++; } if (reverse >= 2 && USE_MATCH) return 1; /* In this case, it might be worth recompiling the * callers */ return 0; } static __inline__ void log_startblock(void) { int i; for (i = 0; i < VREGS; i++) { vstate[i] = L_UNKNOWN; vwritten[i] = 0; } for (i = 0; i < N_REGS; i++) nstate[i] = L_UNKNOWN; } /* Using an n-reg for a temp variable */ static __inline__ void log_isused(int n) { if (nstate[n] == L_UNKNOWN) nstate[n] = L_UNAVAIL; } static __inline__ void log_visused(int r) { if (vstate[r] == L_UNKNOWN) vstate[r] = L_NEEDED; } static __inline__ void do_load_reg(int n, int r) { if (r == FLAGTMP) raw_load_flagreg(n, r); else if (r == FLAGX) raw_load_flagx(n, r); else raw_mov_l_rm(n, (uintptr) live.state[r].mem); } static __inline__ void check_load_reg(int n, int r) { raw_mov_l_rm(n, (uintptr) live.state[r].mem); } static __inline__ void log_vwrite(int r) { vwritten[r] = 1; } /* Using an n-reg to hold a v-reg */ static __inline__ void log_isreg(int n, int r) { static int count = 0; if (nstate[n] == L_UNKNOWN && r < 16 && !vwritten[r] && USE_MATCH) nstate[n] = r; else { do_load_reg(n, r); if (nstate[n] == L_UNKNOWN) nstate[n] = L_UNAVAIL; } if (vstate[r] == L_UNKNOWN) vstate[r] = L_NEEDED; } static __inline__ void log_clobberreg(int r) { if (vstate[r] == L_UNKNOWN) vstate[r] = L_UNNEEDED; } /* This ends all possibility of clever register allocation */ static __inline__ void log_flush(void) { int i; for (i = 0; i < VREGS; i++) if (vstate[i] == L_UNKNOWN) vstate[i] = L_NEEDED; for (i = 0; i < N_REGS; i++) if (nstate[i] == L_UNKNOWN) nstate[i] = L_UNAVAIL; } static __inline__ void log_dump(void) { int i; return; write_log("----------------------\n"); for (i = 0; i < N_REGS; i++) { switch (nstate[i]) { case L_UNKNOWN: 
write_log("Nat %d : UNKNOWN\n", i); break; case L_UNAVAIL: write_log("Nat %d : UNAVAIL\n", i); break; default: write_log("Nat %d : %d\n", i, nstate[i]); break; } } for (i = 0; i < VREGS; i++) { if (vstate[i] == L_UNNEEDED) write_log("Virt %d: UNNEEDED\n", i); } } /******************************************************************** * register status handling. EMIT TIME! * ********************************************************************/ static __inline__ void set_status(int r, int status) { if (status == ISCONST) log_clobberreg(r); live.state[r].status=status; } static __inline__ int isinreg(int r) { return live.state[r].status==CLEAN || live.state[r].status==DIRTY; } static __inline__ void adjust_nreg(int r, uae_u32 val) { if (!val) return; raw_lea_l_brr(r,r,val); } static void tomem(int r) { int rr=live.state[r].realreg; if (isinreg(r)) { if (live.state[r].val && live.nat[rr].nholds==1 && !live.nat[rr].locked) { // write_log("RemovingA offset %x from reg %d (%d) at %p\n", // live.state[r].val,r,rr,target); adjust_nreg(rr,live.state[r].val); live.state[r].val=0; live.state[r].dirtysize=4; set_status(r,DIRTY); } } if (live.state[r].status==DIRTY) { switch (live.state[r].dirtysize) { case 1: raw_mov_b_mr((uintptr)live.state[r].mem,rr); break; case 2: raw_mov_w_mr((uintptr)live.state[r].mem,rr); break; case 4: raw_mov_l_mr((uintptr)live.state[r].mem,rr); break; default: abort(); } log_vwrite(r); set_status(r,CLEAN); live.state[r].dirtysize=0; } } static __inline__ int isconst(int r) { return live.state[r].status==ISCONST; } int is_const(int r) { return isconst(r); } static __inline__ void writeback_const(int r) { if (!isconst(r)) return; Dif (live.state[r].needflush==NF_HANDLER) { write_log("Trying to write back constant NF_HANDLER!\n"); abort(); } raw_mov_l_mi((uintptr)live.state[r].mem,live.state[r].val); log_vwrite(r); live.state[r].val=0; set_status(r,INMEM); } static __inline__ void tomem_c(int r) { if (isconst(r)) { writeback_const(r); } else tomem(r); } static void evict(int r) { int rr; if (!isinreg(r)) return; tomem(r); rr=live.state[r].realreg; Dif (live.nat[rr].locked && live.nat[rr].nholds==1) { write_log("register %d in nreg %d is locked!\n",r,live.state[r].realreg); abort(); } live.nat[rr].nholds--; if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */ int topreg=live.nat[rr].holds[live.nat[rr].nholds]; int thisind=live.state[r].realind; live.nat[rr].holds[thisind]=topreg; live.state[topreg].realind=thisind; } live.state[r].realreg=-1; set_status(r,INMEM); } static __inline__ void free_nreg(int r) { int i=live.nat[r].nholds; while (i) { int vr; --i; vr=live.nat[r].holds[i]; evict(vr); } Dif (live.nat[r].nholds!=0) { write_log("Failed to free nreg %d, nholds is %d\n",r,live.nat[r].nholds); abort(); } } /* Use with care! */ static __inline__ void isclean(int r) { if (!isinreg(r)) return; live.state[r].validsize=4; live.state[r].dirtysize=0; live.state[r].val=0; set_status(r,CLEAN); } static __inline__ void disassociate(int r) { isclean(r); evict(r); } static __inline__ void set_const(int r, uae_u32 val) { disassociate(r); live.state[r].val=val; set_status(r,ISCONST); } static __inline__ uae_u32 get_offset(int r) { return live.state[r].val; } static int alloc_reg_hinted(int r, int size, int willclobber, int hint) { int bestreg; uae_s32 when; int i; uae_s32 badness=0; /* to shut up gcc */ bestreg=-1; when=2000000000; /* XXX use a regalloc_order table? 
 */
    for (i=0;i<N_REGS;i++) {
	badness=live.nat[i].touched;
	if (live.nat[i].nholds==0)
	    badness=0;
	if (i==hint)
	    badness-=500000000;
	if (!live.nat[i].locked && badness<when) {
	    if ((size==1 && live.nat[i].canbyte) ||
		(size==2 && live.nat[i].canword) ||
		(size==4)) {
		bestreg=i;
		when=badness;
		if (live.nat[i].nholds==0 && hint<0)
		    break;
		if (i==hint)
		    break;
	    }
	}
    }
    Dif (bestreg==-1)
	abort();

    if (live.nat[bestreg].nholds>0) {
	free_nreg(bestreg);
    }
    if (isinreg(r)) {
	int rr=live.state[r].realreg;
	/* This will happen if we read a partially dirty register at a
	   bigger size */
	Dif (willclobber || live.state[r].validsize>=size)
	    abort();
	Dif (live.nat[rr].nholds!=1)
	    abort();

	if (size==4 && live.state[r].validsize==2) {
	    log_isused(bestreg);
	    log_visused(r);
	    raw_mov_l_rm(bestreg,(uintptr)live.state[r].mem);
	    raw_bswap_32(bestreg);
	    raw_zero_extend_16_rr(rr,rr);
	    raw_zero_extend_16_rr(bestreg,bestreg);
	    raw_bswap_32(bestreg);
	    raw_lea_l_brr_indexed(rr,rr,bestreg,1,0);
	    live.state[r].validsize=4;
	    live.nat[rr].touched=touchcnt++;
	    return rr;
	}
	if (live.state[r].validsize==1) {
	    /* Nothing yet */
	}
	evict(r);
    }

    if (!willclobber) {
	if (live.state[r].status!=UNDEF) {
	    if (isconst(r)) {
		raw_mov_l_ri(bestreg,live.state[r].val);
		live.state[r].val=0;
		live.state[r].dirtysize=4;
		set_status(r,DIRTY);
		log_isused(bestreg);
	    }
	    else {
		log_isreg(bestreg, r);  /* This will also load it! */
		live.state[r].dirtysize=0;
		set_status(r,CLEAN);
	    }
	}
	else {
	    live.state[r].val=0;
	    live.state[r].dirtysize=0;
	    set_status(r,CLEAN);
	    log_isused(bestreg);
	}
	live.state[r].validsize=4;
    }
    else { /* this is the easiest way, but not optimal. FIXME! */
	/* Now it's trickier, but hopefully still OK */
	if (!isconst(r) || size==4) {
	    live.state[r].validsize=size;
	    live.state[r].dirtysize=size;
	    live.state[r].val=0;
	    set_status(r,DIRTY);
	    if (size == 4) {
		log_clobberreg(r);
		log_isused(bestreg);
	    }
	    else {
		log_visused(r);
		log_isused(bestreg);
	    }
	}
	else {
	    if (live.state[r].status!=UNDEF)
		raw_mov_l_ri(bestreg,live.state[r].val);
	    live.state[r].val=0;
	    live.state[r].validsize=4;
	    live.state[r].dirtysize=4;
	    set_status(r,DIRTY);
	    log_isused(bestreg);
	}
    }
    live.state[r].realreg=bestreg;
    live.state[r].realind=live.nat[bestreg].nholds;
    live.nat[bestreg].touched=touchcnt++;
    live.nat[bestreg].holds[live.nat[bestreg].nholds]=r;
    live.nat[bestreg].nholds++;

    return bestreg;
}

static int alloc_reg(int r, int size, int willclobber)
{
    return alloc_reg_hinted(r,size,willclobber,-1);
}

static void unlock2(int r)
{
    Dif (!live.nat[r].locked)
	abort();
    live.nat[r].locked--;
}

static void setlock(int r)
{
    live.nat[r].locked++;
}

static void mov_nregs(int d, int s)
{
    int ns=live.nat[s].nholds;
    int nd=live.nat[d].nholds;
    int i;

    if (s==d)
	return;

    if (nd>0)
	free_nreg(d);

    log_isused(d);
    raw_mov_l_rr(d,s);

    for (i=0;i<ns;i++) {
	int vs=live.nat[s].holds[i];

	live.state[vs].realreg=d;
	live.state[vs].realind=i;
	live.nat[d].holds[i]=vs;
    }
    live.nat[d].nholds=ns;
    live.nat[s].nholds=0;
}

static __inline__ int readreg_general(int r, int size, int spec, int can_offset)
{
    int n;
    int answer=-1;

    record_register(r);
    if (live.state[r].status==UNDEF) {
	write_log("WARNING: Unexpected read of undefined register %d\n",r);
    }
    if (!can_offset)
	remove_offset(r,spec);

    if (isinreg(r) && live.state[r].validsize>=size) {
	n=live.state[r].realreg;
	switch(size) {
	case 1:
	    if (live.nat[n].canbyte || spec>=0) {
		answer=n;
	    }
	    break;
	case 2:
	    if (live.nat[n].canword || spec>=0) {
		answer=n;
	    }
	    break;
	case 4:
	    answer=n;
	    break;
	default: abort();
	}
	if (answer<0)
	    evict(r);
    }
    /* either the value was in memory to start with, or it was evicted and
       is in memory now */
    if (answer<0) {
	answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec);
    }

    if (spec>=0 && spec!=answer) {
	/* Too bad */
	mov_nregs(spec,answer);
	answer=spec;
    }
    live.nat[answer].locked++;
    live.nat[answer].touched=touchcnt++;
    return answer;
}

static int readreg(int r, int size)
{
    return readreg_general(r,size,-1,0);
}

static int readreg_specific(int r, int size, int spec)
{
    return readreg_general(r,size,spec,0);
}

static int readreg_offset(int r, int size)
{
    return readreg_general(r,size,-1,1);
}
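/* The emit-time pattern built from the helpers above, as it appears all
 * through the MIDFUNC bodies later in this file: lock virtual registers
 * into hard registers, emit the raw x86 instruction, then unlock. */
#if 0
	s=readreg(s,4);		/* lock s as a 32-bit read-only value  */
	d=rmw(d,4,4);		/* lock d, read and written at 32 bits */
	raw_add_l(d,s);		/* emit the actual x86 instruction     */
	unlock2(d);
	unlock2(s);
#endif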
/* writereg_general(r, size, spec)
 *
 * INPUT
 * - r    : mid-layer register
 * - size : requested size (1/2/4)
 * - spec : -1 if find or make a register free, otherwise specifies
 *          the physical register to use in any case
 *
 * OUTPUT
 * - hard (physical, x86 here) register allocated to virtual register r
 */
static __inline__ int writereg_general(int r, int size, int spec)
{
    int n;
    int answer=-1;

    record_register(r);
    if (size<4) {
	remove_offset(r,spec);
    }
    make_exclusive(r,size,spec);
    if (isinreg(r)) {
	int nvsize=size>live.state[r].validsize?size:live.state[r].validsize;
	int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
	n=live.state[r].realreg;

	Dif (live.nat[n].nholds!=1)
	    abort();

	switch(size) {
	case 1:
	    if (live.nat[n].canbyte || spec>=0) {
		live.state[r].dirtysize=ndsize;
		live.state[r].validsize=nvsize;
		answer=n;
	    }
	    break;
	case 2:
	    if (live.nat[n].canword || spec>=0) {
		live.state[r].dirtysize=ndsize;
		live.state[r].validsize=nvsize;
		answer=n;
	    }
	    break;
	case 4:
	    live.state[r].dirtysize=ndsize;
	    live.state[r].validsize=nvsize;
	    answer=n;
	    break;
	default: abort();
	}
	if (answer<0)
	    evict(r);
    }
    /* either the value was in memory to start with, or it was evicted and
       is in memory now */
    if (answer<0) {
	answer=alloc_reg_hinted(r,size,1,spec);
    }
    if (spec>=0 && spec!=answer) {
	mov_nregs(spec,answer);
	answer=spec;
    }
    if (live.state[r].status==UNDEF)
	live.state[r].validsize=4;
    live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
    live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize;

    live.nat[answer].locked++;
    live.nat[answer].touched=touchcnt++;
    if (size==4) {
	live.state[r].val=0;
    }
    else {
	Dif (live.state[r].val) {
	    write_log("Problem with val\n");
	    abort();
	}
    }
    set_status(r,DIRTY);
    return answer;
}

static int writereg(int r, int size)
{
    return writereg_general(r,size,-1);
}

static int writereg_specific(int r, int size, int spec)
{
    return writereg_general(r,size,spec);
}

static __inline__ int rmw_general(int r, int wsize, int rsize, int spec)
{
    int n;
    int answer=-1;

    record_register(r);
    if (live.state[r].status==UNDEF) {
	write_log("WARNING: Unexpected read of undefined register %d\n",r);
    }
    remove_offset(r,spec);
    make_exclusive(r,0,spec);

    Dif (wsize<rsize)
	abort();

    if (isinreg(r) && live.state[r].validsize>=rsize) {
	n=live.state[r].realreg;
	Dif (live.nat[n].nholds!=1)
	    abort();

	switch(rsize) {
	case 1:
	    if (live.nat[n].canbyte || spec>=0) {
		answer=n;
	    }
	    break;
	case 2:
	    if (live.nat[n].canword || spec>=0) {
		answer=n;
	    }
	    break;
	case 4:
	    answer=n;
	    break;
	default: abort();
	}
	if (answer<0)
	    evict(r);
    }
    /* either the value was in memory to start with, or it was evicted and
       is in memory now */
    if (answer<0) {
	answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec);
    }

    if (spec>=0 && spec!=answer) {
	/* Too bad */
	mov_nregs(spec,answer);
	answer=spec;
    }
    if (wsize>live.state[r].dirtysize)
	live.state[r].dirtysize=wsize;
    if (wsize>live.state[r].validsize)
	live.state[r].validsize=wsize;
    set_status(r,DIRTY);

    live.nat[answer].locked++;
    live.nat[answer].touched=touchcnt++;

    Dif (live.state[r].val) {
	write_log("Problem with val(rmw)\n");
	abort();
    }
    return answer;
}

static int rmw(int r, int wsize, int rsize)
{
    return rmw_general(r,wsize,rsize,-1);
}

static int rmw_specific(int r, int wsize, int rsize, int spec)
{
    return rmw_general(r,wsize,rsize,spec);
}

/* needed for restoring the carry flag on non-P6 cores */
static void bt_l_ri_noclobber(R4 r, IMM i)
{
    int size=4;
    if (i<16)
	size=2;
    r=readreg(r,size);
    raw_bt_l_ri(r,i);
    unlock2(r);
}
/********************************************************************
 * FPU register status handling. EMIT TIME!                         *
 ********************************************************************/

static void f_tomem(int r)
{
    if (live.fate[r].status==DIRTY) {
#if USE_LONG_DOUBLE
	raw_fmov_ext_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
#else
	raw_fmov_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
#endif
	live.fate[r].status=CLEAN;
    }
}

static void f_tomem_drop(int r)
{
    if (live.fate[r].status==DIRTY) {
#if USE_LONG_DOUBLE
	raw_fmov_ext_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
#else
	raw_fmov_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
#endif
	live.fate[r].status=INMEM;
    }
}

static __inline__ int f_isinreg(int r)
{
    return live.fate[r].status==CLEAN || live.fate[r].status==DIRTY;
}

static void f_evict(int r)
{
    int rr;

    if (!f_isinreg(r))
	return;
    rr=live.fate[r].realreg;
    if (live.fat[rr].nholds==1)
	f_tomem_drop(r);
    else
	f_tomem(r);

    Dif (live.fat[rr].locked && live.fat[rr].nholds==1) {
	write_log("FPU register %d in nreg %d is locked!\n",r,live.fate[r].realreg);
	abort();
    }

    live.fat[rr].nholds--;
    if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */
	int topreg=live.fat[rr].holds[live.fat[rr].nholds];
	int thisind=live.fate[r].realind;
	live.fat[rr].holds[thisind]=topreg;
	live.fate[topreg].realind=thisind;
    }
    live.fate[r].status=INMEM;
    live.fate[r].realreg=-1;
}

static __inline__ void f_free_nreg(int r)
{
    int i=live.fat[r].nholds;

    while (i) {
	int vr;

	--i;
	vr=live.fat[r].holds[i];
	f_evict(vr);
    }
    Dif (live.fat[r].nholds!=0) {
	write_log("Failed to free nreg %d, nholds is %d\n",r,live.fat[r].nholds);
	abort();
    }
}

/* Use with care! */
static __inline__ void f_isclean(int r)
{
    if (!f_isinreg(r))
	return;
    live.fate[r].status=CLEAN;
}

static __inline__ void f_disassociate(int r)
{
    f_isclean(r);
    f_evict(r);
}

static int f_alloc_reg(int r, int willclobber)
{
    int bestreg;
    uae_s32 when;
    int i;
    uae_s32 badness;

    bestreg=-1;
    when=2000000000;
    for (i=N_FREGS;i--;) {
	badness=live.fat[i].touched;
	if (live.fat[i].nholds==0)
	    badness=0;

	if (!live.fat[i].locked && badness<when) {
	    bestreg=i;
	    when=badness;
	    if (live.fat[i].nholds==0)
		break;
	}
    }
    Dif (bestreg==-1)
	abort();

    if (live.fat[bestreg].nholds>0) {
	f_free_nreg(bestreg);
    }
    if (f_isinreg(r)) {
	f_evict(r);
    }

    if (!willclobber) {
	if (live.fate[r].status!=UNDEF) {
#if USE_LONG_DOUBLE
	    raw_fmov_ext_rm(bestreg,(uintptr)live.fate[r].mem);
#else
	    raw_fmov_rm(bestreg,(uintptr)live.fate[r].mem);
#endif
	}
	live.fate[r].status=CLEAN;
    }
    else {
	live.fate[r].status=DIRTY;
    }
    live.fate[r].realreg=bestreg;
    live.fate[r].realind=live.fat[bestreg].nholds;
    live.fat[bestreg].touched=touchcnt++;
    live.fat[bestreg].holds[live.fat[bestreg].nholds]=r;
    live.fat[bestreg].nholds++;

    return bestreg;
}

static void f_unlock(int r)
{
    Dif (!live.fat[r].locked)
	abort();
    live.fat[r].locked--;
}

static void f_setlock(int r)
{
    live.fat[r].locked++;
}

static __inline__ int f_readreg(int r)
{
    int n;
    int answer=-1;

    if (f_isinreg(r)) {
	n=live.fate[r].realreg;
	answer=n;
    }
    /* either the value was in memory to start with, or it was evicted and
       is in memory now */
    if (answer<0)
	answer=f_alloc_reg(r,0);

    live.fat[answer].locked++;
    live.fat[answer].touched=touchcnt++;
    return answer;
}

static __inline__ void f_make_exclusive(int r, int clobber)
{
    freg_status oldstate;
    int rr=live.fate[r].realreg;
    int nr;
    int nind;
    int ndirt=0;
    int i;

    if (!f_isinreg(r))
	return;
    if (live.fat[rr].nholds==1)
	return;
    for (i=0;i<live.fat[rr].nholds;i++) {
	int vr=live.fat[rr].holds[i];
	if (vr!=r && live.fate[vr].status==DIRTY)
	    ndirt++;
    }

    if (!ndirt && !live.fat[rr].locked) {
	/* Everything else is clean, so we can simply evict the others */
	for (i=0;i<live.fat[rr].nholds;i++) {
	    int vr=live.fat[rr].holds[i];

	    if (vr!=r) {
		f_evict(vr);
		i--; /* Important! */
	    }
	}
	return;
    }

    /* We have to split the register */
    oldstate=live.fate[r];

    f_setlock(rr); /* Make sure this doesn't go away */
    /* Forget about r being in the register rr */
    f_disassociate(r);
    /* Get a new register, that we will clobber completely */
    nr=f_alloc_reg(r,1);
    nind=live.fate[r].realind;
    if (!clobber)
	raw_fmov_rr(nr,rr);  /* Make another copy */
    live.fate[r]=oldstate;   /* Keep all the old state info */
    live.fate[r].realreg=nr;
    live.fate[r].realind=nind;
    f_unlock(rr);
}
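/* FPU analogue of the integer locking pattern (sketch; f_writereg is
 * assumed to be defined with the other f_* helpers and is not shown in
 * this excerpt): */
#if 0
	s=f_readreg(s);		/* lock the source FPU vreg into an x87 register   */
	d=f_writereg(d);	/* assumed counterpart of writereg() for FPU vregs */
	raw_fmov_rr(d,s);	/* emit the register-to-register move              */
	f_unlock(s);
	f_unlock(d);
#endif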
MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
{
    if (!i && !needflags)
	return;
    if (isconst(r) && !needflags) {
	live.state[r].val>>=i;
	return;
    }
    CLOBBER_SHRL;
    r=rmw(r,4,4);
    raw_shrl_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i))

MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_SHRL;
    r=rmw(r,2,2);
    raw_shrl_w_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i))

MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_SHRL;
    r=rmw(r,1,1);
    raw_shrl_b_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i))

MIDFUNC(2,shra_l_ri,(RW4 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_SHRA;
    r=rmw(r,4,4);
    raw_shra_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shra_l_ri,(RW4 r, IMM i))

MIDFUNC(2,shra_w_ri,(RW2 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_SHRA;
    r=rmw(r,2,2);
    raw_shra_w_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shra_w_ri,(RW2 r, IMM i))

MIDFUNC(2,shra_b_ri,(RW1 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_SHRA;
    r=rmw(r,1,1);
    raw_shra_b_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shra_b_ri,(RW1 r, IMM i))

MIDFUNC(2,shra_l_rr,(RW4 d, R1 r))
{
    if (isconst(r)) {
	COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val);
	return;
    }
    CLOBBER_SHRA;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,4,4);
    Dif (r!=1) {
	write_log("Illegal register %d in shra_l_rr\n",r);
	abort();
    }
    raw_shra_l_rr(d,r);
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,shra_l_rr,(RW4 d, R1 r))

MIDFUNC(2,shra_w_rr,(RW2 d, R1 r))
{
    /* Can only do this with r==1, i.e. cl */
    if (isconst(r)) {
	COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val);
	return;
    }
    CLOBBER_SHRA;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,2,2);
    Dif (r!=1) {
	write_log("Illegal register %d in shra_w_rr\n",r);
	abort();
    }
    raw_shra_w_rr(d,r);
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,shra_w_rr,(RW2 d, R1 r))

MIDFUNC(2,shra_b_rr,(RW1 d, R1 r))
{
    /* Can only do this with r==1, i.e. cl */
    if (isconst(r)) {
	COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val);
	return;
    }
    CLOBBER_SHRA;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,1,1);
    Dif (r!=1) {
	write_log("Illegal register %d in shra_b_rr\n",r);
	abort();
    }
    raw_shra_b_rr(d,r);
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,shra_b_rr,(RW1 d, R1 r))

MIDFUNC(2,setcc,(W1 d, IMM cc))
{
    CLOBBER_SETCC;
    d=writereg(d,1);
    raw_setcc(d,cc);
    unlock2(d);
}
MENDFUNC(2,setcc,(W1 d, IMM cc))

MIDFUNC(2,setcc_m,(IMM d, IMM cc))
{
    CLOBBER_SETCC;
    raw_setcc_m(d,cc);
}
MENDFUNC(2,setcc_m,(IMM d, IMM cc))

MIDFUNC(3,cmov_b_rr,(RW1 d, R1 s, IMM cc))
{
    if (d==s)
	return;
    CLOBBER_CMOV;
    s=readreg(s,1);
    d=rmw(d,1,1);
    raw_cmov_b_rr(d,s,cc);
    unlock2(s);
    unlock2(d);
}
MENDFUNC(3,cmov_b_rr,(RW1 d, R1 s, IMM cc))

MIDFUNC(3,cmov_w_rr,(RW2 d, R2 s, IMM cc))
{
    if (d==s)
	return;
    CLOBBER_CMOV;
    s=readreg(s,2);
    d=rmw(d,2,2);
    raw_cmov_w_rr(d,s,cc);
    unlock2(s);
    unlock2(d);
}
MENDFUNC(3,cmov_w_rr,(RW2 d, R2 s, IMM cc))

MIDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
{
    if (d==s)
	return;
    CLOBBER_CMOV;
    s=readreg(s,4);
    d=rmw(d,4,4);
    raw_cmov_l_rr(d,s,cc);
    unlock2(s);
    unlock2(d);
}
MENDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))

MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
{
    CLOBBER_CMOV;
    d=rmw(d,4,4);
    raw_cmov_l_rm(d,s,cc);
    unlock2(d);
}
MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))

MIDFUNC(2,bsf_l_rr,(W4 d, W4 s))
{
    CLOBBER_BSF;
    s = readreg(s, 4);
    d = writereg(d, 4);
    raw_bsf_l_rr(d, s);
    unlock2(s);
    unlock2(d);
}
MENDFUNC(2,bsf_l_rr,(W4 d, W4 s))

/* Set the Z flag depending on the value in s. Note that the value has
   to be 0 or -1 (or, more precisely, for non-zero values, bit 14 must
   be set)! */
MIDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
{
    CLOBBER_BSF;
    s=rmw_specific(s,4,4,FLAG_NREG3);
    tmp=writereg(tmp,4);
    raw_flags_set_zero(s, tmp);
    unlock2(tmp);
    unlock2(s);
}
MENDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))

MIDFUNC(2,imul_32_32,(RW4 d, R4 s))
{
    CLOBBER_MUL;
    s=readreg(s,4);
    d=rmw(d,4,4);
    raw_imul_32_32(d,s);
    unlock2(s);
    unlock2(d);
}
MENDFUNC(2,imul_32_32,(RW4 d, R4 s))

MIDFUNC(2,imul_64_32,(RW4 d, RW4 s))
{
    CLOBBER_MUL;
    s=rmw_specific(s,4,4,MUL_NREG2);
    d=rmw_specific(d,4,4,MUL_NREG1);
    raw_imul_64_32(d,s);
    unlock2(s);
    unlock2(d);
}
MENDFUNC(2,imul_64_32,(RW4 d, RW4 s))

MIDFUNC(2,mul_64_32,(RW4 d, RW4 s))
{
    CLOBBER_MUL;
    s=rmw_specific(s,4,4,MUL_NREG2);
    d=rmw_specific(d,4,4,MUL_NREG1);
    raw_mul_64_32(d,s);
    unlock2(s);
    unlock2(d);
}
MENDFUNC(2,mul_64_32,(RW4 d, RW4 s))

MIDFUNC(2,mul_32_32,(RW4 d, R4 s))
{
    CLOBBER_MUL;
    s=readreg(s,4);
    d=rmw(d,4,4);
    raw_mul_32_32(d,s);
    unlock2(s);
    unlock2(d);
}
MENDFUNC(2,mul_32_32,(RW4 d, R4 s))

#if SIZEOF_VOID_P == 8
MIDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
{
    int isrmw;

    if (isconst(s)) {
	set_const(d,(uae_s32)live.state[s].val);
	return;
    }

    CLOBBER_SE32;
    isrmw=(s==d);
    if (!isrmw) {
	s=readreg(s,4);
	d=writereg(d,4);
    }
    else {  /* If we try to lock this twice, with different sizes, we
	       are in trouble! */
	s=d=rmw(s,4,4);
    }
    raw_sign_extend_32_rr(d,s);
    if (!isrmw) {
	unlock2(d);
	unlock2(s);
    }
    else {
	unlock2(s);
    }
}
MENDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
#endif

MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
{
    int isrmw;

    if (isconst(s)) {
	set_const(d,(uae_s32)(uae_s16)live.state[s].val);
	return;
    }

    CLOBBER_SE16;
    isrmw=(s==d);
    if (!isrmw) {
	s=readreg(s,2);
	d=writereg(d,4);
    }
    else {  /* If we try to lock this twice, with different sizes, we
	       are in trouble! */
	s=d=rmw(s,4,2);
    }
    raw_sign_extend_16_rr(d,s);
    if (!isrmw) {
	unlock2(d);
	unlock2(s);
    }
    else {
	unlock2(s);
    }
}
MENDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))

MIDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
{
    int isrmw;

    if (isconst(s)) {
	set_const(d,(uae_s32)(uae_s8)live.state[s].val);
	return;
    }

    isrmw=(s==d);
    CLOBBER_SE8;
    if (!isrmw) {
	s=readreg(s,1);
	d=writereg(d,4);
    }
    else {  /* If we try to lock this twice, with different sizes, we
	       are in trouble! */
	s=d=rmw(s,4,1);
    }

    raw_sign_extend_8_rr(d,s);

    if (!isrmw) {
	unlock2(d);
	unlock2(s);
    }
    else {
	unlock2(s);
    }
}
MENDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))

MIDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
{
    int isrmw;

    if (isconst(s)) {
	set_const(d,(uae_u32)(uae_u16)live.state[s].val);
	return;
    }

    isrmw=(s==d);
    CLOBBER_ZE16;
    if (!isrmw) {
	s=readreg(s,2);
	d=writereg(d,4);
    }
    else {  /* If we try to lock this twice, with different sizes, we
	       are in trouble! */
	s=d=rmw(s,4,2);
    }
    raw_zero_extend_16_rr(d,s);

    if (!isrmw) {
	unlock2(d);
	unlock2(s);
    }
    else {
	unlock2(s);
    }
}
MENDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))

MIDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
{
    int isrmw;

    if (isconst(s)) {
	set_const(d,(uae_u32)(uae_u8)live.state[s].val);
	return;
    }

    isrmw=(s==d);
    CLOBBER_ZE8;
    if (!isrmw) {
	s=readreg(s,1);
	d=writereg(d,4);
    }
    else {  /* If we try to lock this twice, with different sizes, we
	       are in trouble!
*/ s=d=rmw(s,4,1); } raw_zero_extend_8_rr(d,s); if (!isrmw) { unlock2(d); unlock2(s); } else { unlock2(s); } } MENDFUNC(2,zero_extend_8_rr,(W4 d, R1 s)) MIDFUNC(2,mov_b_rr,(W1 d, R1 s)) { if (d==s) return; if (isconst(s)) { COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val); return; } CLOBBER_MOV; s=readreg(s,1); d=writereg(d,1); raw_mov_b_rr(d,s); unlock2(d); unlock2(s); } MENDFUNC(2,mov_b_rr,(W1 d, R1 s)) MIDFUNC(2,mov_w_rr,(W2 d, R2 s)) { if (d==s) return; if (isconst(s)) { COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val); return; } CLOBBER_MOV; s=readreg(s,2); d=writereg(d,2); raw_mov_w_rr(d,s); unlock2(d); unlock2(s); } MENDFUNC(2,mov_w_rr,(W2 d, R2 s)) MIDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor)) { CLOBBER_MOV; baser=readreg(baser,4); index=readreg(index,4); d=writereg(d,4); raw_mov_l_rrm_indexed(d,baser,index,factor); unlock2(d); unlock2(baser); unlock2(index); } MENDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor)) MIDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor)) { CLOBBER_MOV; baser=readreg(baser,4); index=readreg(index,4); d=writereg(d,2); raw_mov_w_rrm_indexed(d,baser,index,factor); unlock2(d); unlock2(baser); unlock2(index); } MENDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor)) MIDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor)) { CLOBBER_MOV; baser=readreg(baser,4); index=readreg(index,4); d=writereg(d,1); raw_mov_b_rrm_indexed(d,baser,index,factor); unlock2(d); unlock2(baser); unlock2(index); } MENDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor)) MIDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s)) { CLOBBER_MOV; baser=readreg(baser,4); index=readreg(index,4); s=readreg(s,4); Dif (baser==s || index==s) abort(); raw_mov_l_mrr_indexed(baser,index,factor,s); unlock2(s); unlock2(baser); unlock2(index); } MENDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s)) MIDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s)) { CLOBBER_MOV; baser=readreg(baser,4); index=readreg(index,4); s=readreg(s,2); raw_mov_w_mrr_indexed(baser,index,factor,s); unlock2(s); unlock2(baser); unlock2(index); } MENDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s)) MIDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s)) { CLOBBER_MOV; s=readreg(s,1); baser=readreg(baser,4); index=readreg(index,4); raw_mov_b_mrr_indexed(baser,index,factor,s); unlock2(s); unlock2(baser); unlock2(index); } MENDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s)) MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s)) { int basereg=baser; int indexreg=index; CLOBBER_MOV; s=readreg(s,4); baser=readreg_offset(baser,4); index=readreg_offset(index,4); base+=get_offset(basereg); base+=factor*get_offset(indexreg); raw_mov_l_bmrr_indexed(base,baser,index,factor,s); unlock2(s); unlock2(baser); unlock2(index); } MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s)) MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s)) { int basereg=baser; int indexreg=index; CLOBBER_MOV; s=readreg(s,2); baser=readreg_offset(baser,4); index=readreg_offset(index,4); base+=get_offset(basereg); base+=factor*get_offset(indexreg); raw_mov_w_bmrr_indexed(base,baser,index,factor,s); unlock2(s); unlock2(baser); unlock2(index); } MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s)) MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s)) { int basereg=baser; int 
indexreg=index; CLOBBER_MOV; s=readreg(s,1); baser=readreg_offset(baser,4); index=readreg_offset(index,4); base+=get_offset(basereg); base+=factor*get_offset(indexreg); raw_mov_b_bmrr_indexed(base,baser,index,factor,s); unlock2(s); unlock2(baser); unlock2(index); } MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s)) /* Read a long from base+baser+factor*index */ MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor)) { int basereg=baser; int indexreg=index; CLOBBER_MOV; baser=readreg_offset(baser,4); index=readreg_offset(index,4); base+=get_offset(basereg); base+=factor*get_offset(indexreg); d=writereg(d,4); raw_mov_l_brrm_indexed(d,base,baser,index,factor); unlock2(d); unlock2(baser); unlock2(index); } MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor)) MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor)) { int basereg=baser; int indexreg=index; CLOBBER_MOV; remove_offset(d,-1); baser=readreg_offset(baser,4); index=readreg_offset(index,4); base+=get_offset(basereg); base+=factor*get_offset(indexreg); d=writereg(d,2); raw_mov_w_brrm_indexed(d,base,baser,index,factor); unlock2(d); unlock2(baser); unlock2(index); } MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor)) MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor)) { int basereg=baser; int indexreg=index; CLOBBER_MOV; remove_offset(d,-1); baser=readreg_offset(baser,4); index=readreg_offset(index,4); base+=get_offset(basereg); base+=factor*get_offset(indexreg); d=writereg(d,1); raw_mov_b_brrm_indexed(d,base,baser,index,factor); unlock2(d); unlock2(baser); unlock2(index); } MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor)) /* Read a long from base+factor*index */ MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor)) { int indexreg=index; if (isconst(index)) { COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val); return; } CLOBBER_MOV; index=readreg_offset(index,4); base+=get_offset(indexreg)*factor; d=writereg(d,4); raw_mov_l_rm_indexed(d,base,index,factor); unlock2(index); unlock2(d); } MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor)) /* read the long at the address contained in s+offset and store in d */ MIDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset)) { if (isconst(s)) { COMPCALL(mov_l_rm)(d,live.state[s].val+offset); return; } CLOBBER_MOV; s=readreg(s,4); d=writereg(d,4); raw_mov_l_rR(d,s,offset); unlock2(d); unlock2(s); } MENDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset)) /* read the word at the address contained in s+offset and store in d */ MIDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset)) { if (isconst(s)) { COMPCALL(mov_w_rm)(d,live.state[s].val+offset); return; } CLOBBER_MOV; s=readreg(s,4); d=writereg(d,2); raw_mov_w_rR(d,s,offset); unlock2(d); unlock2(s); } MENDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset)) /* read the word at the address contained in s+offset and store in d */ MIDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset)) { if (isconst(s)) { COMPCALL(mov_b_rm)(d,live.state[s].val+offset); return; } CLOBBER_MOV; s=readreg(s,4); d=writereg(d,1); raw_mov_b_rR(d,s,offset); unlock2(d); unlock2(s); } MENDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset)) /* read the long at the address contained in s+offset and store in d */ MIDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset)) { int sreg=s; if (isconst(s)) { COMPCALL(mov_l_rm)(d,live.state[s].val+offset); return; } CLOBBER_MOV; s=readreg_offset(s,4); offset+=get_offset(sreg); d=writereg(d,4); 
raw_mov_l_brR(d,s,offset); unlock2(d); unlock2(s); } MENDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset)) /* read the word at the address contained in s+offset and store in d */ MIDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset)) { int sreg=s; if (isconst(s)) { COMPCALL(mov_w_rm)(d,live.state[s].val+offset); return; } CLOBBER_MOV; remove_offset(d,-1); s=readreg_offset(s,4); offset+=get_offset(sreg); d=writereg(d,2); raw_mov_w_brR(d,s,offset); unlock2(d); unlock2(s); } MENDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset)) /* read the word at the address contained in s+offset and store in d */ MIDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset)) { int sreg=s; if (isconst(s)) { COMPCALL(mov_b_rm)(d,live.state[s].val+offset); return; } CLOBBER_MOV; remove_offset(d,-1); s=readreg_offset(s,4); offset+=get_offset(sreg); d=writereg(d,1); raw_mov_b_brR(d,s,offset); unlock2(d); unlock2(s); } MENDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset)) MIDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset)) { int dreg=d; if (isconst(d)) { COMPCALL(mov_l_mi)(live.state[d].val+offset,i); return; } CLOBBER_MOV; d=readreg_offset(d,4); offset+=get_offset(dreg); raw_mov_l_Ri(d,i,offset); unlock2(d); } MENDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset)) MIDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset)) { int dreg=d; if (isconst(d)) { COMPCALL(mov_w_mi)(live.state[d].val+offset,i); return; } CLOBBER_MOV; d=readreg_offset(d,4); offset+=get_offset(dreg); raw_mov_w_Ri(d,i,offset); unlock2(d); } MENDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset)) MIDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset)) { int dreg=d; if (isconst(d)) { COMPCALL(mov_b_mi)(live.state[d].val+offset,i); return; } CLOBBER_MOV; d=readreg_offset(d,4); offset+=get_offset(dreg); raw_mov_b_Ri(d,i,offset); unlock2(d); } MENDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset)) /* Warning! OFFSET is byte sized only! 
*/ MIDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset)) { if (isconst(d)) { COMPCALL(mov_l_mr)(live.state[d].val+offset,s); return; } if (isconst(s)) { COMPCALL(mov_l_Ri)(d,live.state[s].val,offset); return; } CLOBBER_MOV; s=readreg(s,4); d=readreg(d,4); raw_mov_l_Rr(d,s,offset); unlock2(d); unlock2(s); } MENDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset)) MIDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset)) { if (isconst(d)) { COMPCALL(mov_w_mr)(live.state[d].val+offset,s); return; } if (isconst(s)) { COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset); return; } CLOBBER_MOV; s=readreg(s,2); d=readreg(d,4); raw_mov_w_Rr(d,s,offset); unlock2(d); unlock2(s); } MENDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset)) MIDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset)) { if (isconst(d)) { COMPCALL(mov_b_mr)(live.state[d].val+offset,s); return; } if (isconst(s)) { COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset); return; } CLOBBER_MOV; s=readreg(s,1); d=readreg(d,4); raw_mov_b_Rr(d,s,offset); unlock2(d); unlock2(s); } MENDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset)) MIDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset)) { if (isconst(s)) { COMPCALL(mov_l_ri)(d,live.state[s].val+offset); return; } #if USE_OFFSET if (d==s) { add_offset(d,offset); return; } #endif CLOBBER_LEA; s=readreg(s,4); d=writereg(d,4); raw_lea_l_brr(d,s,offset); unlock2(d); unlock2(s); } MENDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset)) MIDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset)) { if (!offset) { COMPCALL(lea_l_rr_indexed)(d,s,index,factor); return; } CLOBBER_LEA; s=readreg(s,4); index=readreg(index,4); d=writereg(d,4); raw_lea_l_brr_indexed(d,s,index,factor,offset); unlock2(d); unlock2(index); unlock2(s); } MENDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset)) MIDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor)) { CLOBBER_LEA; s=readreg(s,4); index=readreg(index,4); d=writereg(d,4); raw_lea_l_rr_indexed(d,s,index,factor); unlock2(d); unlock2(index); unlock2(s); } MENDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor)) /* write d to the long at the address contained in s+offset */ MIDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset)) { int dreg=d; if (isconst(d)) { COMPCALL(mov_l_mr)(live.state[d].val+offset,s); return; } CLOBBER_MOV; s=readreg(s,4); d=readreg_offset(d,4); offset+=get_offset(dreg); raw_mov_l_bRr(d,s,offset); unlock2(d); unlock2(s); } MENDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset)) /* write the word at the address contained in s+offset and store in d */ MIDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset)) { int dreg=d; if (isconst(d)) { COMPCALL(mov_w_mr)(live.state[d].val+offset,s); return; } CLOBBER_MOV; s=readreg(s,2); d=readreg_offset(d,4); offset+=get_offset(dreg); raw_mov_w_bRr(d,s,offset); unlock2(d); unlock2(s); } MENDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset)) MIDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset)) { int dreg=d; if (isconst(d)) { COMPCALL(mov_b_mr)(live.state[d].val+offset,s); return; } CLOBBER_MOV; s=readreg(s,1); d=readreg_offset(d,4); offset+=get_offset(dreg); raw_mov_b_bRr(d,s,offset); unlock2(d); unlock2(s); } MENDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset)) MIDFUNC(1,bswap_32,(RW4 r)) { int reg=r; if (isconst(r)) { uae_u32 oldv=live.state[r].val; live.state[r].val=reverse32(oldv); return; } CLOBBER_SW32; r=rmw(r,4,4); raw_bswap_32(r); unlock2(r); } MENDFUNC(1,bswap_32,(RW4 r)) MIDFUNC(1,bswap_16,(RW2 r)) { if (isconst(r)) { uae_u32 oldv=live.state[r].val; live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) | (oldv&0xffff0000); return; } CLOBBER_SW16; r=rmw(r,2,2); 
MIDFUNC(2,mov_l_rr,(W4 d, R4 s))
{ int olds;
  if (d==s) { /* How pointless! */ return; }
  if (isconst(s)) { COMPCALL(mov_l_ri)(d,live.state[s].val); return; }
  olds=s;
  disassociate(d);
  s=readreg_offset(s,4);
  live.state[d].realreg=s;
  live.state[d].realind=live.nat[s].nholds;
  live.state[d].val=live.state[olds].val;
  live.state[d].validsize=4;
  live.state[d].dirtysize=4;
  set_status(d,DIRTY);
  live.nat[s].holds[live.nat[s].nholds]=d;
  live.nat[s].nholds++;
  log_clobberreg(d);
  /* write_log("Added %d to nreg %d(%d), now holds %d regs\n",
     d,s,live.state[d].realind,live.nat[s].nholds); */
  unlock2(s); }
MENDFUNC(2,mov_l_rr,(W4 d, R4 s))

MIDFUNC(2,mov_l_mr,(IMM d, R4 s))
{ if (isconst(s)) { COMPCALL(mov_l_mi)(d,live.state[s].val); return; }
  CLOBBER_MOV; s=readreg(s,4); raw_mov_l_mr(d,s); unlock2(s); }
MENDFUNC(2,mov_l_mr,(IMM d, R4 s))

MIDFUNC(2,mov_w_mr,(IMM d, R2 s))
{ if (isconst(s)) { COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val); return; }
  CLOBBER_MOV; s=readreg(s,2); raw_mov_w_mr(d,s); unlock2(s); }
MENDFUNC(2,mov_w_mr,(IMM d, R2 s))

MIDFUNC(2,mov_w_rm,(W2 d, IMM s))
{ CLOBBER_MOV; d=writereg(d,2); raw_mov_w_rm(d,s); unlock2(d); }
MENDFUNC(2,mov_w_rm,(W2 d, IMM s))

MIDFUNC(2,mov_b_mr,(IMM d, R1 s))
{ if (isconst(s)) { COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val); return; }
  CLOBBER_MOV; s=readreg(s,1); raw_mov_b_mr(d,s); unlock2(s); }
MENDFUNC(2,mov_b_mr,(IMM d, R1 s))

MIDFUNC(2,mov_b_rm,(W1 d, IMM s))
{ CLOBBER_MOV; d=writereg(d,1); raw_mov_b_rm(d,s); unlock2(d); }
MENDFUNC(2,mov_b_rm,(W1 d, IMM s))

MIDFUNC(2,mov_l_ri,(W4 d, IMM s))
{ set_const(d,s); return; }
MENDFUNC(2,mov_l_ri,(W4 d, IMM s))

MIDFUNC(2,mov_w_ri,(W2 d, IMM s))
{ CLOBBER_MOV; d=writereg(d,2); raw_mov_w_ri(d,s); unlock2(d); }
MENDFUNC(2,mov_w_ri,(W2 d, IMM s))

MIDFUNC(2,mov_b_ri,(W1 d, IMM s))
{ CLOBBER_MOV; d=writereg(d,1); raw_mov_b_ri(d,s); unlock2(d); }
MENDFUNC(2,mov_b_ri,(W1 d, IMM s))

MIDFUNC(2,add_l_mi,(IMM d, IMM s))
{ CLOBBER_ADD; raw_add_l_mi(d,s); }
MENDFUNC(2,add_l_mi,(IMM d, IMM s))

MIDFUNC(2,add_w_mi,(IMM d, IMM s))
{ CLOBBER_ADD; raw_add_w_mi(d,s); }
MENDFUNC(2,add_w_mi,(IMM d, IMM s))

MIDFUNC(2,add_b_mi,(IMM d, IMM s))
{ CLOBBER_ADD; raw_add_b_mi(d,s); }
MENDFUNC(2,add_b_mi,(IMM d, IMM s))

MIDFUNC(2,test_l_ri,(R4 d, IMM i))
{ CLOBBER_TEST; d=readreg(d,4); raw_test_l_ri(d,i); unlock2(d); }
MENDFUNC(2,test_l_ri,(R4 d, IMM i))

MIDFUNC(2,test_l_rr,(R4 d, R4 s))
{ CLOBBER_TEST; d=readreg(d,4); s=readreg(s,4); raw_test_l_rr(d,s); unlock2(d); unlock2(s); }
MENDFUNC(2,test_l_rr,(R4 d, R4 s))

MIDFUNC(2,test_w_rr,(R2 d, R2 s))
{ CLOBBER_TEST; d=readreg(d,2); s=readreg(s,2); raw_test_w_rr(d,s); unlock2(d); unlock2(s); }
MENDFUNC(2,test_w_rr,(R2 d, R2 s))

MIDFUNC(2,test_b_rr,(R1 d, R1 s))
{ CLOBBER_TEST; d=readreg(d,1); s=readreg(s,1); raw_test_b_rr(d,s); unlock2(d); unlock2(s); }
MENDFUNC(2,test_b_rr,(R1 d, R1 s))

MIDFUNC(2,and_l_ri,(RW4 d, IMM i))
{ if (isconst(d) && !needflags) { live.state[d].val &= i; return; }
  CLOBBER_AND; d=rmw(d,4,4); raw_and_l_ri(d,i); unlock2(d); }
MENDFUNC(2,and_l_ri,(RW4 d, IMM i))

MIDFUNC(2,and_l,(RW4 d, R4 s))
{ CLOBBER_AND; s=readreg(s,4); d=rmw(d,4,4); raw_and_l(d,s); unlock2(d); unlock2(s); }
MENDFUNC(2,and_l,(RW4 d, R4 s))

MIDFUNC(2,and_w,(RW2 d, R2 s))
{ CLOBBER_AND; s=readreg(s,2); d=rmw(d,2,2); raw_and_w(d,s); unlock2(d); unlock2(s); }
MENDFUNC(2,and_w,(RW2 d, R2 s))

MIDFUNC(2,and_b,(RW1 d, R1 s))
{ CLOBBER_AND; s=readreg(s,1); d=rmw(d,1,1); raw_and_b(d,s); unlock2(d); unlock2(s); }
MENDFUNC(2,and_b,(RW1 d, R1 s))
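/* Illustrative sketch (not part of the build): mov_l_rr above emits no
   native mov at all -- it just records d as another holder of the native
   register that already holds s (live.nat[...].nholds grows by one). The
   real copy only happens once one of the two is written: */
#if 0
static void example_register_aliasing(int vD, int vS)
{
    mov_l_rr(vD,vS);    /* no code: vD aliases vS's native register */
    and_l_ri(vD,0xff);  /* rmw() first gives vD a private copy */
}
#endif
// gb--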
used for making an fpcr value in compemu_fpp.cpp MIDFUNC(2,or_l_rm,(RW4 d, IMM s)) { CLOBBER_OR; d=rmw(d,4,4); raw_or_l_rm(d,s); unlock2(d); } MENDFUNC(2,or_l_rm,(RW4 d, IMM s)) MIDFUNC(2,or_l_ri,(RW4 d, IMM i)) { if (isconst(d) && !needflags) { live.state[d].val|=i; return; } CLOBBER_OR; d=rmw(d,4,4); raw_or_l_ri(d,i); unlock2(d); } MENDFUNC(2,or_l_ri,(RW4 d, IMM i)) MIDFUNC(2,or_l,(RW4 d, R4 s)) { if (isconst(d) && isconst(s) && !needflags) { live.state[d].val|=live.state[s].val; return; } CLOBBER_OR; s=readreg(s,4); d=rmw(d,4,4); raw_or_l(d,s); unlock2(d); unlock2(s); } MENDFUNC(2,or_l,(RW4 d, R4 s)) MIDFUNC(2,or_w,(RW2 d, R2 s)) { CLOBBER_OR; s=readreg(s,2); d=rmw(d,2,2); raw_or_w(d,s); unlock2(d); unlock2(s); } MENDFUNC(2,or_w,(RW2 d, R2 s)) MIDFUNC(2,or_b,(RW1 d, R1 s)) { CLOBBER_OR; s=readreg(s,1); d=rmw(d,1,1); raw_or_b(d,s); unlock2(d); unlock2(s); } MENDFUNC(2,or_b,(RW1 d, R1 s)) MIDFUNC(2,adc_l,(RW4 d, R4 s)) { CLOBBER_ADC; s=readreg(s,4); d=rmw(d,4,4); raw_adc_l(d,s); unlock2(d); unlock2(s); } MENDFUNC(2,adc_l,(RW4 d, R4 s)) MIDFUNC(2,adc_w,(RW2 d, R2 s)) { CLOBBER_ADC; s=readreg(s,2); d=rmw(d,2,2); raw_adc_w(d,s); unlock2(d); unlock2(s); } MENDFUNC(2,adc_w,(RW2 d, R2 s)) MIDFUNC(2,adc_b,(RW1 d, R1 s)) { CLOBBER_ADC; s=readreg(s,1); d=rmw(d,1,1); raw_adc_b(d,s); unlock2(d); unlock2(s); } MENDFUNC(2,adc_b,(RW1 d, R1 s)) MIDFUNC(2,add_l,(RW4 d, R4 s)) { if (isconst(s)) { COMPCALL(add_l_ri)(d,live.state[s].val); return; } CLOBBER_ADD; s=readreg(s,4); d=rmw(d,4,4); raw_add_l(d,s); unlock2(d); unlock2(s); } MENDFUNC(2,add_l,(RW4 d, R4 s)) MIDFUNC(2,add_w,(RW2 d, R2 s)) { if (isconst(s)) { COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val); return; } CLOBBER_ADD; s=readreg(s,2); d=rmw(d,2,2); raw_add_w(d,s); unlock2(d); unlock2(s); } MENDFUNC(2,add_w,(RW2 d, R2 s)) MIDFUNC(2,add_b,(RW1 d, R1 s)) { if (isconst(s)) { COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val); return; } CLOBBER_ADD; s=readreg(s,1); d=rmw(d,1,1); raw_add_b(d,s); unlock2(d); unlock2(s); } MENDFUNC(2,add_b,(RW1 d, R1 s)) MIDFUNC(2,sub_l_ri,(RW4 d, IMM i)) { if (!i && !needflags) return; if (isconst(d) && !needflags) { live.state[d].val-=i; return; } #if USE_OFFSET if (!needflags) { add_offset(d,-i); return; } #endif CLOBBER_SUB; d=rmw(d,4,4); raw_sub_l_ri(d,i); unlock2(d); } MENDFUNC(2,sub_l_ri,(RW4 d, IMM i)) MIDFUNC(2,sub_w_ri,(RW2 d, IMM i)) { if (!i && !needflags) return; CLOBBER_SUB; d=rmw(d,2,2); raw_sub_w_ri(d,i); unlock2(d); } MENDFUNC(2,sub_w_ri,(RW2 d, IMM i)) MIDFUNC(2,sub_b_ri,(RW1 d, IMM i)) { if (!i && !needflags) return; CLOBBER_SUB; d=rmw(d,1,1); raw_sub_b_ri(d,i); unlock2(d); } MENDFUNC(2,sub_b_ri,(RW1 d, IMM i)) MIDFUNC(2,add_l_ri,(RW4 d, IMM i)) { if (!i && !needflags) return; if (isconst(d) && !needflags) { live.state[d].val+=i; return; } #if USE_OFFSET if (!needflags) { add_offset(d,i); return; } #endif CLOBBER_ADD; d=rmw(d,4,4); raw_add_l_ri(d,i); unlock2(d); } MENDFUNC(2,add_l_ri,(RW4 d, IMM i)) MIDFUNC(2,add_w_ri,(RW2 d, IMM i)) { if (!i && !needflags) return; CLOBBER_ADD; d=rmw(d,2,2); raw_add_w_ri(d,i); unlock2(d); } MENDFUNC(2,add_w_ri,(RW2 d, IMM i)) MIDFUNC(2,add_b_ri,(RW1 d, IMM i)) { if (!i && !needflags) return; CLOBBER_ADD; d=rmw(d,1,1); raw_add_b_ri(d,i); unlock2(d); } MENDFUNC(2,add_b_ri,(RW1 d, IMM i)) MIDFUNC(2,sbb_l,(RW4 d, R4 s)) { CLOBBER_SBB; s=readreg(s,4); d=rmw(d,4,4); raw_sbb_l(d,s); unlock2(d); unlock2(s); } MENDFUNC(2,sbb_l,(RW4 d, R4 s)) MIDFUNC(2,sbb_w,(RW2 d, R2 s)) { CLOBBER_SBB; s=readreg(s,2); d=rmw(d,2,2); raw_sbb_w(d,s); unlock2(d); unlock2(s); } 
MENDFUNC(2,sbb_w,(RW2 d, R2 s)) MIDFUNC(2,sbb_b,(RW1 d, R1 s)) { CLOBBER_SBB; s=readreg(s,1); d=rmw(d,1,1); raw_sbb_b(d,s); unlock2(d); unlock2(s); } MENDFUNC(2,sbb_b,(RW1 d, R1 s)) MIDFUNC(2,sub_l,(RW4 d, R4 s)) { if (isconst(s)) { COMPCALL(sub_l_ri)(d,live.state[s].val); return; } CLOBBER_SUB; s=readreg(s,4); d=rmw(d,4,4); raw_sub_l(d,s); unlock2(d); unlock2(s); } MENDFUNC(2,sub_l,(RW4 d, R4 s)) MIDFUNC(2,sub_w,(RW2 d, R2 s)) { if (isconst(s)) { COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val); return; } CLOBBER_SUB; s=readreg(s,2); d=rmw(d,2,2); raw_sub_w(d,s); unlock2(d); unlock2(s); } MENDFUNC(2,sub_w,(RW2 d, R2 s)) MIDFUNC(2,sub_b,(RW1 d, R1 s)) { if (isconst(s)) { COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val); return; } CLOBBER_SUB; s=readreg(s,1); d=rmw(d,1,1); raw_sub_b(d,s); unlock2(d); unlock2(s); } MENDFUNC(2,sub_b,(RW1 d, R1 s)) MIDFUNC(2,cmp_l,(R4 d, R4 s)) { CLOBBER_CMP; s=readreg(s,4); d=readreg(d,4); raw_cmp_l(d,s); unlock2(d); unlock2(s); } MENDFUNC(2,cmp_l,(R4 d, R4 s)) MIDFUNC(2,cmp_l_ri,(R4 r, IMM i)) { CLOBBER_CMP; r=readreg(r,4); raw_cmp_l_ri(r,i); unlock2(r); } MENDFUNC(2,cmp_l_ri,(R4 r, IMM i)) MIDFUNC(2,cmp_w,(R2 d, R2 s)) { CLOBBER_CMP; s=readreg(s,2); d=readreg(d,2); raw_cmp_w(d,s); unlock2(d); unlock2(s); } MENDFUNC(2,cmp_w,(R2 d, R2 s)) MIDFUNC(2,cmp_b,(R1 d, R1 s)) { CLOBBER_CMP; s=readreg(s,1); d=readreg(d,1); raw_cmp_b(d,s); unlock2(d); unlock2(s); } MENDFUNC(2,cmp_b,(R1 d, R1 s)) MIDFUNC(2,xor_l,(RW4 d, R4 s)) { CLOBBER_XOR; s=readreg(s,4); d=rmw(d,4,4); raw_xor_l(d,s); unlock2(d); unlock2(s); } MENDFUNC(2,xor_l,(RW4 d, R4 s)) MIDFUNC(2,xor_w,(RW2 d, R2 s)) { CLOBBER_XOR; s=readreg(s,2); d=rmw(d,2,2); raw_xor_w(d,s); unlock2(d); unlock2(s); } MENDFUNC(2,xor_w,(RW2 d, R2 s)) MIDFUNC(2,xor_b,(RW1 d, R1 s)) { CLOBBER_XOR; s=readreg(s,1); d=rmw(d,1,1); raw_xor_b(d,s); unlock2(d); unlock2(s); } MENDFUNC(2,xor_b,(RW1 d, R1 s)) MIDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize)) { clobber_flags(); remove_all_offsets(); if (osize==4) { if (out1!=in1 && out1!=r) { COMPCALL(forget_about)(out1); } } else { tomem_c(out1); } in1=readreg_specific(in1,isize,REG_PAR1); r=readreg(r,4); prepare_for_call_1(); /* This should ensure that there won't be any need for swapping nregs in prepare_for_call_2 */ #if USE_NORMAL_CALLING_CONVENTION raw_push_l_r(in1); #endif unlock2(in1); unlock2(r); prepare_for_call_2(); raw_call_r(r); #if USE_NORMAL_CALLING_CONVENTION raw_inc_sp(4); #endif live.nat[REG_RESULT].holds[0]=out1; live.nat[REG_RESULT].nholds=1; live.nat[REG_RESULT].touched=touchcnt++; live.state[out1].realreg=REG_RESULT; live.state[out1].realind=0; live.state[out1].val=0; live.state[out1].validsize=osize; live.state[out1].dirtysize=osize; set_status(out1,DIRTY); } MENDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize)) MIDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2)) { clobber_flags(); remove_all_offsets(); in1=readreg_specific(in1,isize1,REG_PAR1); in2=readreg_specific(in2,isize2,REG_PAR2); r=readreg(r,4); prepare_for_call_1(); /* This should ensure that there won't be any need for swapping nregs in prepare_for_call_2 */ #if USE_NORMAL_CALLING_CONVENTION raw_push_l_r(in2); raw_push_l_r(in1); #endif unlock2(r); unlock2(in1); unlock2(in2); prepare_for_call_2(); raw_call_r(r); #if USE_NORMAL_CALLING_CONVENTION raw_inc_sp(8); #endif } MENDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2)) /* forget_about() takes a mid-layer register */ MIDFUNC(1,forget_about,(W4 r)) { if (isinreg(r)) disassociate(r); 
live.state[r].val=0; set_status(r,UNDEF); } MENDFUNC(1,forget_about,(W4 r)) MIDFUNC(0,nop,(void)) { raw_nop(); } MENDFUNC(0,nop,(void)) MIDFUNC(1,f_forget_about,(FW r)) { if (f_isinreg(r)) f_disassociate(r); live.fate[r].status=UNDEF; } MENDFUNC(1,f_forget_about,(FW r)) MIDFUNC(1,fmov_pi,(FW r)) { r=f_writereg(r); raw_fmov_pi(r); f_unlock(r); } MENDFUNC(1,fmov_pi,(FW r)) MIDFUNC(1,fmov_log10_2,(FW r)) { r=f_writereg(r); raw_fmov_log10_2(r); f_unlock(r); } MENDFUNC(1,fmov_log10_2,(FW r)) MIDFUNC(1,fmov_log2_e,(FW r)) { r=f_writereg(r); raw_fmov_log2_e(r); f_unlock(r); } MENDFUNC(1,fmov_log2_e,(FW r)) MIDFUNC(1,fmov_loge_2,(FW r)) { r=f_writereg(r); raw_fmov_loge_2(r); f_unlock(r); } MENDFUNC(1,fmov_loge_2,(FW r)) MIDFUNC(1,fmov_1,(FW r)) { r=f_writereg(r); raw_fmov_1(r); f_unlock(r); } MENDFUNC(1,fmov_1,(FW r)) MIDFUNC(1,fmov_0,(FW r)) { r=f_writereg(r); raw_fmov_0(r); f_unlock(r); } MENDFUNC(1,fmov_0,(FW r)) MIDFUNC(2,fmov_rm,(FW r, MEMR m)) { r=f_writereg(r); raw_fmov_rm(r,m); f_unlock(r); } MENDFUNC(2,fmov_rm,(FW r, MEMR m)) MIDFUNC(2,fmovi_rm,(FW r, MEMR m)) { r=f_writereg(r); raw_fmovi_rm(r,m); f_unlock(r); } MENDFUNC(2,fmovi_rm,(FW r, MEMR m)) MIDFUNC(2,fmovi_mr,(MEMW m, FR r)) { r=f_readreg(r); raw_fmovi_mr(m,r); f_unlock(r); } MENDFUNC(2,fmovi_mr,(MEMW m, FR r)) MIDFUNC(2,fmovs_rm,(FW r, MEMR m)) { r=f_writereg(r); raw_fmovs_rm(r,m); f_unlock(r); } MENDFUNC(2,fmovs_rm,(FW r, MEMR m)) MIDFUNC(2,fmovs_mr,(MEMW m, FR r)) { r=f_readreg(r); raw_fmovs_mr(m,r); f_unlock(r); } MENDFUNC(2,fmovs_mr,(MEMW m, FR r)) MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r)) { r=f_readreg(r); raw_fmov_ext_mr(m,r); f_unlock(r); } MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r)) MIDFUNC(2,fmov_mr,(MEMW m, FR r)) { r=f_readreg(r); raw_fmov_mr(m,r); f_unlock(r); } MENDFUNC(2,fmov_mr,(MEMW m, FR r)) MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m)) { r=f_writereg(r); raw_fmov_ext_rm(r,m); f_unlock(r); } MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m)) MIDFUNC(2,fmov_rr,(FW d, FR s)) { if (d==s) { /* How pointless! 
*/
		return;
	}
#if USE_F_ALIAS
	f_disassociate(d);
	s=f_readreg(s);
	live.fate[d].realreg=s;
	live.fate[d].realind=live.fat[s].nholds;
	live.fate[d].status=DIRTY;
	live.fat[s].holds[live.fat[s].nholds]=d;
	live.fat[s].nholds++;
	f_unlock(s);
#else
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fmov_rr(d,s);
	f_unlock(s);
	f_unlock(d);
#endif
}
MENDFUNC(2,fmov_rr,(FW d, FR s))

MIDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
{ index=readreg(index,4); raw_fldcw_m_indexed(index,base); unlock2(index); }
MENDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))

MIDFUNC(1,ftst_r,(FR r))
{ r=f_readreg(r); raw_ftst_r(r); f_unlock(r); }
MENDFUNC(1,ftst_r,(FR r))

MIDFUNC(0,dont_care_fflags,(void))
{ f_disassociate(FP_RESULT); }
MENDFUNC(0,dont_care_fflags,(void))

MIDFUNC(2,fsqrt_rr,(FW d, FR s))
{ s=f_readreg(s); d=f_writereg(d); raw_fsqrt_rr(d,s); f_unlock(s); f_unlock(d); }
MENDFUNC(2,fsqrt_rr,(FW d, FR s))

MIDFUNC(2,fabs_rr,(FW d, FR s))
{ s=f_readreg(s); d=f_writereg(d); raw_fabs_rr(d,s); f_unlock(s); f_unlock(d); }
MENDFUNC(2,fabs_rr,(FW d, FR s))

MIDFUNC(2,fsin_rr,(FW d, FR s))
{ s=f_readreg(s); d=f_writereg(d); raw_fsin_rr(d,s); f_unlock(s); f_unlock(d); }
MENDFUNC(2,fsin_rr,(FW d, FR s))

MIDFUNC(2,fcos_rr,(FW d, FR s))
{ s=f_readreg(s); d=f_writereg(d); raw_fcos_rr(d,s); f_unlock(s); f_unlock(d); }
MENDFUNC(2,fcos_rr,(FW d, FR s))

MIDFUNC(2,ftwotox_rr,(FW d, FR s))
{ s=f_readreg(s); d=f_writereg(d); raw_ftwotox_rr(d,s); f_unlock(s); f_unlock(d); }
MENDFUNC(2,ftwotox_rr,(FW d, FR s))

MIDFUNC(2,fetox_rr,(FW d, FR s))
{ s=f_readreg(s); d=f_writereg(d); raw_fetox_rr(d,s); f_unlock(s); f_unlock(d); }
MENDFUNC(2,fetox_rr,(FW d, FR s))

MIDFUNC(2,frndint_rr,(FW d, FR s))
{ s=f_readreg(s); d=f_writereg(d); raw_frndint_rr(d,s); f_unlock(s); f_unlock(d); }
MENDFUNC(2,frndint_rr,(FW d, FR s))

MIDFUNC(2,flog2_rr,(FW d, FR s))
{ s=f_readreg(s); d=f_writereg(d); raw_flog2_rr(d,s); f_unlock(s); f_unlock(d); }
MENDFUNC(2,flog2_rr,(FW d, FR s))

MIDFUNC(2,fneg_rr,(FW d, FR s))
{ s=f_readreg(s); d=f_writereg(d); raw_fneg_rr(d,s); f_unlock(s); f_unlock(d); }
MENDFUNC(2,fneg_rr,(FW d, FR s))

MIDFUNC(2,fadd_rr,(FRW d, FR s))
{ s=f_readreg(s); d=f_rmw(d); raw_fadd_rr(d,s); f_unlock(s); f_unlock(d); }
MENDFUNC(2,fadd_rr,(FRW d, FR s))

MIDFUNC(2,fsub_rr,(FRW d, FR s))
{ s=f_readreg(s); d=f_rmw(d); raw_fsub_rr(d,s); f_unlock(s); f_unlock(d); }
MENDFUNC(2,fsub_rr,(FRW d, FR s))

MIDFUNC(2,fcmp_rr,(FR d, FR s))
{ d=f_readreg(d); s=f_readreg(s); raw_fcmp_rr(d,s); f_unlock(s); f_unlock(d); }
MENDFUNC(2,fcmp_rr,(FR d, FR s))

MIDFUNC(2,fdiv_rr,(FRW d, FR s))
{ s=f_readreg(s); d=f_rmw(d); raw_fdiv_rr(d,s); f_unlock(s); f_unlock(d); }
MENDFUNC(2,fdiv_rr,(FRW d, FR s))

MIDFUNC(2,frem_rr,(FRW d, FR s))
{ s=f_readreg(s); d=f_rmw(d); raw_frem_rr(d,s); f_unlock(s); f_unlock(d); }
MENDFUNC(2,frem_rr,(FRW d, FR s))

MIDFUNC(2,frem1_rr,(FRW d, FR s))
{ s=f_readreg(s); d=f_rmw(d); raw_frem1_rr(d,s); f_unlock(s); f_unlock(d); }
MENDFUNC(2,frem1_rr,(FRW d, FR s))

MIDFUNC(2,fmul_rr,(FRW d, FR s))
{ s=f_readreg(s); d=f_rmw(d); raw_fmul_rr(d,s); f_unlock(s); f_unlock(d); }
MENDFUNC(2,fmul_rr,(FRW d, FR s))
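/* Illustrative sketch (not part of the build): with USE_F_ALIAS, fmov_rr
   above behaves like the integer mov_l_rr -- the destination is merely
   recorded as another holder of the source's native FP register, and no
   fmov is emitted until one of the two is written: */
#if 0
static void example_fp_aliasing(int fD, int fS)
{
    fmov_rr(fD,fS);     /* alias only: live.fat[...].nholds grows by one */
    fsqrt_rr(fD,fS);    /* f_writereg(fD) breaks the alias before writing */
}
#endif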
/********************************************************************
 * Support functions exposed to gencomp. CREATE time                *
 ********************************************************************/

void set_zero(int r, int tmp)
{
	if (setzflg_uses_bsf)
		bsf_l_rr(r,r);
	else
		simulate_bsf(tmp,r);
}

int kill_rodent(int r)
{
	return KILLTHERAT &&
		have_rat_stall &&
		(live.state[r].status==INMEM ||
		 live.state[r].status==CLEAN ||
		 live.state[r].status==ISCONST ||
		 live.state[r].dirtysize==4);
}

uae_u32 get_const(int r)
{
	Dif (!isconst(r)) {
		write_log("Register %d should be constant, but isn't\n",r);
		abort();
	}
	return live.state[r].val;
}

void sync_m68k_pc(void)
{
	if (m68k_pc_offset) {
		add_l_ri(PC_P,m68k_pc_offset);
		comp_pc_p+=m68k_pc_offset;
		m68k_pc_offset=0;
	}
}

/********************************************************************
 * Scratch registers management                                     *
 ********************************************************************/

struct scratch_t {
	uae_u32      regs[VREGS];
	fpu_register fregs[VFREGS];
};

static scratch_t scratch;

/********************************************************************
 * Support functions exposed to newcpu                              *
 ********************************************************************/

static inline const char *str_on_off(bool b)
{
	return b ? "on" : "off";
}

void compiler_init(void)
{
	static bool initialized = false;
	if (initialized)
		return;

#if JIT_DEBUG
	// JIT debug mode ?
	JITDebug = PrefsFindBool("jitdebug");
#endif
	write_log("<JIT compiler> : enable runtime disassemblers : %s\n", JITDebug ? "yes" : "no");

#ifdef USE_JIT_FPU
	// Use JIT compiler for FPU instructions ?
	avoid_fpu = !PrefsFindBool("jitfpu");
#else
	// JIT FPU is always disabled
	avoid_fpu = true;
#endif
	write_log("<JIT compiler> : compile FPU instructions : %s\n", !avoid_fpu ? "yes" : "no");

	// Get size of the translation cache (in KB)
	cache_size = PrefsFindInt32("jitcachesize");
	write_log("<JIT compiler> : requested translation cache size : %d KB\n", cache_size);

	// Initialize target CPU (check for features, e.g. CMOV, rat stalls)
	raw_init_cpu();
	setzflg_uses_bsf = target_check_bsf();
	write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
	write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");
	write_log("<JIT compiler> : alignment for loops, jumps are %d, %d\n", align_loops, align_jumps);

	// Translation cache flush mechanism
	lazy_flush = PrefsFindBool("jitlazyflush");
	write_log("<JIT compiler> : lazy translation cache invalidation : %s\n", str_on_off(lazy_flush));
	flush_icache = lazy_flush ? flush_icache_lazy : flush_icache_hard;

	// Compiler features
	write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1));
	write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS));
	write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET));
#if USE_INLINING
	follow_const_jumps = PrefsFindBool("jitinline");
#endif
	write_log("<JIT compiler> : translate through constant jumps : %s\n", str_on_off(follow_const_jumps));
	write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA));

	// Build compiler tables
	build_comp();

	initialized = true;

#if PROFILE_UNTRANSLATED_INSNS
	write_log("<JIT compiler> : gather statistics on untranslated insns count\n");
#endif

#if PROFILE_COMPILE_TIME
	write_log("<JIT compiler> : gather statistics on translation time\n");
	emul_start_time = clock();
#endif
}

void compiler_exit(void)
{
#if PROFILE_COMPILE_TIME
	emul_end_time = clock();
#endif

	// Deallocate translation cache
	if (compiled_code) {
		vm_release(compiled_code, cache_size * 1024);
		compiled_code = 0;
	}

	// Deallocate popallspace
	if (popallspace) {
		vm_release(popallspace, POPALLSPACE_SIZE);
		popallspace = 0;
	}

#if PROFILE_COMPILE_TIME
	write_log("### Compile Block statistics\n");
	write_log("Number of calls to compile_block : %d\n", compile_count);
	uae_u32 emul_time = emul_end_time - emul_start_time;
	write_log("Total emulation time : %.1f sec\n", double(emul_time)/double(CLOCKS_PER_SEC));
	write_log("Total compilation time : %.1f sec (%.1f%%)\n", double(compile_time)/double(CLOCKS_PER_SEC),
		100.0*double(compile_time)/double(emul_time));
	write_log("\n");
#endif

#if PROFILE_UNTRANSLATED_INSNS
	uae_u64 untranslated_count = 0;
	for (int i = 0; i < 65536; i++) {
		opcode_nums[i] = i;
		untranslated_count += raw_cputbl_count[i];
	}
	write_log("Sorting out untranslated instructions count...\n");
	qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn);

	write_log("\nRank Opc Count Name\n");
	for (int i = 0; i < untranslated_top_ten; i++) {
		uae_u32 count = raw_cputbl_count[opcode_nums[i]];
		struct instr *dp;
		struct mnemolookup *lookup;
		if (!count)
			break;
		dp = table68k + opcode_nums[i];
		for (lookup = lookuptab; lookup->mnemo != dp->mnemo; lookup++)
			;
		write_log("%03d: %04x %10lu %s\n", i, opcode_nums[i], count, lookup->name);
	}
#endif

#if RECORD_REGISTER_USAGE
	int reg_count_ids[16];
	uint64 tot_reg_count = 0;
	for (int i = 0; i < 16; i++) {
		reg_count_ids[i] = i;
		tot_reg_count += reg_count[i];
	}
	qsort(reg_count_ids, 16, sizeof(int), reg_count_compare);
	uint64 cum_reg_count = 0;
	for (int i = 0; i < 16; i++) {
		int r = reg_count_ids[i];
		cum_reg_count += reg_count[r];
		printf("%c%d : %16ld %2.1f%% [%2.1f]\n", r < 8 ? 'D' : 'A', r % 8,
			reg_count[r],
			100.0*double(reg_count[r])/double(tot_reg_count),
			100.0*double(cum_reg_count)/double(tot_reg_count));
	}
#endif
}
Disabling JIT.\n", MIN_CACHE_SIZE); return false; } // Enable JIT for 68020+ emulation only if (CPUType < 2) { write_log(" : JIT is not supported in 680%d0 emulation mode, disabling.\n", CPUType); return false; } return true; } void init_comp(void) { int i; uae_s8* cb=can_byte; uae_s8* cw=can_word; uae_s8* au=always_used; #if RECORD_REGISTER_USAGE for (i=0;i<16;i++) reg_count_local[i] = 0; #endif for (i=0;i= (uintptr)ROMBaseHost) && (addr < (uintptr)ROMBaseHost + ROMSize)); } static void flush_all(void) { int i; log_flush(); for (i=0;i0) free_nreg(i); for (i=0;i0) f_free_nreg(i); live.flags_in_flags=TRASH; /* Note: We assume we already rescued the flags at the very start of the call_r functions! */ } /******************************************************************** * Memory access and related functions, CREATE time * ********************************************************************/ void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond) { next_pc_p=not_taken; taken_pc_p=taken; branch_cc=cond; } static uae_u32 get_handler_address(uae_u32 addr) { uae_u32 cl=cacheline(addr); blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0); return (uintptr)&(bi->direct_handler_to_use); } static uae_u32 get_handler(uae_u32 addr) { uae_u32 cl=cacheline(addr); blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0); return (uintptr)bi->direct_handler_to_use; } static void load_handler(int reg, uae_u32 addr) { mov_l_rm(reg,get_handler_address(addr)); } /* This version assumes that it is writing *real* memory, and *will* fail * if that assumption is wrong! No branches, no second chances, just * straight go-for-it attitude */ static void writemem_real(int address, int source, int size, int tmp, int clobber) { int f=tmp; if (clobber) f=source; switch(size) { case 1: mov_b_bRr(address,source,MEMBaseDiff); break; case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,MEMBaseDiff); break; case 4: mov_l_rr(f,source); bswap_32(f); mov_l_bRr(address,f,MEMBaseDiff); break; } forget_about(tmp); forget_about(f); } void writebyte(int address, int source, int tmp) { writemem_real(address,source,1,tmp,0); } static __inline__ void writeword_general(int address, int source, int tmp, int clobber) { writemem_real(address,source,2,tmp,clobber); } void writeword_clobber(int address, int source, int tmp) { writeword_general(address,source,tmp,1); } void writeword(int address, int source, int tmp) { writeword_general(address,source,tmp,0); } static __inline__ void writelong_general(int address, int source, int tmp, int clobber) { writemem_real(address,source,4,tmp,clobber); } void writelong_clobber(int address, int source, int tmp) { writelong_general(address,source,tmp,1); } void writelong(int address, int source, int tmp) { writelong_general(address,source,tmp,0); } /* This version assumes that it is reading *real* memory, and *will* fail * if that assumption is wrong! 
/* This version assumes that it is reading *real* memory, and *will* fail
 * if that assumption is wrong! No branches, no second chances, just
 * straight go-for-it attitude */
static void readmem_real(int address, int dest, int size, int tmp)
{
	int f=tmp;

	if (size==4 && address!=dest)
		f=dest;

	switch(size) {
	case 1: mov_b_brR(dest,address,MEMBaseDiff); break;
	case 2: mov_w_brR(dest,address,MEMBaseDiff); bswap_16(dest); break;
	case 4: mov_l_brR(dest,address,MEMBaseDiff); bswap_32(dest); break;
	}
	forget_about(tmp);
}

void readbyte(int address, int dest, int tmp)
{
	readmem_real(address,dest,1,tmp);
}

void readword(int address, int dest, int tmp)
{
	readmem_real(address,dest,2,tmp);
}

void readlong(int address, int dest, int tmp)
{
	readmem_real(address,dest,4,tmp);
}

void get_n_addr(int address, int dest, int tmp)
{
	// a is the register containing the virtual address
	// after the offset had been fetched
	int a=tmp;

	// f is the register that will contain the offset
	int f=tmp;

	// a == f == tmp if (address == dest)
	if (address!=dest) {
		a=address;
		f=dest;
	}
#if REAL_ADDRESSING
	mov_l_rr(dest, address);
#elif DIRECT_ADDRESSING
	lea_l_brr(dest,address,MEMBaseDiff);
#endif
	forget_about(tmp);
}

void get_n_addr_jmp(int address, int dest, int tmp)
{
	/* For this, we need to get the same address as the rest of UAE
	   would --- otherwise we end up translating everything twice */
	get_n_addr(address,dest,tmp);
}

/* base is a register, but dp is an actual value. target is a register, as is tmp */
void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp)
{
	int reg = (dp >> 12) & 15;
	int regd_shift=(dp >> 9) & 3;

	if (dp & 0x100) {
		int ignorebase=(dp&0x80);
		int ignorereg=(dp&0x40);
		int addbase=0;
		int outer=0;

		if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
		if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4);

		if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
		if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4);

		if ((dp & 0x4) == 0) {  /* add regd *before* the get_long */
			if (!ignorereg) {
				if ((dp & 0x800) == 0) sign_extend_16_rr(target,reg);
				else mov_l_rr(target,reg);
				shll_l_ri(target,regd_shift);
			}
			else
				mov_l_ri(target,0);

			/* target is now regd */
			if (!ignorebase)
				add_l(target,base);
			add_l_ri(target,addbase);
			if (dp&0x03) readlong(target,target,tmp);
		}
		else { /* do the getlong first, then add regd */
			if (!ignorebase) {
				mov_l_rr(target,base);
				add_l_ri(target,addbase);
			}
			else
				mov_l_ri(target,addbase);
			if (dp&0x03) readlong(target,target,tmp);

			if (!ignorereg) {
				if ((dp & 0x800) == 0) sign_extend_16_rr(tmp,reg);
				else mov_l_rr(tmp,reg);
				shll_l_ri(tmp,regd_shift);
				/* tmp is now regd */
				add_l(target,tmp);
			}
		}
		add_l_ri(target,outer);
	}
	else { /* 68000 version */
		if ((dp & 0x800) == 0) { /* Sign extend */
			sign_extend_16_rr(target,reg);
			lea_l_brr_indexed(target,base,target,1<<regd_shift,(uae_s32)((uae_s8)dp));
		}
		else {
			mov_l_rr(target,reg);
			lea_l_brr_indexed(target,base,target,1<<regd_shift,(uae_s32)((uae_s8)dp));
		}
	}
	forget_about(tmp);
}

/* ... set_cache_state(), get_cache_state() and get_jitted_size() ... */

const int CODE_ALLOC_MAX_ATTEMPTS = 10;

static uint8 *do_alloc_code(uint32 size, int depth)
{
#if defined(__linux__) && 0
	/* ... Linux-specific placement hack: try to map the translation
	   cache at a low fixed address, moving the hint and recursing when
	   an attempt fails ... */
	if (depth >= CODE_ALLOC_MAX_ATTEMPTS)
		return NULL;
	return do_alloc_code(size, depth + 1);
#else
	uint8 *code = (uint8 *)vm_acquire(size);
	return code == VM_MAP_FAILED ? NULL : code;
#endif
}
static inline uint8 *alloc_code(uint32 size)
{
	uint8 *ptr = do_alloc_code(size, 0);
	/* allocated code must fit in 32-bit boundaries */
	assert((uintptr)ptr <= 0xffffffff);
	return ptr;
}

void alloc_cache(void)
{
	if (compiled_code) {
		flush_icache_hard(6);
		vm_release(compiled_code, cache_size * 1024);
		compiled_code = 0;
	}

	if (cache_size == 0)
		return;

	while (!compiled_code && cache_size) {
		if ((compiled_code = alloc_code(cache_size * 1024)) == NULL) {
			compiled_code = 0;
			cache_size /= 2;
		}
	}

	if (compiled_code) {
		vm_protect(compiled_code, cache_size * 1024, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
		write_log("<JIT compiler> : actual translation cache size : %d KB at %p\n", cache_size, compiled_code);
		max_compile_start = compiled_code + cache_size*1024 - BYTES_PER_INST;
		current_compile_p = compiled_code;
		current_cache_size = 0;
	}
}

extern void op_illg_1 (uae_u32 opcode) REGPARAM;

static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
{
	uae_u32 k1 = 0;
	uae_u32 k2 = 0;

#if USE_CHECKSUM_INFO
	checksum_info *csi = bi->csi;
	Dif(!csi) abort();
	while (csi) {
		uae_s32 len = csi->length;
		uintptr tmp = (uintptr)csi->start_p;
#else
		uae_s32 len = bi->len;
		uintptr tmp = (uintptr)bi->min_pcp;
#endif
		uae_u32* pos;

		len += (tmp & 3);
		tmp &= ~((uintptr)3);
		pos = (uae_u32 *)tmp;

		if (len >= 0 && len <= MAX_CHECKSUM_LEN) {
			while (len > 0) {
				k1 += *pos;
				k2 ^= *pos;
				pos++;
				len -= 4;
			}
		}

#if USE_CHECKSUM_INFO
		csi = csi->next;
	}
#endif

	*c1 = k1;
	*c2 = k2;
}

#if 0
static void show_checksum(CSI_TYPE* csi)
{
	uae_u32 k1=0;
	uae_u32 k2=0;
	uae_s32 len=CSI_LENGTH(csi);
	uae_u32 tmp=(uintptr)CSI_START_P(csi);
	uae_u32* pos;

	len+=(tmp&3);
	tmp&=(~3);
	pos=(uae_u32*)tmp;

	if (len<0 || len>MAX_CHECKSUM_LEN) {
		return;
	}
	else {
		while (len>0) {
			write_log("%08x ",*pos);
			pos++;
			len-=4;
		}
		write_log(" bla\n");
	}
}
#endif

int check_for_cache_miss(void)
{
	blockinfo* bi=get_blockinfo_addr(regs.pc_p);

	if (bi) {
		int cl=cacheline(regs.pc_p);
		if (bi!=cache_tags[cl+1].bi) {
			raise_in_cl_list(bi);
			return 1;
		}
	}
	return 0;
}

static void recompile_block(void)
{
	/* An existing block's countdown code has expired. We need to make
	   sure that execute_normal doesn't refuse to recompile due to a
	   perceived cache miss... */
	blockinfo* bi=get_blockinfo_addr(regs.pc_p);

	Dif (!bi) abort();
	raise_in_cl_list(bi);
	execute_normal();
	return;
}

static void cache_miss(void)
{
	blockinfo* bi=get_blockinfo_addr(regs.pc_p);
	uae_u32 cl=cacheline(regs.pc_p);
	blockinfo* bi2=get_blockinfo(cl);

	if (!bi) {
		execute_normal(); /* Compile this block now */
		return;
	}
	Dif (!bi2 || bi==bi2) {
		write_log("Unexplained cache miss %p %p\n",bi,bi2);
		abort();
	}
	raise_in_cl_list(bi);
	return;
}
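/* Illustrative sketch (not part of the build): lifecycle of a block under
   lazy invalidation. A soft flush demotes every block to BI_NEED_CHECK;
   the next time it is hit, block_check_checksum() below either revives it
   (checksum unchanged) or invalidates it for recompilation: */
#if 0
static void example_lazy_invalidation(blockinfo* bi)
{
    flush_icache_lazy(0);          /* bi->status becomes BI_NEED_CHECK */
    if (block_check_checksum(bi))
        ; /* source unchanged: back to BI_ACTIVE, handler reused */
    else
        ; /* source changed: invalidate_block(bi) was called */
}
#endif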
static int called_check_checksum(blockinfo* bi);

static inline int block_check_checksum(blockinfo* bi)
{
	uae_u32 c1,c2;
	bool isgood;

	if (bi->status!=BI_NEED_CHECK)
		return 1; /* This block is in a checked state */

	checksum_count++;

	if (bi->c1 || bi->c2)
		calc_checksum(bi,&c1,&c2);
	else {
		c1=c2=1; /* Make sure it doesn't match */
	}

	isgood=(c1==bi->c1 && c2==bi->c2);

	if (isgood) {
		/* This block is still OK. So we reactivate. Of course, that
		   means we have to move it into the needs-to-be-flushed list */
		bi->handler_to_use=bi->handler;
		set_dhtu(bi,bi->direct_handler);
		bi->status=BI_CHECKING;
		isgood=called_check_checksum(bi) != 0;
	}
	if (isgood) {
		/* write_log("reactivate %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
		   c1,c2,bi->c1,bi->c2);*/
		remove_from_list(bi);
		add_to_active(bi);
		raise_in_cl_list(bi);
		bi->status=BI_ACTIVE;
	}
	else {
		/* This block actually changed. We need to invalidate it,
		   and set it up to be recompiled */
		/* write_log("discard %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
		   c1,c2,bi->c1,bi->c2); */
		invalidate_block(bi);
		raise_in_cl_list(bi);
	}
	return isgood;
}

static int called_check_checksum(blockinfo* bi)
{
	dependency* x=bi->deplist;
	int isgood=1;
	int i;

	for (i=0;i<2 && isgood;i++) {
		if (bi->dep[i].jmp_off) {
			isgood=block_check_checksum(bi->dep[i].target);
		}
	}
	return isgood;
}

static void check_checksum(void)
{
	blockinfo* bi=get_blockinfo_addr(regs.pc_p);
	uae_u32 cl=cacheline(regs.pc_p);
	blockinfo* bi2=get_blockinfo(cl);

	/* These are not the droids you are looking for... */
	if (!bi) {
		/* Whoever is the primary target is in a dormant state, but
		   calling it was accidental, and we should just compile this
		   new block */
		execute_normal();
		return;
	}
	if (bi!=bi2) {
		/* The block was hit accidentally, but it does exist. Cache miss */
		cache_miss();
		return;
	}

	if (!block_check_checksum(bi))
		execute_normal();
}

static __inline__ void match_states(blockinfo* bi)
{
	int i;
	smallstate* s=&(bi->env);

	if (bi->status==BI_NEED_CHECK) {
		block_check_checksum(bi);
	}
	if (bi->status==BI_ACTIVE || bi->status==BI_FINALIZING) {
		/* Deal with the *promises* the block makes (about not using
		   certain vregs) */
		for (i=0;i<16;i++) {
			if (s->virt[i]==L_UNNEEDED) {
				// write_log("unneeded reg %d at %p\n",i,target);
				COMPCALL(forget_about)(i); // FIXME
			}
		}
	}
	flush(1);

	/* And now deal with the *demands* the block makes */
	for (i=0;i<N_REGS;i++) {
		int v=s->nat[i];
		if (v>=0) {
			// printf("Loading reg %d into %d at %p\n",v,i,target);
			readreg_specific(v,4,i);
			// do_load_reg(i,v);
			// setlock(i);
		}
	}
	for (i=0;i<N_REGS;i++) {
		int v=s->nat[i];
		if (v>=0) {
			unlock2(i);
		}
	}
}

static __inline__ void create_popalls(void)
{
	int i,r;

	if ((popallspace = alloc_code(POPALLSPACE_SIZE)) == NULL) {
		write_log("FATAL: Could not allocate popallspace!\n");
		abort();
	}
	vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_WRITE);

	int stack_space = STACK_OFFSET;
	for (i=0;i<N_REGS;i++) {
		if (need_to_preserve[i])
			stack_space += sizeof(void *);
	}

	/* ... generate the shared prologue/epilogue stubs into popallspace:
	   pushall_call_handler plus popall_execute_normal,
	   popall_check_checksum, popall_cache_miss, popall_recompile_block,
	   popall_exec_nostats and popall_do_nothing ... */
}

static inline void reset_lists(void)
{
	int i;

	for (i=0;i<MAX_HOLD_BI;i++)
		hold_bi[i]=NULL;
	active=NULL;
	dormant=NULL;
}

static void prepare_block(blockinfo* bi)
{
	int i;

	set_target(current_compile_p);
	align_target(align_jumps);
	bi->direct_pen=(cpuop_func *)get_target();
	raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
	raw_mov_l_mr((uintptr)&regs.pc_p,0);
	raw_jmp((uintptr)popall_execute_normal);

	align_target(align_jumps);
	bi->direct_pcc=(cpuop_func *)get_target();
	raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
	raw_mov_l_mr((uintptr)&regs.pc_p,0);
	raw_jmp((uintptr)popall_check_checksum);
	current_compile_p=get_target();

	bi->deplist=NULL;
	for (i=0;i<2;i++) {
		bi->dep[i].prev_p=NULL;
		bi->dep[i].next=NULL;
	}
	bi->env=default_ss;
	bi->status=BI_INVALID;
	bi->havestate=0;
	//bi->env=empty_ss;
}
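/* Illustrative sketch (not part of the build): on hosts where 68k memory
   is kept word-swapped (HAVE_GET_WORD_UNSWAPPED), an opcode fetched with
   DO_GET_OPCODE() comes back byte-swapped, so every 65536-entry table
   below is indexed through cft_map(). The invariant is simply: */
#if 0
static compop_func *example_lookup(uae_u16 opcode_big_endian)
{
    /* the table slot for an opcode, whatever the host memory layout */
    return compfunctbl[cft_map(opcode_big_endian)];
}
#endif

// OPCODE is in big endian format, use cft_map() beforehand, if needed.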
static inline void reset_compop(int opcode)
{
	compfunctbl[opcode] = NULL;
	nfcompfunctbl[opcode] = NULL;
}

static int read_opcode(const char *p)
{
	int opcode = 0;
	for (int i = 0; i < 4; i++) {
		int op = p[i];
		switch (op) {
		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
			opcode = (opcode << 4) | (op - '0');
			break;
		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
			opcode = (opcode << 4) | ((op - 'a') + 10);
			break;
		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
			opcode = (opcode << 4) | ((op - 'A') + 10);
			break;
		default:
			return -1;
		}
	}
	return opcode;
}

static bool merge_blacklist()
{
	const char *blacklist = PrefsFindString("jitblacklist");
	if (blacklist) {
		const char *p = blacklist;
		for (;;) {
			if (*p == 0)
				return true;

			int opcode1 = read_opcode(p);
			if (opcode1 < 0)
				return false;
			p += 4;

			int opcode2 = opcode1;
			if (*p == '-') {
				p++;
				opcode2 = read_opcode(p);
				if (opcode2 < 0)
					return false;
				p += 4;
			}

			if (*p == 0 || *p == ',' || *p == ';') {
				write_log("<JIT compiler> : blacklist opcodes : %04x-%04x\n", opcode1, opcode2);

				for (int opcode = opcode1; opcode <= opcode2; opcode++)
					reset_compop(cft_map(opcode));

				if (*p == 0)
					return true;
				p++; /* skip the ',' or ';' separator */
				continue;
			}

			return false;
		}
	}
	return true;
}
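/* Illustrative sketch (not part of the build): merge_blacklist() accepts
   single opcodes and inclusive ranges in hex, separated by ',' or ';'. A
   hypothetical prefs line excluding all A-line traps plus one F-line
   opcode -- "jitblacklist a000-afff;f000" -- ends up doing: */
#if 0
static void example_blacklist_effect(void)
{
    for (int opcode = 0xa000; opcode <= 0xafff; opcode++)
        reset_compop(cft_map(opcode));
    reset_compop(cft_map(0xf000));
}
#endif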
void build_comp(void)
{
	int i;
	int jumpcount=0;
	unsigned long opcode;
	struct comptbl* tbl=op_smalltbl_0_comp_ff;
	struct comptbl* nftbl=op_smalltbl_0_comp_nf;
	int count;
	unsigned int cpu_level = 0;            // 68000 (default)
	if (CPUType == 4) cpu_level = 4;       // 68040 with FPU
	else {
		if (FPUType) cpu_level = 3;        // 68020 with FPU
		else if (CPUType >= 2) cpu_level = 2; // 68020
		else if (CPUType == 1) cpu_level = 1;
	}
	struct cputbl *nfctbl = (
		cpu_level == 4 ? op_smalltbl_0_nf
		: cpu_level == 3 ? op_smalltbl_1_nf
		: cpu_level == 2 ? op_smalltbl_2_nf
		: cpu_level == 1 ? op_smalltbl_3_nf
		: op_smalltbl_4_nf);

	write_log("<JIT compiler> : building compiler function tables\n");

	for (opcode = 0; opcode < 65536; opcode++) {
		reset_compop(opcode);
		nfcpufunctbl[opcode] = op_illg_1;
		prop[opcode].use_flags = 0x1f;
		prop[opcode].set_flags = 0x1f;
		prop[opcode].cflow = fl_trap; // ILLEGAL instructions do trap
	}

	for (i = 0; tbl[i].opcode < 65536; i++) {
		int cflow = table68k[tbl[i].opcode].cflow;
		if (follow_const_jumps && (tbl[i].specific & 16))
			cflow = fl_const_jump;
		else
			cflow &= ~fl_const_jump;
		prop[cft_map(tbl[i].opcode)].cflow = cflow;

		int uses_fpu = tbl[i].specific & 32;
		if (uses_fpu && avoid_fpu)
			compfunctbl[cft_map(tbl[i].opcode)] = NULL;
		else
			compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler;
	}

	for (i = 0; nftbl[i].opcode < 65536; i++) {
		int uses_fpu = nftbl[i].specific & 32;
		if (uses_fpu && avoid_fpu)
			nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL;
		else
			nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler;

		nfcpufunctbl[cft_map(nftbl[i].opcode)] = nfctbl[i].handler;
	}

	for (i = 0; nfctbl[i].handler; i++) {
		nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
	}

	for (opcode = 0; opcode < 65536; opcode++) {
		compop_func *f;
		compop_func *nff;
		cpuop_func *nfcf;
		int isaddx,cflow;

		if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level)
			continue;

		if (table68k[opcode].handler != -1) {
			f = compfunctbl[cft_map(table68k[opcode].handler)];
			nff = nfcompfunctbl[cft_map(table68k[opcode].handler)];
			nfcf = nfcpufunctbl[cft_map(table68k[opcode].handler)];
			cflow = prop[cft_map(table68k[opcode].handler)].cflow;
			isaddx = prop[cft_map(table68k[opcode].handler)].is_addx;
			prop[cft_map(opcode)].cflow = cflow;
			prop[cft_map(opcode)].is_addx = isaddx;
			compfunctbl[cft_map(opcode)] = f;
			nfcompfunctbl[cft_map(opcode)] = nff;
			Dif (nfcf == op_illg_1)
				abort();
			nfcpufunctbl[cft_map(opcode)] = nfcf;
		}
		prop[cft_map(opcode)].set_flags = table68k[opcode].flagdead;
		prop[cft_map(opcode)].use_flags = table68k[opcode].flaglive;
		/* Unconditional jumps don't evaluate condition codes, so they
		 * don't actually use any flags themselves */
		if (prop[cft_map(opcode)].cflow & fl_const_jump)
			prop[cft_map(opcode)].use_flags = 0;
	}
	for (i = 0; nfctbl[i].handler != NULL; i++) {
		if (nfctbl[i].specific)
			nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
	}

	/* Merge in blacklist */
	if (!merge_blacklist())
		write_log("<JIT compiler> : blacklist merge failure!\n");

	count=0;
	for (opcode = 0; opcode < 65536; opcode++) {
		if (compfunctbl[cft_map(opcode)])
			count++;
	}
	write_log("<JIT compiler> : supposedly %d compileable opcodes!\n",count);

	/* Initialise state */
	create_popalls();
	alloc_cache();
	reset_lists();

	for (i=0;i<TAGSIZE;i+=2) {
		cache_tags[i].handler=(cpuop_func *)popall_execute_normal;
		cache_tags[i+1].bi=NULL;
	}
	/* ... remaining one-time state initialization ... */
}

void flush_icache_hard(int n)
{
	blockinfo* bi, *dbi;

	hard_flush_count++;
	bi=active;
	while(bi) {
		cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
		cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
		dbi=bi; bi=bi->next;
		free_blockinfo(dbi);
	}
	bi=dormant;
	while(bi) {
		cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
		cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
		dbi=bi; bi=bi->next;
		free_blockinfo(dbi);
	}

	reset_lists();
	if (!compiled_code)
		return;
	current_compile_p=compiled_code;
	SPCFLAGS_SET( SPCFLAG_JIT_EXEC_RETURN ); /* To get out of compiled code */
}
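/* Illustrative sketch (not part of the build): how a compiled block is
   found at run time. Execution enters through the generated
   pushall_call_handler, which hashes regs.pc_p into a cache line and
   jumps through the tag table that flush_icache_hard() above just reset: */
#if 0
static void example_dispatch(void)
{
    int cl = cacheline(regs.pc_p);
    cpuop_func *f = cache_tags[cl].handler; /* block handler, or... */
    f(0);                                   /* ...popall_execute_normal */
}
#endif

/* "Soft flushing" --- instead of actually throwing everything away, we
   simply mark everything as "needs to be checked".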
*/ static inline void flush_icache_lazy(int n) { blockinfo* bi; blockinfo* bi2; soft_flush_count++; if (!active) return; bi=active; while (bi) { uae_u32 cl=cacheline(bi->pc_p); if (bi->status==BI_INVALID || bi->status==BI_NEED_RECOMP) { if (bi==cache_tags[cl+1].bi) cache_tags[cl].handler=(cpuop_func *)popall_execute_normal; bi->handler_to_use=(cpuop_func *)popall_execute_normal; set_dhtu(bi,bi->direct_pen); bi->status=BI_INVALID; } else { if (bi==cache_tags[cl+1].bi) cache_tags[cl].handler=(cpuop_func *)popall_check_checksum; bi->handler_to_use=(cpuop_func *)popall_check_checksum; set_dhtu(bi,bi->direct_pcc); bi->status=BI_NEED_CHECK; } bi2=bi; bi=bi->next; } /* bi2 is now the last entry in the active list */ bi2->next=dormant; if (dormant) dormant->prev_p=&(bi2->next); dormant=active; active->prev_p=&dormant; active=NULL; } void flush_icache_range(uae_u8 *start_p, uae_u32 length) { if (!active) return; #if LAZY_FLUSH_ICACHE_RANGE blockinfo *bi = active; while (bi) { #if USE_CHECKSUM_INFO bool candidate = false; for (checksum_info *csi = bi->csi; csi; csi = csi->next) { if (((start_p - csi->start_p) < csi->length) || ((csi->start_p - start_p) < length)) { candidate = true; break; } } #else // Assume system is consistent and would invalidate the right range const bool candidate = (bi->pc_p - start_p) < length; #endif blockinfo *dbi = bi; bi = bi->next; if (candidate) { uae_u32 cl = cacheline(dbi->pc_p); if (dbi->status == BI_INVALID || dbi->status == BI_NEED_RECOMP) { if (dbi == cache_tags[cl+1].bi) cache_tags[cl].handler = (cpuop_func *)popall_execute_normal; dbi->handler_to_use = (cpuop_func *)popall_execute_normal; set_dhtu(dbi, dbi->direct_pen); dbi->status = BI_INVALID; } else { if (dbi == cache_tags[cl+1].bi) cache_tags[cl].handler = (cpuop_func *)popall_check_checksum; dbi->handler_to_use = (cpuop_func *)popall_check_checksum; set_dhtu(dbi, dbi->direct_pcc); dbi->status = BI_NEED_CHECK; } remove_from_list(dbi); add_to_dormant(dbi); } } return; #endif flush_icache(-1); } static void catastrophe(void) { abort(); } int failure; #define TARGET_M68K 0 #define TARGET_POWERPC 1 #define TARGET_X86 2 #define TARGET_X86_64 3 #if defined(i386) || defined(__i386__) #define TARGET_NATIVE TARGET_X86 #endif #if defined(powerpc) || defined(__powerpc__) #define TARGET_NATIVE TARGET_POWERPC #endif #if defined(x86_64) || defined(__x86_64__) #define TARGET_NATIVE TARGET_X86_64 #endif #ifdef ENABLE_MON static uae_u32 mon_read_byte_jit(uintptr addr) { uae_u8 *m = (uae_u8 *)addr; return (uintptr)(*m); } static void mon_write_byte_jit(uintptr addr, uae_u32 b) { uae_u8 *m = (uae_u8 *)addr; *m = b; } #endif void disasm_block(int target, uint8 * start, size_t length) { if (!JITDebug) return; #if defined(JIT_DEBUG) && defined(ENABLE_MON) char disasm_str[200]; sprintf(disasm_str, "%s $%x $%x", target == TARGET_M68K ? "d68" : target == TARGET_X86 ? "d86" : target == TARGET_X86_64 ? "d8664" : target == TARGET_POWERPC ? 
"d" : "x", start, start + length - 1); uae_u32 (*old_mon_read_byte)(uintptr) = mon_read_byte; void (*old_mon_write_byte)(uintptr, uae_u32) = mon_write_byte; mon_read_byte = mon_read_byte_jit; mon_write_byte = mon_write_byte_jit; const char *arg[5] = {"mon", "-m", "-r", disasm_str, NULL}; mon(4, arg); mon_read_byte = old_mon_read_byte; mon_write_byte = old_mon_write_byte; #endif } static void disasm_native_block(uint8 *start, size_t length) { disasm_block(TARGET_NATIVE, start, length); } static void disasm_m68k_block(uint8 *start, size_t length) { disasm_block(TARGET_M68K, start, length); } #ifdef HAVE_GET_WORD_UNSWAPPED # define DO_GET_OPCODE(a) (do_get_mem_word_unswapped((uae_u16 *)(a))) #else # define DO_GET_OPCODE(a) (do_get_mem_word((uae_u16 *)(a))) #endif #if JIT_DEBUG static uae_u8 *last_regs_pc_p = 0; static uae_u8 *last_compiled_block_addr = 0; void compiler_dumpstate(void) { if (!JITDebug) return; write_log("### Host addresses\n"); write_log("MEM_BASE : %x\n", MEMBaseDiff); write_log("PC_P : %p\n", ®s.pc_p); write_log("SPCFLAGS : %p\n", ®s.spcflags); write_log("D0-D7 : %p-%p\n", ®s.regs[0], ®s.regs[7]); write_log("A0-A7 : %p-%p\n", ®s.regs[8], ®s.regs[15]); write_log("\n"); write_log("### M68k processor state\n"); m68k_dumpstate(0); write_log("\n"); write_log("### Block in Mac address space\n"); write_log("M68K block : %p\n", (void *)(uintptr)get_virtual_address(last_regs_pc_p)); write_log("Native block : %p (%d bytes)\n", (void *)(uintptr)get_virtual_address(last_compiled_block_addr), get_blockinfo_addr(last_regs_pc_p)->direct_handler_size); write_log("\n"); } #endif static void compile_block(cpu_history* pc_hist, int blocklen) { if (letit && compiled_code) { #if PROFILE_COMPILE_TIME compile_count++; clock_t start_time = clock(); #endif #if JIT_DEBUG bool disasm_block = false; #endif /* OK, here we need to 'compile' a block */ int i; int r; int was_comp=0; uae_u8 liveflags[MAXRUN+1]; #if USE_CHECKSUM_INFO bool trace_in_rom = isinrom((uintptr)pc_hist[0].location); uintptr max_pcp=(uintptr)pc_hist[blocklen - 1].location; uintptr min_pcp=max_pcp; #else uintptr max_pcp=(uintptr)pc_hist[0].location; uintptr min_pcp=max_pcp; #endif uae_u32 cl=cacheline(pc_hist[0].location); void* specflags=(void*)®s.spcflags; blockinfo* bi=NULL; blockinfo* bi2; int extra_len=0; redo_current_block=0; if (current_compile_p>=max_compile_start) flush_icache_hard(7); alloc_blockinfos(); bi=get_blockinfo_addr_new(pc_hist[0].location,0); bi2=get_blockinfo(cl); optlev=bi->optlevel; if (bi->status!=BI_INVALID) { Dif (bi!=bi2) { /* I don't think it can happen anymore. Shouldn't, in any case. So let's make sure... */ write_log("WOOOWOO count=%d, ol=%d %p %p\n", bi->count,bi->optlevel,bi->handler_to_use, cache_tags[cl].handler); abort(); } Dif (bi->count!=-1 && bi->status!=BI_NEED_RECOMP) { write_log("bi->count=%d, bi->status=%d\n",bi->count,bi->status); /* What the heck? We are not supposed to be here! 
static void compile_block(cpu_history* pc_hist, int blocklen)
{
	if (letit && compiled_code) {
#if PROFILE_COMPILE_TIME
		compile_count++;
		clock_t start_time = clock();
#endif
#if JIT_DEBUG
		bool disasm_block = false;
#endif

		/* OK, here we need to 'compile' a block */
		int i;
		int r;
		int was_comp=0;
		uae_u8 liveflags[MAXRUN+1];
#if USE_CHECKSUM_INFO
		bool trace_in_rom = isinrom((uintptr)pc_hist[0].location);
		uintptr max_pcp=(uintptr)pc_hist[blocklen - 1].location;
		uintptr min_pcp=max_pcp;
#else
		uintptr max_pcp=(uintptr)pc_hist[0].location;
		uintptr min_pcp=max_pcp;
#endif
		uae_u32 cl=cacheline(pc_hist[0].location);
		void* specflags=(void*)&regs.spcflags;
		blockinfo* bi=NULL;
		blockinfo* bi2;
		int extra_len=0;

		redo_current_block=0;
		if (current_compile_p>=max_compile_start)
			flush_icache_hard(7);

		alloc_blockinfos();

		bi=get_blockinfo_addr_new(pc_hist[0].location,0);
		bi2=get_blockinfo(cl);

		optlev=bi->optlevel;
		if (bi->status!=BI_INVALID) {
			Dif (bi!=bi2) {
				/* I don't think it can happen anymore. Shouldn't, in
				   any case. So let's make sure... */
				write_log("WOOOWOO count=%d, ol=%d %p %p\n",
					bi->count,bi->optlevel,bi->handler_to_use,
					cache_tags[cl].handler);
				abort();
			}

			Dif (bi->count!=-1 && bi->status!=BI_NEED_RECOMP) {
				write_log("bi->count=%d, bi->status=%d\n",bi->count,bi->status);
				/* What the heck? We are not supposed to be here! */
				abort();
			}
		}
		if (bi->count==-1) {
			optlev++;
			while (!optcount[optlev])
				optlev++;
			bi->count=optcount[optlev]-1;
		}
		current_block_pc_p=(uintptr)pc_hist[0].location;

		remove_deps(bi); /* We are about to create new code */
		bi->optlevel=optlev;
		bi->pc_p=(uae_u8*)pc_hist[0].location;
#if USE_CHECKSUM_INFO
		free_checksum_info_chain(bi->csi);
		bi->csi = NULL;
#endif

		liveflags[blocklen]=0x1f; /* All flags needed afterwards */
		i=blocklen;
		while (i--) {
			uae_u16* currpcp=pc_hist[i].location;
			uae_u32 op=DO_GET_OPCODE(currpcp);

#if USE_CHECKSUM_INFO
			trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp);
			if (follow_const_jumps && is_const_jump(op)) {
				checksum_info *csi = alloc_checksum_info();
				csi->start_p = (uae_u8 *)min_pcp;
				csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
				csi->next = bi->csi;
				bi->csi = csi;
				max_pcp = (uintptr)currpcp;
			}
			min_pcp = (uintptr)currpcp;
#else
			if ((uintptr)currpcp<min_pcp)
				min_pcp=(uintptr)currpcp;
			if ((uintptr)currpcp>max_pcp)
				max_pcp=(uintptr)currpcp;
#endif

			liveflags[i]=((liveflags[i+1]& (~prop[op].set_flags))| prop[op].use_flags);
			if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0)
				liveflags[i]&= ~FLAG_Z;
		}

#if USE_CHECKSUM_INFO
		checksum_info *csi = alloc_checksum_info();
		csi->start_p = (uae_u8 *)min_pcp;
		csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
		csi->next = bi->csi;
		bi->csi = csi;
#endif

		bi->needed_flags=liveflags[0];

		align_target(align_loops);
		was_comp=0;

		bi->direct_handler=(cpuop_func *)get_target();
		set_dhtu(bi,bi->direct_handler);
		bi->status=BI_COMPILING;
		current_block_start_target=(uintptr)get_target();

		log_startblock();

		if (bi->count>=0) { /* Need to generate countdown code */
			raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
			raw_sub_l_mi((uintptr)&(bi->count),1);
			raw_jl((uintptr)popall_recompile_block);
		}
		if (optlev==0) { /* No need to actually translate */
			/* Execute normally without keeping stats */
			raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
			raw_jmp((uintptr)popall_exec_nostats);
		}
		else {
			reg_alloc_run=0;
			next_pc_p=0;
			taken_pc_p=0;
			branch_cc=0;

			comp_pc_p=(uae_u8*)pc_hist[0].location;
			init_comp();
			was_comp=1;

#ifdef USE_CPU_EMUL_SERVICES
			raw_sub_l_mi((uintptr)&emulated_ticks,blocklen);
			raw_jcc_b_oponly(NATIVE_CC_GT);
			uae_s8 *branchadd=(uae_s8*)get_target();
			emit_byte(0);
			raw_call((uintptr)cpu_do_check_ticks);
			*branchadd=(uintptr)get_target()-((uintptr)branchadd+1);
#endif

#if JIT_DEBUG
			if (JITDebug) {
				raw_mov_l_mi((uintptr)&last_regs_pc_p,(uintptr)pc_hist[0].location);
				raw_mov_l_mi((uintptr)&last_compiled_block_addr,current_block_start_target);
			}
#endif

			for (i=0;i<blocklen &&
				 get_target_noopt()<max_compile_start;i++) {
				cpuop_func **cputbl;
				compop_func **comptbl;
				uae_u32 opcode=DO_GET_OPCODE(pc_hist[i].location);
				needed_flags=(liveflags[i+1] & prop[opcode].set_flags);
				if (!needed_flags) {
					cputbl=nfcpufunctbl;
					comptbl=nfcompfunctbl;
				}
				else {
					cputbl=cpufunctbl;
					comptbl=compfunctbl;
				}

				failure = 1; // gb-- defaults to failure state
				if (comptbl[opcode] && optlev>1) {
					failure=0;
					if (!was_comp) {
						comp_pc_p=(uae_u8*)pc_hist[i].location;
						init_comp();
					}
					was_comp=1;

					comptbl[opcode](opcode);
					freescratch();
					if (!(liveflags[i+1] & FLAG_CZNV)) {
						/* We can forget about flags */
						dont_care_flags();
					}
#if INDIVIDUAL_INST
					flush(1);
					nop();
					flush(1);
					was_comp=0;
#endif
				}

				if (failure) {
					if (was_comp) {
						flush(1);
						was_comp=0;
					}
					raw_mov_l_ri(REG_PAR1,(uae_u32)opcode);
#if USE_NORMAL_CALLING_CONVENTION
					raw_push_l_r(REG_PAR1);
#endif
					raw_mov_l_mi((uintptr)&regs.pc_p,
						     (uintptr)pc_hist[i].location);
					raw_call((uintptr)cputbl[opcode]);
#if PROFILE_UNTRANSLATED_INSNS
					// raw_cputbl_count[] is indexed with plain opcode (in m68k order)
					raw_add_l_mi((uintptr)&raw_cputbl_count[cft_map(opcode)],1);
#endif
#if USE_NORMAL_CALLING_CONVENTION
					raw_inc_sp(4);
#endif

					if (i < blocklen - 1) {
						uae_s8* branchadd;

						raw_mov_l_rm(0,(uintptr)specflags);
						raw_test_l_rr(0,0);
						raw_jz_b_oponly();
						branchadd=(uae_s8 *)get_target();
						emit_byte(0);
						raw_jmp((uintptr)popall_do_nothing);
						*branchadd=(uintptr)get_target()-(uintptr)branchadd-1;
					}
				}
			}
#if 1 /* This isn't completely kosher yet; It really needs to be
	 integrated into a general inter-block-dependency scheme */
			if (next_pc_p && taken_pc_p &&
			    was_comp && taken_pc_p==current_block_pc_p) {
				blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0);
				blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0);
				uae_u8 x=bi1->needed_flags;

				if (x==0xff || 1) { /* To be on the safe side */
					uae_u16* next=(uae_u16*)next_pc_p;
					uae_u32 op=DO_GET_OPCODE(next);

					x=0x1f;
					x&=(~prop[op].set_flags);
					x|=prop[op].use_flags;
				}

				x|=bi2->needed_flags;
				if (!(x & FLAG_CZNV)) {
					/* We can forget about flags */
					dont_care_flags();
					extra_len+=2; /* The next instruction now is part of this block */
				}
			}
#endif
			log_flush();

			if (next_pc_p) { /* A branch was registered */
				uintptr t1=next_pc_p;
				uintptr t2=taken_pc_p;
				int cc=branch_cc;
				uae_u32* branchadd;
				uae_u32* tba;
				bigstate tmp;
				blockinfo* tbi;

				if (taken_pc_p<next_pc_p) {
					/* backward branch. Optimize for the "taken" case */
					t1=taken_pc_p;
					t2=next_pc_p;
					cc=branch_cc^1;
				}

				/* ... emit the two exit stubs: the predicted path first
				   (match_states, spcflags test, chain to get_handler(t1)
				   via create_jmpdep), then the non-predicted path for t2
				   the same way ... */
			}
			else
			{
				if (was_comp) {
					flush(1);
				}
				/* ... no branch was registered: fall through to the
				   next handler ... */
			}

#if USE_MATCH
			if (callers_need_recompile(&live,&(bi->env))) {
				mark_callers_recompile(bi);
			}

			big_to_small_state(&live,&(bi->env));
#endif

#if USE_CHECKSUM_INFO
			remove_from_list(bi);
			if (trace_in_rom) {
				// No need to checksum that block trace on cache invalidation
				free_checksum_info_chain(bi->csi);
				bi->csi = NULL;
				add_to_dormant(bi);
			}
			else {
				calc_checksum(bi,&(bi->c1),&(bi->c2));
				add_to_active(bi);
			}
#else
			if (next_pc_p+extra_len>=max_pcp &&
			    next_pc_p+extra_len<max_pcp+LONGEST_68K_INST)
				max_pcp=next_pc_p+extra_len;
			else
				max_pcp+=LONGEST_68K_INST;

			bi->len=max_pcp-min_pcp;
			bi->min_pcp=min_pcp;

			remove_from_list(bi);
			if (isinrom(min_pcp) && isinrom(max_pcp)) {
				add_to_dormant(bi); /* No need to checksum it on cache flush.
				                       Please don't start changing ROMs in
				                       flight! */
			}
			else {
				calc_checksum(bi,&(bi->c1),&(bi->c2));
				add_to_active(bi);
			}
#endif

			current_cache_size += get_target() - (uae_u8 *)current_compile_p;

#if JIT_DEBUG
			if (JITDebug)
				bi->direct_handler_size = get_target() - (uae_u8 *)current_block_start_target;

			if (JITDebug && disasm_block) {
				uaecptr block_addr = start_pc + ((char *)pc_hist[0].location - (char *)start_pc_p);
				D(bug("M68K block @ 0x%08x (%d insns)\n", block_addr, blocklen));
				uae_u32 block_size = ((uae_u8 *)pc_hist[blocklen - 1].location - (uae_u8 *)pc_hist[0].location) + 1;
				disasm_m68k_block((uae_u8 *)pc_hist[0].location, block_size);
				D(bug("Compiled block @ 0x%08x\n", pc_hist[0].location));
				disasm_native_block((uae_u8 *)current_block_start_target, bi->direct_handler_size);
				getchar();
			}
#endif

			log_dump();
			align_target(align_jumps);

			/* This is the non-direct handler */
			bi->handler=
				bi->handler_to_use=(cpuop_func *)get_target();
			raw_cmp_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
			raw_jnz((uintptr)popall_cache_miss);
			comp_pc_p=(uae_u8*)pc_hist[0].location;

			bi->status=BI_FINALIZING;
			init_comp();
			match_states(bi);
			flush(1);

			raw_jmp((uintptr)bi->direct_handler);

			current_compile_p=get_target();
			raise_in_cl_list(bi);

			/* We will flush soon, anyway, so let's do it now */
			if (current_compile_p>=max_compile_start)
				flush_icache_hard(7);

			bi->status=BI_ACTIVE;
			if (redo_current_block)
				block_need_recompile(bi);

#if PROFILE_COMPILE_TIME
			compile_time += (clock() - start_time);
#endif
		}

		/* Account for compilation time */
		cpu_do_check_ticks();
	}
}

void do_nothing(void)
{
	/* What did you expect this to do?
*/ } void exec_nostats(void) { for (;;) { uae_u32 opcode = GET_OPCODE; #if FLIGHT_RECORDER m68k_record_step(m68k_getpc()); #endif (*cpufunctbl[opcode])(opcode); cpu_check_ticks(); if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) { return; /* We will deal with the spcflags in the caller */ } } } void execute_normal(void) { if (!check_for_cache_miss()) { cpu_history pc_hist[MAXRUN]; int blocklen = 0; #if REAL_ADDRESSING || DIRECT_ADDRESSING start_pc_p = regs.pc_p; start_pc = get_virtual_address(regs.pc_p); #else start_pc_p = regs.pc_oldp; start_pc = regs.pc; #endif for (;;) { /* Take note: This is the do-it-normal loop */ pc_hist[blocklen++].location = (uae_u16 *)regs.pc_p; uae_u32 opcode = GET_OPCODE; #if FLIGHT_RECORDER m68k_record_step(m68k_getpc()); #endif (*cpufunctbl[opcode])(opcode); cpu_check_ticks(); if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) { compile_block(pc_hist, blocklen); return; /* We will deal with the spcflags in the caller */ } /* No need to check regs.spcflags, because if they were set, we'd have ended up inside that "if" */ } } } typedef void (*compiled_handler)(void); static void m68k_do_compile_execute(void) { for (;;) { ((compiled_handler)(pushall_call_handler))(); /* Whenever we return from that, we should check spcflags */ if (SPCFLAGS_TEST(SPCFLAG_ALL)) { if (m68k_do_specialties ()) return; } } } void m68k_compile_execute (void) { for (;;) { if (quit_program) break; m68k_do_compile_execute(); } }
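/* Illustrative sketch (not part of the build): the engine as seen from the
   host side. After the one-time setup, the emulator just spins inside
   m68k_compile_execute(); blocks are created on demand by execute_normal()
   and retired by the flush machinery: */
#if 0
static void example_session(void)
{
    if (compiler_use_jit()) {
        compiler_init();        /* probe CPU, read prefs, build_comp() */
        m68k_compile_execute(); /* runs until quit_program is set */
        compiler_exit();        /* release caches, print statistics */
    }
}
#endif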