Files
gb6/system6/jit.c
Tanner Fokkens 7caca2f056 Fix LY wait and HALT timing in CGB double speed mode
In double speed mode, dmg_sync_hw() halves CPU cycles before adding
to frame_cycles. The timing functions (compile_ly_wait, compile_ly_wait_reg,
compile_halt) compute D2 in PPU-domain units, but only half gets applied
to frame_cycles, causing the target LY to never be reached and an
infinite re-entry loop during game init.

Add effective_double_speed byte to JIT context and emit a runtime check
in all three timing functions to double D2 when double speed is active.
2026-02-06 16:06:36 -08:00

362 lines
9.7 KiB
C

#include <Memory.h>
#include <Timer.h>
#include <stdio.h>
#include <string.h>
#include "types.h"
#include "jit.h"
#include "compiler.h"
#include "dmg.h"
#include "cgb.h"
#include "cache.h"
#include "lcd.h"
#include "rom.h"
#include "dispatcher_asm.h"
#include "emulator.h"
#include "settings.h"
#include "debug.h"
#include "arena.h"
#include "cpu_cache.h"
// Debug: ring buffer of last 16 PCs executed. jit_run() records the entry PC
// of every block just before handing control to the generated code.
#define PC_HISTORY_SIZE 16
static u32 pc_history[PC_HISTORY_SIZE];
static u8 op_history[PC_HISTORY_SIZE]; // first opcode of each block
static int pc_history_idx = 0; // next slot to write, wraps at PC_HISTORY_SIZE
// Profiling counters, measured in TickCount() ticks. jit_run() accumulates
// them and update_profiling_status_bar() turns them into percentages.
static u32 time_in_jit = 0; // ticks spent inside enter_asm_world()
static u32 time_in_sync = 0; // ticks spent after the block returns (hw sync, interrupts)
static u32 call_count = 0; // jit_run() calls that ran to completion
static u32 last_report_tick = 0; // TickCount() at the previous profiling report
// register state that persists between block executions.
// enter_asm_world() moves this struct to and from the CPU with a single
// movem.l, so the fields must stay in 68k register order (d2-d7, then a2-a6)
// with no other members in between.
struct {
u32 d2; // accumulated cycles, output
u32 d3; // next pc, output only
u32 d4, d5, d6, d7; // a, bc, de, f
u32 a2, a3, a4; // hl, sp, ctx
u32 a5, a6; // read_page, write_page
} jit_regs;
// exposed to main emulator.c
// runtime context; its address is handed to generated code in A4 (see jit_init)
jit_context jit_ctx;
// non-zero once the JIT has stopped: set on the HALT sentinel, on allocation
// failure and on compile errors. jit_run() returns 0 immediately while set.
int jit_halted = 0;
// compile-time context for address calculation
static struct compile_ctx compile_ctx;
// Transfer control from C to a compiled block and come back.
//
// this is a huge context switch and my main goal is to do this as little as
// possible. currently it will not return to C when jumping to another compiled
// block. it still does to check and handle interrupts, though.
//
// code: entry point of the generated 68k code to call.
//
// On entry d2-d7/a2-a6 are loaded from jit_regs. On return only d2-d7/a2-a3
// are written back, so jit_regs.a4/a5/a6 (ctx, read_page, write_page) keep
// whatever the C side last stored there.
static void enter_asm_world(void *code)
{
asm volatile(
// save callee-saved registers
"movem.l %%d2-%%d7/%%a2-%%a6, -(%%sp)\n\t"
// copy code pointer to A0 before the registers below are overwritten
"movea.l %[code], %%a0\n\t"
// load GB state into 68k registers
"lea %[jit_regs], %%a1\n\t"
"movem.l (%%a1), %%d2-%%d7/%%a2-%%a6\n\t"
// call the generated code, this can then chain to other blocks
"jsr (%%a0)\n\t"
// save results back to memory (first eight longs of jit_regs: d2..a3)
"lea %[jit_regs], %%a0\n\t"
"movem.l %%d2-%%d7/%%a2-%%a3, (%%a0)\n\t"
// restore callee-saved registers
"movem.l (%%sp)+, %%d2-%%d7/%%a2-%%a6\n\t"
: // no outputs
: [jit_regs] "m" (jit_regs),
[code] "a" (code)
// d0/d1/a0/a1 are scratch here; "memory" because jit_regs is rewritten
// NOTE(review): generated code presumably also touches emulator memory
// through the context pointers - the "memory" clobber covers that too
: "d0", "d1", "a0", "a1", "cc", "memory"
);
}
// Sync jit_ctx cache pointers from lru.c, need to do this when the arena
// is cleared and the cache is reinitialized with new arrays.
// Refreshes jit_ctx.bank0_cache, jit_ctx.banked_cache and jit_ctx.upper_cache
// via cache_get_arrays(); called from jit_init() and jit_clear_all_blocks().
static void sync_cache_pointers(void)
{
cache_get_arrays(&jit_ctx.bank0_cache, &jit_ctx.banked_cache, &jit_ctx.upper_cache);
}
// STOP instruction hook (installed as jit_ctx.stop_func by jit_init).
// On CGB a STOP may instead perform a speed switch.
// Returns 0 when a speed switch happened and execution should continue,
// non-zero when the CPU should really halt.
static int jit_handle_stop(struct dmg *dmg)
{
    // no CGB state, or cgb_speed_switch() reported no switch: halt
    if (!dmg->cgb || !cgb_speed_switch(dmg->cgb)) {
        return 1;
    }

    // speed changed - refresh the flag kept in the JIT context, honouring the
    // ignore_double_speed override
    if (dmg->cgb->double_speed && !ignore_double_speed) {
        jit_ctx.effective_double_speed = 1;
    } else {
        jit_ctx.effective_double_speed = 0;
    }
    return 0;
}
// Initialize JIT state for a new emulation session.
//
// Sets up the compiler, the code arena and the block cache, then fills in the
// compile-time context, the runtime context handed to generated code, and the
// initial Game Boy register values. On allocation failure the status bar is
// updated, jit_halted is set to 1 and the function returns early, leaving the
// contexts untouched.
void jit_init(struct dmg *dmg)
{
    set_status_bar("Loading...");
    compiler_init();

    if (!arena_init()) {
        set_status_bar("Arena alloc fail");
        jit_halted = 1;
        return;
    }

    // pre-allocate cache arrays so dispatcher never sees NULL
    if (!cache_init()) {
        set_status_bar("Cache alloc fail");
        jit_halted = 1;
        return;
    }

    // compile-time context: what the block compiler needs to read ROM,
    // allocate code and resolve direct WRAM/HRAM addresses
    compile_ctx.dmg = dmg;
    compile_ctx.read = dmg_read;
    compile_ctx.cache_store = cache_store;
    compile_ctx.alloc = arena_alloc;
    compile_ctx.wram_base = dmg->main_ram;
    compile_ctx.hram_base = dmg->zero_page;

    // runtime context: reached from generated code through A4
    jit_ctx.dmg = dmg;
    jit_ctx.read_func = dmg_read;
    jit_ctx.write_func = dmg_write;
    jit_ctx.read16_func = dmg_read16;
    jit_ctx.write16_func = dmg_write16;
    jit_ctx.ei_di_func = dmg_ei_di;
    jit_ctx.stop_func = jit_handle_stop;
    jit_ctx.current_rom_bank = 1; // bank 1 is default after boot
    jit_ctx.dispatcher_return = get_dispatcher_code();
    jit_ctx.patch_helper = get_patch_helper_code();
    jit_ctx.frame_cycles_ptr = &dmg->frame_cycles;
    jit_ctx.gb_sp = 0xfffe; // initial SP (HRAM)
    jit_ctx.stack_in_ram = 1; // fast mode - A3 points to native HRAM
    jit_ctx.effective_double_speed = 0;
    sync_cache_pointers();

    // initial register file; everything not listed below starts at zero
    memset(&jit_regs, 0, sizeof jit_regs);
    jit_regs.d3 = 0x100; // initial PC
    // A register: 0x11 for CGB, 0x01 for DMG
    jit_regs.d4 = (dmg->cgb && dmg->cgb->mode) ? 0x11 : 0x01;
    jit_regs.d5 = 0x00000013; // BC
    jit_regs.d6 = 0x000000d8; // DE
    jit_regs.d7 = 0x05; // flags
    jit_regs.a2 = 0x014d; // HL
    // A3 = native pointer to HRAM at GB SP 0xFFFE
    // HRAM is at dmg->zero_page (0xFF80-0xFFFF), 0xFFFE - 0xFF80 = 0x7E
    jit_regs.a3 = (unsigned long) (dmg->zero_page + 0x7e);
    jit_regs.a4 = (unsigned long) &jit_ctx;
    jit_regs.a5 = (unsigned long) dmg->read_page;
    jit_regs.a6 = (unsigned long) dmg->write_page;

    jit_halted = 0;
}
// Throw away every compiled block: reset the arena, rebuild the cache arrays
// and refresh the pointers to them in jit_ctx.
// Returns 1 on success. Returns 0 (with jit_halted set and the status bar
// updated) when the cache arrays cannot be re-created.
int jit_clear_all_blocks(void)
{
    arena_reset();

    if (cache_init()) {
        sync_cache_pointers();
        return 1;
    }

    set_status_bar("Cache alloc fail");
    jit_halted = 1;
    return 0;
}
// i moved this out of dmg.c because it needs to mess with the JIT state
//
// Dispatch the highest-priority pending interrupt, if any: push the current
// PC (jit_regs.d3) on the GB stack and redirect execution to the handler.
// The caller is expected to have checked dmg->interrupt_enable already.
static void check_interrupts(struct dmg *dmg)
{
    static const u16 vectors[] = { 0x40, 0x48, 0x50, 0x58, 0x60 };
    // zero_page[0x7f] is GB address 0xFFFF; only the low five bits are sources
    u8 ready = dmg->zero_page[0x7f] & dmg->interrupt_request_mask & 0x1f;
    int bit = 0;

    if (ready == 0) {
        return;
    }

    // lowest set bit has priority; ready is non-zero within bits 0-4, so the
    // scan always stops with bit <= 4
    while (!(ready & (1 << bit))) {
        bit++;
    }

    // clear IF bit and disable IME
    dmg->interrupt_request_mask &= ~(1 << bit);
    dmg->interrupt_enable = 0;

    // push return address; the GB-side SP and the native A3 move together
    jit_ctx.gb_sp -= 2;
    jit_regs.a3 -= 2;
    dmg_write16(dmg, jit_ctx.gb_sp, jit_regs.d3);

    // Jump to handler
    jit_regs.d3 = vectors[bit];
}
// Show frame rate and a rough time split in the status bar.
//
// frames_now: total frames rendered so far (caller passes
//             dmg->frames_rendered).
//
// Everything is measured over the interval since the previous call, in
// TickCount() ticks. The "* 60" in the FPS formula treats a tick as 1/60 s.
// J and S are the percentage of that interval spent in generated code and in
// the post-block sync path respectively.
static void update_profiling_status_bar(u32 frames_now)
{
    static u32 last_jit = 0, last_sync = 0, last_frames_rendered = 0;
    char buf[64];
    u32 now = TickCount();
    u32 elapsed = now - last_report_tick;
    u32 d_jit = time_in_jit - last_jit;
    u32 d_sync = time_in_sync - last_sync;
    u32 frames_delta = frames_now - last_frames_rendered;
    u32 fps = 0;
    u32 pct_jit = 0;
    u32 pct_sync = 0;

    // two reports inside the same tick: report zeros rather than divide by 0
    if (elapsed > 0) {
        pct_jit = (d_jit * 100) / elapsed;
        pct_sync = (d_sync * 100) / elapsed;
        fps = (frames_delta * 60) / elapsed;
    }

    last_frames_rendered = frames_now;
    last_jit = time_in_jit;
    last_sync = time_in_sync;
    last_report_tick = now;

    // cast so the arguments match %lu whatever u32 is typedef'd to
    sprintf(buf, "%lu FPS (J: %lu, S: %lu)",
            (unsigned long) fps,
            (unsigned long) pct_jit,
            (unsigned long) pct_sync);
    set_status_bar(buf);
}
// Compile the block at the current PC (jit_regs.d3) for the current ROM bank.
// If the compiler cannot produce a block, every cached block is discarded
// and compilation is retried once.
// Returns the block, or NULL after setting jit_halted (and, except when
// jit_clear_all_blocks() already reported the failure, the status bar).
static struct code_block *jit_compile_current_block(void)
{
    char buf[64];
    struct code_block *block;

    compile_ctx.current_bank = jit_ctx.current_rom_bank;
    block = compile_block(jit_regs.d3, &compile_ctx);
    if (!block) {
        // arena full, reset and retry once
        if (!jit_clear_all_blocks()) {
            return NULL;
        }
        block = compile_block(jit_regs.d3, &compile_ctx);
        if (!block) {
            sprintf(buf, "JIT: block fail pc=%04lx",
                    (unsigned long) jit_regs.d3);
            set_status_bar(buf);
            jit_halted = 1;
            return NULL;
        }
    }

    if (block->error) {
        sprintf(buf, "Error pc=%02x:%04lx op=%02x",
                (unsigned) jit_ctx.current_rom_bank,
                (unsigned long) block->failed_address,
                (unsigned) block->failed_opcode);
        set_status_bar(buf);
        jit_halted = 1;
        return NULL;
    }

    return block;
}

// Execute one step of the JIT: find (or compile) the block at the current PC,
// run it, then sync the emulated hardware with the cycles it consumed and
// dispatch a pending interrupt if interrupts are enabled.
//
// Returns 1 when the caller should keep calling, 0 when the JIT is halted
// (HALT sentinel reached, allocation failure or compile error).
int jit_run(struct dmg *dmg)
{
    void *code;
    struct code_block *block;
    char buf[64];
    u32 t1, t2, t3;

    if (jit_halted) {
        return 0;
    }

    // look up or compile block
    code = cache_lookup(jit_regs.d3, jit_ctx.current_rom_bank);
    if (!code) {
        sprintf(buf, "$%02x:%04lx %luk/%luk",
                (unsigned) jit_ctx.current_rom_bank,
                (unsigned long) jit_regs.d3,
                (unsigned long) (arena_remaining() / 1024),
                (unsigned long) (arena_size() / 1024));
        set_status_bar(buf);

        block = jit_compile_current_block();
        if (!block) {
            return 0;
        }

        if (!cache_store(jit_regs.d3, jit_ctx.current_rom_bank, block->code)) {
            // this means this was the first block to be stored for a given
            // bank, and the bank cache array couldn't be allocated. free
            // everything and start over.
            if (!jit_clear_all_blocks()) {
                return 0;
            }
            // The reset above also discarded the block we just compiled
            // (compile_block allocates through compile_ctx.alloc, i.e. the
            // arena), so its code pointer must not be stored or executed.
            // Compile it again into the fresh arena before retrying.
            block = jit_compile_current_block();
            if (!block) {
                return 0;
            }
            if (!cache_store(jit_regs.d3, jit_ctx.current_rom_bank, block->code)) {
                // something is really wrong
                sprintf(buf, "JIT: bank array fail pc=%04lx",
                        (unsigned long) jit_regs.d3);
                set_status_bar(buf);
                jit_halted = 1;
                return 0;
            }
            // recovered
        }

        if (TrapAvailable(_CacheFlush)) {
            // for 68040. 68030 needed a cache flush when blocks were patched, but
            // 040 needs it here too because the caches are copy-back, so the code that
            // was just compiled isn't necessarily in main memory yet, and it won't
            // look in the data cache for instructions, just the instruction cache.
            // see Apple Technical Note HW06: Cache As Cache Can
            FlushCodeCache();
        }

        code = block->code;
    }

    // trace mode: show PC before every block execution
    if (jit_ctx.trace_enabled) {
        sprintf(buf, "$%02x:%04lx",
                (unsigned) jit_ctx.current_rom_bank,
                (unsigned long) jit_regs.d3);
        set_status_bar(buf);
    }

    // Log PC and first opcode to ring buffer for crash debugging
    pc_history[pc_history_idx] = jit_regs.d3;
    op_history[pc_history_idx] = dmg_read(dmg, jit_regs.d3);
    pc_history_idx = (pc_history_idx + 1) % PC_HISTORY_SIZE;

    t1 = TickCount();
    enter_asm_world(code);
    t2 = TickCount();

    // Get next PC from D3
    if (jit_regs.d3 == HALT_SENTINEL) {
        set_status_bar("HALT");
        jit_halted = 1;
        return 0;
    }

    // sync hardware with cycles accumulated by compiled code
    dmg_sync_hw(dmg, jit_regs.d2);
    if (dmg->interrupt_enable) {
        check_interrupts(dmg);
    }
    jit_regs.d2 = 0;
    t3 = TickCount();

    // profiling: report every 100 completed calls
    time_in_jit += t2 - t1;
    time_in_sync += t3 - t2;
    call_count++;
    if (call_count % 100 == 0) {
        update_profiling_status_bar(dmg->frames_rendered);
    }
    return 1;
}
// Tear down the JIT at the end of an emulation session by destroying the
// code arena. jit_init() must be called again before the next jit_run().
void jit_cleanup(void)
{
// we need this memory back to load the next ROM
arena_destroy();
}