From 6972ac5cc32790ecc889ca928035fc3303f28fa2 Mon Sep 17 00:00:00 2001 From: Matthew Laux Date: Sun, 25 Jan 2026 17:42:29 -0600 Subject: [PATCH] calculate mid-frame LY read correctly instead of cycling through - fixes Metroid enemies --- compiler/cb_prefix.c | 1 + compiler/compiler.c | 9 +++ compiler/emitters.c | 7 +++ compiler/emitters.h | 1 + compiler/interop.c | 5 +- compiler/tests/test_exec_timing.c | 99 +++++++++++++++++-------------- src/dmg.c | 12 ++-- 7 files changed, 78 insertions(+), 56 deletions(-) diff --git a/compiler/cb_prefix.c b/compiler/cb_prefix.c index 86c7bd7..5fe57b5 100644 --- a/compiler/cb_prefix.c +++ b/compiler/cb_prefix.c @@ -15,6 +15,7 @@ static void compile_shift_flags(struct code_block *block) static void compile_swap_flags(struct code_block *block) { emit_move_sr_dn(block, REG_68K_D_FLAGS); + emit_andi_b_dn(block, REG_68K_D_FLAGS, 0xfe); } static void compile_bit_flags(struct code_block *block) diff --git a/compiler/compiler.c b/compiler/compiler.c index e2a6994..9213d19 100644 --- a/compiler/compiler.c +++ b/compiler/compiler.c @@ -111,7 +111,12 @@ static void compile_ly_wait( // load frame_cycles pointer emit_movea_l_disp_an_an(block, JIT_CTX_FRAME_CYCLES_PTR, REG_68K_A_CTX, REG_68K_A_SCRATCH_1); + // load frame_cycles into d0 emit_move_l_ind_an_dn(block, REG_68K_A_SCRATCH_1, REG_68K_D_SCRATCH_0); + // add accumulated cycles for this JIT run + emit_add_l_dn_dn(block, REG_68K_D_CYCLE_COUNT, REG_68K_D_SCRATCH_0); + // write true position back to memory + emit_move_l_dn_disp_an(block, REG_68K_D_SCRATCH_0, 0, REG_68K_A_SCRATCH_1); // compare frame_cycles to target emit_cmpi_l_imm_dn(block, target_cycles, REG_68K_D_SCRATCH_0); @@ -154,6 +159,10 @@ static void compile_halt(struct code_block *block, int next_pc) // load frame_cycles pointer emit_movea_l_disp_an_an(block, JIT_CTX_FRAME_CYCLES_PTR, REG_68K_A_CTX, REG_68K_A_SCRATCH_1); emit_move_l_ind_an_dn(block, REG_68K_A_SCRATCH_1, REG_68K_D_SCRATCH_0); + // add accumulated cycles for this 
JIT run + emit_add_l_dn_dn(block, REG_68K_D_CYCLE_COUNT, REG_68K_D_SCRATCH_0); + // write true position back to memory + emit_move_l_dn_disp_an(block, REG_68K_D_SCRATCH_0, 0, REG_68K_A_SCRATCH_1); // see if already in vblank emit_cmpi_l_imm_dn(block, 65664, REG_68K_D_SCRATCH_0); diff --git a/compiler/emitters.c b/compiler/emitters.c index e44761a..982bade 100644 --- a/compiler/emitters.c +++ b/compiler/emitters.c @@ -707,6 +707,13 @@ void emit_add_w_dn_dn(struct code_block *block, uint8_t src, uint8_t dest) emit_word(block, 0xd040 | (dest << 9) | src); } +// add.l Ds, Dd - ADD data registers (result to Dd) +void emit_add_l_dn_dn(struct code_block *block, uint8_t src, uint8_t dest) +{ + // 1101 ddd 0 10 000 sss + emit_word(block, 0xd080 | (dest << 9) | src); +} + // sub.b Ds, Dd - SUB data registers (result to Dd) void emit_sub_b_dn_dn(struct code_block *block, uint8_t src, uint8_t dest) { diff --git a/compiler/emitters.h b/compiler/emitters.h index 62a2ccc..f9aafd5 100644 --- a/compiler/emitters.h +++ b/compiler/emitters.h @@ -95,6 +95,7 @@ void emit_ror_b_imm(struct code_block *block, uint8_t count, uint8_t dreg); void emit_rol_b_imm(struct code_block *block, uint8_t count, uint8_t dreg); void emit_add_b_dn_dn(struct code_block *block, uint8_t src, uint8_t dest); void emit_add_w_dn_dn(struct code_block *block, uint8_t src, uint8_t dest); +void emit_add_l_dn_dn(struct code_block *block, uint8_t src, uint8_t dest); void emit_sub_b_dn_dn(struct code_block *block, uint8_t src, uint8_t dest); void emit_sub_w_dn_dn(struct code_block *block, uint8_t src, uint8_t dest); void emit_adda_w_dn_an(struct code_block *block, uint8_t dreg, uint8_t areg); diff --git a/compiler/interop.c b/compiler/interop.c index 8b8c67e..ddb48d7 100644 --- a/compiler/interop.c +++ b/compiler/interop.c @@ -12,8 +12,7 @@ // addr in D1, val_reg specifies value register void compile_slow_dmg_write(struct code_block *block, uint8_t val_reg) { - // store current cycle count for lazy register 
evaluation, right now - // it's just DIV but want to add more like lcd + // store current cycle count for lazy register evaluation emit_move_l_dn_disp_an(block, REG_68K_D_CYCLE_COUNT, JIT_CTX_READ_CYCLES, REG_68K_A_CTX); // and push so retro68 doesn't erase emit_push_l_dn(block, REG_68K_D_CYCLE_COUNT); // 2 @@ -87,7 +86,7 @@ void compile_call_dmg_write_d0(struct code_block *block) // Emit slow path call to dmg_read - expects address in D1, returns in D0 void compile_slow_dmg_read(struct code_block *block) { - // store current cycle count for lazy DIV evaluation + // store current cycle count for DIV/LY evaluation emit_move_l_dn_disp_an(block, REG_68K_D_CYCLE_COUNT, JIT_CTX_READ_CYCLES, REG_68K_A_CTX); // 4 emit_push_l_dn(block, REG_68K_D_CYCLE_COUNT); // 2 emit_push_w_dn(block, REG_68K_D_SCRATCH_1); // 2 diff --git a/compiler/tests/test_exec_timing.c b/compiler/tests/test_exec_timing.c index cef72f1..0a6e3df 100644 --- a/compiler/tests/test_exec_timing.c +++ b/compiler/tests/test_exec_timing.c @@ -3,83 +3,89 @@ // ============================================================================ // HALT instruction tests // HALT waits until vblank interrupt (LY 144, cycle 65664) +// Note: HALT's own 4 cycles are added to D2 before skip calculation, +// so true_pos = frame_cycles + 4 // ============================================================================ TEST(test_halt_before_vblank) { - // HALT when frame_cycles=0 should wait 65664 cycles to reach vblank + // HALT when frame_cycles=0, true_pos=4, skip = 65664-4 = 65660 uint8_t rom[] = { 0x76 // halt }; run_block_with_frame_cycles(rom, 0); - ASSERT_EQ(get_cycle_count(), 65664); + ASSERT_EQ(get_cycle_count(), 65664 - 4); } TEST(test_halt_mid_frame) { - // HALT at frame_cycles=10000 should wait 55664 cycles (65664-10000) + // HALT at frame_cycles=10000, true_pos=10004, skip = 65664-10004 = 55660 uint8_t rom[] = { 0x76 // halt }; run_block_with_frame_cycles(rom, 10000); - ASSERT_EQ(get_cycle_count(), 65664 - 10000); 
+ ASSERT_EQ(get_cycle_count(), 65664 - 10000 - 4); } TEST(test_halt_just_before_vblank) { - // HALT at frame_cycles=65663 should wait 1 cycle + // HALT at frame_cycles=65659, true_pos=65663, skip = 1 uint8_t rom[] = { 0x76 // halt }; - run_block_with_frame_cycles(rom, 65663); + run_block_with_frame_cycles(rom, 65659); ASSERT_EQ(get_cycle_count(), 1); } TEST(test_halt_at_vblank_start) { - // HALT at exactly cycle 65664 (vblank start) should wait until next frame - // cycles = (70224 + 65664) - 65664 = 70224 + // HALT at frame_cycles=65660, true_pos=65664 (exactly at vblank) + // In vblank path: skip = 135888 - 65664 = 70224 uint8_t rom[] = { 0x76 // halt }; - run_block_with_frame_cycles(rom, 65664); + run_block_with_frame_cycles(rom, 65660); ASSERT_EQ(get_cycle_count(), 70224); } TEST(test_halt_during_vblank) { - // HALT at frame_cycles=68000 (in vblank) should wait until next frame vblank - // cycles = (70224 + 65664) - 68000 = 135888 - 68000 = 67888 + // HALT at frame_cycles=68000, true_pos=68004 (in vblank) + // skip = 135888 - 68004 = 67884 uint8_t rom[] = { 0x76 // halt }; run_block_with_frame_cycles(rom, 68000); - ASSERT_EQ(get_cycle_count(), 135888 - 68000); + ASSERT_EQ(get_cycle_count(), 135888 - 68000 - 4); } TEST(test_halt_near_frame_end) { - // HALT at frame_cycles=70000 should wait until next frame vblank - // cycles = (70224 + 65664) - 70000 = 65888 + // HALT at frame_cycles=70000, true_pos=70004 (near frame end) + // skip = 135888 - 70004 = 65884 uint8_t rom[] = { 0x76 // halt }; run_block_with_frame_cycles(rom, 70000); - ASSERT_EQ(get_cycle_count(), 135888 - 70000); + ASSERT_EQ(get_cycle_count(), 135888 - 70000 - 4); } // ============================================================================ // LY wait pattern tests // Pattern: ldh a, [$44]; cp N; jr cc, back // Compiler synthesizes a wait instead of spinning in a loop +// Note: The initial ld's cycles are added to D2 +// before skip calculation, so true_pos = frame_cycles + 12 // 
============================================================================ +#define LY_WAIT_CYCLES 12 + TEST(test_ly_wait_jr_nz_ly0) { // ldh a, [$44]; cp 0; jr nz, back // Wait for LY=0 (frame start), from frame_cycles=0 - // target_cycles = 0 * 456 = 0, so wait until next frame - // D2 = (70224 + 0) - 0 = 70224, A = 0 + // true_pos = 12, target = 0, so wait until next frame + // skip = (70224 + 0) - 12 = 70212 uint8_t rom[] = { 0xf0, 0x44, // ldh a, ($ff44) - read LY 0xfe, 0x00, // cp 0 @@ -88,16 +94,15 @@ TEST(test_ly_wait_jr_nz_ly0) }; run_block_with_frame_cycles(rom, 0); ASSERT_EQ(get_dreg(REG_68K_D_A) & 0xff, 0); - // At frame_cycles=0, waiting for LY 0 means next frame - ASSERT_EQ(get_cycle_count(), 70224); + ASSERT_EQ(get_cycle_count(), 70224 - LY_WAIT_CYCLES); } TEST(test_ly_wait_jr_nz_ly90) { // ldh a, [$44]; cp 90; jr nz, back // Wait for LY=90, from frame_cycles=0 - // target_cycles = 90 * 456 = 41040 - // D2 = 41040 - 0 = 41040, A = 90 + // true_pos = 12, target = 90*456 = 41040 + // skip = 41040 - 12 = 41028 uint8_t rom[] = { 0xf0, 0x44, // ldh a, ($ff44) - read LY 0xfe, 0x5a, // cp 90 @@ -106,13 +111,14 @@ TEST(test_ly_wait_jr_nz_ly90) }; run_block_with_frame_cycles(rom, 0); ASSERT_EQ(get_dreg(REG_68K_D_A) & 0xff, 90); - ASSERT_EQ(get_cycle_count(), 90 * 456); + ASSERT_EQ(get_cycle_count(), 90 * 456 - LY_WAIT_CYCLES); } TEST(test_ly_wait_jr_nz_ly144) { // Wait for LY=144 (vblank start), from frame_cycles=0 - // target_cycles = 144 * 456 = 65664 + // true_pos = 12, target = 144*456 = 65664 + // skip = 65664 - 12 = 65652 uint8_t rom[] = { 0xf0, 0x44, // ldh a, ($ff44) 0xfe, 0x90, // cp 144 @@ -121,15 +127,15 @@ TEST(test_ly_wait_jr_nz_ly144) }; run_block_with_frame_cycles(rom, 0); ASSERT_EQ(get_dreg(REG_68K_D_A) & 0xff, 144); - ASSERT_EQ(get_cycle_count(), 144 * 456); + ASSERT_EQ(get_cycle_count(), 144 * 456 - LY_WAIT_CYCLES); } TEST(test_ly_wait_jr_nz_past_target) { // Wait for LY=50, but frame_cycles already past that - // frame_cycles=30000, LY 50 
is at 22800 - // Since frame_cycles >= target, wait until next frame - // D2 = (70224 + 22800) - 30000 = 63024 + // frame_cycles=30000, true_pos=30012, target=22800 + // Since true_pos >= target, wait until next frame + // skip = (70224 + 22800) - 30012 = 63012 uint8_t rom[] = { 0xf0, 0x44, // ldh a, ($ff44) 0xfe, 0x32, // cp 50 @@ -138,7 +144,7 @@ TEST(test_ly_wait_jr_nz_past_target) }; run_block_with_frame_cycles(rom, 30000); ASSERT_EQ(get_dreg(REG_68K_D_A) & 0xff, 50); - ASSERT_EQ(get_cycle_count(), 70224 + (50 * 456) - 30000); + ASSERT_EQ(get_cycle_count(), 70224 + (50 * 456) - 30000 - LY_WAIT_CYCLES); } TEST(test_ly_wait_jr_z_ly90) @@ -146,7 +152,8 @@ TEST(test_ly_wait_jr_z_ly90) // ldh a, [$44]; cp 90; jr z, back // jr z: loop while LY == 90, exit when LY != 90 // This waits for LY = (90 + 1) % 154 = 91 - // target_cycles = 91 * 456 = 41496 + // true_pos = 12, target = 91*456 = 41496 + // skip = 41496 - 12 = 41484 uint8_t rom[] = { 0xf0, 0x44, // ldh a, ($ff44) 0xfe, 0x5a, // cp 90 @@ -155,15 +162,16 @@ TEST(test_ly_wait_jr_z_ly90) }; run_block_with_frame_cycles(rom, 0); ASSERT_EQ(get_dreg(REG_68K_D_A) & 0xff, 91); - ASSERT_EQ(get_cycle_count(), 91 * 456); + ASSERT_EQ(get_cycle_count(), 91 * 456 - LY_WAIT_CYCLES); } TEST(test_ly_wait_jr_z_ly153) { // ldh a, [$44]; cp 153; jr z, back // wait_ly = (153 + 1) % 154 = 0 (wraps to start of frame) - // target_cycles = 0 * 456 = 0 - // From frame_cycles=0, this should wait for next frame + // true_pos = 12, target = 0 + // Since true_pos >= target, wait for next frame + // skip = (70224 + 0) - 12 = 70212 uint8_t rom[] = { 0xf0, 0x44, // ldh a, ($ff44) 0xfe, 0x99, // cp 153 @@ -172,15 +180,15 @@ TEST(test_ly_wait_jr_z_ly153) }; run_block_with_frame_cycles(rom, 0); ASSERT_EQ(get_dreg(REG_68K_D_A) & 0xff, 0); - // At frame_cycles=0, target is 0, so next frame - ASSERT_EQ(get_cycle_count(), 70224); + ASSERT_EQ(get_cycle_count(), 70224 - LY_WAIT_CYCLES); } TEST(test_ly_wait_jr_c_ly100) { // ldh a, [$44]; cp 100; jr c, 
back // jr c: loop while LY < 100, exit when LY >= 100 - // wait_ly = 100, target_cycles = 100 * 456 = 45600 + // true_pos = 12, target = 100*456 = 45600 + // skip = 45600 - 12 = 45588 uint8_t rom[] = { 0xf0, 0x44, // ldh a, ($ff44) 0xfe, 0x64, // cp 100 @@ -189,14 +197,14 @@ TEST(test_ly_wait_jr_c_ly100) }; run_block_with_frame_cycles(rom, 0); ASSERT_EQ(get_dreg(REG_68K_D_A) & 0xff, 100); - ASSERT_EQ(get_cycle_count(), 100 * 456); + ASSERT_EQ(get_cycle_count(), 100 * 456 - LY_WAIT_CYCLES); } TEST(test_ly_wait_mid_frame) { // Wait for LY=100, starting at frame_cycles=20000 - // LY 100 is at cycle 45600 - // D2 = 45600 - 20000 = 25600 + // true_pos = 20012, target = 45600 + // skip = 45600 - 20012 = 25588 uint8_t rom[] = { 0xf0, 0x44, // ldh a, ($ff44) 0xfe, 0x64, // cp 100 @@ -205,24 +213,25 @@ TEST(test_ly_wait_mid_frame) }; run_block_with_frame_cycles(rom, 20000); ASSERT_EQ(get_dreg(REG_68K_D_A) & 0xff, 100); - ASSERT_EQ(get_cycle_count(), 45600 - 20000); + ASSERT_EQ(get_cycle_count(), 45600 - 20000 - LY_WAIT_CYCLES); } TEST(test_ly_wait_exact_target) { - // Start exactly at the target LY cycle - // LY 50 is at cycle 22800, start there - // frame_cycles >= target_cycles, so wait for next frame + // Start at frame_cycles such that true_pos exactly equals target + // target = 50*456 = 22800, so frame_cycles = 22800 - 12 = 22788 + // true_pos = 22800 >= target, so wait for next frame + // skip = (70224 + 22800) - 22800 = 70224 uint8_t rom[] = { 0xf0, 0x44, // ldh a, ($ff44) 0xfe, 0x32, // cp 50 0x20, 0xfa, // jr nz, -6 0x10 // stop }; - run_block_with_frame_cycles(rom, 22800); + run_block_with_frame_cycles(rom, 22800 - LY_WAIT_CYCLES); ASSERT_EQ(get_dreg(REG_68K_D_A) & 0xff, 50); - // frame_cycles == target_cycles, uses next frame path - ASSERT_EQ(get_cycle_count(), 70224); + // true_pos == target_cycles, uses next frame path + ASSERT_EQ(get_cycle_count(), 70224); } void register_timing_tests(void) diff --git a/src/dmg.c b/src/dmg.c index 6cbc14b..07ddaf2 100644 --- 
a/src/dmg.c +++ b/src/dmg.c @@ -150,13 +150,9 @@ u8 dmg_read_slow(struct dmg *dmg, u16 address) if (address == REG_LY) { // the compiler detects "ldh a, [$44]; cp N; jr cc" which is the most // common case, and skips to that line, so this actually doesn't run - // that much - just give it the value it's waiting for. LY=LYC is handled - // in a nicer way below, when the compiled code returns to C - dmg->ly_hack++; - if (dmg->ly_hack == 154) { - dmg->ly_hack = 0; - } - return dmg->ly_hack; + // that much + u32 current = (dmg->frame_cycles + jit_ctx.read_cycles) % 70224; + return current / 456; } if (address == REG_STAT) { @@ -405,7 +401,7 @@ void dmg_sync_hw(struct dmg *dmg, int cycles) // need as a separate check for the case where cycles = 70224. in that case, // it needs to execute both the previous block and this one if (dmg->frame_cycles >= CYCLES_PER_FRAME) { - dmg->frame_cycles -= CYCLES_PER_FRAME; + dmg->frame_cycles %= CYCLES_PER_FRAME; dmg->sent_vblank_start = 0; dmg->sent_ly_interrupt = 0; dmg->rendered_this_frame = 0;