calculate mid-frame LY read correctly instead of cycling through - fixes Metroid enemies

This commit is contained in:
Matthew Laux
2026-01-25 17:42:29 -06:00
parent d9efe79aa1
commit 6972ac5cc3
7 changed files with 78 additions and 56 deletions

View File

@@ -15,6 +15,7 @@ static void compile_shift_flags(struct code_block *block)
// Emit the flag-capture sequence used after a swap: move the 68k status
// register into the flags data register, then AND the low byte with 0xfe.
// NOTE(review): 0xfe clears bit 0 only; on the 68k that is the carry bit, and
// the GB SWAP instruction always leaves C clear - presumably that is the
// reason for the mask, confirm against the flag layout used by the JIT.
static void compile_swap_flags(struct code_block *block)
{
// snapshot the condition codes produced by the preceding operation
emit_move_sr_dn(block, REG_68K_D_FLAGS);
// drop bit 0 of the snapshot, keep the other seven bits untouched
emit_andi_b_dn(block, REG_68K_D_FLAGS, 0xfe);
}
static void compile_bit_flags(struct code_block *block)

View File

@@ -111,7 +111,12 @@ static void compile_ly_wait(
// load frame_cycles pointer
emit_movea_l_disp_an_an(block, JIT_CTX_FRAME_CYCLES_PTR, REG_68K_A_CTX, REG_68K_A_SCRATCH_1);
// load frame_cycles into d0
emit_move_l_ind_an_dn(block, REG_68K_A_SCRATCH_1, REG_68K_D_SCRATCH_0);
// add accumulated cycles for this JIT run
emit_add_l_dn_dn(block, REG_68K_D_CYCLE_COUNT, REG_68K_D_SCRATCH_0);
// write true position back to memory
emit_move_l_dn_disp_an(block, REG_68K_D_SCRATCH_0, 0, REG_68K_A_SCRATCH_1);
// compare frame_cycles to target
emit_cmpi_l_imm_dn(block, target_cycles, REG_68K_D_SCRATCH_0);
@@ -154,6 +159,10 @@ static void compile_halt(struct code_block *block, int next_pc)
// load frame_cycles pointer
emit_movea_l_disp_an_an(block, JIT_CTX_FRAME_CYCLES_PTR, REG_68K_A_CTX, REG_68K_A_SCRATCH_1);
emit_move_l_ind_an_dn(block, REG_68K_A_SCRATCH_1, REG_68K_D_SCRATCH_0);
// add accumulated cycles for this JIT run
emit_add_l_dn_dn(block, REG_68K_D_CYCLE_COUNT, REG_68K_D_SCRATCH_0);
// write true position back to memory
emit_move_l_dn_disp_an(block, REG_68K_D_SCRATCH_0, 0, REG_68K_A_SCRATCH_1);
// see if already in vblank
emit_cmpi_l_imm_dn(block, 65664, REG_68K_D_SCRATCH_0);

View File

@@ -707,6 +707,13 @@ void emit_add_w_dn_dn(struct code_block *block, uint8_t src, uint8_t dest)
emit_word(block, 0xd040 | (dest << 9) | src);
}
// add.l Ds, Dd - long-word ADD between two data registers; the sum is
// written to Dd and Ds is left unchanged
void emit_add_l_dn_dn(struct code_block *block, uint8_t src, uint8_t dest)
{
    // opcode layout: 1101 ddd 0 10 000 sss
    //   1101     ADD
    //   ddd      destination data register
    //   0 10     opmode: long, result to Dn
    //   000 sss  source addressed as data register direct
    const int dest_field = dest << 9;
    const int opcode = 0xd080 | dest_field | src;

    emit_word(block, opcode);
}
// sub.b Ds, Dd - SUB data registers (result to Dd)
void emit_sub_b_dn_dn(struct code_block *block, uint8_t src, uint8_t dest)
{

View File

@@ -95,6 +95,7 @@ void emit_ror_b_imm(struct code_block *block, uint8_t count, uint8_t dreg);
void emit_rol_b_imm(struct code_block *block, uint8_t count, uint8_t dreg);
void emit_add_b_dn_dn(struct code_block *block, uint8_t src, uint8_t dest);
void emit_add_w_dn_dn(struct code_block *block, uint8_t src, uint8_t dest);
void emit_add_l_dn_dn(struct code_block *block, uint8_t src, uint8_t dest);
void emit_sub_b_dn_dn(struct code_block *block, uint8_t src, uint8_t dest);
void emit_sub_w_dn_dn(struct code_block *block, uint8_t src, uint8_t dest);
void emit_adda_w_dn_an(struct code_block *block, uint8_t dreg, uint8_t areg);

View File

@@ -12,8 +12,7 @@
// addr in D1, val_reg specifies value register
void compile_slow_dmg_write(struct code_block *block, uint8_t val_reg)
{
// store current cycle count for lazy register evaluation, right now
// it's just DIV but want to add more like lcd
// store current cycle count for lazy register evaluation
emit_move_l_dn_disp_an(block, REG_68K_D_CYCLE_COUNT, JIT_CTX_READ_CYCLES, REG_68K_A_CTX);
// and push so retro68 doesn't erase
emit_push_l_dn(block, REG_68K_D_CYCLE_COUNT); // 2
@@ -87,7 +86,7 @@ void compile_call_dmg_write_d0(struct code_block *block)
// Emit slow path call to dmg_read - expects address in D1, returns in D0
void compile_slow_dmg_read(struct code_block *block)
{
// store current cycle count for lazy DIV evaluation
// store current cycle count for DIV/LY evaluation
emit_move_l_dn_disp_an(block, REG_68K_D_CYCLE_COUNT, JIT_CTX_READ_CYCLES, REG_68K_A_CTX); // 4
emit_push_l_dn(block, REG_68K_D_CYCLE_COUNT); // 2
emit_push_w_dn(block, REG_68K_D_SCRATCH_1); // 2

View File

@@ -3,83 +3,89 @@
// ============================================================================
// HALT instruction tests
// HALT waits until vblank interrupt (LY 144, cycle 65664)
// Note: HALT's own 4 cycles are added to D2 before skip calculation,
// so true_pos = frame_cycles + 4
// ============================================================================
TEST(test_halt_before_vblank)
{
// Runs a lone HALT starting at frame_cycles = 0 and checks the cycle count
// reported afterwards.
// NOTE(review): this hunk appears to list each removed line directly before
// its replacement, so the comment pair and the ASSERT_EQ pair below are
// old-then-new rather than two independent checks - confirm against the diff.
// HALT when frame_cycles=0 should wait 65664 cycles to reach vblank
// HALT when frame_cycles=0, true_pos=4, skip = 65664-4 = 65660
uint8_t rom[] = {
0x76 // halt
};
run_block_with_frame_cycles(rom, 0);
ASSERT_EQ(get_cycle_count(), 65664); // expectation without HALT's own 4 cycles
ASSERT_EQ(get_cycle_count(), 65664 - 4); // expectation with true_pos = frame_cycles + 4
}
TEST(test_halt_mid_frame)
{
// Runs a lone HALT starting at frame_cycles = 10000 (before vblank at 65664)
// and checks the cycle count reported afterwards.
// NOTE(review): this hunk appears to list each removed line directly before
// its replacement, so the comment pair and the ASSERT_EQ pair below are
// old-then-new rather than two independent checks - confirm against the diff.
// HALT at frame_cycles=10000 should wait 55664 cycles (65664-10000)
// HALT at frame_cycles=10000, true_pos=10004, skip = 65664-10004 = 55660
uint8_t rom[] = {
0x76 // halt
};
run_block_with_frame_cycles(rom, 10000);
ASSERT_EQ(get_cycle_count(), 65664 - 10000); // expectation without HALT's own 4 cycles
ASSERT_EQ(get_cycle_count(), 65664 - 10000 - 4); // expectation with true_pos = frame_cycles + 4
}
TEST(test_halt_just_before_vblank)
{
// Runs a lone HALT one cycle short of vblank and expects a cycle count of 1.
// NOTE(review): this hunk appears to list each removed line directly before
// its replacement; the 65663 start matches the first comment and the 65659
// start matches the second (65659 + 4 = 65663), so the two run_block calls are
// old-then-new rather than two consecutive runs - confirm against the diff.
// HALT at frame_cycles=65663 should wait 1 cycle
// HALT at frame_cycles=65659, true_pos=65663, skip = 1
uint8_t rom[] = {
0x76 // halt
};
run_block_with_frame_cycles(rom, 65663); // start used when HALT's 4 cycles were not counted
run_block_with_frame_cycles(rom, 65659); // start chosen so that true_pos = 65663
ASSERT_EQ(get_cycle_count(), 1);
}
TEST(test_halt_at_vblank_start)
{
// Runs a lone HALT positioned exactly on the vblank boundary (cycle 65664)
// and expects a full frame (70224 cycles) to be reported.
// 135888 in the comments below is 70224 + 65664, i.e. vblank of the next frame.
// NOTE(review): this hunk appears to list each removed line directly before
// its replacement, so the two comment pairs and the two run_block calls are
// old-then-new rather than two consecutive runs - confirm against the diff.
// HALT at exactly cycle 65664 (vblank start) should wait until next frame
// cycles = (70224 + 65664) - 65664 = 70224
// HALT at frame_cycles=65660, true_pos=65664 (exactly at vblank)
// In vblank path: skip = 135888 - 65664 = 70224
uint8_t rom[] = {
0x76 // halt
};
run_block_with_frame_cycles(rom, 65664); // start used when HALT's 4 cycles were not counted
run_block_with_frame_cycles(rom, 65660); // start chosen so that true_pos = 65664
ASSERT_EQ(get_cycle_count(), 70224);
}
TEST(test_halt_during_vblank)
{
// Runs a lone HALT starting inside vblank (frame_cycles = 68000) and checks
// the cycle count reported afterwards.
// 135888 below is 70224 + 65664, i.e. vblank of the next frame.
// NOTE(review): this hunk appears to list each removed line directly before
// its replacement, so the comment pairs and the ASSERT_EQ pair below are
// old-then-new rather than two independent checks - confirm against the diff.
// HALT at frame_cycles=68000 (in vblank) should wait until next frame vblank
// cycles = (70224 + 65664) - 68000 = 135888 - 68000 = 67888
// HALT at frame_cycles=68000, true_pos=68004 (in vblank)
// skip = 135888 - 68004 = 67884
uint8_t rom[] = {
0x76 // halt
};
run_block_with_frame_cycles(rom, 68000);
ASSERT_EQ(get_cycle_count(), 135888 - 68000); // expectation without HALT's own 4 cycles
ASSERT_EQ(get_cycle_count(), 135888 - 68000 - 4); // expectation with true_pos = frame_cycles + 4
}
TEST(test_halt_near_frame_end)
{
// Runs a lone HALT starting late in the frame (frame_cycles = 70000 of 70224)
// and checks the cycle count reported afterwards.
// 135888 below is 70224 + 65664, i.e. vblank of the next frame.
// NOTE(review): this hunk appears to list each removed line directly before
// its replacement, so the comment pairs and the ASSERT_EQ pair below are
// old-then-new rather than two independent checks - confirm against the diff.
// HALT at frame_cycles=70000 should wait until next frame vblank
// cycles = (70224 + 65664) - 70000 = 65888
// HALT at frame_cycles=70000, true_pos=70004 (near frame end)
// skip = 135888 - 70004 = 65884
uint8_t rom[] = {
0x76 // halt
};
run_block_with_frame_cycles(rom, 70000);
ASSERT_EQ(get_cycle_count(), 135888 - 70000); // expectation without HALT's own 4 cycles
ASSERT_EQ(get_cycle_count(), 135888 - 70000 - 4); // expectation with true_pos = frame_cycles + 4
}
// ============================================================================
// LY wait pattern tests
// Pattern: ldh a, [$44]; cp N; jr cc, back
// Compiler synthesizes a wait instead of spinning in a loop
// Note: The initial ld's cycles are added to D2
// before skip calculation, so true_pos = frame_cycles + 12
// ============================================================================
#define LY_WAIT_CYCLES 12
TEST(test_ly_wait_jr_nz_ly0)
{
// ldh a, [$44]; cp 0; jr nz, back
// Wait for LY=0 (frame start), from frame_cycles=0
// target_cycles = 0 * 456 = 0, so wait until next frame
// D2 = (70224 + 0) - 0 = 70224, A = 0
// true_pos = 12, target = 0, so wait until next frame
// skip = (70224 + 0) - 12 = 70212
uint8_t rom[] = {
0xf0, 0x44, // ldh a, ($ff44) - read LY
0xfe, 0x00, // cp 0
@@ -88,16 +94,15 @@ TEST(test_ly_wait_jr_nz_ly0)
};
run_block_with_frame_cycles(rom, 0);
ASSERT_EQ(get_dreg(REG_68K_D_A) & 0xff, 0);
// At frame_cycles=0, waiting for LY 0 means next frame
ASSERT_EQ(get_cycle_count(), 70224);
ASSERT_EQ(get_cycle_count(), 70224 - LY_WAIT_CYCLES);
}
TEST(test_ly_wait_jr_nz_ly90)
{
// ldh a, [$44]; cp 90; jr nz, back
// Wait for LY=90, from frame_cycles=0
// target_cycles = 90 * 456 = 41040
// D2 = 41040 - 0 = 41040, A = 90
// true_pos = 12, target = 90*456 = 41040
// skip = 41040 - 12 = 41028
uint8_t rom[] = {
0xf0, 0x44, // ldh a, ($ff44) - read LY
0xfe, 0x5a, // cp 90
@@ -106,13 +111,14 @@ TEST(test_ly_wait_jr_nz_ly90)
};
run_block_with_frame_cycles(rom, 0);
ASSERT_EQ(get_dreg(REG_68K_D_A) & 0xff, 90);
ASSERT_EQ(get_cycle_count(), 90 * 456);
ASSERT_EQ(get_cycle_count(), 90 * 456 - LY_WAIT_CYCLES);
}
TEST(test_ly_wait_jr_nz_ly144)
{
// Wait for LY=144 (vblank start), from frame_cycles=0
// target_cycles = 144 * 456 = 65664
// true_pos = 12, target = 144*456 = 65664
// skip = 65664 - 12 = 65652
uint8_t rom[] = {
0xf0, 0x44, // ldh a, ($ff44)
0xfe, 0x90, // cp 144
@@ -121,15 +127,15 @@ TEST(test_ly_wait_jr_nz_ly144)
};
run_block_with_frame_cycles(rom, 0);
ASSERT_EQ(get_dreg(REG_68K_D_A) & 0xff, 144);
ASSERT_EQ(get_cycle_count(), 144 * 456);
ASSERT_EQ(get_cycle_count(), 144 * 456 - LY_WAIT_CYCLES);
}
TEST(test_ly_wait_jr_nz_past_target)
{
// Wait for LY=50, but frame_cycles already past that
// frame_cycles=30000, LY 50 is at 22800
// Since frame_cycles >= target, wait until next frame
// D2 = (70224 + 22800) - 30000 = 63024
// frame_cycles=30000, true_pos=30012, target=22800
// Since true_pos >= target, wait until next frame
// skip = (70224 + 22800) - 30012 = 63012
uint8_t rom[] = {
0xf0, 0x44, // ldh a, ($ff44)
0xfe, 0x32, // cp 50
@@ -138,7 +144,7 @@ TEST(test_ly_wait_jr_nz_past_target)
};
run_block_with_frame_cycles(rom, 30000);
ASSERT_EQ(get_dreg(REG_68K_D_A) & 0xff, 50);
ASSERT_EQ(get_cycle_count(), 70224 + (50 * 456) - 30000);
ASSERT_EQ(get_cycle_count(), 70224 + (50 * 456) - 30000 - LY_WAIT_CYCLES);
}
TEST(test_ly_wait_jr_z_ly90)
@@ -146,7 +152,8 @@ TEST(test_ly_wait_jr_z_ly90)
// ldh a, [$44]; cp 90; jr z, back
// jr z: loop while LY == 90, exit when LY != 90
// This waits for LY = (90 + 1) % 154 = 91
// target_cycles = 91 * 456 = 41496
// true_pos = 12, target = 91*456 = 41496
// skip = 41496 - 12 = 41484
uint8_t rom[] = {
0xf0, 0x44, // ldh a, ($ff44)
0xfe, 0x5a, // cp 90
@@ -155,15 +162,16 @@ TEST(test_ly_wait_jr_z_ly90)
};
run_block_with_frame_cycles(rom, 0);
ASSERT_EQ(get_dreg(REG_68K_D_A) & 0xff, 91);
ASSERT_EQ(get_cycle_count(), 91 * 456);
ASSERT_EQ(get_cycle_count(), 91 * 456 - LY_WAIT_CYCLES);
}
TEST(test_ly_wait_jr_z_ly153)
{
// ldh a, [$44]; cp 153; jr z, back
// wait_ly = (153 + 1) % 154 = 0 (wraps to start of frame)
// target_cycles = 0 * 456 = 0
// From frame_cycles=0, this should wait for next frame
// true_pos = 12, target = 0
// Since true_pos >= target, wait for next frame
// skip = (70224 + 0) - 12 = 70212
uint8_t rom[] = {
0xf0, 0x44, // ldh a, ($ff44)
0xfe, 0x99, // cp 153
@@ -172,15 +180,15 @@ TEST(test_ly_wait_jr_z_ly153)
};
run_block_with_frame_cycles(rom, 0);
ASSERT_EQ(get_dreg(REG_68K_D_A) & 0xff, 0);
// At frame_cycles=0, target is 0, so next frame
ASSERT_EQ(get_cycle_count(), 70224);
ASSERT_EQ(get_cycle_count(), 70224 - LY_WAIT_CYCLES);
}
TEST(test_ly_wait_jr_c_ly100)
{
// ldh a, [$44]; cp 100; jr c, back
// jr c: loop while LY < 100, exit when LY >= 100
// wait_ly = 100, target_cycles = 100 * 456 = 45600
// true_pos = 12, target = 100*456 = 45600
// skip = 45600 - 12 = 45588
uint8_t rom[] = {
0xf0, 0x44, // ldh a, ($ff44)
0xfe, 0x64, // cp 100
@@ -189,14 +197,14 @@ TEST(test_ly_wait_jr_c_ly100)
};
run_block_with_frame_cycles(rom, 0);
ASSERT_EQ(get_dreg(REG_68K_D_A) & 0xff, 100);
ASSERT_EQ(get_cycle_count(), 100 * 456);
ASSERT_EQ(get_cycle_count(), 100 * 456 - LY_WAIT_CYCLES);
}
TEST(test_ly_wait_mid_frame)
{
// Wait for LY=100, starting at frame_cycles=20000
// LY 100 is at cycle 45600
// D2 = 45600 - 20000 = 25600
// true_pos = 20012, target = 45600
// skip = 45600 - 20012 = 25588
uint8_t rom[] = {
0xf0, 0x44, // ldh a, ($ff44)
0xfe, 0x64, // cp 100
@@ -205,24 +213,25 @@ TEST(test_ly_wait_mid_frame)
};
run_block_with_frame_cycles(rom, 20000);
ASSERT_EQ(get_dreg(REG_68K_D_A) & 0xff, 100);
ASSERT_EQ(get_cycle_count(), 45600 - 20000);
ASSERT_EQ(get_cycle_count(), 45600 - 20000 - LY_WAIT_CYCLES);
}
TEST(test_ly_wait_exact_target)
{
// Runs the "ldh a,[$44]; cp 50; jr nz" wait pattern from a start position
// that lands exactly on LY 50's first cycle, and expects A = 50 with a full
// frame (70224 cycles) reported.
// NOTE(review): this hunk appears to list each removed line directly before
// its replacement, so the two comment groups, the two run_block calls and the
// two identical cycle-count assertions are old-then-new - confirm against
// the diff.
// Start exactly at the target LY cycle
// LY 50 is at cycle 22800, start there
// frame_cycles >= target_cycles, so wait for next frame
// Start at frame_cycles such that true_pos exactly equals target
// target = 50*456 = 22800, so frame_cycles = 22800 - LY_WAIT_CYCLES = 22788
// true_pos = 22800 >= target, so wait for next frame
// skip = (70224 + 22800) - 22800 = 70224
uint8_t rom[] = {
0xf0, 0x44, // ldh a, ($ff44)
0xfe, 0x32, // cp 50
0x20, 0xfa, // jr nz, -6
0x10 // stop
};
run_block_with_frame_cycles(rom, 22800); // start used when the ldh cycles were not counted
run_block_with_frame_cycles(rom, 22800 - LY_WAIT_CYCLES); // start chosen so that true_pos = 22800
ASSERT_EQ(get_dreg(REG_68K_D_A) & 0xff, 50);
// frame_cycles == target_cycles, uses next frame path
ASSERT_EQ(get_cycle_count(), 70224);
// true_pos == target_cycles, uses next frame path
ASSERT_EQ(get_cycle_count(), 70224);
}
void register_timing_tests(void)

View File

@@ -150,13 +150,9 @@ u8 dmg_read_slow(struct dmg *dmg, u16 address)
if (address == REG_LY) {
// the compiler detects "ldh a, [$44]; cp N; jr cc" which is the most
// common case, and skips to that line, so this actually doesn't run
// that much - just give it the value it's waiting for. LY=LYC is handled
// in a nicer way below, when the compiled code returns to C
dmg->ly_hack++;
if (dmg->ly_hack == 154) {
dmg->ly_hack = 0;
}
return dmg->ly_hack;
// that much
u32 current = (dmg->frame_cycles + jit_ctx.read_cycles) % 70224;
return current / 456;
}
if (address == REG_STAT) {
@@ -405,7 +401,7 @@ void dmg_sync_hw(struct dmg *dmg, int cycles)
// need as a separate check for the case where cycles = 70224. in that case,
// it needs to execute both the previous block and this one
if (dmg->frame_cycles >= CYCLES_PER_FRAME) {
dmg->frame_cycles -= CYCLES_PER_FRAME;
dmg->frame_cycles %= CYCLES_PER_FRAME;
dmg->sent_vblank_start = 0;
dmg->sent_ly_interrupt = 0;
dmg->rendered_this_frame = 0;