Add DAA instruction support for BCD arithmetic

This commit is contained in:
Tanner Fokkens
2026-01-26 09:59:15 -08:00
parent 1ccc90ee1b
commit d1ff4462e5
4 changed files with 150 additions and 2 deletions
+133 -1
View File
@@ -49,10 +49,33 @@ static int try_fuse_branch(
return 1;
}
// DAA bookkeeping.
//
// The DAA emitter reads two bytes from the JIT context at JIT_CTX_DAA_STATE:
//   [0] the value A held before the most recent tracked ALU op
//   [1] the N flag of that op (0 = addition, 1 = subtraction)
// It uses them to reconstruct the half-carry (H) and to choose between the
// add and subtract adjustment. Call one of the wrappers below BEFORE emitting
// the instruction that modifies A, so the saved byte is the pre-op value.

// Shared emitter: stores A and the given N flag value into the DAA slot.
// Clobbers REG_68K_D_SCRATCH_0, which is used to stage the N flag byte.
static void compile_daa_track_common(struct code_block *block, int n_flag)
{
    // Slot byte 0 <- A as it is right now (i.e. before the ALU op runs)
    emit_move_b_dn_disp_an(block, REG_68K_D_A, JIT_CTX_DAA_STATE, REG_68K_A_CTX);

    // Slot byte 1 <- N flag, staged through the scratch register
    emit_moveq_dn(block, REG_68K_D_SCRATCH_0, n_flag);
    emit_move_b_dn_disp_an(block, REG_68K_D_SCRATCH_0, JIT_CTX_DAA_STATE + 1, REG_68K_A_CTX);
}

// Record DAA state for an addition (N = 0).
static void compile_daa_track_add(struct code_block *block)
{
    compile_daa_track_common(block, 0);
}

// Record DAA state for a subtraction (N = 1).
static void compile_daa_track_sub(struct code_block *block)
{
    compile_daa_track_common(block, 1);
}
// ADC core: expects operand already in D1.b
// Does A = A + D1 + carry using 16-bit arithmetic
static void compile_adc_core(struct code_block *block)
{
// Track for DAA: save old_A and set N=0 (addition)
compile_daa_track_add(block);
// Zero-extend operand: andi.w #0xff, D1
emit_andi_w_dn(block, REG_68K_D_SCRATCH_1, 0x00ff);
@@ -84,6 +107,9 @@ static void compile_adc_core(struct code_block *block)
// Does A = A - D1 - carry using 16-bit arithmetic
static void compile_sbc_core(struct code_block *block)
{
// Track for DAA: save old_A and set N=1 (subtraction)
compile_daa_track_sub(block);
// Zero-extend operand: andi.w #0xff, D1
emit_andi_w_dn(block, REG_68K_D_SCRATCH_1, 0x00ff);
@@ -237,7 +263,95 @@ int compile_alu_op(
// misc ALU ops
case 0x27: // daa - decimal adjust accumulator
    // DAA corrects A to packed BCD after an add or subtract, based on the
    // Game Boy N, H and C flags. Only C is read from REG_68K_D_FLAGS here;
    // the pre-operation A and the N flag come from the two bytes the tracked
    // ALU emitters store at JIT_CTX_DAA_STATE, and H is reconstructed from
    // the low nibbles:
    //   add: H = (A & 0xF) < (old_A & 0xF)
    //   sub: H = (A & 0xF) > (old_A & 0xF)
    // NOTE(review): this reconstruction is exact for plain add/sub, but it
    // misses H for adc/sbc when (operand & 0xF) + carry == 16, because the
    // low nibble of A is then unchanged. Storing the real H would need more
    // tracked state.
    // NOTE(review): only ops that emit the DAA tracking sequence update the
    // saved state; confirm whether inc/dec/cp etc. need tracking as well.
    {
        size_t branch_to_sub, branch_add_h_done, branch_to_finish;
        size_t disp;

        // D1 = A & 0x0F, captured before any adjustment modifies A
        emit_move_b_dn_dn(block, REG_68K_D_A, REG_68K_D_SCRATCH_1);
        emit_andi_b_dn(block, REG_68K_D_SCRATCH_1, 0x0F);

        // Load the saved N flag into D0 and branch on it straight away.
        // Nothing may be emitted between the tst and the bne: most 68k
        // instructions, MOVE included (and therefore a stack pop), rewrite
        // the condition codes and would make the branch test the wrong value.
        emit_move_b_disp_an_dn(block, JIT_CTX_DAA_STATE + 1, REG_68K_A_CTX, REG_68K_D_SCRATCH_0);
        emit_tst_b_dn(block, REG_68K_D_SCRATCH_0);
        branch_to_sub = block->length;
        emit_bne_w(block, 0); // N=1 -> subtraction path (displacement patched below)

        // === Addition path (N=0) ===
        // C || A > 0x99 -> add 0x60 and set C
        emit_btst_imm_dn(block, 0, REG_68K_D_FLAGS);  // test C flag (4 bytes)
        emit_bne_b(block, 6);                         // C set: skip cmp+bls, go add 0x60 (2 bytes)
        emit_cmp_b_imm_dn(block, REG_68K_D_A, 0x99);  // (4 bytes)
        emit_bls_b(block, 8);                         // A <= 0x99: skip addi+ori (2 bytes)
        emit_addi_b_dn(block, REG_68K_D_A, 0x60);     // (4 bytes)
        emit_ori_b_dn(block, REG_68K_D_FLAGS, 0x01);  // set C (4 bytes)

        // H || (A & 0x0F) > 9 -> add 0x06
        // Adding 0x60 never changes the low nibble, so D1 is still valid.
        // D0 = old_A & 0xF; H is set when D1 < D0 (unsigned).
        emit_move_b_disp_an_dn(block, JIT_CTX_DAA_STATE, REG_68K_A_CTX, REG_68K_D_SCRATCH_0);
        emit_andi_b_dn(block, REG_68K_D_SCRATCH_0, 0x0F);
        emit_cmp_b_dn_dn(block, REG_68K_D_SCRATCH_0, REG_68K_D_SCRATCH_1); // cmp D0, D1
        emit_bcc_s(block, 8); // D1 >= D0: H=0, skip addi+bra.w to the nibble check

        // H=1: add 0x06
        emit_addi_b_dn(block, REG_68K_D_A, 0x06);     // (4 bytes)
        branch_add_h_done = block->length;
        emit_bra_w(block, 0);                         // to finish (4 bytes, patched below)

        // H=0: add 0x06 only if the original low nibble is > 9
        emit_cmp_b_imm_dn(block, REG_68K_D_SCRATCH_1, 0x09);
        emit_bls_b(block, 4);                         // <= 9: skip the addi
        emit_addi_b_dn(block, REG_68K_D_A, 0x06);
        branch_to_finish = block->length;
        emit_bra_w(block, 0);                         // to finish (patched below)

        // === Subtraction path (N=1) ===
        // Patch the bne.w: the 16-bit displacement is relative to the address
        // of the displacement word itself (opcode address + 2), big-endian.
        disp = block->length - branch_to_sub - 2;
        block->code[branch_to_sub + 2] = (disp >> 8) & 0xff;
        block->code[branch_to_sub + 3] = disp & 0xff;

        // C -> sub 0x60 (C itself is left unchanged)
        emit_btst_imm_dn(block, 0, REG_68K_D_FLAGS);
        emit_beq_b(block, 4);                         // C clear: skip the subi
        emit_subi_b_dn(block, REG_68K_D_A, 0x60);

        // H -> sub 0x06. H is set when D1 > (old_A & 0xF).
        emit_move_b_disp_an_dn(block, JIT_CTX_DAA_STATE, REG_68K_A_CTX, REG_68K_D_SCRATCH_0);
        emit_andi_b_dn(block, REG_68K_D_SCRATCH_0, 0x0F);
        emit_cmp_b_dn_dn(block, REG_68K_D_SCRATCH_0, REG_68K_D_SCRATCH_1); // cmp D0, D1
        emit_bls_b(block, 4);                         // D1 <= D0: no H, skip the subi
        emit_subi_b_dn(block, REG_68K_D_A, 0x06);
        // The subtraction path falls through into the finish code.

        // === Finish: recompute Z, keep C ===
        // Patch the two forward branches from the addition path.
        disp = block->length - branch_add_h_done - 2;
        block->code[branch_add_h_done + 2] = (disp >> 8) & 0xff;
        block->code[branch_add_h_done + 3] = disp & 0xff;
        disp = block->length - branch_to_finish - 2;
        block->code[branch_to_finish + 2] = (disp >> 8) & 0xff;
        block->code[branch_to_finish + 3] = disp & 0xff;

        emit_andi_b_dn(block, REG_68K_D_FLAGS, 0x01); // keep only C
        emit_tst_b_dn(block, REG_68K_D_A);
        emit_bne_b(block, 4);                         // A != 0: skip setting Z
        emit_ori_b_dn(block, REG_68K_D_FLAGS, 0x04);  // set Z
    }
    return 1;
case 0x2f: // cpl - complement A
@@ -254,6 +368,7 @@ int compile_alu_op(
// 8-bit ALU register ops (0x80-0xbf)
case 0x80: // add a, b
compile_daa_track_add(block);
emit_swap(block, REG_68K_D_BC);
emit_add_b_dn_dn(block, REG_68K_D_BC, REG_68K_D_A);
compile_set_zc_flags(block); // capture before swap clobbers CCR
@@ -261,11 +376,13 @@ int compile_alu_op(
return 1;
case 0x81: // add a, c
compile_daa_track_add(block);
emit_add_b_dn_dn(block, REG_68K_D_BC, REG_68K_D_A);
compile_set_zc_flags(block);
return 1;
case 0x82: // add a, d
compile_daa_track_add(block);
emit_swap(block, REG_68K_D_DE);
emit_add_b_dn_dn(block, REG_68K_D_DE, REG_68K_D_A);
compile_set_zc_flags(block); // capture before swap clobbers CCR
@@ -273,11 +390,13 @@ int compile_alu_op(
return 1;
case 0x83: // add a, e
compile_daa_track_add(block);
emit_add_b_dn_dn(block, REG_68K_D_DE, REG_68K_D_A);
compile_set_zc_flags(block);
return 1;
case 0x84: // add a, h
compile_daa_track_add(block);
emit_move_w_an_dn(block, REG_68K_A_HL, REG_68K_D_SCRATCH_1);
emit_rol_w_8(block, REG_68K_D_SCRATCH_1);
emit_add_b_dn_dn(block, REG_68K_D_SCRATCH_1, REG_68K_D_A);
@@ -285,12 +404,14 @@ int compile_alu_op(
return 1;
case 0x85: // add a, l
compile_daa_track_add(block);
emit_move_w_an_dn(block, REG_68K_A_HL, REG_68K_D_SCRATCH_1);
emit_add_b_dn_dn(block, REG_68K_D_SCRATCH_1, REG_68K_D_A);
compile_set_zc_flags(block);
return 1;
case 0x86: // add a, (hl)
compile_daa_track_add(block);
emit_move_w_an_dn(block, REG_68K_A_HL, REG_68K_D_SCRATCH_1);
compile_call_dmg_read(block);
emit_add_b_dn_dn(block, REG_68K_D_SCRATCH_0, REG_68K_D_A);
@@ -298,6 +419,7 @@ int compile_alu_op(
return 1;
case 0x87: // add a, a
compile_daa_track_add(block);
emit_add_b_dn_dn(block, REG_68K_D_A, REG_68K_D_A);
compile_set_zc_flags(block);
return 1;
@@ -351,6 +473,7 @@ int compile_alu_op(
return 1;
case 0x90: // sub a, b
compile_daa_track_sub(block);
emit_swap(block, REG_68K_D_BC);
emit_sub_b_dn_dn(block, REG_68K_D_BC, REG_68K_D_A);
compile_set_zc_flags(block); // capture before swap clobbers CCR
@@ -358,11 +481,13 @@ int compile_alu_op(
return 1;
case 0x91: // sub a, c
compile_daa_track_sub(block);
emit_sub_b_dn_dn(block, REG_68K_D_BC, REG_68K_D_A);
compile_set_zc_flags(block);
return 1;
case 0x92: // sub a, d
compile_daa_track_sub(block);
emit_swap(block, REG_68K_D_DE);
emit_sub_b_dn_dn(block, REG_68K_D_DE, REG_68K_D_A);
compile_set_zc_flags(block); // capture before swap clobbers CCR
@@ -370,11 +495,13 @@ int compile_alu_op(
return 1;
case 0x93: // sub a, e
compile_daa_track_sub(block);
emit_sub_b_dn_dn(block, REG_68K_D_DE, REG_68K_D_A);
compile_set_zc_flags(block);
return 1;
case 0x94: // sub a, h
compile_daa_track_sub(block);
emit_move_w_an_dn(block, REG_68K_A_HL, REG_68K_D_SCRATCH_1);
emit_rol_w_8(block, REG_68K_D_SCRATCH_1);
emit_sub_b_dn_dn(block, REG_68K_D_SCRATCH_1, REG_68K_D_A);
@@ -382,12 +509,14 @@ int compile_alu_op(
return 1;
case 0x95: // sub a, l
compile_daa_track_sub(block);
emit_move_w_an_dn(block, REG_68K_A_HL, REG_68K_D_SCRATCH_1);
emit_sub_b_dn_dn(block, REG_68K_D_SCRATCH_1, REG_68K_D_A);
compile_set_zc_flags(block);
return 1;
case 0x96: // sub a, (hl)
compile_daa_track_sub(block);
emit_move_w_an_dn(block, REG_68K_A_HL, REG_68K_D_SCRATCH_1);
compile_call_dmg_read(block);
emit_sub_b_dn_dn(block, REG_68K_D_SCRATCH_0, REG_68K_D_A);
@@ -395,6 +524,7 @@ int compile_alu_op(
return 1;
case 0x97: // sub a, a - always results in 0
compile_daa_track_sub(block);
emit_moveq_dn(block, REG_68K_D_A, 0);
compile_set_zc_flags(block);
return 1;
@@ -653,6 +783,7 @@ int compile_alu_op(
// immediate ALU ops (0xc6, 0xce, 0xd6, 0xde, 0xe6, 0xee, 0xf6, 0xfe)
case 0xc6: // add a, #imm
compile_daa_track_add(block);
emit_addi_b_dn(block, REG_68K_D_A, READ_BYTE(*src_ptr));
(*src_ptr)++;
compile_set_zc_flags(block);
@@ -665,6 +796,7 @@ int compile_alu_op(
return 1;
case 0xd6: // sub a, #imm
compile_daa_track_sub(block);
emit_subi_b_dn(block, REG_68K_D_A, READ_BYTE(*src_ptr));
(*src_ptr)++;
compile_set_zc_flags(block);
+1 -1
View File
@@ -62,7 +62,7 @@
#define JIT_CTX_CYCLES 44 // u32: accumulated GB cycles
#define JIT_CTX_PATCH_HELPER 48 // void *patch_helper routine
#define JIT_CTX_READ_CYCLES 52 // u32: in-flight cycles at dmg_read call
#define JIT_CTX_UNUSED_1 56
#define JIT_CTX_DAA_STATE 56 // 2 bytes: [0]=old_A, [1]=N flag (for DAA)
#define JIT_CTX_FRAME_CYCLES_PTR 60 // u32 *frame_cycles_ptr (dmg->frame_cycles)
#define JIT_CTX_UNUSED_2 64
#define JIT_CTX_UNUSED_3 68
+14
View File
@@ -1120,6 +1120,20 @@ void emit_bne_b(struct code_block *block, int8_t disp)
emit_word(block, 0x6600 | ((uint8_t) disp));
}
// bls.b <disp8> - branch if lower or same (unsigned <=, i.e. C or Z set)
//
// Encoding: 0110 0011 dddd dddd (Bcc with condition code 0011 = LS).
// disp is the signed 8-bit displacement placed in the low byte of the opcode.
// Note: in the 68k Bcc encoding a displacement byte of 0x00 selects the
// 16-bit form, so callers should not pass 0 here.
void emit_bls_b(struct code_block *block, int8_t disp)
{
    const uint16_t opcode = 0x6300;
    const uint8_t disp8 = (uint8_t) disp;

    emit_word(block, (uint16_t) (opcode | disp8));
}
// bhi.b <disp8> - branch if higher (unsigned >, i.e. C and Z both clear)
//
// Encoding: 0110 0010 dddd dddd (Bcc with condition code 0010 = HI).
// disp is the signed 8-bit displacement placed in the low byte of the opcode.
// Note: in the 68k Bcc encoding a displacement byte of 0x00 selects the
// 16-bit form, so callers should not pass 0 here.
void emit_bhi_b(struct code_block *block, int8_t disp)
{
    const uint16_t opcode = 0x6200;
    const uint8_t disp8 = (uint8_t) disp;

    emit_word(block, (uint16_t) (opcode | disp8));
}
// subi.w #imm16, Dn - subtract immediate word from data register
void emit_subi_w_dn(struct code_block *block, uint16_t imm, uint8_t dreg)
{
+2
View File
@@ -133,6 +133,8 @@ void emit_cmpi_w_imm_dn(struct code_block *block, uint16_t imm, uint8_t dreg);
void emit_cmpa_w_imm_an(struct code_block *block, uint16_t imm, uint8_t areg);
void emit_bcs_b(struct code_block *block, int8_t disp);
void emit_bne_b(struct code_block *block, int8_t disp);
void emit_bls_b(struct code_block *block, int8_t disp);
void emit_bhi_b(struct code_block *block, int8_t disp);
void emit_subi_w_dn(struct code_block *block, uint16_t imm, uint8_t dreg);
void emit_move_l_ind_an_dn(struct code_block *block, uint8_t areg, uint8_t dreg);
void emit_sub_l_dn_dn(struct code_block *block, uint8_t src, uint8_t dest);