From 43d99ec9f4df2393a5a723b0cb0252cb887de01b Mon Sep 17 00:00:00 2001 From: Aaron Culliney Date: Sun, 6 Oct 2019 16:32:06 -0700 Subject: [PATCH] First cut at 65c02 implemented on aarch64 --- Android/jni/Application.mk | 2 +- Android/jni/build.sh | 2 +- Android/jni/sources.mk | 2 +- src/arm/cpu-regs.h | 122 +++- src/arm/cpu.S | 1072 +++++++++++++++++++----------------- src/arm/glue-offsets64.h | 53 ++ src/arm/glue-prologue.h | 121 ++-- src/cpu.h | 5 +- src/glue.h | 2 +- 9 files changed, 809 insertions(+), 572 deletions(-) create mode 100644 src/arm/glue-offsets64.h diff --git a/Android/jni/Application.mk b/Android/jni/Application.mk index 760d24be..44bb6c00 100644 --- a/Android/jni/Application.mk +++ b/Android/jni/Application.mk @@ -1,4 +1,4 @@ -APP_ABI := armeabi-v7a x86 x86_64 +APP_ABI := armeabi-v7a arm64-v8a x86 x86_64 # Do not change APP_PLATFORM if we care about Gingerbread (2.3.3) devices! We must compile against android-10, # otherwise we may encounter runtime load-library errors from symbols that should have been inlined against older diff --git a/Android/jni/build.sh b/Android/jni/build.sh index 36a1dcd9..037e36f4 100755 --- a/Android/jni/build.sh +++ b/Android/jni/build.sh @@ -148,7 +148,7 @@ if test "x$do_build" = "x1" -o "x$do_release" = "x1" ; then # Symbolicate and move symbols file into location to be deployed on device SYMFILE=libapple2ix.so.sym - ARCHES_TO_SYMBOLICATE='armeabi-v7a x86 x86_64' + ARCHES_TO_SYMBOLICATE='armeabi-v7a arm64-v8a x86 x86_64' for arch in $ARCHES_TO_SYMBOLICATE ; do SYMDIR=../assets/symbols/$arch/libapple2ix.so diff --git a/Android/jni/sources.mk b/Android/jni/sources.mk index 53038b98..533a6b11 100644 --- a/Android/jni/sources.mk +++ b/Android/jni/sources.mk @@ -63,7 +63,7 @@ APPLE2_MAIN_SRC = \ $(APPLE2_SRC_PATH)/zlib-helpers.c \ $(APPLE2_SRC_PATH)/../externals/jsmn/jsmn.c -APPLE2_OPTIM_CFLAGS := -Os +APPLE2_OPTIM_CFLAGS := -O2 # match the same optimization level as BUILD_MODE=release for ndk-build APPLE2_BASE_CFLAGS := -DAPPLE2IX=1 -DINTERFACE_TOUCH=1 -DMOBILE_DEVICE=1 -DVIDEO_OPENGL=1 -std=gnu11 -fPIC $(APPLE2_OPTIM_CFLAGS) -I$(APPLE2_SRC_PATH) APPLE2_BASE_LDLIBS := -Wl,-z,text -Wl,-z,noexecstack -llog -landroid -lGLESv2 -lz -lOpenSLES -latomic diff --git a/src/arm/cpu-regs.h b/src/arm/cpu-regs.h index 0612350e..95ebe1ef 100644 --- a/src/arm/cpu-regs.h +++ b/src/arm/cpu-regs.h @@ -15,45 +15,107 @@ #include "cpu.h" #include "glue-offsets.h" -// ARM register mappings - -// r0, r1 are scratch regs, with r0 generally as the "important byte" -#define EffectiveAddr r2 /* 16bit Effective address */ -#define PC_Reg r3 /* 16bit 6502 Program Counter */ -#define SP_Reg r4 /* 16bit 6502 Stack pointer */ -#define F_Reg r5 /* 8bit 6502 flags */ -#define Y_Reg r6 /* 8bit 6502 Y register */ -#define X_Reg r7 /* 8bit 6502 X register */ -#define A_Reg r8 /* 8bit 6502 A register */ -// r9 is "ARM platform register" ... used as a scratch register -#define reg_args r10 /* cpu65_run() args register */ -#define reg_vmem_r r11 /* cpu65_vmem_r table address */ -// r12 is "ARM Intra-Procedure-call scratch register" ... used as a scratch register -// r13 ARM SP -// r14 ARM return addr -// r15 ARM PC - -#ifdef __aarch64__ -# error 20150205 ARM 64bit untested!!! 
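A minimal C sketch of the scaled table dispatch that this cpu-regs.h change adjusts: reg_vmem_r points at a table of per-address read handlers, and PTR_SHIFT scales the 16-bit effective address by the pointer size (1<<3 = 8 bytes on aarch64, 1<<2 = 4 bytes on 32-bit ARM) before the indexed load and indirect branch. The handler signature and the read_byte_model name are assumptions for illustration, not code from this commit.

    #include <stdint.h>

    typedef uint8_t (*vmem_read_fn)(uint16_t ea);   /* assumed signature */
    extern vmem_read_fn cpu65_vmem_r[0x10000];      /* one read handler per 6502 address */

    static inline uint8_t read_byte_model(uint16_t ea) {
        /* the asm equivalent: ldr xr1, [reg_vmem_r, xEffectiveAddr, LSL PTR_SHIFT]; BLX xr1 */
        return cpu65_vmem_r[ea](ea);
    }
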
-# define PTR_SHIFT #4 // 4<<1 = 8 -# define ROR_BIT 0x8000000000000000 -#else -# define PTR_SHIFT #2 // 2<<1 = 4 -# define ROR_BIT 0x80000000 -#endif #if !defined(__APPLE__) # define NO_UNDERSCORES 1 -# define STRBNE strneb +#endif + +// ARM register mappings + +#define bz beq +#define bnz bne + +#define ROR_BIT 0x80000000 + +#ifdef __aarch64__ + +# define DOT_ARM +# define ALIGN .align 2; +# define PTR_SHIFT #3 // 1<<3 = 8 +# define BLX blr +# define BX br + +// x: 64bit addressing mode +// w: 32bit addressing mode + +# define xr0 x0 /* scratch/"important byte" */ +# define wr0 w0 /* scratch/"important byte" */ +# define xr1 x1 /* scratch */ +# define wr1 w1 /* scratch */ + +# define wr9 w2 + +// NOTE: ARMv8 Procedure Call Standard indicates that x19-x28 are callee saved ... so we can call back into C without needing to +// first save these ... +# define xEffectiveAddr x19 /* 16bit Effective address */ +# define EffectiveAddr w19 /* 16bit Effective address */ +# define PC_Reg w20 /* 16bit 6502 Program Counter */ +# define xSP_Reg x21 /* 16bit 6502 Stack pointer */ +# define SP_Reg w21 /* 16bit 6502 Stack pointer */ +# define xF_Reg x22 /* 8bit 6502 flags */ +# define F_Reg w22 /* 8bit 6502 flags */ +# define Y_Reg w23 /* 8bit 6502 Y register */ +# define X_Reg w24 /* 8bit 6502 X register */ +# define A_Reg w25 /* 8bit 6502 A register */ +# define xA_Reg x25 /* 8bit 6502 A register */ +# define reg_args x26 /* cpu65_run() args register */ +# define reg_vmem_r x27 /* cpu65_vmem_r table address */ + +# define xr12 x28 /* scratch */ +# define wr12 w28 /* scratch */ +// x29 : frame pointer (callee-saved) +// x30 : return address +// xzr/wzr : zero register +// sp : stack pointer +// pc : instruction pointer + #else -# define STRBNE strbne + +# define DOT_ARM .arm; +# define ALIGN .balign 4; +# define PTR_SHIFT #2 // 1<<2 = 4 +# define BLX blx +# define BX bx + +// r0, r1 are scratch regs, with r0 generally as the "important byte" +# define xr0 r0 /* scratch/"important byte" */ +# define wr0 r0 /* scratch/"important byte" */ +# define xr1 r1 /* scratch */ +# define wr1 r1 /* scratch */ +# define wr9 r9 /* scratch */ +// r12 is "ARM Intra-Procedure-call scratch register" ... used as a scratch register +# define xr12 r12 /* scratch */ +# define wr12 r12 /* scratch */ + +// NOTE: these need to be preserved in subroutine (C) invocations ... */ +# define EffectiveAddr r2 /* 16bit Effective address */ +# define xEffectiveAddr r2 /* 16bit Effective address */ +# define PC_Reg r3 /* 16bit 6502 Program Counter */ + +// NOTE: ARMv7 PCS states : "A subroutine must preserve the contents of the registers r4-r8, r10, r11 and SP [...]" +# define xSP_Reg r4 /* 16bit 6502 Stack pointer */ +# define SP_Reg r4 /* 16bit 6502 Stack pointer */ +# define xF_Reg r5 /* 8bit 6502 flags */ +# define F_Reg r5 /* 8bit 6502 flags */ +# define Y_Reg r6 /* 8bit 6502 Y register */ +# define X_Reg r7 /* 8bit 6502 X register */ +# define A_Reg r8 /* 8bit 6502 A register */ +# define xA_Reg r8 /* 8bit 6502 A register */ + +// r9 is "ARM platform register" ... 
used as a scratch register +# define reg_args r10 /* cpu65_run() args register */ +# define reg_vmem_r r11 /* cpu65_vmem_r table address */ +// r13 ARM SP +// r14 ARM LR (return addr) +// r15 ARM PC + #endif #if NO_UNDERSCORES -# define ENTRY(x) .globl x; .arm; .balign 4; x##: +# define ENTRY(x) .global x; DOT_ARM ALIGN x##: # define CALL(x) x #else -# define ENTRY(x) .globl _##x; .arm; .balign 4; _##x##: +# define ENTRY(x) .global _##x; DOT_ARM ALIGN _##x##: # define CALL(x) _##x #endif diff --git a/src/arm/cpu.S b/src/arm/cpu.S index 04f4c607..c5c59dc9 100644 --- a/src/arm/cpu.S +++ b/src/arm/cpu.S @@ -17,13 +17,45 @@ #include "vm.h" +#if __aarch64__ +# define arm_flags xr12 +# define APSR NZCV +# define APSR_nzcvq NZCV + +# define Enter \ + stp x29, x30, [sp, -16]!; \ + mov x29, sp; \ + stp x19, x20, [sp, -16]!; \ + stp x21, x22, [sp, -16]!; \ + stp x23, x24, [sp, -16]!; \ + stp x25, x26, [sp, -16]!; \ + stp x27, x28, [sp, -16]!; + +# define Exit \ + ldp x27, x28, [sp], 16; \ + ldp x25, x26, [sp], 16; \ + ldp x23, x24, [sp], 16; \ + ldp x21, x22, [sp], 16; \ + ldp x19, x20, [sp], 16; \ + ldp x29, x30, [sp], 16; \ + ret +#else +# define arm_flags wr12 + +# define Enter \ + push {r4, r5, r6, r7, r8, r9, r10, r11, lr} + +# define Exit \ + pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +#endif + #define DecodeFlags \ - ldr r1, [reg_args, #CPU65_FLAGS_DECODE]; \ - ldrb F_Reg, [r1, r0]; + ldr xr1, [reg_args, #CPU65_FLAGS_DECODE]; \ + ldrb F_Reg, [xr1, xr0]; #define EncodeFlags \ - ldr r1, [reg_args, #CPU65_FLAGS_ENCODE]; \ - ldrb r0, [r1, F_Reg]; + ldr xr1, [reg_args, #CPU65_FLAGS_ENCODE]; \ + ldrb wr0, [xr1, xF_Reg]; #define CommonSaveCPUState \ /* save EA */ \ @@ -38,7 +70,7 @@ strb A_Reg, [reg_args, #CPU65_A]; \ /* save flags */ \ EncodeFlags \ - strb r0, [reg_args, #CPU65_F] + strb wr0, [reg_args, #CPU65_F] // Tracing is necessary for some CPU tests and to verify operation against other emulators #if CPU_TRACING @@ -61,20 +93,20 @@ #define CPUStatsReset \ - eor r1, r1, r1; \ - strb r1, [reg_args, #CPU65_OPCYCLES]; \ - strb r1, [reg_args, #CPU65_RW]; + eor wr1, wr1, wr1; \ + strb wr1, [reg_args, #CPU65_OPCYCLES]; \ + strb wr1, [reg_args, #CPU65_RW]; #define CPUStatsSetRead \ - ldrb r1, [reg_args, #CPU65_RW]; \ - orr r1, r1, #1; \ - strb r1, [reg_args, #CPU65_RW]; + ldrb wr1, [reg_args, #CPU65_RW]; \ + orr wr1, wr1, #1; \ + strb wr1, [reg_args, #CPU65_RW]; #define CPUStatsSetWrite \ - strb r0, [reg_args, #CPU65_D]; \ - ldrb r1, [reg_args, #CPU65_RW]; \ - orr r1, r1, #2; \ - strb r1, [reg_args, #CPU65_RW]; + strb wr0, [reg_args, #CPU65_D]; \ + ldrb wr1, [reg_args, #CPU65_RW]; \ + orr wr1, wr1, #2; \ + strb wr1, [reg_args, #CPU65_RW]; // ---------------------------------------------------------------------------- // CPU (6502) helper macros @@ -82,88 +114,91 @@ // Add 16bit x with <=16bit amt #define AddUint16(x, amt) \ add x, x, amt; \ - bic x, #0x10000; + bic x, x, #0xFF0000; // Increment 16bit x #define IncUint16(x) \ AddUint16(x, #1) -// Add 8bit x with <=8bit amt -#define AddUint8(x, amt) \ - add x, x, amt; \ - bic SP_Reg, #0x0100; - // Increment 8bit x #define IncUint8(x) \ - AddUint8(x, #1) - -// Subtract 8bit x by <=8bit amt -#define SubUint8(x, amt) \ - subs x, x, amt; \ - movmi x, #0xFF; + add x, x, #1; \ + and x, x, #0xFF; // Decrement 8bit x #define DecUint8(x) \ - SubUint8(x, #1) + sub x, x, #1; \ + and x, x, #0xFF; #define GetFromPC_B \ mov EffectiveAddr, PC_Reg; \ IncUint16(PC_Reg) \ - orrs r0, r0, #0x10000; /* clear ARM processor flags */ \ - ldr r1, [reg_vmem_r, EffectiveAddr, 
LSL PTR_SHIFT]; \ - blx r1; \ + orr wr0, wr0, #0x10000; /* clear ARM processor flags */ \ + ldr xr1, [reg_vmem_r, xEffectiveAddr, LSL PTR_SHIFT]; \ + BLX xr1; \ TRACE_ARG -#define hi_byte r0 -#define lo_byte r9 +#define word_reg PC_Reg +#define hi_byte wr0 +#define lo_byte wr9 #define GetFromPC_W \ mov EffectiveAddr, PC_Reg; \ AddUint16(PC_Reg, #2) \ - ldr r1, [reg_vmem_r, EffectiveAddr, LSL PTR_SHIFT]; \ - blx r1; \ + lsl PC_Reg, PC_Reg, #16; \ + ldr xr1, [reg_vmem_r, xEffectiveAddr, LSL PTR_SHIFT]; \ + BLX xr1; \ TRACE_ARG \ - mov lo_byte, r0; \ + orr word_reg, word_reg, wr0; \ IncUint16(EffectiveAddr) \ - ldr r1, [reg_vmem_r, EffectiveAddr, LSL PTR_SHIFT]; \ - blx r1; \ + ldr xr1, [reg_vmem_r, xEffectiveAddr, LSL PTR_SHIFT]; \ + BLX xr1; \ TRACE_ARG; \ - mov hi_byte, hi_byte, LSL #8; \ - orr r0, hi_byte, lo_byte; + lsl wr0, wr0, #8; /* hi byte */ \ + orr wr0, word_reg, wr0; \ + lsl wr0, wr0, #16; \ + lsr wr0, wr0, #16; \ + lsr PC_Reg, PC_Reg, #16; #define JumpNextInstruction \ TRACE_PROLOGUE \ GetFromPC_B \ - strb r0, [reg_args, #CPU65_OPCODE]; /* r0 should be next opcode */ \ - ldr r1, [reg_args, #CPU65__OPCODES]; \ - ldr r1, [r1, r0, LSL PTR_SHIFT]; \ - bx r1; + strb wr0, [reg_args, #CPU65_OPCODE]; /* wr0 should be next opcode */ \ + ldr xr1, [reg_args, #CPU65__OPCODES]; \ + ldr xr1, [xr1, xr0, LSL PTR_SHIFT]; \ + eor xr0, xr0, xr0; \ + msr APSR_nzcvq, xr0; \ + BX xr1; #define GetFromEA_B \ CPUStatsSetRead \ - ldr r1, [reg_vmem_r, EffectiveAddr, LSL PTR_SHIFT]; \ - blx r1; + ldr xr1, [reg_vmem_r, xEffectiveAddr, LSL PTR_SHIFT]; \ + BLX xr1; #define GetFromEA_W \ - ldr r1, [reg_vmem_r, EffectiveAddr, LSL PTR_SHIFT]; \ - blx r1; \ - mov lo_byte, r0; \ + lsl PC_Reg, PC_Reg, #16; \ + ldr xr1, [reg_vmem_r, xEffectiveAddr, LSL PTR_SHIFT]; \ + BLX xr1; \ + orr word_reg, word_reg, wr0; \ IncUint16(EffectiveAddr) \ - ldr r1, [reg_vmem_r, EffectiveAddr, LSL PTR_SHIFT]; \ - blx r1; \ - mov hi_byte, hi_byte, LSL #8; \ - orr r0, hi_byte, lo_byte; + ldr xr1, [reg_vmem_r, xEffectiveAddr, LSL PTR_SHIFT]; \ + BLX xr1; \ + lsl wr0, wr0, #8; /* hi byte */ \ + orr wr0, word_reg, wr0; \ + lsl wr0, wr0, #16; \ + lsr wr0, wr0, #16; \ + lsr PC_Reg, PC_Reg, #16; #define PutToEA_B \ CPUStatsSetWrite \ - ldr r1, [reg_args, #CPU65_VMEM_W]; \ - ldr r1, [r1, EffectiveAddr, LSL PTR_SHIFT]; \ - blx r1; + ldr xr1, [reg_args, #CPU65_VMEM_W]; \ + ldr xr1, [xr1, xEffectiveAddr, LSL PTR_SHIFT]; \ + BLX xr1; #define GetFromMem_B(x) \ mov EffectiveAddr, x; \ - ldr r1, [reg_vmem_r, EffectiveAddr, LSL PTR_SHIFT]; \ - blx r1; + ldr xr1, [reg_vmem_r, xEffectiveAddr, LSL PTR_SHIFT]; \ + BLX xr1; #define GetFromMem_W(x) \ mov EffectiveAddr, x; \ @@ -172,103 +207,96 @@ #define Continue \ b continue; -#define cbw \ +#define cbw(x) \ /* Virtual x86: Convert Byte-to-Word */ \ - mov r0, r0, LSL #24; \ - mov r0, r0, ASR #24; + lsl x, x, #24; \ + asr x, x, #24; -#define _IncOpCycles \ +#define _AddOpCycles(x) \ ldrb scratch_count, [reg_args, #CPU65_OPCYCLES]; \ - add scratch_count, scratch_count, #1; + add scratch_count, scratch_count, x; -#define IncOpCycles \ - _IncOpCycles \ +#define AddOpCycles(x) \ + _AddOpCycles(x) \ strb scratch_count, [reg_args, #CPU65_OPCYCLES]; -#define scratch_count r1 -#define pc_hi_prev r9 -#define pc_hi_next r0 +#define FlagNotZero(x) \ + mrs arm_flags, APSR; \ + lsr x, arm_flags, #30; \ + and x, x, #1; /* Z_Flag */ \ + eor x, x, #1; + +#define scratch_count wr1 +#define pc_hi_prev wr9 +#define pc_hi_next wr0 #define BranchXCycles \ - _IncOpCycles \ - mov pc_hi_prev, PC_Reg, LSR #8; \ - cbw; \ - add 
PC_Reg, PC_Reg, r0; /* branch PC */ \ - mov PC_Reg, PC_Reg, LSL #16; /* 16bit under/overflow protection */ \ - mov PC_Reg, PC_Reg, LSR #16; \ - mov pc_hi_next, PC_Reg, LSR #8; \ - teq pc_hi_next, pc_hi_prev; \ - addne scratch_count, scratch_count, #1; /* +1 branch taken */ \ + _AddOpCycles(#1) \ + lsr pc_hi_prev, PC_Reg, #8; \ + cbw(wr0); \ + add PC_Reg, PC_Reg, wr0; /* branch PC */ \ + bic PC_Reg, PC_Reg, #0xFF0000; /* 16bit under/overflow protection */ \ + lsr pc_hi_next, PC_Reg, #8; \ + eor wr0, pc_hi_next, pc_hi_prev; \ + \ + cmp wr0, #0; \ + FlagNotZero(xr12) \ + \ + add scratch_count, scratch_count, wr12; /* +0/1 branch not/taken */ \ strb scratch_count, [reg_args, #CPU65_OPCYCLES]; -#define arm_flags r12 #define lahf \ - /* Virtual x86: Load %AH (r12) from ARM CPU Flags */ \ + /* Virtual x86: Load %AH (wr12) from ARM CPU Flags */ \ mrs arm_flags, APSR; \ - mov arm_flags, arm_flags, LSR #28; - -#define bt \ - /* Virtual x86: Bit Test (and set ... carry flag only) */ \ - tst F_Reg, #C_Flag; \ - mrs arm_flags, APSR; \ - biceq arm_flags, arm_flags, #0x20000000; \ - orrne arm_flags, arm_flags, #0x20000000; \ - msr APSR_nzcvq, arm_flags; - -#define btc \ - /* Virtual x86: Bit Test and Clear (carry flag only) */ \ - tst F_Reg, #C_Flag; \ - mrs arm_flags, APSR; \ - bicne arm_flags, arm_flags, #0x20000000; \ - /*orreq arm_flags, arm_flags, #0x20000000;*/ \ - msr APSR_nzcvq, arm_flags; + lsr arm_flags, arm_flags, #28; #define FlagC \ lahf; \ and arm_flags, arm_flags, #C_Flag; \ bic F_Reg, F_Reg, #C_Flag; \ - orr F_Reg, F_Reg, arm_flags; + orr xF_Reg, xF_Reg, arm_flags; #define FlagZ \ lahf; \ and arm_flags, arm_flags, #Z_Flag; \ bic F_Reg, F_Reg, #Z_Flag; \ - orr F_Reg, F_Reg, arm_flags; + orr xF_Reg, xF_Reg, arm_flags; #define FlagN \ lahf; \ and arm_flags, arm_flags, #N_Flag; \ bic F_Reg, F_Reg, #N_Flag; \ - orr F_Reg, F_Reg, arm_flags; + orr xF_Reg, xF_Reg, arm_flags; #define FlagNZ \ lahf; \ and arm_flags, arm_flags, #NZ_Flags; \ bic F_Reg, F_Reg, #NZ_Flags; \ - orr F_Reg, F_Reg, arm_flags; + orr xF_Reg, xF_Reg, arm_flags; #define FlagNZC \ lahf; \ and arm_flags, arm_flags, #NZC_Flags; \ bic F_Reg, F_Reg, #NZC_Flags; \ - orr F_Reg, F_Reg, arm_flags; + orr xF_Reg, xF_Reg, arm_flags; #define FlagNVZ \ lahf; \ and arm_flags, arm_flags, #NVZ_Flags; \ - bic F_Reg, F_Reg, #NVZ_Flags; \ - orr F_Reg, F_Reg, arm_flags; + bic F_Reg, F_Reg, #NZ_Flags; \ + bic F_Reg, F_Reg, #V_Flag; \ + orr xF_Reg, xF_Reg, arm_flags; #define FlagNVZC \ lahf; \ bic F_Reg, F_Reg, #NVZC_Flags; \ - orr F_Reg, F_Reg, arm_flags; + orr xF_Reg, xF_Reg, arm_flags; -#define stack_loc r1 +#define stack_loc xr1 #ifdef APPLE2_VM # define RestoreAltZP \ ldr stack_loc, [reg_args, #BASE_STACKZP]; \ add stack_loc, stack_loc, #0x100; \ - add stack_loc, stack_loc, SP_Reg; + add stack_loc, stack_loc, xSP_Reg; #else # error FIXME TODO ... #endif @@ -297,8 +325,8 @@ #if CPU_TRACING #define GetImm \ _GetImm \ - ldr r1, [reg_vmem_r, EffectiveAddr, LSL PTR_SHIFT]; \ - blx r1; \ + ldr xr1, [reg_vmem_r, xEffectiveAddr, LSL PTR_SHIFT]; \ + BLX xr1; \ TRACE_ARG #else #define GetImm \ @@ -309,13 +337,13 @@ order address, and the third byte is the high order byte. 
*/ #define GetAbs \ GetFromPC_W; \ - mov EffectiveAddr, r0; + mov EffectiveAddr, wr0; /* Zero Page Addressing - the second byte of the instruction is an address on the zero page */ #define GetZPage \ GetFromPC_B; \ - mov EffectiveAddr, r0; + mov EffectiveAddr, wr0; /* Zero Page Indexed Addressing - The effective address is calculated by adding the second byte to the contents of the index register. Due @@ -324,25 +352,28 @@ not occur. */ #define GetZPage_X \ GetFromPC_B; \ - add r0, r0, X_Reg; \ - bic r0, #0x100; \ - mov EffectiveAddr, r0; + add wr0, wr0, X_Reg; \ + bic wr0, wr0, #0x100; \ + mov EffectiveAddr, wr0; #define GetZPage_Y \ GetFromPC_B; \ - add r0, r0, Y_Reg; \ - bic r0, #0x100; \ - mov EffectiveAddr, r0; + add wr0, wr0, Y_Reg; \ + bic wr0, wr0, #0x100; \ + mov EffectiveAddr, wr0; -#define ea_hi_prev r1 -#define ea_hi_next r9 -#define AddIndexRegAndTestPageBoundary(IndexReg) \ - mov ea_hi_prev, r0, LSR #8; \ - add r0, r0, IndexReg; \ - bic r0, #0x10000; \ - mov ea_hi_next, r0, LSR #8; \ - teq ea_hi_prev, ea_hi_next; \ - beq 9f; +#define ea_hi_prev wr1 +#define AddIndexReg(IndexReg) \ + lsr ea_hi_prev, wr0, #8; \ + add wr0, wr0, IndexReg; \ + bic wr0, wr0, #0x10000; + +#define ea_hi_next wr9 +#define TestPageBoundary(x) \ + lsr ea_hi_next, wr0, #8; \ + eor ea_hi_next, ea_hi_prev, ea_hi_next; \ + cmp ea_hi_next, #0; \ + FlagNotZero(x); /* Absolute Indexed Addressing - The effective address is formed by adding the contents of X or Y to the address contained in the @@ -350,27 +381,27 @@ #define GetAbs_X \ GetFromPC_W \ - AddIndexRegAndTestPageBoundary(X_Reg) \ - IncOpCycles \ -9: mov EffectiveAddr, r0; + AddIndexReg(X_Reg) \ + TestPageBoundary(xr12) \ + AddOpCycles(wr12) \ + mov EffectiveAddr, wr0; #define GetAbs_X_STx \ GetFromPC_W \ - AddIndexRegAndTestPageBoundary(X_Reg) \ - /* IncOpCycles */ \ -9: mov EffectiveAddr, r0; + AddIndexReg(X_Reg) \ + mov EffectiveAddr, wr0; #define GetAbs_Y \ GetFromPC_W \ - AddIndexRegAndTestPageBoundary(Y_Reg) \ - IncOpCycles \ -9: mov EffectiveAddr, r0; + AddIndexReg(Y_Reg) \ + TestPageBoundary(xr12) \ + AddOpCycles(wr12) \ + mov EffectiveAddr, wr0; #define GetAbs_Y_STA \ GetFromPC_W \ - AddIndexRegAndTestPageBoundary(Y_Reg) \ - /* IncOpCycles */ \ -9: mov EffectiveAddr, r0; + AddIndexReg(Y_Reg) \ + mov EffectiveAddr, wr0; /* Zero Page Indirect Addressing (65c02) - The second byte of the instruction points to a memory location on page zero containing the @@ -378,15 +409,15 @@ zero contains the high order byte of the address. */ #define GetIndZPage \ GetFromPC_B \ - mov EffectiveAddr, r0; \ + mov EffectiveAddr, wr0; \ GetFromEA_B \ - mov r12, r0; \ + mov wr12, wr0; \ add EffectiveAddr, EffectiveAddr, #1; \ - bic EffectiveAddr, #0x100; \ + bic EffectiveAddr, EffectiveAddr, #0x100; \ GetFromEA_B \ - mov r0, r0, LSL #8; \ - orr r0, r12, r0; \ - mov EffectiveAddr, r0; + lsl wr0, wr0, #8; \ + orr wr0, wr12, wr0; \ + mov EffectiveAddr, wr0; /* Zero Page Indexed Indirect Addressing - The second byte is added to the contents of the X index register; the carry is discarded. The @@ -397,17 +428,17 @@ and low-order bytes must be in page zero. 
*/ #define GetIndZPage_X \ GetFromPC_B \ - mov EffectiveAddr, r0; \ + mov EffectiveAddr, wr0; \ add EffectiveAddr, EffectiveAddr, X_Reg; \ - bic EffectiveAddr, #0x100; \ + bic EffectiveAddr, EffectiveAddr, #0x100; \ GetFromEA_B \ - mov r12, r0; \ + mov wr12, wr0; \ add EffectiveAddr, EffectiveAddr, #1; \ - bic EffectiveAddr, #0x100; \ + bic EffectiveAddr, EffectiveAddr, #0x100; \ GetFromEA_B \ - mov r0, r0, LSL #8; \ - orr r0, r12, r0; \ - mov EffectiveAddr, r0; + lsl wr0, wr0, #8; \ + orr wr0, wr12, wr0; \ + mov EffectiveAddr, wr0; /* Indirect Indexed Addressing - The second byte of the instruction points to a memory location in page zero. The contents of this @@ -418,338 +449,370 @@ effective address. */ #define _GetIndZPage_Y \ GetFromPC_B \ - mov EffectiveAddr, r0; \ + mov EffectiveAddr, wr0; \ GetFromEA_B \ - mov r12, r0; \ + mov wr12, wr0; \ add EffectiveAddr, EffectiveAddr, #1; \ - bic EffectiveAddr, #0x100; \ + bic EffectiveAddr, EffectiveAddr, #0x100; \ GetFromEA_B \ - mov r0, r0, LSL #8; \ - orr r0, r12, r0; \ - AddIndexRegAndTestPageBoundary(Y_Reg) + lsl wr0, wr0, #8; \ + orr wr0, wr12, wr0; \ + AddIndexReg(Y_Reg) #define GetIndZPage_Y \ _GetIndZPage_Y \ - IncOpCycles \ -9: mov EffectiveAddr, r0; + TestPageBoundary(xr12) \ + AddOpCycles(wr12) \ + mov EffectiveAddr, wr0; #define GetIndZPage_Y_STA \ _GetIndZPage_Y \ - /*IncOpCycles*/ \ -9: mov EffectiveAddr, r0; + mov EffectiveAddr, wr0; // ---------------------------------------------------------------------------- // 65c02 instruction macros -#if 0 -#error this attempts to leverage ARM 32bit flags, but does so incorrectly ... possibly there is some efficiency gains here if correctness can be assured -#define DoADC_b \ - GetFromEA_B \ - mov A_Reg, A_Reg, LSL #24; \ - mov r0, r0, LSL #24; \ - \ - /* clear ARM 'C' */ \ - adcs r1, r1, #0; \ - \ - /* set HI-C */ \ - tst F_Reg, #C_Flag; \ - adcnes A_Reg, A_Reg, #0x01000000; \ - \ - /* lahf; */ \ - /* and arm_flags, arm_flags, #VC_Flags; */ \ - /* mov arm_flags_int, arm_flags; */ \ - \ - adcs A_Reg, A_Reg, r0; \ - FlagNVZC \ - /* merge intermediate V,C */ \ - /* orr F_Reg, F_Reg, arm_flags_int; */ \ - mov A_Reg, A_Reg, LSR #24; -#else -#define sign_a r1 -#define sign_0 r9 +#define sign_a wr1 +#define sign_0 wr9 #define DoADC_b \ GetFromEA_B \ /* save operand sign bits */ \ - mov sign_a, A_Reg, LSR #7; \ - mov sign_0, r0, LSR #7; \ + lsr sign_a, A_Reg, #7; \ + lsr sign_0, wr0, #7; \ /* perform ADC with incoming carry */\ tst F_Reg, #C_Flag; \ - addne A_Reg, A_Reg, #1; \ - add A_Reg, A_Reg, r0; \ + bz 1f; \ + add A_Reg, A_Reg, #1; \ +1: add A_Reg, A_Reg, wr0; \ /* clear and set flags */ \ bic F_Reg, F_Reg, #NVZC_Flags; \ tst A_Reg, #0x100; \ - orrne F_Reg, F_Reg, #C_Flag; \ - bic A_Reg, #0x100; \ + bz 2f; \ + orr F_Reg, F_Reg, #C_Flag; \ +2: bic A_Reg, A_Reg, #0x100; \ \ tst A_Reg, #0x80; \ - orrne F_Reg, F_Reg, #N_Flag; \ + bz 3f; \ + orr F_Reg, F_Reg, #N_Flag; \ \ - teq sign_a, sign_0; \ - bne 1f; \ - mov sign_0, A_Reg, LSR #7; \ +3: eor sign_0, sign_a, sign_0; \ + cmp sign_0, #0; \ + bnz 4f; \ + lsr sign_0, A_Reg, #7; \ cmp sign_a, sign_0; \ - orrne F_Reg, F_Reg, #V_Flag; \ + bz 4f; \ + orr F_Reg, F_Reg, #V_Flag; \ \ -1: tst A_Reg, #0xFF; \ - orreq F_Reg, F_Reg, #Z_Flag; -#endif +4: tst A_Reg, #0xFF; \ + bne 5f; \ + orr F_Reg, F_Reg, #Z_Flag; \ +5: #ifndef NDEBUG -#define DebugBCDCheck \ +# define DebugBCDCheck \ tst A_Reg, #0x80; \ - tstne A_Reg, #0x60; \ - blne CALL(debug_illegal_bcd); \ - tst A_Reg, #0x08; \ - tstne A_Reg, #0x06; \ - blne CALL(debug_illegal_bcd); \ - tst r0, #0x80; 
\ - tstne r0, #0x60; \ - blne CALL(debug_illegal_bcd); \ - tst r0, #0x08; \ - tstne r0, #0x06; \ - blne CALL(debug_illegal_bcd); + bz 6f; \ + tst A_Reg, #0x60; \ + bz 6f; \ + bl CALL(debug_illegal_bcd); \ +6: tst A_Reg, #0x08; \ + bz 7f; \ + tst A_Reg, #0x06; \ + bz 7f; \ + bl CALL(debug_illegal_bcd); \ +7: tst wr0, #0x80; \ + bz 8f; \ + tst wr0, #0x60; \ + bz 8f; \ + bl CALL(debug_illegal_bcd); \ +8: tst wr0, #0x08; \ + bz 9f; \ + tst wr0, #0x06; \ + bz 9f; \ + bl CALL(debug_illegal_bcd); \ +9: #else -#define DebugBCDCheck +# define DebugBCDCheck #endif #define DoAND \ GetFromEA_B \ - mov r0, r0, LSL #24; \ - mov A_Reg, A_Reg, LSL #24; \ - ands A_Reg, A_Reg, r0; \ + lsl wr0, wr0, #24; \ + lsl A_Reg, A_Reg, #24; \ + ands A_Reg, A_Reg, wr0; \ FlagNZ \ - mov A_Reg, A_Reg, LSR #24; + lsr A_Reg, A_Reg, #24; #define _DoASL(x) \ - mov x, x, LSL #24; \ + lsl x, x, #24; \ adcs x, x, x; \ FlagNZC \ - mov x, x, LSR #24; + lsr x, x, #24; #define DoASL \ GetFromEA_B \ - _DoASL(r0) \ + _DoASL(wr0) \ PutToEA_B #define _DoBIT \ GetFromEA_B \ - mov r1, A_Reg; \ - and r1, r1, r0; + mov wr1, A_Reg; \ + and wr1, wr1, wr0; #define DoBIT \ _DoBIT \ - bic F_Reg, F_Reg, #NVZ_Flags; \ - tst r1, #0xFF; \ - orreq F_Reg, F_Reg, #Z_Flag; \ - tst r0, #0x40; \ - orrne F_Reg, F_Reg, #V_Flag; \ - tst r0, #0x80; \ - orrne F_Reg, F_Reg, #N_Flag; + bic F_Reg, F_Reg, #NZ_Flags; \ + bic F_Reg, F_Reg, #V_Flag; \ + tst wr1, #0xFF; \ + bnz 1f; \ + orr F_Reg, F_Reg, #Z_Flag; \ +1: tst wr0, #0x40; \ + bz 2f; \ + orr F_Reg, F_Reg, #V_Flag; \ +2: tst wr0, #0x80; \ + bz 3f; \ + orr F_Reg, F_Reg, #N_Flag; \ +3: #define DoCMP \ GetFromEA_B \ /* TODO FIXME : maybe actually use cmp or cmn instruction? */ \ - mov r0, r0, LSL #24; \ - mov r1, A_Reg, LSL #24; \ - subs r1, r1, r0; \ + lsl wr0, wr0, #24; \ + lsl wr1, A_Reg, #24; \ + subs wr1, wr1, wr0; \ /*cmc;*/ \ FlagNZC #define DoCPX \ GetFromEA_B \ - mov r0, r0, LSL #24; \ - mov r1, X_Reg, LSL #24; \ - subs r1, r1, r0; \ + lsl wr0, wr0, #24; \ + lsl wr1, X_Reg, #24; \ + subs wr1, wr1, wr0; \ /*cmc;*/ \ FlagNZC #define DoCPY \ GetFromEA_B \ - mov r0, r0, LSL #24; \ - mov r1, Y_Reg, LSL #24; \ - subs r1, r1, r0; \ + lsl wr0, wr0, #24; \ + lsl wr1, Y_Reg, #24; \ + subs wr1, wr1, wr0; \ /*cmc;*/ \ FlagNZC #define _DoDEC(x) \ - mov r1, #1; \ - mov r1, r1, LSL #24; \ - mov x, x, LSL #24; \ - subs x, x, r1; \ + mov wr1, #1; \ + lsl wr1, wr1, #24; \ + lsl x, x, #24; \ + subs x, x, wr1; \ FlagNZ \ - mov x, x, LSR #24; + lsr x, x, #24; #define DoDEC \ GetFromEA_B \ - _DoDEC(r0) \ + _DoDEC(wr0) \ PutToEA_B #define DoEOR \ GetFromEA_B \ - mov r0, r0, LSL #24; \ - mov A_Reg, A_Reg, LSL #24; \ - eors A_Reg, A_Reg, r0; \ + lsl wr0, wr0, #24; \ + lsl A_Reg, A_Reg, #24; \ + eor A_Reg, A_Reg, wr0; \ + cmp A_Reg, #0; \ FlagNZ \ - mov A_Reg, A_Reg, LSR #24; + lsr A_Reg, A_Reg, #24; #define _DoINC(x) \ - mov r1, #1; \ - mov r1, r1, LSL #24; \ - mov x, x, LSL #24; \ - adds x, x, r1; \ + mov wr1, #1; \ + lsl wr1, wr1, #24; \ + lsl x, x, #24; \ + adds x, x, wr1; \ FlagNZ \ - mov x, x, LSR #24; + lsr x, x, #24; #define DoINC \ GetFromEA_B \ - _DoINC(r0) \ + _DoINC(wr0) \ PutToEA_B #define DoLDA \ GetFromEA_B \ - mov A_Reg, r0; \ - mov r0, r0, LSL #24; \ - orrs r0, r0, r0; \ + mov A_Reg, wr0; \ + lsl wr0, wr0, #24; \ + orr wr0, wr0, wr0; \ + cmp wr0, #0; \ FlagNZ #define DoLDX \ GetFromEA_B \ - mov X_Reg, r0; \ - mov r0, r0, LSL #24; \ - orrs r0, r0, r0; \ + mov X_Reg, wr0; \ + lsl wr0, wr0, #24; \ + orr wr0, wr0, wr0; \ + cmp wr0, #0; \ FlagNZ #define DoLDY \ GetFromEA_B \ - mov Y_Reg, r0; \ - mov r0, r0, LSL 
#24; \ - orrs r0, r0, r0; \ + mov Y_Reg, wr0; \ + lsl wr0, wr0, #24; \ + orr wr0, wr0, wr0; \ + cmp wr0, #0; \ FlagNZ -#define _DoLSR(x) \ +#if __aarch64__ +# define _DoLSR(x) \ + and wr1, x, #1; \ + lsl wr1, wr1, #1; /* C_Flag */ \ + lsr x, x, #1; \ + cmp x, #0; \ + FlagNZ; \ + bic F_Reg, F_Reg, #C_Flag; \ + orr F_Reg, F_Reg, wr1; +#else +# define _DoLSR(x) \ lsrs x, x, #1; \ - FlagNZC + FlagNZC; +#endif #define DoLSR \ GetFromEA_B \ - _DoLSR(r0) \ + _DoLSR(wr0) \ PutToEA_B #define DoORA \ GetFromEA_B \ - mov r0, r0, LSL #24; \ - mov A_Reg, A_Reg, LSL #24; \ - orrs A_Reg, A_Reg, r0; \ + lsl wr0, wr0, #24; \ + lsl A_Reg, A_Reg, #24; \ + orr A_Reg, A_Reg, wr0; \ + cmp A_Reg, #0; \ FlagNZ \ - mov A_Reg, A_Reg, LSR #24; + lsr A_Reg, A_Reg, #24; #define _DoPLx(x) \ Pop(x); \ - mov r0, x, LSL #24; \ - orrs r0, r0, r0; \ + lsl wr0, x, #24; \ + orr wr0, wr0, wr0; \ + cmp wr0, #0; \ FlagNZ -#define _DoROL(x) \ - mov x, x, LSL #8; \ - tst F_Reg, #C_Flag; \ - orrne x, x, #0x80; \ +#if __aarch64__ +# define _FlagROL(x) \ + /* HACK FIXME: Is there a easier way way to flag C here? */ \ + and xr1, x, #0x8000; /* out carry -> */ \ + lsr wr1, wr1, #14; /* C_FLAG */ \ + bic F_Reg, F_Reg, #C_Flag; \ + orr F_Reg, F_Reg, wr1; \ + /* Now calc N & Z */ \ + lsl x, x, #49; \ + cmp x, #0; \ + FlagNZ \ + lsr x, x, #56; +#else +# define _FlagROL(x) \ lsls x, x, #17; \ FlagNZC \ - mov x, x, LSR #24; + lsr x, x, #24; +#endif + +#define _DoROL(x) \ + lsl x, x, #8; \ + lsl F_Reg, F_Reg, #6; /* C_FLAG (in carry) -> 0x80 */ \ + and wr1, F_Reg, #0x80; \ + orr x, x, xr1; \ + lsr F_Reg, F_Reg, #6; /* undo */ \ + _FlagROL(x) #define DoROL \ GetFromEA_B \ - _DoROL(r0) \ + _DoROL(xr0) \ PutToEA_B - #define _DoROR(x) \ tst F_Reg, #C_Flag; \ - orrne x, x, #0x100; \ - ror x, x, #1; \ + bz 1f; \ + orr x, x, #0x100; \ +1: ror x, x, #1; \ bic F_Reg, F_Reg, #NZC_Flags; \ tst x, #ROR_BIT; \ - bicne x, #ROR_BIT; \ - orrne F_Reg, F_Reg, #C_Flag; \ - tst x, #0xFF; \ - orreq F_Reg, F_Reg, #Z_Flag; \ - tst x, #0x80; \ - orrne F_Reg, F_Reg, #N_Flag; + bz 2f; \ + bic x, x, #ROR_BIT; \ + orr F_Reg, F_Reg, #C_Flag; \ +2: tst x, #0xFF; \ + bnz 3f; \ + orr F_Reg, F_Reg, #Z_Flag; \ +3: tst x, #0x80; \ + bz 4f; \ + orr F_Reg, F_Reg, #N_Flag; \ +4: #define DoROR \ GetFromEA_B \ - _DoROR(r0) \ + _DoROR(wr0) \ PutToEA_B -#if 0 -#error this attempts to leverage ARM 32bit flags, but does so incorrectly ... 
possibly there is some efficiency gains here if correctness can be assured -#define DoSBC_b \ - GetFromEA_B \ - mvn r0, r0; \ - adcs r1, r1, #0; \ - bt; \ - adcs A_Reg, A_Reg, r0; \ - FlagNVZC -#else #define DoSBC_b \ GetFromEA_B \ /* save operand sign bits */ \ - mov sign_a, A_Reg, LSR #7; \ - mov sign_0, r0, LSR #7; \ + lsr sign_a, A_Reg, #7; \ + lsr sign_0, wr0, #7; \ /* perform SBC with incoming borrow */\ - sub A_Reg, A_Reg, r0; \ + sub A_Reg, A_Reg, wr0; \ tst F_Reg, #C_Flag; \ - subeq A_Reg, A_Reg, #1; \ + bnz 5f; \ + sub A_Reg, A_Reg, #1; \ /* clear and set flags */ \ - bic F_Reg, F_Reg, #NVZC_Flags; \ +5: bic F_Reg, F_Reg, #NVZC_Flags; \ tst A_Reg, #0x80000000; \ - orreq F_Reg, F_Reg, #C_Flag; \ - mov A_Reg, A_Reg, LSL #24; \ - mov A_Reg, A_Reg, LSR #24; \ + bnz 6f; \ + orr F_Reg, F_Reg, #C_Flag; \ +6: lsl A_Reg, A_Reg, #24; \ + lsr A_Reg, A_Reg, #24; \ \ tst A_Reg, #0x80; \ - orrne F_Reg, F_Reg, #N_Flag; \ + bz 7f; \ + orr F_Reg, F_Reg, #N_Flag; \ \ - teq sign_a, sign_0; \ - beq 1f; \ - mov sign_0, A_Reg, LSR #7; \ +7: eor sign_0, sign_a, sign_0; \ + cmp sign_0, #0; \ + beq 8f; \ + lsr sign_0, A_Reg, #7; \ cmp sign_a, sign_0; \ - orrne F_Reg, F_Reg, #V_Flag; \ + bz 8f; \ + orr F_Reg, F_Reg, #V_Flag; \ \ -1: tst A_Reg, #0xFF; \ - orreq F_Reg, F_Reg, #Z_Flag; -#endif +8: tst A_Reg, #0xFF; \ + bnz 9f; \ + orr F_Reg, F_Reg, #Z_Flag; \ +9: #define DoSTA \ - mov r0, A_Reg; \ + mov wr0, A_Reg; \ PutToEA_B #define DoSTX \ - mov r0, X_Reg; \ + mov wr0, X_Reg; \ PutToEA_B #define DoSTY \ - mov r0, Y_Reg; \ + mov wr0, Y_Reg; \ PutToEA_B #define DoSTZ \ - mov r0, #0; \ + mov wr0, #0; \ PutToEA_B #define DoTRB \ GetFromEA_B \ - tst r0, A_Reg; \ + tst wr0, A_Reg; \ FlagZ \ - mvn r1, A_Reg; \ - and r0, r0, r1; \ + mvn wr1, A_Reg; \ + and wr0, wr0, wr1; \ PutToEA_B #define DoTSB \ GetFromEA_B \ - tst r0, A_Reg; \ + tst wr0, A_Reg; \ FlagZ \ - orr r0, A_Reg, r0; \ + orr wr0, A_Reg, wr0; \ PutToEA_B /* ---------------------------------------------------------------------- @@ -761,49 +824,54 @@ ADd memory to accumulator with Carry ---------------------------------- */ -#define carry r9 -#define lo_nyb r1 +#define carry wr9 +#define lo_nyb wr1 // Decimal mode ENTRY(op_ADC_dec) - IncOpCycles + AddOpCycles(#1) GetFromEA_B DebugBCDCheck // isolate lo nybbles mov lo_nyb, A_Reg - mov r9, r0 + mov wr9, wr0 and lo_nyb, lo_nyb, #0x0F - and r9, r9, #0x0F + and wr9, wr9, #0x0F // lo nybble addition with carry tst F_Reg, #C_Flag - addne r9, r9, #1 - add lo_nyb, lo_nyb, r9 + bz 1f + add wr9, wr9, #1 +1: add lo_nyb, lo_nyb, wr9 // prep 65c02 flags - bic F_Reg, #NVZC_Flags + bic F_Reg, F_Reg, #NVZC_Flags // lo nybble DAA (Decimal Adjustment after Addition), saving carry (+0, +1, +2) cmp lo_nyb, #0x09 - addhi lo_nyb, lo_nyb, #6 - mov carry, lo_nyb, LSR #4 + bls 2f + add lo_nyb, lo_nyb, #6 +2: lsr carry, lo_nyb, #4 and lo_nyb, lo_nyb, #0x0F // isolate hi nybbles - mov A_Reg, A_Reg, LSR #4 - mov r0, r0, LSR #4 + lsr A_Reg, A_Reg, #4 + lsr wr0, wr0, #4 // hi nybble addition with carry - add r0, r0, carry - add r0, A_Reg, r0 + add wr0, wr0, carry + add wr0, A_Reg, wr0 // hi nybble DAA - cmp r0, #0x09 - addhi r0, #6 - orrhi F_Reg, #C_Flag + cmp wr0, #0x09 + bls 3f + add wr0, wr0, #6 + orr F_Reg, F_Reg, #C_Flag // merge nybbles - mov r0, r0, LSL #4 - orr A_Reg, r0, lo_nyb +3: lsl wr0, wr0, #4 + orr A_Reg, wr0, lo_nyb // NZ flags tst A_Reg, #0x80 - orrne F_Reg, F_Reg, #N_Flag - tst A_Reg, #0xFF - orreq F_Reg, F_Reg, #Z_Flag - Continue + bz 4f + orr F_Reg, F_Reg, #N_Flag +4: tst A_Reg, #0xFF + bnz 5f + orr F_Reg, F_Reg, 
#Z_Flag +5: Continue #define maybe_DoADC_d \ tst F_Reg, #D_Flag; /* Decimal mode? */ \ @@ -1094,10 +1162,12 @@ ENTRY(op_BIT_abs_x) ENTRY(op_BIT_imm) GetImm _DoBIT - tst r1, #0xFF - orreq F_Reg, F_Reg, #Z_Flag - bicne F_Reg, F_Reg, #Z_Flag - Continue + tst wr1, #0xFF + bnz 1f + orr F_Reg, F_Reg, #Z_Flag + b 2f +1: bic F_Reg, F_Reg, #Z_Flag +2: Continue .ltorg @@ -1157,18 +1227,19 @@ ENTRY(op_BRA) ENTRY(op_UNK) /* make undefined opcodes fault */ ENTRY(op_BRK) IncUint16(PC_Reg) - mov r0, PC_Reg - mov r0, r0, ROR #8 - Push(r0) - mov r0, r0, LSR #24 - Push(r0) - orr F_Reg, F_Reg, #BX_Flags + mov wr0, PC_Reg + ror wr0, wr0, #8 + Push(wr0) + lsr wr0, wr0, #24 + Push(wr0) + orr F_Reg, F_Reg, #B_Flag + orr F_Reg, F_Reg, #X_Flag EncodeFlags - Push(r0) + Push(wr0) orr F_Reg, F_Reg, #I_Flag ldrh EffectiveAddr, [reg_args, #INTERRUPT_VECTOR] GetFromEA_W - mov PC_Reg, r0 + mov PC_Reg, wr0 Continue .ltorg @@ -1204,7 +1275,7 @@ ENTRY(op_BVS) // 0x70 ---------------------------------- */ ENTRY(op_CLC) // 0x18 - bic F_Reg, #C_Flag + bic F_Reg, F_Reg, #C_Flag Continue /* ---------------------------------- @@ -1212,7 +1283,7 @@ ENTRY(op_CLC) // 0x18 ---------------------------------- */ ENTRY(op_CLD) // 0xd8 - bic F_Reg, #D_Flag + bic F_Reg, F_Reg, #D_Flag Continue /* ---------------------------------- @@ -1220,7 +1291,7 @@ ENTRY(op_CLD) // 0xd8 ---------------------------------- */ ENTRY(op_CLI) // 0x58 - bic F_Reg, #I_Flag + bic F_Reg, F_Reg, #I_Flag Continue /* ---------------------------------- @@ -1228,7 +1299,7 @@ ENTRY(op_CLI) // 0x58 ---------------------------------- */ ENTRY(op_CLV) // 0xB8 - bic F_Reg, #V_Flag + bic F_Reg, F_Reg, #V_Flag Continue .ltorg @@ -1505,39 +1576,41 @@ ENTRY(op_JMP_abs) ENTRY(op_JMP_ind) // 0x6c GetFromPC_W - mov r1, r0, LSL #24 - teq r1, #0xFF000000 - beq jmp_special - GetFromMem_W(r0) - mov PC_Reg, r0 + lsl wr1, wr0, #24 + eor wr1, wr1, #0xFF000000 + cmp wr1, #0 + bz jmp_special + GetFromMem_W(wr0) + mov PC_Reg, wr0 sub EffectiveAddr, EffectiveAddr, #1 Continue jmp_special: // see JMP indirect note in _Understanding the Apple IIe_ 4-25 - mov PC_Reg, r0 + mov PC_Reg, wr0 subs PC_Reg, PC_Reg, #0xFF - movmi PC_Reg, PC_Reg, LSL #16 - movmi PC_Reg, PC_Reg, LSR #16 - GetFromMem_B(PC_Reg) - mov r9, r0, LSL #8 + bpl 1f + lsl PC_Reg, PC_Reg, #16 + lsr PC_Reg, PC_Reg, #16 +1: GetFromMem_B(PC_Reg) + lsl wr9, wr0, #8 add PC_Reg, PC_Reg, #0xFF - bic PC_Reg, #0x10000 + bic PC_Reg, PC_Reg, #0x10000 GetFromMem_B(PC_Reg) - orr r0, r9, r0 - mov PC_Reg, r0 + orr wr0, wr9, wr0 + mov PC_Reg, wr0 Continue // 65c02 : 0x7C ENTRY(op_JMP_abs_ind_x) GetFromPC_W - mov EffectiveAddr, r0 - mov r0, X_Reg + mov EffectiveAddr, wr0 + mov wr0, X_Reg #warning HACK FIXME TODO : need to check the Bible here ... is this a signed addition (in which case we need to cbw) - //cbw - add EffectiveAddr, EffectiveAddr, r0 - mov EffectiveAddr, EffectiveAddr, LSL #16 // 16bit under/overflow protection - mov EffectiveAddr, EffectiveAddr, LSR #16 + //cbw(wr0) + add EffectiveAddr, EffectiveAddr, wr0 + lsl EffectiveAddr, EffectiveAddr, #16 // 16bit under/overflow protection + lsr EffectiveAddr, EffectiveAddr, #16 GetFromMem_W(EffectiveAddr) - mov PC_Reg, r0 + mov PC_Reg, wr0 Continue /* ---------------------------------- @@ -1546,13 +1619,13 @@ ENTRY(op_JMP_abs_ind_x) ENTRY(op_JSR) // 0x20 GetAbs - mov r0, PC_Reg - sub r0, r0, #1 - mov r0, r0, LSL #16 // handle underflow -- can this happen in second mem page? 
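The jmp_special path above models the page-wrap behavior noted from _Understanding the Apple IIe_ 4-25: when the JMP (abs) operand ends in $FF, the high byte of the target is fetched from the start of the same page rather than from the following page. A hedged C sketch of that address calculation, assuming a read_byte helper that stands in for the vmem dispatch:

    #include <stdint.h>

    extern uint8_t read_byte(uint16_t ea);   /* hypothetical memory-read helper */

    static uint16_t jmp_indirect_target(uint16_t operand) {
        uint8_t lo = read_byte(operand);                     /* e.g. $12FF */
        uint16_t hi_addr = ((operand & 0xFF) == 0xFF)
                         ? (uint16_t)(operand & 0xFF00)      /* wraps back to $1200 */
                         : (uint16_t)(operand + 1);          /* normal case */
        uint8_t hi = read_byte(hi_addr);
        return (uint16_t)(((uint16_t)hi << 8) | lo);
    }
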
- mov r0, r0, ROR #24 - Push(r0) // push hi_byte - mov r0, r0, LSR #24 - Push(r0) // push lo_byte + mov wr0, PC_Reg + sub wr0, wr0, #1 + lsl wr0, wr0, #16 // handle underflow -- can this happen in second mem page? + ror wr0, wr0, #24 + Push(wr0) // push hi byte + lsr wr0, wr0, #24 + Push(wr0) // push lo byte mov PC_Reg, EffectiveAddr Continue @@ -1783,7 +1856,7 @@ ENTRY(op_PHA) // 0x48 ENTRY(op_PHP) // 0x08 EncodeFlags - Push(r0) + Push(wr0) Continue /* ---------------------------------- @@ -1817,9 +1890,10 @@ ENTRY(op_PLA) // 0x68 ---------------------------------- */ ENTRY(op_PLP) // 0x28 - Pop(r0) + Pop(wr0) DecodeFlags - orr F_Reg, F_Reg, #BX_Flags + orr F_Reg, F_Reg, #B_Flag + orr F_Reg, F_Reg, #X_Flag Continue /* ---------------------------------- @@ -1847,7 +1921,7 @@ ENTRY(op_PLY) ---------------------------------- */ ENTRY(op_ROL_acc) // 0x2a - _DoROL(A_Reg) + _DoROL(xA_Reg) Continue ENTRY(op_ROL_zpage) // 0x26 @@ -1904,12 +1978,13 @@ ENTRY(op_ROR_abs_x) // 0x7e ---------------------------------- */ ENTRY(op_RTI) // 0x40 - Pop(r0) + Pop(wr0) DecodeFlags - orr F_Reg, F_Reg, #BX_Flags + orr F_Reg, F_Reg, #B_Flag + orr F_Reg, F_Reg, #X_Flag Pop(lo_byte) Pop(hi_byte) - mov hi_byte, hi_byte, LSL #8 + lsl hi_byte, hi_byte, #8 orr PC_Reg, hi_byte, lo_byte Continue @@ -1920,7 +1995,7 @@ ENTRY(op_RTI) // 0x40 ENTRY(op_RTS) // 0x60 Pop(lo_byte) Pop(hi_byte) - mov hi_byte, hi_byte, LSL #8 + lsl hi_byte, hi_byte, #8 orr PC_Reg, hi_byte, lo_byte IncUint16(PC_Reg) Continue @@ -1933,51 +2008,56 @@ ENTRY(op_RTS) // 0x60 ---------------------------------- */ #define borrow carry ENTRY(op_SBC_dec) - IncOpCycles + AddOpCycles(#1) GetFromEA_B DebugBCDCheck // isolate lo nybbles mov lo_nyb, A_Reg - mov r9, r0 + mov wr9, wr0 and lo_nyb, lo_nyb, #0x0F - and r9, r9, #0x0F + and wr9, wr9, #0x0F // lo nybble subtraction with borrow - sub lo_nyb, lo_nyb, r9 + sub lo_nyb, lo_nyb, wr9 and lo_nyb, lo_nyb, #0xFF tst F_Reg, #C_Flag - subeq lo_nyb, lo_nyb, #1 - andeq lo_nyb, lo_nyb, #0xFF + bnz 1f + sub lo_nyb, lo_nyb, #1 + and lo_nyb, lo_nyb, #0xFF // prep 65c02 flags - bic F_Reg, #NVZC_Flags - orr F_Reg, #C_Flag +1: bic F_Reg, F_Reg, #NVZC_Flags + orr F_Reg, F_Reg, #C_Flag // lo nybble DAS (Decimal Adjustment after Subtraction), saving borrow eor borrow, borrow, borrow cmp lo_nyb, #0x09 - movhi borrow, #1 - subhi lo_nyb, lo_nyb, #6 - andhi lo_nyb, lo_nyb, #0x0F + bls 2f + mov borrow, #1 + sub lo_nyb, lo_nyb, #6 + and lo_nyb, lo_nyb, #0x0F // isolate hi nybbles - mov A_Reg, A_Reg, LSR #4 - mov r0, r0, LSR #4 +2: lsr A_Reg, A_Reg, #4 + lsr wr0, wr0, #4 // hi nybble subtraction with borrow - sub r0, A_Reg, r0 - and r0, r0, #0xFF - sub r0, r0, borrow - and r0, r0, #0xFF + sub wr0, A_Reg, wr0 + and wr0, wr0, #0xFF + sub wr0, wr0, borrow + and wr0, wr0, #0xFF // hi nybble DAS - cmp r0, #0x09 - subhi r0, #6 - bichi F_Reg, #C_Flag - andhi r0, r0, #0x0F + cmp wr0, #0x09 + bls 3f + sub wr0, wr0, #6 + bic F_Reg, F_Reg, #C_Flag + and wr0, wr0, #0x0F // merge nybbles - mov r0, r0, LSL #4 - orr A_Reg, r0, lo_nyb +3: lsl wr0, wr0, #4 + orr A_Reg, wr0, lo_nyb // NZ flags tst A_Reg, #0x80 - orrne F_Reg, F_Reg, #N_Flag - tst A_Reg, #0xFF - orreq F_Reg, F_Reg, #Z_Flag - Continue + bz 4f + orr F_Reg, F_Reg, #N_Flag +4: tst A_Reg, #0xFF + bnz 5f + orr F_Reg, F_Reg, #Z_Flag +5: Continue #define maybe_DoSBC_d \ tst F_Reg, #D_Flag; /* Decimal mode? 
*/ \ @@ -2049,7 +2129,7 @@ ENTRY(op_SBC_ind_zpage) ---------------------------------- */ ENTRY(op_SEC) // 0x38 - orr F_Reg, #C_Flag + orr F_Reg, F_Reg, #C_Flag Continue /* ---------------------------------- @@ -2057,7 +2137,7 @@ ENTRY(op_SEC) // 0x38 ---------------------------------- */ ENTRY(op_SED) // 0xf8 - orr F_Reg, #D_Flag + orr F_Reg, F_Reg, #D_Flag Continue /* ---------------------------------- @@ -2065,7 +2145,7 @@ ENTRY(op_SED) // 0xf8 ---------------------------------- */ ENTRY(op_SEI) // 0x78 - orr F_Reg, #I_Flag + orr F_Reg, F_Reg, #I_Flag Continue /* ---------------------------------- @@ -2265,8 +2345,9 @@ ENTRY(op_STZ_abs_x) ENTRY(op_TAX) // 0xaa mov X_Reg, A_Reg - mov r0, A_Reg, LSL #24 - orrs r0, r0, r0 + lsl wr0, A_Reg, #24 + orr wr0, wr0, wr0 + cmp wr0, #0 FlagNZ Continue @@ -2276,8 +2357,9 @@ ENTRY(op_TAX) // 0xaa ENTRY(op_TAY) // 0xa8 mov Y_Reg, A_Reg - mov r0, A_Reg, LSL #24 - orrs r0, r0, r0 + lsl wr0, A_Reg, #24 + orr wr0, wr0, wr0 + cmp wr0, #0 FlagNZ Continue @@ -2321,8 +2403,9 @@ ENTRY(op_TSB_zpage) ENTRY(op_TSX) // 0xba mov X_Reg, SP_Reg - mov r0, SP_Reg, LSL #24 - orrs r0, r0, r0 + lsl wr0, SP_Reg, #24 + orr wr0, wr0, wr0 + cmp wr0, #0 FlagNZ Continue @@ -2332,8 +2415,9 @@ ENTRY(op_TSX) // 0xba ENTRY(op_TXA) // 0x8a mov A_Reg, X_Reg - mov r0, X_Reg, LSL #24 - orrs r0, r0, r0 + lsl wr0, X_Reg, #24 + orr wr0, wr0, wr0 + cmp wr0, #0 FlagNZ Continue @@ -2351,8 +2435,9 @@ ENTRY(op_TXS) // 0x9a ENTRY(op_TYA) // 0x98 mov A_Reg, Y_Reg - mov r0, Y_Reg, LSL #24 - orrs r0, r0, r0 + lsl wr0, Y_Reg, #24 + orr wr0, wr0, wr0 + cmp wr0, #0 FlagNZ Continue @@ -2380,37 +2465,37 @@ ENTRY(op_WAI_65c02) Keep executing until we've executed >= cpu65_cycles_to_execute ------------------------------------------------------------------------- */ -#define cycles_exe r0 +#define cycles_exe wr12 continue: - ldr r1, [reg_args, #CPU65__OPCYCLES] - ldrb r0, [reg_args, #CPU65_OPCODE] - ldrb cycles_exe, [r1, r0] - ldrb r1, [reg_args, #CPU65_OPCYCLES] - add cycles_exe, cycles_exe, r1 + ldr xr1, [reg_args, #CPU65__OPCYCLES] + ldrb wr0, [reg_args, #CPU65_OPCODE] + ldrb cycles_exe, [xr1, xr0] + ldrb wr1, [reg_args, #CPU65_OPCYCLES] + add cycles_exe, cycles_exe, wr1 strb cycles_exe, [reg_args, #CPU65_OPCYCLES] TRACE_EPILOGUE - ldr r1, [reg_args, #GC_CYCLES_TIMER_0] - sub r1, r1, cycles_exe - str r1, [reg_args, #GC_CYCLES_TIMER_0] + ldr wr1, [reg_args, #GC_CYCLES_TIMER_0] + sub wr1, wr1, cycles_exe + str wr1, [reg_args, #GC_CYCLES_TIMER_0] - ldr r1, [reg_args, #GC_CYCLES_TIMER_1] - sub r1, r1, cycles_exe - str r1, [reg_args, #GC_CYCLES_TIMER_1] + ldr wr1, [reg_args, #GC_CYCLES_TIMER_1] + sub wr1, wr1, cycles_exe + str wr1, [reg_args, #GC_CYCLES_TIMER_1] - ldr r1, [reg_args, #CPU65_CYCLE_COUNT] - add r1, r1, cycles_exe - str r1, [reg_args, #CPU65_CYCLE_COUNT] + ldr wr1, [reg_args, #CPU65_CYCLE_COUNT] + add wr1, wr1, cycles_exe + str wr1, [reg_args, #CPU65_CYCLE_COUNT] -continue1: ldr r1, [reg_args, #CPU65_CYCLES_TO_EXECUTE] - subs r1, r1, cycles_exe - str r1, [reg_args, #CPU65_CYCLES_TO_EXECUTE] +continue1: ldr wr1, [reg_args, #CPU65_CYCLES_TO_EXECUTE] + subs wr1, wr1, cycles_exe + str wr1, [reg_args, #CPU65_CYCLES_TO_EXECUTE] bmi exit_cpu65_run beq exit_cpu65_run -continue2: ldrb r0, [reg_args, #CPU65__SIGNAL] - orrs r0, r0, r0 - bne exception +continue2: ldrb wr0, [reg_args, #CPU65__SIGNAL] + tst wr0, #0xFF + bnz exception CPUStatsReset JumpNextInstruction @@ -2418,20 +2503,20 @@ continue2: ldrb r0, [reg_args, #CPU65__SIGNAL] Exception handlers 
------------------------------------------------------------------------- */ -exception: tst r0, #ResetSig +exception: tst wr0, #ResetSig beq ex_irq - ldrb r0, [reg_args, #JOY_BUTTON0] // OpenApple - tst r0, #0xFF + ldrb wr0, [reg_args, #JOY_BUTTON0] // OpenApple + tst wr0, #0xFF bne exit_reinit - ldr r0, [reg_args, #JOY_BUTTON1] // ClosedApple - tst r0, #0xFF + ldrb wr0, [reg_args, #JOY_BUTTON1] // ClosedApple + tst wr0, #0xFF bne exit_reinit -ex_reset: eor r0, r0, r0 - strb r0, [reg_args, #CPU65__SIGNAL] +ex_reset: eor wr0, wr0, wr0 + strb wr0, [reg_args, #CPU65__SIGNAL] ldrh EffectiveAddr, [reg_args, #RESET_VECTOR] GetFromEA_W - mov PC_Reg, r0 + mov PC_Reg, wr0 CPUStatsReset JumpNextInstruction @@ -2440,23 +2525,23 @@ ex_irq: tst F_Reg, #I_Flag // Already interrupt CPUStatsReset JumpNextInstruction // Yes (ignored) ... 1: TRACE_IRQ // No (handle IRQ) ... - mov r0, PC_Reg - mov r0, r0, ROR #8 - Push(r0) - mov r0, r0, LSR #24 - Push(r0) + mov wr0, PC_Reg + ror wr0, wr0, #8 + Push(wr0) + lsr wr0, wr0, #24 + Push(wr0) orr F_Reg, F_Reg, #X_Flag EncodeFlags - Push(r0) + Push(wr0) orr F_Reg, F_Reg, #BI_Flags //bic F_Reg, F_Reg, #D_Flag // AppleWin clears Decimal bit? ldrh EffectiveAddr, [reg_args, #INTERRUPT_VECTOR] GetFromEA_W - mov PC_Reg, r0 + mov PC_Reg, wr0 CPUStatsReset - ldrb r0, [reg_args, #CPU65_OPCYCLES] - add r0, r0, #7 // IRQ handling will take additional 7 cycles - strb r0, [reg_args, #CPU65_OPCYCLES] + ldrb wr0, [reg_args, #CPU65_OPCYCLES] + add wr0, wr0, #7 // IRQ handling will take additional 7 cycles + strb wr0, [reg_args, #CPU65_OPCYCLES] JumpNextInstruction /* ------------------------------------------------------------------------- @@ -2464,24 +2549,24 @@ ex_irq: tst F_Reg, #I_Flag // Already interrupt ------------------------------------------------------------------------- */ ENTRY(cpu65_run) - push {r4, r5, r6, r7, r8, r9, r10, r11, lr} + Enter // Restore CPU state when being called from C. - mov reg_args, r0 + mov reg_args, xr0 ldr reg_vmem_r, [reg_args, #CPU65_VMEM_R] ldrh EffectiveAddr, [reg_args, #CPU65_EA] ldrh PC_Reg, [reg_args, #CPU65_PC] ldrb A_Reg, [reg_args, #CPU65_A] - ldrb r0, [reg_args, #CPU65_F] + ldrb wr0, [reg_args, #CPU65_F] DecodeFlags ldrb X_Reg, [reg_args, #CPU65_X] ldrb Y_Reg, [reg_args, #CPU65_Y] ldrb SP_Reg, [reg_args, #CPU65_SP] - ldrb r0, [reg_args, #EMUL_REINITIALIZE] - teq r0, #0 - eorne r0, r0, r0 - STRBNE r0, [reg_args, #EMUL_REINITIALIZE] - bne ex_reset - b continue2 + ldrb wr0, [reg_args, #EMUL_REINITIALIZE] + cmp wr0, #0 + bz continue2 + eor wr0, wr0, wr0 + strb wr0, [reg_args, #EMUL_REINITIALIZE] + b ex_reset /* ------------------------------------------------------------------------- 65c02 CPU processing loop exit point @@ -2491,13 +2576,13 @@ exit_cpu65_run: // Save CPU state when returning from being called from C strh PC_Reg, [reg_args, #CPU65_PC] CommonSaveCPUState - pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + Exit -exit_reinit: mov r0, #0 - strb r0, [reg_args, #CPU65__SIGNAL] - mov r0, #1 - strb r0, [reg_args, #EMUL_REINITIALIZE] - pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +exit_reinit: mov wr0, #0 + strb wr0, [reg_args, #CPU65__SIGNAL] + mov wr0, #1 + strb wr0, [reg_args, #EMUL_REINITIALIZE] + Exit /* ------------------------------------------------------------------------- Debugger hooks @@ -2505,9 +2590,8 @@ exit_reinit: mov r0, #0 ENTRY(cpu65_direct_write) #warning FIXME TODO implement cpu65_direct_write ... 
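The continue: block above charges each instruction its base cost from the CPU65__OPCYCLES table plus any extra cycles accumulated in CPU65_OPCYCLES (taken branches, page crossings), then debits the two timers and the remaining cycle budget. A compilable sketch of that bookkeeping, using a stand-in struct whose field names are assumptions derived from the offset macro names:

    #include <stdint.h>

    typedef struct {
        const uint8_t *opcycles_table;   /* CPU65__OPCYCLES: base cycles per opcode */
        uint8_t  opcycles_extra;         /* CPU65_OPCYCLES: extra cycles this op    */
        uint8_t  opcode;                 /* CPU65_OPCODE                            */
        int32_t  timer0, timer1;         /* GC_CYCLES_TIMER_0/1                     */
        int32_t  cycle_count;            /* CPU65_CYCLE_COUNT                       */
        int32_t  cycles_to_execute;      /* CPU65_CYCLES_TO_EXECUTE                 */
    } run_state_model;

    static void account_cycles(run_state_model *s) {
        int32_t cycles = s->opcycles_table[s->opcode] + s->opcycles_extra;
        s->timer0 -= cycles;
        s->timer1 -= cycles;
        s->cycle_count += cycles;
        s->cycles_to_execute -= cycles;   /* loop exits at or below zero */
    }
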
- mov r0, #42 - ldr r0, [r0] // segfault - mov pc, lr + mov wr0, #42 + ldr wr0, [xr0] // segfault .ltorg diff --git a/src/arm/glue-offsets64.h b/src/arm/glue-offsets64.h new file mode 100644 index 00000000..2ce440ee --- /dev/null +++ b/src/arm/glue-offsets64.h @@ -0,0 +1,53 @@ +/* This file is auto-generated for a specific architecture ABI */ +#define UNUSED0 0 +#define CPU65_TRACE_PROLOGUE 8 +#define CPU65_TRACE_ARG 16 +#define UNUSED1 24 +#define UNUSED2 32 +#define CPU65_TRACE_EPILOGUE 40 +#define CPU65_TRACE_IRQ 48 +#define DEBUG_ILLEGAL_BCD 56 +#define CPU65_VMEM_R 64 +#define CPU65_VMEM_W 72 +#define CPU65_FLAGS_ENCODE 80 +#define CPU65_FLAGS_DECODE 88 +#define CPU65__OPCODES 96 +#define CPU65__OPCYCLES 104 +#define BASE_RAMRD 112 +#define BASE_RAMWRT 120 +#define BASE_TEXTRD 128 +#define BASE_TEXTWRT 136 +#define BASE_HGRRD 144 +#define BASE_HGRWRT 152 +#define BASE_STACKZP 160 +#define BASE_D000_RD 168 +#define BASE_E000_RD 176 +#define BASE_D000_WRT 184 +#define BASE_E000_WRT 192 +#define BASE_C3ROM 200 +#define BASE_C4ROM 208 +#define BASE_C5ROM 216 +#define BASE_CXROM 224 +#define SOFTSWITCHES 232 +#define GC_CYCLES_TIMER_0 236 +#define GC_CYCLES_TIMER_1 240 +#define CPU65_CYCLES_TO_EXECUTE 244 +#define CPU65_CYCLE_COUNT 248 +#define UNUSED3 252 +#define INTERRUPT_VECTOR 256 +#define RESET_VECTOR 258 +#define CPU65_PC 260 +#define CPU65_EA 262 +#define CPU65_A 264 +#define CPU65_F 265 +#define CPU65_X 266 +#define CPU65_Y 267 +#define CPU65_SP 268 +#define CPU65_D 269 +#define CPU65_RW 270 +#define CPU65_OPCODE 271 +#define CPU65_OPCYCLES 272 +#define CPU65__SIGNAL 273 +#define JOY_BUTTON0 274 +#define JOY_BUTTON1 275 +#define EMUL_REINITIALIZE 276 diff --git a/src/arm/glue-prologue.h b/src/arm/glue-prologue.h index 4ef45958..d84b0004 100644 --- a/src/arm/glue-prologue.h +++ b/src/arm/glue-prologue.h @@ -12,79 +12,120 @@ #include "vm.h" #include "cpu-regs.h" +#if __aarch64__ + +# define _GLUE_REG_SAVE \ + stp x29, x30, [sp, -16]!; + +# define _GLUE_REG_RESTORE \ + ldp x29, x30, [sp], 16; \ + ret + +# define _GLUE_REG_SAVE0 \ + stp x0, x30, [sp, -16]!; + +# define _GLUE_REG_RESTORE0 \ + ldp x0, x30, [sp], 16; \ + ret + +# define _GLUE_RET \ + ret + +#else + +# define _GLUE_REG_SAVE \ + push {EffectiveAddr, PC_Reg, lr}; + +# define _GLUE_REG_RESTORE \ + pop {EffectiveAddr, PC_Reg, pc}; + +# define _GLUE_REG_SAVE0 \ + push {r0, EffectiveAddr, PC_Reg, lr}; + +# define _GLUE_REG_RESTORE0 \ + pop {r0, EffectiveAddr, PC_Reg, pc}; + +# define _GLUE_RET \ + mov pc, lr; + +#endif + #define GLUE_EXTERN_C_READ(func) #define _GLUE_BANK_MAYBE_READ_CX(func,x,pointer) \ -ENTRY(func) ldr r0, [reg_args, #SOFTSWITCHES]; \ - ldr r1, [reg_args, x ## pointer]; \ - tst r0, $SS_CXROM; \ - bne 1f; \ - push {EffectiveAddr, PC_Reg, lr}; \ - blx r1; \ - pop {EffectiveAddr, PC_Reg, pc}; \ -1: ldrb r0, [r1, EffectiveAddr]; \ - mov pc, lr; +ENTRY(func) ldr wr0, [reg_args, #SOFTSWITCHES]; \ + ldr xr1, [reg_args, x ## pointer]; \ + tst wr0, #SS_CXROM; \ + bnz 1f; \ + _GLUE_REG_SAVE; \ + BLX xr1; \ + _GLUE_REG_RESTORE; \ +1: ldrb wr0, [xr1, xEffectiveAddr]; \ + _GLUE_RET #define GLUE_BANK_MAYBE_READ_CX(func,pointer) _GLUE_BANK_MAYBE_READ_CX(func,#,pointer) #define _GLUE_BANK_MAYBE_READ_C3(func,x,pointer) \ -ENTRY(func) ldr r0, [reg_args, #SOFTSWITCHES]; \ - ldr r1, [reg_args, x ## pointer]; \ - tst r0, $SS_CXROM; \ - bne 1f; \ - tst r0, $SS_C3ROM; \ - bne 1f; \ - push {EffectiveAddr, PC_Reg, lr}; \ - blx r1; \ - pop {EffectiveAddr, PC_Reg, pc}; \ -1: ldrb r0, [r1, EffectiveAddr]; \ - mov pc, lr; +ENTRY(func) ldr 
wr0, [reg_args, #SOFTSWITCHES]; \ + ldr xr1, [reg_args, x ## pointer]; \ + tst wr0, #SS_CXROM; \ + bnz 1f; \ + tst wr0, #SS_C3ROM; \ + bnz 1f; \ + _GLUE_REG_SAVE; \ + BLX xr1; \ + _GLUE_REG_RESTORE; \ +1: ldrb wr0, [xr1, xEffectiveAddr]; \ + _GLUE_RET #define GLUE_BANK_MAYBE_READ_C3(func,pointer) _GLUE_BANK_MAYBE_READ_C3(func,#,pointer) #define _GLUE_BANK_READ(func,x,pointer) \ -ENTRY(func) ldr r1, [reg_args, x ## pointer]; \ - ldrb r0, [r1, EffectiveAddr]; \ - mov pc, lr; +ENTRY(func) ldr xr1, [reg_args, x ## pointer]; \ + ldrb wr0, [xr1, xEffectiveAddr]; \ + _GLUE_RET #define GLUE_BANK_READ(func,pointer) _GLUE_BANK_READ(func,#,pointer) #define _GLUE_BANK_WRITE(func,x,pointer) \ -ENTRY(func) ldr r1, [reg_args, x ## pointer]; \ - strb r0, [r1, EffectiveAddr]; \ - mov pc, lr; +ENTRY(func) ldr xr1, [reg_args, x ## pointer]; \ + strb wr0, [xr1, xEffectiveAddr]; \ + _GLUE_RET #define GLUE_BANK_WRITE(func,pointer) _GLUE_BANK_WRITE(func,#,pointer) #define _GLUE_BANK_MAYBEWRITE(func,x,pointer) \ -ENTRY(func) ldr r1, [reg_args, x ## pointer]; \ - teq r1, #0; \ - STRBNE r0, [r1, EffectiveAddr]; \ - mov pc, lr; +ENTRY(func) ldr xr1, [reg_args, x ## pointer]; \ + eor xr12, xr12, xr12; \ + eor xr1, xr1, xr12; \ + cmp xr1, #0; \ + bz 1f; \ + strb wr0, [xr1, xEffectiveAddr]; \ +1: _GLUE_RET #define GLUE_BANK_MAYBEWRITE(func,pointer) _GLUE_BANK_MAYBEWRITE(func,#,pointer) #define _GLUE_INLINE_READ(func,x,off) \ -ENTRY(func) ldrb r0, [reg_args, x ## off]; \ - mov pc, lr; +ENTRY(func) ldrb wr0, [reg_args, x ## off]; \ + _GLUE_RET #define GLUE_INLINE_READ(func,off) _GLUE_INLINE_READ(func,#,off) #define GLUE_C_WRITE(func) \ -ENTRY(func) push {r0, EffectiveAddr, PC_Reg, lr}; \ - and r0, r0, #0xff; \ - mov r1, r0; \ - mov r0, EffectiveAddr; \ +ENTRY(func) _GLUE_REG_SAVE0; \ + and wr0, wr0, #0xff; \ + mov wr1, wr0; \ + mov wr0, EffectiveAddr; \ bl CALL(c_##func); \ - pop {r0, EffectiveAddr, PC_Reg, pc}; + _GLUE_REG_RESTORE0; #define GLUE_C_READ(func) \ -ENTRY(func) push {EffectiveAddr, PC_Reg, lr}; \ - mov r0, EffectiveAddr; \ +ENTRY(func) _GLUE_REG_SAVE; \ + mov wr0, EffectiveAddr; \ bl CALL(c_##func); \ - pop {EffectiveAddr, PC_Reg, pc}; + _GLUE_REG_RESTORE; #define GLUE_C_READ_ALTZP(FUNC) GLUE_C_READ(FUNC) diff --git a/src/cpu.h b/src/cpu.h index 137a0936..54542820 100644 --- a/src/cpu.h +++ b/src/cpu.h @@ -87,7 +87,7 @@ void cpu65_trace_checkpoint(void); # define D_Flag 0x20 /* 6502 Decimal mode */ # define Z_Flag 0x40 /* 6502 Zero */ # define N_Flag 0x80 /* 6502 Negative */ -#elif defined(__arm__) +#elif defined(__arm__) || defined(__aarch64__) // VCZN positions match positions of shifted status register # define V_Flag 0x1 # define C_Flag 0x2 @@ -101,10 +101,7 @@ void cpu65_trace_checkpoint(void); # define I_Flag 0x20 # define B_Flag 0x40 # define D_Flag 0x80 -# define BX_Flags 0x50 # define BI_Flags 0x60 -#elif defined(__aarch64__) -# error soon ... 
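The flag values defined just above line up with the ARM condition flags on purpose: NZCV occupy bits 31..28 of the status register, so the lahf macro in cpu.S (an mrs followed by a right shift of 28) leaves the host flags in exactly these V/C/Z/N bit positions, ready to be masked into F_Reg. A small sketch of that correspondence, assuming nothing beyond the shift itself:

    #include <stdint.h>

    #define V_Flag 0x1
    #define C_Flag 0x2
    #define Z_Flag 0x4
    #define N_Flag 0x8

    /* model of the lahf macro: N,Z,C,V live in bits 31..28 of APSR/NZCV */
    static inline uint32_t lahf_model(uint32_t status_reg) {
        return status_reg >> 28;   /* bit3 = N, bit2 = Z, bit1 = C, bit0 = V */
    }
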
#else # error unknown machine architecture #endif diff --git a/src/glue.h b/src/glue.h index fb6e7f09..9459f904 100644 --- a/src/glue.h +++ b/src/glue.h @@ -186,7 +186,7 @@ typedef struct cpu65_run_args_s { #define OUTPUT_CPU65_RW() printf("#define CPU65_RW %ld\n", offsetof(cpu65_run_args_s, cpu65_rw)) uint8_t cpu65_opcode; // Last opcode #define OUTPUT_CPU65_OPCODE() printf("#define CPU65_OPCODE %ld\n", offsetof(cpu65_run_args_s, cpu65_opcode)) - uint8_t cpu65_opcycles; // Last opcode extra cycles + uint8_t cpu65_opcycles; // Last opcode cycles #define OUTPUT_CPU65_OPCYCLES() printf("#define CPU65_OPCYCLES %ld\n", offsetof(cpu65_run_args_s, cpu65_opcycles)) uint8_t cpu65__signal;
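glue-offsets64.h declares itself auto-generated, and glue.h pairs each struct field with an OUTPUT_*() printf macro built on offsetof(), which suggests the immediates used by the assembly (#CPU65_PC, #CPU65_OPCYCLES, and so on) are emitted from the C struct layout rather than maintained by hand. A self-contained sketch of that technique with a hypothetical two-field struct; the real generator presumably walks the full cpu65_run_args_s:

    #include <stdio.h>
    #include <stddef.h>
    #include <stdint.h>

    typedef struct {
        uint16_t cpu65_pc;
        uint8_t  cpu65_a;
    } example_args_s;              /* hypothetical stand-in for cpu65_run_args_s */

    int main(void) {
        printf("#define CPU65_PC %ld\n", (long)offsetof(example_args_s, cpu65_pc));
        printf("#define CPU65_A %ld\n",  (long)offsetof(example_args_s, cpu65_a));
        return 0;
    }
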