mirror of
https://github.com/mauiaaron/apple2.git
synced 2025-01-12 06:29:58 +00:00
WIP: changes to cpu throttling/timing
WARN: this is still a WIP and untested. Instead of calling nanosleep(), the CPU thread now uses an adaptive spin loop. This better emulates the actual Apple //e speed in preparation for near-realtime audio. The drawback is that the CPU thread is pegged at 100% usage.
This commit is contained in:
parent
d599a2174a
commit
96d083a2c3
47
src/cpu.S
47
src/cpu.S
@ -32,8 +32,9 @@
|
||||
|
||||
#define DebugCurrEA SN(cpu65_debug)
|
||||
#define DebugCurrByte SN(cpu65_debug)+2
|
||||
#define DebugCurrOp SN(cpu65_debug)+3
|
||||
#define XCyclesCount SN(cpu65_debug)+4
|
||||
#define DebugCurrRW SN(cpu65_debug)+3
|
||||
// NOTE: these byte offsets must match the field order of struct cpu65_extra
// (ea:0, d:2, rw:3, opcode:4, opcycles:5), because C code reads the same
// memory through cpu65_debug.opcode and cpu65_debug.opcycles.
#define DebugCurrOpcode SN(cpu65_debug)+4
#define DebugCycleCount SN(cpu65_debug)+5
|
||||
|
||||
/* -------------------------------------------------------------------------
|
||||
CPU (6502) Helper Routines
|
||||
@ -57,7 +58,7 @@
|
||||
(,EffectiveAddr_E,8); \
|
||||
|
||||
#define GetFromEA_B \
|
||||
orb $1, DebugCurrOp; \
|
||||
orb $1, DebugCurrRW; \
|
||||
call *SN(cpu65_vmem) \
|
||||
(,EffectiveAddr_E,8);
|
||||
|
||||
@ -71,7 +72,7 @@
|
||||
(,EffectiveAddr_E,8);
|
||||
|
||||
#define PutToEA_B \
|
||||
orb $2, DebugCurrOp; \
|
||||
orb $2, DebugCurrRW; \
|
||||
orb %al, DebugCurrByte; \
|
||||
call *SN(cpu65_vmem)+4 \
|
||||
(,EffectiveAddr_E,8);
|
||||
@ -90,10 +91,6 @@
|
||||
call *SN(cpu65_vmem) \
|
||||
(,EffectiveAddr_E,8); \
|
||||
|
||||
// reset operation code before each instruction
|
||||
#define ZeroOp movb $0, DebugCurrOp; \
|
||||
movb $0, XCyclesCount;
|
||||
|
||||
// NOTE: the orb functions as a move, but we want to
|
||||
// set the flags and we know %ah is zero
|
||||
#define Continue \
|
||||
@ -139,12 +136,12 @@
|
||||
9:
|
||||
|
||||
#define BranchXCycles \
|
||||
incb XCyclesCount; /* +1 cycle branch taken */ \
|
||||
incb DebugCycleCount; /* +1 branch taken */ \
|
||||
pushl %ebx; \
|
||||
movw PC_Reg, %bx; \
|
||||
addb %al, %bl; \
|
||||
jnc 9f; \
|
||||
incb XCyclesCount; /* +1 cycle branch across pg boundary */ \
|
||||
incb DebugCycleCount; /* +1 branch across pg boundary */ \
|
||||
9: addw %ax, PC_Reg; \
|
||||
popl %ebx;
|
||||
|
||||
@ -234,7 +231,7 @@
|
||||
addb X_Reg, %al; \
|
||||
jnc 9f; \
|
||||
adcb $0, %ah; \
|
||||
incb XCyclesCount; /* +1 cycle on page boundary */ \
|
||||
incb DebugCycleCount; /* +1 cycle on page boundary */ \
|
||||
9: movl %eax, EffectiveAddr_E;
|
||||
|
||||
#define GetAbs_Y \
|
||||
@ -242,7 +239,7 @@
|
||||
addb Y_Reg, %al; \
|
||||
jnc 9f; \
|
||||
adcb $0, %ah; \
|
||||
incb XCyclesCount; /* +1 cycle on page boundary */ \
|
||||
incb DebugCycleCount; /* +1 cycle on page boundary */ \
|
||||
9: movl %eax, EffectiveAddr_E;
|
||||
|
||||
/* Absolute Indirect Addressing - The second and third bytes of the
|
||||
@ -309,7 +306,7 @@
|
||||
addb Y_Reg, %al; \
|
||||
jnc 9f; \
|
||||
adcb $0, %ah; \
|
||||
incb XCyclesCount; /* +1 cycle on page boundary */ \
|
||||
incb DebugCycleCount; /* +1 cycle on page boundary */ \
|
||||
9: movl %eax, EffectiveAddr_E;
|
||||
|
||||
#define DoADC_b GetFromEA_B \
|
||||
@ -464,7 +461,7 @@
|
||||
|
||||
// Decimal mode
|
||||
op_ADC_dec:
|
||||
incb XCyclesCount // +1 cycle
|
||||
incb DebugCycleCount // +1 cycle
|
||||
DoADC_d
|
||||
Continue
|
||||
|
||||
@ -1590,7 +1587,7 @@ op_RTS:
|
||||
---------------------------------- */
|
||||
|
||||
op_SBC_dec:
|
||||
incb XCyclesCount // +1 cycle
|
||||
incb DebugCycleCount // +1 cycle
|
||||
DoSBC_d
|
||||
Continue
|
||||
|
||||
@ -1727,7 +1724,7 @@ op_STA_imm:
|
||||
op_STA_zpage:
|
||||
GetZPage
|
||||
DoSTA
|
||||
incb XCyclesCount // +1 cycle on write
|
||||
incb DebugCycleCount // +1 cycle on write
|
||||
Continue
|
||||
|
||||
op_STA_zpage_x:
|
||||
@ -1747,13 +1744,13 @@ op_STA_abs:
|
||||
op_STA_abs_x:
|
||||
GetAbs_X
|
||||
DoSTA
|
||||
incb XCyclesCount // +1 cycle on write
|
||||
incb DebugCycleCount // +1 cycle on write
|
||||
Continue
|
||||
|
||||
op_STA_abs_y:
|
||||
GetAbs_Y
|
||||
DoSTA
|
||||
incb XCyclesCount // +1 cycle on write
|
||||
incb DebugCycleCount // +1 cycle on write
|
||||
Continue
|
||||
|
||||
op_STA_ind_x:
|
||||
@ -1764,7 +1761,7 @@ op_STA_ind_x:
|
||||
op_STA_ind_y:
|
||||
GetIndZPage_Y
|
||||
DoSTA
|
||||
incb XCyclesCount // +1 cycle on write
|
||||
incb DebugCycleCount // +1 cycle on write
|
||||
Continue
|
||||
|
||||
// 65c02 : 0x92
|
||||
@ -1817,7 +1814,7 @@ op_RMB7_65c02:
|
||||
op_STX_zpage:
|
||||
GetZPage
|
||||
DoSTX
|
||||
incb XCyclesCount // +1 cycle on write
|
||||
incb DebugCycleCount // +1 cycle on write
|
||||
Continue
|
||||
|
||||
// HACK : is this used? need to study coverage ...
|
||||
@ -1838,7 +1835,7 @@ op_STX_abs:
|
||||
op_STY_zpage:
|
||||
GetZPage
|
||||
DoSTY
|
||||
incb XCyclesCount // +1 cycle on write
|
||||
incb DebugCycleCount // +1 cycle on write
|
||||
Continue
|
||||
|
||||
op_STY_zpage_x:
|
||||
@ -1860,7 +1857,7 @@ op_STY_abs:
|
||||
op_STZ_zpage:
|
||||
GetZPage
|
||||
DoSTZ
|
||||
incb XCyclesCount // +1 cycle on write
|
||||
incb DebugCycleCount // +1 cycle on write
|
||||
Continue
|
||||
|
||||
// 65c02 : 0x74
|
||||
@ -1878,7 +1875,7 @@ op_STZ_abs:
|
||||
op_STZ_abs_x:
|
||||
GetAbs_X
|
||||
DoSTZ
|
||||
incb XCyclesCount // +1 cycle on write
|
||||
incb DebugCycleCount // +1 cycle on write
|
||||
Continue
|
||||
|
||||
/* ----------------------------------
|
||||
@ -2664,8 +2661,10 @@ continue: SaveState
|
||||
call SN(timing_throttle)
|
||||
ReplaceState
|
||||
xorb %ah, %ah
|
||||
ZeroOp
|
||||
movb $0, DebugCurrRW
|
||||
movb $0, DebugCycleCount
|
||||
GetFromPC_B
|
||||
movb %al, DebugCurrOpcode // record the byte just fetched into %al (register operand, not an immediate)
|
||||
jmp *cpu65__opcodes(,%eax,4)
|
||||
|
||||
/* Exception handler */
|
||||
|
@ -43,8 +43,9 @@ struct cpu65_extra
|
||||
{
|
||||
uint16_t ea; /* Last effective address */
|
||||
uint8_t d; /* Last data byte written */
|
||||
uint8_t op; /* 1 = read occured, 2 = write, 3 = both */
|
||||
uint8_t xcycles; /* Last opcode extra cycles */
|
||||
uint8_t rw; /* 1 = read occured, 2 = write, 3 = both */
|
||||
uint8_t opcode; /* Last opcode */
|
||||
uint8_t opcycles; /* Last opcode extra cycles */
|
||||
};
|
||||
|
||||
/* 6502 CPU models */
|
||||
|
@ -732,17 +732,19 @@ void c_read_random() {
|
||||
static void cpu_thread(void *dummyptr) {
|
||||
do
|
||||
{
|
||||
LOG("cpu_thread : entering cpu65_run()...");
|
||||
cpu65_run();
|
||||
reinitialize();
|
||||
} while (1);
|
||||
}
|
||||
|
||||
static void main_thread(void *dummyptr) {
|
||||
struct timespec abstime = { .tv_sec=0, .tv_nsec=8333333 }; // 120Hz
|
||||
do
|
||||
{
|
||||
// sleep waiting for the cpu thread to ping us that it's sleeping...
|
||||
// sleep waiting for the cpu thread to ping us to render
|
||||
pthread_mutex_lock(&mutex);
|
||||
pthread_cond_wait(&cond, &mutex);
|
||||
pthread_cond_timedwait(&cond, &mutex, &abstime);
|
||||
pthread_mutex_unlock(&mutex);
|
||||
|
||||
c_periodic_update(0);
|
||||
|
168
src/timing.c
168
src/timing.c
@ -17,16 +17,20 @@
|
||||
#include <stdio.h>
|
||||
#include <time.h>
|
||||
#include <pthread.h>
|
||||
#include <limits.h>
|
||||
|
||||
#define DEFAULT_SLEEP 120
|
||||
#define CALIBRATE_HZ 120
|
||||
|
||||
static unsigned int sleep_hz = DEFAULT_SLEEP; // sleep intervals per sec
|
||||
static unsigned long cpu_target_hz = APPLE2_HZ; // target clock speed
|
||||
static unsigned long cycles_interval = APPLE2_HZ / DEFAULT_SLEEP; // Number of 65c02 instructions to be executed at sleep_hz
|
||||
static unsigned long processing_interval = NANOSECONDS / DEFAULT_SLEEP; // Number of nanoseconds in sleep_hz intervals
|
||||
static unsigned long calibrate_interval = NANOSECONDS / CALIBRATE_HZ; // calibration interval for drifting
|
||||
static unsigned long cycle_nanoseconds = NANOSECONDS / APPLE2_HZ; // nanosecs per cycle
|
||||
static unsigned int cycle_nanoseconds_count;
|
||||
|
||||
static struct timespec deltat, t0, ti, tj;
|
||||
static unsigned long cycle=0;
|
||||
|
||||
static unsigned long cycle_count=0; // CPU cycle counter
|
||||
static int spinloop_count=0; // spin loop counter
|
||||
|
||||
static long sleep_adjust=0;
|
||||
static long sleep_adjust_inc=0;
|
||||
|
||||
@ -58,73 +62,137 @@ static inline long timespec_nsecs(struct timespec t) {
|
||||
return t.tv_sec*NANOSECONDS + t.tv_nsec;
|
||||
}
|
||||
|
||||
// Busy-wait helper used to throttle the emulated CPU toward the target Hz.
// Performs c increments of a volatile counter; it returns nothing, so its only
// purpose is the wall-clock time the loop takes to run.
|
||||
static inline void _spin_loop(unsigned long c)
|
||||
{
|
||||
static volatile unsigned int spinney=0; // volatile to prevent being optimized away
|
||||
for (unsigned long i=0; i<c; i++)
|
||||
{
|
||||
++spinney;
|
||||
}
|
||||
}
|
||||
|
||||
static void _determine_initial_spinloop_counter()
|
||||
{
|
||||
struct timespec s0, s1;
|
||||
|
||||
// time the spinloop to determine a good starting value for the spin counter
|
||||
|
||||
unsigned long avg_spin_nsecs = 0;
|
||||
unsigned int const samples = 5;
|
||||
unsigned int i=0;
|
||||
spinloop_count = 500000000;
|
||||
do
|
||||
{
|
||||
clock_gettime(CLOCK_MONOTONIC, &s0);
|
||||
_spin_loop(spinloop_count);
|
||||
clock_gettime(CLOCK_MONOTONIC, &s1);
|
||||
deltat = timespec_diff(s0, s1);
|
||||
|
||||
if (deltat.tv_sec > 0)
|
||||
{
|
||||
printf("oops long wait (>= %lu sec) adjusting loop count (%d -> %d)\n", deltat.tv_sec, spinloop_count, spinloop_count>>1);
|
||||
spinloop_count >>= 1;
|
||||
i = 0;
|
||||
avg_spin_nsecs = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
printf("spinloop = %lu nsec\n", deltat.tv_nsec);
|
||||
avg_spin_nsecs += deltat.tv_nsec;
|
||||
++i;
|
||||
} while (i<samples);
|
||||
|
||||
avg_spin_nsecs = (avg_spin_nsecs / samples);
|
||||
printf("average = %lu nsec\n", avg_spin_nsecs);
|
||||
|
||||
spinloop_count = cycle_nanoseconds * spinloop_count / avg_spin_nsecs;
|
||||
|
||||
cycle_nanoseconds_count = cycle_nanoseconds / spinloop_count;
|
||||
|
||||
printf("counter for a single cycle = %d\n", spinloop_count);
|
||||
}
|
||||
|
||||
void timing_initialize() {
|
||||
|
||||
// should do this only on startup
|
||||
_determine_initial_spinloop_counter();
|
||||
|
||||
clock_gettime(CLOCK_MONOTONIC, &t0);
|
||||
ti=t0;
|
||||
}
|
||||
|
||||
void timing_set_cpu_target_hz(unsigned long hz) {
|
||||
cpu_target_hz = hz;
|
||||
}
|
||||
|
||||
void timing_set_sleep_hz(unsigned int hz) {
|
||||
sleep_hz = hz;
|
||||
void timing_set_cpu_scale(unsigned int scale)
|
||||
{
|
||||
// ...
|
||||
}
|
||||
|
||||
/*
|
||||
* Throttles the 65c02 CPU down to a target frequency of X.
|
||||
* Currently set to target the Apple //e @ 1.02MHz
|
||||
* Throttles 6502 CPU down to the target CPU frequency (default is speed of original Apple //e).
|
||||
*
|
||||
* This uses an adaptive spin loop to stay closer to the target CPU frequency.
|
||||
*
|
||||
* This is called from cpu65_run() on the cpu-thread
|
||||
*/
|
||||
void timing_throttle() {
|
||||
++cycle;
|
||||
void timing_throttle()
|
||||
{
|
||||
static unsigned int drift_interval_counter=0; // in nsecs since last
|
||||
static unsigned int instruction_interval_counter=0; // instruction count since last
|
||||
static unsigned int spin_adjust_interval=INT_MAX;
|
||||
static int8_t spin_adjust_count=0; // +/- 1
|
||||
|
||||
static time_t severe_lag=0;
|
||||
++instruction_interval_counter;
|
||||
|
||||
if ((cycle%cycles_interval) == 0)
|
||||
unsigned int opcycles = cpu65__opcycles[cpu65_debug.opcode] + cpu65_debug.opcycles;
|
||||
if (!opcycles)
|
||||
{
|
||||
opcycles = 2; // assume 2 cycles for UNK opcodes
|
||||
}
|
||||
cycle_count += opcycles;
|
||||
|
||||
// wake render thread as we go to sleep
|
||||
pthread_mutex_lock(&mutex);
|
||||
pthread_cond_signal(&cond);
|
||||
pthread_mutex_unlock(&mutex);
|
||||
int8_t c = instruction_interval_counter%spin_adjust_interval ? spin_adjust_count : 0;
|
||||
_spin_loop(opcycles * (spinloop_count + c) );
|
||||
drift_interval_counter += c*cycle_nanoseconds;
|
||||
|
||||
if (drift_interval_counter < calibrate_interval)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// calibrate emulator clock to real clock ...
|
||||
|
||||
clock_gettime(CLOCK_MONOTONIC, &tj);
|
||||
deltat = timespec_diff(ti, tj);
|
||||
ti=tj;
|
||||
if (deltat.tv_sec != 0)
|
||||
|
||||
// NOTE: these calculations could overflow if emulator speed is severely dampened back...
|
||||
unsigned long real_counter = NANOSECONDS * deltat.tv_sec;
|
||||
real_counter += deltat.tv_nsec;
|
||||
long diff_nsecs = real_counter - drift_interval_counter; // whole +/- nsec diff
|
||||
|
||||
float nsecs_per_oneloop = cycle_nanoseconds/(float)spinloop_count;
|
||||
unsigned int instruction_interval_nsecs = instruction_interval_counter * nsecs_per_oneloop;
|
||||
|
||||
// reset
|
||||
drift_interval_counter=0;
|
||||
instruction_interval_counter=0;
|
||||
|
||||
// calculate spin adjustment
|
||||
if (diff_nsecs == 0)
|
||||
{
|
||||
// severely lagging, don't bother sleeping ...
|
||||
if (severe_lag < time(NULL))
|
||||
{
|
||||
severe_lag = time(NULL)+2;
|
||||
fprintf(stderr, "Severe lag detected...\n");
|
||||
// nothing to do
|
||||
}
|
||||
else if (abs(diff_nsecs) > instruction_interval_nsecs)
|
||||
{
|
||||
// spin for additional +/- X each instruction
|
||||
spinloop_count += diff_nsecs / instruction_interval_nsecs;
|
||||
spin_adjust_interval=INT_MAX;
|
||||
}
|
||||
else
|
||||
{
|
||||
deltat.tv_nsec = processing_interval - deltat.tv_nsec + sleep_adjust_inc;
|
||||
nanosleep(&deltat, NULL); // NOTE: spec says will return right away if deltat.tv_nsec value < 0 ...
|
||||
ti.tv_nsec += deltat.tv_nsec;
|
||||
}
|
||||
|
||||
if ((cycle%cpu_target_hz) == 0)
|
||||
{
|
||||
clock_gettime(CLOCK_MONOTONIC, &tj);
|
||||
|
||||
deltat = timespec_diff(t0, tj);
|
||||
struct timespec t = (struct timespec) {.tv_sec=1, .tv_nsec=0 };
|
||||
|
||||
long adj = (deltat.tv_sec == 0)
|
||||
? timespec_nsecs(timespec_diff(deltat, t))
|
||||
: -1 * timespec_nsecs(timespec_diff(t, deltat));
|
||||
|
||||
sleep_adjust += adj;
|
||||
sleep_adjust_inc = sleep_adjust/sleep_hz;
|
||||
|
||||
t0=tj;
|
||||
ti=t0;
|
||||
}
|
||||
// sub adjustment : spin for additional +/- 1 every interval
|
||||
spin_adjust_count = diff_nsecs < 0 ? -1 : 1;
|
||||
spin_adjust_interval = instruction_interval_nsecs / abs(diff_nsecs);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -13,12 +13,11 @@
|
||||
#ifndef _TIMING_H_
|
||||
#define _TIMING_H_
|
||||
|
||||
#define APPLE2_HZ 2040000
|
||||
#define APPLE2_HZ 1020000
|
||||
#define NANOSECONDS 1000000000
|
||||
|
||||
void timing_set_cpu_target_hz(unsigned long hz);
|
||||
|
||||
void timing_set_sleep_hz(unsigned int hz);
|
||||
// 0 = run as fast as possible, 1 = approximate apple, X = 1/X rate
|
||||
void timing_set_cpu_scale(unsigned int scale);
|
||||
|
||||
void timing_initialize();
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user