WIP: changes to cpu throttling/timing

WARN: this is still a WIP and untested. Instead of nanosleep(), the CPU thread
now uses an adaptive spin loop. This more closely emulates the actual Apple //e
speed, in preparation for near-realtime audio. The drawback is that the CPU
thread is pegged at 100% usage.
This commit is contained in:
Aaron Culliney 2013-07-21 17:20:03 -07:00
parent d599a2174a
commit 96d083a2c3
5 changed files with 154 additions and 85 deletions

View File

@ -32,8 +32,9 @@
#define DebugCurrEA SN(cpu65_debug) #define DebugCurrEA SN(cpu65_debug)
#define DebugCurrByte SN(cpu65_debug)+2 #define DebugCurrByte SN(cpu65_debug)+2
#define DebugCurrOp SN(cpu65_debug)+3 #define DebugCurrRW SN(cpu65_debug)+3
#define XCyclesCount SN(cpu65_debug)+4 #define DebugCycleCount SN(cpu65_debug)+4
#define DebugCurrOpcode SN(cpu65_debug)+5
/* ------------------------------------------------------------------------- /* -------------------------------------------------------------------------
CPU (6502) Helper Routines CPU (6502) Helper Routines
@ -57,7 +58,7 @@
(,EffectiveAddr_E,8); \ (,EffectiveAddr_E,8); \
#define GetFromEA_B \ #define GetFromEA_B \
orb $1, DebugCurrOp; \ orb $1, DebugCurrRW; \
call *SN(cpu65_vmem) \ call *SN(cpu65_vmem) \
(,EffectiveAddr_E,8); (,EffectiveAddr_E,8);
@ -71,7 +72,7 @@
(,EffectiveAddr_E,8); (,EffectiveAddr_E,8);
#define PutToEA_B \ #define PutToEA_B \
orb $2, DebugCurrOp; \ orb $2, DebugCurrRW; \
orb %al, DebugCurrByte; \ orb %al, DebugCurrByte; \
call *SN(cpu65_vmem)+4 \ call *SN(cpu65_vmem)+4 \
(,EffectiveAddr_E,8); (,EffectiveAddr_E,8);
@ -90,10 +91,6 @@
call *SN(cpu65_vmem) \ call *SN(cpu65_vmem) \
(,EffectiveAddr_E,8); \ (,EffectiveAddr_E,8); \
// reset operation code before each instruction
#define ZeroOp movb $0, DebugCurrOp; \
movb $0, XCyclesCount;
// NOTE: the orb functions as a move, but we want to // NOTE: the orb functions as a move, but we want to
// set the flags and we know %ah is zero // set the flags and we know %ah is zero
#define Continue \ #define Continue \
@ -139,12 +136,12 @@
9: 9:
#define BranchXCycles \ #define BranchXCycles \
incb XCyclesCount; /* +1 cycle branch taken */ \ incb DebugCycleCount; /* +1 branch taken */ \
pushl %ebx; \ pushl %ebx; \
movw PC_Reg, %bx; \ movw PC_Reg, %bx; \
addb %al, %bl; \ addb %al, %bl; \
jnc 9f; \ jnc 9f; \
incb XCyclesCount; /* +1 cycle branch across pg boundary */ \ incb DebugCycleCount; /* +1 branch across pg boundary */ \
9: addw %ax, PC_Reg; \ 9: addw %ax, PC_Reg; \
popl %ebx; popl %ebx;
@ -234,7 +231,7 @@
addb X_Reg, %al; \ addb X_Reg, %al; \
jnc 9f; \ jnc 9f; \
adcb $0, %ah; \ adcb $0, %ah; \
incb XCyclesCount; /* +1 cycle on page boundary */ \ incb DebugCycleCount; /* +1 cycle on page boundary */ \
9: movl %eax, EffectiveAddr_E; 9: movl %eax, EffectiveAddr_E;
#define GetAbs_Y \ #define GetAbs_Y \
@ -242,7 +239,7 @@
addb Y_Reg, %al; \ addb Y_Reg, %al; \
jnc 9f; \ jnc 9f; \
adcb $0, %ah; \ adcb $0, %ah; \
incb XCyclesCount; /* +1 cycle on page boundary */ \ incb DebugCycleCount; /* +1 cycle on page boundary */ \
9: movl %eax, EffectiveAddr_E; 9: movl %eax, EffectiveAddr_E;
/* Absolute Indirect Addressing - The second and third bytes of the /* Absolute Indirect Addressing - The second and third bytes of the
@ -309,7 +306,7 @@
addb Y_Reg, %al; \ addb Y_Reg, %al; \
jnc 9f; \ jnc 9f; \
adcb $0, %ah; \ adcb $0, %ah; \
incb XCyclesCount; /* +1 cycle on page boundary */ \ incb DebugCycleCount; /* +1 cycle on page boundary */ \
9: movl %eax, EffectiveAddr_E; 9: movl %eax, EffectiveAddr_E;
#define DoADC_b GetFromEA_B \ #define DoADC_b GetFromEA_B \
@ -464,7 +461,7 @@
// Decimal mode // Decimal mode
op_ADC_dec: op_ADC_dec:
incb XCyclesCount // +1 cycle incb DebugCycleCount // +1 cycle
DoADC_d DoADC_d
Continue Continue
@ -1590,7 +1587,7 @@ op_RTS:
---------------------------------- */ ---------------------------------- */
op_SBC_dec: op_SBC_dec:
incb XCyclesCount // +1 cycle incb DebugCycleCount // +1 cycle
DoSBC_d DoSBC_d
Continue Continue
@ -1727,7 +1724,7 @@ op_STA_imm:
op_STA_zpage: op_STA_zpage:
GetZPage GetZPage
DoSTA DoSTA
incb XCyclesCount // +1 cycle on write incb DebugCycleCount // +1 cycle on write
Continue Continue
op_STA_zpage_x: op_STA_zpage_x:
@ -1747,13 +1744,13 @@ op_STA_abs:
op_STA_abs_x: op_STA_abs_x:
GetAbs_X GetAbs_X
DoSTA DoSTA
incb XCyclesCount // +1 cycle on write incb DebugCycleCount // +1 cycle on write
Continue Continue
op_STA_abs_y: op_STA_abs_y:
GetAbs_Y GetAbs_Y
DoSTA DoSTA
incb XCyclesCount // +1 cycle on write incb DebugCycleCount // +1 cycle on write
Continue Continue
op_STA_ind_x: op_STA_ind_x:
@ -1764,7 +1761,7 @@ op_STA_ind_x:
op_STA_ind_y: op_STA_ind_y:
GetIndZPage_Y GetIndZPage_Y
DoSTA DoSTA
incb XCyclesCount // +1 cycle on write incb DebugCycleCount // +1 cycle on write
Continue Continue
// 65c02 : 0x92 // 65c02 : 0x92
@ -1817,7 +1814,7 @@ op_RMB7_65c02:
op_STX_zpage: op_STX_zpage:
GetZPage GetZPage
DoSTX DoSTX
incb XCyclesCount // +1 cycle on write incb DebugCycleCount // +1 cycle on write
Continue Continue
// HACK : is this used? need to study coverage ... // HACK : is this used? need to study coverage ...
@ -1838,7 +1835,7 @@ op_STX_abs:
op_STY_zpage: op_STY_zpage:
GetZPage GetZPage
DoSTY DoSTY
incb XCyclesCount // +1 cycle on write incb DebugCycleCount // +1 cycle on write
Continue Continue
op_STY_zpage_x: op_STY_zpage_x:
@ -1860,7 +1857,7 @@ op_STY_abs:
op_STZ_zpage: op_STZ_zpage:
GetZPage GetZPage
DoSTZ DoSTZ
incb XCyclesCount // +1 cycle on write incb DebugCycleCount // +1 cycle on write
Continue Continue
// 65c02 : 0x74 // 65c02 : 0x74
@ -1878,7 +1875,7 @@ op_STZ_abs:
op_STZ_abs_x: op_STZ_abs_x:
GetAbs_X GetAbs_X
DoSTZ DoSTZ
incb XCyclesCount // +1 cycle on write incb DebugCycleCount // +1 cycle on write
Continue Continue
/* ---------------------------------- /* ----------------------------------
@ -2664,8 +2661,10 @@ continue: SaveState
call SN(timing_throttle) call SN(timing_throttle)
ReplaceState ReplaceState
xorb %ah, %ah xorb %ah, %ah
ZeroOp movb $0, DebugCurrRW
movb $0, DebugCycleCount
GetFromPC_B GetFromPC_B
movb $al, DebugCurrOpcode
jmp *cpu65__opcodes(,%eax,4) jmp *cpu65__opcodes(,%eax,4)
/* Exception handler */ /* Exception handler */

View File

@ -43,8 +43,9 @@ struct cpu65_extra
{ {
uint16_t ea; /* Last effective address */ uint16_t ea; /* Last effective address */
uint8_t d; /* Last data byte written */ uint8_t d; /* Last data byte written */
uint8_t op; /* 1 = read occurred, 2 = write, 3 = both */ uint8_t rw; /* 1 = read occurred, 2 = write, 3 = both */
uint8_t xcycles; /* Last opcode extra cycles */ uint8_t opcode; /* Last opcode */
uint8_t opcycles; /* Last opcode extra cycles */
}; };
/* 6502 CPU models */ /* 6502 CPU models */

View File

@ -732,17 +732,19 @@ void c_read_random() {
static void cpu_thread(void *dummyptr) { static void cpu_thread(void *dummyptr) {
do do
{ {
LOG("cpu_thread : entering cpu65_run()...");
cpu65_run(); cpu65_run();
reinitialize(); reinitialize();
} while (1); } while (1);
} }
static void main_thread(void *dummyptr) { static void main_thread(void *dummyptr) {
struct timespec abstime = { .tv_sec=0, .tv_nsec=8333333 }; // 120Hz
do do
{ {
// sleep waiting for the cpu thread to ping us that it's sleeping... // sleep waiting for the cpu thread to ping us to render
pthread_mutex_lock(&mutex); pthread_mutex_lock(&mutex);
pthread_cond_wait(&cond, &mutex); pthread_cond_timedwait(&cond, &mutex, &abstime);
pthread_mutex_unlock(&mutex); pthread_mutex_unlock(&mutex);
c_periodic_update(0); c_periodic_update(0);

View File

@ -17,16 +17,20 @@
#include <stdio.h> #include <stdio.h>
#include <time.h> #include <time.h>
#include <pthread.h> #include <pthread.h>
#include <limits.h>
#define DEFAULT_SLEEP 120 #define CALIBRATE_HZ 120
static unsigned int sleep_hz = DEFAULT_SLEEP; // sleep intervals per sec
static unsigned long cpu_target_hz = APPLE2_HZ; // target clock speed static unsigned long cpu_target_hz = APPLE2_HZ; // target clock speed
static unsigned long cycles_interval = APPLE2_HZ / DEFAULT_SLEEP; // Number of 65c02 instructions to be executed at sleep_hz static unsigned long calibrate_interval = NANOSECONDS / CALIBRATE_HZ; // calibration interval for drifting
static unsigned long processing_interval = NANOSECONDS / DEFAULT_SLEEP; // Number of nanoseconds in sleep_hz intervals static unsigned long cycle_nanoseconds = NANOSECONDS / APPLE2_HZ; // nanosecs per cycle
static unsigned int cycle_nanoseconds_count;
static struct timespec deltat, t0, ti, tj; static struct timespec deltat, t0, ti, tj;
static unsigned long cycle=0;
static unsigned long cycle_count=0; // CPU cycle counter
static int spinloop_count=0; // spin loop counter
static long sleep_adjust=0; static long sleep_adjust=0;
static long sleep_adjust_inc=0; static long sleep_adjust_inc=0;
@ -58,73 +62,137 @@ static inline long timespec_nsecs(struct timespec t) {
return t.tv_sec*NANOSECONDS + t.tv_nsec; return t.tv_sec*NANOSECONDS + t.tv_nsec;
} }
// Busy-wait for `c` iterations -- used to throttle emulation toward the
// target CPU Hz without calling nanosleep().
static inline void _spin_loop(unsigned long c)
{
    // volatile so the compiler cannot optimize the otherwise-useless increments away
    static volatile unsigned int spinney=0;
    unsigned long remaining = c;
    while (remaining)
    {
        ++spinney;
        --remaining;
    }
}
// One-time calibration: time _spin_loop() with CLOCK_MONOTONIC to determine
// how many spin iterations correspond to one emulated 65c02 cycle.
// Side effects: sets the file-scope spinloop_count and cycle_nanoseconds_count;
// clobbers the file-scope deltat scratch timespec.  Prints progress to stdout.
static void _determine_initial_spinloop_counter()
{
    struct timespec s0, s1;

    // time the spinloop to determine a good starting value for the spin counter
    unsigned long avg_spin_nsecs = 0;
    unsigned int const samples = 5;
    unsigned int i=0;
    spinloop_count = 500000000;
    do
    {
        clock_gettime(CLOCK_MONOTONIC, &s0);
        _spin_loop(spinloop_count);
        clock_gettime(CLOCK_MONOTONIC, &s1);

        deltat = timespec_diff(s0, s1);
        if (deltat.tv_sec > 0)
        {
            // a sample of >= 1 sec is far too long : halve the count and restart sampling
            // NOTE: tv_sec/tv_nsec are signed, so print with %ld (the old %lu was UB)
            printf("oops long wait (>= %ld sec) adjusting loop count (%d -> %d)\n", (long)deltat.tv_sec, spinloop_count, spinloop_count>>1);
            spinloop_count >>= 1;
            i = 0;
            avg_spin_nsecs = 0;
            continue;
        }
        printf("spinloop = %ld nsec\n", deltat.tv_nsec);
        avg_spin_nsecs += deltat.tv_nsec;
        ++i;
    } while (i<samples);

    avg_spin_nsecs = (avg_spin_nsecs / samples);
    printf("average = %lu nsec\n", avg_spin_nsecs);

    if (avg_spin_nsecs == 0)
    {
        avg_spin_nsecs = 1; // clock too coarse to measure -- avoid divide-by-zero below
    }

    // rescale the counter so that spinloop_count iterations ~= one CPU cycle
    spinloop_count = cycle_nanoseconds * spinloop_count / avg_spin_nsecs;
    if (spinloop_count < 1)
    {
        spinloop_count = 1; // keep at least one iteration per cycle (also guards next division)
    }
    cycle_nanoseconds_count = cycle_nanoseconds / spinloop_count;
    printf("counter for a single cycle = %d\n", spinloop_count);
}
void timing_initialize() { void timing_initialize() {
// should do this only on startup
_determine_initial_spinloop_counter();
clock_gettime(CLOCK_MONOTONIC, &t0); clock_gettime(CLOCK_MONOTONIC, &t0);
ti=t0; ti=t0;
} }
void timing_set_cpu_target_hz(unsigned long hz) { void timing_set_cpu_scale(unsigned int scale)
cpu_target_hz = hz; {
} // ...
void timing_set_sleep_hz(unsigned int hz) {
sleep_hz = hz;
} }
/* /*
* Throttles the 65c02 CPU down to a target frequency of X. * Throttles 6502 CPU down to the target CPU frequency (default is speed of original Apple //e).
* Currently set to target the Apple //e @ 1.02MHz *
* This uses an adaptive spin loop to stay closer to the target CPU frequency.
* *
* This is called from cpu65_run() on the cpu-thread
*/ */
void timing_throttle() { void timing_throttle()
++cycle; {
static unsigned int drift_interval_counter=0; // in nsecs since last
static unsigned int instruction_interval_counter=0; // instruction count since last
static unsigned int spin_adjust_interval=INT_MAX;
static int8_t spin_adjust_count=0; // +/- 1
static time_t severe_lag=0; ++instruction_interval_counter;
if ((cycle%cycles_interval) == 0) unsigned int opcycles = cpu65__opcycles[cpu65_debug.opcode] + cpu65_debug.opcycles;
if (!opcycles)
{ {
opcycles = 2; // assume 2 cycles for UNK opcodes
}
cycle_count += opcycles;
// wake render thread as we go to sleep int8_t c = instruction_interval_counter%spin_adjust_interval ? spin_adjust_count : 0;
pthread_mutex_lock(&mutex); _spin_loop(opcycles * (spinloop_count + c) );
pthread_cond_signal(&cond); drift_interval_counter += c*cycle_nanoseconds;
pthread_mutex_unlock(&mutex);
clock_gettime(CLOCK_MONOTONIC, &tj); if (drift_interval_counter < calibrate_interval)
deltat = timespec_diff(ti, tj); {
ti=tj; return;
if (deltat.tv_sec != 0) }
{
// severely lagging, don't bother sleeping ...
if (severe_lag < time(NULL))
{
severe_lag = time(NULL)+2;
fprintf(stderr, "Severe lag detected...\n");
}
}
else
{
deltat.tv_nsec = processing_interval - deltat.tv_nsec + sleep_adjust_inc;
nanosleep(&deltat, NULL); // NOTE: spec says will return right away if deltat.tv_nsec value < 0 ...
ti.tv_nsec += deltat.tv_nsec;
}
if ((cycle%cpu_target_hz) == 0) // -------------------------------------------------------------------------
{ // calibrate emulator clock to real clock ...
clock_gettime(CLOCK_MONOTONIC, &tj);
deltat = timespec_diff(t0, tj); clock_gettime(CLOCK_MONOTONIC, &tj);
struct timespec t = (struct timespec) {.tv_sec=1, .tv_nsec=0 }; deltat = timespec_diff(ti, tj);
ti=tj;
long adj = (deltat.tv_sec == 0) // NOTE: these calculations could overflow if emulator speed is severely dampened back...
? timespec_nsecs(timespec_diff(deltat, t)) unsigned long real_counter = NANOSECONDS * deltat.tv_sec;
: -1 * timespec_nsecs(timespec_diff(t, deltat)); real_counter += deltat.tv_nsec;
long diff_nsecs = real_counter - drift_interval_counter; // whole +/- nsec diff
sleep_adjust += adj; float nsecs_per_oneloop = cycle_nanoseconds/(float)spinloop_count;
sleep_adjust_inc = sleep_adjust/sleep_hz; unsigned int instruction_interval_nsecs = instruction_interval_counter * nsecs_per_oneloop;
t0=tj; // reset
ti=t0; drift_interval_counter=0;
} instruction_interval_counter=0;
// calculate spin adjustment
if (diff_nsecs == 0)
{
// nothing to do
}
else if (abs(diff_nsecs) > instruction_interval_nsecs)
{
// spin for additional +/- X each instruction
spinloop_count += diff_nsecs / instruction_interval_nsecs;
spin_adjust_interval=INT_MAX;
}
else
{
// sub adjustment : spin for additional +/- 1 every interval
spin_adjust_count = diff_nsecs < 0 ? -1 : 1;
spin_adjust_interval = instruction_interval_nsecs / abs(diff_nsecs);
} }
} }

View File

@ -13,12 +13,11 @@
#ifndef _TIMING_H_ #ifndef _TIMING_H_
#define _TIMING_H_ #define _TIMING_H_
#define APPLE2_HZ 2040000 #define APPLE2_HZ 1020000
#define NANOSECONDS 1000000000 #define NANOSECONDS 1000000000
void timing_set_cpu_target_hz(unsigned long hz); // 0 = run as fast as possible, 1 = approximate apple, X = 1/X rate
void timing_set_cpu_scale(unsigned int scale);
void timing_set_sleep_hz(unsigned int hz);
void timing_initialize(); void timing_initialize();