1
0
mirror of https://github.com/TomHarte/CLK.git synced 2025-01-13 07:30:21 +00:00

Merge pull request #748 from TomHarte/SDLLatency

Introduces sync matching to the SDL version.
This commit is contained in:
Thomas Harte 2020-02-10 23:38:24 -05:00 committed by GitHub
commit 654f5b0478
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 241 additions and 68 deletions

View File

@ -0,0 +1,88 @@
//
// ScanSynchroniser.hpp
// Clock Signal
//
// Created by Thomas Harte on 09/02/2020.
// Copyright © 2020 Thomas Harte. All rights reserved.
//
#ifndef ScanSynchroniser_h
#define ScanSynchroniser_h
#include "../Outputs/ScanTarget.hpp"
#include <cmath>
namespace Time {
/*!
	Where an emulated machine is sufficiently close to a host machine's frame rate that a small nudge in
	its speed multiplier will bring it into frame synchronisation, the ScanSynchroniser provides a sequence of
	speed multipliers designed both to adjust the machine to the proper speed and, in a reasonable amount
	of time, to bring it into phase.

	Intended usage is to call @c can_synchronise once per host frame and, only if it returned @c true,
	to follow up with @c next_speed_multiplier.
*/
class ScanSynchroniser {
	public:
		/*!
			@param scan_status The emulated machine's current scan status; its @c field_duration and
				@c field_duration_gradient are inspected.
			@param frame_duration The duration of a single host frame, in the same units as
				@c scan_status.field_duration.

			@returns @c true if the emulated machine can be synchronised with the host frame output based on its
				current @c scan_status and the host machine's @c frame_duration; @c false otherwise.
		*/
		bool can_synchronise(const Outputs::Display::ScanStatus &scan_status, double frame_duration) {
			ratio_ = 1.0;

			// Without a positive field duration there is nothing meaningful to compare against; bailing out
			// here also avoids dividing by zero and leaving an infinity or NaN behind in ratio_.
			if(!(scan_status.field_duration > 0.0)) {
				return false;
			}

			// NOTE(review): only an upper bound is applied to the gradient, so large negative gradients
			// are accepted as stable — confirm whether a magnitude test was intended.
			if(scan_status.field_duration_gradient < 0.00001) {
				// Check out the machine's current frame time.
				// If it's within 3% of a non-zero integer multiple of the
				// display rate, mark this time window to be split over the sync.
				ratio_ = (frame_duration * base_multiplier_) / scan_status.field_duration;
				const double integer_ratio = std::round(ratio_);
				if(integer_ratio > 0.0) {
					ratio_ /= integer_ratio;
					return ratio_ <= maximum_rate_adjustment && ratio_ >= 1.0 / maximum_rate_adjustment;
				}
			}
			return false;
		}

		/*!
			@returns The appropriate speed multiplier for the next frame based on the inputs previously supplied to @c can_synchronise.
				Results are undefined if @c can_synchronise returned @c false.
		*/
		double next_speed_multiplier(const Outputs::Display::ScanStatus &scan_status) {
			// The host versus emulated ratio is calculated based on the current perceived frame duration of the machine.
			// Either that number is exactly correct or it's already the result of some sort of low-pass filter. So there's
			// no benefit to second guessing it here — just take it to be correct.
			//
			// ... with one slight caveat, which is that it is desirable to adjust phase here, to align vertical sync points.
			// So the set speed multiplier may be adjusted slightly to aim for that.
			double speed_multiplier = 1.0 / (ratio_ / base_multiplier_);
			if(scan_status.current_position > 0.0) {
				// Nudge by 0.5% in whichever direction depends on which half of the field is currently in progress.
				if(scan_status.current_position < 0.5) speed_multiplier /= phase_adjustment_ratio;
				else speed_multiplier *= phase_adjustment_ratio;
			}

			// Blend the new target into the running multiplier rather than jumping straight to it.
			speed_multiplier_ = (speed_multiplier_ * 0.95) + (speed_multiplier * 0.05);
			return speed_multiplier_ * base_multiplier_;
		}

		/*!
			Sets the base speed multiplier, which scales both the synchronisation test in @c can_synchronise
			and the result of @c next_speed_multiplier.
		*/
		void set_base_speed_multiplier(double multiplier) {
			base_multiplier_ = multiplier;
		}

		/*!
			@returns The base speed multiplier most recently supplied to @c set_base_speed_multiplier, or 1.0 if none has been.
		*/
		double get_base_speed_multiplier() const {
			return base_multiplier_;
		}

	private:
		// The largest speed-up (and, reciprocally, slow-down) that will be applied in pursuit of synchronisation: 3%.
		static constexpr double maximum_rate_adjustment = 1.03;
		// The additional per-frame factor applied to pull vertical sync into phase.
		static constexpr double phase_adjustment_ratio = 1.005;

		// Managed local state.
		double speed_multiplier_ = 1.0;
		double base_multiplier_ = 1.0;

		// Temporary storage to bridge the can_synchronise -> next_speed_multiplier gap.
		double ratio_ = 1.0;
};
}
#endif /* ScanSynchroniser_h */

View File

@ -9,9 +9,16 @@
#ifndef TimeTypes_h
#define TimeTypes_h
#include <chrono>
namespace Time {

/// A quantity of time measured in seconds.
typedef double Seconds;

/// A quantity of time measured in whole nanoseconds.
typedef int64_t Nanos;

/*!
	@returns The current reading of a monotonic clock, in nanoseconds since that clock's
		(unspecified) epoch. Values are meaningful only relative to one another.

	@c std::chrono::steady_clock is used rather than @c high_resolution_clock because the latter
	is an alias for the adjustable system clock on some standard libraries, which would allow
	successive readings to run backwards and so produce negative intervals.
*/
inline Nanos nanos_now() {
	return std::chrono::duration_cast<std::chrono::nanoseconds>(
		std::chrono::steady_clock::now().time_since_epoch()
	).count();
}

}

View File

@ -397,9 +397,10 @@ class ConcreteMachine:
memset(processor_write_memory_map_, 0, sizeof(processor_write_memory_map_));
memset(mos6560_bus_handler_.video_memory_map, 0, sizeof(mos6560_bus_handler_.video_memory_map));
#define set_ram(baseaddr, length) \
write_to_map(processor_read_memory_map_, &ram_[baseaddr], baseaddr, length); \
write_to_map(processor_write_memory_map_, &ram_[baseaddr], baseaddr, length);
#define set_ram(baseaddr, length) { \
write_to_map(processor_read_memory_map_, &ram_[baseaddr], baseaddr, length); \
write_to_map(processor_write_memory_map_, &ram_[baseaddr], baseaddr, length); \
}
// Add 6502-visible RAM as requested.
set_ram(0x0000, 0x0400);

View File

@ -1129,6 +1129,7 @@
4B643F391D77AD1900D431D6 /* CSStaticAnalyser.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; name = CSStaticAnalyser.mm; path = StaticAnalyser/CSStaticAnalyser.mm; sourceTree = "<group>"; };
4B643F3C1D77AE5C00D431D6 /* CSMachine+Target.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "CSMachine+Target.h"; sourceTree = "<group>"; };
4B643F3E1D77B88000D431D6 /* DocumentController.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = DocumentController.swift; sourceTree = "<group>"; };
4B644ED023F0FB55006C0CC5 /* ScanSynchroniser.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = ScanSynchroniser.hpp; sourceTree = "<group>"; };
4B65085F22F4CF8D009C1100 /* Keyboard.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = Keyboard.cpp; sourceTree = "<group>"; };
4B680CE123A5553100451D43 /* 68000ComparativeTests.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = 68000ComparativeTests.mm; sourceTree = "<group>"; };
4B680CE323A555CA00451D43 /* 68000 Comparative Tests */ = {isa = PBXFileReference; lastKnownFileType = folder; path = "68000 Comparative Tests"; sourceTree = "<group>"; };
@ -3726,6 +3727,7 @@
4BB06B211F316A3F00600C7A /* ForceInline.hpp */,
4B80214322EE7C3E00068002 /* JustInTime.hpp */,
4B449C942063389900A095C8 /* TimeTypes.hpp */,
4B644ED023F0FB55006C0CC5 /* ScanSynchroniser.hpp */,
);
name = ClockReceiver;
path = ../../ClockReceiver;

View File

@ -54,11 +54,15 @@
<CommandLineArguments>
<CommandLineArgument
argument = "/Users/thomasharte/Downloads/test-dsk-for-rw-and-50-60-hz/TEST-RW-60Hz.DSK"
isEnabled = "NO">
</CommandLineArgument>
<CommandLineArgument
argument = "&quot;/Users/thomasharte/Library/Mobile Documents/com~apple~CloudDocs/Desktop/Soft/Master System/R-Type (NTSC).sms&quot;"
isEnabled = "YES">
</CommandLineArgument>
<CommandLineArgument
argument = "--speed=5"
isEnabled = "YES">
isEnabled = "NO">
</CommandLineArgument>
<CommandLineArgument
argument = "--rompath=/Users/thomasharte/Projects/CLK/ROMImages"

View File

@ -21,6 +21,9 @@
#include "Typer.hpp"
#include "../../../../Activity/Observer.hpp"
#include "../../../../ClockReceiver/TimeTypes.hpp"
#include "../../../../ClockReceiver/ScanSynchroniser.hpp"
#import "CSStaticAnalyser+TargetVector.h"
#import "NSBundle+DataResource.h"
#import "NSData+StdVector.h"
@ -154,11 +157,12 @@ struct ActivityObserver: public Activity::Observer {
CSHighPrecisionTimer *_timer;
CGSize _pixelSize;
std::atomic_flag _isUpdating;
int64_t _syncTime;
int64_t _timeDiff;
Time::Nanos _syncTime;
Time::Nanos _timeDiff;
double _refreshPeriod;
BOOL _isSyncLocking;
double _speedMultiplier;
Time::ScanSynchroniser _scanSynchroniser;
NSTimer *_joystickTimer;
@ -169,7 +173,6 @@ struct ActivityObserver: public Activity::Observer {
self = [super init];
if(self) {
_analyser = result;
_speedMultiplier = 1.0;
Machine::Error error;
std::vector<ROMMachine::ROM> missing_roms;
@ -726,13 +729,13 @@ struct ActivityObserver: public Activity::Observer {
- (void)openGLViewDisplayLinkDidFire:(CSOpenGLView *)view now:(const CVTimeStamp *)now outputTime:(const CVTimeStamp *)outputTime {
// First order of business: grab a timestamp.
const auto timeNow = std::chrono::high_resolution_clock::now().time_since_epoch().count();
const auto timeNow = Time::nanos_now();
CGSize pixelSize = view.backingSize;
BOOL isSyncLocking;
@synchronized(self) {
// Store a means to map from CVTimeStamp.hostTime to std::chrono::high_resolution_clock;
// there is an extremely dodgy assumption here that both are in the same units (and, below, that both as in ns).
// Store a means to map from CVTimeStamp.hostTime to Time::Nanos;
// there is an extremely dodgy assumption here that the former is in ns.
if(!_timeDiff) {
_timeDiff = int64_t(now->hostTime) - int64_t(timeNow);
}
@ -761,11 +764,11 @@ struct ActivityObserver: public Activity::Observer {
#define TICKS 600
- (void)start {
__block auto lastTime = std::chrono::high_resolution_clock::now().time_since_epoch().count();
__block auto lastTime = Time::nanos_now();
_timer = [[CSHighPrecisionTimer alloc] initWithTask:^{
// Grab the time now and, therefore, the amount of time since the timer last fired.
const auto timeNow = std::chrono::high_resolution_clock::now().time_since_epoch().count();
const auto timeNow = Time::nanos_now();
const auto duration = timeNow - lastTime;
@ -774,42 +777,15 @@ struct ActivityObserver: public Activity::Observer {
@synchronized(self) {
// If this tick includes vsync then inspect the machine.
if(timeNow >= self->_syncTime && lastTime < self->_syncTime) {
// Grab the scan status and check out the machine's current frame time.
// If it's stable and within 3% of a non-zero integer multiple of the
// display rate, mark this time window to be split over the sync.
const auto scan_status = self->_machine->crt_machine()->get_scan_status();
double ratio = 1.0;
if(scan_status.field_duration_gradient < 0.00001) {
ratio = self->_refreshPeriod / scan_status.field_duration;
const double integerRatio = round(ratio);
if(integerRatio > 0.0) {
ratio /= integerRatio;
constexpr double maximumAdjustment = 1.03;
splitAndSync = ratio <= maximumAdjustment && ratio >= 1 / maximumAdjustment;
}
}
self->_isSyncLocking = splitAndSync;
splitAndSync = self->_isSyncLocking = self->_scanSynchroniser.can_synchronise(self->_machine->crt_machine()->get_scan_status(), self->_refreshPeriod);
// If the time window is being split, run up to the split, then check out machine speed, possibly
// adjusting multiplier, then run after the split.
if(splitAndSync) {
self->_machine->crt_machine()->run_for((double)(self->_syncTime - lastTime) / 1e9);
// The host versus emulated ratio is calculated based on the current perceived frame duration of the machine.
// Either that number is exactly correct or it's already the result of some sort of low-pass filter. So there's
// no benefit to second guessing it here — just take it to be correct.
//
// ... with one slight caveat, which is that it is desireable to adjust phase here, to align vertical sync points.
// So the set speed multiplier may be adjusted slightly to aim for that.
double speed_multiplier = 1.0 / ratio;
if(scan_status.current_position > 0.0) {
constexpr double adjustmentRatio = 1.005;
if(scan_status.current_position < 0.5) speed_multiplier /= adjustmentRatio;
else speed_multiplier *= adjustmentRatio;
}
self->_speedMultiplier = (self->_speedMultiplier * 0.95) + (speed_multiplier * 0.05);
self->_machine->crt_machine()->set_speed_multiplier(self->_speedMultiplier);
self->_machine->crt_machine()->set_speed_multiplier(
self->_scanSynchroniser.next_speed_multiplier(self->_machine->crt_machine()->get_scan_status())
);
self->_machine->crt_machine()->run_for((double)(timeNow - self->_syncTime) / 1e9);
}
}

View File

@ -8,6 +8,7 @@
#include <algorithm>
#include <array>
#include <atomic>
#include <cstdio>
#include <cstdlib>
#include <cstring>
@ -21,11 +22,12 @@
#include "../../Analyser/Static/StaticAnalyser.hpp"
#include "../../Machines/Utility/MachineForTarget.hpp"
#include "../../ClockReceiver/TimeTypes.hpp"
#include "../../ClockReceiver/ScanSynchroniser.hpp"
#include "../../Machines/MediaTarget.hpp"
#include "../../Machines/CRTMachine.hpp"
#include "../../Concurrency/BestEffortUpdater.hpp"
#include "../../Activity/Observer.hpp"
#include "../../Outputs/OpenGL/Primitives/Rectangle.hpp"
#include "../../Outputs/OpenGL/ScanTarget.hpp"
@ -33,18 +35,111 @@
namespace {
struct BestEffortUpdaterDelegate: public Concurrency::BestEffortUpdater::Delegate {
Time::Seconds update(Concurrency::BestEffortUpdater *updater, Time::Seconds duration, bool did_skip_previous_update, int flags) override {
std::lock_guard<std::mutex> lock_guard(*machine_mutex);
return machine->crt_machine()->run_until(duration, flags);
struct MachineRunner {
MachineRunner() {
frame_lock_.clear();
}
~MachineRunner() {
stop();
}
void start() {
last_time_ = Time::nanos_now();
timer_ = SDL_AddTimer(timer_period, &sdl_callback, reinterpret_cast<void *>(this));
}
void stop() {
if(timer_) {
SDL_RemoveTimer(timer_);
timer_ = 0;
}
}
void signal_vsync() {
const auto now = Time::nanos_now();
const auto previous_vsync_time = vsync_time_.load();
vsync_time_.store(now);
// Update estimate of current frame time.
frame_time_average_ -= frame_times_[frame_time_pointer_];
frame_times_[frame_time_pointer_] = now - previous_vsync_time;
frame_time_average_ += frame_times_[frame_time_pointer_];
frame_time_pointer_ = (frame_time_pointer_ + 1) & (frame_times_.size() - 1);
_frame_period.store((1e9 * 32.0) / double(frame_time_average_));
}
void signal_did_draw() {
frame_lock_.clear();
}
void set_speed_multiplier(double multiplier) {
scan_synchroniser_.set_base_speed_multiplier(multiplier);
}
std::mutex *machine_mutex;
Machine::DynamicMachine *machine;
private:
SDL_TimerID timer_ = 0;
Time::Nanos last_time_ = 0;
std::atomic<Time::Nanos> vsync_time_;
std::atomic_flag frame_lock_;
Time::ScanSynchroniser scan_synchroniser_;
// A slightly clumsy means of trying to derive frame rate from calls to
// signal_vsync(); SDL_DisplayMode provides only an integral quantity
// whereas, empirically, it's fairly common for monitors to run at the
// NTSC-esque frame rates of 59.94Hz.
std::array<Time::Nanos, 32> frame_times_;
Time::Nanos frame_time_average_ = 0;
size_t frame_time_pointer_ = 0;
std::atomic<double> _frame_period;
static constexpr Uint32 timer_period = 4;
static Uint32 sdl_callback(Uint32 interval, void *param) {
reinterpret_cast<MachineRunner *>(param)->update();
return timer_period;
}
void update() {
const auto time_now = Time::nanos_now();
const auto vsync_time = vsync_time_.load();
std::unique_lock<std::mutex> lock_guard(*machine_mutex);
const auto crt_machine = machine->crt_machine();
bool split_and_sync = false;
if(last_time_ < vsync_time && time_now >= vsync_time) {
split_and_sync = scan_synchroniser_.can_synchronise(crt_machine->get_scan_status(), _frame_period);
}
if(split_and_sync) {
crt_machine->run_for(double(vsync_time - last_time_) / 1e9);
crt_machine->set_speed_multiplier(
scan_synchroniser_.next_speed_multiplier(crt_machine->get_scan_status())
);
// This is a bit of an SDL ugliness; wait here until the next frame is drawn.
// That is, unless and until I can think of a good way of running background
// updates via a share group — possibly an extra intermediate buffer is needed?
lock_guard.unlock();
while(frame_lock_.test_and_set());
lock_guard.lock();
crt_machine->run_for(double(time_now - vsync_time) / 1e9);
} else {
crt_machine->set_speed_multiplier(scan_synchroniser_.get_base_speed_multiplier());
crt_machine->run_for(double(time_now - last_time_) / 1e9);
}
last_time_ = time_now;
}
};
struct SpeakerDelegate: public Outputs::Speaker::Speaker::Delegate {
// This is set to a relatively large number for now.
// This is empirically the best that I can seem to do with SDL's timer precision.
static constexpr int buffer_size = 1024;
void speaker_did_complete_samples(Outputs::Speaker::Speaker *speaker, const std::vector<int16_t> &buffer) final {
@ -56,7 +151,6 @@ struct SpeakerDelegate: public Outputs::Speaker::Speaker::Delegate {
}
void audio_callback(Uint8 *stream, int len) {
updater->update();
std::lock_guard<std::mutex> lock_guard(audio_buffer_mutex_);
std::size_t sample_length = static_cast<std::size_t>(len) / sizeof(int16_t);
@ -75,7 +169,6 @@ struct SpeakerDelegate: public Outputs::Speaker::Speaker::Delegate {
}
SDL_AudioDeviceID audio_device;
Concurrency::BestEffortUpdater *updater;
std::mutex audio_buffer_mutex_;
std::vector<int16_t> audio_buffer_;
@ -391,8 +484,7 @@ int main(int argc, char *argv[]) {
return EXIT_FAILURE;
}
Concurrency::BestEffortUpdater updater;
BestEffortUpdaterDelegate best_effort_updater_delegate;
MachineRunner machine_runner;
SpeakerDelegate speaker_delegate;
// For vanilla SDL purposes, assume system ROMs can be found in one of:
@ -478,21 +570,18 @@ int main(int argc, char *argv[]) {
char *end;
double speed = strtod(speed_string, &end);
if(end-speed_string != strlen(speed_string)) {
if(size_t(end - speed_string) != strlen(speed_string)) {
std::cerr << "Unable to parse speed: " << speed_string << std::endl;
} else if(speed <= 0.0) {
std::cerr << "Cannot run at speed " << speed_string << "; speeds must be positive." << std::endl;
} else {
machine->crt_machine()->set_speed_multiplier(speed);
// TODO: what if not a 'CRT' machine? Likely rests on resolving this project's machine naming policy.
machine_runner.set_speed_multiplier(speed);
}
}
// Wire up the best-effort updater, its delegate, and the speaker delegate.
best_effort_updater_delegate.machine = machine.get();
best_effort_updater_delegate.machine_mutex = &machine_mutex;
speaker_delegate.updater = &updater;
updater.set_delegate(&best_effort_updater_delegate);
machine_runner.machine = machine.get();
machine_runner.machine_mutex = &machine_mutex;
// Attempt to set up video and audio.
if(SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO) < 0) {
@ -644,15 +733,21 @@ int main(int argc, char *argv[]) {
const bool uses_mouse = !!machine->mouse_machine();
bool should_quit = false;
Uint32 fullscreen_mode = 0;
machine_runner.start();
while(!should_quit) {
// Wait for vsync, draw a new frame and post a machine update.
// NB: machine_mutex is *not* currently locked, therefore it shouldn't
// be 'most' of the time.
SDL_GL_SwapWindow(window);
// Draw a new frame, indicating completion of the draw to the machine runner.
scan_target.update(int(window_width), int(window_height));
scan_target.draw(int(window_width), int(window_height));
if(activity_observer) activity_observer->draw();
updater.update();
machine_runner.signal_did_draw();
// Wait for presentation of that frame, posting a vsync.
SDL_GL_SwapWindow(window);
machine_runner.signal_vsync();
// NB: machine_mutex is *not* currently locked, therefore it shouldn't
// be 'most' of the time — assuming most of the time is spent waiting
// on vsync, anyway.
// Grab the machine lock and process all pending events.
std::lock_guard<std::mutex> lock_guard(machine_mutex);
@ -877,7 +972,7 @@ int main(int argc, char *argv[]) {
}
// Clean up.
updater.flush(); // Ensure no further updates will occur.
machine_runner.stop(); // Ensure no further updates will occur.
joysticks.clear();
SDL_DestroyWindow( window );
SDL_Quit();