Merge pull request #748 from TomHarte/SDLLatency

Introduces sync matching to the SDL version.
2025-08-10 11:25:23 +00:00 · 2020-02-10 23:38:24 -05:00
parent f1cd35fa16 886d923e30
commit 654f5b0478
7 changed files with 241 additions and 68 deletions
--- a/ClockReceiver/ScanSynchroniser.hpp
+++ b/ClockReceiver/ScanSynchroniser.hpp
@@ -0,0 +1,88 @@
+//
+//  ScanSynchroniser.hpp
+//  Clock Signal
+//
+//  Created by Thomas Harte on 09/02/2020.
+//  Copyright © 2020 Thomas Harte. All rights reserved.
+//
+
+#ifndef ScanSynchroniser_h
+#define ScanSynchroniser_h
+
+#include "../Outputs/ScanTarget.hpp"
+
+#include <cmath>
+
+namespace Time {
+
+/*!
+	Where an emulated machine is sufficiently close to a host machine's frame rate that a small nudge in
+	its speed multiplier will bring it into frame synchronisation, the ScanSynchroniser provides a sequence of
+	speed multipliers designed both to adjust the machine to the proper speed and, in a reasonable amount
+	of time, to bring it into phase.
+*/
+class ScanSynchroniser {
+	public:
+		/*!
+			@returns @c true if the emulated machine can be synchronised with the host frame output based on its current
+				@c scan_status and the host's @c frame_duration (in the same units as its field duration); @c false otherwise.
+		*/
+		bool can_synchronise(const Outputs::Display::ScanStatus &scan_status, double frame_duration) {
+			ratio_ = 1.0;
+			if(scan_status.field_duration_gradient < 0.00001 && scan_status.field_duration > 0.0) {
+				// Check out the machine's current frame time.
+				// If it's within 3% of a non-zero integer multiple of the
+				// display rate, mark this time window to be split over the sync.
+				ratio_ = (frame_duration * base_multiplier_) / scan_status.field_duration;
+				const double integer_ratio = std::round(ratio_);
+				if(integer_ratio > 0.0) {
+					ratio_ /= integer_ratio;
+					return ratio_ <= maximum_rate_adjustment && ratio_ >= 1.0 / maximum_rate_adjustment;
+				}
+			}
+			return false;
+		}
+
+		/*!
+			@returns The appropriate speed multiplier for the next frame based on the inputs previously supplied to @c can_synchronise.
+				Results are undefined if @c can_synchronise returned @c false.
+		*/
+		double next_speed_multiplier(const Outputs::Display::ScanStatus &scan_status) {
+			// The host versus emulated ratio is calculated based on the current perceived frame duration of the machine.
+			// Either that number is exactly correct or it's already the result of some sort of low-pass filter. So there's
+			// no benefit to second guessing it here — just take it to be correct.
+			//
+			// ... with one slight caveat, which is that it is desirable to adjust phase here, to align vertical sync points.
+			// So the set speed multiplier may be adjusted slightly to aim for that.
+			double speed_multiplier = 1.0 / (ratio_ / base_multiplier_);
+			if(scan_status.current_position > 0.0) {
+				if(scan_status.current_position < 0.5) speed_multiplier /= phase_adjustment_ratio;
+				else speed_multiplier *= phase_adjustment_ratio;
+			}
+			speed_multiplier_ = (speed_multiplier_ * 0.95) + (speed_multiplier * 0.05);
+			return speed_multiplier_ * base_multiplier_;
+		}
+
+		/*! Sets the base speed multiplier, which scales the results of both @c can_synchronise and @c next_speed_multiplier. */
+		void set_base_speed_multiplier(double multiplier) { base_multiplier_ = multiplier; }
+
+		/*!
+			@returns The multiplier most recently supplied to @c set_base_speed_multiplier, or 1.0 if none has been.
+		*/
+		double get_base_speed_multiplier() const { return base_multiplier_; }
+
+	private:
+		static constexpr double maximum_rate_adjustment = 1.03;
+		static constexpr double phase_adjustment_ratio = 1.005;
+
+		// Managed local state.
+		double speed_multiplier_ = 1.0;
+		double base_multiplier_ = 1.0;
+
+		// Temporary storage to bridge the can_synchronise -> next_speed_multiplier gap.
+		double ratio_ = 1.0;
+};
+
+}
+
+#endif /* ScanSynchroniser_h */
--- a/ClockReceiver/TimeTypes.hpp
+++ b/ClockReceiver/TimeTypes.hpp
@@ -9,9 +9,16 @@
 #ifndef TimeTypes_h
 #define TimeTypes_h

+#include <chrono>
+
 namespace Time {

 typedef double Seconds;
+typedef int64_t Nanos;
+
+inline Nanos nanos_now() {	// Uses the monotonic clock, so that the difference between two results is never negative.
+	return std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::steady_clock::now().time_since_epoch()).count();
+}

 }

--- a/Machines/Commodore/Vic-20/Vic20.cpp
+++ b/Machines/Commodore/Vic-20/Vic20.cpp
@@ -397,9 +397,10 @@ class ConcreteMachine:
 			memset(processor_write_memory_map_, 0, sizeof(processor_write_memory_map_));
 			memset(mos6560_bus_handler_.video_memory_map, 0, sizeof(mos6560_bus_handler_.video_memory_map));

-#define set_ram(baseaddr, length)	\
-	write_to_map(processor_read_memory_map_, &ram_[baseaddr], baseaddr, length);	\
-	write_to_map(processor_write_memory_map_, &ram_[baseaddr], baseaddr, length);
+#define set_ram(baseaddr, length)	do {	/* Maps the same RAM region for both reads and writes. */ \
+		write_to_map(processor_read_memory_map_, &ram_[baseaddr], baseaddr, length);	\
+		write_to_map(processor_write_memory_map_, &ram_[baseaddr], baseaddr, length);	\
+	} while(false)

 			// Add 6502-visible RAM as requested.
 			set_ram(0x0000, 0x0400);
--- a/Signal.xcodeproj/project.pbxproj
+++ b/Signal.xcodeproj/project.pbxproj
@@ -1129,6 +1129,7 @@
 		4B643F391D77AD1900D431D6 /* CSStaticAnalyser.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; name = CSStaticAnalyser.mm; path = StaticAnalyser/CSStaticAnalyser.mm; sourceTree = "<group>"; };
 		4B643F3C1D77AE5C00D431D6 /* CSMachine+Target.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "CSMachine+Target.h"; sourceTree = "<group>"; };
 		4B643F3E1D77B88000D431D6 /* DocumentController.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = DocumentController.swift; sourceTree = "<group>"; };
+		4B644ED023F0FB55006C0CC5 /* ScanSynchroniser.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = ScanSynchroniser.hpp; sourceTree = "<group>"; };
 		4B65085F22F4CF8D009C1100 /* Keyboard.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = Keyboard.cpp; sourceTree = "<group>"; };
 		4B680CE123A5553100451D43 /* 68000ComparativeTests.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = 68000ComparativeTests.mm; sourceTree = "<group>"; };
 		4B680CE323A555CA00451D43 /* 68000 Comparative Tests */ = {isa = PBXFileReference; lastKnownFileType = folder; path = "68000 Comparative Tests"; sourceTree = "<group>"; };
@@ -3726,6 +3727,7 @@
 				4BB06B211F316A3F00600C7A /* ForceInline.hpp */,
 				4B80214322EE7C3E00068002 /* JustInTime.hpp */,
 				4B449C942063389900A095C8 /* TimeTypes.hpp */,
+				4B644ED023F0FB55006C0CC5 /* ScanSynchroniser.hpp */,
 			);
 			name = ClockReceiver;
 			path = ../../ClockReceiver;
--- a/Signal.xcodeproj/xcshareddata/xcschemes/Clock
+++ b/Signal.xcodeproj/xcshareddata/xcschemes/Clock
@@ -54,11 +54,15 @@
      <CommandLineArguments>
         <CommandLineArgument
            argument = "/Users/thomasharte/Downloads/test-dsk-for-rw-and-50-60-hz/TEST-RW-60Hz.DSK"
+            isEnabled = "NO">
+         </CommandLineArgument>
+         <CommandLineArgument
+            argument = "&quot;/Users/thomasharte/Library/Mobile Documents/com~apple~CloudDocs/Desktop/Soft/Master System/R-Type (NTSC).sms&quot;"
            isEnabled = "YES">
         </CommandLineArgument>
         <CommandLineArgument
            argument = "--speed=5"
-            isEnabled = "YES">
+            isEnabled = "NO">
         </CommandLineArgument>
         <CommandLineArgument
            argument = "--rompath=/Users/thomasharte/Projects/CLK/ROMImages"
--- a/Signal/Machine/CSMachine.mm
+++ b/Signal/Machine/CSMachine.mm
@@ -21,6 +21,9 @@
 #include "Typer.hpp"
 #include "../../../../Activity/Observer.hpp"

+#include "../../../../ClockReceiver/TimeTypes.hpp"
+#include "../../../../ClockReceiver/ScanSynchroniser.hpp"
+
 #import "CSStaticAnalyser+TargetVector.h"
 #import "NSBundle+DataResource.h"
 #import "NSData+StdVector.h"
@@ -154,11 +157,12 @@ struct ActivityObserver: public Activity::Observer {
 	CSHighPrecisionTimer *_timer;
 	CGSize _pixelSize;
 	std::atomic_flag _isUpdating;
-	int64_t _syncTime;
-	int64_t _timeDiff;
+	Time::Nanos _syncTime;
+	Time::Nanos _timeDiff;
 	double _refreshPeriod;
 	BOOL _isSyncLocking;
-	double _speedMultiplier;
+
+	Time::ScanSynchroniser _scanSynchroniser;

 	NSTimer *_joystickTimer;

@@ -169,7 +173,6 @@ struct ActivityObserver: public Activity::Observer {
 	self = [super init];
 	if(self) {
 		_analyser = result;
-		_speedMultiplier = 1.0;

 		Machine::Error error;
 		std::vector<ROMMachine::ROM> missing_roms;
@@ -726,13 +729,13 @@ struct ActivityObserver: public Activity::Observer {

 - (void)openGLViewDisplayLinkDidFire:(CSOpenGLView *)view now:(const CVTimeStamp *)now outputTime:(const CVTimeStamp *)outputTime {
 	// First order of business: grab a timestamp.
-	const auto timeNow = std::chrono::high_resolution_clock::now().time_since_epoch().count();
+	const auto timeNow = Time::nanos_now();

 	CGSize pixelSize = view.backingSize;
 	BOOL isSyncLocking;
 	@synchronized(self) {
-		// Store a means to map from CVTimeStamp.hostTime to std::chrono::high_resolution_clock;
-		// there is an extremely dodgy assumption here that both are in the same units (and, below, that both as in ns).
+		// Store a means to map from CVTimeStamp.hostTime to Time::Nanos;
+		// there is an extremely dodgy assumption here that the former is in ns.
 		if(!_timeDiff) {
 			_timeDiff = int64_t(now->hostTime) - int64_t(timeNow);
 		}
@@ -761,11 +764,11 @@ struct ActivityObserver: public Activity::Observer {
 #define TICKS	600

 - (void)start {
-	__block auto lastTime = std::chrono::high_resolution_clock::now().time_since_epoch().count();
+	__block auto lastTime = Time::nanos_now();

 	_timer = [[CSHighPrecisionTimer alloc] initWithTask:^{
 		// Grab the time now and, therefore, the amount of time since the timer last fired.
-		const auto timeNow = std::chrono::high_resolution_clock::now().time_since_epoch().count();
+		const auto timeNow = Time::nanos_now();
 		const auto duration = timeNow - lastTime;

 		
@@ -774,42 +777,15 @@ struct ActivityObserver: public Activity::Observer {
 		@synchronized(self) {
 			// If this tick includes vsync then inspect the machine.
 			if(timeNow >= self->_syncTime && lastTime < self->_syncTime) {
-				// Grab the scan status and check out the machine's current frame time.
-				// If it's stable and within 3% of a non-zero integer multiple of the
-				// display rate, mark this time window to be split over the sync.
-				const auto scan_status = self->_machine->crt_machine()->get_scan_status();
-				double ratio = 1.0;
-				if(scan_status.field_duration_gradient < 0.00001) {
-					ratio = self->_refreshPeriod / scan_status.field_duration;
-					const double integerRatio = round(ratio);
-					if(integerRatio > 0.0) {
-						ratio /= integerRatio;
-
-						constexpr double maximumAdjustment = 1.03;
-						splitAndSync = ratio <= maximumAdjustment && ratio >= 1 / maximumAdjustment;
-					}
-				}
-				self->_isSyncLocking = splitAndSync;
+				splitAndSync = self->_isSyncLocking = self->_scanSynchroniser.can_synchronise(self->_machine->crt_machine()->get_scan_status(), self->_refreshPeriod);

 				// If the time window is being split, run up to the split, then check out machine speed, possibly
 				// adjusting multiplier, then run after the split.
 				if(splitAndSync) {
 					self->_machine->crt_machine()->run_for((double)(self->_syncTime - lastTime) / 1e9);
-
-					// The host versus emulated ratio is calculated based on the current perceived frame duration of the machine.
-					// Either that number is exactly correct or it's already the result of some sort of low-pass filter. So there's
-					// no benefit to second guessing it here — just take it to be correct.
-					//
-					// ... with one slight caveat, which is that it is desireable to adjust phase here, to align vertical sync points.
-					// So the set speed multiplier may be adjusted slightly to aim for that.
-					double speed_multiplier = 1.0 / ratio;
-					if(scan_status.current_position > 0.0) {
-						constexpr double adjustmentRatio = 1.005;
-						if(scan_status.current_position < 0.5) speed_multiplier /= adjustmentRatio;
-						else speed_multiplier *= adjustmentRatio;
-					}
-					self->_speedMultiplier = (self->_speedMultiplier * 0.95) + (speed_multiplier * 0.05);
-					self->_machine->crt_machine()->set_speed_multiplier(self->_speedMultiplier);
+					self->_machine->crt_machine()->set_speed_multiplier(
+						self->_scanSynchroniser.next_speed_multiplier(self->_machine->crt_machine()->get_scan_status())
+					);
 					self->_machine->crt_machine()->run_for((double)(timeNow - self->_syncTime) / 1e9);
 				}
 			}
--- a/OSBindings/SDL/main.cpp
+++ b/OSBindings/SDL/main.cpp
@@ -8,6 +8,7 @@

 #include <algorithm>
 #include <array>
+#include <atomic>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
@@ -21,11 +22,12 @@
 #include "../../Analyser/Static/StaticAnalyser.hpp"
 #include "../../Machines/Utility/MachineForTarget.hpp"

+#include "../../ClockReceiver/TimeTypes.hpp"
+#include "../../ClockReceiver/ScanSynchroniser.hpp"
+
 #include "../../Machines/MediaTarget.hpp"
 #include "../../Machines/CRTMachine.hpp"

-#include "../../Concurrency/BestEffortUpdater.hpp"
-
 #include "../../Activity/Observer.hpp"
 #include "../../Outputs/OpenGL/Primitives/Rectangle.hpp"
 #include "../../Outputs/OpenGL/ScanTarget.hpp"
@@ -33,18 +35,111 @@

 namespace {

-struct BestEffortUpdaterDelegate: public Concurrency::BestEffortUpdater::Delegate {
-	Time::Seconds update(Concurrency::BestEffortUpdater *updater, Time::Seconds duration, bool did_skip_previous_update, int flags) override {
-		std::lock_guard<std::mutex> lock_guard(*machine_mutex);
-		return machine->crt_machine()->run_until(duration, flags);
+struct MachineRunner {
+	MachineRunner() { frame_lock_.clear(); }
+	~MachineRunner() { stop(); }
+
+	/// Begins periodic machine updates, driven by an SDL timer.
+	void start() {
+		last_time_ = Time::nanos_now();
+		timer_ = SDL_AddTimer(timer_period, &sdl_callback, reinterpret_cast<void *>(this));
+	}
+
+	/// Removes the SDL timer, if one is currently installed.
+	void stop() {
+		if(timer_) {
+			SDL_RemoveTimer(timer_);
+			timer_ = 0;
+		}
+	}
+
+	/// Notes that a vsync has just occurred and refreshes the estimate of host frame duration.
+	void signal_vsync() {
+		const auto now = Time::nanos_now();
+		const auto previous_vsync_time = vsync_time_.exchange(now);
+		if(!previous_vsync_time) return;	// First vsync: there is no interval to measure yet.
+
+		// Maintain a rolling sum of the most recent frame times, in nanoseconds.
+		frame_time_average_ -= frame_times_[frame_time_pointer_];
+		frame_times_[frame_time_pointer_] = now - previous_vsync_time;
+		frame_time_average_ += frame_times_[frame_time_pointer_];
+		frame_time_pointer_ = (frame_time_pointer_ + 1) & (frame_times_.size() - 1);
+		if(frame_time_count_ < frame_times_.size()) ++frame_time_count_;
+
+		// can_synchronise expects seconds, so publish the mean of the samples collected so far.
+		_frame_period.store(double(frame_time_average_) / (1e9 * double(frame_time_count_)));
+	}
+
+	/// Indicates that a frame has been drawn, releasing any update() that is waiting on that.
+	void signal_did_draw() { frame_lock_.clear(); }
+
+	/// Sets the base speed at which the machine runs when not being adjusted for synchronisation.
+	void set_speed_multiplier(double multiplier) {
+		scan_synchroniser_.set_base_speed_multiplier(multiplier);
 	}

 	std::mutex *machine_mutex;
 	Machine::DynamicMachine *machine;
+
+	private:
+		SDL_TimerID timer_ = 0;
+		Time::Nanos last_time_ = 0;
+		std::atomic<Time::Nanos> vsync_time_{0};	// Zero means that no vsync has been signalled yet.
+		std::atomic_flag frame_lock_;
+
+		Time::ScanSynchroniser scan_synchroniser_;
+
+		// A rolling window of the intervals between recent calls to signal_vsync(), used to
+		// estimate the real frame duration; SDL_DisplayMode provides only an integral refresh
+		// rate whereas, empirically, it's fairly common for monitors to run at 59.94Hz.
+		std::array<Time::Nanos, 32> frame_times_{};	// The size must be a power of two; signal_vsync() wraps by masking.
+		Time::Nanos frame_time_average_ = 0;		// Despite its name, this is the sum of frame_times_.
+		size_t frame_time_pointer_ = 0;
+		size_t frame_time_count_ = 0;				// The number of populated entries in frame_times_.
+		std::atomic<double> _frame_period{0.0};		// Mean host frame duration, in seconds.
+
+		static constexpr Uint32 timer_period = 4;
+		/// SDL timer entry point; @c param is the MachineRunner that installed the timer.
+		static Uint32 sdl_callback(Uint32 interval, void *param) {
+			reinterpret_cast<MachineRunner *>(param)->update();
+			return timer_period;
+		}
+
+		/// Runs the machine for the time elapsed since the previous call, splitting that time at vsync if synchronising.
+		void update() {
+			const auto time_now = Time::nanos_now();
+			const auto vsync_time = vsync_time_.load();
+
+			std::unique_lock<std::mutex> lock_guard(*machine_mutex);
+			const auto crt_machine = machine->crt_machine();
+
+			bool split_and_sync = false;
+			if(last_time_ < vsync_time && time_now >= vsync_time) {
+				split_and_sync = scan_synchroniser_.can_synchronise(crt_machine->get_scan_status(), _frame_period.load());
+			}
+
+			if(split_and_sync) {
+				crt_machine->run_for(double(vsync_time - last_time_) / 1e9);
+				crt_machine->set_speed_multiplier(scan_synchroniser_.next_speed_multiplier(crt_machine->get_scan_status()));
+
+				// This is a bit of an SDL ugliness; wait here until the next frame is drawn.
+				// That is, unless and until I can think of a good way of running background
+				// updates via a share group — possibly an extra intermediate buffer is needed?
+				lock_guard.unlock();
+				while(frame_lock_.test_and_set());
+				lock_guard.lock();
+
+				crt_machine->run_for(double(time_now - vsync_time) / 1e9);
+			} else {
+				crt_machine->set_speed_multiplier(scan_synchroniser_.get_base_speed_multiplier());
+				crt_machine->run_for(double(time_now - last_time_) / 1e9);
+			}
+			last_time_ = time_now;
+		}
 };

 struct SpeakerDelegate: public Outputs::Speaker::Speaker::Delegate {
-	// This is set to a relatively large number for now.
+	// This is empirically the best that I can seem to do with SDL's timer precision.
 	static constexpr int buffer_size = 1024;

 	void speaker_did_complete_samples(Outputs::Speaker::Speaker *speaker, const std::vector<int16_t> &buffer) final {
@@ -56,7 +151,6 @@ struct SpeakerDelegate: public Outputs::Speaker::Speaker::Delegate {
 	}

 	void audio_callback(Uint8 *stream, int len) {
-		updater->update();
 		std::lock_guard<std::mutex> lock_guard(audio_buffer_mutex_);

 		std::size_t sample_length = static_cast<std::size_t>(len) / sizeof(int16_t);
@@ -75,7 +169,6 @@ struct SpeakerDelegate: public Outputs::Speaker::Speaker::Delegate {
 	}

 	SDL_AudioDeviceID audio_device;
-	Concurrency::BestEffortUpdater *updater;

 	std::mutex audio_buffer_mutex_;
 	std::vector<int16_t> audio_buffer_;
@@ -391,8 +484,7 @@ int main(int argc, char *argv[]) {
 		return EXIT_FAILURE;
 	}

-	Concurrency::BestEffortUpdater updater;
-	BestEffortUpdaterDelegate best_effort_updater_delegate;
+	MachineRunner machine_runner;
 	SpeakerDelegate speaker_delegate;

 	// For vanilla SDL purposes, assume system ROMs can be found in one of:
@@ -478,21 +570,18 @@ int main(int argc, char *argv[]) {
 		char *end;
 		double speed = strtod(speed_string, &end);

-		if(end-speed_string != strlen(speed_string)) {
+		if(size_t(end - speed_string) != strlen(speed_string)) {
 			std::cerr << "Unable to parse speed: " << speed_string << std::endl;
 		} else if(speed <= 0.0) {
 			std::cerr << "Cannot run at speed " << speed_string << "; speeds must be positive." << std::endl;
 		} else {
-			machine->crt_machine()->set_speed_multiplier(speed);
-			// TODO: what if not a 'CRT' machine? Likely rests on resolving this project's machine naming policy.
+			machine_runner.set_speed_multiplier(speed);
 		}
 	}

 	// Wire up the best-effort updater, its delegate, and the speaker delegate.
-	best_effort_updater_delegate.machine = machine.get();
-	best_effort_updater_delegate.machine_mutex = &machine_mutex;
-	speaker_delegate.updater = &updater;
-	updater.set_delegate(&best_effort_updater_delegate);
+	machine_runner.machine = machine.get();
+	machine_runner.machine_mutex = &machine_mutex;

 	// Attempt to set up video and audio.
 	if(SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO) < 0) {
@@ -644,15 +733,21 @@ int main(int argc, char *argv[]) {
 	const bool uses_mouse = !!machine->mouse_machine();
 	bool should_quit = false;
 	Uint32 fullscreen_mode = 0;
+	machine_runner.start();
 	while(!should_quit) {
-		// Wait for vsync, draw a new frame and post a machine update.
-		// NB: machine_mutex is *not* currently locked, therefore it shouldn't
-		// be 'most' of the time.
-		SDL_GL_SwapWindow(window);
+		// Draw a new frame, indicating completion of the draw to the machine runner.
 		scan_target.update(int(window_width), int(window_height));
 		scan_target.draw(int(window_width), int(window_height));
 		if(activity_observer) activity_observer->draw();
-		updater.update();
+		machine_runner.signal_did_draw();
+
+		// Wait for presentation of that frame, posting a vsync.
+		SDL_GL_SwapWindow(window);
+		machine_runner.signal_vsync();
+
+		// NB: machine_mutex is *not* held at this point, so it should be unlocked
+		// for 'most' of the time — assuming most of the time is spent waiting
+		// on vsync, anyway.

 		// Grab the machine lock and process all pending events.
 		std::lock_guard<std::mutex> lock_guard(machine_mutex);
@@ -877,7 +972,7 @@ int main(int argc, char *argv[]) {
 	}

 	// Clean up.
-	updater.flush();	// Ensure no further updates will occur.
+	machine_runner.stop();	// Ensure no further updates will occur.
 	joysticks.clear();
 	SDL_DestroyWindow( window );
 	SDL_Quit();