Merge pull request #828 from TomHarte/LockFreeQueue

Completes LockFreeQueue branch.
2025-04-05 04:37:41 +00:00 · 2020-07-30 21:46:56 -04:00 · 2020-07-30 21:46:56 -04:00 · 9c97c0a906
commit 9c97c0a906
parent b14bedbe29 8cacab196d
7 changed files with 224 additions and 166 deletions
--- a/Machines/Apple/Macintosh/Audio.cpp
+++ b/Machines/Apple/Macintosh/Audio.cpp
@ -25,7 +25,7 @@ Audio::Audio(Concurrency::DeferringAsyncTaskQueue &task_queue) : task_queue_(tas
 void Audio::post_sample(uint8_t sample) {
 	// Store sample directly indexed by current write pointer; this ensures that collected samples
 	// directly map to volume and enabled/disabled states.
-	sample_queue_.buffer[sample_queue_.write_pointer] = sample;
+	sample_queue_.buffer[sample_queue_.write_pointer].store(sample, std::memory_order::memory_order_relaxed);
 	sample_queue_.write_pointer = (sample_queue_.write_pointer + 1) % sample_queue_.buffer.size();
 }

@ -80,7 +80,7 @@ void Audio::get_samples(std::size_t number_of_samples, int16_t *target) {

 		// Determine the output level, and output that many samples.
 		// (Hoping that the copiler substitutes an effective memset16-type operation here).
-		const int16_t output_level = volume_multiplier_ * (int16_t(sample_queue_.buffer[sample_queue_.read_pointer]) - 128);
+		const int16_t output_level = volume_multiplier_ * (int16_t(sample_queue_.buffer[sample_queue_.read_pointer].load(std::memory_order::memory_order_relaxed)) - 128);
 		for(size_t c = 0; c < cycles_left_in_sample; ++c) {
 			target[c] = output_level;
 		}
--- a/Machines/Apple/Macintosh/Audio.hpp
+++ b/Machines/Apple/Macintosh/Audio.hpp
@ -63,7 +63,7 @@ class Audio: public ::Outputs::Speaker::SampleSource {
 		// A queue of fetched samples; read from by one thread,
 		// written to by another.
 		struct {
-			std::array<uint8_t, 740> buffer;
+			std::array<std::atomic<uint8_t>, 740> buffer;
 			size_t read_pointer = 0, write_pointer = 0;
 		} sample_queue_;

--- a/Outputs/OpenGL/ScanTarget.cpp
+++ b/Outputs/OpenGL/ScanTarget.cpp
@ -114,7 +114,8 @@ void ScanTarget::set_target_framebuffer(GLuint target_framebuffer) {
 }

 void ScanTarget::setup_pipeline() {
-	const auto data_type_size = Outputs::Display::size_for_data_type(modals_.input_data_type);
+	auto modals = BufferingScanTarget::modals();
+	const auto data_type_size = Outputs::Display::size_for_data_type(modals.input_data_type);

 	// Resize the texture only if required.
 	if(data_type_size != write_area_data_size()) {
@ -127,7 +128,7 @@ void ScanTarget::setup_pipeline() {
 	test_gl(glBindBuffer, GL_ARRAY_BUFFER, line_buffer_name_);

 	// Destroy or create a QAM buffer and shader, if appropriate.
-	const bool needs_qam_buffer = (modals_.display_type == DisplayType::CompositeColour || modals_.display_type == DisplayType::SVideo);
+	const bool needs_qam_buffer = (modals.display_type == DisplayType::CompositeColour || modals.display_type == DisplayType::SVideo);
 	if(needs_qam_buffer) {
 		if(!qam_chroma_texture_) {
 			qam_chroma_texture_ = std::make_unique<TextureTarget>(LineBufferWidth, LineBufferHeight, QAMChromaTextureUnit, GL_NEAREST, false);
@ -146,8 +147,8 @@ void ScanTarget::setup_pipeline() {
 	output_shader_ = conversion_shader();
 	enable_vertex_attributes(ShaderType::Conversion, *output_shader_);
 	set_uniforms(ShaderType::Conversion, *output_shader_);
-	output_shader_->set_uniform("origin", modals_.visible_area.origin.x, modals_.visible_area.origin.y);
-	output_shader_->set_uniform("size", modals_.visible_area.size.width, modals_.visible_area.size.height);
+	output_shader_->set_uniform("origin", modals.visible_area.origin.x, modals.visible_area.origin.y);
+	output_shader_->set_uniform("size", modals.visible_area.size.width, modals.visible_area.size.height);
 	output_shader_->set_uniform("textureName", GLint(UnprocessedLineBufferTextureUnit - GL_TEXTURE0));
 	output_shader_->set_uniform("qamTextureName", GLint(QAMChromaTextureUnit - GL_TEXTURE0));

@ -161,7 +162,8 @@ void ScanTarget::setup_pipeline() {
 }

 bool ScanTarget::is_soft_display_type() {
-	return modals_.display_type == DisplayType::CompositeColour || modals_.display_type == DisplayType::CompositeMonochrome;
+	const auto display_type = modals().display_type;
+	return display_type == DisplayType::CompositeColour || display_type == DisplayType::CompositeMonochrome;
 }

 void ScanTarget::update(int, int output_height) {
@ -186,10 +188,10 @@ void ScanTarget::update(int, int output_height) {
 	// Grab the new output list.
 	perform([=] (const OutputArea &area) {
 		// Establish the pipeline if necessary.
-		const bool did_setup_pipeline = modals_are_dirty_;
-		if(modals_are_dirty_) {
+		const auto new_modals = BufferingScanTarget::new_modals();
+		const bool did_setup_pipeline = bool(new_modals);
+		if(did_setup_pipeline) {
 			setup_pipeline();
-			modals_are_dirty_ = false;
 		}

 		// Determine the start time of this submission group and the number of lines it will contain.
@ -291,7 +293,7 @@ void ScanTarget::update(int, int output_height) {

 				// Determine the proper clear colour — this needs to be anything that describes black
 				// in the input colour encoding at use.
-				if(modals_.input_data_type == InputDataType::Luminance8Phase8) {
+				if(modals().input_data_type == InputDataType::Luminance8Phase8) {
 					// Supply both a zero luminance and a colour-subcarrier-disengaging phase.
 					test_gl(glClearColor, 0.0f, 1.0f, 0.0f, 0.0f);
 				} else {
--- a/Outputs/OpenGL/ScanTarget.hpp
+++ b/Outputs/OpenGL/ScanTarget.hpp
@ -151,7 +151,7 @@ class ScanTarget: public Outputs::Display::BufferingScanTarget {

 		// Storage for the various buffers.
 		std::vector<uint8_t> write_area_texture_;
-		std::array<Scan, 16384> scan_buffer_;
+		std::array<Scan, LineBufferHeight*5> scan_buffer_;
 		std::array<Line, LineBufferHeight> line_buffer_;
 		std::array<LineMetadata, LineBufferHeight> line_metadata_buffer_;
 };
--- a/Outputs/OpenGL/ScanTargetGLSLFragments.cpp
+++ b/Outputs/OpenGL/ScanTargetGLSLFragments.cpp
@ -23,14 +23,15 @@ void ScanTarget::set_uniforms(ShaderType type, Shader &target) const {
 	// converge even allowing for the fact that they may not be spaced by exactly
 	// the expected distance. Cf. the stencil-powered logic for making sure all
 	// pixels are painted only exactly once per field.
+	const auto modals = BufferingScanTarget::modals();
 	switch(type) {
 		case ShaderType::Composition: break;
 		default:
-			target.set_uniform("rowHeight", GLfloat(1.05f / modals_.expected_vertical_lines));
-			target.set_uniform("scale", GLfloat(modals_.output_scale.x), GLfloat(modals_.output_scale.y) * modals_.aspect_ratio * (3.0f / 4.0f));
-			target.set_uniform("phaseOffset", GLfloat(modals_.input_data_tweaks.phase_linked_luminance_offset));
+			target.set_uniform("rowHeight", GLfloat(1.05f / modals.expected_vertical_lines));
+			target.set_uniform("scale", GLfloat(modals.output_scale.x), GLfloat(modals.output_scale.y) * modals.aspect_ratio * (3.0f / 4.0f));
+			target.set_uniform("phaseOffset", GLfloat(modals.input_data_tweaks.phase_linked_luminance_offset));

-			const float clocks_per_angle = float(modals_.cycles_per_line) * float(modals_.colour_cycle_denominator) / float(modals_.colour_cycle_numerator);
+			const float clocks_per_angle = float(modals.cycles_per_line) * float(modals.colour_cycle_denominator) / float(modals.colour_cycle_numerator);
 			GLfloat texture_offsets[4];
 			GLfloat angles[4];
 			for(int c = 0; c < 4; ++c) {
@ -41,7 +42,7 @@ void ScanTarget::set_uniforms(ShaderType type, Shader &target) const {
 			target.set_uniform("textureCoordinateOffsets", 1, 4, texture_offsets);
 			target.set_uniform("compositeAngleOffsets", 4, 1, angles);

-			switch(modals_.composite_colour_space) {
+			switch(modals.composite_colour_space) {
 				case ColourSpace::YIQ: {
 					const GLfloat rgbToYIQ[] = {0.299f, 0.596f, 0.211f, 0.587f, -0.274f, -0.523f, 0.114f, -0.322f, 0.312f};
 					const GLfloat yiqToRGB[] = {1.0f, 1.0f, 1.0f, 0.956f, -0.272f, -1.106f, 0.621f, -0.647f, 1.703f};
@ -61,9 +62,10 @@ void ScanTarget::set_uniforms(ShaderType type, Shader &target) const {
 }

 void ScanTarget::set_sampling_window(int output_width, int, Shader &target) {
-	if(modals_.display_type != DisplayType::CompositeColour) {
-		const float one_pixel_width = float(modals_.cycles_per_line) * modals_.visible_area.size.width / float(output_width);
-		const float clocks_per_angle = float(modals_.cycles_per_line) * float(modals_.colour_cycle_denominator) / float(modals_.colour_cycle_numerator);
+	const auto modals = BufferingScanTarget::modals();
+	if(modals.display_type != DisplayType::CompositeColour) {
+		const float one_pixel_width = float(modals.cycles_per_line) * modals.visible_area.size.width / float(output_width);
+		const float clocks_per_angle = float(modals.cycles_per_line) * float(modals.colour_cycle_denominator) / float(modals.colour_cycle_numerator);
 		GLfloat texture_offsets[4];
 		GLfloat angles[4];
 		for(int c = 0; c < 4; ++c) {
@ -191,8 +193,9 @@ std::vector<std::string> ScanTarget::bindings(ShaderType type) const {

 std::string ScanTarget::sampling_function() const {
 	std::string fragment_shader;
+	const auto modals = BufferingScanTarget::modals();

-	if(modals_.display_type == DisplayType::SVideo) {
+	if(modals.display_type == DisplayType::SVideo) {
 		fragment_shader +=
 			"vec2 svideo_sample(vec2 coordinate, float angle) {";
 	} else {
@ -200,8 +203,8 @@ std::string ScanTarget::sampling_function() const {
 			"float composite_sample(vec2 coordinate, float angle) {";
 	}

-	const bool is_svideo = modals_.display_type == DisplayType::SVideo;
-	switch(modals_.input_data_type) {
+	const bool is_svideo = modals.display_type == DisplayType::SVideo;
+	switch(modals.input_data_type) {
 		case InputDataType::Luminance1:
 		case InputDataType::Luminance8:
 			// Easy, just copy across.
@ -255,6 +258,8 @@ std::string ScanTarget::sampling_function() const {
 }

 std::unique_ptr<Shader> ScanTarget::conversion_shader() const {
+	const auto modals = BufferingScanTarget::modals();
+
 	// Compose a vertex shader. If the display type is RGB, generate just the proper
 	// geometry position, plus a solitary textureCoordinate.
 	//
@ -301,7 +306,7 @@ std::unique_ptr<Shader> ScanTarget::conversion_shader() const {

 		"out vec4 fragColour;";

-	if(modals_.display_type != DisplayType::RGB) {
+	if(modals.display_type != DisplayType::RGB) {
 		vertex_shader +=
 			"out float compositeAngle;"
 			"out float compositeAmplitude;"
@ -316,7 +321,7 @@ std::unique_ptr<Shader> ScanTarget::conversion_shader() const {
 			"uniform vec4 compositeAngleOffsets;";
 	}

-	if(modals_.display_type == DisplayType::SVideo || modals_.display_type == DisplayType::CompositeColour) {
+	if(modals.display_type == DisplayType::SVideo || modals.display_type == DisplayType::CompositeColour) {
 		vertex_shader += "out vec2 qamTextureCoordinates[4];";
 		fragment_shader += "in vec2 qamTextureCoordinates[4];";
 	}
@ -332,7 +337,7 @@ std::unique_ptr<Shader> ScanTarget::conversion_shader() const {
 			"gl_Position = vec4(eyePosition, 0.0, 1.0);";

 	// For everything other than RGB, calculate the two composite outputs.
-	if(modals_.display_type != DisplayType::RGB) {
+	if(modals.display_type != DisplayType::RGB) {
 		vertex_shader +=
 			"compositeAngle = (mix(startCompositeAngle, endCompositeAngle, lateral) / 32.0) * 3.141592654;"
 			"compositeAmplitude = lineCompositeAmplitude / 255.0;"
@ -346,7 +351,7 @@ std::unique_ptr<Shader> ScanTarget::conversion_shader() const {
 		"textureCoordinates[2] = vec2(centreClock + textureCoordinateOffsets[2], lineY + 0.5) / textureSize(textureName, 0);"
 		"textureCoordinates[3] = vec2(centreClock + textureCoordinateOffsets[3], lineY + 0.5) / textureSize(textureName, 0);";

-	if((modals_.display_type == DisplayType::SVideo) || (modals_.display_type == DisplayType::CompositeColour)) {
+	if((modals.display_type == DisplayType::SVideo) || (modals.display_type == DisplayType::CompositeColour)) {
 		vertex_shader +=
 			"float centreCompositeAngle = abs(mix(startCompositeAngle, endCompositeAngle, lateral)) * 4.0 / 64.0;"
 			"centreCompositeAngle = floor(centreCompositeAngle);"
@ -360,7 +365,7 @@ std::unique_ptr<Shader> ScanTarget::conversion_shader() const {

 	// Compose a fragment shader.

-	if(modals_.display_type != DisplayType::RGB) {
+	if(modals.display_type != DisplayType::RGB) {
 		fragment_shader +=
 			"uniform mat3 lumaChromaToRGB;"
 			"uniform mat3 rgbToLumaChroma;";
@ -372,7 +377,7 @@ std::unique_ptr<Shader> ScanTarget::conversion_shader() const {
 		"void main(void) {"
 			"vec3 fragColour3;";

-	switch(modals_.display_type) {
+	switch(modals.display_type) {
 		case DisplayType::CompositeColour:
 			fragment_shader +=
 				"vec4 angles = compositeAngle + compositeAngleOffsets;"
@ -460,13 +465,13 @@ std::unique_ptr<Shader> ScanTarget::conversion_shader() const {
 	}

 	// Apply a brightness adjustment if requested.
-	if(fabs(modals_.brightness - 1.0f) > 0.05f) {
-		fragment_shader += "fragColour3 = fragColour3 * " + std::to_string(modals_.brightness) + ";";
+	if(fabs(modals.brightness - 1.0f) > 0.05f) {
+		fragment_shader += "fragColour3 = fragColour3 * " + std::to_string(modals.brightness) + ";";
 	}

 	// Apply a gamma correction if required.
-	if(fabs(output_gamma_ - modals_.intended_gamma) > 0.05f) {
-		const float gamma_ratio = output_gamma_ / modals_.intended_gamma;
+	if(fabs(output_gamma_ - modals.intended_gamma) > 0.05f) {
+		const float gamma_ratio = output_gamma_ / modals.intended_gamma;
 		fragment_shader += "fragColour3 = pow(fragColour3, vec3(" + std::to_string(gamma_ratio) + "));";
 	}

@ -482,6 +487,7 @@ std::unique_ptr<Shader> ScanTarget::conversion_shader() const {
 }

 std::unique_ptr<Shader> ScanTarget::composition_shader() const {
+	const auto modals = BufferingScanTarget::modals();
 	const std::string vertex_shader =
 		"#version 150\n"

@ -516,7 +522,7 @@ std::unique_ptr<Shader> ScanTarget::composition_shader() const {

 		"void main(void) {";

-	switch(modals_.input_data_type) {
+	switch(modals.input_data_type) {
 		case InputDataType::Luminance1:
 			fragment_shader += "fragColour = textureLod(textureName, textureCoordinate, 0).rrrr;";
 		break;
@ -556,7 +562,8 @@ std::unique_ptr<Shader> ScanTarget::composition_shader() const {
 }

 std::unique_ptr<Shader> ScanTarget::qam_separation_shader() const {
-	const bool is_svideo = modals_.display_type == DisplayType::SVideo;
+	const auto modals = BufferingScanTarget::modals();
+	const bool is_svideo = modals.display_type == DisplayType::SVideo;

 	// Sets up texture coordinates to run between startClock and endClock, mapping to
 	// coordinates that correlate with four times the absolute value of the composite angle.
@ -632,7 +639,7 @@ std::unique_ptr<Shader> ScanTarget::qam_separation_shader() const {
 		sampling_function() +
 		"void main(void) {";

-	if(modals_.display_type == DisplayType::SVideo) {
+	if(modals.display_type == DisplayType::SVideo) {
 		fragment_shader +=
 			"fragColour = vec4(svideo_sample(textureCoordinate, compositeAngle).rgg * vec3(1.0, cos(compositeAngle), sin(compositeAngle)), 1.0);";
 	} else {
--- a/Outputs/ScanTargets/BufferingScanTarget.cpp
+++ b/Outputs/ScanTargets/BufferingScanTarget.cpp
@ -11,6 +11,12 @@
 #include <cassert>
 #include <cstring>

+// If enabled, this uses the producer lock to cover both production and consumption
+// rather than attempting to proceed lockfree. This is primarily for diagnostic purposes;
+// it allows empirical exploration of whether the logical and memory barriers that are
+// meant to mediate things between the read pointers and the submit pointers are functioning.
+#define ONE_BIG_LOCK
+
 #define TextureAddressGetY(v)	uint16_t((v) >> 11)
 #define TextureAddressGetX(v)	uint16_t((v) & 0x7ff)
 #define TextureSub(a, b)		(((a) - (b)) & 0x3fffff)
@ -20,41 +26,25 @@ using namespace Outputs::Display;

 BufferingScanTarget::BufferingScanTarget() {
 	// Ensure proper initialisation of the two atomic pointer sets.
-	read_pointers_.store(write_pointers_);
-	submit_pointers_.store(write_pointers_);
+	read_pointers_.store(write_pointers_, std::memory_order::memory_order_relaxed);
+	submit_pointers_.store(write_pointers_, std::memory_order::memory_order_relaxed);

 	// Establish initial state for is_updating_.
-	is_updating_.clear();
+	is_updating_.clear(std::memory_order::memory_order_relaxed);
 }

-void BufferingScanTarget::end_scan() {
-	if(vended_scan_) {
-		std::lock_guard lock_guard(write_pointers_mutex_);
-		vended_scan_->data_y = TextureAddressGetY(vended_write_area_pointer_);
-		vended_scan_->line = write_pointers_.line;
-		vended_scan_->scan.end_points[0].data_offset += TextureAddressGetX(vended_write_area_pointer_);
-		vended_scan_->scan.end_points[1].data_offset += TextureAddressGetX(vended_write_area_pointer_);
-
-#ifdef LOG_SCANS
-		if(vended_scan_->scan.composite_amplitude) {
-			std::cout << "S: ";
-			std::cout << vended_scan_->scan.end_points[0].composite_angle << "/" << vended_scan_->scan.end_points[0].data_offset << "/" << vended_scan_->scan.end_points[0].cycles_since_end_of_horizontal_retrace << " -> ";
-			std::cout << vended_scan_->scan.end_points[1].composite_angle << "/" << vended_scan_->scan.end_points[1].data_offset << "/" << vended_scan_->scan.end_points[1].cycles_since_end_of_horizontal_retrace << " => ";
-			std::cout << double(vended_scan_->scan.end_points[1].composite_angle - vended_scan_->scan.end_points[0].composite_angle) / (double(vended_scan_->scan.end_points[1].data_offset - vended_scan_->scan.end_points[0].data_offset) * 64.0f) << "/";
-			std::cout << double(vended_scan_->scan.end_points[1].composite_angle - vended_scan_->scan.end_points[0].composite_angle) / (double(vended_scan_->scan.end_points[1].cycles_since_end_of_horizontal_retrace - vended_scan_->scan.end_points[0].cycles_since_end_of_horizontal_retrace) * 64.0f);
-			std::cout << std::endl;
-		}
-#endif
-	}
-	vended_scan_ = nullptr;
-}
+// MARK: - Producer; pixel data.

 uint8_t *BufferingScanTarget::begin_data(size_t required_length, size_t required_alignment) {
 	assert(required_alignment);

+	// Acquire the standard producer lock, nominally over write_pointers_.
+	std::lock_guard lock_guard(producer_mutex_);
+
+	// If allocation has already failed on this line, continue the trend.
 	if(allocation_has_failed_) return nullptr;

-	std::lock_guard lock_guard(write_pointers_mutex_);
+	// If there isn't yet a write area then mark allocation as failed and finish.
 	if(!write_area_) {
 		allocation_has_failed_ = true;
 		return nullptr;
@ -76,7 +66,7 @@ uint8_t *BufferingScanTarget::begin_data(size_t required_length, size_t required

 	// Check whether that steps over the read pointer.
 	const auto end_address = TextureAddress(end_x, output_y);
-	const auto read_pointers = read_pointers_.load();
+	const auto read_pointers = read_pointers_.load(std::memory_order::memory_order_relaxed);

 	const auto end_distance = TextureSub(end_address, read_pointers.write_area);
 	const auto previous_distance = TextureSub(write_pointers_.write_area, read_pointers.write_area);
@ -100,9 +90,11 @@ uint8_t *BufferingScanTarget::begin_data(size_t required_length, size_t required
 }

 void BufferingScanTarget::end_data(size_t actual_length) {
-	if(allocation_has_failed_ || !data_is_allocated_) return;
+	// Acquire the producer lock.
+	std::lock_guard lock_guard(producer_mutex_);

-	std::lock_guard lock_guard(write_pointers_mutex_);
+	// Do nothing if no data write is actually ongoing.
+	if(allocation_has_failed_ || !data_is_allocated_) return;

 	// Bookend the start of the new data, to safeguard for precision errors in sampling.
 	memcpy(
@ -128,12 +120,57 @@ void BufferingScanTarget::end_data(size_t actual_length) {
 	data_is_allocated_ = false;
 }

-void BufferingScanTarget::will_change_owner() {
-	allocation_has_failed_ = true;
-	vended_scan_ = nullptr;
+// MARK: - Producer; scans.
+
+Outputs::Display::ScanTarget::Scan *BufferingScanTarget::begin_scan() {
+	std::lock_guard lock_guard(producer_mutex_);
+
+	// If there's already an allocation failure on this line, do no work.
+	if(allocation_has_failed_) {
+		vended_scan_ = nullptr;
+		return nullptr;
+	}
+
+	const auto result = &scan_buffer_[write_pointers_.scan_buffer];
+	const auto read_pointers = read_pointers_.load(std::memory_order::memory_order_relaxed);
+
+	// Advance the pointer.
+	const auto next_write_pointer = decltype(write_pointers_.scan_buffer)((write_pointers_.scan_buffer + 1) % scan_buffer_size_);
+
+	// Check whether that's too many.
+	if(next_write_pointer == read_pointers.scan_buffer) {
+		allocation_has_failed_ = true;
+		vended_scan_ = nullptr;
+		return nullptr;
+	}
+	write_pointers_.scan_buffer = next_write_pointer;
+	++provided_scans_;
+
+	// Fill in extra OpenGL-specific details.
+	result->line = write_pointers_.line;
+
+	vended_scan_ = result;
+	return &result->scan;
 }

+void BufferingScanTarget::end_scan() {
+	std::lock_guard lock_guard(producer_mutex_);
+
+	// Complete the scan only if one is afoot.
+	if(vended_scan_) {
+		vended_scan_->data_y = TextureAddressGetY(vended_write_area_pointer_);
+		vended_scan_->line = write_pointers_.line;
+		vended_scan_->scan.end_points[0].data_offset += TextureAddressGetX(vended_write_area_pointer_);
+		vended_scan_->scan.end_points[1].data_offset += TextureAddressGetX(vended_write_area_pointer_);
+		vended_scan_ = nullptr;
+	}
+}
+
+// MARK: - Producer; lines.
+
 void BufferingScanTarget::announce(Event event, bool is_visible, const Outputs::Display::ScanTarget::Scan::EndPoint &location, uint8_t composite_amplitude) {
+	std::lock_guard lock_guard(producer_mutex_);
+
 	// Forward the event to the display metrics tracker.
 	display_metrics_.announce_event(event);

@ -147,114 +184,95 @@ void BufferingScanTarget::announce(Event event, bool is_visible, const Outputs::
 		frame_is_complete_ = true;
 	}

+	// Proceed from here only if a change in visibility has occurred.
 	if(output_is_visible_ == is_visible) return;
+	output_is_visible_ = is_visible;
+
 	if(is_visible) {
-		const auto read_pointers = read_pointers_.load();
-		std::lock_guard lock_guard(write_pointers_mutex_);
+		const auto read_pointers = read_pointers_.load(std::memory_order::memory_order_relaxed);

-		// Commit the most recent line only if any scans fell on it.
-		// Otherwise there's no point outputting it, it'll contribute nothing.
-		if(provided_scans_) {
-			// Store metadata if concluding a previous line.
-			if(active_line_) {
-				line_metadata_buffer_[size_t(write_pointers_.line)].is_first_in_frame = is_first_in_frame_;
-				line_metadata_buffer_[size_t(write_pointers_.line)].previous_frame_was_complete = previous_frame_was_complete_;
-				is_first_in_frame_ = false;
-			}
-
-			// Attempt to allocate a new line; note allocation failure if necessary.
-			const auto next_line = uint16_t((write_pointers_.line + 1) % line_buffer_size_);
-			if(next_line == read_pointers.line) {
-				allocation_has_failed_ = true;
-				active_line_ = nullptr;
-			} else {
-				write_pointers_.line = next_line;
-				active_line_ = &line_buffer_[size_t(write_pointers_.line)];
-			}
-			provided_scans_ = 0;
+		// Attempt to allocate a new line, noting allocation failure if necessary.
+		const auto next_line = uint16_t((write_pointers_.line + 1) % line_buffer_size_);
+		if(next_line == read_pointers.line) {
+			allocation_has_failed_ = true;
 		}
+		provided_scans_ = 0;

-		if(active_line_) {
-			active_line_->end_points[0].x = location.x;
-			active_line_->end_points[0].y = location.y;
-			active_line_->end_points[0].cycles_since_end_of_horizontal_retrace = location.cycles_since_end_of_horizontal_retrace;
-			active_line_->end_points[0].composite_angle = location.composite_angle;
-			active_line_->line = write_pointers_.line;
-			active_line_->composite_amplitude = composite_amplitude;
+		// If there was space for a new line, establish its start.
+		if(!allocation_has_failed_) {
+			Line &active_line = line_buffer_[size_t(write_pointers_.line)];
+			active_line.end_points[0].x = location.x;
+			active_line.end_points[0].y = location.y;
+			active_line.end_points[0].cycles_since_end_of_horizontal_retrace = location.cycles_since_end_of_horizontal_retrace;
+			active_line.end_points[0].composite_angle = location.composite_angle;
+			active_line.line = write_pointers_.line;
+			active_line.composite_amplitude = composite_amplitude;
 		}
 	} else {
-		if(active_line_) {
-			// A successfully-allocated line is ending.
-			active_line_->end_points[1].x = location.x;
-			active_line_->end_points[1].y = location.y;
-			active_line_->end_points[1].cycles_since_end_of_horizontal_retrace = location.cycles_since_end_of_horizontal_retrace;
-			active_line_->end_points[1].composite_angle = location.composite_angle;
+		// Commit the most recent line only if any scans fell on it and all allocation was successful.
+		if(!allocation_has_failed_ && provided_scans_) {
+			// Store metadata.
+			LineMetadata &metadata = line_metadata_buffer_[size_t(write_pointers_.line)];
+			metadata.is_first_in_frame = is_first_in_frame_;
+			metadata.previous_frame_was_complete = previous_frame_was_complete_;
+			is_first_in_frame_ = false;

-#ifdef LOG_LINES
-			if(active_line_->composite_amplitude) {
-				std::cout << "L: ";
-				std::cout << active_line_->end_points[0].composite_angle << "/" << active_line_->end_points[0].cycles_since_end_of_horizontal_retrace << " -> ";
-				std::cout << active_line_->end_points[1].composite_angle << "/" << active_line_->end_points[1].cycles_since_end_of_horizontal_retrace << " => ";
-				std::cout << (active_line_->end_points[1].composite_angle - active_line_->end_points[0].composite_angle) << "/" << (active_line_->end_points[1].cycles_since_end_of_horizontal_retrace - active_line_->end_points[0].cycles_since_end_of_horizontal_retrace) << " => ";
-				std::cout << double(active_line_->end_points[1].composite_angle - active_line_->end_points[0].composite_angle) / (double(active_line_->end_points[1].cycles_since_end_of_horizontal_retrace - active_line_->end_points[0].cycles_since_end_of_horizontal_retrace) * 64.0f);
-				std::cout << std::endl;
-			}
-#endif
-		}
+			// Store actual line data.
+			Line &active_line = line_buffer_[size_t(write_pointers_.line)];
+			active_line.end_points[1].x = location.x;
+			active_line.end_points[1].y = location.y;
+			active_line.end_points[1].cycles_since_end_of_horizontal_retrace = location.cycles_since_end_of_horizontal_retrace;
+			active_line.end_points[1].composite_angle = location.composite_angle;

-		// A line is complete; submit latest updates if nothing failed.
-		if(allocation_has_failed_) {
-			// Reset all pointers to where they were; this also means
-			// the stencil won't be properly populated.
-			write_pointers_ = submit_pointers_.load();
-			frame_is_complete_ = false;
+			// Advance the line pointer.
+			write_pointers_.line = uint16_t((write_pointers_.line + 1) % line_buffer_size_);
+
+			// Update the submit pointers with all lines, scans and data written during this line.
+			submit_pointers_.store(write_pointers_, std::memory_order::memory_order_release);
 		} else {
-			// Advance submit pointer.
-			submit_pointers_.store(write_pointers_);
+			// Something failed, or there was nothing on the line anyway, so reset all pointers to where they
+			// were before this line. Mark frame as incomplete if this was an allocation failure.
+			write_pointers_ = submit_pointers_.load(std::memory_order::memory_order_relaxed);
+			frame_is_complete_ &= !allocation_has_failed_;
 		}
+
+		// Reset the allocation-has-failed flag so that the next line
+		// starts from a clean slate.
 		allocation_has_failed_ = false;
 	}
-	output_is_visible_ = is_visible;
+}
+
+// MARK: - Producer; other state.
+
+void BufferingScanTarget::will_change_owner() {
+	std::lock_guard lock_guard(producer_mutex_);
+	allocation_has_failed_ = true;
+	vended_scan_ = nullptr;
 }

 const Outputs::Display::Metrics &BufferingScanTarget::display_metrics() {
 	return display_metrics_;
 }

-Outputs::Display::ScanTarget::Scan *BufferingScanTarget::begin_scan() {
-	if(allocation_has_failed_) return nullptr;
-
-	std::lock_guard lock_guard(write_pointers_mutex_);
-
-	const auto result = &scan_buffer_[write_pointers_.scan_buffer];
-	const auto read_pointers = read_pointers_.load();
-
-	// Advance the pointer.
-	const auto next_write_pointer = decltype(write_pointers_.scan_buffer)((write_pointers_.scan_buffer + 1) % scan_buffer_size_);
-
-	// Check whether that's too many.
-	if(next_write_pointer == read_pointers.scan_buffer) {
-		allocation_has_failed_ = true;
-		return nullptr;
-	}
-	write_pointers_.scan_buffer = next_write_pointer;
-	++provided_scans_;
-
-	// Fill in extra OpenGL-specific details.
-	result->line = write_pointers_.line;
-
-	vended_scan_ = result;
-	return &result->scan;
-}
-
 void BufferingScanTarget::set_write_area(uint8_t *base) {
-	std::lock_guard lock_guard(write_pointers_mutex_);
+	// This is a bit of a hack. This call needs the producer mutex and should be
+	// safe to call from a @c perform block in order to support all potential consumers.
+	// But the temporary hack of ONE_BIG_LOCK then implies that either I need a recursive
+	// mutex, or I have to make a coupling assumption about my caller. I've done the latter,
+	// because ONE_BIG_LOCK is really really meant to be temporary. I hope.
+#ifndef ONE_BIG_LOCK
+	std::lock_guard lock_guard(producer_mutex_);
+#endif
 	write_area_ = base;
 	data_type_size_ = Outputs::Display::size_for_data_type(modals_.input_data_type);
 	write_pointers_ = submit_pointers_ = read_pointers_ = PointerSet();
+	allocation_has_failed_ = true;
+	vended_scan_ = nullptr;
 }

 size_t BufferingScanTarget::write_area_data_size() const {
+	// TODO: can I guarantee this is safe without requiring that set_write_area
+	// be within an @c perform block?
 	return data_type_size_;
 }

@ -265,12 +283,18 @@ void BufferingScanTarget::set_modals(Modals modals) {
 	});
 }

+// MARK: - Consumer.
+
 void BufferingScanTarget::perform(const std::function<void(const OutputArea &)> &function) {
+#ifdef ONE_BIG_LOCK
+	std::lock_guard lock_guard(producer_mutex_);
+#endif
+
 	// The area to draw is that between the read pointers, representing wherever reading
 	// last stopped, and the submit pointers, representing all the new data that has been
 	// cleared for submission.
-	const auto submit_pointers = submit_pointers_.load();
-	const auto read_pointers = read_pointers_.load();
+	const auto submit_pointers = submit_pointers_.load(std::memory_order::memory_order_acquire);
+	const auto read_pointers = read_pointers_.load(std::memory_order::memory_order_relaxed);

 	OutputArea area;

@ -291,7 +315,7 @@ void BufferingScanTarget::perform(const std::function<void(const OutputArea &)>
 	is_updating_.clear(std::memory_order_release);

 	// Update the read pointers.
-	read_pointers_.store(submit_pointers);
+	read_pointers_.store(submit_pointers, std::memory_order::memory_order_relaxed);
 }

 void BufferingScanTarget::perform(const std::function<void(void)> &function) {
@ -310,3 +334,15 @@ void BufferingScanTarget::set_line_buffer(Line *line_buffer, LineMetadata *metad
 	line_metadata_buffer_ = metadata_buffer;
 	line_buffer_size_ = size;
 }
+
+const Outputs::Display::ScanTarget::Modals *BufferingScanTarget::new_modals() {
+	if(!modals_are_dirty_) {
+		return nullptr;
+	}
+	modals_are_dirty_ = false;
+	return &modals_;
+}
+
+const Outputs::Display::ScanTarget::Modals &BufferingScanTarget::modals() const {
+	return modals_;
+}
--- a/Outputs/ScanTargets/BufferingScanTarget.hpp
+++ b/Outputs/ScanTargets/BufferingScanTarget.hpp
@ -94,11 +94,6 @@ class BufferingScanTarget: public Outputs::Display::ScanTarget {
 		/// Sets the area of memory to use as line and line metadata buffers.
 		void set_line_buffer(Line *line_buffer, LineMetadata *metadata_buffer, size_t size);

-		// These are safe to read only within a `perform` block.
-		// TODO: can I do better than that?
-		Modals modals_;
-		bool modals_are_dirty_ = false;
-
 		/// Sets a new base address for the texture.
 		/// When called this will flush all existing data and load up the
 		/// new data size.
@ -133,6 +128,13 @@ class BufferingScanTarget: public Outputs::Display::ScanTarget {
 		/// Acts as per void(void) @c perform but also dequeues all latest available video output.
 		void perform(const std::function<void(const OutputArea &)> &);

+		/// @returns new Modals if any have been set since the last call to new_modals().
+		///		The caller must be within a @c perform block.
+		const Modals *new_modals();
+
+		/// @returns the current @c Modals.
+		const Modals &modals() const;
+
 	private:
 		// ScanTarget overrides.
 		void set_modals(Modals) final;
@ -160,14 +162,15 @@ class BufferingScanTarget: public Outputs::Display::ScanTarget {
 		int vended_write_area_pointer_ = 0;

 		// Ephemeral state that helps in line composition.
-		Line *active_line_ = nullptr;
 		int provided_scans_ = 0;
 		bool is_first_in_frame_ = true;
 		bool frame_is_complete_ = true;
 		bool previous_frame_was_complete_ = true;

-		// TODO: make this an implementation detail.
-		// ... and expose some sort of difference?
+		// By convention everything in the PointerSet points to the next instance
+		// of whatever it is that will be used. So a client should start with whatever
+		// is pointed to by the read pointers and carry on until it reaches a value that
+		// is equal to whatever is in the submit pointers.
 		struct PointerSet {
 			// This constructor is here to appease GCC's interpretation of
 			// an ambiguity in the C++ standard; cf. https://stackoverflow.com/questions/17430377
@ -191,16 +194,21 @@ class BufferingScanTarget: public Outputs::Display::ScanTarget {
 		/// A pointer to the final thing currently cleared for submission.
 		std::atomic<PointerSet> submit_pointers_;

-		/// A pointer to the first thing not yet submitted for display.
+		/// A pointer to the first thing not yet submitted for display; this is
+		/// atomic since it also acts as the buffer into which the write_pointers_
+		/// may run and is therefore used by both producer and consumer.
 		std::atomic<PointerSet> read_pointers_;

 		/// This is used as a spinlock to guard `perform` calls.
 		std::atomic_flag is_updating_;

-		/// A mutex for gettng access to write_pointers_; access to write_pointers_,
-		/// data_type_size_ or write_area_texture_ is almost never contended, so this
-		/// is cheap for the main use case.
-		std::mutex write_pointers_mutex_;
+		/// A mutex for getting access to anything the producer modifies — i.e. the write_pointers_,
+		/// data_type_size_ and write_area_texture_, and all other state to do with capturing
+		/// data, scans and lines.
+		///
+		/// This is almost never contended. The main collision is a user-prompted change of modals while the
+		/// emulation thread is running.
+		std::mutex producer_mutex_;

 		/// A pointer to the next thing that should be provided to the caller for data.
 		PointerSet write_pointers_;
@ -213,6 +221,11 @@ class BufferingScanTarget: public Outputs::Display::ScanTarget {
 		Line *line_buffer_ = nullptr;
 		LineMetadata *line_metadata_buffer_ = nullptr;
 		size_t line_buffer_size_ = 0;
+
+		// Current modals and whether they've yet been returned
+		// from a call to @c new_modals.
+		Modals modals_;
+		bool modals_are_dirty_ = false;
 };