Makes all three PointerSets and is_updating_ private.

2024-11-26 08:49:37 +00:00 · 2020-07-26 17:27:19 -04:00 · 2020-07-26 17:27:19 -04:00 · 8bef7ff4c5
commit 8bef7ff4c5
parent f9f500c194
3 changed files with 384 additions and 327 deletions
--- a/Outputs/OpenGL/ScanTarget.cpp
+++ b/Outputs/OpenGL/ScanTarget.cpp
@ -97,23 +97,23 @@ ScanTarget::ScanTarget(GLuint target_framebuffer, float output_gamma) :
 }

 ScanTarget::~ScanTarget() {
-	while(is_updating_.test_and_set());
+	perform([=] {
 		glDeleteBuffers(1, &scan_buffer_name_);
 		glDeleteTextures(1, &write_area_texture_name_);
 		glDeleteVertexArrays(1, &scan_vertex_array_);
+	});
 }

 void ScanTarget::set_target_framebuffer(GLuint target_framebuffer) {
-	while(is_updating_.test_and_set());
+	perform([=] {
 		target_framebuffer_ = target_framebuffer;
-	is_updating_.clear();
+	});
 }

 void ScanTarget::setup_pipeline() {
 	const auto data_type_size = Outputs::Display::size_for_data_type(modals_.input_data_type);

-	// Ensure the lock guard here has a restricted scope; this is the only time that a thread
-	// other than the main owner of write_pointers_ may adjust it.
+	// Resize the texture only if required.
 	if(data_type_size != write_area_data_size()) {
 		write_area_texture_.resize(WriteAreaWidth*WriteAreaHeight*data_type_size);
 		set_write_area(write_area_texture_.data());
@ -162,8 +162,8 @@ bool ScanTarget::is_soft_display_type() {
 }

 void ScanTarget::update(int, int output_height) {
+	// If the GPU is still busy, don't wait; we'll catch it next time.
 	if(fence_ != nullptr) {
-		// if the GPU is still busy, don't wait; we'll catch it next time
 		if(glClientWaitSync(fence_, GL_SYNC_FLUSH_COMMANDS_BIT, 0) == GL_TIMEOUT_EXPIRED) {
 			display_metrics_.announce_draw_status(
 				lines_submitted_,
@ -173,15 +173,15 @@ void ScanTarget::update(int, int output_height) {
 		}
 		fence_ = nullptr;
 	}
+
+	// Update the display metrics.
 	display_metrics_.announce_draw_status(
 		lines_submitted_,
 		std::chrono::high_resolution_clock::now() - line_submission_begin_time_,
 		true);

-	// Spin until the is-drawing flag is reset; the wait sync above will deal
-	// with instances where waiting is inappropriate.
-	while(is_updating_.test_and_set());
-
+	// Grab the new output list.
+	perform([=] (const OutputArea &area) {
 		// Establish the pipeline if necessary.
 		const bool did_setup_pipeline = modals_are_dirty_;
 		if(modals_are_dirty_) {
@ -192,15 +192,11 @@ void ScanTarget::update(int, int output_height) {
 		// Determine the start time of this submission group.
 		line_submission_begin_time_ = std::chrono::high_resolution_clock::now();

-	// Grab the current read and submit pointers.
-	const auto submit_pointers = submit_pointers_.load();
-	const auto read_pointers = read_pointers_.load();
-
 		// Determine how many lines are about to be submitted.
-	lines_submitted_ = (read_pointers.line + line_buffer_.size() - submit_pointers.line) % line_buffer_.size();
+		lines_submitted_ = (area.end.line - area.start.line + line_buffer_.size()) % line_buffer_.size();

 		// Submit scans; only the new ones need to be communicated.
-	size_t new_scans = (submit_pointers.scan_buffer + scan_buffer_.size() - read_pointers.scan_buffer) % scan_buffer_.size();
+		size_t new_scans = (area.end.scan - area.start.scan + scan_buffer_.size()) % scan_buffer_.size();
 		if(new_scans) {
 			test_gl(glBindBuffer, GL_ARRAY_BUFFER, scan_buffer_name_);

@ -211,11 +207,12 @@ void ScanTarget::update(int, int output_height) {
 			);
 			test_gl_error();

-		if(read_pointers.scan_buffer < submit_pointers.scan_buffer) {
-			memcpy(destination, &scan_buffer_[read_pointers.scan_buffer], new_scans_size);
+			// Copy as a single chunk if possible; otherwise copy in two parts.
+			if(area.start.scan < area.end.scan) {
+				memcpy(destination, &scan_buffer_[size_t(area.start.scan)], new_scans_size);
 			} else {
-			const size_t first_portion_length = (scan_buffer_.size() - read_pointers.scan_buffer) * sizeof(Scan);
-			memcpy(destination, &scan_buffer_[read_pointers.scan_buffer], first_portion_length);
+				const size_t first_portion_length = (scan_buffer_.size() - area.start.scan) * sizeof(Scan);
+				memcpy(destination, &scan_buffer_[area.start.scan], first_portion_length);
 				memcpy(&destination[first_portion_length], &scan_buffer_[0], new_scans_size - first_portion_length);
 			}

@ -225,7 +222,7 @@ void ScanTarget::update(int, int output_height) {
 		}

 		// Submit texture.
-	if(submit_pointers.write_area != read_pointers.write_area) {
+		if(area.start.write_area_x != area.end.write_area_x || area.start.write_area_y != area.end.write_area_y) {
 			test_gl(glActiveTexture, SourceDataTextureUnit);
 			test_gl(glBindTexture, GL_TEXTURE_2D, write_area_texture_name_);

@ -249,37 +246,35 @@ void ScanTarget::update(int, int output_height) {
 				texture_exists_ = true;
 			}

-		const auto start_y = TextureAddressGetY(read_pointers.write_area);
-		const auto end_y = TextureAddressGetY(submit_pointers.write_area);
-		if(end_y >= start_y) {
+			if(area.end.write_area_y >= area.start.write_area_y) {
 				// Submit the direct region from the read pointer to the submit pointer.
 				test_gl(glTexSubImage2D,
 					GL_TEXTURE_2D, 0,
-				0, start_y,
+					0, area.start.write_area_y,
 					WriteAreaWidth,
-				1 + end_y - start_y,
+					1 + area.end.write_area_y - area.start.write_area_y,
 					formatForDepth(write_area_data_size()),
 					GL_UNSIGNED_BYTE,
-				&write_area_texture_[size_t(TextureAddress(0, start_y)) * write_area_data_size()]);
+					&write_area_texture_[size_t(TextureAddress(0, area.start.write_area_y)) * write_area_data_size()]);
 			} else {
 				// The circular buffer wrapped around; submit the data from the read pointer to the end of
 				// the buffer and from the start of the buffer to the submit pointer.
+				test_gl(glTexSubImage2D,
+					GL_TEXTURE_2D, 0,
+					0, area.start.write_area_y,
+					WriteAreaWidth,
+					WriteAreaHeight - area.start.write_area_y,
+					formatForDepth(write_area_data_size()),
+					GL_UNSIGNED_BYTE,
+					&write_area_texture_[size_t(TextureAddress(0, area.start.write_area_y)) * write_area_data_size()]);
 				test_gl(glTexSubImage2D,
 					GL_TEXTURE_2D, 0,
 					0, 0,
 					WriteAreaWidth,
-				1 + end_y,
+					1 + area.end.write_area_y,
 					formatForDepth(write_area_data_size()),
 					GL_UNSIGNED_BYTE,
 					&write_area_texture_[0]);
-			test_gl(glTexSubImage2D,
-				GL_TEXTURE_2D, 0,
-				0, start_y,
-				WriteAreaWidth,
-				WriteAreaHeight - start_y,
-				formatForDepth(write_area_data_size()),
-				GL_UNSIGNED_BYTE,
-				&write_area_texture_[size_t(TextureAddress(0, start_y)) * write_area_data_size()]);
 			}
 		}

@ -288,8 +283,8 @@ void ScanTarget::update(int, int output_height) {
 			unprocessed_line_texture_.bind_framebuffer();

 			// Clear newly-touched lines; that is everything from (read+1) to submit.
-		const uint16_t first_line_to_clear = (read_pointers.line+1)%line_buffer_.size();
-		const uint16_t final_line_to_clear = submit_pointers.line;
+			const auto first_line_to_clear = GLsizei((area.start.line+1)%line_buffer_.size());
+			const auto final_line_to_clear = GLsizei(area.end.line);
 			if(first_line_to_clear != final_line_to_clear) {
 				test_gl(glEnable, GL_SCISSOR_TEST);

@ -303,12 +298,12 @@ void ScanTarget::update(int, int output_height) {
 				}

 				if(first_line_to_clear < final_line_to_clear) {
-				test_gl(glScissor, 0, first_line_to_clear, unprocessed_line_texture_.get_width(), final_line_to_clear - first_line_to_clear);
+					test_gl(glScissor, GLint(0), GLint(first_line_to_clear), unprocessed_line_texture_.get_width(), final_line_to_clear - first_line_to_clear);
 					test_gl(glClear, GL_COLOR_BUFFER_BIT);
 				} else {
-				test_gl(glScissor, 0, 0, unprocessed_line_texture_.get_width(), final_line_to_clear);
+					test_gl(glScissor, GLint(0), GLint(0), unprocessed_line_texture_.get_width(), final_line_to_clear);
 					test_gl(glClear, GL_COLOR_BUFFER_BIT);
-				test_gl(glScissor, 0, first_line_to_clear, unprocessed_line_texture_.get_width(), unprocessed_line_texture_.get_height() - first_line_to_clear);
+					test_gl(glScissor, GLint(0), GLint(first_line_to_clear), unprocessed_line_texture_.get_width(), unprocessed_line_texture_.get_height() - first_line_to_clear);
 					test_gl(glClear, GL_COLOR_BUFFER_BIT);
 				}

@ -376,13 +371,13 @@ void ScanTarget::update(int, int output_height) {
 		}

 		// Figure out how many new lines are ready.
-	uint16_t new_lines = (submit_pointers.line + LineBufferHeight - read_pointers.line) % LineBufferHeight;
+		auto new_lines = (area.end.line - area.start.line + LineBufferHeight) % LineBufferHeight;
 		if(new_lines) {
 			// Prepare to output lines.
 			test_gl(glBindVertexArray, line_vertex_array_);

 			// Bind the accumulation framebuffer, unless there's going to be QAM work first.
-		if(!qam_separation_shader_ || line_metadata_buffer_[read_pointers.line].is_first_in_frame) {
+			if(!qam_separation_shader_ || line_metadata_buffer_[area.start.line].is_first_in_frame) {
 				accumulation_texture_->bind_framebuffer();
 				output_shader_->bind();

@ -399,13 +394,13 @@ void ScanTarget::update(int, int output_height) {
 			test_gl(glBindBuffer, GL_ARRAY_BUFFER, line_buffer_name_);

 			// Divide spans by which frame they're in.
-		uint16_t start_line = read_pointers.line;
+			auto start_line = area.start.line;
 			while(new_lines) {
 				uint16_t end_line = (start_line + 1) % LineBufferHeight;

 				// Find the limit of spans to draw in this cycle.
 				size_t lines = 1;
-			while(end_line != submit_pointers.line && !line_metadata_buffer_[end_line].is_first_in_frame) {
+				while(end_line != area.end.line && !line_metadata_buffer_[end_line].is_first_in_frame) {
 					end_line = (end_line + 1) % LineBufferHeight;
 					++lines;
 				}
@ -477,18 +472,14 @@ void ScanTarget::update(int, int output_height) {
 		// That's it for operations affecting the accumulation buffer.
 		is_drawing_to_accumulation_buffer_.clear();

-	// All data now having been spooled to the GPU, update the read pointers to
-	// the submit pointer location.
-	read_pointers_.store(submit_pointers);
-
 		// Grab a fence sync object to avoid busy waiting upon the next entry into this
 		// function, and reset the is_updating_ flag.
 		fence_ = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
-	is_updating_.clear();
+	});
 }

 void ScanTarget::draw(int output_width, int output_height) {
-	while(is_drawing_to_accumulation_buffer_.test_and_set());
+	while(is_drawing_to_accumulation_buffer_.test_and_set(std::memory_order_acquire));

 	if(accumulation_texture_) {
 		// Copy the accumulation texture to the target.
@ -501,5 +492,5 @@ void ScanTarget::draw(int output_width, int output_height) {
 		accumulation_texture_->draw(float(output_width) / float(output_height), 4.0f / 255.0f);
 	}

-	is_drawing_to_accumulation_buffer_.clear();
+	is_drawing_to_accumulation_buffer_.clear(std::memory_order_release);
 }
--- a/Outputs/ScanTargets/BufferingScanTarget.cpp
+++ b/Outputs/ScanTargets/BufferingScanTarget.cpp
@ -22,15 +22,6 @@ BufferingScanTarget::BufferingScanTarget() {
 	is_updating_.clear();
 }

-void BufferingScanTarget::set_modals(Modals modals) {
-	// Don't change the modals while drawing is ongoing; a previous set might be
-	// in the process of being established.
-	while(is_updating_.test_and_set());
-	modals_ = modals;
-	modals_are_dirty_ = true;
-	is_updating_.clear();
-}
-
 void BufferingScanTarget::end_scan() {
 	if(vended_scan_) {
 		std::lock_guard lock_guard(write_pointers_mutex_);
@ -261,3 +252,45 @@ void BufferingScanTarget::set_write_area(uint8_t *base) {
 size_t BufferingScanTarget::write_area_data_size() const {
 	return data_type_size_;
 }
+
+void BufferingScanTarget::set_modals(Modals modals) {
+	perform([=] {
+		modals_ = modals;
+		modals_are_dirty_ = true;
+	});
+}
+
+void BufferingScanTarget::perform(const std::function<void(const OutputArea &)> &function) {
+	// The area to draw is that between the read pointers, representing wherever reading
+	// last stopped, and the submit pointers, representing all the new data that has been
+	// cleared for submission.
+	const auto submit_pointers = submit_pointers_.load();
+	const auto read_pointers = read_pointers_.load();
+
+	OutputArea area;
+
+	area.start.line = read_pointers.line;
+	area.end.line = submit_pointers.line;
+
+	area.start.scan = read_pointers.scan_buffer;
+	area.end.scan = submit_pointers.scan_buffer;
+
+	area.start.write_area_x = TextureAddressGetX(read_pointers.write_area);
+	area.start.write_area_y = TextureAddressGetY(read_pointers.write_area);
+	area.end.write_area_x = TextureAddressGetX(submit_pointers.write_area);
+	area.end.write_area_y = TextureAddressGetY(submit_pointers.write_area);
+
+	// Perform only while holding the is_updating lock.
+	while(is_updating_.test_and_set(std::memory_order_acquire));
+	function(area);
+	is_updating_.clear(std::memory_order_release);
+
+	// Update the read pointers.
+	read_pointers_.store(submit_pointers);
+}
+
+void BufferingScanTarget::perform(const std::function<void(void)> &function) {
+	while(is_updating_.test_and_set(std::memory_order_acquire));
+	function();
+	is_updating_.clear(std::memory_order_release);
+}
--- a/Outputs/ScanTargets/BufferingScanTarget.hpp
+++ b/Outputs/ScanTargets/BufferingScanTarget.hpp
@ -14,6 +14,7 @@

 #include <array>
 #include <atomic>
+#include <functional>
 #include <mutex>
 #include <vector>

@ -95,10 +96,8 @@ class BufferingScanTarget: public Outputs::Display::ScanTarget {
 			bool previous_frame_was_complete;
 		};

-		// TODO: put this behind accessors.
-		std::atomic_flag is_updating_;
-
-		// These are safe to read if you have is_updating_.
+		// These are safe to read only within a `perform` block.
+		// TODO: can I do better than that?
 		Modals modals_;
 		bool modals_are_dirty_ = false;

@ -109,32 +108,35 @@ class BufferingScanTarget: public Outputs::Display::ScanTarget {
 		std::array<Line, LineBufferHeight> line_buffer_;
 		std::array<LineMetadata, LineBufferHeight> line_metadata_buffer_;

-		// TODO: make this an implementation detail.
-		// ... and expose some sort of difference?
-		struct PointerSet {
-			// This constructor is here to appease GCC's interpretation of
-			// an ambiguity in the C++ standard; cf. https://stackoverflow.com/questions/17430377
-			PointerSet() noexcept {}
-
-			// Squeezing this struct into 64 bits makes the std::atomics more likely
-			// to be lock free; they are under LLVM x86-64.
-			int write_area = 1;	// By convention this points to the vended area. Which is preceded by a guard pixel. So a sensible default construction is write_area = 1.
-			uint16_t scan_buffer = 0;
-			uint16_t line = 0;
-		};
-
-		/// A pointer to the final thing currently cleared for submission.
-		std::atomic<PointerSet> submit_pointers_;
-
-		/// A pointer to the first thing not yet submitted for display.
-		std::atomic<PointerSet> read_pointers_;
-
 		// Used by subclasses to set a new base address for the texture.
 		// When called this will flush all existing data and load up the
 		// new data size.
 		void set_write_area(uint8_t *base);
 		size_t write_area_data_size() const;

+		/// Defines a segment of data now ready for output, consisting of start and endpoints for:
+		///
+		///	(i) the region of the write area that has been modified; if the caller is using shared memory
+		/// for the write area then it can ignore this information;
+		///
+		/// (ii) the number of scans that have been completed; and
+		///
+		/// (iii) the number of lines that have been completed.
+		///
+		/// New write areas and scans are exposed only upon completion of the corresponding lines.
+		struct OutputArea {
+			struct Endpoint {
+				int write_area_x, write_area_y;
+				size_t scan;
+				size_t line;
+			};
+
+			Endpoint start, end;
+		};
+		void perform(const std::function<void(const OutputArea &)> &);
+		void perform(const std::function<void(void)> &);
+
+
 	private:
 		// ScanTarget overrides.
 		void set_modals(Modals) final;
@ -145,14 +147,6 @@ class BufferingScanTarget: public Outputs::Display::ScanTarget {
 		void announce(Event event, bool is_visible, const Outputs::Display::ScanTarget::Scan::EndPoint &location, uint8_t colour_burst_amplitude) final;
 		void will_change_owner() final;

-		/// A mutex for getting access to write_pointers_; access to write_pointers_,
-		/// data_type_size_ or write_area_texture_ is almost never contended, so this
-		/// is cheap for the main use case.
-		std::mutex write_pointers_mutex_;
-
-		/// A pointer to the next thing that should be provided to the caller for data.
-		PointerSet write_pointers_;
-
 		// Uses a texture to vend write areas.
 		uint8_t *write_area_ = nullptr;
 		size_t data_type_size_ = 0;
@ -175,6 +169,45 @@ class BufferingScanTarget: public Outputs::Display::ScanTarget {
 		bool is_first_in_frame_ = true;
 		bool frame_is_complete_ = true;
 		bool previous_frame_was_complete_ = true;
+
+		// TODO: make this an implementation detail.
+		// ... and expose some sort of difference?
+		struct PointerSet {
+			// This constructor is here to appease GCC's interpretation of
+			// an ambiguity in the C++ standard; cf. https://stackoverflow.com/questions/17430377
+			PointerSet() noexcept {}
+
+			// Squeezing this struct into 64 bits makes the std::atomics more likely
+			// to be lock free; they are under LLVM x86-64.
+
+			// Points to the vended area in the write area texture.
+			// The vended area is always preceded by a guard pixel, so a
+			// sensible default construction is write_area = 1.
+			int32_t write_area = 1;
+
+			// Points into the scan buffer.
+			uint16_t scan_buffer = 0;
+
+			// Points into the line buffer.
+			uint16_t line = 0;
+		};
+
+		/// A pointer to the final thing currently cleared for submission.
+		std::atomic<PointerSet> submit_pointers_;
+
+		/// A pointer to the first thing not yet submitted for display.
+		std::atomic<PointerSet> read_pointers_;
+
+		/// This is used as a spinlock to guard `perform` calls.
+		std::atomic_flag is_updating_;
+
+		/// A mutex for getting access to write_pointers_; access to write_pointers_,
+		/// data_type_size_ or write_area_texture_ is almost never contended, so this
+		/// is cheap for the main use case.
+		std::mutex write_pointers_mutex_;
+
+		/// A pointer to the next thing that should be provided to the caller for data.
+		PointerSet write_pointers_;
 };