1
0
mirror of https://github.com/TomHarte/CLK.git synced 2024-11-20 15:31:10 +00:00

Merge pull request #828 from TomHarte/LockFreeQueue

Completes LockFreeQueue branch.
This commit is contained in:
Thomas Harte 2020-07-30 21:46:56 -04:00 committed by GitHub
commit 9c97c0a906
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 224 additions and 166 deletions

View File

@ -25,7 +25,7 @@ Audio::Audio(Concurrency::DeferringAsyncTaskQueue &task_queue) : task_queue_(tas
void Audio::post_sample(uint8_t sample) {
// Store sample directly indexed by current write pointer; this ensures that collected samples
// directly map to volume and enabled/disabled states.
sample_queue_.buffer[sample_queue_.write_pointer] = sample;
sample_queue_.buffer[sample_queue_.write_pointer].store(sample, std::memory_order::memory_order_relaxed);
sample_queue_.write_pointer = (sample_queue_.write_pointer + 1) % sample_queue_.buffer.size();
}
@ -80,7 +80,7 @@ void Audio::get_samples(std::size_t number_of_samples, int16_t *target) {
// Determine the output level, and output that many samples.
// (Hoping that the compiler substitutes an effective memset16-type operation here).
const int16_t output_level = volume_multiplier_ * (int16_t(sample_queue_.buffer[sample_queue_.read_pointer]) - 128);
const int16_t output_level = volume_multiplier_ * (int16_t(sample_queue_.buffer[sample_queue_.read_pointer].load(std::memory_order::memory_order_relaxed)) - 128);
for(size_t c = 0; c < cycles_left_in_sample; ++c) {
target[c] = output_level;
}

View File

@ -63,7 +63,7 @@ class Audio: public ::Outputs::Speaker::SampleSource {
// A queue of fetched samples; read from by one thread,
// written to by another.
struct {
std::array<uint8_t, 740> buffer;
std::array<std::atomic<uint8_t>, 740> buffer;
size_t read_pointer = 0, write_pointer = 0;
} sample_queue_;

View File

@ -114,7 +114,8 @@ void ScanTarget::set_target_framebuffer(GLuint target_framebuffer) {
}
void ScanTarget::setup_pipeline() {
const auto data_type_size = Outputs::Display::size_for_data_type(modals_.input_data_type);
auto modals = BufferingScanTarget::modals();
const auto data_type_size = Outputs::Display::size_for_data_type(modals.input_data_type);
// Resize the texture only if required.
if(data_type_size != write_area_data_size()) {
@ -127,7 +128,7 @@ void ScanTarget::setup_pipeline() {
test_gl(glBindBuffer, GL_ARRAY_BUFFER, line_buffer_name_);
// Destroy or create a QAM buffer and shader, if appropriate.
const bool needs_qam_buffer = (modals_.display_type == DisplayType::CompositeColour || modals_.display_type == DisplayType::SVideo);
const bool needs_qam_buffer = (modals.display_type == DisplayType::CompositeColour || modals.display_type == DisplayType::SVideo);
if(needs_qam_buffer) {
if(!qam_chroma_texture_) {
qam_chroma_texture_ = std::make_unique<TextureTarget>(LineBufferWidth, LineBufferHeight, QAMChromaTextureUnit, GL_NEAREST, false);
@ -146,8 +147,8 @@ void ScanTarget::setup_pipeline() {
output_shader_ = conversion_shader();
enable_vertex_attributes(ShaderType::Conversion, *output_shader_);
set_uniforms(ShaderType::Conversion, *output_shader_);
output_shader_->set_uniform("origin", modals_.visible_area.origin.x, modals_.visible_area.origin.y);
output_shader_->set_uniform("size", modals_.visible_area.size.width, modals_.visible_area.size.height);
output_shader_->set_uniform("origin", modals.visible_area.origin.x, modals.visible_area.origin.y);
output_shader_->set_uniform("size", modals.visible_area.size.width, modals.visible_area.size.height);
output_shader_->set_uniform("textureName", GLint(UnprocessedLineBufferTextureUnit - GL_TEXTURE0));
output_shader_->set_uniform("qamTextureName", GLint(QAMChromaTextureUnit - GL_TEXTURE0));
@ -161,7 +162,8 @@ void ScanTarget::setup_pipeline() {
}
bool ScanTarget::is_soft_display_type() {
return modals_.display_type == DisplayType::CompositeColour || modals_.display_type == DisplayType::CompositeMonochrome;
const auto display_type = modals().display_type;
return display_type == DisplayType::CompositeColour || display_type == DisplayType::CompositeMonochrome;
}
void ScanTarget::update(int, int output_height) {
@ -186,10 +188,10 @@ void ScanTarget::update(int, int output_height) {
// Grab the new output list.
perform([=] (const OutputArea &area) {
// Establish the pipeline if necessary.
const bool did_setup_pipeline = modals_are_dirty_;
if(modals_are_dirty_) {
const auto new_modals = BufferingScanTarget::new_modals();
const bool did_setup_pipeline = bool(new_modals);
if(did_setup_pipeline) {
setup_pipeline();
modals_are_dirty_ = false;
}
// Determine the start time of this submission group and the number of lines it will contain.
@ -291,7 +293,7 @@ void ScanTarget::update(int, int output_height) {
// Determine the proper clear colour — this needs to be anything that describes black
// in the input colour encoding at use.
if(modals_.input_data_type == InputDataType::Luminance8Phase8) {
if(modals().input_data_type == InputDataType::Luminance8Phase8) {
// Supply both a zero luminance and a colour-subcarrier-disengaging phase.
test_gl(glClearColor, 0.0f, 1.0f, 0.0f, 0.0f);
} else {

View File

@ -151,7 +151,7 @@ class ScanTarget: public Outputs::Display::BufferingScanTarget {
// Storage for the various buffers.
std::vector<uint8_t> write_area_texture_;
std::array<Scan, 16384> scan_buffer_;
std::array<Scan, LineBufferHeight*5> scan_buffer_;
std::array<Line, LineBufferHeight> line_buffer_;
std::array<LineMetadata, LineBufferHeight> line_metadata_buffer_;
};

View File

@ -23,14 +23,15 @@ void ScanTarget::set_uniforms(ShaderType type, Shader &target) const {
// converge even allowing for the fact that they may not be spaced by exactly
// the expected distance. Cf. the stencil-powered logic for making sure all
// pixels are painted only exactly once per field.
const auto modals = BufferingScanTarget::modals();
switch(type) {
case ShaderType::Composition: break;
default:
target.set_uniform("rowHeight", GLfloat(1.05f / modals_.expected_vertical_lines));
target.set_uniform("scale", GLfloat(modals_.output_scale.x), GLfloat(modals_.output_scale.y) * modals_.aspect_ratio * (3.0f / 4.0f));
target.set_uniform("phaseOffset", GLfloat(modals_.input_data_tweaks.phase_linked_luminance_offset));
target.set_uniform("rowHeight", GLfloat(1.05f / modals.expected_vertical_lines));
target.set_uniform("scale", GLfloat(modals.output_scale.x), GLfloat(modals.output_scale.y) * modals.aspect_ratio * (3.0f / 4.0f));
target.set_uniform("phaseOffset", GLfloat(modals.input_data_tweaks.phase_linked_luminance_offset));
const float clocks_per_angle = float(modals_.cycles_per_line) * float(modals_.colour_cycle_denominator) / float(modals_.colour_cycle_numerator);
const float clocks_per_angle = float(modals.cycles_per_line) * float(modals.colour_cycle_denominator) / float(modals.colour_cycle_numerator);
GLfloat texture_offsets[4];
GLfloat angles[4];
for(int c = 0; c < 4; ++c) {
@ -41,7 +42,7 @@ void ScanTarget::set_uniforms(ShaderType type, Shader &target) const {
target.set_uniform("textureCoordinateOffsets", 1, 4, texture_offsets);
target.set_uniform("compositeAngleOffsets", 4, 1, angles);
switch(modals_.composite_colour_space) {
switch(modals.composite_colour_space) {
case ColourSpace::YIQ: {
const GLfloat rgbToYIQ[] = {0.299f, 0.596f, 0.211f, 0.587f, -0.274f, -0.523f, 0.114f, -0.322f, 0.312f};
const GLfloat yiqToRGB[] = {1.0f, 1.0f, 1.0f, 0.956f, -0.272f, -1.106f, 0.621f, -0.647f, 1.703f};
@ -61,9 +62,10 @@ void ScanTarget::set_uniforms(ShaderType type, Shader &target) const {
}
void ScanTarget::set_sampling_window(int output_width, int, Shader &target) {
if(modals_.display_type != DisplayType::CompositeColour) {
const float one_pixel_width = float(modals_.cycles_per_line) * modals_.visible_area.size.width / float(output_width);
const float clocks_per_angle = float(modals_.cycles_per_line) * float(modals_.colour_cycle_denominator) / float(modals_.colour_cycle_numerator);
const auto modals = BufferingScanTarget::modals();
if(modals.display_type != DisplayType::CompositeColour) {
const float one_pixel_width = float(modals.cycles_per_line) * modals.visible_area.size.width / float(output_width);
const float clocks_per_angle = float(modals.cycles_per_line) * float(modals.colour_cycle_denominator) / float(modals.colour_cycle_numerator);
GLfloat texture_offsets[4];
GLfloat angles[4];
for(int c = 0; c < 4; ++c) {
@ -191,8 +193,9 @@ std::vector<std::string> ScanTarget::bindings(ShaderType type) const {
std::string ScanTarget::sampling_function() const {
std::string fragment_shader;
const auto modals = BufferingScanTarget::modals();
if(modals_.display_type == DisplayType::SVideo) {
if(modals.display_type == DisplayType::SVideo) {
fragment_shader +=
"vec2 svideo_sample(vec2 coordinate, float angle) {";
} else {
@ -200,8 +203,8 @@ std::string ScanTarget::sampling_function() const {
"float composite_sample(vec2 coordinate, float angle) {";
}
const bool is_svideo = modals_.display_type == DisplayType::SVideo;
switch(modals_.input_data_type) {
const bool is_svideo = modals.display_type == DisplayType::SVideo;
switch(modals.input_data_type) {
case InputDataType::Luminance1:
case InputDataType::Luminance8:
// Easy, just copy across.
@ -255,6 +258,8 @@ std::string ScanTarget::sampling_function() const {
}
std::unique_ptr<Shader> ScanTarget::conversion_shader() const {
const auto modals = BufferingScanTarget::modals();
// Compose a vertex shader. If the display type is RGB, generate just the proper
// geometry position, plus a solitary textureCoordinate.
//
@ -301,7 +306,7 @@ std::unique_ptr<Shader> ScanTarget::conversion_shader() const {
"out vec4 fragColour;";
if(modals_.display_type != DisplayType::RGB) {
if(modals.display_type != DisplayType::RGB) {
vertex_shader +=
"out float compositeAngle;"
"out float compositeAmplitude;"
@ -316,7 +321,7 @@ std::unique_ptr<Shader> ScanTarget::conversion_shader() const {
"uniform vec4 compositeAngleOffsets;";
}
if(modals_.display_type == DisplayType::SVideo || modals_.display_type == DisplayType::CompositeColour) {
if(modals.display_type == DisplayType::SVideo || modals.display_type == DisplayType::CompositeColour) {
vertex_shader += "out vec2 qamTextureCoordinates[4];";
fragment_shader += "in vec2 qamTextureCoordinates[4];";
}
@ -332,7 +337,7 @@ std::unique_ptr<Shader> ScanTarget::conversion_shader() const {
"gl_Position = vec4(eyePosition, 0.0, 1.0);";
// For everything other than RGB, calculate the two composite outputs.
if(modals_.display_type != DisplayType::RGB) {
if(modals.display_type != DisplayType::RGB) {
vertex_shader +=
"compositeAngle = (mix(startCompositeAngle, endCompositeAngle, lateral) / 32.0) * 3.141592654;"
"compositeAmplitude = lineCompositeAmplitude / 255.0;"
@ -346,7 +351,7 @@ std::unique_ptr<Shader> ScanTarget::conversion_shader() const {
"textureCoordinates[2] = vec2(centreClock + textureCoordinateOffsets[2], lineY + 0.5) / textureSize(textureName, 0);"
"textureCoordinates[3] = vec2(centreClock + textureCoordinateOffsets[3], lineY + 0.5) / textureSize(textureName, 0);";
if((modals_.display_type == DisplayType::SVideo) || (modals_.display_type == DisplayType::CompositeColour)) {
if((modals.display_type == DisplayType::SVideo) || (modals.display_type == DisplayType::CompositeColour)) {
vertex_shader +=
"float centreCompositeAngle = abs(mix(startCompositeAngle, endCompositeAngle, lateral)) * 4.0 / 64.0;"
"centreCompositeAngle = floor(centreCompositeAngle);"
@ -360,7 +365,7 @@ std::unique_ptr<Shader> ScanTarget::conversion_shader() const {
// Compose a fragment shader.
if(modals_.display_type != DisplayType::RGB) {
if(modals.display_type != DisplayType::RGB) {
fragment_shader +=
"uniform mat3 lumaChromaToRGB;"
"uniform mat3 rgbToLumaChroma;";
@ -372,7 +377,7 @@ std::unique_ptr<Shader> ScanTarget::conversion_shader() const {
"void main(void) {"
"vec3 fragColour3;";
switch(modals_.display_type) {
switch(modals.display_type) {
case DisplayType::CompositeColour:
fragment_shader +=
"vec4 angles = compositeAngle + compositeAngleOffsets;"
@ -460,13 +465,13 @@ std::unique_ptr<Shader> ScanTarget::conversion_shader() const {
}
// Apply a brightness adjustment if requested.
if(fabs(modals_.brightness - 1.0f) > 0.05f) {
fragment_shader += "fragColour3 = fragColour3 * " + std::to_string(modals_.brightness) + ";";
if(fabs(modals.brightness - 1.0f) > 0.05f) {
fragment_shader += "fragColour3 = fragColour3 * " + std::to_string(modals.brightness) + ";";
}
// Apply a gamma correction if required.
if(fabs(output_gamma_ - modals_.intended_gamma) > 0.05f) {
const float gamma_ratio = output_gamma_ / modals_.intended_gamma;
if(fabs(output_gamma_ - modals.intended_gamma) > 0.05f) {
const float gamma_ratio = output_gamma_ / modals.intended_gamma;
fragment_shader += "fragColour3 = pow(fragColour3, vec3(" + std::to_string(gamma_ratio) + "));";
}
@ -482,6 +487,7 @@ std::unique_ptr<Shader> ScanTarget::conversion_shader() const {
}
std::unique_ptr<Shader> ScanTarget::composition_shader() const {
const auto modals = BufferingScanTarget::modals();
const std::string vertex_shader =
"#version 150\n"
@ -516,7 +522,7 @@ std::unique_ptr<Shader> ScanTarget::composition_shader() const {
"void main(void) {";
switch(modals_.input_data_type) {
switch(modals.input_data_type) {
case InputDataType::Luminance1:
fragment_shader += "fragColour = textureLod(textureName, textureCoordinate, 0).rrrr;";
break;
@ -556,7 +562,8 @@ std::unique_ptr<Shader> ScanTarget::composition_shader() const {
}
std::unique_ptr<Shader> ScanTarget::qam_separation_shader() const {
const bool is_svideo = modals_.display_type == DisplayType::SVideo;
const auto modals = BufferingScanTarget::modals();
const bool is_svideo = modals.display_type == DisplayType::SVideo;
// Sets up texture coordinates to run between startClock and endClock, mapping to
// coordinates that correlate with four times the absolute value of the composite angle.
@ -632,7 +639,7 @@ std::unique_ptr<Shader> ScanTarget::qam_separation_shader() const {
sampling_function() +
"void main(void) {";
if(modals_.display_type == DisplayType::SVideo) {
if(modals.display_type == DisplayType::SVideo) {
fragment_shader +=
"fragColour = vec4(svideo_sample(textureCoordinate, compositeAngle).rgg * vec3(1.0, cos(compositeAngle), sin(compositeAngle)), 1.0);";
} else {

View File

@ -11,6 +11,12 @@
#include <cassert>
#include <cstring>
// If enabled, this uses the producer lock to cover both production and consumption
// rather than attempting to proceed lockfree. This is primarily for diagnostic purposes;
// it allows empirical exploration of whether the logical and memory barriers that are
// meant to mediate things between the read pointers and the submit pointers are functioning.
#define ONE_BIG_LOCK
#define TextureAddressGetY(v) uint16_t((v) >> 11)
#define TextureAddressGetX(v) uint16_t((v) & 0x7ff)
#define TextureSub(a, b) (((a) - (b)) & 0x3fffff)
@ -20,41 +26,25 @@ using namespace Outputs::Display;
BufferingScanTarget::BufferingScanTarget() {
// Ensure proper initialisation of the two atomic pointer sets.
read_pointers_.store(write_pointers_);
submit_pointers_.store(write_pointers_);
read_pointers_.store(write_pointers_, std::memory_order::memory_order_relaxed);
submit_pointers_.store(write_pointers_, std::memory_order::memory_order_relaxed);
// Establish initial state for is_updating_.
is_updating_.clear();
is_updating_.clear(std::memory_order::memory_order_relaxed);
}
void BufferingScanTarget::end_scan() {
if(vended_scan_) {
std::lock_guard lock_guard(write_pointers_mutex_);
vended_scan_->data_y = TextureAddressGetY(vended_write_area_pointer_);
vended_scan_->line = write_pointers_.line;
vended_scan_->scan.end_points[0].data_offset += TextureAddressGetX(vended_write_area_pointer_);
vended_scan_->scan.end_points[1].data_offset += TextureAddressGetX(vended_write_area_pointer_);
#ifdef LOG_SCANS
if(vended_scan_->scan.composite_amplitude) {
std::cout << "S: ";
std::cout << vended_scan_->scan.end_points[0].composite_angle << "/" << vended_scan_->scan.end_points[0].data_offset << "/" << vended_scan_->scan.end_points[0].cycles_since_end_of_horizontal_retrace << " -> ";
std::cout << vended_scan_->scan.end_points[1].composite_angle << "/" << vended_scan_->scan.end_points[1].data_offset << "/" << vended_scan_->scan.end_points[1].cycles_since_end_of_horizontal_retrace << " => ";
std::cout << double(vended_scan_->scan.end_points[1].composite_angle - vended_scan_->scan.end_points[0].composite_angle) / (double(vended_scan_->scan.end_points[1].data_offset - vended_scan_->scan.end_points[0].data_offset) * 64.0f) << "/";
std::cout << double(vended_scan_->scan.end_points[1].composite_angle - vended_scan_->scan.end_points[0].composite_angle) / (double(vended_scan_->scan.end_points[1].cycles_since_end_of_horizontal_retrace - vended_scan_->scan.end_points[0].cycles_since_end_of_horizontal_retrace) * 64.0f);
std::cout << std::endl;
}
#endif
}
vended_scan_ = nullptr;
}
// MARK: - Producer; pixel data.
uint8_t *BufferingScanTarget::begin_data(size_t required_length, size_t required_alignment) {
assert(required_alignment);
// Acquire the standard producer lock, nominally over write_pointers_.
std::lock_guard lock_guard(producer_mutex_);
// If allocation has already failed on this line, continue the trend.
if(allocation_has_failed_) return nullptr;
std::lock_guard lock_guard(write_pointers_mutex_);
// If there isn't yet a write area then mark allocation as failed and finish.
if(!write_area_) {
allocation_has_failed_ = true;
return nullptr;
@ -76,7 +66,7 @@ uint8_t *BufferingScanTarget::begin_data(size_t required_length, size_t required
// Check whether that steps over the read pointer.
const auto end_address = TextureAddress(end_x, output_y);
const auto read_pointers = read_pointers_.load();
const auto read_pointers = read_pointers_.load(std::memory_order::memory_order_relaxed);
const auto end_distance = TextureSub(end_address, read_pointers.write_area);
const auto previous_distance = TextureSub(write_pointers_.write_area, read_pointers.write_area);
@ -100,9 +90,11 @@ uint8_t *BufferingScanTarget::begin_data(size_t required_length, size_t required
}
void BufferingScanTarget::end_data(size_t actual_length) {
if(allocation_has_failed_ || !data_is_allocated_) return;
// Acquire the producer lock.
std::lock_guard lock_guard(producer_mutex_);
std::lock_guard lock_guard(write_pointers_mutex_);
// Do nothing if no data write is actually ongoing.
if(allocation_has_failed_ || !data_is_allocated_) return;
// Bookend the start of the new data, to safeguard for precision errors in sampling.
memcpy(
@ -128,12 +120,57 @@ void BufferingScanTarget::end_data(size_t actual_length) {
data_is_allocated_ = false;
}
void BufferingScanTarget::will_change_owner() {
allocation_has_failed_ = true;
vended_scan_ = nullptr;
// MARK: - Producer; scans.
/// Attempts to vend storage for one further scan.
///
/// @returns a pointer to the scan to populate, or @c nullptr if allocation has already failed
/// on this line or if advancing the scan write pointer would reach the current read pointer.
/// Whenever @c nullptr is returned, no scan is left marked as vended.
Outputs::Display::ScanTarget::Scan *BufferingScanTarget::begin_scan() {
	std::lock_guard lock_guard(producer_mutex_);

	// Proceed only if nothing has yet failed to allocate on this line.
	if(!allocation_has_failed_) {
		// Work out where the scan write pointer would land after vending one more scan.
		const auto current_index = write_pointers_.scan_buffer;
		const auto advanced_index = decltype(write_pointers_.scan_buffer)((current_index + 1) % scan_buffer_size_);

		// There is room only if that doesn't run into the read pointer.
		const auto consumer_pointers = read_pointers_.load(std::memory_order::memory_order_relaxed);
		if(advanced_index != consumer_pointers.scan_buffer) {
			write_pointers_.scan_buffer = advanced_index;
			++provided_scans_;

			// Tag the scan with the current line and retain it so that end_scan can complete it.
			const auto vended = &scan_buffer_[current_index];
			vended->line = write_pointers_.line;
			vended_scan_ = vended;
			return &vended->scan;
		}

		// The scan buffer is full; record that so that subsequent calls also decline.
		allocation_has_failed_ = true;
	}

	// Either an earlier failure or a full buffer: nothing is vended.
	vended_scan_ = nullptr;
	return nullptr;
}
/// Finalises the scan most recently vended by @c begin_scan, if there is one, and then
/// clears the record of it. Does nothing if no scan is outstanding.
void BufferingScanTarget::end_scan() {
	std::lock_guard lock_guard(producer_mutex_);

	// No scan is afoot, so there is nothing to complete.
	if(!vended_scan_) return;

	// Record the y of the vended write area and the current line.
	auto &completed = *vended_scan_;
	completed.data_y = TextureAddressGetY(vended_write_area_pointer_);
	completed.line = write_pointers_.line;

	// Offset both end points' data offsets by the x of the vended write area.
	const auto data_x = TextureAddressGetX(vended_write_area_pointer_);
	completed.scan.end_points[0].data_offset += data_x;
	completed.scan.end_points[1].data_offset += data_x;

	vended_scan_ = nullptr;
}
// MARK: - Producer; lines.
void BufferingScanTarget::announce(Event event, bool is_visible, const Outputs::Display::ScanTarget::Scan::EndPoint &location, uint8_t composite_amplitude) {
std::lock_guard lock_guard(producer_mutex_);
// Forward the event to the display metrics tracker.
display_metrics_.announce_event(event);
@ -147,114 +184,95 @@ void BufferingScanTarget::announce(Event event, bool is_visible, const Outputs::
frame_is_complete_ = true;
}
// Proceed from here only if a change in visibility has occurred.
if(output_is_visible_ == is_visible) return;
output_is_visible_ = is_visible;
if(is_visible) {
const auto read_pointers = read_pointers_.load();
std::lock_guard lock_guard(write_pointers_mutex_);
const auto read_pointers = read_pointers_.load(std::memory_order::memory_order_relaxed);
// Commit the most recent line only if any scans fell on it.
// Otherwise there's no point outputting it, it'll contribute nothing.
if(provided_scans_) {
// Store metadata if concluding a previous line.
if(active_line_) {
line_metadata_buffer_[size_t(write_pointers_.line)].is_first_in_frame = is_first_in_frame_;
line_metadata_buffer_[size_t(write_pointers_.line)].previous_frame_was_complete = previous_frame_was_complete_;
is_first_in_frame_ = false;
}
// Attempt to allocate a new line; note allocation failure if necessary.
const auto next_line = uint16_t((write_pointers_.line + 1) % line_buffer_size_);
if(next_line == read_pointers.line) {
allocation_has_failed_ = true;
active_line_ = nullptr;
} else {
write_pointers_.line = next_line;
active_line_ = &line_buffer_[size_t(write_pointers_.line)];
}
provided_scans_ = 0;
// Attempt to allocate a new line, noting allocation failure if necessary.
const auto next_line = uint16_t((write_pointers_.line + 1) % line_buffer_size_);
if(next_line == read_pointers.line) {
allocation_has_failed_ = true;
}
provided_scans_ = 0;
if(active_line_) {
active_line_->end_points[0].x = location.x;
active_line_->end_points[0].y = location.y;
active_line_->end_points[0].cycles_since_end_of_horizontal_retrace = location.cycles_since_end_of_horizontal_retrace;
active_line_->end_points[0].composite_angle = location.composite_angle;
active_line_->line = write_pointers_.line;
active_line_->composite_amplitude = composite_amplitude;
// If there was space for a new line, establish its start.
if(!allocation_has_failed_) {
Line &active_line = line_buffer_[size_t(write_pointers_.line)];
active_line.end_points[0].x = location.x;
active_line.end_points[0].y = location.y;
active_line.end_points[0].cycles_since_end_of_horizontal_retrace = location.cycles_since_end_of_horizontal_retrace;
active_line.end_points[0].composite_angle = location.composite_angle;
active_line.line = write_pointers_.line;
active_line.composite_amplitude = composite_amplitude;
}
} else {
if(active_line_) {
// A successfully-allocated line is ending.
active_line_->end_points[1].x = location.x;
active_line_->end_points[1].y = location.y;
active_line_->end_points[1].cycles_since_end_of_horizontal_retrace = location.cycles_since_end_of_horizontal_retrace;
active_line_->end_points[1].composite_angle = location.composite_angle;
// Commit the most recent line only if any scans fell on it and all allocation was successful.
if(!allocation_has_failed_ && provided_scans_) {
// Store metadata.
LineMetadata &metadata = line_metadata_buffer_[size_t(write_pointers_.line)];
metadata.is_first_in_frame = is_first_in_frame_;
metadata.previous_frame_was_complete = previous_frame_was_complete_;
is_first_in_frame_ = false;
#ifdef LOG_LINES
if(active_line_->composite_amplitude) {
std::cout << "L: ";
std::cout << active_line_->end_points[0].composite_angle << "/" << active_line_->end_points[0].cycles_since_end_of_horizontal_retrace << " -> ";
std::cout << active_line_->end_points[1].composite_angle << "/" << active_line_->end_points[1].cycles_since_end_of_horizontal_retrace << " => ";
std::cout << (active_line_->end_points[1].composite_angle - active_line_->end_points[0].composite_angle) << "/" << (active_line_->end_points[1].cycles_since_end_of_horizontal_retrace - active_line_->end_points[0].cycles_since_end_of_horizontal_retrace) << " => ";
std::cout << double(active_line_->end_points[1].composite_angle - active_line_->end_points[0].composite_angle) / (double(active_line_->end_points[1].cycles_since_end_of_horizontal_retrace - active_line_->end_points[0].cycles_since_end_of_horizontal_retrace) * 64.0f);
std::cout << std::endl;
}
#endif
}
// Store actual line data.
Line &active_line = line_buffer_[size_t(write_pointers_.line)];
active_line.end_points[1].x = location.x;
active_line.end_points[1].y = location.y;
active_line.end_points[1].cycles_since_end_of_horizontal_retrace = location.cycles_since_end_of_horizontal_retrace;
active_line.end_points[1].composite_angle = location.composite_angle;
// A line is complete; submit latest updates if nothing failed.
if(allocation_has_failed_) {
// Reset all pointers to where they were; this also means
// the stencil won't be properly populated.
write_pointers_ = submit_pointers_.load();
frame_is_complete_ = false;
// Advance the line pointer.
write_pointers_.line = uint16_t((write_pointers_.line + 1) % line_buffer_size_);
// Update the submit pointers with all lines, scans and data written during this line.
submit_pointers_.store(write_pointers_, std::memory_order::memory_order_release);
} else {
// Advance submit pointer.
submit_pointers_.store(write_pointers_);
// Something failed, or there was nothing on the line anyway, so reset all pointers to where they
// were before this line. Mark frame as incomplete if this was an allocation failure.
write_pointers_ = submit_pointers_.load(std::memory_order::memory_order_relaxed);
frame_is_complete_ &= !allocation_has_failed_;
}
// Reset the allocation-has-failed flag for the next line
// and mark no line as active.
allocation_has_failed_ = false;
}
output_is_visible_ = is_visible;
}
// MARK: - Producer; other state.
/// Under the producer lock, forgets any scan currently vended and flags allocation as
/// having failed, so that the data and scan allocators decline further requests until
/// the flag is next reset.
void BufferingScanTarget::will_change_owner() {
	std::lock_guard lock_guard(producer_mutex_);

	// Drop any outstanding scan so that a later end_scan has nothing to complete.
	vended_scan_ = nullptr;

	// Mark the allocation failure.
	allocation_has_failed_ = true;
}
/// @returns a reference to the display metrics tracker, i.e. the object to which
/// @c announce forwards each event it receives.
const Outputs::Display::Metrics &BufferingScanTarget::display_metrics() {
return display_metrics_;
}
Outputs::Display::ScanTarget::Scan *BufferingScanTarget::begin_scan() {
if(allocation_has_failed_) return nullptr;
std::lock_guard lock_guard(write_pointers_mutex_);
const auto result = &scan_buffer_[write_pointers_.scan_buffer];
const auto read_pointers = read_pointers_.load();
// Advance the pointer.
const auto next_write_pointer = decltype(write_pointers_.scan_buffer)((write_pointers_.scan_buffer + 1) % scan_buffer_size_);
// Check whether that's too many.
if(next_write_pointer == read_pointers.scan_buffer) {
allocation_has_failed_ = true;
return nullptr;
}
write_pointers_.scan_buffer = next_write_pointer;
++provided_scans_;
// Fill in extra OpenGL-specific details.
result->line = write_pointers_.line;
vended_scan_ = result;
return &result->scan;
}
void BufferingScanTarget::set_write_area(uint8_t *base) {
std::lock_guard lock_guard(write_pointers_mutex_);
// This is a bit of a hack. This call needs the producer mutex and should be
// safe to call from a @c perform block in order to support all potential consumers.
// But the temporary hack of ONE_BIG_LOCK then implies that either I need a recursive
// mutex, or I have to make a coupling assumption about my caller. I've done the latter,
// because ONE_BIG_LOCK is really really meant to be temporary. I hope.
#ifndef ONE_BIG_LOCK
std::lock_guard lock_guard(producer_mutex_);
#endif
write_area_ = base;
data_type_size_ = Outputs::Display::size_for_data_type(modals_.input_data_type);
write_pointers_ = submit_pointers_ = read_pointers_ = PointerSet();
allocation_has_failed_ = true;
vended_scan_ = nullptr;
}
/// @returns the stored data type size, which @c set_write_area assigns from the input
/// data type of the modals in effect at the time of that call.
size_t BufferingScanTarget::write_area_data_size() const {
// TODO: can I guarantee this is safe without requiring that set_write_area
// be within an @c perform block?
return data_type_size_;
}
@ -265,12 +283,18 @@ void BufferingScanTarget::set_modals(Modals modals) {
});
}
// MARK: - Consumer.
void BufferingScanTarget::perform(const std::function<void(const OutputArea &)> &function) {
#ifdef ONE_BIG_LOCK
std::lock_guard lock_guard(producer_mutex_);
#endif
// The area to draw is that between the read pointers, representing wherever reading
// last stopped, and the submit pointers, representing all the new data that has been
// cleared for submission.
const auto submit_pointers = submit_pointers_.load();
const auto read_pointers = read_pointers_.load();
const auto submit_pointers = submit_pointers_.load(std::memory_order::memory_order_acquire);
const auto read_pointers = read_pointers_.load(std::memory_order::memory_order_relaxed);
OutputArea area;
@ -291,7 +315,7 @@ void BufferingScanTarget::perform(const std::function<void(const OutputArea &)>
is_updating_.clear(std::memory_order_release);
// Update the read pointers.
read_pointers_.store(submit_pointers);
read_pointers_.store(submit_pointers, std::memory_order::memory_order_relaxed);
}
void BufferingScanTarget::perform(const std::function<void(void)> &function) {
@ -310,3 +334,15 @@ void BufferingScanTarget::set_line_buffer(Line *line_buffer, LineMetadata *metad
line_metadata_buffer_ = metadata_buffer;
line_buffer_size_ = size;
}
/// Reports the modals only if they have been flagged dirty since they were last collected.
///
/// @returns a pointer to the current modals, clearing the dirty flag, if that flag was set;
/// @c nullptr otherwise, in which case no state is modified.
const Outputs::Display::ScanTarget::Modals *BufferingScanTarget::new_modals() {
	if(modals_are_dirty_) {
		// Hand the modals over exactly once per change.
		modals_are_dirty_ = false;
		return &modals_;
	}

	return nullptr;
}
/// @returns a reference to the currently-stored modals; unlike @c new_modals this
/// neither consults nor alters the dirty flag.
const Outputs::Display::ScanTarget::Modals &BufferingScanTarget::modals() const {
return modals_;
}

View File

@ -94,11 +94,6 @@ class BufferingScanTarget: public Outputs::Display::ScanTarget {
/// Sets the area of memory to use as line and line metadata buffers.
void set_line_buffer(Line *line_buffer, LineMetadata *metadata_buffer, size_t size);
// These are safe to read only within a `perform` block.
// TODO: can I do better than that?
Modals modals_;
bool modals_are_dirty_ = false;
/// Sets a new base address for the texture.
/// When called this will flush all existing data and load up the
/// new data size.
@ -133,6 +128,13 @@ class BufferingScanTarget: public Outputs::Display::ScanTarget {
/// Acts as per void(void) @c perform but also dequeues all latest available video output.
void perform(const std::function<void(const OutputArea &)> &);
/// @returns new Modals if any have been set since the last call to new_modals().
/// The caller must be within a @c perform block.
const Modals *new_modals();
/// @returns the current @c Modals.
const Modals &modals() const;
private:
// ScanTarget overrides.
void set_modals(Modals) final;
@ -160,14 +162,15 @@ class BufferingScanTarget: public Outputs::Display::ScanTarget {
int vended_write_area_pointer_ = 0;
// Ephemeral state that helps in line composition.
Line *active_line_ = nullptr;
int provided_scans_ = 0;
bool is_first_in_frame_ = true;
bool frame_is_complete_ = true;
bool previous_frame_was_complete_ = true;
// TODO: make this an implementation detail.
// ... and expose some sort of difference?
// By convention everything in the PointerSet points to the next instance
// of whatever it is that will be used. So a client should start with whatever
// is pointed to by the read pointers and carry until it gets to a value that
// is equal to whatever is in the submit pointers.
struct PointerSet {
// This constructor is here to appease GCC's interpretation of
// an ambiguity in the C++ standard; cf. https://stackoverflow.com/questions/17430377
@ -191,16 +194,21 @@ class BufferingScanTarget: public Outputs::Display::ScanTarget {
/// A pointer to the final thing currently cleared for submission.
std::atomic<PointerSet> submit_pointers_;
/// A pointer to the first thing not yet submitted for display.
/// A pointer to the first thing not yet submitted for display; this is
/// atomic since it also acts as the buffer into which the write_pointers_
/// may run and is therefore used by both producer and consumer.
std::atomic<PointerSet> read_pointers_;
/// This is used as a spinlock to guard `perform` calls.
std::atomic_flag is_updating_;
/// A mutex for gettng access to write_pointers_; access to write_pointers_,
/// data_type_size_ or write_area_texture_ is almost never contended, so this
/// is cheap for the main use case.
std::mutex write_pointers_mutex_;
/// A mutex for getting access to anything the producer modifies — i.e. the write_pointers_,
/// data_type_size_ and write_area_texture_, and all other state to do with capturing
/// data, scans and lines.
///
/// This is almost never contended. The main collision is a user-prompted change of modals while the
/// emulation thread is running.
std::mutex producer_mutex_;
/// A pointer to the next thing that should be provided to the caller for data.
PointerSet write_pointers_;
@ -213,6 +221,11 @@ class BufferingScanTarget: public Outputs::Display::ScanTarget {
Line *line_buffer_ = nullptr;
LineMetadata *line_metadata_buffer_ = nullptr;
size_t line_buffer_size_ = 0;
// Current modals and whether they've yet been returned
// from a call to @c new_modals.
Modals modals_;
bool modals_are_dirty_ = false;
};