// // ScanTarget.cpp // Clock Signal // // Created by Thomas Harte on 05/11/2018. // Copyright © 2018 Thomas Harte. All rights reserved. // #include "ScanTarget.hpp" #include "OpenGL.hpp" #include "Primitives/Rectangle.hpp" #include #include #include using namespace Outputs::Display::OpenGL; #ifndef NDEBUG //#define LOG_LINES //#define LOG_SCANS #endif namespace { /// The texture unit from which to source input data. constexpr GLenum SourceDataTextureUnit = GL_TEXTURE0; /// The texture unit which contains raw line-by-line composite, S-Video or RGB data. constexpr GLenum UnprocessedLineBufferTextureUnit = GL_TEXTURE1; /// The texture unit that contains a pre-lowpass-filtered but fixed-resolution version of the chroma signal; /// this is used when processing composite video only, and for chroma information only. Luminance is calculated /// at the fidelity permitted by the output target, but my efforts to separate, demodulate and filter /// chrominance during output without either massively sampling or else incurring significant high-frequency /// noise that sampling reduces into a Moire, have proven to be unsuccessful for the time being. constexpr GLenum QAMChromaTextureUnit = GL_TEXTURE2; /// The texture unit that contains the current display. constexpr GLenum AccumulationTextureUnit = GL_TEXTURE3; constexpr GLint internalFormatForDepth(std::size_t depth) { switch(depth) { default: return GL_FALSE; case 1: return GL_R8UI; case 2: return GL_RG8UI; case 3: return GL_RGB8UI; case 4: return GL_RGBA8UI; } } constexpr GLenum formatForDepth(std::size_t depth) { switch(depth) { default: return GL_FALSE; case 1: return GL_RED_INTEGER; case 2: return GL_RG_INTEGER; case 3: return GL_RGB_INTEGER; case 4: return GL_RGBA_INTEGER; } } Log::Logger logger; } template void ScanTarget::allocate_buffer(const T &array, GLuint &buffer_name, GLuint &vertex_array_name) { const auto buffer_size = array.size() * sizeof(array[0]); test_gl(glGenBuffers, 1, &buffer_name); test_gl(glBindBuffer, GL_ARRAY_BUFFER, buffer_name); test_gl(glBufferData, GL_ARRAY_BUFFER, GLsizeiptr(buffer_size), NULL, GL_STREAM_DRAW); test_gl(glGenVertexArrays, 1, &vertex_array_name); test_gl(glBindVertexArray, vertex_array_name); test_gl(glBindBuffer, GL_ARRAY_BUFFER, buffer_name); } ScanTarget::ScanTarget(GLuint target_framebuffer, float output_gamma) : target_framebuffer_(target_framebuffer), output_gamma_(output_gamma), unprocessed_line_texture_(LineBufferWidth, LineBufferHeight, UnprocessedLineBufferTextureUnit, GL_NEAREST, false), full_display_rectangle_(-1.0f, -1.0f, 2.0f, 2.0f) { set_scan_buffer(scan_buffer_.data(), scan_buffer_.size()); set_line_buffer(line_buffer_.data(), line_metadata_buffer_.data(), line_buffer_.size()); // Allocate space for the scans and lines. allocate_buffer(scan_buffer_, scan_buffer_name_, scan_vertex_array_); allocate_buffer(line_buffer_, line_buffer_name_, line_vertex_array_); // TODO: if this is OpenGL 4.4 or newer, use glBufferStorage rather than glBufferData // and specify GL_MAP_PERSISTENT_BIT. Then map the buffer now, and let the client // write straight into it. test_gl(glGenTextures, 1, &write_area_texture_name_); test_gl(glBlendFunc, GL_SRC_ALPHA, GL_CONSTANT_COLOR); test_gl(glBlendColor, 0.4f, 0.4f, 0.4f, 1.0f); // Establish initial state for is_drawing_to_accumulation_buffer_. is_drawing_to_accumulation_buffer_.clear(); } ScanTarget::~ScanTarget() { perform([=] { glDeleteBuffers(1, &scan_buffer_name_); glDeleteTextures(1, &write_area_texture_name_); glDeleteVertexArrays(1, &scan_vertex_array_); }); } void ScanTarget::set_target_framebuffer(GLuint target_framebuffer) { perform([=] { target_framebuffer_ = target_framebuffer; }); } void ScanTarget::setup_pipeline() { auto modals = BufferingScanTarget::modals(); const auto data_type_size = Outputs::Display::size_for_data_type(modals.input_data_type); // Resize the texture only if required. const size_t required_size = WriteAreaWidth*WriteAreaHeight*data_type_size; if(required_size != write_area_data_size()) { write_area_texture_.resize(required_size); set_write_area(write_area_texture_.data()); } // Prepare to bind line shaders. test_gl(glBindVertexArray, line_vertex_array_); test_gl(glBindBuffer, GL_ARRAY_BUFFER, line_buffer_name_); // Destroy or create a QAM buffer and shader, if appropriate. const bool needs_qam_buffer = (modals.display_type == DisplayType::CompositeColour || modals.display_type == DisplayType::SVideo); if(needs_qam_buffer) { if(!qam_chroma_texture_) { qam_chroma_texture_ = std::make_unique(LineBufferWidth, LineBufferHeight, QAMChromaTextureUnit, GL_NEAREST, false); } qam_separation_shader_ = qam_separation_shader(); enable_vertex_attributes(ShaderType::QAMSeparation, *qam_separation_shader_); set_uniforms(ShaderType::QAMSeparation, *qam_separation_shader_); qam_separation_shader_->set_uniform("textureName", GLint(UnprocessedLineBufferTextureUnit - GL_TEXTURE0)); } else { qam_chroma_texture_.reset(); qam_separation_shader_.reset(); } // Establish an output shader. output_shader_ = conversion_shader(); enable_vertex_attributes(ShaderType::Conversion, *output_shader_); set_uniforms(ShaderType::Conversion, *output_shader_); output_shader_->set_uniform("origin", modals.visible_area.origin.x, modals.visible_area.origin.y); output_shader_->set_uniform("size", modals.visible_area.size.width, modals.visible_area.size.height); output_shader_->set_uniform("textureName", GLint(UnprocessedLineBufferTextureUnit - GL_TEXTURE0)); output_shader_->set_uniform("qamTextureName", GLint(QAMChromaTextureUnit - GL_TEXTURE0)); // Establish an input shader. input_shader_ = composition_shader(); test_gl(glBindVertexArray, scan_vertex_array_); test_gl(glBindBuffer, GL_ARRAY_BUFFER, scan_buffer_name_); enable_vertex_attributes(ShaderType::Composition, *input_shader_); set_uniforms(ShaderType::Composition, *input_shader_); input_shader_->set_uniform("textureName", GLint(SourceDataTextureUnit - GL_TEXTURE0)); } bool ScanTarget::is_soft_display_type() { const auto display_type = modals().display_type; return display_type == DisplayType::CompositeColour || display_type == DisplayType::CompositeMonochrome; } void ScanTarget::update(int, int output_height) { // If the GPU is still busy, don't wait; we'll catch it next time. if(fence_ != nullptr) { if(glClientWaitSync(fence_, GL_SYNC_FLUSH_COMMANDS_BIT, 0) == GL_TIMEOUT_EXPIRED) { display_metrics_.announce_draw_status( lines_submitted_, std::chrono::high_resolution_clock::now() - line_submission_begin_time_, false); return; } fence_ = nullptr; } // Update the display metrics. display_metrics_.announce_draw_status( lines_submitted_, std::chrono::high_resolution_clock::now() - line_submission_begin_time_, true); // Grab the new output list. perform([=] { OutputArea area = get_output_area(); // Establish the pipeline if necessary. const auto new_modals = BufferingScanTarget::new_modals(); const bool did_setup_pipeline = bool(new_modals); if(did_setup_pipeline) { setup_pipeline(); } // Determine the start time of this submission group and the number of lines it will contain. line_submission_begin_time_ = std::chrono::high_resolution_clock::now(); lines_submitted_ = (area.end.line - area.start.line + line_buffer_.size()) % line_buffer_.size(); // Submit scans; only the new ones need to be communicated. size_t new_scans = (area.end.scan - area.start.scan + scan_buffer_.size()) % scan_buffer_.size(); if(new_scans) { test_gl(glBindBuffer, GL_ARRAY_BUFFER, scan_buffer_name_); // Map only the required portion of the buffer. const size_t new_scans_size = new_scans * sizeof(Scan); uint8_t *const destination = static_cast( glMapBufferRange(GL_ARRAY_BUFFER, 0, GLsizeiptr(new_scans_size), GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT) ); test_gl_error(); // Copy as a single chunk if possible; otherwise copy in two parts. if(area.start.scan < area.end.scan) { memcpy(destination, &scan_buffer_[size_t(area.start.scan)], new_scans_size); } else { const size_t first_portion_length = (scan_buffer_.size() - area.start.scan) * sizeof(Scan); memcpy(destination, &scan_buffer_[area.start.scan], first_portion_length); memcpy(&destination[first_portion_length], &scan_buffer_[0], new_scans_size - first_portion_length); } // Flush and unmap the buffer. test_gl(glFlushMappedBufferRange, GL_ARRAY_BUFFER, 0, GLsizeiptr(new_scans_size)); test_gl(glUnmapBuffer, GL_ARRAY_BUFFER); } // Submit texture. if(area.start.write_area_x != area.end.write_area_x || area.start.write_area_y != area.end.write_area_y) { test_gl(glActiveTexture, SourceDataTextureUnit); test_gl(glBindTexture, GL_TEXTURE_2D, write_area_texture_name_); // Create storage for the texture if it doesn't yet exist; this was deferred until here // because the pixel format wasn't initially known. if(!texture_exists_) { test_gl(glTexParameteri, GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); test_gl(glTexParameteri, GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); test_gl(glTexParameteri, GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); test_gl(glTexParameteri, GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); test_gl(glTexImage2D, GL_TEXTURE_2D, 0, internalFormatForDepth(write_area_data_size()), WriteAreaWidth, WriteAreaHeight, 0, formatForDepth(write_area_data_size()), GL_UNSIGNED_BYTE, nullptr); texture_exists_ = true; } if(area.end.write_area_y >= area.start.write_area_y) { // Submit the direct region from the submit pointer to the read pointer. test_gl(glTexSubImage2D, GL_TEXTURE_2D, 0, 0, area.start.write_area_y, WriteAreaWidth, 1 + area.end.write_area_y - area.start.write_area_y, formatForDepth(write_area_data_size()), GL_UNSIGNED_BYTE, &write_area_texture_[size_t(area.start.write_area_y * WriteAreaWidth) * write_area_data_size()]); } else { // The circular buffer wrapped around; submit the data from the read pointer to the end of // the buffer and from the start of the buffer to the submit pointer. test_gl(glTexSubImage2D, GL_TEXTURE_2D, 0, 0, area.start.write_area_y, WriteAreaWidth, WriteAreaHeight - area.start.write_area_y, formatForDepth(write_area_data_size()), GL_UNSIGNED_BYTE, &write_area_texture_[size_t(area.start.write_area_y * WriteAreaWidth) * write_area_data_size()]); test_gl(glTexSubImage2D, GL_TEXTURE_2D, 0, 0, 0, WriteAreaWidth, 1 + area.end.write_area_y, formatForDepth(write_area_data_size()), GL_UNSIGNED_BYTE, &write_area_texture_[0]); } } // Push new input to the unprocessed line buffer. if(new_scans) { unprocessed_line_texture_.bind_framebuffer(); // Clear newly-touched lines; that is everything from (read+1) to submit. const auto first_line_to_clear = GLsizei((area.start.line+1)%line_buffer_.size()); const auto final_line_to_clear = GLsizei(area.end.line); if(first_line_to_clear != final_line_to_clear) { test_gl(glEnable, GL_SCISSOR_TEST); // Determine the proper clear colour — this needs to be anything that describes black // in the input colour encoding at use. if(modals().input_data_type == InputDataType::Luminance8Phase8) { // Supply both a zero luminance and a colour-subcarrier-disengaging phase. test_gl(glClearColor, 0.0f, 1.0f, 0.0f, 0.0f); } else { test_gl(glClearColor, 0.0f, 0.0f, 0.0f, 0.0f); } if(first_line_to_clear < final_line_to_clear) { test_gl(glScissor, GLint(0), GLint(first_line_to_clear), unprocessed_line_texture_.get_width(), final_line_to_clear - first_line_to_clear); test_gl(glClear, GL_COLOR_BUFFER_BIT); } else { test_gl(glScissor, GLint(0), GLint(0), unprocessed_line_texture_.get_width(), final_line_to_clear); test_gl(glClear, GL_COLOR_BUFFER_BIT); test_gl(glScissor, GLint(0), GLint(first_line_to_clear), unprocessed_line_texture_.get_width(), unprocessed_line_texture_.get_height() - first_line_to_clear); test_gl(glClear, GL_COLOR_BUFFER_BIT); } test_gl(glDisable, GL_SCISSOR_TEST); } // Apply new spans. They definitely always go to the first buffer. test_gl(glBindVertexArray, scan_vertex_array_); input_shader_->bind(); test_gl(glDrawArraysInstanced, GL_TRIANGLE_STRIP, 0, 4, GLsizei(new_scans)); } // Logic for reducing resolution: start doing so if the metrics object reports that // it's a good idea. Go up to a quarter of the requested resolution, subject to // clamping at each stage. If the output resolution changes, or anything else about // the output pipeline, just start trying the highest size again. if(display_metrics_.should_lower_resolution() && is_soft_display_type()) { resolution_reduction_level_ = std::min(resolution_reduction_level_+1, 4); } if(output_height_ != output_height || did_setup_pipeline) { resolution_reduction_level_ = 1; output_height_ = output_height; } // Ensure the accumulation buffer is properly sized, allowing for the metrics object's // feelings about whether too high a resolution is being used. const int framebuffer_height = std::max(output_height / resolution_reduction_level_, std::min(540, output_height)); const int proportional_width = (framebuffer_height * 4) / 3; const bool did_create_accumulation_texture = !accumulation_texture_ || ( (accumulation_texture_->get_width() != proportional_width || accumulation_texture_->get_height() != framebuffer_height)); // Work with the accumulation_buffer_ potentially starts from here onwards; set its flag. while(is_drawing_to_accumulation_buffer_.test_and_set()); if(did_create_accumulation_texture) { logger.info().append("Changed output resolution to %d by %d", proportional_width, framebuffer_height); display_metrics_.announce_did_resize(); std::unique_ptr new_framebuffer( new TextureTarget( GLsizei(proportional_width), GLsizei(framebuffer_height), AccumulationTextureUnit, GL_NEAREST, true)); if(accumulation_texture_) { new_framebuffer->bind_framebuffer(); test_gl(glClear, GL_COLOR_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); test_gl(glActiveTexture, AccumulationTextureUnit); accumulation_texture_->bind_texture(); accumulation_texture_->draw(4.0f / 3.0f); test_gl(glClear, GL_STENCIL_BUFFER_BIT); new_framebuffer->bind_texture(); } accumulation_texture_ = std::move(new_framebuffer); // In the absence of a way to resize a stencil buffer, just mark // what's currently present as invalid to avoid an improper clear // for this frame. stencil_is_valid_ = false; } if(did_setup_pipeline || did_create_accumulation_texture) { set_sampling_window(proportional_width, framebuffer_height, *output_shader_); } // Figure out how many new lines are ready. auto new_lines = (area.end.line - area.start.line + LineBufferHeight) % LineBufferHeight; if(new_lines) { // Prepare to output lines. test_gl(glBindVertexArray, line_vertex_array_); // Bind the accumulation framebuffer, unless there's going to be QAM work first. if(!qam_separation_shader_ || line_metadata_buffer_[area.start.line].is_first_in_frame) { accumulation_texture_->bind_framebuffer(); output_shader_->bind(); // Enable blending and stenciling. test_gl(glEnable, GL_BLEND); test_gl(glEnable, GL_STENCIL_TEST); } // Set the proper stencil function regardless. test_gl(glStencilFunc, GL_EQUAL, 0, GLuint(~0)); test_gl(glStencilOp, GL_KEEP, GL_KEEP, GL_INCR); // Prepare to upload data that will consitute lines. test_gl(glBindBuffer, GL_ARRAY_BUFFER, line_buffer_name_); // Divide spans by which frame they're in. auto start_line = area.start.line; while(new_lines) { uint16_t end_line = (start_line + 1) % LineBufferHeight; // Find the limit of spans to draw in this cycle. size_t lines = 1; while(end_line != area.end.line && !line_metadata_buffer_[end_line].is_first_in_frame) { end_line = (end_line + 1) % LineBufferHeight; ++lines; } // If this is start-of-frame, clear any untouched pixels and flush the stencil buffer if(line_metadata_buffer_[start_line].is_first_in_frame) { if(stencil_is_valid_ && line_metadata_buffer_[start_line].previous_frame_was_complete) { full_display_rectangle_.draw(0.0f, 0.0f, 0.0f); } stencil_is_valid_ = true; test_gl(glClear, GL_STENCIL_BUFFER_BIT); // Rebind the program for span output. test_gl(glBindVertexArray, line_vertex_array_); if(!qam_separation_shader_) { output_shader_->bind(); } } // Upload. const auto buffer_size = lines * sizeof(Line); if(!end_line || end_line > start_line) { test_gl(glBufferSubData, GL_ARRAY_BUFFER, 0, GLsizeiptr(buffer_size), &line_buffer_[start_line]); } else { uint8_t *destination = static_cast( glMapBufferRange(GL_ARRAY_BUFFER, 0, GLsizeiptr(buffer_size), GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT) ); assert(destination); test_gl_error(); const size_t buffer_length = line_buffer_.size() * sizeof(Line); const size_t start_position = start_line * sizeof(Line); memcpy(&destination[0], &line_buffer_[start_line], buffer_length - start_position); memcpy(&destination[buffer_length - start_position], &line_buffer_[0], end_line * sizeof(Line)); test_gl(glFlushMappedBufferRange, GL_ARRAY_BUFFER, 0, GLsizeiptr(buffer_size)); test_gl(glUnmapBuffer, GL_ARRAY_BUFFER); } // Produce colour information, if required. if(qam_separation_shader_) { qam_separation_shader_->bind(); qam_chroma_texture_->bind_framebuffer(); test_gl(glClear, GL_COLOR_BUFFER_BIT); // TODO: this is here as a hint that the old framebuffer doesn't need reloading; // test whether that's a valid optimisation on desktop OpenGL. test_gl(glDisable, GL_BLEND); test_gl(glDisable, GL_STENCIL_TEST); test_gl(glDrawArraysInstanced, GL_TRIANGLE_STRIP, 0, 4, GLsizei(lines)); accumulation_texture_->bind_framebuffer(); output_shader_->bind(); test_gl(glEnable, GL_BLEND); test_gl(glEnable, GL_STENCIL_TEST); } // Render to the output. test_gl(glDrawArraysInstanced, GL_TRIANGLE_STRIP, 0, 4, GLsizei(lines)); start_line = end_line; new_lines -= lines; } // Disable blending and the stencil test again. test_gl(glDisable, GL_STENCIL_TEST); test_gl(glDisable, GL_BLEND); } // That's it for operations affecting the accumulation buffer. is_drawing_to_accumulation_buffer_.clear(); // Grab a fence sync object to avoid busy waiting upon the next extry into this // function, and reset the is_updating_ flag. fence_ = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); complete_output_area(area); }); } void ScanTarget::draw(int output_width, int output_height) { while(is_drawing_to_accumulation_buffer_.test_and_set(std::memory_order_acquire)); if(accumulation_texture_) { // Copy the accumulation texture to the target. test_gl(glBindFramebuffer, GL_FRAMEBUFFER, target_framebuffer_); test_gl(glViewport, 0, 0, (GLsizei)output_width, (GLsizei)output_height); test_gl(glClearColor, 0.0f, 0.0f, 0.0f, 0.0f); test_gl(glClear, GL_COLOR_BUFFER_BIT); accumulation_texture_->bind_texture(); accumulation_texture_->draw(float(output_width) / float(output_height), 4.0f / 255.0f); } is_drawing_to_accumulation_buffer_.clear(std::memory_order_release); }