From 86dab3a5d6570a178cbba9a4112e84aefe66f616 Mon Sep 17 00:00:00 2001 From: Thomas Harte Date: Tue, 10 May 2016 07:23:47 -0400 Subject: [PATCH 1/3] Quickest first attempt to switch to instancing for the output array. Texture coordinates seem to be off. --- Outputs/CRT/CRT.cpp | 30 ++++++++++++------- Outputs/CRT/Internals/CRTConstants.hpp | 3 +- Outputs/CRT/Internals/CRTOpenGL.cpp | 10 ++++++- Outputs/CRT/Internals/CRTOpenGL.hpp | 8 ++--- .../CRT/Internals/Shaders/OutputShader.cpp | 14 +++++---- 5 files changed, 44 insertions(+), 21 deletions(-) diff --git a/Outputs/CRT/CRT.cpp b/Outputs/CRT/CRT.cpp index 09a10a954..ccd029a9a 100644 --- a/Outputs/CRT/CRT.cpp +++ b/Outputs/CRT/CRT.cpp @@ -96,10 +96,12 @@ Flywheel::SyncEvent CRT::get_next_horizontal_sync_event(bool hsync_is_requested, return _horizontal_flywheel->get_next_event_in_period(hsync_is_requested, cycles_to_run_for, cycles_advanced); } -#define output_position_x(v) (*(uint16_t *)&next_run[OutputVertexSize*v + OutputVertexOffsetOfPosition + 0]) -#define output_position_y(v) (*(uint16_t *)&next_run[OutputVertexSize*v + OutputVertexOffsetOfPosition + 2]) -#define output_tex_x(v) (*(uint16_t *)&next_run[OutputVertexSize*v + OutputVertexOffsetOfTexCoord + 0]) -#define output_tex_y(v) (*(uint16_t *)&next_run[OutputVertexSize*v + OutputVertexOffsetOfTexCoord + 2]) +#define output_position_x1() (*(uint16_t *)&next_run[OutputVertexOffsetOfPosition + 0]) +#define output_position_x2() (*(uint16_t *)&next_run[OutputVertexOffsetOfTerminators + 0]) +#define output_position_y() (*(uint16_t *)&next_run[OutputVertexOffsetOfPosition + 2]) +#define output_tex_x1() (*(uint16_t *)&next_run[OutputVertexOffsetOfTexCoord + 0]) +#define output_tex_x2() (*(uint16_t *)&next_run[OutputVertexOffsetOfTerminators + 2]) +#define output_tex_y() (*(uint16_t *)&next_run[OutputVertexOffsetOfTexCoord + 2]) #define source_input_position_x(v) (*(uint16_t *)&next_run[SourceVertexSize*v + SourceVertexOffsetOfInputPosition + 0]) #define source_input_position_y(v) (*(uint16_t *)&next_run[SourceVertexSize*v + SourceVertexOffsetOfInputPosition + 2]) @@ -188,18 +190,26 @@ void CRT::advance_cycles(unsigned int number_of_cycles, unsigned int source_divi { if( _is_writing_composite_run == _did_start_run && - _openGL_output_builder->composite_output_run_has_room_for_vertices(_did_start_run ? 3 : 6) && + _openGL_output_builder->composite_output_run_has_room_for_vertex() && !_openGL_output_builder->composite_output_buffer_is_full()) { uint8_t *next_run = _openGL_output_builder->get_next_output_run(); if(next_run) { - output_position_x(0) = output_position_x(1) = output_position_x(2) = (uint16_t)_horizontal_flywheel->get_current_output_position(); - output_position_y(0) = output_position_y(1) = output_position_y(2) = (uint16_t)(_vertical_flywheel->get_current_output_position() / _vertical_flywheel_output_divider); - output_tex_x(0) = output_tex_x(1) = output_tex_x(2) = (uint16_t)_horizontal_flywheel->get_current_output_position(); - output_tex_y(0) = output_tex_y(1) = output_tex_y(2) = _openGL_output_builder->get_composite_output_y(); + if(_did_start_run) + { + output_position_x1() = (uint16_t)_horizontal_flywheel->get_current_output_position(); + output_position_y() = (uint16_t)(_vertical_flywheel->get_current_output_position() / _vertical_flywheel_output_divider); + output_tex_x1() = (uint16_t)_horizontal_flywheel->get_current_output_position(); + output_tex_y() = _openGL_output_builder->get_composite_output_y(); + } + else + { + output_position_x2() = (uint16_t)_horizontal_flywheel->get_current_output_position(); + output_tex_x2() = (uint16_t)_horizontal_flywheel->get_current_output_position(); + _openGL_output_builder->complete_output_run(); + } - _openGL_output_builder->complete_output_run(3); _did_start_run ^= true; } } diff --git a/Outputs/CRT/Internals/CRTConstants.hpp b/Outputs/CRT/Internals/CRTConstants.hpp index 98c1fc830..0d826c553 100644 --- a/Outputs/CRT/Internals/CRTConstants.hpp +++ b/Outputs/CRT/Internals/CRTConstants.hpp @@ -19,8 +19,9 @@ namespace CRT { // or is one of the intermediate buffers that we've used to convert from composite towards RGB. const GLsizei OutputVertexOffsetOfPosition = 0; const GLsizei OutputVertexOffsetOfTexCoord = 4; +const GLsizei OutputVertexOffsetOfTerminators = 6; -const GLsizei OutputVertexSize = 8; +const GLsizei OutputVertexSize = 12; // Input vertices, used only in composite mode, map from the input buffer to temporary buffer locations; such // remapping occurs to ensure a continous stream of data for each scan, giving correct out-of-bounds behaviour diff --git a/Outputs/CRT/Internals/CRTOpenGL.cpp b/Outputs/CRT/Internals/CRTOpenGL.cpp index 8ace1dbe5..c124f1be2 100644 --- a/Outputs/CRT/Internals/CRTOpenGL.cpp +++ b/Outputs/CRT/Internals/CRTOpenGL.cpp @@ -324,7 +324,8 @@ void OpenGLOutputBuilder::draw_frame(unsigned int output_width, unsigned int out output_shader_program->bind(); // draw - glDrawArrays(GL_TRIANGLE_STRIP, 0, submitted_output_data / OutputVertexSize); +// glDrawArrays(GL_TRIANGLE_STRIP, 0, submitted_output_data / OutputVertexSize); + glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, submitted_output_data / OutputVertexSize); } // copy framebuffer to the intended place @@ -425,16 +426,23 @@ void OpenGLOutputBuilder::prepare_output_vertex_array() { GLint positionAttribute = output_shader_program->get_attrib_location("position"); GLint textureCoordinatesAttribute = output_shader_program->get_attrib_location("srcCoordinates"); + GLint terminatorsAttribute = output_shader_program->get_attrib_location("terminators"); glBindVertexArray(output_vertex_array); glEnableVertexAttribArray((GLuint)positionAttribute); glEnableVertexAttribArray((GLuint)textureCoordinatesAttribute); + glEnableVertexAttribArray((GLuint)terminatorsAttribute); const GLsizei vertexStride = OutputVertexSize; glBindBuffer(GL_ARRAY_BUFFER, output_array_buffer); glVertexAttribPointer((GLuint)positionAttribute, 2, GL_UNSIGNED_SHORT, GL_FALSE, vertexStride, (void *)OutputVertexOffsetOfPosition); glVertexAttribPointer((GLuint)textureCoordinatesAttribute, 2, GL_UNSIGNED_SHORT, GL_FALSE, vertexStride, (void *)OutputVertexOffsetOfTexCoord); + glVertexAttribPointer((GLuint)terminatorsAttribute, 2, GL_UNSIGNED_SHORT, GL_FALSE, vertexStride, (void *)OutputVertexOffsetOfTerminators); + + glVertexAttribDivisor((GLuint)positionAttribute, 1); + glVertexAttribDivisor((GLuint)textureCoordinatesAttribute, 1); + glVertexAttribDivisor((GLuint)terminatorsAttribute, 1); } } diff --git a/Outputs/CRT/Internals/CRTOpenGL.hpp b/Outputs/CRT/Internals/CRTOpenGL.hpp index 3253f61ff..6da343c13 100644 --- a/Outputs/CRT/Internals/CRTOpenGL.hpp +++ b/Outputs/CRT/Internals/CRTOpenGL.hpp @@ -115,9 +115,9 @@ class OpenGLOutputBuilder { _source_buffer_data_pointer += 2 * SourceVertexSize; } - inline bool composite_output_run_has_room_for_vertices(GLsizei vertices_to_write) + inline bool composite_output_run_has_room_for_vertex() { - return _output_buffer_data_pointer <= OutputVertexBufferDataSize - vertices_to_write * OutputVertexSize; + return _output_buffer_data_pointer < OutputVertexBufferDataSize; } inline uint8_t *get_next_output_run() @@ -126,9 +126,9 @@ class OpenGLOutputBuilder { return &_output_buffer_data.get()[_output_buffer_data_pointer % OutputVertexBufferDataSize]; } - inline void complete_output_run(GLsizei vertices_written) + inline void complete_output_run() { - _output_buffer_data_pointer += vertices_written * OutputVertexSize; + _output_buffer_data_pointer += OutputVertexSize; } inline void lock_output() diff --git a/Outputs/CRT/Internals/Shaders/OutputShader.cpp b/Outputs/CRT/Internals/Shaders/OutputShader.cpp index 0b7d735e5..fc234d321 100644 --- a/Outputs/CRT/Internals/Shaders/OutputShader.cpp +++ b/Outputs/CRT/Internals/Shaders/OutputShader.cpp @@ -32,6 +32,7 @@ std::unique_ptr OutputShader::make_shader(const char *fragment_met "in vec2 position;" "in vec2 srcCoordinates;" + "in vec2 terminators;" "uniform vec2 boundsOrigin;" "uniform vec2 boundsSize;" @@ -45,15 +46,18 @@ std::unique_ptr OutputShader::make_shader(const char *fragment_met "void main(void)" "{" - "float laterals[] = float[](0, 0, 1, 0, 1, 1);" - "float lateral = laterals[gl_VertexID %% 6];" + "float lateral = float(gl_VertexID & 1);" + "float longitudinal = float((gl_VertexID & 2) >> 1);" + "lateralVarying = lateral - 0.5;" + "vec2 vSrcCoordinates = mix(srcCoordinates, vec2(terminators.y, srcCoordinates.y), longitudinal);" "ivec2 textureSize = textureSize(texID, 0);" - "iSrcCoordinatesVarying = srcCoordinates;" - "srcCoordinatesVarying = vec2(srcCoordinates.x / textureSize.x, (srcCoordinates.y + 0.5) / textureSize.y);" + "iSrcCoordinatesVarying = vSrcCoordinates;" + "srcCoordinatesVarying = vec2(vSrcCoordinates.x / textureSize.x, (vSrcCoordinates.y + 0.5) / textureSize.y);" - "vec2 floatingPosition = (position / positionConversion) + lateral * scanNormal;" + "vec2 vPosition = mix(position, vec2(terminators.x, position.y), longitudinal);" + "vec2 floatingPosition = (vPosition / positionConversion) + lateral * scanNormal;" "vec2 mappedPosition = (floatingPosition - boundsOrigin) / boundsSize;" "gl_Position = vec4(mappedPosition.x * 2.0 - 1.0, 1.0 - mappedPosition.y * 2.0, 0.0, 1.0);" "}", sampler_type); From b0f0315f12247828dee5becbe261ddde8191475f Mon Sep 17 00:00:00 2001 From: Thomas Harte Date: Tue, 10 May 2016 07:30:12 -0400 Subject: [PATCH 2/3] Fixed: this now implements 12-byte instancing for output runs. --- Outputs/CRT/Internals/CRTConstants.hpp | 2 +- Outputs/CRT/Internals/Shaders/OutputShader.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Outputs/CRT/Internals/CRTConstants.hpp b/Outputs/CRT/Internals/CRTConstants.hpp index 0d826c553..ad5b1ed07 100644 --- a/Outputs/CRT/Internals/CRTConstants.hpp +++ b/Outputs/CRT/Internals/CRTConstants.hpp @@ -19,7 +19,7 @@ namespace CRT { // or is one of the intermediate buffers that we've used to convert from composite towards RGB. const GLsizei OutputVertexOffsetOfPosition = 0; const GLsizei OutputVertexOffsetOfTexCoord = 4; -const GLsizei OutputVertexOffsetOfTerminators = 6; +const GLsizei OutputVertexOffsetOfTerminators = 8; const GLsizei OutputVertexSize = 12; diff --git a/Outputs/CRT/Internals/Shaders/OutputShader.cpp b/Outputs/CRT/Internals/Shaders/OutputShader.cpp index fc234d321..0cad8c123 100644 --- a/Outputs/CRT/Internals/Shaders/OutputShader.cpp +++ b/Outputs/CRT/Internals/Shaders/OutputShader.cpp @@ -51,12 +51,12 @@ std::unique_ptr OutputShader::make_shader(const char *fragment_met "lateralVarying = lateral - 0.5;" - "vec2 vSrcCoordinates = mix(srcCoordinates, vec2(terminators.y, srcCoordinates.y), longitudinal);" + "vec2 vSrcCoordinates = vec2(mix(srcCoordinates.x, terminators.y, longitudinal), srcCoordinates.y);" "ivec2 textureSize = textureSize(texID, 0);" "iSrcCoordinatesVarying = vSrcCoordinates;" "srcCoordinatesVarying = vec2(vSrcCoordinates.x / textureSize.x, (vSrcCoordinates.y + 0.5) / textureSize.y);" - "vec2 vPosition = mix(position, vec2(terminators.x, position.y), longitudinal);" + "vec2 vPosition = vec2(mix(position.x, terminators.x, longitudinal), position.y);" "vec2 floatingPosition = (vPosition / positionConversion) + lateral * scanNormal;" "vec2 mappedPosition = (floatingPosition - boundsOrigin) / boundsSize;" "gl_Position = vec4(mappedPosition.x * 2.0 - 1.0, 1.0 - mappedPosition.y * 2.0, 0.0, 1.0);" From 1d0bcb416dcd464a27b886de36a3b9643d28310c Mon Sep 17 00:00:00 2001 From: Thomas Harte Date: Tue, 10 May 2016 07:47:47 -0400 Subject: [PATCH 3/3] Dropped the size per output run back down to 8 bytes. --- Outputs/CRT/CRT.cpp | 21 ++++++-------- Outputs/CRT/Internals/CRTConstants.hpp | 7 ++--- Outputs/CRT/Internals/CRTOpenGL.cpp | 28 +++++++++---------- .../CRT/Internals/Shaders/OutputShader.cpp | 10 +++---- 4 files changed, 29 insertions(+), 37 deletions(-) diff --git a/Outputs/CRT/CRT.cpp b/Outputs/CRT/CRT.cpp index ccd029a9a..712823fc1 100644 --- a/Outputs/CRT/CRT.cpp +++ b/Outputs/CRT/CRT.cpp @@ -96,12 +96,10 @@ Flywheel::SyncEvent CRT::get_next_horizontal_sync_event(bool hsync_is_requested, return _horizontal_flywheel->get_next_event_in_period(hsync_is_requested, cycles_to_run_for, cycles_advanced); } -#define output_position_x1() (*(uint16_t *)&next_run[OutputVertexOffsetOfPosition + 0]) -#define output_position_x2() (*(uint16_t *)&next_run[OutputVertexOffsetOfTerminators + 0]) -#define output_position_y() (*(uint16_t *)&next_run[OutputVertexOffsetOfPosition + 2]) -#define output_tex_x1() (*(uint16_t *)&next_run[OutputVertexOffsetOfTexCoord + 0]) -#define output_tex_x2() (*(uint16_t *)&next_run[OutputVertexOffsetOfTerminators + 2]) -#define output_tex_y() (*(uint16_t *)&next_run[OutputVertexOffsetOfTexCoord + 2]) +#define output_x1() (*(uint16_t *)&next_run[OutputVertexOffsetOfHorizontal + 0]) +#define output_x2() (*(uint16_t *)&next_run[OutputVertexOffsetOfHorizontal + 2]) +#define output_position_y() (*(uint16_t *)&next_run[OutputVertexOffsetOfVertical + 0]) +#define output_tex_y() (*(uint16_t *)&next_run[OutputVertexOffsetOfVertical + 2]) #define source_input_position_x(v) (*(uint16_t *)&next_run[SourceVertexSize*v + SourceVertexOffsetOfInputPosition + 0]) #define source_input_position_y(v) (*(uint16_t *)&next_run[SourceVertexSize*v + SourceVertexOffsetOfInputPosition + 2]) @@ -198,15 +196,13 @@ void CRT::advance_cycles(unsigned int number_of_cycles, unsigned int source_divi { if(_did_start_run) { - output_position_x1() = (uint16_t)_horizontal_flywheel->get_current_output_position(); + output_x1() = (uint16_t)_horizontal_flywheel->get_current_output_position(); output_position_y() = (uint16_t)(_vertical_flywheel->get_current_output_position() / _vertical_flywheel_output_divider); - output_tex_x1() = (uint16_t)_horizontal_flywheel->get_current_output_position(); output_tex_y() = _openGL_output_builder->get_composite_output_y(); } else { - output_position_x2() = (uint16_t)_horizontal_flywheel->get_current_output_position(); - output_tex_x2() = (uint16_t)_horizontal_flywheel->get_current_output_position(); + output_x2() = (uint16_t)_horizontal_flywheel->get_current_output_position(); _openGL_output_builder->complete_output_run(); } @@ -237,11 +233,10 @@ void CRT::advance_cycles(unsigned int number_of_cycles, unsigned int source_divi } } -#undef output_position_x +#undef output_x1 +#undef output_x2 #undef output_position_y -#undef output_tex_x #undef output_tex_y -#undef output_lateral #undef input_input_position_x #undef input_input_position_y diff --git a/Outputs/CRT/Internals/CRTConstants.hpp b/Outputs/CRT/Internals/CRTConstants.hpp index ad5b1ed07..bc7e2c709 100644 --- a/Outputs/CRT/Internals/CRTConstants.hpp +++ b/Outputs/CRT/Internals/CRTConstants.hpp @@ -17,11 +17,10 @@ namespace CRT { // Output vertices are those used to copy from an input buffer — whether it describes data that maps directly to RGB // or is one of the intermediate buffers that we've used to convert from composite towards RGB. -const GLsizei OutputVertexOffsetOfPosition = 0; -const GLsizei OutputVertexOffsetOfTexCoord = 4; -const GLsizei OutputVertexOffsetOfTerminators = 8; +const GLsizei OutputVertexOffsetOfHorizontal = 0; +const GLsizei OutputVertexOffsetOfVertical = 4; -const GLsizei OutputVertexSize = 12; +const GLsizei OutputVertexSize = 8; // Input vertices, used only in composite mode, map from the input buffer to temporary buffer locations; such // remapping occurs to ensure a continous stream of data for each scan, giving correct out-of-bounds behaviour diff --git a/Outputs/CRT/Internals/CRTOpenGL.cpp b/Outputs/CRT/Internals/CRTOpenGL.cpp index c124f1be2..29c9fd896 100644 --- a/Outputs/CRT/Internals/CRTOpenGL.cpp +++ b/Outputs/CRT/Internals/CRTOpenGL.cpp @@ -424,25 +424,23 @@ void OpenGLOutputBuilder::prepare_output_vertex_array() { if(output_shader_program) { - GLint positionAttribute = output_shader_program->get_attrib_location("position"); - GLint textureCoordinatesAttribute = output_shader_program->get_attrib_location("srcCoordinates"); - GLint terminatorsAttribute = output_shader_program->get_attrib_location("terminators"); - glBindVertexArray(output_vertex_array); - - glEnableVertexAttribArray((GLuint)positionAttribute); - glEnableVertexAttribArray((GLuint)textureCoordinatesAttribute); - glEnableVertexAttribArray((GLuint)terminatorsAttribute); + glBindBuffer(GL_ARRAY_BUFFER, output_array_buffer); const GLsizei vertexStride = OutputVertexSize; - glBindBuffer(GL_ARRAY_BUFFER, output_array_buffer); - glVertexAttribPointer((GLuint)positionAttribute, 2, GL_UNSIGNED_SHORT, GL_FALSE, vertexStride, (void *)OutputVertexOffsetOfPosition); - glVertexAttribPointer((GLuint)textureCoordinatesAttribute, 2, GL_UNSIGNED_SHORT, GL_FALSE, vertexStride, (void *)OutputVertexOffsetOfTexCoord); - glVertexAttribPointer((GLuint)terminatorsAttribute, 2, GL_UNSIGNED_SHORT, GL_FALSE, vertexStride, (void *)OutputVertexOffsetOfTerminators); + size_t offset = 0; - glVertexAttribDivisor((GLuint)positionAttribute, 1); - glVertexAttribDivisor((GLuint)textureCoordinatesAttribute, 1); - glVertexAttribDivisor((GLuint)terminatorsAttribute, 1); + const char *attributes[] = {"horizontal", "vertical", nullptr}; + const char **attribute = attributes; + while(*attribute) + { + GLint attributeLocation = output_shader_program->get_attrib_location(*attribute); + glEnableVertexAttribArray((GLuint)attributeLocation); + glVertexAttribPointer((GLuint)attributeLocation, 2, GL_UNSIGNED_SHORT, GL_FALSE, vertexStride, (void *)offset); + glVertexAttribDivisor((GLuint)attributeLocation, 1); + offset += 4; + attribute++; + } } } diff --git a/Outputs/CRT/Internals/Shaders/OutputShader.cpp b/Outputs/CRT/Internals/Shaders/OutputShader.cpp index 0cad8c123..3576fcb79 100644 --- a/Outputs/CRT/Internals/Shaders/OutputShader.cpp +++ b/Outputs/CRT/Internals/Shaders/OutputShader.cpp @@ -30,9 +30,8 @@ std::unique_ptr OutputShader::make_shader(const char *fragment_met asprintf(&vertex_shader, "#version 150\n" - "in vec2 position;" - "in vec2 srcCoordinates;" - "in vec2 terminators;" + "in vec2 horizontal;" + "in vec2 vertical;" "uniform vec2 boundsOrigin;" "uniform vec2 boundsSize;" @@ -48,15 +47,16 @@ std::unique_ptr OutputShader::make_shader(const char *fragment_met "{" "float lateral = float(gl_VertexID & 1);" "float longitudinal = float((gl_VertexID & 2) >> 1);" + "float x = mix(horizontal.x, horizontal.y, longitudinal);" "lateralVarying = lateral - 0.5;" - "vec2 vSrcCoordinates = vec2(mix(srcCoordinates.x, terminators.y, longitudinal), srcCoordinates.y);" + "vec2 vSrcCoordinates = vec2(x, vertical.y);" "ivec2 textureSize = textureSize(texID, 0);" "iSrcCoordinatesVarying = vSrcCoordinates;" "srcCoordinatesVarying = vec2(vSrcCoordinates.x / textureSize.x, (vSrcCoordinates.y + 0.5) / textureSize.y);" - "vec2 vPosition = vec2(mix(position.x, terminators.x, longitudinal), position.y);" + "vec2 vPosition = vec2(x, vertical.x);" "vec2 floatingPosition = (vPosition / positionConversion) + lateral * scanNormal;" "vec2 mappedPosition = (floatingPosition - boundsOrigin) / boundsSize;" "gl_Position = vec4(mappedPosition.x * 2.0 - 1.0, 1.0 - mappedPosition.y * 2.0, 0.0, 1.0);"