1
0
mirror of https://github.com/TomHarte/CLK.git synced 2025-01-26 15:32:04 +00:00

Switched to instaced drawing for source[/intermediate] runs too, reducing that data transfer footprint by 50%.

This commit is contained in:
Thomas Harte 2016-05-10 19:50:12 -04:00
parent 7369139f7e
commit b6d2c8cb63
4 changed files with 50 additions and 59 deletions

View File

@ -100,13 +100,15 @@ Flywheel::SyncEvent CRT::get_next_horizontal_sync_event(bool hsync_is_requested,
#define output_position_y() (*(uint16_t *)&next_run[OutputVertexOffsetOfVertical + 0])
#define output_tex_y() (*(uint16_t *)&next_run[OutputVertexOffsetOfVertical + 2])
#define source_input_position_x(v) (*(uint16_t *)&next_run[SourceVertexSize*v + SourceVertexOffsetOfInputPosition + 0])
#define source_input_position_y(v) (*(uint16_t *)&next_run[SourceVertexSize*v + SourceVertexOffsetOfInputPosition + 2])
#define source_output_position_x(v) (*(uint16_t *)&next_run[SourceVertexSize*v + SourceVertexOffsetOfOutputPosition + 0])
#define source_output_position_y(v) (*(uint16_t *)&next_run[SourceVertexSize*v + SourceVertexOffsetOfOutputPosition + 2])
#define source_phase(v) next_run[SourceVertexSize*v + SourceVertexOffsetOfPhaseAndAmplitude + 0]
#define source_amplitude(v) next_run[SourceVertexSize*v + SourceVertexOffsetOfPhaseAndAmplitude + 1]
#define source_phase_time(v) next_run[SourceVertexSize*v + SourceVertexOffsetOfPhaseTime]
#define source_input_position_x1() (*(uint16_t *)&next_run[SourceVertexOffsetOfInputStart + 0])
#define source_input_position_y() (*(uint16_t *)&next_run[SourceVertexOffsetOfInputStart + 2])
#define source_input_position_x2() (*(uint16_t *)&next_run[SourceVertexOffsetOfEnds + 0])
#define source_output_position_x1() (*(uint16_t *)&next_run[SourceVertexOffsetOfOutputStart + 0])
#define source_output_position_y() (*(uint16_t *)&next_run[SourceVertexOffsetOfOutputStart + 2])
#define source_output_position_x2() (*(uint16_t *)&next_run[SourceVertexOffsetOfEnds + 2])
#define source_phase() next_run[SourceVertexOffsetOfPhaseTimeAndAmplitude + 0]
#define source_amplitude() next_run[SourceVertexOffsetOfPhaseTimeAndAmplitude + 2]
#define source_phase_time() next_run[SourceVertexOffsetOfPhaseTimeAndAmplitude + 1]
void CRT::advance_cycles(unsigned int number_of_cycles, unsigned int source_divider, bool hsync_requested, bool vsync_requested, const bool vsync_charging, const Scan::Type type, uint16_t tex_x, uint16_t tex_y)
{
@ -134,20 +136,15 @@ void CRT::advance_cycles(unsigned int number_of_cycles, unsigned int source_divi
next_run = _openGL_output_builder->get_next_source_run();
}
// Vertex output is arranged for triangle strips, as:
//
// 2 [4/5]
//
// [0/1] 3
if(next_run)
{
source_input_position_x(0) = tex_x;
source_input_position_y(0) = source_input_position_y(1) = tex_y;
source_output_position_x(0) = (uint16_t)_horizontal_flywheel->get_current_output_position();
source_output_position_y(0) = source_output_position_y(1) = _openGL_output_builder->get_composite_output_y();
source_phase(0) = source_phase(1) = _colour_burst_phase;
source_amplitude(0) = source_amplitude(1) = _colour_burst_amplitude;
source_phase_time(0) = source_phase_time(1) = _colour_burst_time;
source_input_position_x1() = tex_x;
source_input_position_y() = tex_y;
source_output_position_x1() = (uint16_t)_horizontal_flywheel->get_current_output_position();
source_output_position_y() = _openGL_output_builder->get_composite_output_y();
source_phase() = _colour_burst_phase;
source_amplitude() = _colour_burst_amplitude;
source_phase_time() = (uint8_t)_colour_burst_time; // assumption: burst was within the first 1/16 of the line
}
// decrement the number of cycles left to run for and increment the
@ -169,8 +166,8 @@ void CRT::advance_cycles(unsigned int number_of_cycles, unsigned int source_divi
// if this is a data run then advance the buffer pointer
if(type == Scan::Type::Data && source_divider) tex_x += next_run_length / (_time_multiplier * source_divider);
source_input_position_x(1) = tex_x;
source_output_position_x(1) = (uint16_t)_horizontal_flywheel->get_current_output_position();
source_input_position_x2() = tex_x;
source_output_position_x2() = (uint16_t)_horizontal_flywheel->get_current_output_position();
_openGL_output_builder->complete_source_run();
}
@ -234,13 +231,15 @@ void CRT::advance_cycles(unsigned int number_of_cycles, unsigned int source_divi
#undef output_position_y
#undef output_tex_y
#undef input_input_position_x
#undef input_input_position_y
#undef input_output_position_x
#undef input_output_position_y
#undef input_phase
#undef input_amplitude
#undef input_phase_age
#undef source_input_position_x1
#undef source_input_position_y
#undef source_input_position_x2
#undef source_output_position_x1
#undef source_output_position_y
#undef source_output_position_x2
#undef source_phase
#undef source_amplitude
#undef source_phase_time
#pragma mark - stream feeding methods

View File

@ -24,10 +24,10 @@ const GLsizei OutputVertexSize = 8;
// Input vertices, used only in composite mode, map from the input buffer to temporary buffer locations; such
// remapping occurs to ensure a continous stream of data for each scan, giving correct out-of-bounds behaviour
const GLsizei SourceVertexOffsetOfInputPosition = 0;
const GLsizei SourceVertexOffsetOfOutputPosition = 4;
const GLsizei SourceVertexOffsetOfPhaseAndAmplitude = 8;
const GLsizei SourceVertexOffsetOfPhaseTime = 12;
const GLsizei SourceVertexOffsetOfInputStart = 0;
const GLsizei SourceVertexOffsetOfOutputStart = 4;
const GLsizei SourceVertexOffsetOfEnds = 8;
const GLsizei SourceVertexOffsetOfPhaseTimeAndAmplitude = 12;
const GLsizei SourceVertexSize = 16;
@ -41,7 +41,7 @@ const GLsizei IntermediateBufferHeight = 1024;
// Some internal buffer sizes
const GLsizeiptr OutputVertexBufferDataSize = OutputVertexSize * IntermediateBufferHeight; // i.e. the maximum number of scans of output that can be created between draws
const GLsizeiptr SourceVertexBufferDataSize = 2 * SourceVertexSize * IntermediateBufferHeight * 2; // a multiple of 2 * SourceVertexSize
const GLsizeiptr SourceVertexBufferDataSize = SourceVertexSize * IntermediateBufferHeight * 2; // a multiple of 2 * SourceVertexSize
}
}

View File

@ -298,7 +298,7 @@ void OpenGLOutputBuilder::draw_frame(unsigned int output_width, unsigned int out
}
// draw as desired
glDrawArrays(GL_LINES, 0, submitted_source_data / SourceVertexSize);
glDrawArraysInstanced(GL_LINES, 0, 2, submitted_source_data / SourceVertexSize);
active_pipeline++;
}
@ -324,7 +324,6 @@ void OpenGLOutputBuilder::draw_frame(unsigned int output_width, unsigned int out
output_shader_program->bind();
// draw
// glDrawArrays(GL_TRIANGLE_STRIP, 0, submitted_output_data / OutputVertexSize);
glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, submitted_output_data / OutputVertexSize);
}
@ -393,24 +392,13 @@ void OpenGLOutputBuilder::prepare_source_vertex_array()
{
if(composite_input_shader_program)
{
GLint inputPositionAttribute = composite_input_shader_program->get_attrib_location("inputPosition");
GLint outputPositionAttribute = composite_input_shader_program->get_attrib_location("outputPosition");
GLint phaseAndAmplitudeAttribute = composite_input_shader_program->get_attrib_location("phaseAndAmplitude");
GLint phaseTimeAttribute = composite_input_shader_program->get_attrib_location("phaseTime");
glBindVertexArray(source_vertex_array);
glEnableVertexAttribArray((GLuint)inputPositionAttribute);
glEnableVertexAttribArray((GLuint)outputPositionAttribute);
glEnableVertexAttribArray((GLuint)phaseAndAmplitudeAttribute);
glEnableVertexAttribArray((GLuint)phaseTimeAttribute);
const GLsizei vertexStride = SourceVertexSize;
glBindBuffer(GL_ARRAY_BUFFER, source_array_buffer);
glVertexAttribPointer((GLuint)inputPositionAttribute, 2, GL_UNSIGNED_SHORT, GL_FALSE, vertexStride, (void *)SourceVertexOffsetOfInputPosition);
glVertexAttribPointer((GLuint)outputPositionAttribute, 2, GL_UNSIGNED_SHORT, GL_FALSE, vertexStride, (void *)SourceVertexOffsetOfOutputPosition);
glVertexAttribPointer((GLuint)phaseAndAmplitudeAttribute, 2, GL_UNSIGNED_BYTE, GL_TRUE, vertexStride, (void *)SourceVertexOffsetOfPhaseAndAmplitude);
glVertexAttribPointer((GLuint)phaseTimeAttribute, 1, GL_UNSIGNED_BYTE, GL_FALSE, vertexStride, (void *)SourceVertexOffsetOfPhaseTime);
composite_input_shader_program->enable_vertex_attribute_with_pointer("inputStart", 2, GL_UNSIGNED_SHORT, GL_FALSE, SourceVertexSize, (void *)SourceVertexOffsetOfInputStart, 1);
composite_input_shader_program->enable_vertex_attribute_with_pointer("outputStart", 2, GL_UNSIGNED_SHORT, GL_FALSE, SourceVertexSize, (void *)SourceVertexOffsetOfOutputStart, 1);
composite_input_shader_program->enable_vertex_attribute_with_pointer("ends", 2, GL_UNSIGNED_SHORT, GL_FALSE, SourceVertexSize, (void *)SourceVertexOffsetOfEnds, 1);
composite_input_shader_program->enable_vertex_attribute_with_pointer("phaseTimeAndAmplitude", 3, GL_UNSIGNED_BYTE, GL_FALSE, SourceVertexSize, (void *)SourceVertexOffsetOfPhaseTimeAndAmplitude, 1);
}
}

View File

@ -35,10 +35,10 @@ std::unique_ptr<IntermediateShader> IntermediateShader::make_shader(const char *
asprintf(&vertex_shader,
"#version 150\n"
"in vec2 inputPosition;"
"in vec2 outputPosition;"
"in vec2 phaseAndAmplitude;"
"in float phaseTime;"
"in vec2 inputStart;"
"in vec2 outputStart;"
"in vec2 ends;"
"in vec3 phaseTimeAndAmplitude;"
"uniform float phaseCyclesPerTick;"
"uniform ivec2 outputTextureSize;"
@ -53,8 +53,12 @@ std::unique_ptr<IntermediateShader> IntermediateShader::make_shader(const char *
"void main(void)"
"{"
"float direction = float(gl_VertexID & 1);"
"vec2 extensionVector = vec2(extension, 0.0) * 2.0 * (direction - 0.5);"
"float extent = float(gl_VertexID & 1);"
"vec2 inputPosition = vec2(mix(inputStart.x, ends.x, extent), inputStart.y);"
"vec2 outputPosition = vec2(mix(outputStart.x, ends.y, extent), outputStart.y);"
"vec2 extensionVector = vec2(extension, 0.0) * 2.0 * (extent - 0.5);"
"vec2 extendedInputPosition = %s + extensionVector;"
"vec2 extendedOutputPosition = outputPosition + extensionVector;"
@ -75,8 +79,8 @@ std::unique_ptr<IntermediateShader> IntermediateShader::make_shader(const char *
"inputPositionsVarying[10] = mappedInputPosition + (vec2(offsets[0], 0.0) / textureSize);"
"delayLinePositionVarying = mappedInputPosition - vec2(0.0, 1.0);"
"phaseAndAmplitudeVarying.x = (phaseCyclesPerTick * (extendedOutputPosition.x - phaseTime) + phaseAndAmplitude.x) * 2.0 * 3.141592654;"
"phaseAndAmplitudeVarying.y = 0.33;" // TODO: reinstate connection with phaseAndAmplitude
"phaseAndAmplitudeVarying.x = (phaseCyclesPerTick * (extendedOutputPosition.x - phaseTimeAndAmplitude.y) + (phaseTimeAndAmplitude.x / 256.0)) * 2.0 * 3.141592654;"
"phaseAndAmplitudeVarying.y = 0.33;" // TODO: reinstate connection with (phaseTimeAndAmplitude.y/256.0)
"vec2 eyePosition = 2.0*(extendedOutputPosition / outputTextureSize) - vec2(1.0) + vec2(1.0)/outputTextureSize;"
"gl_Position = vec4(eyePosition, 0.0, 1.0);"