From 15296e43a40965babae78e91529b59626a2acd5d Mon Sep 17 00:00:00 2001 From: Thomas Harte Date: Tue, 1 Sep 2020 21:58:33 -0400 Subject: [PATCH] Attempts correctly to set up the CPU side of a composite video pipeline, at least. So: I think this is really close, but I'm out of time for the day. --- .../Clock Signal/ScanTarget/CSScanTarget.mm | 162 +++++++++++------- .../Clock Signal/ScanTarget/ScanTarget.metal | 91 +++++----- 2 files changed, 151 insertions(+), 102 deletions(-) diff --git a/OSBindings/Mac/Clock Signal/ScanTarget/CSScanTarget.mm b/OSBindings/Mac/Clock Signal/ScanTarget/CSScanTarget.mm index d5b609efe..b4721a7c4 100644 --- a/OSBindings/Mac/Clock Signal/ScanTarget/CSScanTarget.mm +++ b/OSBindings/Mac/Clock Signal/ScanTarget/CSScanTarget.mm @@ -388,6 +388,7 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget; if(_pipeline == Pipeline::CompositeColour) { if(!_separatedLumaTexture) { _separatedLumaTexture = [_view.device newTextureWithDescriptor:lineTextureDescriptor]; + _separatedLumaState = [_view.device newComputePipelineStateWithFunction:[library newFunctionWithName:@"separateLumaKernel"] error:nil]; } } else { _separatedLumaTexture = nil; @@ -713,68 +714,104 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget; const auto outputArea = _scanTarget.get_output_area(); - // Ensure texture changes are noted. - const auto writeAreaModificationStart = size_t(outputArea.start.write_area_x + outputArea.start.write_area_y * 2048) * _bytesPerInputPixel; - const auto writeAreaModificationEnd = size_t(outputArea.end.write_area_x + outputArea.end.write_area_y * 2048) * _bytesPerInputPixel; + if(outputArea.end.line != outputArea.start.line) { + + // Ensure texture changes are noted. + const auto writeAreaModificationStart = size_t(outputArea.start.write_area_x + outputArea.start.write_area_y * 2048) * _bytesPerInputPixel; + const auto writeAreaModificationEnd = size_t(outputArea.end.write_area_x + outputArea.end.write_area_y * 2048) * _bytesPerInputPixel; #define FlushRegion(start, size) [_writeAreaBuffer didModifyRange:NSMakeRange(start, size)] - RangePerform(writeAreaModificationStart, writeAreaModificationEnd, _totalTextureBytes, FlushRegion); + RangePerform(writeAreaModificationStart, writeAreaModificationEnd, _totalTextureBytes, FlushRegion); #undef FlushRegion - // Obtain a source for render command encoders. - id commandBuffer = [_commandQueue commandBuffer]; + // Obtain a source for render command encoders. + id commandBuffer = [_commandQueue commandBuffer]; - // - // Drawing algorithm used below, in broad terms: - // - // Maintain a persistent buffer of current CRT state. - // - // During each frame, paint to the persistent buffer anything new. Update a stencil buffer to track - // every pixel so-far touched. - // - // At the end of the frame, draw a 'frame cleaner', which is a whole-screen rect that paints over - // only those areas that the stencil buffer indicates weren't painted this frame. - // - // Hence every pixel is touched every frame, regardless of the machine's output. - // + // + // Drawing algorithm used below, in broad terms: + // + // Maintain a persistent buffer of current CRT state. + // + // During each frame, paint to the persistent buffer anything new. Update a stencil buffer to track + // every pixel so-far touched. + // + // At the end of the frame, draw a 'frame cleaner', which is a whole-screen rect that paints over + // only those areas that the stencil buffer indicates weren't painted this frame. 
+ // + // Hence every pixel is touched every frame, regardless of the machine's output. + // - switch(_pipeline) { - case Pipeline::DirectToDisplay: { - // Output scans directly, broken up by frame. - size_t line = outputArea.start.line; - size_t scan = outputArea.start.scan; - while(line != outputArea.end.line) { - if(_lineMetadataBuffer[line].is_first_in_frame && _lineMetadataBuffer[line].previous_frame_was_complete) { - [self outputFrom:scan to:_lineMetadataBuffer[line].first_scan commandBuffer:commandBuffer]; - [self outputFrameCleanerToCommandBuffer:commandBuffer]; - scan = _lineMetadataBuffer[line].first_scan; - } - line = (line + 1) % NumBufferedLines; - } - [self outputFrom:scan to:outputArea.end.scan commandBuffer:commandBuffer]; - } break; - - default: // TODO: add composite colour pipeline, and eliminate default. - case Pipeline::SVideo: { - // Build the composition buffer. - [self composeOutputArea:outputArea commandBuffer:commandBuffer]; - - if(outputArea.end.line != outputArea.start.line) { - // Filter to the finalised line texture. - id computeEncoder = [commandBuffer computeCommandEncoder]; - [computeEncoder setTexture:_compositionTexture atIndex:0]; - [computeEncoder setTexture:_finalisedLineTexture atIndex:1]; - [computeEncoder setBuffer:_uniformsBuffer offset:0 atIndex:0]; - - if(outputArea.end.line > outputArea.start.line) { - [self dispatchComputeCommandEncoder:computeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:outputArea.end.line - outputArea.start.line offset:outputArea.start.line]; - } else { - [self dispatchComputeCommandEncoder:computeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:NumBufferedLines - outputArea.start.line offset:outputArea.start.line]; - if(outputArea.end.line) { - [self dispatchComputeCommandEncoder:computeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:outputArea.end.line offset:0]; + switch(_pipeline) { + case Pipeline::DirectToDisplay: { + // Output scans directly, broken up by frame. + size_t line = outputArea.start.line; + size_t scan = outputArea.start.scan; + while(line != outputArea.end.line) { + if(_lineMetadataBuffer[line].is_first_in_frame && _lineMetadataBuffer[line].previous_frame_was_complete) { + [self outputFrom:scan to:_lineMetadataBuffer[line].first_scan commandBuffer:commandBuffer]; + [self outputFrameCleanerToCommandBuffer:commandBuffer]; + scan = _lineMetadataBuffer[line].first_scan; } + line = (line + 1) % NumBufferedLines; } + [self outputFrom:scan to:outputArea.end.scan commandBuffer:commandBuffer]; + } break; - [computeEncoder endEncoding]; + case Pipeline::CompositeColour: + case Pipeline::SVideo: { + // Build the composition buffer. + [self composeOutputArea:outputArea commandBuffer:commandBuffer]; + + if(_pipeline == Pipeline::SVideo) { + // Filter from composition to the finalised line texture. 
+ id<MTLComputeCommandEncoder> computeEncoder = [commandBuffer computeCommandEncoder];
+ [computeEncoder setTexture:_compositionTexture atIndex:0];
+ [computeEncoder setTexture:_finalisedLineTexture atIndex:1];
+ [computeEncoder setBuffer:_uniformsBuffer offset:0 atIndex:0];
+
+ if(outputArea.end.line > outputArea.start.line) {
+ [self dispatchComputeCommandEncoder:computeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:outputArea.end.line - outputArea.start.line offset:outputArea.start.line];
+ } else {
+ [self dispatchComputeCommandEncoder:computeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:NumBufferedLines - outputArea.start.line offset:outputArea.start.line];
+ if(outputArea.end.line) {
+ [self dispatchComputeCommandEncoder:computeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:outputArea.end.line offset:0];
+ }
+ }
+
+ [computeEncoder endEncoding];
+ } else {
+ // Separate luma and then filter.
+ id<MTLComputeCommandEncoder> separateComputeEncoder = [commandBuffer computeCommandEncoder];
+ [separateComputeEncoder setTexture:_compositionTexture atIndex:0];
+ [separateComputeEncoder setTexture:_separatedLumaTexture atIndex:1];
+ [separateComputeEncoder setBuffer:_uniformsBuffer offset:0 atIndex:0];
+
+ if(outputArea.end.line > outputArea.start.line) {
+ [self dispatchComputeCommandEncoder:separateComputeEncoder pipelineState:_separatedLumaState width:_lineBufferPixelsPerLine height:outputArea.end.line - outputArea.start.line offset:outputArea.start.line];
+ } else {
+ [self dispatchComputeCommandEncoder:separateComputeEncoder pipelineState:_separatedLumaState width:_lineBufferPixelsPerLine height:NumBufferedLines - outputArea.start.line offset:outputArea.start.line];
+ if(outputArea.end.line) {
+ [self dispatchComputeCommandEncoder:separateComputeEncoder pipelineState:_separatedLumaState width:_lineBufferPixelsPerLine height:outputArea.end.line offset:0];
+ }
+ }
+
+ [separateComputeEncoder endEncoding];
+
+ id<MTLComputeCommandEncoder> filterComputeEncoder = [commandBuffer computeCommandEncoder];
+ [filterComputeEncoder setTexture:_separatedLumaTexture atIndex:0];
+ [filterComputeEncoder setTexture:_finalisedLineTexture atIndex:1];
+ [filterComputeEncoder setBuffer:_uniformsBuffer offset:0 atIndex:0];
+
+ if(outputArea.end.line > outputArea.start.line) {
+ [self dispatchComputeCommandEncoder:filterComputeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:outputArea.end.line - outputArea.start.line offset:outputArea.start.line];
+ } else {
+ [self dispatchComputeCommandEncoder:filterComputeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:NumBufferedLines - outputArea.start.line offset:outputArea.start.line];
+ if(outputArea.end.line) {
+ [self dispatchComputeCommandEncoder:filterComputeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:outputArea.end.line offset:0];
+ }
+ }
+
+ [filterComputeEncoder endEncoding];
+ }

 // Output lines, broken up by frame.
 size_t startLine = outputArea.start.line;
@@ -788,15 +825,18 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
 line = (line + 1) % NumBufferedLines;
 }
 [self outputFrom:startLine to:outputArea.end.line commandBuffer:commandBuffer];
- }
- } break;
- }
+ } break;
+ }

- // Add a callback to update the scan target buffer and commit the drawing.
- [commandBuffer addCompletedHandler:^(id<MTLCommandBuffer> _Nonnull) {
+ // Add a callback to update the scan target buffer and commit the drawing.
+ [commandBuffer addCompletedHandler:^(id _Nonnull) { + self->_scanTarget.complete_output_area(outputArea); + }]; + [commandBuffer commit]; + } else { + // There was no work, but to be contractually correct: self->_scanTarget.complete_output_area(outputArea); - }]; - [commandBuffer commit]; + } } } diff --git a/OSBindings/Mac/Clock Signal/ScanTarget/ScanTarget.metal b/OSBindings/Mac/Clock Signal/ScanTarget/ScanTarget.metal index e0d04f9c2..af86c6318 100644 --- a/OSBindings/Mac/Clock Signal/ScanTarget/ScanTarget.metal +++ b/OSBindings/Mac/Clock Signal/ScanTarget/ScanTarget.metal @@ -307,8 +307,8 @@ float3 convertRed1Green1Blue1(SourceInterpolator vert, texture2d texture const float level = mix(colour.r, dot(colour.gb, colourSubcarrier), vert.colourAmplitude); \ return float4( \ level, \ - float2(0.5f) + level*colourSubcarrier*0.5f, \ - 1.0 \ + float2(0.5f) + quadrature(vert.colourPhase)*0.5f, \ + vert.colourPhase \ ); \ } @@ -352,52 +352,16 @@ fragment float4 clearFragment() { return float4(0.0, 0.0, 0.0, 0.64); } -// MARK: - Conversion fragment shaders - -template float4 applyFilter(SourceInterpolator vert [[stage_in]], texture2d texture [[texture(0)]], constant Uniforms &uniforms [[buffer(0)]]) { -#define Sample(x) texture.sample(standardSampler, vert.textureCoordinates + float2(x, 0.0f)) - float4(0.0f, 0.5f, 0.5f, 0.0f) - float4 rawSamples[] = { - Sample(-7), Sample(-6), Sample(-5), Sample(-4), Sample(-3), Sample(-2), Sample(-1), - Sample(0), - Sample(1), Sample(2), Sample(3), Sample(4), Sample(5), Sample(6), Sample(7), - }; -#undef Sample - -#define Sample(c, o, a) uniforms.firCoefficients[c] * rawSamples[o].rgb - - const float3 colour = - Sample(0, 0, -7) + Sample(1, 1, -6) + Sample(2, 2, -5) + Sample(3, 3, -4) + - Sample(4, 4, -3) + Sample(5, 5, -2) + Sample(6, 6, -1) + - Sample(7, 7, 0) + - Sample(6, 8, 1) + Sample(5, 9, 2) + Sample(4, 10, 3) + - Sample(3, 11, 4) + Sample(2, 12, 5) + Sample(1, 13, 6) + Sample(0, 14, 7); - -#undef Sample - - // This would be `if constexpr`, were this C++17; the compiler should do compile-time selection here regardless. - if(applyCompositeAmplitude) { - return float4(uniforms.toRGB * (colour * float3(1.0f, 1.0f / vert.colourAmplitude, 1.0f / vert.colourAmplitude)), 1.0f); - } else { - return float4(uniforms.toRGB * colour, 1.0f); - } -} - -fragment float4 filterSVideoFragment(SourceInterpolator vert [[stage_in]], texture2d texture [[texture(0)]], constant Uniforms &uniforms [[buffer(0)]]) { - return applyFilter(vert, texture, uniforms); -} - -fragment float4 filterCompositeFragment(SourceInterpolator vert [[stage_in]], texture2d texture [[texture(0)]], constant Uniforms &uniforms [[buffer(0)]]) { - return applyFilter(vert, texture, uniforms); -} +// MARK: - fragment float4 interpolateFragment(CopyInterpolator vert [[stage_in]], texture2d texture [[texture(0)]]) { return texture.sample(linearSampler, vert.textureCoordinates); } - // MARK: - Kernel functions -// TEST FUNCTION. Just copies from input to output. +/// Given input pixels of the form (luminance, 0.5 + 0.5*chrominance*cos(phase), 0.5 + 0.5*chrominance*sin(phase)), applies a lowpass +/// filter to the two chrominance parts, then uses the toRGB matrix to convert to RGB and stores. 
 kernel void filterChromaKernel( texture2d<float, access::read> inTexture [[texture(0)]],
 texture2d<float, access::write> outTexture [[texture(1)]],
 uint2 gid [[thread_position_in_grid]],
 constant Uniforms &uniforms [[buffer(0)]],
@@ -431,3 +395,48 @@ kernel void filterChromaKernel( texture2d<float, access::read> inTextu
 outTexture.write(float4(uniforms.toRGB * colour, 1.0f), gid + uint2(7, offset));
 }
+
+/// Given input pixels of the form:
+///
+/// (composite sample, cos(phase), sin(phase), colour amplitude)
+///
+/// applies a lowpass filter to separate luminance, subtracts that, and scales and maps the remaining chrominance
+/// in order to output pixels in the form:
+///
+/// (luminance, 0.5 + 0.5*chrominance*cos(phase), 0.5 + 0.5*chrominance*sin(phase))
+///
+/// i.e. the input form for the filterChromaKernel, above.
+kernel void separateLumaKernel( texture2d<float, access::read> inTexture [[texture(0)]],
+ texture2d<float, access::write> outTexture [[texture(1)]],
+ uint2 gid [[thread_position_in_grid]],
+ constant Uniforms &uniforms [[buffer(0)]],
+ constant int &offset [[buffer(1)]]) {
+ // TODO!
+ constexpr float4 moveToZero = float4(0.0f, 0.5f, 0.5f, 0.0f);
+ const float4 rawSamples[] = {
+ inTexture.read(gid + uint2(0, offset)) - moveToZero,
+ inTexture.read(gid + uint2(1, offset)) - moveToZero,
+ inTexture.read(gid + uint2(2, offset)) - moveToZero,
+ inTexture.read(gid + uint2(3, offset)) - moveToZero,
+ inTexture.read(gid + uint2(4, offset)) - moveToZero,
+ inTexture.read(gid + uint2(5, offset)) - moveToZero,
+ inTexture.read(gid + uint2(6, offset)) - moveToZero,
+ inTexture.read(gid + uint2(7, offset)) - moveToZero,
+ inTexture.read(gid + uint2(8, offset)) - moveToZero,
+ inTexture.read(gid + uint2(9, offset)) - moveToZero,
+ inTexture.read(gid + uint2(10, offset)) - moveToZero,
+ inTexture.read(gid + uint2(11, offset)) - moveToZero,
+ inTexture.read(gid + uint2(12, offset)) - moveToZero,
+ inTexture.read(gid + uint2(13, offset)) - moveToZero,
+ inTexture.read(gid + uint2(14, offset)) - moveToZero,
+ };
+
+#define Sample(x, y) uniforms.firCoefficients[y] * rawSamples[x].rgb
+ const float3 colour =
+ Sample(0, 0) + Sample(1, 1) + Sample(2, 2) + Sample(3, 3) + Sample(4, 4) + Sample(5, 5) + Sample(6, 6) +
+ Sample(7, 7) +
+ Sample(8, 6) + Sample(9, 5) + Sample(10, 4) + Sample(11, 3) + Sample(12, 2) + Sample(13, 1) + Sample(14, 0);
+#undef Sample
+
+ outTexture.write(float4(uniforms.toRGB * colour, 1.0f), gid + uint2(7, offset));
+}
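
As written, the kernel above is still a placeholder (hence the TODO): it re-applies the chroma-style filter rather than separating luminance. As a rough sketch of what the completed body might look like, following the kernel's own comment: low-pass only the composite channel to recover luminance, treat the residue at the centre tap as chrominance, normalise it by the colour amplitude, and remap it through the stored quadrature pair. This sketch assumes the colour amplitude arrives in the alpha channel (as the comment describes; the composite macro currently writes vert.colourPhase there) and reuses uniforms.firCoefficients as the luminance low-pass, where a dedicated coefficient set may ultimately be wanted; it is illustrative only, not part of the patch.

kernel void separateLumaKernel( texture2d<float, access::read> inTexture [[texture(0)]],
                                texture2d<float, access::write> outTexture [[texture(1)]],
                                uint2 gid [[thread_position_in_grid]],
                                constant Uniforms &uniforms [[buffer(0)]],
                                constant int &offset [[buffer(1)]]) {
    constexpr float4 moveToZero = float4(0.0f, 0.5f, 0.5f, 0.0f);

    // Gather the 15-tap neighbourhood, recentring the quadrature channels around zero.
    float4 rawSamples[15];
    for(int c = 0; c < 15; ++c) {
        rawSamples[c] = inTexture.read(gid + uint2(c, offset)) - moveToZero;
    }

    // Apply the symmetric FIR to the composite channel only, recovering luminance at the centre tap.
    float luminance = 0.0f;
    for(int c = 0; c < 15; ++c) {
        luminance += uniforms.firCoefficients[c > 7 ? 14 - c : c] * rawSamples[c].r;
    }

    // Whatever the low-pass rejected is chrominance; normalise by the colour amplitude
    // (assumed to be in the alpha channel) and remap through the stored quadrature pair,
    // producing the (luminance, 0.5 + 0.5*chrominance*cos, 0.5 + 0.5*chrominance*sin)
    // form that filterChromaKernel expects.
    const float4 centre = rawSamples[7];
    const float chrominance = centre.a > 0.0f ? (centre.r - luminance) / centre.a : 0.0f;
    outTexture.write(
        float4(luminance, float2(0.5f) + centre.gb * chrominance, 1.0f),
        gid + uint2(7, offset));
}

Dividing by the amplitude at this stage mirrors the normalisation the removed applyFilter performed for the composite path, so the downstream chroma filter can remain amplitude-agnostic.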