1
0
mirror of https://github.com/TomHarte/CLK.git synced 2024-07-07 23:29:06 +00:00

Restricts S-Video processing to updated lines.

This commit is contained in:
Thomas Harte 2020-09-01 21:27:40 -04:00
parent 245f2654f0
commit 9e2bf2af7e
2 changed files with 66 additions and 38 deletions

View File

@ -207,6 +207,11 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
id<MTLComputePipelineState> _separatedLumaState;
NSUInteger _lineBufferPixelsPerLine;
size_t _lineOffsetBuffer;
id<MTLBuffer> _lineOffsetBuffers[NumBufferedLines]; // Allocating NumBufferedLines buffers ensures these can't possibly be exhausted;
// for this list to be exhausted there'd have to be more draw calls in flight than
// there are lines for them to operate upon.
// The scan target in C++-world terms and the non-GPU storage for it.
BufferingScanTarget _scanTarget;
BufferingScanTarget::LineMetadata _lineMetadataBuffer[NumBufferedLines];
@ -266,6 +271,13 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
depthStencilDescriptor.frontFaceStencil.stencilFailureOperation = MTLStencilOperationReplace;
_clearStencilState = [view.device newDepthStencilStateWithDescriptor:depthStencilDescriptor];
// Allocate a large number of single-int buffers, for supplying offsets to the compute shaders.
// There's a ridiculous amount of overhead in this, but it avoids allocations during drawing,
// and a single int per instance is all I need.
for(size_t c = 0; c < NumBufferedLines; ++c) {
_lineOffsetBuffers[c] = [_view.device newBufferWithLength:sizeof(int) options:SharedResourceOptionsStandard];
}
// Ensure the is-drawing flag is initially clear.
_isDrawing.clear();
}
@ -667,7 +679,12 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
[encoder endEncoding];
}
- (void)dispatchComputeCommandEncoder:(id<MTLComputeCommandEncoder>)encoder pipelineState:(id<MTLComputePipelineState>)pipelineState width:(NSUInteger)width height:(NSUInteger)height {
- (void)dispatchComputeCommandEncoder:(id<MTLComputeCommandEncoder>)encoder pipelineState:(id<MTLComputePipelineState>)pipelineState width:(NSUInteger)width height:(NSUInteger)height offset:(size_t)offset {
// Store and apply the offset.
*(reinterpret_cast<int *>(_lineOffsetBuffers[_lineOffsetBuffer].contents)) = int(offset);
[encoder setBuffer:_lineOffsetBuffers[_lineOffsetBuffer] offset:0 atIndex:1];
_lineOffsetBuffer = (_lineOffsetBuffer + 1) % NumBufferedLines;
// This follows the recommendations at https://developer.apple.com/documentation/metal/calculating_threadgroup_and_grid_sizes ;
// I currently have no independent opinion whatsoever.
const MTLSize threadsPerThreadgroup = MTLSizeMake(
@ -677,6 +694,7 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
);
const MTLSize threadsPerGrid = MTLSizeMake(width, height, 1);
// Set the pipeline state and dispatch the drawing. Which may slightly overdraw.
[encoder setComputePipelineState:pipelineState];
[encoder dispatchThreads:threadsPerGrid threadsPerThreadgroup:threadsPerThreadgroup];
}
@ -740,29 +758,37 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
// Build the composition buffer.
[self composeOutputArea:outputArea commandBuffer:commandBuffer];
// Filter to the finalised line texture.
id<MTLComputeCommandEncoder> computeEncoder = [commandBuffer computeCommandEncoder];
[computeEncoder setTexture:_compositionTexture atIndex:0];
[computeEncoder setTexture:_finalisedLineTexture atIndex:1];
[computeEncoder setBuffer:_uniformsBuffer offset:0 atIndex:0];
if(outputArea.end.line != outputArea.start.line) {
// Filter to the finalised line texture.
id<MTLComputeCommandEncoder> computeEncoder = [commandBuffer computeCommandEncoder];
[computeEncoder setTexture:_compositionTexture atIndex:0];
[computeEncoder setTexture:_finalisedLineTexture atIndex:1];
[computeEncoder setBuffer:_uniformsBuffer offset:0 atIndex:0];
// TODO: limit processed area to those lines that are actually in use.
[self dispatchComputeCommandEncoder:computeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:NumBufferedLines];
[computeEncoder endEncoding];
// Output lines, broken up by frame.
size_t startLine = outputArea.start.line;
size_t line = outputArea.start.line;
while(line != outputArea.end.line) {
if(_lineMetadataBuffer[line].is_first_in_frame && _lineMetadataBuffer[line].previous_frame_was_complete) {
[self outputFrom:startLine to:line commandBuffer:commandBuffer];
[self outputFrameCleanerToCommandBuffer:commandBuffer];
startLine = line;
if(outputArea.end.line > outputArea.start.line) {
[self dispatchComputeCommandEncoder:computeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:outputArea.end.line - outputArea.start.line offset:outputArea.start.line];
} else {
[self dispatchComputeCommandEncoder:computeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:NumBufferedLines - outputArea.start.line offset:outputArea.start.line];
if(outputArea.end.line) {
[self dispatchComputeCommandEncoder:computeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:outputArea.end.line offset:0];
}
}
line = (line + 1) % NumBufferedLines;
[computeEncoder endEncoding];
// Output lines, broken up by frame.
size_t startLine = outputArea.start.line;
size_t line = outputArea.start.line;
while(line != outputArea.end.line) {
if(_lineMetadataBuffer[line].is_first_in_frame && _lineMetadataBuffer[line].previous_frame_was_complete) {
[self outputFrom:startLine to:line commandBuffer:commandBuffer];
[self outputFrameCleanerToCommandBuffer:commandBuffer];
startLine = line;
}
line = (line + 1) % NumBufferedLines;
}
[self outputFrom:startLine to:outputArea.end.line commandBuffer:commandBuffer];
}
[self outputFrom:startLine to:outputArea.end.line commandBuffer:commandBuffer];
} break;
}

View File

@ -7,6 +7,7 @@
//
#include <metal_stdlib>
using namespace metal;
// TODO: I'm being very loose, so far, in use of alpha. Sometimes it's 0.64, somtimes its 1.0.
@ -402,24 +403,25 @@ fragment float4 interpolateFragment(CopyInterpolator vert [[stage_in]], texture2
kernel void filterChromaKernel( texture2d<float, access::read> inTexture [[texture(0)]],
texture2d<float, access::write> outTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]],
constant Uniforms &uniforms [[buffer(0)]]) {
constant Uniforms &uniforms [[buffer(0)]],
constant int &offset [[buffer(1)]]) {
constexpr float4 moveToZero = float4(0.0f, 0.5f, 0.5f, 0.0f);
const float4 rawSamples[] = {
inTexture.read(gid) - moveToZero,
inTexture.read(gid + uint2(1, 0)) - moveToZero,
inTexture.read(gid + uint2(2, 0)) - moveToZero,
inTexture.read(gid + uint2(3, 0)) - moveToZero,
inTexture.read(gid + uint2(4, 0)) - moveToZero,
inTexture.read(gid + uint2(5, 0)) - moveToZero,
inTexture.read(gid + uint2(6, 0)) - moveToZero,
inTexture.read(gid + uint2(7, 0)) - moveToZero,
inTexture.read(gid + uint2(8, 0)) - moveToZero,
inTexture.read(gid + uint2(9, 0)) - moveToZero,
inTexture.read(gid + uint2(10, 0)) - moveToZero,
inTexture.read(gid + uint2(11, 0)) - moveToZero,
inTexture.read(gid + uint2(12, 0)) - moveToZero,
inTexture.read(gid + uint2(13, 0)) - moveToZero,
inTexture.read(gid + uint2(14, 0)) - moveToZero,
inTexture.read(gid + uint2(0, offset)) - moveToZero,
inTexture.read(gid + uint2(1, offset)) - moveToZero,
inTexture.read(gid + uint2(2, offset)) - moveToZero,
inTexture.read(gid + uint2(3, offset)) - moveToZero,
inTexture.read(gid + uint2(4, offset)) - moveToZero,
inTexture.read(gid + uint2(5, offset)) - moveToZero,
inTexture.read(gid + uint2(6, offset)) - moveToZero,
inTexture.read(gid + uint2(7, offset)) - moveToZero,
inTexture.read(gid + uint2(8, offset)) - moveToZero,
inTexture.read(gid + uint2(9, offset)) - moveToZero,
inTexture.read(gid + uint2(10, offset)) - moveToZero,
inTexture.read(gid + uint2(11, offset)) - moveToZero,
inTexture.read(gid + uint2(12, offset)) - moveToZero,
inTexture.read(gid + uint2(13, offset)) - moveToZero,
inTexture.read(gid + uint2(14, offset)) - moveToZero,
};
#define Sample(x, y) uniforms.firCoefficients[y] * rawSamples[x].rgb
@ -429,5 +431,5 @@ kernel void filterChromaKernel( texture2d<float, access::read> inTexture [[textu
Sample(8, 6) + Sample(9, 5) + Sample(10, 4) + Sample(11, 3) + Sample(12, 2) + Sample(13, 1) + Sample(14, 0);
#undef Sample
outTexture.write(float4(uniforms.toRGB * colour, 1.0f), gid + uint2(7, 0));
outTexture.write(float4(uniforms.toRGB * colour, 1.0f), gid + uint2(7, offset));
}