1
0
mirror of https://github.com/TomHarte/CLK.git synced 2024-11-26 23:52:26 +00:00

Forces a no-op compute shader into the S-Video pipeline.

The intention is to restrict the area acted over, and to do the S-Video filtering in there. Then I'll just need two such stages for composite.
This commit is contained in:
Thomas Harte 2020-09-01 18:39:52 -04:00
parent 67d4dbf91a
commit 67ca298a72
2 changed files with 101 additions and 50 deletions

View File

@ -202,9 +202,10 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
// Textures: additional storage used when processing S-Video and composite colour input. // Textures: additional storage used when processing S-Video and composite colour input.
id<MTLTexture> _finalisedLineTexture; id<MTLTexture> _finalisedLineTexture;
MTLRenderPassDescriptor *_finalisedLineRenderPass; id<MTLComputePipelineState> _finalisedLineState;
id<MTLTexture> _separatedLumaTexture; id<MTLTexture> _separatedLumaTexture;
MTLRenderPassDescriptor *_separatedLumaRenderPass; id<MTLComputePipelineState> _separatedLumaState;
NSUInteger _lineBufferPixelsPerLine;
// The scan target in C++-world terms and the non-GPU storage for it. // The scan target in C++-world terms and the non-GPU storage for it.
BufferingScanTarget _scanTarget; BufferingScanTarget _scanTarget;
@ -341,16 +342,15 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
width:2048 // This 'should do'. width:2048 // This 'should do'.
height:NumBufferedLines height:NumBufferedLines
mipmapped:NO]; mipmapped:NO];
lineTextureDescriptor.usage = MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead;
lineTextureDescriptor.resourceOptions = MTLResourceStorageModePrivate; lineTextureDescriptor.resourceOptions = MTLResourceStorageModePrivate;
if(_pipeline == Pipeline::DirectToDisplay) { if(_pipeline == Pipeline::DirectToDisplay) {
// Buffers are not required when outputting direct to display; so if this isn't that then release anything // Buffers are not required when outputting direct to display; so if this isn't that then release anything
// currently being held and return. // currently being held and return.
_finalisedLineTexture = nil; _finalisedLineTexture = nil;
_finalisedLineRenderPass = nil; _finalisedLineState = nil;
_separatedLumaTexture = nil; _separatedLumaTexture = nil;
_separatedLumaRenderPass = nil; _separatedLumaState = nil;
_compositionTexture = nil; _compositionTexture = nil;
_compositionRenderPass = nil; _compositionRenderPass = nil;
return; return;
@ -358,12 +358,18 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
// Create a composition texture if one does not yet exist. // Create a composition texture if one does not yet exist.
if(!_compositionTexture) { if(!_compositionTexture) {
lineTextureDescriptor.usage = MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead;
_compositionTexture = [_view.device newTextureWithDescriptor:lineTextureDescriptor]; _compositionTexture = [_view.device newTextureWithDescriptor:lineTextureDescriptor];
} }
// Grab the shader library.
id<MTLLibrary> library = [_view.device newDefaultLibrary];
lineTextureDescriptor.usage = MTLTextureUsageShaderWrite | MTLTextureUsageShaderRead;
// The finalised texture will definitely exist. // The finalised texture will definitely exist.
if(!_finalisedLineTexture) { if(!_finalisedLineTexture) {
_finalisedLineTexture = [_view.device newTextureWithDescriptor:lineTextureDescriptor]; _finalisedLineTexture = [_view.device newTextureWithDescriptor:lineTextureDescriptor];
_finalisedLineState = [_view.device newComputePipelineStateWithFunction:[library newFunctionWithName:@"copyKernel"] error:nil];
} }
// A luma separation texture will exist only for composite colour. // A luma separation texture will exist only for composite colour.
@ -542,7 +548,7 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
// Create suitable FIR filters. // Create suitable FIR filters.
auto *const firCoefficients = uniforms()->firCoefficients; auto *const firCoefficients = uniforms()->firCoefficients;
const float cyclesPerLine = float(modals.cycles_per_line) * uniforms()->cyclesMultiplier; _lineBufferPixelsPerLine = NSUInteger(modals.cycles_per_line) * NSUInteger(uniforms()->cyclesMultiplier);
const float colourCyclesPerLine = float(modals.colour_cycle_numerator) / float(modals.colour_cycle_denominator); const float colourCyclesPerLine = float(modals.colour_cycle_numerator) / float(modals.colour_cycle_denominator);
if(isSVideoOutput) { if(isSVideoOutput) {
@ -553,7 +559,7 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
firCoefficients[7].x = 1.0f; firCoefficients[7].x = 1.0f;
} else { } else {
// In composite, filter luminance gently. // In composite, filter luminance gently.
SignalProcessing::FIRFilter luminancefilter(15, cyclesPerLine, 0.0f, colourCyclesPerLine * 0.5f); SignalProcessing::FIRFilter luminancefilter(15, float(_lineBufferPixelsPerLine), 0.0f, colourCyclesPerLine * 0.5f);
const auto calculatedCoefficients = luminancefilter.get_coefficients(); const auto calculatedCoefficients = luminancefilter.get_coefficients();
for(size_t c = 0; c < 8; ++c) { for(size_t c = 0; c < 8; ++c) {
firCoefficients[c].x = calculatedCoefficients[c]; firCoefficients[c].x = calculatedCoefficients[c];
@ -561,13 +567,13 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
} }
// Whether S-Video or composite, apply the same relatively strong filter to colour channels. // Whether S-Video or composite, apply the same relatively strong filter to colour channels.
SignalProcessing::FIRFilter chrominancefilter(15, cyclesPerLine, 0.0f, colourCyclesPerLine * (isSVideoOutput ? 1.0f : 0.25f)); SignalProcessing::FIRFilter chrominancefilter(15, float(_lineBufferPixelsPerLine), 0.0f, colourCyclesPerLine * (isSVideoOutput ? 1.0f : 0.25f));
const auto calculatedCoefficients = chrominancefilter.get_coefficients(); const auto calculatedCoefficients = chrominancefilter.get_coefficients();
for(size_t c = 0; c < 8; ++c) { for(size_t c = 0; c < 8; ++c) {
firCoefficients[c].y = firCoefficients[c].z = calculatedCoefficients[c] * (isSVideoOutput ? 4.0f : 4.0f); firCoefficients[c].y = firCoefficients[c].z = calculatedCoefficients[c] * (isSVideoOutput ? 4.0f : 4.0f);
} }
uniforms()->radiansPerPixel = (colourCyclesPerLine * 3.141592654f * 2.0f) / cyclesPerLine; uniforms()->radiansPerPixel = (colourCyclesPerLine * 3.141592654f * 2.0f) / float(_lineBufferPixelsPerLine);
} }
// Build the output pipeline. // Build the output pipeline.
@ -602,7 +608,7 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
[encoder setRenderPipelineState:_outputPipeline]; [encoder setRenderPipelineState:_outputPipeline];
if(_pipeline != Pipeline::DirectToDisplay) { if(_pipeline != Pipeline::DirectToDisplay) {
[encoder setFragmentTexture:_compositionTexture atIndex:0]; [encoder setFragmentTexture:_finalisedLineTexture atIndex:0];
[encoder setVertexBuffer:_linesBuffer offset:0 atIndex:0]; [encoder setVertexBuffer:_linesBuffer offset:0 atIndex:0];
} else { } else {
[encoder setFragmentTexture:_writeAreaTexture atIndex:0]; [encoder setFragmentTexture:_writeAreaTexture atIndex:0];
@ -643,6 +649,38 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
[encoder endEncoding]; [encoder endEncoding];
} }
- (void)composeOutputArea:(const BufferingScanTarget::OutputArea &)outputArea commandBuffer:(id<MTLCommandBuffer>)commandBuffer {
// Output all scans to the composition buffer.
const id<MTLRenderCommandEncoder> encoder = [commandBuffer renderCommandEncoderWithDescriptor:_compositionRenderPass];
[encoder setRenderPipelineState:_composePipeline];
[encoder setVertexBuffer:_scansBuffer offset:0 atIndex:0];
[encoder setVertexBuffer:_uniformsBuffer offset:0 atIndex:1];
[encoder setVertexTexture:_compositionTexture atIndex:0];
[encoder setFragmentBuffer:_uniformsBuffer offset:0 atIndex:0];
[encoder setFragmentTexture:_writeAreaTexture atIndex:0];
#define OutputScans(start, size) [encoder drawPrimitives:MTLPrimitiveTypeLine vertexStart:0 vertexCount:2 instanceCount:size baseInstance:start]
RangePerform(outputArea.start.scan, outputArea.end.scan, NumBufferedScans, OutputScans);
#undef OutputScans
[encoder endEncoding];
}
- (void)dispatchComputeCommandEncoder:(id<MTLComputeCommandEncoder>)encoder pipelineState:(id<MTLComputePipelineState>)pipelineState width:(NSUInteger)width height:(NSUInteger)height {
// This follows the recommendations at https://developer.apple.com/documentation/metal/calculating_threadgroup_and_grid_sizes ;
// I currently have no independent opinion whatsoever.
const MTLSize threadsPerThreadgroup = MTLSizeMake(
pipelineState.threadExecutionWidth,
pipelineState.maxTotalThreadsPerThreadgroup / pipelineState.threadExecutionWidth,
1
);
const MTLSize threadsPerGrid = MTLSizeMake(width, height, 1);
[encoder setComputePipelineState:pipelineState];
[encoder dispatchThreads:threadsPerGrid threadsPerThreadgroup:threadsPerThreadgroup];
}
- (void)updateFrameBuffer { - (void)updateFrameBuffer {
// TODO: rethink BufferingScanTarget::perform. Is it now really just for guarding the modals? // TODO: rethink BufferingScanTarget::perform. Is it now really just for guarding the modals?
_scanTarget.perform([=] { _scanTarget.perform([=] {
@ -681,22 +719,37 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
// Hence every pixel is touched every frame, regardless of the machine's output. // Hence every pixel is touched every frame, regardless of the machine's output.
// //
if(_pipeline != Pipeline::DirectToDisplay) { switch(_pipeline) {
// Output all scans to the composition buffer. case Pipeline::DirectToDisplay: {
id<MTLRenderCommandEncoder> encoder = [commandBuffer renderCommandEncoderWithDescriptor:_compositionRenderPass]; // Output scans directly, broken up by frame.
[encoder setRenderPipelineState:_composePipeline]; size_t line = outputArea.start.line;
size_t scan = outputArea.start.scan;
while(line != outputArea.end.line) {
if(_lineMetadataBuffer[line].is_first_in_frame && _lineMetadataBuffer[line].previous_frame_was_complete) {
[self outputFrom:scan to:_lineMetadataBuffer[line].first_scan commandBuffer:commandBuffer];
[self outputFrameCleanerToCommandBuffer:commandBuffer];
scan = _lineMetadataBuffer[line].first_scan;
}
line = (line + 1) % NumBufferedLines;
}
[self outputFrom:scan to:outputArea.end.scan commandBuffer:commandBuffer];
} break;
[encoder setVertexBuffer:_scansBuffer offset:0 atIndex:0]; default: // TODO: add composite colour pipeline, and eliminate default.
[encoder setVertexBuffer:_uniformsBuffer offset:0 atIndex:1]; case Pipeline::SVideo: {
[encoder setVertexTexture:_compositionTexture atIndex:0]; // Build the composition buffer.
[self composeOutputArea:outputArea commandBuffer:commandBuffer];
[encoder setFragmentBuffer:_uniformsBuffer offset:0 atIndex:0]; // Filter to the finalised line texture.
[encoder setFragmentTexture:_writeAreaTexture atIndex:0]; id<MTLComputeCommandEncoder> computeEncoder = [commandBuffer computeCommandEncoder];
[computeEncoder setTexture:_compositionTexture atIndex:0];
[computeEncoder setTexture:_finalisedLineTexture atIndex:1];
[computeEncoder setBuffer:_uniformsBuffer offset:0 atIndex:0];
#define OutputScans(start, size) [encoder drawPrimitives:MTLPrimitiveTypeLine vertexStart:0 vertexCount:2 instanceCount:size baseInstance:start] // TODO: limit processed area to those lines that are actually in use.
RangePerform(outputArea.start.scan, outputArea.end.scan, NumBufferedScans, OutputScans); [self dispatchComputeCommandEncoder:computeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:NumBufferedLines];
#undef OutputScans
[encoder endEncoding]; [computeEncoder endEncoding];
// Output lines, broken up by frame. // Output lines, broken up by frame.
size_t startLine = outputArea.start.line; size_t startLine = outputArea.start.line;
@ -710,19 +763,7 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
line = (line + 1) % NumBufferedLines; line = (line + 1) % NumBufferedLines;
} }
[self outputFrom:startLine to:outputArea.end.line commandBuffer:commandBuffer]; [self outputFrom:startLine to:outputArea.end.line commandBuffer:commandBuffer];
} else { } break;
// Output scans directly, broken up by frame.
size_t line = outputArea.start.line;
size_t scan = outputArea.start.scan;
while(line != outputArea.end.line) {
if(_lineMetadataBuffer[line].is_first_in_frame && _lineMetadataBuffer[line].previous_frame_was_complete) {
[self outputFrom:scan to:_lineMetadataBuffer[line].first_scan commandBuffer:commandBuffer];
[self outputFrameCleanerToCommandBuffer:commandBuffer];
scan = _lineMetadataBuffer[line].first_scan;
}
line = (line + 1) % NumBufferedLines;
}
[self outputFrom:scan to:outputArea.end.scan commandBuffer:commandBuffer];
} }
// Add a callback to update the scan target buffer and commit the drawing. // Add a callback to update the scan target buffer and commit the drawing.

View File

@ -384,3 +384,13 @@ fragment float4 filterSVideoFragment(SourceInterpolator vert [[stage_in]], textu
fragment float4 filterCompositeFragment(SourceInterpolator vert [[stage_in]], texture2d<float> texture [[texture(0)]], constant Uniforms &uniforms [[buffer(0)]]) { fragment float4 filterCompositeFragment(SourceInterpolator vert [[stage_in]], texture2d<float> texture [[texture(0)]], constant Uniforms &uniforms [[buffer(0)]]) {
return applyFilter<true>(vert, texture, uniforms); return applyFilter<true>(vert, texture, uniforms);
} }
// MARK: - Kernel functions
// TEST FUNCTION. Just copies from input to output.
kernel void copyKernel( texture2d<float, access::read> inTexture [[texture(0)]],
texture2d<float, access::write> outTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]],
constant Uniforms &uniforms [[buffer(0)]]) {
outTexture.write(inTexture.read(gid), gid);
}