1
0
mirror of https://github.com/TomHarte/CLK.git synced 2025-02-05 05:34:20 +00:00

Introduces smaller luma kernel functions where useable.

This commit is contained in:
Thomas Harte 2020-09-08 19:55:37 -04:00
parent c82e0df071
commit dda1649ab7
2 changed files with 142 additions and 60 deletions

View File

@ -460,7 +460,18 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
if(_pipeline == Pipeline::CompositeColour) {
if(!_separatedLumaTexture) {
_separatedLumaTexture = [_view.device newTextureWithDescriptor:lineTextureDescriptor];
_separatedLumaState = [_view.device newComputePipelineStateWithFunction:[library newFunctionWithName:@"separateLumaKernel"] error:nil];
// Pick the name of the luma-separation compute kernel from _lumaKernelSize:
// sizes 1, 3 and 5 all share the 5-tap kernel, 7 and 9 map to kernels of exactly
// that width, and every other value falls back to the full 15-tap kernel.
// NOTE(review): _lumaKernelSize is assigned outside this view; presumably it is the
// width of the non-zero portion of the luma filter — confirm against where it is set.
NSString *kernelFunction;
switch(_lumaKernelSize) {
default: kernelFunction = @"separateLumaKernel15"; break;
case 9: kernelFunction = @"separateLumaKernel9"; break;
case 7: kernelFunction = @"separateLumaKernel7"; break;
case 1:
case 3:
case 5: kernelFunction = @"separateLumaKernel5"; break;
}
_separatedLumaState = [_view.device newComputePipelineStateWithFunction:[library newFunctionWithName:kernelFunction] error:nil];
}
} else {
_separatedLumaTexture = nil;
@ -580,9 +591,6 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
_pipeline = isSVideoOutput ? Pipeline::SVideo : Pipeline::CompositeColour;
}
// Update intermediate storage.
[self updateModalBuffers];
// TODO: factor in gamma, which may or may not be a factor (it isn't for 1-bit formats).
struct FragmentSamplerDictionary {
/// Fragment shader that outputs to the composition buffer for composite processing.
@ -633,21 +641,7 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
}
}
// Create the composition render pass.
pipelineDescriptor.colorAttachments[0].pixelFormat = _compositionTexture.pixelFormat;
pipelineDescriptor.vertexFunction = [library newFunctionWithName:@"scanToComposition"];
pipelineDescriptor.fragmentFunction =
[library newFunctionWithName:isSVideoOutput ? samplerDictionary[int(modals.input_data_type)].compositionSVideo : samplerDictionary[int(modals.input_data_type)].compositionComposite];
_composePipeline = [_view.device newRenderPipelineStateWithDescriptor:pipelineDescriptor error:nil];
_compositionRenderPass = [[MTLRenderPassDescriptor alloc] init];
_compositionRenderPass.colorAttachments[0].texture = _compositionTexture;
_compositionRenderPass.colorAttachments[0].loadAction = MTLLoadActionClear;
_compositionRenderPass.colorAttachments[0].storeAction = MTLStoreActionStore;
_compositionRenderPass.colorAttachments[0].clearColor = MTLClearColorMake(0.0, 0.5, 0.5, 0.3);
// Create suitable FIR filters.
// Create suitable filters.
_lineBufferPixelsPerLine = NSUInteger(modals.cycles_per_line) * NSUInteger(uniforms()->cyclesMultiplier);
const float colourCyclesPerLine = float(modals.colour_cycle_numerator) / float(modals.colour_cycle_denominator);
@ -693,6 +687,25 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
}
}
// Update intermediate storage.
[self updateModalBuffers];
if(_pipeline != Pipeline::DirectToDisplay) {
// Create the composition render pass.
pipelineDescriptor.colorAttachments[0].pixelFormat = _compositionTexture.pixelFormat;
pipelineDescriptor.vertexFunction = [library newFunctionWithName:@"scanToComposition"];
pipelineDescriptor.fragmentFunction =
[library newFunctionWithName:isSVideoOutput ? samplerDictionary[int(modals.input_data_type)].compositionSVideo : samplerDictionary[int(modals.input_data_type)].compositionComposite];
_composePipeline = [_view.device newRenderPipelineStateWithDescriptor:pipelineDescriptor error:nil];
_compositionRenderPass = [[MTLRenderPassDescriptor alloc] init];
_compositionRenderPass.colorAttachments[0].texture = _compositionTexture;
_compositionRenderPass.colorAttachments[0].loadAction = MTLLoadActionClear;
_compositionRenderPass.colorAttachments[0].storeAction = MTLStoreActionStore;
_compositionRenderPass.colorAttachments[0].clearColor = MTLClearColorMake(0.0, 0.5, 0.5, 0.3);
}
// Build the output pipeline.
pipelineDescriptor.colorAttachments[0].pixelFormat = _view.colorPixelFormat;
pipelineDescriptor.vertexFunction = [library newFunctionWithName:_pipeline == Pipeline::DirectToDisplay ? @"scanToDisplay" : @"lineToDisplay"];

View File

@ -439,47 +439,7 @@ kernel void filterChromaKernelWithGamma( texture2d<half, access::read> inTexture
filterChromaKernel<true>(inTexture, outTexture, gid, uniforms, offset);
}
/// Given input pixels of the form:
///
/// (composite sample, cos(phase), sin(phase), colour amplitude), applies a lowpass
///
/// Filters to separate luminance, subtracts that and scales and maps the remaining chrominance in order to output
/// pixels in the form:
///
/// (luminance, 0.5 + 0.5*chrominance*cos(phase), 0.5 + 0.5*chrominance*sin(phase))
///
/// i.e. the input form for the filterChromaKernel, above.
kernel void separateLumaKernel( texture2d<half, access::read> inTexture [[texture(0)]],
texture2d<half, access::write> outTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]],
constant Uniforms &uniforms [[buffer(0)]],
constant int &offset [[buffer(1)]]) {
const half4 centreSample = inTexture.read(gid + uint2(7, offset));
const half rawSamples[] = {
inTexture.read(gid + uint2(0, offset)).r,
inTexture.read(gid + uint2(1, offset)).r,
inTexture.read(gid + uint2(2, offset)).r,
inTexture.read(gid + uint2(3, offset)).r,
inTexture.read(gid + uint2(4, offset)).r,
inTexture.read(gid + uint2(5, offset)).r,
inTexture.read(gid + uint2(6, offset)).r,
centreSample.r,
inTexture.read(gid + uint2(8, offset)).r,
inTexture.read(gid + uint2(9, offset)).r,
inTexture.read(gid + uint2(10, offset)).r,
inTexture.read(gid + uint2(11, offset)).r,
inTexture.read(gid + uint2(12, offset)).r,
inTexture.read(gid + uint2(13, offset)).r,
inTexture.read(gid + uint2(14, offset)).r,
};
#define Sample(x, y) uniforms.lumaKernel[y] * rawSamples[x]
const half luminance =
Sample(0, 0) + Sample(1, 1) + Sample(2, 2) + Sample(3, 3) + Sample(4, 4) + Sample(5, 5) + Sample(6, 6) +
Sample(7, 7) +
Sample(8, 6) + Sample(9, 5) + Sample(10, 4) + Sample(11, 3) + Sample(12, 2) + Sample(13, 1) + Sample(14, 0);
#undef Sample
void setSeparatedLumaChroma(half luminance, half4 centreSample, texture2d<half, access::write> outTexture, uint2 gid, int offset) {
// The mix/steps below ensures that the absence of a colour burst leads the colour subcarrier to be discarded.
const half isColour = step(half(0.01f), centreSample.a);
const half chroma = (centreSample.r - luminance) / mix(half(1.0f), centreSample.a, isColour);
@ -490,3 +450,112 @@ kernel void separateLumaKernel( texture2d<half, access::read> inTexture [[textur
),
gid + uint2(7, offset));
}
/// Separates luminance from chrominance using a symmetric 15-tap lowpass filter.
///
/// Input pixels are of the form:
///
///	(composite sample, cos(phase), sin(phase), colour amplitude)
///
/// The composite samples at x offsets 0 to 14 from @c gid, on row @c gid.y + offset, are weighted by
/// @c uniforms.lumaKernel — coefficient 7 applies to the centre pixel at x offset 7, descending to coefficient 0
/// at both ends — and summed to obtain luminance. That is subtracted, and the remaining chrominance is scaled and
/// mapped, so that the pixel written via @c setSeparatedLumaChroma is of the form:
///
///	(luminance, 0.5 + 0.5*chrominance*cos(phase), 0.5 + 0.5*chrominance*sin(phase))
///
/// i.e. the input form for the filterChromaKernel.
kernel void separateLumaKernel15(	texture2d<half, access::read> inTexture [[texture(0)]],
									texture2d<half, access::write> outTexture [[texture(1)]],
									uint2 gid [[thread_position_in_grid]],
									constant Uniforms &uniforms [[buffer(0)]],
									constant int &offset [[buffer(1)]]) {
	// The centre pixel is needed in full: its red channel is a filter tap, and the whole pixel is passed onward.
	const half4 centre = inTexture.read(gid + uint2(7, offset));

	// Tap(x) is the composite sample x pixels along from gid; Weight(k) is the k-th filter coefficient.
#define Tap(x)		inTexture.read(gid + uint2(x, offset)).r
#define Weight(k)	uniforms.lumaKernel[k]
	const half luminance =
		Weight(0) * Tap(0) +
		Weight(1) * Tap(1) +
		Weight(2) * Tap(2) +
		Weight(3) * Tap(3) +
		Weight(4) * Tap(4) +
		Weight(5) * Tap(5) +
		Weight(6) * Tap(6) +
		Weight(7) * centre.r +
		Weight(6) * Tap(8) +
		Weight(5) * Tap(9) +
		Weight(4) * Tap(10) +
		Weight(3) * Tap(11) +
		Weight(2) * Tap(12) +
		Weight(1) * Tap(13) +
		Weight(0) * Tap(14);
#undef Weight
#undef Tap

	setSeparatedLumaChroma(luminance, centre, outTexture, gid, offset);
}
/// Separates luminance from chrominance using a symmetric 9-tap lowpass filter.
///
/// Behaves as a narrower luma separation: only the composite samples at x offsets 3 to 11 from @c gid, on row
/// @c gid.y + offset, contribute. They are weighted by coefficients 3 to 7 of @c uniforms.lumaKernel, with
/// coefficient 7 applying to the centre pixel at x offset 7; coefficients 0 to 2 are not read.
///
/// The resulting luminance and the centre pixel are passed to @c setSeparatedLumaChroma to produce the output.
kernel void separateLumaKernel9(	texture2d<half, access::read> inTexture [[texture(0)]],
									texture2d<half, access::write> outTexture [[texture(1)]],
									uint2 gid [[thread_position_in_grid]],
									constant Uniforms &uniforms [[buffer(0)]],
									constant int &offset [[buffer(1)]]) {
	// The centre pixel is needed in full: its red channel is a filter tap, and the whole pixel is passed onward.
	const half4 centre = inTexture.read(gid + uint2(7, offset));

	// Tap(x) is the composite sample x pixels along from gid; Weight(k) is the k-th filter coefficient.
#define Tap(x)		inTexture.read(gid + uint2(x, offset)).r
#define Weight(k)	uniforms.lumaKernel[k]
	const half luminance =
		Weight(3) * Tap(3) +
		Weight(4) * Tap(4) +
		Weight(5) * Tap(5) +
		Weight(6) * Tap(6) +
		Weight(7) * centre.r +
		Weight(6) * Tap(8) +
		Weight(5) * Tap(9) +
		Weight(4) * Tap(10) +
		Weight(3) * Tap(11);
#undef Weight
#undef Tap

	setSeparatedLumaChroma(luminance, centre, outTexture, gid, offset);
}
/// Separates luminance from chrominance using a symmetric 7-tap lowpass filter.
///
/// Only the composite samples at x offsets 4 to 10 from @c gid, on row @c gid.y + offset, contribute. They are
/// weighted by coefficients 4 to 7 of @c uniforms.lumaKernel, with coefficient 7 applying to the centre pixel at
/// x offset 7; coefficients 0 to 3 are not read.
///
/// The resulting luminance and the centre pixel are passed to @c setSeparatedLumaChroma to produce the output.
kernel void separateLumaKernel7(	texture2d<half, access::read> inTexture [[texture(0)]],
									texture2d<half, access::write> outTexture [[texture(1)]],
									uint2 gid [[thread_position_in_grid]],
									constant Uniforms &uniforms [[buffer(0)]],
									constant int &offset [[buffer(1)]]) {
	// The centre pixel is needed in full: its red channel is a filter tap, and the whole pixel is passed onward.
	const half4 centre = inTexture.read(gid + uint2(7, offset));

	// Tap(x) is the composite sample x pixels along from gid; Weight(k) is the k-th filter coefficient.
#define Tap(x)		inTexture.read(gid + uint2(x, offset)).r
#define Weight(k)	uniforms.lumaKernel[k]
	const half luminance =
		Weight(4) * Tap(4) +
		Weight(5) * Tap(5) +
		Weight(6) * Tap(6) +
		Weight(7) * centre.r +
		Weight(6) * Tap(8) +
		Weight(5) * Tap(9) +
		Weight(4) * Tap(10);
#undef Weight
#undef Tap

	setSeparatedLumaChroma(luminance, centre, outTexture, gid, offset);
}
/// Separates luminance from chrominance using a symmetric 5-tap lowpass filter.
///
/// Only the composite samples at x offsets 5 to 9 from @c gid, on row @c gid.y + offset, contribute. They are
/// weighted by coefficients 5 to 7 of @c uniforms.lumaKernel, with coefficient 7 applying to the centre pixel at
/// x offset 7; coefficients 0 to 4 are not read.
///
/// The resulting luminance and the centre pixel are passed to @c setSeparatedLumaChroma to produce the output.
kernel void separateLumaKernel5(	texture2d<half, access::read> inTexture [[texture(0)]],
									texture2d<half, access::write> outTexture [[texture(1)]],
									uint2 gid [[thread_position_in_grid]],
									constant Uniforms &uniforms [[buffer(0)]],
									constant int &offset [[buffer(1)]]) {
	// The centre pixel is needed in full: its red channel is a filter tap, and the whole pixel is passed onward.
	const half4 centre = inTexture.read(gid + uint2(7, offset));

	// Tap(x) is the composite sample x pixels along from gid; Weight(k) is the k-th filter coefficient.
#define Tap(x)		inTexture.read(gid + uint2(x, offset)).r
#define Weight(k)	uniforms.lumaKernel[k]
	const half luminance =
		Weight(5) * Tap(5) +
		Weight(6) * Tap(6) +
		Weight(7) * centre.r +
		Weight(6) * Tap(8) +
		Weight(5) * Tap(9);
#undef Weight
#undef Tap

	setSeparatedLumaChroma(luminance, centre, outTexture, gid, offset);
}