
Attempts correctly to set up the CPU side of a composite video pipeline, at least.

So: I think this is really close, but I'm out of time for the day.
Thomas Harte 2020-09-01 21:58:33 -04:00
parent f2929230a2
commit 15296e43a4
2 changed files with 151 additions and 102 deletions
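For orientation before the diff: the command-buffer code below distinguishes three output pipelines, and the CompositeColour case adds one extra compute pass ahead of the existing chroma filtering. Here is a minimal C++ sketch of the per-frame passes as I read the new code; the Pipeline cases are taken from the diff, but the pass labels are descriptive only, not identifiers from the repository.

```cpp
#include <string>
#include <vector>

// Descriptive labels for the passes each pipeline encodes per frame,
// per the switch(_pipeline) added in the diff below.
enum class Pipeline { DirectToDisplay, SVideo, CompositeColour };

std::vector<std::string> framePasses(Pipeline pipeline) {
	switch(pipeline) {
		case Pipeline::DirectToDisplay:
			// Scans are drawn straight to the persistent frame buffer,
			// with a frame-cleaner pass at each new frame.
			return {"output scans"};

		case Pipeline::SVideo:
			// Scans are composed into a line buffer, chroma is lowpassed into
			// the finalised line texture, then whole lines are output.
			return {"compose", "filter chroma", "output lines"};

		case Pipeline::CompositeColour:
			// As S-Video, but with an extra separateLumaKernel pass to split
			// luminance out of the composite signal first.
			return {"compose", "separate luma", "filter chroma", "output lines"};
	}
	return {};
}
```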

View File

@@ -388,6 +388,7 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
if(_pipeline == Pipeline::CompositeColour) {
if(!_separatedLumaTexture) {
_separatedLumaTexture = [_view.device newTextureWithDescriptor:lineTextureDescriptor];
_separatedLumaState = [_view.device newComputePipelineStateWithFunction:[library newFunctionWithName:@"separateLumaKernel"] error:nil];
}
} else {
_separatedLumaTexture = nil;
@@ -713,68 +714,104 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
const auto outputArea = _scanTarget.get_output_area();
if(outputArea.end.line != outputArea.start.line) {
// Ensure texture changes are noted.
const auto writeAreaModificationStart = size_t(outputArea.start.write_area_x + outputArea.start.write_area_y * 2048) * _bytesPerInputPixel;
const auto writeAreaModificationEnd = size_t(outputArea.end.write_area_x + outputArea.end.write_area_y * 2048) * _bytesPerInputPixel;
#define FlushRegion(start, size) [_writeAreaBuffer didModifyRange:NSMakeRange(start, size)]
RangePerform(writeAreaModificationStart, writeAreaModificationEnd, _totalTextureBytes, FlushRegion);
#undef FlushRegion
// Obtain a source for render command encoders.
id<MTLCommandBuffer> commandBuffer = [_commandQueue commandBuffer];
//
// Drawing algorithm used below, in broad terms:
//
// Maintain a persistent buffer of current CRT state.
//
// During each frame, paint to the persistent buffer anything new. Update a stencil buffer to track
// every pixel so-far touched.
//
// At the end of the frame, draw a 'frame cleaner', which is a whole-screen rect that paints over
// only those areas that the stencil buffer indicates weren't painted this frame.
//
// Hence every pixel is touched every frame, regardless of the machine's output.
//
switch(_pipeline) {
case Pipeline::DirectToDisplay: {
// Output scans directly, broken up by frame.
size_t line = outputArea.start.line;
size_t scan = outputArea.start.scan;
while(line != outputArea.end.line) {
if(_lineMetadataBuffer[line].is_first_in_frame && _lineMetadataBuffer[line].previous_frame_was_complete) {
[self outputFrom:scan to:_lineMetadataBuffer[line].first_scan commandBuffer:commandBuffer];
[self outputFrameCleanerToCommandBuffer:commandBuffer];
scan = _lineMetadataBuffer[line].first_scan;
}
line = (line + 1) % NumBufferedLines;
}
[self outputFrom:scan to:outputArea.end.scan commandBuffer:commandBuffer];
} break;
case Pipeline::CompositeColour:
case Pipeline::SVideo: {
// Build the composition buffer.
[self composeOutputArea:outputArea commandBuffer:commandBuffer];
if(_pipeline == Pipeline::SVideo) {
// Filter from composition to the finalised line texture.
id<MTLComputeCommandEncoder> computeEncoder = [commandBuffer computeCommandEncoder];
[computeEncoder setTexture:_compositionTexture atIndex:0];
[computeEncoder setTexture:_finalisedLineTexture atIndex:1];
[computeEncoder setBuffer:_uniformsBuffer offset:0 atIndex:0];
if(outputArea.end.line > outputArea.start.line) {
[self dispatchComputeCommandEncoder:computeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:outputArea.end.line - outputArea.start.line offset:outputArea.start.line];
} else {
[self dispatchComputeCommandEncoder:computeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:NumBufferedLines - outputArea.start.line offset:outputArea.start.line];
if(outputArea.end.line) {
[self dispatchComputeCommandEncoder:computeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:outputArea.end.line offset:0];
}
}
[computeEncoder endEncoding];
} else {
// Separate luma and then filter.
id<MTLComputeCommandEncoder> separateComputeEncoder = [commandBuffer computeCommandEncoder];
[separateComputeEncoder setTexture:_compositionTexture atIndex:0];
[separateComputeEncoder setTexture:_separatedLumaTexture atIndex:1];
[separateComputeEncoder setBuffer:_uniformsBuffer offset:0 atIndex:0];
if(outputArea.end.line > outputArea.start.line) {
[self dispatchComputeCommandEncoder:separateComputeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:outputArea.end.line - outputArea.start.line offset:outputArea.start.line];
} else {
[self dispatchComputeCommandEncoder:separateComputeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:NumBufferedLines - outputArea.start.line offset:outputArea.start.line];
if(outputArea.end.line) {
[self dispatchComputeCommandEncoder:separateComputeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:outputArea.end.line offset:0];
}
}
[separateComputeEncoder endEncoding];
id<MTLComputeCommandEncoder> filterComputeEncoder = [commandBuffer computeCommandEncoder];
[filterComputeEncoder setTexture:_separatedLumaTexture atIndex:0];
[filterComputeEncoder setTexture:_finalisedLineTexture atIndex:1];
[filterComputeEncoder setBuffer:_uniformsBuffer offset:0 atIndex:0];
if(outputArea.end.line > outputArea.start.line) {
[self dispatchComputeCommandEncoder:filterComputeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:outputArea.end.line - outputArea.start.line offset:outputArea.start.line];
} else {
[self dispatchComputeCommandEncoder:filterComputeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:NumBufferedLines - outputArea.start.line offset:outputArea.start.line];
if(outputArea.end.line) {
[self dispatchComputeCommandEncoder:filterComputeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:outputArea.end.line offset:0];
}
}
[filterComputeEncoder endEncoding];
}
// Output lines, broken up by frame.
size_t startLine = outputArea.start.line;
@@ -788,15 +825,18 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
line = (line + 1) % NumBufferedLines;
}
[self outputFrom:startLine to:outputArea.end.line commandBuffer:commandBuffer];
} break;
}
// Add a callback to update the scan target buffer and commit the drawing.
[commandBuffer addCompletedHandler:^(id<MTLCommandBuffer> _Nonnull) {
self->_scanTarget.complete_output_area(outputArea);
}];
[commandBuffer commit];
} else {
// There was no work, but to be contractually correct:
self->_scanTarget.complete_output_area(outputArea);
}
}
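Each of the three compute dispatches in the CompositeColour/SVideo branch above repeats the same split of the output range over the circular line buffer: a single dispatch when the range doesn't wrap, two when it does. Below is a minimal C++ sketch of that logic under a hypothetical helper; the function name, the dispatch callback and its (offset, height) signature are inventions for illustration, not code from the repository.

```cpp
#include <cstddef>
#include <functional>

// Split the circular range [startLine, endLine) over a buffer of `bufferLines`
// lines into contiguous runs, invoking dispatch(offset, height) once per run.
// The caller is assumed already to have checked startLine != endLine, as the
// code above does before encoding any work.
void forEachContiguousLineRun(size_t startLine, size_t endLine, size_t bufferLines,
                              const std::function<void(size_t offset, size_t height)> &dispatch) {
	if(endLine > startLine) {
		// No wrap: a single run covers the whole range.
		dispatch(startLine, endLine - startLine);
	} else {
		// Wrapped: one run to the end of the buffer, plus one from line 0 if needed.
		dispatch(startLine, bufferLines - startLine);
		if(endLine) {
			dispatch(0, endLine);
		}
	}
}
```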

View File

@@ -307,8 +307,8 @@ float3 convertRed1Green1Blue1(SourceInterpolator vert, texture2d<ushort> texture
const float level = mix(colour.r, dot(colour.gb, colourSubcarrier), vert.colourAmplitude); \
return float4( \
level, \
float2(0.5f) + quadrature(vert.colourPhase)*0.5f, \
vert.colourPhase \
); \
}
@@ -352,52 +352,16 @@ fragment float4 clearFragment() {
return float4(0.0, 0.0, 0.0, 0.64);
}
// MARK: - Conversion fragment shaders
template <bool applyCompositeAmplitude> float4 applyFilter(SourceInterpolator vert [[stage_in]], texture2d<float> texture [[texture(0)]], constant Uniforms &uniforms [[buffer(0)]]) {
#define Sample(x) texture.sample(standardSampler, vert.textureCoordinates + float2(x, 0.0f)) - float4(0.0f, 0.5f, 0.5f, 0.0f)
float4 rawSamples[] = {
Sample(-7), Sample(-6), Sample(-5), Sample(-4), Sample(-3), Sample(-2), Sample(-1),
Sample(0),
Sample(1), Sample(2), Sample(3), Sample(4), Sample(5), Sample(6), Sample(7),
};
#undef Sample
#define Sample(c, o, a) uniforms.firCoefficients[c] * rawSamples[o].rgb
const float3 colour =
Sample(0, 0, -7) + Sample(1, 1, -6) + Sample(2, 2, -5) + Sample(3, 3, -4) +
Sample(4, 4, -3) + Sample(5, 5, -2) + Sample(6, 6, -1) +
Sample(7, 7, 0) +
Sample(6, 8, 1) + Sample(5, 9, 2) + Sample(4, 10, 3) +
Sample(3, 11, 4) + Sample(2, 12, 5) + Sample(1, 13, 6) + Sample(0, 14, 7);
#undef Sample
// This would be `if constexpr`, were this C++17; the compiler should do compile-time selection here regardless.
if(applyCompositeAmplitude) {
return float4(uniforms.toRGB * (colour * float3(1.0f, 1.0f / vert.colourAmplitude, 1.0f / vert.colourAmplitude)), 1.0f);
} else {
return float4(uniforms.toRGB * colour, 1.0f);
}
}
fragment float4 filterSVideoFragment(SourceInterpolator vert [[stage_in]], texture2d<float> texture [[texture(0)]], constant Uniforms &uniforms [[buffer(0)]]) {
return applyFilter<false>(vert, texture, uniforms);
}
fragment float4 filterCompositeFragment(SourceInterpolator vert [[stage_in]], texture2d<float> texture [[texture(0)]], constant Uniforms &uniforms [[buffer(0)]]) {
return applyFilter<true>(vert, texture, uniforms);
}
// MARK: -
fragment float4 interpolateFragment(CopyInterpolator vert [[stage_in]], texture2d<float> texture [[texture(0)]]) {
return texture.sample(linearSampler, vert.textureCoordinates);
}
// MARK: - Kernel functions
// TEST FUNCTION. Just copies from input to output.
/// Given input pixels of the form (luminance, 0.5 + 0.5*chrominance*cos(phase), 0.5 + 0.5*chrominance*sin(phase)), applies a lowpass
/// filter to the two chrominance parts, then uses the toRGB matrix to convert to RGB and stores.
kernel void filterChromaKernel( texture2d<float, access::read> inTexture [[texture(0)]],
texture2d<float, access::write> outTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]],
@@ -431,3 +395,48 @@ kernel void filterChromaKernel( texture2d<float, access::read> inTexture [[textu
outTexture.write(float4(uniforms.toRGB * colour, 1.0f), gid + uint2(7, offset));
}
/// Given input pixels of the form:
///
/// (composite sample, cos(phase), sin(phase), colour amplitude),
///
/// applies a lowpass filter to separate luminance, subtracts that, and scales and maps the remaining chrominance in order to output
/// pixels in the form:
///
/// (luminance, 0.5 + 0.5*chrominance*cos(phase), 0.5 + 0.5*chrominance*sin(phase))
///
/// i.e. the input form for the filterChromaKernel, above.
kernel void separateLumaKernel( texture2d<float, access::read> inTexture [[texture(0)]],
texture2d<float, access::write> outTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]],
constant Uniforms &uniforms [[buffer(0)]],
constant int &offset [[buffer(1)]]) {
// TODO!
constexpr float4 moveToZero = float4(0.0f, 0.5f, 0.5f, 0.0f);
const float4 rawSamples[] = {
inTexture.read(gid + uint2(0, offset)) - moveToZero,
inTexture.read(gid + uint2(1, offset)) - moveToZero,
inTexture.read(gid + uint2(2, offset)) - moveToZero,
inTexture.read(gid + uint2(3, offset)) - moveToZero,
inTexture.read(gid + uint2(4, offset)) - moveToZero,
inTexture.read(gid + uint2(5, offset)) - moveToZero,
inTexture.read(gid + uint2(6, offset)) - moveToZero,
inTexture.read(gid + uint2(7, offset)) - moveToZero,
inTexture.read(gid + uint2(8, offset)) - moveToZero,
inTexture.read(gid + uint2(9, offset)) - moveToZero,
inTexture.read(gid + uint2(10, offset)) - moveToZero,
inTexture.read(gid + uint2(11, offset)) - moveToZero,
inTexture.read(gid + uint2(12, offset)) - moveToZero,
inTexture.read(gid + uint2(13, offset)) - moveToZero,
inTexture.read(gid + uint2(14, offset)) - moveToZero,
};
#define Sample(x, y) uniforms.firCoefficients[y] * rawSamples[x].rgb
const float3 colour =
Sample(0, 0) + Sample(1, 1) + Sample(2, 2) + Sample(3, 3) + Sample(4, 4) + Sample(5, 5) + Sample(6, 6) +
Sample(7, 7) +
Sample(8, 6) + Sample(9, 5) + Sample(10, 4) + Sample(11, 3) + Sample(12, 2) + Sample(13, 1) + Sample(14, 0);
#undef Sample
outTexture.write(float4(uniforms.toRGB * colour, 1.0f), gid + uint2(7, offset));
}
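Both kernels above apply the same symmetric 15-tap FIR, storing eight coefficients and mirroring taps about the centre sample, and the comment on separateLumaKernel describes luminance as the lowpassed composite with chrominance as the scaled remainder. Below is a short C++ sketch of that per-pixel arithmetic; the scaling by colour amplitude is my reading of the comment rather than committed behaviour, since the kernel body is still marked TODO, and all names here are illustrative.

```cpp
#include <array>

struct SeparatedPixel {
	float luminance;
	float encodedChromaCosine;	// 0.5 + 0.5*chrominance*cos(phase)
	float encodedChromaSine;	// 0.5 + 0.5*chrominance*sin(phase)
};

// Apply the symmetric 15-tap lowpass used by the kernels: coefficients 0-6 are
// mirrored either side of centre coefficient 7.
float lowpass15(const std::array<float, 15> &window, const std::array<float, 8> &firCoefficients) {
	float sum = firCoefficients[7] * window[7];
	for(int tap = 0; tap < 7; ++tap) {
		sum += firCoefficients[tap] * (window[tap] + window[14 - tap]);
	}
	return sum;
}

// Separate one centre pixel, given a window of 15 composite samples plus that
// pixel's subcarrier phase quadrature and colour amplitude, per the comment on
// separateLumaKernel. (Sketch only; the committed kernel body is a TODO.)
SeparatedPixel separateLuma(const std::array<float, 15> &compositeWindow,
                            const std::array<float, 8> &firCoefficients,
                            float cosPhase, float sinPhase, float colourAmplitude) {
	// Luminance is the lowpass-filtered composite signal at the centre sample.
	const float luminance = lowpass15(compositeWindow, firCoefficients);

	// Chrominance is whatever the lowpass rejected, rescaled by the amplitude
	// with which colour was overlaid on the composite signal.
	const float chrominance = colourAmplitude ? (compositeWindow[7] - luminance) / colourAmplitude : 0.0f;

	// Re-encode in the form filterChromaKernel expects as input.
	return SeparatedPixel{
		luminance,
		0.5f + 0.5f * chrominance * cosPhase,
		0.5f + 0.5f * chrominance * sinPhase
	};
}
```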