/// @returns the IEEE 754 binary16 conversion of @c value, stored in a 16-bit int.
///
/// Conversion rounds to nearest, with ties going to the even mantissa. Zero keeps its sign;
/// magnitudes too large for binary16 become infinity; magnitudes below the smallest normal
/// binary16 value are stored as subnormals (or zero); NaN is stored as a quiet NaN.
uint16_t half(float value) {
	const uint16_t sign = std::signbit(value) ? 0x8000 : 0x0000;

	// NaN: every exponent bit set plus a non-zero mantissa.
	if(std::isnan(value)) {
		return uint16_t(sign | 0x7e00);
	}

	const float magnitude = std::fabs(value);

	// frexp reports an exponent of 0 for zero, which would otherwise be biased into a
	// non-zero exponent field; zero is simply an all-clear exponent and mantissa.
	if(magnitude == 0.0f) {
		return sign;
	}

	// Infinity: every exponent bit set, mantissa clear.
	if(std::isinf(magnitude)) {
		return uint16_t(sign | 0x7c00);
	}

	// magnitude = mantissa * 2^exponent, with mantissa in the range [0.5, 1).
	int exponent;
	const float mantissa = std::frexp(magnitude, &exponent);

	// There is a bias of 15 on the exponent; given that the value given by frexp doesn't have the
	// implicit first bit in the units position, that's like a bias of 14 versus the output of frexp.
	const int biasedExponent = exponent + 14;

	// The exponent field is five bits and 31 is reserved for infinity and NaN, so anything that
	// would need a field of 31 or more cannot be represented and becomes infinity.
	if(biasedExponent >= 31) {
		return uint16_t(sign | 0x7c00);
	}

	// Normal numbers: scale the mantissa to an 11-bit integer in [1024, 2048), the top bit being the
	// implicit one. Subnormals: the stored value is a plain multiple of 2^-24 with no implicit bit.
	const bool isNormal = biasedExponent >= 1;
	const float scaled = isNormal ? mantissa * 2048.0f : std::ldexp(magnitude, 24);

	// Round to nearest, ties to even. Scaling by a power of two is exact, so the fraction is too.
	const float whole = std::floor(scaled);
	const float fraction = scaled - whole;
	uint32_t significand = uint32_t(whole);
	if(fraction > 0.5f || (fraction == 0.5f && (significand & 1))) {
		++significand;
	}

	// Adding rather than ORing lets the implicit bit of a normal number (worth 1024) supply the final
	// increment of the exponent field, and lets a mantissa that rounded up to the next power of two
	// carry into the exponent; that correctly yields the next binade, the smallest normal number,
	// or infinity as appropriate.
	const uint32_t exponentBase = isNormal ? (uint32_t(biasedExponent - 1) << 10) : 0;
	return uint16_t(sign | (exponentBase + significand));
}
{ auto *const firCoefficients = uniforms()->chromaCoefficients; - const auto chromaCoefficients = boxCoefficients(uniforms()->radiansPerPixel, 3.141592654f * 0.5f); + const auto chromaCoefficients = boxCoefficients(uniforms()->radiansPerPixel, 3.141592654f); for(size_t c = 0; c < 8; ++c) { firCoefficients[c].y = firCoefficients[c].z = (isSVideoOutput ? 2.0f : 1.0f) * chromaCoefficients[c]; firCoefficients[c].x = 0.0f; @@ -667,7 +695,7 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget; // // The low cut off ['Hz' but per line, not per second] is somewhat arbitrary. if(!isSVideoOutput) { - SignalProcessing::FIRFilter sharpenFilter(15, float(_lineBufferPixelsPerLine), 40.0f, colourCyclesPerLine); + SignalProcessing::FIRFilter sharpenFilter(15, float(_lineBufferPixelsPerLine), 20.0f, colourCyclesPerLine); const auto sharpen = sharpenFilter.get_coefficients(); for(size_t c = 0; c < 8; ++c) { firCoefficients[c].x = sharpen[c]; @@ -675,20 +703,16 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget; } } - // Generate the luminance separation filter. + // Generate the luminance separation filter and determine its required size. 
{ - auto *const firCoefficients = uniforms()->lumaCoefficients; - SignalProcessing::FIRFilter lumaPart(15, float(_lineBufferPixelsPerLine), 0.0f, colourCyclesPerLine * 0.5f); -// SignalProcessing::FIRFilter chromaPart(15, float(_lineBufferPixelsPerLine), 0.0f, colourCyclesPerLine * 0.5f); - -// const auto chromaCoefficients = lumaPart.get_coefficients(); -// const auto lumaCoefficients = lumaPart.get_coefficients(); - const auto chromaCoefficients = boxCoefficients(uniforms()->radiansPerPixel, 3.141592654f);//chromaPart.get_coefficients(); - const auto lumaCoefficients = lumaPart.get_coefficients(); -// const auto chromaCoefficients = lumaCoefficients; + auto *const filter = uniforms()->lumaKernel; + const auto coefficients = boxCoefficients(uniforms()->radiansPerPixel, 3.141592654f); + _lumaKernelSize = 15; for(size_t c = 0; c < 8; ++c) { - firCoefficients[c].x = //lumaCoefficients[c]; - firCoefficients[c].y = chromaCoefficients[c]; + filter[c] = coefficients[c]; + if(coefficients[c] < 0.01f) { + _lumaKernelSize -= 2; + } } } } diff --git a/OSBindings/Mac/Clock Signal/ScanTarget/ScanTarget.metal b/OSBindings/Mac/Clock Signal/ScanTarget/ScanTarget.metal index 98f6df2f0..70e6eb5f2 100644 --- a/OSBindings/Mac/Clock Signal/ScanTarget/ScanTarget.metal +++ b/OSBindings/Mac/Clock Signal/ScanTarget/ScanTarget.metal @@ -36,9 +36,9 @@ struct Uniforms { // 15 coefficients but they're symmetrical around the centre. float3 chromaCoefficients[8]; - // Describes the FIR filter in use for luma filtering; also 15 coefficients + // Describes the filter in use for luma filtering; 15 coefficients // symmetrical around the centre. - float2 lumaCoefficients[8]; + float lumaKernel[8]; // Maps from pixel offsets into the composition buffer to angular difference. float radiansPerPixel; @@ -449,45 +449,43 @@ kernel void filterChromaKernelWithGamma(texture2d inTexture /// (luminance, 0.5 + 0.5*chrominance*cos(phase), 0.5 + 0.5*chrominance*sin(phase)) /// /// i.e. 
the input form for the filterChromaKernel, above]. -kernel void separateLumaKernel( texture2d inTexture [[texture(0)]], - texture2d outTexture [[texture(1)]], +kernel void separateLumaKernel( texture2d inTexture [[texture(0)]], + texture2d outTexture [[texture(1)]], uint2 gid [[thread_position_in_grid]], constant Uniforms &uniforms [[buffer(0)]], constant int &offset [[buffer(1)]]) { - const float4 centreSample = inTexture.read(gid + uint2(7, offset)); - const float2 rawSamples[] = { - inTexture.read(gid + uint2(0, offset)).rr, - inTexture.read(gid + uint2(1, offset)).rr, - inTexture.read(gid + uint2(2, offset)).rr, - inTexture.read(gid + uint2(3, offset)).rr, - inTexture.read(gid + uint2(4, offset)).rr, - inTexture.read(gid + uint2(5, offset)).rr, - inTexture.read(gid + uint2(6, offset)).rr, - centreSample.rr, - inTexture.read(gid + uint2(8, offset)).rr, - inTexture.read(gid + uint2(9, offset)).rr, - inTexture.read(gid + uint2(10, offset)).rr, - inTexture.read(gid + uint2(11, offset)).rr, - inTexture.read(gid + uint2(12, offset)).rr, - inTexture.read(gid + uint2(13, offset)).rr, - inTexture.read(gid + uint2(14, offset)).rr, + const half4 centreSample = inTexture.read(gid + uint2(7, offset)); + const half rawSamples[] = { + inTexture.read(gid + uint2(0, offset)).r, + inTexture.read(gid + uint2(1, offset)).r, + inTexture.read(gid + uint2(2, offset)).r, + inTexture.read(gid + uint2(3, offset)).r, + inTexture.read(gid + uint2(4, offset)).r, + inTexture.read(gid + uint2(5, offset)).r, + inTexture.read(gid + uint2(6, offset)).r, + centreSample.r, + inTexture.read(gid + uint2(8, offset)).r, + inTexture.read(gid + uint2(9, offset)).r, + inTexture.read(gid + uint2(10, offset)).r, + inTexture.read(gid + uint2(11, offset)).r, + inTexture.read(gid + uint2(12, offset)).r, + inTexture.read(gid + uint2(13, offset)).r, + inTexture.read(gid + uint2(14, offset)).r, }; -#define Sample(x, y) uniforms.lumaCoefficients[y] * rawSamples[x] - const float2 luminance = +#define Sample(x, y) 
half(uniforms.lumaKernel[y]) * rawSamples[x] + const half luminance = Sample(0, 0) + Sample(1, 1) + Sample(2, 2) + Sample(3, 3) + Sample(4, 4) + Sample(5, 5) + Sample(6, 6) + Sample(7, 7) + Sample(8, 6) + Sample(9, 5) + Sample(10, 4) + Sample(11, 3) + Sample(12, 2) + Sample(13, 1) + Sample(14, 0); #undef Sample // The mix/steps below ensures that the absence of a colour burst leads the colour subcarrier to be discarded. - const float isColour = step(0.01, centreSample.a); - const float chroma = (centreSample.r - luminance.g) / mix(1.0f, centreSample.a, isColour); - outTexture.write(float4( -// mix(luminance.g, luminance.r / (1.0f - centreSample.a), isColour), - luminance.r / mix(1.0f, (1.0f - centreSample.a), isColour), -// luminance.r, - isColour * (centreSample.gb - float2(0.5f)) * chroma + float2(0.5f), + const half isColour = step(half(0.01f), centreSample.a); + const half chroma = (centreSample.r - luminance) / mix(half(1.0f), centreSample.a, isColour); + outTexture.write(half4( + luminance / mix(half(1.0f), (half(1.0f) - centreSample.a), isColour), + isColour * (centreSample.gb - half2(0.5f)) * chroma + half2(0.5f), 1.0f ), gid + uint2(7, offset));