From 3d392dd81d93530e686f7ee4728491855ab4d927 Mon Sep 17 00:00:00 2001 From: Thomas Harte Date: Wed, 9 Sep 2020 13:02:04 -0400 Subject: [PATCH] Completes conversion of composite & S-Video per-pixel processing to 16-bit floats. --- .../Clock Signal/ScanTarget/CSScanTarget.mm | 35 ++++++++++++++++--- .../Clock Signal/ScanTarget/ScanTarget.metal | 16 ++++----- 2 files changed, 39 insertions(+), 12 deletions(-) diff --git a/OSBindings/Mac/Clock Signal/ScanTarget/CSScanTarget.mm b/OSBindings/Mac/Clock Signal/ScanTarget/CSScanTarget.mm index 644670103..c2cdb1011 100644 --- a/OSBindings/Mac/Clock Signal/ScanTarget/CSScanTarget.mm +++ b/OSBindings/Mac/Clock Signal/ScanTarget/CSScanTarget.mm @@ -95,6 +95,18 @@ namespace { +/// Provides a container for __fp16 versions of tightly-packed single-precision plain old data with a copy assignment constructor. +template struct HalfConverter { + __fp16 elements[sizeof(NaturalType) / sizeof(float)]; + + void operator =(const NaturalType &rhs) { + const float *floatRHS = reinterpret_cast(&rhs); + for(size_t c = 0; c < sizeof(elements) / sizeof(*elements); ++c) { + elements[c] = __fp16(floatRHS[c]); + } + } +}; + // Tracks the Uniforms struct declared in ScanTarget.metal; see there for field definitions. // // __fp16 is a Clang-specific type which I'm using as equivalent to a Metal half, i.e. an IEEE 754 binary16. @@ -107,10 +119,10 @@ struct Uniforms { float zoom; simd::float2 offset; - simd::float3x3 toRGB; - simd::float3x3 fromRGB; + HalfConverter toRGB; + HalfConverter fromRGB; - simd::float3 chromaCoefficients[8]; + HalfConverter chromaKernel[8]; __fp16 lumaKernel[8]; __fp16 outputAlpha; @@ -511,6 +523,15 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget; [self setAspectRatio]; const auto toRGB = to_rgb_matrix(modals.composite_colour_space); +// uniforms()->toRGB[0] = toRGB[0]; +// uniforms()->toRGB[1] = toRGB[1]; +// uniforms()->toRGB[2] = toRGB[2]; +// uniforms()->toRGB[4] = toRGB[3]; +// uniforms()->toRGB[5] = toRGB[4]; +// uniforms()->toRGB[6] = toRGB[5]; +// uniforms()->toRGB[8] = toRGB[6]; +// uniforms()->toRGB[9] = toRGB[7]; +// uniforms()->toRGB[10] = toRGB[8]; uniforms()->toRGB = simd::float3x3( simd::float3{toRGB[0], toRGB[1], toRGB[2]}, simd::float3{toRGB[3], toRGB[4], toRGB[5]}, @@ -656,7 +677,8 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget; // Generate the chrominance filter. { - auto *const firCoefficients = uniforms()->chromaCoefficients; +// auto *const firCoefficients = uniforms()->chromaKernel; + simd::float3 firCoefficients[8]; const auto chromaCoefficients = boxCoefficients(radiansPerPixel, 3.141592654f); _chromaKernelSize = 15; for(size_t c = 0; c < 8; ++c) { @@ -686,6 +708,11 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget; } _chromaKernelSize = std::max(_chromaKernelSize, sharpenFilterSize); } + + // Convert to half-size floats. + for(size_t c = 0; c < 8; ++c) { + uniforms()->chromaKernel[c] = firCoefficients[c]; + } } // Generate the luminance separation filter and determine its required size. diff --git a/OSBindings/Mac/Clock Signal/ScanTarget/ScanTarget.metal b/OSBindings/Mac/Clock Signal/ScanTarget/ScanTarget.metal index 6707262ec..8bf86be47 100644 --- a/OSBindings/Mac/Clock Signal/ScanTarget/ScanTarget.metal +++ b/OSBindings/Mac/Clock Signal/ScanTarget/ScanTarget.metal @@ -32,12 +32,12 @@ struct Uniforms { float2 offset; // Provides conversions to and from RGB for the active colour space. - float3x3 toRGB; - float3x3 fromRGB; + half3x3 toRGB; + half3x3 fromRGB; - // Describes the FIR filter in use for chroma filtering; it'll be + // Describes the filter in use for chroma filtering; it'll be // 15 coefficients but they're symmetrical around the centre. - float3 chromaCoefficients[8]; + half3 chromaKernel[8]; // Describes the filter in use for luma filtering; 15 coefficients // symmetrical around the centre. @@ -343,7 +343,7 @@ float3 convertRed1Green1Blue1(SourceInterpolator vert, texture2d texture } \ \ fragment float4 svideoSample##name(SourceInterpolator vert [[stage_in]], texture2d texture [[texture(0)]], constant Uniforms &uniforms [[buffer(0)]]) { \ - const auto colour = uniforms.fromRGB * clamp(convert##name(vert, texture), float(0.0f), float(1.0f)); \ + const auto colour = float3x3(uniforms.fromRGB) * clamp(convert##name(vert, texture), float(0.0f), float(1.0f)); \ const float2 qam = quadrature(vert.colourPhase); \ const float chroma = dot(colour.gb, qam); \ return float4( \ @@ -354,7 +354,7 @@ float3 convertRed1Green1Blue1(SourceInterpolator vert, texture2d texture } \ \ fragment float4 compositeSample##name(SourceInterpolator vert [[stage_in]], texture2d texture [[texture(0)]], constant Uniforms &uniforms [[buffer(0)]]) { \ - const auto colour = uniforms.fromRGB * clamp(convert##name(vert, texture), float3(0.0f), float3(1.0f)); \ + const auto colour = float3x3(uniforms.fromRGB) * clamp(convert##name(vert, texture), float3(0.0f), float3(1.0f)); \ const float2 colourSubcarrier = quadrature(vert.colourPhase); \ const float level = mix(colour.r, dot(colour.gb, colourSubcarrier), vert.colourAmplitude); \ return composite(level, colourSubcarrier, vert.colourAmplitude); \ @@ -405,14 +405,14 @@ template void filterChromaKernel( texture2d