mirror of
https://github.com/TomHarte/CLK.git
synced 2024-12-27 16:31:31 +00:00
Completes conversion of composite & S-Video per-pixel processing to 16-bit floats.
This commit is contained in:
parent
42d810db7f
commit
3d392dd81d
@ -95,6 +95,18 @@
|
||||
|
||||
namespace {
|
||||
|
||||
/// Provides a container for __fp16 versions of tightly-packed single-precision plain old data with a copy assignment constructor.
|
||||
template <typename NaturalType> struct HalfConverter {
|
||||
__fp16 elements[sizeof(NaturalType) / sizeof(float)];
|
||||
|
||||
void operator =(const NaturalType &rhs) {
|
||||
const float *floatRHS = reinterpret_cast<const float *>(&rhs);
|
||||
for(size_t c = 0; c < sizeof(elements) / sizeof(*elements); ++c) {
|
||||
elements[c] = __fp16(floatRHS[c]);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Tracks the Uniforms struct declared in ScanTarget.metal; see there for field definitions.
|
||||
//
|
||||
// __fp16 is a Clang-specific type which I'm using as equivalent to a Metal half, i.e. an IEEE 754 binary16.
|
||||
@ -107,10 +119,10 @@ struct Uniforms {
|
||||
float zoom;
|
||||
simd::float2 offset;
|
||||
|
||||
simd::float3x3 toRGB;
|
||||
simd::float3x3 fromRGB;
|
||||
HalfConverter<simd::float3x3> toRGB;
|
||||
HalfConverter<simd::float3x3> fromRGB;
|
||||
|
||||
simd::float3 chromaCoefficients[8];
|
||||
HalfConverter<simd::float3> chromaKernel[8];
|
||||
__fp16 lumaKernel[8];
|
||||
|
||||
__fp16 outputAlpha;
|
||||
@ -511,6 +523,15 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
|
||||
[self setAspectRatio];
|
||||
|
||||
const auto toRGB = to_rgb_matrix(modals.composite_colour_space);
|
||||
// uniforms()->toRGB[0] = toRGB[0];
|
||||
// uniforms()->toRGB[1] = toRGB[1];
|
||||
// uniforms()->toRGB[2] = toRGB[2];
|
||||
// uniforms()->toRGB[4] = toRGB[3];
|
||||
// uniforms()->toRGB[5] = toRGB[4];
|
||||
// uniforms()->toRGB[6] = toRGB[5];
|
||||
// uniforms()->toRGB[8] = toRGB[6];
|
||||
// uniforms()->toRGB[9] = toRGB[7];
|
||||
// uniforms()->toRGB[10] = toRGB[8];
|
||||
uniforms()->toRGB = simd::float3x3(
|
||||
simd::float3{toRGB[0], toRGB[1], toRGB[2]},
|
||||
simd::float3{toRGB[3], toRGB[4], toRGB[5]},
|
||||
@ -656,7 +677,8 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
|
||||
|
||||
// Generate the chrominance filter.
|
||||
{
|
||||
auto *const firCoefficients = uniforms()->chromaCoefficients;
|
||||
// auto *const firCoefficients = uniforms()->chromaKernel;
|
||||
simd::float3 firCoefficients[8];
|
||||
const auto chromaCoefficients = boxCoefficients(radiansPerPixel, 3.141592654f);
|
||||
_chromaKernelSize = 15;
|
||||
for(size_t c = 0; c < 8; ++c) {
|
||||
@ -686,6 +708,11 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
|
||||
}
|
||||
_chromaKernelSize = std::max(_chromaKernelSize, sharpenFilterSize);
|
||||
}
|
||||
|
||||
// Convert to half-size floats.
|
||||
for(size_t c = 0; c < 8; ++c) {
|
||||
uniforms()->chromaKernel[c] = firCoefficients[c];
|
||||
}
|
||||
}
|
||||
|
||||
// Generate the luminance separation filter and determine its required size.
|
||||
|
@ -32,12 +32,12 @@ struct Uniforms {
|
||||
float2 offset;
|
||||
|
||||
// Provides conversions to and from RGB for the active colour space.
|
||||
float3x3 toRGB;
|
||||
float3x3 fromRGB;
|
||||
half3x3 toRGB;
|
||||
half3x3 fromRGB;
|
||||
|
||||
// Describes the FIR filter in use for chroma filtering; it'll be
|
||||
// Describes the filter in use for chroma filtering; it'll be
|
||||
// 15 coefficients but they're symmetrical around the centre.
|
||||
float3 chromaCoefficients[8];
|
||||
half3 chromaKernel[8];
|
||||
|
||||
// Describes the filter in use for luma filtering; 15 coefficients
|
||||
// symmetrical around the centre.
|
||||
@ -343,7 +343,7 @@ float3 convertRed1Green1Blue1(SourceInterpolator vert, texture2d<ushort> texture
|
||||
} \
|
||||
\
|
||||
fragment float4 svideoSample##name(SourceInterpolator vert [[stage_in]], texture2d<pixelType> texture [[texture(0)]], constant Uniforms &uniforms [[buffer(0)]]) { \
|
||||
const auto colour = uniforms.fromRGB * clamp(convert##name(vert, texture), float(0.0f), float(1.0f)); \
|
||||
const auto colour = float3x3(uniforms.fromRGB) * clamp(convert##name(vert, texture), float(0.0f), float(1.0f)); \
|
||||
const float2 qam = quadrature(vert.colourPhase); \
|
||||
const float chroma = dot(colour.gb, qam); \
|
||||
return float4( \
|
||||
@ -354,7 +354,7 @@ float3 convertRed1Green1Blue1(SourceInterpolator vert, texture2d<ushort> texture
|
||||
} \
|
||||
\
|
||||
fragment float4 compositeSample##name(SourceInterpolator vert [[stage_in]], texture2d<pixelType> texture [[texture(0)]], constant Uniforms &uniforms [[buffer(0)]]) { \
|
||||
const auto colour = uniforms.fromRGB * clamp(convert##name(vert, texture), float3(0.0f), float3(1.0f)); \
|
||||
const auto colour = float3x3(uniforms.fromRGB) * clamp(convert##name(vert, texture), float3(0.0f), float3(1.0f)); \
|
||||
const float2 colourSubcarrier = quadrature(vert.colourPhase); \
|
||||
const float level = mix(colour.r, dot(colour.gb, colourSubcarrier), vert.colourAmplitude); \
|
||||
return composite(level, colourSubcarrier, vert.colourAmplitude); \
|
||||
@ -405,14 +405,14 @@ template <bool applyGamma> void filterChromaKernel( texture2d<half, access::read
|
||||
inTexture.read(gid + uint2(14, offset)) - moveToZero,
|
||||
};
|
||||
|
||||
#define Sample(x, y) half3(uniforms.chromaCoefficients[y]) * rawSamples[x].rgb
|
||||
#define Sample(x, y) uniforms.chromaKernel[y] * rawSamples[x].rgb
|
||||
const half3 colour =
|
||||
Sample(0, 0) + Sample(1, 1) + Sample(2, 2) + Sample(3, 3) + Sample(4, 4) + Sample(5, 5) + Sample(6, 6) +
|
||||
Sample(7, 7) +
|
||||
Sample(8, 6) + Sample(9, 5) + Sample(10, 4) + Sample(11, 3) + Sample(12, 2) + Sample(13, 1) + Sample(14, 0);
|
||||
#undef Sample
|
||||
|
||||
const half4 output = half4(half3x3(uniforms.toRGB) * colour * half(uniforms.outputMultiplier), half(uniforms.outputAlpha));
|
||||
const half4 output = half4(uniforms.toRGB * colour * uniforms.outputMultiplier, uniforms.outputAlpha);
|
||||
if(applyGamma) {
|
||||
outTexture.write(pow(output, uniforms.outputGamma), gid + uint2(7, offset));
|
||||
} else {
|
||||
|
Loading…
Reference in New Issue
Block a user