From 15296e43a40965babae78e91529b59626a2acd5d Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Tue, 1 Sep 2020 21:58:33 -0400
Subject: [PATCH] Attempts correctly to set up the CPU side of a composite
 video pipeline, at least.

So: I think this is really close, but I'm out of time for the day.
---
 .../Clock Signal/ScanTarget/CSScanTarget.mm   | 162 +++++++++++-------
 .../Clock Signal/ScanTarget/ScanTarget.metal  |  91 +++++-----
 2 files changed, 151 insertions(+), 102 deletions(-)

diff --git a/OSBindings/Mac/Clock Signal/ScanTarget/CSScanTarget.mm b/OSBindings/Mac/Clock Signal/ScanTarget/CSScanTarget.mm
index d5b609efe..b4721a7c4 100644
--- a/OSBindings/Mac/Clock Signal/ScanTarget/CSScanTarget.mm	
+++ b/OSBindings/Mac/Clock Signal/ScanTarget/CSScanTarget.mm	
@@ -388,6 +388,7 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
 	if(_pipeline == Pipeline::CompositeColour) {
 		if(!_separatedLumaTexture) {
 			_separatedLumaTexture = [_view.device newTextureWithDescriptor:lineTextureDescriptor];
+			_separatedLumaState = [_view.device newComputePipelineStateWithFunction:[library newFunctionWithName:@"separateLumaKernel"] error:nil];
 		}
 	} else {
 		_separatedLumaTexture = nil;
@@ -713,68 +714,104 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
 
 		const auto outputArea = _scanTarget.get_output_area();
 
-		// Ensure texture changes are noted.
-		const auto writeAreaModificationStart = size_t(outputArea.start.write_area_x + outputArea.start.write_area_y * 2048) * _bytesPerInputPixel;
-		const auto writeAreaModificationEnd = size_t(outputArea.end.write_area_x + outputArea.end.write_area_y * 2048) * _bytesPerInputPixel;
+		if(outputArea.end.line != outputArea.start.line) {
+
+			// Ensure texture changes are noted.
+			const auto writeAreaModificationStart = size_t(outputArea.start.write_area_x + outputArea.start.write_area_y * 2048) * _bytesPerInputPixel;
+			const auto writeAreaModificationEnd = size_t(outputArea.end.write_area_x + outputArea.end.write_area_y * 2048) * _bytesPerInputPixel;
 #define FlushRegion(start, size)	[_writeAreaBuffer didModifyRange:NSMakeRange(start, size)]
-		RangePerform(writeAreaModificationStart, writeAreaModificationEnd, _totalTextureBytes, FlushRegion);
+			RangePerform(writeAreaModificationStart, writeAreaModificationEnd, _totalTextureBytes, FlushRegion);
 #undef FlushRegion
 
-		// Obtain a source for render command encoders.
-		id<MTLCommandBuffer> commandBuffer = [_commandQueue commandBuffer];
+			// Obtain a source for render command encoders.
+			id<MTLCommandBuffer> commandBuffer = [_commandQueue commandBuffer];
 
-		//
-		// Drawing algorithm used below, in broad terms:
-		//
-		// Maintain a persistent buffer of current CRT state.
-		//
-		// During each frame, paint to the persistent buffer anything new. Update a stencil buffer to track
-		// every pixel so-far touched.
-		//
-		// At the end of the frame, draw a 'frame cleaner', which is a whole-screen rect that paints over
-		// only those areas that the stencil buffer indicates weren't painted this frame.
-		//
-		// Hence every pixel is touched every frame, regardless of the machine's output.
-		//
+			//
+			// Drawing algorithm used below, in broad terms:
+			//
+			// Maintain a persistent buffer of current CRT state.
+			//
+			// During each frame, paint to the persistent buffer anything new. Update a stencil buffer to track
+			// every pixel so-far touched.
+			//
+			// At the end of the frame, draw a 'frame cleaner', which is a whole-screen rect that paints over
+			// only those areas that the stencil buffer indicates weren't painted this frame.
+			//
+			// Hence every pixel is touched every frame, regardless of the machine's output.
+			//
 
-		switch(_pipeline) {
-			case Pipeline::DirectToDisplay: {
-				// Output scans directly, broken up by frame.
-				size_t line = outputArea.start.line;
-				size_t scan = outputArea.start.scan;
-				while(line != outputArea.end.line) {
-					if(_lineMetadataBuffer[line].is_first_in_frame && _lineMetadataBuffer[line].previous_frame_was_complete) {
-						[self outputFrom:scan to:_lineMetadataBuffer[line].first_scan commandBuffer:commandBuffer];
-						[self outputFrameCleanerToCommandBuffer:commandBuffer];
-						scan = _lineMetadataBuffer[line].first_scan;
-					}
-					line = (line + 1) % NumBufferedLines;
-				}
-				[self outputFrom:scan to:outputArea.end.scan commandBuffer:commandBuffer];
-			} break;
-
-			default:	// TODO: add composite colour pipeline, and eliminate default.
-			case Pipeline::SVideo: {
-				// Build the composition buffer.
-				[self composeOutputArea:outputArea commandBuffer:commandBuffer];
-
-				if(outputArea.end.line != outputArea.start.line) {
-					// Filter to the finalised line texture.
-					id<MTLComputeCommandEncoder> computeEncoder = [commandBuffer computeCommandEncoder];
-					[computeEncoder setTexture:_compositionTexture atIndex:0];
-					[computeEncoder setTexture:_finalisedLineTexture atIndex:1];
-					[computeEncoder setBuffer:_uniformsBuffer offset:0 atIndex:0];
-
-					if(outputArea.end.line > outputArea.start.line) {
-						[self dispatchComputeCommandEncoder:computeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:outputArea.end.line - outputArea.start.line offset:outputArea.start.line];
-					} else {
-						[self dispatchComputeCommandEncoder:computeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:NumBufferedLines - outputArea.start.line offset:outputArea.start.line];
-						if(outputArea.end.line) {
-							[self dispatchComputeCommandEncoder:computeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:outputArea.end.line offset:0];
+			switch(_pipeline) {
+				case Pipeline::DirectToDisplay: {
+					// Output scans directly, broken up by frame.
+					size_t line = outputArea.start.line;
+					size_t scan = outputArea.start.scan;
+					while(line != outputArea.end.line) {
+						if(_lineMetadataBuffer[line].is_first_in_frame && _lineMetadataBuffer[line].previous_frame_was_complete) {
+							[self outputFrom:scan to:_lineMetadataBuffer[line].first_scan commandBuffer:commandBuffer];
+							[self outputFrameCleanerToCommandBuffer:commandBuffer];
+							scan = _lineMetadataBuffer[line].first_scan;
 						}
+						line = (line + 1) % NumBufferedLines;
 					}
+					[self outputFrom:scan to:outputArea.end.scan commandBuffer:commandBuffer];
+				} break;
 
-					[computeEncoder endEncoding];
+				case Pipeline::CompositeColour:
+				case Pipeline::SVideo: {
+					// Build the composition buffer.
+					[self composeOutputArea:outputArea commandBuffer:commandBuffer];
+
+					if(_pipeline == Pipeline::SVideo) {
+						// Filter from composition to the finalised line texture.
+						id<MTLComputeCommandEncoder> computeEncoder = [commandBuffer computeCommandEncoder];
+						[computeEncoder setTexture:_compositionTexture atIndex:0];
+						[computeEncoder setTexture:_finalisedLineTexture atIndex:1];
+						[computeEncoder setBuffer:_uniformsBuffer offset:0 atIndex:0];
+
+						if(outputArea.end.line > outputArea.start.line) {
+							[self dispatchComputeCommandEncoder:computeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:outputArea.end.line - outputArea.start.line offset:outputArea.start.line];
+						} else {
+							[self dispatchComputeCommandEncoder:computeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:NumBufferedLines - outputArea.start.line offset:outputArea.start.line];
+							if(outputArea.end.line) {
+								[self dispatchComputeCommandEncoder:computeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:outputArea.end.line offset:0];
+							}
+						}
+
+						[computeEncoder endEncoding];
+					} else {
+						// Separate luma and then filter.
+						id<MTLComputeCommandEncoder> separateComputeEncoder = [commandBuffer computeCommandEncoder];
+						[separateComputeEncoder setTexture:_compositionTexture atIndex:0];
+						[separateComputeEncoder setTexture:_separatedLumaTexture atIndex:1];
+						[separateComputeEncoder setBuffer:_uniformsBuffer offset:0 atIndex:0];
+
+						if(outputArea.end.line > outputArea.start.line) {
+							[self dispatchComputeCommandEncoder:separateComputeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:outputArea.end.line - outputArea.start.line offset:outputArea.start.line];
+						} else {
+							[self dispatchComputeCommandEncoder:separateComputeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:NumBufferedLines - outputArea.start.line offset:outputArea.start.line];
+							if(outputArea.end.line) {
+								[self dispatchComputeCommandEncoder:separateComputeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:outputArea.end.line offset:0];
+							}
+						}
+
+						[separateComputeEncoder endEncoding];
+
+						id<MTLComputeCommandEncoder> filterComputeEncoder = [commandBuffer computeCommandEncoder];
+						[filterComputeEncoder setTexture:_separatedLumaTexture atIndex:0];
+						[filterComputeEncoder setTexture:_finalisedLineTexture atIndex:1];
+						[filterComputeEncoder setBuffer:_uniformsBuffer offset:0 atIndex:0];
+
+						if(outputArea.end.line > outputArea.start.line) {
+							[self dispatchComputeCommandEncoder:filterComputeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:outputArea.end.line - outputArea.start.line offset:outputArea.start.line];
+						} else {
+							[self dispatchComputeCommandEncoder:filterComputeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:NumBufferedLines - outputArea.start.line offset:outputArea.start.line];
+							if(outputArea.end.line) {
+								[self dispatchComputeCommandEncoder:filterComputeEncoder pipelineState:_finalisedLineState width:_lineBufferPixelsPerLine height:outputArea.end.line offset:0];
+							}
+						}
+
+						[filterComputeEncoder endEncoding];
+					}
 
 					// Output lines, broken up by frame.
 					size_t startLine = outputArea.start.line;
@@ -788,15 +825,18 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
 						line = (line + 1) % NumBufferedLines;
 					}
 					[self outputFrom:startLine to:outputArea.end.line commandBuffer:commandBuffer];
-				}
-			} break;
-		}
+				} break;
+			}
 
-		// Add a callback to update the scan target buffer and commit the drawing.
-		[commandBuffer addCompletedHandler:^(id<MTLCommandBuffer> _Nonnull) {
+			// Add a callback to update the scan target buffer and commit the drawing.
+			[commandBuffer addCompletedHandler:^(id<MTLCommandBuffer> _Nonnull) {
+				self->_scanTarget.complete_output_area(outputArea);
+			}];
+			[commandBuffer commit];
+		} else {
+			// There was no work, but to be contractually correct:
 			self->_scanTarget.complete_output_area(outputArea);
-		}];
-		[commandBuffer commit];
+		}
 	}
 }
 
diff --git a/OSBindings/Mac/Clock Signal/ScanTarget/ScanTarget.metal b/OSBindings/Mac/Clock Signal/ScanTarget/ScanTarget.metal
index e0d04f9c2..af86c6318 100644
--- a/OSBindings/Mac/Clock Signal/ScanTarget/ScanTarget.metal	
+++ b/OSBindings/Mac/Clock Signal/ScanTarget/ScanTarget.metal	
@@ -307,8 +307,8 @@ float3 convertRed1Green1Blue1(SourceInterpolator vert, texture2d<ushort> texture
 		const float level = mix(colour.r, dot(colour.gb, colourSubcarrier), vert.colourAmplitude);	\
 		return float4(	\
 			level,	\
-			float2(0.5f) + level*colourSubcarrier*0.5f,	\
-			1.0		\
+			float2(0.5f) + quadrature(vert.colourPhase)*0.5f,	\
+			vert.colourPhase		\
 		);	\
 	}
 
@@ -352,52 +352,16 @@ fragment float4 clearFragment() {
 	return float4(0.0, 0.0, 0.0, 0.64);
 }
 
-// MARK: - Conversion fragment shaders
-
-template <bool applyCompositeAmplitude> float4 applyFilter(SourceInterpolator vert [[stage_in]], texture2d<float> texture [[texture(0)]], constant Uniforms &uniforms [[buffer(0)]]) {
-#define Sample(x)	texture.sample(standardSampler, vert.textureCoordinates + float2(x, 0.0f)) - float4(0.0f, 0.5f, 0.5f, 0.0f)
-	float4 rawSamples[] = {
-		Sample(-7),	Sample(-6),	Sample(-5),	Sample(-4),	Sample(-3),	Sample(-2),	Sample(-1),
-		Sample(0),
-		Sample(1),	Sample(2),	Sample(3),	Sample(4),	Sample(5),	Sample(6),	Sample(7),
-	};
-#undef Sample
-
-#define Sample(c, o, a)	uniforms.firCoefficients[c] * rawSamples[o].rgb
-
-	const float3 colour =
-		Sample(0, 0, -7) +	Sample(1, 1, -6) +	Sample(2, 2, -5) +	Sample(3, 3, -4) +
-		Sample(4, 4, -3) +	Sample(5, 5, -2) +	Sample(6, 6, -1) +
-		Sample(7, 7, 0) +
-		Sample(6, 8, 1) +	Sample(5, 9, 2) +	Sample(4, 10, 3) +
-		Sample(3, 11, 4) +	Sample(2, 12, 5) +	Sample(1, 13, 6) +	Sample(0, 14, 7);
-
-#undef Sample
-
-	// This would be `if constexpr`, were this C++17; the compiler should do compile-time selection here regardless.
-	if(applyCompositeAmplitude) {
-		return float4(uniforms.toRGB * (colour * float3(1.0f, 1.0f / vert.colourAmplitude, 1.0f / vert.colourAmplitude)), 1.0f);
-	} else {
-		return float4(uniforms.toRGB * colour, 1.0f);
-	}
-}
-
-fragment float4 filterSVideoFragment(SourceInterpolator vert [[stage_in]], texture2d<float> texture [[texture(0)]], constant Uniforms &uniforms [[buffer(0)]]) {
-	return applyFilter<false>(vert, texture, uniforms);
-}
-
-fragment float4 filterCompositeFragment(SourceInterpolator vert [[stage_in]], texture2d<float> texture [[texture(0)]], constant Uniforms &uniforms [[buffer(0)]]) {
-	return applyFilter<true>(vert, texture, uniforms);
-}
+// MARK: -
 
 fragment float4 interpolateFragment(CopyInterpolator vert [[stage_in]], texture2d<float> texture [[texture(0)]]) {
 	return texture.sample(linearSampler, vert.textureCoordinates);
 }
 
-
 // MARK: - Kernel functions
 
-// TEST FUNCTION. Just copies from input to output.
+/// Given input pixels of the form (luminance, 0.5 + 0.5*chrominance*cos(phase), 0.5 + 0.5*chrominance*sin(phase)), applies a lowpass
+/// filter to the two chrominance parts, then uses the toRGB matrix to convert to RGB and stores.
 kernel void filterChromaKernel(	texture2d<float, access::read> inTexture [[texture(0)]],
 								texture2d<float, access::write> outTexture [[texture(1)]],
 								uint2 gid [[thread_position_in_grid]],
@@ -431,3 +395,48 @@ kernel void filterChromaKernel(	texture2d<float, access::read> inTexture [[textu
 
 	outTexture.write(float4(uniforms.toRGB * colour, 1.0f), gid + uint2(7, offset));
 }
+
+/// Given input pixels of the form:
+///
+///	(composite sample, cos(phase), sin(phase), colour amplitude), applies a lowpass
+///
+/// Filters to separate luminance, subtracts that and scales and maps the remaining chrominance in order to output
+/// pixels in the form:
+///
+///	(luminance, 0.5 + 0.5*chrominance*cos(phase), 0.5 + 0.5*chrominance*sin(phase))
+///
+/// i.e. the input form for the filterChromaKernel, above].
+kernel void separateLumaKernel(	texture2d<float, access::read> inTexture [[texture(0)]],
+								texture2d<float, access::write> outTexture [[texture(1)]],
+								uint2 gid [[thread_position_in_grid]],
+								constant Uniforms &uniforms [[buffer(0)]],
+								constant int &offset [[buffer(1)]]) {
+	// TODO!
+	constexpr float4 moveToZero = float4(0.0f, 0.5f, 0.5f, 0.0f);
+	const float4 rawSamples[] = {
+		inTexture.read(gid + uint2(0, offset))  - moveToZero,
+		inTexture.read(gid + uint2(1, offset)) - moveToZero,
+		inTexture.read(gid + uint2(2, offset)) - moveToZero,
+		inTexture.read(gid + uint2(3, offset)) - moveToZero,
+		inTexture.read(gid + uint2(4, offset)) - moveToZero,
+		inTexture.read(gid + uint2(5, offset)) - moveToZero,
+		inTexture.read(gid + uint2(6, offset)) - moveToZero,
+		inTexture.read(gid + uint2(7, offset)) - moveToZero,
+		inTexture.read(gid + uint2(8, offset)) - moveToZero,
+		inTexture.read(gid + uint2(9, offset)) - moveToZero,
+		inTexture.read(gid + uint2(10, offset)) - moveToZero,
+		inTexture.read(gid + uint2(11, offset)) - moveToZero,
+		inTexture.read(gid + uint2(12, offset)) - moveToZero,
+		inTexture.read(gid + uint2(13, offset)) - moveToZero,
+		inTexture.read(gid + uint2(14, offset)) - moveToZero,
+	};
+
+#define Sample(x, y) uniforms.firCoefficients[y] * rawSamples[x].rgb
+	const float3 colour =
+		Sample(0, 0) + Sample(1, 1) + Sample(2, 2) + Sample(3, 3) + Sample(4, 4) + Sample(5, 5) + Sample(6, 6) +
+		Sample(7, 7) +
+		Sample(8, 6) + Sample(9, 5) + Sample(10, 4) + Sample(11, 3) + Sample(12, 2) + Sample(13, 1) + Sample(14, 0);
+#undef Sample
+
+	outTexture.write(float4(uniforms.toRGB * colour, 1.0f), gid + uint2(7, offset));
+}