1
0
mirror of https://github.com/TomHarte/CLK.git synced 2024-11-26 23:52:26 +00:00

Starts girding for a third pipeline.

This commit is contained in:
Thomas Harte 2020-08-31 20:01:59 -04:00
parent b344269140
commit 67d4dbf91a

View File

@ -16,6 +16,78 @@
#include "BufferingScanTarget.hpp" #include "BufferingScanTarget.hpp"
#include "FIRFilter.hpp" #include "FIRFilter.hpp"
/*
RGB and composite monochrome
----------------------------
Source data is converted to 32bpp RGB or to composite directly from its input, at output resolution.
Gamma correction is applied unless the inputs are 1bpp (e.g. Macintosh-style black/white, TTL-style RGB).
TODO: filtering when the output size is 'small'.
S-Video
-------
Source data is pasted together with a common clock in the composition buffer. Colour phase is baked in
at this point. Format within the composition buffer is:
.r = luminance
.g = 0.5 + 0.5 * chrominance * cos(phase)
.b = 0.5 + 0.5 * chrominance * sin(phase)
Contents of the composition buffer are then drawn into the finalised line texture; at this point a suitable
low-filter is applied to the two chrominance channels, colours are converted to RGB and gamma corrected.
Contents from the finalised line texture are then painted to the display.
Composite colour
----------------
Source data is pasted together with a common clock in the composition buffer. Colour phase and amplitude are
recorded at this point. Format within the composition buffer is:
.r = composite value
.g = phase
.b = amplitude
Contents of the composition buffer are transferred to the separated-luma buffer, subject to a low-paass filter
that has sought to separate luminance and chrominance, and with phase and amplitude now baked into the latter:
.r = luminance
.g = 0.5 + 0.5 * chrominance * cos(phase)
.b = 0.5 + 0.5 * chrominance * sin(phase)
The process now continues as per the corresponding S-Video steps.
NOTES
-----
1) for many of the input pixel formats it would be possible to do the trigonometric side of things at
arbitrary precision. Since it would always be necessary to support fixed-precision processing because
of the directly-sampled input formats, I've used fixed throughout to reduce the number of permutations
and combinations of code I need to support. The precision is always selected to be at least four times
the colour clock.
2) I experimented with skipping the separated-luma buffer for composite colour based on the observation that
just multiplying the raw signal by sin and cos and then filtering well below the colour subcarrier frequency
should be sufficient. It wasn't in practice because the bits of luminance that don't quite separate are then
of such massive amplitude that you get huge bands of bright colour in place of the usual chroma dots.
3) I also initially didn't want to have a finalied-line texture, but processing costs changed my mind on that.
If you accept that output will be fixed precision, anyway. In that case, processing for a typical NTSC frame
in its original resolution means applying filtering (i.e. at least 15 samples per pixel) likely between
218,400 and 273,000 times per output frame, then upscaling from there at 1 sample per pixel. Count the second
sample twice for the original store and you're talking between 16*218,400 = 3,494,400 to 16*273,000 = 4,368,000
total pixel accesses. Though that's not a perfect way to measure cost, roll with it.
On my 4k monitor, doing it at actual output resolution would instead cost 3840*2160*15 = 124,416,000 total
accesses. Which doesn't necessarily mean "more than 28 times as much", but does mean "a lot more".
(going direct-to-display for composite monochrome means evaluating sin/cos a lot more often than it might
with more buffering in between, but that doesn't provisionally seem to be as much of a bottleneck)
*/
namespace { namespace {
struct Uniforms { struct Uniforms {
@ -110,7 +182,23 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
// properly adjoin their neighbours and everything is converted to a common clock. // properly adjoin their neighbours and everything is converted to a common clock.
id<MTLTexture> _compositionTexture; id<MTLTexture> _compositionTexture;
MTLRenderPassDescriptor *_compositionRenderPass; // The render pass for _drawing to_ the composition buffer. MTLRenderPassDescriptor *_compositionRenderPass; // The render pass for _drawing to_ the composition buffer.
BOOL _isUsingCompositionPipeline; // Whether the composition pipeline is in use.
enum class Pipeline {
/// Scans are painted directly to the frame buffer.
DirectToDisplay,
/// Scans are painted to the composition buffer, which is processed to the finalised line buffer,
/// from which lines are painted to the frame buffer.
SVideo,
/// Scans are painted to the composition buffer, which is processed to the separated luma buffer and then the finalised line buffer,
/// from which lines are painted to the frame buffer.
CompositeColour
// TODO: decide what to do for downard-scaled direct-to-display. Obvious options are to include lowpass
// filtering into the scan outputter and contine hoping that the vertical takes care of itself, or maybe
// to stick with DirectToDisplay but with a minimum size for the frame buffer and apply filtering from
// there to the screen.
};
Pipeline _pipeline;
// Textures: additional storage used when processing S-Video and composite colour input. // Textures: additional storage used when processing S-Video and composite colour input.
id<MTLTexture> _finalisedLineTexture; id<MTLTexture> _finalisedLineTexture;
@ -177,16 +265,6 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
depthStencilDescriptor.frontFaceStencil.stencilFailureOperation = MTLStencilOperationReplace; depthStencilDescriptor.frontFaceStencil.stencilFailureOperation = MTLStencilOperationReplace;
_clearStencilState = [view.device newDepthStencilStateWithDescriptor:depthStencilDescriptor]; _clearStencilState = [view.device newDepthStencilStateWithDescriptor:depthStencilDescriptor];
// Create a composition texture up front. (TODO: is it worth switching to an 8bpp texture in composite mode?)
MTLTextureDescriptor *const textureDescriptor = [MTLTextureDescriptor
texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm
width:2048 // This 'should do'.
height:NumBufferedLines // TODO: I want to turn this down _considerably_. A frame and a bit should be sufficient, though probably I'd also want to adjust the buffering scan target to keep most recent data?
mipmapped:NO];
textureDescriptor.usage = MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead;
textureDescriptor.resourceOptions = MTLResourceStorageModePrivate;
_compositionTexture = [view.device newTextureWithDescriptor:textureDescriptor];
// Ensure the is-drawing flag is initially clear. // Ensure the is-drawing flag is initially clear.
_isDrawing.clear(); _isDrawing.clear();
} }
@ -204,23 +282,26 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
- (void)mtkView:(nonnull MTKView *)view drawableSizeWillChange:(CGSize)size { - (void)mtkView:(nonnull MTKView *)view drawableSizeWillChange:(CGSize)size {
[self setAspectRatio]; [self setAspectRatio];
@synchronized(self) {
[self updateSizeBuffers];
}
}
- (void)updateSizeBuffers {
// TODO: consider multisampling here? But it seems like you'd need another level of indirection // TODO: consider multisampling here? But it seems like you'd need another level of indirection
// in order to maintain an ongoing buffer that supersamples only at the end. // in order to maintain an ongoing buffer that supersamples only at the end.
const NSUInteger frameBufferWidth = NSUInteger(_view.drawableSize.width * _view.layer.contentsScale);
const NSUInteger frameBufferHeight = NSUInteger(_view.drawableSize.height * _view.layer.contentsScale);
// TODO: Do I need to multiply by contents scale here?
const NSUInteger frameBufferWidth = NSUInteger(size.width * view.layer.contentsScale);
const NSUInteger frameBufferHeight = NSUInteger(size.height * view.layer.contentsScale);
@synchronized(self) {
// Generate a framebuffer and a stencil. // Generate a framebuffer and a stencil.
MTLTextureDescriptor *const textureDescriptor = [MTLTextureDescriptor MTLTextureDescriptor *const textureDescriptor = [MTLTextureDescriptor
texture2DDescriptorWithPixelFormat:view.colorPixelFormat texture2DDescriptorWithPixelFormat:_view.colorPixelFormat
width:frameBufferWidth width:frameBufferWidth
height:frameBufferHeight height:frameBufferHeight
mipmapped:NO]; mipmapped:NO];
textureDescriptor.usage = MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead; textureDescriptor.usage = MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead;
textureDescriptor.resourceOptions = MTLResourceStorageModePrivate; textureDescriptor.resourceOptions = MTLResourceStorageModePrivate;
_frameBuffer = [view.device newTextureWithDescriptor:textureDescriptor]; _frameBuffer = [_view.device newTextureWithDescriptor:textureDescriptor];
MTLTextureDescriptor *const stencilTextureDescriptor = [MTLTextureDescriptor MTLTextureDescriptor *const stencilTextureDescriptor = [MTLTextureDescriptor
texture2DDescriptorWithPixelFormat:MTLPixelFormatStencil8 texture2DDescriptorWithPixelFormat:MTLPixelFormatStencil8
@ -229,7 +310,7 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
mipmapped:NO]; mipmapped:NO];
stencilTextureDescriptor.usage = MTLTextureUsageRenderTarget; stencilTextureDescriptor.usage = MTLTextureUsageRenderTarget;
stencilTextureDescriptor.resourceOptions = MTLResourceStorageModePrivate; stencilTextureDescriptor.resourceOptions = MTLResourceStorageModePrivate;
_frameBufferStencil = [view.device newTextureWithDescriptor:stencilTextureDescriptor]; _frameBufferStencil = [_view.device newTextureWithDescriptor:stencilTextureDescriptor];
// Generate a render pass with that framebuffer and stencil. // Generate a render pass with that framebuffer and stencil.
_frameBufferRenderPass = [[MTLRenderPassDescriptor alloc] init]; _frameBufferRenderPass = [[MTLRenderPassDescriptor alloc] init];
@ -248,50 +329,51 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
MTLDepthStencilDescriptor *depthStencilDescriptor = [[MTLDepthStencilDescriptor alloc] init]; MTLDepthStencilDescriptor *depthStencilDescriptor = [[MTLDepthStencilDescriptor alloc] init];
depthStencilDescriptor.frontFaceStencil.stencilCompareFunction = MTLCompareFunctionAlways; depthStencilDescriptor.frontFaceStencil.stencilCompareFunction = MTLCompareFunctionAlways;
depthStencilDescriptor.frontFaceStencil.depthStencilPassOperation = MTLStencilOperationReplace; depthStencilDescriptor.frontFaceStencil.depthStencilPassOperation = MTLStencilOperationReplace;
_drawStencilState = [view.device newDepthStencilStateWithDescriptor:depthStencilDescriptor]; _drawStencilState = [_view.device newDepthStencilStateWithDescriptor:depthStencilDescriptor];
// TODO: old framebuffer should be resized onto the new one. // TODO: old framebuffer should be resized onto the new one.
[self updateSizeAndModalBuffers];
}
}
- (void)updateSizeAndModalBuffers {
// Buffers are required only for the composition pipeline; so if this isn't that then release anything
// currently being held and return.
if(!_isUsingCompositionPipeline) {
_finalisedLineTexture = nil;
_finalisedLineRenderPass = nil;
_separatedLumaTexture = nil;
_separatedLumaRenderPass = nil;
return;
}
const auto modals = _scanTarget.modals();
const NSUInteger frameBufferWidth = NSUInteger(_view.drawableSize.width * _view.layer.contentsScale);
const NSUInteger maximumDetail = NSUInteger(modals.cycles_per_line) * NSUInteger(uniforms()->cyclesMultiplier);
// maximumDetail = 0 => the modals haven't been set yet. So there's nothing to create buffers for.
if(!maximumDetail) {
return;
} }
- (void)updateModalBuffers {
// Build a descriptor for any intermediate line texture. // Build a descriptor for any intermediate line texture.
MTLTextureDescriptor *const lineTextureDescriptor = [MTLTextureDescriptor MTLTextureDescriptor *const lineTextureDescriptor = [MTLTextureDescriptor
texture2DDescriptorWithPixelFormat:_view.colorPixelFormat texture2DDescriptorWithPixelFormat:MTLPixelFormatBGRA8Unorm
width:std::min(frameBufferWidth, maximumDetail) width:2048 // This 'should do'.
height:NumBufferedLines height:NumBufferedLines
mipmapped:NO]; mipmapped:NO];
lineTextureDescriptor.usage = MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead; lineTextureDescriptor.usage = MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead;
lineTextureDescriptor.resourceOptions = MTLResourceStorageModePrivate; lineTextureDescriptor.resourceOptions = MTLResourceStorageModePrivate;
// The finalised texture will definitely exist given that this is the composition pipeline. if(_pipeline == Pipeline::DirectToDisplay) {
// Buffers are not required when outputting direct to display; so if this isn't that then release anything
// currently being held and return.
_finalisedLineTexture = nil;
_finalisedLineRenderPass = nil;
_separatedLumaTexture = nil;
_separatedLumaRenderPass = nil;
_compositionTexture = nil;
_compositionRenderPass = nil;
return;
}
// Create a composition texture if one does not yet exist.
if(!_compositionTexture) {
_compositionTexture = [_view.device newTextureWithDescriptor:lineTextureDescriptor];
}
// The finalised texture will definitely exist.
if(!_finalisedLineTexture) {
_finalisedLineTexture = [_view.device newTextureWithDescriptor:lineTextureDescriptor]; _finalisedLineTexture = [_view.device newTextureWithDescriptor:lineTextureDescriptor];
}
// A luma separation texture will exist only for composite colour. // A luma separation texture will exist only for composite colour.
if(modals.display_type == Outputs::Display::DisplayType::CompositeColour) { if(_pipeline == Pipeline::CompositeColour) {
if(!_separatedLumaTexture) {
_separatedLumaTexture = [_view.device newTextureWithDescriptor:lineTextureDescriptor]; _separatedLumaTexture = [_view.device newTextureWithDescriptor:lineTextureDescriptor];
} }
} else {
_separatedLumaTexture = nil;
}
} }
- (void)setAspectRatio { - (void)setAspectRatio {
@ -386,12 +468,23 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
// //
// (i) input and output are both RGB; or // (i) input and output are both RGB; or
// (i) output is composite monochrome. // (i) output is composite monochrome.
_isUsingCompositionPipeline = const bool isComposition =
( (
(natural_display_type_for_data_type(modals.input_data_type) != Outputs::Display::DisplayType::RGB) || (natural_display_type_for_data_type(modals.input_data_type) != Outputs::Display::DisplayType::RGB) ||
(modals.display_type != Outputs::Display::DisplayType::RGB) (modals.display_type != Outputs::Display::DisplayType::RGB)
) && modals.display_type != Outputs::Display::DisplayType::CompositeMonochrome; ) && modals.display_type != Outputs::Display::DisplayType::CompositeMonochrome;
const bool isSVideoOutput = modals.display_type == Outputs::Display::DisplayType::SVideo;
if(!isComposition) {
_pipeline = Pipeline::DirectToDisplay;
} else {
_pipeline = isSVideoOutput ? Pipeline::SVideo : Pipeline::CompositeColour;
}
// Update intermediate storage.
[self updateModalBuffers];
// TODO: factor in gamma, which may or may not be a factor (it isn't for 1-bit formats).
struct FragmentSamplerDictionary { struct FragmentSamplerDictionary {
/// Fragment shader that outputs to the composition buffer for composite processing. /// Fragment shader that outputs to the composition buffer for composite processing.
NSString *const compositionComposite; NSString *const compositionComposite;
@ -403,9 +496,6 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
/// Fragment shader that outputs directly as RGB. /// Fragment shader that outputs directly as RGB.
NSString *const directRGB; NSString *const directRGB;
}; };
// TODO: create fragment shaders to apply composite multiplication.
// TODO: incorporate gamma correction into all direct outputters.
const FragmentSamplerDictionary samplerDictionary[8] = { const FragmentSamplerDictionary samplerDictionary[8] = {
// Luminance1 // Luminance1
{@"sampleLuminance1", nullptr, @"sampleLuminance1", nullptr}, {@"sampleLuminance1", nullptr, @"sampleLuminance1", nullptr},
@ -427,18 +517,16 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
if(samplerDictionary[c].directRGB) assert([library newFunctionWithName:samplerDictionary[c].directRGB]); if(samplerDictionary[c].directRGB) assert([library newFunctionWithName:samplerDictionary[c].directRGB]);
} }
#endif #endif
// Pick a suitable cycle multiplier.
uniforms()->cyclesMultiplier = 1.0f; uniforms()->cyclesMultiplier = 1.0f;
if(_isUsingCompositionPipeline) { if(_pipeline != Pipeline::DirectToDisplay) {
// Pick a suitable cycle multiplier.
const float minimumSize = 4.0f * float(modals.colour_cycle_numerator) / float(modals.colour_cycle_denominator); const float minimumSize = 4.0f * float(modals.colour_cycle_numerator) / float(modals.colour_cycle_denominator);
while(uniforms()->cyclesMultiplier * modals.cycles_per_line < minimumSize) { while(uniforms()->cyclesMultiplier * modals.cycles_per_line < minimumSize) {
uniforms()->cyclesMultiplier += 1.0f; uniforms()->cyclesMultiplier += 1.0f;
} }
}
// Build the composition pipeline if one is in use. // Create the composition render pass.
const bool isSVideoOutput = modals.display_type == Outputs::Display::DisplayType::SVideo;
if(_isUsingCompositionPipeline) {
pipelineDescriptor.colorAttachments[0].pixelFormat = _compositionTexture.pixelFormat; pipelineDescriptor.colorAttachments[0].pixelFormat = _compositionTexture.pixelFormat;
pipelineDescriptor.vertexFunction = [library newFunctionWithName:@"scanToComposition"]; pipelineDescriptor.vertexFunction = [library newFunctionWithName:@"scanToComposition"];
pipelineDescriptor.fragmentFunction = pipelineDescriptor.fragmentFunction =
@ -452,6 +540,7 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
_compositionRenderPass.colorAttachments[0].storeAction = MTLStoreActionStore; _compositionRenderPass.colorAttachments[0].storeAction = MTLStoreActionStore;
_compositionRenderPass.colorAttachments[0].clearColor = MTLClearColorMake(0.0, 0.5, 0.5, 1.0); _compositionRenderPass.colorAttachments[0].clearColor = MTLClearColorMake(0.0, 0.5, 0.5, 1.0);
// Create suitable FIR filters.
auto *const firCoefficients = uniforms()->firCoefficients; auto *const firCoefficients = uniforms()->firCoefficients;
const float cyclesPerLine = float(modals.cycles_per_line) * uniforms()->cyclesMultiplier; const float cyclesPerLine = float(modals.cycles_per_line) * uniforms()->cyclesMultiplier;
const float colourCyclesPerLine = float(modals.colour_cycle_numerator) / float(modals.colour_cycle_denominator); const float colourCyclesPerLine = float(modals.colour_cycle_numerator) / float(modals.colour_cycle_denominator);
@ -483,9 +572,9 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
// Build the output pipeline. // Build the output pipeline.
pipelineDescriptor.colorAttachments[0].pixelFormat = _view.colorPixelFormat; pipelineDescriptor.colorAttachments[0].pixelFormat = _view.colorPixelFormat;
pipelineDescriptor.vertexFunction = [library newFunctionWithName:_isUsingCompositionPipeline ? @"lineToDisplay" : @"scanToDisplay"]; pipelineDescriptor.vertexFunction = [library newFunctionWithName:_pipeline == Pipeline::DirectToDisplay ? @"scanToDisplay" : @"lineToDisplay"];
if(_isUsingCompositionPipeline) { if(_pipeline != Pipeline::DirectToDisplay) {
pipelineDescriptor.fragmentFunction = [library newFunctionWithName:isSVideoOutput ? @"filterSVideoFragment" : @"filterCompositeFragment"]; pipelineDescriptor.fragmentFunction = [library newFunctionWithName:isSVideoOutput ? @"filterSVideoFragment" : @"filterCompositeFragment"];
} else { } else {
const bool isRGBOutput = modals.display_type == Outputs::Display::DisplayType::RGB; const bool isRGBOutput = modals.display_type == Outputs::Display::DisplayType::RGB;
@ -503,9 +592,6 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
// Finish. // Finish.
_outputPipeline = [_view.device newRenderPipelineStateWithDescriptor:pipelineDescriptor error:nil]; _outputPipeline = [_view.device newRenderPipelineStateWithDescriptor:pipelineDescriptor error:nil];
// Update intermediate storage that is a function of both modals and current window size.
[self updateSizeAndModalBuffers];
} }
- (void)outputFrom:(size_t)start to:(size_t)end commandBuffer:(id<MTLCommandBuffer>)commandBuffer { - (void)outputFrom:(size_t)start to:(size_t)end commandBuffer:(id<MTLCommandBuffer>)commandBuffer {
@ -515,7 +601,7 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
// Final output. Could be scans or lines. // Final output. Could be scans or lines.
[encoder setRenderPipelineState:_outputPipeline]; [encoder setRenderPipelineState:_outputPipeline];
if(_isUsingCompositionPipeline) { if(_pipeline != Pipeline::DirectToDisplay) {
[encoder setFragmentTexture:_compositionTexture atIndex:0]; [encoder setFragmentTexture:_compositionTexture atIndex:0];
[encoder setVertexBuffer:_linesBuffer offset:0 atIndex:0]; [encoder setVertexBuffer:_linesBuffer offset:0 atIndex:0];
} else { } else {
@ -534,7 +620,7 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
#endif #endif
#define OutputStrips(start, size) [encoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4 instanceCount:size baseInstance:start] #define OutputStrips(start, size) [encoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4 instanceCount:size baseInstance:start]
RangePerform(start, end, (_isUsingCompositionPipeline ? NumBufferedLines : NumBufferedScans), OutputStrips); RangePerform(start, end, (_pipeline != Pipeline::DirectToDisplay ? NumBufferedLines : NumBufferedScans), OutputStrips);
#undef OutputStrips #undef OutputStrips
// Complete encoding. // Complete encoding.
@ -595,7 +681,7 @@ using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
// Hence every pixel is touched every frame, regardless of the machine's output. // Hence every pixel is touched every frame, regardless of the machine's output.
// //
if(_isUsingCompositionPipeline) { if(_pipeline != Pipeline::DirectToDisplay) {
// Output all scans to the composition buffer. // Output all scans to the composition buffer.
id<MTLRenderCommandEncoder> encoder = [commandBuffer renderCommandEncoderWithDescriptor:_compositionRenderPass]; id<MTLRenderCommandEncoder> encoder = [commandBuffer renderCommandEncoderWithDescriptor:_compositionRenderPass];
[encoder setRenderPipelineState:_composePipeline]; [encoder setRenderPipelineState:_composePipeline];