mirror of
https://github.com/TomHarte/CLK.git
synced 2026-04-20 10:17:05 +00:00
1179 lines
45 KiB
Plaintext
1179 lines
45 KiB
Plaintext
//
|
|
// ScanTarget.m
|
|
// Clock Signal
|
|
//
|
|
// Created by Thomas Harte on 02/08/2020.
|
|
// Copyright © 2020 Thomas Harte. All rights reserved.
|
|
//
|
|
|
|
#import "CSScanTarget.h"
|
|
|
|
#import <Metal/Metal.h>
|
|
|
|
#include <algorithm>
|
|
#include <atomic>
|
|
#include <cmath>
|
|
#include <concepts>
|
|
#include <optional>
|
|
|
|
#include "BufferingScanTarget.hpp"
|
|
#include "FilterGenerator.hpp"
|
|
#include "Numeric/CircularCounter.hpp"
|
|
|
|
/*
|
|
|
|
RGB and composite monochrome
|
|
----------------------------
|
|
|
|
Source data is converted to 32bpp RGB or to composite directly from its input, at output resolution.
|
|
Gamma correction is applied unless the inputs are 1bpp (e.g. Macintosh-style black/white, TTL-style RGB).
|
|
|
|
S-Video
|
|
-------
|
|
|
|
Source data is pasted together with a common clock in the composition buffer. Colour phase is baked in
|
|
at this point. Format within the composition buffer is:
|
|
|
|
.r = luminance
|
|
.g = 0.5 + 0.5 * chrominance * cos(phase)
|
|
.b = 0.5 + 0.5 * chrominance * sin(phase)
|
|
|
|
Contents of the composition buffer are then drawn into the finalised line texture; at this point a suitable
|
|
low-pass filter is applied to the two chrominance channels, colours are converted to RGB and gamma corrected.
|
|
|
|
Contents from the finalised line texture are then painted to the display.
|
|
|
|
Composite colour
|
|
----------------
|
|
|
|
Source data is pasted together with a common clock in the composition buffer. Colour phase and amplitude are
|
|
recorded at this point. Format within the composition buffer is:
|
|
|
|
.r = composite value
|
|
.g = 0.5 + 0.5 * cos(phase)
|
|
.b = 0.5 + 0.5 * sin(phase)
|
|
.a = amplitude
|
|
|
|
[aside: upfront calculation of cos/sin is just because it'll need to be calculated at this precision anyway,
|
|
and doing it here avoids having to do unit<->radian conversions on phase alone]
|
|
|
|
Contents of the composition buffer are transferred to the separated-luma buffer, subject to a low-pass filter
|
|
that has sought to separate luminance and chrominance, and with phase and amplitude now baked into the latter:
|
|
|
|
.r = luminance
|
|
.g = 0.5 + 0.5 * chrominance * cos(phase)
|
|
.b = 0.5 + 0.5 * chrominance * sin(phase)
|
|
|
|
The process now continues as per the corresponding S-Video steps.
|
|
|
|
NOTES
|
|
-----
|
|
|
|
1) for many of the input pixel formats it would be possible to do the trigonometric side of things at
|
|
arbitrary precision. Since it would always be necessary to support fixed-precision processing because
|
|
of the directly-sampled input formats, I've used fixed throughout to reduce the number of permutations
|
|
and combinations of code I need to support. The precision is always selected to be at least four times
|
|
the colour clock.
|
|
|
|
2) I experimented with skipping the separated-luma buffer for composite colour based on the observation that
|
|
just multiplying the raw signal by sin and cos and then filtering well below the colour subcarrier frequency
|
|
should be sufficient. It wasn't in practice because the bits of luminance that don't quite separate are then
|
|
of such massive amplitude that you get huge bands of bright colour in place of the usual chroma dots.
|
|
|
|
3) I also initially didn't want to have a finalied-line texture, but processing costs changed my mind on that.
|
|
If you accept that output will be fixed precision, anyway. In that case, processing for a typical NTSC frame
|
|
in its original resolution means applying filtering (i.e. at least 15 samples per pixel) likely between
|
|
218,400 and 273,000 times per output frame, then upscaling from there at 1 sample per pixel. Count the
|
|
second sample twice for the original store and you're talking between 16*218,400 = 3,494,400 to
|
|
16*273,000 = 4,368,000 total pixel accesses. Though that's not a perfect way to measure cost, roll with it.
|
|
|
|
On my 4k monitor, doing it at actual output resolution would instead cost 3840*2160*15 = 124,416,000 total
|
|
accesses. Which doesn't necessarily mean "more than 28 times as much", but does mean "a lot more".
|
|
|
|
(going direct-to-display for composite monochrome means evaluating sin/cos a lot more often than it might
|
|
with more buffering in between, but that doesn't provisionally seem to be as much of a bottleneck)
|
|
*/
|
|
|
|
namespace {
|
|
constexpr size_t NumBufferedLines = 500;
|
|
constexpr size_t NumBufferedScans = NumBufferedLines * 4;
|
|
|
|
/// Provides a container for __fp16 versions of tightly-packed single-precision plain old data with a copy assignment constructor.
|
|
template <typename NaturalType> struct HalfConverter {
|
|
__fp16 elements[sizeof(NaturalType) / sizeof(float)];
|
|
|
|
void operator =(const NaturalType &rhs) {
|
|
const float *floatRHS = reinterpret_cast<const float *>(&rhs);
|
|
for(size_t c = 0; c < sizeof(elements) / sizeof(*elements); ++c) {
|
|
elements[c] = __fp16(floatRHS[c]);
|
|
}
|
|
}
|
|
};
|
|
|
|
// Tracks the Uniforms struct declared in ScanTarget.metal; see there for field definitions.
|
|
//
|
|
// __fp16 is a Clang-specific type which I'm using as equivalent to a Metal half, i.e. an IEEE 754 binary16.
|
|
struct Uniforms {
|
|
int32_t scale[2];
|
|
float cyclesMultiplier;
|
|
float lineWidth;
|
|
|
|
simd::float3x3 sourcetoDisplay;
|
|
|
|
HalfConverter<simd::float3x3> toRGB;
|
|
HalfConverter<simd::float3x3> fromRGB;
|
|
|
|
HalfConverter<simd::float3> chromaKernel[16];
|
|
HalfConverter<simd::float2> lumaKernel[16];
|
|
|
|
__fp16 outputAlpha;
|
|
__fp16 outputGamma;
|
|
__fp16 outputMultiplier;
|
|
};
|
|
|
|
// Kernel sizes above and in the shaders themselves assume a maximum filter kernel size.
|
|
static_assert(Outputs::Display::FilterGenerator::MaxKernelSize <= 31);
|
|
|
|
/// The shared resource options this app would most favour; applied as widely as possible.
|
|
constexpr MTLResourceOptions SharedResourceOptionsStandard =
|
|
MTLResourceCPUCacheModeWriteCombined | MTLResourceStorageModeShared;
|
|
|
|
/// The shared resource options used for the write-area texture; on macOS it can't be MTLResourceStorageModeShared so this is a carve-out.
|
|
constexpr MTLResourceOptions SharedResourceOptionsTexture =
|
|
MTLResourceCPUCacheModeWriteCombined | MTLResourceStorageModeManaged;
|
|
|
|
template <typename FuncT>
|
|
requires std::invocable<FuncT, size_t, size_t>
|
|
void range_perform(
|
|
const size_t start,
|
|
const size_t end,
|
|
const size_t size,
|
|
const FuncT &&func
|
|
) {
|
|
if(start == end) return;
|
|
if(start < end) {
|
|
func(start, end - start);
|
|
return;
|
|
}
|
|
|
|
func(start, size - start);
|
|
if(end) func(0, end);
|
|
}
|
|
|
|
}
|
|
|
|
using BufferingScanTarget = Outputs::Display::BufferingScanTarget;
|
|
|
|
@implementation CSScanTarget {
|
|
// The command queue for the device in use.
|
|
id<MTLCommandQueue> _commandQueue;
|
|
|
|
// Pipelines.
|
|
id<MTLRenderPipelineState> _composePipeline; // Renders to the composition texture.
|
|
id<MTLRenderPipelineState> _outputPipeline; // Draws to the frame buffer.
|
|
id<MTLRenderPipelineState> _copyPipeline; // Copies from one texture to another.
|
|
id<MTLRenderPipelineState> _supersamplePipeline; // Resamples from one texture to one that is 1/4 as large.
|
|
id<MTLRenderPipelineState> _clearPipeline; // Applies additional inter-frame clearing (cf. the stencil).
|
|
|
|
// Buffers.
|
|
id<MTLBuffer> _uniformsBuffer; // A static buffer, containing a copy of the Uniforms struct.
|
|
id<MTLBuffer> _scansBuffer; // A dynamic buffer, into which the CPU writes Scans for later display.
|
|
id<MTLBuffer> _linesBuffer; // A dynamic buffer, into which the CPU writes Lines for later display.
|
|
|
|
// Textures: the write area.
|
|
//
|
|
// The write area receives fragments of output from the emulated machine.
|
|
// So it is written by the CPU and read by the GPU.
|
|
id<MTLTexture> _writeAreaTexture;
|
|
id<MTLBuffer> _writeAreaBuffer; // The storage underlying the write-area texture.
|
|
size_t _bytesPerInputPixel; // Determines per-pixel sizing within the write-area texture.
|
|
size_t _totalTextureBytes; // Holds the total size of the write-area texture.
|
|
|
|
// Textures: the frame buffer.
|
|
//
|
|
// When inter-frame blending is in use, the frame buffer contains the most recent output.
|
|
// Metal isn't really set up for single-buffered output, so this acts as if it were that
|
|
// single buffer. This texture is complete 2d data, copied directly to the display.
|
|
id<MTLTexture> _frameBuffers[2];
|
|
MTLRenderPassDescriptor *_frameBufferRenderPasses[2]; // The render pass for _drawing to_ the frame buffer.
|
|
BOOL _dontClearFrameBuffer;
|
|
|
|
// Textures: the stencil.
|
|
//
|
|
// Scan targets receive scans, not full frames. Those scans may not cover the entire display,
|
|
// either because unlit areas have been omitted or because a sync discrepancy means that the full
|
|
// potential vertical or horizontal width of the display isn't used momentarily.
|
|
//
|
|
// In order to manage inter-frame blending correctly in those cases, a stencil is attached to the
|
|
// frame buffer so that a clearing step can darken any pixels that weren't naturally painted during
|
|
// any frame.
|
|
id<MTLTexture> _frameBufferStencil;
|
|
id<MTLDepthStencilState> _drawStencilState; // Always draws, sets stencil to 1.
|
|
id<MTLDepthStencilState> _clearStencilState; // Draws only where stencil is 0, clears all to 0.
|
|
|
|
// Textures: the composition texture.
|
|
//
|
|
// If additional temporal processing is required (i.e. for S-Video and colour composite output),
|
|
// fragments from the write-area texture are assembled into the composition texture, where they
|
|
// properly adjoin their neighbours and everything is converted to a common clock.
|
|
id<MTLTexture> _compositionTexture;
|
|
MTLRenderPassDescriptor *_compositionRenderPass; // The render pass for _drawing to_ the composition buffer.
|
|
|
|
enum class Pipeline {
|
|
/// Scans are painted directly to the frame buffer.
|
|
DirectToDisplay,
|
|
/// Scans are painted to the composition buffer, which is processed to the finalised line buffer,
|
|
/// from which lines are painted to the frame buffer.
|
|
SVideo,
|
|
/// Scans are painted to the composition buffer, which is processed to the separated luma buffer and then the finalised line buffer,
|
|
/// from which lines are painted to the frame buffer.
|
|
CompositeColour
|
|
};
|
|
Pipeline _pipeline;
|
|
|
|
// Textures: additional storage used when processing S-Video and composite colour input.
|
|
id<MTLTexture> _finalisedLineTexture;
|
|
id<MTLComputePipelineState> _finalisedLineState;
|
|
id<MTLTexture> _separatedLumaTexture;
|
|
id<MTLComputePipelineState> _separatedLumaState;
|
|
NSUInteger _lineBufferPixelsPerLine;
|
|
|
|
Numeric::CircularCounter<size_t, NumBufferedLines> _lineOffsetBuffer;
|
|
id<MTLBuffer> _lineOffsetBuffers[NumBufferedLines]; // Allocating NumBufferedLines buffers ensures these can't
|
|
// possibly be exhausted; for this list to be exhausted there'd
|
|
// have to be more draw calls in flight than there are lines for
|
|
// them to operate upon.
|
|
|
|
// The scan target in C++-world terms and the non-GPU storage for it.
|
|
BufferingScanTarget _scanTarget;
|
|
std::atomic_flag _isDrawing;
|
|
|
|
// Additional pipeline information.
|
|
std::atomic<bool> _isUsingSupersampling;
|
|
|
|
// The output view and its aspect ratio.
|
|
__weak MTKView *_view;
|
|
CGFloat _viewAspectRatio; // To avoid accessing .bounds away from the main thread.
|
|
|
|
// Previously set modals, to avoid unnecessary buffer churn.
|
|
std::optional<Outputs::Display::ScanTarget::Modals> _priorModals;
|
|
}
|
|
|
|
- (Uniforms *)uniforms {
|
|
return reinterpret_cast<Uniforms *>(_uniformsBuffer.contents);
|
|
}
|
|
|
|
- (nonnull instancetype)initWithView:(nonnull MTKView *)view {
|
|
self = [super init];
|
|
if(self) {
|
|
_view = view;
|
|
_commandQueue = [view.device newCommandQueue];
|
|
|
|
// Allocate space for uniforms.
|
|
_uniformsBuffer = [view.device
|
|
newBufferWithLength:sizeof(Uniforms)
|
|
options:MTLResourceCPUCacheModeWriteCombined | MTLResourceStorageModeShared];
|
|
|
|
// Allocate buffers for scans and lines and for the write area texture.
|
|
_scansBuffer = [view.device
|
|
newBufferWithLength:sizeof(Outputs::Display::BufferingScanTarget::Scan)*NumBufferedScans
|
|
options:SharedResourceOptionsStandard];
|
|
_linesBuffer = [view.device
|
|
newBufferWithLength:sizeof(Outputs::Display::BufferingScanTarget::Line)*NumBufferedLines
|
|
options:SharedResourceOptionsStandard];
|
|
_writeAreaBuffer = [view.device
|
|
newBufferWithLength:BufferingScanTarget::WriteAreaWidth*BufferingScanTarget::WriteAreaHeight*4
|
|
options:SharedResourceOptionsTexture];
|
|
|
|
// Install all that storage in the buffering scan target.
|
|
_scanTarget.set_write_area(reinterpret_cast<uint8_t *>(_writeAreaBuffer.contents));
|
|
_scanTarget.set_line_buffer(
|
|
reinterpret_cast<BufferingScanTarget::Line *>(_linesBuffer.contents),
|
|
NumBufferedLines
|
|
);
|
|
_scanTarget.set_scan_buffer(
|
|
reinterpret_cast<BufferingScanTarget::Scan *>(_scansBuffer.contents),
|
|
NumBufferedScans
|
|
);
|
|
|
|
// Generate copy and clear pipelines.
|
|
id<MTLLibrary> library = [_view.device newDefaultLibrary];
|
|
MTLRenderPipelineDescriptor *const pipelineDescriptor = [[MTLRenderPipelineDescriptor alloc] init];
|
|
pipelineDescriptor.colorAttachments[0].pixelFormat = _view.colorPixelFormat;
|
|
pipelineDescriptor.vertexFunction = [library newFunctionWithName:@"copyVertex"];
|
|
pipelineDescriptor.fragmentFunction = [library newFunctionWithName:@"copyFragment"];
|
|
_copyPipeline = [_view.device newRenderPipelineStateWithDescriptor:pipelineDescriptor error:nil];
|
|
|
|
pipelineDescriptor.fragmentFunction = [library newFunctionWithName:@"interpolateFragment"];
|
|
_supersamplePipeline = [_view.device newRenderPipelineStateWithDescriptor:pipelineDescriptor error:nil];
|
|
|
|
pipelineDescriptor.fragmentFunction = [library newFunctionWithName:@"clearFragment"];
|
|
pipelineDescriptor.stencilAttachmentPixelFormat = MTLPixelFormatStencil8;
|
|
_clearPipeline = [_view.device newRenderPipelineStateWithDescriptor:pipelineDescriptor error:nil];
|
|
|
|
// Clear stencil: always write the reference value (of 0), but draw only where the stencil already
|
|
// had that value.
|
|
MTLDepthStencilDescriptor *depthStencilDescriptor = [[MTLDepthStencilDescriptor alloc] init];
|
|
depthStencilDescriptor.frontFaceStencil.stencilCompareFunction = MTLCompareFunctionEqual;
|
|
depthStencilDescriptor.frontFaceStencil.depthStencilPassOperation = MTLStencilOperationReplace;
|
|
depthStencilDescriptor.frontFaceStencil.stencilFailureOperation = MTLStencilOperationReplace;
|
|
_clearStencilState = [view.device newDepthStencilStateWithDescriptor:depthStencilDescriptor];
|
|
|
|
// Allocate a large number of single-int buffers, for supplying offsets to the compute shaders.
|
|
// There's a ridiculous amount of overhead in this, but it avoids allocations during drawing,
|
|
// and a single int per instance is all I need.
|
|
for(size_t c = 0; c < NumBufferedLines; ++c) {
|
|
_lineOffsetBuffers[c] =
|
|
[_view.device newBufferWithLength:sizeof(int) options:SharedResourceOptionsStandard];
|
|
}
|
|
|
|
// Ensure the is-drawing flag is initially clear.
|
|
_isDrawing.clear();
|
|
|
|
// Set initial aspect-ratio multiplier and generate buffers.
|
|
[self mtkView:view drawableSizeWillChange:view.drawableSize];
|
|
}
|
|
|
|
return self;
|
|
}
|
|
|
|
- (void)mtkView:(nonnull MTKView *)view drawableSizeWillChange:(CGSize)size {
|
|
_viewAspectRatio = size.width / size.height;
|
|
[self setAspectRatio];
|
|
|
|
@synchronized(self) {
|
|
// Always [re]try multisampling upon a resize.
|
|
_scanTarget.display_metrics_.announce_did_resize();
|
|
_isUsingSupersampling = true;
|
|
[self updateSizeBuffersToSize:size];
|
|
}
|
|
}
|
|
|
|
- (void)updateSizeBuffers {
|
|
@synchronized(self) {
|
|
[self updateSizeBuffersToSize:_view.drawableSize];
|
|
}
|
|
}
|
|
|
|
- (id<MTLCommandBuffer>)copyTexture:(id<MTLTexture>)source to:(id<MTLTexture>)destination {
|
|
MTLRenderPassDescriptor *const copyTextureDescriptor = [[MTLRenderPassDescriptor alloc] init];
|
|
copyTextureDescriptor.colorAttachments[0].texture = destination;
|
|
copyTextureDescriptor.colorAttachments[0].loadAction = MTLLoadActionDontCare;
|
|
copyTextureDescriptor.colorAttachments[0].storeAction = MTLStoreActionStore;
|
|
|
|
id<MTLCommandBuffer> commandBuffer = [_commandQueue commandBuffer];
|
|
id<MTLRenderCommandEncoder> encoder = [commandBuffer renderCommandEncoderWithDescriptor:copyTextureDescriptor];
|
|
|
|
[encoder setRenderPipelineState:_copyPipeline];
|
|
[encoder setVertexTexture:source atIndex:0];
|
|
[encoder setFragmentTexture:source atIndex:0];
|
|
|
|
[encoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4];
|
|
[encoder endEncoding];
|
|
encoder = nil;
|
|
[commandBuffer commit];
|
|
|
|
return commandBuffer;
|
|
}
|
|
|
|
- (void)updateSizeBuffersToSize:(CGSize)size {
|
|
// Anecdotally, the size provided here, which ultimately is from _view.drawableSize,
|
|
// already factors in Retina-style scaling.
|
|
//
|
|
// 16384 has been the maximum texture size in all Mac versions of Metal so far, and
|
|
// I haven't yet found a way to query it dynamically. So it's hard-coded.
|
|
const NSUInteger frameBufferWidth = MIN(NSUInteger(size.width) * (_isUsingSupersampling ? 2 : 1), 16384);
|
|
const NSUInteger frameBufferHeight = MIN(NSUInteger(size.height) * (_isUsingSupersampling ? 2 : 1), 16384);
|
|
|
|
// Generate stencil.
|
|
MTLTextureDescriptor *const stencilTextureDescriptor = [MTLTextureDescriptor
|
|
texture2DDescriptorWithPixelFormat:MTLPixelFormatStencil8
|
|
width:frameBufferWidth
|
|
height:frameBufferHeight
|
|
mipmapped:NO];
|
|
stencilTextureDescriptor.usage = MTLTextureUsageRenderTarget;
|
|
stencilTextureDescriptor.resourceOptions = MTLResourceStorageModePrivate;
|
|
_frameBufferStencil = [_view.device newTextureWithDescriptor:stencilTextureDescriptor];
|
|
|
|
// Establish intended stencil useage; it's only to track which pixels haven't been painted
|
|
// at all at the end of every frame. So: always paint, and replace the stored stencil value
|
|
// (which is seeded as 0) with the nominated one (a 1).
|
|
MTLDepthStencilDescriptor *depthStencilDescriptor = [[MTLDepthStencilDescriptor alloc] init];
|
|
depthStencilDescriptor.frontFaceStencil.stencilCompareFunction = MTLCompareFunctionAlways;
|
|
depthStencilDescriptor.frontFaceStencil.depthStencilPassOperation = MTLStencilOperationReplace;
|
|
_drawStencilState = [_view.device newDepthStencilStateWithDescriptor:depthStencilDescriptor];
|
|
|
|
// Generate framebuffers.
|
|
for(int c = 0; c < 2; c++) {
|
|
MTLTextureDescriptor *const textureDescriptor = [MTLTextureDescriptor
|
|
texture2DDescriptorWithPixelFormat:_view.colorPixelFormat
|
|
width:frameBufferWidth
|
|
height:frameBufferHeight
|
|
mipmapped:NO];
|
|
textureDescriptor.usage = MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite;
|
|
textureDescriptor.resourceOptions = MTLResourceStorageModePrivate;
|
|
id<MTLTexture> _oldFrameBuffer = _frameBuffers[c];
|
|
_frameBuffers[c] = [_view.device newTextureWithDescriptor:textureDescriptor];
|
|
|
|
// Generate a render pass with that framebuffer and stencil.
|
|
_frameBufferRenderPasses[c] = [[MTLRenderPassDescriptor alloc] init];
|
|
_frameBufferRenderPasses[c].colorAttachments[0].texture = _frameBuffers[c];
|
|
_frameBufferRenderPasses[c].colorAttachments[0].loadAction = MTLLoadActionLoad;
|
|
_frameBufferRenderPasses[c].colorAttachments[0].storeAction = MTLStoreActionStore;
|
|
|
|
_frameBufferRenderPasses[c].stencilAttachment.clearStencil = 0;
|
|
_frameBufferRenderPasses[c].stencilAttachment.texture = _frameBufferStencil;
|
|
_frameBufferRenderPasses[c].stencilAttachment.loadAction = MTLLoadActionLoad;
|
|
_frameBufferRenderPasses[c].stencilAttachment.storeAction = MTLStoreActionStore;
|
|
|
|
// Draw from _oldFrameBuffer to _frameBuffer; otherwise clear the new framebuffer.
|
|
if(_oldFrameBuffer) {
|
|
[self copyTexture:_oldFrameBuffer to:_frameBuffers[c]];
|
|
} else {
|
|
// TODO: this use of clearTexture is the only reason _frameBuffer has a marked usage of
|
|
// MTLTextureUsageShaderWrite; it'd probably be smarter to blank it with geometry rather than potentially
|
|
// complicating its storage further?
|
|
[self clearTexture:_frameBuffers[c]];
|
|
}
|
|
}
|
|
|
|
// Don't clear the framebuffer at the end of this frame.
|
|
_dontClearFrameBuffer = YES;
|
|
}
|
|
|
|
- (BOOL)shouldApplyGamma {
|
|
return fabsf(float(self.uniforms->outputGamma) - 1.0f) > 0.01f;
|
|
}
|
|
|
|
- (void)clearTexture:(id<MTLTexture>)texture {
|
|
id<MTLLibrary> library = [_view.device newDefaultLibrary];
|
|
|
|
// Ensure finalised line texture is initially clear.
|
|
id<MTLComputePipelineState> clearPipeline =
|
|
[_view.device newComputePipelineStateWithFunction:[library newFunctionWithName:@"clearKernel"] error:nil];
|
|
id<MTLCommandBuffer> commandBuffer = [_commandQueue commandBuffer];
|
|
id<MTLComputeCommandEncoder> computeEncoder = [commandBuffer computeCommandEncoder];
|
|
|
|
[computeEncoder setTexture:texture atIndex:0];
|
|
[self
|
|
dispatchComputeCommandEncoder:computeEncoder
|
|
pipelineState:clearPipeline
|
|
width:texture.width
|
|
height:texture.height
|
|
offsetBuffer:[self bufferForOffset:0]
|
|
];
|
|
|
|
[computeEncoder endEncoding];
|
|
[commandBuffer commit];
|
|
}
|
|
|
|
- (void)updateModalBuffers {
|
|
// Build a descriptor for any intermediate line texture.
|
|
MTLTextureDescriptor *const lineTextureDescriptor = [MTLTextureDescriptor
|
|
texture2DDescriptorWithPixelFormat:MTLPixelFormatBGRA8Unorm
|
|
width:Outputs::Display::FilterGenerator::SuggestedBufferWidth
|
|
height:NumBufferedLines
|
|
mipmapped:NO];
|
|
lineTextureDescriptor.resourceOptions = MTLResourceStorageModePrivate;
|
|
|
|
if(_pipeline == Pipeline::DirectToDisplay) {
|
|
// Buffers are not required when outputting direct to display; so if this isn't that then release anything
|
|
// currently being held and return.
|
|
_finalisedLineTexture = nil;
|
|
_finalisedLineState = nil;
|
|
_separatedLumaTexture = nil;
|
|
_separatedLumaState = nil;
|
|
_compositionTexture = nil;
|
|
_compositionRenderPass = nil;
|
|
return;
|
|
}
|
|
|
|
// Create a composition texture if one does not yet exist.
|
|
if(!_compositionTexture) {
|
|
lineTextureDescriptor.usage = MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead;
|
|
_compositionTexture = [_view.device newTextureWithDescriptor:lineTextureDescriptor];
|
|
}
|
|
|
|
// Grab the shader library.
|
|
id<MTLLibrary> library = [_view.device newDefaultLibrary];
|
|
lineTextureDescriptor.usage = MTLTextureUsageShaderWrite | MTLTextureUsageShaderRead;
|
|
|
|
// The finalised texture will definitely exist, and may or may not require a gamma conversion when written to.
|
|
if(!_finalisedLineTexture) {
|
|
_finalisedLineTexture = [_view.device newTextureWithDescriptor:lineTextureDescriptor];
|
|
[self clearTexture:_finalisedLineTexture];
|
|
|
|
NSString *const kernelFunction =
|
|
[self shouldApplyGamma] ? @"demodulateKernelWithGamma" : @"demodulateKernelNoGamma";
|
|
_finalisedLineState =
|
|
[_view.device newComputePipelineStateWithFunction:[library newFunctionWithName:kernelFunction] error:nil];
|
|
}
|
|
|
|
// A luma separation texture will exist only for composite colour.
|
|
if(_pipeline == Pipeline::CompositeColour) {
|
|
if(!_separatedLumaTexture) {
|
|
_separatedLumaTexture = [_view.device newTextureWithDescriptor:lineTextureDescriptor];
|
|
_separatedLumaState =
|
|
[_view.device
|
|
newComputePipelineStateWithFunction:[library newFunctionWithName:@"separateKernel"]
|
|
error:nil];
|
|
}
|
|
} else {
|
|
_separatedLumaTexture = nil;
|
|
}
|
|
}
|
|
|
|
- (void)setAspectRatio {
|
|
const auto transformation = aspect_ratio_transformation(_scanTarget.modals(), float(_viewAspectRatio));
|
|
self.uniforms->sourcetoDisplay = simd_matrix_from_rows(
|
|
simd_float3{transformation[0], transformation[3], transformation[6]},
|
|
simd_float3{transformation[1], transformation[4], transformation[7]},
|
|
simd_float3{transformation[2], transformation[5], transformation[8]}
|
|
);
|
|
}
|
|
|
|
- (void)setModals:(const Outputs::Display::ScanTarget::Modals &)modals {
|
|
//
|
|
// Populate uniforms.
|
|
//
|
|
self.uniforms->scale[0] = modals.output_scale.x;
|
|
self.uniforms->scale[1] = modals.output_scale.y;
|
|
self.uniforms->lineWidth = 1.05f / modals.expected_vertical_lines;
|
|
[self setAspectRatio];
|
|
|
|
const auto toRGB = to_rgb_matrix(modals.composite_colour_space);
|
|
self.uniforms->toRGB = simd::float3x3(
|
|
simd::float3{toRGB[0], toRGB[1], toRGB[2]},
|
|
simd::float3{toRGB[3], toRGB[4], toRGB[5]},
|
|
simd::float3{toRGB[6], toRGB[7], toRGB[8]}
|
|
);
|
|
|
|
const auto fromRGB = from_rgb_matrix(modals.composite_colour_space);
|
|
self.uniforms->fromRGB = simd::float3x3(
|
|
simd::float3{fromRGB[0], fromRGB[1], fromRGB[2]},
|
|
simd::float3{fromRGB[3], fromRGB[4], fromRGB[5]},
|
|
simd::float3{fromRGB[6], fromRGB[7], fromRGB[8]}
|
|
);
|
|
|
|
// This is fixed for now; consider making it a function of frame rate and/or of whether frame syncing
|
|
// is ongoing (which would require a way to signal that to this scan target).
|
|
self.uniforms->outputAlpha = __fp16(0.64f);
|
|
self.uniforms->outputMultiplier = __fp16(modals.brightness);
|
|
|
|
const float displayGamma = 2.2f; // This is assumed.
|
|
self.uniforms->outputGamma = __fp16(displayGamma / modals.intended_gamma);
|
|
|
|
if(
|
|
!_priorModals ||
|
|
_priorModals->display_type != modals.display_type ||
|
|
_priorModals->input_data_type != modals.input_data_type
|
|
) {
|
|
//
|
|
// Generate input texture.
|
|
//
|
|
MTLPixelFormat pixelFormat;
|
|
_bytesPerInputPixel = size_for_data_type(modals.input_data_type);
|
|
if(data_type_is_normalised(modals.input_data_type)) {
|
|
switch(_bytesPerInputPixel) {
|
|
default:
|
|
case 1: pixelFormat = MTLPixelFormatR8Unorm; break;
|
|
case 2: pixelFormat = MTLPixelFormatRG8Unorm; break;
|
|
case 4: pixelFormat = MTLPixelFormatRGBA8Unorm; break;
|
|
}
|
|
} else {
|
|
switch(_bytesPerInputPixel) {
|
|
default:
|
|
case 1: pixelFormat = MTLPixelFormatR8Uint; break;
|
|
case 2: pixelFormat = MTLPixelFormatRG8Uint; break;
|
|
case 4: pixelFormat = MTLPixelFormatRGBA8Uint; break;
|
|
}
|
|
}
|
|
MTLTextureDescriptor *const textureDescriptor = [MTLTextureDescriptor
|
|
texture2DDescriptorWithPixelFormat:pixelFormat
|
|
width:BufferingScanTarget::WriteAreaWidth
|
|
height:BufferingScanTarget::WriteAreaHeight
|
|
mipmapped:NO];
|
|
textureDescriptor.resourceOptions = SharedResourceOptionsTexture;
|
|
if(@available(macOS 10.14, *)) {
|
|
textureDescriptor.allowGPUOptimizedContents = NO;
|
|
}
|
|
|
|
const NSUInteger bytesPerRow = BufferingScanTarget::WriteAreaWidth * _bytesPerInputPixel;
|
|
_writeAreaTexture = [_writeAreaBuffer
|
|
newTextureWithDescriptor:textureDescriptor
|
|
offset:0
|
|
bytesPerRow:bytesPerRow];
|
|
_totalTextureBytes = bytesPerRow * BufferingScanTarget::WriteAreaHeight;
|
|
|
|
|
|
|
|
//
|
|
// Generate scan pipeline.
|
|
//
|
|
id<MTLLibrary> library = [_view.device newDefaultLibrary];
|
|
MTLRenderPipelineDescriptor *pipelineDescriptor = [[MTLRenderPipelineDescriptor alloc] init];
|
|
|
|
// Occasions when the composition buffer isn't required are slender: the output must be neither RGB
|
|
// nor composite monochrome.
|
|
const bool isComposition =
|
|
modals.display_type != Outputs::Display::DisplayType::RGB &&
|
|
modals.display_type != Outputs::Display::DisplayType::CompositeMonochrome;
|
|
const bool isSVideoOutput = modals.display_type == Outputs::Display::DisplayType::SVideo;
|
|
|
|
if(!isComposition) {
|
|
_pipeline = Pipeline::DirectToDisplay;
|
|
} else {
|
|
_pipeline = isSVideoOutput ? Pipeline::SVideo : Pipeline::CompositeColour;
|
|
}
|
|
|
|
float &cyclesMultiplier = self.uniforms->cyclesMultiplier;
|
|
if(_pipeline != Pipeline::DirectToDisplay) {
|
|
cyclesMultiplier =
|
|
Outputs::Display::FilterGenerator::suggested_sample_multiplier(
|
|
float(modals.colour_cycle_numerator) / float(modals.colour_cycle_denominator),
|
|
modals.cycles_per_line
|
|
);
|
|
|
|
// Create suitable filters.
|
|
_lineBufferPixelsPerLine = NSUInteger(modals.cycles_per_line * cyclesMultiplier);
|
|
const float colourCyclesPerLine =
|
|
float(modals.colour_cycle_numerator) / float(modals.colour_cycle_denominator);
|
|
using DecodingPath = Outputs::Display::FilterGenerator::DecodingPath;
|
|
|
|
Outputs::Display::FilterGenerator generator(
|
|
_lineBufferPixelsPerLine,
|
|
colourCyclesPerLine,
|
|
isSVideoOutput ? DecodingPath::SVideo : DecodingPath::Composite
|
|
);
|
|
|
|
const auto separation = generator.separation_filter();
|
|
using Coefficients2 = std::array<simd::float2, 31>;
|
|
Coefficients2 separation_multiplexed{};
|
|
separation.luma.copy_to<Coefficients2::iterator>(
|
|
separation_multiplexed.begin(),
|
|
separation_multiplexed.end(),
|
|
[](const auto destination, const float value) {
|
|
destination->x = value;
|
|
}
|
|
);
|
|
separation.chroma.copy_to<Coefficients2::iterator>(
|
|
separation_multiplexed.begin(),
|
|
separation_multiplexed.end(),
|
|
[](const auto destination, const float value) {
|
|
destination->y = value;
|
|
}
|
|
);
|
|
for(size_t c = 0; c < 16; ++c) {
|
|
self.uniforms->lumaKernel[c] = separation_multiplexed[c];
|
|
}
|
|
|
|
const auto demodulation = generator.demouldation_filter();
|
|
using Coefficients3 = std::array<simd::float3, 31>;
|
|
Coefficients3 demodulation_multiplexed{};
|
|
demodulation.luma.copy_to<Coefficients3::iterator>(
|
|
demodulation_multiplexed.begin(),
|
|
demodulation_multiplexed.end(),
|
|
[](const auto destination, const float value) {
|
|
destination->x = value;
|
|
}
|
|
);
|
|
demodulation.chroma.copy_to<Coefficients3::iterator>(
|
|
demodulation_multiplexed.begin(),
|
|
demodulation_multiplexed.end(),
|
|
[](const auto destination, const float value) {
|
|
destination->y = destination->z = value;
|
|
}
|
|
);
|
|
// Convert to half-size floats.
|
|
for(size_t c = 0; c < 16; ++c) {
|
|
self.uniforms->chromaKernel[c] = demodulation_multiplexed[c];
|
|
}
|
|
}
|
|
|
|
// Update intermediate storage.
|
|
[self updateModalBuffers];
|
|
|
|
const auto fragment_function = [&](NSString *const prefix) {
|
|
NSString *const functionName = [prefix stringByAppendingFormat:@"%s", name(modals.input_data_type)];
|
|
id <MTLFunction> function = [library newFunctionWithName:functionName];
|
|
assert(function);
|
|
return function;
|
|
};
|
|
|
|
if(_pipeline != Pipeline::DirectToDisplay) {
|
|
// Create the composition render pass.
|
|
pipelineDescriptor.colorAttachments[0].pixelFormat = _compositionTexture.pixelFormat;
|
|
pipelineDescriptor.vertexFunction = [library newFunctionWithName:@"scanToComposition"];
|
|
pipelineDescriptor.fragmentFunction =
|
|
fragment_function(isSVideoOutput ? @"internalSVideo" : @"internalComposite");
|
|
|
|
_composePipeline = [_view.device newRenderPipelineStateWithDescriptor:pipelineDescriptor error:nil];
|
|
|
|
_compositionRenderPass = [[MTLRenderPassDescriptor alloc] init];
|
|
_compositionRenderPass.colorAttachments[0].texture = _compositionTexture;
|
|
_compositionRenderPass.colorAttachments[0].loadAction = MTLLoadActionClear;
|
|
_compositionRenderPass.colorAttachments[0].storeAction = MTLStoreActionStore;
|
|
_compositionRenderPass.colorAttachments[0].clearColor = MTLClearColorMake(0.0, 0.5, 0.5, 0.3);
|
|
}
|
|
|
|
// Build the output pipeline.
|
|
pipelineDescriptor.colorAttachments[0].pixelFormat = _view.colorPixelFormat;
|
|
pipelineDescriptor.vertexFunction =
|
|
[library newFunctionWithName:_pipeline == Pipeline::DirectToDisplay ? @"scanToDisplay" : @"lineToDisplay"];
|
|
|
|
if(_pipeline != Pipeline::DirectToDisplay) {
|
|
pipelineDescriptor.fragmentFunction = [library newFunctionWithName:@"interpolateFragment"];
|
|
} else {
|
|
const bool isRGBOutput = modals.display_type == Outputs::Display::DisplayType::RGB;
|
|
pipelineDescriptor.fragmentFunction = fragment_function(
|
|
[isRGBOutput ? @"outputRGB" : @"outputComposite"
|
|
stringByAppendingString:self.shouldApplyGamma ? @"WithGamma" : @""]
|
|
);
|
|
}
|
|
|
|
// Enable blending.
|
|
pipelineDescriptor.colorAttachments[0].blendingEnabled = YES;
|
|
pipelineDescriptor.colorAttachments[0].sourceRGBBlendFactor = MTLBlendFactorSourceAlpha;
|
|
pipelineDescriptor.colorAttachments[0].destinationRGBBlendFactor = MTLBlendFactorOneMinusSourceAlpha;
|
|
|
|
// Set stencil format.
|
|
pipelineDescriptor.stencilAttachmentPixelFormat = MTLPixelFormatStencil8;
|
|
|
|
// Finish.
|
|
_outputPipeline = [_view.device newRenderPipelineStateWithDescriptor:pipelineDescriptor error:nil];
|
|
}
|
|
_priorModals = modals;
|
|
}
|
|
|
|
- (void)outputFrom:(size_t)start to:(size_t)end commandBuffer:(id<MTLCommandBuffer>)commandBuffer {
|
|
if(start == end) return;
|
|
|
|
// Generate a command encoder for the view.
|
|
id<MTLRenderCommandEncoder> encoder = [commandBuffer renderCommandEncoderWithDescriptor:_frameBufferRenderPasses[0]];
|
|
|
|
// Final output. Could be scans or lines.
|
|
[encoder setRenderPipelineState:_outputPipeline];
|
|
|
|
if(_pipeline != Pipeline::DirectToDisplay) {
|
|
[encoder setFragmentTexture:_finalisedLineTexture atIndex:0];
|
|
[encoder setVertexBuffer:_linesBuffer offset:0 atIndex:0];
|
|
} else {
|
|
[encoder setFragmentTexture:_writeAreaTexture atIndex:0];
|
|
[encoder setVertexBuffer:_scansBuffer offset:0 atIndex:0];
|
|
}
|
|
[encoder setVertexBuffer:_uniformsBuffer offset:0 atIndex:1];
|
|
[encoder setFragmentBuffer:_uniformsBuffer offset:0 atIndex:0];
|
|
|
|
[encoder setDepthStencilState:_drawStencilState];
|
|
[encoder setStencilReferenceValue:1];
|
|
#ifndef NDEBUG
|
|
// Quick aid for debugging: the stencil test is predicated on front-facing pixels, so make sure they're
|
|
// being generated.
|
|
[encoder setCullMode:MTLCullModeBack];
|
|
#endif
|
|
|
|
range_perform(
|
|
start,
|
|
end,
|
|
_pipeline != Pipeline::DirectToDisplay ? NumBufferedLines : NumBufferedScans,
|
|
[&](const size_t start, const size_t size) {
|
|
[encoder
|
|
drawPrimitives:MTLPrimitiveTypeTriangleStrip
|
|
vertexStart:0
|
|
vertexCount:4
|
|
instanceCount:size
|
|
baseInstance:start
|
|
];
|
|
});
|
|
|
|
// Complete encoding.
|
|
[encoder endEncoding];
|
|
encoder = nil;
|
|
}
|
|
|
|
- (void)outputFrameCleanerToCommandBuffer:(id<MTLCommandBuffer>)commandBuffer {
|
|
// Generate a command encoder for the view.
|
|
id<MTLRenderCommandEncoder> encoder = [commandBuffer renderCommandEncoderWithDescriptor:_frameBufferRenderPasses[0]];
|
|
|
|
[encoder setRenderPipelineState:_clearPipeline];
|
|
[encoder setDepthStencilState:_clearStencilState];
|
|
[encoder setStencilReferenceValue:0];
|
|
|
|
[encoder setVertexTexture:_frameBuffers[0] atIndex:0];
|
|
[encoder setFragmentTexture:_frameBuffers[0] atIndex:0];
|
|
[encoder setFragmentBuffer:_uniformsBuffer offset:0 atIndex:0];
|
|
|
|
[encoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4];
|
|
[encoder endEncoding];
|
|
encoder = nil;
|
|
}
|
|
|
|
- (void)
|
|
composeOutputArea:(const BufferingScanTarget::OutputArea &)outputArea
|
|
commandBuffer:(id<MTLCommandBuffer>)commandBuffer
|
|
{
|
|
// Output all scans to the composition buffer.
|
|
id<MTLRenderCommandEncoder> encoder = [commandBuffer renderCommandEncoderWithDescriptor:_compositionRenderPass];
|
|
[encoder setRenderPipelineState:_composePipeline];
|
|
|
|
[encoder setVertexBuffer:_scansBuffer offset:0 atIndex:0];
|
|
[encoder setVertexBuffer:_uniformsBuffer offset:0 atIndex:1];
|
|
[encoder setVertexTexture:_compositionTexture atIndex:0];
|
|
|
|
[encoder setFragmentBuffer:_uniformsBuffer offset:0 atIndex:0];
|
|
[encoder setFragmentTexture:_writeAreaTexture atIndex:0];
|
|
|
|
range_perform(
|
|
outputArea.begin.scan,
|
|
outputArea.end.scan,
|
|
NumBufferedScans,
|
|
[&](const size_t start, const size_t size) {
|
|
[encoder
|
|
drawPrimitives:MTLPrimitiveTypeLine
|
|
vertexStart:0
|
|
vertexCount:2
|
|
instanceCount:size
|
|
baseInstance:start
|
|
];
|
|
}
|
|
);
|
|
[encoder endEncoding];
|
|
encoder = nil;
|
|
}
|
|
|
|
- (id<MTLBuffer>)bufferForOffset:(size_t)offset {
|
|
// Store and apply the offset.
|
|
const auto buffer = _lineOffsetBuffers[_lineOffsetBuffer];
|
|
*(reinterpret_cast<int *>(_lineOffsetBuffers[_lineOffsetBuffer].contents)) = int(offset);
|
|
++_lineOffsetBuffer;
|
|
return buffer;
|
|
}
|
|
|
|
- (void)
|
|
dispatchComputeCommandEncoder:(id<MTLComputeCommandEncoder>)encoder
|
|
pipelineState:(id<MTLComputePipelineState>)pipelineState
|
|
width:(NSUInteger)width
|
|
height:(NSUInteger)height
|
|
offsetBuffer:(id<MTLBuffer>)offsetBuffer
|
|
{
|
|
[encoder setBuffer:offsetBuffer offset:0 atIndex:1];
|
|
|
|
// Follows https://developer.apple.com/documentation/metal/calculating_threadgroup_and_grid_sizes ;
|
|
// I have no independent opinion whatsoever.
|
|
const MTLSize threadsPerThreadgroup = MTLSizeMake(
|
|
pipelineState.threadExecutionWidth,
|
|
pipelineState.maxTotalThreadsPerThreadgroup / pipelineState.threadExecutionWidth,
|
|
1
|
|
);
|
|
const MTLSize threadsPerGrid = MTLSizeMake(width, height, 1);
|
|
|
|
// Set the pipeline state and dispatch the drawing. Which may slightly overdraw.
|
|
[encoder setComputePipelineState:pipelineState];
|
|
[encoder dispatchThreads:threadsPerGrid threadsPerThreadgroup:threadsPerThreadgroup];
|
|
}
|
|
|
|
- (void)drawInMTKView:(nonnull MTKView *)view {
|
|
if(_isDrawing.test_and_set()) {
|
|
_scanTarget.display_metrics_.announce_draw_status(false);
|
|
return;
|
|
}
|
|
|
|
// Disable supersampling if performance requires it.
|
|
if(_isUsingSupersampling && _scanTarget.display_metrics_.should_lower_resolution()) {
|
|
_isUsingSupersampling = false;
|
|
[self updateSizeBuffers];
|
|
}
|
|
|
|
// Schedule a copy from the current framebuffer to the view;
|
|
// blitting is unavailable as the target is a framebuffer texture.
|
|
id<MTLCommandBuffer> commandBuffer = [_commandQueue commandBuffer];
|
|
|
|
// Every pixel will be drawn, so don't clear or reload.
|
|
view.currentRenderPassDescriptor.colorAttachments[0].loadAction = MTLLoadActionDontCare;
|
|
id<MTLRenderCommandEncoder> encoder =
|
|
[commandBuffer renderCommandEncoderWithDescriptor:view.currentRenderPassDescriptor];
|
|
|
|
[encoder setRenderPipelineState:_isUsingSupersampling ? _supersamplePipeline : _copyPipeline];
|
|
[encoder setVertexTexture:_frameBuffers[0] atIndex:0];
|
|
[encoder setFragmentTexture:_frameBuffers[0] atIndex:0];
|
|
|
|
[encoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4];
|
|
[encoder endEncoding];
|
|
encoder = nil;
|
|
|
|
[commandBuffer presentDrawable:view.currentDrawable];
|
|
[commandBuffer addCompletedHandler:^(id<MTLCommandBuffer> _Nonnull) {
|
|
self->_isDrawing.clear();
|
|
self->_scanTarget.display_metrics_.announce_draw_status(true);
|
|
}];
|
|
[commandBuffer commit];
|
|
}
|
|
|
|
// MARK: - Per-frame output.
|
|
|
|
- (void)updateFrameBuffer {
|
|
if(_scanTarget.has_new_modals()) {
|
|
// TODO: rethink BufferingScanTarget::perform. Is it now really just for guarding the modals?
|
|
_scanTarget.perform([=] {
|
|
const Outputs::Display::ScanTarget::Modals *const newModals = _scanTarget.new_modals();
|
|
if(newModals) {
|
|
[self setModals:*newModals];
|
|
}
|
|
});
|
|
}
|
|
|
|
@synchronized(self) {
|
|
if(!_frameBufferRenderPasses[0]) return;
|
|
|
|
const auto outputArea = _scanTarget.get_output_area();
|
|
|
|
// Ensure texture changes are noted.
|
|
const auto writeAreaModificationStart =
|
|
size_t(outputArea.begin.write_area_x + outputArea.begin.write_area_y * BufferingScanTarget::WriteAreaWidth)
|
|
* _bytesPerInputPixel;
|
|
const auto writeAreaModificationEnd =
|
|
size_t(outputArea.end.write_area_x + outputArea.end.write_area_y * BufferingScanTarget::WriteAreaWidth)
|
|
* _bytesPerInputPixel;
|
|
range_perform(
|
|
writeAreaModificationStart,
|
|
writeAreaModificationEnd,
|
|
_totalTextureBytes,
|
|
[&](const size_t start, const size_t size) {
|
|
[_writeAreaBuffer didModifyRange:NSMakeRange(start, size)];
|
|
}
|
|
);
|
|
|
|
// Obtain a source for render command encoders.
|
|
id<MTLCommandBuffer> commandBuffer = [_commandQueue commandBuffer];
|
|
|
|
//
|
|
// Drawing algorithm used below, in broad terms:
|
|
//
|
|
// Maintain a persistent buffer of current CRT state.
|
|
//
|
|
// During each frame, paint to the persistent buffer anything new. Update a stencil buffer to track
|
|
// every pixel so-far touched.
|
|
//
|
|
// At the end of the frame, draw a 'frame cleaner', which is a whole-screen rect that paints over
|
|
// only those areas that the stencil buffer indicates weren't painted this frame.
|
|
//
|
|
// Hence every pixel is touched every frame, regardless of the machine's output.
|
|
//
|
|
|
|
const auto output_items =
|
|
[&](const size_t begin, const size_t end) {
|
|
[self outputFrom:begin to:end commandBuffer:commandBuffer];
|
|
};
|
|
const auto end_field =
|
|
[&](
|
|
const bool was_complete,
|
|
[[maybe_unused]] const int field_index,
|
|
[[maybe_unused]] const bool is_interlaced
|
|
) {
|
|
if(was_complete && !_dontClearFrameBuffer) {
|
|
[self outputFrameCleanerToCommandBuffer:commandBuffer];
|
|
}
|
|
_dontClearFrameBuffer = NO;
|
|
};
|
|
|
|
switch(_pipeline) {
|
|
case Pipeline::DirectToDisplay:
|
|
_scanTarget.output_scans(outputArea, output_items, end_field);
|
|
break;
|
|
|
|
case Pipeline::CompositeColour:
|
|
case Pipeline::SVideo:
|
|
// Build the composition buffer.
|
|
[self composeOutputArea:outputArea commandBuffer:commandBuffer];
|
|
|
|
if(_pipeline == Pipeline::SVideo) {
|
|
// Filter from composition to the finalised line texture.
|
|
id<MTLComputeCommandEncoder> computeEncoder = [commandBuffer computeCommandEncoder];
|
|
[computeEncoder setTexture:_compositionTexture atIndex:0];
|
|
[computeEncoder setTexture:_finalisedLineTexture atIndex:1];
|
|
[computeEncoder setBuffer:_uniformsBuffer offset:0 atIndex:0];
|
|
|
|
if(outputArea.end.line > outputArea.begin.line) {
|
|
[self
|
|
dispatchComputeCommandEncoder:computeEncoder
|
|
pipelineState:_finalisedLineState
|
|
width:_lineBufferPixelsPerLine
|
|
height:outputArea.end.line - outputArea.begin.line
|
|
offsetBuffer:[self bufferForOffset:outputArea.begin.line]
|
|
];
|
|
} else {
|
|
[self
|
|
dispatchComputeCommandEncoder:computeEncoder
|
|
pipelineState:_finalisedLineState
|
|
width:_lineBufferPixelsPerLine
|
|
height:NumBufferedLines - outputArea.begin.line
|
|
offsetBuffer:[self bufferForOffset:outputArea.begin.line]
|
|
];
|
|
|
|
if(outputArea.end.line) {
|
|
[self
|
|
dispatchComputeCommandEncoder:computeEncoder
|
|
pipelineState:_finalisedLineState
|
|
width:_lineBufferPixelsPerLine
|
|
height:outputArea.end.line
|
|
offsetBuffer:[self bufferForOffset:0]
|
|
];
|
|
}
|
|
}
|
|
|
|
[computeEncoder endEncoding];
|
|
} else {
|
|
// Separate luminance.
|
|
id<MTLComputeCommandEncoder> computeEncoder = [commandBuffer computeCommandEncoder];
|
|
[computeEncoder setTexture:_compositionTexture atIndex:0];
|
|
[computeEncoder setTexture:_separatedLumaTexture atIndex:1];
|
|
[computeEncoder setBuffer:_uniformsBuffer offset:0 atIndex:0];
|
|
|
|
__unsafe_unretained id<MTLBuffer> offsetBuffers[2] = {nil, nil};
|
|
offsetBuffers[0] = [self bufferForOffset:outputArea.begin.line];
|
|
|
|
if(outputArea.end.line > outputArea.begin.line) {
|
|
[self
|
|
dispatchComputeCommandEncoder:computeEncoder
|
|
pipelineState:_separatedLumaState
|
|
width:_lineBufferPixelsPerLine
|
|
height:outputArea.end.line - outputArea.begin.line
|
|
offsetBuffer:offsetBuffers[0]
|
|
];
|
|
} else {
|
|
[self
|
|
dispatchComputeCommandEncoder:computeEncoder
|
|
pipelineState:_separatedLumaState
|
|
width:_lineBufferPixelsPerLine
|
|
height:NumBufferedLines - outputArea.begin.line
|
|
offsetBuffer:offsetBuffers[0]
|
|
];
|
|
if(outputArea.end.line) {
|
|
offsetBuffers[1] = [self bufferForOffset:0];
|
|
[self
|
|
dispatchComputeCommandEncoder:computeEncoder
|
|
pipelineState:_separatedLumaState
|
|
width:_lineBufferPixelsPerLine
|
|
height:outputArea.end.line
|
|
offsetBuffer:offsetBuffers[1]
|
|
];
|
|
}
|
|
}
|
|
|
|
// Filter resulting chrominance.
|
|
[computeEncoder setTexture:_separatedLumaTexture atIndex:0];
|
|
[computeEncoder setTexture:_finalisedLineTexture atIndex:1];
|
|
[computeEncoder setBuffer:_uniformsBuffer offset:0 atIndex:0];
|
|
|
|
if(outputArea.end.line > outputArea.begin.line) {
|
|
[self
|
|
dispatchComputeCommandEncoder:computeEncoder
|
|
pipelineState:_finalisedLineState
|
|
width:_lineBufferPixelsPerLine
|
|
height:outputArea.end.line - outputArea.begin.line
|
|
offsetBuffer:offsetBuffers[0]
|
|
];
|
|
} else {
|
|
[self
|
|
dispatchComputeCommandEncoder:computeEncoder
|
|
pipelineState:_finalisedLineState
|
|
width:_lineBufferPixelsPerLine
|
|
height:NumBufferedLines - outputArea.begin.line
|
|
offsetBuffer:offsetBuffers[0]
|
|
];
|
|
if(outputArea.end.line) {
|
|
[self
|
|
dispatchComputeCommandEncoder:computeEncoder
|
|
pipelineState:_finalisedLineState
|
|
width:_lineBufferPixelsPerLine
|
|
height:outputArea.end.line
|
|
offsetBuffer:offsetBuffers[1]
|
|
];
|
|
}
|
|
}
|
|
|
|
[computeEncoder endEncoding];
|
|
}
|
|
|
|
_scanTarget.output_lines(outputArea, output_items, end_field);
|
|
break;
|
|
}
|
|
|
|
// Add a callback to update the scan target buffer and commit the drawing.
|
|
[commandBuffer addCompletedHandler:^(id<MTLCommandBuffer> _Nonnull) {
|
|
@synchronized (self) {
|
|
self->_scanTarget.complete_output_area(outputArea);
|
|
}
|
|
}];
|
|
[commandBuffer commit];
|
|
}
|
|
}
|
|
|
|
// MARK: - External connections.
|
|
|
|
- (Outputs::Display::ScanTarget *)scanTarget {
|
|
return &_scanTarget;
|
|
}
|
|
|
|
- (void)willChangeOwner {
|
|
self.scanTarget->will_change_owner();
|
|
}
|
|
|
|
- (NSBitmapImageRep *)imageRepresentation {
|
|
// Create an NSBitmapRep as somewhere to copy pixel data to.
|
|
NSBitmapImageRep *const result =
|
|
[[NSBitmapImageRep alloc]
|
|
initWithBitmapDataPlanes:NULL
|
|
pixelsWide:(NSInteger)_frameBuffers[0].width
|
|
pixelsHigh:(NSInteger)_frameBuffers[0].height
|
|
bitsPerSample:8
|
|
samplesPerPixel:4
|
|
hasAlpha:YES
|
|
isPlanar:NO
|
|
colorSpaceName:NSDeviceRGBColorSpace
|
|
bytesPerRow:4 * (NSInteger)_frameBuffers[0].width
|
|
bitsPerPixel:0];
|
|
|
|
// Create a CPU-accessible texture and copy the current contents of the _frameBuffer to it.
|
|
// TODO: supersample rather than directly copy if appropriate?
|
|
id<MTLTexture> cpuTexture;
|
|
MTLTextureDescriptor *const textureDescriptor = [MTLTextureDescriptor
|
|
texture2DDescriptorWithPixelFormat:_view.colorPixelFormat
|
|
width:_frameBuffers[0].width
|
|
height:_frameBuffers[0].height
|
|
mipmapped:NO];
|
|
textureDescriptor.usage = MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead;
|
|
textureDescriptor.resourceOptions = MTLResourceStorageModeManaged;
|
|
cpuTexture = [_view.device newTextureWithDescriptor:textureDescriptor];
|
|
[[self copyTexture:_frameBuffers[0] to:cpuTexture] waitUntilCompleted];
|
|
|
|
// Copy from the CPU-visible texture to the bitmap image representation.
|
|
uint8_t *const bitmapData = result.bitmapData;
|
|
[cpuTexture
|
|
getBytes:bitmapData
|
|
bytesPerRow:_frameBuffers[0].width*4
|
|
fromRegion:MTLRegionMake2D(0, 0, _frameBuffers[0].width, _frameBuffers[0].height)
|
|
mipmapLevel:0];
|
|
|
|
// Set alpha to fully opaque and do some byte shuffling if necessary;
|
|
// Apple likes BGR for output but RGB is the best I can specify to NSBitmapImageRep.
|
|
//
|
|
// I'm not putting my foot down and having the GPU do the conversion I want
|
|
// because this lets me reuse _copyPipeline and thereby cut down on boilerplate,
|
|
// especially given that screenshots are not a bottleneck.
|
|
const NSUInteger totalBytes = _frameBuffers[0].width * _frameBuffers[0].height * 4;
|
|
const bool flipRedBlue = _view.colorPixelFormat == MTLPixelFormatBGRA8Unorm;
|
|
for(NSUInteger offset = 0; offset < totalBytes; offset += 4) {
|
|
if(flipRedBlue) {
|
|
const uint8_t red = bitmapData[offset];
|
|
bitmapData[offset] = bitmapData[offset+2];
|
|
bitmapData[offset+2] = red;
|
|
}
|
|
bitmapData[offset+3] = 0xff;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
@end
|