A converter for Apple IIGS graphic files

This commit is contained in:
markpmlim 2018-12-08 09:28:41 +08:00
parent 679b5a4e33
commit ee0875a512
12 changed files with 571 additions and 0 deletions

23
.gitignore vendored Normal file
View File

@ -0,0 +1,23 @@
# Exclude the build directory
build/*
examples/FilterShowcase/build*
# Exclude temp nibs and swap files
*~.nib
*.swp
# Exclude OS X folder attributes
.DS_Store
.svn
# Exclude user-specific XCode 3 and 4 files
*.mode1
*.mode1v3
*.mode2v3
*.perspective
*.perspectivev3
*.pbxuser
*.xcworkspace
xcuserdata

View File

@ -0,0 +1,18 @@
import AppKit
import MetalKit
import PlaygroundSupport
/*
Originally developed using XCode 9.x
Modified to compile and run under XCode 8.x, macOS 10.12
*/
// Size (in points) of the live view shown by the playground.
let frame = NSRect(x: 0, y: 0,
                   width: 640, height: 400)

// Metal is unavailable on some (older or virtualised) machines; stop with a
// readable message instead of trapping on a force-unwrapped nil.
guard let device = MTLCreateSystemDefaultDevice() else {
    fatalError("Metal is not supported on this machine")
}

// The renderer's initialiser is failable. The view is still displayed when it
// fails (it will simply stay blank), but the failure is reported.
let renderer = MetalViewRenderer(device: device)
if renderer == nil {
    print("The Metal renderer could not be initialised")
}

let view = MTKView(frame: frame, device: device)
// The view does not retain its delegate; the top-level constant `renderer`
// keeps the object alive for the lifetime of the playground page.
view.delegate = renderer
PlaygroundPage.current.liveView = view

Binary file not shown.

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,115 @@
//
// Shaders.metal
//
// Created by Mark Lim Pak Mun on 06/12/2018.
// Copyright © Incremental Innovation 2018 . All rights reserved.
//
// File for Metal kernel and shader functions
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
// Per-vertex input: one element of the array bound at buffer(0) of the
// vertex shader. The host code fills that buffer with a matching
// position/texture-coordinate pair per vertex.
typedef struct
{
float2 position;
float2 texCoord;
} Vertex;
// Vertex shader outputs and per-fragment inputs.
// The rasterizer interpolates texCoord across each triangle before it is
// handed to the fragment shader.
typedef struct
{
float4 position [[position]]; // in clip space
float2 texCoord;
} RasterizerData;
// Vertex function: forwards one vertex of the textured quad.
// The incoming 2D position is used as-is for the x and y clip-space
// coordinates (no matrix is applied); z is fixed at 0 and w at 1.
vertex RasterizerData
vertexShader(uint vertexID [[ vertex_id ]],
             const device Vertex *vertices [[ buffer(0) ]])
{
    // Read the vertex record for this invocation once.
    const Vertex current = vertices[vertexID];

    RasterizerData result;
    // Promote the 2D position to a 4D clip-space position.
    result.position = float4(current.position, 0.0, 1.0);
    // The texture coordinate is passed through untouched.
    result.texCoord = current.texCoord;
    return result;
}
// Fragment function: looks up the colour of the fragment in the texture
// bound at texture(0), using the interpolated texture coordinate, and
// returns that colour unchanged.
fragment half4
fragmentShader(RasterizerData in [[stage_in]],
               texture2d<half> colorTexture [[ texture(0) ]])
{
    // Linear filtering for both magnification and minification.
    constexpr sampler linearSampler(mag_filter::linear,
                                    min_filter::linear);
    return colorTexture.sample(linearSampler, in.texCoord);
}
/// ============ kernel function ============
#define bytesPerScanLine 160
#define sizeOfColorTable 16 // in terms of 16-bit words
/*
 Converts a IIGS "pixel" to an ordinary rgba pixel.

 One thread handles one texel of `output`. The thread position (gid) is
 mapped back to the byte of `iigsBitmap` holding the matching 4-bit IIGS
 "pixel"; the scanline control byte of the row selects one of the colour
 tables and the 4-bit value selects the entry inside that table.

 iigsBitmap   - bitmap bytes, bytesPerScanLine bytes per row, 2 "pixels"/byte
 scbs         - one scanline control byte per row; only the low nibble is used
 colorTables  - colour words (0x0RGB), sizeOfColorTable words per table
 output       - texture receiving the converted pixel
 gid          - position of this thread in the grid (x = column, y = row)
 */
kernel void convert320(const device uchar *iigsBitmap [[buffer(0)]],
                       const device uchar *scbs [[buffer(1)]],
                       const device ushort *colorTables [[buffer(2)]],
                       texture2d<half, access::write> output [[texture(0)]],
                       uint2 gid [[thread_position_in_grid]])
{
    // Threads that fall outside the texture have nothing to write.
    if ((gid.x >= output.get_width()) || (gid.y >= output.get_height()))
    {
        return;
    }

    const uint col = gid.x;     // 0 - 319 for standard Apple IIGS 320x200 graphics
    const uint row = gid.y;     // 0 - 199

    // Locate and fetch the byte that holds this column's "pixel".
    const uint bitmapIndex = row * bytesPerScanLine + col/2;
    const uchar pixels = iigsBitmap[bitmapIndex];   // 2 IIGS 4-bit "pixels"/byte

    // Even column # - pixel #0 lives in bits 4-7 (shift by 4);
    // odd column #  - pixel #1 lives in bits 0-3 (no shift).
    const uint whichColorEntry = (pixels >> ((col % 2) ? 0 : 4)) & 0x0f;   // 0 - 15

    // The low nibble of the row's SCB names the colour table to use.
    const uint whichColorTable = scbs[row] & 0x0f;                          // 0 - 15

    const uint colorTableIndex = sizeOfColorTable*whichColorTable + whichColorEntry;
    const ushort color = colorTables[colorTableIndex];

    // Unpack the three 4-bit components of the 0x0RGB colour word.
    ushort red   = (color >> 8) & 0x0f;     // 0 - 15
    ushort green = (color >> 4) & 0x0f;
    ushort blue  = color & 0x0f;

    // Scale the values [0,15] to [0,255]: 0, 17, 34, ... , 238, 255
    red   *= 17;
    green *= 17;
    blue  *= 17;

    // Compute the rgba8888 colour of the pixel ...
    half4 color4 = half4(red, green, blue, 255);
    // ... and scale its values to [0, 1.0]
    color4 *= 1/255.0;

    // Write the pixel to the texture.
    output.write(color4, gid);
}

View File

@ -0,0 +1,265 @@
/*
To run this demo under XCode 9.x or later, minor editing is required.
*/
import AppKit
import MetalKit
import simd
// We need to pass the 16 color tables as 1D array of UInt16's to the
// metal shader. Each color table has 16 color entries of size 2 bytes.
/// Loads an Apple IIGS 320x200 graphic file and splits it into the three
/// pieces the Metal kernel needs.
///
/// Layout of the file as read by this class:
///   - bytes 0 ..< 32000     : bitmap, 200 scanlines of 160 bytes (2 "pixels"/byte)
///   - bytes 32000 ..< 32256 : SCB table (256 bytes, only the first 200 are required)
///   - bytes 32256 ..< 32768 : 16 color tables of 16 little-endian 16-bit color words
class ExtractIIgsGraphicData {
    /// The 160x200 = 32 000 bitmap bytes.
    var iigsBitmap: [UInt8]?
    /// The 256 color words, already converted from little-endian byte pairs.
    var colorTables: [UInt16]?
    /// The 256 bytes of the SCB table.
    var scbs: [UInt8]?

    private static let bitmapRange: Range<Int> = 0..<32000
    private static let scbRange: Range<Int> = 32000..<32256
    private static let colorTablesRange: Range<Int> = 32256..<32768

    /// Reads the file at `url` and extracts its bitmap, SCB table and color tables.
    ///
    /// - Parameter url: location of the graphic file.
    /// - Returns: `nil` (after printing a message) when the file cannot be read
    ///   or holds fewer than 32 768 bytes. Bytes past the first 32 768 are ignored.
    public init?(_ url: URL) {
        // First load the entire file.
        let rawData: Data
        do {
            rawData = try Data(contentsOf: url)
        }
        catch let error {
            print("Error", error)
            return nil
        }

        // A truncated file would otherwise trap while copying out of range.
        let requiredLength = ExtractIIgsGraphicData.colorTablesRange.upperBound
        guard rawData.count >= requiredLength else {
            print("Error", "file is \(rawData.count) bytes; at least \(requiredLength) bytes are required")
            return nil
        }

        iigsBitmap = [UInt8](rawData.subdata(in: ExtractIIgsGraphicData.bitmapRange))
        scbs = [UInt8](rawData.subdata(in: ExtractIIgsGraphicData.scbRange))

        // We need to pass the 16 color tables as a 1D array of UInt16's to the
        // metal shader. On the IIGS, UInt16 is in little-endian format, so each
        // color word is rebuilt from its low byte followed by its high byte.
        let colorBytes = [UInt8](rawData.subdata(in: ExtractIIgsGraphicData.colorTablesRange))
        colorTables = stride(from: 0, to: colorBytes.count, by: 2).map { (k: Int) -> UInt16 in
            let low = UInt16(colorBytes[k])
            let high = UInt16(colorBytes[k + 1])
            return low | (high << 8)
        }
    }
}
/// Converts the bundled Apple IIGS graphic to a Metal texture (once, with a
/// compute kernel) and then draws that texture on a quad every frame.
///
/// Note: the Metal calls below use the Swift 3 (Xcode 8) spellings, e.g. the
/// `at:` argument labels, exactly as the rest of this file does.
public class MetalViewRenderer: NSObject, MTKViewDelegate {
    var queue: MTLCommandQueue?
    var device: MTLDevice!
    var rps: MTLRenderPipelineState!        // draws the textured quad
    var cps: MTLComputePipelineState!       // runs the "convert320" kernel
    var vertexBuffer: MTLBuffer!
    var indexBuffer: MTLBuffer!
    var bitMapBuffer: MTLBuffer!            // IIGS bitmap bytes
    var scbTablesBuffer: MTLBuffer!         // IIGS scanline control bytes
    var colorTablesBuffer: MTLBuffer!       // IIGS color words
    var outputTexture: MTLTexture!          // the converted graphic

    /// Prepares everything `draw(in:)` needs.
    ///
    /// - Parameter device: the Metal device used for all resources.
    /// - Returns: `nil` (after printing the reason) when the graphic, the
    ///   pipeline states or the output texture could not be created.
    public init?(device: MTLDevice) {
        super.init()
        self.device = device
        queue = device.makeCommandQueue()
        createBuffers()
        // Must precede createTexture(), which needs the compute pipeline state.
        buildPipelineStates()
        guard bitMapBuffer != nil, scbTablesBuffer != nil, colorTablesBuffer != nil,
            vertexBuffer != nil, indexBuffer != nil
        else {
            print("Buffers could not be created")
            return nil
        }
        guard cps != nil, rps != nil
        else {
            print("Pipeline states could not be created")
            return nil
        }
        guard let texture = createTexture()
        else {
            print("Texture could not be created")
            return nil
        }
        outputTexture = texture
    }

    /// Creates every MTLBuffer used by the compute kernel and by the render pass.
    /// On failure a message is printed and the buffers are left nil; `init`
    /// checks for that.
    func createBuffers() {
        guard let assetURL = Bundle.main.url(forResource: "ANGELFISH",
                                             withExtension: "SHR"),
            let graphicsExtractor = ExtractIIgsGraphicData(assetURL),
            let bmData = graphicsExtractor.iigsBitmap,
            let colorTables = graphicsExtractor.colorTables,
            let scbTable = graphicsExtractor.scbs
        else {
            print("The graphic ANGELFISH.SHR could not be loaded")
            return
        }

        bitMapBuffer = device.makeBuffer(bytes: bmData,
                                         length: MemoryLayout<UInt8>.stride * bmData.count,
                                         options: [])

        colorTablesBuffer = device.makeBuffer(bytes: colorTables,
                                              length: MemoryLayout<UInt16>.stride * colorTables.count,
                                              options: [])

        scbTablesBuffer = device.makeBuffer(bytes: scbTable,
                                            length: MemoryLayout<UInt8>.stride * scbTable.count,
                                            options: [])

        // size = 16 bytes; alignment=8; stride=16
        struct Vertex {
            var position: packed_float2
            var texCoords: packed_float2
        }
        // Note: both the position & texture coordinates are already
        // normalized to the range [-1.0, 1.0] & [0.0, 1.0] respectively.
        // total size = 64 bytes
        let quadVertices: [Vertex] =
        // origin of tex coord system is upper-left.
        [
            Vertex(position: [-0.75, -0.75], texCoords: [ 0.0, 1.0 ]),  // v0
            Vertex(position: [-0.75,  0.75], texCoords: [ 0.0, 0.0 ]),  // v1
            Vertex(position: [ 0.75, -0.75], texCoords: [ 1.0, 1.0 ]),  // v2
            Vertex(position: [ 0.75,  0.75], texCoords: [ 1.0, 0.0 ]),  // v3
        ]
        vertexBuffer = device.makeBuffer(bytes: quadVertices,
                                         length: MemoryLayout<Vertex>.stride * quadVertices.count,
                                         options: [])

        // Two clockwise triangles (v0,v1,v2) and (v2,v1,v3), i.e. a triangle
        // LIST; total size = 12 bytes.
        let indices: [UInt16] = [0, 1, 2, 2, 1, 3]
        indexBuffer = device.makeBuffer(bytes: indices,
                                        length: MemoryLayout<UInt16>.stride * indices.count,
                                        options: [])
    }

    /// Compiles "Shaders.metal" from the bundle and builds the compute and
    /// render pipeline states. On failure the error is printed and the
    /// affected state(s) stay nil; `init` checks for that.
    func buildPipelineStates() {
        guard let path = Bundle.main.path(forResource: "Shaders",
                                          ofType: "metal")
        else {
            Swift.print("Shaders.metal could not be found")
            return
        }
        do {
            let input = try String(contentsOfFile: path,
                                   encoding: String.Encoding.utf8)
            let library = try device.makeLibrary(source: input,
                                                 options: nil)
            guard let kernel = library.makeFunction(name: "convert320"),
                let vertex_func = library.makeFunction(name: "vertexShader"),
                let frag_func = library.makeFunction(name: "fragmentShader")
            else {
                Swift.print("A shader function is missing from Shaders.metal")
                return
            }
            cps = try device.makeComputePipelineState(function: kernel)

            // Setup a render pipeline descriptor
            let rpld = MTLRenderPipelineDescriptor()
            rpld.vertexFunction = vertex_func
            rpld.fragmentFunction = frag_func
            // This is the format of the render target (the view's drawable),
            // not of the texture being sampled: it must match the MTKView's
            // colorPixelFormat, which is .bgra8Unorm unless changed. The
            // rgba8Unorm texture written by the kernel is unaffected by it.
            rpld.colorAttachments[0].pixelFormat = .bgra8Unorm
            rps = try device.makeRenderPipelineState(descriptor: rpld)
        }
        catch let e {
            Swift.print("\(e)")
        }
    }

    /// Instantiates the output texture object and generates its contents
    /// so that the render encoder can use it.
    ///
    /// The kernel is encoded and committed here, once, rather than in
    /// `draw(in:)`, which is called for every frame.
    ///
    /// - Returns: the 320x200 texture, or `nil` when no command buffer could
    ///   be obtained to run the kernel.
    func createTexture() -> MTLTexture? {
        let width = 320
        let height = 200
        let textureDesc = MTLTextureDescriptor.texture2DDescriptor(pixelFormat: .rgba8Unorm,
                                                                   width: width,
                                                                   height: height,
                                                                   mipmapped: false)
        // Must be read and write: written by the kernel, sampled by the fragment shader.
        textureDesc.usage = [.shaderWrite, .shaderRead]
        textureDesc.resourceOptions = .storageModeManaged
        let texture = device.makeTexture(descriptor: textureDesc)

        guard let commandBuffer = queue?.makeCommandBuffer()
        else {
            return nil
        }
        commandBuffer.addCompletedHandler {
            (commandBuffer: MTLCommandBuffer) -> Void in
            if commandBuffer.status == .error {
                if let error = commandBuffer.error {
                    Swift.print(error)
                }
            }
            else if commandBuffer.status == .completed {
                Swift.print("Texture was generated successfully")
            }
        }

        let commandComputeEncoder = commandBuffer.makeComputeCommandEncoder()
        print("Generate Texture")
        commandComputeEncoder.setComputePipelineState(cps)
        commandComputeEncoder.setTexture(texture,
                                         at: 0)
        commandComputeEncoder.setBuffer(bitMapBuffer,
                                        offset: 0,
                                        at: 0)
        commandComputeEncoder.setBuffer(scbTablesBuffer,
                                        offset: 0,
                                        at: 1)
        commandComputeEncoder.setBuffer(colorTablesBuffer,
                                        offset: 0,
                                        at: 2)
        let threadGroupCount = MTLSizeMake(8, 8, 1)
        // Round up so the grid still covers the whole texture when its size
        // is not a multiple of the threadgroup size; the kernel itself
        // discards the threads that land outside the texture.
        let threadGroups = MTLSizeMake((texture.width + threadGroupCount.width - 1) / threadGroupCount.width,
                                       (texture.height + threadGroupCount.height - 1) / threadGroupCount.height,
                                       1)
        // Execute the kernel function
        commandComputeEncoder.dispatchThreadgroups(threadGroups,
                                                   threadsPerThreadgroup: threadGroupCount)
        commandComputeEncoder.endEncoding()
        commandBuffer.commit()
        return texture
    }

    // Implementation of the 2 MTKView delegate protocol functions.

    /// Nothing depends on the drawable size, so there is nothing to do here.
    public func mtkView(_ view: MTKView,
                        drawableSizeWillChange size: CGSize) {
    }

    /// drawInMTKView: renders the generated graphic on a quad.
    public func draw(in view: MTKView) {
        // Set before asking for the render pass descriptor so that the
        // descriptor of this very frame already carries the grey background.
        view.clearColor = MTLClearColorMake(0.5, 0.5, 0.5, 1.0)
        guard let rpd = view.currentRenderPassDescriptor,
            let drawable = view.currentDrawable,
            let commandBuffer = queue?.makeCommandBuffer()
        else {
            return
        }
        // Render the generated graphic.
        let commandRenderEncoder = commandBuffer.makeRenderCommandEncoder(descriptor: rpd)
        commandRenderEncoder.setRenderPipelineState(rps)
        commandRenderEncoder.setVertexBuffer(vertexBuffer,
                                             offset: 0,
                                             at: 0)
        commandRenderEncoder.setFragmentTexture(outputTexture,
                                                at: 0)
        // The index buffer lists two independent triangles, so the matching
        // primitive type is .triangle (a strip would need only 4 indices).
        commandRenderEncoder.drawIndexedPrimitives(type: MTLPrimitiveType.triangle,
                                                   indexCount: indexBuffer.length/MemoryLayout<UInt16>.size,
                                                   indexType: MTLIndexType.uint16,
                                                   indexBuffer: indexBuffer,
                                                   indexBufferOffset: 0)
        commandRenderEncoder.endEncoding()
        commandBuffer.present(drawable)
        commandBuffer.commit()
    }
}

View File

@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<playground version='5.0' target-platform='macos'>
<timeline fileName='timeline.xctimeline'/>
</playground>

View File

@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<Timeline
version = "3.0">
<TimelineItems>
<LoggerValueHistoryTimelineItem
documentLocation = "file:///Volumes/USBVolume/Github/Converter320/ConverterIIGS320.playground#CharacterRangeLen=50&amp;CharacterRangeLoc=281&amp;EndingLineNumber=12&amp;StartingLineNumber=12&amp;Timestamp=565833872.064764"
selectedRepresentationIndex = "0"
shouldTrackSuperviewWidth = "NO">
</LoggerValueHistoryTimelineItem>
</TimelineItems>
</Timeline>

17
Readme.md Normal file
View File

@ -0,0 +1,17 @@
Metal version of GraphicConverterIIGS.
The aim of this project (in the form of a Swift playground) is to investigate if it's possible to use a metal kernel function to convert a IIGS graphic to an instance of MTLTexture which will be rendered by a pair of vertex-fragment functions.
Requirements:
Xcode 8.x, Swift 3.x or later
Hardware: A graphics processor which supports the Metal API
Knowhow: how to run a Swift playground
Because of changes in the Metal interfaces, it is necessary to edit the file "Converter.swift" to run the playground demo in Xcode 9.x or later.
To understand the source code, the programmer should have
a) a sound knowledge of the fundamentals of the Metal API,
b) knowledge of the structure of an Apple IIGS graphic file with the format $C1/$0000, and,
c) basic knowledge of the Apple IIGS video hardware.

View File

@ -0,0 +1,22 @@
Brief Description
The Swift playground demo
(a) sets up a renderer object,
(b) instantiates a MTKView object,
(c) sets the renderer instance in step (a) to be the view's delegate, and,
(d) gets playground to display the view.
Converter.swift source code
The renderer object must implement both methods of MTKViewDelegate since it's going to be the view's delegate. The renderer sets up the environment before the MTKViewDelegate method "drawInMTKView:" gets called. Notice that the compute shader is called in the method "createTexture" rather than in the method "drawInMTKView:" because the latter method is called once for every frame (60 times per second by default) whereas the graphic only needs to be converted once. This means the method "buildPipelineStates" must be called before "createTexture" because the latter method needs a MTLComputePipelineState instance to do its job of preparing an instance of MTLTexture whose allocated storage is to be filled with the pixels of the generated graphic. (There are methods for getting these pixels from the texture's storage allocation if one intends to write them out as a graphic PNG file. See Apple's documentation on MTLTexture.)
The method "createBuffers" sets up all instances of MTLBuffer needed by the rest of the program. Briefly, it loads an Apple IIGS graphic, extracts its bitmap, color tables and SCB table, and instantiates these as one-dimensional (1D) arrays in MTLBuffers to be passed to the compute shader. The MTLBuffers for the geometry to be rendered in the view are also prepared by this method.
For those who have a powerful graphics processor, you may want to change the line
let threadGroupCount = MTLSizeMake(8, 8, 1)
to use a bigger threadgroup to execute more threads in parallel.
Refer to the Apple's article entitled "Calculating Threadgroup and Grid Sizes" for more information.

View File

@ -0,0 +1,52 @@
Description of the functions of the Metal shaders.
Compute Processing with a Metal kernel function.
The meat of the project is the kernel function named "convert320". See the source code of the file "Shaders.metal".
The kernel function is declared as:
kernel void convert320(const device uchar *iigsBitmap [[buffer(0)]],
const device uchar *scbs [[buffer(1)]],
const device ushort *colorTables [[buffer(2)]],
texture2d<half, access::write> output [[texture(0)]],
uint2 gid [[thread_position_in_grid]])
Three input buffers are passed to the kernel function. The first input buffer will consist of a 32 000-byte IIGS bitmap passed as a 1D array of bytes. Each byte in this bitmap consists of two 4-bit "pixels". These 2 "pixels" are not true colour pixels but are indices into a color table.
The second input buffer contains 200 Scanline Control Bytes (SCBs) (plus 56 unused bytes) passed as a 1D array of 256 bytes. Note: the Apple IIGS standard monitor supports 200 scanlines (or screen lines in graphic mode). The value of each of these SCBs tells the Apple IIGS hardware which one of the 16 colour tables to use for a particular scanline (or entire row of 320 pixels). There is a 1:1 correspondence between the SCBs in the SCB array and the rows of pixels on the IIGS monitor. To elaborate, the colours of scanline 0 are controlled by SCB0 (the first SCB), and those of scanline 199 by SCB199 (the last SCB). Each scanline can use just ONE of the 16 colour tables passed. BTW, each scanline on the Apple IIGS occupies 160 bytes of video memory. In 320 graphic mode, each screen line on the standard Apple IIGS monitor displays exactly 160 x 2 = 320 pixels.
The third input buffer consists of 16 colour tables passed also as a 1D array of 512 bytes; each colour table itself consists of 16 colour entries. Each colour entry is a 16-bit word; the colour word has the following format (in bits): 0000rrrr ggggbbbb. (In hex, the bytes are written as: 0x0RGB; in the Apple IIGS video memory the bytes are stored as: GB 0R) In other words, the colour word is actually 12 bits. The maximum value is 00001111 11111111 (or 0F FF in hex or 4095 in decimal) and the minimum is 00000000 00000000 (00 00 in hex). The Apple IIGS can display up to 4 096 colours but the number of colours displayed by a scanline is much more limited; each pixel of a scanline can be one of 16 colours in the colour table specified by the scanline's SCB.
The output of the kernel function is an instance of a MTLTexture which is created by the code in CPU before being passed as a parameter to the GPU. This 2D texture object must have the dimensions 320x200 exactly matching the IIGS graphic's resolution.
Think of the metal texture as having a rectangular 2D grid of pixels. In this project, the rectangular grid is 320 columns and 200 rows. The first pixel on the grid is (0, 0) and is positioned at the upper left hand corner of the grid. The last pixel of the grid is (319, 199) and is at the bottom right hand corner. The pixel at the upper right hand corner is (319, 0) and finally, that at the bottom left hand corner is (0, 199). These 4 pairs of coordinates are the bounds of the grid.
In the kernel shader (or function), each pixel of the texture is assigned a pair of coordinates. The function works backwards to determine which IIGS "pixel" corresponds to a particular pixel in the metal texture.
Let's consider an example where the kernel function is passed an integer pair (61, 40). It receives this pair of integers via the parameter "gid" from the Metal Framework. The pixel associated with this integer pair is said to have an x-coordinate of 61 and y-coordinate of 40. Incidentally, this unique pair of integers is used to identify the thread currently processed by the kernel function. In other words, the integer pair can be considered to be the coordinates of the pixel currently processed.
In the 2D rectangular grid mentioned above, the pixel is located at row 40 and column 61. Since the row is known, the kernel function fetches the 41st byte from the SCB array and must mask off its upper nibble. (Some Apple IIGS graphics files have SCBs with a bit ON in the upper nibble.) The resulting value is the colour table to be used for the entire row of 320 pixels.
Next, the kernel function computes the location of the byte corresponding to the pixel within the IIGS bitmap and stores it in the variable "bitmapIndex". (Remember, the bitmap was passed as a 1D array of bytes.) For the pixel in question, the value of "bitmapIndex" is 40x160 + 61/2 = 6 400 + 30 = 6 430.
The byte containing the IIGS "pixel" is fetched from the bitmap and stored in the variable "pixels". The upper nibble of this byte is for the pixel whose position is (60, 40) on the grid and the lower nibble for the pixel at location (61, 40). The value of this IIGS "pixel" is the relative offset to the colour word (within the colour table) to use.
In short, the SCB of the row of the IIGS "pixel" is used to determine which of the 16 colour tables to use. The value of a IIGS "pixel" indicates which colour word (or entry) to access within that colour table. As mentioned above, each colour table has 16 colour words, each of which is 16 bits (or 2 bytes) wide. The variable "colorTableIndex" is the array index of the colour word. Using this index, the colour of the IIGS "pixel" being processed is fetched from the 1D color tables array and stored in the variable "color".
The red, green and blue components of the colour word are unpacked and stored in the variables "red", "green" and "blue" respectively. The range of their values is [0, 15] and the components need to be scaled to [0, 255] i.e. 4 bits becoming 8 bits. Effectively, this means we are going from 4-bit RGB to 8-bit RGB. A colour word consisting of four 16-bit floating point numbers is formed using the half4 constructor. Using 32-bit floating point may be an overkill given that an Apple IIGS graphic can have at most 4 096 colours. Finally, the colour of the pixel is scaled down to [0, 1.0] because Metal, like OpenGL, works with colours in the range [0, 1.0].
Instead of multiplying the red, green and blue components separately, these arithmetic operations should perform faster replacing them with a vector-scalar multiplication. Initialise the "color4" variable with the values of "red", "green", "blue" with 15 as the fourth component. Scale up and then scale down using 2 successive vector-scalar multiplications. This is left as an exercise to the reader.
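On the CPU side, the kernel function is dispatched as a grid of 40 x 25 threadgroups, each consisting of 8x8 = 64 threads, so that there is exactly one thread for each of the 320 x 200 pixels. The 64 threads of a threadgroup are processed in parallel; each thread generates one pixel and writes it to the metal texture. No double for-loops are needed.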
Rendering with a pair of vertex-fragment shaders.
The operations of the vertex and fragment shaders are straight forward. The vertex function converts the position of an incoming vertex from a 2D float (float2) to a 4D float (float4) and output it to clip space. There is no need to perform any matrix transformation because the coordinates of the corners to the rectangle to be rendered are already in normalised device coordinates (NDC). However, the user may want to change the original coordinates of the 4 corners to display a rectangle that takes up the entire display area. Just change 0.75 to 1.0 and -0.75 to -1.0.
BTW, unlike OpenGL, the default winding order of the vertices of a front-facing triangle in Metal is clockwise and Metal's texture coordinate system has its origin at the upper left corner of the rendered rectangle. The vertex data set up in the method "createBuffers" take these 2 requirements into account.

Binary file not shown.

After

Width:  |  Height:  |  Size: 206 KiB