diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..10c9bc4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,23 @@ +# Exclude the build directory +build/* +examples/FilterShowcase/build* + +# Exclude temp nibs and swap files +*~.nib +*.swp + +# Exclude OS X folder attributes +.DS_Store +.svn + +# Exclude user-specific XCode 3 and 4 files +*.mode1 +*.mode1v3 +*.mode2v3 +*.perspective +*.perspectivev3 +*.pbxuser +*.xcworkspace +xcuserdata + + diff --git a/ConverterIIGS320.playground/Contents.swift b/ConverterIIGS320.playground/Contents.swift new file mode 100644 index 0000000..f695878 --- /dev/null +++ b/ConverterIIGS320.playground/Contents.swift @@ -0,0 +1,18 @@ +import AppKit +import MetalKit +import PlaygroundSupport + +/* + Originally developed using XCode 9.x + Modified to compile and run under XCode 8.x, macOS 10.12 + */ +let frame = NSRect(x: 0, y: 0, + width: 640, height: 400) +let device = MTLCreateSystemDefaultDevice() + +let renderer = MetalViewRenderer(device: device!) +let view = MTKView(frame: frame, device: device!) 
+view.delegate = renderer +PlaygroundPage.current.liveView = view + + diff --git a/ConverterIIGS320.playground/Resources/ANGELFISH.SHR b/ConverterIIGS320.playground/Resources/ANGELFISH.SHR new file mode 100644 index 0000000..413f0f1 Binary files /dev/null and b/ConverterIIGS320.playground/Resources/ANGELFISH.SHR differ diff --git a/ConverterIIGS320.playground/Resources/REDDRAGON.SHR b/ConverterIIGS320.playground/Resources/REDDRAGON.SHR new file mode 100644 index 0000000..01a07b4 --- /dev/null +++ b/ConverterIIGS320.playground/Resources/REDDRAGON.SHR @@ -0,0 +1,44 @@ +1?113?2??11??/113?3?31/3??3??1?1<333333<33!?11<33<3>>3>33#3333<333>333>3>>3#:!?#313>;>37>?}=33>?32>>>>3>>3?7?>>3?!!:3>>3>33????sqwݬ?>~3?3333?}z:37=3?}:z?>:7w!?331?7ws}}3"?333333;qws17w3;3??:3333z17=>ǽ2!/!1;333;1zq:w";;1!?3?33;>33;?1?;3{7sݧݣ2!+2";;33?3>;11;=߭=w>"/;3?33333?33?7?:߮2""?";31333:3;?;}zߪ73"1?"?::33;3;>377?="";;񳫣3:33;11?3=zߧsss("31ڪ3???;;3:=sӪ:3?"㳿3:ݪ3;;:{:w:++1;;?=::ڪ333?s3=>+?;31?33z3>㻿3==s7s3￿(1,:㳿::33;31;3?;s:3>;31±;z{1}ڣ3;;}?1;31333>;7?>r"+;3?zz3w:3>;?1;~3>)±??㳿:7s37s3>;?:33r,+??񱫫{s<wz>3s>;;~133±;;:~:3>;111:1>3;;񿱿qqw33~zz33;??;13133;;;37z:wz>s:733;:133;㳿wzs~7z:z:3>3;333ݻ>qӳ7w:33??31?3??s7:333;;;3{9???7z~3wzy3:=?;:?83;;3?::s777:77:3ӿ;3?;33;3:w~z333=;33ꫳ333/;3;3z:zz77:3ӿ1>7z3?"+"!˻(;3񻺻7zzw:w3;11?7zs?+%+"<;1:;3s3z~:3s77=;$z:33/K+!8Ȉ;:;:7ڭszwꧪ:s3ӿ?3;1:333/ "̸8;;>33z:szr=?33;+ j++331;;7ꪩzw7#;?#3H)( ²!?83;?;;3333>z:窪z~:#"32((HDk?;3373w:~ꧩ=>?+113=)H@L((8;3>3s3s;z~w6>;::8)I@@&؋(883113:3>㪧>3:ꮧww}#?:*8h @D+34>?;113~>~3>~:>w};:iIDDDB&($1;;1:3:s33>}>;1:"(Od Dh(;131:>ꪪ:s333}}#H`@dD&(Ҙ3;3311:133ww}?12"FI`FD)F(M3;?33>ڪzzwwwi>?19")DDfDBb("!3?331>>>ꪭӚ999٪?1!)/@B`F`)F҃)":;1;3?3>3ڣ9*+ yDiDBb""I:31:;:333==33m`dik$f/,ﳞ;1;1;??>>ꪭ9=3;3+;pDFFdU""!ԝ33?;33133?٪=1+`I 
f$Y/"(Hۮ3;;1:3333ڭگ(8d9IU!!9J۪;;3;:;33?=93?@I@DIY/""9Ĕۺ>;?3;9ڪ+33;31`@MDB򷒄;w;:;333ڣ?#;3?DdT/&+wJw33;3:(#1?ِF""h"wI;:wz;3;;3::񱱱1"I؄ɺt:3;w3333333ӭ+"!񺝐IbfF3:3z3;331;:>3+񻝙ii˹mx3wz3;1:33~>:ِf؆ؗ3wz;:3񫱣::zEi&idƩiiه7wz31;:1333>7`fٯ"fIMڮڻz33;:;:=37ٖd Vjڪfi-ڝ333;3:33337;e@FimjII"ݪ33331;:>3:?9ݖPDFگvٖݭ֛3w3;3;񿺣:39;;;eDi"iij+z;133:;:3933;;;ٖP@Di؆i97ywww;1333;;3;393333;;;dTi&id'i:www:11󻻳;:33;::93;3;;E@Ey"fIM*ywww:;;3Ӫ:33?3y;3333UDYiMjڪfiY"yw33313:>;;?}3;DDEViwmjI3:33;;1:3:7;?z;;TUTffgxvٖݭ֖'{33;3;;3:;?z;;:@TIixijiq;>:33;;::;wz;;3?:EEis'3;::311;33?;3ת;;33:33z;ٙdPfى)ws;3󣪣;:?;:33:33wIPTiim3;:3:ڣ;33;7s;:3:;3;333!EEEFff9ىh'z33;:;37z?33:;33333?DTVfiihzg;:;٣33:3;3z?33:;33;?3TUE iihyyz1933;3;;3w3;33;3ww<ٖEEDVi'g;:;333;117:;:;;:w<333U@dTEViw;33333;373:::z񌳳>9EUEETfi313>;3?:3:3:?7z3;:9UUDTIiiiת31;33>3?:;33wz:9;;ٕUDFeiӈhw;3:;33>3;7;;333wwz?;"39DEDVfi?ٙIih33:333:1??3z̻393?;<EEUDFiiݍz;3;3:?::333:::?z󪧯"+:93D EDDTEӹזizz;3;3ݣ?37133;z3::z"91dTdEFdUi޶fI3;7ד33333337333:zz:3::3:;񻣪z33sw?+:3:?<ٕTEEEUF븖iF3?z3333?3=w;w:zzꮪ:ݮw3::;w;31TUUTTEDhF;33;:333:w73;;:1:7:;>ޣ:3ڪ:w:y3;ûEUUUDTVޯؘi33:3;33}};1zw{7wz}z3w7;;:?3̳UTEDFfiگih33333::ww;33s33}33z333?:<DDTEDVfIff33;;;ww;;:s3s}33:wz?UTEDFfiٖ333;3wz1?;z;;}3:}?33@DDDTiim޿}mhdf:::33?ws??1;:73;333:ws3?;;;1;7?3;iEPDeFdۿiim::::3333:3?73;3wڪڣ?3񻻿s?339ܙeFUEU;ٖm:ӣ33::;1?:7z33w7wzz37?3;s9fEUVDEDiݻM3:3::??33s;ssz3;:7?;7yU@@DFDTVӿ3ꭝ33:3??񳧧s33;;:;1w?3yٔeTDTFfiٖ:3>::=::3:13s333;3:3333;;71w:ݖiDUUFdEDYiݫI3>:333꣪3;:3;:;1::;333333373ݖU@DDUEUfEVj:376::;3?3:33z7{:z:3333feEDTEDUTiIf:;=yך:33;1:3:3z:33>:33;33;wݙݙTTDDTVTIٖ33:3::if:3f:::;3z:zz::33:3:;333;;3339m@DEDEFfiIiӯiii:;3fڪ3fj1:::;wszz333>;1:>3;;;=333;;s=iٙffEETeTFUUiӪY3iݝ?:3:3;㣣:=33333:337yfeFTDFUDVTFi:yY]33vi:i::3:3>>3;:3;?񻻻93;wfiPdDUfDDDEii:?iY׫:ꪪjfڪ37z33ꪣ뱻;㣣;;93:ݖiDDDfDdETfӳdT::ݖijڪ:񪪪zzwz:z33:3ꮣ33?93ݖU@DUUfDDTDFdUi޳hxYh:7;:ޭ3꣪;13w3:33:?31!9;}feDDDEDDUEUFiݪ;ꪣzzz3㪪3333::>333;33;?9vfݕTFDDDDDVdDdyiiH:;>ު3;zzzz3333133?;;;;?9ٙ@DEeDTDDEdUVڿFT:3곿zwzz:㻻3133?29iEEETdTEDTEeUdh::ꪪ3:;1z3>3>;>3;3331񳳳(;9;DFTDFfEDEDVdDvif3::3=zzzz3꫱:;3
3?#;39ٙIDTTUFEDTEEFUUiif;3꣪:3;3zڪz::꣱񳫻3:;;3;:?;("93=鞔@DiDDfDDDTEVUFim;1::3333;񺪭ڪת:3:3:3:333;3;(;9;;:dEEVdDDDUDFdUi޳333;33:3;;;1zڪڪ:333::;1;##?93;ޝiUfUTDDeDUEUF1:3::33;zݣ:3>33;:33::;3;93:iYDUdFDEVUEfdDhfh;ꪣ>::;3:3:338823=3iTDEVDFDFdUVUUVڿ33333z;3ޝ:ꪪ>㳣;:3:3:;;8##:ٝfeDDfDDDeUUUUdޮ3wz3::ꫣi鮓3::3:3:;3:;?;񂂈iDUeDDeUUUUD33ݧ}zꪣf:3333:3㮮;(֙DIUfeDDEeUUUUUi333ުz>ޭf:>㪳;3:333;;ٙffdDUVfeUDDUDUUUUFim3:ݪڮޞݪ33ݚ:ުfi:ꪪ333333:3:;1fDEYUUUTEUEUUUUF:sz333ݞꪪުmf>:333333::3;;:33:?ffUUVTEPDEPEUVe3:9sz:33:ݪ3;9ꪮ33=꫻کn:::33::3;:3333?;ޝݙfTDDEeTEDE@ETVYi:333꩙=::33:3ڣ񺞭ڪ::333:3::;3껳3;ޙfeDDDEVUUEUUVY:?3y333ݙ:333ꭣ3ު1ښ:=㚮333:33333ꪡ;3;;;3陙UDUUVeUPEDUUVV:铩;3:۽ݫ3٩:ꮮ>ڪ񾪪:::33333:3333fUDUVfeUUDDUDEYYjݻ:3:ꮭ㳭ڣ3ݙݮ:>331񺪞3;:㪣;3;ꪳ룪3fUDUUUUffDPEETUUiӿ:33::9s:333ڣݭ:>3ꮪ흙33::::3;33;;?;fYDTEUffUPTE@DUDEEۿ3:}7}{;3ꮪ::ꮮ㪣33꭭>3;>3>3::>;񪯯3;>;:;;;YUUDEUUDTU@@EUDYi;3;:zz׻33ꪣꪪ33""ޭ;;3:333;3㳺33;;UTEDUUUTDDEFDD@EUTYiݿ:;yך;)33:Ӫ3::""}:33333333Ӯ;33:3?3;UUTDfUDDTUeTTDYUFڿ33}yڪz33:3::;:3""ȉםۻ33333:>;3333ٿTTDfUffdDVUEDEYUYڿyz3:333:;3"(>y>33:33;33ݪꫣ:33?;9TDfUUDDTUUDTDUY1;ݫ3;:::3;("(Ȉ3ںݮ;3333;:::9UYDDDDEVeUeTEEEEY::;w9+333;333333:(""3{3;;::ݮުڪ9YDDDDVfdDeUDED]:3?3:73;3;;;;:"(;;1w:9UUEDDDFfeTDeEDDIT:37:333:3333;::331""ݞ333:9TUUDefeUTDUETTUIYY/3s:z:333;3:::")ޮ3;3:3:333:ٙww3:ڪڭ9UUUfUTDUDTUETDUYYY"333s333;:;3;"ӫ3:333333}ws:ݮު9TVfETDUDDVTDDIUY!(33s:33;3:;;ݚ>>:>31}s339FfdUffDDDUEdDDIY"3773333;ޫ;>33331ݭ}ww33ڭ9iTDETTUEdEFiYY"::3w77337w;33>=:33w33:3>9UUUEDDUTDVTDUDiY3373::73㣻3>ݙꪮ::w::33:ڪ9UTTDDEUDETTTEEfM:3333::z3;3ꣻ>ijꪪ111:ݙsw::3:8TDDDDUDDVTET3w7s;:s;;333337~wz39DDDDEfDFUTETFỻ:3:s:3;3;:zs;;::=im:::3z73:9DDDVfDEFEdDTFmѻ+3:3;;3z;;=in3ݧ㪪31DUVffeDDEUDUim]+:3ws777z;3;3>3933:3}z3:EUffeDDUeUdeDiٝ!33:sw;33:7ss3333:3;333ꪪ:1UfeUDVUdYdeYe137w3;3333:>:3333㪪3VfUTFVUDUeVT]3ף;3:;33:333;3>333fDEEFeDdEIYYV3?33}w33w3;;333>3333::33!VdEEdVEEeEUUݯ?:3737w:3:3:3:31;33333dDDVUUDEfEYڭ;3w;3::333;331DDEeVUTVDUf?3sڧ337s;;333>33ꪣ3331DUUeVUDVYDIi3sz7:7s;333333:331+FfVeUDDeiYFi"37z3:3{")32"*:;3UUEeDTFVUiVi""񳣪:z:3::;3+)!"""㻺33ꪪ33;33;+TUUeDVFUYYՖm""33zs33932/"""̻,"+#
>"3뻱3:333UTUdDTEUVUګs333؉ɜ/)£,,62.";3:ڣ:38ETUDTTEUf]f??32"ܜ,(Ȉk)˼*.b3"2#ꪪ3;>=3"++UUUTeEEYUVYiڡ?3sz:3"-ܙ"™Ι#,)˻ʪ›6","">)2);;3333("3TETDVDTifff=3s̙,,,.,̒œ2̬ʩ̫",29)<2.2*ګ333333"!?UTUDVTTVifYfFݪI3")),)"&b,ə왻)+),,#""9"3)33*ӻ3;333;311EYEDdEffidiiF?ҙ,,".)))’ɘ&"",,̩"2+#"b"36"&)3>"ڣ3ꪻ333333EETDdEeffVffifiFm)-쒙"ɜ")""")"#""")i&)"2"<")#b).*3:3333331񸫪YEUDVifdfifmMFFI3"ɜ,""")""(’ɒ"ʙ,"),̜<<""3""#"骪3333183UEDVUfDfiݝIݖF,̜Μ"""".i,,""2™"ɒ¢:<3"9"#3)*:;:3;83;TTEYiYefFfFmiFdF")̜.","","əl")")bf-’✢*"),,,#222>23=:3;?EYEYVYidffffDidi)),"")nbb.))i)"ɜ."̙̓3))"2"")"23*33ޣ3;EUTY]dffifTDEfDfޒ,.)l,"",""".),*""ƙ",,))""#2"93#"")").2:;;;UYTD]ݙdfffTDEDDfE)-"̜,)",")̞))Œ"")",,"",2l’*.,3,"),.>#).#993"3:333338UUDYUݙeUEDDDDDF’,&l̖,l̙"<)쪒éʜ:33Γ<"2##9)>3;333;_,_ +C ^ʞ``,`L_S  Q  Q Q  (J B +d + +" (J B +d + +" (J B +d + +" (J B +d + +" (J B +d + +" (J B +d + +" (J B +d + +" (J B +d + +" (J B +d + +" (J B +d + +" (J B +d + +" (J B +d + +" (J B +d + +" (J B +d + +"Dffww   \ No newline at end of file diff --git a/ConverterIIGS320.playground/Resources/Shaders.metal b/ConverterIIGS320.playground/Resources/Shaders.metal new file mode 100644 index 0000000..b8dfbf9 --- /dev/null +++ b/ConverterIIGS320.playground/Resources/Shaders.metal @@ -0,0 +1,115 @@ +// +// Shaders.metal +// +// Created by Mark Lim Pak Mun on 06/12/2018. +// Copyright © Incremental Innovation 2018 . All rights reserved. +// + +// File for Metal kernel and shader functions + +#include +#include + +using namespace metal; + + +// Vertex shader outputs and per-fragment inputs. 
+typedef struct +{ + float2 position; + float2 texCoord; +} Vertex; + +typedef struct +{ + float4 position [[position]]; // in clip space + float2 texCoord; +} RasterizerData; + +// Vertex function: copies the incoming 2D position into clip space unchanged (z = 0, w = 1). +vertex RasterizerData +vertexShader( uint vertexID [[ vertex_id ]], + const device Vertex *vertices [[ buffer(0)]]) +{ + RasterizerData out; + + float2 position = vertices[vertexID].position; + // convert incoming position into clip space + out.position.xy = position; + out.position.z = 0.0; + out.position.w = 1.0; + + // pass thru to the fragment shader + out.texCoord = vertices[vertexID].texCoord; + + return out; +} + +// Fragment function: returns the linearly filtered texel of colorTexture at the interpolated texCoord +fragment half4 +fragmentShader(RasterizerData in [[stage_in]], + texture2d<half> colorTexture [[ texture(0) ]]) +{ + constexpr sampler textureSampler (mag_filter::linear, + min_filter::linear); + + // Sample the texture and return the color to colorSample + const half4 colorSample = colorTexture.sample(textureSampler, + in.texCoord); + // We return the color of the texture + return colorSample; +} + +/// ============ kernel function ============ +#define bytesPerScanLine 160 +#define sizeOfColorTable 16 // in terms of 16-bit words + +/* + Converts a IIGS "pixel" to an ordinary rgba pixel. 
+ */ +kernel void convert320(const device uchar *iigsBitmap [[buffer(0)]], + const device uchar *scbs [[buffer(1)]], + const device ushort *colorTables [[buffer(2)]], + texture2d<half, access::write> output [[texture(0)]], + uint2 gid [[thread_position_in_grid]]) +{ + uint width = output.get_width(); + uint height = output.get_height(); + if ((gid.x >= width) || (gid.y >= height)) + { + // Return early if the pixel is out of bounds + return; + } + + uint col = gid.x; // 0 - 319 for standard Apple IIGS 320x200 graphics + uint row = gid.y; // 0 - 199 + uint whichColorTable = scbs[row] & 0x0f; // 0 - 15 + uint bitmapIndex = row * bytesPerScanLine + col/2; // assumes the texture is no larger than 320x200; the input buffers are sized for that + uchar pixels = iigsBitmap[bitmapIndex]; // 2 IIGS 4-bit "pixels"/byte + uint whichColorEntry; // 0 - 15 + if (col % 2) { + // odd column # - pixel #1 (bits 0-3) + whichColorEntry = pixels & 0x0f; + } + else { + // even column # - pixel #0 (bits 4-7) + whichColorEntry = (pixels >> 4) & 0x0f; + } + uint colorTableIndex = sizeOfColorTable*whichColorTable + whichColorEntry; + ushort color = colorTables[colorTableIndex]; + ushort red = (color & 0x0f00) >> 8; // 0 - 15 + ushort green = (color & 0x00f0) >> 4; + ushort blue = (color & 0x000f); + // Scale the values [0,15] to [0,255] + red *= 17; // 0, 17, 34, ... , 238, 255 + green *= 17; + blue *= 17; + + // Compute the rgba8888 colour of the pixel ... + half4 color4 = half4(red, green, blue, 255); + // ... and scale its values to [0, 1.0] + color4 *= 1/255.0; + // Write the pixel to the texture. + output.write(color4, gid); +} + diff --git a/ConverterIIGS320.playground/Sources/Converter.swift b/ConverterIIGS320.playground/Sources/Converter.swift new file mode 100644 index 0000000..de6e6ff --- /dev/null +++ b/ConverterIIGS320.playground/Sources/Converter.swift @@ -0,0 +1,265 @@ +/* + To run this demo under XCode 9.x or later, minor editing is required. + */ +import AppKit +import MetalKit +import simd + +// We need to pass the 16 color tables as 1D array of UInt16's to the +// metal shader. 
Each color table has 16 color entries of size 2 bytes. +class ExtractIIgsGraphicData { + var iigsBitmap: [UInt8]? + var colorTables: [UInt16]? + var scbs: [UInt8]? + + public init?(_ url: URL) { + + // Each scanline is 160 bytes. Each byte consists of 2 "pixels". + // There are 200 scanlines in a 320x200 IIGS graphic + iigsBitmap = [UInt8](repeating: 0, count: 160*200) + // First load the entire file + // Extract the first 160x200 = 32 000 bytes - this is the bitmap + // Then extract the next 256 bytes - this is the SCB table (only the first 200 are used) + // Then extract the last 512 bytes - 16 color tables = 16 x 32 bytes + + var rawData: Data? = nil + do { + try rawData = Data(contentsOf: url) + } + catch let error { + print("Error", error) + return nil + } + guard let fileSize = rawData?.count, fileSize >= 32768 else { print("Error: file is shorter than 32768 bytes"); return nil } // 32 000 + 256 + 512 bytes are read below; copying past the end of the data would trap + var range = Range(0..<32000) + rawData?.copyBytes(to: &iigsBitmap!, from: range) + + scbs = [UInt8](repeating: 0, count: 256) + range = Range(32000..<32256) + rawData?.copyBytes(to: &scbs!, from: range) + + range = Range(32256..<32768) + colorTables = [UInt16](repeating:0, count: 256) + var buffer512 = [UInt8](repeating:0, count: 512) + rawData?.copyBytes(to: &buffer512, from: range) + var index = 0 + // On the IIGS, UInt16 is in little-endian format. + for k in stride(from: 0, to: 512, by: 2) { + colorTables![index] = UInt16(buffer512[k]) + (UInt16(buffer512[k+1]) << 8) + // Checked! color table entries are correct. + //print(colorTables![index], terminator: " ") + index += 1 + //if index % 16 == 0 { + // print() + //} + } + } +} + +public class MetalViewRenderer: NSObject, MTKViewDelegate { + var queue: MTLCommandQueue? + var device: MTLDevice! + var rps: MTLRenderPipelineState! + var cps: MTLComputePipelineState! + + var vertexBuffer: MTLBuffer! + var indexBuffer: MTLBuffer! + + var bitMapBuffer: MTLBuffer! + var scbTablesBuffer: MTLBuffer! + var colorTablesBuffer: MTLBuffer! + + var outputTexture: MTLTexture! 
+ + public init?(device: MTLDevice) { + super.init() + self.device = device + queue = device.makeCommandQueue() + createBuffers() + buildPipelineStates() + guard let texture = createTexture() + else { + print("Texture could not be created") + return nil + } + outputTexture = texture + } + + func createBuffers() { + let myBundle = Bundle.main + let assetURL = myBundle.url(forResource: "ANGELFISH", + withExtension:"SHR") + let graphicsExtractor = ExtractIIgsGraphicData(assetURL!)! + + let bmData = graphicsExtractor.iigsBitmap! + bitMapBuffer = device!.makeBuffer(bytes: bmData, + length: MemoryLayout<UInt8>.stride * bmData.count, + options: []) + let colorTables = graphicsExtractor.colorTables! + let numberOfColorEntries = colorTables.count + let sizeOfColorTables = MemoryLayout<UInt16>.stride * numberOfColorEntries + colorTablesBuffer = device!.makeBuffer(bytes: colorTables, + length: sizeOfColorTables, + options: []) + let scbTable = graphicsExtractor.scbs! + scbTablesBuffer = device!.makeBuffer(bytes: scbTable, + length: MemoryLayout<UInt8>.stride * scbTable.count, + options: []) + + // size = 16 bytes; alignment=8; stride=16 + struct Vertex { + var position: packed_float2 + var texCoords: packed_float2 + } + + // Note: both the position & texture coordinates are already + // normalized to the range [-1.0, 1.0] & [0.0, 1.0] respectively. + // total size = 64 bytes + let quadVertices: [Vertex] = + // clockwise - triangle strip; origin of tex coord system is upper-left. + [ + Vertex(position: [-0.75, -0.75], texCoords: [ 0.0, 1.0 ]), // v0 + Vertex(position: [-0.75, 0.75], texCoords: [ 0.0, 0.0 ]), // v1 + Vertex(position: [ 0.75, -0.75], texCoords: [ 1.0, 1.0 ]), // v2 + Vertex(position: [ 0.75, 0.75], texCoords: [ 1.0, 0.0 ]), // v3 + ] + vertexBuffer = device!.makeBuffer(bytes: quadVertices, + length: MemoryLayout<Vertex>.stride * quadVertices.count, + options: []) + + // total size = 12 bytes (6 indices x 2 bytes). 
+ let indices: [UInt16] = [0, 1, 2, 2, 1, 3] + indexBuffer = device!.makeBuffer(bytes: indices, + length: MemoryLayout<UInt16>.stride * indices.count, + options: []) + } + + func buildPipelineStates() { + let path = Bundle.main.path(forResource: "Shaders", + ofType: "metal") + let input: String? + var library: MTLLibrary? + + do { + input = try String(contentsOfFile: path!, + encoding: String.Encoding.utf8) + library = try device!.makeLibrary(source: input!, + options: nil) + let kernel = library!.makeFunction(name: "convert320")! + cps = try device!.makeComputePipelineState(function: kernel) + } + catch let e { + Swift.print("\(e)") + } + + let vertex_func = library!.makeFunction(name: "vertexShader") + let frag_func = library!.makeFunction(name: "fragmentShader") + // Setup a render pipeline descriptor + let rpld = MTLRenderPipelineDescriptor() + rpld.vertexFunction = vertex_func + rpld.fragmentFunction = frag_func + // Note: this must match the pixel format of the view's drawable (MTKView's default + // colorPixelFormat is .bgra8Unorm); it is independent of the rgba texture the kernel writes. + rpld.colorAttachments[0].pixelFormat = .bgra8Unorm + do { + try rps = device!.makeRenderPipelineState(descriptor: rpld) + } + catch let error { + Swift.print("\(error)") + } + } + + + // Instantiate the output texture object and generate its contents + // so that the render encoder can use it. + func createTexture() -> MTLTexture? { + let width = 320 + let height = 200 + let textureDesc = MTLTextureDescriptor.texture2DDescriptor(pixelFormat: .rgba8Unorm, + width: Int(width), + height: Int(height), + mipmapped: false) + // Must be read and write. + textureDesc.usage = [.shaderWrite, .shaderRead] + textureDesc.resourceOptions = .storageModeManaged + + let texture = device.makeTexture(descriptor: textureDesc) + + if let commandBuffer = queue?.makeCommandBuffer() { + + commandBuffer.addCompletedHandler { + (commandBuffer: MTLCommandBuffer) -> Void in + if commandBuffer.status == .error { + Swift.print(commandBuffer.error!) 
+ } + else if commandBuffer.status == .completed { + Swift.print("Texture was generated successfully") + } + } + + let commandComputeEncoder = commandBuffer.makeComputeCommandEncoder() + print("Generate Texture") + commandComputeEncoder.setComputePipelineState(cps) + commandComputeEncoder.setTexture(texture, + at: 0) + commandComputeEncoder.setBuffer(bitMapBuffer, + offset: 0, + at: 0) + commandComputeEncoder.setBuffer(scbTablesBuffer, + offset: 0, + at: 1) + commandComputeEncoder.setBuffer(colorTablesBuffer, + offset: 0, + at: 2) + + let threadGroupCount = MTLSizeMake(8, 8, 1) + let threadGroups = MTLSizeMake((texture.width + threadGroupCount.width - 1) / threadGroupCount.width, + (texture.height + threadGroupCount.height - 1) / threadGroupCount.height, + 1) + // Execute the kernel function. The group counts are rounded up so that every pixel is covered for any threadgroup size; the kernel's bounds check discards the surplus threads. + commandComputeEncoder.dispatchThreadgroups(threadGroups, + threadsPerThreadgroup: threadGroupCount) + commandComputeEncoder.endEncoding() + commandBuffer.commit() + } + return texture + } + + // Implementation of the 2 MTKView delegate protocol functions. + public func mtkView(_ view: MTKView, + drawableSizeWillChange size: CGSize) { + } + + // drawInMTKView: + public func draw(in view: MTKView) { + + if let rpd = view.currentRenderPassDescriptor, + let drawable = view.currentDrawable, + let commandBuffer = queue?.makeCommandBuffer() { + view.clearColor = MTLClearColorMake(0.5, 0.5, 0.5, 1.0) + + // Render the generated graphic. 
+ let commandRenderEncoder = commandBuffer.makeRenderCommandEncoder(descriptor: rpd) + + commandRenderEncoder.setRenderPipelineState(rps) + + commandRenderEncoder.setVertexBuffer(vertexBuffer, + offset: 0, + at: 0) + + commandRenderEncoder.setFragmentTexture(outputTexture, + at: 0) + + commandRenderEncoder.drawIndexedPrimitives(type: MTLPrimitiveType.triangle, + indexCount: indexBuffer.length/MemoryLayout<UInt16>.size, + indexType: MTLIndexType.uint16, + indexBuffer: indexBuffer, + indexBufferOffset: 0) + + commandRenderEncoder.endEncoding() + + commandBuffer.present(drawable) + commandBuffer.commit() + } + } +} diff --git a/ConverterIIGS320.playground/contents.xcplayground b/ConverterIIGS320.playground/contents.xcplayground new file mode 100644 index 0000000..63b6dd8 --- /dev/null +++ b/ConverterIIGS320.playground/contents.xcplayground @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/ConverterIIGS320.playground/timeline.xctimeline b/ConverterIIGS320.playground/timeline.xctimeline new file mode 100644 index 0000000..77c4bf6 --- /dev/null +++ b/ConverterIIGS320.playground/timeline.xctimeline @@ -0,0 +1,11 @@ + + + + + + + diff --git a/Readme.md b/Readme.md new file mode 100644 index 0000000..f3680bb --- /dev/null +++ b/Readme.md @@ -0,0 +1,17 @@ +Metal version of GraphicConverterIIGS. + +The aim of this project (in the form of a Swift playground) is to investigate if it's possible to use a metal kernel function to convert a IIGS graphic to an instance of MTLTexture which will be rendered by a pair of vertex-fragment functions. + + +Requirements: +XCode 8.x, Swift 3.x or later +Hardware: A graphics processor which supports the Metal API +Knowhow: how to run a Swift playground + +Because of changes in the interfaces, it is necessary to edit the file "Converter.swift" to run the playground demo in XCode 9.x or later. 
+ +To understand the source code, the programmer should have + +a) a sound knowledge of the Fundamentals of the Metal API, +b) knowledge of the structure of an Apple IIGS graphic file with the format $C1/$0000, and, +c) basic knowledge of the Apple IIGS video hardware. \ No newline at end of file diff --git a/documentation/ProgramDoc.txt b/documentation/ProgramDoc.txt new file mode 100644 index 0000000..e95922a --- /dev/null +++ b/documentation/ProgramDoc.txt @@ -0,0 +1,22 @@ +Brief Description + +The Swift playground demo +(a) sets up a renderer object, +(b) instantiates a MTKView object, +(c) sets the renderer instance in step (a) to be the view's delegate, and, +(d) gets the playground to display the view. + + +Converter.swift source code + +The renderer object must implement both methods of MTKViewDelegate since it's going to be the view's delegate. The renderer sets up the environment before the MTKViewDelegate method "drawInMTKView:" gets called. Notice that the compute shader is called in the method "createTexture" rather than in the method "drawInMTKView:" because the latter method is called repeatedly, typically 60 times per second. This means the method "buildPipelineStates" must be called before "createTexture" because the latter method needs a MTLComputePipelineState instance to do its job of preparing an instance of MTLTexture whose allocated storage is to be filled with the pixels of the generated graphic. (There are methods for getting these pixels from the texture's storage allocation if one intends to write them out as a graphic PNG file. See Apple's documentation on MTLTexture.) + +The method "createBuffers" sets up all instances of MTLBuffer needed by the rest of the program. Briefly, it loads an Apple IIGS graphic, extracts its bitmap, color tables and SCB table, and instantiates these as one-dimensional (1D) arrays held in MTLBuffers to be passed to the compute shader. The MTLBuffers for the geometry to be rendered in the view are also prepared by this method. 
+ +If you have a powerful graphics processor, you may want to change the line + + let threadGroupCount = MTLSizeMake(8, 8, 1) + +to use a bigger threadgroup to execute more threads in parallel. The threadgroup must not exceed the device's maximum number of threads per threadgroup, and a size that divides 320 x 200 evenly (for example 16 x 8) avoids partially filled threadgroups. + +Refer to Apple's article entitled "Calculating Threadgroup and Grid Sizes" for more information. \ No newline at end of file diff --git a/documentation/ShadersDoc.txt b/documentation/ShadersDoc.txt new file mode 100644 index 0000000..1385551 --- /dev/null +++ b/documentation/ShadersDoc.txt @@ -0,0 +1,52 @@ +Description of the functions of the Metal shaders. + + +Compute Processing with a Metal kernel function. + +The meat of the project is the kernel function named "convert320". See the source code of the file "Shaders.metal". + +The kernel function is declared as: + +kernel void convert320(const device uchar *iigsBitmap [[buffer(0)]], + const device uchar *scbs [[buffer(1)]], + const device ushort *colorTables [[buffer(2)]], + texture2d<half, access::write> output [[texture(0)]], + uint2 gid [[thread_position_in_grid]]) + + +Three input buffers are passed to the kernel function. The first input buffer will consist of a 32 000-byte IIGS bitmap passed as a 1D array of bytes. Each byte in this bitmap consists of two 4-bit "pixels". These 2 "pixels" are not true colour pixels but are indices into a color table. + +The second input buffer contains 200 Scanline Control Bytes (SCBs) (plus 56 unused bytes) passed as a 1D array of 256 bytes. Note: the Apple IIGS standard monitor supports 200 scanlines (or screen lines in graphic mode). The value of each of these SCBs tells the Apple IIGS hardware which one of the 16 colour tables to use for a particular scanline (or entire row of 320 pixels). There is a 1:1 correspondence between the SCBs in the SCB array and the rows of pixels on the IIGS monitor. To elaborate, the colours of scanline 0 are controlled by SCB0 (the first SCB), those of scanline 199 by SCB199 (the last SCB). Each scanline can use just ONE of the 16 colour tables passed. 
BTW, each scanline on the Apple IIGS occupies 160 bytes of video memory. In 320 graphic mode, each screen line on the standard Apple IIGS monitor displays exactly 160 x 2 = 320 pixels. + +The third input buffer consists of 16 colour tables, also passed as a 1D array of 512 bytes (256 16-bit words); each colour table itself consists of 16 colour entries. Each colour entry is a 16-bit word; the colour word has the following format (in bits): 0000rrrr ggggbbbb. (In hex, the bytes are written as: 0x0RGB; in the Apple IIGS video memory the bytes are stored as: GB 0R.) In other words, the colour word is actually 12 bits. The maximum value is 00001111 11111111 (or 0F FF in hex or 4095 in decimal) and the minimum is 00000000 00000000 (00 00 in hex). The Apple IIGS can display up to 4 096 colours but the number of colours displayed by a scanline is much more limited; each pixel of a scanline can be one of 16 colours in the colour table specified by the scanline's SCB. + +The output of the kernel function is an instance of a MTLTexture which is created by code running on the CPU before being passed as a parameter to the GPU. This 2D texture object must have the dimensions 320x200 exactly matching the IIGS graphic's resolution. + +Think of the metal texture as having a rectangular 2D grid of pixels. In this project, the rectangular grid is 320 columns and 200 rows. The first pixel on the grid is (0, 0) and is positioned at the upper left hand corner of the grid. The last pixel of the grid is (319, 199) and is at the bottom right hand corner. The pixel at the upper right hand corner is (319, 0) and finally, that at the bottom left hand corner is (0, 199). These 4 pairs of coordinates are the bounds of the grid. + +In the kernel shader (or function), each pixel of the texture is assigned a pair of coordinates. The function works backwards to determine which IIGS "pixel" corresponds to a particular pixel in the metal texture. 
+ +Let's consider an example where the kernel function is passed an integer pair (61, 40). It receives this pair of integers via the parameter "gid" from the Metal Framework. The pixel associated with this integer pair is said to have an x-coordinate of 61 and y-coordinate of 40. Incidentally, this unique pair of integers is used to identify the thread currently processed by the kernel function. In other words, the integer pair can be considered to be the coordinates of the pixel currently processed. + +In the 2D rectangular grid mentioned above, the pixel is located at row 40 and column 61. Since the row is known, the kernel function fetches the 41st byte (index 40) from the SCB array and masks off its upper nibble. (Some Apple IIGS graphics files have SCBs with a bit ON in the upper nibble.) The resulting value is the number of the colour table to be used for the entire row of 320 pixels. + +Next, the kernel function computes the location of the byte corresponding to the pixel within the IIGS bitmap and stores it in the variable "bitmapIndex". (Remember, the bitmap was passed as a 1D array of bytes.) For the pixel in question, the value of "bitmapIndex" is 40x160 + 61/2 (integer division) = 6 400 + 30 = 6 430. + +The byte containing the IIGS "pixel" is fetched from the bitmap and stored in the variable "pixels". The upper nibble of this byte is for the pixel whose position is (60, 40) on the grid and the lower nibble is for the pixel at location (61, 40). The value of this IIGS "pixel" is the relative offset to the colour word (within the colour table) to use. + +In short, the SCB of the row of the IIGS "pixel" is used to determine which of the 16 colour tables to use. The value of a IIGS "pixel" indicates which colour word (or entry) to access within that colour table. As mentioned above, each colour table has 16 colour words, each of which is 16 bits (or 2 bytes) wide. The variable "colorTableIndex" is the array index of the colour word. 
Using this index, the colour of the IIGS "pixel" being processed is fetched from the 1D color tables array and stored in the variable "color". + +The red, green and blue components of the colour word are unpacked and stored in the variables "red", "green" and "blue" respectively. The range of their values is [0, 15] and the components need to be scaled to [0, 255] i.e. 4 bits becoming 8 bits. Effectively, this means we are going from 4-bit RGB to 8-bit RGB. A colour word consisting of four 16-bit floating point numbers is formed using the half4 constructor. Using 32-bit floating point would be overkill given that an Apple IIGS graphic can have at most 4 096 colours. Finally, the colour of the pixel is scaled down to [0, 1.0] because Metal, like OpenGL, works with colours in the range [0, 1.0]. + +Instead of multiplying the red, green and blue components separately, these arithmetic operations should run faster if they are replaced with a vector-scalar multiplication. Initialise the "color4" variable with the values of "red", "green", "blue" with 15 as the fourth component. Scale up and then scale down using 2 successive vector-scalar multiplications. This is left as an exercise for the reader. + +On the CPU side, the program asks Metal to launch threadgroups of 8x8 = 64 threads each; 40 x 25 such threadgroups cover the 320x200 grid, and the kernel function is run once per thread, in parallel. Each threadgroup generates 64 pixels and writes them to the metal texture. No double for-loops are needed. + + + +Rendering with a pair of vertex-fragment shaders. + +The operations of the vertex and fragment shaders are straightforward. The vertex function converts the position of an incoming vertex from a 2D float (float2) to a 4D float (float4) and outputs it in clip space. There is no need to perform any matrix transformation because the coordinates of the corners of the rectangle to be rendered are already in normalised device coordinates (NDC). 
However, the user may want to change the original coordinates of the 4 corners to display a rectangle that takes up the entire display area. Just change 0.75 to 1.0 and -0.75 to -1.0. + +BTW, unlike OpenGL, the default winding order of the vertices of a triangle in Metal is clockwise and Metal's texture coordinate system has its origin at the upper left corner of the texture. The quad's vertex list in "createBuffers" takes these 2 requirements into account. + diff --git a/documentation/WindowCapture.png b/documentation/WindowCapture.png new file mode 100644 index 0000000..0953bf0 Binary files /dev/null and b/documentation/WindowCapture.png differ