From 173c283369352497e8b2658a8c634d999799c5e0 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Tue, 9 Nov 2021 11:23:25 +0000
Subject: [PATCH 01/82] First implementation of k-means clustering in RGB
 space to dither a 320x200 SHR image.

---
 convert.py | 95 ++++++++++++++++++++++++++++++++++++++++++------------
 screen.py  |  5 +++
 2 files changed, 79 insertions(+), 21 deletions(-)

diff --git a/convert.py b/convert.py
index 7cd34d1..d4cd028 100644
--- a/convert.py
+++ b/convert.py
@@ -7,6 +7,7 @@ import time
 import colour
 from PIL import Image
 import numpy as np
+from sklearn.cluster import KMeans
 
 import dither as dither_pyx
 import dither_pattern
@@ -19,6 +20,55 @@ import screen as screen_py
 # - support LR/DLR
 # - support HGR
 
+def _to_pixel(float_array):
+    return tuple(np.clip(float_array.astype(np.uint8), 0, 255))
+
+
+def cluster_palette(image: Image):
+    # TODO: cluster in CAM16-UCS space
+    colours = np.asarray(image).reshape((-1, 3))
+    kmeans = KMeans(n_clusters=16)
+    kmeans.fit_predict(colours)
+    palette = kmeans.cluster_centers_
+
+    pal_image = Image.new('P', (1, 1), 0)
+    pal_image.putpalette(palette.reshape(-1).astype(np.uint8))
+
+    working_image = np.asarray(image).astype(np.float32)
+    for y in range(200):
+        print(y)
+        for x in range(320):
+            pixel = working_image[y, x]
+
+            best_distance = 1e9
+            best_colour = None
+            for colour in palette:
+                distance = np.sum(np.power(colour - pixel, 2))
+                if distance < best_distance:
+                    best_distance = distance
+                    best_colour = colour
+            quant_error = pixel - best_colour
+
+            # Floyd-Steinberg dither
+            # 0 * 7
+            # 3 5 1
+            working_image[y, x] = best_colour
+            if x < 319:
+                working_image[y, x + 1] = np.clip(
+                    working_image[y, x + 1] + quant_error * (7 / 16), 0, 255)
+            if y < 199:
+                working_image[y + 1, x] = np.clip(
+                    working_image[y + 1, x] + quant_error * (5 / 16), 0, 255)
+                if x < 319:
+                    working_image[y + 1, x + 1] = np.clip(
+                        working_image[y + 1, x + 1] + quant_error * (1 / 16),
+                        0, 255)
+                if x > 0:
+                    working_image[y + 1, x - 1] = np.clip(
+                        working_image[y + 1, x - 1] + quant_error * (3 / 16), 0,
+                        255)
+    return working_image
+
 
 def main():
     parser = argparse.ArgumentParser()
@@ -63,8 +113,8 @@ def main():
     if args.lookahead < 1:
         parser.error('--lookahead must be at least 1')
 
-    palette = palette_py.PALETTES[args.palette]()
-    screen = screen_py.DHGRScreen(palette)
+    # palette = palette_py.PALETTES[args.palette]()
+    screen = screen_py.SHR320Screen()
 
     # Conversion matrix from RGB to CAM16UCS colour values.  Indexed by
     # 24-bit RGB value
@@ -73,39 +123,42 @@ def main():
     # Open and resize source image
     image = image_py.open(args.input)
     if args.show_input:
-        image_py.resize(image, screen.X_RES, screen.Y_RES * 2,
+        image_py.resize(image, screen.X_RES, screen.Y_RES,
                         srgb_output=True).show()
     rgb = np.array(
         image_py.resize(image, screen.X_RES, screen.Y_RES,
                         gamma=args.gamma_correct)).astype(np.float32) / 255
 
-    dither = dither_pattern.PATTERNS[args.dither]()
-    bitmap = dither_pyx.dither_image(
-        screen, rgb, dither, args.lookahead, args.verbose, rgb_to_cam16)
+    output_rgb = cluster_palette(Image.fromarray((rgb * 255).astype(np.uint8)))
+    output_srgb = image_py.linear_to_srgb(output_rgb).astype(np.uint8)
+
+    # dither = dither_pattern.PATTERNS[args.dither]()
+    # bitmap = dither_pyx.dither_image(
+    #     screen, rgb, dither, args.lookahead, args.verbose, rgb_to_cam16)
 
     # Show output image by rendering in target palette
-    output_palette_name = args.show_palette or args.palette
-    output_palette = palette_py.PALETTES[output_palette_name]()
-    output_screen = screen_py.DHGRScreen(output_palette)
-    if output_palette_name == "ntsc":
-        output_srgb = output_screen.bitmap_to_image_ntsc(bitmap)
-    else:
-        output_srgb = image_py.linear_to_srgb(
-            output_screen.bitmap_to_image_rgb(bitmap)).astype(np.uint8)
+    # output_palette_name = args.show_palette or args.palette
+    # output_palette = palette_py.PALETTES[output_palette_name]()
+    # output_screen = screen_py.DHGRScreen(output_palette)
+    # if output_palette_name == "ntsc":
+    #     output_srgb = output_screen.bitmap_to_image_ntsc(bitmap)
+    # else:
+    #     output_srgb = image_py.linear_to_srgb(
+    #         output_screen.bitmap_to_image_rgb(bitmap)).astype(np.uint8)
     out_image = image_py.resize(
-        Image.fromarray(output_srgb), screen.X_RES, screen.Y_RES * 2,
+        Image.fromarray(output_srgb), screen.X_RES, screen.Y_RES,
         srgb_output=True)
 
     if args.show_output:
         out_image.show()
 
     # Save Double hi-res image
-    outfile = os.path.join(os.path.splitext(args.output)[0] + "-preview.png")
-    out_image.save(outfile, "PNG")
-    screen.pack(bitmap)
-    with open(args.output, "wb") as f:
-        f.write(bytes(screen.aux))
-        f.write(bytes(screen.main))
+    # outfile = os.path.join(os.path.splitext(args.output)[0] + "-preview.png")
+    # out_image.save(outfile, "PNG")
+    # screen.pack(bitmap)
+    # with open(args.output, "wb") as f:
+    #     f.write(bytes(screen.aux))
+    #     f.write(bytes(screen.main))
 
 
 if __name__ == "__main__":
diff --git a/screen.py b/screen.py
index 9831807..d164be7 100644
--- a/screen.py
+++ b/screen.py
@@ -5,6 +5,11 @@ import numpy as np
 import palette as palette_py
 
 
+class SHR320Screen:
+    X_RES = 320
+    Y_RES = 200
+
+
 class DHGRScreen:
     X_RES = 560
     Y_RES = 192

From a92c9cd7b573a5ce7850e3780da9ee12745dc62c Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Tue, 9 Nov 2021 15:13:07 +0000
Subject: [PATCH 02/82] Work in CAM16-UCS colour space and cythonize

---
 convert.py | 59 ++++++++++++++++--------------------------------------
 dither.pyx | 47 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+), 42 deletions(-)

diff --git a/convert.py b/convert.py
index d4cd028..70431e9 100644
--- a/convert.py
+++ b/convert.py
@@ -1,6 +1,7 @@
 """Image converter to Apple II Double Hi-Res format."""
 
 import argparse
+import array
 import os.path
 import time
 
@@ -24,49 +25,22 @@ def _to_pixel(float_array):
     return tuple(np.clip(float_array.astype(np.uint8), 0, 255))
 
 
-def cluster_palette(image: Image):
-    # TODO: cluster in CAM16-UCS space
-    colours = np.asarray(image).reshape((-1, 3))
+def cluster_palette(image: Image, rgb_to_cam16):
+    # TODO: only 4-bit RGB colour channels
+    colours_rgb = np.asarray(image).reshape((-1, 3))
+    with colour.utilities.suppress_warnings(colour_usage_warnings=True):
+        colours_cam = colour.convert(colours_rgb / 255, "RGB",
+                                     "CAM16UCS").astype(np.float32)
+
     kmeans = KMeans(n_clusters=16)
-    kmeans.fit_predict(colours)
-    palette = kmeans.cluster_centers_
+    kmeans.fit_predict(colours_cam)
+    palette_cam = kmeans.cluster_centers_
+    with colour.utilities.suppress_warnings(colour_usage_warnings=True):
+        palette_rgb = colour.convert(palette_cam, "CAM16UCS", "RGB").astype(
+            np.float32)
 
-    pal_image = Image.new('P', (1, 1), 0)
-    pal_image.putpalette(palette.reshape(-1).astype(np.uint8))
-
-    working_image = np.asarray(image).astype(np.float32)
-    for y in range(200):
-        print(y)
-        for x in range(320):
-            pixel = working_image[y, x]
-
-            best_distance = 1e9
-            best_colour = None
-            for colour in palette:
-                distance = np.sum(np.power(colour - pixel, 2))
-                if distance < best_distance:
-                    best_distance = distance
-                    best_colour = colour
-            quant_error = pixel - best_colour
-
-            # Floyd-Steinberg dither
-            # 0 * 7
-            # 3 5 1
-            working_image[y, x] = best_colour
-            if x < 319:
-                working_image[y, x + 1] = np.clip(
-                    working_image[y, x + 1] + quant_error * (7 / 16), 0, 255)
-            if y < 199:
-                working_image[y + 1, x] = np.clip(
-                    working_image[y + 1, x] + quant_error * (5 / 16), 0, 255)
-                if x < 319:
-                    working_image[y + 1, x + 1] = np.clip(
-                        working_image[y + 1, x + 1] + quant_error * (1 / 16),
-                        0, 255)
-                if x > 0:
-                    working_image[y + 1, x - 1] = np.clip(
-                        working_image[y + 1, x - 1] + quant_error * (3 / 16), 0,
-                        255)
+    return dither_pyx.dither_shr(
+        np.asarray(image).astype(np.float32) / 255, palette_rgb, rgb_to_cam16)
     return working_image
 
 
@@ -129,7 +103,8 @@ def main():
         image_py.resize(image, screen.X_RES, screen.Y_RES,
                         gamma=args.gamma_correct)).astype(np.float32) / 255
 
-    output_rgb = cluster_palette(Image.fromarray((rgb * 255).astype(np.uint8)))
+    output_rgb = cluster_palette(Image.fromarray((rgb * 255).astype(
+        np.uint8)), rgb_to_cam16)
     output_srgb = image_py.linear_to_srgb(output_rgb).astype(np.uint8)
 
     # dither = dither_pattern.PATTERNS[args.dither]()
diff --git a/dither.pyx b/dither.pyx
index 0487c84..95822da 100644
--- a/dither.pyx
+++ b/dither.pyx
@@ -323,3 +323,50 @@ def dither_image(
 
     free(cdither.pattern)
     return image_nbit_to_bitmap(image_nbit, xres, yres, palette_depth)
+
+import colour
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def dither_shr(float[:, :, ::1] working_image, float[:, ::1] palette_rgb, float[:,::1] rgb_to_cam16ucs):
+    cdef int y, x, idx
+    cdef float best_distance, distance
+    cdef float[::1] best_colour_rgb, pixel_cam, colour_rgb, colour_cam
+    cdef float[3] quant_error
+
+    for y in range(200):
+        print(y)
+        for x in range(320):
+            pixel_cam = convert_rgb_to_cam16ucs(
+                rgb_to_cam16ucs, working_image[y, x, 0], working_image[y, x, 1], working_image[y, x, 2])
+
+            best_distance = 1e9
+            for idx, colour_rgb in enumerate(palette_rgb):
+                colour_cam = convert_rgb_to_cam16ucs(rgb_to_cam16ucs, colour_rgb[0], colour_rgb[1], colour_rgb[2])
+                distance = colour_distance_squared(pixel_cam, colour_cam)
+                if distance < best_distance:
+                    best_distance = distance
+                    best_colour_rgb = colour_rgb
+
+            for i in range(3):
+                quant_error[i] = working_image[y, x, i] - best_colour_rgb[i]
+
+                # Floyd-Steinberg dither
+                # 0 * 7
+                # 3 5 1
+                working_image[y, x, i] = best_colour_rgb[i]
+                if x < 319:
+                    working_image[y, x + 1, i] = clip(
+                        working_image[y, x + 1, i] + quant_error[i] * (7 / 16), 0, 1)
+                if y < 199:
+                    working_image[y + 1, x, i] = clip(
+                        working_image[y + 1, x, i] + quant_error[i] * (5 / 16), 0, 1)
+                    if x < 319:
+                        working_image[y + 1, x + 1, i] = clip(
+                            working_image[y + 1, x + 1, i] + quant_error[i] * (1 / 16),
+                            0, 1)
+                    if x > 0:
+                        working_image[y + 1, x - 1, i] = clip(
+                            working_image[y + 1, x - 1, i] + quant_error[i] * (3 / 16), 0,
+                            1)
+    return np.array(working_image).astype(np.float32) * 255

From 01b19a4a06442ec85cb64b4afa2a82ef2e74ec21 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Tue, 9 Nov 2021 15:35:44 +0000
Subject: [PATCH 03/82] Use 4-bit RGB values instead of 8-bit

---
 convert.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/convert.py b/convert.py
index 70431e9..0d6bb30 100644
--- a/convert.py
+++ b/convert.py
@@ -26,18 +26,21 @@ def _to_pixel(float_array):
 
 
 def cluster_palette(image: Image, rgb_to_cam16):
-    # TODO: only 4-bit RGB colour channels
     colours_rgb = np.asarray(image).reshape((-1, 3))
     with colour.utilities.suppress_warnings(colour_usage_warnings=True):
         colours_cam = colour.convert(colours_rgb / 255, "RGB",
                                      "CAM16UCS").astype(np.float32)
 
-    kmeans = KMeans(n_clusters=16)
+    kmeans = KMeans(n_clusters=16, max_iter=10000)
     kmeans.fit_predict(colours_cam)
     palette_cam = kmeans.cluster_centers_
     with colour.utilities.suppress_warnings(colour_usage_warnings=True):
-        palette_rgb = colour.convert(palette_cam, "CAM16UCS", "RGB").astype(
-            np.float32)
+        palette_rgb = colour.convert(palette_cam, "CAM16UCS", "RGB")
+        # SHR colour palette only uses 4-bit values
+        # TODO: do this more carefully
+        palette_rgb = np.clip(np.round(palette_rgb * 16).astype(np.uint32) *
+                              16, 0, 255)
+        palette_rgb = palette_rgb.astype(np.float32) / 255
 
     return dither_pyx.dither_shr(
         np.asarray(image).astype(np.float32) / 255, palette_rgb, rgb_to_cam16)

From 21058084e28be7f9534783b60448b4bb854b991a Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Tue, 9 Nov 2021 16:14:37 +0000
Subject: [PATCH 04/82] Tidy: split palette clustering from dithering, drop _to_pixel

---
 convert.py | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/convert.py b/convert.py
index 0d6bb30..074551a 100644
--- a/convert.py
+++ b/convert.py
@@ -21,14 +21,11 @@ import screen as screen_py
 # - support LR/DLR
 # - support HGR
 
-def _to_pixel(float_array):
-    return tuple(np.clip(float_array.astype(np.uint8), 0, 255))
 
-
-def cluster_palette(image: Image, rgb_to_cam16):
+def cluster_palette(image: Image):
     colours_rgb = np.asarray(image).reshape((-1, 3))
     with colour.utilities.suppress_warnings(colour_usage_warnings=True):
-        colours_cam = colour.convert(colours_rgb / 255, "RGB",
+        colours_cam = colour.convert(colours_rgb, "RGB",
                                      "CAM16UCS").astype(np.float32)
 
     kmeans = KMeans(n_clusters=16, max_iter=10000)
@@ -41,10 +38,8 @@ def cluster_palette(image: Image, rgb_to_cam16):
         palette_rgb = np.clip(np.round(palette_rgb * 16).astype(np.uint32) *
                               16, 0, 255)
         palette_rgb = palette_rgb.astype(np.float32) / 255
+    return palette_rgb
 
-    return dither_pyx.dither_shr(
-        np.asarray(image).astype(np.float32) / 255, palette_rgb, rgb_to_cam16)
-    return working_image
 
 
 def main():
@@ -106,8 +101,8 @@ def main():
         image_py.resize(image, screen.X_RES, screen.Y_RES,
                         gamma=args.gamma_correct)).astype(np.float32) / 255
 
-    output_rgb = cluster_palette(Image.fromarray((rgb * 255).astype(
-        np.uint8)), rgb_to_cam16)
+    palette_rgb = cluster_palette(rgb)
+    output_rgb = dither_pyx.dither_shr(rgb, palette_rgb, rgb_to_cam16)
     output_srgb = image_py.linear_to_srgb(output_rgb).astype(np.uint8)
 
     # dither = dither_pattern.PATTERNS[args.dither]()

From 80885aabf93f8caa08711f0bffe047eee3f6c55b Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Tue, 9 Nov 2021 22:26:34 +0000
Subject: [PATCH 05/82] Working SHR version.  Still just uses a single palette

---
 convert.py | 32 ++++++++++++++++----------
 dither.pyx | 66 +++++++++++++++++++++++++++++++++++++++++++++++++-----
 screen.py  | 56 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 136 insertions(+), 18 deletions(-)

diff --git a/convert.py b/convert.py
index 074551a..369603f 100644
--- a/convert.py
+++ b/convert.py
@@ -34,11 +34,9 @@ def cluster_palette(image: Image):
     with colour.utilities.suppress_warnings(colour_usage_warnings=True):
         palette_rgb = colour.convert(palette_cam, "CAM16UCS", "RGB")
         # SHR colour palette only uses 4-bit values
-        # TODO: do this more carefully
-        palette_rgb = np.clip(np.round(palette_rgb * 16).astype(np.uint32) *
-                              16, 0, 255)
-        palette_rgb = palette_rgb.astype(np.float32) / 255
-    return palette_rgb
+        palette_rgb = np.round(palette_rgb * 15) / 15
+        # palette_rgb = palette_rgb.astype(np.float32) / 255
+    return palette_rgb.astype(np.float32)
 
 
 
@@ -96,13 +94,21 @@ def main():
     image = image_py.open(args.input)
     if args.show_input:
         image_py.resize(image, screen.X_RES, screen.Y_RES,
-                        srgb_output=True).show()
+                        srgb_output=False).show()
     rgb = np.array(
         image_py.resize(image, screen.X_RES, screen.Y_RES,
-                        gamma=args.gamma_correct)).astype(np.float32) / 255
+                        gamma=args.gamma_correct, srgb_output=True)).astype(
+        np.float32) / 255
 
     palette_rgb = cluster_palette(rgb)
-    output_rgb = dither_pyx.dither_shr(rgb, palette_rgb, rgb_to_cam16)
+    # print(palette_rgb)
+    # screen.set_palette(0, (image_py.linear_to_srgb_array(palette_rgb) *
+    #                        15).astype(np.uint8))
+    screen.set_palette(0, (np.round(palette_rgb * 15)).astype(np.uint8))
+
+    output_4bit = dither_pyx.dither_shr(rgb, palette_rgb, rgb_to_cam16)
+    screen.set_pixels(output_4bit)
+    output_rgb = (palette_rgb[output_4bit] * 255).astype(np.uint8)
     output_srgb = image_py.linear_to_srgb(output_rgb).astype(np.uint8)
 
     # dither = dither_pattern.PATTERNS[args.dither]()
@@ -120,18 +126,20 @@ def main():
     #         output_screen.bitmap_to_image_rgb(bitmap)).astype(np.uint8)
     out_image = image_py.resize(
         Image.fromarray(output_srgb), screen.X_RES, screen.Y_RES,
-        srgb_output=True)
+        srgb_output=False)  # XXX true
 
     if args.show_output:
         out_image.show()
 
     # Save Double hi-res image
-    # outfile = os.path.join(os.path.splitext(args.output)[0] + "-preview.png")
-    # out_image.save(outfile, "PNG")
-    # screen.pack(bitmap)
+    outfile = os.path.join(os.path.splitext(args.output)[0] + "-preview.png")
+    out_image.save(outfile, "PNG")
+    screen.pack()
     # with open(args.output, "wb") as f:
     #     f.write(bytes(screen.aux))
     #     f.write(bytes(screen.main))
+    with open(args.output, "wb") as f:
+        f.write(bytes(screen.memory))
 
 
 if __name__ == "__main__":
diff --git a/dither.pyx b/dither.pyx
index 95822da..fc23520 100644
--- a/dither.pyx
+++ b/dither.pyx
@@ -329,11 +329,13 @@ import colour
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def dither_shr(float[:, :, ::1] working_image, float[:, ::1] palette_rgb, float[:,::1] rgb_to_cam16ucs):
-    cdef int y, x, idx
+    cdef int y, x, idx, best_colour_idx
     cdef float best_distance, distance
     cdef float[::1] best_colour_rgb, pixel_cam, colour_rgb, colour_cam
     cdef float[3] quant_error
 
+    cdef (unsigned char)[:, ::1] output_4bit = np.zeros((200, 320), dtype=np.uint8)
+
     for y in range(200):
         print(y)
         for x in range(320):
@@ -341,12 +343,15 @@ def dither_shr(float[:, :, ::1] working_image, float[:, ::1] palette_rgb, float[
                 rgb_to_cam16ucs, working_image[y, x, 0], working_image[y, x, 1], working_image[y, x, 2])
 
             best_distance = 1e9
+            best_colour_idx = 0
             for idx, colour_rgb in enumerate(palette_rgb):
                 colour_cam = convert_rgb_to_cam16ucs(rgb_to_cam16ucs, colour_rgb[0], colour_rgb[1], colour_rgb[2])
                 distance = colour_distance_squared(pixel_cam, colour_cam)
                 if distance < best_distance:
                     best_distance = distance
                     best_colour_rgb = colour_rgb
+                    best_colour_idx = idx
+            output_4bit[y, x] = best_colour_idx
 
             for i in range(3):
                 quant_error[i] = working_image[y, x, i] - best_colour_rgb[i]
@@ -359,14 +364,63 @@ def dither_shr(float[:, :, ::1] working_image, float[:, ::1] palette_rgb, float[
                     working_image[y, x + 1, i] = clip(
                         working_image[y, x + 1, i] + quant_error[i] * (7 / 16), 0, 1)
                 if y < 199:
+                    if x > 0:
+                        working_image[y + 1, x - 1, i] = clip(
+                            working_image[y + 1, x - 1, i] + quant_error[i] * (3 / 16), 0,
+                            1)
                     working_image[y + 1, x, i] = clip(
                         working_image[y + 1, x, i] + quant_error[i] * (5 / 16), 0, 1)
                     if x < 319:
                         working_image[y + 1, x + 1, i] = clip(
                             working_image[y + 1, x + 1, i] + quant_error[i] * (1 / 16),
                             0, 1)
-                    if x > 0:
-                        working_image[y + 1, x - 1, i] = clip(
-                            working_image[y + 1, x - 1, i] + quant_error[i] * (3 / 16), 0,
-                            1)
-    return np.array(working_image).astype(np.float32) * 255
+
+#                # 0 0 X 7 5
+#                # 3 5 7 5 3
+#                # 1 3 5 3 1
+#                if x < 319:
+#                    working_image[y, x + 1, i] = clip(
+#                        working_image[y, x + 1, i] + quant_error[i] * (7 / 48), 0, 1)
+#                if x < 318:
+#                    working_image[y, x + 2, i] = clip(
+#                        working_image[y, x + 2, i] + quant_error[i] * (5 / 48), 0, 1)
+#                if y < 199:
+#                    if x > 1:
+#                        working_image[y + 1, x - 2, i] = clip(
+#                            working_image[y + 1, x - 2, i] + quant_error[i] * (3 / 48), 0,
+#                            1)
+#                    if x > 0:
+#                        working_image[y + 1, x - 1, i] = clip(
+#                            working_image[y + 1, x - 1, i] + quant_error[i] * (5 / 48), 0,
+#                            1)
+#                    working_image[y + 1, x, i] = clip(
+#                        working_image[y + 1, x, i] + quant_error[i] * (7 / 48), 0, 1)
+#                    if x < 319:
+#                        working_image[y + 1, x + 1, i] = clip(
+#                            working_image[y + 1, x + 1, i] + quant_error[i] * (5 / 48),
+#                            0, 1)
+#                    if x < 318:
+#                        working_image[y + 1, x + 2, i] = clip(
+#                            working_image[y + 1, x + 2, i] + quant_error[i] * (3 / 48),
+#                            0, 1)
+#                if y < 198:
+#                    if x > 1:
+#                        working_image[y + 2, x - 2, i] = clip(
+#                            working_image[y + 2, x - 2, i] + quant_error[i] * (1 / 48), 0,
+#                            1)
+#                    if x > 0:
+#                        working_image[y + 2, x - 1, i] = clip(
+#                            working_image[y + 2, x - 1, i] + quant_error[i] * (3 / 48), 0,
+#                            1)
+#                    working_image[y + 2, x, i] = clip(
+#                        working_image[y + 2, x, i] + quant_error[i] * (5 / 48), 0, 1)
+#                    if x < 319:
+#                        working_image[y + 2, x + 1, i] = clip(
+#                            working_image[y + 2, x + 1, i] + quant_error[i] * (3 / 48),
+#                            0, 1)
+#                    if x < 318:
+#                        working_image[y + 2, x + 2, i] = clip(
+#                            working_image[y + 2, x + 2, i] + quant_error[i] * (1 / 48),
+#                            0, 1)
+
+    return np.array(output_4bit, dtype=np.uint8)
\ No newline at end of file
diff --git a/screen.py b/screen.py
index d164be7..3c526dd 100644
--- a/screen.py
+++ b/screen.py
@@ -9,6 +9,62 @@ class SHR320Screen:
     X_RES = 320
     Y_RES = 200
 
+    def __init__(self):
+        self.palettes = {k: np.zeros((16, 3), dtype=np.uint8) for k in
+                         range(16)}
+        # Really 4-bit values, indexing into palette
+        self.pixels = np.array((self.Y_RES, self.X_RES), dtype=np.uint8)
+
+        # Choice of palette per scan-line
+        self.line_palette = np.zeros(self.Y_RES, dtype=np.uint8)
+
+        self.memory = None
+
+    def set_palette(self, idx: int, palette: np.array):
+        if idx < 0 or idx > 15:
+            raise ValueError("Palette index %s must be in range 0 .. 15" % idx)
+        if palette.shape != (16, 3):
+            raise ValueError("Palette size %s != (16, 3)" % palette.shape)
+        # XXX check element range
+        if palette.dtype != np.uint8:
+            raise ValueError("Palette must be of type np.uint8")
+        print(palette)
+        self.palettes[idx] = np.array(palette)
+
+    def set_pixels(self, pixels):
+        self.pixels = np.array(pixels)
+
+    def pack(self):
+        dump = np.zeros(32768, dtype=np.uint8)
+        for y in range(self.Y_RES):
+            pixel_pair = 0
+            for x in range(self.X_RES):
+                if x % 2 == 0:
+                    pixel_pair |= (self.pixels[y, x] << 4)
+                else:
+                    pixel_pair |= self.pixels[y, x]
+                    # print(pixel_pair)
+                    dump[y * 160 + (x - 1) // 2] = pixel_pair
+                    pixel_pair = 0
+
+        scan_control_offset = 320 * 200 // 2
+        for y in range(self.Y_RES):
+            dump[scan_control_offset + y] = self.line_palette[y]
+
+        palette_offset = scan_control_offset + 256
+        for palette_idx, palette in self.palettes.items():
+            for rgb_idx, rgb in enumerate(palette):
+                r, g, b = rgb
+                # print(r, g, b)
+                rgb_low = (g << 4) | b
+                rgb_hi = r
+                print(hex(rgb_hi), hex(rgb_low))
+                palette_idx_offset = palette_offset + (32 * palette_idx)
+                dump[palette_idx_offset + (2 * rgb_idx)] = rgb_low
+                dump[palette_idx_offset + (2 * rgb_idx + 1)] = rgb_hi
+
+        self.memory = dump
+
 
 class DHGRScreen:
     X_RES = 560

From fb52815412222af23b67ccce2fb4e7a8924e315f Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Tue, 9 Nov 2021 22:42:27 +0000
Subject: [PATCH 06/82] Experiment with striping 16 palettes contiguously
 across line ranges. As expected it has clear banding.  A better approach
 (though still not optimal) might be to assign lines to palettes randomly.

---
 convert.py | 36 +++++++++++++++++++++++-------------
 dither.pyx |  6 ++++--
 2 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/convert.py b/convert.py
index 369603f..51d1c92 100644
--- a/convert.py
+++ b/convert.py
@@ -28,15 +28,18 @@ def cluster_palette(image: Image):
         colours_cam = colour.convert(colours_rgb, "RGB",
                                      "CAM16UCS").astype(np.float32)
 
-    kmeans = KMeans(n_clusters=16, max_iter=10000)
-    kmeans.fit_predict(colours_cam)
-    palette_cam = kmeans.cluster_centers_
-    with colour.utilities.suppress_warnings(colour_usage_warnings=True):
-        palette_rgb = colour.convert(palette_cam, "CAM16UCS", "RGB")
-        # SHR colour palette only uses 4-bit values
-        palette_rgb = np.round(palette_rgb * 15) / 15
-        # palette_rgb = palette_rgb.astype(np.float32) / 255
-    return palette_rgb.astype(np.float32)
+    palettes_rgb = {}
+    for palette in range(16):
+        kmeans = KMeans(n_clusters=16, max_iter=10000)
+        kmeans.fit_predict(colours_cam[palette*320*12:(palette+1)*320*12])
+        palette_cam = kmeans.cluster_centers_
+        with colour.utilities.suppress_warnings(colour_usage_warnings=True):
+            palette_rgb = colour.convert(palette_cam, "CAM16UCS", "RGB")
+            # SHR colour palette only uses 4-bit values
+            palette_rgb = np.round(palette_rgb * 15) / 15
+            # palette_rgb = palette_rgb.astype(np.float32) / 255
+            palettes_rgb[palette] = palette_rgb.astype(np.float32)
+    return palettes_rgb
 
 
 
@@ -100,15 +103,22 @@ def main():
                         gamma=args.gamma_correct, srgb_output=True)).astype(
         np.float32) / 255
 
-    palette_rgb = cluster_palette(rgb)
+    palettes_rgb = cluster_palette(rgb)
     # print(palette_rgb)
     # screen.set_palette(0, (image_py.linear_to_srgb_array(palette_rgb) *
     #                        15).astype(np.uint8))
-    screen.set_palette(0, (np.round(palette_rgb * 15)).astype(np.uint8))
+    for i, p in palettes_rgb.items():
+        screen.set_palette(i, (np.round(p * 15)).astype(np.uint8))
 
-    output_4bit = dither_pyx.dither_shr(rgb, palette_rgb, rgb_to_cam16)
+    output_4bit = dither_pyx.dither_shr(rgb, palettes_rgb, rgb_to_cam16)
     screen.set_pixels(output_4bit)
-    output_rgb = (palette_rgb[output_4bit] * 255).astype(np.uint8)
+    for i in range(200):
+        screen.line_palette[i] = i // 12
+    output_rgb = np.zeros((200, 320, 3), dtype=np.uint8)
+    for i, p in palettes_rgb.items():
+        output_rgb[i*12:(i+1)*12, :, :] = (p[output_4bit[i*12:(i+1)*12,
+                                            :]] * 255).astype(
+            np.uint8)
     output_srgb = image_py.linear_to_srgb(output_rgb).astype(np.uint8)
 
     # dither = dither_pattern.PATTERNS[args.dither]()
diff --git a/dither.pyx b/dither.pyx
index fc23520..8524d56 100644
--- a/dither.pyx
+++ b/dither.pyx
@@ -328,16 +328,18 @@ import colour
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def dither_shr(float[:, :, ::1] working_image, float[:, ::1] palette_rgb, float[:,::1] rgb_to_cam16ucs):
+def dither_shr(float[:, :, ::1] working_image, object palettes_rgb, float[:,::1] rgb_to_cam16ucs):
     cdef int y, x, idx, best_colour_idx
     cdef float best_distance, distance
     cdef float[::1] best_colour_rgb, pixel_cam, colour_rgb, colour_cam
     cdef float[3] quant_error
+    cdef float[:, ::1] palette_rgb
 
     cdef (unsigned char)[:, ::1] output_4bit = np.zeros((200, 320), dtype=np.uint8)
 
-    for y in range(200):
+    for y in range(192):
         print(y)
+        palette_rgb = palettes_rgb[y // 12]
         for x in range(320):
             pixel_cam = convert_rgb_to_cam16ucs(
                 rgb_to_cam16ucs, working_image[y, x, 0], working_image[y, x, 1], working_image[y, x, 2])

From 322123522c9cfd51aeb98160868705cbf6c62911 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Wed, 10 Nov 2021 00:34:17 +0000
Subject: [PATCH 07/82] Assign scan lines randomly to palettes and cluster
 independently.  This doesn't give good results either, since neighbouring
 lines end up getting similar but not identical colours, which still results
 in horizontal striping.

---
 convert.py | 35 ++++++++++++++++++++++++-----------
 dither.pyx |  6 +++---
 2 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/convert.py b/convert.py
index 51d1c92..5171b21 100644
--- a/convert.py
+++ b/convert.py
@@ -4,6 +4,8 @@ import argparse
 import array
 import os.path
 import time
+import collections
+import random
 
 import colour
 from PIL import Image
@@ -23,15 +25,27 @@ import screen as screen_py
 
 
 def cluster_palette(image: Image):
+    shuffle_lines = list(range(200))
+    random.shuffle(shuffle_lines)
+    line_to_palette = {}
+    for idx, line in enumerate(shuffle_lines):
+        line_to_palette[line] = idx % 16
+
     colours_rgb = np.asarray(image).reshape((-1, 3))
     with colour.utilities.suppress_warnings(colour_usage_warnings=True):
         colours_cam = colour.convert(colours_rgb, "RGB",
                                      "CAM16UCS").astype(np.float32)
 
     palettes_rgb = {}
+    palette_colours = collections.defaultdict(list)
+    for line in range(200):
+        palette = line_to_palette[line]
+        palette_colours[palette].extend(
+            colours_cam[line * 320:(line + 1) * 320])
+
     for palette in range(16):
         kmeans = KMeans(n_clusters=16, max_iter=10000)
-        kmeans.fit_predict(colours_cam[palette*320*12:(palette+1)*320*12])
+        kmeans.fit_predict(palette_colours[palette])
         palette_cam = kmeans.cluster_centers_
         with colour.utilities.suppress_warnings(colour_usage_warnings=True):
             palette_rgb = colour.convert(palette_cam, "CAM16UCS", "RGB")
@@ -39,8 +53,7 @@ def cluster_palette(image: Image):
             palette_rgb = np.round(palette_rgb * 15) / 15
             # palette_rgb = palette_rgb.astype(np.float32) / 255
             palettes_rgb[palette] = palette_rgb.astype(np.float32)
-    return palettes_rgb
-
+    return palettes_rgb, line_to_palette
 
 
 def main():
@@ -103,22 +116,22 @@ def main():
                         gamma=args.gamma_correct, srgb_output=True)).astype(
         np.float32) / 255
 
-    palettes_rgb = cluster_palette(rgb)
+    palettes_rgb, line_to_palette = cluster_palette(rgb)
     # print(palette_rgb)
     # screen.set_palette(0, (image_py.linear_to_srgb_array(palette_rgb) *
     #                        15).astype(np.uint8))
     for i, p in palettes_rgb.items():
         screen.set_palette(i, (np.round(p * 15)).astype(np.uint8))
 
-    output_4bit = dither_pyx.dither_shr(rgb, palettes_rgb, rgb_to_cam16)
+    output_4bit = dither_pyx.dither_shr(rgb, palettes_rgb, rgb_to_cam16,
+                                        line_to_palette)
     screen.set_pixels(output_4bit)
-    for i in range(200):
-        screen.line_palette[i] = i // 12
     output_rgb = np.zeros((200, 320, 3), dtype=np.uint8)
-    for i, p in palettes_rgb.items():
-        output_rgb[i*12:(i+1)*12, :, :] = (p[output_4bit[i*12:(i+1)*12,
-                                            :]] * 255).astype(
-            np.uint8)
+    for i in range(200):
+        screen.line_palette[i] = line_to_palette[i]
+        output_rgb[i, :, :] = (
+                    palettes_rgb[line_to_palette[i]][
+                        output_4bit[i, :]] * 255).astype(np.uint8)
     output_srgb = image_py.linear_to_srgb(output_rgb).astype(np.uint8)
 
     # dither = dither_pattern.PATTERNS[args.dither]()
diff --git a/dither.pyx b/dither.pyx
index 8524d56..8029de7 100644
--- a/dither.pyx
+++ b/dither.pyx
@@ -328,7 +328,7 @@ import colour
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def dither_shr(float[:, :, ::1] working_image, object palettes_rgb, float[:,::1] rgb_to_cam16ucs):
+def dither_shr(float[:, :, ::1] working_image, object palettes_rgb, float[:,::1] rgb_to_cam16ucs, object line_to_palette):
     cdef int y, x, idx, best_colour_idx
     cdef float best_distance, distance
     cdef float[::1] best_colour_rgb, pixel_cam, colour_rgb, colour_cam
@@ -337,9 +337,9 @@ def dither_shr(float[:, :, ::1] working_image, object palettes_rgb, float[:,::1]
 
     cdef (unsigned char)[:, ::1] output_4bit = np.zeros((200, 320), dtype=np.uint8)
 
-    for y in range(192):
+    for y in range(200):
         print(y)
-        palette_rgb = palettes_rgb[y // 12]
+        palette_rgb = palettes_rgb[line_to_palette[y]]
         for x in range(320):
             pixel_cam = convert_rgb_to_cam16ucs(
                 rgb_to_cam16ucs, working_image[y, x, 0], working_image[y, x, 1], working_image[y, x, 2])

From 8c34d872165785899708e067805857559658b339 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Wed, 10 Nov 2021 18:30:39 +0000
Subject: [PATCH 08/82] WIP - interleave 3 successive palettes for each
 contiguous row range. Avoids the banding but not clear if it's overall better

Also implement my own k-means clustering which is able to keep some
centroids fixed, e.g. to be able to retain some fixed palette entries
while swapping out others.  I was hoping this would improve colour
blending across neighbouring palettes but it's also not clear if it
does.
---
 convert.py | 51 +++++++++++++++++++++++++++++++++++-------------
 dither.pyx | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 94 insertions(+), 14 deletions(-)

diff --git a/convert.py b/convert.py
index 5171b21..efc74c9 100644
--- a/convert.py
+++ b/convert.py
@@ -23,13 +23,29 @@ import screen as screen_py
 # - support LR/DLR
 # - support HGR
 
-
 def cluster_palette(image: Image):
-    shuffle_lines = list(range(200))
-    random.shuffle(shuffle_lines)
     line_to_palette = {}
-    for idx, line in enumerate(shuffle_lines):
-        line_to_palette[line] = idx % 16
+
+    #shuffle_lines = liprint(st(range(200))
+    #random.shuffle(shuffle_lines)
+    #for idx, line in enumerate(shuffle_lines):
+    #    line_to_palette[line] = idx % 16
+
+    # for line in range(200):
+    #     if line % 3 == 0:
+    #         line_to_palette[line] = int(line / (200 / 16))
+    #     elif line % 3 == 1:
+    #         line_to_palette[line] = np.clip(int(line / (200 / 16)) + 1, 0, 15)
+    #     else:
+    #         line_to_palette[line] = np.clip(int(line / (200 / 16)) + 2, 0, 15)
+
+    for line in range(200):
+        if line % 3 == 0:
+            line_to_palette[line] = int(line / (200 / 16))
+        elif line % 3 == 1:
+            line_to_palette[line] = np.clip(int(line / (200 / 16)) + 1, 0, 15)
+        else:
+            line_to_palette[line] = np.clip(int(line / (200 / 16)) + 2, 0, 15)
 
     colours_rgb = np.asarray(image).reshape((-1, 3))
     with colour.utilities.suppress_warnings(colour_usage_warnings=True):
@@ -43,16 +59,25 @@ def cluster_palette(image: Image):
         palette_colours[palette].extend(
             colours_cam[line * 320:(line + 1) * 320])
 
-    for palette in range(16):
-        kmeans = KMeans(n_clusters=16, max_iter=10000)
-        kmeans.fit_predict(palette_colours[palette])
-        palette_cam = kmeans.cluster_centers_
+    # For each line grouping, find big palette entries with minimal total
+    # distance
+
+    palette_cam = None
+    for palette_idx in range(16):
+        line_colours = palette_colours[palette_idx]
+        # if palette_idx > 0:
+        #     fixed_centroids = palette_cam[:8, :]
+        # else:
+        fixed_centroids = None
+        # print(np.array(line_colours), fixed_centroids)
+        palette_cam = dither_pyx.k_means_with_fixed_centroids(16, np.array(
+            line_colours), fixed_centroids=fixed_centroids, tolerance=1e-6)
         with colour.utilities.suppress_warnings(colour_usage_warnings=True):
             palette_rgb = colour.convert(palette_cam, "CAM16UCS", "RGB")
             # SHR colour palette only uses 4-bit values
             palette_rgb = np.round(palette_rgb * 15) / 15
-            # palette_rgb = palette_rgb.astype(np.float32) / 255
-            palettes_rgb[palette] = palette_rgb.astype(np.float32)
+            palettes_rgb[palette_idx] = palette_rgb.astype(np.float32)
+    # print(palettes_rgb)
     return palettes_rgb, line_to_palette
 
 
@@ -130,8 +155,8 @@ def main():
     for i in range(200):
         screen.line_palette[i] = line_to_palette[i]
         output_rgb[i, :, :] = (
-                    palettes_rgb[line_to_palette[i]][
-                        output_4bit[i, :]] * 255).astype(np.uint8)
+                palettes_rgb[line_to_palette[i]][
+                    output_4bit[i, :]] * 255).astype(np.uint8)
     output_srgb = image_py.linear_to_srgb(output_rgb).astype(np.uint8)
 
     # dither = dither_pattern.PATTERNS[args.dither]()
diff --git a/dither.pyx b/dither.pyx
index 8029de7..327c6c3 100644
--- a/dither.pyx
+++ b/dither.pyx
@@ -425,4 +425,59 @@ def dither_shr(float[:, :, ::1] working_image, object palettes_rgb, float[:,::1]
 #                            working_image[y + 2, x + 2, i] + quant_error[i] * (1 / 48),
 #                            0, 1)
 
-    return np.array(output_4bit, dtype=np.uint8)
\ No newline at end of file
+    return np.array(output_4bit, dtype=np.uint8)
+
+import collections
+import random
+
+def k_means_with_fixed_centroids(
+        int n_clusters, float[:, ::1] data, float[:, ::1] fixed_centroids = None,
+        int iterations = 10000, float tolerance = 1e-3):
+    cdef int i, iteration, centroid_idx, num_fixed_centroids, num_random_centroids, best_centroid_idx
+    cdef float[::1] point, centroid, new_centroid, old_centroid
+    cdef float[:, ::1] centroids
+    cdef float best_dist, centroid_movement
+
+    centroids = np.zeros((n_clusters, 3), dtype=np.float32)
+    if fixed_centroids is not None:
+        centroids[:fixed_centroids.shape[0], :] = fixed_centroids
+    num_fixed_centroids = fixed_centroids.shape[0] if fixed_centroids is not None else 0
+    num_random_centroids = n_clusters - num_fixed_centroids
+
+    # TODO: kmeans++ initialization
+    for i in range(num_random_centroids):
+        centroids[num_fixed_centroids + i, :] = data[
+            random.randint(0, data.shape[0]), :]
+
+    cdef int[::1] centroid_weights = np.zeros(n_clusters, dtype=np.int32)
+    for iteration in range(iterations):
+        # print("centroids ", centroids)
+        closest_points = collections.defaultdict(list)
+        for point in data:
+            best_dist = 1e9
+            best_centroid_idx = 0
+            for centroid_idx, centroid in enumerate(centroids):
+                dist = colour_distance_squared(centroid, point)
+                if dist < best_dist:
+                    best_dist = dist
+                    best_centroid_idx = centroid_idx
+            closest_points[best_centroid_idx].append(point)
+
+        centroid_movement = 0
+
+        for centroid_idx, points in closest_points.items():
+            centroid_weights[centroid_idx] = len(points)
+            if centroid_idx < num_fixed_centroids:
+                continue
+            new_centroid = np.mean(np.array(points), axis=0)
+            old_centroid = centroids[centroid_idx]
+            centroid_movement += colour_distance_squared(old_centroid, new_centroid)
+            centroids[centroid_idx, :] = new_centroid
+        # print("iteration %d: movement %f" % (iteration, centroid_movement))
+        if centroid_movement < tolerance:
+            break
+
+    weighted_centroids = list(zip(centroid_weights, [tuple(c) for c in centroids]))
+    print(weighted_centroids)
+    return np.array([c for w, c in sorted(weighted_centroids, reverse=True)], dtype=np.float32)
+

From ee2229d0eada22a467980e9fd71213de030ae48e Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Thu, 11 Nov 2021 11:10:22 +0000
Subject: [PATCH 09/82] * Modify Floyd-Steinberg dithering to diffuse less
 error in the y direction.  Otherwise, errors can accumulate in an RGB
 channel if there are no palette colours with an extremal value, and then
 when we introduce a new palette the error all suddenly discharges in a
 spurious horizontal line.  This now gives quite good results!

* Switch to using L1-norm for k-means, per suggestion of Lucas
  Scharenbroich: "A k-medians effectively uses an L1 distance metric
  instead of L2 for k-means.  Using a squared distance metric causes
  the fit to "fall off" too quickly and allows too many of the k
  centroids to cluster around areas of high density, which results in
  many similar colors being selected.  A linear cost function forces
  the centroids to spread out since the error influence has a broader
  range."
---
 convert.py |   9 ++++
 dither.pyx | 127 +++++++++++++++++++++++++++++------------------------
 screen.py  |   4 +-
 3 files changed, 80 insertions(+), 60 deletions(-)

diff --git a/convert.py b/convert.py
index efc74c9..12cc424 100644
--- a/convert.py
+++ b/convert.py
@@ -65,6 +65,10 @@ def cluster_palette(image: Image):
     palette_cam = None
     for palette_idx in range(16):
         line_colours = palette_colours[palette_idx]
+        #if palette_idx < 15:
+        #    line_colours += palette_colours[palette_idx + 1]
+        # if palette_idx < 14:
+        #     line_colours += palette_colours[palette_idx + 2]
         # if palette_idx > 0:
         #     fixed_centroids = palette_cam[:8, :]
         # else:
@@ -72,6 +76,11 @@ def cluster_palette(image: Image):
         # print(np.array(line_colours), fixed_centroids)
         palette_cam = dither_pyx.k_means_with_fixed_centroids(16, np.array(
             line_colours), fixed_centroids=fixed_centroids, tolerance=1e-6)
+
+        #kmeans = KMeans(n_clusters=16, max_iter=10000)
+        #kmeans.fit_predict(line_colours)
+        #palette_cam = kmeans.cluster_centers_
+
         with colour.utilities.suppress_warnings(colour_usage_warnings=True):
             palette_rgb = colour.convert(palette_cam, "CAM16UCS", "RGB")
             # SHR colour palette only uses 4-bit values
diff --git a/dither.pyx b/dither.pyx
index 327c6c3..ff165cf 100644
--- a/dither.pyx
+++ b/dither.pyx
@@ -163,6 +163,11 @@ cdef inline float[::1] convert_rgb_to_cam16ucs(float[:, ::1] rgb_to_cam16ucs, fl
     cdef int rgb_24bit = (<int>(r*255) << 16) + (<int>(g*255) << 8) + <int>(b*255)
     return rgb_to_cam16ucs[rgb_24bit]
 
+@cython.boundscheck(False)
+@cython.wraparound(False)
+cdef inline float fabs(float value) nogil:
+    return -value if value < 0 else value
+
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
@@ -170,6 +175,12 @@ cdef inline float colour_distance_squared(float[::1] colour1, float[::1] colour2
     return (colour1[0] - colour2[0]) ** 2 + (colour1[1] - colour2[1]) ** 2 + (colour1[2] - colour2[2]) ** 2
 
 
+@cython.boundscheck(False)
+@cython.wraparound(False)
+cdef inline float colour_distance(float[::1] colour1, float[::1] colour2) nogil:
+    return fabs(colour1[0] - colour2[0]) + fabs(colour1[1] - colour2[1]) + fabs(colour1[2] - colour2[2])
+
+
 # Perform error diffusion to a single image row.
 #
 # Args:
@@ -326,16 +337,17 @@ def dither_image(
 
 import colour
 
-@cython.boundscheck(False)
+@cython.boundscheck(True)
 @cython.wraparound(False)
 def dither_shr(float[:, :, ::1] working_image, object palettes_rgb, float[:,::1] rgb_to_cam16ucs, object line_to_palette):
     cdef int y, x, idx, best_colour_idx
     cdef float best_distance, distance
     cdef float[::1] best_colour_rgb, pixel_cam, colour_rgb, colour_cam
-    cdef float[3] quant_error
+    cdef float quant_error
     cdef float[:, ::1] palette_rgb
 
     cdef (unsigned char)[:, ::1] output_4bit = np.zeros((200, 320), dtype=np.uint8)
+    # cdef (unsigned char)[:, :, ::1] output_rgb = np.zeros((200, 320, 3), dtype=np.uint8)
 
     for y in range(200):
         print(y)
@@ -345,18 +357,19 @@ def dither_shr(float[:, :, ::1] working_image, object palettes_rgb, float[:,::1]
                 rgb_to_cam16ucs, working_image[y, x, 0], working_image[y, x, 1], working_image[y, x, 2])
 
             best_distance = 1e9
-            best_colour_idx = 0
+            best_colour_idx = -1
             for idx, colour_rgb in enumerate(palette_rgb):
                 colour_cam = convert_rgb_to_cam16ucs(rgb_to_cam16ucs, colour_rgb[0], colour_rgb[1], colour_rgb[2])
                 distance = colour_distance_squared(pixel_cam, colour_cam)
                 if distance < best_distance:
                     best_distance = distance
-                    best_colour_rgb = colour_rgb
                     best_colour_idx = idx
+            best_colour_rgb = palette_rgb[best_colour_idx]
             output_4bit[y, x] = best_colour_idx
 
             for i in range(3):
-                quant_error[i] = working_image[y, x, i] - best_colour_rgb[i]
+                # output_rgb[y,x,i] = <int>(best_colour_rgb[i] * 255)
+                quant_error = working_image[y, x, i] - best_colour_rgb[i]
 
                 # Floyd-Steinberg dither
                 # 0 * 7
@@ -364,68 +377,66 @@ def dither_shr(float[:, :, ::1] working_image, object palettes_rgb, float[:,::1]
                 working_image[y, x, i] = best_colour_rgb[i]
                 if x < 319:
                     working_image[y, x + 1, i] = clip(
-                        working_image[y, x + 1, i] + quant_error[i] * (7 / 16), 0, 1)
+                        working_image[y, x + 1, i] + quant_error * (7 / 16), 0, 1)
                 if y < 199:
                     if x > 0:
                         working_image[y + 1, x - 1, i] = clip(
-                            working_image[y + 1, x - 1, i] + quant_error[i] * (3 / 16), 0,
-                            1)
+                            working_image[y + 1, x - 1, i] + quant_error * (3 / 32), 0, 1)
                     working_image[y + 1, x, i] = clip(
-                        working_image[y + 1, x, i] + quant_error[i] * (5 / 16), 0, 1)
+                        working_image[y + 1, x, i] + quant_error * (5 / 32), 0, 1)
                     if x < 319:
                         working_image[y + 1, x + 1, i] = clip(
-                            working_image[y + 1, x + 1, i] + quant_error[i] * (1 / 16),
-                            0, 1)
+                            working_image[y + 1, x + 1, i] + quant_error * (1 / 32), 0, 1)
 
 #                # 0 0 X 7 5
 #                # 3 5 7 5 3
 #                # 1 3 5 3 1
-#                if x < 319:
-#                    working_image[y, x + 1, i] = clip(
-#                        working_image[y, x + 1, i] + quant_error[i] * (7 / 48), 0, 1)
-#                if x < 318:
-#                    working_image[y, x + 2, i] = clip(
-#                        working_image[y, x + 2, i] + quant_error[i] * (5 / 48), 0, 1)
-#                if y < 199:
-#                    if x > 1:
-#                        working_image[y + 1, x - 2, i] = clip(
-#                            working_image[y + 1, x - 2, i] + quant_error[i] * (3 / 48), 0,
-#                            1)
-#                    if x > 0:
-#                        working_image[y + 1, x - 1, i] = clip(
-#                            working_image[y + 1, x - 1, i] + quant_error[i] * (5 / 48), 0,
-#                            1)
-#                    working_image[y + 1, x, i] = clip(
-#                        working_image[y + 1, x, i] + quant_error[i] * (7 / 48), 0, 1)
-#                    if x < 319:
-#                        working_image[y + 1, x + 1, i] = clip(
-#                            working_image[y + 1, x + 1, i] + quant_error[i] * (5 / 48),
-#                            0, 1)
-#                    if x < 318:
-#                        working_image[y + 1, x + 2, i] = clip(
-#                            working_image[y + 1, x + 2, i] + quant_error[i] * (3 / 48),
-#                            0, 1)
-#                if y < 198:
-#                    if x > 1:
-#                        working_image[y + 2, x - 2, i] = clip(
-#                            working_image[y + 2, x - 2, i] + quant_error[i] * (1 / 48), 0,
-#                            1)
-#                    if x > 0:
-#                        working_image[y + 2, x - 1, i] = clip(
-#                            working_image[y + 2, x - 1, i] + quant_error[i] * (3 / 48), 0,
-#                            1)
-#                    working_image[y + 2, x, i] = clip(
-#                        working_image[y + 2, x, i] + quant_error[i] * (5 / 48), 0, 1)
-#                    if x < 319:
-#                        working_image[y + 2, x + 1, i] = clip(
-#                            working_image[y + 2, x + 1, i] + quant_error[i] * (3 / 48),
-#                            0, 1)
-#                    if x < 318:
-#                        working_image[y + 2, x + 2, i] = clip(
-#                            working_image[y + 2, x + 2, i] + quant_error[i] * (1 / 48),
-#                            0, 1)
+                #if x < 319:
+                #    working_image[y, x + 1, i] = clip(
+                #        working_image[y, x + 1, i] + quant_error * (7 / 48), 0, 1)
+                #if x < 318:
+                #    working_image[y, x + 2, i] = clip(
+                #        working_image[y, x + 2, i] + quant_error * (5 / 48), 0, 1)
+                #if y < 199:
+                #    if x > 1:
+                #        working_image[y + 1, x - 2, i] = clip(
+                #            working_image[y + 1, x - 2, i] + quant_error * (3 / 48), 0,
+                #            1)
+                #    if x > 0:
+                #        working_image[y + 1, x - 1, i] = clip(
+                #            working_image[y + 1, x - 1, i] + quant_error * (5 / 48), 0,
+                #            1)
+                #    working_image[y + 1, x, i] = clip(
+                #        working_image[y + 1, x, i] + quant_error * (7 / 48), 0, 1)
+                #    if x < 319:
+                #        working_image[y + 1, x + 1, i] = clip(
+                #            working_image[y + 1, x + 1, i] + quant_error * (5 / 48),
+                #            0, 1)
+                #    if x < 318:
+                #        working_image[y + 1, x + 2, i] = clip(
+                #            working_image[y + 1, x + 2, i] + quant_error * (3 / 48),
+                #            0, 1)
+                #if y < 198:
+                #    if x > 1:
+                #        working_image[y + 2, x - 2, i] = clip(
+                #            working_image[y + 2, x - 2, i] + quant_error * (1 / 48), 0,
+                #            1)
+                #    if x > 0:
+                #        working_image[y + 2, x - 1, i] = clip(
+                #            working_image[y + 2, x - 1, i] + quant_error * (3 / 48), 0,
+                #            1)
+                #    working_image[y + 2, x, i] = clip(
+                #        working_image[y + 2, x, i] + quant_error * (5 / 48), 0, 1)
+                #    if x < 319:
+                #        working_image[y + 2, x + 1, i] = clip(
+                #            working_image[y + 2, x + 1, i] + quant_error * (3 / 48),
+                #            0, 1)
+                #    if x < 318:
+                #        working_image[y + 2, x + 2, i] = clip(
+                #            working_image[y + 2, x + 2, i] + quant_error * (1 / 48),
+                #            0, 1)
 
-    return np.array(output_4bit, dtype=np.uint8)
+    return np.array(output_4bit, dtype=np.uint8) #, np.array(output_rgb, dtype=np.uint8)
 
 import collections
 import random
@@ -457,7 +468,7 @@ def k_means_with_fixed_centroids(
             best_dist = 1e9
             best_centroid_idx = 0
             for centroid_idx, centroid in enumerate(centroids):
-                dist = colour_distance_squared(centroid, point)
+                dist = colour_distance(centroid, point)
                 if dist < best_dist:
                     best_dist = dist
                     best_centroid_idx = centroid_idx
@@ -471,7 +482,7 @@ def k_means_with_fixed_centroids(
                 continue
             new_centroid = np.mean(np.array(points), axis=0)
             old_centroid = centroids[centroid_idx]
-            centroid_movement += colour_distance_squared(old_centroid, new_centroid)
+            centroid_movement += colour_distance(old_centroid, new_centroid)
             centroids[centroid_idx, :] = new_centroid
         # print("iteration %d: movement %f" % (iteration, centroid_movement))
         if centroid_movement < tolerance:
diff --git a/screen.py b/screen.py
index 3c526dd..dd0b554 100644
--- a/screen.py
+++ b/screen.py
@@ -28,7 +28,7 @@ class SHR320Screen:
         # XXX check element range
         if palette.dtype != np.uint8:
             raise ValueError("Palette must be of type np.uint8")
-        print(palette)
+        # print(palette)
         self.palettes[idx] = np.array(palette)
 
     def set_pixels(self, pixels):
@@ -58,7 +58,7 @@ class SHR320Screen:
                 # print(r, g, b)
                 rgb_low = (g << 4) | b
                 rgb_hi = r
-                print(hex(rgb_hi), hex(rgb_low))
+                # print(hex(rgb_hi), hex(rgb_low))
                 palette_idx_offset = palette_offset + (32 * palette_idx)
                 dump[palette_idx_offset + (2 * rgb_idx)] = rgb_low
                 dump[palette_idx_offset + (2 * rgb_idx + 1)] = rgb_hi

From 5cab8542698c29562d43ebafd9ec6bd2687f6936 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Thu, 11 Nov 2021 16:10:03 +0000
Subject: [PATCH 10/82] Fit palettes from overlapping line ranges, and map line
 to palette when dithering with two limitations:

- cannot choose the same palette as the previous line (this avoids banding)
- must be within +/- 1 of the "base" palette for the line number

This gives pretty good results!
---
 convert.py | 75 ++++++++++++++++++++++++++++++++++++------------------
 dither.pyx | 52 ++++++++++++++++++++++++++++++++++---
 2 files changed, 99 insertions(+), 28 deletions(-)

diff --git a/convert.py b/convert.py
index 12cc424..53f8daa 100644
--- a/convert.py
+++ b/convert.py
@@ -51,43 +51,68 @@ def cluster_palette(image: Image):
     with colour.utilities.suppress_warnings(colour_usage_warnings=True):
         colours_cam = colour.convert(colours_rgb, "RGB",
                                      "CAM16UCS").astype(np.float32)
-
     palettes_rgb = {}
-    palette_colours = collections.defaultdict(list)
-    for line in range(200):
-        palette = line_to_palette[line]
-        palette_colours[palette].extend(
-            colours_cam[line * 320:(line + 1) * 320])
+    palettes_cam = {}
+    for palette_idx in range(16):
+        p_lower = max(palette_idx-2, 0)
+        p_upper = min(palette_idx+2, 16)
+        palette_pixels = colours_cam[
+                         int(p_lower * (200/16)) * 320:int(p_upper * (
+                                 200/16)) * 320, :]
+
+        # kmeans = KMeans(n_clusters=16, max_iter=10000)
+        # kmeans.fit_predict(palette_pixels)
+        # palettes_cam[palette_idx] = kmeans.cluster_centers_
+
+        fixed_centroids = None
+        # print(np.array(line_colours), fixed_centroids)
+        palettes_cam[palette_idx] = dither_pyx.k_means_with_fixed_centroids(
+            16, palette_pixels, fixed_centroids=fixed_centroids, tolerance=1e-6)
+
+        # palette_colours = collections.defaultdict(list)
+    # for line in range(200):
+    #     palette = line_to_palette[line]
+    #     palette_colours[palette].extend(
+    #         colours_cam[line * 320:(line + 1) * 320])
 
     # For each line grouping, find big palette entries with minimal total
     # distance
 
-    palette_cam = None
-    for palette_idx in range(16):
-        line_colours = palette_colours[palette_idx]
-        #if palette_idx < 15:
-        #    line_colours += palette_colours[palette_idx + 1]
-        # if palette_idx < 14:
-        #     line_colours += palette_colours[palette_idx + 2]
-        # if palette_idx > 0:
-        #     fixed_centroids = palette_cam[:8, :]
-        # else:
-        fixed_centroids = None
-        # print(np.array(line_colours), fixed_centroids)
-        palette_cam = dither_pyx.k_means_with_fixed_centroids(16, np.array(
-            line_colours), fixed_centroids=fixed_centroids, tolerance=1e-6)
+    # palette_cam = None
+    # for palette_idx in range(16):
+    #     line_colours = palette_colours[palette_idx]
+    #     #if palette_idx < 15:
+    #     #    line_colours += palette_colours[palette_idx + 1]
+    #     # if palette_idx < 14:
+    #     #     line_colours += palette_colours[palette_idx + 2]
+    #     # if palette_idx > 0:
+    #     #     fixed_centroids = palette_cam[:8, :]
+    #     # else:
+    #     fixed_centroids = None
+    #     # print(np.array(line_colours), fixed_centroids)
+    #     palette_cam = dither_pyx.k_means_with_fixed_centroids(16, np.array(
+    #         line_colours), fixed_centroids=fixed_centroids, tolerance=1e-6)
 
         #kmeans = KMeans(n_clusters=16, max_iter=10000)
         #kmeans.fit_predict(line_colours)
         #palette_cam = kmeans.cluster_centers_
 
         with colour.utilities.suppress_warnings(colour_usage_warnings=True):
-            palette_rgb = colour.convert(palette_cam, "CAM16UCS", "RGB")
+            palette_rgb = colour.convert(palettes_cam[palette_idx], "CAM16UCS", "RGB")
             # SHR colour palette only uses 4-bit values
             palette_rgb = np.round(palette_rgb * 15) / 15
             palettes_rgb[palette_idx] = palette_rgb.astype(np.float32)
     # print(palettes_rgb)
-    return palettes_rgb, line_to_palette
+
+    # For each line, pick the palette with lowest total distance
+    # best_palette = 15
+    # for line in range(200):
+    #     line_pixels = colours_cam[line*320:(line+1)*320]
+    #     best_palette = dither_pyx.best_palette_for_line(
+    #         line_pixels, palettes_cam, best_palette)
+    #     line_to_palette[line] = best_palette
+    #     print(line, line_to_palette[line])
+    return palettes_cam, palettes_rgb, line_to_palette
 
 
 def main():
@@ -150,15 +175,15 @@ def main():
                         gamma=args.gamma_correct, srgb_output=True)).astype(
         np.float32) / 255
 
-    palettes_rgb, line_to_palette = cluster_palette(rgb)
+    palettes_cam, palettes_rgb, line_to_palette = cluster_palette(rgb)
     # print(palette_rgb)
     # screen.set_palette(0, (image_py.linear_to_srgb_array(palette_rgb) *
     #                        15).astype(np.uint8))
     for i, p in palettes_rgb.items():
         screen.set_palette(i, (np.round(p * 15)).astype(np.uint8))
 
-    output_4bit = dither_pyx.dither_shr(rgb, palettes_rgb, rgb_to_cam16,
-                                        line_to_palette)
+    output_4bit, line_to_palette = dither_pyx.dither_shr(
+        rgb, palettes_cam, palettes_rgb, rgb_to_cam16)
     screen.set_pixels(output_4bit)
     output_rgb = np.zeros((200, 320, 3), dtype=np.uint8)
     for i in range(200):
diff --git a/dither.pyx b/dither.pyx
index ff165cf..31277df 100644
--- a/dither.pyx
+++ b/dither.pyx
@@ -339,7 +339,7 @@ import colour
 
 @cython.boundscheck(True)
 @cython.wraparound(False)
-def dither_shr(float[:, :, ::1] working_image, object palettes_rgb, float[:,::1] rgb_to_cam16ucs, object line_to_palette):
+def dither_shr(float[:, :, ::1] working_image, object palettes_cam, object palettes_rgb, float[:,::1] rgb_to_cam16ucs):
     cdef int y, x, idx, best_colour_idx
     cdef float best_distance, distance
     cdef float[::1] best_colour_rgb, pixel_cam, colour_rgb, colour_cam
@@ -349,9 +349,24 @@ def dither_shr(float[:, :, ::1] working_image, object palettes_rgb, float[:,::1]
     cdef (unsigned char)[:, ::1] output_4bit = np.zeros((200, 320), dtype=np.uint8)
     # cdef (unsigned char)[:, :, ::1] output_rgb = np.zeros((200, 320, 3), dtype=np.uint8)
 
+    cdef float[:, ::1] line_cam = np.zeros((320, 3), dtype=np.float32)
+
+    line_to_palette = {}
+    best_palette = 15
     for y in range(200):
         print(y)
-        palette_rgb = palettes_rgb[line_to_palette[y]]
+        # palette_rgb = palettes_rgb[line_to_palette[y]]
+
+        for x in range(320):
+            colour_cam = convert_rgb_to_cam16ucs(
+                rgb_to_cam16ucs, working_image[y,x,0], working_image[y,x,1], working_image[y,x,2])
+            line_cam[x, :] = colour_cam
+
+        best_palette = best_palette_for_line(line_cam, palettes_cam, y * 16 / 200, best_palette)
+        print("-->", best_palette)
+        palette_rgb = palettes_rgb[best_palette]
+        line_to_palette[y] = best_palette
+
         for x in range(320):
             pixel_cam = convert_rgb_to_cam16ucs(
                 rgb_to_cam16ucs, working_image[y, x, 0], working_image[y, x, 1], working_image[y, x, 2])
@@ -436,11 +451,13 @@ def dither_shr(float[:, :, ::1] working_image, object palettes_rgb, float[:,::1]
                 #            working_image[y + 2, x + 2, i] + quant_error * (1 / 48),
                 #            0, 1)
 
-    return np.array(output_4bit, dtype=np.uint8) #, np.array(output_rgb, dtype=np.uint8)
+    return np.array(output_4bit, dtype=np.uint8), line_to_palette #, np.array(output_rgb, dtype=np.uint8)
 
 import collections
 import random
 
+@cython.boundscheck(True)
+@cython.wraparound(False)
 def k_means_with_fixed_centroids(
         int n_clusters, float[:, ::1] data, float[:, ::1] fixed_centroids = None,
         int iterations = 10000, float tolerance = 1e-3):
@@ -492,3 +509,32 @@ def k_means_with_fixed_centroids(
     print(weighted_centroids)
     return np.array([c for w, c in sorted(weighted_centroids, reverse=True)], dtype=np.float32)
 
+@cython.boundscheck(True)
+@cython.wraparound(False)
+def best_palette_for_line(float [:, ::1] line_cam, object palettes_cam, int base_palette_idx, int last_palette_idx):
+    cdef int palette_idx, best_palette_idx
+    cdef float best_total_dist, total_dist, best_pixel_dist, pixel_dist
+    cdef float[:, ::1] palette_cam
+    cdef float[::1] pixel_cam, palette_entry
+
+    best_total_dist = 1e9
+    best_palette_idx = -1
+    for palette_idx, palette_cam in palettes_cam.items():
+        if palette_idx < (base_palette_idx - 1) or palette_idx > (base_palette_idx + 1):
+            continue
+        if palette_idx == last_palette_idx:
+            continue
+        total_dist = 0
+        best_pixel_dist = 1e9
+        for pixel_cam in line_cam:
+            for palette_entry in palette_cam:
+                pixel_dist = colour_distance_squared(pixel_cam, palette_entry)
+                if pixel_dist < best_pixel_dist:
+                    best_pixel_dist = pixel_dist
+            total_dist += best_pixel_dist
+            # print(total_dist)
+        if total_dist < best_total_dist:
+            best_total_dist = total_dist
+            best_palette_idx = palette_idx
+    return best_palette_idx
+

From 52af982159daf4b8bcc36d05da479877aac0f0e2 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Sat, 13 Nov 2021 16:10:33 +0000
Subject: [PATCH 11/82] k-means should be using median with L1 norm, otherwise
 it may not converge

Also optimize a tiny bit
---
 dither.pyx | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/dither.pyx b/dither.pyx
index 31277df..109611e 100644
--- a/dither.pyx
+++ b/dither.pyx
@@ -464,7 +464,7 @@ def k_means_with_fixed_centroids(
     cdef int i, iteration, centroid_idx, num_fixed_centroids, num_random_centroids, best_centroid_idx
     cdef float[::1] point, centroid, new_centroid, old_centroid
     cdef float[:, ::1] centroids
-    cdef float best_dist, centroid_movement
+    cdef float best_dist, centroid_movement, dist
 
     centroids = np.zeros((n_clusters, 3), dtype=np.float32)
     if fixed_centroids is not None:
@@ -473,9 +473,9 @@ def k_means_with_fixed_centroids(
     num_random_centroids = n_clusters - num_fixed_centroids
 
     # TODO: kmeans++ initialization
+    cdef int rand_idx = random.randint(0, data.shape[0])
     for i in range(num_random_centroids):
-        centroids[num_fixed_centroids + i, :] = data[
-            random.randint(0, data.shape[0]), :]
+        centroids[num_fixed_centroids + i, :] = data[rand_idx, :]
 
     cdef int[::1] centroid_weights = np.zeros(n_clusters, dtype=np.int32)
     for iteration in range(iterations):
@@ -484,7 +484,8 @@ def k_means_with_fixed_centroids(
         for point in data:
             best_dist = 1e9
             best_centroid_idx = 0
-            for centroid_idx, centroid in enumerate(centroids):
+            for centroid_idx in range(n_clusters):
+                centroid = centroids[centroid_idx, :]
                 dist = colour_distance(centroid, point)
                 if dist < best_dist:
                     best_dist = dist
@@ -492,12 +493,11 @@ def k_means_with_fixed_centroids(
             closest_points[best_centroid_idx].append(point)
 
         centroid_movement = 0
-
         for centroid_idx, points in closest_points.items():
             centroid_weights[centroid_idx] = len(points)
             if centroid_idx < num_fixed_centroids:
                 continue
-            new_centroid = np.mean(np.array(points), axis=0)
+            new_centroid = np.median(np.array(points), axis=0)
             old_centroid = centroids[centroid_idx]
             centroid_movement += colour_distance(old_centroid, new_centroid)
             centroids[centroid_idx, :] = new_centroid

From 0596aefe0b0469bf6a3eced8779d3ec29c5b8b2b Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Sat, 13 Nov 2021 17:18:34 +0000
Subject: [PATCH 12/82] Use pyclustering for k-medians instead of the
 hand-rolled implementation

Optimize Cython code
---
 convert.py | 80 ++++++++++++++++++++++++++++++------------------------
 dither.pyx | 20 +++++++-------
 2 files changed, 55 insertions(+), 45 deletions(-)

diff --git a/convert.py b/convert.py
index 53f8daa..3520459 100644
--- a/convert.py
+++ b/convert.py
@@ -10,7 +10,8 @@ import random
 import colour
 from PIL import Image
 import numpy as np
-from sklearn.cluster import KMeans
+from pyclustering.cluster.kmedians import kmedians
+from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer
 
 import dither as dither_pyx
 import dither_pattern
@@ -26,9 +27,9 @@ import screen as screen_py
 def cluster_palette(image: Image):
     line_to_palette = {}
 
-    #shuffle_lines = liprint(st(range(200))
-    #random.shuffle(shuffle_lines)
-    #for idx, line in enumerate(shuffle_lines):
+    # shuffle_lines = liprint(st(range(200))
+    # random.shuffle(shuffle_lines)
+    # for idx, line in enumerate(shuffle_lines):
     #    line_to_palette[line] = idx % 16
 
     # for line in range(200):
@@ -54,51 +55,60 @@ def cluster_palette(image: Image):
     palettes_rgb = {}
     palettes_cam = {}
     for palette_idx in range(16):
-        p_lower = max(palette_idx-2, 0)
-        p_upper = min(palette_idx+2, 16)
+        p_lower = max(palette_idx - 2, 0)
+        p_upper = min(palette_idx + 2, 16)
         palette_pixels = colours_cam[
-                         int(p_lower * (200/16)) * 320:int(p_upper * (
-                                 200/16)) * 320, :]
+                         int(p_lower * (200 / 16)) * 320:int(p_upper * (
+                                 200 / 16)) * 320, :]
 
         # kmeans = KMeans(n_clusters=16, max_iter=10000)
         # kmeans.fit_predict(palette_pixels)
         # palettes_cam[palette_idx] = kmeans.cluster_centers_
 
-        fixed_centroids = None
+        # fixed_centroids = None
         # print(np.array(line_colours), fixed_centroids)
-        palettes_cam[palette_idx] = dither_pyx.k_means_with_fixed_centroids(
-            16, palette_pixels, fixed_centroids=fixed_centroids, tolerance=1e-6)
+        # palettes_cam[palette_idx] = dither_pyx.k_means_with_fixed_centroids(
+        #    16, palette_pixels, fixed_centroids=fixed_centroids,
+        #    tolerance=1e-6)
+
+        initial_centers = kmeans_plusplus_initializer(
+            palette_pixels, 16).initialize()
+        kmedians_instance = kmedians(palette_pixels, initial_centers)
+        kmedians_instance.process()
+        palettes_cam[palette_idx] = np.array(
+            kmedians_instance.get_medians()).astype(np.float32)
 
         # palette_colours = collections.defaultdict(list)
-    # for line in range(200):
-    #     palette = line_to_palette[line]
-    #     palette_colours[palette].extend(
-    #         colours_cam[line * 320:(line + 1) * 320])
+        # for line in range(200):
+        #     palette = line_to_palette[line]
+        #     palette_colours[palette].extend(
+        #         colours_cam[line * 320:(line + 1) * 320])
 
-    # For each line grouping, find big palette entries with minimal total
-    # distance
+        # For each line grouping, find big palette entries with minimal total
+        # distance
 
-    # palette_cam = None
-    # for palette_idx in range(16):
-    #     line_colours = palette_colours[palette_idx]
-    #     #if palette_idx < 15:
-    #     #    line_colours += palette_colours[palette_idx + 1]
-    #     # if palette_idx < 14:
-    #     #     line_colours += palette_colours[palette_idx + 2]
-    #     # if palette_idx > 0:
-    #     #     fixed_centroids = palette_cam[:8, :]
-    #     # else:
-    #     fixed_centroids = None
-    #     # print(np.array(line_colours), fixed_centroids)
-    #     palette_cam = dither_pyx.k_means_with_fixed_centroids(16, np.array(
-    #         line_colours), fixed_centroids=fixed_centroids, tolerance=1e-6)
+        # palette_cam = None
+        # for palette_idx in range(16):
+        #     line_colours = palette_colours[palette_idx]
+        #     #if palette_idx < 15:
+        #     #    line_colours += palette_colours[palette_idx + 1]
+        #     # if palette_idx < 14:
+        #     #     line_colours += palette_colours[palette_idx + 2]
+        #     # if palette_idx > 0:
+        #     #     fixed_centroids = palette_cam[:8, :]
+        #     # else:
+        #     fixed_centroids = None
+        #     # print(np.array(line_colours), fixed_centroids)
+        #     palette_cam = dither_pyx.k_means_with_fixed_centroids(16, np.array(
+        #         line_colours), fixed_centroids=fixed_centroids, tolerance=1e-6)
 
-        #kmeans = KMeans(n_clusters=16, max_iter=10000)
-        #kmeans.fit_predict(line_colours)
-        #palette_cam = kmeans.cluster_centers_
+        # kmeans = KMeans(n_clusters=16, max_iter=10000)
+        # kmeans.fit_predict(line_colours)
+        # palette_cam = kmeans.cluster_centers_
 
         with colour.utilities.suppress_warnings(colour_usage_warnings=True):
-            palette_rgb = colour.convert(palettes_cam[palette_idx], "CAM16UCS", "RGB")
+            palette_rgb = colour.convert(palettes_cam[palette_idx], "CAM16UCS",
+                                         "RGB")
             # SHR colour palette only uses 4-bit values
             palette_rgb = np.round(palette_rgb * 15) / 15
             palettes_rgb[palette_idx] = palette_rgb.astype(np.float32)
diff --git a/dither.pyx b/dither.pyx
index 109611e..3ebca48 100644
--- a/dither.pyx
+++ b/dither.pyx
@@ -337,10 +337,10 @@ def dither_image(
 
 import colour
 
-@cython.boundscheck(True)
+@cython.boundscheck(False)
 @cython.wraparound(False)
 def dither_shr(float[:, :, ::1] working_image, object palettes_cam, object palettes_rgb, float[:,::1] rgb_to_cam16ucs):
-    cdef int y, x, idx, best_colour_idx
+    cdef int y, x, idx, best_colour_idx, best_palette
     cdef float best_distance, distance
     cdef float[::1] best_colour_rgb, pixel_cam, colour_rgb, colour_cam
     cdef float quant_error
@@ -362,8 +362,8 @@ def dither_shr(float[:, :, ::1] working_image, object palettes_cam, object palet
                 rgb_to_cam16ucs, working_image[y,x,0], working_image[y,x,1], working_image[y,x,2])
             line_cam[x, :] = colour_cam
 
-        best_palette = best_palette_for_line(line_cam, palettes_cam, y * 16 / 200, best_palette)
-        print("-->", best_palette)
+        best_palette = best_palette_for_line(line_cam, palettes_cam, <int>(y * 16 / 200), best_palette)
+        # print("-->", best_palette)
         palette_rgb = palettes_rgb[best_palette]
         line_to_palette[y] = best_palette
 
@@ -373,7 +373,8 @@ def dither_shr(float[:, :, ::1] working_image, object palettes_cam, object palet
 
             best_distance = 1e9
             best_colour_idx = -1
-            for idx, colour_rgb in enumerate(palette_rgb):
+            for idx in range(16):
+                colour_rgb = palette_rgb[idx, :]
                 colour_cam = convert_rgb_to_cam16ucs(rgb_to_cam16ucs, colour_rgb[0], colour_rgb[1], colour_rgb[2])
                 distance = colour_distance_squared(pixel_cam, colour_cam)
                 if distance < best_distance:
@@ -383,7 +384,6 @@ def dither_shr(float[:, :, ::1] working_image, object palettes_cam, object palet
             output_4bit[y, x] = best_colour_idx
 
             for i in range(3):
-                # output_rgb[y,x,i] = <int>(best_colour_rgb[i] * 255)
                 quant_error = working_image[y, x, i] - best_colour_rgb[i]
 
                 # Floyd-Steinberg dither
@@ -451,12 +451,12 @@ def dither_shr(float[:, :, ::1] working_image, object palettes_cam, object palet
                 #            working_image[y + 2, x + 2, i] + quant_error * (1 / 48),
                 #            0, 1)
 
-    return np.array(output_4bit, dtype=np.uint8), line_to_palette #, np.array(output_rgb, dtype=np.uint8)
+    return np.array(output_4bit, dtype=np.uint8), line_to_palette
 
 import collections
 import random
 
-@cython.boundscheck(True)
+@cython.boundscheck(False)
 @cython.wraparound(False)
 def k_means_with_fixed_centroids(
         int n_clusters, float[:, ::1] data, float[:, ::1] fixed_centroids = None,
@@ -509,9 +509,9 @@ def k_means_with_fixed_centroids(
     print(weighted_centroids)
     return np.array([c for w, c in sorted(weighted_centroids, reverse=True)], dtype=np.float32)
 
-@cython.boundscheck(True)
+@cython.boundscheck(False)
 @cython.wraparound(False)
-def best_palette_for_line(float [:, ::1] line_cam, object palettes_cam, int base_palette_idx, int last_palette_idx):
+cdef int best_palette_for_line(float [:, ::1] line_cam, object palettes_cam, int base_palette_idx, int last_palette_idx):
     cdef int palette_idx, best_palette_idx
     cdef float best_total_dist, total_dist, best_pixel_dist, pixel_dist
     cdef float[:, ::1] palette_cam

From 643e50349e2d17617fe5a7990967472cbf2d3841 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Sat, 13 Nov 2021 17:29:13 +0000
Subject: [PATCH 13/82] Optimize more: pass palettes as typed arrays and make
 best_palette_for_line nogil

---
 convert.py | 13 +++++++------
 dither.pyx | 22 ++++++++++++----------
 2 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/convert.py b/convert.py
index 3520459..d18842f 100644
--- a/convert.py
+++ b/convert.py
@@ -52,8 +52,8 @@ def cluster_palette(image: Image):
     with colour.utilities.suppress_warnings(colour_usage_warnings=True):
         colours_cam = colour.convert(colours_rgb, "RGB",
                                      "CAM16UCS").astype(np.float32)
-    palettes_rgb = {}
-    palettes_cam = {}
+    palettes_rgb = np.empty((16, 16, 3), dtype=np.float32)
+    palettes_cam = np.empty((16, 16, 3), dtype=np.float32)
     for palette_idx in range(16):
         p_lower = max(palette_idx - 2, 0)
         p_upper = min(palette_idx + 2, 16)
@@ -75,7 +75,7 @@ def cluster_palette(image: Image):
             palette_pixels, 16).initialize()
         kmedians_instance = kmedians(palette_pixels, initial_centers)
         kmedians_instance.process()
-        palettes_cam[palette_idx] = np.array(
+        palettes_cam[palette_idx, :, :] = np.array(
             kmedians_instance.get_medians()).astype(np.float32)
 
         # palette_colours = collections.defaultdict(list)
@@ -111,7 +111,7 @@ def cluster_palette(image: Image):
                                          "RGB")
             # SHR colour palette only uses 4-bit values
             palette_rgb = np.round(palette_rgb * 15) / 15
-            palettes_rgb[palette_idx] = palette_rgb.astype(np.float32)
+            palettes_rgb[palette_idx, :, :] = palette_rgb.astype(np.float32)
     # print(palettes_rgb)
 
     # For each line, pick the palette with lowest total distance
@@ -189,8 +189,9 @@ def main():
     # print(palette_rgb)
     # screen.set_palette(0, (image_py.linear_to_srgb_array(palette_rgb) *
     #                        15).astype(np.uint8))
-    for i, p in palettes_rgb.items():
-        screen.set_palette(i, (np.round(p * 15)).astype(np.uint8))
+    for i in range(16):
+        screen.set_palette(i, (np.round(palettes_rgb[i, :, :] * 15)).astype(
+            np.uint8))
 
     output_4bit, line_to_palette = dither_pyx.dither_shr(
         rgb, palettes_cam, palettes_rgb, rgb_to_cam16)
diff --git a/dither.pyx b/dither.pyx
index 3ebca48..2506115 100644
--- a/dither.pyx
+++ b/dither.pyx
@@ -339,7 +339,7 @@ import colour
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def dither_shr(float[:, :, ::1] working_image, object palettes_cam, object palettes_rgb, float[:,::1] rgb_to_cam16ucs):
+def dither_shr(float[:, :, ::1] working_image, float[:, :, ::1] palettes_cam, float[:, :, ::1] palettes_rgb, float[:,::1] rgb_to_cam16ucs):
     cdef int y, x, idx, best_colour_idx, best_palette
     cdef float best_distance, distance
     cdef float[::1] best_colour_rgb, pixel_cam, colour_rgb, colour_cam
@@ -351,12 +351,10 @@ def dither_shr(float[:, :, ::1] working_image, object palettes_cam, object palet
 
     cdef float[:, ::1] line_cam = np.zeros((320, 3), dtype=np.float32)
 
-    line_to_palette = {}
+    cdef int[::1] line_to_palette = np.zeros(200, dtype=np.int32)
     best_palette = 15
     for y in range(200):
         print(y)
-        # palette_rgb = palettes_rgb[line_to_palette[y]]
-
         for x in range(320):
             colour_cam = convert_rgb_to_cam16ucs(
                 rgb_to_cam16ucs, working_image[y,x,0], working_image[y,x,1], working_image[y,x,2])
@@ -364,7 +362,7 @@ def dither_shr(float[:, :, ::1] working_image, object palettes_cam, object palet
 
         best_palette = best_palette_for_line(line_cam, palettes_cam, <int>(y * 16 / 200), best_palette)
         # print("-->", best_palette)
-        palette_rgb = palettes_rgb[best_palette]
+        palette_rgb = palettes_rgb[best_palette, :, :]
         line_to_palette[y] = best_palette
 
         for x in range(320):
@@ -511,23 +509,27 @@ def k_means_with_fixed_centroids(
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-cdef int best_palette_for_line(float [:, ::1] line_cam, object palettes_cam, int base_palette_idx, int last_palette_idx):
-    cdef int palette_idx, best_palette_idx
+cdef int best_palette_for_line(float [:, ::1] line_cam, float[:, :, ::1] palettes_cam, int base_palette_idx, int last_palette_idx) nogil:
+    cdef int palette_idx, best_palette_idx, palette_entry_idx, pixel_idx
     cdef float best_total_dist, total_dist, best_pixel_dist, pixel_dist
     cdef float[:, ::1] palette_cam
     cdef float[::1] pixel_cam, palette_entry
 
     best_total_dist = 1e9
     best_palette_idx = -1
-    for palette_idx, palette_cam in palettes_cam.items():
+    cdef int line_size = line_cam.shape[0]
+    for palette_idx in range(16):
+        palette_cam = palettes_cam[palette_idx, :, :]
         if palette_idx < (base_palette_idx - 1) or palette_idx > (base_palette_idx + 1):
             continue
         if palette_idx == last_palette_idx:
             continue
         total_dist = 0
         best_pixel_dist = 1e9
-        for pixel_cam in line_cam:
-            for palette_entry in palette_cam:
+        for pixel_idx in range(line_size):
+            pixel_cam = line_cam[pixel_idx]
+            for palette_entry_idx in range(16):
+                palette_entry = palette_cam[palette_entry_idx, :]
                 pixel_dist = colour_distance_squared(pixel_cam, palette_entry)
                 if pixel_dist < best_pixel_dist:
                     best_pixel_dist = pixel_dist

From b363d60754e41de3ba59f0e7cd78586e1a02058a Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Mon, 15 Nov 2021 09:19:44 +0000
Subject: [PATCH 14/82] Checkpoint

- switch to pyclustering for kmedians
- allow choosing the same palette as previous line, with a multiplicative penalty to distance in case it's much better
- iterate kmedians multiple times and choose the best, since it's only a local optimum
---
 convert.py | 99 +++++++++++++++++++++++++++++++-----------------------
 dither.pyx | 15 +++++----
 2 files changed, 66 insertions(+), 48 deletions(-)

diff --git a/convert.py b/convert.py
index d18842f..188a247 100644
--- a/convert.py
+++ b/convert.py
@@ -11,6 +11,8 @@ import colour
 from PIL import Image
 import numpy as np
 from pyclustering.cluster.kmedians import kmedians
+from pyclustering.cluster.kmeans import kmeans
+from pyclustering.utils.metric import distance_metric, type_metric
 from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer
 
 import dither as dither_pyx
@@ -25,7 +27,7 @@ import screen as screen_py
 # - support HGR
 
 def cluster_palette(image: Image):
-    line_to_palette = {}
+    # line_to_palette = {}
 
     # shuffle_lines = liprint(st(range(200))
     # random.shuffle(shuffle_lines)
@@ -40,13 +42,13 @@ def cluster_palette(image: Image):
     #     else:
     #         line_to_palette[line] = np.clip(int(line / (200 / 16)) + 2, 0, 15)
 
-    for line in range(200):
-        if line % 3 == 0:
-            line_to_palette[line] = int(line / (200 / 16))
-        elif line % 3 == 1:
-            line_to_palette[line] = np.clip(int(line / (200 / 16)) + 1, 0, 15)
-        else:
-            line_to_palette[line] = np.clip(int(line / (200 / 16)) + 2, 0, 15)
+    # for line in range(200):
+    #     if line % 3 == 0:
+    #         line_to_palette[line] = int(line / (200 / 16))
+    #     elif line % 3 == 1:
+    #         line_to_palette[line] = np.clip(int(line / (200 / 16)) + 1, 0, 15)
+    #     else:
+    #         line_to_palette[line] = np.clip(int(line / (200 / 16)) + 2, 0, 15)
 
     colours_rgb = np.asarray(image).reshape((-1, 3))
     with colour.utilities.suppress_warnings(colour_usage_warnings=True):
@@ -55,6 +57,7 @@ def cluster_palette(image: Image):
     palettes_rgb = np.empty((16, 16, 3), dtype=np.float32)
     palettes_cam = np.empty((16, 16, 3), dtype=np.float32)
     for palette_idx in range(16):
+        print("Fitting palette %d" % palette_idx)
         p_lower = max(palette_idx - 2, 0)
         p_upper = min(palette_idx + 2, 16)
         palette_pixels = colours_cam[
@@ -71,12 +74,23 @@ def cluster_palette(image: Image):
         #    16, palette_pixels, fixed_centroids=fixed_centroids,
         #    tolerance=1e-6)
 
-        initial_centers = kmeans_plusplus_initializer(
-            palette_pixels, 16).initialize()
-        kmedians_instance = kmedians(palette_pixels, initial_centers)
-        kmedians_instance.process()
+        best_wce = 1e9
+        best_medians = None
+        for i in range(500):
+            # print(i)
+            initial_centers = kmeans_plusplus_initializer(
+                palette_pixels, 16).initialize()
+            kmedians_instance = kmedians(
+                palette_pixels, initial_centers, tolerance=0.1, itermax=100,
+                metric=distance_metric(type_metric.MANHATTAN))
+            kmedians_instance.process()
+            if kmedians_instance.get_total_wce() < best_wce:
+                best_wce = kmedians_instance.get_total_wce()
+                print(i, best_wce)
+                best_medians = kmedians_instance
+        print("Best %f" % best_wce)
         palettes_cam[palette_idx, :, :] = np.array(
-            kmedians_instance.get_medians()).astype(np.float32)
+            best_medians.get_medians()).astype(np.float32)
 
         # palette_colours = collections.defaultdict(list)
         # for line in range(200):
@@ -122,7 +136,7 @@ def cluster_palette(image: Image):
     #         line_pixels, palettes_cam, best_palette)
     #     line_to_palette[line] = best_palette
     #     print(line, line_to_palette[line])
-    return palettes_cam, palettes_rgb, line_to_palette
+    return palettes_cam, palettes_rgb
 
 
 def main():
@@ -185,7 +199,7 @@ def main():
                         gamma=args.gamma_correct, srgb_output=True)).astype(
         np.float32) / 255
 
-    palettes_cam, palettes_rgb, line_to_palette = cluster_palette(rgb)
+    palettes_cam, palettes_rgb = cluster_palette(rgb)
     # print(palette_rgb)
     # screen.set_palette(0, (image_py.linear_to_srgb_array(palette_rgb) *
     #                        15).astype(np.uint8))
@@ -193,36 +207,37 @@ def main():
         screen.set_palette(i, (np.round(palettes_rgb[i, :, :] * 15)).astype(
             np.uint8))
 
-    output_4bit, line_to_palette = dither_pyx.dither_shr(
-        rgb, palettes_cam, palettes_rgb, rgb_to_cam16)
-    screen.set_pixels(output_4bit)
-    output_rgb = np.zeros((200, 320, 3), dtype=np.uint8)
-    for i in range(200):
-        screen.line_palette[i] = line_to_palette[i]
-        output_rgb[i, :, :] = (
-                palettes_rgb[line_to_palette[i]][
-                    output_4bit[i, :]] * 255).astype(np.uint8)
-    output_srgb = image_py.linear_to_srgb(output_rgb).astype(np.uint8)
+    for penalty in [1,2,3,4,5,6,7,8,9,10,1e9]:
+        output_4bit, line_to_palette = dither_pyx.dither_shr(
+            rgb, palettes_cam, palettes_rgb, rgb_to_cam16, float(penalty))
+        screen.set_pixels(output_4bit)
+        output_rgb = np.zeros((200, 320, 3), dtype=np.uint8)
+        for i in range(200):
+            screen.line_palette[i] = line_to_palette[i]
+            output_rgb[i, :, :] = (
+                    palettes_rgb[line_to_palette[i]][
+                        output_4bit[i, :]] * 255).astype(np.uint8)
+        output_srgb = image_py.linear_to_srgb(output_rgb).astype(np.uint8)
 
-    # dither = dither_pattern.PATTERNS[args.dither]()
-    # bitmap = dither_pyx.dither_image(
-    #     screen, rgb, dither, args.lookahead, args.verbose, rgb_to_cam16)
+        # dither = dither_pattern.PATTERNS[args.dither]()
+        # bitmap = dither_pyx.dither_image(
+        #     screen, rgb, dither, args.lookahead, args.verbose, rgb_to_cam16)
 
-    # Show output image by rendering in target palette
-    # output_palette_name = args.show_palette or args.palette
-    # output_palette = palette_py.PALETTES[output_palette_name]()
-    # output_screen = screen_py.DHGRScreen(output_palette)
-    # if output_palette_name == "ntsc":
-    #     output_srgb = output_screen.bitmap_to_image_ntsc(bitmap)
-    # else:
-    #     output_srgb = image_py.linear_to_srgb(
-    #         output_screen.bitmap_to_image_rgb(bitmap)).astype(np.uint8)
-    out_image = image_py.resize(
-        Image.fromarray(output_srgb), screen.X_RES, screen.Y_RES,
-        srgb_output=False)  # XXX true
+        # Show output image by rendering in target palette
+        # output_palette_name = args.show_palette or args.palette
+        # output_palette = palette_py.PALETTES[output_palette_name]()
+        # output_screen = screen_py.DHGRScreen(output_palette)
+        # if output_palette_name == "ntsc":
+        #     output_srgb = output_screen.bitmap_to_image_ntsc(bitmap)
+        # else:
+        #     output_srgb = image_py.linear_to_srgb(
+        #         output_screen.bitmap_to_image_rgb(bitmap)).astype(np.uint8)
+        out_image = image_py.resize(
+            Image.fromarray(output_srgb), screen.X_RES, screen.Y_RES,
+            srgb_output=False)  # XXX true
 
-    if args.show_output:
-        out_image.show()
+        if args.show_output:
+            out_image.show()
 
     # Save Double hi-res image
     outfile = os.path.join(os.path.splitext(args.output)[0] + "-preview.png")
diff --git a/dither.pyx b/dither.pyx
index 2506115..cea6a28 100644
--- a/dither.pyx
+++ b/dither.pyx
@@ -339,7 +339,7 @@ import colour
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def dither_shr(float[:, :, ::1] working_image, float[:, :, ::1] palettes_cam, float[:, :, ::1] palettes_rgb, float[:,::1] rgb_to_cam16ucs):
+def dither_shr(float[:, :, ::1] working_image, float[:, :, ::1] palettes_cam, float[:, :, ::1] palettes_rgb, float[:,::1] rgb_to_cam16ucs, float penalty):
     cdef int y, x, idx, best_colour_idx, best_palette
     cdef float best_distance, distance
     cdef float[::1] best_colour_rgb, pixel_cam, colour_rgb, colour_cam
@@ -354,13 +354,13 @@ def dither_shr(float[:, :, ::1] working_image, float[:, :, ::1] palettes_cam, fl
     cdef int[::1] line_to_palette = np.zeros(200, dtype=np.int32)
     best_palette = 15
     for y in range(200):
-        print(y)
+        # print(y)
         for x in range(320):
             colour_cam = convert_rgb_to_cam16ucs(
                 rgb_to_cam16ucs, working_image[y,x,0], working_image[y,x,1], working_image[y,x,2])
             line_cam[x, :] = colour_cam
 
-        best_palette = best_palette_for_line(line_cam, palettes_cam, <int>(y * 16 / 200), best_palette)
+        best_palette = best_palette_for_line(line_cam, palettes_cam, <int>(y * 16 / 200), best_palette, penalty)
         # print("-->", best_palette)
         palette_rgb = palettes_rgb[best_palette, :, :]
         line_to_palette[y] = best_palette
@@ -509,7 +509,7 @@ def k_means_with_fixed_centroids(
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-cdef int best_palette_for_line(float [:, ::1] line_cam, float[:, :, ::1] palettes_cam, int base_palette_idx, int last_palette_idx) nogil:
+cdef int best_palette_for_line(float [:, ::1] line_cam, float[:, :, ::1] palettes_cam, int base_palette_idx, int last_palette_idx, float last_penalty) nogil:
     cdef int palette_idx, best_palette_idx, palette_entry_idx, pixel_idx
     cdef float best_total_dist, total_dist, best_pixel_dist, pixel_dist
     cdef float[:, ::1] palette_cam
@@ -517,20 +517,23 @@ cdef int best_palette_for_line(float [:, ::1] line_cam, float[:, :, ::1] palette
 
     best_total_dist = 1e9
     best_palette_idx = -1
+    cdef float penalty
     cdef int line_size = line_cam.shape[0]
     for palette_idx in range(16):
         palette_cam = palettes_cam[palette_idx, :, :]
         if palette_idx < (base_palette_idx - 1) or palette_idx > (base_palette_idx + 1):
             continue
         if palette_idx == last_palette_idx:
-            continue
+            penalty = last_penalty
+        else:
+            penalty = 1.0
         total_dist = 0
         best_pixel_dist = 1e9
         for pixel_idx in range(line_size):
             pixel_cam = line_cam[pixel_idx]
             for palette_entry_idx in range(16):
                 palette_entry = palette_cam[palette_entry_idx, :]
-                pixel_dist = colour_distance_squared(pixel_cam, palette_entry)
+                pixel_dist = colour_distance_squared(pixel_cam, palette_entry) * penalty
                 if pixel_dist < best_pixel_dist:
                     best_pixel_dist = pixel_dist
             total_dist += best_pixel_dist

From 10c829906b9301efe7e9f12d7360dfc7a9e3fb43 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Tue, 16 Nov 2021 11:21:53 +0000
Subject: [PATCH 15/82] Checkpoint

- Repeatedly refit palettes since k-means is only a local
  optimization.  This can produce incremental improvements in image
  quality but may also overfit, especially on complex images.
- Use pygame to render incremental images.
- Fix off-by-one in palette striping.
- When fitting palettes, first cluster a 16-colour palette for the
  entire image and use this to initialize the centroids for individual
  palettes.  This improves quality when fitting images with large
  blocks of colour, since they will otherwise be fit separately and
  may have slight differences.  With a global initializer these will
  tend to be the same.  This also improves performance.

---
 convert.py | 256 +++++++++++++++++++++++++++--------------------------
 1 file changed, 129 insertions(+), 127 deletions(-)

diff --git a/convert.py b/convert.py
index 188a247..0fd88d2 100644
--- a/convert.py
+++ b/convert.py
@@ -6,6 +6,7 @@ import os.path
 import time
 import collections
 import random
+import pygame
 
 import colour
 from PIL import Image
@@ -14,6 +15,7 @@ from pyclustering.cluster.kmedians import kmedians
 from pyclustering.cluster.kmeans import kmeans
 from pyclustering.utils.metric import distance_metric, type_metric
 from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer
+from sklearn import cluster
 
 import dither as dither_pyx
 import dither_pattern
@@ -26,117 +28,103 @@ import screen as screen_py
 # - support LR/DLR
 # - support HGR
 
-def cluster_palette(image: Image):
-    # line_to_palette = {}
-
-    # shuffle_lines = liprint(st(range(200))
-    # random.shuffle(shuffle_lines)
-    # for idx, line in enumerate(shuffle_lines):
-    #    line_to_palette[line] = idx % 16
-
-    # for line in range(200):
-    #     if line % 3 == 0:
-    #         line_to_palette[line] = int(line / (200 / 16))
-    #     elif line % 3 == 1:
-    #         line_to_palette[line] = np.clip(int(line / (200 / 16)) + 1, 0, 15)
-    #     else:
-    #         line_to_palette[line] = np.clip(int(line / (200 / 16)) + 2, 0, 15)
-
-    # for line in range(200):
-    #     if line % 3 == 0:
-    #         line_to_palette[line] = int(line / (200 / 16))
-    #     elif line % 3 == 1:
-    #         line_to_palette[line] = np.clip(int(line / (200 / 16)) + 1, 0, 15)
-    #     else:
-    #         line_to_palette[line] = np.clip(int(line / (200 / 16)) + 2, 0, 15)
-
-    colours_rgb = np.asarray(image).reshape((-1, 3))
-    with colour.utilities.suppress_warnings(colour_usage_warnings=True):
-        colours_cam = colour.convert(colours_rgb, "RGB",
-                                     "CAM16UCS").astype(np.float32)
-    palettes_rgb = np.empty((16, 16, 3), dtype=np.float32)
-    palettes_cam = np.empty((16, 16, 3), dtype=np.float32)
-    for palette_idx in range(16):
-        print("Fitting palette %d" % palette_idx)
-        p_lower = max(palette_idx - 2, 0)
-        p_upper = min(palette_idx + 2, 16)
-        palette_pixels = colours_cam[
-                         int(p_lower * (200 / 16)) * 320:int(p_upper * (
-                                 200 / 16)) * 320, :]
-
-        # kmeans = KMeans(n_clusters=16, max_iter=10000)
-        # kmeans.fit_predict(palette_pixels)
-        # palettes_cam[palette_idx] = kmeans.cluster_centers_
-
-        # fixed_centroids = None
-        # print(np.array(line_colours), fixed_centroids)
-        # palettes_cam[palette_idx] = dither_pyx.k_means_with_fixed_centroids(
-        #    16, palette_pixels, fixed_centroids=fixed_centroids,
-        #    tolerance=1e-6)
-
-        best_wce = 1e9
-        best_medians = None
-        for i in range(500):
-            # print(i)
-            initial_centers = kmeans_plusplus_initializer(
-                palette_pixels, 16).initialize()
-            kmedians_instance = kmedians(
-                palette_pixels, initial_centers, tolerance=0.1, itermax=100,
-                metric=distance_metric(type_metric.MANHATTAN))
-            kmedians_instance.process()
-            if kmedians_instance.get_total_wce() < best_wce:
-                best_wce = kmedians_instance.get_total_wce()
-                print(i, best_wce)
-                best_medians = kmedians_instance
-        print("Best %f" % best_wce)
-        palettes_cam[palette_idx, :, :] = np.array(
-            best_medians.get_medians()).astype(np.float32)
-
-        # palette_colours = collections.defaultdict(list)
-        # for line in range(200):
-        #     palette = line_to_palette[line]
-        #     palette_colours[palette].extend(
-        #         colours_cam[line * 320:(line + 1) * 320])
-
-        # For each line grouping, find big palette entries with minimal total
-        # distance
-
-        # palette_cam = None
-        # for palette_idx in range(16):
-        #     line_colours = palette_colours[palette_idx]
-        #     #if palette_idx < 15:
-        #     #    line_colours += palette_colours[palette_idx + 1]
-        #     # if palette_idx < 14:
-        #     #     line_colours += palette_colours[palette_idx + 2]
-        #     # if palette_idx > 0:
-        #     #     fixed_centroids = palette_cam[:8, :]
-        #     # else:
-        #     fixed_centroids = None
-        #     # print(np.array(line_colours), fixed_centroids)
-        #     palette_cam = dither_pyx.k_means_with_fixed_centroids(16, np.array(
-        #         line_colours), fixed_centroids=fixed_centroids, tolerance=1e-6)
-
-        # kmeans = KMeans(n_clusters=16, max_iter=10000)
-        # kmeans.fit_predict(line_colours)
-        # palette_cam = kmeans.cluster_centers_
+class ClusterPalette:
+    def __init__(self, image: Image):
+        self._colours_cam = self._image_colours_cam(image)
+        self._best_palette_distances = {i: (1e9, None) for i in range(16)}
+        self._iterations = 0
+        self._palettes_cam = np.empty((16, 16, 3), dtype=np.float32)
+        self._palettes_rgb = np.empty((16, 16, 3), dtype=np.float32)
+        self._global_palette = self._fit_global_palette()
 
+    def _image_colours_cam(self, image: Image):
+        colours_rgb = np.asarray(image).reshape((-1, 3))
         with colour.utilities.suppress_warnings(colour_usage_warnings=True):
-            palette_rgb = colour.convert(palettes_cam[palette_idx], "CAM16UCS",
-                                         "RGB")
-            # SHR colour palette only uses 4-bit values
-            palette_rgb = np.round(palette_rgb * 15) / 15
-            palettes_rgb[palette_idx, :, :] = palette_rgb.astype(np.float32)
-    # print(palettes_rgb)
+            colours_cam = colour.convert(colours_rgb, "RGB",
+                                         "CAM16UCS").astype(np.float32)
+        return colours_cam
 
-    # For each line, pick the palette with lowest total distance
-    # best_palette = 15
-    # for line in range(200):
-    #     line_pixels = colours_cam[line*320:(line+1)*320]
-    #     best_palette = dither_pyx.best_palette_for_line(
-    #         line_pixels, palettes_cam, best_palette)
-    #     line_to_palette[line] = best_palette
-    #     print(line, line_to_palette[line])
-    return palettes_cam, palettes_rgb
+    def _fit_global_palette(self):
+        """Compute a 16-colour palette for the entire image to use as
+        starting point for the sub-palettes.  This should help when the image
+        has large blocks of colour since the sub-palettes will tend to pick the same coloursx."""
+        clusters = cluster.MiniBatchKMeans(n_clusters=16, max_iter=10000)
+        # tol=0.0000000001, algorithm="elkan")
+        clusters.fit_predict(self._colours_cam)
+        return clusters.cluster_centers_
+
+    def iterate(self):
+        self._iterations += 1
+        print("Iteration %d" % self._iterations)
+        for palette_idx in range(16):
+
+
+            # i=5: 3 * (200/16) : 7 * (200/16)
+            # print("Fitting palette %d" % palette_idx)
+            p_lower2 = max(palette_idx - 1.5, 0)
+            p_lower1 = max(palette_idx - 1, 0)
+            p_lower0 = palette_idx
+            p_upper0 = max(palette_idx + 1, 16)
+            p_upper1 = max(palette_idx + 2, 16)
+            p_upper2 = min(palette_idx + 2.5, 16)
+            # TODO: weight +/-1 and 0 bands higher
+            # TODO: dynamically tune palette cuts
+            palette_pixels = np.concatenate(
+                [
+                    self._colours_cam[
+                        int(p_lower2 * (200 / 16)) * 320:int(p_upper2 * (
+                            200 / 16)) * 320, :],
+                    # self._colours_cam[
+                    #      int(p_lower1 * (200 / 16)) * 320:int(p_upper1 * (
+                    #          200 / 16)) * 320, :],
+                    # self._colours_cam[
+                    #      int(p_lower0 * (200 / 16)) * 320:int(p_upper0 * (
+                    #          200 / 16)) * 320, :],
+                ], axis=0)
+
+            best_wce, best_medians = self._best_palette_distances[palette_idx]
+            # if palette_idx == 0:
+            # initial_centers = kmeans_plusplus_initializer(
+            #     palette_pixels, 16).initialize()
+            # else:
+            #     initial_centers = kmedians_instance.get_medians()
+
+            # kmedians_instance = kmeans(
+            #     palette_pixels, initial_centers, tolerance=0.0000000001,
+            #     itermax=100,
+            #     metric=distance_metric(type_metric.EUCLIDEAN_SQUARE))
+            # kmedians_instance.process()
+            # TODO: tolerance
+            clusters = cluster.MiniBatchKMeans(
+                n_clusters=16, max_iter=10000, init=self._global_palette,
+                n_init=1)
+            # tol=0.0000000001, algorithm="elkan")
+            clusters.fit_predict(palette_pixels)
+            # if kmedians_instance.get_total_wce() < best_wce:
+            #    best_wce = kmedians_instance.get_total_wce()
+            #    best_medians = kmedians_instance
+            if clusters.inertia_ < (best_wce * 0.99):
+                best_wce = clusters.inertia_
+                print("Improved palette %d: %f" % (palette_idx, best_wce))
+
+                # self._palettes_cam[palette_idx, :, :] = np.array(
+                #     best_medians.get_centers()).astype(np.float32)
+
+                self._palettes_cam[palette_idx, :, :] = np.array(
+                    clusters.cluster_centers_).astype(np.float32)
+                self._best_palette_distances[palette_idx] = (
+                    best_wce, best_medians)
+
+                with colour.utilities.suppress_warnings(
+                        colour_usage_warnings=True):
+                    palette_rgb = colour.convert(
+                        self._palettes_cam[palette_idx], "CAM16UCS", "RGB")
+                    # SHR colour palette only uses 4-bit values
+                    palette_rgb = np.round(palette_rgb * 15) / 15
+                    self._palettes_rgb[palette_idx, :, :] = palette_rgb.astype(
+                        np.float32)
+
+        return self._palettes_cam, self._palettes_rgb
 
 
 def main():
@@ -199,15 +187,24 @@ def main():
                         gamma=args.gamma_correct, srgb_output=True)).astype(
         np.float32) / 255
 
-    palettes_cam, palettes_rgb = cluster_palette(rgb)
-    # print(palette_rgb)
-    # screen.set_palette(0, (image_py.linear_to_srgb_array(palette_rgb) *
-    #                        15).astype(np.uint8))
-    for i in range(16):
-        screen.set_palette(i, (np.round(palettes_rgb[i, :, :] * 15)).astype(
-            np.uint8))
+    penalty = 10  # 1e9
+    iterations = 50
+
+    pygame.init()
+    canvas = pygame.display.set_mode((640, 400))
+    canvas = pygame.display.set_mode((640, 400))
+    canvas.fill((0, 0, 0))
+    pygame.display.flip()
+    # print("Foo")
+
+    cluster_palette = ClusterPalette(rgb)
+    for iteration in range(iterations):
+        palettes_cam, palettes_rgb = cluster_palette.iterate()
+        # print((palettes_rgb*255).astype(np.uint8))
+        for i in range(16):
+            screen.set_palette(i, (np.round(palettes_rgb[i, :, :] * 15)).astype(
+                np.uint8))
 
-    for penalty in [1,2,3,4,5,6,7,8,9,10,1e9]:
         output_4bit, line_to_palette = dither_pyx.dither_shr(
             rgb, palettes_cam, palettes_rgb, rgb_to_cam16, float(penalty))
         screen.set_pixels(output_4bit)
@@ -233,21 +230,26 @@ def main():
         #     output_srgb = image_py.linear_to_srgb(
         #         output_screen.bitmap_to_image_rgb(bitmap)).astype(np.uint8)
         out_image = image_py.resize(
-            Image.fromarray(output_srgb), screen.X_RES, screen.Y_RES,
+            Image.fromarray(output_srgb), screen.X_RES * 2, screen.Y_RES * 2,
             srgb_output=False)  # XXX true
 
         if args.show_output:
-            out_image.show()
+            surface = pygame.surfarray.make_surface(np.asarray(
+                out_image).transpose((1, 0, 2)))
+            canvas.blit(surface, (0, 0))
+            pygame.display.flip()
 
-    # Save Double hi-res image
-    outfile = os.path.join(os.path.splitext(args.output)[0] + "-preview.png")
-    out_image.save(outfile, "PNG")
-    screen.pack()
-    # with open(args.output, "wb") as f:
-    #     f.write(bytes(screen.aux))
-    #     f.write(bytes(screen.main))
-    with open(args.output, "wb") as f:
-        f.write(bytes(screen.memory))
+        # Save Double hi-res image
+        outfile = os.path.join(os.path.splitext(args.output)[0] +
+                               "-%d-preview.png" % cluster_palette._iterations)
+        out_image.save(outfile, "PNG")
+        screen.pack()
+        # with open(args.output, "wb") as f:
+        #     f.write(bytes(screen.aux))
+        #     f.write(bytes(screen.main))
+        with open("%s-%s" % (args.output, cluster_palette._iterations),
+                  "wb") as f:
+            f.write(bytes(screen.memory))
 
 
 if __name__ == "__main__":

From 7ad560247b933f508fc76264a39fee43c6455a8f Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Tue, 16 Nov 2021 12:24:43 +0000
Subject: [PATCH 16/82] Clean up

---
 convert.py | 88 ++++++++++++++++--------------------------------------
 1 file changed, 25 insertions(+), 63 deletions(-)

diff --git a/convert.py b/convert.py
index 0fd88d2..8f71f85 100644
--- a/convert.py
+++ b/convert.py
@@ -1,20 +1,13 @@
 """Image converter to Apple II Double Hi-Res format."""
 
 import argparse
-import array
 import os.path
-import time
-import collections
-import random
-import pygame
+import warnings
 
-import colour
 from PIL import Image
+import colour
 import numpy as np
-from pyclustering.cluster.kmedians import kmedians
-from pyclustering.cluster.kmeans import kmeans
-from pyclustering.utils.metric import distance_metric, type_metric
-from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer
+import pygame
 from sklearn import cluster
 
 import dither as dither_pyx
@@ -31,7 +24,7 @@ import screen as screen_py
 class ClusterPalette:
     def __init__(self, image: Image):
         self._colours_cam = self._image_colours_cam(image)
-        self._best_palette_distances = {i: (1e9, None) for i in range(16)}
+        self._best_palette_distances = [1e9] * 16
         self._iterations = 0
         self._palettes_cam = np.empty((16, 16, 3), dtype=np.float32)
         self._palettes_rgb = np.empty((16, 16, 3), dtype=np.float32)
@@ -47,9 +40,9 @@ class ClusterPalette:
     def _fit_global_palette(self):
         """Compute a 16-colour palette for the entire image to use as
         starting point for the sub-palettes.  This should help when the image
-        has large blocks of colour since the sub-palettes will tend to pick the same coloursx."""
+        has large blocks of colour since the sub-palettes will tend to pick the
+        same colours."""
         clusters = cluster.MiniBatchKMeans(n_clusters=16, max_iter=10000)
-        # tol=0.0000000001, algorithm="elkan")
         clusters.fit_predict(self._colours_cam)
         return clusters.cluster_centers_
 
@@ -58,65 +51,33 @@ class ClusterPalette:
         print("Iteration %d" % self._iterations)
         for palette_idx in range(16):
 
-
-            # i=5: 3 * (200/16) : 7 * (200/16)
-            # print("Fitting palette %d" % palette_idx)
-            p_lower2 = max(palette_idx - 1.5, 0)
-            p_lower1 = max(palette_idx - 1, 0)
-            p_lower0 = palette_idx
-            p_upper0 = max(palette_idx + 1, 16)
-            p_upper1 = max(palette_idx + 2, 16)
-            p_upper2 = min(palette_idx + 2.5, 16)
-            # TODO: weight +/-1 and 0 bands higher
+            p_lower = max(palette_idx - 1.5, 0)
+            p_upper = min(palette_idx + 2.5, 16)
             # TODO: dynamically tune palette cuts
-            palette_pixels = np.concatenate(
-                [
-                    self._colours_cam[
-                        int(p_lower2 * (200 / 16)) * 320:int(p_upper2 * (
-                            200 / 16)) * 320, :],
-                    # self._colours_cam[
-                    #      int(p_lower1 * (200 / 16)) * 320:int(p_upper1 * (
-                    #          200 / 16)) * 320, :],
-                    # self._colours_cam[
-                    #      int(p_lower0 * (200 / 16)) * 320:int(p_upper0 * (
-                    #          200 / 16)) * 320, :],
-                ], axis=0)
+            palette_pixels = self._colours_cam[
+                        int(p_lower * (200 / 16)) * 320:int(p_upper * (
+                            200 / 16)) * 320, :]
 
-            best_wce, best_medians = self._best_palette_distances[palette_idx]
-            # if palette_idx == 0:
-            # initial_centers = kmeans_plusplus_initializer(
-            #     palette_pixels, 16).initialize()
-            # else:
-            #     initial_centers = kmedians_instance.get_medians()
-
-            # kmedians_instance = kmeans(
-            #     palette_pixels, initial_centers, tolerance=0.0000000001,
-            #     itermax=100,
-            #     metric=distance_metric(type_metric.EUCLIDEAN_SQUARE))
-            # kmedians_instance.process()
+            best_wce = self._best_palette_distances[palette_idx]
             # TODO: tolerance
             clusters = cluster.MiniBatchKMeans(
                 n_clusters=16, max_iter=10000, init=self._global_palette,
                 n_init=1)
-            # tol=0.0000000001, algorithm="elkan")
             clusters.fit_predict(palette_pixels)
-            # if kmedians_instance.get_total_wce() < best_wce:
-            #    best_wce = kmedians_instance.get_total_wce()
-            #    best_medians = kmedians_instance
             if clusters.inertia_ < (best_wce * 0.99):
-                best_wce = clusters.inertia_
-                print("Improved palette %d: %f" % (palette_idx, best_wce))
-
-                # self._palettes_cam[palette_idx, :, :] = np.array(
-                #     best_medians.get_centers()).astype(np.float32)
+                # TODO: sentinel
+                if best_wce < 1e9:
+                    print("Improved palette %d (+%f%%)" % (
+                        palette_idx, best_wce / clusters.inertia_))
 
                 self._palettes_cam[palette_idx, :, :] = np.array(
                     clusters.cluster_centers_).astype(np.float32)
-                self._best_palette_distances[palette_idx] = (
-                    best_wce, best_medians)
+                best_wce = clusters.inertia_
+                self._best_palette_distances[palette_idx] = best_wce
 
-                with colour.utilities.suppress_warnings(
-                        colour_usage_warnings=True):
+                # Suppress divide by zero warning,
+                # https://github.com/colour-science/colour/issues/900
+                with colour.utilities.suppress_warnings(python_warnings=True):
                     palette_rgb = colour.convert(
                         self._palettes_cam[palette_idx], "CAM16UCS", "RGB")
                     # SHR colour palette only uses 4-bit values
@@ -187,20 +148,21 @@ def main():
                         gamma=args.gamma_correct, srgb_output=True)).astype(
         np.float32) / 255
 
+    # TODO: flags
     penalty = 10  # 1e9
     iterations = 50
 
     pygame.init()
-    canvas = pygame.display.set_mode((640, 400))
+    # TODO: for some reason I need to execute this twice - the first time
+    #  the window is created and immediately destroyed
+    pygame.display.set_mode((640, 400))
     canvas = pygame.display.set_mode((640, 400))
     canvas.fill((0, 0, 0))
     pygame.display.flip()
-    # print("Foo")
 
     cluster_palette = ClusterPalette(rgb)
     for iteration in range(iterations):
         palettes_cam, palettes_rgb = cluster_palette.iterate()
-        # print((palettes_rgb*255).astype(np.uint8))
         for i in range(16):
             screen.set_palette(i, (np.round(palettes_rgb[i, :, :] * 15)).astype(
                 np.uint8))

From 8694ab364e039f25b07f1c4e9971860a7ef33ce6 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Tue, 16 Nov 2021 12:38:53 +0000
Subject: [PATCH 17/82] Perform conversions in linear RGB space

---
 convert.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/convert.py b/convert.py
index 8f71f85..8fa01ee 100644
--- a/convert.py
+++ b/convert.py
@@ -2,7 +2,6 @@
 
 import argparse
 import os.path
-import warnings
 
 from PIL import Image
 import colour
@@ -145,8 +144,7 @@ def main():
                         srgb_output=False).show()
     rgb = np.array(
         image_py.resize(image, screen.X_RES, screen.Y_RES,
-                        gamma=args.gamma_correct, srgb_output=True)).astype(
-        np.float32) / 255
+                        gamma=args.gamma_correct)).astype(np.float32) / 255
 
     # TODO: flags
     penalty = 10  # 1e9
@@ -170,7 +168,7 @@ def main():
         output_4bit, line_to_palette = dither_pyx.dither_shr(
             rgb, palettes_cam, palettes_rgb, rgb_to_cam16, float(penalty))
         screen.set_pixels(output_4bit)
-        output_rgb = np.zeros((200, 320, 3), dtype=np.uint8)
+        output_rgb = np.empty((200, 320, 3), dtype=np.uint8)
         for i in range(200):
             screen.line_palette[i] = line_to_palette[i]
             output_rgb[i, :, :] = (
@@ -193,7 +191,7 @@ def main():
         #         output_screen.bitmap_to_image_rgb(bitmap)).astype(np.uint8)
         out_image = image_py.resize(
             Image.fromarray(output_srgb), screen.X_RES * 2, screen.Y_RES * 2,
-            srgb_output=False)  # XXX true
+            srgb_output=True)
 
         if args.show_output:
             surface = pygame.surfarray.make_surface(np.asarray(

From 83b047b73f344f17595ea83d3b111c7f5c76961a Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Tue, 16 Nov 2021 15:44:04 +0000
Subject: [PATCH 18/82] Whoops, fix a major bug with the iterated image
 fitting: we don't want to mutate our source image!

Fix another bug introduced in the previous commit: convert from linear
rgb before quantizing //gs RGB palette since //gs RGB values are in
Rec.601 colour space.

Switch to double for colour_distance_squared and related variables,
not sure if it matters though.

When iterating palette clustering, reject the new palettes if they
would increase the total image error.  This prevents accepting changes
that are local improvements to one palette but which would introduce
more net errors elsewhere when this palette is reused.

This now seems to give monotonic improvements in image quality so no need
to write out intermediate images any more.
---
 convert.py | 79 ++++++++++++++++++++++++++++++++----------------------
 dither.pyx | 17 +++++++-----
 2 files changed, 57 insertions(+), 39 deletions(-)

diff --git a/convert.py b/convert.py
index 8fa01ee..0b931e6 100644
--- a/convert.py
+++ b/convert.py
@@ -24,10 +24,8 @@ class ClusterPalette:
     def __init__(self, image: Image):
         self._colours_cam = self._image_colours_cam(image)
         self._best_palette_distances = [1e9] * 16
-        self._iterations = 0
         self._palettes_cam = np.empty((16, 16, 3), dtype=np.float32)
         self._palettes_rgb = np.empty((16, 16, 3), dtype=np.float32)
-        self._global_palette = self._fit_global_palette()
 
     def _image_colours_cam(self, image: Image):
         colours_rgb = np.asarray(image).reshape((-1, 3))
@@ -46,16 +44,15 @@ class ClusterPalette:
         return clusters.cluster_centers_
 
     def iterate(self):
-        self._iterations += 1
-        print("Iteration %d" % self._iterations)
+        self._global_palette = self._fit_global_palette()
         for palette_idx in range(16):
 
             p_lower = max(palette_idx - 1.5, 0)
             p_upper = min(palette_idx + 2.5, 16)
             # TODO: dynamically tune palette cuts
             palette_pixels = self._colours_cam[
-                        int(p_lower * (200 / 16)) * 320:int(p_upper * (
-                            200 / 16)) * 320, :]
+                             int(p_lower * (200 / 16)) * 320:int(p_upper * (
+                                     200 / 16)) * 320, :]
 
             best_wce = self._best_palette_distances[palette_idx]
             # TODO: tolerance
@@ -63,12 +60,7 @@ class ClusterPalette:
                 n_clusters=16, max_iter=10000, init=self._global_palette,
                 n_init=1)
             clusters.fit_predict(palette_pixels)
-            if clusters.inertia_ < (best_wce * 0.99):
-                # TODO: sentinel
-                if best_wce < 1e9:
-                    print("Improved palette %d (+%f%%)" % (
-                        palette_idx, best_wce / clusters.inertia_))
-
+            if clusters.inertia_ < best_wce:
                 self._palettes_cam[palette_idx, :, :] = np.array(
                     clusters.cluster_centers_).astype(np.float32)
                 best_wce = clusters.inertia_
@@ -147,26 +139,51 @@ def main():
                         gamma=args.gamma_correct)).astype(np.float32) / 255
 
     # TODO: flags
-    penalty = 10  # 1e9
-    iterations = 50
+    penalty = 1e9  # 0  # 1e9
+    iterations = 50  # 0
 
     pygame.init()
     # TODO: for some reason I need to execute this twice - the first time
     #  the window is created and immediately destroyed
-    pygame.display.set_mode((640, 400))
+    _ = pygame.display.set_mode((640, 400))
     canvas = pygame.display.set_mode((640, 400))
     canvas.fill((0, 0, 0))
     pygame.display.flip()
 
+    total_image_error = 1e9
     cluster_palette = ClusterPalette(rgb)
+    image_generation = 0
     for iteration in range(iterations):
-        palettes_cam, palettes_rgb = cluster_palette.iterate()
-        for i in range(16):
-            screen.set_palette(i, (np.round(palettes_rgb[i, :, :] * 15)).astype(
-                np.uint8))
+        old_best_palette_distances = cluster_palette._best_palette_distances
+        old_palettes_cam = cluster_palette._palettes_cam
+        old_palettes_rgb = cluster_palette._palettes_rgb
 
-        output_4bit, line_to_palette = dither_pyx.dither_shr(
-            rgb, palettes_cam, palettes_rgb, rgb_to_cam16, float(penalty))
+        new_palettes_cam, new_palettes_rgb = cluster_palette.iterate()
+        output_4bit, line_to_palette, new_total_image_error = \
+            dither_pyx.dither_shr(
+                rgb, new_palettes_cam, new_palettes_rgb, rgb_to_cam16,
+                float(penalty)
+            )
+
+        if new_total_image_error < total_image_error:
+            if total_image_error < 1e9:
+                print("Improved quality +%f%% (%f)" % (
+                    (1 - new_total_image_error / total_image_error) * 100,
+                    new_total_image_error))
+            total_image_error = new_total_image_error
+            palettes_rgb = new_palettes_rgb
+        else:
+            cluster_palette._palettes_cam = old_palettes_cam
+            cluster_palette._palettes_rgb = old_palettes_rgb
+            cluster_palette._best_palette_distances = old_best_palette_distances
+            continue
+
+        image_generation += 1
+        for i in range(16):
+            screen.set_palette(i, (
+                np.round(image_py.linear_to_srgb(palettes_rgb[i, :,
+                                                 :] * 255) / 255 * 15)).astype(
+                np.uint8))
         screen.set_pixels(output_4bit)
         output_rgb = np.empty((200, 320, 3), dtype=np.uint8)
         for i in range(200):
@@ -199,17 +216,15 @@ def main():
             canvas.blit(surface, (0, 0))
             pygame.display.flip()
 
-        # Save Double hi-res image
-        outfile = os.path.join(os.path.splitext(args.output)[0] +
-                               "-%d-preview.png" % cluster_palette._iterations)
-        out_image.save(outfile, "PNG")
-        screen.pack()
-        # with open(args.output, "wb") as f:
-        #     f.write(bytes(screen.aux))
-        #     f.write(bytes(screen.main))
-        with open("%s-%s" % (args.output, cluster_palette._iterations),
-                  "wb") as f:
-            f.write(bytes(screen.memory))
+    # Save Double hi-res image
+    outfile = os.path.join(os.path.splitext(args.output)[0] + "-preview.png")
+    out_image.save(outfile, "PNG")
+    screen.pack()
+    # with open(args.output, "wb") as f:
+    #     f.write(bytes(screen.aux))
+    #     f.write(bytes(screen.main))
+    with open(args.output, "wb") as f:
+        f.write(bytes(screen.memory))
 
 
 if __name__ == "__main__":
diff --git a/dither.pyx b/dither.pyx
index cea6a28..ff974d3 100644
--- a/dither.pyx
+++ b/dither.pyx
@@ -171,7 +171,7 @@ cdef inline float fabs(float value) nogil:
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-cdef inline float colour_distance_squared(float[::1] colour1, float[::1] colour2) nogil:
+cdef inline double colour_distance_squared(float[::1] colour1, float[::1] colour2) nogil:
     return (colour1[0] - colour2[0]) ** 2 + (colour1[1] - colour2[1]) ** 2 + (colour1[2] - colour2[2]) ** 2
 
 
@@ -339,20 +339,21 @@ import colour
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def dither_shr(float[:, :, ::1] working_image, float[:, :, ::1] palettes_cam, float[:, :, ::1] palettes_rgb, float[:,::1] rgb_to_cam16ucs, float penalty):
+def dither_shr(float[:, :, ::1] input_rgb, float[:, :, ::1] palettes_cam, float[:, :, ::1] palettes_rgb, float[:,::1] rgb_to_cam16ucs, float penalty):
     cdef int y, x, idx, best_colour_idx, best_palette
-    cdef float best_distance, distance
+    cdef double best_distance, distance, total_image_error
     cdef float[::1] best_colour_rgb, pixel_cam, colour_rgb, colour_cam
     cdef float quant_error
     cdef float[:, ::1] palette_rgb
 
     cdef (unsigned char)[:, ::1] output_4bit = np.zeros((200, 320), dtype=np.uint8)
-    # cdef (unsigned char)[:, :, ::1] output_rgb = np.zeros((200, 320, 3), dtype=np.uint8)
-
+    cdef float[:, :, ::1] working_image = np.copy(input_rgb)
     cdef float[:, ::1] line_cam = np.zeros((320, 3), dtype=np.float32)
 
     cdef int[::1] line_to_palette = np.zeros(200, dtype=np.int32)
+
     best_palette = 15
+    total_image_error = 0.0
     for y in range(200):
         # print(y)
         for x in range(320):
@@ -380,6 +381,8 @@ def dither_shr(float[:, :, ::1] working_image, float[:, :, ::1] palettes_cam, fl
                     best_colour_idx = idx
             best_colour_rgb = palette_rgb[best_colour_idx]
             output_4bit[y, x] = best_colour_idx
+            total_image_error += best_distance
+            # print(y,x,best_distance,total_image_error)
 
             for i in range(3):
                 quant_error = working_image[y, x, i] - best_colour_rgb[i]
@@ -449,7 +452,7 @@ def dither_shr(float[:, :, ::1] working_image, float[:, :, ::1] palettes_cam, fl
                 #            working_image[y + 2, x + 2, i] + quant_error * (1 / 48),
                 #            0, 1)
 
-    return np.array(output_4bit, dtype=np.uint8), line_to_palette
+    return np.array(output_4bit, dtype=np.uint8), line_to_palette, total_image_error
 
 import collections
 import random
@@ -511,7 +514,7 @@ def k_means_with_fixed_centroids(
 @cython.wraparound(False)
 cdef int best_palette_for_line(float [:, ::1] line_cam, float[:, :, ::1] palettes_cam, int base_palette_idx, int last_palette_idx, float last_penalty) nogil:
     cdef int palette_idx, best_palette_idx, palette_entry_idx, pixel_idx
-    cdef float best_total_dist, total_dist, best_pixel_dist, pixel_dist
+    cdef double best_total_dist, total_dist, best_pixel_dist, pixel_dist
     cdef float[:, ::1] palette_cam
     cdef float[::1] pixel_cam, palette_entry
 

From 91e4fd7cba158d864daad3f936e4555b4b7ecb37 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Tue, 16 Nov 2021 15:50:19 +0000
Subject: [PATCH 19/82] Add comment

---
 convert.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/convert.py b/convert.py
index 0b931e6..f40dc05 100644
--- a/convert.py
+++ b/convert.py
@@ -154,6 +154,7 @@ def main():
     cluster_palette = ClusterPalette(rgb)
     image_generation = 0
     for iteration in range(iterations):
+        # TODO: clean this up - e.g. pass in an acceptance lambda to iterate()
         old_best_palette_distances = cluster_palette._best_palette_distances
         old_palettes_cam = cluster_palette._palettes_cam
         old_palettes_rgb = cluster_palette._palettes_rgb

From 5111696d5cdd7e5f8c310dffe1311bf6005a1f05 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Tue, 16 Nov 2021 16:57:44 +0000
Subject: [PATCH 20/82] Compute number of unique colours.  This does not seem
 to strongly depend on the width of the palette sampling.

Note the potential issue that since we are clustering in CAM space but
then quantizing to a (much coarser) 4-bit RGB value we could end up
picking multiple centroids that will be represented by the same RGB
value.  This doesn't seem to be a major issue though (e.g. 3-4 lost
colours per typical image).
---
 convert.py | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/convert.py b/convert.py
index f40dc05..6e0a743 100644
--- a/convert.py
+++ b/convert.py
@@ -46,16 +46,22 @@ class ClusterPalette:
     def iterate(self):
         self._global_palette = self._fit_global_palette()
         for palette_idx in range(16):
-
-            p_lower = max(palette_idx - 1.5, 0)
-            p_upper = min(palette_idx + 2.5, 16)
+            palette_band_width = 3
+            p_lower = max(palette_idx + 0.5 - (palette_band_width / 2), 0)
+            p_upper = min(palette_idx + 0.5 + (palette_band_width / 2), 16)
             # TODO: dynamically tune palette cuts
             palette_pixels = self._colours_cam[
                              int(p_lower * (200 / 16)) * 320:int(p_upper * (
                                      200 / 16)) * 320, :]
 
+            # TODO: clustering should be aware of the fact that we will
+            #  down-quantize to a 4-bit RGB value afterwards.  i.e. we should
+            #  not pick multiple centroids that will quantize to the same RGB
+            #  value since we'll "waste" a palette entry.  This doesn't seem to
+            #  be a major issue in practise though, and fixing it would require
+            #  implementing our own (optimized) k-means.
             best_wce = self._best_palette_distances[palette_idx]
-            # TODO: tolerance
+            # TODO: tune tolerance
             clusters = cluster.MiniBatchKMeans(
                 n_clusters=16, max_iter=10000, init=self._global_palette,
                 n_init=1)
@@ -138,6 +144,8 @@ def main():
         image_py.resize(image, screen.X_RES, screen.Y_RES,
                         gamma=args.gamma_correct)).astype(np.float32) / 255
 
+    iigs_palette = np.empty((16, 16, 3), dtype=np.uint8)
+
     # TODO: flags
     penalty = 1e9  # 0  # 1e9
     iterations = 50  # 0
@@ -180,11 +188,13 @@ def main():
             continue
 
         image_generation += 1
+
         for i in range(16):
-            screen.set_palette(i, (
-                np.round(image_py.linear_to_srgb(palettes_rgb[i, :,
-                                                 :] * 255) / 255 * 15)).astype(
-                np.uint8))
+            iigs_palette[i, :, :] = (
+                np.round(image_py.linear_to_srgb(
+                    palettes_rgb[i, :, :] * 255) / 255 * 15)).astype(np.uint8)
+            screen.set_palette(i, iigs_palette[i, :, :])
+
         screen.set_pixels(output_4bit)
         output_rgb = np.empty((200, 320, 3), dtype=np.uint8)
         for i in range(200):
@@ -217,6 +227,9 @@ def main():
             canvas.blit(surface, (0, 0))
             pygame.display.flip()
 
+    unique_colours = np.unique(iigs_palette.reshape(-1, 3), axis=0).shape[0]
+    print("%d unique colours" % unique_colours)
+
     # Save Double hi-res image
     outfile = os.path.join(os.path.splitext(args.output)[0] + "-preview.png")
     out_image.save(outfile, "PNG")

From 613a36909c17ff48405b35a9d62af421839d1b9d Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Tue, 16 Nov 2021 17:23:31 +0000
Subject: [PATCH 21/82] Suppress pygame message at startup

Keep iterating until N iterations without quality improvement
---
 convert.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/convert.py b/convert.py
index 6e0a743..98fe41c 100644
--- a/convert.py
+++ b/convert.py
@@ -6,9 +6,12 @@ import os.path
 from PIL import Image
 import colour
 import numpy as np
-import pygame
 from sklearn import cluster
 
+from os import environ
+environ['PYGAME_HIDE_SUPPORT_PROMPT'] = '1'
+import pygame
+
 import dither as dither_pyx
 import dither_pattern
 import image as image_py
@@ -147,8 +150,8 @@ def main():
     iigs_palette = np.empty((16, 16, 3), dtype=np.uint8)
 
     # TODO: flags
-    penalty = 1e9  # 0  # 1e9
-    iterations = 50  # 0
+    penalty = 1e9
+    iterations = 50
 
     pygame.init()
     # TODO: for some reason I need to execute this twice - the first time
@@ -160,8 +163,8 @@ def main():
 
     total_image_error = 1e9
     cluster_palette = ClusterPalette(rgb)
-    image_generation = 0
-    for iteration in range(iterations):
+    iterations_since_improvement = 0
+    while iterations_since_improvement < iterations:
         # TODO: clean this up - e.g. pass in an acceptance lambda to iterate()
         old_best_palette_distances = cluster_palette._best_palette_distances
         old_palettes_cam = cluster_palette._palettes_cam
@@ -181,14 +184,14 @@ def main():
                     new_total_image_error))
             total_image_error = new_total_image_error
             palettes_rgb = new_palettes_rgb
+            iterations_since_improvement = 0
         else:
             cluster_palette._palettes_cam = old_palettes_cam
             cluster_palette._palettes_rgb = old_palettes_rgb
             cluster_palette._best_palette_distances = old_best_palette_distances
+            iterations_since_improvement += 1
             continue
 
-        image_generation += 1
-
         for i in range(16):
             iigs_palette[i, :, :] = (
                 np.round(image_py.linear_to_srgb(

From bb70eea7b0fa9d53a8913c5ff9d6d67519308e15 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Tue, 16 Nov 2021 21:07:13 +0000
Subject: [PATCH 22/82] Cleanup

---
 dither.pyx | 65 ++++--------------------------------------------------
 1 file changed, 4 insertions(+), 61 deletions(-)

diff --git a/dither.pyx b/dither.pyx
index ff974d3..e16e499 100644
--- a/dither.pyx
+++ b/dither.pyx
@@ -335,11 +335,12 @@ def dither_image(
     free(cdither.pattern)
     return image_nbit_to_bitmap(image_nbit, xres, yres, palette_depth)
 
-import colour
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def dither_shr(float[:, :, ::1] input_rgb, float[:, :, ::1] palettes_cam, float[:, :, ::1] palettes_rgb, float[:,::1] rgb_to_cam16ucs, float penalty):
+def dither_shr(
+        float[:, :, ::1] input_rgb, float[:, :, ::1] palettes_cam, float[:, :, ::1] palettes_rgb,
+        float[:,::1] rgb_to_cam16ucs, float penalty):
     cdef int y, x, idx, best_colour_idx, best_palette
     cdef double best_distance, distance, total_image_error
     cdef float[::1] best_colour_rgb, pixel_cam, colour_rgb, colour_cam
@@ -355,14 +356,12 @@ def dither_shr(float[:, :, ::1] input_rgb, float[:, :, ::1] palettes_cam, float[
     best_palette = 15
     total_image_error = 0.0
     for y in range(200):
-        # print(y)
         for x in range(320):
             colour_cam = convert_rgb_to_cam16ucs(
                 rgb_to_cam16ucs, working_image[y,x,0], working_image[y,x,1], working_image[y,x,2])
             line_cam[x, :] = colour_cam
 
         best_palette = best_palette_for_line(line_cam, palettes_cam, <int>(y * 16 / 200), best_palette, penalty)
-        # print("-->", best_palette)
         palette_rgb = palettes_rgb[best_palette, :, :]
         line_to_palette[y] = best_palette
 
@@ -382,7 +381,6 @@ def dither_shr(float[:, :, ::1] input_rgb, float[:, :, ::1] palettes_cam, float[
             best_colour_rgb = palette_rgb[best_colour_idx]
             output_4bit[y, x] = best_colour_idx
             total_image_error += best_distance
-            # print(y,x,best_distance,total_image_error)
 
             for i in range(3):
                 quant_error = working_image[y, x, i] - best_colour_rgb[i]
@@ -395,6 +393,7 @@ def dither_shr(float[:, :, ::1] input_rgb, float[:, :, ::1] palettes_cam, float[
                     working_image[y, x + 1, i] = clip(
                         working_image[y, x + 1, i] + quant_error * (7 / 16), 0, 1)
                 if y < 199:
+                    # TODO: parametrize the 0.5x decay factor
                     if x > 0:
                         working_image[y + 1, x - 1, i] = clip(
                             working_image[y + 1, x - 1, i] + quant_error * (3 / 32), 0, 1)
@@ -454,62 +453,6 @@ def dither_shr(float[:, :, ::1] input_rgb, float[:, :, ::1] palettes_cam, float[
 
     return np.array(output_4bit, dtype=np.uint8), line_to_palette, total_image_error
 
-import collections
-import random
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-def k_means_with_fixed_centroids(
-        int n_clusters, float[:, ::1] data, float[:, ::1] fixed_centroids = None,
-        int iterations = 10000, float tolerance = 1e-3):
-    cdef int i, iteration, centroid_idx, num_fixed_centroids, num_random_centroids, best_centroid_idx
-    cdef float[::1] point, centroid, new_centroid, old_centroid
-    cdef float[:, ::1] centroids
-    cdef float best_dist, centroid_movement, dist
-
-    centroids = np.zeros((n_clusters, 3), dtype=np.float32)
-    if fixed_centroids is not None:
-        centroids[:fixed_centroids.shape[0], :] = fixed_centroids
-    num_fixed_centroids = fixed_centroids.shape[0] if fixed_centroids is not None else 0
-    num_random_centroids = n_clusters - num_fixed_centroids
-
-    # TODO: kmeans++ initialization
-    cdef int rand_idx = random.randint(0, data.shape[0])
-    for i in range(num_random_centroids):
-        centroids[num_fixed_centroids + i, :] = data[rand_idx, :]
-
-    cdef int[::1] centroid_weights = np.zeros(n_clusters, dtype=np.int32)
-    for iteration in range(iterations):
-        # print("centroids ", centroids)
-        closest_points = collections.defaultdict(list)
-        for point in data:
-            best_dist = 1e9
-            best_centroid_idx = 0
-            for centroid_idx in range(n_clusters):
-                centroid = centroids[centroid_idx, :]
-                dist = colour_distance(centroid, point)
-                if dist < best_dist:
-                    best_dist = dist
-                    best_centroid_idx = centroid_idx
-            closest_points[best_centroid_idx].append(point)
-
-        centroid_movement = 0
-        for centroid_idx, points in closest_points.items():
-            centroid_weights[centroid_idx] = len(points)
-            if centroid_idx < num_fixed_centroids:
-                continue
-            new_centroid = np.median(np.array(points), axis=0)
-            old_centroid = centroids[centroid_idx]
-            centroid_movement += colour_distance(old_centroid, new_centroid)
-            centroids[centroid_idx, :] = new_centroid
-        # print("iteration %d: movement %f" % (iteration, centroid_movement))
-        if centroid_movement < tolerance:
-            break
-
-    weighted_centroids = list(zip(centroid_weights, [tuple(c) for c in centroids]))
-    print(weighted_centroids)
-    return np.array([c for w, c in sorted(weighted_centroids, reverse=True)], dtype=np.float32)
-
 @cython.boundscheck(False)
 @cython.wraparound(False)
 cdef int best_palette_for_line(float [:, ::1] line_cam, float[:, :, ::1] palettes_cam, int base_palette_idx, int last_palette_idx, float last_penalty) nogil:

From f2f07ddc0472e0b63eac2ddc17ab41671889681f Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Tue, 16 Nov 2021 23:45:11 +0000
Subject: [PATCH 23/82] Refactor and add comments

---
 convert.py | 148 +++++++++++++++++++++++++++++++++++------------------
 1 file changed, 97 insertions(+), 51 deletions(-)

diff --git a/convert.py b/convert.py
index 98fe41c..e266686 100644
--- a/convert.py
+++ b/convert.py
@@ -2,6 +2,7 @@
 
 import argparse
 import os.path
+from typing import Tuple, List
 
 from PIL import Image
 import colour
@@ -9,6 +10,7 @@ import numpy as np
 from sklearn import cluster
 
 from os import environ
+
 environ['PYGAME_HIDE_SUPPORT_PROMPT'] = '1'
 import pygame
 
@@ -23,12 +25,15 @@ import screen as screen_py
 # - support LR/DLR
 # - support HGR
 
+
 class ClusterPalette:
-    def __init__(self, image: Image):
+    def __init__(
+            self, image: Image):
         self._colours_cam = self._image_colours_cam(image)
-        self._best_palette_distances = [1e9] * 16
+        self._errors = [1e9] * 16
         self._palettes_cam = np.empty((16, 16, 3), dtype=np.float32)
         self._palettes_rgb = np.empty((16, 16, 3), dtype=np.float32)
+        self._global_palette = np.empty((16, 16, 3), dtype=np.float32)
 
     def _image_colours_cam(self, image: Image):
         colours_rgb = np.asarray(image).reshape((-1, 3))
@@ -42,14 +47,45 @@ class ClusterPalette:
         starting point for the sub-palettes.  This should help when the image
         has large blocks of colour since the sub-palettes will tend to pick the
         same colours."""
+
         clusters = cluster.MiniBatchKMeans(n_clusters=16, max_iter=10000)
         clusters.fit_predict(self._colours_cam)
         return clusters.cluster_centers_
 
-    def iterate(self):
+    def propose_palettes(self) -> Tuple[np.ndarray, np.ndarray, List[float]]:
+        """Attempt to find new palettes that locally improve image quality.
+
+        Re-fit a set of 16 palettes from (overlapping) line ranges of the
+        source image, using k-means clustering in CAM16-UCS colour space.
+
+        We maintain the total image error for the pixels on which the 16
+        palettes are clustered.  A new palette that increases this local
+        image error is rejected.
+
+        New palettes that reduce local error cannot be applied immediately
+        though, because they may cause an increase in *global* image error
+        when dithering.  i.e. they would reduce the overall image quality.
+
+        The current (locally) best palettes are returned and can be applied
+        using accept_palettes().
+        """
+
+        # Compute a new 16-colour global palette for the entire image,
+        # used as the starting center positions for k-means clustering of the
+        # individual palettes
         self._global_palette = self._fit_global_palette()
+
+        new_errors = list(self._errors)
+        new_palettes_cam = np.copy(self._palettes_cam)
+        new_palettes_rgb = np.copy(self._palettes_rgb)
+
+        # The 16 palettes are striped across consecutive (overlapping) line
+        # ranges.  The basic unit is 200/16 = 12.5 lines, but we extend the
+        # line range to cover a multiple of this so that the palette ranges
+        # overlap.  Since nearby lines tend to have similar colours, this has
+        # the effect of smoothing out the colour transitions across palettes.
+        palette_band_width = 3
         for palette_idx in range(16):
-            palette_band_width = 3
             p_lower = max(palette_idx + 0.5 - (palette_band_width / 2), 0)
             p_upper = min(palette_idx + 0.5 + (palette_band_width / 2), 16)
             # TODO: dynamically tune palette cuts
@@ -58,34 +94,41 @@ class ClusterPalette:
                                      200 / 16)) * 320, :]
 
             # TODO: clustering should be aware of the fact that we will
-            #  down-quantize to a 4-bit RGB value afterwards.  i.e. we should
+            #  quantize to a 4-bit RGB value afterwards.  i.e. we should
             #  not pick multiple centroids that will quantize to the same RGB
             #  value since we'll "waste" a palette entry.  This doesn't seem to
             #  be a major issue in practise though, and fixing it would require
             #  implementing our own (optimized) k-means.
-            best_wce = self._best_palette_distances[palette_idx]
             # TODO: tune tolerance
             clusters = cluster.MiniBatchKMeans(
                 n_clusters=16, max_iter=10000, init=self._global_palette,
                 n_init=1)
             clusters.fit_predict(palette_pixels)
-            if clusters.inertia_ < best_wce:
-                self._palettes_cam[palette_idx, :, :] = np.array(
-                    clusters.cluster_centers_).astype(np.float32)
-                best_wce = clusters.inertia_
-                self._best_palette_distances[palette_idx] = best_wce
+            palette_error = clusters.inertia_
+            if palette_error >= self._errors[palette_idx]:
+                # Not a local improvement to existing palette
+                continue
 
-                # Suppress divide by zero warning,
-                # https://github.com/colour-science/colour/issues/900
-                with colour.utilities.suppress_warnings(python_warnings=True):
-                    palette_rgb = colour.convert(
-                        self._palettes_cam[palette_idx], "CAM16UCS", "RGB")
-                    # SHR colour palette only uses 4-bit values
-                    palette_rgb = np.round(palette_rgb * 15) / 15
-                    self._palettes_rgb[palette_idx, :, :] = palette_rgb.astype(
-                        np.float32)
+            palette_cam = np.array(clusters.cluster_centers_).astype(np.float32)
+            # Suppress divide by zero warning,
+            # https://github.com/colour-science/colour/issues/900
+            with colour.utilities.suppress_warnings(python_warnings=True):
+                # SHR colour palette only uses 4-bit RGB values
+                palette_rgb = (np.round(colour.convert(
+                    palette_cam, "CAM16UCS", "RGB") * 15) / 15).astype(
+                    np.float32)
+            new_palettes_cam[palette_idx, :, :] = palette_cam
+            new_palettes_rgb[palette_idx, :, :] = palette_rgb
+            new_errors[palette_idx] = palette_error
 
-        return self._palettes_cam, self._palettes_rgb
+        return new_palettes_cam, new_palettes_rgb, new_errors
+
+    def accept_palettes(
+            self, new_palettes_cam: np.ndarray,
+            new_palettes_rgb: np.ndarray, new_errors: List[float]):
+        self._palettes_cam = np.copy(new_palettes_cam)
+        self._palettes_rgb = np.copy(new_palettes_rgb)
+        self._errors = list(new_errors)
 
 
 def main():
@@ -147,8 +190,6 @@ def main():
         image_py.resize(image, screen.X_RES, screen.Y_RES,
                         gamma=args.gamma_correct)).astype(np.float32) / 255
 
-    iigs_palette = np.empty((16, 16, 3), dtype=np.uint8)
-
     # TODO: flags
     penalty = 1e9
     iterations = 50
@@ -162,42 +203,47 @@ def main():
     pygame.display.flip()
 
     total_image_error = 1e9
-    cluster_palette = ClusterPalette(rgb)
     iterations_since_improvement = 0
-    while iterations_since_improvement < iterations:
-        # TODO: clean this up - e.g. pass in an acceptance lambda to iterate()
-        old_best_palette_distances = cluster_palette._best_palette_distances
-        old_palettes_cam = cluster_palette._palettes_cam
-        old_palettes_rgb = cluster_palette._palettes_rgb
 
-        new_palettes_cam, new_palettes_rgb = cluster_palette.iterate()
-        output_4bit, line_to_palette, new_total_image_error = \
+    palette_iigs = np.empty((16, 16, 3), dtype=np.uint8)
+    cluster_palette = ClusterPalette(rgb)
+
+    while iterations_since_improvement < iterations:
+        new_palettes_cam, new_palettes_rgb, new_palette_errors = (
+            cluster_palette.propose_palettes())
+
+        # Recompute image with proposed palettes and check whether it has
+        # lower total image error than our previous best.
+        new_output_4bit, new_line_to_palette, new_total_image_error = \
             dither_pyx.dither_shr(
                 rgb, new_palettes_cam, new_palettes_rgb, rgb_to_cam16,
-                float(penalty)
-            )
-
-        if new_total_image_error < total_image_error:
-            if total_image_error < 1e9:
-                print("Improved quality +%f%% (%f)" % (
-                    (1 - new_total_image_error / total_image_error) * 100,
-                    new_total_image_error))
-            total_image_error = new_total_image_error
-            palettes_rgb = new_palettes_rgb
-            iterations_since_improvement = 0
-        else:
-            cluster_palette._palettes_cam = old_palettes_cam
-            cluster_palette._palettes_rgb = old_palettes_rgb
-            cluster_palette._best_palette_distances = old_best_palette_distances
+                float(penalty))
+        if new_total_image_error >= total_image_error:
             iterations_since_improvement += 1
             continue
 
+        # We found a globally better set of palettes
+        iterations_since_improvement = 0
+        cluster_palette.accept_palettes(
+            new_palettes_cam, new_palettes_rgb, new_palette_errors)
+
+        if total_image_error < 1e9:
+            print("Improved quality +%f%% (%f)" % (
+                (1 - new_total_image_error / total_image_error) * 100,
+                new_total_image_error))
+        output_4bit = new_output_4bit
+        line_to_palette = new_line_to_palette
+        total_image_error = new_total_image_error
+        palettes_rgb = new_palettes_rgb
+
+        # Recompute 4-bit //gs RGB palettes
         for i in range(16):
-            iigs_palette[i, :, :] = (
+            palette_iigs[i, :, :] = (
                 np.round(image_py.linear_to_srgb(
                     palettes_rgb[i, :, :] * 255) / 255 * 15)).astype(np.uint8)
-            screen.set_palette(i, iigs_palette[i, :, :])
+            screen.set_palette(i, palette_iigs[i, :, :])
 
+        # Recompute current screen RGB image
         screen.set_pixels(output_4bit)
         output_rgb = np.empty((200, 320, 3), dtype=np.uint8)
         for i in range(200):
@@ -225,12 +271,12 @@ def main():
             srgb_output=True)
 
         if args.show_output:
-            surface = pygame.surfarray.make_surface(np.asarray(
-                out_image).transpose((1, 0, 2)))
+            surface = pygame.surfarray.make_surface(
+                np.asarray(out_image).transpose((1, 0, 2)))  # flip y/x axes
             canvas.blit(surface, (0, 0))
             pygame.display.flip()
 
-    unique_colours = np.unique(iigs_palette.reshape(-1, 3), axis=0).shape[0]
+    unique_colours = np.unique(palette_iigs.reshape(-1, 3), axis=0).shape[0]
     print("%d unique colours" % unique_colours)
 
     # Save Double hi-res image

From 0009ce89139a7e4f953ab282ea19f823d28725c2 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Wed, 17 Nov 2021 17:09:42 +0000
Subject: [PATCH 24/82] - allow reserving a number of colours which are to be
 shared across   all palettes.  This will be useful for Total Replay which
 does an   animation effect when displaying the image (first set palettes,
 then   transition in pixels)

- this requires us to go back to computing k-means ourselves instead of
  using sklearn, since it can't keep some centroids fixed

- try to be more careful about //gs RGB values, which are in the
  Rec.601 colour space.  This isn't quite right yet - the issue seems
  to be that since we dither in linear RGB space but quantize in the
  nonlinear space, small differences may lead to a +/- 1 error in the
  4-bit //gs RGB value, which is quite noticeable.  Instead we need to
  be clustering and/or dithering with awareness of the quantized
  palette space.
---
 convert.py | 113 ++++++++++++++++++++++++++++++++++++++---------------
 dither.pyx |  54 ++++++++++++++++++++++++-
 screen.py  |   1 +
 3 files changed, 136 insertions(+), 32 deletions(-)

diff --git a/convert.py b/convert.py
index e266686..8483f63 100644
--- a/convert.py
+++ b/convert.py
@@ -28,8 +28,9 @@ import screen as screen_py
 
 class ClusterPalette:
     def __init__(
-            self, image: Image):
+            self, image: Image, reserved_colours=0):
         self._colours_cam = self._image_colours_cam(image)
+        self._reserved_colours = reserved_colours
         self._errors = [1e9] * 16
         self._palettes_cam = np.empty((16, 16, 3), dtype=np.float32)
         self._palettes_rgb = np.empty((16, 16, 3), dtype=np.float32)
@@ -50,7 +51,14 @@ class ClusterPalette:
 
         clusters = cluster.MiniBatchKMeans(n_clusters=16, max_iter=10000)
         clusters.fit_predict(self._colours_cam)
-        return clusters.cluster_centers_
+
+        labels = clusters.labels_
+        frequency_order = [
+            k for k, v in sorted(
+                # List of (palette idx, frequency count)
+                list(zip(*np.unique(labels, return_counts=True))),
+                key=lambda kv: kv[1], reverse=True)]
+        return clusters.cluster_centers_[frequency_order]
 
     def propose_palettes(self) -> Tuple[np.ndarray, np.ndarray, List[float]]:
         """Attempt to find new palettes that locally improve image quality.
@@ -69,15 +77,16 @@ class ClusterPalette:
         The current (locally) best palettes are returned and can be applied
         using accept_palettes().
         """
+        new_errors = list(self._errors)
+        new_palettes_cam = np.copy(self._palettes_cam)
+        new_palettes_rgb = np.copy(self._palettes_rgb)
 
         # Compute a new 16-colour global palette for the entire image,
         # used as the starting center positions for k-means clustering of the
         # individual palettes
         self._global_palette = self._fit_global_palette()
 
-        new_errors = list(self._errors)
-        new_palettes_cam = np.copy(self._palettes_cam)
-        new_palettes_rgb = np.copy(self._palettes_rgb)
+        dynamic_colours = 16 - self._reserved_colours
 
         # The 16 palettes are striped across consecutive (overlapping) line
         # ranges.  The basic unit is 200/16 = 12.5 lines, but we extend the
@@ -100,25 +109,53 @@ class ClusterPalette:
             #  be a major issue in practise though, and fixing it would require
             #  implementing our own (optimized) k-means.
             # TODO: tune tolerance
-            clusters = cluster.MiniBatchKMeans(
-                n_clusters=16, max_iter=10000, init=self._global_palette,
-                n_init=1)
-            clusters.fit_predict(palette_pixels)
-            palette_error = clusters.inertia_
-            if palette_error >= self._errors[palette_idx]:
-                # Not a local improvement to existing palette
+            # clusters = cluster.MiniBatchKMeans(
+            #     n_clusters=16, max_iter=10000,
+            #     init=self._global_palette,
+            #     n_init=1)
+            # clusters.fit_predict(palette_pixels)
+            #
+            # palette_error = clusters.inertia_
+
+            clusters, palette_error = dither_pyx.k_means_with_fixed_centroids(
+                n_clusters=16, n_fixed=self._reserved_colours,
+                samples=palette_pixels, initial_centroids=self._global_palette,
+                max_iterations=1000, tolerance=1e-4
+            )
+
+            if (palette_error >= self._errors[palette_idx] and not
+            self._reserved_colours):
+                # Not a local improvement to the existing palette, so ignore it.
+                # We can't take this shortcut when we're reserving colours
+                # because it would break the invariant that all palettes must
+                # share colours.
                 continue
 
-            palette_cam = np.array(clusters.cluster_centers_).astype(np.float32)
+            new_palettes_cam[palette_idx, :, :] = np.array(
+                # clusters.cluster_centers_).astype(np.float32)
+                clusters).astype(np.float32)
             # Suppress divide by zero warning,
             # https://github.com/colour-science/colour/issues/900
             with colour.utilities.suppress_warnings(python_warnings=True):
-                # SHR colour palette only uses 4-bit RGB values
-                palette_rgb = (np.round(colour.convert(
-                    palette_cam, "CAM16UCS", "RGB") * 15) / 15).astype(
-                    np.float32)
-            new_palettes_cam[palette_idx, :, :] = palette_cam
-            new_palettes_rgb[palette_idx, :, :] = palette_rgb
+                palette_rgb = colour.convert(
+                    new_palettes_cam[palette_idx, :, :], "CAM16UCS", "RGB")
+            palette_rgb_rec601 = np.clip(image_py.srgb_to_linear(
+                colour.YCbCr_to_RGB(
+                    colour.RGB_to_YCbCr(
+                        image_py.linear_to_srgb(palette_rgb * 255) / 255,
+                        K=colour.WEIGHTS_YCBCR['ITU-R BT.709']),
+                    K=colour.WEIGHTS_YCBCR['ITU-R BT.601']) * 255) / 255, 0, 1)
+            # palette_rgb = np.clip(
+            #     image_py.srgb_to_linear(
+            #         colour.YCbCr_to_RGB(
+            #             colour.RGB_to_YCbCr(
+            #                 image_py.linear_to_srgb(
+            #                     palette_rgb[:, :] * 255) / 255,
+            #                 K=colour.WEIGHTS_YCBCR['ITU-R BT.709']),
+            #             K=colour.WEIGHTS_YCBCR[
+            #                 'ITU-R BT.601']) * 255) / 255,
+            #     0, 1)
+            new_palettes_rgb[palette_idx, :, :] =  palette_rgb # palette_rgb_rec601
             new_errors[palette_idx] = palette_error
 
         return new_palettes_cam, new_palettes_rgb, new_errors
@@ -192,7 +229,7 @@ def main():
 
     # TODO: flags
     penalty = 1e9
-    iterations = 50
+    iterations = 10  # 50
 
     pygame.init()
     # TODO: for some reason I need to execute this twice - the first time
@@ -205,8 +242,8 @@ def main():
     total_image_error = 1e9
     iterations_since_improvement = 0
 
-    palette_iigs = np.empty((16, 16, 3), dtype=np.uint8)
-    cluster_palette = ClusterPalette(rgb)
+    palettes_iigs = np.empty((16, 16, 3), dtype=np.uint8)
+    cluster_palette = ClusterPalette(rgb, reserved_colours=1)
 
     while iterations_since_improvement < iterations:
         new_palettes_cam, new_palettes_rgb, new_palette_errors = (
@@ -237,11 +274,16 @@ def main():
         palettes_rgb = new_palettes_rgb
 
         # Recompute 4-bit //gs RGB palettes
+        palette_rgb_rec601 = np.clip(
+            colour.YCbCr_to_RGB(
+                colour.RGB_to_YCbCr(
+                    image_py.linear_to_srgb(palettes_rgb * 255) / 255,
+                    K=colour.WEIGHTS_YCBCR['ITU-R BT.709']),
+                K=colour.WEIGHTS_YCBCR['ITU-R BT.601']), 0, 1)
+
+        palettes_iigs = np.round(palette_rgb_rec601 * 15).astype(np.uint8)
         for i in range(16):
-            palette_iigs[i, :, :] = (
-                np.round(image_py.linear_to_srgb(
-                    palettes_rgb[i, :, :] * 255) / 255 * 15)).astype(np.uint8)
-            screen.set_palette(i, palette_iigs[i, :, :])
+            screen.set_palette(i, palettes_iigs[i, :, :])
 
         # Recompute current screen RGB image
         screen.set_pixels(output_4bit)
@@ -249,9 +291,18 @@ def main():
         for i in range(200):
             screen.line_palette[i] = line_to_palette[i]
             output_rgb[i, :, :] = (
-                    palettes_rgb[line_to_palette[i]][
-                        output_4bit[i, :]] * 255).astype(np.uint8)
-        output_srgb = image_py.linear_to_srgb(output_rgb).astype(np.uint8)
+                    palettes_rgb[line_to_palette[i]][output_4bit[i, :]] * 255
+            ).astype(
+                # np.round(palettes_rgb[line_to_palette[i]][
+                #              output_4bit[i, :]] * 15) / 15 * 255).astype(
+                np.uint8)
+        output_srgb_rec709 = np.clip(colour.YCbCr_to_RGB(
+            colour.RGB_to_YCbCr(
+                image_py.linear_to_srgb(output_rgb) / 255,
+                K=colour.WEIGHTS_YCBCR['ITU-R BT.601']),
+            K=colour.WEIGHTS_YCBCR['ITU-R BT.709']), 0, 1) * 255
+
+        output_srgb = (image_py.linear_to_srgb(output_rgb)).astype(np.uint8)
 
         # dither = dither_pattern.PATTERNS[args.dither]()
         # bitmap = dither_pyx.dither_image(
@@ -275,8 +326,8 @@ def main():
                 np.asarray(out_image).transpose((1, 0, 2)))  # flip y/x axes
             canvas.blit(surface, (0, 0))
             pygame.display.flip()
-
-    unique_colours = np.unique(palette_iigs.reshape(-1, 3), axis=0).shape[0]
+    print((palettes_rgb * 255).astype(np.uint8))
+    unique_colours = np.unique(palettes_iigs.reshape(-1, 3), axis=0).shape[0]
     print("%d unique colours" % unique_colours)
 
     # Save Double hi-res image
diff --git a/dither.pyx b/dither.pyx
index e16e499..d1e53d8 100644
--- a/dither.pyx
+++ b/dither.pyx
@@ -341,7 +341,7 @@ def dither_image(
 def dither_shr(
         float[:, :, ::1] input_rgb, float[:, :, ::1] palettes_cam, float[:, :, ::1] palettes_rgb,
         float[:,::1] rgb_to_cam16ucs, float penalty):
-    cdef int y, x, idx, best_colour_idx, best_palette
+    cdef int y, x, idx, best_colour_idx, best_palette, i
     cdef double best_distance, distance, total_image_error
     cdef float[::1] best_colour_rgb, pixel_cam, colour_rgb, colour_cam
     cdef float quant_error
@@ -357,6 +357,8 @@ def dither_shr(
     total_image_error = 0.0
     for y in range(200):
         for x in range(320):
+            #for i in range(3):
+            #    working_image[y, x, i] = np.round(working_image[y, x, i] * 15) / 15
             colour_cam = convert_rgb_to_cam16ucs(
                 rgb_to_cam16ucs, working_image[y,x,0], working_image[y,x,1], working_image[y,x,2])
             line_cam[x, :] = colour_cam
@@ -489,3 +491,53 @@ cdef int best_palette_for_line(float [:, ::1] line_cam, float[:, :, ::1] palette
             best_palette_idx = palette_idx
     return best_palette_idx
 
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def k_means_with_fixed_centroids(
+    int n_clusters, int n_fixed, float[:, ::1] samples, float[:, ::1] initial_centroids, int max_iterations, float tolerance):
+
+    cdef double error, best_error, centroid_movement, total_error
+    cdef int centroid_idx, closest_centroid_idx, i, point_idx
+
+    cdef float[:, ::1] centroids = initial_centroids[:, :]
+    cdef float[::1] centroid, point, new_centroid = np.empty(3, dtype=np.float32)
+
+    cdef float[:, ::1] centroid_sample_positions_total
+    cdef int[::1] centroid_sample_counts
+
+    for iteration in range(max_iterations):
+        total_error = 0.0
+        centroid_movement = 0.0
+        centroid_sample_positions_total = np.zeros((16, 3), dtype=np.float32)
+        centroid_sample_counts = np.zeros(16, dtype=np.int32)
+
+        for point_idx in range(samples.shape[0]):
+            point = samples[point_idx, :]
+            best_error = 1e9
+            closest_centroid_idx = 0
+            for centroid_idx in range(n_clusters):
+                centroid = centroids[centroid_idx, :]
+                error = colour_distance_squared(centroid, point)
+                if error < best_error:
+                    best_error = error
+                    closest_centroid_idx = centroid_idx
+            for i in range(3):
+                centroid_sample_positions_total[closest_centroid_idx, i] += point[i]
+            centroid_sample_counts[closest_centroid_idx] += 1
+            total_error += best_error
+
+        for centroid_idx in range(n_fixed, n_clusters):
+            if centroid_sample_counts[centroid_idx]:
+                for i in range(3):
+                    new_centroid[i] = (
+                        centroid_sample_positions_total[centroid_idx, i] / centroid_sample_counts[centroid_idx])
+                centroid_movement += colour_distance_squared(centroids[centroid_idx], new_centroid)
+
+                centroids[centroid_idx, :] = new_centroid
+
+        # print(iteration, total_error, centroids)
+
+        if centroid_movement < tolerance:
+            break
+
+    return centroids, total_error
diff --git a/screen.py b/screen.py
index dd0b554..8a51d45 100644
--- a/screen.py
+++ b/screen.py
@@ -55,6 +55,7 @@ class SHR320Screen:
         for palette_idx, palette in self.palettes.items():
             for rgb_idx, rgb in enumerate(palette):
                 r, g, b = rgb
+                assert r <= 15 and g <= 15 and b <= 15
                 # print(r, g, b)
                 rgb_low = (g << 4) | b
                 rgb_hi = r

From ed2082344a69a9faf5f84ba1558c892e1c593c5f Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Wed, 17 Nov 2021 22:49:06 +0000
Subject: [PATCH 25/82] Working version!  Quantize the k-means centroids in
 12-bit //gs RGB space but continue to use CAM16-UCS for distances and
 updating centroid positions, before mapping back to the nearest legal 12-bit
 RGB position.

Needs some more work: now that allowed centroid positions are separated by
discrete distances (with no fixed minimum), the previous notion of
convergence no longer applies.  In fact, the centroids can oscillate
between positions.

There is room for optimization but this is already reasonably
performant, and the image quality is much higher \o/
---
 convert.py               | 123 +++++++++++++++++++++++----------------
 dither.pyx               | 122 +++++++++++++++++++++++++++++++-------
 precompute_conversion.py |  59 ++++++++++++++-----
 3 files changed, 218 insertions(+), 86 deletions(-)

diff --git a/convert.py b/convert.py
index 8483f63..bf96218 100644
--- a/convert.py
+++ b/convert.py
@@ -28,13 +28,14 @@ import screen as screen_py
 
 class ClusterPalette:
     def __init__(
-            self, image: Image, reserved_colours=0):
+            self, image: Image, rgb12_iigs_to_cam16ucs, reserved_colours=0):
         self._colours_cam = self._image_colours_cam(image)
         self._reserved_colours = reserved_colours
         self._errors = [1e9] * 16
         self._palettes_cam = np.empty((16, 16, 3), dtype=np.float32)
-        self._palettes_rgb = np.empty((16, 16, 3), dtype=np.float32)
+        self._palettes_rgb = np.empty((16, 16, 3), dtype=np.uint8)
         self._global_palette = np.empty((16, 16, 3), dtype=np.float32)
+        self._rgb12_iigs_to_cam16ucs = rgb12_iigs_to_cam16ucs
 
     def _image_colours_cam(self, image: Image):
         colours_rgb = np.asarray(image).reshape((-1, 3))
@@ -58,7 +59,14 @@ class ClusterPalette:
                 # List of (palette idx, frequency count)
                 list(zip(*np.unique(labels, return_counts=True))),
                 key=lambda kv: kv[1], reverse=True)]
-        return clusters.cluster_centers_[frequency_order]
+
+        res = np.empty((16, 3), dtype=np.uint8)
+        for i in range(16):
+            res[i, :] = dither_pyx.convert_cam16ucs_to_rgb12_iigs(
+                clusters.cluster_centers_[frequency_order][i].astype(
+                    np.float32))
+        print(res)
+        return res
 
     def propose_palettes(self) -> Tuple[np.ndarray, np.ndarray, List[float]]:
         """Attempt to find new palettes that locally improve image quality.
@@ -79,7 +87,7 @@ class ClusterPalette:
         """
         new_errors = list(self._errors)
         new_palettes_cam = np.copy(self._palettes_cam)
-        new_palettes_rgb = np.copy(self._palettes_rgb)
+        new_palettes_rgb12_iigs = np.copy(self._palettes_rgb)
 
         # Compute a new 16-colour global palette for the entire image,
         # used as the starting center positions for k-means clustering of the
@@ -117,11 +125,14 @@ class ClusterPalette:
             #
             # palette_error = clusters.inertia_
 
-            clusters, palette_error = dither_pyx.k_means_with_fixed_centroids(
-                n_clusters=16, n_fixed=self._reserved_colours,
-                samples=palette_pixels, initial_centroids=self._global_palette,
-                max_iterations=1000, tolerance=1e-4
-            )
+            palettes_rgb12_iigs, palette_error = \
+                dither_pyx.k_means_with_fixed_centroids(
+                    n_clusters=16, n_fixed=self._reserved_colours,
+                    samples=palette_pixels,
+                    initial_centroids=self._global_palette,
+                    max_iterations=1000, tolerance=0.05,
+                    rgb12_iigs_to_cam16ucs=self._rgb12_iigs_to_cam16ucs
+                )
 
             if (palette_error >= self._errors[palette_idx] and not
             self._reserved_colours):
@@ -130,21 +141,23 @@ class ClusterPalette:
                 # because it would break the invariant that all palettes must
                 # share colours.
                 continue
+            for i in range(16):
+                new_palettes_cam[palette_idx, i, :] = (
+                    np.array(dither_pyx.convert_rgb12_iigs_to_cam(
+                        self._rgb12_iigs_to_cam16ucs, palettes_rgb12_iigs[
+                            i]), dtype=np.float32))
 
-            new_palettes_cam[palette_idx, :, :] = np.array(
-                # clusters.cluster_centers_).astype(np.float32)
-                clusters).astype(np.float32)
             # Suppress divide by zero warning,
             # https://github.com/colour-science/colour/issues/900
-            with colour.utilities.suppress_warnings(python_warnings=True):
-                palette_rgb = colour.convert(
-                    new_palettes_cam[palette_idx, :, :], "CAM16UCS", "RGB")
-            palette_rgb_rec601 = np.clip(image_py.srgb_to_linear(
-                colour.YCbCr_to_RGB(
-                    colour.RGB_to_YCbCr(
-                        image_py.linear_to_srgb(palette_rgb * 255) / 255,
-                        K=colour.WEIGHTS_YCBCR['ITU-R BT.709']),
-                    K=colour.WEIGHTS_YCBCR['ITU-R BT.601']) * 255) / 255, 0, 1)
+            # with colour.utilities.suppress_warnings(python_warnings=True):
+            #     palette_rgb = colour.convert(
+            #         new_palettes_cam[palette_idx, :, :], "CAM16UCS", "RGB")
+            # palette_rgb_rec601 = np.clip(image_py.srgb_to_linear(
+            #     colour.YCbCr_to_RGB(
+            #         colour.RGB_to_YCbCr(
+            #             image_py.linear_to_srgb(palette_rgb * 255) / 255,
+            #             K=colour.WEIGHTS_YCBCR['ITU-R BT.709']),
+            #         K=colour.WEIGHTS_YCBCR['ITU-R BT.601']) * 255) / 255, 0, 1)
             # palette_rgb = np.clip(
             #     image_py.srgb_to_linear(
             #         colour.YCbCr_to_RGB(
@@ -155,10 +168,10 @@ class ClusterPalette:
             #             K=colour.WEIGHTS_YCBCR[
             #                 'ITU-R BT.601']) * 255) / 255,
             #     0, 1)
-            new_palettes_rgb[palette_idx, :, :] =  palette_rgb # palette_rgb_rec601
+            new_palettes_rgb12_iigs[palette_idx, :, :] = palettes_rgb12_iigs
             new_errors[palette_idx] = palette_error
 
-        return new_palettes_cam, new_palettes_rgb, new_errors
+        return new_palettes_cam, new_palettes_rgb12_iigs, new_errors
 
     def accept_palettes(
             self, new_palettes_cam: np.ndarray,
@@ -216,7 +229,8 @@ def main():
 
     # Conversion matrix from RGB to CAM16UCS colour values.  Indexed by
     # 24-bit RGB value
-    rgb_to_cam16 = np.load("data/rgb_to_cam16ucs.npy")
+    rgb24_to_cam16ucs = np.load("data/rgb24_to_cam16ucs.npy")
+    rgb12_iigs_to_cam16ucs = np.load("data/rgb12_iigs_to_cam16ucs.npy")
 
     # Open and resize source image
     image = image_py.open(args.input)
@@ -242,19 +256,26 @@ def main():
     total_image_error = 1e9
     iterations_since_improvement = 0
 
-    palettes_iigs = np.empty((16, 16, 3), dtype=np.uint8)
-    cluster_palette = ClusterPalette(rgb, reserved_colours=1)
+    # palettes_iigs = np.empty((16, 16, 3), dtype=np.uint8)
+    cluster_palette = ClusterPalette(
+        rgb, reserved_colours=1, rgb12_iigs_to_cam16ucs=rgb12_iigs_to_cam16ucs)
 
     while iterations_since_improvement < iterations:
-        new_palettes_cam, new_palettes_rgb, new_palette_errors = (
+        new_palettes_cam, new_palettes_rgb12_iigs, new_palette_errors = (
             cluster_palette.propose_palettes())
 
+        # Suppress divide by zero warning,
+        # https://github.com/colour-science/colour/issues/900
+        with colour.utilities.suppress_warnings(python_warnings=True):
+            new_palettes_linear_rgb = colour.convert(
+                new_palettes_cam, "CAM16UCS", "RGB").astype(np.float32)
+
         # Recompute image with proposed palettes and check whether it has
         # lower total image error than our previous best.
         new_output_4bit, new_line_to_palette, new_total_image_error = \
             dither_pyx.dither_shr(
-                rgb, new_palettes_cam, new_palettes_rgb, rgb_to_cam16,
-                float(penalty))
+                rgb, new_palettes_cam, new_palettes_linear_rgb,
+                rgb24_to_cam16ucs, float(penalty))
         if new_total_image_error >= total_image_error:
             iterations_since_improvement += 1
             continue
@@ -262,7 +283,7 @@ def main():
         # We found a globally better set of palettes
         iterations_since_improvement = 0
         cluster_palette.accept_palettes(
-            new_palettes_cam, new_palettes_rgb, new_palette_errors)
+            new_palettes_cam, new_palettes_rgb12_iigs, new_palette_errors)
 
         if total_image_error < 1e9:
             print("Improved quality +%f%% (%f)" % (
@@ -271,19 +292,19 @@ def main():
         output_4bit = new_output_4bit
         line_to_palette = new_line_to_palette
         total_image_error = new_total_image_error
-        palettes_rgb = new_palettes_rgb
-
-        # Recompute 4-bit //gs RGB palettes
-        palette_rgb_rec601 = np.clip(
-            colour.YCbCr_to_RGB(
-                colour.RGB_to_YCbCr(
-                    image_py.linear_to_srgb(palettes_rgb * 255) / 255,
-                    K=colour.WEIGHTS_YCBCR['ITU-R BT.709']),
-                K=colour.WEIGHTS_YCBCR['ITU-R BT.601']), 0, 1)
-
-        palettes_iigs = np.round(palette_rgb_rec601 * 15).astype(np.uint8)
+        palettes_rgb12_iigs = new_palettes_rgb12_iigs
+        palettes_linear_rgb = new_palettes_linear_rgb
+        # # Recompute 4-bit //gs RGB palettes
+        # palette_rgb_rec601 = np.clip(
+        #     colour.YCbCr_to_RGB(
+        #         colour.RGB_to_YCbCr(
+        #             image_py.linear_to_srgb(palettes_rgb12_iigs * 255) / 255,
+        #             K=colour.WEIGHTS_YCBCR['ITU-R BT.709']),
+        #         K=colour.WEIGHTS_YCBCR['ITU-R BT.601']), 0, 1)
+        #
+        # palettes_iigs = np.round(palette_rgb_rec601 * 15).astype(np.uint8)
         for i in range(16):
-            screen.set_palette(i, palettes_iigs[i, :, :])
+            screen.set_palette(i, palettes_rgb12_iigs[i, :, :])
 
         # Recompute current screen RGB image
         screen.set_pixels(output_4bit)
@@ -291,22 +312,22 @@ def main():
         for i in range(200):
             screen.line_palette[i] = line_to_palette[i]
             output_rgb[i, :, :] = (
-                    palettes_rgb[line_to_palette[i]][output_4bit[i, :]] * 255
+                    palettes_linear_rgb[line_to_palette[i]][output_4bit[i, :]] * 255
             ).astype(
                 # np.round(palettes_rgb[line_to_palette[i]][
                 #              output_4bit[i, :]] * 15) / 15 * 255).astype(
                 np.uint8)
-        output_srgb_rec709 = np.clip(colour.YCbCr_to_RGB(
-            colour.RGB_to_YCbCr(
-                image_py.linear_to_srgb(output_rgb) / 255,
-                K=colour.WEIGHTS_YCBCR['ITU-R BT.601']),
-            K=colour.WEIGHTS_YCBCR['ITU-R BT.709']), 0, 1) * 255
+        # output_srgb_rec709 = np.clip(colour.YCbCr_to_RGB(
+        #     colour.RGB_to_YCbCr(
+        #         image_py.linear_to_srgb(output_rgb) / 255,
+        #         K=colour.WEIGHTS_YCBCR['ITU-R BT.601']),
+        #     K=colour.WEIGHTS_YCBCR['ITU-R BT.709']), 0, 1) * 255
 
         output_srgb = (image_py.linear_to_srgb(output_rgb)).astype(np.uint8)
 
         # dither = dither_pattern.PATTERNS[args.dither]()
         # bitmap = dither_pyx.dither_image(
-        #     screen, rgb, dither, args.lookahead, args.verbose, rgb_to_cam16)
+        #     screen, rgb, dither, args.lookahead, args.verbose, rgb24_to_cam16ucs)
 
         # Show output image by rendering in target palette
         # output_palette_name = args.show_palette or args.palette
@@ -326,8 +347,8 @@ def main():
                 np.asarray(out_image).transpose((1, 0, 2)))  # flip y/x axes
             canvas.blit(surface, (0, 0))
             pygame.display.flip()
-    print((palettes_rgb * 255).astype(np.uint8))
-    unique_colours = np.unique(palettes_iigs.reshape(-1, 3), axis=0).shape[0]
+    # print((palettes_rgb * 255).astype(np.uint8))
+    unique_colours = np.unique(palettes_rgb12_iigs.reshape(-1, 3), axis=0).shape[0]
     print("%d unique colours" % unique_colours)
 
     # Save Double hi-res image
diff --git a/dither.pyx b/dither.pyx
index d1e53d8..e5ee87e 100644
--- a/dither.pyx
+++ b/dither.pyx
@@ -157,10 +157,10 @@ cdef int dither_lookahead(Dither* dither, float[:, :, ::1] palette_cam16, float[
     return best
 
 
-@cython.boundscheck(False)
+@cython.boundscheck(True)
 @cython.wraparound(False)
 cdef inline float[::1] convert_rgb_to_cam16ucs(float[:, ::1] rgb_to_cam16ucs, float r, float g, float b) nogil:
-    cdef int rgb_24bit = (<int>(r*255) << 16) + (<int>(g*255) << 8) + <int>(b*255)
+    cdef unsigned int rgb_24bit = (<unsigned int>(r*255) << 16) + (<unsigned int>(g*255) << 8) + <unsigned int>(b*255)
     return rgb_to_cam16ucs[rgb_24bit]
 
 @cython.boundscheck(False)
@@ -357,8 +357,6 @@ def dither_shr(
     total_image_error = 0.0
     for y in range(200):
         for x in range(320):
-            #for i in range(3):
-            #    working_image[y, x, i] = np.round(working_image[y, x, i] * 15) / 15
             colour_cam = convert_rgb_to_cam16ucs(
                 rgb_to_cam16ucs, working_image[y,x,0], working_image[y,x,1], working_image[y,x,2])
             line_cam[x, :] = colour_cam
@@ -491,53 +489,133 @@ cdef int best_palette_for_line(float [:, ::1] line_cam, float[:, :, ::1] palette
             best_palette_idx = palette_idx
     return best_palette_idx
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
+@cython.boundscheck(True)
+@cython.wraparound(True)
+def convert_rgb12_iigs_to_cam(float [:, ::1] rgb12_iigs_to_cam16ucs, (unsigned char)[::1] point_rgb12) -> float[::1]:
+    cdef int rgb12 = (point_rgb12[0] << 8) | (point_rgb12[1] << 4) | point_rgb12[2]
+    return rgb12_iigs_to_cam16ucs[rgb12]
+
+import colour
+
+cdef float[::1] linear_to_srgb_array(float[::1] a, float gamma=2.4):
+    cdef int i
+    cdef float[::1] res = np.empty(3, dtype=np.float32)
+    for i in range(3):
+        if a[i] <= 0.0031308:
+            res[i] = a[i] * 12.92
+        else:
+            res[i] = 1.055 * a[i] ** (1.0 / gamma) - 0.055
+    return res
+
+@cython.boundscheck(True)
+@cython.wraparound(True)
+def convert_cam16ucs_to_rgb12_iigs(float[::1] point_cam) -> int[::1]:  # XXX return type
+    cdef float[::1] rgb, rgb12_iigs
+    cdef int i
+
+    # Convert CAM16UCS input to RGB
+    with colour.utilities.suppress_warnings(python_warnings=True):
+        rgb = colour.convert(point_cam, "CAM16UCS", "RGB").astype(np.float32)
+
+    rgb12_iigs = np.clip(
+        # Convert to Rec.601 R'G'B'
+        colour.YCbCr_to_RGB(
+            # Gamma correct and convert Rec.709 R'G'B' to YCbCr
+            colour.RGB_to_YCbCr(
+                linear_to_srgb_array(rgb), K=colour.WEIGHTS_YCBCR['ITU-R BT.709']),
+            K=colour.WEIGHTS_YCBCR['ITU-R BT.601']), 0, 1).astype(np.float32)
+
+    for i in range(3):
+        rgb12_iigs[i] *= 15
+
+    return np.round(rgb12_iigs).astype(np.uint8)
+
+
+@cython.boundscheck(True)
+@cython.wraparound(True)
 def k_means_with_fixed_centroids(
-    int n_clusters, int n_fixed, float[:, ::1] samples, float[:, ::1] initial_centroids, int max_iterations, float tolerance):
+    int n_clusters, int n_fixed, float[:, ::1] samples, (unsigned char)[:, ::1] initial_centroids, int max_iterations,
+    float tolerance, float [:, ::1] rgb12_iigs_to_cam16ucs):
 
     cdef double error, best_error, centroid_movement, total_error
     cdef int centroid_idx, closest_centroid_idx, i, point_idx
 
-    cdef float[:, ::1] centroids = initial_centroids[:, :]
-    cdef float[::1] centroid, point, new_centroid = np.empty(3, dtype=np.float32)
+    cdef (unsigned char)[:, ::1] centroids_rgb12 = initial_centroids[:, :]
+    cdef (unsigned char)[::1] centroid_rgb12, new_centroid_rgb12
 
-    cdef float[:, ::1] centroid_sample_positions_total
+    cdef float[::1] point_cam, new_centroid_cam = np.empty(3, dtype=np.float32)
+    cdef float[:, ::1] centroid_cam_sample_positions_total
     cdef int[::1] centroid_sample_counts
 
+    # Centroids are legal 12-bit //gs palette entries (4 bits per R/G/B channel); they are mapped to
+    # CAM16-UCS through rgb12_iigs_to_cam16ucs for distances and cluster means, and each updated mean is
+    # mapped back to a 12-bit value with convert_cam16ucs_to_rgb12_iigs().  The first n_fixed centroids are
+    # never updated.  NOTE(review): centroids_rgb12 is a view of initial_centroids, so the caller's array is
+    # updated in place.  Returns (centroids_rgb12, total_error of the final assignment pass).
+
+    cdef int centroid_moved
     for iteration in range(max_iterations):
+        centroid_moved = 0  # reset each pass; set below when any centroid changes 12-bit position
         total_error = 0.0
         centroid_movement = 0.0
-        centroid_sample_positions_total = np.zeros((16, 3), dtype=np.float32)
+        centroid_cam_sample_positions_total = np.zeros((16, 3), dtype=np.float32)
         centroid_sample_counts = np.zeros(16, dtype=np.int32)
 
         for point_idx in range(samples.shape[0]):
-            point = samples[point_idx, :]
+            point_cam = samples[point_idx, :]
             best_error = 1e9
             closest_centroid_idx = 0
             for centroid_idx in range(n_clusters):
-                centroid = centroids[centroid_idx, :]
-                error = colour_distance_squared(centroid, point)
+                centroid_rgb12 = centroids_rgb12[centroid_idx, :]
+                error = colour_distance_squared(convert_rgb12_iigs_to_cam(rgb12_iigs_to_cam16ucs, centroid_rgb12), point_cam)
                 if error < best_error:
                     best_error = error
                     closest_centroid_idx = centroid_idx
             for i in range(3):
-                centroid_sample_positions_total[closest_centroid_idx, i] += point[i]
+                centroid_cam_sample_positions_total[closest_centroid_idx, i] += point_cam[i]
             centroid_sample_counts[closest_centroid_idx] += 1
             total_error += best_error
 
         for centroid_idx in range(n_fixed, n_clusters):
             if centroid_sample_counts[centroid_idx]:
                 for i in range(3):
-                    new_centroid[i] = (
-                        centroid_sample_positions_total[centroid_idx, i] / centroid_sample_counts[centroid_idx])
-                centroid_movement += colour_distance_squared(centroids[centroid_idx], new_centroid)
+                    new_centroid_cam[i] = (
+                        centroid_cam_sample_positions_total[centroid_idx, i] / centroid_sample_counts[centroid_idx])
+                centroid_movement += colour_distance_squared(
+                    convert_rgb12_iigs_to_cam(rgb12_iigs_to_cam16ucs, centroids_rgb12[centroid_idx]), new_centroid_cam)
+                new_centroid_rgb12 = convert_cam16ucs_to_rgb12_iigs(new_centroid_cam)
+                for i in range(3):
+                    if centroids_rgb12[centroid_idx, i] != new_centroid_rgb12[i]:
+                        print(i, centroids_rgb12[centroid_idx, i], new_centroid_rgb12[i])
+                        centroids_rgb12[centroid_idx, i] = new_centroid_rgb12[i]
+                        centroid_moved = 1
 
-                centroids[centroid_idx, :] = new_centroid
-
-        # print(iteration, total_error, centroids)
+        print(iteration, centroid_movement, total_error, centroids_rgb12)
 
         if centroid_movement < tolerance:
             break
+        if centroid_moved == 0:
+            break
+
+    return centroids_rgb12, total_error
+
+
+#@cython.boundscheck(False)
+#@cython.wraparound(False)
+#cdef float[::1] closest_quantized_point(float [:, ::1] rgb24_to_cam, float [::1] point_cam) nogil:
+#    cdef unsigned int rgb12, rgb24, closest_rgb24, r, g, b
+#    cdef double best_distance = 1e9, distance
+#    for rgb12 in range(2**12):
+#        r = rgb12 >> 8
+#        g = (rgb12 >> 4) & 0xf
+#        b = rgb12 & 0xf
+#        rgb24 = (r << 20) | (r << 16) | (g << 12) | (g << 8) | (b << 4) | b
+#        distance = colour_distance_squared(rgb24_to_cam[rgb24], point_cam)
+#        # print(hex(rgb24), distance)
+#        if distance < best_distance:
+#            best_distance = distance
+#            closest_rgb24 = rgb24
+#            # print(distance, rgb24, hex(rgb24))
+#    # print("-->", closest_rgb24, hex(closest_rgb24), best_distance)
+#    return rgb24_to_cam[closest_rgb24]
 
-    return centroids, total_error
diff --git a/precompute_conversion.py b/precompute_conversion.py
index b24933f..5c4c350 100644
--- a/precompute_conversion.py
+++ b/precompute_conversion.py
@@ -10,24 +10,57 @@ import colour
 import numpy as np
 
 
+def srgb_to_linear_array(a: np.ndarray, gamma=2.4) -> np.ndarray:
+    return np.where(a <= 0.04045, a / 12.92, ((a + 0.055) / 1.055) ** gamma)
+
 def main():
-    print("Precomputing conversion matrix from RGB to CAM16UCS colour space")
+    # print("Precomputing conversion matrix from 24-bit RGB to CAM16UCS colour "
+    #       "space")
+    # # Compute matrix of all 24-bit RGB values, normalized to 0..1 range
+    # bits24 = np.arange(2 ** 24, dtype=np.uint32).reshape(-1, 1)
+    # all_rgb24 = np.concatenate(
+    #     [bits24 >> 16 & 0xff, bits24 >> 8 & 0xff, bits24 & 0xff],
+    #     axis=1).astype(np.float32) / 255
+    # del bits24
+    #
+    # with colour.utilities.suppress_warnings(colour_usage_warnings=True):
+    #     # Compute matrix of corresponding CAM16UCS colour values, indexed
+    #     # by 24-bit RGB value
+    #     rgb24_to_cam16ucs = colour.convert(all_rgb24, "RGB", "CAM16UCS").astype(
+    #         np.float32)
+    # del all_rgb24
+    # np.save("data/rgb24_to_cam16ucs.npy", rgb24_to_cam16ucs)
+    # del rgb24_to_cam16ucs
 
-    # Compute matrix of all 24-bit RGB values, normalized to 0..1 range
-    bits24 = np.arange(2 ** 24, dtype=np.uint32).reshape(-1, 1)
-    all_rgb = np.concatenate(
-        [bits24 >> 16 & 0xff, bits24 >> 8 & 0xff, bits24 & 0xff],
-        axis=1).astype(np.float32) / 255
-    del bits24
+    print("Precomputing conversion matrix from 12-bit //gs RGB to CAM16UCS "
+          "colour space")
+    # Compute matrix of all 12-bit RGB values, normalized to 0..1 range
+    bits12 = np.arange(2 ** 12, dtype=np.uint32).reshape(-1, 1)
+    r = bits12 >> 8
+    g = (bits12 >> 4) & 0xf
+    b = bits12 & 0xf
+    all_rgb12 = np.concatenate(
+        [(r << 4) | r, (g << 4) | g, (b << 4) | b], axis=1).astype(
+        np.float32) / 255
+    del bits12, r, g, b
 
+    # //gs RGB values use gamma-corrected Rec.601 RGB colour space.  We need to
+    # convert to Rec.709 RGB as preparation for converting to CAM16UCS.  We
+    # do this via the YCbCr intermediate color model.
+    rgb12_iigs = np.clip(srgb_to_linear_array(
+        np.clip(colour.YCbCr_to_RGB(
+            colour.RGB_to_YCbCr(
+                all_rgb12, K=colour.WEIGHTS_YCBCR[
+                    'ITU-R BT.601']),
+            K=colour.WEIGHTS_YCBCR['ITU-R BT.709']), 0, 1)), 0, 1)
     with colour.utilities.suppress_warnings(colour_usage_warnings=True):
         # Compute matrix of corresponding CAM16UCS colour values, indexed
-        # by 24-bit RGB value
-        all_cam16 = colour.convert(all_rgb, "RGB", "CAM16UCS").astype(
-            np.float32)
-    del all_rgb
-    np.save("data/rgb_to_cam16ucs.npy", all_cam16)
-
+        # by 12-bit //gs RGB value
+        rgb12_iigs_to_cam16ucs = colour.convert(
+            rgb12_iigs, "RGB", "CAM16UCS").astype(np.float32)
+    del rgb12_iigs
+    np.save("data/rgb12_iigs_to_cam16ucs.npy", rgb12_iigs_to_cam16ucs)
+    del rgb12_iigs_to_cam16ucs
 
 if __name__ == "__main__":
     main()

From e53c085a919ecb38c5dc1038006d72937c21176b Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Wed, 17 Nov 2021 22:55:47 +0000
Subject: [PATCH 26/82] Remove debugging prints

---
 convert.py | 1 -
 dither.pyx | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/convert.py b/convert.py
index bf96218..189d38f 100644
--- a/convert.py
+++ b/convert.py
@@ -65,7 +65,6 @@ class ClusterPalette:
             res[i, :] = dither_pyx.convert_cam16ucs_to_rgb12_iigs(
                 clusters.cluster_centers_[frequency_order][i].astype(
                     np.float32))
-        print(res)
         return res
 
     def propose_palettes(self) -> Tuple[np.ndarray, np.ndarray, List[float]]:
diff --git a/dither.pyx b/dither.pyx
index e5ee87e..cb2fca7 100644
--- a/dither.pyx
+++ b/dither.pyx
@@ -586,11 +586,11 @@ def k_means_with_fixed_centroids(
                 new_centroid_rgb12 = convert_cam16ucs_to_rgb12_iigs(new_centroid_cam)
                 for i in range(3):
                     if centroids_rgb12[centroid_idx, i] != new_centroid_rgb12[i]:
-                        print(i, centroids_rgb12[centroid_idx, i], new_centroid_rgb12[i])
+                        # print(i, centroids_rgb12[centroid_idx, i], new_centroid_rgb12[i])
                         centroids_rgb12[centroid_idx, i] = new_centroid_rgb12[i]
                         centroid_moved = 1
 
-        print(iteration, centroid_movement, total_error, centroids_rgb12)
+        # print(iteration, centroid_movement, total_error, centroids_rgb12)
 
         if centroid_movement < tolerance:
             break

From d7969f50ba40d7fe4b7f99c19206aefdd02264a4 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Thu, 18 Nov 2021 17:24:12 +0000
Subject: [PATCH 27/82] Remove cython checks and obsolete TODO

---
 convert.py |  8 +-------
 dither.pyx | 14 +++++++-------
 2 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/convert.py b/convert.py
index 189d38f..fc14502 100644
--- a/convert.py
+++ b/convert.py
@@ -109,12 +109,6 @@ class ClusterPalette:
                              int(p_lower * (200 / 16)) * 320:int(p_upper * (
                                      200 / 16)) * 320, :]
 
-            # TODO: clustering should be aware of the fact that we will
-            #  quantize to a 4-bit RGB value afterwards.  i.e. we should
-            #  not pick multiple centroids that will quantize to the same RGB
-            #  value since we'll "waste" a palette entry.  This doesn't seem to
-            #  be a major issue in practise though, and fixing it would require
-            #  implementing our own (optimized) k-means.
             # TODO: tune tolerance
             # clusters = cluster.MiniBatchKMeans(
             #     n_clusters=16, max_iter=10000,
@@ -242,7 +236,7 @@ def main():
 
     # TODO: flags
     penalty = 1e9
-    iterations = 10  # 50
+    iterations = 50
 
     pygame.init()
     # TODO: for some reason I need to execute this twice - the first time
diff --git a/dither.pyx b/dither.pyx
index cb2fca7..64ad6e3 100644
--- a/dither.pyx
+++ b/dither.pyx
@@ -157,7 +157,7 @@ cdef int dither_lookahead(Dither* dither, float[:, :, ::1] palette_cam16, float[
     return best
 
 
-@cython.boundscheck(True)
+@cython.boundscheck(False)
 @cython.wraparound(False)
 cdef inline float[::1] convert_rgb_to_cam16ucs(float[:, ::1] rgb_to_cam16ucs, float r, float g, float b) nogil:
     cdef unsigned int rgb_24bit = (<unsigned int>(r*255) << 16) + (<unsigned int>(g*255) << 8) + <unsigned int>(b*255)
@@ -489,8 +489,8 @@ cdef int best_palette_for_line(float [:, ::1] line_cam, float[:, :, ::1] palette
             best_palette_idx = palette_idx
     return best_palette_idx
 
-@cython.boundscheck(True)
-@cython.wraparound(True)
+@cython.boundscheck(False)
+@cython.wraparound(False)
 def convert_rgb12_iigs_to_cam(float [:, ::1] rgb12_iigs_to_cam16ucs, (unsigned char)[::1] point_rgb12) -> float[::1]:
     cdef int rgb12 = (point_rgb12[0] << 8) | (point_rgb12[1] << 4) | point_rgb12[2]
     return rgb12_iigs_to_cam16ucs[rgb12]
@@ -507,8 +507,8 @@ cdef float[::1] linear_to_srgb_array(float[::1] a, float gamma=2.4):
             res[i] = 1.055 * a[i] ** (1.0 / gamma) - 0.055
     return res
 
-@cython.boundscheck(True)
-@cython.wraparound(True)
+@cython.boundscheck(False)
+@cython.wraparound(False)
 def convert_cam16ucs_to_rgb12_iigs(float[::1] point_cam) -> int[::1]:  # XXX return type
     cdef float[::1] rgb, rgb12_iigs
     cdef int i
@@ -531,8 +531,8 @@ def convert_cam16ucs_to_rgb12_iigs(float[::1] point_cam) -> int[::1]:  # XXX ret
     return np.round(rgb12_iigs).astype(np.uint8)
 
 
-@cython.boundscheck(True)
-@cython.wraparound(True)
+@cython.boundscheck(False)
+@cython.wraparound(False)
 def k_means_with_fixed_centroids(
     int n_clusters, int n_fixed, float[:, ::1] samples, (unsigned char)[:, ::1] initial_centroids, int max_iterations,
     float tolerance, float [:, ::1] rgb12_iigs_to_cam16ucs):

From 7609297f0d0d4dff1465154621a055e89d6a2f69 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Thu, 18 Nov 2021 17:34:27 +0000
Subject: [PATCH 28/82] Optimize a bit

---
 convert.py |  1 +
 dither.pyx | 18 +++++++++++++-----
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/convert.py b/convert.py
index fc14502..8a6164c 100644
--- a/convert.py
+++ b/convert.py
@@ -254,6 +254,7 @@ def main():
         rgb, reserved_colours=1, rgb12_iigs_to_cam16ucs=rgb12_iigs_to_cam16ucs)
 
     while iterations_since_improvement < iterations:
+        print(iterations_since_improvement)
         new_palettes_cam, new_palettes_rgb12_iigs, new_palette_errors = (
             cluster_palette.propose_palettes())
 
diff --git a/dither.pyx b/dither.pyx
index 64ad6e3..044e3d2 100644
--- a/dither.pyx
+++ b/dither.pyx
@@ -491,12 +491,17 @@ cdef int best_palette_for_line(float [:, ::1] line_cam, float[:, :, ::1] palette
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def convert_rgb12_iigs_to_cam(float [:, ::1] rgb12_iigs_to_cam16ucs, (unsigned char)[::1] point_rgb12) -> float[::1]:
+cdef float[::1] _convert_rgb12_iigs_to_cam(float [:, ::1] rgb12_iigs_to_cam16ucs, (unsigned char)[::1] point_rgb12):
     cdef int rgb12 = (point_rgb12[0] << 8) | (point_rgb12[1] << 4) | point_rgb12[2]
     return rgb12_iigs_to_cam16ucs[rgb12]
 
+def convert_rgb12_iigs_to_cam(float [:, ::1] rgb12_iigs_to_cam16ucs, (unsigned char)[::1] point_rgb12) -> float[::1]:
+    return _convert_rgb12_iigs_to_cam(rgb12_iigs_to_cam16ucs, point_rgb12)
+
 import colour
 
+@cython.boundscheck(False)
+@cython.wraparound(False)
 cdef float[::1] linear_to_srgb_array(float[::1] a, float gamma=2.4):
     cdef int i
     cdef float[::1] res = np.empty(3, dtype=np.float32)
@@ -509,7 +514,7 @@ cdef float[::1] linear_to_srgb_array(float[::1] a, float gamma=2.4):
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def convert_cam16ucs_to_rgb12_iigs(float[::1] point_cam) -> int[::1]:  # XXX return type
+cdef (unsigned char)[::1] _convert_cam16ucs_to_rgb12_iigs(float[::1] point_cam):
     cdef float[::1] rgb, rgb12_iigs
     cdef int i
 
@@ -531,6 +536,9 @@ def convert_cam16ucs_to_rgb12_iigs(float[::1] point_cam) -> int[::1]:  # XXX ret
     return np.round(rgb12_iigs).astype(np.uint8)
 
 
+def convert_cam16ucs_to_rgb12_iigs(float[::1] point_cam):
+    return _convert_cam16ucs_to_rgb12_iigs(point_cam)
+
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def k_means_with_fixed_centroids(
@@ -567,7 +575,7 @@ def k_means_with_fixed_centroids(
             closest_centroid_idx = 0
             for centroid_idx in range(n_clusters):
                 centroid_rgb12 = centroids_rgb12[centroid_idx, :]
-                error = colour_distance_squared(convert_rgb12_iigs_to_cam(rgb12_iigs_to_cam16ucs, centroid_rgb12), point_cam)
+                error = colour_distance_squared(_convert_rgb12_iigs_to_cam(rgb12_iigs_to_cam16ucs, centroid_rgb12), point_cam)
                 if error < best_error:
                     best_error = error
                     closest_centroid_idx = centroid_idx
@@ -582,8 +590,8 @@ def k_means_with_fixed_centroids(
                     new_centroid_cam[i] = (
                         centroid_cam_sample_positions_total[centroid_idx, i] / centroid_sample_counts[centroid_idx])
                 centroid_movement += colour_distance_squared(
-                    convert_rgb12_iigs_to_cam(rgb12_iigs_to_cam16ucs, centroids_rgb12[centroid_idx]), new_centroid_cam)
-                new_centroid_rgb12 = convert_cam16ucs_to_rgb12_iigs(new_centroid_cam)
+                    _convert_rgb12_iigs_to_cam(rgb12_iigs_to_cam16ucs, centroids_rgb12[centroid_idx]), new_centroid_cam)
+                new_centroid_rgb12 = _convert_cam16ucs_to_rgb12_iigs(new_centroid_cam)
                 for i in range(3):
                     if centroids_rgb12[centroid_idx, i] != new_centroid_rgb12[i]:
                         # print(i, centroids_rgb12[centroid_idx, i], new_centroid_rgb12[i])

From 3159a09c27608507428da3b5e643508781d6219e Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Thu, 18 Nov 2021 20:33:21 +0000
Subject: [PATCH 29/82] Uncomment

---
 precompute_conversion.py | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/precompute_conversion.py b/precompute_conversion.py
index 5c4c350..2805533 100644
--- a/precompute_conversion.py
+++ b/precompute_conversion.py
@@ -14,23 +14,23 @@ def srgb_to_linear_array(a: np.ndarray, gamma=2.4) -> np.ndarray:
     return np.where(a <= 0.04045, a / 12.92, ((a + 0.055) / 1.055) ** gamma)
 
 def main():
-    # print("Precomputing conversion matrix from 24-bit RGB to CAM16UCS colour "
-    #       "space")
-    # # Compute matrix of all 24-bit RGB values, normalized to 0..1 range
-    # bits24 = np.arange(2 ** 24, dtype=np.uint32).reshape(-1, 1)
-    # all_rgb24 = np.concatenate(
-    #     [bits24 >> 16 & 0xff, bits24 >> 8 & 0xff, bits24 & 0xff],
-    #     axis=1).astype(np.float32) / 255
-    # del bits24
-    #
-    # with colour.utilities.suppress_warnings(colour_usage_warnings=True):
-    #     # Compute matrix of corresponding CAM16UCS colour values, indexed
-    #     # by 24-bit RGB value
-    #     rgb24_to_cam16ucs = colour.convert(all_rgb24, "RGB", "CAM16UCS").astype(
-    #         np.float32)
-    # del all_rgb24
-    # np.save("data/rgb24_to_cam16ucs.npy", rgb24_to_cam16ucs)
-    # del rgb24_to_cam16ucs
+    print("Precomputing conversion matrix from 24-bit RGB to CAM16UCS colour "
+          "space")
+    # Compute matrix of all 24-bit RGB values, normalized to 0..1 range
+    bits24 = np.arange(2 ** 24, dtype=np.uint32).reshape(-1, 1)
+    all_rgb24 = np.concatenate(
+        [bits24 >> 16 & 0xff, bits24 >> 8 & 0xff, bits24 & 0xff],
+        axis=1).astype(np.float32) / 255
+    del bits24
+
+    with colour.utilities.suppress_warnings(colour_usage_warnings=True):
+        # Compute matrix of corresponding CAM16UCS colour values, indexed
+        # by 24-bit RGB value
+        rgb24_to_cam16ucs = colour.convert(all_rgb24, "RGB", "CAM16UCS").astype(
+            np.float32)
+    del all_rgb24
+    np.save("data/rgb24_to_cam16ucs.npy", rgb24_to_cam16ucs)
+    del rgb24_to_cam16ucs
 
     print("Precomputing conversion matrix from 12-bit //gs RGB to CAM16UCS "
           "colour space")

From c608f6b96189a1a7ce7f8f97324dfc8da3b0bef2 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Thu, 18 Nov 2021 21:50:39 +0000
Subject: [PATCH 30/82] Optimize calling _convert_cam16ucs_to_rgb12_iigs since
 it has significant overhead

---
 convert.py |  7 ++---
 dither.pyx | 88 +++++++++++++++++++++++-------------------------------
 2 files changed, 39 insertions(+), 56 deletions(-)

diff --git a/convert.py b/convert.py
index 8a6164c..b6be18c 100644
--- a/convert.py
+++ b/convert.py
@@ -60,10 +60,8 @@ class ClusterPalette:
                 list(zip(*np.unique(labels, return_counts=True))),
                 key=lambda kv: kv[1], reverse=True)]
 
-        res = np.empty((16, 3), dtype=np.uint8)
-        for i in range(16):
-            res[i, :] = dither_pyx.convert_cam16ucs_to_rgb12_iigs(
-                clusters.cluster_centers_[frequency_order][i].astype(
+        res = dither_pyx.convert_cam16ucs_to_rgb12_iigs(
+                clusters.cluster_centers_[frequency_order].astype(
                     np.float32))
         return res
 
@@ -254,7 +252,6 @@ def main():
         rgb, reserved_colours=1, rgb12_iigs_to_cam16ucs=rgb12_iigs_to_cam16ucs)
 
     while iterations_since_improvement < iterations:
-        print(iterations_since_improvement)
         new_palettes_cam, new_palettes_rgb12_iigs, new_palette_errors = (
             cluster_palette.propose_palettes())
 
diff --git a/dither.pyx b/dither.pyx
index 044e3d2..599613c 100644
--- a/dither.pyx
+++ b/dither.pyx
@@ -502,23 +502,27 @@ import colour
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-cdef float[::1] linear_to_srgb_array(float[::1] a, float gamma=2.4):
-    cdef int i
-    cdef float[::1] res = np.empty(3, dtype=np.float32)
-    for i in range(3):
-        if a[i] <= 0.0031308:
-            res[i] = a[i] * 12.92
-        else:
-            res[i] = 1.055 * a[i] ** (1.0 / gamma) - 0.055
+cdef float[:, ::1] linear_to_srgb_array(float[:, ::1] a, float gamma=2.4):
+    cdef int i, j
+    cdef float[:, ::1] res = np.empty_like(a, dtype=np.float32)
+    for i in range(res.shape[0]):
+        for j in range(3):
+            if a[i, j] <= 0.0031308:
+                res[i, j] = a[i, j] * 12.92
+            else:
+                res[i, j] = 1.055 * a[i, j] ** (1.0 / gamma) - 0.055
     return res
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-cdef (unsigned char)[::1] _convert_cam16ucs_to_rgb12_iigs(float[::1] point_cam):
-    cdef float[::1] rgb, rgb12_iigs
-    cdef int i
+cdef (unsigned char)[:, ::1] _convert_cam16ucs_to_rgb12_iigs(float[:, ::1] point_cam):
+    cdef float[:, ::1] rgb
+    cdef (float)[:, ::1] rgb12_iigs
 
     # Convert CAM16UCS input to RGB
+    # TODO: this dynamically constructs a path on the graph of colour conversions every time, which is
+    #  presumably not very efficient.  However, colour.convert doesn't provide a way to cache the composed conversion
+    #  function so we'd have to build it ourselves (https://github.com/colour-science/colour/issues/905)
     with colour.utilities.suppress_warnings(python_warnings=True):
         rgb = colour.convert(point_cam, "CAM16UCS", "RGB").astype(np.float32)
 
@@ -528,17 +532,14 @@ cdef (unsigned char)[::1] _convert_cam16ucs_to_rgb12_iigs(float[::1] point_cam):
             # Gamma correct and convert Rec.709 R'G'B' to YCbCr
             colour.RGB_to_YCbCr(
                 linear_to_srgb_array(rgb), K=colour.WEIGHTS_YCBCR['ITU-R BT.709']),
-            K=colour.WEIGHTS_YCBCR['ITU-R BT.601']), 0, 1).astype(np.float32)
-
-    for i in range(3):
-        rgb12_iigs[i] *= 15
-
+            K=colour.WEIGHTS_YCBCR['ITU-R BT.601']), 0, 1).astype(np.float32) * 15
     return np.round(rgb12_iigs).astype(np.uint8)
 
 
-def convert_cam16ucs_to_rgb12_iigs(float[::1] point_cam):
+def convert_cam16ucs_to_rgb12_iigs(float[:, ::1] point_cam):
     return _convert_cam16ucs_to_rgb12_iigs(point_cam)
 
+
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def k_means_with_fixed_centroids(
@@ -549,9 +550,10 @@ def k_means_with_fixed_centroids(
     cdef int centroid_idx, closest_centroid_idx, i, point_idx
 
     cdef (unsigned char)[:, ::1] centroids_rgb12 = initial_centroids[:, :]
-    cdef (unsigned char)[::1] centroid_rgb12, new_centroid_rgb12
+    cdef (unsigned char)[:, ::1] new_centroids_rgb12
 
-    cdef float[::1] point_cam, new_centroid_cam = np.empty(3, dtype=np.float32)
+    cdef float[::1] point_cam
+    cdef float[:, ::1] new_centroids_cam = np.empty((n_clusters - n_fixed, 3), dtype=np.float32)
     cdef float[:, ::1] centroid_cam_sample_positions_total
     cdef int[::1] centroid_sample_counts
 
@@ -574,8 +576,8 @@ def k_means_with_fixed_centroids(
             best_error = 1e9
             closest_centroid_idx = 0
             for centroid_idx in range(n_clusters):
-                centroid_rgb12 = centroids_rgb12[centroid_idx, :]
-                error = colour_distance_squared(_convert_rgb12_iigs_to_cam(rgb12_iigs_to_cam16ucs, centroid_rgb12), point_cam)
+                error = colour_distance_squared(
+                    _convert_rgb12_iigs_to_cam(rgb12_iigs_to_cam16ucs, centroids_rgb12[centroid_idx, :]), point_cam)
                 if error < best_error:
                     best_error = error
                     closest_centroid_idx = centroid_idx
@@ -587,16 +589,21 @@ def k_means_with_fixed_centroids(
         for centroid_idx in range(n_fixed, n_clusters):
             if centroid_sample_counts[centroid_idx]:
                 for i in range(3):
-                    new_centroid_cam[i] = (
+                    new_centroids_cam[centroid_idx - n_fixed, i] = (
                         centroid_cam_sample_positions_total[centroid_idx, i] / centroid_sample_counts[centroid_idx])
                 centroid_movement += colour_distance_squared(
-                    _convert_rgb12_iigs_to_cam(rgb12_iigs_to_cam16ucs, centroids_rgb12[centroid_idx]), new_centroid_cam)
-                new_centroid_rgb12 = _convert_cam16ucs_to_rgb12_iigs(new_centroid_cam)
-                for i in range(3):
-                    if centroids_rgb12[centroid_idx, i] != new_centroid_rgb12[i]:
-                        # print(i, centroids_rgb12[centroid_idx, i], new_centroid_rgb12[i])
-                        centroids_rgb12[centroid_idx, i] = new_centroid_rgb12[i]
-                        centroid_moved = 1
+                    _convert_rgb12_iigs_to_cam(
+                        rgb12_iigs_to_cam16ucs, centroids_rgb12[centroid_idx]),
+                    new_centroids_cam[centroid_idx - n_fixed, :])
+
+        # Convert all new centroids as a single matrix since _convert_cam16ucs_to_rgb12_iigs has nontrivial overhead
+        new_centroids_rgb12 = _convert_cam16ucs_to_rgb12_iigs(new_centroids_cam)
+
+        for centroid_idx in range(n_clusters - n_fixed):
+            for i in range(3):
+                if centroids_rgb12[centroid_idx + n_fixed, i] != new_centroids_rgb12[centroid_idx, i]:
+                    centroids_rgb12[centroid_idx + n_fixed, i] = new_centroids_rgb12[centroid_idx, i]
+                    centroid_moved = 1
 
         # print(iteration, centroid_movement, total_error, centroids_rgb12)
 
@@ -605,25 +612,4 @@ def k_means_with_fixed_centroids(
         if centroid_moved == 0:
             break
 
-    return centroids_rgb12, total_error
-
-
-#@cython.boundscheck(False)
-#@cython.wraparound(False)
-#cdef float[::1] closest_quantized_point(float [:, ::1] rgb24_to_cam, float [::1] point_cam) nogil:
-#    cdef unsigned int rgb12, rgb24, closest_rgb24, r, g, b
-#    cdef double best_distance = 1e9, distance
-#    for rgb12 in range(2**12):
-#        r = rgb12 >> 8
-#        g = (rgb12 >> 4) & 0xf
-#        b = rgb12 & 0xf
-#        rgb24 = (r << 20) | (r << 16) | (g << 12) | (g << 8) | (b << 4) | b
-#        distance = colour_distance_squared(rgb24_to_cam[rgb24], point_cam)
-#        # print(hex(rgb24), distance)
-#        if distance < best_distance:
-#            best_distance = distance
-#            closest_rgb24 = rgb24
-#            # print(distance, rgb24, hex(rgb24))
-#    # print("-->", closest_rgb24, hex(closest_rgb24), best_distance)
-#    return rgb24_to_cam[closest_rgb24]
-
+    return centroids_rgb12, total_error
\ No newline at end of file

From cfc150ed134eba43d7d92ec8c62d90af4f4ec9c4 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Thu, 18 Nov 2021 22:03:18 +0000
Subject: [PATCH 31/82] Remove some dead code

---
 convert.py | 52 +++-------------------------------------------------
 1 file changed, 3 insertions(+), 49 deletions(-)

diff --git a/convert.py b/convert.py
index b6be18c..f2053da 100644
--- a/convert.py
+++ b/convert.py
@@ -107,15 +107,6 @@ class ClusterPalette:
                              int(p_lower * (200 / 16)) * 320:int(p_upper * (
                                      200 / 16)) * 320, :]
 
-            # TODO: tune tolerance
-            # clusters = cluster.MiniBatchKMeans(
-            #     n_clusters=16, max_iter=10000,
-            #     init=self._global_palette,
-            #     n_init=1)
-            # clusters.fit_predict(palette_pixels)
-            #
-            # palette_error = clusters.inertia_
-
             palettes_rgb12_iigs, palette_error = \
                 dither_pyx.k_means_with_fixed_centroids(
                     n_clusters=16, n_fixed=self._reserved_colours,
@@ -138,27 +129,6 @@ class ClusterPalette:
                         self._rgb12_iigs_to_cam16ucs, palettes_rgb12_iigs[
                             i]), dtype=np.float32))
 
-            # Suppress divide by zero warning,
-            # https://github.com/colour-science/colour/issues/900
-            # with colour.utilities.suppress_warnings(python_warnings=True):
-            #     palette_rgb = colour.convert(
-            #         new_palettes_cam[palette_idx, :, :], "CAM16UCS", "RGB")
-            # palette_rgb_rec601 = np.clip(image_py.srgb_to_linear(
-            #     colour.YCbCr_to_RGB(
-            #         colour.RGB_to_YCbCr(
-            #             image_py.linear_to_srgb(palette_rgb * 255) / 255,
-            #             K=colour.WEIGHTS_YCBCR['ITU-R BT.709']),
-            #         K=colour.WEIGHTS_YCBCR['ITU-R BT.601']) * 255) / 255, 0, 1)
-            # palette_rgb = np.clip(
-            #     image_py.srgb_to_linear(
-            #         colour.YCbCr_to_RGB(
-            #             colour.RGB_to_YCbCr(
-            #                 image_py.linear_to_srgb(
-            #                     palette_rgb[:, :] * 255) / 255,
-            #                 K=colour.WEIGHTS_YCBCR['ITU-R BT.709']),
-            #             K=colour.WEIGHTS_YCBCR[
-            #                 'ITU-R BT.601']) * 255) / 255,
-            #     0, 1)
             new_palettes_rgb12_iigs[palette_idx, :, :] = palettes_rgb12_iigs
             new_errors[palette_idx] = palette_error
 
@@ -285,15 +255,6 @@ def main():
         total_image_error = new_total_image_error
         palettes_rgb12_iigs = new_palettes_rgb12_iigs
         palettes_linear_rgb = new_palettes_linear_rgb
-        # # Recompute 4-bit //gs RGB palettes
-        # palette_rgb_rec601 = np.clip(
-        #     colour.YCbCr_to_RGB(
-        #         colour.RGB_to_YCbCr(
-        #             image_py.linear_to_srgb(palettes_rgb12_iigs * 255) / 255,
-        #             K=colour.WEIGHTS_YCBCR['ITU-R BT.709']),
-        #         K=colour.WEIGHTS_YCBCR['ITU-R BT.601']), 0, 1)
-        #
-        # palettes_iigs = np.round(palette_rgb_rec601 * 15).astype(np.uint8)
         for i in range(16):
             screen.set_palette(i, palettes_rgb12_iigs[i, :, :])
 
@@ -303,16 +264,9 @@ def main():
         for i in range(200):
             screen.line_palette[i] = line_to_palette[i]
             output_rgb[i, :, :] = (
-                    palettes_linear_rgb[line_to_palette[i]][output_4bit[i, :]] * 255
-            ).astype(
-                # np.round(palettes_rgb[line_to_palette[i]][
-                #              output_4bit[i, :]] * 15) / 15 * 255).astype(
-                np.uint8)
-        # output_srgb_rec709 = np.clip(colour.YCbCr_to_RGB(
-        #     colour.RGB_to_YCbCr(
-        #         image_py.linear_to_srgb(output_rgb) / 255,
-        #         K=colour.WEIGHTS_YCBCR['ITU-R BT.601']),
-        #     K=colour.WEIGHTS_YCBCR['ITU-R BT.709']), 0, 1) * 255
+                    palettes_linear_rgb[line_to_palette[i]][
+                        output_4bit[i, :]] * 255
+            ).astype(np.uint8)
 
         output_srgb = (image_py.linear_to_srgb(output_rgb)).astype(np.uint8)
 

From 9e46ca48a027e524d7a0f37ff359ad560d43708e Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Thu, 18 Nov 2021 22:08:09 +0000
Subject: [PATCH 32/82] Refactor to extract palette splits in preparation for
 tuning them dynamically

---
 convert.py | 38 +++++++++++++++++++++++---------------
 1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/convert.py b/convert.py
index f2053da..0534f11 100644
--- a/convert.py
+++ b/convert.py
@@ -61,10 +61,26 @@ class ClusterPalette:
                 key=lambda kv: kv[1], reverse=True)]
 
         res = dither_pyx.convert_cam16ucs_to_rgb12_iigs(
-                clusters.cluster_centers_[frequency_order].astype(
-                    np.float32))
+            clusters.cluster_centers_[frequency_order].astype(
+                np.float32))
         return res
 
+    def _palette_splits(self, palette_band_width=3):
+        # The 16 palettes are striped across consecutive (overlapping) line
+        # ranges.  The basic unit is 200/16 = 12.5 lines, but we extend the
+        # line range to cover a multiple of this so that the palette ranges
+        # overlap.  Since nearby lines tend to have similar colours, this has
+        # the effect of smoothing out the colour transitions across palettes.
+
+        palette_ranges = []
+        for palette_idx in range(16):
+            p_lower = max(palette_idx + 0.5 - (palette_band_width / 2), 0)
+            p_upper = min(palette_idx + 0.5 + (palette_band_width / 2), 16)
+            palette_ranges.append(
+                (int(p_lower * (200 / 16)) * 320, int(p_upper * (200 / 16)) *
+                 320))
+        return palette_ranges
+
     def propose_palettes(self) -> Tuple[np.ndarray, np.ndarray, List[float]]:
         """Attempt to find new palettes that locally improve image quality.
 
@@ -93,19 +109,11 @@ class ClusterPalette:
 
         dynamic_colours = 16 - self._reserved_colours
 
-        # The 16 palettes are striped across consecutive (overlapping) line
-        # ranges.  The basic unit is 200/16 = 12.5 lines, but we extend the
-        # line range to cover a multiple of this so that the palette ranges
-        # overlap.  Since nearby lines tend to have similar colours, this has
-        # the effect of smoothing out the colour transitions across palettes.
-        palette_band_width = 3
+        palette_splits = self._palette_splits()
         for palette_idx in range(16):
-            p_lower = max(palette_idx + 0.5 - (palette_band_width / 2), 0)
-            p_upper = min(palette_idx + 0.5 + (palette_band_width / 2), 16)
+            palette_lower, palette_upper = palette_splits[palette_idx]
             # TODO: dynamically tune palette cuts
-            palette_pixels = self._colours_cam[
-                             int(p_lower * (200 / 16)) * 320:int(p_upper * (
-                                     200 / 16)) * 320, :]
+            palette_pixels = self._colours_cam[palette_lower:palette_upper, :]
 
             palettes_rgb12_iigs, palette_error = \
                 dither_pyx.k_means_with_fixed_centroids(
@@ -217,7 +225,6 @@ def main():
     total_image_error = 1e9
     iterations_since_improvement = 0
 
-    # palettes_iigs = np.empty((16, 16, 3), dtype=np.uint8)
     cluster_palette = ClusterPalette(
         rgb, reserved_colours=1, rgb12_iigs_to_cam16ucs=rgb12_iigs_to_cam16ucs)
 
@@ -293,7 +300,8 @@ def main():
             canvas.blit(surface, (0, 0))
             pygame.display.flip()
     # print((palettes_rgb * 255).astype(np.uint8))
-    unique_colours = np.unique(palettes_rgb12_iigs.reshape(-1, 3), axis=0).shape[0]
+    unique_colours = np.unique(
+        palettes_rgb12_iigs.reshape(-1, 3), axis=0).shape[0]
     print("%d unique colours" % unique_colours)
 
     # Save Double hi-res image

From b1d3488182912bade2eb239d13c832a3dd44cab1 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Thu, 18 Nov 2021 22:27:19 +0000
Subject: [PATCH 33/82] Actually use equal-sized palette splits.  With the
 previous version the first and last were smaller.

---
 convert.py | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/convert.py b/convert.py
index 0534f11..3a3afdc 100644
--- a/convert.py
+++ b/convert.py
@@ -65,20 +65,29 @@ class ClusterPalette:
                 np.float32))
         return res
 
-    def _palette_splits(self, palette_band_width=3):
+    def _palette_splits(self, palette_height=35):
         # The 16 palettes are striped across consecutive (overlapping) line
-        # ranges.  The basic unit is 200/16 = 12.5 lines, but we extend the
-        # line range to cover a multiple of this so that the palette ranges
-        # overlap.  Since nearby lines tend to have similar colours, this has
+        # ranges.  Since nearby lines tend to have similar colours, this has
         # the effect of smoothing out the colour transitions across palettes.
 
+        # If we want to overlap 16 palettes in 200 lines, where each palette
+        # has height H and overlaps the previous one by L lines, then the
+        # boundaries are at lines:
+        #   (0, H), (H-L, 2H-L), (2H-2L, 3H-2L), ..., (15H-15L, 16H - 15L)
+        # i.e. 16H - 15L = 200, so for a given palette height H we need to
+        # overlap by:
+        #   L = (16H - 200)/15
+
+        palette_overlap = (16 * palette_height - 200) / 15
+
         palette_ranges = []
         for palette_idx in range(16):
-            p_lower = max(palette_idx + 0.5 - (palette_band_width / 2), 0)
-            p_upper = min(palette_idx + 0.5 + (palette_band_width / 2), 16)
-            palette_ranges.append(
-                (int(p_lower * (200 / 16)) * 320, int(p_upper * (200 / 16)) *
-                 320))
+            palette_lower = int(
+                palette_idx * (palette_height - palette_overlap))
+            palette_upper = palette_lower + palette_height
+            palette_ranges.append((palette_lower, palette_upper))
+        assert palette_upper == 200
+        # print(palette_ranges)
         return palette_ranges
 
     def propose_palettes(self) -> Tuple[np.ndarray, np.ndarray, List[float]]:
@@ -113,7 +122,8 @@ class ClusterPalette:
         for palette_idx in range(16):
             palette_lower, palette_upper = palette_splits[palette_idx]
             # TODO: dynamically tune palette cuts
-            palette_pixels = self._colours_cam[palette_lower:palette_upper, :]
+            palette_pixels = self._colours_cam[
+                             palette_lower * 320:palette_upper * 320, :]
 
             palettes_rgb12_iigs, palette_error = \
                 dither_pyx.k_means_with_fixed_centroids(

From b78c42e2875eef98a9cb9dc50f36b559102fe63f Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Thu, 18 Nov 2021 22:35:15 +0000
Subject: [PATCH 34/82] Fix rounding

---
 convert.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/convert.py b/convert.py
index 3a3afdc..d964515 100644
--- a/convert.py
+++ b/convert.py
@@ -82,11 +82,10 @@ class ClusterPalette:
 
         palette_ranges = []
         for palette_idx in range(16):
-            palette_lower = int(
-                palette_idx * (palette_height - palette_overlap))
+            palette_lower = palette_idx * (palette_height - palette_overlap)
             palette_upper = palette_lower + palette_height
-            palette_ranges.append((palette_lower, palette_upper))
-        assert palette_upper == 200
+            palette_ranges.append((int(np.round(palette_lower)),
+                                   int(np.round(palette_upper))))
         # print(palette_ranges)
         return palette_ranges
 

From be55fb859d728ed38f79ae77654dc0bb6a2948bf Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Tue, 23 Nov 2021 12:46:36 +0000
Subject: [PATCH 35/82] - Fix a serious bug in best_palette_for_line which was
 not actually computing the palette with lowest per-row error, rather the
 lowest per-pixel error! - Tidy a bit

---
 dither.pyx | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/dither.pyx b/dither.pyx
index 599613c..c7026b8 100644
--- a/dither.pyx
+++ b/dither.pyx
@@ -343,9 +343,9 @@ def dither_shr(
         float[:,::1] rgb_to_cam16ucs, float penalty):
     cdef int y, x, idx, best_colour_idx, best_palette, i
     cdef double best_distance, distance, total_image_error
-    cdef float[::1] best_colour_rgb, pixel_cam, colour_rgb, colour_cam
+    cdef float[::1] best_colour_rgb, pixel_cam
     cdef float quant_error
-    cdef float[:, ::1] palette_rgb
+    cdef float[:, ::1] palette_rgb, palette_cam
 
     cdef (unsigned char)[:, ::1] output_4bit = np.zeros((200, 320), dtype=np.uint8)
     cdef float[:, :, ::1] working_image = np.copy(input_rgb)
@@ -353,16 +353,17 @@ def dither_shr(
 
     cdef int[::1] line_to_palette = np.zeros(200, dtype=np.int32)
 
-    best_palette = 15
+    best_palette = -1
     total_image_error = 0.0
     for y in range(200):
         for x in range(320):
-            colour_cam = convert_rgb_to_cam16ucs(
+            line_cam[x, :] = convert_rgb_to_cam16ucs(
                 rgb_to_cam16ucs, working_image[y,x,0], working_image[y,x,1], working_image[y,x,2])
-            line_cam[x, :] = colour_cam
 
+        # TODO: needs to be aware of splits
         best_palette = best_palette_for_line(line_cam, palettes_cam, <int>(y * 16 / 200), best_palette, penalty)
         palette_rgb = palettes_rgb[best_palette, :, :]
+        palette_cam = palettes_cam[best_palette, :, :]
         line_to_palette[y] = best_palette
 
         for x in range(320):
@@ -372,9 +373,7 @@ def dither_shr(
             best_distance = 1e9
             best_colour_idx = -1
             for idx in range(16):
-                colour_rgb = palette_rgb[idx, :]
-                colour_cam = convert_rgb_to_cam16ucs(rgb_to_cam16ucs, colour_rgb[0], colour_rgb[1], colour_rgb[2])
-                distance = colour_distance_squared(pixel_cam, colour_cam)
+                distance = colour_distance_squared(pixel_cam, palette_cam[idx, :])
                 if distance < best_distance:
                     best_distance = distance
                     best_colour_idx = idx
@@ -459,7 +458,7 @@ cdef int best_palette_for_line(float [:, ::1] line_cam, float[:, :, ::1] palette
     cdef int palette_idx, best_palette_idx, palette_entry_idx, pixel_idx
     cdef double best_total_dist, total_dist, best_pixel_dist, pixel_dist
     cdef float[:, ::1] palette_cam
-    cdef float[::1] pixel_cam, palette_entry
+    cdef float[::1] pixel_cam
 
     best_total_dist = 1e9
     best_palette_idx = -1
@@ -474,16 +473,14 @@ cdef int best_palette_for_line(float [:, ::1] line_cam, float[:, :, ::1] palette
         else:
             penalty = 1.0
         total_dist = 0
-        best_pixel_dist = 1e9
         for pixel_idx in range(line_size):
             pixel_cam = line_cam[pixel_idx]
+            best_pixel_dist = 1e9
             for palette_entry_idx in range(16):
-                palette_entry = palette_cam[palette_entry_idx, :]
-                pixel_dist = colour_distance_squared(pixel_cam, palette_entry) * penalty
+                pixel_dist = colour_distance_squared(pixel_cam, palette_cam[palette_entry_idx, :]) * penalty
                 if pixel_dist < best_pixel_dist:
                     best_pixel_dist = pixel_dist
             total_dist += best_pixel_dist
-            # print(total_dist)
         if total_dist < best_total_dist:
             best_total_dist = total_dist
             best_palette_idx = palette_idx
@@ -605,8 +602,6 @@ def k_means_with_fixed_centroids(
                     centroids_rgb12[centroid_idx + n_fixed, i] = new_centroids_rgb12[centroid_idx, i]
                     centroid_moved = 1
 
-        # print(iteration, centroid_movement, total_error, centroids_rgb12)
-
         if centroid_movement < tolerance:
             break
         if centroid_moved == 0:

From 189b4655ad7166cc69d6a561b77825307b3c9d14 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Tue, 23 Nov 2021 12:49:37 +0000
Subject: [PATCH 36/82] Since fixing the bug in the previous commit there is no
 longer a need to restrict the palette search to neighbouring palettes (a
 restriction which was unaware of the dynamic line splits anyway)

---
 dither.pyx | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/dither.pyx b/dither.pyx
index c7026b8..0881ef3 100644
--- a/dither.pyx
+++ b/dither.pyx
@@ -360,8 +360,7 @@ def dither_shr(
             line_cam[x, :] = convert_rgb_to_cam16ucs(
                 rgb_to_cam16ucs, working_image[y,x,0], working_image[y,x,1], working_image[y,x,2])
 
-        # TODO: needs to be aware of splits
-        best_palette = best_palette_for_line(line_cam, palettes_cam, <int>(y * 16 / 200), best_palette, penalty)
+        best_palette = best_palette_for_line(line_cam, palettes_cam, best_palette, penalty)
         palette_rgb = palettes_rgb[best_palette, :, :]
         palette_cam = palettes_cam[best_palette, :, :]
         line_to_palette[y] = best_palette
@@ -452,9 +451,10 @@ def dither_shr(
 
     return np.array(output_4bit, dtype=np.uint8), line_to_palette, total_image_error
 
+
 @cython.boundscheck(False)
 @cython.wraparound(False)
-cdef int best_palette_for_line(float [:, ::1] line_cam, float[:, :, ::1] palettes_cam, int base_palette_idx, int last_palette_idx, float last_penalty) nogil:
+cdef int best_palette_for_line(float [:, ::1] line_cam, float[:, :, ::1] palettes_cam, int last_palette_idx, float last_penalty) nogil:
     cdef int palette_idx, best_palette_idx, palette_entry_idx, pixel_idx
     cdef double best_total_dist, total_dist, best_pixel_dist, pixel_dist
     cdef float[:, ::1] palette_cam
@@ -466,12 +466,7 @@ cdef int best_palette_for_line(float [:, ::1] line_cam, float[:, :, ::1] palette
     cdef int line_size = line_cam.shape[0]
     for palette_idx in range(16):
         palette_cam = palettes_cam[palette_idx, :, :]
-        if palette_idx < (base_palette_idx - 1) or palette_idx > (base_palette_idx + 1):
-            continue
-        if palette_idx == last_palette_idx:
-            penalty = last_penalty
-        else:
-            penalty = 1.0
+        penalty = last_penalty if palette_idx == last_palette_idx else 1.0
         total_dist = 0
         for pixel_idx in range(line_size):
             pixel_cam = line_cam[pixel_idx]
@@ -486,12 +481,14 @@ cdef int best_palette_for_line(float [:, ::1] line_cam, float[:, :, ::1] palette
             best_palette_idx = palette_idx
     return best_palette_idx
 
+
 @cython.boundscheck(False)
 @cython.wraparound(False)
 cdef float[::1] _convert_rgb12_iigs_to_cam(float [:, ::1] rgb12_iigs_to_cam16ucs, (unsigned char)[::1] point_rgb12):
     cdef int rgb12 = (point_rgb12[0] << 8) | (point_rgb12[1] << 4) | point_rgb12[2]
     return rgb12_iigs_to_cam16ucs[rgb12]
 
+
 def convert_rgb12_iigs_to_cam(float [:, ::1] rgb12_iigs_to_cam16ucs, (unsigned char)[::1] point_rgb12) -> float[::1]:
     return _convert_rgb12_iigs_to_cam(rgb12_iigs_to_cam16ucs, point_rgb12)
 

From 6e52680cf1b2a51e0f563b8aeda5436637046e70 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Tue, 23 Nov 2021 13:01:50 +0000
Subject: [PATCH 37/82] Dynamically tune the line ranges used to fit the 16 SHR
 palettes:

- start with an equal split
- with each iteration, pick a palette and adjust its line ranges by a small random amount
- if the proposed palette is accepted, continue to apply the same delta
- if not, revert the adjustment and pick a different one

In addition, often there will be palettes that are entirely unused by
the image.  For such palettes:

- find the palette with the largest line range.  If it spans more than
  20 lines, then split this range at its midpoint and assign one half to
  that palette and the other half to the unused palette
- otherwise, pick a random line range for the unused palette

This helps to refine and explore more of the parameter space.
---
 convert.py | 130 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 120 insertions(+), 10 deletions(-)

diff --git a/convert.py b/convert.py
index d964515..dd59e9c 100644
--- a/convert.py
+++ b/convert.py
@@ -30,13 +30,44 @@ class ClusterPalette:
     def __init__(
             self, image: Image, rgb12_iigs_to_cam16ucs, reserved_colours=0):
         self._colours_cam = self._image_colours_cam(image)
-        self._reserved_colours = reserved_colours
+
         self._errors = [1e9] * 16
+
+        # We fit a 16-colour palette against the entire image which is used
+        # as starting values for fitting the 16 SHR palettes.  This helps to
+        # provide better global consistency of colours across the palettes,
+        # e.g. for large blocks of colour.  Otherwise these can take a while
+        # to converge.
+        self._global_palette = np.empty((16, 3), dtype=np.uint8)
+
+        # How many image colours to fix identically across all 16 SHR
+        # palettes.  These are taken to be the most prevalent colours from
+        # _global_palette.
+        self._reserved_colours = reserved_colours
+
+        # 16 SHR palettes each of 16 colours, in CAM16UCS format
         self._palettes_cam = np.empty((16, 16, 3), dtype=np.float32)
+
+        # 16 SHR palettes each of 16 colours, in //gs 4-bit RGB format
         self._palettes_rgb = np.empty((16, 16, 3), dtype=np.uint8)
-        self._global_palette = np.empty((16, 16, 3), dtype=np.float32)
+
+        # Conversion matrix from 12-bit //gs RGB colour space to CAM16UCS
+        # colour space
         self._rgb12_iigs_to_cam16ucs = rgb12_iigs_to_cam16ucs
 
+        # List of line ranges used to train the 16 SHR palettes
+        # [(lower_0, upper_0), ...]
+        self._palette_splits = self._palette_splits()
+
+        # Whether the previous iteration of proposed palettes was accepted
+        self._palettes_accepted = False
+
+        # Which palette index's line ranges did we mutate in previous iteration
+        self._palette_mutate_idx = 0
+
+        # Delta applied to palette split in previous iteration
+        self._palette_mutate_delta = (0, 0)
+
     def _image_colours_cam(self, image: Image):
         colours_rgb = np.asarray(image).reshape((-1, 3))
         with colour.utilities.suppress_warnings(colour_usage_warnings=True):
@@ -60,10 +91,9 @@ class ClusterPalette:
                 list(zip(*np.unique(labels, return_counts=True))),
                 key=lambda kv: kv[1], reverse=True)]
 
-        res = dither_pyx.convert_cam16ucs_to_rgb12_iigs(
+        return dither_pyx.convert_cam16ucs_to_rgb12_iigs(
             clusters.cluster_centers_[frequency_order].astype(
                 np.float32))
-        return res
 
     def _palette_splits(self, palette_height=35):
         # The 16 palettes are striped across consecutive (overlapping) line
@@ -74,7 +104,7 @@ class ClusterPalette:
         # has height H and overlaps the previous one by L lines, then the
         # boundaries are at lines:
         #   (0, H), (H-L, 2H-L), (2H-2L, 3H-2L), ..., (15H-15L, 16H - 15L)
-        # i.e. 16H - 15L = 200, sofor a given palette height H we need to
+        # i.e. 16H - 15L = 200, so for a given palette height H we need to
         # overlap by:
         #   L = (16H - 200)/15
 
@@ -86,9 +116,51 @@ class ClusterPalette:
             palette_upper = palette_lower + palette_height
             palette_ranges.append((int(np.round(palette_lower)),
                                    int(np.round(palette_upper))))
-        # print(palette_ranges)
         return palette_ranges
 
+    def _apply_palette_delta(
+            self, palette_to_mutate, palette_lower_delta, palette_upper_delta):
+        old_lower, old_upper = self._palette_splits[palette_to_mutate]
+        new_lower = old_lower + palette_lower_delta
+        new_upper = old_upper + palette_upper_delta
+
+        new_lower = np.clip(new_lower, 0, np.clip(new_upper, 1, 200) - 1)
+        new_upper = np.clip(new_upper, new_lower + 1, 200)
+        assert new_lower >= 0, new_upper-1
+
+        self._palette_splits[palette_to_mutate] = (new_lower, new_upper)
+        self._palette_mutate_idx = palette_to_mutate
+        self._palette_mutate_delta = (palette_lower_delta, palette_upper_delta)
+
+    def _mutate_palette_splits(self):
+        if self._palettes_accepted:
+            # Last time was good, keep going
+            self._apply_palette_delta(self._palette_mutate_idx,
+                                      self._palette_mutate_delta[0],
+                                      self._palette_mutate_delta[1])
+        else:
+            # undo last mutation
+            self._apply_palette_delta(self._palette_mutate_idx,
+                                      -self._palette_mutate_delta[0],
+                                      -self._palette_mutate_delta[1])
+
+            # Pick a palette endpoint to move up or down
+            palette_to_mutate = np.random.randint(0, 16)
+            while True:
+                if palette_to_mutate > 0:
+                    palette_lower_delta = np.random.randint(-20, 21)
+                else:
+                    palette_lower_delta = 0
+                if palette_to_mutate < 15:
+                    palette_upper_delta = np.random.randint(-20, 21)
+                else:
+                    palette_upper_delta = 0
+                if palette_lower_delta != 0 or palette_upper_delta != 0:
+                    break
+
+            self._apply_palette_delta(palette_to_mutate, palette_lower_delta,
+                                      palette_upper_delta)
+
     def propose_palettes(self) -> Tuple[np.ndarray, np.ndarray, List[float]]:
         """Attempt to find new palettes that locally improve image quality.
 
@@ -117,9 +189,9 @@ class ClusterPalette:
 
         dynamic_colours = 16 - self._reserved_colours
 
-        palette_splits = self._palette_splits()
+        self._mutate_palette_splits()
         for palette_idx in range(16):
-            palette_lower, palette_upper = palette_splits[palette_idx]
+            palette_lower, palette_upper = self._palette_splits[palette_idx]
             # TODO: dynamically tune palette cuts
             palette_pixels = self._colours_cam[
                              palette_lower * 320:palette_upper * 320, :]
@@ -149,6 +221,7 @@ class ClusterPalette:
             new_palettes_rgb12_iigs[palette_idx, :, :] = palettes_rgb12_iigs
             new_errors[palette_idx] = palette_error
 
+        self._palettes_accepted = False
         return new_palettes_cam, new_palettes_rgb12_iigs, new_errors
 
     def accept_palettes(
@@ -157,6 +230,7 @@ class ClusterPalette:
         self._palettes_cam = np.copy(new_palettes_cam)
         self._palettes_rgb = np.copy(new_palettes_rgb)
         self._errors = list(new_errors)
+        self._palettes_accepted = True
 
 
 def main():
@@ -220,8 +294,8 @@ def main():
                         gamma=args.gamma_correct)).astype(np.float32) / 255
 
     # TODO: flags
-    penalty = 1e9
-    iterations = 50
+    penalty = 1  # 1e18  # TODO: is this needed any more?
+    iterations = 200
 
     pygame.init()
     # TODO: for some reason I need to execute this twice - the first time
@@ -234,10 +308,13 @@ def main():
     total_image_error = 1e9
     iterations_since_improvement = 0
 
+    # TODO: reserved_colours should be a flag
     cluster_palette = ClusterPalette(
         rgb, reserved_colours=1, rgb12_iigs_to_cam16ucs=rgb12_iigs_to_cam16ucs)
+    last_good_splits = cluster_palette._palette_splits
 
     while iterations_since_improvement < iterations:
+        # print("Iterations %d" % iterations_since_improvement)
         new_palettes_cam, new_palettes_rgb12_iigs, new_palette_errors = (
             cluster_palette.propose_palettes())
 
@@ -253,6 +330,37 @@ def main():
             dither_pyx.dither_shr(
                 rgb, new_palettes_cam, new_palettes_linear_rgb,
                 rgb24_to_cam16ucs, float(penalty))
+
+        # print(total_image_error, new_total_image_error,
+        #       cluster_palette._palette_splits)
+
+        # TODO: move this into ClusterPalettes
+        palettes_used = [False] * 16
+        for palette in new_line_to_palette:
+            palettes_used[palette] = True
+        for palette_idx, palette in enumerate(palettes_used):
+            if palette:
+                continue
+            print("Reassigning palette %d" % palette_idx)
+            max_width = 0
+            split_palette_idx = -1
+            idx = 0
+            for lower, upper in last_good_splits:
+                width = upper - lower
+                if width > max_width:
+                    split_palette_idx = idx
+                idx += 1
+
+            lower, upper = last_good_splits[split_palette_idx]
+            if upper - lower > 20:
+                mid = (lower + upper) // 2
+                cluster_palette._palette_splits[split_palette_idx] = (lower, mid)
+                cluster_palette._palette_splits[palette_idx] = (mid, upper)
+            else:
+                lower = np.random.randint(0, 199)
+                upper = np.random.randint(lower, 200)
+                cluster_palette._palette_splits[palette_idx] = (lower, upper)
+
         if new_total_image_error >= total_image_error:
             iterations_since_improvement += 1
             continue
@@ -261,11 +369,13 @@ def main():
         iterations_since_improvement = 0
         cluster_palette.accept_palettes(
             new_palettes_cam, new_palettes_rgb12_iigs, new_palette_errors)
+        last_good_splits = cluster_palette._palette_splits
 
         if total_image_error < 1e9:
             print("Improved quality +%f%% (%f)" % (
                 (1 - new_total_image_error / total_image_error) * 100,
                 new_total_image_error))
+            # print(cluster_palette._palette_splits)
         output_4bit = new_output_4bit
         line_to_palette = new_line_to_palette
         total_image_error = new_total_image_error

From 1ce5c25764c989ff3ad8eafdf9022ee2c3d31f80 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Tue, 23 Nov 2021 13:59:48 +0000
Subject: [PATCH 38/82] Fix a bug where _fit_global_palette would crash if
 there were fewer than 16 global colours computed.

---
 convert.py | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/convert.py b/convert.py
index dd59e9c..35a288f 100644
--- a/convert.py
+++ b/convert.py
@@ -83,17 +83,22 @@ class ClusterPalette:
 
         clusters = cluster.MiniBatchKMeans(n_clusters=16, max_iter=10000)
         clusters.fit_predict(self._colours_cam)
+        num_colours = clusters.n_clusters
 
         labels = clusters.labels_
+
+        # Dict of {palette idx : frequency count}
+        palette_freq = {idx: 0 for idx in range(16)}
+        for idx, freq in zip(*np.unique(labels, return_counts=True)):
+            palette_freq[idx] = freq
         frequency_order = [
             k for k, v in sorted(
-                # List of (palette idx, frequency count)
-                list(zip(*np.unique(labels, return_counts=True))),
-                key=lambda kv: kv[1], reverse=True)]
+                list(palette_freq.items()), key=lambda kv: kv[1], reverse=True)]
 
-        return dither_pyx.convert_cam16ucs_to_rgb12_iigs(
-            clusters.cluster_centers_[frequency_order].astype(
-                np.float32))
+        self._global_palette = (
+            dither_pyx.convert_cam16ucs_to_rgb12_iigs(
+                clusters.cluster_centers_[frequency_order].astype(
+                    np.float32)))
 
     def _palette_splits(self, palette_height=35):
         # The 16 palettes are striped across consecutive (overlapping) line
@@ -126,7 +131,7 @@ class ClusterPalette:
 
         new_lower = np.clip(new_lower, 0, np.clip(new_upper, 1, 200) - 1)
         new_upper = np.clip(new_upper, new_lower + 1, 200)
-        assert new_lower >= 0, new_upper-1
+        assert new_lower >= 0, new_upper - 1
 
         self._palette_splits[palette_to_mutate] = (new_lower, new_upper)
         self._palette_mutate_idx = palette_to_mutate
@@ -185,7 +190,7 @@ class ClusterPalette:
         # Compute a new 16-colour global palette for the entire image,
         # used as the starting center positions for k-means clustering of the
         # individual palettes
-        self._global_palette = self._fit_global_palette()
+        self._fit_global_palette()
 
         dynamic_colours = 16 - self._reserved_colours
 
@@ -338,8 +343,8 @@ def main():
         palettes_used = [False] * 16
         for palette in new_line_to_palette:
             palettes_used[palette] = True
-        for palette_idx, palette in enumerate(palettes_used):
-            if palette:
+        for palette_idx, palette_used in enumerate(palettes_used):
+            if palette_used:
                 continue
             print("Reassigning palette %d" % palette_idx)
             max_width = 0
@@ -354,7 +359,8 @@ def main():
             lower, upper = last_good_splits[split_palette_idx]
             if upper - lower > 20:
                 mid = (lower + upper) // 2
-                cluster_palette._palette_splits[split_palette_idx] = (lower, mid)
+                cluster_palette._palette_splits[split_palette_idx] = (
+                    lower, mid - 1)
                 cluster_palette._palette_splits[palette_idx] = (mid, upper)
             else:
                 lower = np.random.randint(0, 199)

From 6988b19b4375796e1b627d4bcd9fd3c78af11771 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Tue, 23 Nov 2021 14:00:57 +0000
Subject: [PATCH 39/82] Tidy

---
 convert.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/convert.py b/convert.py
index 35a288f..23101f5 100644
--- a/convert.py
+++ b/convert.py
@@ -83,13 +83,10 @@ class ClusterPalette:
 
         clusters = cluster.MiniBatchKMeans(n_clusters=16, max_iter=10000)
         clusters.fit_predict(self._colours_cam)
-        num_colours = clusters.n_clusters
-
-        labels = clusters.labels_
 
         # Dict of {palette idx : frequency count}
         palette_freq = {idx: 0 for idx in range(16)}
-        for idx, freq in zip(*np.unique(labels, return_counts=True)):
+        for idx, freq in zip(*np.unique(clusters.labels_, return_counts=True)):
             palette_freq[idx] = freq
         frequency_order = [
             k for k, v in sorted(

From 0323b80e685bd5eb9d29a6bb772014b9d717846a Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Tue, 23 Nov 2021 14:51:04 +0000
Subject: [PATCH 40/82] Refactor: move the palette iteration loop out of main()
 and into ClusterPalette.iterate()

---
 convert.py | 158 ++++++++++++++++++++++++++++-------------------------
 1 file changed, 85 insertions(+), 73 deletions(-)

diff --git a/convert.py b/convert.py
index 23101f5..3dacec0 100644
--- a/convert.py
+++ b/convert.py
@@ -28,7 +28,9 @@ import screen as screen_py
 
 class ClusterPalette:
     def __init__(
-            self, image: Image, rgb12_iigs_to_cam16ucs, reserved_colours=0):
+            self, image: Image, rgb12_iigs_to_cam16ucs, rgb24_to_cam16ucs,
+            reserved_colours=0):
+        self._image_rgb = image
         self._colours_cam = self._image_colours_cam(image)
 
         self._errors = [1e9] * 16
@@ -55,6 +57,8 @@ class ClusterPalette:
         # colour space
         self._rgb12_iigs_to_cam16ucs = rgb12_iigs_to_cam16ucs
 
+        self._rgb24_to_cam16ucs = rgb24_to_cam16ucs
+
         # List of line ranges used to train the 16 SHR palettes
         # [(lower_0, upper_0), ...]
         self._palette_splits = self._palette_splits()
@@ -68,6 +72,75 @@ class ClusterPalette:
         # Delta applied to palette split in previous iteration
         self._palette_mutate_delta = (0, 0)
 
+    def iterate(self, penalty: float, max_iterations: int):
+        iterations_since_improvement = 0
+        total_image_error = 1e9
+
+        last_good_splits = self._palette_splits
+
+        while iterations_since_improvement < max_iterations:
+            # print("Iterations %d" % iterations_since_improvement)
+            new_palettes_cam, new_palettes_rgb12_iigs, new_palette_errors = (
+                self._propose_palettes())
+
+            # Suppress divide by zero warning,
+            # https://github.com/colour-science/colour/issues/900
+            with colour.utilities.suppress_warnings(python_warnings=True):
+                new_palettes_linear_rgb = colour.convert(
+                    new_palettes_cam, "CAM16UCS", "RGB").astype(np.float32)
+
+            # Recompute image with proposed palettes and check whether it has
+            # lower total image error than our previous best.
+            new_output_4bit, new_line_to_palette, new_total_image_error = \
+                dither_pyx.dither_shr(
+                    self._image_rgb, new_palettes_cam, new_palettes_linear_rgb,
+                    self._rgb24_to_cam16ucs, float(penalty))
+
+            # print(total_image_error, new_total_image_error,
+            #       self._palette_splits)
+
+            # TODO: extract this into a function
+            palettes_used = [False] * 16
+            for palette in new_line_to_palette:
+                palettes_used[palette] = True
+            for palette_idx, palette_used in enumerate(palettes_used):
+                if palette_used:
+                    continue
+                print("Reassigning palette %d" % palette_idx)
+                max_width = 0
+                split_palette_idx = -1
+                idx = 0
+                for lower, upper in last_good_splits:
+                    width = upper - lower
+                    if width > max_width:
+                        split_palette_idx = idx
+                    idx += 1
+
+                lower, upper = last_good_splits[split_palette_idx]
+                if upper - lower > 20:
+                    mid = (lower + upper) // 2
+                    self._palette_splits[split_palette_idx] = (
+                        lower, mid - 1)
+                    self._palette_splits[palette_idx] = (mid, upper)
+                else:
+                    lower = np.random.randint(0, 199)
+                    upper = np.random.randint(lower, 200)
+                    self._palette_splits[palette_idx] = (lower, upper)
+
+            if new_total_image_error >= total_image_error:
+                iterations_since_improvement += 1
+                continue
+
+            # We found a globally better set of palettes
+            iterations_since_improvement = 0
+            last_good_splits = self._palette_splits
+            total_image_error = new_total_image_error
+            self._accept_palettes(
+                new_palettes_cam, new_palettes_rgb12_iigs, new_palette_errors)
+
+            yield (new_total_image_error, new_output_4bit, new_line_to_palette,
+                   new_palettes_rgb12_iigs, new_palettes_linear_rgb)
+
     def _image_colours_cam(self, image: Image):
         colours_rgb = np.asarray(image).reshape((-1, 3))
         with colour.utilities.suppress_warnings(colour_usage_warnings=True):
@@ -163,7 +236,7 @@ class ClusterPalette:
             self._apply_palette_delta(palette_to_mutate, palette_lower_delta,
                                       palette_upper_delta)
 
-    def propose_palettes(self) -> Tuple[np.ndarray, np.ndarray, List[float]]:
+    def _propose_palettes(self) -> Tuple[np.ndarray, np.ndarray, List[float]]:
         """Attempt to find new palettes that locally improve image quality.
 
         Re-fit a set of 16 palettes from (overlapping) line ranges of the
@@ -226,7 +299,7 @@ class ClusterPalette:
         self._palettes_accepted = False
         return new_palettes_cam, new_palettes_rgb12_iigs, new_errors
 
-    def accept_palettes(
+    def _accept_palettes(
             self, new_palettes_cam: np.ndarray,
             new_palettes_rgb: np.ndarray, new_errors: List[float]):
         self._palettes_cam = np.copy(new_palettes_cam)
@@ -297,7 +370,7 @@ def main():
 
     # TODO: flags
     penalty = 1  # 1e18  # TODO: is this needed any more?
-    iterations = 200
+    iterations = 20# 0
 
     pygame.init()
     # TODO: for some reason I need to execute this twice - the first time
@@ -307,83 +380,22 @@ def main():
     canvas.fill((0, 0, 0))
     pygame.display.flip()
 
-    total_image_error = 1e9
-    iterations_since_improvement = 0
-
+    total_image_error = None
     # TODO: reserved_colours should be a flag
     cluster_palette = ClusterPalette(
-        rgb, reserved_colours=1, rgb12_iigs_to_cam16ucs=rgb12_iigs_to_cam16ucs)
-    last_good_splits = cluster_palette._palette_splits
+        rgb, reserved_colours=1,
+        rgb12_iigs_to_cam16ucs=rgb12_iigs_to_cam16ucs,
+        rgb24_to_cam16ucs=rgb24_to_cam16ucs)
 
-    while iterations_since_improvement < iterations:
-        # print("Iterations %d" % iterations_since_improvement)
-        new_palettes_cam, new_palettes_rgb12_iigs, new_palette_errors = (
-            cluster_palette.propose_palettes())
+    for (new_total_image_error, output_4bit, line_to_palette,
+         palettes_rgb12_iigs, palettes_linear_rgb) in cluster_palette.iterate(
+            penalty, iterations):
 
-        # Suppress divide by zero warning,
-        # https://github.com/colour-science/colour/issues/900
-        with colour.utilities.suppress_warnings(python_warnings=True):
-            new_palettes_linear_rgb = colour.convert(
-                new_palettes_cam, "CAM16UCS", "RGB").astype(np.float32)
-
-        # Recompute image with proposed palettes and check whether it has
-        # lower total image error than our previous best.
-        new_output_4bit, new_line_to_palette, new_total_image_error = \
-            dither_pyx.dither_shr(
-                rgb, new_palettes_cam, new_palettes_linear_rgb,
-                rgb24_to_cam16ucs, float(penalty))
-
-        # print(total_image_error, new_total_image_error,
-        #       cluster_palette._palette_splits)
-
-        # TODO: move this into ClusterPalettes
-        palettes_used = [False] * 16
-        for palette in new_line_to_palette:
-            palettes_used[palette] = True
-        for palette_idx, palette_used in enumerate(palettes_used):
-            if palette_used:
-                continue
-            print("Reassigning palette %d" % palette_idx)
-            max_width = 0
-            split_palette_idx = -1
-            idx = 0
-            for lower, upper in last_good_splits:
-                width = upper - lower
-                if width > max_width:
-                    split_palette_idx = idx
-                idx += 1
-
-            lower, upper = last_good_splits[split_palette_idx]
-            if upper - lower > 20:
-                mid = (lower + upper) // 2
-                cluster_palette._palette_splits[split_palette_idx] = (
-                    lower, mid - 1)
-                cluster_palette._palette_splits[palette_idx] = (mid, upper)
-            else:
-                lower = np.random.randint(0, 199)
-                upper = np.random.randint(lower, 200)
-                cluster_palette._palette_splits[palette_idx] = (lower, upper)
-
-        if new_total_image_error >= total_image_error:
-            iterations_since_improvement += 1
-            continue
-
-        # We found a globally better set of palettes
-        iterations_since_improvement = 0
-        cluster_palette.accept_palettes(
-            new_palettes_cam, new_palettes_rgb12_iigs, new_palette_errors)
-        last_good_splits = cluster_palette._palette_splits
-
-        if total_image_error < 1e9:
+        if total_image_error is not None:
             print("Improved quality +%f%% (%f)" % (
                 (1 - new_total_image_error / total_image_error) * 100,
                 new_total_image_error))
-            # print(cluster_palette._palette_splits)
-        output_4bit = new_output_4bit
-        line_to_palette = new_line_to_palette
         total_image_error = new_total_image_error
-        palettes_rgb12_iigs = new_palettes_rgb12_iigs
-        palettes_linear_rgb = new_palettes_linear_rgb
         for i in range(16):
             screen.set_palette(i, palettes_rgb12_iigs[i, :, :])
 

From c78f731cd75ad131f6c16b636922f2319acbd462 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Tue, 23 Nov 2021 14:55:45 +0000
Subject: [PATCH 41/82] Refactor

---
 convert.py | 81 +++++++++++++++++++++++++-----------------------------
 1 file changed, 37 insertions(+), 44 deletions(-)

diff --git a/convert.py b/convert.py
index 3dacec0..c91c180 100644
--- a/convert.py
+++ b/convert.py
@@ -96,36 +96,8 @@ class ClusterPalette:
                     self._image_rgb, new_palettes_cam, new_palettes_linear_rgb,
                     self._rgb24_to_cam16ucs, float(penalty))
 
-            # print(total_image_error, new_total_image_error,
-            #       self._palette_splits)
-
-            # TODO: extract this into a function
-            palettes_used = [False] * 16
-            for palette in new_line_to_palette:
-                palettes_used[palette] = True
-            for palette_idx, palette_used in enumerate(palettes_used):
-                if palette_used:
-                    continue
-                print("Reassigning palette %d" % palette_idx)
-                max_width = 0
-                split_palette_idx = -1
-                idx = 0
-                for lower, upper in last_good_splits:
-                    width = upper - lower
-                    if width > max_width:
-                        split_palette_idx = idx
-                    idx += 1
-
-                lower, upper = last_good_splits[split_palette_idx]
-                if upper - lower > 20:
-                    mid = (lower + upper) // 2
-                    self._palette_splits[split_palette_idx] = (
-                        lower, mid - 1)
-                    self._palette_splits[palette_idx] = (mid, upper)
-                else:
-                    lower = np.random.randint(0, 199)
-                    upper = np.random.randint(lower, 200)
-                    self._palette_splits[palette_idx] = (lower, upper)
+            self._reassign_unused_palettes(new_line_to_palette,
+                                           last_good_splits)
 
             if new_total_image_error >= total_image_error:
                 iterations_since_improvement += 1
@@ -135,8 +107,11 @@ class ClusterPalette:
             iterations_since_improvement = 0
             last_good_splits = self._palette_splits
             total_image_error = new_total_image_error
-            self._accept_palettes(
-                new_palettes_cam, new_palettes_rgb12_iigs, new_palette_errors)
+
+            self._palettes_cam = new_palettes_cam
+            self._palettes_rgb = new_palettes_rgb12_iigs
+            self._errors = new_palette_errors
+            self._palettes_accepted = True
 
             yield (new_total_image_error, new_output_4bit, new_line_to_palette,
                    new_palettes_rgb12_iigs, new_palettes_linear_rgb)
@@ -236,6 +211,34 @@ class ClusterPalette:
             self._apply_palette_delta(palette_to_mutate, palette_lower_delta,
                                       palette_upper_delta)
 
+    def _reassign_unused_palettes(self, new_line_to_palette, last_good_splits):
+        palettes_used = [False] * 16
+        for palette in new_line_to_palette:
+            palettes_used[palette] = True
+        for palette_idx, palette_used in enumerate(palettes_used):
+            if palette_used:
+                continue
+            print("Reassigning palette %d" % palette_idx)
+            max_width = 0
+            split_palette_idx = -1
+            idx = 0
+            for lower, upper in last_good_splits:
+                width = upper - lower
+                if width > max_width:
+                    split_palette_idx = idx
+                idx += 1
+
+            lower, upper = last_good_splits[split_palette_idx]
+            if upper - lower > 20:
+                mid = (lower + upper) // 2
+                self._palette_splits[split_palette_idx] = (
+                    lower, mid - 1)
+                self._palette_splits[palette_idx] = (mid, upper)
+            else:
+                lower = np.random.randint(0, 199)
+                upper = np.random.randint(lower, 200)
+                self._palette_splits[palette_idx] = (lower, upper)
+
     def _propose_palettes(self) -> Tuple[np.ndarray, np.ndarray, List[float]]:
         """Attempt to find new palettes that locally improve image quality.
 
@@ -262,8 +265,6 @@ class ClusterPalette:
         # individual palettes
         self._fit_global_palette()
 
-        dynamic_colours = 16 - self._reserved_colours
-
         self._mutate_palette_splits()
         for palette_idx in range(16):
             palette_lower, palette_upper = self._palette_splits[palette_idx]
@@ -299,14 +300,6 @@ class ClusterPalette:
         self._palettes_accepted = False
         return new_palettes_cam, new_palettes_rgb12_iigs, new_errors
 
-    def _accept_palettes(
-            self, new_palettes_cam: np.ndarray,
-            new_palettes_rgb: np.ndarray, new_errors: List[float]):
-        self._palettes_cam = np.copy(new_palettes_cam)
-        self._palettes_rgb = np.copy(new_palettes_rgb)
-        self._errors = list(new_errors)
-        self._palettes_accepted = True
-
 
 def main():
     parser = argparse.ArgumentParser()
@@ -370,7 +363,7 @@ def main():
 
     # TODO: flags
     penalty = 1  # 1e18  # TODO: is this needed any more?
-    iterations = 20# 0
+    iterations = 200
 
     pygame.init()
     # TODO: for some reason I need to execute this twice - the first time
@@ -389,7 +382,7 @@ def main():
 
     for (new_total_image_error, output_4bit, line_to_palette,
          palettes_rgb12_iigs, palettes_linear_rgb) in cluster_palette.iterate(
-            penalty, iterations):
+        penalty, iterations):
 
         if total_image_error is not None:
             print("Improved quality +%f%% (%f)" % (

From 0b985a66b9f7bb0b4944e7d875e7b3620b16e828 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Tue, 23 Nov 2021 14:58:09 +0000
Subject: [PATCH 42/82] Reorder and tidy

---
 convert.py | 191 ++++++++++++++++++++++++++---------------------------
 1 file changed, 95 insertions(+), 96 deletions(-)

diff --git a/convert.py b/convert.py
index c91c180..8848764 100644
--- a/convert.py
+++ b/convert.py
@@ -61,7 +61,7 @@ class ClusterPalette:
 
         # List of line ranges used to train the 16 SHR palettes
         # [(lower_0, upper_0), ...]
-        self._palette_splits = self._palette_splits()
+        self._palette_splits = self._equal_palette_splits()
 
         # Whether the previous iteration of proposed palettes was accepted
         self._palettes_accepted = False
@@ -123,29 +123,7 @@ class ClusterPalette:
                                          "CAM16UCS").astype(np.float32)
         return colours_cam
 
-    def _fit_global_palette(self):
-        """Compute a 16-colour palette for the entire image to use as
-        starting point for the sub-palettes.  This should help when the image
-        has large blocks of colour since the sub-palettes will tend to pick the
-        same colours."""
-
-        clusters = cluster.MiniBatchKMeans(n_clusters=16, max_iter=10000)
-        clusters.fit_predict(self._colours_cam)
-
-        # Dict of {palette idx : frequency count}
-        palette_freq = {idx: 0 for idx in range(16)}
-        for idx, freq in zip(*np.unique(clusters.labels_, return_counts=True)):
-            palette_freq[idx] = freq
-        frequency_order = [
-            k for k, v in sorted(
-                list(palette_freq.items()), key=lambda kv: kv[1], reverse=True)]
-
-        self._global_palette = (
-            dither_pyx.convert_cam16ucs_to_rgb12_iigs(
-                clusters.cluster_centers_[frequency_order].astype(
-                    np.float32)))
-
-    def _palette_splits(self, palette_height=35):
+    def _equal_palette_splits(self, palette_height=35):
         # The 16 palettes are striped across consecutive (overlapping) line
         # ranges.  Since nearby lines tend to have similar colours, this has
         # the effect of smoothing out the colour transitions across palettes.
@@ -168,77 +146,6 @@ class ClusterPalette:
                                    int(np.round(palette_upper))))
         return palette_ranges
 
-    def _apply_palette_delta(
-            self, palette_to_mutate, palette_lower_delta, palette_upper_delta):
-        old_lower, old_upper = self._palette_splits[palette_to_mutate]
-        new_lower = old_lower + palette_lower_delta
-        new_upper = old_upper + palette_upper_delta
-
-        new_lower = np.clip(new_lower, 0, np.clip(new_upper, 1, 200) - 1)
-        new_upper = np.clip(new_upper, new_lower + 1, 200)
-        assert new_lower >= 0, new_upper - 1
-
-        self._palette_splits[palette_to_mutate] = (new_lower, new_upper)
-        self._palette_mutate_idx = palette_to_mutate
-        self._palette_mutate_delta = (palette_lower_delta, palette_upper_delta)
-
-    def _mutate_palette_splits(self):
-        if self._palettes_accepted:
-            # Last time was good, keep going
-            self._apply_palette_delta(self._palette_mutate_idx,
-                                      self._palette_mutate_delta[0],
-                                      self._palette_mutate_delta[1])
-        else:
-            # undo last mutation
-            self._apply_palette_delta(self._palette_mutate_idx,
-                                      -self._palette_mutate_delta[0],
-                                      -self._palette_mutate_delta[1])
-
-            # Pick a palette endpoint to move up or down
-            palette_to_mutate = np.random.randint(0, 16)
-            while True:
-                if palette_to_mutate > 0:
-                    palette_lower_delta = np.random.randint(-20, 21)
-                else:
-                    palette_lower_delta = 0
-                if palette_to_mutate < 15:
-                    palette_upper_delta = np.random.randint(-20, 21)
-                else:
-                    palette_upper_delta = 0
-                if palette_lower_delta != 0 or palette_upper_delta != 0:
-                    break
-
-            self._apply_palette_delta(palette_to_mutate, palette_lower_delta,
-                                      palette_upper_delta)
-
-    def _reassign_unused_palettes(self, new_line_to_palette, last_good_splits):
-        palettes_used = [False] * 16
-        for palette in new_line_to_palette:
-            palettes_used[palette] = True
-        for palette_idx, palette_used in enumerate(palettes_used):
-            if palette_used:
-                continue
-            print("Reassigning palette %d" % palette_idx)
-            max_width = 0
-            split_palette_idx = -1
-            idx = 0
-            for lower, upper in last_good_splits:
-                width = upper - lower
-                if width > max_width:
-                    split_palette_idx = idx
-                idx += 1
-
-            lower, upper = last_good_splits[split_palette_idx]
-            if upper - lower > 20:
-                mid = (lower + upper) // 2
-                self._palette_splits[split_palette_idx] = (
-                    lower, mid - 1)
-                self._palette_splits[palette_idx] = (mid, upper)
-            else:
-                lower = np.random.randint(0, 199)
-                upper = np.random.randint(lower, 200)
-                self._palette_splits[palette_idx] = (lower, upper)
-
     def _propose_palettes(self) -> Tuple[np.ndarray, np.ndarray, List[float]]:
         """Attempt to find new palettes that locally improve image quality.
 
@@ -268,7 +175,6 @@ class ClusterPalette:
         self._mutate_palette_splits()
         for palette_idx in range(16):
             palette_lower, palette_upper = self._palette_splits[palette_idx]
-            # TODO: dynamically tune palette cuts
             palette_pixels = self._colours_cam[
                              palette_lower * 320:palette_upper * 320, :]
 
@@ -300,6 +206,99 @@ class ClusterPalette:
         self._palettes_accepted = False
         return new_palettes_cam, new_palettes_rgb12_iigs, new_errors
 
+    def _fit_global_palette(self):
+        """Compute a 16-colour palette for the entire image to use as
+        starting point for the sub-palettes.  This should help when the image
+        has large blocks of colour since the sub-palettes will tend to pick the
+        same colours."""
+
+        clusters = cluster.MiniBatchKMeans(n_clusters=16, max_iter=10000)
+        clusters.fit_predict(self._colours_cam)
+
+        # Dict of {palette idx : frequency count}
+        palette_freq = {idx: 0 for idx in range(16)}
+        for idx, freq in zip(*np.unique(clusters.labels_, return_counts=True)):
+            palette_freq[idx] = freq
+        frequency_order = [
+            k for k, v in sorted(
+                list(palette_freq.items()), key=lambda kv: kv[1], reverse=True)]
+
+        self._global_palette = (
+            dither_pyx.convert_cam16ucs_to_rgb12_iigs(
+                clusters.cluster_centers_[frequency_order].astype(
+                    np.float32)))
+
+    def _mutate_palette_splits(self):
+        if self._palettes_accepted:
+            # Last time was good, keep going
+            self._apply_palette_delta(self._palette_mutate_idx,
+                                      self._palette_mutate_delta[0],
+                                      self._palette_mutate_delta[1])
+        else:
+            # undo last mutation
+            self._apply_palette_delta(self._palette_mutate_idx,
+                                      -self._palette_mutate_delta[0],
+                                      -self._palette_mutate_delta[1])
+
+            # Pick a palette endpoint to move up or down
+            palette_to_mutate = np.random.randint(0, 16)
+            while True:
+                if palette_to_mutate > 0:
+                    palette_lower_delta = np.random.randint(-20, 21)
+                else:
+                    palette_lower_delta = 0
+                if palette_to_mutate < 15:
+                    palette_upper_delta = np.random.randint(-20, 21)
+                else:
+                    palette_upper_delta = 0
+                if palette_lower_delta != 0 or palette_upper_delta != 0:
+                    break
+
+            self._apply_palette_delta(palette_to_mutate, palette_lower_delta,
+                                      palette_upper_delta)
+
+    def _apply_palette_delta(
+            self, palette_to_mutate, palette_lower_delta, palette_upper_delta):
+        old_lower, old_upper = self._palette_splits[palette_to_mutate]
+        new_lower = old_lower + palette_lower_delta
+        new_upper = old_upper + palette_upper_delta
+
+        new_lower = np.clip(new_lower, 0, np.clip(new_upper, 1, 200) - 1)
+        new_upper = np.clip(new_upper, new_lower + 1, 200)
+        assert new_lower >= 0, new_upper - 1
+
+        self._palette_splits[palette_to_mutate] = (new_lower, new_upper)
+        self._palette_mutate_idx = palette_to_mutate
+        self._palette_mutate_delta = (palette_lower_delta, palette_upper_delta)
+
+    def _reassign_unused_palettes(self, new_line_to_palette, last_good_splits):
+        palettes_used = [False] * 16
+        for palette in new_line_to_palette:
+            palettes_used[palette] = True
+        for palette_idx, palette_used in enumerate(palettes_used):
+            if palette_used:
+                continue
+            print("Reassigning palette %d" % palette_idx)
+            max_width = 0
+            split_palette_idx = -1
+            idx = 0
+            for lower, upper in last_good_splits:
+                width = upper - lower
+                if width > max_width:
+                    split_palette_idx = idx
+                idx += 1
+
+            lower, upper = last_good_splits[split_palette_idx]
+            if upper - lower > 20:
+                mid = (lower + upper) // 2
+                self._palette_splits[split_palette_idx] = (
+                    lower, mid - 1)
+                self._palette_splits[palette_idx] = (mid, upper)
+            else:
+                lower = np.random.randint(0, 199)
+                upper = np.random.randint(lower, 200)
+                self._palette_splits[palette_idx] = (lower, upper)
+
 
 def main():
     parser = argparse.ArgumentParser()

From e488955c2310c3022cb6d9329d001f560a3f8390 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Tue, 23 Nov 2021 14:58:46 +0000
Subject: [PATCH 43/82] Reorder

---
 convert.py | 60 +++++++++++++++++++++++++++---------------------------
 1 file changed, 30 insertions(+), 30 deletions(-)

diff --git a/convert.py b/convert.py
index 8848764..e532a92 100644
--- a/convert.py
+++ b/convert.py
@@ -72,6 +72,36 @@ class ClusterPalette:
         # Delta applied to palette split in previous iteration
         self._palette_mutate_delta = (0, 0)
 
+    def _image_colours_cam(self, image: Image):
+        colours_rgb = np.asarray(image).reshape((-1, 3))
+        with colour.utilities.suppress_warnings(colour_usage_warnings=True):
+            colours_cam = colour.convert(colours_rgb, "RGB",
+                                         "CAM16UCS").astype(np.float32)
+        return colours_cam
+
+    def _equal_palette_splits(self, palette_height=35):
+        # The 16 palettes are striped across consecutive (overlapping) line
+        # ranges.  Since nearby lines tend to have similar colours, this has
+        # the effect of smoothing out the colour transitions across palettes.
+
+        # If we want to overlap 16 palettes in 200 lines, where each palette
+        # has height H and overlaps the previous one by L lines, then the
+        # boundaries are at lines:
+        #   (0, H), (H-L, 2H-L), (2H-2L, 3H-2L), ..., (15H-15L, 16H - 15L)
+        # i.e. 16H - 15L = 200, so for a given palette height H we need to
+        # overlap by:
+        #   L = (16H - 200)/15
+
+        palette_overlap = (16 * palette_height - 200) / 15
+
+        palette_ranges = []
+        for palette_idx in range(16):
+            palette_lower = palette_idx * (palette_height - palette_overlap)
+            palette_upper = palette_lower + palette_height
+            palette_ranges.append((int(np.round(palette_lower)),
+                                   int(np.round(palette_upper))))
+        return palette_ranges
+
     def iterate(self, penalty: float, max_iterations: int):
         iterations_since_improvement = 0
         total_image_error = 1e9
@@ -116,36 +146,6 @@ class ClusterPalette:
             yield (new_total_image_error, new_output_4bit, new_line_to_palette,
                    new_palettes_rgb12_iigs, new_palettes_linear_rgb)
 
-    def _image_colours_cam(self, image: Image):
-        colours_rgb = np.asarray(image).reshape((-1, 3))
-        with colour.utilities.suppress_warnings(colour_usage_warnings=True):
-            colours_cam = colour.convert(colours_rgb, "RGB",
-                                         "CAM16UCS").astype(np.float32)
-        return colours_cam
-
-    def _equal_palette_splits(self, palette_height=35):
-        # The 16 palettes are striped across consecutive (overlapping) line
-        # ranges.  Since nearby lines tend to have similar colours, this has
-        # the effect of smoothing out the colour transitions across palettes.
-
-        # If we want to overlap 16 palettes in 200 lines, where each palette
-        # has height H and overlaps the previous one by L lines, then the
-        # boundaries are at lines:
-        #   (0, H), (H-L, 2H-L), (2H-2L, 3H-2L), ..., (15H-15L, 16H - 15L)
-        # i.e. 16H - 15L = 200, so for a given palette height H we need to
-        # overlap by:
-        #   L = (16H - 200)/15
-
-        palette_overlap = (16 * palette_height - 200) / 15
-
-        palette_ranges = []
-        for palette_idx in range(16):
-            palette_lower = palette_idx * (palette_height - palette_overlap)
-            palette_upper = palette_lower + palette_height
-            palette_ranges.append((int(np.round(palette_lower)),
-                                   int(np.round(palette_upper))))
-        return palette_ranges
-
     def _propose_palettes(self) -> Tuple[np.ndarray, np.ndarray, List[float]]:
         """Attempt to find new palettes that locally improve image quality.
 

From 7179d009e13cc5d91b3e8bbcc003d8abf75bb6c0 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Tue, 23 Nov 2021 15:09:12 +0000
Subject: [PATCH 44/82] Refactor

Reassign palettes before computing new ones instead of after
---
 convert.py | 41 ++++++++++++++++++++++++++---------------
 1 file changed, 26 insertions(+), 15 deletions(-)

diff --git a/convert.py b/convert.py
index e532a92..a90c7e6 100644
--- a/convert.py
+++ b/convert.py
@@ -102,32 +102,43 @@ class ClusterPalette:
                                    int(np.round(palette_upper))))
         return palette_ranges
 
+    def _dither_image(self, palettes_cam, penalty):
+        # Suppress divide by zero warning,
+        # https://github.com/colour-science/colour/issues/900
+        with colour.utilities.suppress_warnings(python_warnings=True):
+            palettes_linear_rgb = colour.convert(
+                palettes_cam, "CAM16UCS", "RGB").astype(np.float32)
+
+        output_4bit, line_to_palette, total_image_error = \
+            dither_pyx.dither_shr(
+                self._image_rgb, palettes_cam, palettes_linear_rgb,
+                self._rgb24_to_cam16ucs, float(penalty))
+
+        return (output_4bit, line_to_palette, palettes_linear_rgb,
+                total_image_error)
+
     def iterate(self, penalty: float, max_iterations: int):
         iterations_since_improvement = 0
         total_image_error = 1e9
 
         last_good_splits = self._palette_splits
+        (output_4bit, line_to_palette, palettes_linear_rgb,
+         new_total_image_error) = self._dither_image(
+            self._palettes_cam, penalty)
 
         while iterations_since_improvement < max_iterations:
+            self._reassign_unused_palettes(line_to_palette,
+                                           last_good_splits)
+
             # print("Iterations %d" % iterations_since_improvement)
             new_palettes_cam, new_palettes_rgb12_iigs, new_palette_errors = (
                 self._propose_palettes())
 
-            # Suppress divide by zero warning,
-            # https://github.com/colour-science/colour/issues/900
-            with colour.utilities.suppress_warnings(python_warnings=True):
-                new_palettes_linear_rgb = colour.convert(
-                    new_palettes_cam, "CAM16UCS", "RGB").astype(np.float32)
-
             # Recompute image with proposed palettes and check whether it has
             # lower total image error than our previous best.
-            new_output_4bit, new_line_to_palette, new_total_image_error = \
-                dither_pyx.dither_shr(
-                    self._image_rgb, new_palettes_cam, new_palettes_linear_rgb,
-                    self._rgb24_to_cam16ucs, float(penalty))
-
-            self._reassign_unused_palettes(new_line_to_palette,
-                                           last_good_splits)
+            (output_4bit, line_to_palette, palettes_linear_rgb,
+             new_total_image_error) = self._dither_image(
+                new_palettes_cam, penalty)
 
             if new_total_image_error >= total_image_error:
                 iterations_since_improvement += 1
@@ -143,8 +154,8 @@ class ClusterPalette:
             self._errors = new_palette_errors
             self._palettes_accepted = True
 
-            yield (new_total_image_error, new_output_4bit, new_line_to_palette,
-                   new_palettes_rgb12_iigs, new_palettes_linear_rgb)
+            yield (new_total_image_error, output_4bit, line_to_palette,
+                   new_palettes_rgb12_iigs, palettes_linear_rgb)
 
     def _propose_palettes(self) -> Tuple[np.ndarray, np.ndarray, List[float]]:
         """Attempt to find new palettes that locally improve image quality.

From 62f23ff910ff61245be7604ed44f6d56f36865c0 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Wed, 24 Nov 2021 09:10:03 +0000
Subject: [PATCH 45/82] Don't mutate initial_centroids

---
 dither.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dither.pyx b/dither.pyx
index 0881ef3..59b11b3 100644
--- a/dither.pyx
+++ b/dither.pyx
@@ -543,7 +543,7 @@ def k_means_with_fixed_centroids(
     cdef double error, best_error, centroid_movement, total_error
     cdef int centroid_idx, closest_centroid_idx, i, point_idx
 
-    cdef (unsigned char)[:, ::1] centroids_rgb12 = initial_centroids[:, :]
+    cdef (unsigned char)[:, ::1] centroids_rgb12 = np.copy(initial_centroids)
     cdef (unsigned char)[:, ::1] new_centroids_rgb12
 
     cdef float[::1] point_cam

From 04fd4f742726168a8935952c5a71cd65f7da3411 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Wed, 24 Nov 2021 09:18:59 +0000
Subject: [PATCH 46/82] Move reassigning palettes back to after fitting,
 otherwise it does the wrong thing the first time.

Fix an off-by-one error when splitting palette ranges
---
 convert.py | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/convert.py b/convert.py
index a90c7e6..e8da9ca 100644
--- a/convert.py
+++ b/convert.py
@@ -120,16 +120,9 @@ class ClusterPalette:
     def iterate(self, penalty: float, max_iterations: int):
         iterations_since_improvement = 0
         total_image_error = 1e9
-
         last_good_splits = self._palette_splits
-        (output_4bit, line_to_palette, palettes_linear_rgb,
-         new_total_image_error) = self._dither_image(
-            self._palettes_cam, penalty)
-
+        
         while iterations_since_improvement < max_iterations:
-            self._reassign_unused_palettes(line_to_palette,
-                                           last_good_splits)
-
             # print("Iterations %d" % iterations_since_improvement)
             new_palettes_cam, new_palettes_rgb12_iigs, new_palette_errors = (
                 self._propose_palettes())
@@ -140,6 +133,9 @@ class ClusterPalette:
              new_total_image_error) = self._dither_image(
                 new_palettes_cam, penalty)
 
+            self._reassign_unused_palettes(line_to_palette,
+                                           last_good_splits)
+
             if new_total_image_error >= total_image_error:
                 iterations_since_improvement += 1
                 continue
@@ -175,8 +171,8 @@ class ClusterPalette:
         using accept_palettes().
         """
         new_errors = list(self._errors)
-        new_palettes_cam = np.copy(self._palettes_cam)
-        new_palettes_rgb12_iigs = np.copy(self._palettes_rgb)
+        new_palettes_cam = np.empty_like(self._palettes_cam)
+        new_palettes_rgb12_iigs = np.empty_like(self._palettes_rgb)
 
         # Compute a new 16-colour global palette for the entire image,
         # used as the starting center positions for k-means clustering of the
@@ -307,7 +303,7 @@ class ClusterPalette:
                 self._palette_splits[palette_idx] = (mid, upper)
             else:
                 lower = np.random.randint(0, 199)
-                upper = np.random.randint(lower, 200)
+                upper = np.random.randint(lower + 1, 200)
                 self._palette_splits[palette_idx] = (lower, upper)
 
 

From 50c71d3a3506c1e2a4694ca6dc748a030a6b1087 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Wed, 24 Nov 2021 09:19:35 +0000
Subject: [PATCH 47/82] Whitespace

---
 convert.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/convert.py b/convert.py
index e8da9ca..518d742 100644
--- a/convert.py
+++ b/convert.py
@@ -121,7 +121,7 @@ class ClusterPalette:
         iterations_since_improvement = 0
         total_image_error = 1e9
         last_good_splits = self._palette_splits
-        
+
         while iterations_since_improvement < max_iterations:
             # print("Iterations %d" % iterations_since_improvement)
             new_palettes_cam, new_palettes_rgb12_iigs, new_palette_errors = (

From de8a303de2f4cff3fb4b79a0f17175ac059ab64a Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Wed, 24 Nov 2021 10:41:25 +0000
Subject: [PATCH 48/82] Initial attempt at fitting palettes to arbitrary lines
 instead of line ranges.

Works OK but isn't converging as well as I hoped.
---
 convert.py | 77 ++++++++++++++++++++++++++++++++++++++----------------
 dither.pyx | 21 ++++++++++++---
 2 files changed, 71 insertions(+), 27 deletions(-)

diff --git a/convert.py b/convert.py
index 518d742..d96b6f2 100644
--- a/convert.py
+++ b/convert.py
@@ -1,7 +1,9 @@
 """Image converter to Apple II Double Hi-Res format."""
 
 import argparse
+from collections import defaultdict
 import os.path
+import random
 from typing import Tuple, List
 
 from PIL import Image
@@ -72,8 +74,13 @@ class ClusterPalette:
         # Delta applied to palette split in previous iteration
         self._palette_mutate_delta = (0, 0)
 
+        self._palette_lines = defaultdict(list)
+        for i, lh in enumerate(self._palette_splits):
+            l, h = lh
+            self._palette_lines[i].extend(list(range(l, h)))
+
     def _image_colours_cam(self, image: Image):
-        colours_rgb = np.asarray(image).reshape((-1, 3))
+        colours_rgb = np.asarray(image)  # .reshape((-1, 3))
         with colour.utilities.suppress_warnings(colour_usage_warnings=True):
             colours_cam = colour.convert(colours_rgb, "RGB",
                                          "CAM16UCS").astype(np.float32)
@@ -109,11 +116,18 @@ class ClusterPalette:
             palettes_linear_rgb = colour.convert(
                 palettes_cam, "CAM16UCS", "RGB").astype(np.float32)
 
-        output_4bit, line_to_palette, total_image_error = \
+        output_4bit, line_to_palette, total_image_error, palette_line_errors = \
             dither_pyx.dither_shr(
                 self._image_rgb, palettes_cam, palettes_linear_rgb,
                 self._rgb24_to_cam16ucs, float(penalty))
 
+        palette_lines = defaultdict(list)
+        for line, palette in enumerate(line_to_palette):
+            palette_lines[palette].append(line)
+        self._palette_lines = palette_lines
+
+        self._palette_line_errors = palette_line_errors
+
         return (output_4bit, line_to_palette, palettes_linear_rgb,
                 total_image_error)
 
@@ -121,7 +135,7 @@ class ClusterPalette:
         iterations_since_improvement = 0
         total_image_error = 1e9
         last_good_splits = self._palette_splits
-
+        self._fit_global_palette()
         while iterations_since_improvement < max_iterations:
             # print("Iterations %d" % iterations_since_improvement)
             new_palettes_cam, new_palettes_rgb12_iigs, new_palette_errors = (
@@ -136,6 +150,7 @@ class ClusterPalette:
             self._reassign_unused_palettes(line_to_palette,
                                            last_good_splits)
 
+            print(total_image_error, new_total_image_error)
             if new_total_image_error >= total_image_error:
                 iterations_since_improvement += 1
                 continue
@@ -177,13 +192,15 @@ class ClusterPalette:
         # Compute a new 16-colour global palette for the entire image,
         # used as the starting center positions for k-means clustering of the
         # individual palettes
-        self._fit_global_palette()
+        # self._fit_global_palette()
 
         self._mutate_palette_splits()
         for palette_idx in range(16):
-            palette_lower, palette_upper = self._palette_splits[palette_idx]
-            palette_pixels = self._colours_cam[
-                             palette_lower * 320:palette_upper * 320, :]
+            # print(palette_idx, self._palette_lines[palette_idx])
+            # palette_lower, palette_upper = self._palette_splits[palette_idx]
+            palette_pixels = (
+                self._colours_cam[
+                self._palette_lines[palette_idx], :, :].reshape(-1, 3))
 
             palettes_rgb12_iigs, palette_error = \
                 dither_pyx.k_means_with_fixed_centroids(
@@ -220,7 +237,7 @@ class ClusterPalette:
         same colours."""
 
         clusters = cluster.MiniBatchKMeans(n_clusters=16, max_iter=10000)
-        clusters.fit_predict(self._colours_cam)
+        clusters.fit_predict(self._colours_cam.reshape(-1, 3))
 
         # Dict of {palette idx : frequency count}
         palette_freq = {idx: 0 for idx in range(16)}
@@ -286,25 +303,39 @@ class ClusterPalette:
             if palette_used:
                 continue
             print("Reassigning palette %d" % palette_idx)
-            max_width = 0
+
+            worst_average_palette_error = 0
             split_palette_idx = -1
             idx = 0
-            for lower, upper in last_good_splits:
-                width = upper - lower
-                if width > max_width:
+            for idx, lines in self._palette_lines.items():
+                if len(lines) < 10:
+                    continue
+                average_palette_error = np.sum(self._palette_line_errors[
+                                                lines]) / len(lines)
+                print(idx, average_palette_error)
+                if average_palette_error > worst_average_palette_error:
+                    worst_average_palette_error = average_palette_error
                     split_palette_idx = idx
-                idx += 1
 
-            lower, upper = last_good_splits[split_palette_idx]
-            if upper - lower > 20:
-                mid = (lower + upper) // 2
-                self._palette_splits[split_palette_idx] = (
-                    lower, mid - 1)
-                self._palette_splits[palette_idx] = (mid, upper)
-            else:
-                lower = np.random.randint(0, 199)
-                upper = np.random.randint(lower + 1, 200)
-                self._palette_splits[palette_idx] = (lower, upper)
+            print("Picked %d with avg error %f" % (split_palette_idx, worst_average_palette_error))
+            # TODO: split off lines with largest error
+
+            palette_line_errors = self._palette_line_errors[
+                self._palette_lines[split_palette_idx]]
+
+            print(sorted(
+                list(zip(palette_line_errors, self._palette_lines[
+                split_palette_idx])), reverse=True))
+            best_palette_lines = [v for k, v in sorted(
+                list(zip(palette_line_errors, self._palette_lines[
+                split_palette_idx])))]
+            num_max_lines = len(self._palette_lines[split_palette_idx])
+
+            self._palette_lines[split_palette_idx] = best_palette_lines[
+                                                     :num_max_lines // 2]
+            # Move worst half to new palette
+            self._palette_lines[palette_idx] = best_palette_lines[
+                                               num_max_lines // 2:]
 
 
 def main():
diff --git a/dither.pyx b/dither.pyx
index 59b11b3..22f0e46 100644
--- a/dither.pyx
+++ b/dither.pyx
@@ -352,6 +352,8 @@ def dither_shr(
     cdef float[:, ::1] line_cam = np.zeros((320, 3), dtype=np.float32)
 
     cdef int[::1] line_to_palette = np.zeros(200, dtype=np.int32)
+    cdef double[::1] palette_line_errors = np.zeros(200, dtype=np.float64)
+    cdef PaletteSelection palette_line
 
     best_palette = -1
     total_image_error = 0.0
@@ -360,7 +362,10 @@ def dither_shr(
             line_cam[x, :] = convert_rgb_to_cam16ucs(
                 rgb_to_cam16ucs, working_image[y,x,0], working_image[y,x,1], working_image[y,x,2])
 
-        best_palette = best_palette_for_line(line_cam, palettes_cam, best_palette, penalty)
+        palette_line = best_palette_for_line(line_cam, palettes_cam, best_palette, penalty)
+        best_palette = palette_line.palette_idx
+        palette_line_errors[y] = palette_line.total_error
+
         palette_rgb = palettes_rgb[best_palette, :, :]
         palette_cam = palettes_cam[best_palette, :, :]
         line_to_palette[y] = best_palette
@@ -449,12 +454,16 @@ def dither_shr(
                 #            working_image[y + 2, x + 2, i] + quant_error * (1 / 48),
                 #            0, 1)
 
-    return np.array(output_4bit, dtype=np.uint8), line_to_palette, total_image_error
+    return np.array(output_4bit, dtype=np.uint8), line_to_palette, total_image_error, np.array(palette_line_errors, dtype=np.float64)
 
 
+cdef struct PaletteSelection:
+    int palette_idx
+    double total_error
+
 @cython.boundscheck(False)
 @cython.wraparound(False)
-cdef int best_palette_for_line(float [:, ::1] line_cam, float[:, :, ::1] palettes_cam, int last_palette_idx, float last_penalty) nogil:
+cdef PaletteSelection best_palette_for_line(float [:, ::1] line_cam, float[:, :, ::1] palettes_cam, int last_palette_idx, float last_penalty) nogil:
     cdef int palette_idx, best_palette_idx, palette_entry_idx, pixel_idx
     cdef double best_total_dist, total_dist, best_pixel_dist, pixel_dist
     cdef float[:, ::1] palette_cam
@@ -479,7 +488,11 @@ cdef int best_palette_for_line(float [:, ::1] line_cam, float[:, :, ::1] palette
         if total_dist < best_total_dist:
             best_total_dist = total_dist
             best_palette_idx = palette_idx
-    return best_palette_idx
+
+    cdef PaletteSelection res
+    res.palette_idx = best_palette_idx
+    res.total_error = best_total_dist
+    return res
 
 
 @cython.boundscheck(False)

From 3b8767782bf29342f74bd40c9088aafc19127d29 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Wed, 24 Nov 2021 11:47:39 +0000
Subject: [PATCH 49/82] Each run seems to converge fairly quickly but there is
 a lot of variation across runs.  Run in a loop and keep the running best.

---
 convert.py | 184 ++++++++++++++++++++++++++++++-----------------------
 1 file changed, 105 insertions(+), 79 deletions(-)

diff --git a/convert.py b/convert.py
index d96b6f2..3cf2645 100644
--- a/convert.py
+++ b/convert.py
@@ -61,9 +61,12 @@ class ClusterPalette:
 
         self._rgb24_to_cam16ucs = rgb24_to_cam16ucs
 
+        self._palette_lines = defaultdict(list)
+
         # List of line ranges used to train the 16 SHR palettes
         # [(lower_0, upper_0), ...]
         self._palette_splits = self._equal_palette_splits()
+        self._init_palette_lines()
 
         # Whether the previous iteration of proposed palettes was accepted
         self._palettes_accepted = False
@@ -74,7 +77,7 @@ class ClusterPalette:
         # Delta applied to palette split in previous iteration
         self._palette_mutate_delta = (0, 0)
 
-        self._palette_lines = defaultdict(list)
+    def _init_palette_lines(self):
         for i, lh in enumerate(self._palette_splits):
             l, h = lh
             self._palette_lines[i].extend(list(range(l, h)))
@@ -132,41 +135,46 @@ class ClusterPalette:
                 total_image_error)
 
     def iterate(self, penalty: float, max_iterations: int):
-        iterations_since_improvement = 0
         total_image_error = 1e9
         last_good_splits = self._palette_splits
-        self._fit_global_palette()
-        while iterations_since_improvement < max_iterations:
-            # print("Iterations %d" % iterations_since_improvement)
-            new_palettes_cam, new_palettes_rgb12_iigs, new_palette_errors = (
-                self._propose_palettes())
-
-            # Recompute image with proposed palettes and check whether it has
-            # lower total image error than our previous best.
-            (output_4bit, line_to_palette, palettes_linear_rgb,
-             new_total_image_error) = self._dither_image(
-                new_palettes_cam, penalty)
-
-            self._reassign_unused_palettes(line_to_palette,
-                                           last_good_splits)
-
-            print(total_image_error, new_total_image_error)
-            if new_total_image_error >= total_image_error:
-                iterations_since_improvement += 1
-                continue
-
-            # We found a globally better set of palettes
+        while True:
+            print("New iteration")
             iterations_since_improvement = 0
-            last_good_splits = self._palette_splits
-            total_image_error = new_total_image_error
+            self._palette_splits = self._equal_palette_splits()
+            self._init_palette_lines()
 
-            self._palettes_cam = new_palettes_cam
-            self._palettes_rgb = new_palettes_rgb12_iigs
-            self._errors = new_palette_errors
-            self._palettes_accepted = True
+            self._fit_global_palette()
+            while iterations_since_improvement < max_iterations:
+                # print("Iterations %d" % iterations_since_improvement)
+                new_palettes_cam, new_palettes_rgb12_iigs, new_palette_errors = (
+                    self._propose_palettes())
 
-            yield (new_total_image_error, output_4bit, line_to_palette,
-                   new_palettes_rgb12_iigs, palettes_linear_rgb)
+                # Recompute image with proposed palettes and check whether it has
+                # lower total image error than our previous best.
+                (output_4bit, line_to_palette, palettes_linear_rgb,
+                 new_total_image_error) = self._dither_image(
+                    new_palettes_cam, penalty)
+
+                self._reassign_unused_palettes(line_to_palette,
+                                               last_good_splits)
+
+                # print(total_image_error, new_total_image_error)
+                if new_total_image_error >= total_image_error:
+                    iterations_since_improvement += 1
+                    continue
+
+                # We found a globally better set of palettes
+                iterations_since_improvement = 0
+                last_good_splits = self._palette_splits
+                total_image_error = new_total_image_error
+
+                self._palettes_cam = new_palettes_cam
+                self._palettes_rgb = new_palettes_rgb12_iigs
+                self._errors = new_palette_errors
+                self._palettes_accepted = True
+
+                yield (new_total_image_error, output_4bit, line_to_palette,
+                       new_palettes_rgb12_iigs, palettes_linear_rgb)
 
     def _propose_palettes(self) -> Tuple[np.ndarray, np.ndarray, List[float]]:
         """Attempt to find new palettes that locally improve image quality.
@@ -192,7 +200,7 @@ class ClusterPalette:
         # Compute a new 16-colour global palette for the entire image,
         # used as the starting center positions for k-means clustering of the
         # individual palettes
-        # self._fit_global_palette()
+        self._fit_global_palette()
 
         self._mutate_palette_splits()
         for palette_idx in range(16):
@@ -211,13 +219,13 @@ class ClusterPalette:
                     rgb12_iigs_to_cam16ucs=self._rgb12_iigs_to_cam16ucs
                 )
 
-            if (palette_error >= self._errors[palette_idx] and not
-            self._reserved_colours):
-                # Not a local improvement to the existing palette, so ignore it.
-                # We can't take this shortcut when we're reserving colours
-                # because it would break the invariant that all palettes must
-                # share colours.
-                continue
+            # if (palette_error >= self._errors[palette_idx] and not
+            # self._reserved_colours):
+            #     # Not a local improvement to the existing palette, so ignore it.
+            #     # We can't take this shortcut when we're reserving colours
+            #     # because it would break the invariant that all palettes must
+            #     # share colours.
+            #     continue
             for i in range(16):
                 new_palettes_cam[palette_idx, i, :] = (
                     np.array(dither_pyx.convert_rgb12_iigs_to_cam(
@@ -299,43 +307,56 @@ class ClusterPalette:
         palettes_used = [False] * 16
         for palette in new_line_to_palette:
             palettes_used[palette] = True
+        best_palette_lines = [v for k, v in sorted(list(zip(
+            self._palette_line_errors, range(200))))]
+
+        # print(self._palette_lines)
         for palette_idx, palette_used in enumerate(palettes_used):
             if palette_used:
                 continue
-            print("Reassigning palette %d" % palette_idx)
+            # print("Reassigning palette %d" % palette_idx)
 
-            worst_average_palette_error = 0
-            split_palette_idx = -1
-            idx = 0
-            for idx, lines in self._palette_lines.items():
-                if len(lines) < 10:
-                    continue
-                average_palette_error = np.sum(self._palette_line_errors[
-                                                lines]) / len(lines)
-                print(idx, average_palette_error)
-                if average_palette_error > worst_average_palette_error:
-                    worst_average_palette_error = average_palette_error
-                    split_palette_idx = idx
+            # TODO: also remove from old entry
+            worst_line = best_palette_lines.pop()
+            self._palette_lines[palette_idx] = [worst_line]
 
-            print("Picked %d with avg error %f" % (split_palette_idx, worst_average_palette_error))
-            # TODO: split off lines with largest error
+            # print("Picked line %d with error %f" % (worst_line,
+            #                                         self._palette_line_errors[worst_line]))
 
-            palette_line_errors = self._palette_line_errors[
-                self._palette_lines[split_palette_idx]]
 
-            print(sorted(
-                list(zip(palette_line_errors, self._palette_lines[
-                split_palette_idx])), reverse=True))
-            best_palette_lines = [v for k, v in sorted(
-                list(zip(palette_line_errors, self._palette_lines[
-                split_palette_idx])))]
-            num_max_lines = len(self._palette_lines[split_palette_idx])
-
-            self._palette_lines[split_palette_idx] = best_palette_lines[
-                                                     :num_max_lines // 2]
-            # Move worst half to new palette
-            self._palette_lines[palette_idx] = best_palette_lines[
-                                               num_max_lines // 2:]
+            #
+            # worst_average_palette_error = 0
+            # split_palette_idx = -1
+            # idx = 0
+            # for idx, lines in self._palette_lines.items():
+            #     if len(lines) < 10:
+            #         continue
+            #     average_palette_error = np.sum(self._palette_line_errors[
+            #                                     lines]) / len(lines)
+            #     print(idx, average_palette_error)
+            #     if average_palette_error > worst_average_palette_error:
+            #         worst_average_palette_error = average_palette_error
+            #         split_palette_idx = idx
+            #
+            # print("Picked %d with avg error %f" % (split_palette_idx, worst_average_palette_error))
+            # # TODO: split off lines with largest error
+            #
+            # palette_line_errors = self._palette_line_errors[
+            #     self._palette_lines[split_palette_idx]]
+            #
+            # print(sorted(
+            #     list(zip(palette_line_errors, self._palette_lines[
+            #     split_palette_idx])), reverse=True))
+            # best_palette_lines = [v for k, v in sorted(
+            #     list(zip(palette_line_errors, self._palette_lines[
+            #     split_palette_idx])))]
+            # num_max_lines = len(self._palette_lines[split_palette_idx])
+            #
+            # self._palette_lines[split_palette_idx] = best_palette_lines[
+            #                                          :num_max_lines // 2]
+            # # Move worst half to new palette
+            # self._palette_lines[palette_idx] = best_palette_lines[
+            #                                    num_max_lines // 2:]
 
 
 def main():
@@ -400,7 +421,7 @@ def main():
 
     # TODO: flags
     penalty = 1  # 1e18  # TODO: is this needed any more?
-    iterations = 200
+    iterations = 10 # 20
 
     pygame.init()
     # TODO: for some reason I need to execute this twice - the first time
@@ -417,6 +438,7 @@ def main():
         rgb12_iigs_to_cam16ucs=rgb12_iigs_to_cam16ucs,
         rgb24_to_cam16ucs=rgb24_to_cam16ucs)
 
+    seq = 0
     for (new_total_image_error, output_4bit, line_to_palette,
          palettes_rgb12_iigs, palettes_linear_rgb) in cluster_palette.iterate(
         penalty, iterations):
@@ -463,20 +485,24 @@ def main():
                 np.asarray(out_image).transpose((1, 0, 2)))  # flip y/x axes
             canvas.blit(surface, (0, 0))
             pygame.display.flip()
+
+        seq += 1
+        # Save Double hi-res image
+        outfile = os.path.join(
+            os.path.splitext(args.output)[0] + "-%d-preview.png" % seq)
+        out_image.save(outfile, "PNG")
+        screen.pack()
+        # with open(args.output, "wb") as f:
+        #     f.write(bytes(screen.aux))
+        #     f.write(bytes(screen.main))
+        with open(args.output, "wb") as f:
+            f.write(bytes(screen.memory))
+
     # print((palettes_rgb * 255).astype(np.uint8))
     unique_colours = np.unique(
         palettes_rgb12_iigs.reshape(-1, 3), axis=0).shape[0]
     print("%d unique colours" % unique_colours)
 
-    # Save Double hi-res image
-    outfile = os.path.join(os.path.splitext(args.output)[0] + "-preview.png")
-    out_image.save(outfile, "PNG")
-    screen.pack()
-    # with open(args.output, "wb") as f:
-    #     f.write(bytes(screen.aux))
-    #     f.write(bytes(screen.main))
-    with open(args.output, "wb") as f:
-        f.write(bytes(screen.memory))
 
 
 if __name__ == "__main__":

From c36de2b76b8d8abd00354279864e40b205109601 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Wed, 24 Nov 2021 14:57:24 +0000
Subject: [PATCH 50/82] When initializing centroids for fitting the SHR
 palettes, only use the reserved colours from the global palette, and pick
 unique random points from the samples for the rest.  This encourages a larger
 range of colours in the resulting images and may improve quality.

Iterate a max number of times without improvement in the outer loop as
well.

Save intermediate preview outputs.
---
 convert.py | 124 ++++++++++++++++++++++++-----------------------------
 1 file changed, 57 insertions(+), 67 deletions(-)

diff --git a/convert.py b/convert.py
index 3cf2645..3921656 100644
--- a/convert.py
+++ b/convert.py
@@ -65,7 +65,7 @@ class ClusterPalette:
 
         # List of line ranges used to train the 16 SHR palettes
         # [(lower_0, upper_0), ...]
-        self._palette_splits = self._equal_palette_splits()
+        # self._palette_splits = self._equal_palette_splits()
         self._init_palette_lines()
 
         # Whether the previous iteration of proposed palettes was accepted
@@ -78,10 +78,19 @@ class ClusterPalette:
         self._palette_mutate_delta = (0, 0)
 
     def _init_palette_lines(self):
-        for i, lh in enumerate(self._palette_splits):
+        palette_splits = self._equal_palette_splits()
+        for i, lh in enumerate(palette_splits):
             l, h = lh
             self._palette_lines[i].extend(list(range(l, h)))
 
+        # lines = list(range(200))
+        # random.shuffle(lines)
+        # idx = 0
+        # while lines:
+        #     self._palette_lines[idx].append(lines.pop())
+        #     idx += 1
+
+
     def _image_colours_cam(self, image: Image):
         colours_rgb = np.asarray(image)  # .reshape((-1, 3))
         with colour.utilities.suppress_warnings(colour_usage_warnings=True):
@@ -134,18 +143,21 @@ class ClusterPalette:
         return (output_4bit, line_to_palette, palettes_linear_rgb,
                 total_image_error)
 
-    def iterate(self, penalty: float, max_iterations: int):
+    def iterate(self, penalty: float, max_inner_iterations: int,
+                max_outer_iterations: int):
         total_image_error = 1e9
-        last_good_splits = self._palette_splits
-        while True:
+        # last_good_splits = self._palette_splits
+
+        outer_iterations_since_improvement = 0
+        while outer_iterations_since_improvement < max_outer_iterations:
             print("New iteration")
-            iterations_since_improvement = 0
-            self._palette_splits = self._equal_palette_splits()
+            inner_iterations_since_improvement = 0
+            # self._palette_splits = self._equal_palette_splits()
             self._init_palette_lines()
 
             self._fit_global_palette()
-            while iterations_since_improvement < max_iterations:
-                # print("Iterations %d" % iterations_since_improvement)
+            while inner_iterations_since_improvement < max_inner_iterations:
+                # print("Iterations %d" % inner_iterations_since_improvement)
                 new_palettes_cam, new_palettes_rgb12_iigs, new_palette_errors = (
                     self._propose_palettes())
 
@@ -155,17 +167,19 @@ class ClusterPalette:
                  new_total_image_error) = self._dither_image(
                     new_palettes_cam, penalty)
 
-                self._reassign_unused_palettes(line_to_palette,
-                                               last_good_splits)
+                # TODO: check for duplicate palettes and unused colours
+                #  within a palette
+                self._reassign_unused_palettes(line_to_palette)
 
                 # print(total_image_error, new_total_image_error)
                 if new_total_image_error >= total_image_error:
-                    iterations_since_improvement += 1
+                    inner_iterations_since_improvement += 1
                     continue
 
                 # We found a globally better set of palettes
-                iterations_since_improvement = 0
-                last_good_splits = self._palette_splits
+                inner_iterations_since_improvement = 0
+                outer_iterations_since_improvement = -1
+                # last_good_splits = self._palette_splits
                 total_image_error = new_total_image_error
 
                 self._palettes_cam = new_palettes_cam
@@ -175,6 +189,7 @@ class ClusterPalette:
 
                 yield (new_total_image_error, output_4bit, line_to_palette,
                        new_palettes_rgb12_iigs, palettes_linear_rgb)
+            outer_iterations_since_improvement += 1
 
     def _propose_palettes(self) -> Tuple[np.ndarray, np.ndarray, List[float]]:
         """Attempt to find new palettes that locally improve image quality.
@@ -202,7 +217,7 @@ class ClusterPalette:
         # individual palettes
         self._fit_global_palette()
 
-        self._mutate_palette_splits()
+        # self._mutate_palette_splits()
         for palette_idx in range(16):
             # print(palette_idx, self._palette_lines[palette_idx])
             # palette_lower, palette_upper = self._palette_splits[palette_idx]
@@ -210,11 +225,28 @@ class ClusterPalette:
                 self._colours_cam[
                 self._palette_lines[palette_idx], :, :].reshape(-1, 3))
 
+            initial_centroids = self._global_palette
+            pixels_rgb_iigs = dither_pyx.convert_cam16ucs_to_rgb12_iigs(
+                palette_pixels)
+            seen_colours = set()
+            for i in range(self._reserved_colours):
+                seen_colours.add(tuple(initial_centroids[i, :]))
+            for i in range(self._reserved_colours, 16):
+                choice = np.random.randint(0, pixels_rgb_iigs.shape[
+                        0])
+                new_colour = pixels_rgb_iigs[choice, :]
+                if tuple(new_colour) in seen_colours:
+                    # print("Skipping")
+                    continue
+                seen_colours.add(tuple(new_colour))
+                # print(i, choice)
+                initial_centroids[i, :] = new_colour
+
             palettes_rgb12_iigs, palette_error = \
                 dither_pyx.k_means_with_fixed_centroids(
                     n_clusters=16, n_fixed=self._reserved_colours,
                     samples=palette_pixels,
-                    initial_centroids=self._global_palette,
+                    initial_centroids=initial_centroids,
                     max_iterations=1000, tolerance=0.05,
                     rgb12_iigs_to_cam16ucs=self._rgb12_iigs_to_cam16ucs
                 )
@@ -260,50 +292,7 @@ class ClusterPalette:
                 clusters.cluster_centers_[frequency_order].astype(
                     np.float32)))
 
-    def _mutate_palette_splits(self):
-        if self._palettes_accepted:
-            # Last time was good, keep going
-            self._apply_palette_delta(self._palette_mutate_idx,
-                                      self._palette_mutate_delta[0],
-                                      self._palette_mutate_delta[1])
-        else:
-            # undo last mutation
-            self._apply_palette_delta(self._palette_mutate_idx,
-                                      -self._palette_mutate_delta[0],
-                                      -self._palette_mutate_delta[1])
-
-            # Pick a palette endpoint to move up or down
-            palette_to_mutate = np.random.randint(0, 16)
-            while True:
-                if palette_to_mutate > 0:
-                    palette_lower_delta = np.random.randint(-20, 21)
-                else:
-                    palette_lower_delta = 0
-                if palette_to_mutate < 15:
-                    palette_upper_delta = np.random.randint(-20, 21)
-                else:
-                    palette_upper_delta = 0
-                if palette_lower_delta != 0 or palette_upper_delta != 0:
-                    break
-
-            self._apply_palette_delta(palette_to_mutate, palette_lower_delta,
-                                      palette_upper_delta)
-
-    def _apply_palette_delta(
-            self, palette_to_mutate, palette_lower_delta, palette_upper_delta):
-        old_lower, old_upper = self._palette_splits[palette_to_mutate]
-        new_lower = old_lower + palette_lower_delta
-        new_upper = old_upper + palette_upper_delta
-
-        new_lower = np.clip(new_lower, 0, np.clip(new_upper, 1, 200) - 1)
-        new_upper = np.clip(new_upper, new_lower + 1, 200)
-        assert new_lower >= 0, new_upper - 1
-
-        self._palette_splits[palette_to_mutate] = (new_lower, new_upper)
-        self._palette_mutate_idx = palette_to_mutate
-        self._palette_mutate_delta = (palette_lower_delta, palette_upper_delta)
-
-    def _reassign_unused_palettes(self, new_line_to_palette, last_good_splits):
+    def _reassign_unused_palettes(self, new_line_to_palette):
         palettes_used = [False] * 16
         for palette in new_line_to_palette:
             palettes_used[palette] = True
@@ -421,7 +410,8 @@ def main():
 
     # TODO: flags
     penalty = 1  # 1e18  # TODO: is this needed any more?
-    iterations = 10 # 20
+    inner_iterations = 10 # 20
+    outer_iterations = 20
 
     pygame.init()
     # TODO: for some reason I need to execute this twice - the first time
@@ -441,7 +431,7 @@ def main():
     seq = 0
     for (new_total_image_error, output_4bit, line_to_palette,
          palettes_rgb12_iigs, palettes_linear_rgb) in cluster_palette.iterate(
-        penalty, iterations):
+        penalty, inner_iterations, outer_iterations):
 
         if total_image_error is not None:
             print("Improved quality +%f%% (%f)" % (
@@ -486,6 +476,11 @@ def main():
             canvas.blit(surface, (0, 0))
             pygame.display.flip()
 
+        # print((palettes_rgb * 255).astype(np.uint8))
+        unique_colours = np.unique(
+            palettes_rgb12_iigs.reshape(-1, 3), axis=0).shape[0]
+        print("%d unique colours" % unique_colours)
+
         seq += 1
         # Save Double hi-res image
         outfile = os.path.join(
@@ -498,11 +493,6 @@ def main():
         with open(args.output, "wb") as f:
             f.write(bytes(screen.memory))
 
-    # print((palettes_rgb * 255).astype(np.uint8))
-    unique_colours = np.unique(
-        palettes_rgb12_iigs.reshape(-1, 3), axis=0).shape[0]
-    print("%d unique colours" % unique_colours)
-
 
 
 if __name__ == "__main__":

From d645cc5964eea720ea85abaee02de781100a86ca Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Wed, 24 Nov 2021 15:21:50 +0000
Subject: [PATCH 51/82] Tidy

---
 convert.py | 157 ++++++++++++++++-------------------------------------
 1 file changed, 48 insertions(+), 109 deletions(-)

diff --git a/convert.py b/convert.py
index 3921656..813fc8a 100644
--- a/convert.py
+++ b/convert.py
@@ -32,65 +32,40 @@ class ClusterPalette:
     def __init__(
             self, image: Image, rgb12_iigs_to_cam16ucs, rgb24_to_cam16ucs,
             reserved_colours=0):
+
+        # Source image in 24-bit linear RGB colour space
         self._image_rgb = image
+
+        # Source image in CAM16UCS colour space
         self._colours_cam = self._image_colours_cam(image)
 
-        self._errors = [1e9] * 16
-
-        # We fit a 16-colour palette against the entire image which is used
-        # as starting values for fitting the 16 SHR palettes.  This helps to
-        # provide better global consistency of colours across the palettes,
-        # e.g. for large blocks of colour.  Otherwise these can take a while
-        # to converge.
-        self._global_palette = np.empty((16, 3), dtype=np.uint8)
-
         # How many image colours to fix identically across all 16 SHR
         # palettes.  These are taken to be the most prevalent colours from
         # _global_palette.
         self._reserved_colours = reserved_colours
 
-        # 16 SHR palettes each of 16 colours, in CAM16UCS format
+        # We fit a 16-colour palette against the entire image which is used
+        # as starting values for fitting the reserved colours in the 16 SHR
+        # palettes.
+        self._global_palette = np.empty((16, 3), dtype=np.uint8)
+
+        # 16 SHR palettes each of 16 colours, in CAM16UCS colour space
         self._palettes_cam = np.empty((16, 16, 3), dtype=np.float32)
 
-        # 16 SHR palettes each of 16 colours, in //gs 4-bit RGB format
+        # 16 SHR palettes each of 16 colours, in //gs 4-bit RGB colour space
         self._palettes_rgb = np.empty((16, 16, 3), dtype=np.uint8)
 
+        # defaultdict(list) mapping palette index to lines using this palette
+        self._palette_lines = self._init_palette_lines()
+
         # Conversion matrix from 12-bit //gs RGB colour space to CAM16UCS
         # colour space
         self._rgb12_iigs_to_cam16ucs = rgb12_iigs_to_cam16ucs
 
+        # Conversion matrix from 24-bit linear RGB colour space to CAM16UCS
+        # colour space
         self._rgb24_to_cam16ucs = rgb24_to_cam16ucs
 
-        self._palette_lines = defaultdict(list)
-
-        # List of line ranges used to train the 16 SHR palettes
-        # [(lower_0, upper_0), ...]
-        # self._palette_splits = self._equal_palette_splits()
-        self._init_palette_lines()
-
-        # Whether the previous iteration of proposed palettes was accepted
-        self._palettes_accepted = False
-
-        # Which palette index's line ranges did we mutate in previous iteration
-        self._palette_mutate_idx = 0
-
-        # Delta applied to palette split in previous iteration
-        self._palette_mutate_delta = (0, 0)
-
-    def _init_palette_lines(self):
-        palette_splits = self._equal_palette_splits()
-        for i, lh in enumerate(palette_splits):
-            l, h = lh
-            self._palette_lines[i].extend(list(range(l, h)))
-
-        # lines = list(range(200))
-        # random.shuffle(lines)
-        # idx = 0
-        # while lines:
-        #     self._palette_lines[idx].append(lines.pop())
-        #     idx += 1
-
-
     def _image_colours_cam(self, image: Image):
         colours_rgb = np.asarray(image)  # .reshape((-1, 3))
         with colour.utilities.suppress_warnings(colour_usage_warnings=True):
@@ -98,6 +73,23 @@ class ClusterPalette:
                                          "CAM16UCS").astype(np.float32)
         return colours_cam
 
+    def _init_palette_lines(self, init_random = False):
+        palette_lines = defaultdict(list)
+
+        if init_random:
+            lines = list(range(200))
+            random.shuffle(lines)
+            idx = 0
+            while lines:
+                palette_lines[idx].append(lines.pop())
+                idx += 1
+        else:
+            palette_splits = self._equal_palette_splits()
+            for i, lh in enumerate(palette_splits):
+                l, h = lh
+                palette_lines[i].extend(list(range(l, h)))
+        return palette_lines
+
     def _equal_palette_splits(self, palette_height=35):
         # The 16 palettes are striped across consecutive (overlapping) line
         # ranges.  Since nearby lines tend to have similar colours, this has
@@ -133,6 +125,8 @@ class ClusterPalette:
                 self._image_rgb, palettes_cam, palettes_linear_rgb,
                 self._rgb24_to_cam16ucs, float(penalty))
 
+        # Update map of palettes to image lines for which the palette was the
+        # best match
         palette_lines = defaultdict(list)
         for line, palette in enumerate(line_to_palette):
             palette_lines[palette].append(line)
@@ -146,23 +140,21 @@ class ClusterPalette:
     def iterate(self, penalty: float, max_inner_iterations: int,
                 max_outer_iterations: int):
         total_image_error = 1e9
-        # last_good_splits = self._palette_splits
 
         outer_iterations_since_improvement = 0
         while outer_iterations_since_improvement < max_outer_iterations:
             print("New iteration")
             inner_iterations_since_improvement = 0
-            # self._palette_splits = self._equal_palette_splits()
-            self._init_palette_lines()
+            self._palette_lines = self._init_palette_lines()
 
             self._fit_global_palette()
             while inner_iterations_since_improvement < max_inner_iterations:
                 # print("Iterations %d" % inner_iterations_since_improvement)
-                new_palettes_cam, new_palettes_rgb12_iigs, new_palette_errors = (
+                new_palettes_cam, new_palettes_rgb12_iigs = (
                     self._propose_palettes())
 
-                # Recompute image with proposed palettes and check whether it has
-                # lower total image error than our previous best.
+                # Recompute image with proposed palettes and check whether it
+                # has lower total image error than our previous best.
                 (output_4bit, line_to_palette, palettes_linear_rgb,
                  new_total_image_error) = self._dither_image(
                     new_palettes_cam, penalty)
@@ -171,27 +163,24 @@ class ClusterPalette:
                 #  within a palette
                 self._reassign_unused_palettes(line_to_palette)
 
-                # print(total_image_error, new_total_image_error)
                 if new_total_image_error >= total_image_error:
                     inner_iterations_since_improvement += 1
                     continue
 
-                # We found a globally better set of palettes
+                # We found a globally better set of palettes, so restart the
+                # clocks
                 inner_iterations_since_improvement = 0
                 outer_iterations_since_improvement = -1
-                # last_good_splits = self._palette_splits
                 total_image_error = new_total_image_error
 
                 self._palettes_cam = new_palettes_cam
                 self._palettes_rgb = new_palettes_rgb12_iigs
-                self._errors = new_palette_errors
-                self._palettes_accepted = True
 
                 yield (new_total_image_error, output_4bit, line_to_palette,
                        new_palettes_rgb12_iigs, palettes_linear_rgb)
             outer_iterations_since_improvement += 1
 
-    def _propose_palettes(self) -> Tuple[np.ndarray, np.ndarray, List[float]]:
+    def _propose_palettes(self) -> Tuple[np.ndarray, np.ndarray]:
         """Attempt to find new palettes that locally improve image quality.
 
         Re-fit a set of 16 palettes from (overlapping) line ranges of the
@@ -208,7 +197,6 @@ class ClusterPalette:
         The current (locally) best palettes are returned and can be applied
         using accept_palettes().
         """
-        new_errors = list(self._errors)
         new_palettes_cam = np.empty_like(self._palettes_cam)
         new_palettes_rgb12_iigs = np.empty_like(self._palettes_rgb)
 
@@ -217,14 +205,15 @@ class ClusterPalette:
         # individual palettes
         self._fit_global_palette()
 
-        # self._mutate_palette_splits()
         for palette_idx in range(16):
-            # print(palette_idx, self._palette_lines[palette_idx])
-            # palette_lower, palette_upper = self._palette_splits[palette_idx]
             palette_pixels = (
                 self._colours_cam[
                 self._palette_lines[palette_idx], :, :].reshape(-1, 3))
 
+            # Fix reserved colours from the global palette and pick unique
+            # random colours from the sample points for the remaining initial
+            # centroids.  This tends to increase the number of colours in the
+            # resulting image, and improves quality.
             initial_centroids = self._global_palette
             pixels_rgb_iigs = dither_pyx.convert_cam16ucs_to_rgb12_iigs(
                 palette_pixels)
@@ -236,10 +225,8 @@ class ClusterPalette:
                         0])
                 new_colour = pixels_rgb_iigs[choice, :]
                 if tuple(new_colour) in seen_colours:
-                    # print("Skipping")
                     continue
                 seen_colours.add(tuple(new_colour))
-                # print(i, choice)
                 initial_centroids[i, :] = new_colour
 
             palettes_rgb12_iigs, palette_error = \
@@ -251,13 +238,6 @@ class ClusterPalette:
                     rgb12_iigs_to_cam16ucs=self._rgb12_iigs_to_cam16ucs
                 )
 
-            # if (palette_error >= self._errors[palette_idx] and not
-            # self._reserved_colours):
-            #     # Not a local improvement to the existing palette, so ignore it.
-            #     # We can't take this shortcut when we're reserving colours
-            #     # because it would break the invariant that all palettes must
-            #     # share colours.
-            #     continue
             for i in range(16):
                 new_palettes_cam[palette_idx, i, :] = (
                     np.array(dither_pyx.convert_rgb12_iigs_to_cam(
@@ -265,10 +245,9 @@ class ClusterPalette:
                             i]), dtype=np.float32))
 
             new_palettes_rgb12_iigs[palette_idx, :, :] = palettes_rgb12_iigs
-            new_errors[palette_idx] = palette_error
 
         self._palettes_accepted = False
-        return new_palettes_cam, new_palettes_rgb12_iigs, new_errors
+        return new_palettes_cam, new_palettes_rgb12_iigs
 
     def _fit_global_palette(self):
         """Compute a 16-colour palette for the entire image to use as
@@ -299,7 +278,6 @@ class ClusterPalette:
         best_palette_lines = [v for k, v in sorted(list(zip(
             self._palette_line_errors, range(200))))]
 
-        # print(self._palette_lines)
         for palette_idx, palette_used in enumerate(palettes_used):
             if palette_used:
                 continue
@@ -309,44 +287,6 @@ class ClusterPalette:
             worst_line = best_palette_lines.pop()
             self._palette_lines[palette_idx] = [worst_line]
 
-            # print("Picked line %d with error %f" % (worst_line,
-            #                                         self._palette_line_errors[worst_line]))
-
-
-            #
-            # worst_average_palette_error = 0
-            # split_palette_idx = -1
-            # idx = 0
-            # for idx, lines in self._palette_lines.items():
-            #     if len(lines) < 10:
-            #         continue
-            #     average_palette_error = np.sum(self._palette_line_errors[
-            #                                     lines]) / len(lines)
-            #     print(idx, average_palette_error)
-            #     if average_palette_error > worst_average_palette_error:
-            #         worst_average_palette_error = average_palette_error
-            #         split_palette_idx = idx
-            #
-            # print("Picked %d with avg error %f" % (split_palette_idx, worst_average_palette_error))
-            # # TODO: split off lines with largest error
-            #
-            # palette_line_errors = self._palette_line_errors[
-            #     self._palette_lines[split_palette_idx]]
-            #
-            # print(sorted(
-            #     list(zip(palette_line_errors, self._palette_lines[
-            #     split_palette_idx])), reverse=True))
-            # best_palette_lines = [v for k, v in sorted(
-            #     list(zip(palette_line_errors, self._palette_lines[
-            #     split_palette_idx])))]
-            # num_max_lines = len(self._palette_lines[split_palette_idx])
-            #
-            # self._palette_lines[split_palette_idx] = best_palette_lines[
-            #                                          :num_max_lines // 2]
-            # # Move worst half to new palette
-            # self._palette_lines[palette_idx] = best_palette_lines[
-            #                                    num_max_lines // 2:]
-
 
 def main():
     parser = argparse.ArgumentParser()
@@ -410,7 +350,7 @@ def main():
 
     # TODO: flags
     penalty = 1  # 1e18  # TODO: is this needed any more?
-    inner_iterations = 10 # 20
+    inner_iterations = 10
     outer_iterations = 20
 
     pygame.init()
@@ -476,7 +416,6 @@ def main():
             canvas.blit(surface, (0, 0))
             pygame.display.flip()
 
-        # print((palettes_rgb * 255).astype(np.uint8))
         unique_colours = np.unique(
             palettes_rgb12_iigs.reshape(-1, 3), axis=0).shape[0]
         print("%d unique colours" % unique_colours)

From e77e7abd43f86015f583cbbb71eda4e2a946dbeb Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Wed, 24 Nov 2021 15:24:45 +0000
Subject: [PATCH 52/82] Rename

---
 convert.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/convert.py b/convert.py
index 813fc8a..6347aad 100644
--- a/convert.py
+++ b/convert.py
@@ -147,11 +147,10 @@ class ClusterPalette:
             inner_iterations_since_improvement = 0
             self._palette_lines = self._init_palette_lines()
 
-            self._fit_global_palette()
             while inner_iterations_since_improvement < max_inner_iterations:
                 # print("Iterations %d" % inner_iterations_since_improvement)
                 new_palettes_cam, new_palettes_rgb12_iigs = (
-                    self._propose_palettes())
+                    self._fit_shr_palettes())
 
                 # Recompute image with proposed palettes and check whether it
                 # has lower total image error than our previous best.
@@ -180,7 +179,7 @@ class ClusterPalette:
                        new_palettes_rgb12_iigs, palettes_linear_rgb)
             outer_iterations_since_improvement += 1
 
-    def _propose_palettes(self) -> Tuple[np.ndarray, np.ndarray]:
+    def _fit_shr_palettes(self) -> Tuple[np.ndarray, np.ndarray]:
         """Attempt to find new palettes that locally improve image quality.
 
         Re-fit a set of 16 palettes from (overlapping) line ranges of the

From 5fefd0b0bbb0c238ddd0cee6233d135888929f27 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Wed, 24 Nov 2021 15:24:58 +0000
Subject: [PATCH 53/82] Don't initialize pygame if --no-show-output

---
 convert.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/convert.py b/convert.py
index 6347aad..dfaede3 100644
--- a/convert.py
+++ b/convert.py
@@ -352,13 +352,14 @@ def main():
     inner_iterations = 10
     outer_iterations = 20
 
-    pygame.init()
-    # TODO: for some reason I need to execute this twice - the first time
-    #  the window is created and immediately destroyed
-    _ = pygame.display.set_mode((640, 400))
-    canvas = pygame.display.set_mode((640, 400))
-    canvas.fill((0, 0, 0))
-    pygame.display.flip()
+    if args.show_output:
+        pygame.init()
+        # TODO: for some reason I need to execute this twice - the first time
+        #  the window is created and immediately destroyed
+        _ = pygame.display.set_mode((640, 400))
+        canvas = pygame.display.set_mode((640, 400))
+        canvas.fill((0, 0, 0))
+        pygame.display.flip()
 
     total_image_error = None
     # TODO: reserved_colours should be a flag

From 8175dcb052d735b1ab83aa23fd33d82b03883b12 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Wed, 24 Nov 2021 15:27:34 +0000
Subject: [PATCH 54/82] Add --fixed-colours to control how many colours will be
 kept identical across all 16 SHR palettes.

---
 convert.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/convert.py b/convert.py
index dfaede3..6eac012 100644
--- a/convert.py
+++ b/convert.py
@@ -31,7 +31,7 @@ import screen as screen_py
 class ClusterPalette:
     def __init__(
             self, image: Image, rgb12_iigs_to_cam16ucs, rgb24_to_cam16ucs,
-            reserved_colours=0):
+            fixed_colours=0):
 
         # Source image in 24-bit linear RGB colour space
         self._image_rgb = image
@@ -42,7 +42,7 @@ class ClusterPalette:
         # How many image colours to fix identically across all 16 SHR
         # palettes.  These are taken to be the most prevalent colours from
         # _global_palette.
-        self._reserved_colours = reserved_colours
+        self._fixed_colours = fixed_colours
 
         # We fit a 16-colour palette against the entire image which is used
         # as starting values for fitting the reserved colours in the 16 SHR
@@ -217,9 +217,9 @@ class ClusterPalette:
             pixels_rgb_iigs = dither_pyx.convert_cam16ucs_to_rgb12_iigs(
                 palette_pixels)
             seen_colours = set()
-            for i in range(self._reserved_colours):
+            for i in range(self._fixed_colours):
                 seen_colours.add(tuple(initial_centroids[i, :]))
-            for i in range(self._reserved_colours, 16):
+            for i in range(self._fixed_colours, 16):
                 choice = np.random.randint(0, pixels_rgb_iigs.shape[
                         0])
                 new_colour = pixels_rgb_iigs[choice, :]
@@ -230,7 +230,7 @@ class ClusterPalette:
 
             palettes_rgb12_iigs, palette_error = \
                 dither_pyx.k_means_with_fixed_centroids(
-                    n_clusters=16, n_fixed=self._reserved_colours,
+                    n_clusters=16, n_fixed=self._fixed_colours,
                     samples=palette_pixels,
                     initial_centroids=initial_centroids,
                     max_iterations=1000, tolerance=0.05,
@@ -326,6 +326,10 @@ def main():
         '--gamma_correct', type=float, default=2.4,
         help='Gamma-correct image by this value (default: 2.4)'
     )
+    parser.add_argument(
+        '--fixed_colours', type=int, default=0,
+        help='How many colours to fix as identical across all 16 SHR palettes'
+    )
     args = parser.parse_args()
     if args.lookahead < 1:
         parser.error('--lookahead must be at least 1')
@@ -362,9 +366,8 @@ def main():
         pygame.display.flip()
 
     total_image_error = None
-    # TODO: reserved_colours should be a flag
     cluster_palette = ClusterPalette(
-        rgb, reserved_colours=1,
+        rgb, fixed_colours=args.fixed_colours,
         rgb12_iigs_to_cam16ucs=rgb12_iigs_to_cam16ucs,
         rgb24_to_cam16ucs=rgb24_to_cam16ucs)
 

From 8d3ab4f50e44d70fa3f5359bb2603535c63bbe05 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Wed, 24 Nov 2021 15:41:32 +0000
Subject: [PATCH 55/82] Add the ability to disable saving preview images.  Also
 rename --gamma_correct and --fixed_colours to --gamma-correct and
 --fixed-colours for consistency

---
 convert.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/convert.py b/convert.py
index 6eac012..60e6d45 100644
--- a/convert.py
+++ b/convert.py
@@ -323,13 +323,17 @@ def main():
         '--verbose', action=argparse.BooleanOptionalAction,
         default=False, help="Show progress during conversion")
     parser.add_argument(
-        '--gamma_correct', type=float, default=2.4,
+        '--gamma-correct', type=float, default=2.4,
         help='Gamma-correct image by this value (default: 2.4)'
     )
     parser.add_argument(
-        '--fixed_colours', type=int, default=0,
+        '--fixed-colours', type=int, default=0,
         help='How many colours to fix as identical across all 16 SHR palettes'
     )
+    parser.add_argument(
+        '--save-preview', type=bool, default=True,
+        help='Whether to save a .PNG rendering of the output image'
+    )
     args = parser.parse_args()
     if args.lookahead < 1:
         parser.error('--lookahead must be at least 1')
@@ -424,10 +428,12 @@ def main():
         print("%d unique colours" % unique_colours)
 
         seq += 1
-        # Save Double hi-res image
-        outfile = os.path.join(
-            os.path.splitext(args.output)[0] + "-%d-preview.png" % seq)
-        out_image.save(outfile, "PNG")
+
+        if args.save_preview:
+            # Save Double hi-res image
+            outfile = os.path.join(
+                os.path.splitext(args.output)[0] + "-%d-preview.png" % seq)
+            out_image.save(outfile, "PNG")
         screen.pack()
         # with open(args.output, "wb") as f:
         #     f.write(bytes(screen.aux))

From 0036ee952272c441962551eecaf011897154dadf Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Wed, 24 Nov 2021 15:44:37 +0000
Subject: [PATCH 56/82] Add default values to help

---
 convert.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/convert.py b/convert.py
index 60e6d45..35b14d4 100644
--- a/convert.py
+++ b/convert.py
@@ -328,11 +328,13 @@ def main():
     )
     parser.add_argument(
         '--fixed-colours', type=int, default=0,
-        help='How many colours to fix as identical across all 16 SHR palettes'
+        help='How many colours to fix as identical across all 16 SHR palettes '
+        '(default: 0)'
     )
     parser.add_argument(
         '--save-preview', type=bool, default=True,
-        help='Whether to save a .PNG rendering of the output image'
+        help='Whether to save a .PNG rendering of the output image (default: '
+             'True)'
     )
     args = parser.parse_args()
     if args.lookahead < 1:

From 9a77af37aaddac406ca3aac30cbd89507498ea06 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Wed, 24 Nov 2021 15:49:56 +0000
Subject: [PATCH 57/82] Add a --show-final-score flag to output the final image
 quality score. This is useful when used as part of an image repository build
 pipeline, to avoid replacing existing images if the new score (total image
 error) is higher, i.e. worse.

Hide intermediate output behind --verbose
---
 convert.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/convert.py b/convert.py
index 35b14d4..b7c0932 100644
--- a/convert.py
+++ b/convert.py
@@ -73,7 +73,7 @@ class ClusterPalette:
                                          "CAM16UCS").astype(np.float32)
         return colours_cam
 
-    def _init_palette_lines(self, init_random = False):
+    def _init_palette_lines(self, init_random=False):
         palette_lines = defaultdict(list)
 
         if init_random:
@@ -143,7 +143,6 @@ class ClusterPalette:
 
         outer_iterations_since_improvement = 0
         while outer_iterations_since_improvement < max_outer_iterations:
-            print("New iteration")
             inner_iterations_since_improvement = 0
             self._palette_lines = self._init_palette_lines()
 
@@ -221,7 +220,7 @@ class ClusterPalette:
                 seen_colours.add(tuple(initial_centroids[i, :]))
             for i in range(self._fixed_colours, 16):
                 choice = np.random.randint(0, pixels_rgb_iigs.shape[
-                        0])
+                    0])
                 new_colour = pixels_rgb_iigs[choice, :]
                 if tuple(new_colour) in seen_colours:
                     continue
@@ -329,13 +328,17 @@ def main():
     parser.add_argument(
         '--fixed-colours', type=int, default=0,
         help='How many colours to fix as identical across all 16 SHR palettes '
-        '(default: 0)'
+             '(default: 0)'
     )
     parser.add_argument(
         '--save-preview', type=bool, default=True,
         help='Whether to save a .PNG rendering of the output image (default: '
              'True)'
     )
+    parser.add_argument(
+        '--show-final-score', type=bool, default=False,
+        help='Whether to output the final image quality score (default: False)'
+    )
     args = parser.parse_args()
     if args.lookahead < 1:
         parser.error('--lookahead must be at least 1')
@@ -382,7 +385,7 @@ def main():
          palettes_rgb12_iigs, palettes_linear_rgb) in cluster_palette.iterate(
         penalty, inner_iterations, outer_iterations):
 
-        if total_image_error is not None:
+        if args.verbose and total_image_error is not None:
             print("Improved quality +%f%% (%f)" % (
                 (1 - new_total_image_error / total_image_error) * 100,
                 new_total_image_error))
@@ -427,7 +430,8 @@ def main():
 
         unique_colours = np.unique(
             palettes_rgb12_iigs.reshape(-1, 3), axis=0).shape[0]
-        print("%d unique colours" % unique_colours)
+        if args.verbose:
+            print("%d unique colours" % unique_colours)
 
         seq += 1
 
@@ -443,6 +447,8 @@ def main():
         with open(args.output, "wb") as f:
             f.write(bytes(screen.memory))
 
+    if args.show_final_score:
+        print("FINAL_SCORE:", total_image_error)
 
 
 if __name__ == "__main__":

From 8b5c3dc6c1f8313bff8e5a5782b4b77902e598e8 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Wed, 24 Nov 2021 16:03:55 +0000
Subject: [PATCH 58/82] Fix bool flags

---
 convert.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/convert.py b/convert.py
index b7c0932..8f3b9f9 100644
--- a/convert.py
+++ b/convert.py
@@ -331,13 +331,14 @@ def main():
              '(default: 0)'
     )
     parser.add_argument(
-        '--save-preview', type=bool, default=True,
+        '--save-preview', action=argparse.BooleanOptionalAction, default=True,
         help='Whether to save a .PNG rendering of the output image (default: '
              'True)'
     )
     parser.add_argument(
-        '--show-final-score', type=bool, default=False,
-        help='Whether to output the final image quality score (default: False)'
+        '--show-final-score', action=argparse.BooleanOptionalAction,
+        default=False, help='Whether to output the final image quality score '
+                            '(default: False)'
     )
     args = parser.parse_args()
     if args.lookahead < 1:

From 870c008827f482a9af60b804815a509d0d4b880c Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Thu, 25 Nov 2021 09:09:40 +0000
Subject: [PATCH 59/82] Parametrize quantization error decay and minimum value.
  The latter helps with images where there are large solid colour fields that
 sometimes cause uneven dithering because of colours that cannot be matched
 with the //gs palette, but it's not a viable solution in general since it
 reduces overall quality (sometimes substantially, e.g. in the case of vertical
 colour gradients).

---
 dither.pyx | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/dither.pyx b/dither.pyx
index 22f0e46..e4ad4af 100644
--- a/dither.pyx
+++ b/dither.pyx
@@ -355,6 +355,9 @@ def dither_shr(
     cdef double[::1] palette_line_errors = np.zeros(200, dtype=np.float64)
     cdef PaletteSelection palette_line
 
+    cdef float decay = 0.5
+    cdef float min_quant_error = 0.0  # 0.02
+
     best_palette = -1
     total_image_error = 0.0
     for y in range(200):
@@ -387,6 +390,8 @@ def dither_shr(
 
             for i in range(3):
                 quant_error = working_image[y, x, i] - best_colour_rgb[i]
+                if abs(quant_error) <= min_quant_error:
+                    quant_error = 0
 
                 # Floyd-Steinberg dither
                 # 0 * 7
@@ -396,15 +401,14 @@ def dither_shr(
                     working_image[y, x + 1, i] = clip(
                         working_image[y, x + 1, i] + quant_error * (7 / 16), 0, 1)
                 if y < 199:
-                    # TODO: parametrize the 0.5x decay factor
                     if x > 0:
                         working_image[y + 1, x - 1, i] = clip(
-                            working_image[y + 1, x - 1, i] + quant_error * (3 / 32), 0, 1)
+                            working_image[y + 1, x - 1, i] + decay * quant_error * (3 / 32), 0, 1)
                     working_image[y + 1, x, i] = clip(
-                        working_image[y + 1, x, i] + quant_error * (5 / 32), 0, 1)
+                        working_image[y + 1, x, i] + decay * quant_error * (5 / 32), 0, 1)
                     if x < 319:
                         working_image[y + 1, x + 1, i] = clip(
-                            working_image[y + 1, x + 1, i] + quant_error * (1 / 32), 0, 1)
+                            working_image[y + 1, x + 1, i] + decay * quant_error * (1 / 32), 0, 1)
 
 #                # 0 0 X 7 5
 #                # 3 5 7 5 3
@@ -418,40 +422,40 @@ def dither_shr(
                 #if y < 199:
                 #    if x > 1:
                 #        working_image[y + 1, x - 2, i] = clip(
-                #            working_image[y + 1, x - 2, i] + quant_error * (3 / 48), 0,
+                #            working_image[y + 1, x - 2, i] + decay * quant_error * (3 / 48), 0,
                 #            1)
                 #    if x > 0:
                 #        working_image[y + 1, x - 1, i] = clip(
-                #            working_image[y + 1, x - 1, i] + quant_error * (5 / 48), 0,
+                #            working_image[y + 1, x - 1, i] + decay * quant_error * (5 / 48), 0,
                 #            1)
                 #    working_image[y + 1, x, i] = clip(
-                #        working_image[y + 1, x, i] + quant_error * (7 / 48), 0, 1)
+                #        working_image[y + 1, x, i] + decay * quant_error * (7 / 48), 0, 1)
                 #    if x < 319:
                 #        working_image[y + 1, x + 1, i] = clip(
-                #            working_image[y + 1, x + 1, i] + quant_error * (5 / 48),
+                #            working_image[y + 1, x + 1, i] + decay * quant_error * (5 / 48),
                 #            0, 1)
                 #    if x < 318:
                 #        working_image[y + 1, x + 2, i] = clip(
-                #            working_image[y + 1, x + 2, i] + quant_error * (3 / 48),
+                #            working_image[y + 1, x + 2, i] + decay * quant_error * (3 / 48),
                 #            0, 1)
                 #if y < 198:
                 #    if x > 1:
                 #        working_image[y + 2, x - 2, i] = clip(
-                #            working_image[y + 2, x - 2, i] + quant_error * (1 / 48), 0,
+                #            working_image[y + 2, x - 2, i] + decay * decay * quant_error * (1 / 48), 0,
                 #            1)
                 #    if x > 0:
                 #        working_image[y + 2, x - 1, i] = clip(
-                #            working_image[y + 2, x - 1, i] + quant_error * (3 / 48), 0,
+                #            working_image[y + 2, x - 1, i] + decay * decay * quant_error * (3 / 48), 0,
                 #            1)
                 #    working_image[y + 2, x, i] = clip(
-                #        working_image[y + 2, x, i] + quant_error * (5 / 48), 0, 1)
+                #        working_image[y + 2, x, i] + decay * decay * quant_error * (5 / 48), 0, 1)
                 #    if x < 319:
                 #        working_image[y + 2, x + 1, i] = clip(
-                #            working_image[y + 2, x + 1, i] + quant_error * (3 / 48),
+                #            working_image[y + 2, x + 1, i] + decay * decay * quant_error * (3 / 48),
                 #            0, 1)
                 #    if x < 318:
                 #        working_image[y + 2, x + 2, i] = clip(
-                #            working_image[y + 2, x + 2, i] + quant_error * (1 / 48),
+                #            working_image[y + 2, x + 2, i] + decay * decay * quant_error * (1 / 48),
                 #            0, 1)
 
     return np.array(output_4bit, dtype=np.uint8), line_to_palette, total_image_error, np.array(palette_line_errors, dtype=np.float64)

From ad50ed103dac03ba5304e52e8467f6154206346f Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Thu, 25 Nov 2021 11:46:42 +0000
Subject: [PATCH 60/82] Improvements to image quality:

- Preprocess the source image by dithering with the full 12-bit //gs
  colour palette, ignoring SHR palette restrictions (i.e. each pixel
  chosen independently from 4096 colours)

- Using this as the ground truth allows much better handling of
  e.g. solid colours, which were being dithered inconsistently with
  the previous approach

- Also when fitting an SHR palette, fix any colours that comprise more
  than 10% of source pixels.  This also encourages more uniformity in
  regions of solid colour.
---
 convert.py |  71 ++++++++++++----
 dither.pyx | 242 +++++++++++++++++++++++++++++++++++++++--------------
 2 files changed, 237 insertions(+), 76 deletions(-)

diff --git a/convert.py b/convert.py
index 8f3b9f9..73ca6a2 100644
--- a/convert.py
+++ b/convert.py
@@ -30,14 +30,27 @@ import screen as screen_py
 
 class ClusterPalette:
     def __init__(
-            self, image: Image, rgb12_iigs_to_cam16ucs, rgb24_to_cam16ucs,
+            self, image: np.ndarray, rgb12_iigs_to_cam16ucs, rgb24_to_cam16ucs,
             fixed_colours=0):
 
-        # Source image in 24-bit linear RGB colour space
-        self._image_rgb = image
+        # Conversion matrix from 12-bit //gs RGB colour space to CAM16UCS
+        # colour space
+        self._rgb12_iigs_to_cam16ucs = rgb12_iigs_to_cam16ucs
 
-        # Source image in CAM16UCS colour space
-        self._colours_cam = self._image_colours_cam(image)
+        # Conversion matrix from 24-bit linear RGB colour space to CAM16UCS
+        # colour space
+        self._rgb24_to_cam16ucs = rgb24_to_cam16ucs
+
+        # Preprocessed source image in 24-bit linear RGB colour space.  We
+        # first dither the source image using the full 12-bit //gs RGB colour
+        # palette, ignoring SHR palette limitations (i.e. 4096 independent
+        # colours for each pixel).  This gives much better results for e.g.
+        # solid blocks of colour, which would be dithered inconsistently if
+        # targeting the source image directly.
+        self._image_rgb = self._perfect_dither(image)
+
+        # Preprocessed source image in CAM16UCS colour space
+        self._colours_cam = self._image_colours_cam(self._image_rgb)
 
         # How many image colours to fix identically across all 16 SHR
         # palettes.  These are taken to be the most prevalent colours from
@@ -58,14 +71,6 @@ class ClusterPalette:
         # defaultdict(list) mapping palette index to lines using this palette
         self._palette_lines = self._init_palette_lines()
 
-        # Conversion matrix from 12-bit //gs RGB colour space to CAM16UCS
-        # colour space
-        self._rgb12_iigs_to_cam16ucs = rgb12_iigs_to_cam16ucs
-
-        # Conversion matrix from 24-bit linear RGB colour space to CAM16UCS
-        # colour space
-        self._rgb24_to_cam16ucs = rgb24_to_cam16ucs
-
     def _image_colours_cam(self, image: Image):
         colours_rgb = np.asarray(image)  # .reshape((-1, 3))
         with colour.utilities.suppress_warnings(colour_usage_warnings=True):
@@ -113,6 +118,23 @@ class ClusterPalette:
                                    int(np.round(palette_upper))))
         return palette_ranges
 
+    def _perfect_dither(self, source_image: np.ndarray):
+        """Dither a "perfect" image using the full 12-bit //gs RGB colour
+        palette, ignoring restrictions."""
+
+        # Suppress divide by zero warning,
+        # https://github.com/colour-science/colour/issues/900
+        with colour.utilities.suppress_warnings(python_warnings=True):
+            full_palette_linear_rgb = colour.convert(
+                self._rgb12_iigs_to_cam16ucs, "CAM16UCS", "RGB").astype(
+                np.float32)
+
+        total_image_error, image_rgb = dither_pyx.dither_shr_perfect(
+            source_image, self._rgb12_iigs_to_cam16ucs, full_palette_linear_rgb,
+            self._rgb24_to_cam16ucs)
+        # print("Perfect image error:", total_image_error)
+        return image_rgb
+
     def _dither_image(self, palettes_cam, penalty):
         # Suppress divide by zero warning,
         # https://github.com/colour-science/colour/issues/900
@@ -227,9 +249,29 @@ class ClusterPalette:
                 seen_colours.add(tuple(new_colour))
                 initial_centroids[i, :] = new_colour
 
+            # If there are any single colours in our source //gs RGB
+            # pixels that represent more than fixed_colour_fraction_threshold
+            # of the pixels, then fix these colours for the palette instead of
+            # clustering them.  This reduces artifacting on blocks of
+            # colour.
+            fixed_colour_fraction_threshold = 0.1
+            fixed_colours = self._fixed_colours
+            for colour, freq in sorted(list(zip(
+                    *np.unique(dither_pyx.convert_cam16ucs_to_rgb12_iigs(
+                        palette_pixels), return_counts=True, axis=0))),
+                    key=lambda kv: kv[1], reverse=True):
+                if freq < (palette_pixels.shape[0] *
+                           fixed_colour_fraction_threshold):
+                    break
+                # print(colour, freq)
+                if tuple(colour) not in seen_colours:
+                    seen_colours.add(tuple(colour))
+                    initial_centroids[fixed_colours, :] = colour
+                    fixed_colours += 1
+
             palettes_rgb12_iigs, palette_error = \
                 dither_pyx.k_means_with_fixed_centroids(
-                    n_clusters=16, n_fixed=self._fixed_colours,
+                    n_clusters=16, n_fixed=fixed_colours,
                     samples=palette_pixels,
                     initial_centroids=initial_centroids,
                     max_iterations=1000, tolerance=0.05,
@@ -260,6 +302,7 @@ class ClusterPalette:
         palette_freq = {idx: 0 for idx in range(16)}
         for idx, freq in zip(*np.unique(clusters.labels_, return_counts=True)):
             palette_freq[idx] = freq
+
         frequency_order = [
             k for k, v in sorted(
                 list(palette_freq.items()), key=lambda kv: kv[1], reverse=True)]
diff --git a/dither.pyx b/dither.pyx
index e4ad4af..d7a63df 100644
--- a/dither.pyx
+++ b/dither.pyx
@@ -336,6 +336,121 @@ def dither_image(
     return image_nbit_to_bitmap(image_nbit, xres, yres, palette_depth)
 
 
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def dither_shr_perfect(
+        float[:, :, ::1] input_rgb, float[:, ::1] full_palette_cam, float[:, ::1] full_palette_rgb,
+        float[:,::1] rgb_to_cam16ucs):
+    cdef int y, x, idx, best_colour_idx, i
+    cdef double best_distance, distance, total_image_error
+    cdef float[::1] best_colour_rgb, pixel_cam
+    cdef float quant_error
+    cdef float[:, ::1] palette_rgb, palette_cam
+
+    cdef float[:, :, ::1] working_image = np.copy(input_rgb)
+    cdef float[:, ::1] line_cam = np.zeros((320, 3), dtype=np.float32)
+
+    cdef int palette_size = full_palette_rgb.shape[0]
+
+    cdef float decay = 0.5
+    cdef float min_quant_error = 0.0  # 0.02
+    cdef int floyd_steinberg = 1
+
+    total_image_error = 0.0
+    for y in range(200):
+        for x in range(320):
+            line_cam[x, :] = convert_rgb_to_cam16ucs(
+                rgb_to_cam16ucs, working_image[y,x,0], working_image[y,x,1], working_image[y,x,2])
+
+        for x in range(320):
+            pixel_cam = convert_rgb_to_cam16ucs(
+                rgb_to_cam16ucs, working_image[y, x, 0], working_image[y, x, 1], working_image[y, x, 2])
+
+            best_distance = 1e9
+            best_colour_idx = -1
+            for idx in range(palette_size):
+                distance = colour_distance_squared(pixel_cam, full_palette_cam[idx, :])
+                if distance < best_distance:
+                    best_distance = distance
+                    best_colour_idx = idx
+            best_colour_rgb = full_palette_rgb[best_colour_idx]
+            total_image_error += best_distance
+
+            for i in range(3):
+                quant_error = working_image[y, x, i] - best_colour_rgb[i]
+                if abs(quant_error) <= min_quant_error:
+                    quant_error = 0
+
+                working_image[y, x, i] = best_colour_rgb[i]
+                if floyd_steinberg:
+                    # Floyd-Steinberg dither
+                    # 0 * 7
+                    # 3 5 1
+                    if x < 319:
+                        working_image[y, x + 1, i] = clip(
+                            working_image[y, x + 1, i] + quant_error * (7 / 16), 0, 1)
+                    if y < 199:
+                        if x > 0:
+                            working_image[y + 1, x - 1, i] = clip(
+                                working_image[y + 1, x - 1, i] + decay * quant_error * (3 / 16), 0, 1)
+                        working_image[y + 1, x, i] = clip(
+                            working_image[y + 1, x, i] + decay * quant_error * (5 / 16), 0, 1)
+                        if x < 319:
+                            working_image[y + 1, x + 1, i] = clip(
+                                working_image[y + 1, x + 1, i] + decay * quant_error * (1 / 16), 0, 1)
+                else:
+                    # Jarvis
+                    # 0 0 X 7 5
+                    # 3 5 7 5 3
+                    # 1 3 5 3 1
+                    if x < 319:
+                        working_image[y, x + 1, i] = clip(
+                            working_image[y, x + 1, i] + quant_error * (7 / 48), 0, 1)
+                    if x < 318:
+                        working_image[y, x + 2, i] = clip(
+                            working_image[y, x + 2, i] + quant_error * (5 / 48), 0, 1)
+                    if y < 199:
+                        if x > 1:
+                            working_image[y + 1, x - 2, i] = clip(
+                                working_image[y + 1, x - 2, i] + decay * quant_error * (3 / 48), 0,
+                                1)
+                        if x > 0:
+                            working_image[y + 1, x - 1, i] = clip(
+                                working_image[y + 1, x - 1, i] + decay * quant_error * (5 / 48), 0,
+                                1)
+                        working_image[y + 1, x, i] = clip(
+                            working_image[y + 1, x, i] + decay * quant_error * (7 / 48), 0, 1)
+                        if x < 319:
+                            working_image[y + 1, x + 1, i] = clip(
+                                working_image[y + 1, x + 1, i] + decay * quant_error * (5 / 48),
+                                0, 1)
+                        if x < 318:
+                            working_image[y + 1, x + 2, i] = clip(
+                                working_image[y + 1, x + 2, i] + decay * quant_error * (3 / 48),
+                                0, 1)
+                    if y < 198:
+                        if x > 1:
+                            working_image[y + 2, x - 2, i] = clip(
+                                working_image[y + 2, x - 2, i] + decay * decay * quant_error * (1 / 48), 0,
+                                1)
+                        if x > 0:
+                            working_image[y + 2, x - 1, i] = clip(
+                                working_image[y + 2, x - 1, i] + decay * decay * quant_error * (3 / 48), 0,
+                                1)
+                        working_image[y + 2, x, i] = clip(
+                            working_image[y + 2, x, i] + decay * decay * quant_error * (5 / 48), 0, 1)
+                        if x < 319:
+                            working_image[y + 2, x + 1, i] = clip(
+                                working_image[y + 2, x + 1, i] + decay * decay * quant_error * (3 / 48),
+                                0, 1)
+                        if x < 318:
+                            working_image[y + 2, x + 2, i] = clip(
+                                working_image[y + 2, x + 2, i] + decay * decay * quant_error * (1 / 48),
+                                0, 1)
+
+    return total_image_error, working_image
+
+
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def dither_shr(
@@ -357,6 +472,7 @@ def dither_shr(
 
     cdef float decay = 0.5
     cdef float min_quant_error = 0.0  # 0.02
+    cdef int floyd_steinberg = 1
 
     best_palette = -1
     total_image_error = 0.0
@@ -393,70 +509,72 @@ def dither_shr(
                 if abs(quant_error) <= min_quant_error:
                     quant_error = 0
 
-                # Floyd-Steinberg dither
-                # 0 * 7
-                # 3 5 1
                 working_image[y, x, i] = best_colour_rgb[i]
-                if x < 319:
-                    working_image[y, x + 1, i] = clip(
-                        working_image[y, x + 1, i] + quant_error * (7 / 16), 0, 1)
-                if y < 199:
-                    if x > 0:
-                        working_image[y + 1, x - 1, i] = clip(
-                            working_image[y + 1, x - 1, i] + decay * quant_error * (3 / 32), 0, 1)
-                    working_image[y + 1, x, i] = clip(
-                        working_image[y + 1, x, i] + decay * quant_error * (5 / 32), 0, 1)
+                if floyd_steinberg:
+                    # Floyd-Steinberg dither
+                    # 0 * 7
+                    # 3 5 1
                     if x < 319:
-                        working_image[y + 1, x + 1, i] = clip(
-                            working_image[y + 1, x + 1, i] + decay * quant_error * (1 / 32), 0, 1)
-
-#                # 0 0 X 7 5
-#                # 3 5 7 5 3
-#                # 1 3 5 3 1
-                #if x < 319:
-                #    working_image[y, x + 1, i] = clip(
-                #        working_image[y, x + 1, i] + quant_error * (7 / 48), 0, 1)
-                #if x < 318:
-                #    working_image[y, x + 2, i] = clip(
-                #        working_image[y, x + 2, i] + quant_error * (5 / 48), 0, 1)
-                #if y < 199:
-                #    if x > 1:
-                #        working_image[y + 1, x - 2, i] = clip(
-                #            working_image[y + 1, x - 2, i] + decay * quant_error * (3 / 48), 0,
-                #            1)
-                #    if x > 0:
-                #        working_image[y + 1, x - 1, i] = clip(
-                #            working_image[y + 1, x - 1, i] + decay * quant_error * (5 / 48), 0,
-                #            1)
-                #    working_image[y + 1, x, i] = clip(
-                #        working_image[y + 1, x, i] + decay * quant_error * (7 / 48), 0, 1)
-                #    if x < 319:
-                #        working_image[y + 1, x + 1, i] = clip(
-                #            working_image[y + 1, x + 1, i] + decay * quant_error * (5 / 48),
-                #            0, 1)
-                #    if x < 318:
-                #        working_image[y + 1, x + 2, i] = clip(
-                #            working_image[y + 1, x + 2, i] + decay * quant_error * (3 / 48),
-                #            0, 1)
-                #if y < 198:
-                #    if x > 1:
-                #        working_image[y + 2, x - 2, i] = clip(
-                #            working_image[y + 2, x - 2, i] + decay * decay * quant_error * (1 / 48), 0,
-                #            1)
-                #    if x > 0:
-                #        working_image[y + 2, x - 1, i] = clip(
-                #            working_image[y + 2, x - 1, i] + decay * decay * quant_error * (3 / 48), 0,
-                #            1)
-                #    working_image[y + 2, x, i] = clip(
-                #        working_image[y + 2, x, i] + decay * decay * quant_error * (5 / 48), 0, 1)
-                #    if x < 319:
-                #        working_image[y + 2, x + 1, i] = clip(
-                #            working_image[y + 2, x + 1, i] + decay * decay * quant_error * (3 / 48),
-                #            0, 1)
-                #    if x < 318:
-                #        working_image[y + 2, x + 2, i] = clip(
-                #            working_image[y + 2, x + 2, i] + decay * decay * quant_error * (1 / 48),
-                #            0, 1)
+                        working_image[y, x + 1, i] = clip(
+                            working_image[y, x + 1, i] + quant_error * (7 / 16), 0, 1)
+                    if y < 199:
+                        if x > 0:
+                            working_image[y + 1, x - 1, i] = clip(
+                                working_image[y + 1, x - 1, i] + decay * quant_error * (3 / 16), 0, 1)
+                        working_image[y + 1, x, i] = clip(
+                            working_image[y + 1, x, i] + decay * quant_error * (5 / 16), 0, 1)
+                        if x < 319:
+                            working_image[y + 1, x + 1, i] = clip(
+                                working_image[y + 1, x + 1, i] + decay * quant_error * (1 / 16), 0, 1)
+                else:
+                    # Jarvis
+                    # 0 0 X 7 5
+                    # 3 5 7 5 3
+                    # 1 3 5 3 1
+                    if x < 319:
+                        working_image[y, x + 1, i] = clip(
+                            working_image[y, x + 1, i] + quant_error * (7 / 48), 0, 1)
+                    if x < 318:
+                        working_image[y, x + 2, i] = clip(
+                            working_image[y, x + 2, i] + quant_error * (5 / 48), 0, 1)
+                    if y < 199:
+                        if x > 1:
+                            working_image[y + 1, x - 2, i] = clip(
+                                working_image[y + 1, x - 2, i] + decay * quant_error * (3 / 48), 0,
+                                1)
+                        if x > 0:
+                            working_image[y + 1, x - 1, i] = clip(
+                                working_image[y + 1, x - 1, i] + decay * quant_error * (5 / 48), 0,
+                                1)
+                        working_image[y + 1, x, i] = clip(
+                            working_image[y + 1, x, i] + decay * quant_error * (7 / 48), 0, 1)
+                        if x < 319:
+                            working_image[y + 1, x + 1, i] = clip(
+                                working_image[y + 1, x + 1, i] + decay * quant_error * (5 / 48),
+                                0, 1)
+                        if x < 318:
+                            working_image[y + 1, x + 2, i] = clip(
+                                working_image[y + 1, x + 2, i] + decay * quant_error * (3 / 48),
+                                0, 1)
+                    if y < 198:
+                        if x > 1:
+                            working_image[y + 2, x - 2, i] = clip(
+                                working_image[y + 2, x - 2, i] + decay * decay * quant_error * (1 / 48), 0,
+                                1)
+                        if x > 0:
+                            working_image[y + 2, x - 1, i] = clip(
+                                working_image[y + 2, x - 1, i] + decay * decay * quant_error * (3 / 48), 0,
+                                1)
+                        working_image[y + 2, x, i] = clip(
+                            working_image[y + 2, x, i] + decay * decay * quant_error * (5 / 48), 0, 1)
+                        if x < 319:
+                            working_image[y + 2, x + 1, i] = clip(
+                                working_image[y + 2, x + 1, i] + decay * decay * quant_error * (3 / 48),
+                                0, 1)
+                        if x < 318:
+                            working_image[y + 2, x + 2, i] = clip(
+                                working_image[y + 2, x + 2, i] + decay * decay * quant_error * (1 / 48),
+                                0, 1)
 
     return np.array(output_4bit, dtype=np.uint8), line_to_palette, total_image_error, np.array(palette_line_errors, dtype=np.float64)
 

From fc35387360c06124867a36ae77353e95e7fd3de0 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Thu, 25 Nov 2021 13:14:22 +0000
Subject: [PATCH 61/82] - Fill any palettes that have fewer than 16 unique
 entries after clustering, using the most frequent pixel colours that are
 not yet in the palette

- Reassign any palettes that are duplicated after clustering
---
 convert.py | 85 +++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 61 insertions(+), 24 deletions(-)

diff --git a/convert.py b/convert.py
index 73ca6a2..86298ea 100644
--- a/convert.py
+++ b/convert.py
@@ -179,9 +179,9 @@ class ClusterPalette:
                  new_total_image_error) = self._dither_image(
                     new_palettes_cam, penalty)
 
-                # TODO: check for duplicate palettes and unused colours
-                #  within a palette
-                self._reassign_unused_palettes(line_to_palette)
+                # TODO: check for unused colours within a palette
+                self._reassign_unused_palettes(
+                    line_to_palette, new_palettes_rgb12_iigs)
 
                 if new_total_image_error >= total_image_error:
                     inner_iterations_since_improvement += 1
@@ -230,46 +230,43 @@ class ClusterPalette:
                 self._colours_cam[
                 self._palette_lines[palette_idx], :, :].reshape(-1, 3))
 
-            # Fix reserved colours from the global palette and pick unique
-            # random colours from the sample points for the remaining initial
-            # centroids.  This tends to increase the number of colours in the
-            # resulting image, and improves quality.
+            # Fix reserved colours from the global palette.
             initial_centroids = self._global_palette
             pixels_rgb_iigs = dither_pyx.convert_cam16ucs_to_rgb12_iigs(
                 palette_pixels)
             seen_colours = set()
             for i in range(self._fixed_colours):
                 seen_colours.add(tuple(initial_centroids[i, :]))
+
+            # Pick unique random colours from the sample points for the
+            # remaining initial centroids.
             for i in range(self._fixed_colours, 16):
-                choice = np.random.randint(0, pixels_rgb_iigs.shape[
-                    0])
+                choice = np.random.randint(0, pixels_rgb_iigs.shape[0])
                 new_colour = pixels_rgb_iigs[choice, :]
                 if tuple(new_colour) in seen_colours:
                     continue
                 seen_colours.add(tuple(new_colour))
                 initial_centroids[i, :] = new_colour
 
-            # If there are any single colours in our source //gs RGB
-            # pixels that represent more than fixed_colour_fraction_threshold
-            # of the pixels, then fix these colours for the palette instead of
-            # clustering them.  This reduces artifacting on blocks of
-            # colour.
+            # If there are any single colours in our source //gs RGB pixels that
+            # represent more than fixed_colour_fraction_threshold of the total,
+            # then fix these colours for the palette instead of clustering
+            # them.  This reduces artifacting on blocks of colour.
             fixed_colour_fraction_threshold = 0.1
+            most_frequent_colours = sorted(list(zip(
+                *np.unique(pixels_rgb_iigs, return_counts=True, axis=0))),
+                key=lambda kv: kv[1], reverse=True)
             fixed_colours = self._fixed_colours
-            for colour, freq in sorted(list(zip(
-                    *np.unique(dither_pyx.convert_cam16ucs_to_rgb12_iigs(
-                        palette_pixels), return_counts=True, axis=0))),
-                    key=lambda kv: kv[1], reverse=True):
+            for colour, freq in most_frequent_colours:
                 if freq < (palette_pixels.shape[0] *
                            fixed_colour_fraction_threshold):
                     break
-                # print(colour, freq)
                 if tuple(colour) not in seen_colours:
                     seen_colours.add(tuple(colour))
                     initial_centroids[fixed_colours, :] = colour
                     fixed_colours += 1
 
-            palettes_rgb12_iigs, palette_error = \
+            palette_rgb12_iigs, palette_error = \
                 dither_pyx.k_means_with_fixed_centroids(
                     n_clusters=16, n_fixed=fixed_colours,
                     samples=palette_pixels,
@@ -277,14 +274,19 @@ class ClusterPalette:
                     max_iterations=1000, tolerance=0.05,
                     rgb12_iigs_to_cam16ucs=self._rgb12_iigs_to_cam16ucs
                 )
+            # If the k-means clustering returned fewer than 16 unique colours,
+            # fill out the remainder with the most common pixels colours that
+            # have not yet been used.
+            palette_rgb12_iigs = self._fill_short_palette(
+                palette_rgb12_iigs, most_frequent_colours)
 
             for i in range(16):
                 new_palettes_cam[palette_idx, i, :] = (
                     np.array(dither_pyx.convert_rgb12_iigs_to_cam(
-                        self._rgb12_iigs_to_cam16ucs, palettes_rgb12_iigs[
+                        self._rgb12_iigs_to_cam16ucs, palette_rgb12_iigs[
                             i]), dtype=np.float32))
 
-            new_palettes_rgb12_iigs[palette_idx, :, :] = palettes_rgb12_iigs
+            new_palettes_rgb12_iigs[palette_idx, :, :] = palette_rgb12_iigs
 
         self._palettes_accepted = False
         return new_palettes_cam, new_palettes_rgb12_iigs
@@ -312,13 +314,48 @@ class ClusterPalette:
                 clusters.cluster_centers_[frequency_order].astype(
                     np.float32)))
 
-    def _reassign_unused_palettes(self, new_line_to_palette):
+    def _fill_short_palette(self, palette_iigs_rgb, most_frequent_colours):
+        """Fill out the palette to 16 unique entries."""
+
+        palette_set = set()
+        for palette_entry in palette_iigs_rgb:
+            palette_set.add(tuple(palette_entry))
+        if len(palette_set) == 16:
+            return palette_iigs_rgb
+
+        # Add most frequent image colours that are not yet in the palette
+        for colour, freq in most_frequent_colours:
+            if tuple(colour) in palette_set:
+                continue
+            palette_set.add(tuple(colour))
+            # print("Added freq %d" % freq)
+            if len(palette_set) == 16:
+                break
+
+        # We couldn't find any more unique colours, fill out with random ones.
+        while len(palette_set) < 16:
+            palette_set.add(
+                tuple(np.random.randint(0, 16, size=3, dtype=np.uint8)))
+
+        return np.array(tuple(palette_set), dtype=np.uint8)
+
+    def _reassign_unused_palettes(self, line_to_palette, palettes_iigs_rgb):
         palettes_used = [False] * 16
-        for palette in new_line_to_palette:
+        for palette in line_to_palette:
             palettes_used[palette] = True
         best_palette_lines = [v for k, v in sorted(list(zip(
             self._palette_line_errors, range(200))))]
 
+        all_palettes = set()
+        for palette_idx, palette_iigs_rgb in enumerate(palettes_iigs_rgb):
+            palette_set = set()
+            for palette_entry in palette_iigs_rgb:
+                palette_set.add(tuple(palette_entry))
+            palette_set = frozenset(palette_set)
+            if palette_set in all_palettes:
+                print("Duplicate palette", palette_idx, palette_set)
+                palettes_used[palette_idx] = False
+
         for palette_idx, palette_used in enumerate(palettes_used):
             if palette_used:
                 continue

From 61b4cbb18499cadc94bc1e872ce287eb9a3d3718 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Thu, 25 Nov 2021 21:33:12 +0000
Subject: [PATCH 62/82] Tweak k-means convergence criterion to return once the
 total centroid position error stops decreasing.

---
 convert.py |  8 +++-----
 dither.pyx | 45 +++++++++++++++++++++------------------------
 2 files changed, 24 insertions(+), 29 deletions(-)

diff --git a/convert.py b/convert.py
index 86298ea..68a7d4d 100644
--- a/convert.py
+++ b/convert.py
@@ -266,14 +266,12 @@ class ClusterPalette:
                     initial_centroids[fixed_colours, :] = colour
                     fixed_colours += 1
 
-            palette_rgb12_iigs, palette_error = \
-                dither_pyx.k_means_with_fixed_centroids(
+            palette_rgb12_iigs = dither_pyx.k_means_with_fixed_centroids(
                     n_clusters=16, n_fixed=fixed_colours,
                     samples=palette_pixels,
                     initial_centroids=initial_centroids,
-                    max_iterations=1000, tolerance=0.05,
-                    rgb12_iigs_to_cam16ucs=self._rgb12_iigs_to_cam16ucs
-                )
+                    max_iterations=1000,
+                    rgb12_iigs_to_cam16ucs=self._rgb12_iigs_to_cam16ucs)
             # If the k-means clustering returned fewer than 16 unique colours,
             # fill out the remainder with the most common pixels colours that
             # have not yet been used.
diff --git a/dither.pyx b/dither.pyx
index d7a63df..60152b2 100644
--- a/dither.pyx
+++ b/dither.pyx
@@ -672,10 +672,9 @@ def convert_cam16ucs_to_rgb12_iigs(float[:, ::1] point_cam):
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def k_means_with_fixed_centroids(
-    int n_clusters, int n_fixed, float[:, ::1] samples, (unsigned char)[:, ::1] initial_centroids, int max_iterations,
-    float tolerance, float [:, ::1] rgb12_iigs_to_cam16ucs):
+    int n_clusters, int n_fixed, float[:, ::1] samples, (unsigned char)[:, ::1] initial_centroids, int max_iterations, float [:, ::1] rgb12_iigs_to_cam16ucs):
 
-    cdef double error, best_error, centroid_movement, total_error
+    cdef double error, best_error, total_error, last_total_error
     cdef int centroid_idx, closest_centroid_idx, i, point_idx
 
     cdef (unsigned char)[:, ::1] centroids_rgb12 = np.copy(initial_centroids)
@@ -686,20 +685,17 @@ def k_means_with_fixed_centroids(
     cdef float[:, ::1] centroid_cam_sample_positions_total
     cdef int[::1] centroid_sample_counts
 
-    # Allow centroids to move on lattice of size 15/255 in sRGB Rec.601 space -- matches //gs palette
-    # map centroids to CAM when computing distances, cluster means etc
-    # Map new centroid back to closest lattice point
-
-    # Return CAM centroids
-
-    cdef int centroid_moved
+    last_total_error = 1e9
     for iteration in range(max_iterations):
-        centroid_moved = 1
         total_error = 0.0
-        centroid_movement = 0.0
         centroid_cam_sample_positions_total = np.zeros((16, 3), dtype=np.float32)
         centroid_sample_counts = np.zeros(16, dtype=np.int32)
 
+        # For each sample, associate it to the closest centroid.  We want to compute the mean of all associated samples
+        # but we do this by accumulating the (coordinate vector) total and number of associated samples.
+        #
+        # Centroid positions are tracked in 4-bit //gs RGB colour space with distances measured in CAM16UCS colour
+        # space.
         for point_idx in range(samples.shape[0]):
             point_cam = samples[point_idx, :]
             best_error = 1e9
@@ -715,28 +711,29 @@ def k_means_with_fixed_centroids(
             centroid_sample_counts[closest_centroid_idx] += 1
             total_error += best_error
 
+        # Since the allowed centroid positions are discrete (and not uniformly spaced in CAM16UCS colour space), we
+        # can't rely on measuring total centroid movement as a termination condition.  e.g. sometimes the nearest
+        # available point to an intended next centroid position will increase the total distance, or centroids may
+        # oscillate between two neighbouring positions.  Instead, we terminate when the total error stops decreasing.
+        if total_error >= last_total_error:
+            break
+        last_total_error = total_error
+
+        # Compute new centroid positions in CAM16UCS colour space
         for centroid_idx in range(n_fixed, n_clusters):
             if centroid_sample_counts[centroid_idx]:
                 for i in range(3):
                     new_centroids_cam[centroid_idx - n_fixed, i] = (
                         centroid_cam_sample_positions_total[centroid_idx, i] / centroid_sample_counts[centroid_idx])
-                centroid_movement += colour_distance_squared(
-                    _convert_rgb12_iigs_to_cam(
-                        rgb12_iigs_to_cam16ucs, centroids_rgb12[centroid_idx]),
-                    new_centroids_cam[centroid_idx - n_fixed, :])
 
-        # Convert all new centroids as a single matrix since _convert_cam16ucs_to_rgb12_iigs has nontrivial overhead
+        # Convert all new centroids back to //gb RGB colour space (done as a single matrix since
+        # _convert_cam16ucs_to_rgb12_iigs has nontrivial overhead)
         new_centroids_rgb12 = _convert_cam16ucs_to_rgb12_iigs(new_centroids_cam)
 
+        # Update positions for non-fixed centroids
         for centroid_idx in range(n_clusters - n_fixed):
             for i in range(3):
                 if centroids_rgb12[centroid_idx + n_fixed, i] != new_centroids_rgb12[centroid_idx, i]:
                     centroids_rgb12[centroid_idx + n_fixed, i] = new_centroids_rgb12[centroid_idx, i]
-                    centroid_moved = 1
 
-        if centroid_movement < tolerance:
-            break
-        if centroid_moved == 0:
-            break
-
-    return centroids_rgb12, total_error
\ No newline at end of file
+    return centroids_rgb12
\ No newline at end of file

From 25e6ed7b887b228991a35a798298cdbcebf6d05f Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Thu, 25 Nov 2021 21:57:27 +0000
Subject: [PATCH 63/82] Preserve palette order when deduplicating entries

Also make sure we're not mutating _global_palette, though this should
currently be harmless.
---
 convert.py | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/convert.py b/convert.py
index 68a7d4d..e78d17e 100644
--- a/convert.py
+++ b/convert.py
@@ -231,7 +231,7 @@ class ClusterPalette:
                 self._palette_lines[palette_idx], :, :].reshape(-1, 3))
 
             # Fix reserved colours from the global palette.
-            initial_centroids = self._global_palette
+            initial_centroids = np.copy(self._global_palette)
             pixels_rgb_iigs = dither_pyx.convert_cam16ucs_to_rgb12_iigs(
                 palette_pixels)
             seen_colours = set()
@@ -258,8 +258,9 @@ class ClusterPalette:
                 key=lambda kv: kv[1], reverse=True)
             fixed_colours = self._fixed_colours
             for colour, freq in most_frequent_colours:
-                if freq < (palette_pixels.shape[0] *
-                           fixed_colour_fraction_threshold):
+                if (freq < (palette_pixels.shape[0] *
+                            fixed_colour_fraction_threshold)) or (
+                        fixed_colours == 16):
                     break
                 if tuple(colour) not in seen_colours:
                     seen_colours.add(tuple(colour))
@@ -267,11 +268,11 @@ class ClusterPalette:
                     fixed_colours += 1
 
             palette_rgb12_iigs = dither_pyx.k_means_with_fixed_centroids(
-                    n_clusters=16, n_fixed=fixed_colours,
-                    samples=palette_pixels,
-                    initial_centroids=initial_centroids,
-                    max_iterations=1000,
-                    rgb12_iigs_to_cam16ucs=self._rgb12_iigs_to_cam16ucs)
+                n_clusters=16, n_fixed=fixed_colours,
+                samples=palette_pixels,
+                initial_centroids=initial_centroids,
+                max_iterations=1000,
+                rgb12_iigs_to_cam16ucs=self._rgb12_iigs_to_cam16ucs)
             # If the k-means clustering returned fewer than 16 unique colours,
             # fill out the remainder with the most common pixels colours that
             # have not yet been used.
@@ -315,9 +316,12 @@ class ClusterPalette:
     def _fill_short_palette(self, palette_iigs_rgb, most_frequent_colours):
         """Fill out the palette to 16 unique entries."""
 
-        palette_set = set()
+        # We want to maintain order of insertion so that we respect the
+        # ordering of fixed colours in the palette.  Python doesn't have an
+        # orderedset but dicts preserve insertion order.
+        palette_set = {}
         for palette_entry in palette_iigs_rgb:
-            palette_set.add(tuple(palette_entry))
+            palette_set[tuple(palette_entry)] = True
         if len(palette_set) == 16:
             return palette_iigs_rgb
 
@@ -325,17 +329,17 @@ class ClusterPalette:
         for colour, freq in most_frequent_colours:
             if tuple(colour) in palette_set:
                 continue
-            palette_set.add(tuple(colour))
+            palette_set[tuple(colour)] = True
             # print("Added freq %d" % freq)
             if len(palette_set) == 16:
                 break
 
         # We couldn't find any more unique colours, fill out with random ones.
         while len(palette_set) < 16:
-            palette_set.add(
-                tuple(np.random.randint(0, 16, size=3, dtype=np.uint8)))
+            palette_set[
+                tuple(np.random.randint(0, 16, size=3, dtype=np.uint8))] = True
 
-        return np.array(tuple(palette_set), dtype=np.uint8)
+        return np.array(tuple(palette_set.keys()), dtype=np.uint8)
 
     def _reassign_unused_palettes(self, line_to_palette, palettes_iigs_rgb):
         palettes_used = [False] * 16

From cf63a357974f1db1d3649ec5ac48095435f8c036 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Fri, 26 Nov 2021 09:54:42 +0000
Subject: [PATCH 64/82] Cython tweaks to remove some unnecessary C code

---
 dither.pyx | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/dither.pyx b/dither.pyx
index 60152b2..e4be0d4 100644
--- a/dither.pyx
+++ b/dither.pyx
@@ -583,6 +583,7 @@ cdef struct PaletteSelection:
     int palette_idx
     double total_error
 
+
 @cython.boundscheck(False)
 @cython.wraparound(False)
 cdef PaletteSelection best_palette_for_line(float [:, ::1] line_cam, float[:, :, ::1] palettes_cam, int last_palette_idx, float last_penalty) nogil:
@@ -619,7 +620,7 @@ cdef PaletteSelection best_palette_for_line(float [:, ::1] line_cam, float[:, :,
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-cdef float[::1] _convert_rgb12_iigs_to_cam(float [:, ::1] rgb12_iigs_to_cam16ucs, (unsigned char)[::1] point_rgb12):
+cdef float[::1] _convert_rgb12_iigs_to_cam(float [:, ::1] rgb12_iigs_to_cam16ucs, (unsigned char)[::1] point_rgb12) nogil:
     cdef int rgb12 = (point_rgb12[0] << 8) | (point_rgb12[1] << 4) | point_rgb12[2]
     return rgb12_iigs_to_cam16ucs[rgb12]
 
@@ -631,6 +632,7 @@ import colour
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
+@cython.cdivision(True)
 cdef float[:, ::1] linear_to_srgb_array(float[:, ::1] a, float gamma=2.4):
     cdef int i, j
     cdef float[:, ::1] res = np.empty_like(a, dtype=np.float32)
@@ -671,6 +673,7 @@ def convert_cam16ucs_to_rgb12_iigs(float[:, ::1] point_cam):
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
+@cython.cdivision(True)
 def k_means_with_fixed_centroids(
     int n_clusters, int n_fixed, float[:, ::1] samples, (unsigned char)[:, ::1] initial_centroids, int max_iterations, float [:, ::1] rgb12_iigs_to_cam16ucs):
 

From 1075ff0136c536f6094c09d4b636d1ded3d3152c Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Fri, 26 Nov 2021 10:36:39 +0000
Subject: [PATCH 65/82] Tidy a bit and remove support for tunable parameters
 that are no longer needed

---
 convert.py | 13 +++++--------
 dither.pyx | 35 +++++++++++------------------------
 2 files changed, 16 insertions(+), 32 deletions(-)

diff --git a/convert.py b/convert.py
index e78d17e..fb2c248 100644
--- a/convert.py
+++ b/convert.py
@@ -135,7 +135,7 @@ class ClusterPalette:
         # print("Perfect image error:", total_image_error)
         return image_rgb
 
-    def _dither_image(self, palettes_cam, penalty):
+    def _dither_image(self, palettes_cam):
         # Suppress divide by zero warning,
         # https://github.com/colour-science/colour/issues/900
         with colour.utilities.suppress_warnings(python_warnings=True):
@@ -145,7 +145,7 @@ class ClusterPalette:
         output_4bit, line_to_palette, total_image_error, palette_line_errors = \
             dither_pyx.dither_shr(
                 self._image_rgb, palettes_cam, palettes_linear_rgb,
-                self._rgb24_to_cam16ucs, float(penalty))
+                self._rgb24_to_cam16ucs)
 
         # Update map of palettes to image lines for which the palette was the
         # best match
@@ -159,7 +159,7 @@ class ClusterPalette:
         return (output_4bit, line_to_palette, palettes_linear_rgb,
                 total_image_error)
 
-    def iterate(self, penalty: float, max_inner_iterations: int,
+    def iterate(self, max_inner_iterations: int,
                 max_outer_iterations: int):
         total_image_error = 1e9
 
@@ -176,10 +176,8 @@ class ClusterPalette:
                 # Recompute image with proposed palettes and check whether it
                 # has lower total image error than our previous best.
                 (output_4bit, line_to_palette, palettes_linear_rgb,
-                 new_total_image_error) = self._dither_image(
-                    new_palettes_cam, penalty)
+                 new_total_image_error) = self._dither_image(new_palettes_cam)
 
-                # TODO: check for unused colours within a palette
                 self._reassign_unused_palettes(
                     line_to_palette, new_palettes_rgb12_iigs)
 
@@ -444,7 +442,6 @@ def main():
                         gamma=args.gamma_correct)).astype(np.float32) / 255
 
     # TODO: flags
-    penalty = 1  # 1e18  # TODO: is this needed any more?
     inner_iterations = 10
     outer_iterations = 20
 
@@ -466,7 +463,7 @@ def main():
     seq = 0
     for (new_total_image_error, output_4bit, line_to_palette,
          palettes_rgb12_iigs, palettes_linear_rgb) in cluster_palette.iterate(
-        penalty, inner_iterations, outer_iterations):
+        inner_iterations, outer_iterations):
 
         if args.verbose and total_image_error is not None:
             print("Improved quality +%f%% (%f)" % (
diff --git a/dither.pyx b/dither.pyx
index e4be0d4..66db0c9 100644
--- a/dither.pyx
+++ b/dither.pyx
@@ -163,6 +163,7 @@ cdef inline float[::1] convert_rgb_to_cam16ucs(float[:, ::1] rgb_to_cam16ucs, fl
     cdef unsigned int rgb_24bit = (<unsigned int>(r*255) << 16) + (<unsigned int>(g*255) << 8) + <unsigned int>(b*255)
     return rgb_to_cam16ucs[rgb_24bit]
 
+
 @cython.boundscheck(False)
 @cython.wraparound(False)
 cdef inline float fabs(float value) nogil:
@@ -175,12 +176,6 @@ cdef inline double colour_distance_squared(float[::1] colour1, float[::1] colour
     return (colour1[0] - colour2[0]) ** 2 + (colour1[1] - colour2[1]) ** 2 + (colour1[2] - colour2[2]) ** 2
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
-cdef inline float colour_distance(float[::1] colour1, float[::1] colour2) nogil:
-    return fabs(colour1[0] - colour2[0]) + fabs(colour1[1] - colour2[1]) + fabs(colour1[2] - colour2[2])
-
-
 # Perform error diffusion to a single image row.
 #
 # Args:
@@ -353,7 +348,6 @@ def dither_shr_perfect(
     cdef int palette_size = full_palette_rgb.shape[0]
 
     cdef float decay = 0.5
-    cdef float min_quant_error = 0.0  # 0.02
     cdef int floyd_steinberg = 1
 
     total_image_error = 0.0
@@ -373,13 +367,11 @@ def dither_shr_perfect(
                 if distance < best_distance:
                     best_distance = distance
                     best_colour_idx = idx
-            best_colour_rgb = full_palette_rgb[best_colour_idx]
+            best_colour_rgb = full_palette_rgb[best_colour_idx, :]
             total_image_error += best_distance
 
             for i in range(3):
                 quant_error = working_image[y, x, i] - best_colour_rgb[i]
-                if abs(quant_error) <= min_quant_error:
-                    quant_error = 0
 
                 working_image[y, x, i] = best_colour_rgb[i]
                 if floyd_steinberg:
@@ -455,7 +447,7 @@ def dither_shr_perfect(
 @cython.wraparound(False)
 def dither_shr(
         float[:, :, ::1] input_rgb, float[:, :, ::1] palettes_cam, float[:, :, ::1] palettes_rgb,
-        float[:,::1] rgb_to_cam16ucs, float penalty):
+        float[:,::1] rgb_to_cam16ucs):
     cdef int y, x, idx, best_colour_idx, best_palette, i
     cdef double best_distance, distance, total_image_error
     cdef float[::1] best_colour_rgb, pixel_cam
@@ -471,7 +463,6 @@ def dither_shr(
     cdef PaletteSelection palette_line
 
     cdef float decay = 0.5
-    cdef float min_quant_error = 0.0  # 0.02
     cdef int floyd_steinberg = 1
 
     best_palette = -1
@@ -481,7 +472,7 @@ def dither_shr(
             line_cam[x, :] = convert_rgb_to_cam16ucs(
                 rgb_to_cam16ucs, working_image[y,x,0], working_image[y,x,1], working_image[y,x,2])
 
-        palette_line = best_palette_for_line(line_cam, palettes_cam, best_palette, penalty)
+        palette_line = best_palette_for_line(line_cam, palettes_cam, best_palette)
         best_palette = palette_line.palette_idx
         palette_line_errors[y] = palette_line.total_error
 
@@ -506,8 +497,6 @@ def dither_shr(
 
             for i in range(3):
                 quant_error = working_image[y, x, i] - best_colour_rgb[i]
-                if abs(quant_error) <= min_quant_error:
-                    quant_error = 0
 
                 working_image[y, x, i] = best_colour_rgb[i]
                 if floyd_steinberg:
@@ -586,7 +575,7 @@ cdef struct PaletteSelection:
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-cdef PaletteSelection best_palette_for_line(float [:, ::1] line_cam, float[:, :, ::1] palettes_cam, int last_palette_idx, float last_penalty) nogil:
+cdef PaletteSelection best_palette_for_line(float [:, ::1] line_cam, float[:, :, ::1] palettes_cam, int last_palette_idx) nogil:
     cdef int palette_idx, best_palette_idx, palette_entry_idx, pixel_idx
     cdef double best_total_dist, total_dist, best_pixel_dist, pixel_dist
     cdef float[:, ::1] palette_cam
@@ -594,17 +583,15 @@ cdef PaletteSelection best_palette_for_line(float [:, ::1] line_cam, float[:, :,
 
     best_total_dist = 1e9
     best_palette_idx = -1
-    cdef float penalty
     cdef int line_size = line_cam.shape[0]
     for palette_idx in range(16):
         palette_cam = palettes_cam[palette_idx, :, :]
-        penalty = last_penalty if palette_idx == last_palette_idx else 1.0
         total_dist = 0
         for pixel_idx in range(line_size):
             pixel_cam = line_cam[pixel_idx]
             best_pixel_dist = 1e9
             for palette_entry_idx in range(16):
-                pixel_dist = colour_distance_squared(pixel_cam, palette_cam[palette_entry_idx, :]) * penalty
+                pixel_dist = colour_distance_squared(pixel_cam, palette_cam[palette_entry_idx, :])
                 if pixel_dist < best_pixel_dist:
                     best_pixel_dist = pixel_dist
             total_dist += best_pixel_dist
@@ -650,13 +637,12 @@ cdef (unsigned char)[:, ::1] _convert_cam16ucs_to_rgb12_iigs(float[:, ::1] point
     cdef float[:, ::1] rgb
     cdef (float)[:, ::1] rgb12_iigs
 
-    # Convert CAM16UCS input to RGB
-    # TODO: this dynamically constructs a path on the graph of colour conversions every time, which is
-    #  presumably not very efficient.  However, colour.convert doesn't provide a way to cache the composed conversion
-    #  function so we'd have to build it ourselves (https://github.com/colour-science/colour/issues/905)
+    # Convert CAM16UCS input to RGB.  Even though this dynamically constructs a path on the graph of colour conversions
+    # every time, in practice this seems to have a negligible overhead compared to the actual conversion functions.
     with colour.utilities.suppress_warnings(python_warnings=True):
         rgb = colour.convert(point_cam, "CAM16UCS", "RGB").astype(np.float32)
 
+    # TODO: precompute this conversion matrix since it's static.  This accounts for about 10% of the CPU time here.
     rgb12_iigs = np.clip(
         # Convert to Rec.601 R'G'B'
         colour.YCbCr_to_RGB(
@@ -675,7 +661,8 @@ def convert_cam16ucs_to_rgb12_iigs(float[:, ::1] point_cam):
 @cython.wraparound(False)
 @cython.cdivision(True)
 def k_means_with_fixed_centroids(
-    int n_clusters, int n_fixed, float[:, ::1] samples, (unsigned char)[:, ::1] initial_centroids, int max_iterations, float [:, ::1] rgb12_iigs_to_cam16ucs):
+    int n_clusters, int n_fixed, float[:, ::1] samples, (unsigned char)[:, ::1] initial_centroids, int max_iterations,
+    float [:, ::1] rgb12_iigs_to_cam16ucs):
 
     cdef double error, best_error, total_error, last_total_error
     cdef int centroid_idx, closest_centroid_idx, i, point_idx

From 4221c007010d9b32b474b8deb32931ba25a16813 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Fri, 26 Nov 2021 12:08:48 +0000
Subject: [PATCH 66/82] Split dither into dither_dhr and dither_shr

---
 convert.py                   |  17 +-
 dither_dhr.pyx               | 331 +++++++++++++++++++++++++++++++++++
 dither.pyx => dither_shr.pyx | 311 +-------------------------------
 setup.py                     |   2 +-
 4 files changed, 342 insertions(+), 319 deletions(-)
 create mode 100644 dither_dhr.pyx
 rename dither.pyx => dither_shr.pyx (59%)

diff --git a/convert.py b/convert.py
index fb2c248..7367f05 100644
--- a/convert.py
+++ b/convert.py
@@ -16,7 +16,8 @@ from os import environ
 environ['PYGAME_HIDE_SUPPORT_PROMPT'] = '1'
 import pygame
 
-import dither as dither_pyx
+import dither_dhr as dither_dhr_pyx
+import dither_shr as dither_shr_pyx
 import dither_pattern
 import image as image_py
 import palette as palette_py
@@ -129,7 +130,7 @@ class ClusterPalette:
                 self._rgb12_iigs_to_cam16ucs, "CAM16UCS", "RGB").astype(
                 np.float32)
 
-        total_image_error, image_rgb = dither_pyx.dither_shr_perfect(
+        total_image_error, image_rgb = dither_shr_pyx.dither_shr_perfect(
             source_image, self._rgb12_iigs_to_cam16ucs, full_palette_linear_rgb,
             self._rgb24_to_cam16ucs)
         # print("Perfect image error:", total_image_error)
@@ -143,7 +144,7 @@ class ClusterPalette:
                 palettes_cam, "CAM16UCS", "RGB").astype(np.float32)
 
         output_4bit, line_to_palette, total_image_error, palette_line_errors = \
-            dither_pyx.dither_shr(
+            dither_shr_pyx.dither_shr(
                 self._image_rgb, palettes_cam, palettes_linear_rgb,
                 self._rgb24_to_cam16ucs)
 
@@ -230,7 +231,7 @@ class ClusterPalette:
 
             # Fix reserved colours from the global palette.
             initial_centroids = np.copy(self._global_palette)
-            pixels_rgb_iigs = dither_pyx.convert_cam16ucs_to_rgb12_iigs(
+            pixels_rgb_iigs = dither_shr_pyx.convert_cam16ucs_to_rgb12_iigs(
                 palette_pixels)
             seen_colours = set()
             for i in range(self._fixed_colours):
@@ -265,7 +266,7 @@ class ClusterPalette:
                     initial_centroids[fixed_colours, :] = colour
                     fixed_colours += 1
 
-            palette_rgb12_iigs = dither_pyx.k_means_with_fixed_centroids(
+            palette_rgb12_iigs = dither_shr_pyx.k_means_with_fixed_centroids(
                 n_clusters=16, n_fixed=fixed_colours,
                 samples=palette_pixels,
                 initial_centroids=initial_centroids,
@@ -279,7 +280,7 @@ class ClusterPalette:
 
             for i in range(16):
                 new_palettes_cam[palette_idx, i, :] = (
-                    np.array(dither_pyx.convert_rgb12_iigs_to_cam(
+                    np.array(dither_shr_pyx.convert_rgb12_iigs_to_cam(
                         self._rgb12_iigs_to_cam16ucs, palette_rgb12_iigs[
                             i]), dtype=np.float32))
 
@@ -307,7 +308,7 @@ class ClusterPalette:
                 list(palette_freq.items()), key=lambda kv: kv[1], reverse=True)]
 
         self._global_palette = (
-            dither_pyx.convert_cam16ucs_to_rgb12_iigs(
+            dither_shr_pyx.convert_cam16ucs_to_rgb12_iigs(
                 clusters.cluster_centers_[frequency_order].astype(
                     np.float32)))
 
@@ -486,7 +487,7 @@ def main():
         output_srgb = (image_py.linear_to_srgb(output_rgb)).astype(np.uint8)
 
         # dither = dither_pattern.PATTERNS[args.dither]()
-        # bitmap = dither_pyx.dither_image(
+        # bitmap = dither_dhr_pyx.dither_image(
         #     screen, rgb, dither, args.lookahead, args.verbose, rgb24_to_cam16ucs)
 
         # Show output image by rendering in target palette
diff --git a/dither_dhr.pyx b/dither_dhr.pyx
new file mode 100644
index 0000000..4f6ae64
--- /dev/null
+++ b/dither_dhr.pyx
@@ -0,0 +1,331 @@
+# cython: infer_types=True
+# cython: profile=False
+
+cimport cython
+import numpy as np
+from libc.stdlib cimport malloc, free
+
+
+# TODO: use a cdef class
+# C representation of dither_pattern.DitherPattern data, for efficient access.
+cdef struct Dither:
+
+    float* pattern   # Flattened dither pattern
+    int x_shape
+    int y_shape
+    int x_origin
+    int y_origin
+
+
+cdef float clip(float a, float min_value, float max_value) nogil:
+    return min(max(a, min_value), max_value)
+
+
+# Compute left-hand bounding box for dithering at horizontal position x.
+cdef int dither_bounds_xl(Dither *dither, int x) nogil:
+    cdef int el = max(dither.x_origin - x, 0)
+    cdef int xl = x - dither.x_origin + el
+    return xl
+
+
+#Compute right-hand bounding box for dithering at horizontal position x.
+cdef int dither_bounds_xr(Dither *dither, int x_res, int x) nogil:
+    cdef int er = min(dither.x_shape, x_res - x)
+    cdef int xr = x - dither.x_origin + er
+    return xr
+
+
+# Compute upper bounding box for dithering at vertical position y.
+cdef int dither_bounds_yt(Dither *dither, int y) nogil:
+    cdef int et = max(dither.y_origin - y, 0)
+    cdef int yt = y - dither.y_origin + et
+
+    return yt
+
+
+# Compute lower bounding box for dithering at vertical position y.
+cdef int dither_bounds_yb(Dither *dither, int y_res, int y) nogil:
+    cdef int eb = min(dither.y_shape, y_res - y)
+    cdef int yb = y - dither.y_origin + eb
+    return yb
+
+
+cdef inline unsigned char shift_pixel_window(
+        unsigned char last_pixels,
+        unsigned int next_pixels,
+        unsigned char shift_right_by,
+        unsigned char window_width) nogil:
+    """Right-shift a sliding window of n pixels to incorporate new pixels.
+
+    Args:
+        last_pixels: n-bit value representing n pixels from left up to current position (MSB = current pixel).
+        next_pixels: n-bit value representing n pixels to right of current position (LSB = pixel to right)
+        shift_right_by: how many pixels of next_pixels to shift into the sliding window
+        window_width: how many pixels to maintain in the sliding window (must be <= 8)
+
+    Returns: n-bit value representing shifted pixel window
+    """
+    cdef unsigned char window_mask = 0xff >> (8 - window_width)
+    cdef unsigned int shifted_next_pixels
+
+    if window_width > shift_right_by:
+        shifted_next_pixels = next_pixels << (window_width - shift_right_by)
+    else:
+        shifted_next_pixels = next_pixels >> (shift_right_by - window_width)
+    return ((last_pixels >> shift_right_by) | shifted_next_pixels) & window_mask
+
+
+# Look ahead a number of pixels and compute choice for next pixel with lowest total squared error after dithering.
+#
+# Args:
+#     dither: error diffusion pattern to apply
+#     palette_cam16, palette_rgb: (n-bit pixel value, phase, 3) palette colours as CAM16-UCS and RGB values
+#     image_rgb: RGB image in the process of dithering
+#     x, y: current horizontal and vertical screen position
+#     lookahead: how many horizontal pixels to look ahead
+#     last_pixels: n-bit sliding window of the pixel values already chosen to the left of x
+#     x_res: horizontal screen resolution
+#     rgb_to_cam16ucs: lookup table indexed by 24-bit RGB value giving the CAM16-UCS colour
+#     palette_depth: number of bits (n) in the sliding pixel window used to index the palettes
+#
+# Returns: value in 0 .. 2**lookahead - 1 (LSB = pixel at x) giving the lowest-error choice of the next lookahead pixels
+#
+@cython.boundscheck(False)
+@cython.wraparound(False)
+cdef int dither_lookahead(Dither* dither, float[:, :, ::1] palette_cam16, float[:, :, ::1] palette_rgb,
+        float[:, :, ::1] image_rgb, int x, int y, int lookahead, unsigned char last_pixels,
+        int x_res, float[:,::1] rgb_to_cam16ucs, unsigned char palette_depth) nogil:
+    cdef int candidate_pixels, i, j
+    cdef float[3] quant_error
+    cdef int best
+    cdef float best_error = 2**31-1
+    cdef float total_error
+    cdef unsigned char next_pixels
+    cdef int phase
+    cdef float[::1] lah_cam16ucs
+
+    # Don't bother dithering past the lookahead horizon or edge of screen.
+    cdef int xxr = min(x + lookahead, x_res)
+
+    cdef int lah_shape1 = xxr - x
+    cdef int lah_shape2 = 3
+    cdef float *lah_image_rgb = <float *> malloc(lah_shape1 * lah_shape2 * sizeof(float))
+
+    # For each 2**lookahead possibilities for the on/off state of the next lookahead pixels, apply error diffusion
+    # and compute the total squared error to the source image.  Since we only have two possible colours for each
+    # given pixel (dependent on the state already chosen for pixels to the left), we need to look beyond local minima.
+    # i.e. it might be better to make a sub-optimal choice for this pixel if it allows access to much better pixel
+    # colours at later positions.
+    for candidate_pixels in range(1 << lookahead):
+        # Working copy of input pixels
+        for i in range(xxr - x):
+            for j in range(3):
+                lah_image_rgb[i * lah_shape2 + j] = image_rgb[y, x+i, j]
+
+        total_error = 0
+        # Apply dithering to lookahead horizon or edge of screen
+        for i in range(xxr - x):
+            xl = dither_bounds_xl(dither, i)
+            xr = dither_bounds_xr(dither, xxr - x, i)
+            phase = (x + i) % 4
+
+            next_pixels = shift_pixel_window(
+                    last_pixels, next_pixels=candidate_pixels, shift_right_by=i+1, window_width=palette_depth)
+
+            # We don't update the input at position x (since we've already chosen fixed outputs), but we do propagate
+            # quantization errors to positions >x  so we can compensate for how good/bad these choices were.  i.e. the
+            # next_pixels choices are fixed, but we can still distribute quantization error from having made these
+            # choices, in order to compute the total error.
+            for j in range(3):
+                quant_error[j] = lah_image_rgb[i * lah_shape2 + j] - palette_rgb[next_pixels, phase, j]
+            apply_one_line(dither, xl, xr, i, lah_image_rgb, lah_shape2, quant_error)
+
+            lah_cam16ucs = convert_rgb_to_cam16ucs(
+                rgb_to_cam16ucs, lah_image_rgb[i*lah_shape2], lah_image_rgb[i*lah_shape2+1],
+                lah_image_rgb[i*lah_shape2+2])
+            total_error += colour_distance_squared(lah_cam16ucs, palette_cam16[next_pixels, phase])
+
+            if total_error >= best_error:
+                # No need to continue
+                break
+
+        if total_error < best_error:
+            best_error = total_error
+            best = candidate_pixels
+
+    free(lah_image_rgb)
+    return best
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+cdef inline float[::1] convert_rgb_to_cam16ucs(float[:, ::1] rgb_to_cam16ucs, float r, float g, float b) nogil:
+    cdef unsigned int rgb_24bit = (<unsigned int>(r*255) << 16) + (<unsigned int>(g*255) << 8) + <unsigned int>(b*255)
+    return rgb_to_cam16ucs[rgb_24bit]
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+cdef inline float fabs(float value) nogil:
+    return -value if value < 0 else value
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+cdef inline double colour_distance_squared(float[::1] colour1, float[::1] colour2) nogil:
+    return (colour1[0] - colour2[0]) ** 2 + (colour1[1] - colour2[1]) ** 2 + (colour1[2] - colour2[2]) ** 2
+
+
+# Perform error diffusion to a single image row.
+#
+# Args:
+#     dither: dither pattern to apply
+#     xl: lower x bounding box
+#     xr: upper x bounding box
+#     x: starting horizontal position to apply error diffusion
+#     image: flattened array of RGB pixel data for a single image line (3 floats per pixel), to be mutated.
+#     image_shape1: stride between consecutive pixels in image, i.e. the number of colour channels (3)
+#     quant_error: RGB quantization error to be diffused
+#
+cdef void apply_one_line(Dither* dither, int xl, int xr, int x, float[] image, int image_shape1,
+        float[] quant_error) nogil:
+
+    cdef int i, j
+    cdef float error_fraction
+
+    for i in range(xl, xr):
+        error_fraction = dither.pattern[i - x + dither.x_origin]
+        for j in range(3):
+            image[i * image_shape1 + j] = clip(image[i * image_shape1 + j] + error_fraction * quant_error[j], 0, 1)
+
+
+# Perform error diffusion across multiple image rows.
+#
+# Args:
+#     dither: dither pattern to apply
+#     x_res: horizontal image resolution
+#     y_res: vertical image resolution
+#     x: starting horizontal position to apply error diffusion
+#     y: starting vertical position to apply error diffusion
+#     image: RGB pixel data, to be mutated
+#     quant_error: RGB quantization error to be diffused
+#
+@cython.boundscheck(False)
+@cython.wraparound(False)
+cdef void apply(Dither* dither, int x_res, int y_res, int x, int y, float[:,:,::1] image, float[] quant_error) nogil:
+
+    cdef int i, j, k
+
+    cdef int yt = dither_bounds_yt(dither, y)
+    cdef int yb = dither_bounds_yb(dither, y_res, y)
+    cdef int xl = dither_bounds_xl(dither, x)
+    cdef int xr = dither_bounds_xr(dither, x_res, x)
+
+    cdef float error_fraction
+    for i in range(yt, yb):
+        for j in range(xl, xr):
+            error_fraction = dither.pattern[(i - y) * dither.x_shape + j - x + dither.x_origin]
+            for k in range(3):
+                image[i,j,k] = clip(image[i,j,k] + error_fraction * quant_error[k], 0, 1)
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+cdef image_nbit_to_bitmap(
+    (unsigned char)[:, ::1] image_nbit, unsigned int x_res, unsigned int y_res, unsigned char palette_depth):
+    cdef unsigned int x, y
+    bitmap = np.zeros((y_res, x_res), dtype=bool)
+    for y in range(y_res):
+        for x in range(x_res):
+            # MSB of each array element is the pixel state at (x, y)
+            bitmap[y, x] = image_nbit[y, x] >> (palette_depth - 1)
+    return bitmap
+
+
+# Dither a source image
+#
+# Args:
+#     screen: screen.Screen object
+#     image_rgb: input RGB image
+#     dither: dither_pattern.DitherPattern to apply during dithering
+#     lookahead: how many x positions to look ahead to optimize colour choices
+#     verbose: whether to output progress during image conversion
+#
+# Returns: boolean bitmap array of shape (y_res, x_res) holding the on/off state of each output pixel
+#
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def dither_image(
+        screen, float[:, :, ::1] image_rgb, dither, int lookahead, unsigned char verbose, float[:,::1] rgb_to_cam16ucs):
+    cdef int y, x
+    cdef unsigned char i, j, pixels_nbit, phase
+    # cdef float[3] input_pixel_rgb
+    cdef float[3] quant_error
+    cdef unsigned char output_pixel_nbit
+    cdef unsigned char best_next_pixels
+    cdef float[3] output_pixel_rgb
+
+    # Hoist some python attribute accesses into C variables for efficient access during the main loop
+
+    cdef int yres = screen.Y_RES
+    cdef int xres = screen.X_RES
+
+    # TODO: convert this instead of storing on palette?
+    cdef float[:, :, ::1] palette_cam16 = np.zeros((len(screen.palette.CAM16UCS), 4, 3), dtype=np.float32)
+    for pixels_nbit, phase in screen.palette.CAM16UCS.keys():
+        for i in range(3):
+            palette_cam16[pixels_nbit, phase, i] = screen.palette.CAM16UCS[pixels_nbit, phase][i]
+
+    cdef float[:, :, ::1] palette_rgb = np.zeros((len(screen.palette.RGB), 4, 3), dtype=np.float32)
+    for pixels_nbit, phase in screen.palette.RGB.keys():
+        for i in range(3):
+            palette_rgb[pixels_nbit, phase, i] = screen.palette.RGB[pixels_nbit, phase][i] / 255
+
+    cdef Dither cdither
+    cdither.y_shape = dither.PATTERN.shape[0]
+    cdither.x_shape = dither.PATTERN.shape[1]
+    cdither.y_origin = dither.ORIGIN[0]
+    cdither.x_origin = dither.ORIGIN[1]
+    # TODO: should be just as efficient to use a memoryview?
+    cdither.pattern = <float *> malloc(cdither.x_shape * cdither.y_shape * sizeof(float))
+    for i in range(cdither.y_shape):
+        for j in range(cdither.x_shape):
+            cdither.pattern[i * cdither.x_shape + j] = dither.PATTERN[i, j]
+
+    cdef unsigned char palette_depth = screen.palette.PALETTE_DEPTH
+
+    # The nbit image representation contains the trailing n dot values as an n-bit value with MSB representing the
+    # current pixel.  This choice (cf LSB) is slightly awkward but matches the DHGR behaviour that bit positions in
+    # screen memory map LSB to MSB from L to R.  The value of n is chosen by the palette depth, i.e. how many trailing
+    # dot positions are used to determine the colour of a given pixel.
+    cdef (unsigned char)[:, ::1] image_nbit = np.empty((image_rgb.shape[0], image_rgb.shape[1]), dtype=np.uint8)
+
+    for y in range(yres):
+        if verbose:
+            print("%d/%d" % (y, yres))
+        output_pixel_nbit = 0
+        for x in range(xres):
+            # Compute all possible 2**N choices of n-bit pixel colours for positions x .. x + lookahead
+            # lookahead_palette_choices_nbit = lookahead_options(lookahead, output_pixel_nbit)
+            # Apply error diffusion for each of these 2**N choices, and compute which produces the closest match
+            # to the source image over the succeeding N pixels
+            best_next_pixels = dither_lookahead(
+                    &cdither, palette_cam16, palette_rgb, image_rgb, x, y, lookahead, output_pixel_nbit, xres,
+                    rgb_to_cam16ucs, palette_depth)
+            # Apply best choice for next 1 pixel
+            output_pixel_nbit = shift_pixel_window(
+                    output_pixel_nbit, best_next_pixels, shift_right_by=1, window_width=palette_depth)
+
+            # Apply error diffusion from chosen output pixel value
+            for i in range(3):
+                output_pixel_rgb[i] = palette_rgb[output_pixel_nbit, x % 4, i]
+                quant_error[i] = image_rgb[y,x,i] - output_pixel_rgb[i]
+            apply(&cdither, xres, yres, x, y, image_rgb, quant_error)
+
+            # Update image with our chosen image pixel
+            image_nbit[y, x] = output_pixel_nbit
+            for i in range(3):
+                image_rgb[y, x, i] = output_pixel_rgb[i]
+
+    free(cdither.pattern)
+    return image_nbit_to_bitmap(image_nbit, xres, yres, palette_depth)
diff --git a/dither.pyx b/dither_shr.pyx
similarity index 59%
rename from dither.pyx
rename to dither_shr.pyx
index 66db0c9..273315c 100644
--- a/dither.pyx
+++ b/dither_shr.pyx
@@ -3,160 +3,12 @@
 
 cimport cython
 import numpy as np
-from libc.stdlib cimport malloc, free
-
-
-# TODO: use a cdef class
-# C representation of dither_pattern.DitherPattern data, for efficient access.
-cdef struct Dither:
-
-    float* pattern   # Flattened dither pattern
-    int x_shape
-    int y_shape
-    int x_origin
-    int y_origin
-
 
+# TODO: move these into a common module
 cdef float clip(float a, float min_value, float max_value) nogil:
     return min(max(a, min_value), max_value)
 
 
-# Compute left-hand bounding box for dithering at horizontal position x.
-cdef int dither_bounds_xl(Dither *dither, int x) nogil:
-    cdef int el = max(dither.x_origin - x, 0)
-    cdef int xl = x - dither.x_origin + el
-    return xl
-
-
-#Compute right-hand bounding box for dithering at horizontal position x.
-cdef int dither_bounds_xr(Dither *dither, int x_res, int x) nogil:
-    cdef int er = min(dither.x_shape, x_res - x)
-    cdef int xr = x - dither.x_origin + er
-    return xr
-
-
-# Compute upper bounding box for dithering at vertical position y.
-cdef int dither_bounds_yt(Dither *dither, int y) nogil:
-    cdef int et = max(dither.y_origin - y, 0)
-    cdef int yt = y - dither.y_origin + et
-
-    return yt
-
-
-# Compute lower bounding box for dithering at vertical position y.
-cdef int dither_bounds_yb(Dither *dither, int y_res, int y) nogil:
-    cdef int eb = min(dither.y_shape, y_res - y)
-    cdef int yb = y - dither.y_origin + eb
-    return yb
-
-
-cdef inline unsigned char shift_pixel_window(
-        unsigned char last_pixels,
-        unsigned int next_pixels,
-        unsigned char shift_right_by,
-        unsigned char window_width) nogil:
-    """Right-shift a sliding window of n pixels to incorporate new pixels.
-
-    Args:
-        last_pixels: n-bit value representing n pixels from left up to current position (MSB = current pixel).
-        next_pixels: n-bit value representing n pixels to right of current position (LSB = pixel to right)
-        shift_right_by: how many pixels of next_pixels to shift into the sliding window
-        window_width: how many pixels to maintain in the sliding window (must be <= 8)
-
-    Returns: n-bit value representing shifted pixel window
-    """
-    cdef unsigned char window_mask = 0xff >> (8 - window_width)
-    cdef unsigned int shifted_next_pixels
-
-    if window_width > shift_right_by:
-        shifted_next_pixels = next_pixels << (window_width - shift_right_by)
-    else:
-        shifted_next_pixels = next_pixels >> (shift_right_by - window_width)
-    return ((last_pixels >> shift_right_by) | shifted_next_pixels) & window_mask
-
-
-# Look ahead a number of pixels and compute choice for next pixel with lowest total squared error after dithering.
-#
-# Args:
-#     dither: error diffusion pattern to apply
-#     palette_rgb: matrix of all n-bit colour palette RGB values
-#     image_rgb: RGB image in the process of dithering
-#     x: current horizontal screen position
-#     y: current vertical screen position
-#     options_nbit: matrix of (2**lookahead, lookahead) possible n-bit colour choices at positions x .. x + lookahead
-#     lookahead: how many horizontal pixels to look ahead
-#     distances: matrix of (24-bit RGB, n-bit palette) perceptual colour distances
-#     x_res: horizontal screen resolution
-#
-# Returns: index from 0 .. 2**lookahead into options_nbit representing best available choice for position (x,y)
-#
-@cython.boundscheck(False)
-@cython.wraparound(False)
-cdef int dither_lookahead(Dither* dither, float[:, :, ::1] palette_cam16, float[:, :, ::1] palette_rgb,
-        float[:, :, ::1] image_rgb, int x, int y, int lookahead, unsigned char last_pixels,
-        int x_res, float[:,::1] rgb_to_cam16ucs, unsigned char palette_depth) nogil:
-    cdef int candidate_pixels, i, j
-    cdef float[3] quant_error
-    cdef int best
-    cdef float best_error = 2**31-1
-    cdef float total_error
-    cdef unsigned char next_pixels
-    cdef int phase
-    cdef float[::1] lah_cam16ucs
-
-    # Don't bother dithering past the lookahead horizon or edge of screen.
-    cdef int xxr = min(x + lookahead, x_res)
-
-    cdef int lah_shape1 = xxr - x
-    cdef int lah_shape2 = 3
-    cdef float *lah_image_rgb = <float *> malloc(lah_shape1 * lah_shape2 * sizeof(float))
-
-    # For each 2**lookahead possibilities for the on/off state of the next lookahead pixels, apply error diffusion
-    # and compute the total squared error to the source image.  Since we only have two possible colours for each
-    # given pixel (dependent on the state already chosen for pixels to the left), we need to look beyond local minima.
-    # i.e. it might be better to make a sub-optimal choice for this pixel if it allows access to much better pixel
-    # colours at later positions.
-    for candidate_pixels in range(1 << lookahead):
-        # Working copy of input pixels
-        for i in range(xxr - x):
-            for j in range(3):
-                lah_image_rgb[i * lah_shape2 + j] = image_rgb[y, x+i, j]
-
-        total_error = 0
-        # Apply dithering to lookahead horizon or edge of screen
-        for i in range(xxr - x):
-            xl = dither_bounds_xl(dither, i)
-            xr = dither_bounds_xr(dither, xxr - x, i)
-            phase = (x + i) % 4
-
-            next_pixels = shift_pixel_window(
-                    last_pixels, next_pixels=candidate_pixels, shift_right_by=i+1, window_width=palette_depth)
-
-            # We don't update the input at position x (since we've already chosen fixed outputs), but we do propagate
-            # quantization errors to positions >x  so we can compensate for how good/bad these choices were.  i.e. the
-            # next_pixels choices are fixed, but we can still distribute quantization error from having made these
-            # choices, in order to compute the total error.
-            for j in range(3):
-                quant_error[j] = lah_image_rgb[i * lah_shape2 + j] - palette_rgb[next_pixels, phase, j]
-            apply_one_line(dither, xl, xr, i, lah_image_rgb, lah_shape2, quant_error)
-
-            lah_cam16ucs = convert_rgb_to_cam16ucs(
-                rgb_to_cam16ucs, lah_image_rgb[i*lah_shape2], lah_image_rgb[i*lah_shape2+1],
-                lah_image_rgb[i*lah_shape2+2])
-            total_error += colour_distance_squared(lah_cam16ucs, palette_cam16[next_pixels, phase])
-
-            if total_error >= best_error:
-                # No need to continue
-                break
-
-        if total_error < best_error:
-            best_error = total_error
-            best = candidate_pixels
-
-    free(lah_image_rgb)
-    return best
-
-
 @cython.boundscheck(False)
 @cython.wraparound(False)
 cdef inline float[::1] convert_rgb_to_cam16ucs(float[:, ::1] rgb_to_cam16ucs, float r, float g, float b) nogil:
@@ -164,173 +16,12 @@ cdef inline float[::1] convert_rgb_to_cam16ucs(float[:, ::1] rgb_to_cam16ucs, fl
     return rgb_to_cam16ucs[rgb_24bit]
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
-cdef inline float fabs(float value) nogil:
-    return -value if value < 0 else value
-
-
 @cython.boundscheck(False)
 @cython.wraparound(False)
 cdef inline double colour_distance_squared(float[::1] colour1, float[::1] colour2) nogil:
     return (colour1[0] - colour2[0]) ** 2 + (colour1[1] - colour2[1]) ** 2 + (colour1[2] - colour2[2]) ** 2
 
 
-# Perform error diffusion to a single image row.
-#
-# Args:
-#     dither: dither pattern to apply
-#     xl: lower x bounding box
-#     xr: upper x bounding box
-#     x: starting horizontal position to apply error diffusion
-#     image: array of shape (image_shape1, 3) representing RGB pixel data for a single image line, to be mutated.
-#     image_shape1: horizontal dimension of image
-#     quant_error: RGB quantization error to be diffused
-#
-cdef void apply_one_line(Dither* dither, int xl, int xr, int x, float[] image, int image_shape1,
-        float[] quant_error) nogil:
-
-    cdef int i, j
-    cdef float error_fraction
-
-    for i in range(xl, xr):
-        error_fraction = dither.pattern[i - x + dither.x_origin]
-        for j in range(3):
-            image[i * image_shape1 + j] = clip(image[i * image_shape1 + j] + error_fraction * quant_error[j], 0, 1)
-
-
-# Perform error diffusion across multiple image rows.
-#
-# Args:
-#     dither: dither pattern to apply
-#     x_res: horizontal image resolution
-#     y_res: vertical image resolution
-#     x: starting horizontal position to apply error diffusion
-#     y: starting vertical position to apply error diffusion
-#     image: RGB pixel data, to be mutated
-#     quant_error: RGB quantization error to be diffused
-#
-@cython.boundscheck(False)
-@cython.wraparound(False)
-cdef void apply(Dither* dither, int x_res, int y_res, int x, int y, float[:,:,::1] image, float[] quant_error) nogil:
-
-    cdef int i, j, k
-
-    cdef int yt = dither_bounds_yt(dither, y)
-    cdef int yb = dither_bounds_yb(dither, y_res, y)
-    cdef int xl = dither_bounds_xl(dither, x)
-    cdef int xr = dither_bounds_xr(dither, x_res, x)
-
-    cdef float error_fraction
-    for i in range(yt, yb):
-        for j in range(xl, xr):
-            error_fraction = dither.pattern[(i - y) * dither.x_shape + j - x + dither.x_origin]
-            for k in range(3):
-                image[i,j,k] = clip(image[i,j,k] + error_fraction * quant_error[k], 0, 1)
-
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-cdef image_nbit_to_bitmap(
-    (unsigned char)[:, ::1] image_nbit, unsigned int x_res, unsigned int y_res, unsigned char palette_depth):
-    cdef unsigned int x, y
-    bitmap = np.zeros((y_res, x_res), dtype=bool)
-    for y in range(y_res):
-        for x in range(x_res):
-            # MSB of each array element is the pixel state at (x, y)
-            bitmap[y, x] = image_nbit[y, x] >> (palette_depth - 1)
-    return bitmap
-
-
-# Dither a source image
-#
-# Args:
-#     screen: screen.Screen object
-#     image_rgb: input RGB image
-#     dither: dither_pattern.DitherPattern to apply during dithering
-#     lookahead: how many x positions to look ahead to optimize colour choices
-#     verbose: whether to output progress during image conversion
-#
-# Returns: tuple of n-bit output image array and RGB output image array
-#
-@cython.boundscheck(False)
-@cython.wraparound(False)
-def dither_image(
-        screen, float[:, :, ::1] image_rgb, dither, int lookahead, unsigned char verbose, float[:,::1] rgb_to_cam16ucs):
-    cdef int y, x
-    cdef unsigned char i, j, pixels_nbit, phase
-    # cdef float[3] input_pixel_rgb
-    cdef float[3] quant_error
-    cdef unsigned char output_pixel_nbit
-    cdef unsigned char best_next_pixels
-    cdef float[3] output_pixel_rgb
-
-    # Hoist some python attribute accesses into C variables for efficient access during the main loop
-
-    cdef int yres = screen.Y_RES
-    cdef int xres = screen.X_RES
-
-    # TODO: convert this instead of storing on palette?
-    cdef float[:, :, ::1] palette_cam16 = np.zeros((len(screen.palette.CAM16UCS), 4, 3), dtype=np.float32)
-    for pixels_nbit, phase in screen.palette.CAM16UCS.keys():
-        for i in range(3):
-            palette_cam16[pixels_nbit, phase, i] = screen.palette.CAM16UCS[pixels_nbit, phase][i]
-
-    cdef float[:, :, ::1] palette_rgb = np.zeros((len(screen.palette.RGB), 4, 3), dtype=np.float32)
-    for pixels_nbit, phase in screen.palette.RGB.keys():
-        for i in range(3):
-            palette_rgb[pixels_nbit, phase, i] = screen.palette.RGB[pixels_nbit, phase][i] / 255
-
-    cdef Dither cdither
-    cdither.y_shape = dither.PATTERN.shape[0]
-    cdither.x_shape = dither.PATTERN.shape[1]
-    cdither.y_origin = dither.ORIGIN[0]
-    cdither.x_origin = dither.ORIGIN[1]
-    # TODO: should be just as efficient to use a memoryview?
-    cdither.pattern = <float *> malloc(cdither.x_shape * cdither.y_shape * sizeof(float))
-    for i in range(cdither.y_shape):
-        for j in range(cdither.x_shape):
-            cdither.pattern[i * cdither.x_shape + j] = dither.PATTERN[i, j]
-
-    cdef unsigned char palette_depth = screen.palette.PALETTE_DEPTH
-
-    # The nbit image representation contains the trailing n dot values as an n-bit value with MSB representing the
-    # current pixel.  This choice (cf LSB) is slightly awkward but matches the DHGR behaviour that bit positions in
-    # screen memory map LSB to MSB from L to R.  The value of n is chosen by the palette depth, i.e. how many trailing
-    # dot positions are used to determine the colour of a given pixel.
-    cdef (unsigned char)[:, ::1] image_nbit = np.empty((image_rgb.shape[0], image_rgb.shape[1]), dtype=np.uint8)
-
-    for y in range(yres):
-        if verbose:
-            print("%d/%d" % (y, yres))
-        output_pixel_nbit = 0
-        for x in range(xres):
-            # Compute all possible 2**N choices of n-bit pixel colours for positions x .. x + lookahead
-            # lookahead_palette_choices_nbit = lookahead_options(lookahead, output_pixel_nbit)
-            # Apply error diffusion for each of these 2**N choices, and compute which produces the closest match
-            # to the source image over the succeeding N pixels
-            best_next_pixels = dither_lookahead(
-                    &cdither, palette_cam16, palette_rgb, image_rgb, x, y, lookahead, output_pixel_nbit, xres,
-                    rgb_to_cam16ucs, palette_depth)
-            # Apply best choice for next 1 pixel
-            output_pixel_nbit = shift_pixel_window(
-                    output_pixel_nbit, best_next_pixels, shift_right_by=1, window_width=palette_depth)
-
-            # Apply error diffusion from chosen output pixel value
-            for i in range(3):
-                output_pixel_rgb[i] = palette_rgb[output_pixel_nbit, x % 4, i]
-                quant_error[i] = image_rgb[y,x,i] - output_pixel_rgb[i]
-            apply(&cdither, xres, yres, x, y, image_rgb, quant_error)
-
-            # Update image with our chosen image pixel
-            image_nbit[y, x] = output_pixel_nbit
-            for i in range(3):
-                image_rgb[y, x, i] = output_pixel_rgb[i]
-
-    free(cdither.pattern)
-    return image_nbit_to_bitmap(image_nbit, xres, yres, palette_depth)
-
-
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def dither_shr_perfect(
diff --git a/setup.py b/setup.py
index 15073e9..e754657 100644
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,7 @@ Cython.Compiler.Options.annotate = True
 
 setup(
     ext_modules=cythonize(
-        ["dither.pyx"],
+        ["dither_dhr.pyx", "dither_shr.pyx"],
         annotate=True,
         compiler_directives={'language_level': "3"}
     )

From 0dc2c0a7a024294a3c4f0845d9787ac4ef423625 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Fri, 26 Nov 2021 12:12:55 +0000
Subject: [PATCH 67/82] Disable bounds checking and wraparound by default

---
 dither_dhr.pyx | 16 ++--------------
 dither_shr.pyx | 20 ++------------------
 2 files changed, 4 insertions(+), 32 deletions(-)

diff --git a/dither_dhr.pyx b/dither_dhr.pyx
index 4f6ae64..0184286 100644
--- a/dither_dhr.pyx
+++ b/dither_dhr.pyx
@@ -1,5 +1,7 @@
 # cython: infer_types=True
 # cython: profile=False
+# cython: boundscheck=False
+# cython: wraparound=False
 
 cimport cython
 import numpy as np
@@ -90,8 +92,6 @@ cdef inline unsigned char shift_pixel_window(
 #
 # Returns: index from 0 .. 2**lookahead into options_nbit representing best available choice for position (x,y)
 #
-@cython.boundscheck(False)
-@cython.wraparound(False)
 cdef int dither_lookahead(Dither* dither, float[:, :, ::1] palette_cam16, float[:, :, ::1] palette_rgb,
         float[:, :, ::1] image_rgb, int x, int y, int lookahead, unsigned char last_pixels,
         int x_res, float[:,::1] rgb_to_cam16ucs, unsigned char palette_depth) nogil:
@@ -157,21 +157,15 @@ cdef int dither_lookahead(Dither* dither, float[:, :, ::1] palette_cam16, float[
     return best
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
 cdef inline float[::1] convert_rgb_to_cam16ucs(float[:, ::1] rgb_to_cam16ucs, float r, float g, float b) nogil:
     cdef unsigned int rgb_24bit = (<unsigned int>(r*255) << 16) + (<unsigned int>(g*255) << 8) + <unsigned int>(b*255)
     return rgb_to_cam16ucs[rgb_24bit]
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
 cdef inline float fabs(float value) nogil:
     return -value if value < 0 else value
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
 cdef inline double colour_distance_squared(float[::1] colour1, float[::1] colour2) nogil:
     return (colour1[0] - colour2[0]) ** 2 + (colour1[1] - colour2[1]) ** 2 + (colour1[2] - colour2[2]) ** 2
 
@@ -210,8 +204,6 @@ cdef void apply_one_line(Dither* dither, int xl, int xr, int x, float[] image, i
 #     image: RGB pixel data, to be mutated
 #     quant_error: RGB quantization error to be diffused
 #
-@cython.boundscheck(False)
-@cython.wraparound(False)
 cdef void apply(Dither* dither, int x_res, int y_res, int x, int y, float[:,:,::1] image, float[] quant_error) nogil:
 
     cdef int i, j, k
@@ -229,8 +221,6 @@ cdef void apply(Dither* dither, int x_res, int y_res, int x, int y, float[:,:,::
                 image[i,j,k] = clip(image[i,j,k] + error_fraction * quant_error[k], 0, 1)
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
 cdef image_nbit_to_bitmap(
     (unsigned char)[:, ::1] image_nbit, unsigned int x_res, unsigned int y_res, unsigned char palette_depth):
     cdef unsigned int x, y
@@ -253,8 +243,6 @@ cdef image_nbit_to_bitmap(
 #
 # Returns: tuple of n-bit output image array and RGB output image array
 #
-@cython.boundscheck(False)
-@cython.wraparound(False)
 def dither_image(
         screen, float[:, :, ::1] image_rgb, dither, int lookahead, unsigned char verbose, float[:,::1] rgb_to_cam16ucs):
     cdef int y, x
diff --git a/dither_shr.pyx b/dither_shr.pyx
index 273315c..30852bb 100644
--- a/dither_shr.pyx
+++ b/dither_shr.pyx
@@ -1,5 +1,7 @@
 # cython: infer_types=True
 # cython: profile=False
+# cython: boundscheck=False
+# cython: wraparound=False
 
 cimport cython
 import numpy as np
@@ -9,21 +11,15 @@ cdef float clip(float a, float min_value, float max_value) nogil:
     return min(max(a, min_value), max_value)
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
 cdef inline float[::1] convert_rgb_to_cam16ucs(float[:, ::1] rgb_to_cam16ucs, float r, float g, float b) nogil:
     cdef unsigned int rgb_24bit = (<unsigned int>(r*255) << 16) + (<unsigned int>(g*255) << 8) + <unsigned int>(b*255)
     return rgb_to_cam16ucs[rgb_24bit]
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
 cdef inline double colour_distance_squared(float[::1] colour1, float[::1] colour2) nogil:
     return (colour1[0] - colour2[0]) ** 2 + (colour1[1] - colour2[1]) ** 2 + (colour1[2] - colour2[2]) ** 2
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
 def dither_shr_perfect(
         float[:, :, ::1] input_rgb, float[:, ::1] full_palette_cam, float[:, ::1] full_palette_rgb,
         float[:,::1] rgb_to_cam16ucs):
@@ -134,8 +130,6 @@ def dither_shr_perfect(
     return total_image_error, working_image
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
 def dither_shr(
         float[:, :, ::1] input_rgb, float[:, :, ::1] palettes_cam, float[:, :, ::1] palettes_rgb,
         float[:,::1] rgb_to_cam16ucs):
@@ -264,8 +258,6 @@ cdef struct PaletteSelection:
     double total_error
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
 cdef PaletteSelection best_palette_for_line(float [:, ::1] line_cam, float[:, :, ::1] palettes_cam, int last_palette_idx) nogil:
     cdef int palette_idx, best_palette_idx, palette_entry_idx, pixel_idx
     cdef double best_total_dist, total_dist, best_pixel_dist, pixel_dist
@@ -296,8 +288,6 @@ cdef PaletteSelection best_palette_for_line(float [:, ::1] line_cam, float[:, :,
     return res
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
 cdef float[::1] _convert_rgb12_iigs_to_cam(float [:, ::1] rgb12_iigs_to_cam16ucs, (unsigned char)[::1] point_rgb12) nogil:
     cdef int rgb12 = (point_rgb12[0] << 8) | (point_rgb12[1] << 4) | point_rgb12[2]
     return rgb12_iigs_to_cam16ucs[rgb12]
@@ -308,8 +298,6 @@ def convert_rgb12_iigs_to_cam(float [:, ::1] rgb12_iigs_to_cam16ucs, (unsigned c
 
 import colour
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
 @cython.cdivision(True)
 cdef float[:, ::1] linear_to_srgb_array(float[:, ::1] a, float gamma=2.4):
     cdef int i, j
@@ -322,8 +310,6 @@ cdef float[:, ::1] linear_to_srgb_array(float[:, ::1] a, float gamma=2.4):
                 res[i, j] = 1.055 * a[i, j] ** (1.0 / gamma) - 0.055
     return res
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
 cdef (unsigned char)[:, ::1] _convert_cam16ucs_to_rgb12_iigs(float[:, ::1] point_cam):
     cdef float[:, ::1] rgb
     cdef (float)[:, ::1] rgb12_iigs
@@ -348,8 +334,6 @@ def convert_cam16ucs_to_rgb12_iigs(float[:, ::1] point_cam):
     return _convert_cam16ucs_to_rgb12_iigs(point_cam)
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
 @cython.cdivision(True)
 def k_means_with_fixed_centroids(
     int n_clusters, int n_fixed, float[:, ::1] samples, (unsigned char)[:, ::1] initial_centroids, int max_iterations,

From ae89682dab71f84fe749997c4475d84c2ff49682 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Fri, 26 Nov 2021 12:26:46 +0000
Subject: [PATCH 68/82] Split out common utility functions into a shared module

---
 common.pxd     |   5 +++
 common.pyx     |  18 +++++++++
 dither_dhr.pyx |  27 +++----------
 dither_shr.pyx | 108 ++++++++++++++++++++++++-------------------------
 setup.py       |   2 +-
 5 files changed, 82 insertions(+), 78 deletions(-)
 create mode 100644 common.pxd
 create mode 100644 common.pyx

diff --git a/common.pxd b/common.pxd
new file mode 100644
index 0000000..5de23e9
--- /dev/null
+++ b/common.pxd
@@ -0,0 +1,5 @@
+cdef float clip(float a, float min_value, float max_value) nogil
+
+cdef float[::1] convert_rgb_to_cam16ucs(float[:, ::1] rgb_to_cam16ucs, float r, float g, float b) nogil
+
+cdef double colour_distance_squared(float[::1] colour1, float[::1] colour2) nogil
\ No newline at end of file
diff --git a/common.pyx b/common.pyx
new file mode 100644
index 0000000..b8f435f
--- /dev/null
+++ b/common.pyx
@@ -0,0 +1,18 @@
+# cython: infer_types=True
+# cython: profile=False
+# cython: boundscheck=False
+# cython: wraparound=False
+
+
+cdef float clip(float a, float min_value, float max_value) nogil:
+    return min(max(a, min_value), max_value)
+
+
+cdef inline float[::1] convert_rgb_to_cam16ucs(float[:, ::1] rgb_to_cam16ucs, float r, float g, float b) nogil:
+    cdef unsigned int rgb_24bit = (<unsigned int>(r*255) << 16) + (<unsigned int>(g*255) << 8) + <unsigned int>(b*255)
+    return rgb_to_cam16ucs[rgb_24bit]
+
+
+cdef inline double colour_distance_squared(float[::1] colour1, float[::1] colour2) nogil:
+    return (colour1[0] - colour2[0]) ** 2 + (colour1[1] - colour2[1]) ** 2 + (colour1[2] - colour2[2]) ** 2
+
diff --git a/dither_dhr.pyx b/dither_dhr.pyx
index 0184286..b2008bb 100644
--- a/dither_dhr.pyx
+++ b/dither_dhr.pyx
@@ -7,6 +7,8 @@ cimport cython
 import numpy as np
 from libc.stdlib cimport malloc, free
 
+cimport common
+
 
 # TODO: use a cdef class
 # C representation of dither_pattern.DitherPattern data, for efficient access.
@@ -19,10 +21,6 @@ cdef struct Dither:
     int y_origin
 
 
-cdef float clip(float a, float min_value, float max_value) nogil:
-    return min(max(a, min_value), max_value)
-
-
 # Compute left-hand bounding box for dithering at horizontal position x.
 cdef int dither_bounds_xl(Dither *dither, int x) nogil:
     cdef int el = max(dither.x_origin - x, 0)
@@ -140,10 +138,10 @@ cdef int dither_lookahead(Dither* dither, float[:, :, ::1] palette_cam16, float[
                 quant_error[j] = lah_image_rgb[i * lah_shape2 + j] - palette_rgb[next_pixels, phase, j]
             apply_one_line(dither, xl, xr, i, lah_image_rgb, lah_shape2, quant_error)
 
-            lah_cam16ucs = convert_rgb_to_cam16ucs(
+            lah_cam16ucs = common.convert_rgb_to_cam16ucs(
                 rgb_to_cam16ucs, lah_image_rgb[i*lah_shape2], lah_image_rgb[i*lah_shape2+1],
                 lah_image_rgb[i*lah_shape2+2])
-            total_error += colour_distance_squared(lah_cam16ucs, palette_cam16[next_pixels, phase])
+            total_error += common.colour_distance_squared(lah_cam16ucs, palette_cam16[next_pixels, phase])
 
             if total_error >= best_error:
                 # No need to continue
@@ -157,19 +155,6 @@ cdef int dither_lookahead(Dither* dither, float[:, :, ::1] palette_cam16, float[
     return best
 
 
-cdef inline float[::1] convert_rgb_to_cam16ucs(float[:, ::1] rgb_to_cam16ucs, float r, float g, float b) nogil:
-    cdef unsigned int rgb_24bit = (<unsigned int>(r*255) << 16) + (<unsigned int>(g*255) << 8) + <unsigned int>(b*255)
-    return rgb_to_cam16ucs[rgb_24bit]
-
-
-cdef inline float fabs(float value) nogil:
-    return -value if value < 0 else value
-
-
-cdef inline double colour_distance_squared(float[::1] colour1, float[::1] colour2) nogil:
-    return (colour1[0] - colour2[0]) ** 2 + (colour1[1] - colour2[1]) ** 2 + (colour1[2] - colour2[2]) ** 2
-
-
 # Perform error diffusion to a single image row.
 #
 # Args:
@@ -190,7 +175,7 @@ cdef void apply_one_line(Dither* dither, int xl, int xr, int x, float[] image, i
     for i in range(xl, xr):
         error_fraction = dither.pattern[i - x + dither.x_origin]
         for j in range(3):
-            image[i * image_shape1 + j] = clip(image[i * image_shape1 + j] + error_fraction * quant_error[j], 0, 1)
+            image[i * image_shape1 + j] = common.clip(image[i * image_shape1 + j] + error_fraction * quant_error[j], 0, 1)
 
 
 # Perform error diffusion across multiple image rows.
@@ -218,7 +203,7 @@ cdef void apply(Dither* dither, int x_res, int y_res, int x, int y, float[:,:,::
         for j in range(xl, xr):
             error_fraction = dither.pattern[(i - y) * dither.x_shape + j - x + dither.x_origin]
             for k in range(3):
-                image[i,j,k] = clip(image[i,j,k] + error_fraction * quant_error[k], 0, 1)
+                image[i,j,k] = common.clip(image[i,j,k] + error_fraction * quant_error[k], 0, 1)
 
 
 cdef image_nbit_to_bitmap(
diff --git a/dither_shr.pyx b/dither_shr.pyx
index 30852bb..aff880d 100644
--- a/dither_shr.pyx
+++ b/dither_shr.pyx
@@ -4,20 +4,10 @@
 # cython: wraparound=False
 
 cimport cython
+import colour
 import numpy as np
 
-# TODO: move these into a common module
-cdef float clip(float a, float min_value, float max_value) nogil:
-    return min(max(a, min_value), max_value)
-
-
-cdef inline float[::1] convert_rgb_to_cam16ucs(float[:, ::1] rgb_to_cam16ucs, float r, float g, float b) nogil:
-    cdef unsigned int rgb_24bit = (<unsigned int>(r*255) << 16) + (<unsigned int>(g*255) << 8) + <unsigned int>(b*255)
-    return rgb_to_cam16ucs[rgb_24bit]
-
-
-cdef inline double colour_distance_squared(float[::1] colour1, float[::1] colour2) nogil:
-    return (colour1[0] - colour2[0]) ** 2 + (colour1[1] - colour2[1]) ** 2 + (colour1[2] - colour2[2]) ** 2
+cimport common
 
 
 def dither_shr_perfect(
@@ -40,17 +30,17 @@ def dither_shr_perfect(
     total_image_error = 0.0
     for y in range(200):
         for x in range(320):
-            line_cam[x, :] = convert_rgb_to_cam16ucs(
+            line_cam[x, :] = common.convert_rgb_to_cam16ucs(
                 rgb_to_cam16ucs, working_image[y,x,0], working_image[y,x,1], working_image[y,x,2])
 
         for x in range(320):
-            pixel_cam = convert_rgb_to_cam16ucs(
+            pixel_cam = common.convert_rgb_to_cam16ucs(
                 rgb_to_cam16ucs, working_image[y, x, 0], working_image[y, x, 1], working_image[y, x, 2])
 
             best_distance = 1e9
             best_colour_idx = -1
             for idx in range(palette_size):
-                distance = colour_distance_squared(pixel_cam, full_palette_cam[idx, :])
+                distance = common.colour_distance_squared(pixel_cam, full_palette_cam[idx, :])
                 if distance < best_distance:
                     best_distance = distance
                     best_colour_idx = idx
@@ -66,16 +56,16 @@ def dither_shr_perfect(
                     # 0 * 7
                     # 3 5 1
                     if x < 319:
-                        working_image[y, x + 1, i] = clip(
+                        working_image[y, x + 1, i] = common.clip(
                             working_image[y, x + 1, i] + quant_error * (7 / 16), 0, 1)
                     if y < 199:
                         if x > 0:
-                            working_image[y + 1, x - 1, i] = clip(
+                            working_image[y + 1, x - 1, i] = common.clip(
                                 working_image[y + 1, x - 1, i] + decay * quant_error * (3 / 16), 0, 1)
-                        working_image[y + 1, x, i] = clip(
+                        working_image[y + 1, x, i] = common.clip(
                             working_image[y + 1, x, i] + decay * quant_error * (5 / 16), 0, 1)
                         if x < 319:
-                            working_image[y + 1, x + 1, i] = clip(
+                            working_image[y + 1, x + 1, i] = common.clip(
                                 working_image[y + 1, x + 1, i] + decay * quant_error * (1 / 16), 0, 1)
                 else:
                     # Jarvis
@@ -83,47 +73,47 @@ def dither_shr_perfect(
                     # 3 5 7 5 3
                     # 1 3 5 3 1
                     if x < 319:
-                        working_image[y, x + 1, i] = clip(
+                        working_image[y, x + 1, i] = common.clip(
                             working_image[y, x + 1, i] + quant_error * (7 / 48), 0, 1)
                     if x < 318:
-                        working_image[y, x + 2, i] = clip(
+                        working_image[y, x + 2, i] = common.clip(
                             working_image[y, x + 2, i] + quant_error * (5 / 48), 0, 1)
                     if y < 199:
                         if x > 1:
-                            working_image[y + 1, x - 2, i] = clip(
+                            working_image[y + 1, x - 2, i] = common.clip(
                                 working_image[y + 1, x - 2, i] + decay * quant_error * (3 / 48), 0,
                                 1)
                         if x > 0:
-                            working_image[y + 1, x - 1, i] = clip(
+                            working_image[y + 1, x - 1, i] = common.clip(
                                 working_image[y + 1, x - 1, i] + decay * quant_error * (5 / 48), 0,
                                 1)
-                        working_image[y + 1, x, i] = clip(
+                        working_image[y + 1, x, i] = common.clip(
                             working_image[y + 1, x, i] + decay * quant_error * (7 / 48), 0, 1)
                         if x < 319:
-                            working_image[y + 1, x + 1, i] = clip(
+                            working_image[y + 1, x + 1, i] = common.clip(
                                 working_image[y + 1, x + 1, i] + decay * quant_error * (5 / 48),
                                 0, 1)
                         if x < 318:
-                            working_image[y + 1, x + 2, i] = clip(
+                            working_image[y + 1, x + 2, i] = common.clip(
                                 working_image[y + 1, x + 2, i] + decay * quant_error * (3 / 48),
                                 0, 1)
                     if y < 198:
                         if x > 1:
-                            working_image[y + 2, x - 2, i] = clip(
+                            working_image[y + 2, x - 2, i] = common.clip(
                                 working_image[y + 2, x - 2, i] + decay * decay * quant_error * (1 / 48), 0,
                                 1)
                         if x > 0:
-                            working_image[y + 2, x - 1, i] = clip(
+                            working_image[y + 2, x - 1, i] = common.clip(
                                 working_image[y + 2, x - 1, i] + decay * decay * quant_error * (3 / 48), 0,
                                 1)
-                        working_image[y + 2, x, i] = clip(
+                        working_image[y + 2, x, i] = common.clip(
                             working_image[y + 2, x, i] + decay * decay * quant_error * (5 / 48), 0, 1)
                         if x < 319:
-                            working_image[y + 2, x + 1, i] = clip(
+                            working_image[y + 2, x + 1, i] = common.clip(
                                 working_image[y + 2, x + 1, i] + decay * decay * quant_error * (3 / 48),
                                 0, 1)
                         if x < 318:
-                            working_image[y + 2, x + 2, i] = clip(
+                            working_image[y + 2, x + 2, i] = common.clip(
                                 working_image[y + 2, x + 2, i] + decay * decay * quant_error * (1 / 48),
                                 0, 1)
 
@@ -154,7 +144,7 @@ def dither_shr(
     total_image_error = 0.0
     for y in range(200):
         for x in range(320):
-            line_cam[x, :] = convert_rgb_to_cam16ucs(
+            line_cam[x, :] = common.convert_rgb_to_cam16ucs(
                 rgb_to_cam16ucs, working_image[y,x,0], working_image[y,x,1], working_image[y,x,2])
 
         palette_line = best_palette_for_line(line_cam, palettes_cam, best_palette)
@@ -166,13 +156,13 @@ def dither_shr(
         line_to_palette[y] = best_palette
 
         for x in range(320):
-            pixel_cam = convert_rgb_to_cam16ucs(
+            pixel_cam = common.convert_rgb_to_cam16ucs(
                 rgb_to_cam16ucs, working_image[y, x, 0], working_image[y, x, 1], working_image[y, x, 2])
 
             best_distance = 1e9
             best_colour_idx = -1
             for idx in range(16):
-                distance = colour_distance_squared(pixel_cam, palette_cam[idx, :])
+                distance = common.colour_distance_squared(pixel_cam, palette_cam[idx, :])
                 if distance < best_distance:
                     best_distance = distance
                     best_colour_idx = idx
@@ -189,16 +179,16 @@ def dither_shr(
                     # 0 * 7
                     # 3 5 1
                     if x < 319:
-                        working_image[y, x + 1, i] = clip(
+                        working_image[y, x + 1, i] = common.clip(
                             working_image[y, x + 1, i] + quant_error * (7 / 16), 0, 1)
                     if y < 199:
                         if x > 0:
-                            working_image[y + 1, x - 1, i] = clip(
+                            working_image[y + 1, x - 1, i] = common.clip(
                                 working_image[y + 1, x - 1, i] + decay * quant_error * (3 / 16), 0, 1)
-                        working_image[y + 1, x, i] = clip(
+                        working_image[y + 1, x, i] = common.clip(
                             working_image[y + 1, x, i] + decay * quant_error * (5 / 16), 0, 1)
                         if x < 319:
-                            working_image[y + 1, x + 1, i] = clip(
+                            working_image[y + 1, x + 1, i] = common.clip(
                                 working_image[y + 1, x + 1, i] + decay * quant_error * (1 / 16), 0, 1)
                 else:
                     # Jarvis
@@ -206,51 +196,54 @@ def dither_shr(
                     # 3 5 7 5 3
                     # 1 3 5 3 1
                     if x < 319:
-                        working_image[y, x + 1, i] = clip(
+                        working_image[y, x + 1, i] = common.clip(
                             working_image[y, x + 1, i] + quant_error * (7 / 48), 0, 1)
                     if x < 318:
-                        working_image[y, x + 2, i] = clip(
+                        working_image[y, x + 2, i] = common.clip(
                             working_image[y, x + 2, i] + quant_error * (5 / 48), 0, 1)
                     if y < 199:
                         if x > 1:
-                            working_image[y + 1, x - 2, i] = clip(
+                            working_image[y + 1, x - 2, i] = common.clip(
                                 working_image[y + 1, x - 2, i] + decay * quant_error * (3 / 48), 0,
                                 1)
                         if x > 0:
-                            working_image[y + 1, x - 1, i] = clip(
+                            working_image[y + 1, x - 1, i] = common.clip(
                                 working_image[y + 1, x - 1, i] + decay * quant_error * (5 / 48), 0,
                                 1)
-                        working_image[y + 1, x, i] = clip(
+                        working_image[y + 1, x, i] = common.clip(
                             working_image[y + 1, x, i] + decay * quant_error * (7 / 48), 0, 1)
                         if x < 319:
-                            working_image[y + 1, x + 1, i] = clip(
+                            working_image[y + 1, x + 1, i] = common.clip(
                                 working_image[y + 1, x + 1, i] + decay * quant_error * (5 / 48),
                                 0, 1)
                         if x < 318:
-                            working_image[y + 1, x + 2, i] = clip(
+                            working_image[y + 1, x + 2, i] = common.clip(
                                 working_image[y + 1, x + 2, i] + decay * quant_error * (3 / 48),
                                 0, 1)
                     if y < 198:
                         if x > 1:
-                            working_image[y + 2, x - 2, i] = clip(
+                            working_image[y + 2, x - 2, i] = common.clip(
                                 working_image[y + 2, x - 2, i] + decay * decay * quant_error * (1 / 48), 0,
                                 1)
                         if x > 0:
-                            working_image[y + 2, x - 1, i] = clip(
+                            working_image[y + 2, x - 1, i] = common.clip(
                                 working_image[y + 2, x - 1, i] + decay * decay * quant_error * (3 / 48), 0,
                                 1)
-                        working_image[y + 2, x, i] = clip(
+                        working_image[y + 2, x, i] = common.clip(
                             working_image[y + 2, x, i] + decay * decay * quant_error * (5 / 48), 0, 1)
                         if x < 319:
-                            working_image[y + 2, x + 1, i] = clip(
+                            working_image[y + 2, x + 1, i] = common.clip(
                                 working_image[y + 2, x + 1, i] + decay * decay * quant_error * (3 / 48),
                                 0, 1)
                         if x < 318:
-                            working_image[y + 2, x + 2, i] = clip(
+                            working_image[y + 2, x + 2, i] = common.clip(
                                 working_image[y + 2, x + 2, i] + decay * decay * quant_error * (1 / 48),
                                 0, 1)
 
-    return np.array(output_4bit, dtype=np.uint8), line_to_palette, total_image_error, np.array(palette_line_errors, dtype=np.float64)
+    return (
+        np.array(output_4bit, dtype=np.uint8), line_to_palette, total_image_error,
+        np.array(palette_line_errors, dtype=np.float64)
+    )
 
 
 cdef struct PaletteSelection:
@@ -258,7 +251,8 @@ cdef struct PaletteSelection:
     double total_error
 
 
-cdef PaletteSelection best_palette_for_line(float [:, ::1] line_cam, float[:, :, ::1] palettes_cam, int last_palette_idx) nogil:
+cdef PaletteSelection best_palette_for_line(
+    float [:, ::1] line_cam, float[:, :, ::1] palettes_cam, int last_palette_idx) nogil:
     cdef int palette_idx, best_palette_idx, palette_entry_idx, pixel_idx
     cdef double best_total_dist, total_dist, best_pixel_dist, pixel_dist
     cdef float[:, ::1] palette_cam
@@ -274,7 +268,7 @@ cdef PaletteSelection best_palette_for_line(float [:, ::1] line_cam, float[:, :,
             pixel_cam = line_cam[pixel_idx]
             best_pixel_dist = 1e9
             for palette_entry_idx in range(16):
-                pixel_dist = colour_distance_squared(pixel_cam, palette_cam[palette_entry_idx, :])
+                pixel_dist = common.colour_distance_squared(pixel_cam, palette_cam[palette_entry_idx, :])
                 if pixel_dist < best_pixel_dist:
                     best_pixel_dist = pixel_dist
             total_dist += best_pixel_dist
@@ -293,10 +287,10 @@ cdef float[::1] _convert_rgb12_iigs_to_cam(float [:, ::1] rgb12_iigs_to_cam16ucs
     return rgb12_iigs_to_cam16ucs[rgb12]
 
 
+# Wrapper around _convert_rgb12_iigs_to_cam to allow calling from python while retaining fast path for cython calls.
 def convert_rgb12_iigs_to_cam(float [:, ::1] rgb12_iigs_to_cam16ucs, (unsigned char)[::1] point_rgb12) -> float[::1]:
     return _convert_rgb12_iigs_to_cam(rgb12_iigs_to_cam16ucs, point_rgb12)
 
-import colour
 
 @cython.cdivision(True)
 cdef float[:, ::1] linear_to_srgb_array(float[:, ::1] a, float gamma=2.4):
@@ -310,6 +304,7 @@ cdef float[:, ::1] linear_to_srgb_array(float[:, ::1] a, float gamma=2.4):
                 res[i, j] = 1.055 * a[i, j] ** (1.0 / gamma) - 0.055
     return res
 
+
 cdef (unsigned char)[:, ::1] _convert_cam16ucs_to_rgb12_iigs(float[:, ::1] point_cam):
     cdef float[:, ::1] rgb
     cdef (float)[:, ::1] rgb12_iigs
@@ -329,7 +324,8 @@ cdef (unsigned char)[:, ::1] _convert_cam16ucs_to_rgb12_iigs(float[:, ::1] point
             K=colour.WEIGHTS_YCBCR['ITU-R BT.601']), 0, 1).astype(np.float32) * 15
     return np.round(rgb12_iigs).astype(np.uint8)
 
-
+# Wrapper around _convert_cam16ucs_to_rgb12_iigs to allow calling from python while retaining fast path for cython
+# calls.
 def convert_cam16ucs_to_rgb12_iigs(float[:, ::1] point_cam):
     return _convert_cam16ucs_to_rgb12_iigs(point_cam)
 
@@ -366,7 +362,7 @@ def k_means_with_fixed_centroids(
             best_error = 1e9
             closest_centroid_idx = 0
             for centroid_idx in range(n_clusters):
-                error = colour_distance_squared(
+                error = common.colour_distance_squared(
                     _convert_rgb12_iigs_to_cam(rgb12_iigs_to_cam16ucs, centroids_rgb12[centroid_idx, :]), point_cam)
                 if error < best_error:
                     best_error = error
diff --git a/setup.py b/setup.py
index e754657..c2271bc 100644
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,7 @@ Cython.Compiler.Options.annotate = True
 
 setup(
     ext_modules=cythonize(
-        ["dither_dhr.pyx", "dither_shr.pyx"],
+        ["common.pyx", "dither_dhr.pyx", "dither_shr.pyx"],
         annotate=True,
         compiler_directives={'language_level': "3"}
     )

From 0a964b377adcb3f2afda36272684c1e7227a108f Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Fri, 26 Nov 2021 12:35:45 +0000
Subject: [PATCH 69/82] Move SHR conversion out into convert_shr in preparation
 for re-enabling dhr support

---
 convert.py     | 453 ++-----------------------------------------------
 convert_shr.py | 451 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 465 insertions(+), 439 deletions(-)
 create mode 100644 convert_shr.py

diff --git a/convert.py b/convert.py
index 7367f05..3dad419 100644
--- a/convert.py
+++ b/convert.py
@@ -1,23 +1,10 @@
 """Image converter to Apple II Double Hi-Res format."""
 
 import argparse
-from collections import defaultdict
-import os.path
-import random
-from typing import Tuple, List
-
-from PIL import Image
-import colour
 import numpy as np
-from sklearn import cluster
 
-from os import environ
 
-environ['PYGAME_HIDE_SUPPORT_PROMPT'] = '1'
-import pygame
-
-import dither_dhr as dither_dhr_pyx
-import dither_shr as dither_shr_pyx
+import convert_shr
 import dither_pattern
 import image as image_py
 import palette as palette_py
@@ -29,344 +16,6 @@ import screen as screen_py
 # - support HGR
 
 
-class ClusterPalette:
-    def __init__(
-            self, image: np.ndarray, rgb12_iigs_to_cam16ucs, rgb24_to_cam16ucs,
-            fixed_colours=0):
-
-        # Conversion matrix from 12-bit //gs RGB colour space to CAM16UCS
-        # colour space
-        self._rgb12_iigs_to_cam16ucs = rgb12_iigs_to_cam16ucs
-
-        # Conversion matrix from 24-bit linear RGB colour space to CAM16UCS
-        # colour space
-        self._rgb24_to_cam16ucs = rgb24_to_cam16ucs
-
-        # Preprocessed source image in 24-bit linear RGB colour space.  We
-        # first dither the source image using the full 12-bit //gs RGB colour
-        # palette, ignoring SHR palette limitations (i.e. 4096 independent
-        # colours for each pixel).  This gives much better results for e.g.
-        # solid blocks of colour, which would be dithered inconsistently if
-        # targeting the source image directly.
-        self._image_rgb = self._perfect_dither(image)
-
-        # Preprocessed source image in CAM16UCS colour space
-        self._colours_cam = self._image_colours_cam(self._image_rgb)
-
-        # How many image colours to fix identically across all 16 SHR
-        # palettes.  These are taken to be the most prevalent colours from
-        # _global_palette.
-        self._fixed_colours = fixed_colours
-
-        # We fit a 16-colour palette against the entire image which is used
-        # as starting values for fitting the reserved colours in the 16 SHR
-        # palettes.
-        self._global_palette = np.empty((16, 3), dtype=np.uint8)
-
-        # 16 SHR palettes each of 16 colours, in CAM16UCS colour space
-        self._palettes_cam = np.empty((16, 16, 3), dtype=np.float32)
-
-        # 16 SHR palettes each of 16 colours, in //gs 4-bit RGB colour space
-        self._palettes_rgb = np.empty((16, 16, 3), dtype=np.uint8)
-
-        # defaultdict(list) mapping palette index to lines using this palette
-        self._palette_lines = self._init_palette_lines()
-
-    def _image_colours_cam(self, image: Image):
-        colours_rgb = np.asarray(image)  # .reshape((-1, 3))
-        with colour.utilities.suppress_warnings(colour_usage_warnings=True):
-            colours_cam = colour.convert(colours_rgb, "RGB",
-                                         "CAM16UCS").astype(np.float32)
-        return colours_cam
-
-    def _init_palette_lines(self, init_random=False):
-        palette_lines = defaultdict(list)
-
-        if init_random:
-            lines = list(range(200))
-            random.shuffle(lines)
-            idx = 0
-            while lines:
-                palette_lines[idx].append(lines.pop())
-                idx += 1
-        else:
-            palette_splits = self._equal_palette_splits()
-            for i, lh in enumerate(palette_splits):
-                l, h = lh
-                palette_lines[i].extend(list(range(l, h)))
-        return palette_lines
-
-    def _equal_palette_splits(self, palette_height=35):
-        # The 16 palettes are striped across consecutive (overlapping) line
-        # ranges.  Since nearby lines tend to have similar colours, this has
-        # the effect of smoothing out the colour transitions across palettes.
-
-        # If we want to overlap 16 palettes in 200 lines, where each palette
-        # has height H and overlaps the previous one by L lines, then the
-        # boundaries are at lines:
-        #   (0, H), (H-L, 2H-L), (2H-2L, 3H-2L), ..., (15H-15L, 16H - 15L)
-        # i.e. 16H - 15L = 200, so for a given palette height H we need to
-        # overlap by:
-        #   L = (16H - 200)/15
-
-        palette_overlap = (16 * palette_height - 200) / 15
-
-        palette_ranges = []
-        for palette_idx in range(16):
-            palette_lower = palette_idx * (palette_height - palette_overlap)
-            palette_upper = palette_lower + palette_height
-            palette_ranges.append((int(np.round(palette_lower)),
-                                   int(np.round(palette_upper))))
-        return palette_ranges
-
-    def _perfect_dither(self, source_image: np.ndarray):
-        """Dither a "perfect" image using the full 12-bit //gs RGB colour
-        palette, ignoring restrictions."""
-
-        # Suppress divide by zero warning,
-        # https://github.com/colour-science/colour/issues/900
-        with colour.utilities.suppress_warnings(python_warnings=True):
-            full_palette_linear_rgb = colour.convert(
-                self._rgb12_iigs_to_cam16ucs, "CAM16UCS", "RGB").astype(
-                np.float32)
-
-        total_image_error, image_rgb = dither_shr_pyx.dither_shr_perfect(
-            source_image, self._rgb12_iigs_to_cam16ucs, full_palette_linear_rgb,
-            self._rgb24_to_cam16ucs)
-        # print("Perfect image error:", total_image_error)
-        return image_rgb
-
-    def _dither_image(self, palettes_cam):
-        # Suppress divide by zero warning,
-        # https://github.com/colour-science/colour/issues/900
-        with colour.utilities.suppress_warnings(python_warnings=True):
-            palettes_linear_rgb = colour.convert(
-                palettes_cam, "CAM16UCS", "RGB").astype(np.float32)
-
-        output_4bit, line_to_palette, total_image_error, palette_line_errors = \
-            dither_shr_pyx.dither_shr(
-                self._image_rgb, palettes_cam, palettes_linear_rgb,
-                self._rgb24_to_cam16ucs)
-
-        # Update map of palettes to image lines for which the palette was the
-        # best match
-        palette_lines = defaultdict(list)
-        for line, palette in enumerate(line_to_palette):
-            palette_lines[palette].append(line)
-        self._palette_lines = palette_lines
-
-        self._palette_line_errors = palette_line_errors
-
-        return (output_4bit, line_to_palette, palettes_linear_rgb,
-                total_image_error)
-
-    def iterate(self, max_inner_iterations: int,
-                max_outer_iterations: int):
-        total_image_error = 1e9
-
-        outer_iterations_since_improvement = 0
-        while outer_iterations_since_improvement < max_outer_iterations:
-            inner_iterations_since_improvement = 0
-            self._palette_lines = self._init_palette_lines()
-
-            while inner_iterations_since_improvement < max_inner_iterations:
-                # print("Iterations %d" % inner_iterations_since_improvement)
-                new_palettes_cam, new_palettes_rgb12_iigs = (
-                    self._fit_shr_palettes())
-
-                # Recompute image with proposed palettes and check whether it
-                # has lower total image error than our previous best.
-                (output_4bit, line_to_palette, palettes_linear_rgb,
-                 new_total_image_error) = self._dither_image(new_palettes_cam)
-
-                self._reassign_unused_palettes(
-                    line_to_palette, new_palettes_rgb12_iigs)
-
-                if new_total_image_error >= total_image_error:
-                    inner_iterations_since_improvement += 1
-                    continue
-
-                # We found a globally better set of palettes, so restart the
-                # clocks
-                inner_iterations_since_improvement = 0
-                outer_iterations_since_improvement = -1
-                total_image_error = new_total_image_error
-
-                self._palettes_cam = new_palettes_cam
-                self._palettes_rgb = new_palettes_rgb12_iigs
-
-                yield (new_total_image_error, output_4bit, line_to_palette,
-                       new_palettes_rgb12_iigs, palettes_linear_rgb)
-            outer_iterations_since_improvement += 1
-
-    def _fit_shr_palettes(self) -> Tuple[np.ndarray, np.ndarray]:
-        """Attempt to find new palettes that locally improve image quality.
-
-        Re-fit a set of 16 palettes from (overlapping) line ranges of the
-        source image, using k-means clustering in CAM16-UCS colour space.
-
-        We maintain the total image error for the pixels on which the 16
-        palettes are clustered.  A new palette that increases this local
-        image error is rejected.
-
-        New palettes that reduce local error cannot be applied immediately
-        though, because they may cause an increase in *global* image error
-        when dithering.  i.e. they would reduce the overall image quality.
-
-        The current (locally) best palettes are returned and can be applied
-        using accept_palettes().
-        """
-        new_palettes_cam = np.empty_like(self._palettes_cam)
-        new_palettes_rgb12_iigs = np.empty_like(self._palettes_rgb)
-
-        # Compute a new 16-colour global palette for the entire image,
-        # used as the starting center positions for k-means clustering of the
-        # individual palettes
-        self._fit_global_palette()
-
-        for palette_idx in range(16):
-            palette_pixels = (
-                self._colours_cam[
-                self._palette_lines[palette_idx], :, :].reshape(-1, 3))
-
-            # Fix reserved colours from the global palette.
-            initial_centroids = np.copy(self._global_palette)
-            pixels_rgb_iigs = dither_shr_pyx.convert_cam16ucs_to_rgb12_iigs(
-                palette_pixels)
-            seen_colours = set()
-            for i in range(self._fixed_colours):
-                seen_colours.add(tuple(initial_centroids[i, :]))
-
-            # Pick unique random colours from the sample points for the
-            # remaining initial centroids.
-            for i in range(self._fixed_colours, 16):
-                choice = np.random.randint(0, pixels_rgb_iigs.shape[0])
-                new_colour = pixels_rgb_iigs[choice, :]
-                if tuple(new_colour) in seen_colours:
-                    continue
-                seen_colours.add(tuple(new_colour))
-                initial_centroids[i, :] = new_colour
-
-            # If there are any single colours in our source //gs RGB pixels that
-            # represent more than fixed_colour_fraction_threshold of the total,
-            # then fix these colours for the palette instead of clustering
-            # them.  This reduces artifacting on blocks of colour.
-            fixed_colour_fraction_threshold = 0.1
-            most_frequent_colours = sorted(list(zip(
-                *np.unique(pixels_rgb_iigs, return_counts=True, axis=0))),
-                key=lambda kv: kv[1], reverse=True)
-            fixed_colours = self._fixed_colours
-            for colour, freq in most_frequent_colours:
-                if (freq < (palette_pixels.shape[0] *
-                            fixed_colour_fraction_threshold)) or (
-                        fixed_colours == 16):
-                    break
-                if tuple(colour) not in seen_colours:
-                    seen_colours.add(tuple(colour))
-                    initial_centroids[fixed_colours, :] = colour
-                    fixed_colours += 1
-
-            palette_rgb12_iigs = dither_shr_pyx.k_means_with_fixed_centroids(
-                n_clusters=16, n_fixed=fixed_colours,
-                samples=palette_pixels,
-                initial_centroids=initial_centroids,
-                max_iterations=1000,
-                rgb12_iigs_to_cam16ucs=self._rgb12_iigs_to_cam16ucs)
-            # If the k-means clustering returned fewer than 16 unique colours,
-            # fill out the remainder with the most common pixels colours that
-            # have not yet been used.
-            palette_rgb12_iigs = self._fill_short_palette(
-                palette_rgb12_iigs, most_frequent_colours)
-
-            for i in range(16):
-                new_palettes_cam[palette_idx, i, :] = (
-                    np.array(dither_shr_pyx.convert_rgb12_iigs_to_cam(
-                        self._rgb12_iigs_to_cam16ucs, palette_rgb12_iigs[
-                            i]), dtype=np.float32))
-
-            new_palettes_rgb12_iigs[palette_idx, :, :] = palette_rgb12_iigs
-
-        self._palettes_accepted = False
-        return new_palettes_cam, new_palettes_rgb12_iigs
-
-    def _fit_global_palette(self):
-        """Compute a 16-colour palette for the entire image to use as
-        starting point for the sub-palettes.  This should help when the image
-        has large blocks of colour since the sub-palettes will tend to pick the
-        same colours."""
-
-        clusters = cluster.MiniBatchKMeans(n_clusters=16, max_iter=10000)
-        clusters.fit_predict(self._colours_cam.reshape(-1, 3))
-
-        # Dict of {palette idx : frequency count}
-        palette_freq = {idx: 0 for idx in range(16)}
-        for idx, freq in zip(*np.unique(clusters.labels_, return_counts=True)):
-            palette_freq[idx] = freq
-
-        frequency_order = [
-            k for k, v in sorted(
-                list(palette_freq.items()), key=lambda kv: kv[1], reverse=True)]
-
-        self._global_palette = (
-            dither_shr_pyx.convert_cam16ucs_to_rgb12_iigs(
-                clusters.cluster_centers_[frequency_order].astype(
-                    np.float32)))
-
-    def _fill_short_palette(self, palette_iigs_rgb, most_frequent_colours):
-        """Fill out the palette to 16 unique entries."""
-
-        # We want to maintain order of insertion so that we respect the
-        # ordering of fixed colours in the palette.  Python doesn't have an
-        # orderedset but dicts preserve insertion order.
-        palette_set = {}
-        for palette_entry in palette_iigs_rgb:
-            palette_set[tuple(palette_entry)] = True
-        if len(palette_set) == 16:
-            return palette_iigs_rgb
-
-        # Add most frequent image colours that are not yet in the palette
-        for colour, freq in most_frequent_colours:
-            if tuple(colour) in palette_set:
-                continue
-            palette_set[tuple(colour)] = True
-            # print("Added freq %d" % freq)
-            if len(palette_set) == 16:
-                break
-
-        # We couldn't find any more unique colours, fill out with random ones.
-        while len(palette_set) < 16:
-            palette_set[
-                tuple(np.random.randint(0, 16, size=3, dtype=np.uint8))] = True
-
-        return np.array(tuple(palette_set.keys()), dtype=np.uint8)
-
-    def _reassign_unused_palettes(self, line_to_palette, palettes_iigs_rgb):
-        palettes_used = [False] * 16
-        for palette in line_to_palette:
-            palettes_used[palette] = True
-        best_palette_lines = [v for k, v in sorted(list(zip(
-            self._palette_line_errors, range(200))))]
-
-        all_palettes = set()
-        for palette_idx, palette_iigs_rgb in enumerate(palettes_iigs_rgb):
-            palette_set = set()
-            for palette_entry in palette_iigs_rgb:
-                palette_set.add(tuple(palette_entry))
-            palette_set = frozenset(palette_set)
-            if palette_set in all_palettes:
-                print("Duplicate palette", palette_idx, palette_set)
-                palettes_used[palette_idx] = False
-
-        for palette_idx, palette_used in enumerate(palettes_used):
-            if palette_used:
-                continue
-            # print("Reassigning palette %d" % palette_idx)
-
-            # TODO: also remove from old entry
-            worst_line = best_palette_lines.pop()
-            self._palette_lines[palette_idx] = [worst_line]
-
-
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument("input", type=str, help="Input image file to process.")
@@ -431,7 +80,6 @@ def main():
     # Conversion matrix from RGB to CAM16UCS colour values.  Indexed by
     # 24-bit RGB value
     rgb24_to_cam16ucs = np.load("data/rgb24_to_cam16ucs.npy")
-    rgb12_iigs_to_cam16ucs = np.load("data/rgb12_iigs_to_cam16ucs.npy")
 
     # Open and resize source image
     image = image_py.open(args.input)
@@ -442,94 +90,21 @@ def main():
         image_py.resize(image, screen.X_RES, screen.Y_RES,
                         gamma=args.gamma_correct)).astype(np.float32) / 255
 
-    # TODO: flags
-    inner_iterations = 10
-    outer_iterations = 20
+    convert_shr.convert(screen, rgb, args)
 
-    if args.show_output:
-        pygame.init()
-        # TODO: for some reason I need to execute this twice - the first time
-        #  the window is created and immediately destroyed
-        _ = pygame.display.set_mode((640, 400))
-        canvas = pygame.display.set_mode((640, 400))
-        canvas.fill((0, 0, 0))
-        pygame.display.flip()
+    # dither = dither_pattern.PATTERNS[args.dither]()
+    # bitmap = dither_dhr_pyx.dither_image(
+    #     screen, rgb, dither, args.lookahead, args.verbose, rgb24_to_cam16ucs)
 
-    total_image_error = None
-    cluster_palette = ClusterPalette(
-        rgb, fixed_colours=args.fixed_colours,
-        rgb12_iigs_to_cam16ucs=rgb12_iigs_to_cam16ucs,
-        rgb24_to_cam16ucs=rgb24_to_cam16ucs)
-
-    seq = 0
-    for (new_total_image_error, output_4bit, line_to_palette,
-         palettes_rgb12_iigs, palettes_linear_rgb) in cluster_palette.iterate(
-        inner_iterations, outer_iterations):
-
-        if args.verbose and total_image_error is not None:
-            print("Improved quality +%f%% (%f)" % (
-                (1 - new_total_image_error / total_image_error) * 100,
-                new_total_image_error))
-        total_image_error = new_total_image_error
-        for i in range(16):
-            screen.set_palette(i, palettes_rgb12_iigs[i, :, :])
-
-        # Recompute current screen RGB image
-        screen.set_pixels(output_4bit)
-        output_rgb = np.empty((200, 320, 3), dtype=np.uint8)
-        for i in range(200):
-            screen.line_palette[i] = line_to_palette[i]
-            output_rgb[i, :, :] = (
-                    palettes_linear_rgb[line_to_palette[i]][
-                        output_4bit[i, :]] * 255
-            ).astype(np.uint8)
-
-        output_srgb = (image_py.linear_to_srgb(output_rgb)).astype(np.uint8)
-
-        # dither = dither_pattern.PATTERNS[args.dither]()
-        # bitmap = dither_dhr_pyx.dither_image(
-        #     screen, rgb, dither, args.lookahead, args.verbose, rgb24_to_cam16ucs)
-
-        # Show output image by rendering in target palette
-        # output_palette_name = args.show_palette or args.palette
-        # output_palette = palette_py.PALETTES[output_palette_name]()
-        # output_screen = screen_py.DHGRScreen(output_palette)
-        # if output_palette_name == "ntsc":
-        #     output_srgb = output_screen.bitmap_to_image_ntsc(bitmap)
-        # else:
-        #     output_srgb = image_py.linear_to_srgb(
-        #         output_screen.bitmap_to_image_rgb(bitmap)).astype(np.uint8)
-        out_image = image_py.resize(
-            Image.fromarray(output_srgb), screen.X_RES * 2, screen.Y_RES * 2,
-            srgb_output=True)
-
-        if args.show_output:
-            surface = pygame.surfarray.make_surface(
-                np.asarray(out_image).transpose((1, 0, 2)))  # flip y/x axes
-            canvas.blit(surface, (0, 0))
-            pygame.display.flip()
-
-        unique_colours = np.unique(
-            palettes_rgb12_iigs.reshape(-1, 3), axis=0).shape[0]
-        if args.verbose:
-            print("%d unique colours" % unique_colours)
-
-        seq += 1
-
-        if args.save_preview:
-            # Save Double hi-res image
-            outfile = os.path.join(
-                os.path.splitext(args.output)[0] + "-%d-preview.png" % seq)
-            out_image.save(outfile, "PNG")
-        screen.pack()
-        # with open(args.output, "wb") as f:
-        #     f.write(bytes(screen.aux))
-        #     f.write(bytes(screen.main))
-        with open(args.output, "wb") as f:
-            f.write(bytes(screen.memory))
-
-    if args.show_final_score:
-        print("FINAL_SCORE:", total_image_error)
+    # Show output image by rendering in target palette
+    # output_palette_name = args.show_palette or args.palette
+    # output_palette = palette_py.PALETTES[output_palette_name]()
+    # output_screen = screen_py.DHGRScreen(output_palette)
+    # if output_palette_name == "ntsc":
+    #     output_srgb = output_screen.bitmap_to_image_ntsc(bitmap)
+    # else:
+    #     output_srgb = image_py.linear_to_srgb(
+    #         output_screen.bitmap_to_image_rgb(bitmap)).astype(np.uint8)
 
 
 if __name__ == "__main__":
diff --git a/convert_shr.py b/convert_shr.py
new file mode 100644
index 0000000..5db67f6
--- /dev/null
+++ b/convert_shr.py
@@ -0,0 +1,451 @@
+from collections import defaultdict
+import os.path
+import random
+from typing import Tuple
+
+from PIL import Image
+import colour
+import numpy as np
+from sklearn import cluster
+
+from os import environ
+
+environ['PYGAME_HIDE_SUPPORT_PROMPT'] = '1'
+import pygame
+
+import dither_shr as dither_shr_pyx
+import image as image_py
+
+
+class ClusterPalette:
+    def __init__(
+            self, image: np.ndarray, rgb12_iigs_to_cam16ucs, rgb24_to_cam16ucs,
+            fixed_colours=0):
+
+        # Conversion matrix from 12-bit //gs RGB colour space to CAM16UCS
+        # colour space
+        self._rgb12_iigs_to_cam16ucs = rgb12_iigs_to_cam16ucs
+
+        # Conversion matrix from 24-bit linear RGB colour space to CAM16UCS
+        # colour space
+        self._rgb24_to_cam16ucs = rgb24_to_cam16ucs
+
+        # Preprocessed source image in 24-bit linear RGB colour space.  We
+        # first dither the source image using the full 12-bit //gs RGB colour
+        # palette, ignoring SHR palette limitations (i.e. 4096 independent
+        # colours for each pixel).  This gives much better results for e.g.
+        # solid blocks of colour, which would be dithered inconsistently if
+        # targeting the source image directly.
+        self._image_rgb = self._perfect_dither(image)
+
+        # Preprocessed source image in CAM16UCS colour space
+        self._colours_cam = self._image_colours_cam(self._image_rgb)
+
+        # How many image colours to fix identically across all 16 SHR
+        # palettes.  These are taken to be the most prevalent colours from
+        # _global_palette.
+        self._fixed_colours = fixed_colours
+
+        # We fit a 16-colour palette against the entire image which is used
+        # as starting values for fitting the reserved colours in the 16 SHR
+        # palettes.
+        self._global_palette = np.empty((16, 3), dtype=np.uint8)
+
+        # 16 SHR palettes each of 16 colours, in CAM16UCS colour space
+        self._palettes_cam = np.empty((16, 16, 3), dtype=np.float32)
+
+        # 16 SHR palettes each of 16 colours, in //gs 4-bit RGB colour space
+        self._palettes_rgb = np.empty((16, 16, 3), dtype=np.uint8)
+
+        # defaultdict(list) mapping palette index to lines using this palette
+        self._palette_lines = self._init_palette_lines()
+
+    def _image_colours_cam(self, image: Image):
+        colours_rgb = np.asarray(image)  # .reshape((-1, 3))
+        with colour.utilities.suppress_warnings(colour_usage_warnings=True):
+            colours_cam = colour.convert(colours_rgb, "RGB",
+                                         "CAM16UCS").astype(np.float32)
+        return colours_cam
+
+    def _init_palette_lines(self, init_random=False):
+        palette_lines = defaultdict(list)
+
+        if init_random:
+            lines = list(range(200))
+            random.shuffle(lines)
+            idx = 0
+            while lines:
+                palette_lines[idx].append(lines.pop())
+                idx += 1
+        else:
+            palette_splits = self._equal_palette_splits()
+            for i, lh in enumerate(palette_splits):
+                l, h = lh
+                palette_lines[i].extend(list(range(l, h)))
+        return palette_lines
+
+    def _equal_palette_splits(self, palette_height=35):
+        # The 16 palettes are striped across consecutive (overlapping) line
+        # ranges.  Since nearby lines tend to have similar colours, this has
+        # the effect of smoothing out the colour transitions across palettes.
+
+        # If we want to overlap 16 palettes in 200 lines, where each palette
+        # has height H and overlaps the previous one by L lines, then the
+        # boundaries are at lines:
+        #   (0, H), (H-L, 2H-L), (2H-2L, 3H-2L), ..., (15H-15L, 16H - 15L)
+        # i.e. 16H - 15L = 200, so for a given palette height H we need to
+        # overlap by:
+        #   L = (16H - 200)/15
+
+        palette_overlap = (16 * palette_height - 200) / 15
+
+        palette_ranges = []
+        for palette_idx in range(16):
+            palette_lower = palette_idx * (palette_height - palette_overlap)
+            palette_upper = palette_lower + palette_height
+            palette_ranges.append((int(np.round(palette_lower)),
+                                   int(np.round(palette_upper))))
+        return palette_ranges
+
+    def _perfect_dither(self, source_image: np.ndarray):
+        """Dither a "perfect" image using the full 12-bit //gs RGB colour
+        palette, ignoring restrictions."""
+
+        # Suppress divide by zero warning,
+        # https://github.com/colour-science/colour/issues/900
+        with colour.utilities.suppress_warnings(python_warnings=True):
+            full_palette_linear_rgb = colour.convert(
+                self._rgb12_iigs_to_cam16ucs, "CAM16UCS", "RGB").astype(
+                np.float32)
+
+        total_image_error, image_rgb = dither_shr_pyx.dither_shr_perfect(
+            source_image, self._rgb12_iigs_to_cam16ucs, full_palette_linear_rgb,
+            self._rgb24_to_cam16ucs)
+        # print("Perfect image error:", total_image_error)
+        return image_rgb
+
+    def _dither_image(self, palettes_cam):
+        # Suppress divide by zero warning,
+        # https://github.com/colour-science/colour/issues/900
+        with colour.utilities.suppress_warnings(python_warnings=True):
+            palettes_linear_rgb = colour.convert(
+                palettes_cam, "CAM16UCS", "RGB").astype(np.float32)
+
+        output_4bit, line_to_palette, total_image_error, palette_line_errors = \
+            dither_shr_pyx.dither_shr(
+                self._image_rgb, palettes_cam, palettes_linear_rgb,
+                self._rgb24_to_cam16ucs)
+
+        # Update map of palettes to image lines for which the palette was the
+        # best match
+        palette_lines = defaultdict(list)
+        for line, palette in enumerate(line_to_palette):
+            palette_lines[palette].append(line)
+        self._palette_lines = palette_lines
+
+        self._palette_line_errors = palette_line_errors
+
+        return (output_4bit, line_to_palette, palettes_linear_rgb,
+                total_image_error)
+
+    def iterate(self, max_inner_iterations: int,
+                max_outer_iterations: int):
+        total_image_error = 1e9
+
+        outer_iterations_since_improvement = 0
+        while outer_iterations_since_improvement < max_outer_iterations:
+            inner_iterations_since_improvement = 0
+            self._palette_lines = self._init_palette_lines()
+
+            while inner_iterations_since_improvement < max_inner_iterations:
+                # print("Iterations %d" % inner_iterations_since_improvement)
+                new_palettes_cam, new_palettes_rgb12_iigs = (
+                    self._fit_shr_palettes())
+
+                # Recompute image with proposed palettes and check whether it
+                # has lower total image error than our previous best.
+                (output_4bit, line_to_palette, palettes_linear_rgb,
+                 new_total_image_error) = self._dither_image(new_palettes_cam)
+
+                self._reassign_unused_palettes(
+                    line_to_palette, new_palettes_rgb12_iigs)
+
+                if new_total_image_error >= total_image_error:
+                    inner_iterations_since_improvement += 1
+                    continue
+
+                # We found a globally better set of palettes, so restart the
+                # clocks
+                inner_iterations_since_improvement = 0
+                outer_iterations_since_improvement = -1
+                total_image_error = new_total_image_error
+
+                self._palettes_cam = new_palettes_cam
+                self._palettes_rgb = new_palettes_rgb12_iigs
+
+                yield (new_total_image_error, output_4bit, line_to_palette,
+                       new_palettes_rgb12_iigs, palettes_linear_rgb)
+            outer_iterations_since_improvement += 1
+
+    def _fit_shr_palettes(self) -> Tuple[np.ndarray, np.ndarray]:
+        """Attempt to find new palettes that locally improve image quality.
+
+        Re-fit a set of 16 palettes from (overlapping) line ranges of the
+        source image, using k-means clustering in CAM16-UCS colour space.
+
+        We maintain the total image error for the pixels on which the 16
+        palettes are clustered.  A new palette that increases this local
+        image error is rejected.
+
+        New palettes that reduce local error cannot be applied immediately
+        though, because they may cause an increase in *global* image error
+        when dithering.  i.e. they would reduce the overall image quality.
+
+        The newly fitted palettes are returned; the caller (see iterate())
+        keeps them only if re-dithering lowers the total image error.
+        """
+        new_palettes_cam = np.empty_like(self._palettes_cam)
+        new_palettes_rgb12_iigs = np.empty_like(self._palettes_rgb)
+
+        # Compute a new 16-colour global palette for the entire image,
+        # used as the starting center positions for k-means clustering of the
+        # individual palettes
+        self._fit_global_palette()
+
+        for palette_idx in range(16):
+            palette_pixels = (
+                self._colours_cam[
+                self._palette_lines[palette_idx], :, :].reshape(-1, 3))
+
+            # Fix reserved colours from the global palette.
+            initial_centroids = np.copy(self._global_palette)
+            pixels_rgb_iigs = dither_shr_pyx.convert_cam16ucs_to_rgb12_iigs(
+                palette_pixels)
+            seen_colours = set()
+            for i in range(self._fixed_colours):
+                seen_colours.add(tuple(initial_centroids[i, :]))
+
+            # Pick unique random colours from the sample points for the
+            # remaining initial centroids.
+            for i in range(self._fixed_colours, 16):
+                choice = np.random.randint(0, pixels_rgb_iigs.shape[0])
+                new_colour = pixels_rgb_iigs[choice, :]
+                if tuple(new_colour) in seen_colours:
+                    continue
+                seen_colours.add(tuple(new_colour))
+                initial_centroids[i, :] = new_colour
+
+            # If there are any single colours in our source //gs RGB pixels that
+            # represent more than fixed_colour_fraction_threshold of the total,
+            # then fix these colours for the palette instead of clustering
+            # them.  This reduces artifacting on blocks of colour.
+            fixed_colour_fraction_threshold = 0.1
+            most_frequent_colours = sorted(list(zip(
+                *np.unique(pixels_rgb_iigs, return_counts=True, axis=0))),
+                key=lambda kv: kv[1], reverse=True)
+            fixed_colours = self._fixed_colours
+            for colour, freq in most_frequent_colours:
+                if (freq < (palette_pixels.shape[0] *
+                            fixed_colour_fraction_threshold)) or (
+                        fixed_colours == 16):
+                    break
+                if tuple(colour) not in seen_colours:
+                    seen_colours.add(tuple(colour))
+                    initial_centroids[fixed_colours, :] = colour
+                    fixed_colours += 1
+
+            palette_rgb12_iigs = dither_shr_pyx.k_means_with_fixed_centroids(
+                n_clusters=16, n_fixed=fixed_colours,
+                samples=palette_pixels,
+                initial_centroids=initial_centroids,
+                max_iterations=1000,
+                rgb12_iigs_to_cam16ucs=self._rgb12_iigs_to_cam16ucs)
+            # If the k-means clustering returned fewer than 16 unique colours,
+            # fill out the remainder with the most common pixel colours that
+            # have not yet been used.
+            palette_rgb12_iigs = self._fill_short_palette(
+                palette_rgb12_iigs, most_frequent_colours)
+
+            for i in range(16):
+                new_palettes_cam[palette_idx, i, :] = (
+                    np.array(dither_shr_pyx.convert_rgb12_iigs_to_cam(
+                        self._rgb12_iigs_to_cam16ucs, palette_rgb12_iigs[
+                            i]), dtype=np.float32))
+
+            new_palettes_rgb12_iigs[palette_idx, :, :] = palette_rgb12_iigs
+
+        self._palettes_accepted = False
+        return new_palettes_cam, new_palettes_rgb12_iigs
+
+    def _fit_global_palette(self):
+        """Compute a 16-colour palette for the entire image to use as
+        starting point for the sub-palettes.  This should help when the image
+        has large blocks of colour since the sub-palettes will tend to pick the
+        same colours."""
+
+        clusters = cluster.MiniBatchKMeans(n_clusters=16, max_iter=10000)
+        clusters.fit_predict(self._colours_cam.reshape(-1, 3))
+
+        # Dict of {palette idx : frequency count}
+        palette_freq = {idx: 0 for idx in range(16)}
+        for idx, freq in zip(*np.unique(clusters.labels_, return_counts=True)):
+            palette_freq[idx] = freq
+
+        frequency_order = [
+            k for k, v in sorted(
+                list(palette_freq.items()), key=lambda kv: kv[1], reverse=True)]
+
+        self._global_palette = (
+            dither_shr_pyx.convert_cam16ucs_to_rgb12_iigs(
+                clusters.cluster_centers_[frequency_order].astype(
+                    np.float32)))
+
+    def _fill_short_palette(self, palette_iigs_rgb, most_frequent_colours):
+        """Fill out the palette to 16 unique entries."""
+
+        # We want to maintain order of insertion so that we respect the
+        # ordering of fixed colours in the palette.  Python doesn't have an
+        # orderedset but dicts preserve insertion order.
+        palette_set = {}
+        for palette_entry in palette_iigs_rgb:
+            palette_set[tuple(palette_entry)] = True
+        if len(palette_set) == 16:
+            return palette_iigs_rgb
+
+        # Add most frequent image colours that are not yet in the palette
+        for colour, freq in most_frequent_colours:
+            if tuple(colour) in palette_set:
+                continue
+            palette_set[tuple(colour)] = True
+            # print("Added freq %d" % freq)
+            if len(palette_set) == 16:
+                break
+
+        # We couldn't find any more unique colours, fill out with random ones.
+        while len(palette_set) < 16:
+            palette_set[
+                tuple(np.random.randint(0, 16, size=3, dtype=np.uint8))] = True
+
+        return np.array(tuple(palette_set.keys()), dtype=np.uint8)
+
+    def _reassign_unused_palettes(self, line_to_palette, palettes_iigs_rgb):
+        palettes_used = [False] * 16
+        for palette in line_to_palette:
+            palettes_used[palette] = True
+        best_palette_lines = [v for k, v in sorted(list(zip(
+            self._palette_line_errors, range(200))))]
+
+        all_palettes = set()
+        for palette_idx, palette_iigs_rgb in enumerate(palettes_iigs_rgb):
+            palette_set = set()
+            for palette_entry in palette_iigs_rgb:
+                palette_set.add(tuple(palette_entry))
+            palette_set = frozenset(palette_set)
+            if palette_set in all_palettes:
+                print("Duplicate palette", palette_idx, palette_set)
+                palettes_used[palette_idx] = False
+
+        for palette_idx, palette_used in enumerate(palettes_used):
+            if palette_used:
+                continue
+            # print("Reassigning palette %d" % palette_idx)
+
+            # TODO: also remove from old entry
+            worst_line = best_palette_lines.pop()
+            self._palette_lines[palette_idx] = [worst_line]
+
+
+def convert(screen, rgb: np.ndarray, args):
+    # Conversion matrix from RGB to CAM16UCS colour values.  Indexed by
+    # 24-bit RGB value
+    rgb24_to_cam16ucs = np.load("data/rgb24_to_cam16ucs.npy")
+    rgb12_iigs_to_cam16ucs = np.load("data/rgb12_iigs_to_cam16ucs.npy")
+
+    # TODO: flags
+    inner_iterations = 10
+    outer_iterations = 20
+
+    if args.show_output:
+        pygame.init()
+        # TODO: for some reason I need to execute this twice - the first time
+        #  the window is created and immediately destroyed
+        _ = pygame.display.set_mode((640, 400))
+        canvas = pygame.display.set_mode((640, 400))
+        canvas.fill((0, 0, 0))
+        pygame.display.flip()
+
+    total_image_error = None
+    cluster_palette = ClusterPalette(
+        rgb, fixed_colours=args.fixed_colours,
+        rgb12_iigs_to_cam16ucs=rgb12_iigs_to_cam16ucs,
+        rgb24_to_cam16ucs=rgb24_to_cam16ucs)
+
+    seq = 0
+    for (new_total_image_error, output_4bit, line_to_palette,
+         palettes_rgb12_iigs, palettes_linear_rgb) in cluster_palette.iterate(
+        inner_iterations, outer_iterations):
+
+        if args.verbose and total_image_error is not None:
+            print("Improved quality +%f%% (%f)" % (
+                (1 - new_total_image_error / total_image_error) * 100,
+                new_total_image_error))
+        total_image_error = new_total_image_error
+        for i in range(16):
+            screen.set_palette(i, palettes_rgb12_iigs[i, :, :])
+
+        # Recompute current screen RGB image
+        screen.set_pixels(output_4bit)
+        output_rgb = np.empty((200, 320, 3), dtype=np.uint8)
+        for i in range(200):
+            screen.line_palette[i] = line_to_palette[i]
+            output_rgb[i, :, :] = (
+                    palettes_linear_rgb[line_to_palette[i]][
+                        output_4bit[i, :]] * 255
+            ).astype(np.uint8)
+
+        output_srgb = (image_py.linear_to_srgb(output_rgb)).astype(np.uint8)
+
+        # dither = dither_pattern.PATTERNS[args.dither]()
+        # bitmap = dither_dhr_pyx.dither_image(
+        #     screen, rgb, dither, args.lookahead, args.verbose, rgb24_to_cam16ucs)
+
+        # Show output image by rendering in target palette
+        # output_palette_name = args.show_palette or args.palette
+        # output_palette = palette_py.PALETTES[output_palette_name]()
+        # output_screen = screen_py.DHGRScreen(output_palette)
+        # if output_palette_name == "ntsc":
+        #     output_srgb = output_screen.bitmap_to_image_ntsc(bitmap)
+        # else:
+        #     output_srgb = image_py.linear_to_srgb(
+        #         output_screen.bitmap_to_image_rgb(bitmap)).astype(np.uint8)
+        out_image = image_py.resize(
+            Image.fromarray(output_srgb), screen.X_RES * 2, screen.Y_RES * 2,
+            srgb_output=True)
+
+        if args.show_output:
+            surface = pygame.surfarray.make_surface(
+                np.asarray(out_image).transpose((1, 0, 2)))  # flip y/x axes
+            canvas.blit(surface, (0, 0))
+            pygame.display.flip()
+
+        unique_colours = np.unique(
+            palettes_rgb12_iigs.reshape(-1, 3), axis=0).shape[0]
+        if args.verbose:
+            print("%d unique colours" % unique_colours)
+
+        seq += 1
+
+        if args.save_preview:
+            # Save preview of the super hi-res image
+            outfile = os.path.join(
+                os.path.splitext(args.output)[0] + "-%d-preview.png" % seq)
+            out_image.save(outfile, "PNG")
+        screen.pack()
+        # with open(args.output, "wb") as f:
+        #     f.write(bytes(screen.aux))
+        #     f.write(bytes(screen.main))
+        with open(args.output, "wb") as f:
+            f.write(bytes(screen.memory))
+
+    if args.show_final_score:
+        print("FINAL_SCORE:", total_image_error)

From 4d5dea2c417c440e6813c71b07bfe4ee7a6df182 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Fri, 26 Nov 2021 13:15:57 +0000
Subject: [PATCH 70/82] Restore dhr conversion support

---
 convert.py     | 124 +++++++++++++++++++++++++++----------------------
 convert_dhr.py |  46 ++++++++++++++++++
 2 files changed, 114 insertions(+), 56 deletions(-)
 create mode 100644 convert_dhr.py

diff --git a/convert.py b/convert.py
index 3dad419..20957c9 100644
--- a/convert.py
+++ b/convert.py
@@ -3,8 +3,8 @@
 import argparse
 import numpy as np
 
-
-import convert_shr
+import convert_dhr as convert_dhr_py
+import convert_shr as convert_shr_py
 import dither_pattern
 import image as image_py
 import palette as palette_py
@@ -15,21 +15,10 @@ import screen as screen_py
 # - support LR/DLR
 # - support HGR
 
-
-def main():
-    parser = argparse.ArgumentParser()
+def add_common_args(parser):
     parser.add_argument("input", type=str, help="Input image file to process.")
     parser.add_argument("output", type=str, help="Output file for converted "
                                                  "Apple II image.")
-    parser.add_argument(
-        "--lookahead", type=int, default=8,
-        help=("How many pixels to look ahead to compensate for NTSC colour "
-              "artifacts (default: 8)"))
-    parser.add_argument(
-        '--dither', type=str, choices=list(dither_pattern.PATTERNS.keys()),
-        default=dither_pattern.DEFAULT_PATTERN,
-        help="Error distribution pattern to apply when dithering (default: "
-             + dither_pattern.DEFAULT_PATTERN + ")")
     parser.add_argument(
         '--show-input', action=argparse.BooleanOptionalAction, default=False,
         help="Whether to show the input image before conversion.")
@@ -37,17 +26,10 @@ def main():
         '--show-output', action=argparse.BooleanOptionalAction, default=True,
         help="Whether to show the output image after conversion.")
     parser.add_argument(
-        '--palette', type=str, choices=list(set(palette_py.PALETTES.keys())),
-        default=palette_py.DEFAULT_PALETTE,
-        help='RGB colour palette to dither to.  "ntsc" blends colours over 8 '
-             'pixels and gives better image quality on targets that '
-             'use/emulate NTSC, but can be substantially slower.  Other '
-             'palettes determine colours based on 4 pixel sequences '
-             '(default: ' + palette_py.DEFAULT_PALETTE + ")")
-    parser.add_argument(
-        '--show-palette', type=str, choices=list(palette_py.PALETTES.keys()),
-        help="RGB colour palette to use when --show_output (default: "
-             "value of --palette)")
+        '--save-preview', action=argparse.BooleanOptionalAction, default=True,
+        help='Whether to save a .PNG rendering of the output image (default: '
+             'True)'
+    )
     parser.add_argument(
         '--verbose', action=argparse.BooleanOptionalAction,
         default=False, help="Show progress during conversion")
@@ -55,56 +37,86 @@ def main():
         '--gamma-correct', type=float, default=2.4,
         help='Gamma-correct image by this value (default: 2.4)'
     )
-    parser.add_argument(
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    subparsers = parser.add_subparsers()
+
+    dhr_parser = subparsers.add_parser("dhr")
+    add_common_args(dhr_parser)
+
+    def validate_lookahead(arg: int) -> int:
+        try:
+            int_arg = int(arg)
+            if int_arg < 1:
+                return int_arg
+        except Exception:
+            raise argparse.ArgumentTypeError("--lookahead must be at least 1")
+    dhr_parser.add_argument(
+        "--lookahead", type=validate_lookahead, default=8,
+        help=("How many pixels to look ahead to compensate for NTSC colour "
+              "artifacts (default: 8)"))
+    dhr_parser.add_argument(
+        '--dither', type=str, choices=list(dither_pattern.PATTERNS.keys()),
+        default=dither_pattern.DEFAULT_PATTERN,
+        help="Error distribution pattern to apply when dithering (default: "
+             + dither_pattern.DEFAULT_PATTERN + ")")
+    dhr_parser.add_argument(
+        '--palette', type=str, choices=list(set(palette_py.PALETTES.keys())),
+        default=palette_py.DEFAULT_PALETTE,
+        help='RGB colour palette to dither to.  "ntsc" blends colours over 8 '
+             'pixels and gives better image quality on targets that '
+             'use/emulate NTSC, but can be substantially slower.  Other '
+             'palettes determine colours based on 4 pixel sequences '
+             '(default: ' + palette_py.DEFAULT_PALETTE + ")")
+    dhr_parser.add_argument(
+        '--show-palette', type=str, choices=list(palette_py.PALETTES.keys()),
+        help="RGB colour palette to use when --show_output (default: "
+             "value of --palette)")
+    dhr_parser.set_defaults(func=convert_dhr)
+
+    shr_parser = subparsers.add_parser("shr")
+    add_common_args(shr_parser)
+    shr_parser.add_argument(
         '--fixed-colours', type=int, default=0,
         help='How many colours to fix as identical across all 16 SHR palettes '
              '(default: 0)'
     )
-    parser.add_argument(
-        '--save-preview', action=argparse.BooleanOptionalAction, default=True,
-        help='Whether to save a .PNG rendering of the output image (default: '
-             'True)'
-    )
-    parser.add_argument(
+    shr_parser.add_argument(
         '--show-final-score', action=argparse.BooleanOptionalAction,
         default=False, help='Whether to output the final image quality score '
                             '(default: False)'
     )
+    shr_parser.set_defaults(func=convert_shr)
     args = parser.parse_args()
-    if args.lookahead < 1:
-        parser.error('--lookahead must be at least 1')
+    args.func(args)
 
-    # palette = palette_py.PALETTES[args.palette]()
-    screen = screen_py.SHR320Screen()
-
-    # Conversion matrix from RGB to CAM16UCS colour values.  Indexed by
-    # 24-bit RGB value
-    rgb24_to_cam16ucs = np.load("data/rgb24_to_cam16ucs.npy")
 
+def prepare_image(image_filename: str, show_input: bool, screen,
+                  gamma_correct: float) -> np.ndarray:
     # Open and resize source image
-    image = image_py.open(args.input)
-    if args.show_input:
+    image = image_py.open(image_filename)
+    if show_input:
         image_py.resize(image, screen.X_RES, screen.Y_RES,
                         srgb_output=False).show()
     rgb = np.array(
         image_py.resize(image, screen.X_RES, screen.Y_RES,
-                        gamma=args.gamma_correct)).astype(np.float32) / 255
+                        gamma=gamma_correct)).astype(np.float32) / 255
+    return rgb
 
-    convert_shr.convert(screen, rgb, args)
 
-    # dither = dither_pattern.PATTERNS[args.dither]()
-    # bitmap = dither_dhr_pyx.dither_image(
-    #     screen, rgb, dither, args.lookahead, args.verbose, rgb24_to_cam16ucs)
+def convert_dhr(args):
+    palette = palette_py.PALETTES[args.palette]()
+    screen = screen_py.DHGRScreen(palette)
+    rgb = prepare_image(args.input, args.show_input, screen, args.gamma_correct)
+    convert_dhr_py.convert(screen, rgb, args)
 
-    # Show output image by rendering in target palette
-    # output_palette_name = args.show_palette or args.palette
-    # output_palette = palette_py.PALETTES[output_palette_name]()
-    # output_screen = screen_py.DHGRScreen(output_palette)
-    # if output_palette_name == "ntsc":
-    #     output_srgb = output_screen.bitmap_to_image_ntsc(bitmap)
-    # else:
-    #     output_srgb = image_py.linear_to_srgb(
-    #         output_screen.bitmap_to_image_rgb(bitmap)).astype(np.uint8)
+
+def convert_shr(args):
+    screen = screen_py.SHR320Screen()
+    rgb = prepare_image(args.input, args.show_input, screen, args.gamma_correct)
+    convert_shr_py.convert(screen, rgb, args)
 
 
 if __name__ == "__main__":
diff --git a/convert_dhr.py b/convert_dhr.py
new file mode 100644
index 0000000..c769bdf
--- /dev/null
+++ b/convert_dhr.py
@@ -0,0 +1,46 @@
+import os.path
+
+from PIL import Image
+import numpy as np
+
+import dither_dhr as dither_dhr_pyx
+import dither_pattern
+import palette as palette_py
+import screen as screen_py
+import image as image_py
+
+
+def convert(screen, rgb: np.ndarray, args):
+    # Conversion matrix from RGB to CAM16UCS colour values.  Indexed by
+    # 24-bit RGB value
+    rgb24_to_cam16ucs = np.load("data/rgb24_to_cam16ucs.npy")
+
+    dither = dither_pattern.PATTERNS[args.dither]()
+    bitmap = dither_dhr_pyx.dither_image(
+        screen, rgb, dither, args.lookahead, args.verbose, rgb24_to_cam16ucs)
+
+    # Show output image by rendering in target palette
+    output_palette_name = args.show_palette or args.palette
+    output_palette = palette_py.PALETTES[output_palette_name]()
+    output_screen = screen_py.DHGRScreen(output_palette)
+    if output_palette_name == "ntsc":
+        output_srgb = output_screen.bitmap_to_image_ntsc(bitmap)
+    else:
+        output_srgb = image_py.linear_to_srgb(
+            output_screen.bitmap_to_image_rgb(bitmap)).astype(np.uint8)
+    out_image = image_py.resize(
+        Image.fromarray(output_srgb), screen.X_RES, screen.Y_RES * 2,
+        srgb_output=True)
+
+    if args.show_output:
+        out_image.show()
+
+    if args.save_preview:
+        # Save Double hi-res image
+        outfile = os.path.join(
+            os.path.splitext(args.output)[0] + "-preview.png")
+        out_image.save(outfile, "PNG")
+    screen.pack(bitmap)
+    with open(args.output, "wb") as f:
+        f.write(bytes(screen.aux))
+        f.write(bytes(screen.main))

From a2b67ba88224e759d34460de3a4d459f9a715d17 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Fri, 26 Nov 2021 13:36:29 +0000
Subject: [PATCH 71/82] Require a subcommand

---
 convert.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/convert.py b/convert.py
index 20957c9..7a80342 100644
--- a/convert.py
+++ b/convert.py
@@ -41,7 +41,7 @@ def add_common_args(parser):
 
 def main():
     parser = argparse.ArgumentParser()
-    subparsers = parser.add_subparsers()
+    subparsers = parser.add_subparsers(required=True)
 
     dhr_parser = subparsers.add_parser("dhr")
     add_common_args(dhr_parser)

From ccbb6980d98b9eeb756f9c02487ec8ee2953a8d9 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Sat, 27 Nov 2021 10:43:41 +0000
Subject: [PATCH 72/82] Load data files relative to script path

---
 convert_dhr.py | 4 +++-
 convert_shr.py | 7 +++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/convert_dhr.py b/convert_dhr.py
index c769bdf..6e2d5df 100644
--- a/convert_dhr.py
+++ b/convert_dhr.py
@@ -13,7 +13,9 @@ import image as image_py
 def convert(screen, rgb: np.ndarray, args):
     # Conversion matrix from RGB to CAM16UCS colour values.  Indexed by
     # 24-bit RGB value
-    rgb24_to_cam16ucs = np.load("data/rgb24_to_cam16ucs.npy")
+    base_dir = os.path.dirname(__file__)
+    rgb24_to_cam16ucs = np.load(
+        os.path.join(base_dir, "data/rgb24_to_cam16ucs.npy"))
 
     dither = dither_pattern.PATTERNS[args.dither]()
     bitmap = dither_dhr_pyx.dither_image(
diff --git a/convert_shr.py b/convert_shr.py
index 5db67f6..40e955a 100644
--- a/convert_shr.py
+++ b/convert_shr.py
@@ -358,8 +358,11 @@ class ClusterPalette:
 def convert(screen, rgb: np.ndarray, args):
     # Conversion matrix from RGB to CAM16UCS colour values.  Indexed by
     # 24-bit RGB value
-    rgb24_to_cam16ucs = np.load("data/rgb24_to_cam16ucs.npy")
-    rgb12_iigs_to_cam16ucs = np.load("data/rgb12_iigs_to_cam16ucs.npy")
+    base_dir = os.path.dirname(__file__)
+    rgb24_to_cam16ucs = np.load(
+        os.path.join(base_dir, "data/rgb24_to_cam16ucs.npy"))
+    rgb12_iigs_to_cam16ucs = np.load(
+        os.path.join(base_dir, "data/rgb12_iigs_to_cam16ucs.npy"))
 
     # TODO: flags
     inner_iterations = 10

From cfef9fa3c94a4b1b0767f22a4d43ae063660394b Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Sat, 16 Jul 2022 21:57:45 +0100
Subject: [PATCH 73/82] Add arg type

---
 convert_dhr.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/convert_dhr.py b/convert_dhr.py
index 6e2d5df..3ccf252 100644
--- a/convert_dhr.py
+++ b/convert_dhr.py
@@ -10,7 +10,7 @@ import screen as screen_py
 import image as image_py
 
 
-def convert(screen, rgb: np.ndarray, args):
+def convert(screen: screen_py.DHGRScreen, rgb: np.ndarray, args):
     # Conversion matrix from RGB to CAM16UCS colour values.  Indexed by
     # 24-bit RGB value
     base_dir = os.path.dirname(__file__)

From 99aa3941968812e605f2bc08aa97fb4d1998b78d Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Sat, 16 Jul 2022 22:00:14 +0100
Subject: [PATCH 74/82] Tweak comment

---
 convert.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/convert.py b/convert.py
index 7a80342..4c3114f 100644
--- a/convert.py
+++ b/convert.py
@@ -10,10 +10,13 @@ import image as image_py
 import palette as palette_py
 import screen as screen_py
 
-
 # TODO:
-# - support LR/DLR
-# - support HGR
+#  - support additional graphics modes (easiest --> hardest):
+#    - LR/DLR
+#    - SHR 3200
+#    - SHR 640
+#    - HGR
+
 
 def add_common_args(parser):
     parser.add_argument("input", type=str, help="Input image file to process.")

From e71352490d405ffab4a7e072c0dd56403f605e7a Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Sat, 16 Jul 2022 22:00:42 +0100
Subject: [PATCH 75/82] Add comments

---
 common.pyx | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/common.pyx b/common.pyx
index b8f435f..803f494 100644
--- a/common.pyx
+++ b/common.pyx
@@ -5,14 +5,19 @@
 
 
 cdef float clip(float a, float min_value, float max_value) nogil:
+    """Clip a value between min_value and max_value inclusive."""
     return min(max(a, min_value), max_value)
 
 
 cdef inline float[::1] convert_rgb_to_cam16ucs(float[:, ::1] rgb_to_cam16ucs, float r, float g, float b) nogil:
+    """Converts floating point (r,g,b) value to 3-tuple in CAM16UCS colour space, via 24-bit RGB lookup matrix."""
+
     cdef unsigned int rgb_24bit = (<unsigned int>(r*255) << 16) + (<unsigned int>(g*255) << 8) + <unsigned int>(b*255)
     return rgb_to_cam16ucs[rgb_24bit]
 
 
 cdef inline double colour_distance_squared(float[::1] colour1, float[::1] colour2) nogil:
+    """Computes squared Euclidean distance between two floating-point colour 3-tuples."""
+
     return (colour1[0] - colour2[0]) ** 2 + (colour1[1] - colour2[1]) ** 2 + (colour1[2] - colour2[2]) ** 2
 

From 8fd0ec5dc64993aff67d51fb6b8c233873df6927 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Sat, 16 Jul 2022 22:13:26 +0100
Subject: [PATCH 76/82] Set window title and clean up a bit

---
 convert_shr.py | 39 +++++++++++++--------------------------
 1 file changed, 13 insertions(+), 26 deletions(-)

diff --git a/convert_shr.py b/convert_shr.py
index 40e955a..ec183c1 100644
--- a/convert_shr.py
+++ b/convert_shr.py
@@ -370,11 +370,10 @@ def convert(screen, rgb: np.ndarray, args):
 
     if args.show_output:
         pygame.init()
-        # TODO: for some reason I need to execute this twice - the first time
-        #  the window is created and immediately destroyed
-        _ = pygame.display.set_mode((640, 400))
         canvas = pygame.display.set_mode((640, 400))
         canvas.fill((0, 0, 0))
+        pygame.display.set_caption("][-Pix image preview")
+        pygame.event.pump()
         pygame.display.flip()
 
     total_image_error = None
@@ -384,9 +383,11 @@ def convert(screen, rgb: np.ndarray, args):
         rgb24_to_cam16ucs=rgb24_to_cam16ucs)
 
     seq = 0
-    for (new_total_image_error, output_4bit, line_to_palette,
-         palettes_rgb12_iigs, palettes_linear_rgb) in cluster_palette.iterate(
-        inner_iterations, outer_iterations):
+    for (
+            new_total_image_error, output_4bit, line_to_palette,
+            palettes_rgb12_iigs,
+            palettes_linear_rgb
+    ) in cluster_palette.iterate(inner_iterations, outer_iterations):
 
         if args.verbose and total_image_error is not None:
             print("Improved quality +%f%% (%f)" % (
@@ -407,20 +408,6 @@ def convert(screen, rgb: np.ndarray, args):
             ).astype(np.uint8)
 
         output_srgb = (image_py.linear_to_srgb(output_rgb)).astype(np.uint8)
-
-        # dither = dither_pattern.PATTERNS[args.dither]()
-        # bitmap = dither_dhr_pyx.dither_image(
-        #     screen, rgb, dither, args.lookahead, args.verbose, rgb24_to_cam16ucs)
-
-        # Show output image by rendering in target palette
-        # output_palette_name = args.show_palette or args.palette
-        # output_palette = palette_py.PALETTES[output_palette_name]()
-        # output_screen = screen_py.DHGRScreen(output_palette)
-        # if output_palette_name == "ntsc":
-        #     output_srgb = output_screen.bitmap_to_image_ntsc(bitmap)
-        # else:
-        #     output_srgb = image_py.linear_to_srgb(
-        #         output_screen.bitmap_to_image_rgb(bitmap)).astype(np.uint8)
         out_image = image_py.resize(
             Image.fromarray(output_srgb), screen.X_RES * 2, screen.Y_RES * 2,
             srgb_output=True)
@@ -429,6 +416,9 @@ def convert(screen, rgb: np.ndarray, args):
             surface = pygame.surfarray.make_surface(
                 np.asarray(out_image).transpose((1, 0, 2)))  # flip y/x axes
             canvas.blit(surface, (0, 0))
+            pygame.display.set_caption("][-Pix image preview [Iteration %d]"
+                                       % seq)
+            pygame.event.pump()
             pygame.display.flip()
 
         unique_colours = np.unique(
@@ -439,15 +429,12 @@ def convert(screen, rgb: np.ndarray, args):
         seq += 1
 
         if args.save_preview:
-            # Save Double hi-res image
+            # Save super hi-res image
             outfile = os.path.join(
-                os.path.splitext(args.output)[0] + "-%d-preview.png" % seq)
+                os.path.splitext(args.output)[0] + "-preview.png")
             out_image.save(outfile, "PNG")
         screen.pack()
-        # with open(args.output, "wb") as f:
-        #     f.write(bytes(screen.aux))
-        #     f.write(bytes(screen.main))
-        with open(args.output, "wb") as f:
+        with open(args.output + ".%d" % seq, "wb") as f:
             f.write(bytes(screen.memory))
 
     if args.show_final_score:

From 1ffb2c911034b651f6c054c5ea73b478ef9417a6 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Mon, 18 Jul 2022 09:59:01 +0100
Subject: [PATCH 77/82] Tidy

---
 precompute_conversion.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/precompute_conversion.py b/precompute_conversion.py
index 2805533..857c715 100644
--- a/precompute_conversion.py
+++ b/precompute_conversion.py
@@ -10,9 +10,10 @@ import colour
 import numpy as np
 
 
-def srgb_to_linear_array(a: np.ndarray, gamma=2.4) -> np.ndarray:
+def srgb_to_linear_rgb_array(a: np.ndarray, gamma=2.4) -> np.ndarray:
     return np.where(a <= 0.04045, a / 12.92, ((a + 0.055) / 1.055) ** gamma)
 
+
 def main():
     print("Precomputing conversion matrix from 24-bit RGB to CAM16UCS colour "
           "space")
@@ -47,7 +48,7 @@ def main():
     # //gs RGB values use gamma-corrected Rec.601 RGB colour space.  We need to
     # convert to Rec.709 RGB as preparation for converting to CAM16UCS.  We
     # do this via the YCbCr intermediate color model.
-    rgb12_iigs = np.clip(srgb_to_linear_array(
+    rgb12_iigs = np.clip(srgb_to_linear_rgb_array(
         np.clip(colour.YCbCr_to_RGB(
             colour.RGB_to_YCbCr(
                 all_rgb12, K=colour.WEIGHTS_YCBCR[
@@ -62,5 +63,6 @@ def main():
     np.save("data/rgb12_iigs_to_cam16ucs.npy", rgb12_iigs_to_cam16ucs)
     del rgb12_iigs_to_cam16ucs
 
+
 if __name__ == "__main__":
     main()

From 3196369b7d88a764cae1346870d0d4cc9d3bc6f4 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Mon, 18 Jul 2022 10:00:19 +0100
Subject: [PATCH 78/82] Tidy a bit and add a --save-intermediate flag

---
 convert.py     |  5 ++++
 convert_shr.py | 69 ++++++++++++++++++++++++++++++--------------------
 2 files changed, 46 insertions(+), 28 deletions(-)

diff --git a/convert.py b/convert.py
index 4c3114f..d7b15a7 100644
--- a/convert.py
+++ b/convert.py
@@ -91,6 +91,11 @@ def main():
         default=False, help='Whether to output the final image quality score '
                             '(default: False)'
     )
+    shr_parser.add_argument(
+        '--save-intermediate', action=argparse.BooleanOptionalAction,
+        default=False, help='Whether to save each intermediate iteration, '
+                            'or just the final image (default: False)'
+    )
     shr_parser.set_defaults(func=convert_shr)
     args = parser.parse_args()
     args.func(args)
diff --git a/convert_shr.py b/convert_shr.py
index ec183c1..e0caeda 100644
--- a/convert_shr.py
+++ b/convert_shr.py
@@ -41,26 +41,28 @@ class ClusterPalette:
         # Preprocessed source image in CAM16UCS colour space
         self._colours_cam = self._image_colours_cam(self._image_rgb)
 
-        # How many image colours to fix identically across all 16 SHR
-        # palettes.  These are taken to be the most prevalent colours from
-        # _global_palette.
-        self._fixed_colours = fixed_colours
-
         # We fit a 16-colour palette against the entire image which is used
         # as starting values for fitting the reserved colours in the 16 SHR
         # palettes.
         self._global_palette = np.empty((16, 3), dtype=np.uint8)
 
+        # How many image colours to fix identically across all 16 SHR
+        # palettes.  These are taken to be the most prevalent colours from
+        # _global_palette.
+        self._fixed_colours = fixed_colours
+
         # 16 SHR palettes each of 16 colours, in CAM16UCS colour space
         self._palettes_cam = np.empty((16, 16, 3), dtype=np.float32)
 
         # 16 SHR palettes each of 16 colours, in //gs 4-bit RGB colour space
         self._palettes_rgb = np.empty((16, 16, 3), dtype=np.uint8)
 
-        # defaultdict(list) mapping palette index to lines using this palette
+        # defaultdict(list) mapping palette index to the lines that use this
+        # palette
         self._palette_lines = self._init_palette_lines()
 
-    def _image_colours_cam(self, image: Image):
+    @staticmethod
+    def _image_colours_cam(image: Image):
         colours_rgb = np.asarray(image)  # .reshape((-1, 3))
         with colour.utilities.suppress_warnings(colour_usage_warnings=True):
             colours_cam = colour.convert(colours_rgb, "RGB",
@@ -84,7 +86,8 @@ class ClusterPalette:
                 palette_lines[i].extend(list(range(l, h)))
         return palette_lines
 
-    def _equal_palette_splits(self, palette_height=35):
+    @staticmethod
+    def _equal_palette_splits(palette_height=35):
         # The 16 palettes are striped across consecutive (overlapping) line
         # ranges.  Since nearby lines tend to have similar colours, this has
         # the effect of smoothing out the colour transitions across palettes.
@@ -202,7 +205,9 @@ class ClusterPalette:
         when dithering.  i.e. they would reduce the overall image quality.
 
         The current (locally) best palettes are returned and can be applied
-        using accept_palettes().
+        using accept_palettes()
+
+        XXX update
         """
         new_palettes_cam = np.empty_like(self._palettes_cam)
         new_palettes_rgb12_iigs = np.empty_like(self._palettes_rgb)
@@ -214,8 +219,8 @@ class ClusterPalette:
 
         for palette_idx in range(16):
             palette_pixels = (
-                self._colours_cam[
-                self._palette_lines[palette_idx], :, :].reshape(-1, 3))
+                self._colours_cam[self._palette_lines[
+                                      palette_idx], :, :].reshape(-1, 3))
 
             # Fix reserved colours from the global palette.
             initial_centroids = np.copy(self._global_palette)
@@ -244,14 +249,14 @@ class ClusterPalette:
                 *np.unique(pixels_rgb_iigs, return_counts=True, axis=0))),
                 key=lambda kv: kv[1], reverse=True)
             fixed_colours = self._fixed_colours
-            for colour, freq in most_frequent_colours:
+            for palette_colour, freq in most_frequent_colours:
                 if (freq < (palette_pixels.shape[0] *
                             fixed_colour_fraction_threshold)) or (
                         fixed_colours == 16):
                     break
-                if tuple(colour) not in seen_colours:
-                    seen_colours.add(tuple(colour))
-                    initial_centroids[fixed_colours, :] = colour
+                if tuple(palette_colour) not in seen_colours:
+                    seen_colours.add(tuple(palette_colour))
+                    initial_centroids[fixed_colours, :] = palette_colour
                     fixed_colours += 1
 
             palette_rgb12_iigs = dither_shr_pyx.k_means_with_fixed_centroids(
@@ -300,7 +305,8 @@ class ClusterPalette:
                 clusters.cluster_centers_[frequency_order].astype(
                     np.float32)))
 
-    def _fill_short_palette(self, palette_iigs_rgb, most_frequent_colours):
+    @staticmethod
+    def _fill_short_palette(palette_iigs_rgb, most_frequent_colours):
         """Fill out the palette to 16 unique entries."""
 
         # We want to maintain order of insertion so that we respect the
@@ -313,11 +319,10 @@ class ClusterPalette:
             return palette_iigs_rgb
 
         # Add most frequent image colours that are not yet in the palette
-        for colour, freq in most_frequent_colours:
-            if tuple(colour) in palette_set:
+        for palette_colour, freq in most_frequent_colours:
+            if tuple(palette_colour) in palette_set:
                 continue
-            palette_set[tuple(colour)] = True
-            # print("Added freq %d" % freq)
+            palette_set[tuple(palette_colour)] = True
             if len(palette_set) == 16:
                 break
 
@@ -348,7 +353,6 @@ class ClusterPalette:
         for palette_idx, palette_used in enumerate(palettes_used):
             if palette_used:
                 continue
-            # print("Reassigning palette %d" % palette_idx)
 
             # TODO: also remove from old entry
             worst_line = best_palette_lines.pop()
@@ -373,7 +377,7 @@ def convert(screen, rgb: np.ndarray, args):
         canvas = pygame.display.set_mode((640, 400))
         canvas.fill((0, 0, 0))
         pygame.display.set_caption("][-Pix image preview")
-        pygame.event.pump()
+        pygame.event.pump()  # Update caption
         pygame.display.flip()
 
     total_image_error = None
@@ -382,6 +386,8 @@ def convert(screen, rgb: np.ndarray, args):
         rgb12_iigs_to_cam16ucs=rgb12_iigs_to_cam16ucs,
         rgb24_to_cam16ucs=rgb24_to_cam16ucs)
 
+    output_base, output_ext = os.path.splitext(args.output)
+
     seq = 0
     for (
             new_total_image_error, output_4bit, line_to_palette,
@@ -418,7 +424,7 @@ def convert(screen, rgb: np.ndarray, args):
             canvas.blit(surface, (0, 0))
             pygame.display.set_caption("][-Pix image preview [Iteration %d]"
                                        % seq)
-            pygame.event.pump()
+            pygame.event.pump()  # Update caption
             pygame.display.flip()
 
         unique_colours = np.unique(
@@ -426,16 +432,23 @@ def convert(screen, rgb: np.ndarray, args):
         if args.verbose:
             print("%d unique colours" % unique_colours)
 
-        seq += 1
-
         if args.save_preview:
             # Save super hi-res image
-            outfile = os.path.join(
-                os.path.splitext(args.output)[0] + "-preview.png")
+            if args.save_intermediate:
+                outfile = "%s-%d-preview.png" % (output_base, seq)
+            else:
+                outfile = "%s-preview.png" % output_base
             out_image.save(outfile, "PNG")
         screen.pack()
-        with open(args.output + ".%d" % seq, "wb") as f:
+
+        if args.save_intermediate:
+            outfile = "%s-%d%s" % (output_base, seq, output_ext)
+        else:
+            outfile = "%s%s" % (output_base, output_ext)
+        with open(outfile, "wb") as f:
             f.write(bytes(screen.memory))
 
+        seq += 1
+
     if args.show_final_score:
         print("FINAL_SCORE:", total_image_error)

From e156dd3b489bc130a1d97fb429873d8b5ec19054 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Mon, 18 Jul 2022 22:11:32 +0100
Subject: [PATCH 79/82] Add a requirements.txt to simplify installation

---
 requirements.txt | 7 +++++++
 1 file changed, 7 insertions(+)
 create mode 100644 requirements.txt

diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..42f3682
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,7 @@
+colour-science
+Cython
+networkx
+numpy
+Pillow
+pygame
+scikit-learn
\ No newline at end of file

From 12d68056170ad16f2b1ed0699187daba75d343fa Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Mon, 18 Jul 2022 22:30:56 +0100
Subject: [PATCH 80/82] Update docs for 2.0 and split out the technical details
 of dhr into its own file

---
 README.md   | 344 ++++++++++------------------------------------------
 docs/dhr.md | 266 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 333 insertions(+), 277 deletions(-)
 create mode 100644 docs/dhr.md

diff --git a/README.md b/README.md
index 9b8432a..468b6d2 100644
--- a/README.md
+++ b/README.md
@@ -1,15 +1,24 @@
-# ][-pix
+# ][-pix 2.0
 
-][-pix is an image conversion utility targeting Apple II graphics modes, currently Double Hi-Res.
+][-pix is an image conversion utility targeting Apple II graphics modes, currently Double Hi-Res and Super Hi-Res.
 
 ## Installation
 
 Requires:
 * python 3.x
-* [numpy](http://numpy.org/)
-* [cython](https://cython.org/)
-* [Pillow](https://python-pillow.org/)
 * [colour-science](https://www.colour-science.org/)
+* [cython](https://cython.org/)
+* [numpy](http://numpy.org/)
+* [Pillow](https://python-pillow.org/)
+* [pygame](https://www.pygame.org/)
+* [scikit-learn](https://scikit-learn.org/)
+
+These dependencies can be installed using the following command:
+
+```buildoutcfg
+# Install python dependencies
+pip install -r requirements.txt
+```
 
 To build ][-pix, run the following commands:
 
@@ -17,16 +26,37 @@ To build ][-pix, run the following commands:
 # Compile cython code
 python setup.py build_ext --inplace
 
-# Precompute RGB/CAM16-UCS colour conversion matrix, used as part of image optimization
+# Precompute colour conversion matrices, used as part of image optimization
 python precompute_conversion.py
 ```
 
-## Usage
+# Usage
 
-Then, to convert an image, the simplest usage is:
+To convert an image, the basic command is:
+
+```bash
+python convert.py <mode> [<flags>] <input> <output>
+```
+where
+* `mode` is `dhr` for Double Hi-Res (560x192), or `shr` for Super Hi-Res (320x200)
+* `input` is the source image file to convert (e.g. `my-image.jpg`)
+* `output` is the output filename to produce (e.g. `my-image.dhr`)
+
+The following flags are supported in both `dhr` and `shr` modes:
+
+* `--show-input` Whether to show the input image before conversion. (default: False)
+* `--show-output` Whether to show the output image after conversion. (default: True)
+* `--save-preview` Whether to save a .PNG rendering of the output image (default: True)
+* `--verbose` Show progress during conversion (default: False)
+* `--gamma-correct` Gamma-correct image by this value (default: 2.4)
+
+
+## Double Hi-Res
+
+To convert an image to double hi-res, the simplest usage is:
 
 ```buildoutcfg
-python convert.py --palette ntsc <input> <output.dhr>
+python convert.py dhr --palette ntsc <input> <output.dhr>
 ```
 
 `<output.dhr>` contains the double-hires image data in a form suitable for transfer to an Apple II disk image.  The 16k output consists of 8k AUX data first, 8K MAIN data second (this matches the output format of other DHGR image converters).  i.e. if loaded at 0x2000, the contents of 0x2000..0x3fff should be moved to 0x4000..0x5fff in AUX memory, and the image can be viewed on DHGR page 2.
@@ -37,17 +67,19 @@ For other available options, use `python convert.py --help`
 
 TODO: document flags
 
-## Examples
+For more details about Double Hi-Res graphics and the conversion process, see [here](docs/dhr.md).
 
-See [here](examples/gallery.md) for more sample image conversions.
+### Examples
 
-### Original
+See [here](examples/gallery.md) for more sample Double Hi-Res image conversions.
+
+#### Original
 
 ![Two colourful parrots sitting on a branch](examples/parrots-original.png)
 
  (Source: [Shreygadgil](https://commons.wikimedia.org/wiki/File:Vibrant_Wings.jpg), [CC BY-SA 4.0](https://creativecommons.org/licenses/by-sa/4.0), via Wikimedia Commons)
 
-### ][-pix preview image
+#### ][-pix preview image
 
 This image was generated using
 
@@ -59,7 +91,7 @@ The resulting ][-pix preview PNG image is shown here.
 
 ![Two colourful parrots sitting on a branch](examples/parrots-iipix-openemulator-preview.png)
 
-### OpenEmulator screenshot
+#### OpenEmulator screenshot
 
 This is a screenshot taken from OpenEmulator when viewing the Double Hi-res image.
 
@@ -73,285 +105,39 @@ This is an OpenEmulator screenshot of the same image converted with `--palette=n
 
 ![Two colourful parrots sitting on a branch](examples/parrots-iipix-ntsc-openemulator.png)
 
+## Super Hi-Res
 
-## Some background on Apple II Double Hi-Res graphics
+To convert an image to Super Hi-Res (320x200, up to 256 colours), the simplest usage is:
 
-Like other (pre-//gs) Apple II graphics modes, Double Hi-Res relies on [NTSC Artifact Colour](https://en.wikipedia.org/wiki/Composite_artifact_colors), which means that the colour of a pixel is entirely determined by its horizontal position on the screen, and the on/off status of preceding horizontal pixels.
+```buildoutcfg
+python convert.py shr <input> <output.shr>
+```
 
-In Double Hi-Res mode, the 560 horizontal pixels per line are individually addressable.  This is an improvement over the (single) Hi-Res mode, which also has 560 horizontal pixels, but which can only be addressed in groups of two (with an option to shift blocks of 7 pixels each by one dot).  See _Assembly Lines: The Complete Book_ (Wagner) for a detailed introduction to this, or _Understanding the Apple IIe_ (Sather) for a deep technical discussion.
+No additional options are required.  In addition to the common flags described above, the following flags are
+supported for `shr` conversions:
+* `--save-intermediate` Whether to save each intermediate iteration, or just the final image (default: False)
+* `--fixed-colours` How many colours to fix as identical across all 16 SHR palettes (default: 0)
+* `--show-final-score` Whether to output the final image quality score (default: False)
 
-Double Hi-Res is usually characterized as being capable of producing 16 display colours, but with heavy restrictions on how these colours can be arranged horizontally.
+### Examples
 
-### Naive model: 140x192x16
-
-One simple model for Double Hi-Res graphics is to only treat the display in groups of 4 horizontal pixels, which gives an effective resolution of 140x192 in 16 colours (=2^4).  These 140 pixel colours can be chosen independently, which makes this model easy to think about and to work with (e.g. when creating images by hand).  However the resulting images will exhibit (sometimes severe) colour interference/fringing effects when two colours are next to one another, because the underlying hardware does not actually work this way.  See below for an example image conversion, showing the unwanted colour fringing that results. 
-
-### Simplest realistic model: 560 pixels, 4-pixel colour
-
-A more complete model for thinking about DHGR comes from looking at how the NTSC signal produces colour on the display.
-The [NTSC chrominance subcarrier](https://en.wikipedia.org/wiki/Chrominance_subcarrier) completes one complete phase cycle in the time taken to draw 4 horizontal dots.  The colours produced are due to the interactions of the pixel luminosity (on/off) relative to this NTSC chroma phase.
-
-What this means is that the colour of each of the 560 horizontal pixels is determined by the current pixel value (on/off), the current X-coordinate modulo 4 (X coordinate relative to NTSC phase), as well as the on-off status of the pixels to the left of it.
-
-The simplest approximation is to only look at the current pixel value and the 3 pixels to the left, i.e. to consider a sliding window of 4 horizontal pixels moving across the screen from left to right.  Within this window, we have one pixel for each of the 4 values of NTSC phase (x % 4, ranging from 0 .. 3).  The on-off values for these 4 values of NTSC phase determine the colour of the pixel.  See [here](https://docs.google.com/presentation/d/1_eqBknG-4-llQw3oAOmPO3FlawUeWCeRPYpr_mh2iRU/edit) for more details.
-
-This model allows us to understand and predict the interference behaviour when two "140px" colours are next to each other, and to go beyond this "140px" model to take more advantage of the true 560px horizontal resolution.
-
-If we imagine drawing pixels from left to right across the screen, at each pixel we only have *two* accessible choices of colour: those resulting from turning the current pixel on, or off.  Which two particular colours are produced are determined by the pixels already drawn to the left (the immediate 3 neighbours, in our model).  One of these possibilities will always be the same as the pixel colour to the left (the on/off pixel choice corresponding to the value that just "fell off the left side" of the sliding window), and the other choice is some other colour from our palette of 16.
-
-This can be summarized in a chart, showing the possible colour transitions depending on the colour of the pixel to the immediate left, and the value of x%4.
-
-![Double hi-res colour transitions](docs/Double_Hi-Res_colour_transitions.png)
-
-So, if we want to transition from one colour to a particular new colour, it may take up to 4 horizontal pixels before we are able to achieve it (e.g. transitioning all the way from black (0000) to white (1111)).  In the meantime we have to transition through up to 2 other colours.  Depending on the details of the image we are aiming for, this may either produce unwanted visual noise, or can actually be beneficial (e.g. if the colour we want is available immediately at the next pixel)
-
-These constraints are difficult to work with when constructing DHGR graphics "by hand", but we can account for them programmatically in our image conversion to take full advantage of the "true" 560px resolution while accounting for colour interference effects.
-
-#### Limitations of this colour model
-
-In practise the above description of the Apple II colour model is still only an approximation.  On real hardware, the video signal is a continuous analogue signal, and colour is continuously modulated rather than producing discretely-coloured pixels with fixed colour values.
-
-More importantly, in an NTSC video signal the colour (chroma) signal has a lower bandwidth than the luma (brightness) signal ([Chroma sub-sampling](https://en.wikipedia.org/wiki/Chroma_subsampling)), which means that colours will tend to bleed across more than 4 pixels.  However our simple "4-pixel chroma bleed" model already produces good results, and exactly matches the implementation behaviour of some emulators, e.g. Virtual II.
-
-### NTSC emulation and 8-pixel colour
-
-By simulating the NTSC (Y'UV) signal directly we are able to recover the Apple II colour output from "first principles".  Here are the 16 "basic" DHGR colours, obtained using saturation/hue parameters tuned to match OpenEmulator's NTSC implementation, and allowing chroma to bleed across 4 pixels.
-
-![NTSC colours with 4 pixel chroma bleed](docs/ntsc-colours-chroma-bleed-4.png)
-
-However in real NTSC, chroma bleeds over more than 4 pixels, which means that we actually have more than 2^4 colours available to work with.
-
-This means that **when viewed on a composite colour display, Double Hi-Res graphics is not just a 16-colour graphics mode!**
-
-If we allow the NTSC chroma signal to bleed over 8 pixels instead of 4, then the resulting colour is determined by sequences of 8 pixels instead of 4 pixels, i.e. there are 2^8 = 256 possibilities.  In practise many of these result in the same output colour, and (with this approximation) there are only 85 unique colours available.  However this is still a marked improvement on the 16 "basic" DHGR colours:
-
-![NTSC colours with 8 pixel chroma bleed](docs/ntsc-colours-chroma-bleed-8.png)
-
-The "extra" DHGR colours are only available on real hardware, or an emulator that implements NTSC chroma sub-sampling (such as OpenEmulator).   But the result is that on such targets a much larger range of colours is available for use in image conversion.  However the restriction still exists that any given pixel only has a choice of 2 colours available (as determined by the on/off state of pixels to the left).
-
-In practise this gives much better image quality, especially when shading areas of similar colour.  The Apple II is still unable to directly modulate the luma (brightness) NTSC signal component, so areas of low or high brightness still tend to be heavily dithered.  This is because there are more bit sequences that have the number of '1' bits close to the average than there are at the extremes, so there are correspondingly few available colours that are very bright or very dark.
-
-These 85 unique double hi-res colours produced by the ][-pix NTSC emulation are not the definitive story - though they're closer to it than the usual story that double hi-res is a 16-colour graphics mode.  The implementation used by ][-pix is the simplest one: the Y'UV signal is averaged with a sliding window of 4 pixels for the Y' (luma) component and 8 pixels for the UV (chroma) component.
-
-The choice of 8 pixels is not strictly correct - e.g. the chroma bandwidth (~0.6MHz) is much less than half of luma bandwidth (~2Mhz) so the signal bleeds over more than twice as many pixels; but also decays in a more complex way than the simple step function sliding window chosen here.  In practise using 8 pixels is a good compromise between ease of implementation, runtime performance and fidelity.
-
-By contrast, OpenEmulator uses a more complex (and realistic) band-pass filtering to produce its colour output, which presumably allows even more possible colours (physical hardware will also produce its own unique results, depending on the hardware implementation of the signal decoding, and other physical characteristics).  I expect that most of these will be small variations on the above though; and in practise the ][-pix NTSC implementation already produces a close colour match for the OpenEmulator behaviour.
-
-#### Examples of NTSC images
-
-(Source: [Reinhold Möller](https://commons.wikimedia.org/wiki/File:Nymphaea_caerulea-20091014-RM-115245.jpg), [CC BY-SA 4.0](https://creativecommons.org/licenses/by-sa/4.0), via Wikimedia Commons)
-
-![Nymphaea](examples/nymphaea-original.png)
-
-OpenEmulator screenshot of image produced with `--palette=openemulator --lookahead=8`.  The distorted background colour compared to the original is particularly noticeable.
-
-![Nymphaea](examples/nymphaea-iipix-openemulator-openemulator.png)
-
-OpenEmulator screenshot of image produced with `--palette=ntsc --lookahead=8`.  Not only is the background colour a much better match, the image shading and detail is markedly improved.
-
-![Nymphaea](examples/nymphaea-iipix-ntsc-openemulator.png)
-
-Rendering the same .dhr image with 4-pixel colour shows the reason for the difference.  For example the background shading is due to pixel sequences that appear (with this simpler and less hardware-accurate rendering scheme) as sequences of grey and dark green, with a lot of blue and red sprinkled in.  In NTSC these pixel sequences combine to produce various shades of green.
-
-![Nymphaea](examples/nymphaea-iipix-ntsc-preview-openemulator.png)
-
-# Dithering and Double Hi-Res
-
-[Dithering](https://en.wikipedia.org/wiki/Dither) an image to produce an approximation with fewer image colours is a well-known technique.  The basic idea is to pick a "best colour match" for a pixel from our limited palette, then to compute the difference between the true and selected colour values and diffuse this error to nearby pixels (using some pattern).
-
-In the particular case of DHGR this algorithm runs into difficulties, because each pixel only has two possible colour choices (from a total of 16+).  If we only consider the two possibilities for the immediate next pixel then neither may be a particularly good match.  However it may be more beneficial to make a suboptimal choice now (deliberately introduce more error), if it allows us access to a better colour for a subsequent pixel.  "Classical" dithering algorithms do not account for these palette constraints, and produce suboptimal image quality for DHGR conversions. 
-
-We can deal with this by looking ahead N pixels (8 by default) for each image position (x,y), and computing the effect of choosing all 2^N combinations of these N-pixel states on the dithered source image.
-
-Specifically, for a fixed choice of one of these N pixel sequences, we tentatively perform the error diffusion as normal on a copy of the image, and compute the total mean squared distance from the (fixed) N-pixel sequence to the error-diffused source image.  To compute the perceptual difference between colours we convert to the perceptually uniform [CAM16-UCS](https://en.wikipedia.org/wiki/Color_appearance_model#CAM16) colour space in which perceptual distance is Euclidean. 
-
-Finally, we pick the N-pixel sequence with the lowest total error, and select the first pixel of this N-pixel sequence for position (x,y).  We then perform error diffusion as usual for this single pixel, and proceed to x+1.
-
-This allows us to "look beyond" local minima to find cases where it is better to make a suboptimal choice now to allow better overall image quality in subsequent pixels.  Since we will sometimes find that our choice of 2 next-pixel colours actually includes (or comes close to) the "ideal" choice, this means we can take maximal advantage of the 560-pixel horizontal resolution.
-
-## Gamma correction
-
-Most digital images are encoded using the [sRGB colour space](https://en.wikipedia.org/wiki/SRGB), which means that the stored RGB values do not map linearly onto the rendered colour intensities.  In order to work with linearized RGB values the source image needs to be gamma corrected.  Otherwise, the process of dithering an un-gamma-corrected image tends to result in an output that does not match the brightness of the input.  In particular shadows and highlights tend to get blown out/over-exposed.
-
-## Dither pattern
-
-The process of (error-diffusion) dithering involves distributing the "quantization error" (mismatch between the colour of the source image and chosen output pixels) across neighbouring pixels, according to some pattern.  [Floyd-Steinberg](https://en.wikipedia.org/wiki/Floyd%E2%80%93Steinberg_dithering) and [Jarvis-Judice-Ninke](https://en.wikipedia.org/wiki/Error_diffusion#minimized_average_error) ("Jarvis") are two common patterns, though there are many others, which have slightly different characteristics.
-
-Since it uses a small dither pattern, Floyd-Steinberg dithering retains more of the image detail than larger kernels.  On the other hand, it sometimes produces image artifacts that are highly structured (e.g. runs of a single colour, checkerboard patterns).  This seems to be especially common with 4-pixel colours.
-
-In part this may be because these "classical" dither patterns only propagate errors to a small number of neighbouring pixels, e.g. 1 pixels in the forward direction for Floyd-Steinberg, and 2 pixels for Jarvis.  However for double hi-res colours we know that it might take up to 4 pixels before a given colour can be selected for output (e.g. to alternate between black and white, or any other pairs that are 4 steps away on the transition chart above).
-
-In other words, given the results of error diffusion from our current pixel, there is one colour from our palette of 16 that is the best one to match this - but it might be only possible to render this particular colour up to 4 pixels further on.  If we only diffuse the errors by 1 or 2 pixels each time, it will tend to have diffused away by the time we reach that position, and the opportunity will be lost.  Combined with the small overall set of available colours this can result in image artifacts.
-
-Modifying the Jarvis dither pattern to extend 4 pixels in the forward direction seems to give much better results for such images (e.g. when dithering large blocks of colour), although at the cost of reduced detail.  This is presumably because we allow each quantization error to diffuse to each of the 4 subsequent pixels that might be best-placed to act on it.
-
-The bottom line is that choice of `--dither` argument is a tradeoff between image detail and handling of colour.  If the default `--dither=floyd` algorithm does not give pleasing results, try other patterns such as `--dither=jarvis-mod`.
-
-Further experimentation with other dithering patterns (and similar modifications to the above) may also produce interesting results.
-
-## Palettes
-
-Since the Apple II graphics (prior to //gs) are not based on RGB colour, we have to choose an (approximate) RGB colour palette when dithering an RGB image.  There is no "true" choice for this palette, since it depends heavily on how the image is viewed:
-
-1.  Different emulators have made (often quite different) choices for the RGB colour palettes used to emulate Apple II graphics on a RGB display.  This means that an image that looks good on one emulator may not look good on another (or on real hardware).
-    - For example, Virtual II (and the Apple //gs) uses two different RGB shades of grey for the two DHGR grey colours, whereas they are rendered identically in NTSC.  That means that images not targeted for the Virtual II palette will look quite different when viewed there (and vice versa).
-
-2.  Secondly, the actual display colours rendered by an Apple II are not fixed, but bleed into each other due to the behaviour of the (analogue) NTSC video signal.  i.e. the entire notion of a "16-colour RGB palette" is a flawed one.  Furthermore, the NTSC colours depend on the particular monitor/TV and its tuning (brightness/contrast/hue settings etc).  "Never Twice the Same Colour" indeed.   The "4-pixel colour" model described above where we can assign 2 from 16 fixed colours to each of 560 discrete pixels is only an approximation (though a useful one in practise).
-
-Some emulators emulate the NTSC video signal more faithfully (e.g. OpenEmulator), in which case they do not have a true "RGB palette".  The best we can do here is measure the colours that are produced by large blocks of colour, i.e. where there is no colour blending.  Others use some discrete approximation (e.g. Virtual II seems to exactly match the colour model described above), so a fixed palette can be reconstructed.
-
-To compute the emulator palettes used by ][-pix I measured the sRGB colour values produced by a full-screen Apple II colour image (using the colour picker tool of Mac OS X), using default emulator settings.  I have not yet attempted to measure/estimate palettes of other emulators, or "real hardware"
-
-Existing conversion tools (see below) tend to support a variety of RGB palette values sourced from various places (older tools, emulators, theoretical estimations etc).  In practise, these only matter in a few ways:
-
-1.  If you are trying to target colour balance as accurately as possible for a particular viewing target (e.g. emulator), i.e. so that the rendered colour output looks as close as possible to the source image.
-
-2.  If you are targeting an emulator that has a "non-standard" colour model, e.g. Virtual II with its two distinct shades of grey.
-
-3.  Otherwise, choices of palette effectively amount to changing the colour balance of the source image.  Some of these might produce better image quality for a particular image  (e.g. if the source image contains large colour blocks that are difficult to approximate with a particular target palette), at the cost of changing the colour balance.  i.e. it might look good on its own but not match the source image.  You could also achieve similar results by tweaking the colour balance of the source image in an editor, e.g GIMP or Photoshop.
-
-## Precomputing distance matrix
-
-The mapping from RGB colour space to CAM16-UCS is quite complex, so to avoid this runtime cost we precompute a matrix from all 256^3 integer RGB values to corresponding CAM16-UCS values. This 192MB matrix is generated by the `precompute_conversion.py` utility, and is loaded at runtime for efficient access.
-
-# Comparison to other DHGR image converters
-
-## bmp2dhr
-
-*  [bmp2dhr](http://www.appleoldies.ca/bmp2dhr/) (see [here](https://github.com/digarok/b2d) for a maintained code fork) supports additional graphics modes not yet supported by ][-pix, namely (double) lo-res, and hi-res.  Support for the lores modes would be easy to add to ][-pix, although hi-res requires more work to accommodate the colour model.  A similar lookahead strategy will likely work well though.
-
-*  supports additional image dither modes
-
-*  only supports BMP source images in a particular format.
-
-*  DHGR conversions are treated as simple 140x192x16 colour images without colour constraints, and ignores the colour fringing behaviour described above.  The generated .bmp preview images also do not show fringing, but it is present when viewing the image on an Apple II or emulator that accounts for it.  i.e. the preview images are sometimes not very representative of the actual results.  See below for an example.
-
-*  Apart from ignoring DHGR colour interactions, the 140px converted images are also lower than ideal resolution since they do not make use of the ability to address all 560px independently.
-
-*  The perceptual colour distance metric used to match the best colour to an input pixel is a custom metric based on a weighted sum of Euclidean sRGB distance and Rec.601 luma value.  It's not explained why this particular metric was chosen, and in practise it seems to often give much lower quality results than modern perceptually uniform colour spaces like CIE2000 or CAM16-UCS (though these are much slower to compute - which is why we precompute the conversion matrix ahead of time)
-
-* It does not perform RGB colour space conversions before dithering, i.e. if the input image is in sRGB colour space (as most digital images will be) then the dithering is also performed in sRGB.  Since sRGB is not a linear colour space, the effect of dithering is to distribute errors non-linearly, which distorts the brightness of the resulting image.
-
-## a2bestpix 
-
-*  Like ][-pix, [a2bestpix](http://lukazi.blogspot.com/2017/03/double-high-resolution-graphics-dhgr.html) only supports DHGR conversion.  Overall quality is usually fairly good, although colours and brightness are slightly distorted (for reasons described below), and the generated preview images do not quite give a faithful representation of the native image rendering.
-
-*  Like ][-pix, and unlike bmp2dhr, a2bestpix does apply a model of the DHGR colour interactions, albeit an ad-hoc one based on rules and tables of 4-pixel "colour blocks" reconstructed from (AppleWin) emulator behaviour.  This does allow it to make use of (closer to) full 560px resolution, although it still treats the screen as a sequence of 140 4-pixel colour blocks (with some constraints on the allowed arrangement of these blocks).
-
-*  supports additional (custom) dither modes (partly out of necessity due to the custom "colour block" model)
-
-*  Supports a variety of perceptual colour distance metrics including CIE2000 and the one bmp2dhr uses.  In practise I'm not sure the others are useful since CIE2000 is the more recent refinement of much research on this topic, and is the most accurate of them.
-
-* like bmp2dhr, only supports BMP source images in a particular format.
-
-*  Does not apply gamma correction before dithering (though sRGB conversion is done when computing CIE2000 distance), so errors are diffused non-linearly.  The resulting images don't match the brightness of the original, e.g. shadows/highlights tend to be over-exposed.
-
-*  image conversion performs an optimization over groups of multiple pixels (via choice of "colour blocks").  From what I can tell this minimizes the total colour distance from a fixed list of colour blocks to a group of 4 target pixels, similar to --lookahead=4 for ][-pix (though I'm not sure it's evaluating all 2^4 pixel combinations).  But since the image is (AFAICT) treated as a sequence of (non-overlapping) 4-pixel blocks this does not result in optimizing each output pixel independently.
-
-*  The list of "colour blocks" seem to contain colour sequences that cannot actually be rendered on the Apple II.  For example compare the spacing of yellow and orange pixels on the parrot between the preview image (LHS) and openemulator (RHS): 
-
-![Detail of a2bestpix preview image](docs/a2bestbix-preview-crop.png)
-![Detail of openemulator render](docs/a2bestpix-openemulator-crop.png)
-
-*  See below for another example where the output has major image discrepancies with the original - perhaps also due to bugs/omissions in the table of colour blocks.
-
-*  This means that (like bmp2dhr) the generated "preview" image may not closely match the native image, and the dithering algorithm is also optimizing over a slightly incorrect set of colour sequences, which presumably impacts image quality.  Possibly these are transcription errors, or artifacts of the particular emulator (AppleWin) from which they were reconstructed.
-
-## Image comparisons
-
-These three images were converted using the same target (openemulator) palette, using ][-pix, bmp2dhr and a2bestpix (since this is supported by all three), and are shown as screenshots from openemulator.
-
-### Original
-![original source image](examples/paperclips-original.png)
-
-(Source: [Purple Sherbet Photography from Worldwide!](https://commons.wikimedia.org/wiki/File:Colourful_assortment_of_paper_clips_(10421946796).jpg), [CC BY 2.0](https://creativecommons.org/licenses/by/2.0), via Wikimedia Commons)
-
-The following images were all generated with a palette approximating OpenEmulator's colours (`--palette=openemulator` for ][-pix)
-
-### ][-pix 4-pixel colour
-
-Preview image and OpenEmulator screenshot
-
-![ii-pix preview](examples/paperclips-iipix-openemulator-preview.png)
-![ii-pix screenshot](examples/paperclips-iipix-openemulator-openemulator.png)
-
-### ][-pix NTSC 8-pixel colour (Preview image)
-
-Preview image and OpenEmulator screenshot
-
-![ii-pix preview](examples/paperclips-iipix-ntsc-preview.png)
-![ii-pix screenshot](examples/paperclips-iipix-ntsc-openemulator.png)
-
-### bmp2dhr (OpenEmulator screenshot)
-![bmp2dhr screenshot](examples/paperclips-bmp2dhr-openemulator.png)
-
-Comparing bmp2dhr under openemulator is the scenario most favourable to it, since the 140px resolution and non-treatment of fringing is masked by the chroma blending.  Colours are similar to ][-pix, but the 140px dithering and lack of gamma correction results in less detail, e.g. in hilights/shadows.
-
-### a2bestpix (OpenEmulator screenshot)
-
-![a2bestpix screenshot](examples/paperclips-a2bestpix-openemulator.png)
-
-This a2bestpix image is actually atypical in quality, and shows some severe colour errors relating to the pixels that should be close to the orange/brown colours.  These may be due to errors/omissions in the set of "colour blocks".  The effects of not gamma-correcting the source image can also be seen.
-
-## NTSC artifacts
-
-The difference in treatment of NTSC artifacts is much more visible when using an emulator that doesn't perform chroma subsampling, e.g. Virtual II.  i.e. it displays the full 560-pixel colour image without blending.
-
-### Original
-
-![original source image](examples/groundhog-original.png)
-
-(Source: [Cephas](https://commons.wikimedia.org/wiki/File:Marmota_monax_UL_04.jpg), [CC BY-SA 3.0](https://creativecommons.org/licenses/by-sa/3.0), via Wikimedia Commons)
-
-The following images were generated with a palette matching the one used by Virtual II  (`--palette=virtualii` for ][-pix)
-
-### ][-pix
-
-![original source image](examples/groundhog-original.png)
-![ii-pix preview](examples/groundhog-iipix-virtualii-preview.png)
-
-### bmp2dhr
-
-![original source image](examples/groundhog-original.png)
-![ii-pix screenshot](examples/groundhog-bmp2dhr-virtualii.png)
-
-The image is heavily impacted by colour fringing, which bmp2dhr does not account for at all.  The difference in brightness of the groundhog's flank is also because bmp2dhr does not gamma-correct the image, so shadows/highlights tend to get blown out.
-
-### bmp2dhr (OpenEmulator)
-
-![original source image](examples/groundhog-original.png)
-![ii-pix screenshot](examples/groundhog-bmp2dhr-openemulator.png)
-
-This bmp2dhr image was generated using a palette approximating OpenEmulator's colours (`--palette=openemulator` for ][-pix), i.e. not the same image files as above.
-On OpenEmulator, which simulates NTSC chroma sub-sampling, the fringing is not pronounced but changes the colour balance of the image, e.g. creates a greenish tinge.
-
-### ][-pix, 4-pixel colour (OpenEmulator)
-
-![original source image](examples/groundhog-original.png)
-![ii-pix screenshot](examples/groundhog-iipix-openemulator-openemulator.png)
-
-Colour balance here is also slightly distorted due to not fully accounting for chroma blending.
-
-### ][-pix, NTSC 8-pixel colour (OpenEmulator)
-
-![original source image](examples/groundhog-original.png)
-![ii-pix screenshot](examples/groundhog-iipix-ntsc-openemulator.png)
-
-Detail and colour balance is much improved.
+TODO: add example images
 
 # Future work
 
-* Supporting lo-res and double lo-res graphics modes would be straightforward.
+* Supporting the lo-res and double lo-res graphics modes, as well as the super hi-res 3200 mode, would be straightforward.
 
 * Hi-res will require more care, since the 560 pixel display is not individually dot addressible.  In particular the behaviour of the "palette bit" (which shifts a group of 7 dots to the right by 1) is another optimization constraint.  In practise a similar lookahead algorithm should work well though.
 
+* Super hi-res 640 mode would also likely require some investigation, since it is a more highly constrained optimization problem than 320 mode.
+
 * I would like to be able to find an ordered dithering algorithm that works well for Apple II graphics.  Ordered dithering specifically avoids diffusing errors arbitrarily across the image, which produces visual noise (and unnecessary deltas) when combined with animation.  For example such a thing may work well with my [II-Vision](https://github.com/KrisKennaway/ii-vision) video streamer.  However the properties of NTSC artifact colour seem to be in conflict with these requirements, i.e. pixel changes *always* propagate colour to some extent.
 
 # Version history
 
-## v1.0 (2021-03-15)
+## v2.0 (2022-07-16)
 
-Initial release
+* Added support for Super Hi-Res 320x200 image conversions
 
 ## v1.1 (2021-11-05)
 
@@ -362,4 +148,8 @@ Initial release
 * Switch default to --dither=floyd, which seems to produce the best results with --palette=ntsc
 * Various internal code simplifications and cleanups
 
+## v1.0 (2021-03-15)
+
+Initial release
+
 ![me](examples/kris-iipix-openemulator.png)
diff --git a/docs/dhr.md b/docs/dhr.md
new file mode 100644
index 0000000..5baa39c
--- /dev/null
+++ b/docs/dhr.md
@@ -0,0 +1,266 @@
+# Double Hi-Res image conversion
+
+## Some background on Apple II Double Hi-Res graphics
+
+Like other (pre-//gs) Apple II graphics modes, Double Hi-Res relies on [NTSC Artifact Colour](https://en.wikipedia.org/wiki/Composite_artifact_colors), which means that the colour of a pixel is entirely determined by its horizontal position on the screen, and the on/off status of preceding horizontal pixels.
+
+In Double Hi-Res mode, the 560 horizontal pixels per line are individually addressable.  This is an improvement over the (single) Hi-Res mode, which also has 560 horizontal pixels, but which can only be addressed in groups of two (with an option to shift blocks of 7 pixels each by one dot).  See _Assembly Lines: The Complete Book_ (Wagner) for a detailed introduction to this, or _Understanding the Apple IIe_ (Sather) for a deep technical discussion.
+
+Double Hi-Res is usually characterized as being capable of producing 16 display colours, but with heavy restrictions on how these colours can be arranged horizontally.
+
+### Naive model: 140x192x16
+
+One simple model for Double Hi-Res graphics is to only treat the display in groups of 4 horizontal pixels, which gives an effective resolution of 140x192 in 16 colours (=2^4).  These 140 pixel colours can be chosen independently, which makes this model easy to think about and to work with (e.g. when creating images by hand).  However the resulting images will exhibit (sometimes severe) colour interference/fringing effects when two colours are next to one another, because the underlying hardware does not actually work this way.  See below for an example image conversion, showing the unwanted colour fringing that results. 
+
+### Simplest realistic model: 560 pixels, 4-pixel colour
+
+A more complete model for thinking about DHGR comes from looking at how the NTSC signal produces colour on the display.
+The [NTSC chrominance subcarrier](https://en.wikipedia.org/wiki/Chrominance_subcarrier) completes one full phase cycle in the time taken to draw 4 horizontal dots.  The colours produced are due to the interactions of the pixel luminosity (on/off) relative to this NTSC chroma phase.
+
+What this means is that the colour of each of the 560 horizontal pixels is determined by the current pixel value (on/off), the current X-coordinate modulo 4 (X coordinate relative to NTSC phase), as well as the on-off status of the pixels to the left of it.
+
+The simplest approximation is to only look at the current pixel value and the 3 pixels to the left, i.e. to consider a sliding window of 4 horizontal pixels moving across the screen from left to right.  Within this window, we have one pixel for each of the 4 values of NTSC phase (x % 4, ranging from 0 .. 3).  The on-off values for these 4 values of NTSC phase determine the colour of the pixel.  See [here](https://docs.google.com/presentation/d/1_eqBknG-4-llQw3oAOmPO3FlawUeWCeRPYpr_mh2iRU/edit) for more details.
+
+This model allows us to understand and predict the interference behaviour when two "140px" colours are next to each other, and to go beyond this "140px" model to take more advantage of the true 560px horizontal resolution.
+
+If we imagine drawing pixels from left to right across the screen, at each pixel we only have *two* accessible choices of colour: those resulting from turning the current pixel on, or off.  Which two particular colours are produced are determined by the pixels already drawn to the left (the immediate 3 neighbours, in our model).  One of these possibilities will always be the same as the pixel colour to the left (the on/off pixel choice corresponding to the value that just "fell off the left side" of the sliding window), and the other choice is some other colour from our palette of 16.
+
+This can be summarized in a chart, showing the possible colour transitions depending on the colour of the pixel to the immediate left, and the value of x%4.
+
+![Double hi-res colour transitions](Double_Hi-Res_colour_transitions.png)
+
+So, if we want to transition from one colour to a particular new colour, it may take up to 4 horizontal pixels before we are able to achieve it (e.g. transitioning all the way from black (0000) to white (1111)).  In the meantime we have to transition through up to 2 other colours.  Depending on the details of the image we are aiming for, this may either produce unwanted visual noise, or can actually be beneficial (e.g. if the colour we want is available immediately at the next pixel).
+
+These constraints are difficult to work with when constructing DHGR graphics "by hand", but we can account for them programmatically in our image conversion to take full advantage of the "true" 560px resolution while accounting for colour interference effects.
+
+#### Limitations of this colour model
+
+In practise the above description of the Apple II colour model is still only an approximation.  On real hardware, the video signal is a continuous analogue signal, and colour is continuously modulated rather than producing discretely-coloured pixels with fixed colour values.
+
+More importantly, in an NTSC video signal the colour (chroma) signal has a lower bandwidth than the luma (brightness) signal ([Chroma sub-sampling](https://en.wikipedia.org/wiki/Chroma_subsampling)), which means that colours will tend to bleed across more than 4 pixels.  However our simple "4-pixel chroma bleed" model already produces good results, and exactly matches the implementation behaviour of some emulators, e.g. Virtual II.
+
+### NTSC emulation and 8-pixel colour
+
+By simulating the NTSC (Y'UV) signal directly we are able to recover the Apple II colour output from "first principles".  Here are the 16 "basic" DHGR colours, obtained using saturation/hue parameters tuned to match OpenEmulator's NTSC implementation, and allowing chroma to bleed across 4 pixels.
+
+![NTSC colours with 4 pixel chroma bleed](ntsc-colours-chroma-bleed-4.png)
+
+However in real NTSC, chroma bleeds over more than 4 pixels, which means that we actually have more than 2^4 colours available to work with.
+
+This means that **when viewed on a composite colour display, Double Hi-Res graphics is not just a 16-colour graphics mode!**
+
+If we allow the NTSC chroma signal to bleed over 8 pixels instead of 4, then the resulting colour is determined by sequences of 8 pixels instead of 4 pixels, i.e. there are 2^8 = 256 possibilities.  In practise many of these result in the same output colour, and (with this approximation) there are only 85 unique colours available.  However this is still a marked improvement on the 16 "basic" DHGR colours:
+
+![NTSC colours with 8 pixel chroma bleed](ntsc-colours-chroma-bleed-8.png)
+
+The "extra" DHGR colours are only available on real hardware, or an emulator that implements NTSC chroma sub-sampling (such as OpenEmulator).   But the result is that on such targets a much larger range of colours is available for use in image conversion.  However the restriction still exists that any given pixel only has a choice of 2 colours available (as determined by the on/off state of pixels to the left).
+
+In practise this gives much better image quality, especially when shading areas of similar colour.  The Apple II is still unable to directly modulate the luma (brightness) NTSC signal component, so areas of low or high brightness still tend to be heavily dithered.  This is because there are more bit sequences that have the number of '1' bits close to the average than there are at the extremes, so there are correspondingly few available colours that are very bright or very dark.
+
+These 85 unique double hi-res colours produced by the ][-pix NTSC emulation are not the definitive story - though they're closer to it than the usual story that double hi-res is a 16-colour graphics mode.  The implementation used by ][-pix is the simplest one: the Y'UV signal is averaged with a sliding window of 4 pixels for the Y' (luma) component and 8 pixels for the UV (chroma) component.
+
+The choice of 8 pixels is not strictly correct - e.g. the chroma bandwidth (~0.6MHz) is much less than half of luma bandwidth (~2MHz) so the signal bleeds over more than twice as many pixels; but also decays in a more complex way than the simple step function sliding window chosen here.  In practise using 8 pixels is a good compromise between ease of implementation, runtime performance and fidelity.
+
+By contrast, OpenEmulator uses a more complex (and realistic) band-pass filtering to produce its colour output, which presumably allows even more possible colours (physical hardware will also produce its own unique results, depending on the hardware implementation of the signal decoding, and other physical characteristics).  I expect that most of these will be small variations on the above though; and in practise the ][-pix NTSC implementation already produces a close colour match for the OpenEmulator behaviour.
+
+#### Examples of NTSC images
+
+(Source: [Reinhold Möller](https://commons.wikimedia.org/wiki/File:Nymphaea_caerulea-20091014-RM-115245.jpg), [CC BY-SA 4.0](https://creativecommons.org/licenses/by-sa/4.0), via Wikimedia Commons)
+
+![Nymphaea](../examples/nymphaea-original.png)
+
+OpenEmulator screenshot of image produced with `--palette=openemulator --lookahead=8`.  The distorted background colour compared to the original is particularly noticeable.
+
+![Nymphaea](../examples/nymphaea-iipix-openemulator-openemulator.png)
+
+OpenEmulator screenshot of image produced with `--palette=ntsc --lookahead=8`.  Not only is the background colour a much better match, the image shading and detail is markedly improved.
+
+![Nymphaea](../examples/nymphaea-iipix-ntsc-openemulator.png)
+
+Rendering the same .dhr image with 4-pixel colour shows the reason for the difference.  For example the background shading is due to pixel sequences that appear (with this simpler and less hardware-accurate rendering scheme) as sequences of grey and dark green, with a lot of blue and red sprinkled in.  In NTSC these pixel sequences combine to produce various shades of green.
+
+![Nymphaea](../examples/nymphaea-iipix-ntsc-preview-openemulator.png)
+
+# Dithering and Double Hi-Res
+
+[Dithering](https://en.wikipedia.org/wiki/Dither) an image to produce an approximation with fewer image colours is a well-known technique.  The basic idea is to pick a "best colour match" for a pixel from our limited palette, then to compute the difference between the true and selected colour values and diffuse this error to nearby pixels (using some pattern).
+
+In the particular case of DHGR this algorithm runs into difficulties, because each pixel only has two possible colour choices (from a total of 16+).  If we only consider the two possibilities for the immediate next pixel then neither may be a particularly good match.  However it may be more beneficial to make a suboptimal choice now (deliberately introduce more error), if it allows us access to a better colour for a subsequent pixel.  "Classical" dithering algorithms do not account for these palette constraints, and produce suboptimal image quality for DHGR conversions. 
+
+We can deal with this by looking ahead N pixels (8 by default) for each image position (x,y), and computing the effect of choosing all 2^N combinations of these N-pixel states on the dithered source image.
+
+Specifically, for a fixed choice of one of these N pixel sequences, we tentatively perform the error diffusion as normal on a copy of the image, and compute the total mean squared distance from the (fixed) N-pixel sequence to the error-diffused source image.  To compute the perceptual difference between colours we convert to the perceptually uniform [CAM16-UCS](https://en.wikipedia.org/wiki/Color_appearance_model#CAM16) colour space in which perceptual distance is Euclidean. 
+
+Finally, we pick the N-pixel sequence with the lowest total error, and select the first pixel of this N-pixel sequence for position (x,y).  We then perform error diffusion as usual for this single pixel, and proceed to x+1.
+
+This allows us to "look beyond" local minima to find cases where it is better to make a suboptimal choice now to allow better overall image quality in subsequent pixels.  Since we will sometimes find that our choice of 2 next-pixel colours actually includes (or comes close to) the "ideal" choice, this means we can take maximal advantage of the 560-pixel horizontal resolution.
+
+## Gamma correction
+
+Most digital images are encoded using the [sRGB colour space](https://en.wikipedia.org/wiki/SRGB), which means that the stored RGB values do not map linearly onto the rendered colour intensities.  In order to work with linearized RGB values the source image needs to be gamma corrected.  Otherwise, the process of dithering an un-gamma-corrected image tends to result in an output that does not match the brightness of the input.  In particular shadows and highlights tend to get blown out/over-exposed.
+
+## Dither pattern
+
+The process of (error-diffusion) dithering involves distributing the "quantization error" (mismatch between the colour of the source image and chosen output pixels) across neighbouring pixels, according to some pattern.  [Floyd-Steinberg](https://en.wikipedia.org/wiki/Floyd%E2%80%93Steinberg_dithering) and [Jarvis-Judice-Ninke](https://en.wikipedia.org/wiki/Error_diffusion#minimized_average_error) ("Jarvis") are two common patterns, though there are many others, which have slightly different characteristics.
+
+Since it uses a small dither pattern, Floyd-Steinberg dithering retains more of the image detail than larger kernels.  On the other hand, it sometimes produces image artifacts that are highly structured (e.g. runs of a single colour, checkerboard patterns).  This seems to be especially common with 4-pixel colours.
+
+In part this may be because these "classical" dither patterns only propagate errors to a small number of neighbouring pixels, e.g. 1 pixel in the forward direction for Floyd-Steinberg, and 2 pixels for Jarvis.  However for double hi-res colours we know that it might take up to 4 pixels before a given colour can be selected for output (e.g. to alternate between black and white, or any other pairs that are 4 steps away on the transition chart above).
+
+In other words, given the results of error diffusion from our current pixel, there is one colour from our palette of 16 that is the best one to match this - but it might be only possible to render this particular colour up to 4 pixels further on.  If we only diffuse the errors by 1 or 2 pixels each time, it will tend to have diffused away by the time we reach that position, and the opportunity will be lost.  Combined with the small overall set of available colours this can result in image artifacts.
+
+Modifying the Jarvis dither pattern to extend 4 pixels in the forward direction seems to give much better results for such images (e.g. when dithering large blocks of colour), although at the cost of reduced detail.  This is presumably because we allow each quantization error to diffuse to each of the 4 subsequent pixels that might be best-placed to act on it.
+
+The bottom line is that choice of `--dither` argument is a tradeoff between image detail and handling of colour.  If the default `--dither=floyd` algorithm does not give pleasing results, try other patterns such as `--dither=jarvis-mod`.
+
+Further experimentation with other dithering patterns (and similar modifications to the above) may also produce interesting results.
+
+## Palettes
+
+Since the Apple II graphics (prior to //gs) are not based on RGB colour, we have to choose an (approximate) RGB colour palette when dithering an RGB image.  There is no "true" choice for this palette, since it depends heavily on how the image is viewed:
+
+1.  Different emulators have made (often quite different) choices for the RGB colour palettes used to emulate Apple II graphics on a RGB display.  This means that an image that looks good on one emulator may not look good on another (or on real hardware).
+    - For example, Virtual II (and the Apple //gs) uses two different RGB shades of grey for the two DHGR grey colours, whereas they are rendered identically in NTSC.  That means that images not targeted for the Virtual II palette will look quite different when viewed there (and vice versa).
+
+2.  Secondly, the actual display colours rendered by an Apple II are not fixed, but bleed into each other due to the behaviour of the (analogue) NTSC video signal.  i.e. the entire notion of a "16-colour RGB palette" is a flawed one.  Furthermore, the NTSC colours depend on the particular monitor/TV and its tuning (brightness/contrast/hue settings etc).  "Never Twice the Same Colour" indeed.   The "4-pixel colour" model described above where we can assign 2 from 16 fixed colours to each of 560 discrete pixels is only an approximation (though a useful one in practise).
+
+Some emulators emulate the NTSC video signal more faithfully (e.g. OpenEmulator), in which case they do not have a true "RGB palette".  The best we can do here is measure the colours that are produced by large blocks of colour, i.e. where there is no colour blending.  Others use some discrete approximation (e.g. Virtual II seems to exactly match the colour model described above), so a fixed palette can be reconstructed.
+
+To compute the emulator palettes used by ][-pix I measured the sRGB colour values produced by a full-screen Apple II colour image (using the colour picker tool of Mac OS X), using default emulator settings.  I have not yet attempted to measure/estimate palettes of other emulators, or "real hardware".
+
+Existing conversion tools (see below) tend to support a variety of RGB palette values sourced from various places (older tools, emulators, theoretical estimations etc).  In practise, these only matter in a few ways:
+
+1.  If you are trying to target colour balance as accurately as possible for a particular viewing target (e.g. emulator), i.e. so that the rendered colour output looks as close as possible to the source image.
+
+2.  If you are targeting an emulator that has a "non-standard" colour model, e.g. Virtual II with its two distinct shades of grey.
+
+3.  Otherwise, choices of palette effectively amount to changing the colour balance of the source image.  Some of these might produce better image quality for a particular image  (e.g. if the source image contains large colour blocks that are difficult to approximate with a particular target palette), at the cost of changing the colour balance.  i.e. it might look good on its own but not match the source image.  You could also achieve similar results by tweaking the colour balance of the source image in an editor, e.g. GIMP or Photoshop.
+
+## Precomputing distance matrix
+
+The mapping from RGB colour space to CAM16-UCS is quite complex, so to avoid this runtime cost we precompute a matrix from all 256^3 integer RGB values to corresponding CAM16-UCS values. This 192MB matrix is generated by the `precompute_conversion.py` utility, and is loaded at runtime for efficient access.
+
+# Comparison to other DHGR image converters
+
+## bmp2dhr
+
+*  [bmp2dhr](http://www.appleoldies.ca/bmp2dhr/) (see [here](https://github.com/digarok/b2d) for a maintained code fork) supports additional graphics modes not yet supported by ][-pix, namely (double) lo-res, and hi-res.  Support for the lores modes would be easy to add to ][-pix, although hi-res requires more work to accommodate the colour model.  A similar lookahead strategy will likely work well though.
+
+*  supports additional image dither modes
+
+*  only supports BMP source images in a particular format.
+
+*  DHGR conversions are treated as simple 140x192x16 colour images without colour constraints, and ignores the colour fringing behaviour described above.  The generated .bmp preview images also do not show fringing, but it is present when viewing the image on an Apple II or emulator that accounts for it.  i.e. the preview images are sometimes not very representative of the actual results.  See below for an example.
+
+*  Apart from ignoring DHGR colour interactions, the 140px converted images are also lower than ideal resolution since they do not make use of the ability to address all 560px independently.
+
+*  The perceptual colour distance metric used to match the best colour to an input pixel is a custom metric based on a weighted sum of Euclidean sRGB distance and Rec.601 luma value.  It's not explained why this particular metric was chosen, and in practise it seems to often give much lower quality results than modern perceptually uniform colour spaces like CIE2000 or CAM16-UCS (though these are much slower to compute - which is why we precompute the conversion matrix ahead of time)
+
+* It does not perform RGB colour space conversions before dithering, i.e. if the input image is in sRGB colour space (as most digital images will be) then the dithering is also performed in sRGB.  Since sRGB is not a linear colour space, the effect of dithering is to distribute errors non-linearly, which distorts the brightness of the resulting image.
+
+## a2bestpix 
+
+*  Like ][-pix, [a2bestpix](http://lukazi.blogspot.com/2017/03/double-high-resolution-graphics-dhgr.html) only supports DHGR conversion.  Overall quality is usually fairly good, although colours and brightness are slightly distorted (for reasons described below), and the generated preview images do not quite give a faithful representation of the native image rendering.
+
+*  Like ][-pix, and unlike bmp2dhr, a2bestpix does apply a model of the DHGR colour interactions, albeit an ad-hoc one based on rules and tables of 4-pixel "colour blocks" reconstructed from (AppleWin) emulator behaviour.  This does allow it to make use of (closer to) full 560px resolution, although it still treats the screen as a sequence of 140 4-pixel colour blocks (with some constraints on the allowed arrangement of these blocks).
+
+*  supports additional (custom) dither modes (partly out of necessity due to the custom "colour block" model)
+
+*  Supports a variety of perceptual colour distance metrics including CIE2000 and the one bmp2dhr uses.  In practise I'm not sure the others are useful since CIE2000 is the more recent refinement of much research on this topic, and is the most accurate of them.
+
+* like bmp2dhr, only supports BMP source images in a particular format.
+
+*  Does not apply gamma correction before dithering (though sRGB conversion is done when computing CIE2000 distance), so errors are diffused non-linearly.  The resulting images don't match the brightness of the original, e.g. shadows/highlights tend to be over-exposed.
+
+*  image conversion performs an optimization over groups of multiple pixels (via choice of "colour blocks").  From what I can tell this minimizes the total colour distance from a fixed list of colour blocks to a group of 4 target pixels, similar to --lookahead=4 for ][-pix (though I'm not sure it's evaluating all 2^4 pixel combinations).  But since the image is (AFAICT) treated as a sequence of (non-overlapping) 4-pixel blocks this does not result in optimizing each output pixel independently.
+
+*  The list of "colour blocks" seems to contain colour sequences that cannot actually be rendered on the Apple II.  For example compare the spacing of yellow and orange pixels on the parrot between the preview image (LHS) and openemulator (RHS):
+
+![Detail of a2bestpix preview image](a2bestbix-preview-crop.png)
+![Detail of openemulator render](a2bestpix-openemulator-crop.png)
+
+*  See below for another example where the output has major image discrepancies with the original - perhaps also due to bugs/omissions in the table of colour blocks.
+
+*  This means that (like bmp2dhr) the generated "preview" image may not closely match the native image, and the dithering algorithm is also optimizing over a slightly incorrect set of colour sequences, which presumably impacts image quality.  Possibly these are transcription errors, or artifacts of the particular emulator (AppleWin) from which they were reconstructed.
+
+## Image comparisons
+
+These three images were converted using the same target (openemulator) palette, using ][-pix, bmp2dhr and a2bestpix (since this is supported by all three), and are shown as screenshots from openemulator.
+
+### Original
+![original source image](../examples/paperclips-original.png)
+
+(Source: [Purple Sherbet Photography from Worldwide!](https://commons.wikimedia.org/wiki/File:Colourful_assortment_of_paper_clips_(10421946796).jpg), [CC BY 2.0](https://creativecommons.org/licenses/by/2.0), via Wikimedia Commons)
+
+The following images were all generated with a palette approximating OpenEmulator's colours (`--palette=openemulator` for ][-pix)
+
+### ][-pix 4-pixel colour
+
+Preview image and OpenEmulator screenshot
+
+![ii-pix preview](../examples/paperclips-iipix-openemulator-preview.png)
+![ii-pix screenshot](../examples/paperclips-iipix-openemulator-openemulator.png)
+
+### ][-pix NTSC 8-pixel colour (Preview image)
+
+Preview image and OpenEmulator screenshot
+
+![ii-pix preview](../examples/paperclips-iipix-ntsc-preview.png)
+![ii-pix screenshot](../examples/paperclips-iipix-ntsc-openemulator.png)
+
+### bmp2dhr (OpenEmulator screenshot)
+![bmp2dhr screenshot](../examples/paperclips-bmp2dhr-openemulator.png)
+
+Comparing bmp2dhr under openemulator is the scenario most favourable to it, since the 140px resolution and non-treatment of fringing are masked by the chroma blending.  Colours are similar to ][-pix, but the 140px dithering and lack of gamma correction result in less detail, e.g. in highlights/shadows.
+
+### a2bestpix (OpenEmulator screenshot)
+
+![a2bestpix screenshot](../examples/paperclips-a2bestpix-openemulator.png)
+
+This a2bestpix image is actually atypical in quality, and shows some severe colour errors relating to the pixels that should be close to the orange/brown colours.  These may be due to errors/omissions in the set of "colour blocks".  The effects of not gamma-correcting the source image can also be seen.
+
+## NTSC artifacts
+
+The difference in treatment of NTSC artifacts is much more visible when using an emulator that doesn't perform chroma subsampling, e.g. Virtual II.  i.e. it displays the full 560-pixel colour image without blending.
+
+### Original
+
+![original source image](../examples/groundhog-original.png)
+
+(Source: [Cephas](https://commons.wikimedia.org/wiki/File:Marmota_monax_UL_04.jpg), [CC BY-SA 3.0](https://creativecommons.org/licenses/by-sa/3.0), via Wikimedia Commons)
+
+The following images were generated with a palette matching the one used by Virtual II  (`--palette=virtualii` for ][-pix)
+
+### ][-pix
+
+![original source image](../examples/groundhog-original.png)
+![ii-pix preview](../examples/groundhog-iipix-virtualii-preview.png)
+
+### bmp2dhr
+
+![original source image](../examples/groundhog-original.png)
+![bmp2dhr screenshot](../examples/groundhog-bmp2dhr-virtualii.png)
+
+The image is heavily impacted by colour fringing, which bmp2dhr does not account for at all.  The difference in brightness of the groundhog's flank is also because bmp2dhr does not gamma-correct the image, so shadows/highlights tend to get blown out.
+
+### bmp2dhr (OpenEmulator)
+
+![original source image](../examples/groundhog-original.png)
+![bmp2dhr screenshot](../examples/groundhog-bmp2dhr-openemulator.png)
+
+This bmp2dhr image was generated using a palette approximating OpenEmulator's colours (`--palette=openemulator` for ][-pix), i.e. not the same image files as above.
+On OpenEmulator, which simulates NTSC chroma sub-sampling, the fringing is not pronounced but changes the colour balance of the image, e.g. creates a greenish tinge.
+
+### ][-pix, 4-pixel colour (OpenEmulator)
+
+![original source image](../examples/groundhog-original.png)
+![ii-pix screenshot](../examples/groundhog-iipix-openemulator-openemulator.png)
+
+Colour balance here is also slightly distorted due to not fully accounting for chroma blending.
+
+### ][-pix, NTSC 8-pixel colour (OpenEmulator)
+
+![original source image](../examples/groundhog-original.png)
+![ii-pix screenshot](../examples/groundhog-iipix-ntsc-openemulator.png)
+
+Detail and colour balance are much improved.

From 1486f8a3941489da2efbed65fff4bb3808344119 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Mon, 18 Jul 2022 22:31:22 +0100
Subject: [PATCH 81/82] Add TODO

---
 convert_shr.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/convert_shr.py b/convert_shr.py
index e0caeda..0a01f92 100644
--- a/convert_shr.py
+++ b/convert_shr.py
@@ -268,6 +268,9 @@ class ClusterPalette:
             # If the k-means clustering returned fewer than 16 unique colours,
             # fill out the remainder with the most common pixels colours that
             # have not yet been used.
+            #
+            # TODO: this seems like an opportunity to do something better -
+            #   e.g. forcibly split clusters and iterate the clustering
             palette_rgb12_iigs = self._fill_short_palette(
                 palette_rgb12_iigs, most_frequent_colours)
 

From 1468e06d2f245c70c886a3f20bb2c1cce3b3e11f Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Mon, 18 Jul 2022 22:50:31 +0100
Subject: [PATCH 82/82] Tweak

---
 README.md | 81 +++++++++++++++++++++++++++++--------------------------
 1 file changed, 43 insertions(+), 38 deletions(-)

diff --git a/README.md b/README.md
index 468b6d2..172b6ad 100644
--- a/README.md
+++ b/README.md
@@ -50,10 +50,11 @@ The following flags are supported in both `dhr` and `shr` modes:
 * `--verbose` Show progress during conversion (default: False)
 * `--gamma-correct` Gamma-correct image by this value (default: 2.4)
 
+See below for DHR- and SHR- specific instructions.
 
 ## Double Hi-Res
 
-To convert an image to double hi-res, the simplest usage is:
+To convert an image to Double Hi-Res (560x192, 16 colours but [it's complicated](docs/dhr.md)), the simplest usage is:
 
 ```buildoutcfg
 python convert.py dhr --palette ntsc <input> <output.dhr>
@@ -69,42 +70,6 @@ TODO: document flags
 
 For more details about Double Hi-Res graphics and the conversion process, see [here](docs/dhr.md).
 
-### Examples
-
-See [here](examples/gallery.md) for more sample Double Hi-Res image conversions.
-
-#### Original
-
-![Two colourful parrots sitting on a branch](examples/parrots-original.png)
-
- (Source: [Shreygadgil](https://commons.wikimedia.org/wiki/File:Vibrant_Wings.jpg), [CC BY-SA 4.0](https://creativecommons.org/licenses/by-sa/4.0), via Wikimedia Commons)
-
-#### ][-pix preview image
-
-This image was generated using
-
-```buildoutcfg
-python convert.py --lookahead 8 --palette openemulator examples/parrots-original.png examples/parrots-iipix-openemulator.dhr
-```
-
-The resulting ][-pix preview PNG image is shown here.
-
-![Two colourful parrots sitting on a branch](examples/parrots-iipix-openemulator-preview.png)
-
-#### OpenEmulator screenshot
-
-This is a screenshot taken from OpenEmulator when viewing the Double Hi-res image.
-
-![Two colourful parrots sitting on a branch](examples/parrots-iipix-openemulator-openemulator.png)
-
-Some difference in colour tone is visible due to blending of colours across pixels (e.g. brown blending into grey, in the background).  This is due to the fact that OpenEmulator simulates the reduced chroma bandwidth of the NTSC signal.
-
-][-pix also allows modeling this NTSC signal behaviour, which effectively allows access to more than 16 DHGR colours, through carefully chosen sequences of pixels (see below for more details).  The resulting images have much higher quality, but only when viewed on a suitable target (e.g. OpenEmulator, or real hardware).  On other targets the colour balance tends to be skewed, though image detail is still good.
-
-This is an OpenEmulator screenshot of the same image converted with `--palette=ntsc` instead of `--palette=openemulator`.  Colour match to the original is substantially improved, and more colour detail is visible, e.g. in the shading of the background.
-
-![Two colourful parrots sitting on a branch](examples/parrots-iipix-ntsc-openemulator.png)
-
 ## Super Hi-Res
 
 To convert an image to Super Hi-Res (320x200, up to 256 colours), the simplest usage is:
@@ -119,7 +84,47 @@ supported for `shr` conversions:
 * `--fixed-colours` How many colours to fix as identical across all 16 SHR palettes. (default: 0)
 * `--show-final-score` Whether to output the final image quality score (default: False)
 
-### Examples
+TODO: link to KansasFest 2022 talk slides/video for more details
+
+# Examples
+
+## Double Hi-Res
+
+See [here](examples/gallery.md) for more sample Double Hi-Res image conversions.
+
+### Original
+
+![Two colourful parrots sitting on a branch](examples/parrots-original.png)
+
+ (Source: [Shreygadgil](https://commons.wikimedia.org/wiki/File:Vibrant_Wings.jpg), [CC BY-SA 4.0](https://creativecommons.org/licenses/by-sa/4.0), via Wikimedia Commons)
+
+### ][-pix preview image
+
+This image was generated using
+
+```buildoutcfg
+python convert.py --lookahead 8 --palette openemulator examples/parrots-original.png examples/parrots-iipix-openemulator.dhr
+```
+
+The resulting ][-pix preview PNG image is shown here.
+
+![Two colourful parrots sitting on a branch](examples/parrots-iipix-openemulator-preview.png)
+
+### OpenEmulator screenshot
+
+This is a screenshot taken from OpenEmulator when viewing the Double Hi-Res image.
+
+![Two colourful parrots sitting on a branch](examples/parrots-iipix-openemulator-openemulator.png)
+
+Some difference in colour tone is visible due to blending of colours across pixels (e.g. brown blending into grey, in the background).  This is due to the fact that OpenEmulator simulates the reduced chroma bandwidth of the NTSC signal.
+
+][-pix also allows modelling this NTSC signal behaviour, which effectively allows access to more than 16 DHGR colours, through carefully chosen sequences of pixels (see below for more details).  The resulting images have much higher quality, but only when viewed on a suitable target (e.g. OpenEmulator, or real hardware).  On other targets the colour balance tends to be skewed, though image detail is still good.
+
+This is an OpenEmulator screenshot of the same image converted with `--palette=ntsc` instead of `--palette=openemulator`.  Colour match to the original is substantially improved, and more colour detail is visible, e.g. in the shading of the background.
+
+![Two colourful parrots sitting on a branch](examples/parrots-iipix-ntsc-openemulator.png)
+
+## Super Hi-Res
 
 TODO: add example images