Optimize palette initialization and NTSC image conversion (#13)

About 2x faster end-to-end with default dhr conversion options
2024-12-31 05:30:55 +00:00 · 2023-02-26 00:00:39 +00:00 · 2023-02-26 00:00:39 +00:00 · 69a96b4719
commit 69a96b4719
parent f8fbd768a5
2 changed files with 37 additions and 26 deletions
--- a/palette.py
+++ b/palette.py
@@ -41,12 +41,19 @@ class Palette:

    def __init__(self):
        self.RGB = {}
-        for k, v in self.SRGB.items():
+
+        # Do a bulk conversion because it's much faster than doing it within the
+        # loop
+        srgb = np.stack(list(self.SRGB.values()))
+        with colour.utilities.suppress_warnings(colour_usage_warnings=True):
+            cam = colour.convert(srgb / 255, "sRGB", "CAM16UCS").astype(
+                np.float32)
+
+        for i, kv in enumerate(self.SRGB.items()):
+            k, v = kv
            self.RGB[k] = (np.clip(image.srgb_to_linear_array(v / 255), 0.0,
                                   1.0) * 255).astype(np.uint8)
-            with colour.utilities.suppress_warnings(colour_usage_warnings=True):
-                self.CAM16UCS[k] = colour.convert(
-                    v / 255, "sRGB", "CAM16UCS").astype(np.float32)
+            self.CAM16UCS[k] = cam[i, :]

    @staticmethod
    def _pixel_phase_shifts(phase_3_srgb):
--- a/screen.py
+++ b/screen.py
@@ -139,10 +139,10 @@ class NTSCScreen:
        x = pos % 12 + self.NTSC_PHASE_SHIFT * 3
        return np.cos(x * 2 * np.pi / 12)

-    def _read(self, line, pos):
+    def _read(self, lines, pos):
        if pos < 0:
-            return 0
-        return 1 if line[pos] else 0
+            return np.zeros(lines.shape[0], dtype=np.float32)
+        return lines[:, pos].astype(np.float32)

    def bitmap_to_image_ntsc(self, bitmap: np.ndarray) -> np.ndarray:
        y_width = 12
@@ -177,26 +177,30 @@ class NTSCScreen:

        out_rgb = np.empty((bitmap.shape[0], bitmap.shape[1] * 3, 3),
                           dtype=np.uint8)
-        for y in range(bitmap.shape[0]):
-            ysum = 0
-            usum = 0
-            vsum = 0
-            line = np.repeat(bitmap[y], 3)
+        ysum = np.zeros(bitmap.shape[0], dtype=np.float32)
+        usum = np.zeros(bitmap.shape[0], dtype=np.float32)
+        vsum = np.zeros(bitmap.shape[0], dtype=np.float32)
+
+        # Repeat each pixel 3 times so we can do sub-pixel colour sampling
+        lines = np.repeat(bitmap, 3, axis=1)
+
+        for x in range(bitmap.shape[1] * 3):
+            ysum += self._read(lines, x) - self._read(lines, x - y_width)
+            usum += self._read(lines, x) * self._sin(x) - self._read(
+                lines, x - u_width) * self._sin((x - u_width))
+            vsum += self._read(lines, x) * self._cos(x) - self._read(
+                lines, x - v_width) * self._cos((x - v_width))
+
+            rgb = np.matmul(
+                yuv_to_rgb, np.stack(
+                    (ysum / y_width, usum / u_width,
+                     vsum / v_width), axis=1).reshape(
+                    (bitmap.shape[0], 3, 1))).reshape(
+                    bitmap.shape[0], 3)
+            out_rgb[:, x, 0] = np.minimum(255, np.maximum(0, rgb[:, 0] * 255))
+            out_rgb[:, x, 1] = np.minimum(255, np.maximum(0, rgb[:, 1] * 255))
+            out_rgb[:, x, 2] = np.minimum(255, np.maximum(0, rgb[:, 2] * 255))

-            for x in range(bitmap.shape[1] * 3):
-                ysum += self._read(line, x) - self._read(line, x - y_width)
-                usum += self._read(line, x) * self._sin(x) - self._read(
-                    line, x - u_width) * self._sin((x - u_width))
-                vsum += self._read(line, x) * self._cos(x) - self._read(
-                    line, x - v_width) * self._cos((x - v_width))
-                rgb = np.matmul(
-                    yuv_to_rgb, np.array(
-                        (ysum / y_width, usum / u_width,
-                         vsum / v_width)).reshape((3, 1))).reshape(3)
-                r = min(255, max(0, rgb[0] * 255))
-                g = min(255, max(0, rgb[1] * 255))
-                b = min(255, max(0, rgb[2] * 255))
-                out_rgb[y, x, :] = (r, g, b)

        return out_rgb