No need for pattern to have a third axis of shape 1

Minor optimization to not compute error_fraction 3 times. Tidy a bit.
2021-03-15 16:22:55 +00:00 · 2021-03-15 16:22:55 +00:00 · ede063a21b
parent 551be3eba7
commit ede063a21b
2 changed files with 31 additions and 31 deletions
--- a/dither.pyx
+++ b/dither.pyx
@ -130,12 +130,12 @@ cdef int dither_lookahead(Dither* dither, float[:, ::1] palette_rgb,
 cdef void apply_one_line(Dither* dither, int xl, int xr, int x, float[] image, int image_shape1,
        float[] quant_error) nogil:
    cdef int i, j
-    cdef float error
+    cdef float error_fraction

    for i in range(xl, xr):
+        error_fraction = dither.pattern[i - x + dither.x_origin]
        for j in range(3):
-            error = dither.pattern[i - x + dither.x_origin] * quant_error[j]
-            image[i * image_shape1 + j] = clip(image[i * image_shape1 + j] + error, 0, 255)
+            image[i * image_shape1 + j] = clip(image[i * image_shape1 + j] + error_fraction * quant_error[j], 0, 255)


@cython.boundscheck(False)
@ -148,16 +148,16 @@ cdef void apply(Dither* dither, int x_res, int y_res, int x, int y, float[:,:,::
    cdef int xl = dither_bounds_xl(dither, x)
    cdef int xr = dither_bounds_xr(dither, x_res, x)

-    cdef float error
+    cdef float error_fraction
    # We could avoid clipping here, i.e. allow RGB values to extend beyond
    # 0..255 to capture a larger range of residual error.  This is faster
    # but seems to reduce image quality.
    # TODO: is this still true?
    for i in range(yt, yb):
        for j in range(xl, xr):
+            error_fraction = dither.pattern[(i - y) * dither.x_shape + j - x + dither.x_origin]
            for k in range(3):
-                error = dither.pattern[(i - y) * dither.x_shape + j - x + dither.x_origin] * quant_error[k]
-                image[i,j,k] = clip(image[i,j,k] + error, 0, 255)
+                image[i,j,k] = clip(image[i,j,k] + error_fraction * quant_error[k], 0, 255)


@cython.boundscheck(False)
@ -183,12 +183,6 @@ cdef unsigned char find_nearest_colour(float[::1] pixel_rgb, unsigned char[::1]
@cython.boundscheck(False)
@cython.wraparound(False)
 def dither_image(screen, float[:, :, ::1] image_rgb, dither, int lookahead, unsigned char verbose):
-    cdef (unsigned char)[:, ::1] image_nbit = np.empty(
-        (image_rgb.shape[0], image_rgb.shape[1]), dtype=np.uint8)
-
-    cdef int yres = screen.Y_RES
-    cdef int xres = screen.X_RES
-
    cdef int y, x, i
    cdef float[3] input_pixel_rgb
    cdef float[3] quant_error
@ -200,26 +194,32 @@ def dither_image(screen, float[:, :, ::1] image_rgb, dither, int lookahead, unsi
    cdef unsigned char output_pixel_nbit
    cdef float[::1] output_pixel_rgb

-    # Flatten python dither pattern array for more efficient access
-    # TODO: doesn't actually help?
-    cdef Dither cdither
-    cdither.y_shape = dither.PATTERN.shape[0]
-    cdither.x_shape = dither.PATTERN.shape[1]
-    cdither.y_origin = dither.ORIGIN[0]
-    cdither.x_origin = dither.ORIGIN[1]
-    # Convert dither.PATTERN to a malloced array which is faster to access
-    cdither.pattern = <float *> malloc(cdither.x_shape * cdither.y_shape * sizeof(float))
-    for i in range(cdither.y_shape):
-        for j in range(cdither.x_shape):
-            cdither.pattern[i * cdither.x_shape + j] = dither.PATTERN[i, j, 0]
+    # Hoist some python attribute accesses into C variables for efficient access during the main loop

-    cdef (unsigned char)[:, ::1] distances = screen.palette.distances
+    cdef int yres = screen.Y_RES
+    cdef int xres = screen.X_RES

    cdef float[:, ::1] palette_rgb = np.zeros((len(screen.palette.RGB), 3), dtype=np.float32)
    for i in screen.palette.RGB.keys():
        for j in range(3):
            palette_rgb[i, j] = screen.palette.RGB[i][j]

+    cdef (unsigned char)[:, ::1] distances = screen.palette.distances
+
+    cdef Dither cdither
+    cdither.y_shape = dither.PATTERN.shape[0]
+    cdither.x_shape = dither.PATTERN.shape[1]
+    cdither.y_origin = dither.ORIGIN[0]
+    cdither.x_origin = dither.ORIGIN[1]
+    # TODO: should be just as efficient to use a memoryview?
+    cdither.pattern = <float *> malloc(cdither.x_shape * cdither.y_shape * sizeof(float))
+    for i in range(cdither.y_shape):
+        for j in range(cdither.x_shape):
+            cdither.pattern[i * cdither.x_shape + j] = dither.PATTERN[i, j]
+
+    cdef (unsigned char)[:, ::1] image_nbit = np.empty(
+        (image_rgb.shape[0], image_rgb.shape[1]), dtype=np.uint8)
+
    for y in range(yres):
        if verbose:
            print("%d/%d" % (y, yres))
--- a/dither_pattern.py
+++ b/dither_pattern.py
@ -11,7 +11,7 @@ class DitherPattern:
 class NoDither(DitherPattern):
    """No dithering."""
    PATTERN = np.array(((0, 0), (0, 0)),
-                       dtype=np.float32).reshape(2, 2, 1) / np.float(16)
+                       dtype=np.float32).reshape(2, 2) / np.float(16)
    ORIGIN = (0, 1)


@ -20,7 +20,7 @@ class FloydSteinbergDither(DitherPattern):
    # 0 * 7
    # 3 5 1
    PATTERN = np.array(((0, 0, 7), (3, 5, 1)),
-                       dtype=np.float32).reshape(2, 3, 1) / np.float(16)
+                       dtype=np.float32).reshape(2, 3) / np.float(16)
    ORIGIN = (0, 1)


@ -31,7 +31,7 @@ class FloydSteinbergDither2(DitherPattern):
    PATTERN = np.array(
        ((0, 0, 0, 0, 0, 7),
         (3, 5, 1, 0, 0, 0)),
-        dtype=np.float32).reshape(2, 6, 1) / np.float(16)
+        dtype=np.float32).reshape(2, 6) / np.float(16)
    ORIGIN = (0, 2)


@ -41,7 +41,7 @@ class BuckelsDither(DitherPattern):
    # 1 2 1 0
    # 0 1 0 0
    PATTERN = np.array(((0, 0, 2, 1), (1, 2, 1, 0), (0, 1, 0, 0)),
-                       dtype=np.float32).reshape(3, 4, 1) / np.float32(8)
+                       dtype=np.float32).reshape(3, 4) / np.float32(8)
    ORIGIN = (0, 1)


@ -52,7 +52,7 @@ class JarvisDither(DitherPattern):
    # 3 5 7 5 3
    # 1 3 5 3 1
    PATTERN = np.array(((0, 0, 0, 7, 5), (3, 5, 7, 5, 3), (1, 3, 5, 3, 1)),
-                       dtype=np.float32).reshape(3, 5, 1) / np.float32(48)
+                       dtype=np.float32).reshape(3, 5) / np.float32(48)
    ORIGIN = (0, 2)


@ -72,7 +72,7 @@ class JarvisModifiedDither(DitherPattern):
    PATTERN = np.array((
        (0, 0, 0, 15, 11, 7, 3),
        (3, 5, 7, 5, 3, 1, 0),
-        (1, 3, 5, 3, 1, 0, 0)), dtype=np.float32).reshape(3, 7, 1)
+        (1, 3, 5, 3, 1, 0, 0)), dtype=np.float32).reshape(3, 7)
    PATTERN /= np.sum(PATTERN)
    ORIGIN = (0, 2)