Flatten image array, about 5% faster

2021-01-22 22:19:48 +00:00 · 2021-01-22 22:19:48 +00:00 · 081c815643
parent 61e1448b87
commit 081c815643
1 changed file with 29 additions and 9 deletions
--- a/dither.pyx
+++ b/dither.pyx
@ -9,12 +9,18 @@ from libc.stdlib cimport malloc, free

 # TODO: use a cdef class
 cdef struct Dither:
-    float* pattern
+    float* pattern   # Flattened dither pattern; entry (i, j) is stored at i * x_shape + j
    int x_shape  # Row length of pattern; used as the row stride when indexing it
    int y_shape  # Number of pattern rows (set from dither.PATTERN.shape[0] in dither_image)
    int x_origin  # Added to the column offset (j - x) when apply() looks up a pattern weight
    int y_origin  # NOTE(review): not referenced in the visible code; presumably the row counterpart of x_origin - confirm

+cdef struct Image:
+    float* flat   # Flattened image array; element [y, x, c] is at y * shape1 * shape2 + x * shape2 + c (malloc'ed and freed in dither_image)
+    int shape0  # Copied from image_rgb.shape[0] in dither_image; extent of the y index
+    int shape1  # Copied from image_rgb.shape[1] in dither_image; extent of the x index
+    int shape2  # Copied from image_rgb.shape[2] in dither_image; extent of the channel index (other loops iterate range(3))
+

@cython.boundscheck(False)
@cython.wraparound(False)
@ -36,7 +42,7 @@ cdef void apply_one_line(Dither* dither, int xl, int xr, int x, float[] image, i

@cython.boundscheck(False)
@cython.wraparound(False)
-cdef apply(Dither* dither, screen, int x, int y, float [:, :, ::1]image, float[] quant_error):
+cdef apply(Dither* dither, screen, int x, int y, Image* image, float[] quant_error):
    cdef int i, j, k

    cdef int yt = dither_bounds_yt(dither, y)
@ -53,7 +59,8 @@ cdef apply(Dither* dither, screen, int x, int y, float [:, :, ::1]image, float[]
        for j in range(xl, xr):
            for k in range(3):
                error = dither.pattern[(i - y) * dither.x_shape + j - x + dither.x_origin] * quant_error[k]
-                image[i, j, k] = clip(image[i, j, k] + error, 0, 255)
+                image.flat[i * image.shape1 * image.shape2 + j * image.shape2 + k] = clip(
+                    image.flat[i * image.shape1 * image.shape2 + j * image.shape2 + k] + error, 0, 255)


@cython.boundscheck(False)
@ -88,7 +95,7 @@ cdef int dither_bounds_yb(Dither *dither, int y_res, int y):
@cython.boundscheck(False)
@cython.wraparound(False)
 cdef dither_lookahead(Dither* dither,
-        screen, float[:,:,::1] image_rgb, int x, int y, unsigned char[:, ::1] options_4bit,
+        screen, Image* image_rgb, int x, int y, unsigned char[:, ::1] options_4bit,
        float[:, :, ::1] options_rgb, int lookahead):
    cdef int x_res = screen.X_RES

@ -107,7 +114,8 @@ cdef dither_lookahead(Dither* dither,
        # Copies of input pixels so we can dither in bulk
        for j in range(xxr - x):
            for k in range(3):
-                lah_image_rgb[i * lah_shape1 * lah_shape2 + j * lah_shape2 + k] = image_rgb[y, x+j, k]
+                lah_image_rgb[i * lah_shape1 * lah_shape2 + j * lah_shape2 + k] = image_rgb.flat[
+                    y * image_rgb.shape1 * image_rgb.shape2 + (x+j) * image_rgb.shape2 + k]
        # Leave enough space at right of image so we can dither the last of our lookahead pixels.
        for j in range(xxr - x, lookahead + xr - xl):
            for k in range(3):
@ -213,7 +221,7 @@ def dither_image(screen, float[:, :, ::1] image_rgb, dither, int lookahead):
    cdef (unsigned char)[:, ::1] options_4bit
    cdef float[:, :, ::1] options_rgb
    cdef unsigned char output_pixel_4bit
-    cdef float[::1] input_pixel_rgb
+    cdef float[3] input_pixel_rgb

    cdef Dither cdither
    cdither.y_shape = dither.PATTERN.shape[0]
@ -226,16 +234,27 @@ def dither_image(screen, float[:, :, ::1] image_rgb, dither, int lookahead):
        for j in range(cdither.x_shape):
            cdither.pattern[i * cdither.x_shape + j] = dither.PATTERN[i, j, 0]

+    cdef Image image
+    image.flat = <float *> malloc(image_rgb.shape[0] * image_rgb.shape[1] * image_rgb.shape[2] * sizeof(float))
+    image.shape0 = image_rgb.shape[0]
+    image.shape1 = image_rgb.shape[1]
+    image.shape2 = image_rgb.shape[2]
+    for y in range(image.shape0):
+        for x in range(image.shape1):
+            for i in range(image.shape2):
+                image.flat[y * image.shape1 * image.shape2 + x * image.shape2 + i] = image_rgb[y, x, i]
+
    for y in range(yres):
        output_pixel_4bit = 0
        for x in range(xres):
-            input_pixel_rgb = image_rgb[y, x, :]
+            for i in range(3):
+                input_pixel_rgb[i] = image.flat[y * image.shape1 * image.shape2 + x * image.shape2 + i]
            if lookahead:
                palette_choices_4bit, palette_choices_rgb = lookahead_options(
                    screen, lookahead, output_pixel_4bit, x % 4)
                output_pixel_4bit, output_pixel_rgb = \
                    dither_lookahead(
-                        &cdither, screen, image_rgb, x, y, palette_choices_4bit, palette_choices_rgb, lookahead)
+                        &cdither, screen, &image, x, y, palette_choices_4bit, palette_choices_rgb, lookahead)
            else:
                palette_choices_4bit, palette_choices_rgb = screen.pixel_palette_options(output_pixel_4bit, x)
                output_pixel_4bit, output_pixel_rgb = \
@ -243,9 +262,10 @@ def dither_image(screen, float[:, :, ::1] image_rgb, dither, int lookahead):
            for i in range(3):
                quant_error[i] = input_pixel_rgb[i] - output_pixel_rgb[i]
            image_4bit[y, x] = output_pixel_4bit
-            apply(&cdither, screen, x, y, image_rgb, quant_error)
+            apply(&cdither, screen, x, y, &image, quant_error)
            for i in range(3):
                image_rgb[y, x, i] = output_pixel_rgb[i]

    free(cdither.pattern)
+    free(image.flat)
    return image_4bit, np.array(image_rgb)