From ede063a21ba290de3462e715c758dbd82dda6e2c Mon Sep 17 00:00:00 2001 From: kris Date: Mon, 15 Mar 2021 16:22:55 +0000 Subject: [PATCH] No need for pattern to have third axis of shape 1 Minor optimization to not compute error_fraction 3 times Tidy a bit --- dither.pyx | 50 +++++++++++++++++++++++------------------------ dither_pattern.py | 12 ++++++------ 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/dither.pyx b/dither.pyx index da6f7cd..3d7a480 100644 --- a/dither.pyx +++ b/dither.pyx @@ -130,12 +130,12 @@ cdef int dither_lookahead(Dither* dither, float[:, ::1] palette_rgb, cdef void apply_one_line(Dither* dither, int xl, int xr, int x, float[] image, int image_shape1, float[] quant_error) nogil: cdef int i, j - cdef float error + cdef float error_fraction for i in range(xl, xr): + error_fraction = dither.pattern[i - x + dither.x_origin] for j in range(3): - error = dither.pattern[i - x + dither.x_origin] * quant_error[j] - image[i * image_shape1 + j] = clip(image[i * image_shape1 + j] + error, 0, 255) + image[i * image_shape1 + j] = clip(image[i * image_shape1 + j] + error_fraction * quant_error[j], 0, 255) @cython.boundscheck(False) @@ -148,16 +148,16 @@ cdef void apply(Dither* dither, int x_res, int y_res, int x, int y, float[:,:,:: cdef int xl = dither_bounds_xl(dither, x) cdef int xr = dither_bounds_xr(dither, x_res, x) - cdef float error + cdef float error_fraction # We could avoid clipping here, i.e. allow RGB values to extend beyond # 0..255 to capture a larger range of residual error. This is faster # but seems to reduce image quality. # TODO: is this still true? 
for i in range(yt, yb): for j in range(xl, xr): + error_fraction = dither.pattern[(i - y) * dither.x_shape + j - x + dither.x_origin] for k in range(3): - error = dither.pattern[(i - y) * dither.x_shape + j - x + dither.x_origin] * quant_error[k] - image[i,j,k] = clip(image[i,j,k] + error, 0, 255) + image[i,j,k] = clip(image[i,j,k] + error_fraction * quant_error[k], 0, 255) @cython.boundscheck(False) @@ -183,12 +183,6 @@ cdef unsigned char find_nearest_colour(float[::1] pixel_rgb, unsigned char[::1] @cython.boundscheck(False) @cython.wraparound(False) def dither_image(screen, float[:, :, ::1] image_rgb, dither, int lookahead, unsigned char verbose): - cdef (unsigned char)[:, ::1] image_nbit = np.empty( - (image_rgb.shape[0], image_rgb.shape[1]), dtype=np.uint8) - - cdef int yres = screen.Y_RES - cdef int xres = screen.X_RES - cdef int y, x, i cdef float[3] input_pixel_rgb cdef float[3] quant_error @@ -200,26 +194,32 @@ def dither_image(screen, float[:, :, ::1] image_rgb, dither, int lookahead, unsi cdef unsigned char output_pixel_nbit cdef float[::1] output_pixel_rgb - # Flatten python dither pattern array for more efficient access - # TODO: doesn't actually help? 
- cdef Dither cdither - cdither.y_shape = dither.PATTERN.shape[0] - cdither.x_shape = dither.PATTERN.shape[1] - cdither.y_origin = dither.ORIGIN[0] - cdither.x_origin = dither.ORIGIN[1] - # Convert dither.PATTERN to a malloced array which is faster to access - cdither.pattern = malloc(cdither.x_shape * cdither.y_shape * sizeof(float)) - for i in range(cdither.y_shape): - for j in range(cdither.x_shape): - cdither.pattern[i * cdither.x_shape + j] = dither.PATTERN[i, j, 0] + # Hoist some python attribute accesses into C variables for efficient access during the main loop - cdef (unsigned char)[:, ::1] distances = screen.palette.distances + cdef int yres = screen.Y_RES + cdef int xres = screen.X_RES cdef float[:, ::1] palette_rgb = np.zeros((len(screen.palette.RGB), 3), dtype=np.float32) for i in screen.palette.RGB.keys(): for j in range(3): palette_rgb[i, j] = screen.palette.RGB[i][j] + cdef (unsigned char)[:, ::1] distances = screen.palette.distances + + cdef Dither cdither + cdither.y_shape = dither.PATTERN.shape[0] + cdither.x_shape = dither.PATTERN.shape[1] + cdither.y_origin = dither.ORIGIN[0] + cdither.x_origin = dither.ORIGIN[1] + # TODO: should be just as efficient to use a memoryview? 
+ cdither.pattern = malloc(cdither.x_shape * cdither.y_shape * sizeof(float)) + for i in range(cdither.y_shape): + for j in range(cdither.x_shape): + cdither.pattern[i * cdither.x_shape + j] = dither.PATTERN[i, j] + + cdef (unsigned char)[:, ::1] image_nbit = np.empty( + (image_rgb.shape[0], image_rgb.shape[1]), dtype=np.uint8) + for y in range(yres): if verbose: print("%d/%d" % (y, yres)) diff --git a/dither_pattern.py b/dither_pattern.py index 7380cf0..5cf34ff 100644 --- a/dither_pattern.py +++ b/dither_pattern.py @@ -11,7 +11,7 @@ class DitherPattern: class NoDither(DitherPattern): """No dithering.""" PATTERN = np.array(((0, 0), (0, 0)), - dtype=np.float32).reshape(2, 2, 1) / np.float(16) + dtype=np.float32).reshape(2, 2) / np.float(16) ORIGIN = (0, 1) @@ -20,7 +20,7 @@ class FloydSteinbergDither(DitherPattern): # 0 * 7 # 3 5 1 PATTERN = np.array(((0, 0, 7), (3, 5, 1)), - dtype=np.float32).reshape(2, 3, 1) / np.float(16) + dtype=np.float32).reshape(2, 3) / np.float(16) ORIGIN = (0, 1) @@ -31,7 +31,7 @@ class FloydSteinbergDither2(DitherPattern): PATTERN = np.array( ((0, 0, 0, 0, 0, 7), (3, 5, 1, 0, 0, 0)), - dtype=np.float32).reshape(2, 6, 1) / np.float(16) + dtype=np.float32).reshape(2, 6) / np.float(16) ORIGIN = (0, 2) @@ -41,7 +41,7 @@ class BuckelsDither(DitherPattern): # 1 2 1 0 # 0 1 0 0 PATTERN = np.array(((0, 0, 2, 1), (1, 2, 1, 0), (0, 1, 0, 0)), - dtype=np.float32).reshape(3, 4, 1) / np.float32(8) + dtype=np.float32).reshape(3, 4) / np.float32(8) ORIGIN = (0, 1) @@ -52,7 +52,7 @@ class JarvisDither(DitherPattern): # 3 5 7 5 3 # 1 3 5 3 1 PATTERN = np.array(((0, 0, 0, 7, 5), (3, 5, 7, 5, 3), (1, 3, 5, 3, 1)), - dtype=np.float32).reshape(3, 5, 1) / np.float32(48) + dtype=np.float32).reshape(3, 5) / np.float32(48) ORIGIN = (0, 2) @@ -72,7 +72,7 @@ class JarvisModifiedDither(DitherPattern): PATTERN = np.array(( (0, 0, 0, 15, 11, 7, 3), (3, 5, 7, 5, 3, 1, 0), - (1, 3, 5, 3, 1, 0, 0)), dtype=np.float32).reshape(3, 7, 1) + (1, 3, 5, 3, 1, 0, 0)), 
dtype=np.float32).reshape(3, 7) PATTERN /= np.sum(PATTERN) ORIGIN = (0, 2)