# cython: infer_types=True

cimport cython
import functools
import numpy as np
# from cython.parallel import prange
from cython.view cimport array as cvarray
from libc.stdlib cimport malloc, free


# TODO: use a cdef class
cdef struct Dither:
    float* pattern  # Flattened dither pattern (row-major, x_shape per row)
    int x_shape
    int y_shape
    int x_origin
    int y_origin


cdef struct Image:
    float* flat  # Flattened image array (shape0 * shape1 * shape2 floats)
    int shape0
    int shape1
    int shape2


cdef float clip(float a, float min_value, float max_value) nogil:
    """Return a clamped to the closed range [min_value, max_value]."""
    return min(max(a, min_value), max_value)


cdef int dither_bounds_xl(Dither *dither, int x):
    """Left column touched by the dither pattern placed at x (never < 0)."""
    cdef int el = max(dither.x_origin - x, 0)
    cdef int xl = x - dither.x_origin + el
    return xl


cdef int dither_bounds_xr(Dither *dither, int x_res, int x):
    """Right column bound (used as an exclusive range() end) for the
    dither pattern placed at x, limited by the x_res - x columns that
    remain to the right."""
    cdef int er = min(dither.x_shape, x_res - x)
    cdef int xr = x - dither.x_origin + er
    return xr


cdef int dither_bounds_yt(Dither *dither, int y):
    """Top row touched by the dither pattern placed at y (never < 0)."""
    cdef int et = max(dither.y_origin - y, 0)
    cdef int yt = y - dither.y_origin + et
    return yt


cdef int dither_bounds_yb(Dither *dither, int y_res, int y):
    """Bottom row bound (used as an exclusive range() end) for the dither
    pattern placed at y, limited by the y_res - y rows that remain below."""
    cdef int eb = min(dither.y_shape, y_res - y)
    cdef int yb = y - dither.y_origin + eb
    return yb


@cython.boundscheck(False)
@cython.wraparound(False)
@functools.lru_cache(None)
def lookahead_options(screen, lookahead, last_pixel_4bit, x):
    """Enumerate every sequence of `lookahead` pixel choices starting at x.

    Bit j of the candidate index i selects which of the two entries returned
    by screen.pixel_palette_options() is taken for pixel x + j; the chosen
    4-bit value is fed into the query for the next pixel.

    Results are memoised with lru_cache, so all arguments must be hashable.

    Returns:
        (options_4bit, options_rgb): uint8 array of shape
        (2 ** lookahead, lookahead) and float32 array of shape
        (2 ** lookahead, lookahead, 3).
    """
    num_options = 2 ** lookahead
    options_4bit = np.empty((num_options, lookahead), dtype=np.uint8)
    options_rgb = np.empty((num_options, lookahead, 3), dtype=np.float32)
    for i in range(num_options):
        output_pixel_4bit = last_pixel_4bit
        for j in range(lookahead):
            xx = x + j
            palette_choices_4bit, palette_choices_rgb = \
                screen.pixel_palette_options(output_pixel_4bit, xx)
            choice = (i & (1 << j)) >> j
            output_pixel_4bit = palette_choices_4bit[choice]
            output_pixel_rgb = np.array(palette_choices_rgb[choice])
            options_4bit[i, j] = output_pixel_4bit
            options_rgb[i, j, :] = output_pixel_rgb
    return options_4bit, options_rgb


@cython.boundscheck(False)
@cython.wraparound(False)
cdef dither_lookahead(
        Dither* dither, screen, Image* image_rgb, int x, int y,
        unsigned char[:, ::1] options_4bit, float[:, :, ::1] options_rgb,
        int lookahead):
    """Pick the best next pixel by simulating every lookahead candidate.

    For each of the 2 ** lookahead candidate sequences, a scratch copy of the
    current row (starting at column x) is made, the quantization error of
    each candidate pixel is diffused along that scratch row, and the squared
    palette distances of the first `lookahead` pixels are summed.

    Args:
        dither: flattened dither pattern.
        screen: object providing X_RES and palette.distances.
        image_rgb: flattened working image.
        x, y: position of the pixel being chosen.
        options_4bit, options_rgb: candidate tables as produced by
            lookahead_options().
        lookahead: number of pixels to look ahead (expected >= 1).

    Returns:
        (4-bit value, RGB memoryview slice) of the first pixel of the
        candidate sequence with the lowest total error.

    Raises:
        MemoryError: if the scratch buffer cannot be allocated.
    """
    cdef int x_res = screen.X_RES
    # Fetch everything that can raise a Python exception *before* the
    # malloc below, so an exception cannot leak the scratch buffer.
    cdef (unsigned char)[:, ::1] distances = screen.palette.distances

    cdef int xl = dither_bounds_xl(dither, x)
    cdef int xr = dither_bounds_xr(dither, x_res, x)

    # X coord value of larger of dither bounding box or lookahead horizon
    cdef int xxr = min(max(x + lookahead, xr), x_res)
    cdef int i, j, k

    # 1 << lookahead rather than 2 ** lookahead: the C-typed power operator
    # changes result type between Cython versions, the shift does not.
    cdef int lah_shape0 = 1 << lookahead
    cdef int lah_shape1 = lookahead + xr - xl
    cdef int lah_shape2 = 3
    cdef int row_offset

    cdef float[3] quant_error
    cdef unsigned char bit4
    cdef int best = 0
    cdef int best_error = 2**31-1
    cdef int total_error
    cdef long flat, dist
    cdef long r, g, b

    cdef float *lah_image_rgb = <float *> malloc(
        lah_shape0 * lah_shape1 * lah_shape2 * sizeof(float))
    if not lah_image_rgb:
        raise MemoryError()

    for i in range(lah_shape0):
        row_offset = i * lah_shape1 * lah_shape2
        # Copies of input pixels so we can dither in bulk
        for j in range(xxr - x):
            for k in range(3):
                lah_image_rgb[row_offset + j * lah_shape2 + k] = image_rgb.flat[
                    y * image_rgb.shape1 * image_rgb.shape2
                    + (x + j) * image_rgb.shape2 + k]
        # Leave enough space at right of image so we can dither the last of
        # our lookahead pixels.
        for j in range(xxr - x, lah_shape1):
            for k in range(3):
                lah_image_rgb[row_offset + j * lah_shape2 + k] = 0

    # Iterating by row then column is faster for some reason?
    # NOTE(review): i runs up to xxr - x, which can exceed `lookahead` when
    # the dither footprint is wider than the lookahead window, while
    # options_rgb has only `lookahead` entries on its second axis -- confirm
    # this read cannot go out of bounds (boundscheck is disabled here).
    for i in range(xxr - x):
        xl = dither_bounds_xl(dither, i)
        xr = dither_bounds_xr(dither, x_res - x, i)
        for j in range(lah_shape0):
            row_offset = j * lah_shape1 * lah_shape2
            # Don't update the input at position x (since we've already
            # chosen fixed outputs), but do propagate quantization errors to
            # positions >x so we can compensate for how good/bad these
            # choices were.
            # options_rgb choices are fixed, but we can still distribute
            # quantization error from having made these choices, in order to
            # compute the total error
            for k in range(3):
                quant_error[k] = (
                    lah_image_rgb[row_offset + i * lah_shape2 + k]
                    - options_rgb[j, i, k])
            apply_one_line(
                dither, xl, xr, i, &lah_image_rgb[row_offset], lah_shape2,
                quant_error)

    for i in range(lah_shape0):
        row_offset = i * lah_shape1 * lah_shape2
        total_error = 0
        for j in range(lookahead):
            # Clip lah_image_rgb into 0..255 range to prepare for computing
            # colour distance
            r = <long>clip(lah_image_rgb[row_offset + j * lah_shape2 + 0], 0, 255)
            g = <long>clip(lah_image_rgb[row_offset + j * lah_shape2 + 1], 0, 255)
            b = <long>clip(lah_image_rgb[row_offset + j * lah_shape2 + 2], 0, 255)

            flat = (r << 16) + (g << 8) + b
            bit4 = options_4bit[i, j]
            dist = distances[flat, bit4]
            total_error += dist * dist
            # Early out: this candidate can no longer beat the best so far.
            if total_error >= best_error:
                break
        if total_error < best_error:
            best_error = total_error
            best = i

    free(lah_image_rgb)
    return options_4bit[best, 0], options_rgb[best, 0, :]


cdef void apply_one_line(Dither* dither, int xl, int xr, int x, float[] image, int image_shape1, float[] quant_error):
    """Diffuse quant_error along a single row using the first pattern row.

    Columns xl..xr-1 of `image` (a flattened row with image_shape1 channels
    per pixel) are updated in place and clipped to 0..255.
    """
    cdef int i, j
    cdef float error
    cdef float weight

    for i in range(xl, xr):
        weight = dither.pattern[i - x + dither.x_origin]
        for j in range(3):
            error = weight * quant_error[j]
            image[i * image_shape1 + j] = clip(
                image[i * image_shape1 + j] + error, 0, 255)


cdef apply(Dither* dither, screen, int x, int y, Image* image, float[] quant_error):
    """Diffuse quant_error from pixel (x, y) into the neighbouring pixels of
    `image` covered by the dither pattern, updating `image` in place.

    The affected region is limited to screen.X_RES / screen.Y_RES via the
    dither_bounds_* helpers, and every updated value is clipped to 0..255.
    """
    cdef int i, j, k
    cdef int yt = dither_bounds_yt(dither, y)
    cdef int yb = dither_bounds_yb(dither, screen.Y_RES, y)
    cdef int xl = dither_bounds_xl(dither, x)
    cdef int xr = dither_bounds_xr(dither, screen.X_RES, x)
    cdef int pixel_offset
    cdef float error
    cdef float weight

    # We could avoid clipping here, i.e. allow RGB values to extend beyond
    # 0..255 to capture a larger range of residual error.  This is faster
    # but seems to reduce image quality.
    # TODO: is this still true?
    for i in range(yt, yb):
        for j in range(xl, xr):
            weight = dither.pattern[
                (i - y) * dither.x_shape + j - x + dither.x_origin]
            pixel_offset = i * image.shape1 * image.shape2 + j * image.shape2
            for k in range(3):
                error = weight * quant_error[k]
                image.flat[pixel_offset + k] = clip(
                    image.flat[pixel_offset + k] + error, 0, 255)


@cython.boundscheck(False)
@cython.wraparound(False)
def find_nearest_colour(screen, float[::1] pixel_rgb, unsigned char[::1] options_4bit, unsigned char[:, ::1] options_rgb):
    """Return the palette option closest to pixel_rgb.

    Distances are read from screen.palette.distances, indexed by the packed
    value (r << 16) + (g << 8) + b of the pixel and by the 4-bit option.

    Returns:
        (4-bit value, RGB row) of the first option with the smallest
        distance.

    Raises:
        ValueError: if options_4bit is empty (previously this indexed with
            an uninitialised value while bounds checking is disabled).
    """
    cdef Py_ssize_t i
    cdef int dist
    cdef int best = 0
    cdef unsigned char bit4
    cdef int best_dist = 2**8
    cdef long flat

    cdef (unsigned char)[:, ::1] distances = screen.palette.distances

    if options_4bit.shape[0] == 0:
        raise ValueError("find_nearest_colour: no palette options given")

    # The packed colour index does not depend on the option, so compute it
    # once.  The float channels are truncated to integers before shifting.
    flat = (
        (<long>pixel_rgb[0] << 16)
        + (<long>pixel_rgb[1] << 8)
        + <long>pixel_rgb[2])
    for i in range(options_4bit.shape[0]):
        bit4 = options_4bit[i]
        dist = distances[flat, bit4]
        if dist < best_dist:
            best_dist = dist
            best = i

    return options_4bit[best], options_rgb[best, :]


@cython.boundscheck(False)
@cython.wraparound(False)
def dither_image(screen, float[:, :, ::1] image_rgb, dither, int lookahead):
    """Dither image_rgb to the screen's palette, pixel by pixel.

    Each pixel is quantized (with lookahead search if `lookahead` is
    non-zero, otherwise by nearest colour) and the quantization error is
    diffused into a flattened working copy of the image using dither.PATTERN
    and dither.ORIGIN.

    Note that image_rgb is overwritten in place with the chosen output
    colours.

    Args:
        screen: object providing X_RES, Y_RES, palette.distances and
            pixel_palette_options().
        image_rgb: C-contiguous float32 image indexed as [y, x, channel].
        dither: object providing PATTERN (indexed [row, column, 0]) and
            ORIGIN ((y, x) origin of the pattern).
        lookahead: number of pixels to look ahead; 0 disables lookahead.

    Returns:
        (image_4bit, image_rgb_array): memoryview of chosen 4-bit pixel
        values and a numpy array built from the updated image_rgb.

    Raises:
        MemoryError: if a working buffer cannot be allocated.
    """
    cdef (unsigned char)[:, ::1] image_4bit = np.empty(
        (image_rgb.shape[0], image_rgb.shape[1]), dtype=np.uint8)

    cdef int yres = screen.Y_RES
    cdef int xres = screen.X_RES

    cdef int y, x, i, j
    cdef float[3] quant_error
    cdef unsigned char output_pixel_4bit
    cdef float[3] input_pixel_rgb

    cdef Image cimage_rgb
    cdef Dither cdither

    # NULL-initialise so the finally clause can free unconditionally
    # (free(NULL) is a no-op) whichever step fails.
    cimage_rgb.flat = NULL
    cdither.pattern = NULL

    try:
        # Flatten python image array for more efficient access
        cimage_rgb.flat = <float *> malloc(
            image_rgb.shape[0] * image_rgb.shape[1] * image_rgb.shape[2]
            * sizeof(float))
        if not cimage_rgb.flat:
            raise MemoryError()
        cimage_rgb.shape0 = image_rgb.shape[0]
        cimage_rgb.shape1 = image_rgb.shape[1]
        cimage_rgb.shape2 = image_rgb.shape[2]

        for y in range(cimage_rgb.shape0):
            for x in range(cimage_rgb.shape1):
                for i in range(cimage_rgb.shape2):
                    cimage_rgb.flat[
                        y * cimage_rgb.shape1 * cimage_rgb.shape2
                        + x * cimage_rgb.shape2 + i] = image_rgb[y, x, i]

        # Flatten python dither pattern array for more efficient access
        cdither.y_shape = dither.PATTERN.shape[0]
        cdither.x_shape = dither.PATTERN.shape[1]
        cdither.y_origin = dither.ORIGIN[0]
        cdither.x_origin = dither.ORIGIN[1]

        # Convert dither.PATTERN to a malloced array which is faster to
        # access
        cdither.pattern = <float *> malloc(
            cdither.x_shape * cdither.y_shape * sizeof(float))
        if not cdither.pattern:
            raise MemoryError()
        for i in range(cdither.y_shape):
            for j in range(cdither.x_shape):
                cdither.pattern[i * cdither.x_shape + j] = (
                    dither.PATTERN[i, j, 0])

        for y in range(yres):
            output_pixel_4bit = 0
            for x in range(xres):
                for i in range(3):
                    input_pixel_rgb[i] = cimage_rgb.flat[
                        y * cimage_rgb.shape1 * cimage_rgb.shape2
                        + x * cimage_rgb.shape2 + i]
                if lookahead:
                    # x % 4 keeps the lru_cache key space of
                    # lookahead_options() small.
                    palette_choices_4bit, palette_choices_rgb = \
                        lookahead_options(
                            screen, lookahead, output_pixel_4bit, x % 4)
                    output_pixel_4bit, output_pixel_rgb = \
                        dither_lookahead(
                            &cdither, screen, &cimage_rgb, x, y,
                            palette_choices_4bit, palette_choices_rgb,
                            lookahead)
                else:
                    palette_choices_4bit, palette_choices_rgb = \
                        screen.pixel_palette_options(output_pixel_4bit, x)
                    # NOTE(review): input_pixel_rgb is a C float[3]; confirm
                    # it coerces to the float[::1] parameter as intended.
                    output_pixel_4bit, output_pixel_rgb = \
                        find_nearest_colour(
                            screen, input_pixel_rgb, palette_choices_4bit,
                            palette_choices_rgb)
                for i in range(3):
                    quant_error[i] = input_pixel_rgb[i] - output_pixel_rgb[i]
                image_4bit[y, x] = output_pixel_4bit
                apply(&cdither, screen, x, y, &cimage_rgb, quant_error)
                for i in range(3):
                    image_rgb[y, x, i] = output_pixel_rgb[i]
    finally:
        # Always release the C buffers, including when a Python call above
        # raised part-way through the image.
        free(cdither.pattern)
        free(cimage_rgb.flat)

    return image_4bit, np.array(image_rgb)