# ii-pix/dither_apply.pyx

# cython: infer_types=True

cimport cython
import numpy as np
# from cython.parallel import prange
from cython.view cimport array as cvarray
from libc.stdlib cimport malloc, free


cdef float clip(float a, float min_value, float max_value) nogil:
    # Clamp `a` to the range [min_value, max_value].
    #
    # The lower bound is applied first and the upper bound second, so when the
    # bounds are inverted (min_value > max_value) the result is max_value.
    cdef float bounded = a
    if min_value > bounded:
        bounded = min_value
    if max_value < bounded:
        bounded = max_value
    return bounded


#@cython.boundscheck(False)
#@cython.wraparound(False)
cdef apply_one_line(float[:, :, ::1] pattern, int el, int er, int xl, int xr, int y, float[:, ::1] image,
                   float[] quant_error):
    # Diffuse one pixel's quantisation error along a single row of `image`.
    #
    # For each of the (xr - xl) target pixels starting at column `xl`, the
    # weight pattern[0, el + i, 0] is multiplied by the three-element
    # `quant_error` and added to the pixel's three channels; each result is
    # clamped to 0..255.
    #
    # Parameters:
    #   pattern      dither weights; only row 0 / channel 0 is read here.
    #   el           first pattern column to use.
    #   er, y        unused; kept so the signature stays unchanged.
    #   xl, xr       half-open range of `image` columns to update.
    #   image        one row of pixels, indexed [column, channel]; modified in place.
    #   quant_error  pointer to three floats, one per channel.
    #
    # The weight is read straight from the memoryview rather than from a
    # malloc'd copy of the whole scaled pattern row.  That removes a heap
    # allocation per call, a leak when an `image` index raises, and an
    # unchecked read past the temporary buffer when el + i ran past the
    # pattern width (the memoryview access is bounds-checked instead).
    cdef int i, j
    cdef float weight, delta

    for i in range(xr - xl):
        weight = pattern[0, el + i, 0]
        for j in range(3):
            # Keep the product in its own float, matching the value the old
            # temporary buffer used to hold.
            delta = weight * quant_error[j]
            image[xl + i, j] = clip(image[xl + i, j] + delta, 0, 255)


# XXX cythonize
def apply(pattern, int el, int er, int xl, int xr, int et, int eb, int yt, int yb, image, quant_error):
    """Add pattern-weighted quantisation error to a rectangle of `image` in place.

    `quant_error` is reshaped to (1, 1, 3) and multiplied by `pattern`; the
    sub-rectangle rows et:eb / columns el:er of that product is added to
    `image` rows yt:yb / columns xl:xr, and the sum is clipped to 0..255
    before being written back.
    """
    rows = slice(yt, yb)
    cols = slice(xl, xr)

    per_channel_error = quant_error.reshape((1, 1, 3))
    weighted_error = pattern * per_channel_error

    # We could avoid clipping here, i.e. allow RGB values to extend beyond
    # 0..255 to capture a larger range of residual error.  This is faster
    # but seems to reduce image quality.
    # XXX extend image region to avoid need for boundary box clipping
    updated = image[rows, cols, :] + weighted_error[et:eb, el:er, :]
    image[rows, cols, :] = np.clip(updated, 0, 255)


cdef x_dither_bounds(float [:, :, ::1] pattern, int x_origin, int x_res, int x):
    # Compute the horizontal extent of the dither pattern when its origin
    # column `x_origin` is placed at image column `x`.
    #
    # Returns a tuple (el, er, xl, xr):
    #   el, er  first / one-past-last pattern column to use
    #   xl, xr  the image columns those pattern columns map onto
    cdef int shift = x - x_origin
    cdef Py_ssize_t pattern_width = pattern.shape[1]
    cdef int room = x_res - 1 - x
    cdef int left = 0
    cdef int right

    # Skip the pattern columns that would map to image columns left of 0.
    if x_origin - x > 0:
        left = x_origin - x

    # Limit the right edge to the smaller of the pattern width and the
    # (x_res - 1 - x) cap.
    if room < pattern_width:
        right = room
    else:
        right = <int> pattern_width

    return left, right, shift + left, shift + right


cdef long* flatten_rgb(float [:, :, ::1] rgb) except NULL:
    # Pack the first three channels of every pixel into one integer,
    # (c0 * 65536) + (c1 * 256) + c2, with each channel truncated to an
    # integer first.
    #
    # Returns a malloc'd array of rgb.shape[0] * rgb.shape[1] longs in
    # row-major order; the caller owns it and must free() it.  NULL is
    # returned only when an exception has been raised.
    #
    # Raises:
    #   ValueError   if `rgb` has fewer than three channels.
    #   MemoryError  if the buffer cannot be allocated.
    cdef Py_ssize_t rows = rgb.shape[0]
    cdef Py_ssize_t cols = rgb.shape[1]
    cdef Py_ssize_t i, j
    cdef long *flat

    # Validate before allocating so a failed channel lookup cannot leak.
    if rgb.shape[2] < 3:
        raise ValueError("rgb must have at least 3 channels")

    # Always request at least one element so that a NULL result from malloc
    # unambiguously means failure, even for an empty image.
    flat = <long *> malloc(max(rows * cols, 1) * sizeof(long))
    if flat == NULL:
        raise MemoryError()

    for i in range(rows):
        for j in range(cols):
            # Multiplication rather than << keeps the arithmetic well defined
            # in C should a channel value ever be negative.
            flat[i * cols + j] = (
                (<long> rgb[i, j, 0]) * 65536
                + (<long> rgb[i, j, 1]) * 256
                + (<long> rgb[i, j, 2]))
    return flat


cdef char* distance(char [:, ::1] distances, float [:, :, ::1] rgb, char [:, ::1] bit4) except NULL:
    # Look up a distance for every pixel of `rgb`.
    #
    # Each pixel is packed into a single integer by flatten_rgb(); that value
    # and the matching bit4[i, j] entry index the `distances` table.
    #
    # Returns a malloc'd array of rgb.shape[0] * rgb.shape[1] chars in
    # row-major order; the caller owns it and must free() it.  NULL is
    # returned only when an exception has been raised.
    #
    # Raises:
    #   MemoryError  if either buffer cannot be allocated.
    #   IndexError   if a lookup falls outside `distances` or `bit4`
    #                (while bounds checking is enabled).
    cdef Py_ssize_t rows = rgb.shape[0]
    cdef Py_ssize_t cols = rgb.shape[1]
    cdef Py_ssize_t i, j, idx
    cdef char *dist
    cdef long *flat = flatten_rgb(rgb)
    if flat == NULL:
        raise MemoryError("flatten_rgb returned NULL")

    try:
        # At least one element, so NULL from malloc always means failure.
        dist = <char *> malloc(max(rows * cols, 1) * sizeof(char))
        if dist == NULL:
            raise MemoryError()
        try:
            for i in range(rows):
                for j in range(cols):
                    idx = i * cols + j
                    dist[idx] = distances[flat[idx], bit4[i, j]]
        except BaseException:
            # Do not leak the result buffer when a lookup fails.
            free(dist)
            raise
    finally:
        # The packed pixel values are only needed while filling `dist`.
        free(flat)
    return dist


def dither_lookahead(
        screen, float[:,:,::1] image_rgb, dither, differ, int x, int y, char[:, ::1] options_4bit,
        float[:, :, ::1] options_rgb, int lookahead):
    """Choose the best of 2**lookahead candidate pixel sequences starting at (x, y).

    A working copy of row `y` of `image_rgb`, starting at column `x`, is made
    for every candidate.  For each column, the difference between the working
    copy and that candidate's `options_rgb` value is diffused along the row
    with row 0 of `dither.PATTERN`.  `differ.distance` then scores the first
    `lookahead` pixels of every working copy against `options_4bit`, and the
    candidate with the smallest sum of squared scores wins (the first one on
    ties).

    Args:
        screen: object providing `X_RES`.
        image_rgb: image pixels indexed [row, column, channel].
        dither: object providing `PATTERN` and `ORIGIN`.
        differ: object providing `distance(rgb, options_4bit)`.
        x, y: column and row of the pixel being decided.
        options_4bit: candidates, indexed [candidate, position].
        options_rgb: the same candidates as RGB, indexed
            [candidate, position, channel].
        lookahead: number of pixels scored per candidate.

    Returns:
        A tuple `(options_4bit[best, 0], options_rgb[best, 0, :])`.

    Raises:
        ValueError: if `lookahead` is outside 0..30.
        MemoryError: if the scratch error buffer cannot be allocated.
    """
    # 1 << lookahead must fit in a C int.
    if lookahead < 0 or lookahead > 30:
        raise ValueError("lookahead must be in the range 0..30")

    cdef float[:, :, ::1] pattern = dither.PATTERN
    cdef int x_res = screen.X_RES
    cdef int dither_x_origin = dither.ORIGIN[1]

    # A shift rather than 2 ** lookahead: the result is a C integer whatever
    # the `cpow` directive is set to.
    cdef int num_options = 1 << lookahead

    cdef int el, er, xl, xr
    el, er, xl, xr = x_dither_bounds(pattern, dither_x_origin, x_res, x)

    # X coord value of larger of dither bounding box or lookahead horizon
    cdef int xxr = min(max(x + lookahead, xr), x_res)

    # Copies of input pixels so we can dither in bulk
    # Leave enough space at right of image so we can dither the last of our lookahead pixels
    cdef float[:, :, ::1] lah_image_rgb = np.zeros(
        (num_options, lookahead + xr - xl, 3), dtype=np.float32)
    lah_image_rgb[:, 0:xxr - x, :] = image_rgb[y, x:xxr, :]

    cdef int i, j, k, l

    # One (r, g, b) error triple per candidate, stored at [k * 3 + l].
    cdef float *quant_error = <float *> malloc(num_options * 3 * sizeof(float))
    if quant_error == NULL:
        raise MemoryError()

    try:
        for i in range(xxr - x):
            # options_rgb choices are fixed, but we can still distribute
            # quantization error from having made these choices, in order to compute
            # the total error
            for k in range(num_options):
                for l in range(3):
                    quant_error[k * 3 + l] = lah_image_rgb[k, i, l] - options_rgb[k, i, l]

            # Don't update the input at position x (since we've already chosen
            # fixed outputs), but do propagate quantization errors to positions >x
            # so we can compensate for how good/bad these choices were
            el, er, xl, xr = x_dither_bounds(pattern, dither_x_origin, x_res, i)
            for j in range(num_options):
                # Candidate j's triple starts at j * 3, matching the layout
                # written just above.
                apply_one_line(pattern, el, er, xl, xr, 0, lah_image_rgb[j, :, :], &quant_error[j * 3])
    finally:
        # Release the scratch buffer even if an index check above raised.
        free(quant_error)

    cdef long[:, ::1] error = differ.distance(lah_image_rgb[:, 0:lookahead, :], options_4bit)

    cdef int best = 0
    # 64-bit accumulators so the sum of squares cannot wrap a 32-bit int;
    # a negative best_error means "no candidate accepted yet".
    cdef long long best_error = -1
    cdef long long total_error
    cdef long long pixel_error
    for i in range(num_options):
        total_error = 0
        for j in range(lookahead):
            pixel_error = error[i, j]
            total_error += pixel_error * pixel_error
            # Stop summing once this candidate can no longer win.
            if best_error >= 0 and total_error >= best_error:
                break
        if best_error < 0 or total_error < best_error:
            best_error = total_error
            best = i

    return options_4bit[best, 0], options_rgb[best, 0, :]
Cythonize dither.apply 2021-01-10 22:12:14 +00:00			`# cython: infer_types=True`

			`cimport cython`
			`import numpy as np`
			`# from cython.parallel import prange`
			`from cython.view cimport array as cvarray`
malloc temporary array 2021-01-11 18:55:37 +00:00			`from libc.stdlib cimport malloc, free`
Cythonize dither.apply 2021-01-10 22:12:14 +00:00

			`cdef float clip(float a, float min_value, float max_value) nogil:`
			`return min(max(a, min_value), max_value)`


cythonize dither_lookahead, still lots of low-hanging fruit 2021-01-11 20:21:00 +00:00			`#@cython.boundscheck(False)`
			`#@cython.wraparound(False)`
Optimize more 2021-01-11 20:43:28 +00:00			`cdef apply_one_line(float[:, :, ::1] pattern, int el, int er, int xl, int xr, int y, float[:, ::1] image,`
			`float[] quant_error):`
Cythonize dither.apply 2021-01-10 22:12:14 +00:00			`cdef int i, j`
Optimize more 2021-01-11 20:43:28 +00:00			`cdef float error = <float > malloc(pattern.shape[1] * 3 * sizeof(float))`
Cythonize dither.apply 2021-01-10 22:12:14 +00:00
			`for i in range(pattern.shape[1]):`
Optimize more 2021-01-11 20:43:28 +00:00			`for j in range(3):`
			`error[i * 3 + j] = pattern[0, i, 0] * quant_error[j]`
Cythonize dither.apply 2021-01-10 22:12:14 +00:00
			`for i in range(xr - xl):`
			`for j in range(3):`
Optimize more 2021-01-11 20:43:28 +00:00			`image[xl+i, j] = clip(image[xl + i, j] + error[(el + i) * 3 + j], 0, 255)`
malloc temporary array 2021-01-11 18:55:37 +00:00			`free(error)`
Cythonize dither.apply 2021-01-10 22:12:14 +00:00

			`# XXX cythonize`
			`def apply(pattern, int el, int er, int xl, int xr, int et, int eb, int yt, int yb, image, quant_error):`
			`error = pattern * quant_error.reshape((1, 1, 3))`

			`# We could avoid clipping here, i.e. allow RGB values to extend beyond`
			`# 0..255 to capture a larger range of residual error. This is faster`
			`# but seems to reduce image quality.`
			`# XXX extend image region to avoid need for boundary box clipping`
			`image[yt:yb, xl:xr, :] = np.clip(`
			`image[yt:yb, xl:xr, :] + error[et:eb, el:er, :], 0, 255)`
cythonize dither_lookahead, still lots of low-hanging fruit 2021-01-11 20:21:00 +00:00

Optimize more 2021-01-11 20:56:26 +00:00			`cdef x_dither_bounds(float [:, :, ::1] pattern, int x_origin, int x_res, int x):`
			`cdef int el = max(x_origin - x, 0)`
			`cdef int er = min(pattern.shape[1], x_res - 1 - x)`
cythonize dither_lookahead, still lots of low-hanging fruit 2021-01-11 20:21:00 +00:00
Optimize more 2021-01-11 20:56:26 +00:00			`cdef int xl = x - x_origin + el`
			`cdef int xr = x - x_origin + er`
cythonize dither_lookahead, still lots of low-hanging fruit 2021-01-11 20:21:00 +00:00
			`return el, er, xl, xr`


Optimize more! 2021-01-11 21:35:13 +00:00			`cdef long* flatten_rgb(float [:, :, ::1] rgb):`
			`cdef i, j, k`
			`cdef long flat = <long > malloc(rgb.shape[0] * rgb.shape[1] * sizeof(long))`
			`for i in range(rgb.shape[0]):`
			`for j in range(rgb.shape[1]):`
			`for k in range(rgb.shape[2]):`
			`flat[i * rgb.shape[1] + j] = (int(rgb[i, j, 0]) << 16) + (int(rgb[i, j, 1]) << 8) + (int(rgb[i, j, 2]))`
			`return flat`


			`cdef char* distance(char [:, ::1] distances, float [:, :, ::1] rgb, char [:, ::1] bit4):`
			`flat = flatten_rgb(rgb)`

			`cdef char dist = <char > malloc(rgb.shape[0] * rgb.shape[1] * sizeof(char))`
			`for i in range(rgb.shape[0]):`
			`for j in range(rgb.shape[1]):`
			`dist[i * rgb.shape[1] + j] = distances[flat[i * rgb.shape[1] + j], bit4[i, j]]`
			`free(flat)`
			`return dist`


cythonize dither_lookahead, still lots of low-hanging fruit 2021-01-11 20:21:00 +00:00			`def dither_lookahead(`
			`screen, float[:,:,::1] image_rgb, dither, differ, int x, int y, char[:, ::1] options_4bit,`
			`float[:, :, ::1] options_rgb, int lookahead):`
Optimize more 2021-01-11 20:56:26 +00:00			`cdef float[:, :, ::1] pattern = dither.PATTERN`
			`cdef int x_res = screen.X_RES`
			`cdef int dither_x_origin = dither.ORIGIN[1]`

			`cdef int el, er, xl, xr`
			`el, er, xl, xr = x_dither_bounds(pattern, dither_x_origin, x_res, x)`
cythonize dither_lookahead, still lots of low-hanging fruit 2021-01-11 20:21:00 +00:00
			`# X coord value of larger of dither bounding box or lookahead horizon`
Optimize more 2021-01-11 20:56:26 +00:00			`cdef int xxr = min(max(x + lookahead, xr), x_res)`
cythonize dither_lookahead, still lots of low-hanging fruit 2021-01-11 20:21:00 +00:00
Optimize more! 2021-01-11 21:35:13 +00:00			`# Copies of input pixels so we can dither in bulk`
			`# Leave enough space at right of image so we can dither the last of our lookahead pixels`
Optimize more 2021-01-11 20:43:28 +00:00			`cdef float[:, :, ::1] lah_image_rgb = np.zeros(`
cythonize dither_lookahead, still lots of low-hanging fruit 2021-01-11 20:21:00 +00:00			`(2 ** lookahead, lookahead + xr - xl, 3), dtype=np.float32)`
Optimize more 2021-01-11 20:43:28 +00:00			`lah_image_rgb[:, 0:xxr - x, :] = image_rgb[y, x:xxr, :]`
cythonize dither_lookahead, still lots of low-hanging fruit 2021-01-11 20:21:00 +00:00
			`cdef float[:, ::] output_pixels`
Optimize more 2021-01-11 20:43:28 +00:00			`cdef float quant_error = <float > malloc(2 ** lookahead * 3 * sizeof(float))`
cythonize dither_lookahead, still lots of low-hanging fruit 2021-01-11 20:21:00 +00:00
Optimize more 2021-01-11 20:43:28 +00:00			`cdef int i, j, k, l`

cythonize dither_lookahead, still lots of low-hanging fruit 2021-01-11 20:21:00 +00:00			`for i in range(xxr - x):`
			`# options_rgb choices are fixed, but we can still distribute`
			`# quantization error from having made these choices, in order to compute`
			`# the total error`
Optimize more 2021-01-11 20:43:28 +00:00			`for k in range(2 ** lookahead):`
			`for l in range(3):`
			`quant_error[k * 3 + l] = lah_image_rgb[k, i, l] - options_rgb[k, i, l]`

cythonize dither_lookahead, still lots of low-hanging fruit 2021-01-11 20:21:00 +00:00			`# Don't update the input at position x (since we've already chosen`
			`# fixed outputs), but do propagate quantization errors to positions >x`
			`# so we can compensate for how good/bad these choices were`
Optimize more 2021-01-11 20:56:26 +00:00			`el, er, xl, xr = x_dither_bounds(pattern, dither_x_origin, x_res, i)`
cythonize dither_lookahead, still lots of low-hanging fruit 2021-01-11 20:21:00 +00:00			`for j in range(2 ** lookahead):`
Optimize more 2021-01-11 20:43:28 +00:00			`apply_one_line(pattern, el, er, xl, xr, 0, lah_image_rgb[j, :, :], &quant_error[j])`

			`free(quant_error)`
cythonize dither_lookahead, still lots of low-hanging fruit 2021-01-11 20:21:00 +00:00
Optimize more! 2021-01-11 21:35:13 +00:00			`# Clip lah_image_rgb into 0..255 range to prepare for computing colour distance`
			`#for i in range(2**lookahead):`
			`# for j in range(lookahead):`
			`# for k in range(3):`
			`# lah_image_rgb[i, j, k] = clip(lah_image_rgb[i, j, k], 0, 255)`

			`# cdef char* error = distance(differ._distances, lah_image_rgb[:, 0:lookahead, :], options_4bit)`
			`# differ.distance(lah_image_rgb[:, 0:lookahead, :], options_4bit)`
			`cdef long[:, ::1] error = differ.distance(lah_image_rgb[:, 0:lookahead, :], options_4bit)`
			`cdef int best`
			`cdef int best_error = 2**31-1`
			`cdef int total_error`
			`for i in range(2**lookahead):`
			`total_error = 0`
			`for j in range(lookahead):`
			`total_error += error[i, j] ** 2`
			`if total_error >= best_error:`
			`break`
			`if total_error < best_error:`
			`best_error = total_error`
			`best = i`

			`#cdef long[::1] total_error = np.sum(np.power(error, 2), axis=1)`
			`#cdef int best = np.argmin(total_error)`

			`# free(error)`
cythonize dither_lookahead, still lots of low-hanging fruit 2021-01-11 20:21:00 +00:00			`return options_4bit[best, 0], options_rgb[best, 0, :]`