No need for pattern to have a third axis of shape 1

Minor optimization to not compute error_fraction 3 times

Tidy a bit
This commit is contained in:
kris 2021-03-15 16:22:55 +00:00
parent 551be3eba7
commit ede063a21b
2 changed files with 31 additions and 31 deletions

View File

@ -130,12 +130,12 @@ cdef int dither_lookahead(Dither* dither, float[:, ::1] palette_rgb,
cdef void apply_one_line(Dither* dither, int xl, int xr, int x, float[] image, int image_shape1,
float[] quant_error) nogil:
cdef int i, j
cdef float error
cdef float error_fraction
for i in range(xl, xr):
error_fraction = dither.pattern[i - x + dither.x_origin]
for j in range(3):
error = dither.pattern[i - x + dither.x_origin] * quant_error[j]
image[i * image_shape1 + j] = clip(image[i * image_shape1 + j] + error, 0, 255)
image[i * image_shape1 + j] = clip(image[i * image_shape1 + j] + error_fraction * quant_error[j], 0, 255)
@cython.boundscheck(False)
@ -148,16 +148,16 @@ cdef void apply(Dither* dither, int x_res, int y_res, int x, int y, float[:,:,::
cdef int xl = dither_bounds_xl(dither, x)
cdef int xr = dither_bounds_xr(dither, x_res, x)
cdef float error
cdef float error_fraction
# We could avoid clipping here, i.e. allow RGB values to extend beyond
# 0..255 to capture a larger range of residual error. This is faster
# but seems to reduce image quality.
# TODO: is this still true?
for i in range(yt, yb):
for j in range(xl, xr):
error_fraction = dither.pattern[(i - y) * dither.x_shape + j - x + dither.x_origin]
for k in range(3):
error = dither.pattern[(i - y) * dither.x_shape + j - x + dither.x_origin] * quant_error[k]
image[i,j,k] = clip(image[i,j,k] + error, 0, 255)
image[i,j,k] = clip(image[i,j,k] + error_fraction * quant_error[k], 0, 255)
@cython.boundscheck(False)
@ -183,12 +183,6 @@ cdef unsigned char find_nearest_colour(float[::1] pixel_rgb, unsigned char[::1]
@cython.boundscheck(False)
@cython.wraparound(False)
def dither_image(screen, float[:, :, ::1] image_rgb, dither, int lookahead, unsigned char verbose):
cdef (unsigned char)[:, ::1] image_nbit = np.empty(
(image_rgb.shape[0], image_rgb.shape[1]), dtype=np.uint8)
cdef int yres = screen.Y_RES
cdef int xres = screen.X_RES
cdef int y, x, i
cdef float[3] input_pixel_rgb
cdef float[3] quant_error
@ -200,26 +194,32 @@ def dither_image(screen, float[:, :, ::1] image_rgb, dither, int lookahead, unsi
cdef unsigned char output_pixel_nbit
cdef float[::1] output_pixel_rgb
# Flatten python dither pattern array for more efficient access
# TODO: doesn't actually help?
cdef Dither cdither
cdither.y_shape = dither.PATTERN.shape[0]
cdither.x_shape = dither.PATTERN.shape[1]
cdither.y_origin = dither.ORIGIN[0]
cdither.x_origin = dither.ORIGIN[1]
# Convert dither.PATTERN to a malloced array which is faster to access
cdither.pattern = <float *> malloc(cdither.x_shape * cdither.y_shape * sizeof(float))
for i in range(cdither.y_shape):
for j in range(cdither.x_shape):
cdither.pattern[i * cdither.x_shape + j] = dither.PATTERN[i, j, 0]
# Hoist some python attribute accesses into C variables for efficient access during the main loop
cdef (unsigned char)[:, ::1] distances = screen.palette.distances
cdef int yres = screen.Y_RES
cdef int xres = screen.X_RES
cdef float[:, ::1] palette_rgb = np.zeros((len(screen.palette.RGB), 3), dtype=np.float32)
for i in screen.palette.RGB.keys():
for j in range(3):
palette_rgb[i, j] = screen.palette.RGB[i][j]
cdef (unsigned char)[:, ::1] distances = screen.palette.distances
cdef Dither cdither
cdither.y_shape = dither.PATTERN.shape[0]
cdither.x_shape = dither.PATTERN.shape[1]
cdither.y_origin = dither.ORIGIN[0]
cdither.x_origin = dither.ORIGIN[1]
# TODO: should be just as efficient to use a memoryview?
cdither.pattern = <float *> malloc(cdither.x_shape * cdither.y_shape * sizeof(float))
for i in range(cdither.y_shape):
for j in range(cdither.x_shape):
cdither.pattern[i * cdither.x_shape + j] = dither.PATTERN[i, j]
cdef (unsigned char)[:, ::1] image_nbit = np.empty(
(image_rgb.shape[0], image_rgb.shape[1]), dtype=np.uint8)
for y in range(yres):
if verbose:
print("%d/%d" % (y, yres))

View File

@ -11,7 +11,7 @@ class DitherPattern:
class NoDither(DitherPattern):
"""No dithering."""
PATTERN = np.array(((0, 0), (0, 0)),
dtype=np.float32).reshape(2, 2, 1) / np.float(16)
dtype=np.float32).reshape(2, 2) / np.float(16)
ORIGIN = (0, 1)
@ -20,7 +20,7 @@ class FloydSteinbergDither(DitherPattern):
# 0 * 7
# 3 5 1
PATTERN = np.array(((0, 0, 7), (3, 5, 1)),
dtype=np.float32).reshape(2, 3, 1) / np.float(16)
dtype=np.float32).reshape(2, 3) / np.float(16)
ORIGIN = (0, 1)
@ -31,7 +31,7 @@ class FloydSteinbergDither2(DitherPattern):
PATTERN = np.array(
((0, 0, 0, 0, 0, 7),
(3, 5, 1, 0, 0, 0)),
dtype=np.float32).reshape(2, 6, 1) / np.float(16)
dtype=np.float32).reshape(2, 6) / np.float(16)
ORIGIN = (0, 2)
@ -41,7 +41,7 @@ class BuckelsDither(DitherPattern):
# 1 2 1 0
# 0 1 0 0
PATTERN = np.array(((0, 0, 2, 1), (1, 2, 1, 0), (0, 1, 0, 0)),
dtype=np.float32).reshape(3, 4, 1) / np.float32(8)
dtype=np.float32).reshape(3, 4) / np.float32(8)
ORIGIN = (0, 1)
@ -52,7 +52,7 @@ class JarvisDither(DitherPattern):
# 3 5 7 5 3
# 1 3 5 3 1
PATTERN = np.array(((0, 0, 0, 7, 5), (3, 5, 7, 5, 3), (1, 3, 5, 3, 1)),
dtype=np.float32).reshape(3, 5, 1) / np.float32(48)
dtype=np.float32).reshape(3, 5) / np.float32(48)
ORIGIN = (0, 2)
@ -72,7 +72,7 @@ class JarvisModifiedDither(DitherPattern):
PATTERN = np.array((
(0, 0, 0, 15, 11, 7, 3),
(3, 5, 7, 5, 3, 1, 0),
(1, 3, 5, 3, 1, 0, 0)), dtype=np.float32).reshape(3, 7, 1)
(1, 3, 5, 3, 1, 0, 0)), dtype=np.float32).reshape(3, 7)
PATTERN /= np.sum(PATTERN)
ORIGIN = (0, 2)