mirror of
https://github.com/KrisKennaway/ii-pix.git
synced 2024-09-30 03:55:42 +00:00
ad9515dcf2
Use this to precompute a new ntsc palette with 256 entries (though only 84 unique colours) that are available by appropriate pixel sequences. Unfortunately the precomputed distance matrix for this palette is 4GB! Optimize the precomputation to be less memory hungry, while also making efficient use of the mmapped output file. Add support for dithering images using this 8-bit palette depth, i.e. to optimize for NTSC rendering. This often gives better image quality since more colours are available, especially when modulating areas of similar colour. Fix 140 pixel dithering and render the output including NTSC fringing instead of the unrealistic 140px output that doesn't include it. Add support for rendering output image using any target palette, which is useful e.g. for comparing how an 8-pixel NTSC rendered image will be displayed on an emulator using 4-pixel ntsc emulation (there is usually some colour bias, because the 8 pixel chroma blending tends to average away colours). Switch the output binary format to write AUX memory first, which matches the image format of other utilities.
268 lines
10 KiB
Cython
268 lines
10 KiB
Cython
# cython: infer_types=True
|
|
|
|
cimport cython
|
|
import functools
|
|
import numpy as np
|
|
# from cython.parallel import prange
|
|
from cython.view cimport array as cvarray
|
|
from libc.stdlib cimport malloc, free
|
|
|
|
|
|
# TODO: use a cdef class
|
|
cdef struct Dither:
    # C-level description of an error-diffusion kernel.
    #
    # The weights live in a flat, row-major array of y_shape * x_shape floats:
    # the weight at (row, col) is pattern[row * x_shape + col].
    float* pattern   # Flattened dither pattern
    # Number of columns in the kernel.
    int x_shape
    # Number of rows in the kernel.
    int y_shape
    # Column offset of the kernel origin; subtracted from the pixel's x
    # coordinate to find the left edge of the kernel footprint.
    int x_origin
    # Row offset of the kernel origin; subtracted from the pixel's y
    # coordinate to find the top edge of the kernel footprint.
    int y_origin
|
|
|
|
cdef struct Image:
    # C-level view of an image held in one contiguous float buffer.
    #
    # Element (y, x, c) is stored at
    # flat[y * shape1 * shape2 + x * shape2 + c].
    float* flat  # Flattened image array
    # Extent of the first (row / y) axis.
    int shape0
    # Extent of the second (column / x) axis.
    int shape1
    # Extent of the third (colour channel) axis.
    int shape2
|
|
|
|
|
|
cdef float clip(float a, float min_value, float max_value) nogil:
    # Clamp `a` to the closed interval [min_value, max_value].
    # The lower bound is applied first and the upper bound second, so the
    # upper bound wins if the two bounds are ever passed in the wrong order.
    cdef float raised = min_value if min_value > a else a
    return max_value if max_value < raised else raised
|
|
|
|
|
|
|
|
cdef int dither_bounds_xl(Dither *dither, int x):
    # Left edge (inclusive) of the kernel footprint for a pixel in column x:
    # x shifted left by the kernel's x origin, but never left of column 0.
    cdef int left = x - dither.x_origin
    if left < 0:
        left = 0
    return left
|
|
|
|
|
|
cdef int dither_bounds_xr(Dither *dither, int x_res, int x):
    # Right edge (exclusive) of the kernel footprint for a pixel in column x.
    # The footprint spans at most x_shape columns, and at most the x_res - x
    # columns that remain between x and the right-hand limit x_res.
    cdef int span = x_res - x
    if dither.x_shape < span:
        span = dither.x_shape
    return x - dither.x_origin + span
|
|
|
|
|
|
cdef int dither_bounds_yt(Dither *dither, int y):
    # Top edge (inclusive) of the kernel footprint for a pixel in row y:
    # y shifted up by the kernel's y origin, but never above row 0.
    cdef int top = y - dither.y_origin
    if top < 0:
        top = 0
    return top
|
|
|
|
cdef int dither_bounds_yb(Dither *dither, int y_res, int y):
    # Bottom edge (exclusive) of the kernel footprint for a pixel in row y.
    # The footprint spans at most y_shape rows, and at most the y_res - y
    # rows that remain between y and the bottom limit y_res.
    cdef int span = y_res - y
    if dither.y_shape < span:
        span = dither.y_shape
    return y - dither.y_origin + span
|
|
|
|
|
|
@cython.boundscheck(False)
@cython.wraparound(False)
@functools.lru_cache(None)
def lookahead_options(screen, lookahead, last_pixel_4bit, x):
    """Enumerate every sequence of `lookahead` pixel choices starting at x.

    At each position the screen offers candidate pixels via
    ``screen.pixel_palette_options(previous_pixel, position)``; candidate 0
    or 1 is taken according to one bit of the option index, so there are
    ``2 ** lookahead`` sequences in total.  Results are memoised on the
    arguments, so `screen` must be hashable.

    Returns a tuple ``(options_4bit, options_rgb)``:
      * options_4bit: uint8 array of shape (2 ** lookahead, lookahead)
        holding the chosen pixel values for each sequence.
      * options_rgb: float32 array of shape (2 ** lookahead, lookahead, 3)
        holding the matching RGB colours.
    """
    n_options = 2 ** lookahead
    options_4bit = np.empty((n_options, lookahead), dtype=np.uint8)
    options_rgb = np.empty((n_options, lookahead, 3), dtype=np.float32)

    for option in range(n_options):
        # Each sequence starts from the pixel that was emitted just before x.
        previous_4bit = last_pixel_4bit
        for step in range(lookahead):
            choices_4bit, choices_rgb = screen.pixel_palette_options(
                previous_4bit, x + step)
            # Bit `step` of the option index picks candidate 0 or 1 here.
            pick = (option >> step) & 1
            previous_4bit = choices_4bit[pick]
            options_4bit[option, step] = previous_4bit
            options_rgb[option, step, :] = np.array(choices_rgb[pick])

    return options_4bit, options_rgb
|
|
|
|
|
|
@cython.boundscheck(False)
@cython.wraparound(False)
cdef dither_lookahead(Dither* dither,
        screen, Image* image_rgb, int x, int y, unsigned char[:, ::1] options_4bit,
        float[:, :, ::1] options_rgb, int lookahead):
    # Choose the output pixel for (x, y) by scoring every candidate sequence
    # of `lookahead` pixels.
    #
    # For each of the 2**lookahead candidate sequences (rows of options_4bit /
    # options_rgb) a private copy of the upcoming input pixels on row y is
    # made, the quantisation error of each choice is diffused along that copy
    # using the first row of the dither kernel, and the squared palette
    # distances of the choices are summed.  The first pixel of the
    # lowest-scoring sequence is returned.
    #
    # Returns a tuple (pixel value, RGB colour) taken from column 0 of the
    # winning row of options_4bit / options_rgb.
    # Raises MemoryError if the scratch buffer cannot be allocated.
    cdef int x_res = screen.X_RES

    cdef int xl = dither_bounds_xl(dither, x)
    cdef int xr = dither_bounds_xr(dither, x_res, x)

    # X coord value of larger of dither bounding box or lookahead horizon
    cdef int xxr = min(max(x + lookahead, xr), x_res)
    cdef int i, j, k

    # Use a shift rather than `2 ** lookahead`: it is unambiguously a C
    # integer regardless of how the compiler types integer powers.
    cdef int lah_shape0 = 1 << lookahead
    cdef int lah_shape1 = lookahead + xr - xl
    cdef int lah_shape2 = 3
    cdef int lah_stride = lah_shape1 * lah_shape2
    cdef int image_row = y * image_rgb.shape1 * image_rgb.shape2
    cdef int base

    cdef float[3] quant_error
    cdef unsigned char bit4
    # Default to the first candidate so the return below is always defined.
    cdef int best = 0
    cdef int best_error = 2**31-1
    cdef int total_error
    cdef long flat, dist
    cdef long r, g, b

    # Fetch the Python-level attribute *before* allocating, so that nothing
    # between malloc() and free() below can raise and leak the buffer.
    cdef (unsigned char)[:, ::1] distances = screen.palette.distances

    cdef float *lah_image_rgb = <float *> malloc(lah_shape0 * lah_shape1 * lah_shape2 * sizeof(float))
    if lah_image_rgb == NULL:
        raise MemoryError()

    for i in range(lah_shape0):
        base = i * lah_stride
        # Copies of input pixels so we can dither in bulk
        for j in range(xxr - x):
            for k in range(3):
                lah_image_rgb[base + j * lah_shape2 + k] = image_rgb.flat[
                    image_row + (x + j) * image_rgb.shape2 + k]
        # Leave enough space at right of image so we can dither the last of our lookahead pixels.
        for j in range(xxr - x, lah_shape1):
            for k in range(3):
                lah_image_rgb[base + j * lah_shape2 + k] = 0

    # Iterating by row then column is faster for some reason?
    for i in range(xxr - x):
        # Kernel bounds are expressed relative to the scratch buffer, whose
        # column 0 corresponds to image column x.
        xl = dither_bounds_xl(dither, i)
        xr = dither_bounds_xr(dither, x_res - x, i)
        for j in range(lah_shape0):
            # Don't update the input at position x (since we've already chosen
            # fixed outputs), but do propagate quantization errors to positions >x
            # so we can compensate for how good/bad these choices were

            # options_rgb choices are fixed, but we can still distribute
            # quantization error from having made these choices, in order to compute
            # the total error
            base = j * lah_stride
            for k in range(3):
                quant_error[k] = lah_image_rgb[base + i * lah_shape2 + k] - options_rgb[j, i, k]
            apply_one_line(dither, xl, xr, i, &lah_image_rgb[base], lah_shape2, quant_error)

    for i in range(lah_shape0):
        total_error = 0
        base = i * lah_stride
        for j in range(lookahead):
            # Clip lah_image_rgb into 0..255 range to prepare for computing colour distance
            r = <long>clip(lah_image_rgb[base + j * lah_shape2 + 0], 0, 255)
            g = <long>clip(lah_image_rgb[base + j * lah_shape2 + 1], 0, 255)
            b = <long>clip(lah_image_rgb[base + j * lah_shape2 + 2], 0, 255)

            # The distance table is indexed by the packed 24-bit RGB value.
            flat = (r << 16) + (g << 8) + b
            bit4 = options_4bit[i, j]
            dist = distances[flat, bit4]
            # Plain multiplication keeps the square in integer arithmetic.
            total_error += dist * dist
            # Stop early: this candidate can no longer beat the current best.
            if total_error >= best_error:
                break
        if total_error < best_error:
            best_error = total_error
            best = i

    free(lah_image_rgb)
    return options_4bit[best, 0], options_rgb[best, 0, :]
|
|
|
|
|
|
cdef void apply_one_line(Dither* dither, int xl, int xr, int x, float[] image, int image_shape1, float[] quant_error):
    # Diffuse `quant_error` along a single row of pixels.
    #
    # `image` is one flattened row with `image_shape1` floats per pixel.  For
    # every column in [xl, xr) the first-row kernel weight for that column's
    # offset from x scales the three-channel error, and the sum is clipped
    # back into 0..255.
    cdef int col, channel, cell
    cdef float weight, delta

    for col in range(xl, xr):
        # The weight depends only on the column, not on the channel.
        weight = dither.pattern[col - x + dither.x_origin]
        cell = col * image_shape1
        for channel in range(3):
            delta = weight * quant_error[channel]
            image[cell + channel] = clip(image[cell + channel] + delta, 0, 255)
|
|
|
|
|
|
cdef apply(Dither* dither, screen, int x, int y, Image* image, float[] quant_error):
    # Diffuse the three-channel `quant_error` of pixel (x, y) into the
    # surrounding pixels of `image`, weighted by the dither kernel and
    # limited to the screen's X_RES / Y_RES extent.
    cdef int row, col, channel, pattern_row, pixel_base
    cdef float weight, delta

    cdef int yt = dither_bounds_yt(dither, y)
    cdef int yb = dither_bounds_yb(dither, screen.Y_RES, y)
    cdef int xl = dither_bounds_xl(dither, x)
    cdef int xr = dither_bounds_xr(dither, screen.X_RES, x)

    # We could avoid clipping here, i.e. allow RGB values to extend beyond
    # 0..255 to capture a larger range of residual error.  This is faster
    # but seems to reduce image quality.
    # TODO: is this still true?
    for row in range(yt, yb):
        pattern_row = (row - y) * dither.x_shape
        for col in range(xl, xr):
            # One kernel weight per target pixel, shared by all channels.
            weight = dither.pattern[pattern_row + col - x + dither.x_origin]
            pixel_base = row * image.shape1 * image.shape2 + col * image.shape2
            for channel in range(3):
                delta = weight * quant_error[channel]
                image.flat[pixel_base + channel] = clip(
                    image.flat[pixel_base + channel] + delta, 0, 255)
|
|
|
|
|
|
@cython.boundscheck(False)
@cython.wraparound(False)
cdef find_nearest_colour(screen, float[3] pixel_rgb, unsigned char[::1] options_4bit, unsigned char[:, ::1] options_rgb):
    # Pick, from the candidate pixels in options_4bit, the one whose palette
    # distance to `pixel_rgb` is smallest (the first one wins on ties).
    #
    # Distances come from screen.palette.distances, a table indexed by
    # [packed 24-bit RGB value, candidate pixel value].
    #
    # Returns a tuple (options_4bit[best], options_rgb[best, :]).
    # Raises ValueError if there are no candidates, since bounds checking is
    # disabled here and indexing an empty array would read invalid memory.
    cdef int dist, i
    cdef int best = 0
    cdef unsigned char bit4
    # Table entries are unsigned chars (<= 255), so 2**8 is "worse than any".
    cdef int best_dist = 2**8

    cdef (unsigned char)[:, ::1] distances = screen.palette.distances
    # The packed RGB index depends only on the input pixel, so compute it
    # once instead of once per candidate.
    cdef long flat = (<long>pixel_rgb[0] << 16) + (<long>pixel_rgb[1] << 8) + <long>pixel_rgb[2]

    if options_4bit.shape[0] == 0:
        raise ValueError("find_nearest_colour requires at least one palette option")

    for i in range(options_4bit.shape[0]):
        bit4 = options_4bit[i]
        dist = distances[flat, bit4]
        if dist < best_dist:
            best_dist = dist
            best = i

    return options_4bit[best], options_rgb[best, :]
|
|
|
|
|
|
@cython.boundscheck(False)
@cython.wraparound(False)
def dither_image(screen, float[:, :, ::1] image_rgb, dither, int lookahead):
    """Dither `image_rgb` to the pixel values supported by `screen`.

    Pixels are processed row by row, left to right.  For each pixel an
    output value is chosen (with `lookahead` > 0 by scoring candidate pixel
    sequences, otherwise by nearest palette distance), and the quantisation
    error is diffused into neighbouring pixels using ``dither.PATTERN`` and
    ``dither.ORIGIN``.

    Args:
        screen: object providing ``X_RES``, ``Y_RES``, ``palette.distances``
            and ``pixel_palette_options(last_pixel, x)``.
        image_rgb: C-contiguous float32 array indexed [y, x, channel].  It is
            overwritten in place with the RGB colour chosen for each pixel.
        dither: object providing ``PATTERN`` (indexed [row, col, 0]) and
            ``ORIGIN`` (``(y_origin, x_origin)``).
        lookahead: number of pixels to look ahead; 0 disables lookahead.

    Returns:
        ``(image_4bit, image_rgb_array)``: the uint8 array of chosen pixel
        values (same height/width as `image_rgb`) and a NumPy copy of the
        overwritten `image_rgb`.

    Raises:
        ValueError: if `image_rgb` is smaller than the screen resolution or
            has fewer than 3 channels (bounds checking is disabled, so this
            would otherwise read and write out of bounds).
        MemoryError: if a working buffer cannot be allocated.
    """
    cdef (unsigned char)[:, ::1] image_4bit = np.empty(
        (image_rgb.shape[0], image_rgb.shape[1]), dtype=np.uint8)

    cdef int yres = screen.Y_RES
    cdef int xres = screen.X_RES

    cdef int y, x, i, j
    cdef float[3] quant_error
    cdef unsigned char output_pixel_4bit
    cdef float[3] input_pixel_rgb
    cdef Image cimage_rgb
    cdef Dither cdither

    if image_rgb.shape[0] < yres or image_rgb.shape[1] < xres or image_rgb.shape[2] < 3:
        raise ValueError(
            "image_rgb must be at least Y_RES x X_RES pixels with 3 colour channels")

    # Start from NULL so the cleanup below is safe whichever step fails;
    # free(NULL) is a no-op.
    cimage_rgb.flat = NULL
    cdither.pattern = NULL
    try:
        # Flatten python image array for more efficient access
        cimage_rgb.flat = <float *> malloc(
            image_rgb.shape[0] * image_rgb.shape[1] * image_rgb.shape[2] * sizeof(float))
        if cimage_rgb.flat == NULL:
            raise MemoryError()
        cimage_rgb.shape0 = image_rgb.shape[0]
        cimage_rgb.shape1 = image_rgb.shape[1]
        cimage_rgb.shape2 = image_rgb.shape[2]
        for y in range(cimage_rgb.shape0):
            for x in range(cimage_rgb.shape1):
                for i in range(cimage_rgb.shape2):
                    cimage_rgb.flat[y * cimage_rgb.shape1 * cimage_rgb.shape2 + x * cimage_rgb.shape2 + i] = (
                        image_rgb[y, x, i])

        # Flatten python dither pattern array for more efficient access
        cdither.y_shape = dither.PATTERN.shape[0]
        cdither.x_shape = dither.PATTERN.shape[1]
        cdither.y_origin = dither.ORIGIN[0]
        cdither.x_origin = dither.ORIGIN[1]
        # Convert dither.PATTERN to a malloced array which is faster to access
        cdither.pattern = <float *> malloc(cdither.x_shape * cdither.y_shape * sizeof(float))
        if cdither.pattern == NULL:
            raise MemoryError()
        for i in range(cdither.y_shape):
            for j in range(cdither.x_shape):
                cdither.pattern[i * cdither.x_shape + j] = dither.PATTERN[i, j, 0]

        for y in range(yres):
            # Every row starts from pixel value 0 as the "previous" pixel.
            output_pixel_4bit = 0
            for x in range(xres):
                # Read from the working copy, which already includes error
                # diffused from previously processed pixels.
                for i in range(3):
                    input_pixel_rgb[i] = cimage_rgb.flat[
                        y * cimage_rgb.shape1 * cimage_rgb.shape2 + x * cimage_rgb.shape2 + i]
                if lookahead:
                    # Only x % 4 is passed on, which keeps the number of
                    # distinct memoised argument combinations small.
                    palette_choices_4bit, palette_choices_rgb = lookahead_options(
                        screen, lookahead, output_pixel_4bit, x % 4)
                    output_pixel_4bit, output_pixel_rgb = \
                        dither_lookahead(
                            &cdither, screen, &cimage_rgb, x, y, palette_choices_4bit, palette_choices_rgb, lookahead)
                else:
                    palette_choices_4bit, palette_choices_rgb = screen.pixel_palette_options(output_pixel_4bit, x)
                    output_pixel_4bit, output_pixel_rgb = \
                        find_nearest_colour(screen, input_pixel_rgb, palette_choices_4bit, palette_choices_rgb)
                for i in range(3):
                    quant_error[i] = input_pixel_rgb[i] - output_pixel_rgb[i]
                image_4bit[y, x] = output_pixel_4bit
                apply(&cdither, screen, x, y, &cimage_rgb, quant_error)
                # Record the chosen colour in the caller's array.
                for i in range(3):
                    image_rgb[y, x, i] = output_pixel_rgb[i]
    finally:
        # Release the C buffers even if a Python-level call above raised.
        free(cdither.pattern)
        free(cimage_rgb.flat)

    return image_4bit, np.array(image_rgb)
|