diff --git a/dither_hgr.pyx b/dither_hgr.pyx index c61d942..2f8bcd6 100644 --- a/dither_hgr.pyx +++ b/dither_hgr.pyx @@ -9,6 +9,8 @@ from libc.stdlib cimport malloc, free cimport common +import screen as screen_py + # TODO: use a cdef class # C representation of dither_pattern.DitherPattern data, for efficient access. @@ -98,6 +100,13 @@ cdef unsigned int compute_fat_pixels(unsigned int screen_byte, unsigned char las return result +cdef struct Context: + unsigned char bit_lookahead + unsigned char pixel_lookahead + unsigned char phase_shift + unsigned char is_hgr + + # Look ahead a number of pixels and compute choice for next pixel with lowest total squared error after dithering. # # Args: @@ -113,32 +122,31 @@ cdef unsigned int compute_fat_pixels(unsigned int screen_byte, unsigned char las # # Returns: index from 0 .. 2**lookahead into options_nbit representing best available choice for position (x,y) # -cdef int dither_lookahead(Dither* dither, float[:, :, ::1] palette_cam16, float[:, :, ::1] palette_rgb, - float[:, :, ::1] image_rgb, int x, int y, int lookahead, unsigned char last_pixels, - int x_res, float[:,::1] rgb_to_cam16ucs, unsigned char palette_depth) nogil: - cdef int candidate_pixels, i, j, fat_pixels +cdef int dither_lookahead(Dither* dither, unsigned char palette_depth, float[:, :, ::1] palette_cam16, + float[:, :, ::1] palette_rgb, float[:, :, ::1] image_rgb, int x, int y, unsigned char last_pixels, + int x_res, float[:,::1] rgb_to_cam16ucs, Context context) nogil: + cdef int candidate, next_pixels, i, j cdef float[3] quant_error cdef int best cdef float best_error = 2**31-1 cdef float total_error - cdef unsigned char next_pixels + cdef unsigned char current_pixels cdef int phase cdef float[::1] lah_cam16ucs # Don't bother dithering past the lookahead horizon or edge of screen. - cdef int xxr = min(x + 15, x_res) # XXX + cdef int xxr = min(x + context.pixel_lookahead, x_res) cdef int lah_shape1 = xxr - x cdef int lah_shape2 = 3 cdef float *lah_image_rgb = malloc(lah_shape1 * lah_shape2 * sizeof(float)) - # For each 2**lookahead possibilities for the on/off state of the next lookahead pixels, apply error diffusion # and compute the total squared error to the source image. Since we only have two possible colours for each # given pixel (dependent on the state already chosen for pixels to the left), we need to look beyond local minima. # i.e. it might be better to make a sub-optimal choice for this pixel if it allows access to much better pixel # colours at later positions. - for candidate_pixels in range(1 << lookahead): + for candidate in range(1 << context.bit_lookahead): # Working copy of input pixels for i in range(xxr - x): for j in range(3): @@ -146,28 +154,34 @@ cdef int dither_lookahead(Dither* dither, float[:, :, ::1] palette_cam16, float[ total_error = 0 - fat_pixels = compute_fat_pixels(candidate_pixels, last_pixels) + if context.is_hgr: + # A HGR screen byte controls 14 or 15 screen pixels + next_pixels = compute_fat_pixels(candidate, last_pixels) + else: + # DHGR pixels are 1:1 with memory bits + next_pixels = candidate + # Apply dithering to lookahead horizon or edge of screen for i in range(xxr - x): xl = dither_bounds_xl(dither, i) xr = dither_bounds_xr(dither, xxr - x, i) - phase = (x + i + 3) % 4 # XXX + phase = (x + i + context.phase_shift) % 4 - next_pixels = shift_pixel_window( - last_pixels, next_pixels=fat_pixels, shift_right_by=i+1, window_width=palette_depth) + current_pixels = shift_pixel_window( + last_pixels, next_pixels=next_pixels, shift_right_by=i+1, window_width=palette_depth) # We don't update the input at position x (since we've already chosen fixed outputs), but we do propagate # quantization errors to positions >x so we can compensate for how good/bad these choices were. i.e. the - # next_pixels choices are fixed, but we can still distribute quantization error from having made these + # current_pixels choices are fixed, but we can still distribute quantization error from having made these # choices, in order to compute the total error. for j in range(3): - quant_error[j] = lah_image_rgb[i * lah_shape2 + j] - palette_rgb[next_pixels, phase, j] + quant_error[j] = lah_image_rgb[i * lah_shape2 + j] - palette_rgb[current_pixels, phase, j] apply_one_line(dither, xl, xr, i, lah_image_rgb, lah_shape2, quant_error) lah_cam16ucs = common.convert_rgb_to_cam16ucs( rgb_to_cam16ucs, lah_image_rgb[i*lah_shape2], lah_image_rgb[i*lah_shape2+1], lah_image_rgb[i*lah_shape2+2]) - total_error += common.colour_distance_squared(lah_cam16ucs, palette_cam16[next_pixels, phase]) + total_error += common.colour_distance_squared(lah_cam16ucs, palette_cam16[current_pixels, phase]) if total_error >= best_error: # No need to continue @@ -175,7 +189,7 @@ cdef int dither_lookahead(Dither* dither, float[:, :, ::1] palette_cam16, float[ if total_error < best_error: best_error = total_error - best = candidate_pixels + best = candidate free(lah_image_rgb) return best @@ -231,6 +245,7 @@ cdef void apply(Dither* dither, int x_res, int y_res, int x, int y, float[:,:,:: for k in range(3): image[i,j,k] = common.clip(image[i,j,k] + error_fraction * quant_error[k], 0, 1) + cdef image_nbit_to_bitmap( (unsigned char)[:, ::1] image_nbit, unsigned int x_res, unsigned int y_res, unsigned char palette_depth): cdef unsigned int x, y @@ -257,10 +272,9 @@ def dither_image( screen, float[:, :, ::1] image_rgb, dither, int lookahead, unsigned char verbose, float[:,::1] rgb_to_cam16ucs): cdef int y, x cdef unsigned char i, j, pixels_nbit, phase - # cdef float[3] input_pixel_rgb cdef float[3] quant_error cdef unsigned char output_pixel_nbit - cdef unsigned int best_next_pixels + cdef unsigned int next_pixels cdef float[3] output_pixel_rgb # Hoist some python attribute accesses into C variables for efficient access during the main loop @@ -298,28 +312,52 @@ def dither_image( # dot positions are used to determine the colour of a given pixel. cdef (unsigned char)[:, ::1] image_nbit = np.empty((image_rgb.shape[0], image_rgb.shape[1]), dtype=np.uint8) + cdef Context context + if screen.MODE == screen_py.Mode.HI_RES: + context.is_hgr = 1 + context.bit_lookahead = 8 + context.pixel_lookahead = 15 + # HGR and DHGR have a timing phase shift which rotates the effective mappings from screen dots to colours + context.phase_shift = 3 + else: + context.is_hgr = 0 + context.bit_lookahead = lookahead + context.pixel_lookahead = lookahead + context.phase_shift = 0 + cdef (unsigned char)[:, ::1] linear_bytemap = np.zeros((192, 40), dtype=np.uint8) - cdef unsigned int fat_pixels + + # After performing lookahead, move ahead this many pixels at once. + cdef int apply_batch_size + if context.is_hgr: + # For HGR we have to apply an entire screen byte at a time, which controls 14 or 15 pixels (see + # compute_fat_pixels above). This is because the high bit shifts this entire group of 14 pixels at once, + # so we have to make a single decision about whether or not to enable it. + apply_batch_size = 14 + else: + # For DHGR we can choose each pixel state independently, so we get better results if we apply one pixel at + # a time. + apply_batch_size = 1 for y in range(yres): if verbose: print("%d/%d" % (y, yres)) output_pixel_nbit = 0 for x in range(xres): - if x % 14 == 0: + if x % apply_batch_size == 0: # Compute all possible 2**N choices of n-bit pixel colours for positions x .. x + lookahead - # lookahead_palette_choices_nbit = lookahead_options(lookahead, output_pixel_nbit) # Apply error diffusion for each of these 2**N choices, and compute which produces the closest match # to the source image over the succeeding N pixels - best_next_pixels = dither_lookahead( - &cdither, palette_cam16, palette_rgb, image_rgb, x, y, lookahead, output_pixel_nbit, xres, - rgb_to_cam16ucs, palette_depth) - linear_bytemap[y, x // 14] = best_next_pixels - fat_pixels = compute_fat_pixels(best_next_pixels, output_pixel_nbit) + next_pixels = dither_lookahead( + &cdither, palette_depth, palette_cam16, palette_rgb, image_rgb, x, y, output_pixel_nbit, xres, + rgb_to_cam16ucs, context) + if context.is_hgr: + linear_bytemap[y, x // 14] = next_pixels + next_pixels = compute_fat_pixels(next_pixels, output_pixel_nbit) # Apply best choice for next 1 pixel output_pixel_nbit = shift_pixel_window( - output_pixel_nbit, fat_pixels, shift_right_by=x%14 + 1, window_width=palette_depth) + output_pixel_nbit, next_pixels, shift_right_by=x % apply_batch_size + 1, window_width=palette_depth) # Apply error diffusion from chosen output pixel value for i in range(3): output_pixel_rgb[i] = palette_rgb[output_pixel_nbit, x % 4, i]