ii-vision/edit_distance.py

import functools

import numpy as np
import weighted_levenshtein


@functools.lru_cache(None)
def byte_to_colour_string(b: int, is_odd_offset: bool) -> str:
    pixels = []

    idx = 0
    if is_odd_offset:
        pixels.append("01"[b & 0x01])
        idx += 1

    # K = black
    # G = green
    # V = violet
    # W = white
    palettes = (
        (
            "K",  # 0x00
            "V",  # 0x01
            "G",  # 0x10
            "W"  # 0x11
        ), (
            "K",  # 0x00
            "B",  # 0x01
            "O",  # 0x10
            "W"  # 0x11
        )
    )
    palette = palettes[(b & 0x80) != 0]

    for _ in range(3):
        pixel = palette[(b >> idx) & 0b11]
        pixels.append(pixel)
        idx += 2

    if not is_odd_offset:
        pixels.append("01"[b & 0x40 != 0])
        idx += 1

    return "".join(pixels)


# TODO: what about increasing transposition cost?  Might be better to have
# any pixel at the right place even if the wrong colour?

substitute_costs = np.ones((128, 128), dtype=np.float64)
error_substitute_costs = np.ones((128, 128), dtype=np.float64)

# Penalty for turning on/off a black bit
for c in "01GVWOB":
    substitute_costs[(ord('K'), ord(c))] = 1
    substitute_costs[(ord(c), ord('K'))] = 1
    error_substitute_costs[(ord('K'), ord(c))] = 5
    error_substitute_costs[(ord(c), ord('K'))] = 5

# Penalty for changing colour
for c in "01GVWOB":
    for d in "01GVWOB":
        substitute_costs[(ord(c), ord(d))] = 1
        substitute_costs[(ord(d), ord(c))] = 1
        error_substitute_costs[(ord(c), ord(d))] = 5
        error_substitute_costs[(ord(d), ord(c))] = 5

insert_costs = np.ones(128, dtype=np.float64) * 1000
delete_costs = np.ones(128, dtype=np.float64) * 1000


def _edit_weight(a: int, b: int, is_odd_offset: bool, error: bool):
    a_pixels = byte_to_colour_string(a, is_odd_offset)
    b_pixels = byte_to_colour_string(b, is_odd_offset)

    dist = weighted_levenshtein.dam_lev(
        a_pixels, b_pixels,
        insert_costs=insert_costs,
        delete_costs=delete_costs,
        substitute_costs=error_substitute_costs if error else substitute_costs,
    )
    return np.int64(dist)


def edit_weight_matrixes(error: bool) -> np.array:
    ewm = np.zeros(shape=(256, 256, 2), dtype=np.int64)
    for a in range(256):
        for b in range(256):
            for is_odd_offset in (False, True):
                ewm[a, b, int(is_odd_offset)] = _edit_weight(
                    a, b, is_odd_offset, error)

    return ewm


_ewm = edit_weight_matrixes(False)
_error_ewm = edit_weight_matrixes(True)


@functools.lru_cache(None)
def edit_weight(a: int, b: int, is_odd_offset: bool, error: bool):
    e = _error_ewm if error else _ewm
    return e[a, b, int(is_odd_offset)]

_even_ewm = {}
_odd_ewm = {}
_even_error_ewm = {}
_odd_error_ewm = {}
for a in range(256):
    for b in range(256):
        _even_ewm[(a << 8) + b] = edit_weight(a, b, False, False)
        _odd_ewm[(a << 8) + b] = edit_weight(a, b, True, False)

        _even_error_ewm[(a << 8) + b] = edit_weight(a, b, False, True)
        _odd_error_ewm[(a << 8) + b] = edit_weight(a, b, True, True)


@functools.lru_cache(None)
def _content_a_array(content: int, shape) -> np.array:
    return (np.ones(shape, dtype=np.uint16) * content) << 8


def content_edit_weight(content: int, b: np.array) -> np.array:
    assert b.shape == (32, 256), b.shape

    # Extract even and off column offsets (128,)
    even_b = b[:, ::2]
    odd_b = b[:, 1::2]

    a = _content_a_array(content, even_b.shape)

    even = a + even_b
    odd = a + odd_b

    even_weights = np.vectorize(_even_error_ewm.__getitem__)(even)
    odd_weights = np.vectorize(_odd_error_ewm.__getitem__)(odd)

    res = np.ndarray(shape=b.shape, dtype=np.int64)
    res[:, ::2] = even_weights
    res[:, 1::2] = odd_weights

    return res


def array_edit_weight(a: np.array, b: np.array) -> np.array:
    # Extract even and off column offsets (32, 128)
    even_a = a[:, ::2]
    odd_a = a[:, 1::2]

    even_b = b[:, ::2]
    odd_b = b[:, 1::2]

    even = (even_a.astype(np.uint16) << 8) + even_b
    odd = (odd_a.astype(np.uint16) << 8) + odd_b

    even_weights = np.vectorize(_even_ewm.__getitem__)(even)
    odd_weights = np.vectorize(_odd_ewm.__getitem__)(odd)

    res = np.ndarray(shape=a.shape, dtype=np.int64)
    res[:, ::2] = even_weights
    res[:, 1::2] = odd_weights

    return res