Vectorize the computation of diff weights by precomputing a map of all
possible weights.  We encode the two 8-bit inputs into a single 16-bit
value instead of dealing with an array of tuples.
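
For reference, a minimal sketch of the encoding (pack_key/pack_keys
are illustrative names; the (a << 8) + b packing is the one used by
the precomputed maps in the diff below):

    import numpy as np

    def pack_key(a: int, b: int) -> int:
        # High byte is one input, low byte the other: a single 16-bit
        # key indexes a precomputed map of all 256 * 256 weights.
        return (a << 8) + b

    def pack_keys(a: np.ndarray, b: np.ndarray) -> np.ndarray:
        # Vectorized form: widen to uint16 first so shifting a uint8
        # array doesn't overflow.
        return (a.astype(np.uint16) << 8) + b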

Fix an important bug in _compute_delta: the old and is_odd arguments
were transposed, so we weren't actually subtracting the old deltas!
Surprisingly, when I accidentally fixed this bug in the vectorized
version, the video encoding got much worse!  This turned out to be
because the edit-distance metric allowed reducing diffs by turning on
pixels, which meant it would tend to do so when "minimizing error", in
a way that was visually unappealing.

To remedy this, introduce a separate notion of substitution cost for
errors, and weight pixel colour changes more highly to discourage them
unless absolutely necessary.  This gives very good quality results!
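
A condensed sketch of the two-matrix idea (the diff below builds full
128x128 matrices over the pixel alphabet; the loop here is simplified
and the function name is illustrative):

    import numpy as np
    import weighted_levenshtein

    # Unit costs when computing raw diff weights.
    substitute_costs = np.ones((128, 128), dtype=np.float64)
    # 5x penalty for any pixel change when scoring errors, so the
    # encoder won't "reduce error" by flipping unrelated pixels.
    error_substitute_costs = np.ones((128, 128), dtype=np.float64)
    for c in "K01GVWOB":
        for d in "K01GVWOB":
            error_substitute_costs[ord(c), ord(d)] = 5

    def edit_weight(a_pixels: str, b_pixels: str, error: bool) -> float:
        costs = error_substitute_costs if error else substitute_costs
        return weighted_levenshtein.dam_lev(
            a_pixels, b_pixels, substitute_costs=costs)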

Also vectorize the selection of page offsets and priorities with
a negative error delta, instead of heapifying the entire page.
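
A minimal sketch of the vectorized selection (candidate_offsets is an
illustrative name; the diff keeps a small heap with random
tie-breaking instead of the argsort used here):

    import numpy as np

    _OFFSETS = np.arange(256)

    def candidate_offsets(delta_page: np.ndarray,
                          priorities: np.ndarray) -> np.ndarray:
        # Keep only offsets whose delta is negative (i.e. that move us
        # closer to the target), ordered by descending priority.
        cond = delta_page < 0
        return _OFFSETS[cond][np.argsort(-priorities[cond])]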

Also, it turns out to be a bit faster to compute (and memoize) the delta
between a proposed content byte and the entire target screen at once,
since we'd otherwise end up recomputing the same content diffs multiple times.
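
A sketch of the memoization, with illustrative names (the diff threads
a content_deltas dict through _compute_error and fills it from
content_edit_weight):

    from typing import Callable, Dict
    import numpy as np

    def memoized_delta(
            content: int,
            target: np.ndarray,      # (32, 256) target screen bytes
            old_error: np.ndarray,   # (32, 256) current error weights
            cache: Dict[int, np.ndarray],
            edit_weight_fn: Callable[[int, np.ndarray], np.ndarray],
    ) -> np.ndarray:
        # Compute the whole-screen delta for this content byte once,
        # then reuse it for every offset proposing the same byte.
        delta = cache.get(content)
        if delta is None:
            delta = edit_weight_fn(content, target) - old_error
            cache[content] = delta
        return delta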

(Committing messy version in case I want to revisit some of those
interim versions)
kris 2019-03-14 22:08:50 +00:00
parent ede453d292
commit 976e26f159

video.py

@@ -6,7 +6,7 @@ import threading
import queue
import subprocess
from typing import List, Iterator, Tuple, Iterable
from typing import List, Iterator, Tuple
from PIL import Image
import numpy as np
@@ -28,24 +28,28 @@ def hamming_weight(n):
# any pixel at the right place even if the wrong colour?
substitute_costs = np.ones((128, 128), dtype=np.float64)
error_substitute_costs = np.ones((128, 128), dtype=np.float64)
# Penalty for turning on/off a black bit
for c in "01GVWOB":
substitute_costs[(ord('K'), ord(c))] = 5
substitute_costs[(ord(c), ord('K'))] = 5
substitute_costs[(ord('K'), ord(c))] = 1
substitute_costs[(ord(c), ord('K'))] = 1
error_substitute_costs[(ord('K'), ord(c))] = 5
error_substitute_costs[(ord(c), ord('K'))] = 5
# Penalty for changing colour
for c in "01GVWOB":
for d in "01GVWOB":
substitute_costs[(ord(c), ord(d))] = 1
substitute_costs[(ord(d), ord(c))] = 1
error_substitute_costs[(ord(c), ord(d))] = 5
error_substitute_costs[(ord(d), ord(c))] = 5
insert_costs = np.ones(128, dtype=np.float64) * 1000
delete_costs = np.ones(128, dtype=np.float64) * 1000
@functools.lru_cache(None)
def edit_weight(a: int, b: int, is_odd_offset: bool):
def _edit_weight(a: int, b: int, is_odd_offset: bool, error: bool):
a_pixels = byte_to_colour_string(a, is_odd_offset)
b_pixels = byte_to_colour_string(b, is_odd_offset)
@@ -53,11 +57,157 @@ def edit_weight(a: int, b: int, is_odd_offset: bool):
a_pixels, b_pixels,
insert_costs=insert_costs,
delete_costs=delete_costs,
substitute_costs=substitute_costs,
substitute_costs=error_substitute_costs if error else substitute_costs,
)
return np.int64(dist)
def edit_weight_matrixes(error: bool) -> np.array:
ewm = np.zeros(shape=(256, 256, 2), dtype=np.int64)
for a in range(256):
for b in range(256):
for is_odd_offset in (False, True):
ewm[a, b, int(is_odd_offset)] = _edit_weight(
a, b, is_odd_offset, error)
return ewm
_ewm = edit_weight_matrixes(False)
_error_ewm = edit_weight_matrixes(True)
@functools.lru_cache(None)
def edit_weight(a: int, b: int, is_odd_offset: bool, error: bool):
e = _error_ewm if error else _ewm
return e[a, b, int(is_odd_offset)]
#
# @functools.lru_cache(None)
# def edit_weight_old(a: int, b: int, is_odd_offset: bool):
# a_pixels = byte_to_colour_string(a, is_odd_offset)
# b_pixels = byte_to_colour_string(b, is_odd_offset)
#
# dist = weighted_levenshtein.dam_lev(
# a_pixels, b_pixels,
# insert_costs=insert_costs,
# delete_costs=delete_costs,
# substitute_costs=substitute_costs,
# )
# assert dist == edit_weight_new(a, b, is_odd_offset), (dist, a, b,
# is_odd_offset)
# return np.int64(dist)
_even_ewm = {}
_odd_ewm = {}
_even_error_ewm = {}
_odd_error_ewm = {}
for a in range(256):
for b in range(256):
_even_ewm[(a << 8) + b] = edit_weight(a, b, False, False)
_odd_ewm[(a << 8) + b] = edit_weight(a, b, True, False)
_even_error_ewm[(a << 8) + b] = edit_weight(a, b, False, True)
_odd_error_ewm[(a << 8) + b] = edit_weight(a, b, True, True)
#
# for a in range(256):
# for b in range(256):
# assert edit_weight(a, b, True) == edit_weight(b, a, True)
# assert edit_weight(a, b, False) == edit_weight(b, a, False)
# def array_edit_weight2(content: int, b: np.array) -> np.array:
# assert b.shape == (256,), b.shape
#
# # Extract even and odd column offsets (128,)
# even_b = b[::2]
# odd_b = b[1::2]
#
# a = np.ones(even_b.shape, dtype=np.int64) * content
#
# even = (a << 8) + even_b
# odd = (a << 8) + odd_b
#
# even_weights = npi.remap(
# even, _ewm_keys, _even_ewm_values, missing="raise")
# odd_weights = npi.remap(
# odd, _ewm_keys, _odd_ewm_values, missing="raise")
#
# res = np.ndarray(shape=(256,), dtype=np.int64)
# res[::2] = even_weights
# res[1::2] = odd_weights
#
# return res
@functools.lru_cache(None)
def _content_a_array(content: int, shape) -> np.array:
return (np.ones(shape, dtype=np.uint16) * content) << 8
def content_edit_weight(content: int, b: np.array) -> np.array:
assert b.shape == (32, 256), b.shape
# Extract even and odd column offsets (32, 128)
even_b = b[:, ::2]
odd_b = b[:, 1::2]
a = _content_a_array(content, even_b.shape)
even = a + even_b
odd = a + odd_b
even_weights = np.vectorize(_even_error_ewm.__getitem__)(even)
odd_weights = np.vectorize(_odd_error_ewm.__getitem__)(odd)
res = np.ndarray(shape=b.shape, dtype=np.int64)
res[:, ::2] = even_weights
res[:, 1::2] = odd_weights
return res
def array_edit_weight(a: np.array, b: np.array) -> np.array:
# assert a.shape == b.shape == (32, 256), (a.shape, b.shape)
# Extract even and odd column offsets (32, 128)
even_a = a[:, ::2]
odd_a = a[:, 1::2]
even_b = b[:, ::2]
odd_b = b[:, 1::2]
even = (even_a.astype(np.uint16) << 8) + even_b
odd = (odd_a.astype(np.uint16) << 8) + odd_b
#
# print("XXX")
# print(a)
# print(b)
# print(even_a)
# print(even_b)
# print(even)
even_weights = np.vectorize(_even_ewm.__getitem__)(even)
odd_weights = np.vectorize(_odd_ewm.__getitem__)(odd)
#
# print(even_weights)
# print(odd_weights)
res = np.ndarray(shape=a.shape, dtype=np.int64)
res[:, ::2] = even_weights
res[:, 1::2] = odd_weights
return res
# _x = np.ndarray((4, 4), dtype=np.uint8)
# print(array_edit_weight(_x, _x))
# assert np.array_equal(array_edit_weight(_x, _x), np.zeros((32, 256)))
@functools.lru_cache(None)
def byte_to_colour_string(b: int, is_odd_offset: bool) -> str:
pixels = []
@@ -194,28 +344,55 @@ class Video:
print("Similarity %f" % (self.update_priority.mean()))
yield from self._index_changes(self.memory_map, target)
def _diff_weights(
self,
# def _diff_weights(
# self,
# source: screen.MemoryMap,
# target: screen.MemoryMap
# ):
# diff_weights = np.zeros((32, 256), dtype=np.int64)
#
# it = np.nditer(
# source.page_offset ^ target.page_offset, flags=['multi_index'])
# while not it.finished:
# # If no diff, don't need to bother
# if not it[0]:
# it.iternext()
# continue
#
# diff_weights[it.multi_index] = edit_weight(
# source.page_offset[it.multi_index],
# target.page_offset[it.multi_index],
# it.multi_index[1] % 2 == 1
# )
# it.iternext()
# aew = array_edit_weight(source.page_offset,
# target.page_offset)
# if not np.array_equal(
# diff_weights, aew
# ):
# it = np.nditer(
# diff_weights - aew, flags=['multi_index'])
# while not it.finished:
# # If no diff, don't need to bother
# if it[0]:
# print(
# source.page_offset[it.multi_index],
# target.page_offset[it.multi_index],
# diff_weights[it.multi_index],
# aew[it.multi_index], it.multi_index)
# it.iternext()
# assert False
# return diff_weights
@staticmethod
def _diff_weights_new(
source: screen.MemoryMap,
target: screen.MemoryMap
):
diff_weights = np.zeros((32, 256), dtype=np.int64)
it = np.nditer(
source.page_offset ^ target.page_offset, flags=['multi_index'])
while not it.finished:
# If no diff, don't need to bother
if not it[0]:
it.iternext()
continue
diff_weights[it.multi_index] = edit_weight(
source.page_offset[it.multi_index],
target.page_offset[it.multi_index],
it.multi_index[1] % 2 == 1
)
it.iternext()
return diff_weights
return array_edit_weight(
source.page_offset, target.page_offset)
def _heapify_priorities(self) -> List:
priorities = []
@@ -234,42 +411,88 @@ class Video:
return priorities
@functools.lru_cache(None)
def _compute_delta(self, content, target, old, is_odd):
return edit_weight(content, target, is_odd, error=True) - old
@staticmethod
def _compute_delta(content, target, old):
return content_edit_weight(content, target) - old
def _compute_error(self, page, content, target, old_error):
# XXX 0WKK -> 1KKV (3)
# 1VVV -> 1KKV (2) is closer to target but a big
# visual difference
# 0WKK -> 1KKV = 2 transpose + 2 flip = 12, or 3 flip = 15
# 1VVV -> 1KKV = 2 flip = 10, delta = -2
# @functools.lru_cache(None)
# def _compute_delta_old(self, content, target, is_odd, old):
# return edit_weight(content, target, is_odd) # - old
_OFFSETS = np.arange(256)
def _compute_error(self, page, content, target, old_error, content_deltas):
offsets = []
old_error_page = old_error[page]
tpo = target.page_offset[page]
delta_screen = content_deltas.get(content)
if delta_screen is None:
delta_screen = self._compute_delta(
content, target.page_offset, old_error)
content_deltas[content] = delta_screen
delta_page = delta_screen[page]
page_priorities = [(-p, random.random(), o) for o, p in enumerate(
self.update_priority[page]) if p]
heapq.heapify(page_priorities)
# old_error_page = old_error[page]
# tpo = target.page_offset[page]
#
# # If we store content at this offset, what is the difference
# # between this edit distance and the ideal target edit distance?
# delta_page = self._compute_delta(
# content, tpo, old_error_page)
# # print(delta_page)
cond = delta_page < 0
# Iterate in descending priority order and take first 3 offsets with
# negative delta
while page_priorities:
_, _, o = heapq.heappop(page_priorities)
candidate_offsets = self._OFFSETS[cond]
priorities = self.update_priority[page][cond]
# deltas = delta_page[cond]
# If we store content at this offset, what is the difference
# between this edit distance and the ideal target edit distance?
delta = self._compute_delta(
content, tpo[o], o % 2 == 1, old_error_page[o])
# assert len(priorities) == len(candidate_offsets) == len(deltas) ==
# sum(cond)
# Getting further away from goal, no thanks!
if delta >= 0:
continue
#
# # print("Offset %d prio %d: %d -> %d = %d" % (
# # o, p, content,
# # target.page_offset[page, o],
# # delta
# # ))
l = [
(-priorities[i], random.random(), candidate_offsets[i])
for i in range(len(candidate_offsets))
]
# offsets = [o for _, _, o in heapq.nsmallest(3, l)]
heapq.heapify(l)
while l:
_, _, o = heapq.heappop(l)
offsets.append(o)
if len(offsets) == 3:
break
#
# page_priorities = [(-p, random.random(), o) for o, p in enumerate(
# self.update_priority[page]) if p]
# heapq.heapify(page_priorities)
#
# # Iterate in descending priority order and take first 3 offsets with
# # negative delta
# while page_priorities:
# _, _, o = heapq.heappop(page_priorities)
#
# # If we store content at this offset, what is the difference
# # between this edit distance and the ideal target edit distance?
# delta = self._compute_delta_old(
# content, tpo[o], o % 2 == 1, old_error_page[o])
#
# # Getting further away from goal, no thanks!
# if delta >= 0:
# continue
# #
# # # print("Offset %d prio %d: %d -> %d = %d" % (
# # # o, p, content,
# # # target.page_offset[page, o],
# # # delta
# # # ))
# offsets.append(o)
# if len(offsets) == 3:
# break
return offsets
@@ -283,15 +506,23 @@ class Video:
Change tuple is (update_priority, page, offset, content, run_length)
"""
diff_weights = self._diff_weights(source, target)
diff_weights = self._diff_weights_new(source, target)
# Clear any update priority entries that have resolved themselves
# with new frame
self.update_priority[diff_weights == 0] = 0
# Halve existing weights to increase bias to new diffs.
# In particular this means that existing updates with diff 1 will
# become diff 0, i.e. will only be prioritized if they are still
# diffs in the new frame.
# self.update_priority >>= 1
self.update_priority += diff_weights
priorities = self._heapify_priorities()
content_deltas = {}
while priorities:
_, _, page, offset = heapq.heappop(priorities)
# Check whether we've already cleared this diff while processing
@@ -313,7 +544,8 @@
page,
content,
target,
diff_weights
diff_weights,
content_deltas
):
offsets.append(o)
# Clear priority for the offset we're emitting