From 0a3c81c8c6f92963b22081254b79517e036bde3e Mon Sep 17 00:00:00 2001
From: kris
Date: Tue, 17 Jan 2023 21:39:05 +0000
Subject: [PATCH] - have Bitmap.apply() update the memory representation
 instead of requiring callers to keep track of it

- stop trying to cache content_deltas; I think it results in losing deltas.
  Instead, just recompute the deltas for each page as needed. This is fast
  enough in practice.

- track the average fill rate for the additional offsets we emit. This should
  be close to 3 if we're succeeding in finding enough collateral work.

- overhaul how we pass in the target memory maps. The previous way didn't
  make sense: we weren't actually encoding for the target video frame, but
  were using an inconsistent mix of old and new frames. I think this was
  causing image artifacting because we were aiming for the wrong thing.

- add some debugging assertions that were used to track this down.
---
 transcoder/movie.py  |  43 ++++++++++----
 transcoder/screen.py |  58 ++++++++++++++++++-
 transcoder/video.py  | 134 +++++++++++++++++++++++--------------------
 3 files changed, 161 insertions(+), 74 deletions(-)

diff --git a/transcoder/movie.py b/transcoder/movie.py
index 73dedff..d32be20 100644
--- a/transcoder/movie.py
+++ b/transcoder/movie.py
@@ -6,6 +6,7 @@ import audio
 import frame_grabber
 import machine
 import opcodes
+import screen
 import video
 from palette import Palette
 from video_mode import VideoMode
@@ -58,34 +59,54 @@ class Movie:
         :return:
         """
         video_frames = self.frame_grabber.frames()
-        main_seq = None
-        aux_seq = None
+        op_seq = None
 
         yield opcodes.Header(mode=self.video_mode)
 
+        last_memory_bank = self.aux_memory_bank
         for au in self.audio.audio_stream():
             self.ticks += 1
-            if self.video.tick(self.ticks):
+            new_video_frame = self.video.tick(self.ticks)
+            if new_video_frame:
                 try:
                     main, aux = next(video_frames)
                 except StopIteration:
                     break
 
-                if ((self.video.frame_number - 1) % self.every_n_video_frames
-                        == 0):
-                    print("Starting frame %d" % self.video.frame_number)
-                    main_seq = self.video.encode_frame(main, is_aux=False)
+                should_encode_frame = (
+                    (self.video.frame_number - 1) %
+                    self.every_n_video_frames == 0
+                )
+                if should_encode_frame:
+                    if self.video_mode == VideoMode.DHGR:
+                        target_pixelmap = screen.DHGRBitmap(
+                            main_memory=main,
+                            aux_memory=aux,
+                            palette=self.palette
+                        )
+                    else:
+                        target_pixelmap = screen.HGRBitmap(
+                            main_memory=main,
+                            palette=self.palette
+                        )
 
-                if aux:
-                    aux_seq = self.video.encode_frame(aux, is_aux=True)
+                    print("Starting frame %d" % self.video.frame_number)
+                    op_seq = self.video.encode_frame(
+                        target_pixelmap, is_aux=self.aux_memory_bank)
+                    self.video.out_of_work = {True: False, False: False}
+
+            if self.aux_memory_bank != last_memory_bank:
+                # We've flipped memory banks; start a new opcode sequence
+                last_memory_bank = self.aux_memory_bank
+                op_seq = self.video.encode_frame(
+                    target_pixelmap, is_aux=self.aux_memory_bank)
 
             # au has range -15 .. 16 (step=1)
             # Tick cycles are units of 2
             tick = au * 2  # -30 .. 32 (step=2)
             tick += 34  # 4 .. 66 (step=2)
 
-            (page, content, offsets) = next(
-                aux_seq if self.aux_memory_bank else main_seq)
+            (page, content, offsets) = next(op_seq)
 
             yield opcodes.TICK_OPCODES[(tick, page)](content, offsets)
 
diff --git a/transcoder/screen.py b/transcoder/screen.py
index 73cc7f2..4ac9400 100644
--- a/transcoder/screen.py
+++ b/transcoder/screen.py
@@ -268,6 +268,11 @@ class Bitmap:
             byte_offset, self.packed[page, packed_offset], value)
         self._fix_scalar_neighbours(page, packed_offset, byte_offset)
 
+        if is_aux:
+            self.aux_memory.write(page, offset, value)
+        else:
+            self.main_memory.write(page, offset, value)
+
     def _fix_scalar_neighbours(
             self,
             page: int,
@@ -445,6 +450,51 @@ class Bitmap:
 
         return diff
 
+    # TODO: combine with _diff_weights
+    # TODO: unit test
+    def _diff_weights_page(
+            self,
+            source_packed: np.ndarray,
+            target_packed: np.ndarray,
+            is_aux: bool,
+            content: np.uint8 = None
+    ) -> np.ndarray:
+        """Computes edit distance matrix from source_packed to target_packed.
+
+        If content is set, the distance will be computed as if this value
+        was stored into each offset position of source_packed, i.e. to
+        allow evaluating which offsets (if any) should be chosen for storing
+        this content byte.
+        """
+
+        diff = np.ndarray((256,), dtype=np.int32)
+
+        offsets = self._byte_offsets(is_aux)
+
+        dists = []
+        for o in offsets:
+            if content is not None:
+                compare_packed = self.masked_update(o, source_packed, content)
+                self._fix_array_neighbours(compare_packed, o)
+            else:
+                compare_packed = source_packed
+
+            # Pixels influenced by byte offset o
+            source_pixels = self.mask_and_shift_data(compare_packed, o)
+            target_pixels = self.mask_and_shift_data(target_packed, o)
+
+            # Concatenate N-bit source and target into 2N-bit values
+            pair = (source_pixels << self.MASKED_BITS) + target_pixels
+            dist = self.edit_distances(self.palette)[o][pair].reshape(
+                pair.shape)
+            dists.append(dist)
+
+        # Interleave even/odd columns
+        diff[0::2] = dists[0]
+        diff[1::2] = dists[1]
+
+        return diff
+
     def _check_consistency(self):
         """Sanity check that headers and footers are consistent."""
 
@@ -474,8 +524,9 @@ class Bitmap:
         assert ok
 
     # TODO: unit tests
-    def compute_delta(
+    def compute_delta_page(
             self,
+            page: int,
             content: int,
             diff_weights: np.ndarray,
             is_aux: bool
@@ -490,7 +541,10 @@ class Bitmap:
         """
         # TODO: use error edit distance?
 
-        new_diff = self._diff_weights(self.packed, is_aux, content)
+        packed_page = self.packed[page, :].reshape(1, -1)
+
+        new_diff = self._diff_weights_page(
+            packed_page, packed_page, is_aux, content)
 
         # TODO: try different weightings
         return (new_diff * 5) - diff_weights
diff --git a/transcoder/video.py b/transcoder/video.py
index 81e14b2..7806710 100644
--- a/transcoder/video.py
+++ b/transcoder/video.py
@@ -27,13 +27,16 @@ class Video:
     ):
         self.mode = mode  # type: VideoMode
         self.frame_grabber = frame_grabber  # type: FrameGrabber
-        self.ticks_per_second = ticks_per_second  # type: float
+        self.ticks_per_second = float(ticks_per_second)  # type: float
         self.ticks_per_frame = (
                 self.ticks_per_second / frame_grabber.input_frame_rate
         )  # type: float
         self.frame_number = 0  # type: int
         self.palette = palette  # type: Palette
 
+        self._opcodes = 0
+        self._offsets = 0
+
         # Initialize empty screen
         self.memory_map = screen.MemoryMap(
             screen_page=1)  # type: screen.MemoryMap
@@ -57,6 +60,10 @@ class Video:
         if self.mode == mode.DHGR:
             self.aux_update_priority = np.zeros((32, 256), dtype=np.int32)
 
+        # Indicates whether we have run out of work for the main/aux banks.
+ # Key is True for aux bank and False for main bank + self.out_of_work = {True: False, False: False} + def tick(self, ticks: int) -> bool: """Keep track of when it is time for a new image frame.""" @@ -67,7 +74,7 @@ class Video: def encode_frame( self, - target: screen.MemoryMap, + target: screen.Bitmap, is_aux: bool, ) -> Iterator[opcodes.Opcode]: """Converge towards target frame in priority order of edit distance.""" @@ -84,6 +91,8 @@ class Video: memory_map.page_offset[screen.SCREEN_HOLES]) == 0 print("Similarity %f" % (update_priority.mean())) + if self._opcodes: + print("Opcode fill rate %f" % (self._offsets / self._opcodes)) yield from self._index_changes( memory_map, target, update_priority, is_aux) @@ -91,30 +100,16 @@ class Video: def _index_changes( self, source: screen.MemoryMap, - target: screen.MemoryMap, + target_pixelmap: screen.Bitmap, update_priority: np.array, - is_aux: True + is_aux: bool ) -> Iterator[Tuple[int, int, List[int]]]: """Transform encoded screen to sequence of change tuples.""" - if self.mode == VideoMode.DHGR: - if is_aux: - target_pixelmap = screen.DHGRBitmap( - main_memory=self.memory_map, - aux_memory=target, - palette=self.palette - ) - else: - target_pixelmap = screen.DHGRBitmap( - main_memory=target, - aux_memory=self.aux_memory_map, - palette=self.palette - ) + if self.mode == VideoMode.DHGR and is_aux: + target = target_pixelmap.aux_memory else: - target_pixelmap = screen.HGRBitmap( - main_memory=target, - palette=self.palette - ) + target = target_pixelmap.main_memory diff_weights = target_pixelmap.diff_weights(self.pixelmap, is_aux) # Don't bother storing into screen holes @@ -124,11 +119,10 @@ class Video: # with new frame update_priority[diff_weights == 0] = 0 update_priority += diff_weights + assert np.all(update_priority >= 0) priorities = self._heapify_priorities(update_priority) - content_deltas = {} - while priorities: pri, _, page, offset = heapq.heappop(priorities) @@ -152,23 +146,14 @@ class Video: diff_weights[page, offset] = 0 # Update memory maps - source.page_offset[page, offset] = content self.pixelmap.apply(page, offset, is_aux, content) - # Make sure we don't emit this offset as a side-effect of some - # other offset later. - for cd in content_deltas.values(): - cd[page, offset] = 0 - # TODO: what if we add another content_deltas entry later? - # We might clobber it again - # Need to find 3 more offsets to fill this opcode for err, o in self._compute_error( page, content, target_pixelmap, diff_weights, - content_deltas, is_aux ): assert o != offset @@ -180,13 +165,6 @@ class Video: # Someone already resolved this diff. continue - # Make sure we don't end up considering this (page, offset) - # again until the next image frame. Even if a better match - # comes along, it's probably better to fix up some other byte. - # TODO: or should we recompute it with new error? - for cd in content_deltas.values(): - cd[page, o] = 0 - byte_offset = target_pixelmap.byte_offset(o, is_aux) old_packed = target_pixelmap.packed[page, o // 2] @@ -196,13 +174,11 @@ class Video: # Update priority for the offset we're emitting update_priority[page, o] = p - source.page_offset[page, o] = content self.pixelmap.apply(page, o, is_aux, content) - if p: # This content byte introduced an error, so put back on the # heap in case we can get back to fixing it exactly - # during this frame. Otherwise we'll get to it later. + # during this frame. Otherwise, we'll get to it later. 
                    heapq.heappush(
                        priorities, (-p, random.getrandbits(8), page, o))
 
@@ -210,13 +186,34 @@ class Video:
             if len(offsets) == 3:
                 break
 
+            # Record how many additional offsets we were able to fill
+            self._opcodes += 1
+            self._offsets += len(offsets)
             # Pad to 4 if we didn't find enough
             for _ in range(len(offsets), 4):
                 offsets.append(offsets[0])
-            yield (page + 32, content, offsets)
+            yield page + 32, content, offsets
 
-        # # TODO: there is still a bug causing residual diffs when we have
-        # # apparently run out of work to do
+        self.out_of_work[is_aux] = True
+
+        # These debugging assertions validate that once we are out of work,
+        # our source and target representations are identical.
+        #
+        # They only work correctly for palettes that do not have identical
+        # colours (e.g. IIGS, but not NTSC, which has two identical greys).
+        #
+        # The problem is that if we have substituted one grey for the other,
+        # there may be no diff when they are part of an extended run of greys.
+        #
+        # The only difference is at the end of the run, where the two greys
+        # produce different artifact colours, but this may only be visible in
+        # the other bank.
+        #
+        # It may take several iterations of main/aux before we notice and
+        # correct all of these differences, which means there is no
+        # deterministic point in time at which we can assert that all diffs
+        # have been resolved.
+        # TODO: add flag to enable debug assertions
         if not np.array_equal(source.page_offset, target.page_offset):
             diffs = np.nonzero(source.page_offset != target.page_offset)
             for i in range(len(diffs[0])):
@@ -238,12 +235,28 @@ class Video:
                     diff_p, diff_o, source.page_offset[diff_p, diff_o],
                     target.page_offset[diff_p, diff_o]
                 ))
-            # assert False
+            assert False
+
+        # If we've finished both main and aux pages, there should be no
+        # residual diffs in the packed representation.
+        all_done = self.out_of_work[True] and self.out_of_work[False]
+        if all_done and not np.array_equal(self.pixelmap.packed,
+                                           target_pixelmap.packed):
+            diffs = np.nonzero(
+                self.pixelmap.packed != target_pixelmap.packed)
+            print("is_aux: %s" % is_aux)
+            for i in range(len(diffs[0])):
+                diff_p = diffs[0][i]
+                diff_o = diffs[1][i]
+                print("(%d, %d): got %d want %d" % (
+                    diff_p, diff_o, self.pixelmap.packed[diff_p, diff_o],
+                    target_pixelmap.packed[diff_p, diff_o]))
+            assert False
 
         # If we run out of things to do, pad forever
         content = target.page_offset[0, 0]
         while True:
-            yield (32, content, [0, 0, 0, 0])
+            yield 32, content, [0, 0, 0, 0]
 
     @staticmethod
     def _heapify_priorities(update_priority: np.array) -> List:
@@ -254,7 +267,9 @@ class Video:
         pages, offsets = update_priority.nonzero()
         priorities = [tuple(data) for data in np.stack((
             -update_priority[pages, offsets],
-            # Don't use deterministic order for page, offset
+            # Don't use deterministic order for page, offset. Otherwise,
+            # we get the "venetian blind" effect when filling large blocks of
+            # colour.
             np.random.randint(0, 2 ** 8, size=pages.shape[0]),
             pages,
             offsets)
@@ -265,24 +280,21 @@ class Video:
 
     _OFFSETS = np.arange(256)
 
-    def _compute_error(self, page, content, target_pixelmap, diff_weights,
-                       content_deltas, is_aux):
+    def _compute_error(
+            self, page, content, target_pixelmap, diff_weights, is_aux):
         """Build priority queue of other offsets at which to store content.
 
         Ordered by offsets which are closest to the target content value.
""" - # TODO: move this up into parent - delta_screen = content_deltas.get(content) - if delta_screen is None: - delta_screen = target_pixelmap.compute_delta( - content, diff_weights, is_aux) - content_deltas[content] = delta_screen - - delta_page = delta_screen[page] + delta_page = target_pixelmap.compute_delta_page( + page, content, diff_weights[page, :], is_aux) cond = delta_page < 0 candidate_offsets = self._OFFSETS[cond] priorities = delta_page[cond] + # Don't use deterministic order for page, offset. Otherwise, + # we get the "venetian blind" effect when filling large blocks of + # colour. deltas = [ (priorities[i], random.getrandbits(8), candidate_offsets[i]) for i in range(len(candidate_offsets)) @@ -290,8 +302,8 @@ class Video: heapq.heapify(deltas) while deltas: - pri, _, o = heapq.heappop(deltas) + pri, _, offset = heapq.heappop(deltas) assert pri < 0 - assert o <= 255 + assert 0 <= offset <= 255 - yield -pri, o + yield -pri, offset