From 0a3c81c8c6f92963b22081254b79517e036bde3e Mon Sep 17 00:00:00 2001
From: kris
Date: Tue, 17 Jan 2023 21:39:05 +0000
Subject: [PATCH] - have Bitmap.apply() update the memory representation
 instead of requiring callers to keep track of it

- stop trying to cache content_deltas; I think it results in losing deltas.
  Instead, just recompute the deltas for each page as needed. This is fast
  enough in practice.

- track the average fill rate for the additional offsets we emit. This should
  be close to 3 if we're succeeding in finding enough collateral work.

- overhaul how we pass in the target memory maps. The previous way didn't
  make sense: we weren't actually encoding for the target video frame, but
  were using an inconsistent mix of old and new frames. I think this was
  causing image artifacting because we were aiming for the wrong thing.

- add some debugging assertions that were used to track this down.
---
 transcoder/movie.py  |  43 ++++++++++----
 transcoder/screen.py |  58 ++++++++++++++++++-
 transcoder/video.py  | 134 +++++++++++++++++++++++--------------------
 3 files changed, 161 insertions(+), 74 deletions(-)

diff --git a/transcoder/movie.py b/transcoder/movie.py
index 73dedff..d32be20 100644
--- a/transcoder/movie.py
+++ b/transcoder/movie.py
@@ -6,6 +6,7 @@ import audio
 import frame_grabber
 import machine
 import opcodes
+import screen
 import video
 from palette import Palette
 from video_mode import VideoMode
@@ -58,34 +59,54 @@ class Movie:
         :return:
         """
         video_frames = self.frame_grabber.frames()
-        main_seq = None
-        aux_seq = None
+        op_seq = None
 
         yield opcodes.Header(mode=self.video_mode)
 
+        last_memory_bank = self.aux_memory_bank
         for au in self.audio.audio_stream():
             self.ticks += 1
-            if self.video.tick(self.ticks):
+            new_video_frame = self.video.tick(self.ticks)
+            if new_video_frame:
                 try:
                     main, aux = next(video_frames)
                 except StopIteration:
                     break
 
-                if ((self.video.frame_number - 1) % self.every_n_video_frames
-                        == 0):
-                    print("Starting frame %d" % self.video.frame_number)
-                    main_seq = self.video.encode_frame(main, is_aux=False)
+                should_encode_frame = (
+                    (self.video.frame_number - 1) %
+                    self.every_n_video_frames == 0
+                )
+                if should_encode_frame:
+                    if self.video_mode == VideoMode.DHGR:
+                        target_pixelmap = screen.DHGRBitmap(
+                            main_memory=main,
+                            aux_memory=aux,
+                            palette=self.palette
+                        )
+                    else:
+                        target_pixelmap = screen.HGRBitmap(
+                            main_memory=main,
+                            palette=self.palette
+                        )
 
-                if aux:
-                    aux_seq = self.video.encode_frame(aux, is_aux=True)
+                    print("Starting frame %d" % self.video.frame_number)
+                    op_seq = self.video.encode_frame(
+                        target_pixelmap, is_aux=self.aux_memory_bank)
+                    self.video.out_of_work = {True: False, False: False}
+
+            if self.aux_memory_bank != last_memory_bank:
+                # We've flipped memory banks; start a new opcode sequence
+                last_memory_bank = self.aux_memory_bank
+                op_seq = self.video.encode_frame(
+                    target_pixelmap, is_aux=self.aux_memory_bank)
 
             # au has range -15 .. 16 (step=1)
             # Tick cycles are units of 2
             tick = au * 2  # -30 .. 32 (step=2)
             tick += 34  # 4 .. 66 (step=2)
 
-            (page, content, offsets) = next(
-                aux_seq if self.aux_memory_bank else main_seq)
+            (page, content, offsets) = next(op_seq)
 
             yield opcodes.TICK_OPCODES[(tick, page)](content, offsets)
 
diff --git a/transcoder/screen.py b/transcoder/screen.py
index 73cc7f2..4ac9400 100644
--- a/transcoder/screen.py
+++ b/transcoder/screen.py
@@ -268,6 +268,11 @@ class Bitmap:
             byte_offset, self.packed[page, packed_offset], value)
         self._fix_scalar_neighbours(page, packed_offset, byte_offset)
 
+        if is_aux:
+            self.aux_memory.write(page, offset, value)
+        else:
+            self.main_memory.write(page, offset, value)
+
     def _fix_scalar_neighbours(
             self,
             page: int,
@@ -445,6 +450,51 @@ class Bitmap:
 
         return diff
 
+    # TODO: combine with _diff_weights
+    # TODO: unit test
+    def _diff_weights_page(
+            self,
+            source_packed: np.ndarray,
+            target_packed: np.ndarray,
+            is_aux: bool,
+            content: np.uint8 = None
+    ) -> np.ndarray:
+        """Computes edit distance matrix from source_packed to target_packed.
+
+        If content is set, the distance will be computed as if this value
+        was stored into each offset position of source_packed, i.e. to
+        allow evaluating which offsets (if any) should be chosen for storing
+        this content byte.
+        """
+
+        diff = np.ndarray((256,), dtype=np.int32)
+
+        offsets = self._byte_offsets(is_aux)
+
+        dists = []
+        for o in offsets:
+            if content is not None:
+                compare_packed = self.masked_update(o, source_packed, content)
+                self._fix_array_neighbours(compare_packed, o)
+            else:
+                compare_packed = source_packed
+
+            # Pixels influenced by byte offset o
+            source_pixels = self.mask_and_shift_data(compare_packed, o)
+            target_pixels = self.mask_and_shift_data(target_packed, o)
+
+            # Concatenate N-bit source and target into 2N-bit values
+            pair = (source_pixels << self.MASKED_BITS) + target_pixels
+            dist = self.edit_distances(self.palette)[o][pair].reshape(
+                pair.shape)
+            dists.append(dist)
+
+        # Interleave even/odd columns
+        diff[0::2] = dists[0]
+        diff[1::2] = dists[1]
+
+        return diff
+
     def _check_consistency(self):
         """Sanity check that headers and footers are consistent."""
 
@@ -474,8 +524,9 @@ class Bitmap:
         assert ok
 
     # TODO: unit tests
-    def compute_delta(
+    def compute_delta_page(
             self,
+            page: int,
             content: int,
             diff_weights: np.ndarray,
             is_aux: bool
@@ -490,7 +541,10 @@ class Bitmap:
         """
         # TODO: use error edit distance?
 
-        new_diff = self._diff_weights(self.packed, is_aux, content)
+        packed_page = self.packed[page, :].reshape(1, -1)
+
+        new_diff = self._diff_weights_page(
+            packed_page, packed_page, is_aux, content)
 
         # TODO: try different weightings
         return (new_diff * 5) - diff_weights
diff --git a/transcoder/video.py b/transcoder/video.py
index 81e14b2..7806710 100644
--- a/transcoder/video.py
+++ b/transcoder/video.py
@@ -27,13 +27,16 @@ class Video:
     ):
         self.mode = mode  # type: VideoMode
         self.frame_grabber = frame_grabber  # type: FrameGrabber
-        self.ticks_per_second = ticks_per_second  # type: float
+        self.ticks_per_second = float(ticks_per_second)  # type: float
         self.ticks_per_frame = (
                 self.ticks_per_second / frame_grabber.input_frame_rate
         )  # type: float
         self.frame_number = 0  # type: int
         self.palette = palette  # type: Palette
 
+        self._opcodes = 0
+        self._offsets = 0
+
         # Initialize empty screen
         self.memory_map = screen.MemoryMap(
             screen_page=1)  # type: screen.MemoryMap
@@ -57,6 +60,10 @@ class Video:
         if self.mode == mode.DHGR:
             self.aux_update_priority = np.zeros((32, 256), dtype=np.int32)
 
+        # Indicates whether we have run out of work for the main/aux banks.
+ # Key is True for aux bank and False for main bank + self.out_of_work = {True: False, False: False} + def tick(self, ticks: int) -> bool: """Keep track of when it is time for a new image frame.""" @@ -67,7 +74,7 @@ class Video: def encode_frame( self, - target: screen.MemoryMap, + target: screen.Bitmap, is_aux: bool, ) -> Iterator[opcodes.Opcode]: """Converge towards target frame in priority order of edit distance.""" @@ -84,6 +91,8 @@ class Video: memory_map.page_offset[screen.SCREEN_HOLES]) == 0 print("Similarity %f" % (update_priority.mean())) + if self._opcodes: + print("Opcode fill rate %f" % (self._offsets / self._opcodes)) yield from self._index_changes( memory_map, target, update_priority, is_aux) @@ -91,30 +100,16 @@ class Video: def _index_changes( self, source: screen.MemoryMap, - target: screen.MemoryMap, + target_pixelmap: screen.Bitmap, update_priority: np.array, - is_aux: True + is_aux: bool ) -> Iterator[Tuple[int, int, List[int]]]: """Transform encoded screen to sequence of change tuples.""" - if self.mode == VideoMode.DHGR: - if is_aux: - target_pixelmap = screen.DHGRBitmap( - main_memory=self.memory_map, - aux_memory=target, - palette=self.palette - ) - else: - target_pixelmap = screen.DHGRBitmap( - main_memory=target, - aux_memory=self.aux_memory_map, - palette=self.palette - ) + if self.mode == VideoMode.DHGR and is_aux: + target = target_pixelmap.aux_memory else: - target_pixelmap = screen.HGRBitmap( - main_memory=target, - palette=self.palette - ) + target = target_pixelmap.main_memory diff_weights = target_pixelmap.diff_weights(self.pixelmap, is_aux) # Don't bother storing into screen holes @@ -124,11 +119,10 @@ class Video: # with new frame update_priority[diff_weights == 0] = 0 update_priority += diff_weights + assert np.all(update_priority >= 0) priorities = self._heapify_priorities(update_priority) - content_deltas = {} - while priorities: pri, _, page, offset = heapq.heappop(priorities) @@ -152,23 +146,14 @@ class Video: diff_weights[page, offset] = 0 # Update memory maps - source.page_offset[page, offset] = content self.pixelmap.apply(page, offset, is_aux, content) - # Make sure we don't emit this offset as a side-effect of some - # other offset later. - for cd in content_deltas.values(): - cd[page, offset] = 0 - # TODO: what if we add another content_deltas entry later? - # We might clobber it again - # Need to find 3 more offsets to fill this opcode for err, o in self._compute_error( page, content, target_pixelmap, diff_weights, - content_deltas, is_aux ): assert o != offset @@ -180,13 +165,6 @@ class Video: # Someone already resolved this diff. continue - # Make sure we don't end up considering this (page, offset) - # again until the next image frame. Even if a better match - # comes along, it's probably better to fix up some other byte. - # TODO: or should we recompute it with new error? - for cd in content_deltas.values(): - cd[page, o] = 0 - byte_offset = target_pixelmap.byte_offset(o, is_aux) old_packed = target_pixelmap.packed[page, o // 2] @@ -196,13 +174,11 @@ class Video: # Update priority for the offset we're emitting update_priority[page, o] = p - source.page_offset[page, o] = content self.pixelmap.apply(page, o, is_aux, content) - if p: # This content byte introduced an error, so put back on the # heap in case we can get back to fixing it exactly - # during this frame. Otherwise we'll get to it later. + # during this frame. Otherwise, we'll get to it later. 
                    heapq.heappush(
                        priorities, (-p, random.getrandbits(8), page, o))
 
@@ -210,13 +186,34 @@ class Video:
             if len(offsets) == 3:
                 break
 
+            # Record how many additional offsets we were able to fill
+            self._opcodes += 1
+            self._offsets += len(offsets)
             # Pad to 4 if we didn't find enough
             for _ in range(len(offsets), 4):
                 offsets.append(offsets[0])
-            yield (page + 32, content, offsets)
+            yield page + 32, content, offsets
 
-        # # TODO: there is still a bug causing residual diffs when we have
-        # # apparently run out of work to do
+        self.out_of_work[is_aux] = True
+
+        # These debugging assertions validate that once we are out of work,
+        # our source and target representations are identical.
+        #
+        # They only work correctly for palettes that do not have identical
+        # colours (e.g. IIGS, but not NTSC, which has two identical greys).
+        #
+        # The problem is that if we have substituted one grey for the other,
+        # there may be no diff when they are part of an extended run of greys.
+        #
+        # The only difference is at the end of the run, where the two greys
+        # produce different artifact colours, but this may only be visible in
+        # the other bank.
+        #
+        # It may take several iterations of main/aux before we notice and
+        # correct all of these differences, which means there is no
+        # deterministic point in time at which we can assert that all diffs
+        # have been resolved.
+        # TODO: add flag to enable debug assertions
         if not np.array_equal(source.page_offset, target.page_offset):
             diffs = np.nonzero(source.page_offset != target.page_offset)
             for i in range(len(diffs[0])):
@@ -238,12 +235,28 @@ class Video:
                     diff_p, diff_o, source.page_offset[diff_p, diff_o],
                     target.page_offset[diff_p, diff_o]
                 ))
-            # assert False
+            assert False
+
+        # If we've finished both main and aux pages, there should be no
+        # residual diffs in the packed representation.
+        all_done = self.out_of_work[True] and self.out_of_work[False]
+        if all_done and not np.array_equal(self.pixelmap.packed,
+                                           target_pixelmap.packed):
+            diffs = np.nonzero(
+                self.pixelmap.packed != target_pixelmap.packed)
+            print("is_aux: %s" % is_aux)
+            for i in range(len(diffs[0])):
+                diff_p = diffs[0][i]
+                diff_o = diffs[1][i]
+                print("(%d, %d): got %d want %d" % (
+                    diff_p, diff_o, self.pixelmap.packed[diff_p, diff_o],
+                    target_pixelmap.packed[diff_p, diff_o]))
+            assert False
 
         # If we run out of things to do, pad forever
         content = target.page_offset[0, 0]
         while True:
-            yield (32, content, [0, 0, 0, 0])
+            yield 32, content, [0, 0, 0, 0]
 
     @staticmethod
     def _heapify_priorities(update_priority: np.array) -> List:
@@ -254,7 +267,9 @@ class Video:
         pages, offsets = update_priority.nonzero()
         priorities = [tuple(data) for data in np.stack((
             -update_priority[pages, offsets],
-            # Don't use deterministic order for page, offset
+            # Don't use deterministic order for page, offset. Otherwise,
+            # we get the "venetian blind" effect when filling large blocks of
+            # colour.
             np.random.randint(0, 2 ** 8, size=pages.shape[0]),
             pages,
             offsets)
@@ -265,24 +280,21 @@ class Video:
 
     _OFFSETS = np.arange(256)
 
-    def _compute_error(self, page, content, target_pixelmap, diff_weights,
-                       content_deltas, is_aux):
+    def _compute_error(
+            self, page, content, target_pixelmap, diff_weights, is_aux):
         """Build priority queue of other offsets at which to store content.
 
         Ordered by offsets which are closest to the target content value.
""" - # TODO: move this up into parent - delta_screen = content_deltas.get(content) - if delta_screen is None: - delta_screen = target_pixelmap.compute_delta( - content, diff_weights, is_aux) - content_deltas[content] = delta_screen - - delta_page = delta_screen[page] + delta_page = target_pixelmap.compute_delta_page( + page, content, diff_weights[page, :], is_aux) cond = delta_page < 0 candidate_offsets = self._OFFSETS[cond] priorities = delta_page[cond] + # Don't use deterministic order for page, offset. Otherwise, + # we get the "venetian blind" effect when filling large blocks of + # colour. deltas = [ (priorities[i], random.getrandbits(8), candidate_offsets[i]) for i in range(len(candidate_offsets)) @@ -290,8 +302,8 @@ class Video: heapq.heapify(deltas) while deltas: - pri, _, o = heapq.heappop(deltas) + pri, _, offset = heapq.heappop(deltas) assert pri < 0 - assert o <= 255 + assert 0 <= offset <= 255 - yield -pri, o + yield -pri, offset