From fd49736b7109628e44b5a2b8d442cdb96f0e6ff4 Mon Sep 17 00:00:00 2001 From: kris Date: Fri, 14 Jun 2019 00:12:26 +0100 Subject: [PATCH] FrameSequencer: - Extract out a (File)FrameSequencer class from Video to encapsulate the generation of still frames. This also makes Video easier to test. - Fix FileFrameSequencer.frames() to correctly handle filenames containing '.' - Temporarily switch to the BMP2DHR NTSC palette (#5) for evaluation. Video: - Temporarily hardcode DHGR decoding - Optimize _heapify_priorities() by using numpy to vectorize the construction of the list of tuples. This requires changing the random nonce to an int so the intermediate array has a uniform type. - Use the efficient 28-bit representation of DHGR (aux, main, aux, main) tuples introduced in DHGRBitmap to evaluate diffs - Switch to np.int type for accumulating diffs, and random.randint(0, 10000) instead of float for nonce values. - Fix/improve some of the error evaluation in _index_changes: - skip offsets whose diffs have already been cleared - hoist some stuff out of _compute_error into the parent - Add some validation that, once we run out of work to do for a frame, the source and target memory maps are equal. They are currently not always equal, i.e. there is still a bug that leaves residual diffs. 
--- transcoder/main.py | 2 +- transcoder/movie.py | 17 +- transcoder/video.py | 418 +++++++++++++++++++++++++++++--------------- 3 files changed, 286 insertions(+), 151 deletions(-) diff --git a/transcoder/main.py b/transcoder/main.py index 2579ffd..0857d41 100644 --- a/transcoder/main.py +++ b/transcoder/main.py @@ -40,7 +40,7 @@ def main(args): video_mode=video.Mode[args.video_mode] ) - print("Input frame rate = %f" % m.video.input_frame_rate) + print("Input frame rate = %f" % m.frame_sequencer.input_frame_rate) if args.output: out_filename = args.output diff --git a/transcoder/movie.py b/transcoder/movie.py index e6af86b..ef68a1a 100644 --- a/transcoder/movie.py +++ b/transcoder/movie.py @@ -23,8 +23,11 @@ class Movie: self.audio = audio.Audio( filename, normalization=audio_normalization) # type: audio.Audio + + self.frame_sequencer = video.FileFrameSequencer( + filename, mode=video_mode) self.video = video.Video( - filename, mode=video_mode, + self.frame_sequencer, mode=video_mode, ticks_per_second=self.audio.sample_rate ) # type: video.Video @@ -44,7 +47,7 @@ class Movie: :return: """ - video_frames = self.video.frames() + video_frames = self.frame_sequencer.frames() main_seq = None aux_seq = None @@ -61,21 +64,17 @@ class Movie: if ((self.video.frame_number - 1) % self.every_n_video_frames == 0): print("Starting frame %d" % self.video.frame_number) - main_seq = self.video.encode_frame( - main, self.video.memory_map, self.video.update_priority) + main_seq = self.video.encode_frame(main, is_aux=False) if aux: - aux_seq = self.video.encode_frame( - aux, self.video.aux_memory_map, - self.video.aux_update_priority) - + aux_seq = self.video.encode_frame(aux, is_aux=True) # au has range -15 .. 16 (step=1) # Tick cycles are units of 2 tick = au * 2 # -30 .. 32 (step=2) tick += 34 # 4 .. 
66 (step=2) (page, content, offsets) = next( - aux_seq if self.aux_memory_bank else main_seq) + aux_seq if self.aux_memory_bank else main_seq) yield opcodes.TICK_OPCODES[(tick, page)](content, offsets) diff --git a/transcoder/video.py b/transcoder/video.py index 4214719..77950d8 100644 --- a/transcoder/video.py +++ b/transcoder/video.py @@ -1,6 +1,7 @@ """Encode a sequence of images as an optimized stream of screen changes.""" import enum +import functools import heapq import os import queue @@ -9,12 +10,10 @@ import subprocess import threading from typing import List, Iterator, Tuple -# import hitherdither import numpy as np import skvideo.io from PIL import Image -import edit_distance import opcodes import screen @@ -24,20 +23,25 @@ class Mode(enum.Enum): DHGR = 1 -class Video: - """Apple II screen memory map encoding a bitmapped frame.""" +class FrameSequencer: + def __init__(self, mode: Mode): + self.video_mode = mode + self.input_frame_rate = 30 - CLOCK_SPEED = 1024 * 1024 # type: int + def frames(self) -> Iterator[screen.MemoryMap]: + raise NotImplementedError + +class FileFrameSequencer(FrameSequencer): def __init__( self, filename: str, - ticks_per_second: float, mode: Mode = Mode.HGR, ): + super(FileFrameSequencer, self).__init__(mode) + self.filename = filename # type: str self.mode = mode # type: Mode - self.ticks_per_second = ticks_per_second # type: float self._reader = skvideo.io.FFmpegReader(filename) @@ -48,73 +52,17 @@ class Video: self.input_frame_rate = float( rate_data[0]) / float(rate_data[1]) # type: float - self.ticks_per_frame = ( - self.ticks_per_second / self.input_frame_rate) # type: float - self.frame_number = 0 # type: int - - # Initialize empty screen - self.memory_map = screen.MemoryMap( - screen_page=1) # type: screen.MemoryMap - if self.mode == mode.DHGR: - self.aux_memory_map = screen.MemoryMap( - screen_page=1) # type: screen.MemoryMap - - # Accumulates pending edit weights across frames - self.update_priority = np.zeros((32, 
256), dtype=np.int64) - if self.mode == mode.DHGR: - self.aux_update_priority = np.zeros((32, 256), dtype=np.int64) - - def tick(self, ticks: int) -> bool: - if ticks >= (self.ticks_per_frame * self.frame_number): - self.frame_number += 1 - return True - return False - def _frame_grabber(self) -> Iterator[Image.Image]: for frame_array in self._reader.nextFrame(): yield Image.fromarray(frame_array) - @staticmethod - def _rgb(r, g, b): - return (r << 16) + (g << 8) + b - - # def dither_framesframes(self) -> Iterator[screen.MemoryMap]: - # palette = hitherdither.palette.Palette( - # [ - # self._rgb(0,0,0), # black */ - # self._rgb(148,12,125), # red - hgr 0*/ - # self._rgb(32,54,212), # dk blue - hgr 0 */ - # self._rgb(188,55,255), # purple - default HGR overlay color */ - # self._rgb(51,111,0), # dk green - hgr 0 */ - # self._rgb(126,126,126), # gray - hgr 0 */ - # self._rgb(7,168,225), # med blue - alternate HGR overlay - # # color */ - # self._rgb(158,172,255), # lt blue - hgr 0 */ - # self._rgb(99,77,0), # brown - hgr 0 */ - # self._rgb(249,86,29), # orange */ - # self._rgb(126,126,126), # grey - hgr 0 */ - # self._rgb(255,129,236), # pink - hgr 0 */ - # self._rgb(67,200,0), # lt green */ - # self._rgb(221,206,23), # yellow - hgr 0 */ - # self._rgb(93,248,133), # aqua - hgr 0 */ - # self._rgb(255,255,255) # white - # ] - # ) - # for _idx, _frame in enumerate(self._frame_grabber()): - # if _idx % 60 == 0: - # img_dithered = hitherdither.ordered.yliluoma.yliluomas_1_ordered_dithering( - # _frame.resize((280,192), resample=Image.NEAREST), - # palette, order=8) - # - # yield img_dithered - def frames(self) -> Iterator[screen.MemoryMap]: """Encode frame to HGR using bmp2dhr. We do the encoding in a background thread to parallelize. 
""" - frame_dir = self.filename.split(".")[0] + frame_dir = ".".join(self.filename.split(".")[:-1]) try: os.mkdir(frame_dir) except FileExistsError: @@ -135,7 +83,8 @@ class Video: # TODO: parametrize palette subprocess.call([ "/usr/local/bin/bmp2dhr", bmpfile, "hgr", - "P0", # Kegs32 RGB Color palette(for //gs playback) + "P5", + # "P0", # Kegs32 RGB Color palette(for //gs playback) "D9" # Buckels dither ]) @@ -160,8 +109,9 @@ class Video: # TODO: parametrize palette subprocess.call([ - "/usr/local/bin/bmp2dhr", bmpfile, "dhgr", - "P0", # Kegs32 RGB Color palette (for //gs playback) + "/usr/local/bin/bmp2dhr", bmpfile, "dhgr", # "v", + "P5", # "P0", # Kegs32 RGB Color palette (for //gs + # playback) "A", # Output separate .BIN and .AUX files "D9" # Buckels dither ]) @@ -176,7 +126,7 @@ class Video: def worker(): """Invoke bmp2dhr to encode input image frames and push to queue.""" for _idx, _frame in enumerate(self._frame_grabber()): - if self.mode == Mode.DHGR: + if self.video_mode == Mode.DHGR: res = _dhgr_decode(_idx, _frame) else: res = _hgr_decode(_idx, _frame) @@ -188,7 +138,6 @@ class Video: t.start() while True: - main, aux = q.get() if main is None: break @@ -205,35 +154,91 @@ class Video: t.join() + +class Video: + """Apple II screen memory map encoding a bitmapped frame.""" + + CLOCK_SPEED = 1024 * 1024 # type: int + + def __init__( + self, + frame_sequencer: FrameSequencer, + mode: Mode = Mode.HGR + ): + self.mode = mode # type: Mode + self.frame_sequencer = frame_sequencer # type: FrameSequencer + self.ticks_per_frame = ( + self.ticks_per_second / self.input_frame_rate) # type: float + self.frame_number = 0 # type: int + + # Initialize empty screen + self.memory_map = screen.MemoryMap( + screen_page=1) # type: screen.MemoryMap + if self.mode == mode.DHGR: + self.aux_memory_map = screen.MemoryMap( + screen_page=1) # type: screen.MemoryMap + + self.pixelmap = screen.DHGRBitmap( + main_memory=self.memory_map, + aux_memory=self.aux_memory_map + ) + + # 
Accumulates pending edit weights across frames + self.update_priority = np.zeros((32, 256), dtype=np.int) + if self.mode == mode.DHGR: + self.aux_update_priority = np.zeros((32, 256), dtype=np.int) + + def tick(self, cycles: int) -> bool: + if cycles > (self.cycles_per_frame * self.frame_number): + self.frame_number += 1 + return True + return False + def encode_frame( - self, target: screen.MemoryMap, - memory_map: screen.MemoryMap, - update_priority: np.array, + self, + target: screen.MemoryMap, + is_aux: bool, ) -> Iterator[opcodes.Opcode]: """Update to match content of frame within provided budget.""" + if is_aux: + memory_map = self.aux_memory_map + update_priority = self.aux_update_priority + else: + memory_map = self.memory_map + update_priority = self.update_priority print("Similarity %f" % (update_priority.mean())) - yield from self._index_changes(memory_map, target, update_priority) + + yield from self._index_changes( + memory_map, target, update_priority, is_aux) def _index_changes( self, source: screen.MemoryMap, target: screen.MemoryMap, - update_priority: np.array + update_priority: np.array, + is_aux: True ) -> Iterator[Tuple[int, int, List[int]]]: """Transform encoded screen to sequence of change tuples.""" - diff_weights = self._diff_weights(source, target) + if is_aux: + target_pixelmap = screen.DHGRBitmap( + main_memory=self.memory_map, + aux_memory=target + ) + else: + target_pixelmap = screen.DHGRBitmap( + main_memory=target, + aux_memory=self.aux_memory_map + ) + + diff_weights = self._diff_weights( + self.pixelmap, target_pixelmap, is_aux + ) # Clear any update priority entries that have resolved themselves # with new frame update_priority[diff_weights == 0] = 0 - - # Halve existing weights to increase bias to new diffs. - # In particular this means that existing updates with diff 1 will - # become diff 0, i.e. will only be prioritized if they are still - # diffs in the new frame. 
- # self.update_priority >>= 1 update_priority += diff_weights priorities = self._heapify_priorities(update_priority) @@ -241,7 +246,8 @@ class Video: content_deltas = {} while priorities: - _, _, page, offset = heapq.heappop(priorities) + pri, _, page, offset = heapq.heappop(priorities) + # Check whether we've already cleared this diff while processing # an earlier opcode if update_priority[page, offset] == 0: @@ -249,100 +255,237 @@ class Video: offsets = [offset] content = target.page_offset[page, offset] + assert content < 0x80 # DHGR palette bit not expected to be set # Clear priority for the offset we're emitting update_priority[page, offset] = 0 - source.page_offset[page, offset] = content diff_weights[page, offset] = 0 + # Update memory maps + source.page_offset[page, offset] = content + self.pixelmap.apply(page, offset, is_aux, content) + # Make sure we don't emit this offset as a side-effect of some # other offset later. for cd in content_deltas.values(): cd[page, offset] = 0 + # TODO: what if we add another content_deltas entry later? + # We might clobber it again # Need to find 3 more offsets to fill this opcode - for o in self._compute_error( + for err, o in self._compute_error( page, content, - target, + target_pixelmap, diff_weights, - content_deltas + content_deltas, + is_aux ): - offsets.append(o) + assert o != offset - # Compute new edit distance between new content and target - # byte, so we can reinsert with this value - p = edit_distance.edit_weight( - content, target.page_offset[page, o], o % 2 == 1, - error=False) + if update_priority[page, o] == 0: + # print("Skipping page=%d, offset=%d" % (page, o)) + continue + + # Make sure we don't end up considering this (page, offset) + # again until the next image frame. Even if a better match + # comes along, it's probably better to fix up some other byte. + # TODO: or should we recompute it with new error? 
+ for cd in content_deltas.values(): + cd[page, o] = 0 + + byte_offset = target_pixelmap.interleaved_byte_offset(o, is_aux) + old_packed = target_pixelmap.packed[page, o // 2] + + p = self._byte_pair_difference( + target_pixelmap, byte_offset, old_packed, content) # Update priority for the offset we're emitting update_priority[page, o] = p # 0 source.page_offset[page, o] = content + self.pixelmap.apply(page, o, is_aux, content) if p: # This content byte introduced an error, so put back on the # heap in case we can get back to fixing it exactly # during this frame. Otherwise we'll get to it later. heapq.heappush( - priorities, (-p, random.random(), page, offset)) + priorities, (-p, random.randint(0, 10000), page, o)) + + offsets.append(o) + if len(offsets) == 3: + break # Pad to 4 if we didn't find enough for _ in range(len(offsets), 4): offsets.append(offsets[0]) - yield (page + 32, content, offsets) + # TODO: there is still a bug causing residual diffs when we have + # apparently run out of work to do + if not np.array_equal(source.page_offset, target.page_offset): + diffs = np.nonzero(source.page_offset != target.page_offset) + for i in range(len(diffs[0])): + diff_p = diffs[0][i] + diff_o = diffs[1][i] + + print("Diff at (%d, %d): %d != %d" % ( + diff_p, diff_o, source.page_offset[diff_p, diff_o], + target.page_offset[diff_p, diff_o] + )) + # assert False + # If we run out of things to do, pad forever - content = target.page_offset[(0, 0)] + content = target.page_offset[0, 0] while True: yield (32, content, [0, 0, 0, 0]) @staticmethod - def _diff_weights( - source: screen.MemoryMap, - target: screen.MemoryMap - ): - return edit_distance.screen_edit_distance( - source.page_offset, target.page_offset) - - def _heapify_priorities(self, update_priority: np.array) -> List: - priorities = [] - it = np.nditer(update_priority, flags=['multi_index']) - while not it.finished: - priority = it[0] - if not priority: - it.iternext() - continue - - page, offset = 
it.multi_index - + def _heapify_priorities(update_priority: np.array) -> List: + pages, offsets = update_priority.nonzero() + priorities = [tuple(data) for data in np.stack(( + -update_priority[pages, offsets], # Don't use deterministic order for page, offset - nonce = random.random() - priorities.append((-priority, nonce, page, offset)) - it.iternext() + np.random.randint(0, 10000, size=pages.shape[0]), + pages, + offsets) + ).T.tolist()] heapq.heapify(priorities) return priorities @staticmethod - def _compute_delta(content, target, old): - """ - This function is the critical path for the video encoding. - """ - return edit_distance.byte_screen_error_distance(content, target) - old + def _diff_weights( + source: screen.DHGRBitmap, + target: screen.DHGRBitmap, + is_aux: bool + ): + diff = np.ndarray((32, 256), dtype=np.int) + + if is_aux: + # Pixels influenced by byte offset 0 + source_pixels0 = source.mask_and_shift_data(source.packed, 0) + target_pixels0 = target.mask_and_shift_data(target.packed, 0) + + # Concatenate 8-bit source and target into 16-bit values + pair0 = (source_pixels0 << 8) + target_pixels0 + dist0 = source.edit_distances[0][pair0].reshape(pair0.shape) + + # Pixels influenced by byte offset 2 + source_pixels2 = source.mask_and_shift_data(source.packed, 2) + target_pixels2 = target.mask_and_shift_data(target.packed, 2) + # Concatenate 12-bit source and target into 24-bit values + pair2 = (source_pixels2 << 12) + target_pixels2 + dist2 = source.edit_distances[2][pair2].reshape(pair2.shape) + + diff[:, 0::2] = dist0 + diff[:, 1::2] = dist2 + + else: + # Pixels influenced by byte offset 1 + source_pixels1 = source.mask_and_shift_data(source.packed, 1) + target_pixels1 = target.mask_and_shift_data(target.packed, 1) + pair1 = (source_pixels1 << 12) + target_pixels1 + dist1 = source.edit_distances[1][pair1].reshape(pair1.shape) + + # Pixels influenced by byte offset 3 + source_pixels3 = source.mask_and_shift_data(source.packed, 3) + target_pixels3 = 
target.mask_and_shift_data(target.packed, 3) + pair3 = (source_pixels3 << 8) + target_pixels3 + dist3 = source.edit_distances[3][pair3].reshape(pair3.shape) + + diff[:, 0::2] = dist1 + diff[:, 1::2] = dist3 + + return diff + + @functools.lru_cache(None) + def _byte_pair_difference( + self, + target_pixelmap, + byte_offset, + old_packed, + content + ): + + old_pixels = target_pixelmap.mask_and_shift_data( + old_packed, byte_offset) + new_pixels = target_pixelmap.mask_and_shift_data( + target_pixelmap.masked_update( + byte_offset, old_packed, content), byte_offset) + + if byte_offset == 0 or byte_offset == 3: + pair = (old_pixels << 8) + new_pixels + else: + pair = (old_pixels << 12) + new_pixels + + p = target_pixelmap.edit_distances[byte_offset][pair] + + return p + + @staticmethod + def _compute_delta( + content: int, + target: screen.DHGRBitmap, + old, + is_aux: bool + ): + diff = np.ndarray((32, 256), dtype=np.int) + + # TODO: use error edit distance + + if is_aux: + # Pixels influenced by byte offset 0 + source_pixels0 = target.mask_and_shift_data( + target.masked_update(0, target.packed, content), 0) + target_pixels0 = target.mask_and_shift_data(target.packed, 0) + + # Concatenate 8-bit source and target into 16-bit values + pair0 = (source_pixels0 << 8) + target_pixels0 + dist0 = target.edit_distances[0][pair0].reshape(pair0.shape) + + # Pixels influenced by byte offset 2 + source_pixels2 = target.mask_and_shift_data( + target.masked_update(2, target.packed, content), 2) + target_pixels2 = target.mask_and_shift_data(target.packed, 2) + # Concatenate 12-bit source and target into 24-bit values + pair2 = (source_pixels2 << 12) + target_pixels2 + dist2 = target.edit_distances[2][pair2].reshape(pair2.shape) + + diff[:, 0::2] = dist0 + diff[:, 1::2] = dist2 + + else: + # Pixels influenced by byte offset 1 + source_pixels1 = target.mask_and_shift_data( + target.masked_update(1, target.packed, content), 1) + target_pixels1 = target.mask_and_shift_data(target.packed, 
1) + pair1 = (source_pixels1 << 12) + target_pixels1 + dist1 = target.edit_distances[1][pair1].reshape(pair1.shape) + + # Pixels influenced by byte offset 3 + source_pixels3 = target.mask_and_shift_data( + target.masked_update(3, target.packed, content), 3) + target_pixels3 = target.mask_and_shift_data(target.packed, 3) + pair3 = (source_pixels3 << 8) + target_pixels3 + dist3 = target.edit_distances[3][pair3].reshape(pair3.shape) + + diff[:, 0::2] = dist1 + diff[:, 1::2] = dist3 + + # TODO: try different weightings + return (diff * 5) - old _OFFSETS = np.arange(256) - def _compute_error(self, page, content, target, old_error, content_deltas): - offsets = [] - + def _compute_error(self, page, content, target_pixelmap, old_error, + content_deltas, is_aux): # TODO: move this up into parent delta_screen = content_deltas.get(content) if delta_screen is None: delta_screen = self._compute_delta( - content, target.page_offset, old_error) + content, target_pixelmap, old_error, is_aux) content_deltas[content] = delta_screen delta_page = delta_screen[page] @@ -350,23 +493,16 @@ class Video: candidate_offsets = self._OFFSETS[cond] priorities = delta_page[cond] - l = [ - (priorities[i], random.random(), candidate_offsets[i]) + # TODO: vectorize this with numpy + deltas = [ + (priorities[i], random.randint(0, 10000), candidate_offsets[i]) for i in range(len(candidate_offsets)) ] - heapq.heapify(l) + heapq.heapify(deltas) - while l: - _, _, o = heapq.heappop(l) - offsets.append(o) + while deltas: + pri, _, o = heapq.heappop(deltas) + assert pri < 0 + assert o < 255 - # Make sure we don't end up considering this (page, offset) again - # until the next image frame. Even if a better match comes along, - # it's probably better to fix up some other byte. - for cd in content_deltas.values(): - cd[page, o] = 0 - - if len(offsets) == 3: - break - - return offsets + yield -pri, o