Reimplement the opcode scheduler as one that is approximately as fast as before.

As a bonus, we now track our target frame rate much more accurately. We maintain a running estimate of the opcode scheduling overhead, i.e. how many opcodes we end up scheduling for each content byte written. This estimate is used to select an estimated number of screen changes that will fill the cycle budget, ordered by the Hamming weight of the delta. These changes are then grouped by content byte and then by page, as before.
2024-12-21 20:29:21 +00:00 · 2019-01-02 22:16:54 +00:00 · 2019-01-02 22:16:54 +00:00 · 6de5f1797d
commit 6de5f1797d
parent 8e3f8c9f6d
2 changed files with 143 additions and 129 deletions
--- a/main.py
+++ b/main.py
@ -8,8 +8,55 @@ import screen
 CYCLES = 1024 * 1024
 MAX_OUT = 20 * 1024
 VIDEO_FPS = 30
-APPLE_FPS = 5
+APPLE_FPS = 10
 # Old naive XOR algorithm:
 #
 #stores=1894, content changes=15, page changes=365
 #Frame 0, 2654 bytes, similarity = 0.850856
 #stores=1750, content changes=19, page changes=444
 #Frame 3, 2676 bytes, similarity = 0.903088
 #stores=1648, content changes=20, page changes=501
 #Frame 6, 2690 bytes, similarity = 0.922024
 #stores=1677, content changes=18, page changes=486
 #Frame 9, 2685 bytes, similarity = 0.912723
 #stores=1659, content changes=18, page changes=497
 #Frame 12, 2689 bytes, similarity = 0.923438
 #stores=1681, content changes=17, page changes=485
 #Frame 15, 2685 bytes, similarity = 0.922656
 #stores=1686, content changes=17, page changes=482
 #Frame 18, 2684 bytes, similarity = 0.921912
 #stores=1669, content changes=17, page changes=492
 # New
 #stores=2260, content changes=277, page changes=125
 #Frame 0, 3064 bytes, similarity = 0.874740
 #stores=2162, content changes=325, page changes=131
 #Frame 3, 3074 bytes, similarity = 0.925670
 #stores=2241, content changes=313, page changes=102
 #Frame 6, 3071 bytes, similarity = 0.936942
 #stores=2265, content changes=313, page changes=90
 #Frame 9, 3071 bytes, similarity = 0.931882
 #stores=2225, content changes=334, page changes=91
 #Frame 12, 3075 bytes, similarity = 0.929427
 #stores=2216, content changes=342, page changes=89
 #Frame 15, 3078 bytes, similarity = 0.919978
 #stores=2222, content changes=339, page changes=88
 # Optimized new
 #stores=1762, content changes=15, page changes=338
 #Frame 0, 2468 bytes, similarity = 0.841034
 #stores=2150, content changes=28, page changes=465
 #Frame 3, 3136 bytes, similarity = 0.921987
 #stores=2067, content changes=30, page changes=573
 #Frame 6, 3273 bytes, similarity = 0.939583
 #stores=1906, content changes=29, page changes=551
 #Frame 9, 3066 bytes, similarity = 0.928237
 #stores=1876, content changes=27, page changes=560
 #Frame 12, 3050 bytes, similarity = 0.933705
 #stores=1856, content changes=30, page changes=575
 #Frame 15, 3066 bytes, similarity = 0.929539
 #stores=1827, content changes=30, page changes=562
 def main():
    s = screen.Screen()
@ -19,6 +66,10 @@ def main():
    videogen = skvideo.io.vreader("CoffeeCup-H264-75.mov")
    with open("out.bin", "wb") as out:
        bytes_out = 0
        # Estimated opcode overhead, i.e. ratio of extra cycles from opcodes
        fullness = 1.6
        for idx, frame in enumerate(videogen):
            if idx % (VIDEO_FPS // APPLE_FPS):
                continue
@ -29,11 +80,20 @@ def main():
            # im.show()
            f = screen.Frame(im)
-            stream = bytes(s.update(f, CYCLES // APPLE_FPS))
+            cycle_budget = int(CYCLES / APPLE_FPS)
            stream = bytes(s.update(f, cycle_budget, fullness))
            fullness *= s.cycles / cycle_budget
            print("Fullness = %f, cycles = %d/%d budget" % (
                fullness, s.cycles, cycle_budget))
            # Assert that the opcode stream reconstructs the same screen
-            decoder.from_stream(iter(stream))
+            (num_content_stores, num_content_changes,
             num_page_changes) = decoder.from_stream(iter(stream))
            assert np.array_equal(decoder.screen, s.screen)
            print("stores=%d, content changes=%d, page changes=%d" % (
                num_content_stores, num_content_changes,
                num_page_changes))
            # print(" ".join("%02x(%02d)" % (b, b) for b in stream))
            # assert that the screen decodes to the original bitmap
--- a/screen.py
+++ b/screen.py
@ -1,9 +1,8 @@
 """Screen module represents Apple II video display."""
-from collections import defaultdict, Counter
+from collections import defaultdict
 import enum
-import functools
+from typing import Set, Iterator, Union, Tuple
 from typing import Dict, Set, Iterator, Union, Tuple
 import numpy as np
@ -97,88 +96,11 @@ class Screen:
        # invert this
        return np.flip(np.packbits(np.flip(pixels, axis=1), axis=1), axis=1)
-    def update(self, frame: Frame, cycle_budget: int) -> Iterator[int]:
+    def update(self, frame: Frame,
-        """Update to match content of frame within provided budget."""
+               cycle_budget: int, fullness: float) -> Iterator[int]:
        """Update to match content of frame within provided budget.
-        self.cycles = 0
+        Emits encoded byte stream for rendering the image.
        # Target screen memory map for new frame
        target = self._encode(frame.bitmap)
        # Compute difference from current frame
        delta = np.bitwise_xor(self.screen, target)
        delta = np.ma.masked_array(delta, np.logical_not(delta))
        for b in self.encoded_byte_stream(delta, target):
            yield b
            if (self.cycles >= cycle_budget and
                    not any(o.value == b for o in Opcode)):
                return
    def index_by_bytes(self, deltas: np.array,
                       memmap: np.array) -> Set[Tuple[int, int, int, int]]:
        """Index every changed byte of the memory map as a change tuple.

        Args:
            deltas: masked array indexed as deltas[y][x_byte]; positions that
                evaluate to np.ma.masked are treated as unchanged and skipped.
            memmap: target memory map, indexed the same way as deltas
                (assumed 2-D, since each index is unpacked as (y, x_byte)).

        Returns:
            Set of (page, offset, content, xor_weight) tuples where page is
            the high byte of the row's base address, offset is the position
            of the byte relative to page << 8, content is the byte value
            from memmap, and xor_weight is hamming_weight() of the delta.
        """
        changes = set()
        for (y, x_byte), content in np.ndenumerate(memmap):
            xor = deltas[y][x_byte]
            # Skip masked values, i.e. unchanged in new frame
            if xor is np.ma.masked:
                continue

            y_base = self.Y_TO_BASE_ADDR[self.page][y]
            page = y_base >> 8
            offset = y_base - (page << 8) + x_byte
            xor_weight = hamming_weight(xor)

            # .item() replaces np.asscalar(), which has been removed from
            # NumPy; both convert the element to a plain Python scalar.
            changes.add((page, offset, content.item(), xor_weight))
        return changes
    def _emit(self, opcode: Union[Opcode, int]) -> int:
        """Charge the cycle cost of one stream byte and return its int value.

        Args:
            opcode: either an Opcode member or a plain int byte; it is used
                as-is as the key into self.CYCLES.

        Returns:
            opcode.value for an Opcode member, otherwise the int unchanged.
        """
        self.cycles += self.CYCLES[opcode]
        # Use isinstance rather than `opcode in Opcode`: enum containment
        # checks on plain ints raise TypeError on some Python versions and
        # match by value on others, which would then break on `.value`.
        return opcode.value if isinstance(opcode, Opcode) else opcode
    @functools.lru_cache(None)
    def _score(self, diff_page: bool,
               diff_content: bool,
               xor_weight: int) -> float:
        """Return the cycle cost of one store divided by its xor weight.

        Args:
            diff_page: whether a SET_PAGE opcode must be paid for first.
            diff_content: whether a SET_CONTENT opcode must be paid for first.
            xor_weight: divisor for the total cost; must be non-zero.

        Returns:
            Total cycles (optional opcodes plus the store itself) divided by
            xor_weight, i.e. cycles per unit of weight (lower is cheaper).
        """
        # NOTE(review): the unbounded cache is keyed on self as well, so it
        # keeps every instance it has seen alive - confirm this is intended.
        page_cost = self.CYCLES[Opcode.SET_PAGE] if diff_page else 0
        content_cost = self.CYCLES[Opcode.SET_CONTENT] if diff_content else 0
        # All content bytes cost the same, so entry 0 stands in for any store
        store_cost = self.CYCLES[0]
        return (page_cost + content_cost + store_cost) / xor_weight
    @staticmethod
    def similarity(a1: np.array, a2: np.array) -> float:
        """Measure bitwise % similarity between two arrays"""
        bits_different = np.sum(np.logical_xor(a1, a2))
        return 1 - (bits_different / (np.shape(a1)[0] * np.shape(a1)[1]))
    def encoded_byte_stream(self, deltas: np.array,
                            target: np.array) -> Iterator[int]:
        """Emit encoded byte stream for rendering the image.
        The byte stream consists of offsets against a selected page (e.g. $20xx)
        at which to write a selected content byte.  Those selections are
@ -201,58 +123,82 @@ class Screen:
        it optimizes the bytestream.
        """
-        # Construct map of byte to addr that contain it
+        self.cycles = 0
-        changes = self.index_by_bytes(deltas, target)
+        # Target screen memory map for new frame
        target = self._encode(frame.bitmap)
-        ctr = Counter()
+        # Compute difference from current frame
-        page = 0x20
+        delta = np.bitwise_xor(self.screen, target)
-        content = 0x7f
+        delta = np.ma.masked_array(delta, np.logical_not(delta))
-        # TODO: strictly picking the highest next score might end up
+        # Estimate number of opcodes that will end up fitting in the cycle
-        #  thrashing around between pages/content bytes.  Maybe score over
+        # budget.
-        #  larger runs of bytes?
+        est_opcodes = int(cycle_budget / fullness / self.CYCLES[0])
        scores = []
        while changes:
            if not scores:
                scores = sorted((
                    (
                        self._score(page != ch[0], content != ch[2], ch[3]),
                        ctr,
                        ch
                    ) for ch in changes))
-            best = scores.pop()
+        # Sort by highest xor weight and take the estimated number of change
-            best_change = best[2]
+        # operations
-            changes.remove(best_change)
+        changes = list(
-            #print(best_change)
+            sorted(self.index_changes(delta, target), reverse=True)
        )[:est_opcodes]
-            (new_page, offset, new_content, xor_weight) = best_change
+        # Heuristic: group by content byte first then page
-            #print("Score=%f" % best[0])
+        data = {}
        for ch in changes:
            xor_weight, page, offset, content = ch
            data.setdefault(content, {}).setdefault(page, set()).add(offset)
-            if new_page != page:
+        for content, page_offsets in data.items():
-                #print("changing page %02x -> %02x" % (page, new_page))
+            yield self._emit(Opcode.SET_CONTENT)
-                page = new_page
+            yield content
            for page, offsets in page_offsets.items():
                yield self._emit(Opcode.SET_PAGE)
                yield page
-                # Invalidate scores
+                for offset in offsets:
                # TODO: we don't need to invalidate all of them, just those
                #  for the old and new page
                scores = []
            if new_content != content:
                content = new_content
                yield self._emit(Opcode.SET_CONTENT)
                yield content
                # Invalidate scores
                # TODO: we don't need to invalidate all of them, just those
                #  for the old and new content byte
                scores = []
                    self._write(page << 8 | offset, content)
                    yield self._emit(offset)
    def index_changes(self, deltas: np.array,
                      memmap: np.array) -> Set[Tuple[int, int, int, int]]:
        """Transform encoded screen to a set of change tuples.

        Args:
            deltas: masked array indexed as deltas[y][x_byte]; positions that
                evaluate to np.ma.masked are treated as unchanged and skipped.
            memmap: target memory map, indexed the same way as deltas
                (assumed 2-D, since each index is unpacked as (y, x_byte)).

        Returns:
            Set of (xor_weight, page, offset, content) tuples.  xor_weight is
            hamming_weight() of the delta and comes first so that sorting the
            tuples orders them by weight; page is the high byte of the row's
            base address, offset is relative to page << 8, and content is
            the byte value from memmap.
        """
        changes = set()
        for (y, x_byte), content in np.ndenumerate(memmap):
            xor = deltas[y][x_byte]
            # Skip masked values, i.e. unchanged in new frame
            if xor is np.ma.masked:
                continue

            y_base = self.Y_TO_BASE_ADDR[self.page][y]
            page = y_base >> 8
            xor_weight = hamming_weight(xor)
            offset = y_base - (page << 8) + x_byte

            # .item() replaces np.asscalar(), which has been removed from
            # NumPy; both convert the element to a plain Python scalar.
            changes.add((xor_weight, page, offset, content.item()))
        return changes
    def _emit(self, opcode: Union[Opcode, int]) -> int:
        """Charge the cycle cost of one stream byte and return its int value.

        Args:
            opcode: either an Opcode member or a plain int byte; it is used
                as-is as the key into self.CYCLES.

        Returns:
            opcode.value for an Opcode member, otherwise the int unchanged.
        """
        self.cycles += self.CYCLES[opcode]
        # Use isinstance rather than `opcode in Opcode`: enum containment
        # checks on plain ints raise TypeError on some Python versions and
        # match by value on others, which would then break on `.value`.
        return opcode.value if isinstance(opcode, Opcode) else opcode
    @staticmethod
    def similarity(a1: np.array, a2: np.array) -> float:
        """Measure bitwise % similarity between two arrays"""
        bits_different = np.asscalar(np.sum(np.logical_xor(a1, a2)))
        return 1 - (bits_different / (np.shape(a1)[0] * np.shape(a1)[1]))
    def done(self) -> Iterator[int]:
        """Terminate opcode stream."""
@ -279,20 +225,28 @@ class Screen:
        return np.array(np.delete(bm, np.arange(0, bm.shape[1], 2), axis=1),
                        dtype=np.bool)
-    def from_stream(self, stream: Iterator[int]) -> None:
+    def from_stream(self, stream: Iterator[int]) -> Tuple[int, int, int]:
        """Replay an opcode stream to build a screen image."""
        page = 0x20
        content = 0x7f
        num_content_changes = 0
        num_page_changes = 0
        num_content_stores = 0
        for b in stream:
            if b == Opcode.SET_CONTENT.value:
                content = next(stream)
                num_content_changes += 1
                continue
            elif b == Opcode.SET_PAGE.value:
                page = next(stream)
                num_page_changes += 1
                continue
            elif b == Opcode.TICK.value:
                continue
            elif b == Opcode.END_FRAME.value:
-                return
+                break
            num_content_stores += 1
            self._write(page << 8 | b, content)
        return num_content_stores, num_content_changes, num_page_changes