Reimplement the opcode scheduler as one that is about as fast as before.

As a bonus, we now track our target frame rate much more accurately.

Maintain a running estimate of the opcode scheduling overhead, i.e.
how many opcodes we end up scheduling for each content byte written.

Use this estimate to choose how many screen changes will fill the
cycle budget, taking them in decreasing order of the Hamming weight of
the delta. Group the selected changes by content byte and then by page,
as before.
This commit is contained in:
kris 2019-01-02 22:16:54 +00:00
parent 8e3f8c9f6d
commit 6de5f1797d
2 changed files with 143 additions and 129 deletions

66
main.py
View File

@ -8,8 +8,55 @@ import screen
CYCLES = 1024 * 1024 CYCLES = 1024 * 1024
MAX_OUT = 20 * 1024 MAX_OUT = 20 * 1024
VIDEO_FPS = 30 VIDEO_FPS = 30
APPLE_FPS = 5 APPLE_FPS = 10
# Old naive XOR algorithm:
#
#stores=1894, content changes=15, page changes=365
#Frame 0, 2654 bytes, similarity = 0.850856
#stores=1750, content changes=19, page changes=444
#Frame 3, 2676 bytes, similarity = 0.903088
#stores=1648, content changes=20, page changes=501
#Frame 6, 2690 bytes, similarity = 0.922024
#stores=1677, content changes=18, page changes=486
#Frame 9, 2685 bytes, similarity = 0.912723
#stores=1659, content changes=18, page changes=497
#Frame 12, 2689 bytes, similarity = 0.923438
#stores=1681, content changes=17, page changes=485
#Frame 15, 2685 bytes, similarity = 0.922656
#stores=1686, content changes=17, page changes=482
#Frame 18, 2684 bytes, similarity = 0.921912
#stores=1669, content changes=17, page changes=492
# New
#stores=2260, content changes=277, page changes=125
#Frame 0, 3064 bytes, similarity = 0.874740
#stores=2162, content changes=325, page changes=131
#Frame 3, 3074 bytes, similarity = 0.925670
#stores=2241, content changes=313, page changes=102
#Frame 6, 3071 bytes, similarity = 0.936942
#stores=2265, content changes=313, page changes=90
#Frame 9, 3071 bytes, similarity = 0.931882
#stores=2225, content changes=334, page changes=91
#Frame 12, 3075 bytes, similarity = 0.929427
#stores=2216, content changes=342, page changes=89
#Frame 15, 3078 bytes, similarity = 0.919978
#stores=2222, content changes=339, page changes=88
# Optimized new
#stores=1762, content changes=15, page changes=338
#Frame 0, 2468 bytes, similarity = 0.841034
#stores=2150, content changes=28, page changes=465
#Frame 3, 3136 bytes, similarity = 0.921987
#stores=2067, content changes=30, page changes=573
#Frame 6, 3273 bytes, similarity = 0.939583
#stores=1906, content changes=29, page changes=551
#Frame 9, 3066 bytes, similarity = 0.928237
#stores=1876, content changes=27, page changes=560
#Frame 12, 3050 bytes, similarity = 0.933705
#stores=1856, content changes=30, page changes=575
#Frame 15, 3066 bytes, similarity = 0.929539
#stores=1827, content changes=30, page changes=562
def main(): def main():
s = screen.Screen() s = screen.Screen()
@ -19,6 +66,10 @@ def main():
videogen = skvideo.io.vreader("CoffeeCup-H264-75.mov") videogen = skvideo.io.vreader("CoffeeCup-H264-75.mov")
with open("out.bin", "wb") as out: with open("out.bin", "wb") as out:
bytes_out = 0 bytes_out = 0
# Estimated opcode overhead, i.e. ratio of extra cycles from opcodes
fullness = 1.6
for idx, frame in enumerate(videogen): for idx, frame in enumerate(videogen):
if idx % (VIDEO_FPS // APPLE_FPS): if idx % (VIDEO_FPS // APPLE_FPS):
continue continue
@ -29,11 +80,20 @@ def main():
# im.show() # im.show()
f = screen.Frame(im) f = screen.Frame(im)
stream = bytes(s.update(f, CYCLES // APPLE_FPS)) cycle_budget = int(CYCLES / APPLE_FPS)
stream = bytes(s.update(f, cycle_budget, fullness))
fullness *= s.cycles / cycle_budget
print("Fullness = %f, cycles = %d/%d budget" % (
fullness, s.cycles, cycle_budget))
# Assert that the opcode stream reconstructs the same screen # Assert that the opcode stream reconstructs the same screen
decoder.from_stream(iter(stream)) (num_content_stores, num_content_changes,
num_page_changes) = decoder.from_stream(iter(stream))
assert np.array_equal(decoder.screen, s.screen) assert np.array_equal(decoder.screen, s.screen)
print("stores=%d, content changes=%d, page changes=%d" % (
num_content_stores, num_content_changes,
num_page_changes))
# print(" ".join("%02x(%02d)" % (b, b) for b in stream)) # print(" ".join("%02x(%02d)" % (b, b) for b in stream))
# assert that the screen decodes to the original bitmap # assert that the screen decodes to the original bitmap

206
screen.py
View File

@ -1,9 +1,8 @@
"""Screen module represents Apple II video display.""" """Screen module represents Apple II video display."""
from collections import defaultdict, Counter from collections import defaultdict
import enum import enum
import functools from typing import Set, Iterator, Union, Tuple
from typing import Dict, Set, Iterator, Union, Tuple
import numpy as np import numpy as np
@ -97,88 +96,11 @@ class Screen:
# invert this # invert this
return np.flip(np.packbits(np.flip(pixels, axis=1), axis=1), axis=1) return np.flip(np.packbits(np.flip(pixels, axis=1), axis=1), axis=1)
def update(self, frame: Frame, cycle_budget: int) -> Iterator[int]: def update(self, frame: Frame,
"""Update to match content of frame within provided budget.""" cycle_budget: int, fullness: float) -> Iterator[int]:
"""Update to match content of frame within provided budget.
self.cycles = 0 Emits encoded byte stream for rendering the image.
# Target screen memory map for new frame
target = self._encode(frame.bitmap)
# Compute difference from current frame
delta = np.bitwise_xor(self.screen, target)
delta = np.ma.masked_array(delta, np.logical_not(delta))
for b in self.encoded_byte_stream(delta, target):
yield b
if (self.cycles >= cycle_budget and
not any(o.value == b for o in Opcode)):
return
def index_by_bytes(self, deltas: np.array,
                   memmap: np.array) -> Set[Tuple[int, int, int, int]]:
    """Collect the changed bytes of an encoded screen as change tuples.

    Args:
        deltas: masked array indexed as ``deltas[y][x_byte]``; entries
            equal to ``np.ma.masked`` are treated as unchanged and
            skipped.
        memmap: 2-D array of target content bytes, indexed ``(y, x_byte)``
            in step with ``deltas``.

    Returns:
        A set of ``(page, offset, content, xor_weight)`` tuples, one per
        unmasked position.  ``page`` is the row base address shifted right
        by 8, ``offset`` is the remainder of that base address plus
        ``x_byte``, ``content`` is the byte from ``memmap`` and
        ``xor_weight`` is ``hamming_weight`` applied to the delta value.
    """
    changes = set()
    for (y, x_byte), value in np.ndenumerate(memmap):
        xor = deltas[y][x_byte]
        # Masked entries mark bytes that are unchanged in the new frame.
        if xor is np.ma.masked:
            continue

        y_base = self.Y_TO_BASE_ADDR[self.page][y]
        page = y_base >> 8
        offset = y_base - (page << 8) + x_byte

        # .item() yields a plain Python scalar; np.asscalar, used here
        # previously, no longer exists in current NumPy releases.
        changes.add((page, offset, value.item(), hamming_weight(xor)))

    return changes
def _emit(self, opcode: Union[Opcode, int]) -> int:
    """Account for one emitted stream element and return its byte value.

    Adds ``self.CYCLES[opcode]`` to ``self.cycles``.

    Args:
        opcode: either an ``Opcode`` member or a plain integer byte.

    Returns:
        ``opcode.value`` for an ``Opcode`` member, otherwise ``opcode``
        unchanged.
    """
    self.cycles += self.CYCLES[opcode]
    # Use isinstance rather than `opcode in Opcode`: enum containment with
    # a plain int raises TypeError on Python 3.8-3.11 and, from 3.12 on,
    # matches by value, after which `.value` would fail on the int.
    if isinstance(opcode, Opcode):
        return opcode.value
    return opcode
@functools.lru_cache(None)
def _score(self, diff_page: bool,
           diff_content: bool,
           xor_weight: int) -> float:
    """Return the cost, in cycles per changed pixel, of emitting a change.

    Args:
        diff_page: whether a SET_PAGE opcode must be charged.
        diff_content: whether a SET_CONTENT opcode must be charged.
        xor_weight: divisor for the total cycle cost; must be non-zero.

    Returns:
        Total cycles charged divided by ``xor_weight``.
    """
    charged = []
    if diff_page:
        charged.append(Opcode.SET_PAGE)
    if diff_content:
        charged.append(Opcode.SET_CONTENT)
    # Content byte 0 stands in for any content byte, since they all cost
    # the same number of cycles.
    charged.append(0)

    total_cycles = sum(self.CYCLES[item] for item in charged)
    return total_cycles / xor_weight
@staticmethod
def similarity(a1: np.array, a2: np.array) -> float:
    """Return the fraction of positions at which two 2-D arrays agree.

    Elements are compared by truthiness (``np.logical_xor``); the number
    of differing positions is divided by the product of the first two
    dimensions of ``a1`` and subtracted from 1.
    """
    height, width = np.shape(a1)[0], np.shape(a1)[1]
    mismatches = np.sum(np.logical_xor(a1, a2))
    return 1 - (mismatches / (height * width))
def encoded_byte_stream(self, deltas: np.array,
target: np.array) -> Iterator[int]:
"""Emit encoded byte stream for rendering the image.
The byte stream consists of offsets against a selected page (e.g. $20xx) The byte stream consists of offsets against a selected page (e.g. $20xx)
at which to write a selected content byte. Those selections are at which to write a selected content byte. Those selections are
@ -201,57 +123,81 @@ class Screen:
it optimizes the bytestream. it optimizes the bytestream.
""" """
# Construct map of byte to addr that contain it self.cycles = 0
changes = self.index_by_bytes(deltas, target) # Target screen memory map for new frame
target = self._encode(frame.bitmap)
ctr = Counter() # Compute difference from current frame
page = 0x20 delta = np.bitwise_xor(self.screen, target)
content = 0x7f delta = np.ma.masked_array(delta, np.logical_not(delta))
# TODO: strictly picking the highest next score might end up # Estimate number of opcodes that will end up fitting in the cycle
# thrashing around between pages/content bytes. Maybe score over # budget.
# larger runs of bytes? est_opcodes = int(cycle_budget / fullness / self.CYCLES[0])
scores = []
while changes:
if not scores:
scores = sorted((
(
self._score(page != ch[0], content != ch[2], ch[3]),
ctr,
ch
) for ch in changes))
best = scores.pop() # Sort by highest xor weight and take the estimated number of change
best_change = best[2] # operations
changes.remove(best_change) changes = list(
#print(best_change) sorted(self.index_changes(delta, target), reverse=True)
)[:est_opcodes]
(new_page, offset, new_content, xor_weight) = best_change # Heuristic: group by content byte first then page
#print("Score=%f" % best[0]) data = {}
for ch in changes:
xor_weight, page, offset, content = ch
data.setdefault(content, {}).setdefault(page, set()).add(offset)
if new_page != page: for content, page_offsets in data.items():
#print("changing page %02x -> %02x" % (page, new_page)) yield self._emit(Opcode.SET_CONTENT)
page = new_page yield content
for page, offsets in page_offsets.items():
yield self._emit(Opcode.SET_PAGE) yield self._emit(Opcode.SET_PAGE)
yield page yield page
# Invalidate scores for offset in offsets:
# TODO: we don't need to invalidate all of them, just those self._write(page << 8 | offset, content)
# for the old and new page yield self._emit(offset)
scores = []
if new_content != content: def index_changes(self, deltas: np.array,
content = new_content memmap: np.array) -> Set[Tuple[int, int, int, int]]:
yield self._emit(Opcode.SET_CONTENT) """Transform encoded screen to sequence of change tuples.
yield content
# Invalidate scores Change tuple is (xor_weight, page, offset, content)
# TODO: we don't need to invalidate all of them, just those """
# for the old and new content byte
scores = []
self._write(page << 8 | offset, content) changes = set()
yield self._emit(offset) it = np.nditer(memmap, flags=['multi_index'])
while not it.finished:
y, x_byte = it.multi_index
# Skip masked values, i.e. unchanged in new frame
xor = deltas[y][x_byte]
if xor is np.ma.masked:
it.iternext()
continue
y_base = self.Y_TO_BASE_ADDR[self.page][y]
page = y_base >> 8
# print("y=%d -> page=%02x" % (y, page))
xor_weight = hamming_weight(xor)
offset = y_base - (page << 8) + x_byte
changes.add((xor_weight, page, offset, np.asscalar(it[0])))
it.iternext()
return changes
def _emit(self, opcode: Union[Opcode, int]) -> int:
    """Charge the cycle cost of a stream element and return its byte.

    Adds ``self.CYCLES[opcode]`` to the running ``self.cycles`` total.

    Args:
        opcode: an ``Opcode`` member, or a plain integer byte.

    Returns:
        The member's ``value`` when given an ``Opcode``; otherwise the
        integer that was passed in.
    """
    self.cycles += self.CYCLES[opcode]
    # isinstance is used instead of `opcode in Opcode` because enum
    # containment with a plain int raises TypeError on Python 3.8-3.11,
    # and from 3.12 on it matches by value, so `.value` would then be
    # looked up on an int.
    return opcode.value if isinstance(opcode, Opcode) else opcode
@staticmethod
def similarity(a1: np.array, a2: np.array) -> float:
    """Measure the fraction of positions at which two 2-D arrays agree.

    Elements are compared by truthiness via ``np.logical_xor``.

    Args:
        a1: first array; its first two dimensions give the total count.
        a2: second array, compared element-wise against ``a1``.

    Returns:
        ``1 - differing / (rows * cols)`` as a plain Python float.
    """
    # int() converts the NumPy scalar to a Python int; np.asscalar, used
    # here previously, has been removed from current NumPy releases.
    bits_different = int(np.sum(np.logical_xor(a1, a2)))
    return 1 - (bits_different / (np.shape(a1)[0] * np.shape(a1)[1]))
def done(self) -> Iterator[int]: def done(self) -> Iterator[int]:
"""Terminate opcode stream.""" """Terminate opcode stream."""
@ -279,20 +225,28 @@ class Screen:
return np.array(np.delete(bm, np.arange(0, bm.shape[1], 2), axis=1), return np.array(np.delete(bm, np.arange(0, bm.shape[1], 2), axis=1),
dtype=np.bool) dtype=np.bool)
def from_stream(self, stream: Iterator[int]) -> None: def from_stream(self, stream: Iterator[int]) -> Tuple[int, int, int]:
"""Replay an opcode stream to build a screen image.""" """Replay an opcode stream to build a screen image."""
page = 0x20 page = 0x20
content = 0x7f content = 0x7f
num_content_changes = 0
num_page_changes = 0
num_content_stores = 0
for b in stream: for b in stream:
if b == Opcode.SET_CONTENT.value: if b == Opcode.SET_CONTENT.value:
content = next(stream) content = next(stream)
num_content_changes += 1
continue continue
elif b == Opcode.SET_PAGE.value: elif b == Opcode.SET_PAGE.value:
page = next(stream) page = next(stream)
num_page_changes += 1
continue continue
elif b == Opcode.TICK.value: elif b == Opcode.TICK.value:
continue continue
elif b == Opcode.END_FRAME.value: elif b == Opcode.END_FRAME.value:
return break
num_content_stores += 1
self._write(page << 8 | b, content) self._write(page << 8 | b, content)
return num_content_stores, num_content_changes, num_page_changes