Reimplement the opcode scheduler as one that is about as fast as before.

As a bonus, we now track our target frame rate much more accurately.

Maintain a running estimate of the opcode scheduling overhead, i.e.
how many opcodes we end up scheduling for each content byte written.

Use this estimate to choose how many screen changes should fit in the
cycle budget, taking the changes with the highest Hamming weight of the
delta first.  Group the selected changes by content byte and then by
page, as before.
This commit is contained in:
kris 2019-01-02 22:16:54 +00:00
parent 8e3f8c9f6d
commit 6de5f1797d
2 changed files with 143 additions and 129 deletions

66
main.py
View File

@ -8,8 +8,55 @@ import screen
CYCLES = 1024 * 1024
MAX_OUT = 20 * 1024
VIDEO_FPS = 30
APPLE_FPS = 5
APPLE_FPS = 10
# Old naive XOR algorithm:
#
#stores=1894, content changes=15, page changes=365
#Frame 0, 2654 bytes, similarity = 0.850856
#stores=1750, content changes=19, page changes=444
#Frame 3, 2676 bytes, similarity = 0.903088
#stores=1648, content changes=20, page changes=501
#Frame 6, 2690 bytes, similarity = 0.922024
#stores=1677, content changes=18, page changes=486
#Frame 9, 2685 bytes, similarity = 0.912723
#stores=1659, content changes=18, page changes=497
#Frame 12, 2689 bytes, similarity = 0.923438
#stores=1681, content changes=17, page changes=485
#Frame 15, 2685 bytes, similarity = 0.922656
#stores=1686, content changes=17, page changes=482
#Frame 18, 2684 bytes, similarity = 0.921912
#stores=1669, content changes=17, page changes=492
# New
#stores=2260, content changes=277, page changes=125
#Frame 0, 3064 bytes, similarity = 0.874740
#stores=2162, content changes=325, page changes=131
#Frame 3, 3074 bytes, similarity = 0.925670
#stores=2241, content changes=313, page changes=102
#Frame 6, 3071 bytes, similarity = 0.936942
#stores=2265, content changes=313, page changes=90
#Frame 9, 3071 bytes, similarity = 0.931882
#stores=2225, content changes=334, page changes=91
#Frame 12, 3075 bytes, similarity = 0.929427
#stores=2216, content changes=342, page changes=89
#Frame 15, 3078 bytes, similarity = 0.919978
#stores=2222, content changes=339, page changes=88
# Optimized new
#stores=1762, content changes=15, page changes=338
#Frame 0, 2468 bytes, similarity = 0.841034
#stores=2150, content changes=28, page changes=465
#Frame 3, 3136 bytes, similarity = 0.921987
#stores=2067, content changes=30, page changes=573
#Frame 6, 3273 bytes, similarity = 0.939583
#stores=1906, content changes=29, page changes=551
#Frame 9, 3066 bytes, similarity = 0.928237
#stores=1876, content changes=27, page changes=560
#Frame 12, 3050 bytes, similarity = 0.933705
#stores=1856, content changes=30, page changes=575
#Frame 15, 3066 bytes, similarity = 0.929539
#stores=1827, content changes=30, page changes=562
def main():
s = screen.Screen()
@ -19,6 +66,10 @@ def main():
videogen = skvideo.io.vreader("CoffeeCup-H264-75.mov")
with open("out.bin", "wb") as out:
bytes_out = 0
# Estimated opcode overhead, i.e. ratio of extra cycles from opcodes
fullness = 1.6
for idx, frame in enumerate(videogen):
if idx % (VIDEO_FPS // APPLE_FPS):
continue
@ -29,11 +80,20 @@ def main():
# im.show()
f = screen.Frame(im)
stream = bytes(s.update(f, CYCLES // APPLE_FPS))
cycle_budget = int(CYCLES / APPLE_FPS)
stream = bytes(s.update(f, cycle_budget, fullness))
fullness *= s.cycles / cycle_budget
print("Fullness = %f, cycles = %d/%d budget" % (
fullness, s.cycles, cycle_budget))
# Assert that the opcode stream reconstructs the same screen
decoder.from_stream(iter(stream))
(num_content_stores, num_content_changes,
num_page_changes) = decoder.from_stream(iter(stream))
assert np.array_equal(decoder.screen, s.screen)
print("stores=%d, content changes=%d, page changes=%d" % (
num_content_stores, num_content_changes,
num_page_changes))
# print(" ".join("%02x(%02d)" % (b, b) for b in stream))
# assert that the screen decodes to the original bitmap

206
screen.py
View File

@ -1,9 +1,8 @@
"""Screen module represents Apple II video display."""
from collections import defaultdict, Counter
from collections import defaultdict
import enum
import functools
from typing import Dict, Set, Iterator, Union, Tuple
from typing import Set, Iterator, Union, Tuple
import numpy as np
@ -97,88 +96,11 @@ class Screen:
# invert this
return np.flip(np.packbits(np.flip(pixels, axis=1), axis=1), axis=1)
def update(self, frame: Frame, cycle_budget: int) -> Iterator[int]:
"""Update to match content of frame within provided budget."""
def update(self, frame: Frame,
cycle_budget: int, fullness: float) -> Iterator[int]:
"""Update to match content of frame within provided budget.
self.cycles = 0
# Target screen memory map for new frame
target = self._encode(frame.bitmap)
# Compute difference from current frame
delta = np.bitwise_xor(self.screen, target)
delta = np.ma.masked_array(delta, np.logical_not(delta))
for b in self.encoded_byte_stream(delta, target):
yield b
if (self.cycles >= cycle_budget and
not any(o.value == b for o in Opcode)):
return
def index_by_bytes(self, deltas: np.array,
                   memmap: np.array) -> Set[Tuple[int, int, int, int]]:
    """Index every changed screen byte as a change tuple.

    Args:
        deltas: masked array indexed as ``deltas[y][x_byte]``; entries that
            are masked are treated as unchanged and skipped.
        memmap: target screen memory map, iterated element by element with
            the same (y, x_byte) indexing.

    Returns:
        Set of ``(page, offset, content, xor_weight)`` tuples, where
        ``page`` is the high byte of the row's base address, ``offset`` is
        the position within that page, ``content`` is the target byte and
        ``xor_weight`` is ``hamming_weight`` of the delta at that position.
    """
    changes = set()
    it = np.nditer(memmap, flags=['multi_index'])
    while not it.finished:
        y, x_byte = it.multi_index

        # Skip masked values, i.e. unchanged in new frame
        xor = deltas[y][x_byte]
        if xor is np.ma.masked:
            it.iternext()
            continue

        y_base = self.Y_TO_BASE_ADDR[self.page][y]
        page = y_base >> 8
        offset = y_base - (page << 8) + x_byte

        # ndarray.item() replaces np.asscalar(), which was removed from
        # NumPy; both convert the 0-d element to a plain Python scalar.
        changes.add((page, offset, it[0].item(), hamming_weight(xor)))
        it.iternext()

    return changes
def _emit(self, opcode: Union[Opcode, int]) -> int:
    """Charge the cycle cost of one stream byte and return that byte.

    Args:
        opcode: an ``Opcode`` member or a plain int; either is used
            directly as the key into ``self.CYCLES``.

    Returns:
        The member's ``value`` for an ``Opcode``, otherwise the int itself.
    """
    self.cycles += self.CYCLES[opcode]
    # Use isinstance() rather than `opcode in Opcode`: containment tests
    # with a non-member int raise TypeError on Python 3.8-3.11, and on
    # 3.12+ they match by value, which would then call .value on an int.
    return opcode.value if isinstance(opcode, Opcode) else opcode
@functools.lru_cache(None)
def _score(self, diff_page: bool,
           diff_content: bool,
           xor_weight: int) -> float:
    """Return the cycle cost per unit of xor weight for emitting one store.

    The cost is the store itself plus, when required, the opcodes needed
    to switch page and/or content byte first.  Results are memoised on the
    full argument tuple.
    """
    page_cost = self.CYCLES[Opcode.SET_PAGE] if diff_page else 0
    content_cost = self.CYCLES[Opcode.SET_CONTENT] if diff_content else 0
    # Every content byte costs the same, so key 0 stands in for all of them
    store_cost = self.CYCLES[0]

    return (page_cost + content_cost + store_cost) / xor_weight
@staticmethod
def similarity(a1: np.array, a2: np.array) -> float:
    """Return the fraction of positions at which two 2-D arrays agree.

    Positions are compared by truth value (logical XOR); the denominator
    is the element count taken from the first two dimensions of ``a1``.
    """
    dims = np.shape(a1)
    total = dims[0] * dims[1]
    mismatches = np.logical_xor(a1, a2).sum()
    return 1 - (mismatches / total)
def encoded_byte_stream(self, deltas: np.array,
target: np.array) -> Iterator[int]:
"""Emit encoded byte stream for rendering the image.
Emits encoded byte stream for rendering the image.
The byte stream consists of offsets against a selected page (e.g. $20xx)
at which to write a selected content byte. Those selections are
@ -201,57 +123,81 @@ class Screen:
it optimizes the bytestream.
"""
# Construct map of byte to addr that contain it
changes = self.index_by_bytes(deltas, target)
self.cycles = 0
# Target screen memory map for new frame
target = self._encode(frame.bitmap)
ctr = Counter()
page = 0x20
content = 0x7f
# Compute difference from current frame
delta = np.bitwise_xor(self.screen, target)
delta = np.ma.masked_array(delta, np.logical_not(delta))
# TODO: strictly picking the highest next score might end up
# thrashing around between pages/content bytes. Maybe score over
# larger runs of bytes?
scores = []
while changes:
if not scores:
scores = sorted((
(
self._score(page != ch[0], content != ch[2], ch[3]),
ctr,
ch
) for ch in changes))
# Estimate number of opcodes that will end up fitting in the cycle
# budget.
est_opcodes = int(cycle_budget / fullness / self.CYCLES[0])
best = scores.pop()
best_change = best[2]
changes.remove(best_change)
#print(best_change)
# Sort by highest xor weight and take the estimated number of change
# operations
changes = list(
sorted(self.index_changes(delta, target), reverse=True)
)[:est_opcodes]
(new_page, offset, new_content, xor_weight) = best_change
#print("Score=%f" % best[0])
# Heuristic: group by content byte first then page
data = {}
for ch in changes:
xor_weight, page, offset, content = ch
data.setdefault(content, {}).setdefault(page, set()).add(offset)
if new_page != page:
#print("changing page %02x -> %02x" % (page, new_page))
page = new_page
for content, page_offsets in data.items():
yield self._emit(Opcode.SET_CONTENT)
yield content
for page, offsets in page_offsets.items():
yield self._emit(Opcode.SET_PAGE)
yield page
# Invalidate scores
# TODO: we don't need to invalidate all of them, just those
# for the old and new page
scores = []
for offset in offsets:
self._write(page << 8 | offset, content)
yield self._emit(offset)
if new_content != content:
content = new_content
yield self._emit(Opcode.SET_CONTENT)
yield content
def index_changes(self, deltas: np.array,
                  memmap: np.array) -> Set[Tuple[int, int, int, int]]:
    """Transform encoded screen to a set of change tuples.

    Change tuple is (xor_weight, page, offset, content), so that sorting
    the tuples orders them by xor weight first.

    Args:
        deltas: masked array indexed as ``deltas[y][x_byte]``; masked
            entries are treated as unchanged and skipped.
        memmap: target screen memory map, iterated element by element with
            the same (y, x_byte) indexing.

    Returns:
        Set of change tuples, one per unmasked position.
    """
    changes = set()
    it = np.nditer(memmap, flags=['multi_index'])
    while not it.finished:
        y, x_byte = it.multi_index

        # Skip masked values, i.e. unchanged in new frame
        xor = deltas[y][x_byte]
        if xor is np.ma.masked:
            it.iternext()
            continue

        y_base = self.Y_TO_BASE_ADDR[self.page][y]
        page = y_base >> 8

        xor_weight = hamming_weight(xor)
        offset = y_base - (page << 8) + x_byte

        # ndarray.item() replaces np.asscalar(), which was removed from
        # NumPy; both convert the 0-d element to a plain Python scalar.
        changes.add((xor_weight, page, offset, it[0].item()))
        it.iternext()

    return changes
def _emit(self, opcode: Union[Opcode, int]) -> int:
    """Charge the cycle cost of one stream byte and return that byte.

    Args:
        opcode: an ``Opcode`` member or a plain int; either is used
            directly as the key into ``self.CYCLES``.

    Returns:
        The member's ``value`` for an ``Opcode``, otherwise the int itself.
    """
    self.cycles += self.CYCLES[opcode]
    # Use isinstance() rather than `opcode in Opcode`: containment tests
    # with a non-member int raise TypeError on Python 3.8-3.11, and on
    # 3.12+ they match by value, which would then call .value on an int.
    return opcode.value if isinstance(opcode, Opcode) else opcode
@staticmethod
def similarity(a1: np.array, a2: np.array) -> float:
    """Measure the fraction of positions at which two 2-D arrays agree.

    Positions are compared by truth value (logical XOR).  The result is a
    fraction (1.0 means identical), not a percentage.

    Args:
        a1: first array; its first two dimensions give the element count.
        a2: second array, XORed element-wise against ``a1``.

    Returns:
        ``1 - differing / total`` as a Python float.
    """
    # ndarray.item() replaces np.asscalar(), which was removed from NumPy;
    # both yield a plain Python int for the count.
    bits_different = np.sum(np.logical_xor(a1, a2)).item()
    return 1 - (bits_different / (np.shape(a1)[0] * np.shape(a1)[1]))
def done(self) -> Iterator[int]:
"""Terminate opcode stream."""
@ -279,20 +225,28 @@ class Screen:
return np.array(np.delete(bm, np.arange(0, bm.shape[1], 2), axis=1),
dtype=np.bool)
def from_stream(self, stream: Iterator[int]) -> Tuple[int, int, int]:
    """Replay an opcode stream to build a screen image.

    Args:
        stream: iterator of stream bytes.  It must be a true iterator, not
            merely an iterable, because operand bytes are pulled from it
            with ``next()`` while the loop is iterating over it.

    Returns:
        ``(num_content_stores, num_content_changes, num_page_changes)``
        counted up to the END_FRAME opcode or the end of the stream.
    """
    page = 0x20
    content = 0x7f
    num_content_changes = 0
    num_page_changes = 0
    num_content_stores = 0
    for b in stream:
        if b == Opcode.SET_CONTENT.value:
            content = next(stream)
            num_content_changes += 1
            continue
        elif b == Opcode.SET_PAGE.value:
            page = next(stream)
            num_page_changes += 1
            continue
        elif b == Opcode.TICK.value:
            continue
        elif b == Opcode.END_FRAME.value:
            # break, not a bare return, so the counters below are still
            # returned when the frame terminator is seen
            break

        # Any other byte is an offset within the current page
        num_content_stores += 1
        self._write(page << 8 | b, content)

    return num_content_stores, num_content_changes, num_page_changes