mirror of
https://github.com/KrisKennaway/ii-vision.git
synced 2024-12-21 20:29:21 +00:00
Reimplement the opcode scheduler as one that is approximately as fast as before. As a bonus, we now track our target frame rate much more closely.
Maintain a running estimate of the opcode scheduling overhead, i.e. how many opcodes we end up scheduling for each content byte written. Use this to estimate how many screen changes will fill the cycle budget, and select that many in decreasing order of the Hamming weight of the delta. Group these by content byte and then by page, as before.
This commit is contained in:
parent
8e3f8c9f6d
commit
6de5f1797d
66
main.py
66
main.py
@ -8,8 +8,55 @@ import screen
|
|||||||
CYCLES = 1024 * 1024
|
CYCLES = 1024 * 1024
|
||||||
MAX_OUT = 20 * 1024
|
MAX_OUT = 20 * 1024
|
||||||
VIDEO_FPS = 30
|
VIDEO_FPS = 30
|
||||||
APPLE_FPS = 5
|
APPLE_FPS = 10
|
||||||
|
|
||||||
|
# Old naive XOR algorithm:
|
||||||
|
#
|
||||||
|
#stores=1894, content changes=15, page changes=365
|
||||||
|
#Frame 0, 2654 bytes, similarity = 0.850856
|
||||||
|
#stores=1750, content changes=19, page changes=444
|
||||||
|
#Frame 3, 2676 bytes, similarity = 0.903088
|
||||||
|
#stores=1648, content changes=20, page changes=501
|
||||||
|
#Frame 6, 2690 bytes, similarity = 0.922024
|
||||||
|
#stores=1677, content changes=18, page changes=486
|
||||||
|
#Frame 9, 2685 bytes, similarity = 0.912723
|
||||||
|
#stores=1659, content changes=18, page changes=497
|
||||||
|
#Frame 12, 2689 bytes, similarity = 0.923438
|
||||||
|
#stores=1681, content changes=17, page changes=485
|
||||||
|
#Frame 15, 2685 bytes, similarity = 0.922656
|
||||||
|
#stores=1686, content changes=17, page changes=482
|
||||||
|
#Frame 18, 2684 bytes, similarity = 0.921912
|
||||||
|
#stores=1669, content changes=17, page changes=492
|
||||||
|
|
||||||
|
# New
|
||||||
|
#stores=2260, content changes=277, page changes=125
|
||||||
|
#Frame 0, 3064 bytes, similarity = 0.874740
|
||||||
|
#stores=2162, content changes=325, page changes=131
|
||||||
|
#Frame 3, 3074 bytes, similarity = 0.925670
|
||||||
|
#stores=2241, content changes=313, page changes=102
|
||||||
|
#Frame 6, 3071 bytes, similarity = 0.936942
|
||||||
|
#stores=2265, content changes=313, page changes=90
|
||||||
|
#Frame 9, 3071 bytes, similarity = 0.931882
|
||||||
|
#stores=2225, content changes=334, page changes=91
|
||||||
|
#Frame 12, 3075 bytes, similarity = 0.929427
|
||||||
|
#stores=2216, content changes=342, page changes=89
|
||||||
|
#Frame 15, 3078 bytes, similarity = 0.919978
|
||||||
|
#stores=2222, content changes=339, page changes=88
|
||||||
|
|
||||||
|
# Optimized new
|
||||||
|
#stores=1762, content changes=15, page changes=338
|
||||||
|
#Frame 0, 2468 bytes, similarity = 0.841034
|
||||||
|
#stores=2150, content changes=28, page changes=465
|
||||||
|
#Frame 3, 3136 bytes, similarity = 0.921987
|
||||||
|
#stores=2067, content changes=30, page changes=573
|
||||||
|
#Frame 6, 3273 bytes, similarity = 0.939583
|
||||||
|
#stores=1906, content changes=29, page changes=551
|
||||||
|
#Frame 9, 3066 bytes, similarity = 0.928237
|
||||||
|
#stores=1876, content changes=27, page changes=560
|
||||||
|
#Frame 12, 3050 bytes, similarity = 0.933705
|
||||||
|
#stores=1856, content changes=30, page changes=575
|
||||||
|
#Frame 15, 3066 bytes, similarity = 0.929539
|
||||||
|
#stores=1827, content changes=30, page changes=562
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
s = screen.Screen()
|
s = screen.Screen()
|
||||||
@ -19,6 +66,10 @@ def main():
|
|||||||
videogen = skvideo.io.vreader("CoffeeCup-H264-75.mov")
|
videogen = skvideo.io.vreader("CoffeeCup-H264-75.mov")
|
||||||
with open("out.bin", "wb") as out:
|
with open("out.bin", "wb") as out:
|
||||||
bytes_out = 0
|
bytes_out = 0
|
||||||
|
|
||||||
|
# Estimated opcode overhead, i.e. ratio of total cycles emitted (content stores plus opcodes) to cycles spent on content stores alone
|
||||||
|
fullness = 1.6
|
||||||
|
|
||||||
for idx, frame in enumerate(videogen):
|
for idx, frame in enumerate(videogen):
|
||||||
if idx % (VIDEO_FPS // APPLE_FPS):
|
if idx % (VIDEO_FPS // APPLE_FPS):
|
||||||
continue
|
continue
|
||||||
@ -29,11 +80,20 @@ def main():
|
|||||||
# im.show()
|
# im.show()
|
||||||
|
|
||||||
f = screen.Frame(im)
|
f = screen.Frame(im)
|
||||||
stream = bytes(s.update(f, CYCLES // APPLE_FPS))
|
cycle_budget = int(CYCLES / APPLE_FPS)
|
||||||
|
stream = bytes(s.update(f, cycle_budget, fullness))
|
||||||
|
|
||||||
|
fullness *= s.cycles / cycle_budget
|
||||||
|
print("Fullness = %f, cycles = %d/%d budget" % (
|
||||||
|
fullness, s.cycles, cycle_budget))
|
||||||
|
|
||||||
# Assert that the opcode stream reconstructs the same screen
|
# Assert that the opcode stream reconstructs the same screen
|
||||||
decoder.from_stream(iter(stream))
|
(num_content_stores, num_content_changes,
|
||||||
|
num_page_changes) = decoder.from_stream(iter(stream))
|
||||||
assert np.array_equal(decoder.screen, s.screen)
|
assert np.array_equal(decoder.screen, s.screen)
|
||||||
|
print("stores=%d, content changes=%d, page changes=%d" % (
|
||||||
|
num_content_stores, num_content_changes,
|
||||||
|
num_page_changes))
|
||||||
|
|
||||||
# print(" ".join("%02x(%02d)" % (b, b) for b in stream))
|
# print(" ".join("%02x(%02d)" % (b, b) for b in stream))
|
||||||
# assert that the screen decodes to the original bitmap
|
# assert that the screen decodes to the original bitmap
|
||||||
|
208
screen.py
208
screen.py
@ -1,9 +1,8 @@
|
|||||||
"""Screen module represents Apple II video display."""
|
"""Screen module represents Apple II video display."""
|
||||||
|
|
||||||
from collections import defaultdict, Counter
|
from collections import defaultdict
|
||||||
import enum
|
import enum
|
||||||
import functools
|
from typing import Set, Iterator, Union, Tuple
|
||||||
from typing import Dict, Set, Iterator, Union, Tuple
|
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
@ -97,88 +96,11 @@ class Screen:
|
|||||||
# invert this
|
# invert this
|
||||||
return np.flip(np.packbits(np.flip(pixels, axis=1), axis=1), axis=1)
|
return np.flip(np.packbits(np.flip(pixels, axis=1), axis=1), axis=1)
|
||||||
|
|
||||||
def update(self, frame: Frame, cycle_budget: int) -> Iterator[int]:
|
def update(self, frame: Frame,
|
||||||
"""Update to match content of frame within provided budget."""
|
cycle_budget: int, fullness: float) -> Iterator[int]:
|
||||||
|
"""Update to match content of frame within provided budget.
|
||||||
|
|
||||||
self.cycles = 0
|
Emits encoded byte stream for rendering the image.
|
||||||
# Target screen memory map for new frame
|
|
||||||
target = self._encode(frame.bitmap)
|
|
||||||
|
|
||||||
# Compute difference from current frame
|
|
||||||
delta = np.bitwise_xor(self.screen, target)
|
|
||||||
delta = np.ma.masked_array(delta, np.logical_not(delta))
|
|
||||||
|
|
||||||
for b in self.encoded_byte_stream(delta, target):
|
|
||||||
yield b
|
|
||||||
if (self.cycles >= cycle_budget and
|
|
||||||
not any(o.value == b for o in Opcode)):
|
|
||||||
return
|
|
||||||
|
|
||||||
def index_by_bytes(self, deltas: np.array,
|
|
||||||
memmap: np.array) -> Set[Tuple[int, int, int, int]]:
|
|
||||||
"""Transform encoded screen to map of byte --> addr.
|
|
||||||
|
|
||||||
XXX
|
|
||||||
"""
|
|
||||||
|
|
||||||
changes = set()
|
|
||||||
it = np.nditer(memmap, flags=['multi_index'])
|
|
||||||
while not it.finished:
|
|
||||||
y, x_byte = it.multi_index
|
|
||||||
|
|
||||||
# Skip masked values, i.e. unchanged in new frame
|
|
||||||
xor = deltas[y][x_byte]
|
|
||||||
if xor is np.ma.masked:
|
|
||||||
it.iternext()
|
|
||||||
continue
|
|
||||||
|
|
||||||
y_base = self.Y_TO_BASE_ADDR[self.page][y]
|
|
||||||
page = y_base >> 8
|
|
||||||
|
|
||||||
#print("y=%d -> page=%02x" % (y, page))
|
|
||||||
xor_weight = hamming_weight(xor)
|
|
||||||
|
|
||||||
changes.add(
|
|
||||||
(
|
|
||||||
page, y_base - (page << 8) + x_byte,
|
|
||||||
np.asscalar(it[0]), xor_weight
|
|
||||||
)
|
|
||||||
)
|
|
||||||
it.iternext()
|
|
||||||
|
|
||||||
return changes
|
|
||||||
|
|
||||||
def _emit(self, opcode: Union[Opcode, int]) -> int:
|
|
||||||
self.cycles += self.CYCLES[opcode]
|
|
||||||
return opcode.value if opcode in Opcode else opcode
|
|
||||||
|
|
||||||
@functools.lru_cache(None)
|
|
||||||
def _score(self, diff_page: bool,
|
|
||||||
diff_content: bool,
|
|
||||||
xor_weight: int) -> float:
|
|
||||||
"""Computes score of how many pixels/cycle it would cost to emit"""
|
|
||||||
cycles = 0
|
|
||||||
if diff_page:
|
|
||||||
cycles += self.CYCLES[Opcode.SET_PAGE]
|
|
||||||
if diff_content:
|
|
||||||
cycles += self.CYCLES[Opcode.SET_CONTENT]
|
|
||||||
|
|
||||||
# Placeholder content since all content bytes have same cost
|
|
||||||
cycles += self.CYCLES[0]
|
|
||||||
|
|
||||||
cycles_per_pixel = cycles / xor_weight
|
|
||||||
return cycles_per_pixel
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def similarity(a1: np.array, a2: np.array) -> float:
|
|
||||||
"""Measure bitwise % similarity between two arrays"""
|
|
||||||
bits_different = np.sum(np.logical_xor(a1, a2))
|
|
||||||
|
|
||||||
return 1 - (bits_different / (np.shape(a1)[0] * np.shape(a1)[1]))
|
|
||||||
|
|
||||||
def encoded_byte_stream(self, deltas: np.array,
|
|
||||||
target: np.array) -> Iterator[int]:
|
|
||||||
"""Emit encoded byte stream for rendering the image.
|
|
||||||
|
|
||||||
The byte stream consists of offsets against a selected page (e.g. $20xx)
|
The byte stream consists of offsets against a selected page (e.g. $20xx)
|
||||||
at which to write a selected content byte. Those selections are
|
at which to write a selected content byte. Those selections are
|
||||||
@ -201,58 +123,82 @@ class Screen:
|
|||||||
it optimizes the bytestream.
|
it optimizes the bytestream.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Construct map of byte to addr that contain it
|
self.cycles = 0
|
||||||
changes = self.index_by_bytes(deltas, target)
|
# Target screen memory map for new frame
|
||||||
|
target = self._encode(frame.bitmap)
|
||||||
|
|
||||||
ctr = Counter()
|
# Compute difference from current frame
|
||||||
page = 0x20
|
delta = np.bitwise_xor(self.screen, target)
|
||||||
content = 0x7f
|
delta = np.ma.masked_array(delta, np.logical_not(delta))
|
||||||
|
|
||||||
# TODO: strictly picking the highest next score might end up
|
# Estimate number of opcodes that will end up fitting in the cycle
|
||||||
# thrashing around between pages/content bytes. Maybe score over
|
# budget.
|
||||||
# larger runs of bytes?
|
est_opcodes = int(cycle_budget / fullness / self.CYCLES[0])
|
||||||
scores = []
|
|
||||||
while changes:
|
|
||||||
if not scores:
|
|
||||||
scores = sorted((
|
|
||||||
(
|
|
||||||
self._score(page != ch[0], content != ch[2], ch[3]),
|
|
||||||
ctr,
|
|
||||||
ch
|
|
||||||
) for ch in changes))
|
|
||||||
|
|
||||||
best = scores.pop()
|
# Sort by highest xor weight and take the estimated number of change
|
||||||
best_change = best[2]
|
# operations
|
||||||
changes.remove(best_change)
|
changes = list(
|
||||||
#print(best_change)
|
sorted(self.index_changes(delta, target), reverse=True)
|
||||||
|
)[:est_opcodes]
|
||||||
|
|
||||||
(new_page, offset, new_content, xor_weight) = best_change
|
# Heuristic: group by content byte first then page
|
||||||
#print("Score=%f" % best[0])
|
data = {}
|
||||||
|
for ch in changes:
|
||||||
|
xor_weight, page, offset, content = ch
|
||||||
|
data.setdefault(content, {}).setdefault(page, set()).add(offset)
|
||||||
|
|
||||||
if new_page != page:
|
for content, page_offsets in data.items():
|
||||||
#print("changing page %02x -> %02x" % (page, new_page))
|
yield self._emit(Opcode.SET_CONTENT)
|
||||||
page = new_page
|
yield content
|
||||||
|
for page, offsets in page_offsets.items():
|
||||||
yield self._emit(Opcode.SET_PAGE)
|
yield self._emit(Opcode.SET_PAGE)
|
||||||
yield page
|
yield page
|
||||||
|
|
||||||
# Invalidate scores
|
for offset in offsets:
|
||||||
# TODO: we don't need to invalidate all of them, just those
|
|
||||||
# for the old and new page
|
|
||||||
scores = []
|
|
||||||
|
|
||||||
if new_content != content:
|
|
||||||
content = new_content
|
|
||||||
yield self._emit(Opcode.SET_CONTENT)
|
|
||||||
yield content
|
|
||||||
|
|
||||||
# Invalidate scores
|
|
||||||
# TODO: we don't need to invalidate all of them, just those
|
|
||||||
# for the old and new content byte
|
|
||||||
scores = []
|
|
||||||
|
|
||||||
self._write(page << 8 | offset, content)
|
self._write(page << 8 | offset, content)
|
||||||
yield self._emit(offset)
|
yield self._emit(offset)
|
||||||
|
|
||||||
|
def index_changes(self, deltas: np.array,
|
||||||
|
memmap: np.array) -> Set[Tuple[int, int, int, int]]:
|
||||||
|
"""Transform encoded screen to sequence of change tuples.
|
||||||
|
|
||||||
|
Change tuple is (xor_weight, page, offset, content)
|
||||||
|
"""
|
||||||
|
|
||||||
|
changes = set()
|
||||||
|
it = np.nditer(memmap, flags=['multi_index'])
|
||||||
|
while not it.finished:
|
||||||
|
y, x_byte = it.multi_index
|
||||||
|
|
||||||
|
# Skip masked values, i.e. unchanged in new frame
|
||||||
|
xor = deltas[y][x_byte]
|
||||||
|
if xor is np.ma.masked:
|
||||||
|
it.iternext()
|
||||||
|
continue
|
||||||
|
|
||||||
|
y_base = self.Y_TO_BASE_ADDR[self.page][y]
|
||||||
|
page = y_base >> 8
|
||||||
|
|
||||||
|
# print("y=%d -> page=%02x" % (y, page))
|
||||||
|
xor_weight = hamming_weight(xor)
|
||||||
|
offset = y_base - (page << 8) + x_byte
|
||||||
|
|
||||||
|
changes.add((xor_weight, page, offset, np.asscalar(it[0])))
|
||||||
|
it.iternext()
|
||||||
|
|
||||||
|
return changes
|
||||||
|
|
||||||
|
def _emit(self, opcode: Union[Opcode, int]) -> int:
|
||||||
|
self.cycles += self.CYCLES[opcode]
|
||||||
|
return opcode.value if opcode in Opcode else opcode
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def similarity(a1: np.array, a2: np.array) -> float:
|
||||||
|
"""Measure bitwise % similarity between two arrays"""
|
||||||
|
bits_different = np.asscalar(np.sum(np.logical_xor(a1, a2)))
|
||||||
|
|
||||||
|
return 1 - (bits_different / (np.shape(a1)[0] * np.shape(a1)[1]))
|
||||||
|
|
||||||
def done(self) -> Iterator[int]:
|
def done(self) -> Iterator[int]:
|
||||||
"""Terminate opcode stream."""
|
"""Terminate opcode stream."""
|
||||||
|
|
||||||
@ -279,20 +225,28 @@ class Screen:
|
|||||||
return np.array(np.delete(bm, np.arange(0, bm.shape[1], 2), axis=1),
|
return np.array(np.delete(bm, np.arange(0, bm.shape[1], 2), axis=1),
|
||||||
dtype=np.bool)
|
dtype=np.bool)
|
||||||
|
|
||||||
def from_stream(self, stream: Iterator[int]) -> None:
|
def from_stream(self, stream: Iterator[int]) -> Tuple[int, int, int]:
|
||||||
"""Replay an opcode stream to build a screen image."""
|
"""Replay an opcode stream to build a screen image."""
|
||||||
page = 0x20
|
page = 0x20
|
||||||
content = 0x7f
|
content = 0x7f
|
||||||
|
num_content_changes = 0
|
||||||
|
num_page_changes = 0
|
||||||
|
num_content_stores = 0
|
||||||
for b in stream:
|
for b in stream:
|
||||||
if b == Opcode.SET_CONTENT.value:
|
if b == Opcode.SET_CONTENT.value:
|
||||||
content = next(stream)
|
content = next(stream)
|
||||||
|
num_content_changes += 1
|
||||||
continue
|
continue
|
||||||
elif b == Opcode.SET_PAGE.value:
|
elif b == Opcode.SET_PAGE.value:
|
||||||
page = next(stream)
|
page = next(stream)
|
||||||
|
num_page_changes += 1
|
||||||
continue
|
continue
|
||||||
elif b == Opcode.TICK.value:
|
elif b == Opcode.TICK.value:
|
||||||
continue
|
continue
|
||||||
elif b == Opcode.END_FRAME.value:
|
elif b == Opcode.END_FRAME.value:
|
||||||
return
|
break
|
||||||
|
|
||||||
|
num_content_stores += 1
|
||||||
self._write(page << 8 | b, content)
|
self._write(page << 8 | b, content)
|
||||||
|
|
||||||
|
return num_content_stores, num_content_changes, num_page_changes
|
||||||
|
Loading…
Reference in New Issue
Block a user