mirror of
https://github.com/KrisKennaway/ii-vision.git
synced 2025-02-01 01:34:13 +00:00
Refactor opcode schedulers and implement one based on the ortools TSP
solver to minimize the cycle cost of visiting all changes in our estimated list. Fortunately this is a tractable (though slow) computation, and it improves on the previous heuristic by roughly 6% in throughput. The TSP-derived schedule tends to group by page and vary over content, so also implement a fast heuristic that does the same. That heuristic scheduler is within 2% of the TSP solution.
This commit is contained in:
parent
a8688a6a7e
commit
ab4b4f22fd
136
main.py
136
main.py
@ -10,53 +10,105 @@ MAX_OUT = 20 * 1024
|
||||
VIDEO_FPS = 30
|
||||
APPLE_FPS = 10
|
||||
|
||||
|
||||
# Old naive XOR algorithm:
|
||||
#
|
||||
#stores=1894, content changes=15, page changes=365
|
||||
#Frame 0, 2654 bytes, similarity = 0.850856
|
||||
#stores=1750, content changes=19, page changes=444
|
||||
#Frame 3, 2676 bytes, similarity = 0.903088
|
||||
#stores=1648, content changes=20, page changes=501
|
||||
#Frame 6, 2690 bytes, similarity = 0.922024
|
||||
#stores=1677, content changes=18, page changes=486
|
||||
#Frame 9, 2685 bytes, similarity = 0.912723
|
||||
#stores=1659, content changes=18, page changes=497
|
||||
#Frame 12, 2689 bytes, similarity = 0.923438
|
||||
#stores=1681, content changes=17, page changes=485
|
||||
#Frame 15, 2685 bytes, similarity = 0.922656
|
||||
#stores=1686, content changes=17, page changes=482
|
||||
#Frame 18, 2684 bytes, similarity = 0.921912
|
||||
#stores=1669, content changes=17, page changes=492
|
||||
# stores=1894, content changes=15, page changes=365
|
||||
# Frame 0, 2654 bytes, similarity = 0.850856
|
||||
# stores=1750, content changes=19, page changes=444
|
||||
# Frame 3, 2676 bytes, similarity = 0.903088
|
||||
# stores=1648, content changes=20, page changes=501
|
||||
# Frame 6, 2690 bytes, similarity = 0.922024
|
||||
# stores=1677, content changes=18, page changes=486
|
||||
# Frame 9, 2685 bytes, similarity = 0.912723
|
||||
# stores=1659, content changes=18, page changes=497
|
||||
# Frame 12, 2689 bytes, similarity = 0.923438
|
||||
# stores=1681, content changes=17, page changes=485
|
||||
# Frame 15, 2685 bytes, similarity = 0.922656
|
||||
# stores=1686, content changes=17, page changes=482
|
||||
# Frame 18, 2684 bytes, similarity = 0.921912
|
||||
# stores=1669, content changes=17, page changes=492
|
||||
|
||||
# New
|
||||
#stores=2260, content changes=277, page changes=125
|
||||
#Frame 0, 3064 bytes, similarity = 0.874740
|
||||
#stores=2162, content changes=325, page changes=131
|
||||
#Frame 3, 3074 bytes, similarity = 0.925670
|
||||
#stores=2241, content changes=313, page changes=102
|
||||
#Frame 6, 3071 bytes, similarity = 0.936942
|
||||
#stores=2265, content changes=313, page changes=90
|
||||
#Frame 9, 3071 bytes, similarity = 0.931882
|
||||
#stores=2225, content changes=334, page changes=91
|
||||
#Frame 12, 3075 bytes, similarity = 0.929427
|
||||
#stores=2216, content changes=342, page changes=89
|
||||
#Frame 15, 3078 bytes, similarity = 0.919978
|
||||
#stores=2222, content changes=339, page changes=88
|
||||
# stores=2260, content changes=277, page changes=125
|
||||
# Frame 0, 3064 bytes, similarity = 0.874740
|
||||
# stores=2162, content changes=325, page changes=131
|
||||
# Frame 3, 3074 bytes, similarity = 0.925670
|
||||
# stores=2241, content changes=313, page changes=102
|
||||
# Frame 6, 3071 bytes, similarity = 0.936942
|
||||
# stores=2265, content changes=313, page changes=90
|
||||
# Frame 9, 3071 bytes, similarity = 0.931882
|
||||
# stores=2225, content changes=334, page changes=91
|
||||
# Frame 12, 3075 bytes, similarity = 0.929427
|
||||
# stores=2216, content changes=342, page changes=89
|
||||
# Frame 15, 3078 bytes, similarity = 0.919978
|
||||
# stores=2222, content changes=339, page changes=88
|
||||
|
||||
# Optimized new
|
||||
#stores=1762, content changes=15, page changes=338
|
||||
#Frame 0, 2468 bytes, similarity = 0.841034
|
||||
#stores=2150, content changes=28, page changes=465
|
||||
#Frame 3, 3136 bytes, similarity = 0.921987
|
||||
#stores=2067, content changes=30, page changes=573
|
||||
#Frame 6, 3273 bytes, similarity = 0.939583
|
||||
#stores=1906, content changes=29, page changes=551
|
||||
#Frame 9, 3066 bytes, similarity = 0.928237
|
||||
#stores=1876, content changes=27, page changes=560
|
||||
#Frame 12, 3050 bytes, similarity = 0.933705
|
||||
#stores=1856, content changes=30, page changes=575
|
||||
#Frame 15, 3066 bytes, similarity = 0.929539
|
||||
#stores=1827, content changes=30, page changes=562
|
||||
# Fullness = 1.384560, cycles = 90738/104857 budget
|
||||
# stores=1872, content changes=15, page changes=352
|
||||
# Frame 0, 2606 bytes, similarity = 0.849219
|
||||
# Fullness = 1.452588, cycles = 110009/104857 budget
|
||||
# stores=2163, content changes=28, page changes=472
|
||||
# Frame 3, 3163 bytes, similarity = 0.924256
|
||||
# Fullness = 1.577072, cycles = 113843/104857 budget
|
||||
# stores=2062, content changes=30, page changes=577
|
||||
# Frame 6, 3276 bytes, similarity = 0.939918
|
||||
# Fullness = 1.597466, cycles = 106213/104857 budget
|
||||
# stores=1899, content changes=29, page changes=550
|
||||
# Frame 9, 3057 bytes, similarity = 0.928274
|
||||
# Fullness = 1.615001, cycles = 106008/104857 budget
|
||||
# stores=1875, content changes=27, page changes=561
|
||||
# Frame 12, 3051 bytes, similarity = 0.933854
|
||||
# Fullness = 1.639691, cycles = 106460/104857 budget
|
||||
# stores=1855, content changes=30, page changes=575
|
||||
# Frame 15, 3065 bytes, similarity = 0.929725
|
||||
# Fullness = 1.635406, cycles = 104583/104857 budget
|
||||
# stores=1827, content changes=30, page changes=562
|
||||
|
||||
# TSP solver
|
||||
# Fullness = 1.336189, cycles = 87568/104857 budget
|
||||
# stores=1872, content changes=320, page changes=32
|
||||
# Frame 0, 2576 bytes, similarity = 0.849219
|
||||
# Fullness = 1.386065, cycles = 108771/104857 budget
|
||||
# stores=2242, content changes=452, page changes=33
|
||||
# Frame 3, 3212 bytes, similarity = 0.927604
|
||||
# Fullness = 1.482284, cycles = 112136/104857 budget
|
||||
# stores=2161, content changes=552, page changes=33
|
||||
# Frame 6, 3331 bytes, similarity = 0.943415
|
||||
# Fullness = 1.501014, cycles = 106182/104857 budget
|
||||
# stores=2021, content changes=535, page changes=33
|
||||
# Frame 9, 3157 bytes, similarity = 0.934263
|
||||
# Fullness = 1.523818, cycles = 106450/104857 budget
|
||||
# stores=1995, content changes=554, page changes=33
|
||||
# Frame 12, 3169 bytes, similarity = 0.939844
|
||||
# Fullness = 1.543029, cycles = 106179/104857 budget
|
||||
# stores=1966, content changes=566, page changes=33
|
||||
# Frame 15, 3164 bytes, similarity = 0.935231
|
||||
# Fullness = 1.538659, cycles = 104560/104857 budget
|
||||
# stores=1941, content changes=554, page changes=33
|
||||
|
||||
# page first
|
||||
# Fullness = 1.366463, cycles = 89552/104857 budget
|
||||
# stores=1872, content changes=352, page changes=32
|
||||
# Frame 0, 2640 bytes, similarity = 0.849219
|
||||
# Fullness = 1.413155, cycles = 108440/104857 budget
|
||||
# stores=2192, content changes=476, page changes=32
|
||||
# Frame 3, 3208 bytes, similarity = 0.925744
|
||||
# Fullness = 1.516888, cycles = 112554/104857 budget
|
||||
# stores=2120, content changes=583, page changes=32
|
||||
# Frame 6, 3350 bytes, similarity = 0.942187
|
||||
# Fullness = 1.535086, cycles = 106115/104857 budget
|
||||
# stores=1975, content changes=561, page changes=32
|
||||
# Frame 9, 3161 bytes, similarity = 0.932106
|
||||
# Fullness = 1.553913, cycles = 106143/104857 budget
|
||||
# stores=1951, content changes=575, page changes=32
|
||||
# Frame 12, 3165 bytes, similarity = 0.937835
|
||||
# Fullness = 1.571548, cycles = 106047/104857 budget
|
||||
# stores=1927, content changes=587, page changes=32
|
||||
# Frame 15, 3165 bytes, similarity = 0.933259
|
||||
# Fullness = 1.572792, cycles = 104940/104857 budget
|
||||
# stores=1906, content changes=581, page changes=32
|
||||
|
||||
def main():
|
||||
s = screen.Screen()
|
||||
@ -117,7 +169,7 @@ def main():
|
||||
break
|
||||
|
||||
print("Frame %d, %d bytes, similarity = %f" % (
|
||||
idx, len(stream), s.similarity(im,bm)))
|
||||
idx, len(stream), s.similarity(im, bm)))
|
||||
out.write(stream)
|
||||
|
||||
out.write(bytes(s.done()))
|
||||
|
93
screen.py
93
screen.py
@ -5,8 +5,12 @@ import functools
|
||||
import enum
|
||||
from typing import Set, Iterator, Union, Tuple
|
||||
|
||||
from ortools.constraint_solver import pywrapcp
|
||||
from ortools.constraint_solver import routing_enums_pb2
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
@functools.lru_cache(None)
|
||||
def hamming_weight(n: int) -> int:
|
||||
"""Compute hamming weight of 8-bit int"""
|
||||
@ -142,6 +146,77 @@ class Screen:
|
||||
sorted(self.index_changes(delta, target), reverse=True)
|
||||
)[:est_opcodes]
|
||||
|
||||
for b in self._heuristic_opcode_scheduler(changes):
|
||||
yield b
|
||||
|
||||
def _tsp_opcode_scheduler(self, changes):
    """Yield an opcode stream that visits ``changes`` in TSP-solved order.

    Builds a pairwise cycle-cost matrix over ``changes``, hands it to the
    ortools routing solver as a single-vehicle problem, and then walks the
    returned route, emitting a page switch and/or content switch whenever
    the next change needs one, followed by the store to its offset.

    Args:
        changes: indexable, sized collection of
            ``(_, page, offset, content)`` tuples; the first field is
            ignored by this scheduler.

    Yields:
        The result of ``self._emit(...)`` for each opcode and offset,
        interleaved with the raw page / content operand values.

    Raises:
        ValueError: if the solver does not return an assignment.
    """
    num_changes = len(changes)
    if num_changes == 0:
        # Nothing to schedule; do not build a routing model with no nodes.
        return

    # dist[i][j] is the cycle cost of emitting change j right after change
    # i: the store itself, plus a page switch and/or a content switch when
    # those differ between the two changes.
    #
    # Use the builtin ``int`` as dtype: the ``np.int`` alias was deprecated
    # and later removed from NumPy.
    dist = np.zeros(shape=(num_changes, num_changes), dtype=int)
    for i1, ch1 in enumerate(changes):
        _, page1, _, content1 = ch1
        for i2, ch2 in enumerate(changes):
            if ch1 == ch2:
                # Identical change tuples (including the diagonal) cost 0.
                continue
            _, page2, _, content2 = ch2

            cost = self.CYCLES[0]  # Emit the target content byte
            if page1 != page2:
                cost += self.CYCLES[Opcode.SET_PAGE]
            if content1 != content2:
                cost += self.CYCLES[Opcode.SET_CONTENT]

            # The cost is symmetric and the loops visit both (i1, i2) and
            # (i2, i1), so a single assignment fills the whole matrix.
            dist[i1][i2] = cost

    def distance_callback(from_node, to_node):
        # The solver expects a plain Python int, not a numpy scalar.
        return int(dist[from_node][to_node])

    # One vehicle, starting from node 0.
    routing = pywrapcp.RoutingModel(num_changes, 1, 0)
    search_parameters = pywrapcp.RoutingModel.DefaultSearchParameters()
    routing.SetArcCostEvaluatorOfAllVehicles(distance_callback)

    assignment = routing.SolveWithParameters(search_parameters)
    if not assignment:
        raise ValueError('No solution found.')

    # Solution distance.
    print("Total cycles: " + str(assignment.ObjectiveValue()))

    # Only one route here; otherwise iterate from 0 to
    # routing.vehicles() - 1.
    route_number = 0
    # Index of the variable for the starting node.
    index = routing.Start(route_number)

    # Assumed initial page / content state of the decoder.
    # NOTE(review): confirm these match the player's actual start state.
    page = 0x20
    content = 0x7f

    # TODO: I think this will end by visiting the origin node which
    # is not what we want
    while not routing.IsEnd(index):
        # NOTE(review): the routing index is used directly as an index into
        # ``changes``; verify this holds for the ortools version in use.
        _, new_page, offset, new_content = changes[index]

        if new_page != page:
            page = new_page
            yield self._emit(Opcode.SET_PAGE)
            yield page

        if new_content != content:
            content = new_content
            yield self._emit(Opcode.SET_CONTENT)
            yield content

        self._write(page << 8 | offset, content)
        yield self._emit(offset)

        index = assignment.Value(routing.NextVar(index))
|
||||
|
||||
def _heuristic_opcode_scheduler(self, changes):
|
||||
# Heuristic: group by content byte first then page
|
||||
data = {}
|
||||
for ch in changes:
|
||||
@ -159,6 +234,24 @@ class Screen:
|
||||
self._write(page << 8 | offset, content)
|
||||
yield self._emit(offset)
|
||||
|
||||
def _heuristic_page_first_opcode_scheduler(self, changes):
    """Yield an opcode stream for ``changes`` grouped by page, then content.

    Changes are bucketed as page -> content byte -> set of offsets. For
    each page a SET_PAGE opcode and the page value are emitted; within the
    page, each content byte gets a SET_CONTENT opcode and the content
    value, followed by one store per offset in that bucket.

    Args:
        changes: iterable of ``(xor_weight, page, offset, content)``
            tuples; ``xor_weight`` is not used by this scheduler.

    Yields:
        The result of ``self._emit(...)`` for each opcode and offset,
        interleaved with the raw page / content operand values.
    """
    # page -> {content byte -> {offsets}}
    by_page = {}
    for _weight, pg, off, byte in changes:
        per_content = by_page.setdefault(pg, {})
        per_content.setdefault(byte, set()).add(off)

    for pg in by_page:
        yield self._emit(Opcode.SET_PAGE)
        yield pg

        for byte, offs in by_page[pg].items():
            yield self._emit(Opcode.SET_CONTENT)
            yield byte

            for off in offs:
                # Record the store in the screen model, then emit it.
                self._write((pg << 8) | off, byte)
                yield self._emit(off)
|
||||
|
||||
def index_changes(self, deltas: np.array,
|
||||
memmap: np.array) -> Set[Tuple[int, int, int, int]]:
|
||||
"""Transform encoded screen to sequence of change tuples.
|
||||
|
Loading…
x
Reference in New Issue
Block a user