Refactor opcode schedulers and implement one based on the ortools TSP

solver to minimize the cycle cost to visit all changes in our estimated
list.

This is fortunately a tractable (though slow) computation, and it gives
an improvement over the previous heuristic of roughly 6% better
throughput.

The TSP opcode schedule prefers to group stores by page while varying the
content byte, so also implement a fast heuristic that groups the same way.
This heuristic scheduler is within 2% of the TSP solution.
This commit is contained in:
kris 2019-01-02 23:10:03 +00:00
parent a8688a6a7e
commit ab4b4f22fd
2 changed files with 187 additions and 42 deletions

136
main.py
View File

@ -10,53 +10,105 @@ MAX_OUT = 20 * 1024
VIDEO_FPS = 30
APPLE_FPS = 10
# Old naive XOR algorithm:
#
#stores=1894, content changes=15, page changes=365
#Frame 0, 2654 bytes, similarity = 0.850856
#stores=1750, content changes=19, page changes=444
#Frame 3, 2676 bytes, similarity = 0.903088
#stores=1648, content changes=20, page changes=501
#Frame 6, 2690 bytes, similarity = 0.922024
#stores=1677, content changes=18, page changes=486
#Frame 9, 2685 bytes, similarity = 0.912723
#stores=1659, content changes=18, page changes=497
#Frame 12, 2689 bytes, similarity = 0.923438
#stores=1681, content changes=17, page changes=485
#Frame 15, 2685 bytes, similarity = 0.922656
#stores=1686, content changes=17, page changes=482
#Frame 18, 2684 bytes, similarity = 0.921912
#stores=1669, content changes=17, page changes=492
# stores=1894, content changes=15, page changes=365
# Frame 0, 2654 bytes, similarity = 0.850856
# stores=1750, content changes=19, page changes=444
# Frame 3, 2676 bytes, similarity = 0.903088
# stores=1648, content changes=20, page changes=501
# Frame 6, 2690 bytes, similarity = 0.922024
# stores=1677, content changes=18, page changes=486
# Frame 9, 2685 bytes, similarity = 0.912723
# stores=1659, content changes=18, page changes=497
# Frame 12, 2689 bytes, similarity = 0.923438
# stores=1681, content changes=17, page changes=485
# Frame 15, 2685 bytes, similarity = 0.922656
# stores=1686, content changes=17, page changes=482
# Frame 18, 2684 bytes, similarity = 0.921912
# stores=1669, content changes=17, page changes=492
# New
#stores=2260, content changes=277, page changes=125
#Frame 0, 3064 bytes, similarity = 0.874740
#stores=2162, content changes=325, page changes=131
#Frame 3, 3074 bytes, similarity = 0.925670
#stores=2241, content changes=313, page changes=102
#Frame 6, 3071 bytes, similarity = 0.936942
#stores=2265, content changes=313, page changes=90
#Frame 9, 3071 bytes, similarity = 0.931882
#stores=2225, content changes=334, page changes=91
#Frame 12, 3075 bytes, similarity = 0.929427
#stores=2216, content changes=342, page changes=89
#Frame 15, 3078 bytes, similarity = 0.919978
#stores=2222, content changes=339, page changes=88
# stores=2260, content changes=277, page changes=125
# Frame 0, 3064 bytes, similarity = 0.874740
# stores=2162, content changes=325, page changes=131
# Frame 3, 3074 bytes, similarity = 0.925670
# stores=2241, content changes=313, page changes=102
# Frame 6, 3071 bytes, similarity = 0.936942
# stores=2265, content changes=313, page changes=90
# Frame 9, 3071 bytes, similarity = 0.931882
# stores=2225, content changes=334, page changes=91
# Frame 12, 3075 bytes, similarity = 0.929427
# stores=2216, content changes=342, page changes=89
# Frame 15, 3078 bytes, similarity = 0.919978
# stores=2222, content changes=339, page changes=88
# Optimized new
#stores=1762, content changes=15, page changes=338
#Frame 0, 2468 bytes, similarity = 0.841034
#stores=2150, content changes=28, page changes=465
#Frame 3, 3136 bytes, similarity = 0.921987
#stores=2067, content changes=30, page changes=573
#Frame 6, 3273 bytes, similarity = 0.939583
#stores=1906, content changes=29, page changes=551
#Frame 9, 3066 bytes, similarity = 0.928237
#stores=1876, content changes=27, page changes=560
#Frame 12, 3050 bytes, similarity = 0.933705
#stores=1856, content changes=30, page changes=575
#Frame 15, 3066 bytes, similarity = 0.929539
#stores=1827, content changes=30, page changes=562
# Fullness = 1.384560, cycles = 90738/104857 budget
# stores=1872, content changes=15, page changes=352
# Frame 0, 2606 bytes, similarity = 0.849219
# Fullness = 1.452588, cycles = 110009/104857 budget
# stores=2163, content changes=28, page changes=472
# Frame 3, 3163 bytes, similarity = 0.924256
# Fullness = 1.577072, cycles = 113843/104857 budget
# stores=2062, content changes=30, page changes=577
# Frame 6, 3276 bytes, similarity = 0.939918
# Fullness = 1.597466, cycles = 106213/104857 budget
# stores=1899, content changes=29, page changes=550
# Frame 9, 3057 bytes, similarity = 0.928274
# Fullness = 1.615001, cycles = 106008/104857 budget
# stores=1875, content changes=27, page changes=561
# Frame 12, 3051 bytes, similarity = 0.933854
# Fullness = 1.639691, cycles = 106460/104857 budget
# stores=1855, content changes=30, page changes=575
# Frame 15, 3065 bytes, similarity = 0.929725
# Fullness = 1.635406, cycles = 104583/104857 budget
# stores=1827, content changes=30, page changes=562
# TSP solver
# Fullness = 1.336189, cycles = 87568/104857 budget
# stores=1872, content changes=320, page changes=32
# Frame 0, 2576 bytes, similarity = 0.849219
# Fullness = 1.386065, cycles = 108771/104857 budget
# stores=2242, content changes=452, page changes=33
# Frame 3, 3212 bytes, similarity = 0.927604
# Fullness = 1.482284, cycles = 112136/104857 budget
# stores=2161, content changes=552, page changes=33
# Frame 6, 3331 bytes, similarity = 0.943415
# Fullness = 1.501014, cycles = 106182/104857 budget
# stores=2021, content changes=535, page changes=33
# Frame 9, 3157 bytes, similarity = 0.934263
# Fullness = 1.523818, cycles = 106450/104857 budget
# stores=1995, content changes=554, page changes=33
# Frame 12, 3169 bytes, similarity = 0.939844
# Fullness = 1.543029, cycles = 106179/104857 budget
# stores=1966, content changes=566, page changes=33
# Frame 15, 3164 bytes, similarity = 0.935231
# Fullness = 1.538659, cycles = 104560/104857 budget
# stores=1941, content changes=554, page changes=33
# page first
# Fullness = 1.366463, cycles = 89552/104857 budget
# stores=1872, content changes=352, page changes=32
# Frame 0, 2640 bytes, similarity = 0.849219
# Fullness = 1.413155, cycles = 108440/104857 budget
# stores=2192, content changes=476, page changes=32
# Frame 3, 3208 bytes, similarity = 0.925744
# Fullness = 1.516888, cycles = 112554/104857 budget
# stores=2120, content changes=583, page changes=32
# Frame 6, 3350 bytes, similarity = 0.942187
# Fullness = 1.535086, cycles = 106115/104857 budget
# stores=1975, content changes=561, page changes=32
# Frame 9, 3161 bytes, similarity = 0.932106
# Fullness = 1.553913, cycles = 106143/104857 budget
# stores=1951, content changes=575, page changes=32
# Frame 12, 3165 bytes, similarity = 0.937835
# Fullness = 1.571548, cycles = 106047/104857 budget
# stores=1927, content changes=587, page changes=32
# Frame 15, 3165 bytes, similarity = 0.933259
# Fullness = 1.572792, cycles = 104940/104857 budget
# stores=1906, content changes=581, page changes=32
def main():
s = screen.Screen()
@ -117,7 +169,7 @@ def main():
break
print("Frame %d, %d bytes, similarity = %f" % (
idx, len(stream), s.similarity(im,bm)))
idx, len(stream), s.similarity(im, bm)))
out.write(stream)
out.write(bytes(s.done()))

View File

@ -5,8 +5,12 @@ import functools
import enum
from typing import Set, Iterator, Union, Tuple
from ortools.constraint_solver import pywrapcp
from ortools.constraint_solver import routing_enums_pb2
import numpy as np
@functools.lru_cache(None)
def hamming_weight(n: int) -> int:
"""Compute hamming weight of 8-bit int"""
@ -142,6 +146,77 @@ class Screen:
sorted(self.index_changes(delta, target), reverse=True)
)[:est_opcodes]
for b in self._heuristic_opcode_scheduler(changes):
yield b
def _tsp_opcode_scheduler(self, changes):
    """Schedule stores by solving a TSP over the change list.

    Builds a pairwise transition-cost matrix (in cycles) between changes
    and asks the ortools routing solver for a minimum-cost tour, then
    emits opcodes along that tour.

    :param changes: sequence of (xor_weight, page, offset, content)
        tuples to schedule.
    :yield: opcode/operand bytes produced via self._emit; each store is
        also applied to the memory map via self._write.
    :raises ValueError: if the solver finds no assignment.
    """
    # Transition cost between two changes: emitting the target offset
    # always costs CYCLES[0]; switching page and/or content adds the
    # cost of the corresponding opcode.  The metric is symmetric, so
    # compute the upper triangle once and mirror it.
    # NOTE: dtype must be a real type -- np.int was a deprecated alias
    # for the builtin int and is removed in NumPy >= 1.24.
    num_changes = len(changes)
    dist = np.zeros(shape=(num_changes, num_changes), dtype=np.int64)
    for i1, ch1 in enumerate(changes):
        _, page1, _, content1 = ch1
        for i2 in range(i1 + 1, num_changes):
            ch2 = changes[i2]
            if ch1 == ch2:
                # Identical changes are free to visit consecutively.
                continue
            _, page2, _, content2 = ch2
            cost = self.CYCLES[0]  # Emit the target content byte
            if page1 != page2:
                cost += self.CYCLES[Opcode.SET_PAGE]
            if content1 != content2:
                cost += self.CYCLES[Opcode.SET_CONTENT]
            dist[i1][i2] = cost
            dist[i2][i1] = cost

    def create_distance_callback(dist_matrix):
        # ortools requires a plain-int callback; close over the matrix.
        def distance_callback(from_node, to_node):
            return int(dist_matrix[from_node][to_node])

        return distance_callback

    # Single vehicle, depot at node 0 (i.e. the tour starts at changes[0]).
    routing = pywrapcp.RoutingModel(len(changes), 1, 0)
    search_parameters = pywrapcp.RoutingModel.DefaultSearchParameters()
    # Create the distance callback.
    dist_callback = create_distance_callback(dist)
    routing.SetArcCostEvaluatorOfAllVehicles(dist_callback)
    assignment = routing.SolveWithParameters(search_parameters)
    if not assignment:
        raise ValueError('No solution found.')
    # Solution distance.
    print("Total cycles: " + str(assignment.ObjectiveValue()))
    # Walk the single route.  (Only one vehicle here; otherwise iterate
    # route numbers 0 .. routing.vehicles() - 1.)
    route_number = 0
    index = routing.Start(route_number)  # Variable index of the start node.
    # Assumed initial decoder state -- TODO confirm these match the
    # player's reset values.
    page = 0x20
    content = 0x7f
    # TODO: I think this will end by visiting the origin node which
    # is not what we want
    while not routing.IsEnd(index):
        _, new_page, offset, new_content = changes[index]
        if new_page != page:
            page = new_page
            yield self._emit(Opcode.SET_PAGE)
            yield page
        if new_content != content:
            content = new_content
            yield self._emit(Opcode.SET_CONTENT)
            yield content
        self._write(page << 8 | offset, content)
        yield self._emit(offset)
        index = assignment.Value(routing.NextVar(index))
def _heuristic_opcode_scheduler(self, changes):
# Heuristic: group by content byte first then page
data = {}
for ch in changes:
@ -159,6 +234,24 @@ class Screen:
self._write(page << 8 | offset, content)
yield self._emit(offset)
def _heuristic_page_first_opcode_scheduler(self, changes):
    """Emit stores grouped by page first, then by content byte.

    Minimizes SET_PAGE opcodes at the cost of extra SET_CONTENT
    opcodes within each page.

    :param changes: iterable of (xor_weight, page, offset, content)
        tuples to schedule.
    :yield: opcode/operand bytes produced via self._emit; each store is
        also applied to the memory map via self._write.
    """
    # Bucket offsets as {page: {content: {offset, ...}}}.
    by_page = {}
    for _xor_weight, page, offset, content in changes:
        contents_for_page = by_page.setdefault(page, {})
        contents_for_page.setdefault(content, set()).add(offset)
    for page, contents_for_page in by_page.items():
        # Switch page once per group.
        yield self._emit(Opcode.SET_PAGE)
        yield page
        for content, offsets in contents_for_page.items():
            # Then switch content once per distinct byte in the page.
            yield self._emit(Opcode.SET_CONTENT)
            yield content
            for offset in offsets:
                self._write(page << 8 | offset, content)
                yield self._emit(offset)
def index_changes(self, deltas: np.array,
memmap: np.array) -> Set[Tuple[int, int, int, int]]:
"""Transform encoded screen to sequence of change tuples.