- Add a progress bar while computing the edit distance matrices

- Only compute the upper triangle since the matrix is symmetrical and
  we can reconstruct the rest cheaply at load time

- Compute edit distances for HGR as well by making use of the fact that
  the masked representation is 14-bit but still sparse.

- Add sanity checks for various invariants of the edit distance matrices.
This commit is contained in:
kris 2019-07-07 21:25:07 +01:00
parent ab29b01d0f
commit b05c3bec1e
2 changed files with 142 additions and 69 deletions

View File

@ -1,8 +1,7 @@
import bz2 import bz2
import functools import functools
import pickle import pickle
import time import sys
import datetime
from typing import Iterable, Type from typing import Iterable, Type
import colormath.color_conversions import colormath.color_conversions
@ -10,9 +9,11 @@ import colormath.color_diff
import colormath.color_objects import colormath.color_objects
import numpy as np import numpy as np
import weighted_levenshtein import weighted_levenshtein
from etaprogress.progress import ProgressBar
import colours import colours
import palette import palette
import screen
# The DHGR display encodes 7 pixels across interleaved 4-byte sequences # The DHGR display encodes 7 pixels across interleaved 4-byte sequences
# of AUX and MAIN memory, as follows: # of AUX and MAIN memory, as follows:
@ -118,7 +119,7 @@ def compute_diff_matrix(pal: Type[palette.BasePalette]):
return dm return dm
def make_substitute_costs(pal: Type[palette.BasePalette]): def compute_substitute_costs(pal: Type[palette.BasePalette]):
edp = EditDistanceParams() edp = EditDistanceParams()
diff_matrix = compute_diff_matrix(pal) diff_matrix = compute_diff_matrix(pal)
@ -152,86 +153,80 @@ def edit_distance(
return res return res
def make_edit_distance(edp: EditDistanceParams): def compute_edit_distance(
edit = [ edp: EditDistanceParams,
np.zeros(shape=(2 ** 26), dtype=np.uint16), bitmap_cls: Type[screen.Bitmap],
np.zeros(shape=(2 ** 26), dtype=np.uint16), nominal_colours: Type[colours.NominalColours]
np.zeros(shape=(2 ** 26), dtype=np.uint16), ):
np.zeros(shape=(2 ** 26), dtype=np.uint16), bits = bitmap_cls.MASKED_BITS
]
start_time = time.time() bitrange = np.uint64(2 ** bits)
for i in range(2 ** 13): edit = []
if i > 1: for _ in range(len(bitmap_cls.BYTE_MASKS)):
now = time.time() edit.append(
eta = datetime.timedelta( np.zeros(shape=np.uint64(bitrange * bitrange), dtype=np.uint16))
seconds=(now - start_time) * (2 ** 13 / i))
print("%.2f%% (ETA %s)" % (100 * i / (2 ** 13), eta))
for j in range(2 ** 13):
pair = (i << 13) + j
# Each DHGR byte offset has the same range of int13 possible # Matrix is symmetrical with zero diagonal so only need to compute upper
# values and nominal colour pixels, but with different initial # triangle
# phases: bar = ProgressBar((bitrange * (bitrange - 1)) / 2, max_width=80)
# AUX 0: 0 (1 at start of 3-bit header)
# MAIN 0: 3 (0)
# AUX 1: 2 (3)
# MAIN 1: 1 (2)
first_pixels = pixel_string( num_dots = bitmap_cls.HEADER_BITS + bitmap_cls.BODY_BITS
colours.int34_to_nominal_colour_pixel_values(
i, colours.DHGRColours, init_phase=1)
)
second_pixels = pixel_string(
colours.int34_to_nominal_colour_pixel_values(
j, colours.DHGRColours, init_phase=1))
edit[0][pair] = edit_distance(
edp, first_pixels, second_pixels, error=False)
first_pixels = pixel_string( cnt = 0
colours.int34_to_nominal_colour_pixel_values( for i in range(np.uint64(bitrange)):
i, colours.DHGRColours, init_phase=0) for j in range(i):
) cnt += 1
second_pixels = pixel_string(
colours.int34_to_nominal_colour_pixel_values(
j, colours.DHGRColours, init_phase=0))
edit[1][pair] = edit_distance(
edp, first_pixels, second_pixels, error=False)
first_pixels = pixel_string( if cnt % 10000 == 0:
colours.int34_to_nominal_colour_pixel_values( bar.numerator = cnt
i, colours.DHGRColours, init_phase=3) print(bar, end='\r')
) sys.stdout.flush()
second_pixels = pixel_string(
colours.int34_to_nominal_colour_pixel_values( pair = (np.uint64(i) << bits) + np.uint64(j)
j, colours.DHGRColours, init_phase=3))
edit[2][pair] = edit_distance( for o, ph in enumerate(bitmap_cls.PHASES):
edp, first_pixels, second_pixels, error=False) first_dots = bitmap_cls.to_dots(i, byte_offset=o)
second_dots = bitmap_cls.to_dots(j, byte_offset=o)
first_pixels = pixel_string(
colours.dots_to_nominal_colour_pixel_values(
num_dots, first_dots, nominal_colours,
init_phase=ph)
)
second_pixels = pixel_string(
colours.dots_to_nominal_colour_pixel_values(
num_dots, second_dots, nominal_colours,
init_phase=ph)
)
edit[o][pair] = edit_distance(
edp, first_pixels, second_pixels, error=False)
first_pixels = pixel_string(
colours.int34_to_nominal_colour_pixel_values(
i, colours.DHGRColours, init_phase=2)
)
second_pixels = pixel_string(
colours.int34_to_nominal_colour_pixel_values(
j, colours.DHGRColours, init_phase=2))
edit[3][pair] = edit_distance(
edp, first_pixels, second_pixels, error=False)
return edit return edit
def make_edit_distance(
        pal: Type[palette.BasePalette],
        edp: EditDistanceParams,
        bitmap_cls: Type[screen.Bitmap],
        nominal_colours: Type[colours.NominalColours]
):
    """Compute the edit distance tables for one palette and persist them.

    The tables are produced by ``compute_edit_distance`` and written as a
    bz2-compressed pickle whose file name is built from ``bitmap_cls.NAME``
    and ``pal.ID.value``.
    """
    matrices = compute_edit_distance(edp, bitmap_cls, nominal_colours)

    path_template = "transcoder/data/%s_palette_%d_edit_distance.pickle.bz2"
    out_path = path_template % (bitmap_cls.NAME, pal.ID.value)

    # Maximum compression level, matching the highest pickle protocol below.
    with bz2.open(out_path, "wb", compresslevel=9) as fh:
        pickle.dump(matrices, fh, protocol=pickle.HIGHEST_PROTOCOL)
def main(): def main():
for p in palette.PALETTES.values(): for p in palette.PALETTES.values():
print("Processing palette %s" % p) print("Processing palette %s" % p)
edp = make_substitute_costs(p) edp = compute_substitute_costs(p)
edit = make_edit_distance(edp)
# TODO: error distance matrices # TODO: error distance matrices
data = "transcoder/data/DHGR_palette_%d_edit_distance.pickle" \
".bz2" % p.ID.value make_edit_distance(p, edp, screen.HGRBitmap, colours.HGRColours)
with bz2.open(data, "wb", compresslevel=9) as out: make_edit_distance(p, edp, screen.DHGRBitmap, colours.DHGRColours)
pickle.dump(edit, out, protocol=pickle.HIGHEST_PROTOCOL)
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -1,7 +1,13 @@
import sys
import unittest import unittest
from colours import HGRColours import numpy as np
from etaprogress.progress import ProgressBar
import make_data_tables import make_data_tables
import screen
from colours import HGRColours
from palette import PALETTES
class TestMakeDataTables(unittest.TestCase): class TestMakeDataTables(unittest.TestCase):
@ -9,6 +15,78 @@ class TestMakeDataTables(unittest.TestCase):
pixels = (HGRColours.BLACK, HGRColours.WHITE, HGRColours.ORANGE) pixels = (HGRColours.BLACK, HGRColours.WHITE, HGRColours.ORANGE)
self.assertEqual("0FC", make_data_tables.pixel_string(pixels)) self.assertEqual("0FC", make_data_tables.pixel_string(pixels))
def test_edit_distances(self):
    """Sanity-check invariants of the DHGR edit distance tables.

    For each palette, every table returned by
    ``screen.DHGRBitmap.edit_distances`` is treated as a flat array indexed
    by ``(i << 13) + j`` for a pair of 13-bit values ``i`` and ``j``.  The
    test asserts that zero distances occur only where ``i == j`` and that
    the entry for ``(i, j)`` equals the entry for ``(j, i)``.
    """
    bits = 13
    size = 2 ** bits
    mask = size - 1

    for p in PALETTES:
        ed = screen.DHGRBitmap.edit_distances(p)
        print(p)

        # Derive the table count from the data instead of hard-coding it,
        # so that every table is checked and the progress bar total
        # matches the number of comparisons actually performed.
        num_tables = len(ed)
        bar = ProgressBar(num_tables * size * (size - 1) / 2, max_width=80)

        cnt = 0
        for ph in range(num_tables):
            table = ed[ph]

            # Only zero entries should be on the diagonal, i.e. at indices
            # of the form (i << 13) + i
            zeros = np.arange(len(table))[table == 0]
            for z in zeros:
                z1 = z & mask
                z2 = (z >> bits) & mask
                self.assertEqual(z1, z2)

            # Assert that the matrix is symmetrical
            for i in range(size):
                for j in range(i):
                    cnt += 1

                    if cnt % 10000 == 0:
                        bar.numerator = cnt
                        print(bar, end='\r')
                        sys.stdout.flush()

                    self.assertEqual(
                        table[(i << bits) + j],
                        table[(j << bits) + i],
                    )

                    # Distances are never negative
                    self.assertGreaterEqual(table[(i << bits) + j], 0)
def test_edit_distances_hgr(self):
    """Sanity-check invariants of the HGR edit distance tables.

    For each palette, every table returned by
    ``screen.HGRBitmap.edit_distances`` is treated as a flat array indexed
    by ``(i << 14) + j`` for a pair of 14-bit values ``i`` and ``j``.  The
    test asserts that the entry for ``(i, j)`` equals the entry for
    ``(j, i)`` and that no entry is negative.
    """
    bits = 14
    size = 2 ** bits

    for p in PALETTES:
        ed = screen.HGRBitmap.edit_distances(p)
        print(p)

        # Derive the table count from the data so the progress bar total
        # matches the number of comparisons actually performed.
        num_tables = len(ed)
        bar = ProgressBar(num_tables * size * (size - 1) / 2, max_width=80)

        cnt = 0
        for ph in range(num_tables):
            table = ed[ph]

            # The "zero entries occur only on the diagonal, i.e. at indices
            # of the form (i << 14) + i" check performed for DHGR is
            # intentionally not applied here.
            # NOTE(review): presumably the sparse 14-bit masked
            # representation leaves zero entries off the diagonal --
            # confirm before enabling that check for HGR.

            # Assert that the matrix is symmetrical
            for i in range(size):
                for j in range(i):
                    cnt += 1

                    if cnt % 10000 == 0:
                        bar.numerator = cnt
                        print(bar, end='\r')
                        sys.stdout.flush()

                    self.assertEqual(
                        table[(i << bits) + j],
                        table[(j << bits) + i],
                    )

                    # Distances are never negative
                    self.assertGreaterEqual(table[(i << bits) + j], 0)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()