diff --git a/README.md b/README.md index 947a2b5..88cce23 100644 --- a/README.md +++ b/README.md @@ -50,8 +50,7 @@ TODO: link video once it is available. ## Installation -This currently requires python3.7 because some dependencies (e.g. weighted-levenshtein) don't compile with 3.9+, and 3.8 -has a [bug](https://bugs.python.org/issue44439) in object pickling. +This currently requires python3.7 because some dependencies (e.g. weighted-levenshtein) don't compile with 3.9+. ``` python3.7 -m venv venv @@ -59,23 +58,21 @@ source venv/bin/activate pip install -r requirements.txt ``` -To generate the data files required by the transcoder: +Before you can run the transcoder you need to generate the data files it requires: ``` % python transcoder/make_data_tables.py ``` -This takes about 3 hours on my machine. - -TODO: download instructions +This is a one-time setup. It takes about 90 minutes on my machine. ## Release Notes ### v0.3 (17 Jan 2023) - Fixed an image quality bug in the transcoder -- Quality of life improvements to installation process -- Stop using LFS to store the generated data files in git, they're using up my quota +- Documentation/quality of life improvements to installation process +- Stop using LFS to store the generated data files in git, they're using up all my quota ### v0.2 (19 July 2019) diff --git a/transcoder/make_data_tables.py b/transcoder/make_data_tables.py index a539f5c..baf464b 100644 --- a/transcoder/make_data_tables.py +++ b/transcoder/make_data_tables.py @@ -113,7 +113,7 @@ def compute_edit_distance( edp: EditDistanceParams, bitmap_cls: Type[screen.Bitmap], nominal_colours: Type[colours.NominalColours] -): +) -> np.ndarray: """Computes edit distance matrix between all pairs of pixel strings. Enumerates all possible values of the masked bit representation from @@ -131,44 +131,45 @@ def compute_edit_distance( bitrange = np.uint64(2 ** bits) - edit = [] - for _ in range(len(bitmap_cls.BYTE_MASKS)): - edit.append( - np.zeros(shape=np.uint64(bitrange * bitrange), dtype=np.uint16)) + edit = np.zeros( + shape=(len(bitmap_cls.BYTE_MASKS), np.uint64(bitrange * bitrange)), + dtype=np.uint16) - # Matrix is symmetrical with zero diagonal so only need to compute upper - # triangle - bar = ProgressBar((bitrange * (bitrange - 1)) / 2, max_width=80) + bar = ProgressBar( + bitrange * (bitrange - 1) / 2 * len(bitmap_cls.PHASES), max_width=80) num_dots = bitmap_cls.MASKED_DOTS cnt = 0 for i in range(np.uint64(bitrange)): - for j in range(i): - cnt += 1 + pair_base = np.uint64(i) << bits + for o, ph in enumerate(bitmap_cls.PHASES): + # Compute this in the outer loop since it's invariant under j + first_dots = bitmap_cls.to_dots(i, byte_offset=o) + first_pixels = pixel_string( + colours.dots_to_nominal_colour_pixel_values( + num_dots, first_dots, nominal_colours, + init_phase=ph) + ) - if cnt % 10000 == 0: - bar.numerator = cnt - print(bar, end='\r') - sys.stdout.flush() + # Matrix is symmetrical with zero diagonal so only need to compute + # upper triangle + for j in range(i): + cnt += 1 + if cnt % 100000 == 0: + bar.numerator = cnt + print(bar, end='\r') + sys.stdout.flush() - pair = (np.uint64(i) << bits) + np.uint64(j) + pair = pair_base + np.uint64(j) - for o, ph in enumerate(bitmap_cls.PHASES): - first_dots = bitmap_cls.to_dots(i, byte_offset=o) second_dots = bitmap_cls.to_dots(j, byte_offset=o) - - first_pixels = pixel_string( - colours.dots_to_nominal_colour_pixel_values( - num_dots, first_dots, nominal_colours, - init_phase=ph) - ) second_pixels = pixel_string( colours.dots_to_nominal_colour_pixel_values( num_dots, second_dots, nominal_colours, init_phase=ph) ) - edit[o][pair] = edit_distance( + edit[o, pair] = edit_distance( edp, first_pixels, second_pixels, error=False) return edit @@ -183,10 +184,9 @@ def make_edit_distance( """Write file containing (D)HGR edit distance matrix for a palette.""" dist = compute_edit_distance(edp, bitmap_cls, nominal_colours) - data = "transcoder/data/%s_palette_%d_edit_distance.pickle.bz2" % ( + data = "transcoder/data/%s_palette_%d_edit_distance.npz" % ( bitmap_cls.NAME, pal.ID.value) - with bz2.open(data, "wb", compresslevel=9) as out: - pickle.dump(dist, out, protocol=pickle.HIGHEST_PROTOCOL) + np.savez_compressed(data, edit_distance=dist) def main(): diff --git a/transcoder/screen.py b/transcoder/screen.py index 486fe3e..780f45c 100644 --- a/transcoder/screen.py +++ b/transcoder/screen.py @@ -342,15 +342,13 @@ class Bitmap: @classmethod @functools.lru_cache(None) - def edit_distances(cls, palette_id: pal.Palette) -> List[np.ndarray]: + def edit_distances(cls, palette_id: pal.Palette) -> np.ndarray: """Load edit distance matrices for masked, shifted byte values.""" - data = "transcoder/data/%s_palette_%d_edit_distance.pickle.bz2" % ( - cls.NAME, - palette_id.value + data = "transcoder/data/%s_palette_%d_edit_distance.npz" % ( + cls.NAME, palette_id.value ) - with bz2.open(data, "rb") as ed: - dist = pickle.load(ed) # type: List[np.ndarray] + dist = np.load(data)['edit_distance'] # dist is an upper-triangular matrix of edit_distance(a, b) # encoded as dist[(a << N) + b] = edit_distance(a, b) @@ -363,8 +361,8 @@ class Bitmap: (identity & np.uint64(2 ** cls.MASKED_BITS - 1)) << cls.MASKED_BITS) - for i in range(len(dist)): - dist[i][transpose] += dist[i][identity] + for i in range(dist.shape[0]): + dist[i, transpose] += dist[i, identity] return dist @@ -741,6 +739,7 @@ class HGRBitmap(Bitmap): return double @classmethod + @functools.lru_cache(None) def to_dots(cls, masked_val: int, byte_offset: int) -> int: """Convert masked representation to bit sequence of display dots.