Optimize make_data_tables and use numpy.save instead of pickling.

The file sizes are a bit larger but it unblocks updating to python 3.8.
This commit is contained in:
kris 2023-01-18 00:01:34 +00:00
parent 89633aa845
commit 1d5bcfd74e
3 changed files with 39 additions and 43 deletions

View File

@ -50,8 +50,7 @@ TODO: link video once it is available.
## Installation ## Installation
This currently requires python3.7 because some dependencies (e.g. weighted-levenshtein) don't compile with 3.9+, and 3.8 This currently requires python3.7 because some dependencies (e.g. weighted-levenshtein) don't compile with 3.9+.
has a [bug](https://bugs.python.org/issue44439) in object pickling.
``` ```
python3.7 -m venv venv python3.7 -m venv venv
@ -59,23 +58,21 @@ source venv/bin/activate
pip install -r requirements.txt pip install -r requirements.txt
``` ```
To generate the data files required by the transcoder: Before you can run the transcoder you need to generate the data files it requires:
``` ```
% python transcoder/make_data_tables.py % python transcoder/make_data_tables.py
``` ```
This takes about 3 hours on my machine. This is a one-time setup. It takes about 90 minutes on my machine.
TODO: download instructions
## Release Notes ## Release Notes
### v0.3 (17 Jan 2023) ### v0.3 (17 Jan 2023)
- Fixed an image quality bug in the transcoder - Fixed an image quality bug in the transcoder
- Quality of life improvements to installation process - Documentation/quality of life improvements to installation process
- Stop using LFS to store the generated data files in git, they're using up my quota - Stop using LFS to store the generated data files in git, they're using up all my quota
### v0.2 (19 July 2019) ### v0.2 (19 July 2019)

View File

@ -113,7 +113,7 @@ def compute_edit_distance(
edp: EditDistanceParams, edp: EditDistanceParams,
bitmap_cls: Type[screen.Bitmap], bitmap_cls: Type[screen.Bitmap],
nominal_colours: Type[colours.NominalColours] nominal_colours: Type[colours.NominalColours]
): ) -> np.ndarray:
"""Computes edit distance matrix between all pairs of pixel strings. """Computes edit distance matrix between all pairs of pixel strings.
Enumerates all possible values of the masked bit representation from Enumerates all possible values of the masked bit representation from
@ -131,44 +131,45 @@ def compute_edit_distance(
bitrange = np.uint64(2 ** bits) bitrange = np.uint64(2 ** bits)
edit = [] edit = np.zeros(
for _ in range(len(bitmap_cls.BYTE_MASKS)): shape=(len(bitmap_cls.BYTE_MASKS), np.uint64(bitrange * bitrange)),
edit.append( dtype=np.uint16)
np.zeros(shape=np.uint64(bitrange * bitrange), dtype=np.uint16))
# Matrix is symmetrical with zero diagonal so only need to compute upper bar = ProgressBar(
# triangle bitrange * (bitrange - 1) / 2 * len(bitmap_cls.PHASES), max_width=80)
bar = ProgressBar((bitrange * (bitrange - 1)) / 2, max_width=80)
num_dots = bitmap_cls.MASKED_DOTS num_dots = bitmap_cls.MASKED_DOTS
cnt = 0 cnt = 0
for i in range(np.uint64(bitrange)): for i in range(np.uint64(bitrange)):
for j in range(i): pair_base = np.uint64(i) << bits
cnt += 1 for o, ph in enumerate(bitmap_cls.PHASES):
# Compute this in the outer loop since it's invariant under j
first_dots = bitmap_cls.to_dots(i, byte_offset=o)
first_pixels = pixel_string(
colours.dots_to_nominal_colour_pixel_values(
num_dots, first_dots, nominal_colours,
init_phase=ph)
)
if cnt % 10000 == 0: # Matrix is symmetrical with zero diagonal so only need to compute
bar.numerator = cnt # upper triangle
print(bar, end='\r') for j in range(i):
sys.stdout.flush() cnt += 1
if cnt % 100000 == 0:
bar.numerator = cnt
print(bar, end='\r')
sys.stdout.flush()
pair = (np.uint64(i) << bits) + np.uint64(j) pair = pair_base + np.uint64(j)
for o, ph in enumerate(bitmap_cls.PHASES):
first_dots = bitmap_cls.to_dots(i, byte_offset=o)
second_dots = bitmap_cls.to_dots(j, byte_offset=o) second_dots = bitmap_cls.to_dots(j, byte_offset=o)
first_pixels = pixel_string(
colours.dots_to_nominal_colour_pixel_values(
num_dots, first_dots, nominal_colours,
init_phase=ph)
)
second_pixels = pixel_string( second_pixels = pixel_string(
colours.dots_to_nominal_colour_pixel_values( colours.dots_to_nominal_colour_pixel_values(
num_dots, second_dots, nominal_colours, num_dots, second_dots, nominal_colours,
init_phase=ph) init_phase=ph)
) )
edit[o][pair] = edit_distance( edit[o, pair] = edit_distance(
edp, first_pixels, second_pixels, error=False) edp, first_pixels, second_pixels, error=False)
return edit return edit
@ -183,10 +184,9 @@ def make_edit_distance(
"""Write file containing (D)HGR edit distance matrix for a palette.""" """Write file containing (D)HGR edit distance matrix for a palette."""
dist = compute_edit_distance(edp, bitmap_cls, nominal_colours) dist = compute_edit_distance(edp, bitmap_cls, nominal_colours)
data = "transcoder/data/%s_palette_%d_edit_distance.pickle.bz2" % ( data = "transcoder/data/%s_palette_%d_edit_distance.npz" % (
bitmap_cls.NAME, pal.ID.value) bitmap_cls.NAME, pal.ID.value)
with bz2.open(data, "wb", compresslevel=9) as out: np.savez_compressed(data, edit_distance=dist)
pickle.dump(dist, out, protocol=pickle.HIGHEST_PROTOCOL)
def main(): def main():

View File

@ -342,15 +342,13 @@ class Bitmap:
@classmethod @classmethod
@functools.lru_cache(None) @functools.lru_cache(None)
def edit_distances(cls, palette_id: pal.Palette) -> List[np.ndarray]: def edit_distances(cls, palette_id: pal.Palette) -> np.ndarray:
"""Load edit distance matrices for masked, shifted byte values.""" """Load edit distance matrices for masked, shifted byte values."""
data = "transcoder/data/%s_palette_%d_edit_distance.pickle.bz2" % ( data = "transcoder/data/%s_palette_%d_edit_distance.npz" % (
cls.NAME, cls.NAME, palette_id.value
palette_id.value
) )
with bz2.open(data, "rb") as ed: dist = np.load(data)['edit_distance']
dist = pickle.load(ed) # type: List[np.ndarray]
# dist is an upper-triangular matrix of edit_distance(a, b) # dist is an upper-triangular matrix of edit_distance(a, b)
# encoded as dist[(a << N) + b] = edit_distance(a, b) # encoded as dist[(a << N) + b] = edit_distance(a, b)
@ -363,8 +361,8 @@ class Bitmap:
(identity & np.uint64(2 ** cls.MASKED_BITS - 1)) << (identity & np.uint64(2 ** cls.MASKED_BITS - 1)) <<
cls.MASKED_BITS) cls.MASKED_BITS)
for i in range(len(dist)): for i in range(dist.shape[0]):
dist[i][transpose] += dist[i][identity] dist[i, transpose] += dist[i, identity]
return dist return dist
@ -741,6 +739,7 @@ class HGRBitmap(Bitmap):
return double return double
@classmethod @classmethod
@functools.lru_cache(None)
def to_dots(cls, masked_val: int, byte_offset: int) -> int: def to_dots(cls, masked_val: int, byte_offset: int) -> int:
"""Convert masked representation to bit sequence of display dots. """Convert masked representation to bit sequence of display dots.