Optimize make_data_tables and use numpy.save instead of pickling. The

file sizes are a bit larger but it unblocks updating to python 3.8.
This commit is contained in:
kris
2023-01-18 00:01:34 +00:00
parent 89633aa845
commit 1d5bcfd74e
3 changed files with 39 additions and 43 deletions

View File

@@ -113,7 +113,7 @@ def compute_edit_distance(
edp: EditDistanceParams,
bitmap_cls: Type[screen.Bitmap],
nominal_colours: Type[colours.NominalColours]
):
) -> np.ndarray:
"""Computes edit distance matrix between all pairs of pixel strings.
Enumerates all possible values of the masked bit representation from
@@ -131,44 +131,45 @@ def compute_edit_distance(
bitrange = np.uint64(2 ** bits)
edit = []
for _ in range(len(bitmap_cls.BYTE_MASKS)):
edit.append(
np.zeros(shape=np.uint64(bitrange * bitrange), dtype=np.uint16))
edit = np.zeros(
shape=(len(bitmap_cls.BYTE_MASKS), np.uint64(bitrange * bitrange)),
dtype=np.uint16)
# Matrix is symmetrical with zero diagonal so only need to compute upper
# triangle
bar = ProgressBar((bitrange * (bitrange - 1)) / 2, max_width=80)
bar = ProgressBar(
bitrange * (bitrange - 1) / 2 * len(bitmap_cls.PHASES), max_width=80)
num_dots = bitmap_cls.MASKED_DOTS
cnt = 0
for i in range(np.uint64(bitrange)):
for j in range(i):
cnt += 1
pair_base = np.uint64(i) << bits
for o, ph in enumerate(bitmap_cls.PHASES):
# Compute this in the outer loop since it's invariant under j
first_dots = bitmap_cls.to_dots(i, byte_offset=o)
first_pixels = pixel_string(
colours.dots_to_nominal_colour_pixel_values(
num_dots, first_dots, nominal_colours,
init_phase=ph)
)
if cnt % 10000 == 0:
bar.numerator = cnt
print(bar, end='\r')
sys.stdout.flush()
# Matrix is symmetrical with zero diagonal so only need to compute
# upper triangle
for j in range(i):
cnt += 1
if cnt % 100000 == 0:
bar.numerator = cnt
print(bar, end='\r')
sys.stdout.flush()
pair = (np.uint64(i) << bits) + np.uint64(j)
pair = pair_base + np.uint64(j)
for o, ph in enumerate(bitmap_cls.PHASES):
first_dots = bitmap_cls.to_dots(i, byte_offset=o)
second_dots = bitmap_cls.to_dots(j, byte_offset=o)
first_pixels = pixel_string(
colours.dots_to_nominal_colour_pixel_values(
num_dots, first_dots, nominal_colours,
init_phase=ph)
)
second_pixels = pixel_string(
colours.dots_to_nominal_colour_pixel_values(
num_dots, second_dots, nominal_colours,
init_phase=ph)
)
edit[o][pair] = edit_distance(
edit[o, pair] = edit_distance(
edp, first_pixels, second_pixels, error=False)
return edit
@@ -183,10 +184,9 @@ def make_edit_distance(
"""Write file containing (D)HGR edit distance matrix for a palette."""
dist = compute_edit_distance(edp, bitmap_cls, nominal_colours)
data = "transcoder/data/%s_palette_%d_edit_distance.pickle.bz2" % (
data = "transcoder/data/%s_palette_%d_edit_distance.npz" % (
bitmap_cls.NAME, pal.ID.value)
with bz2.open(data, "wb", compresslevel=9) as out:
pickle.dump(dist, out, protocol=pickle.HIGHEST_PROTOCOL)
np.savez_compressed(data, edit_distance=dist)
def main():