Optimize make_data_tables and use numpy.savez_compressed instead of pickling.
The file sizes are a bit larger, but it unblocks updating to Python 3.8.
This commit is contained in:
parent
89633aa845
commit
1d5bcfd74e
13
README.md
13
README.md
|
@ -50,8 +50,7 @@ TODO: link video once it is available.
|
|||
|
||||
## Installation
|
||||
|
||||
This currently requires python3.7 because some dependencies (e.g. weighted-levenshtein) don't compile with 3.9+, and 3.8
|
||||
has a [bug](https://bugs.python.org/issue44439) in object pickling.
|
||||
This currently requires python3.7 because some dependencies (e.g. weighted-levenshtein) don't compile with 3.9+.
|
||||
|
||||
```
|
||||
python3.7 -m venv venv
|
||||
|
@ -59,23 +58,21 @@ source venv/bin/activate
|
|||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
To generate the data files required by the transcoder:
|
||||
Before you can run the transcoder, you need to generate the data files it requires:
|
||||
|
||||
```
|
||||
% python transcoder/make_data_tables.py
|
||||
```
|
||||
|
||||
This takes about 3 hours on my machine.
|
||||
|
||||
TODO: download instructions
|
||||
This is a one-time setup. It takes about 90 minutes on my machine.
|
||||
|
||||
## Release Notes
|
||||
|
||||
### v0.3 (17 Jan 2023)
|
||||
|
||||
- Fixed an image quality bug in the transcoder
|
||||
- Quality of life improvements to installation process
|
||||
- Stop using LFS to store the generated data files in git, they're using up my quota
|
||||
- Documentation and quality-of-life improvements to the installation process
|
||||
- Stop using LFS to store the generated data files in git; they're using up all my quota
|
||||
|
||||
### v0.2 (19 July 2019)
|
||||
|
||||
|
|
|
@ -113,7 +113,7 @@ def compute_edit_distance(
|
|||
edp: EditDistanceParams,
|
||||
bitmap_cls: Type[screen.Bitmap],
|
||||
nominal_colours: Type[colours.NominalColours]
|
||||
):
|
||||
) -> np.ndarray:
|
||||
"""Computes edit distance matrix between all pairs of pixel strings.
|
||||
|
||||
Enumerates all possible values of the masked bit representation from
|
||||
|
@ -131,44 +131,45 @@ def compute_edit_distance(
|
|||
|
||||
bitrange = np.uint64(2 ** bits)
|
||||
|
||||
edit = []
|
||||
for _ in range(len(bitmap_cls.BYTE_MASKS)):
|
||||
edit.append(
|
||||
np.zeros(shape=np.uint64(bitrange * bitrange), dtype=np.uint16))
|
||||
edit = np.zeros(
|
||||
shape=(len(bitmap_cls.BYTE_MASKS), np.uint64(bitrange * bitrange)),
|
||||
dtype=np.uint16)
|
||||
|
||||
# Matrix is symmetrical with zero diagonal so only need to compute upper
|
||||
# triangle
|
||||
bar = ProgressBar((bitrange * (bitrange - 1)) / 2, max_width=80)
|
||||
bar = ProgressBar(
|
||||
bitrange * (bitrange - 1) / 2 * len(bitmap_cls.PHASES), max_width=80)
|
||||
|
||||
num_dots = bitmap_cls.MASKED_DOTS
|
||||
|
||||
cnt = 0
|
||||
for i in range(np.uint64(bitrange)):
|
||||
for j in range(i):
|
||||
cnt += 1
|
||||
pair_base = np.uint64(i) << bits
|
||||
for o, ph in enumerate(bitmap_cls.PHASES):
|
||||
# Compute this in the outer loop since it's invariant under j
|
||||
first_dots = bitmap_cls.to_dots(i, byte_offset=o)
|
||||
first_pixels = pixel_string(
|
||||
colours.dots_to_nominal_colour_pixel_values(
|
||||
num_dots, first_dots, nominal_colours,
|
||||
init_phase=ph)
|
||||
)
|
||||
|
||||
if cnt % 10000 == 0:
|
||||
bar.numerator = cnt
|
||||
print(bar, end='\r')
|
||||
sys.stdout.flush()
|
||||
# Matrix is symmetrical with zero diagonal so only need to compute
|
||||
# upper triangle
|
||||
for j in range(i):
|
||||
cnt += 1
|
||||
if cnt % 100000 == 0:
|
||||
bar.numerator = cnt
|
||||
print(bar, end='\r')
|
||||
sys.stdout.flush()
|
||||
|
||||
pair = (np.uint64(i) << bits) + np.uint64(j)
|
||||
pair = pair_base + np.uint64(j)
|
||||
|
||||
for o, ph in enumerate(bitmap_cls.PHASES):
|
||||
first_dots = bitmap_cls.to_dots(i, byte_offset=o)
|
||||
second_dots = bitmap_cls.to_dots(j, byte_offset=o)
|
||||
|
||||
first_pixels = pixel_string(
|
||||
colours.dots_to_nominal_colour_pixel_values(
|
||||
num_dots, first_dots, nominal_colours,
|
||||
init_phase=ph)
|
||||
)
|
||||
second_pixels = pixel_string(
|
||||
colours.dots_to_nominal_colour_pixel_values(
|
||||
num_dots, second_dots, nominal_colours,
|
||||
init_phase=ph)
|
||||
)
|
||||
edit[o][pair] = edit_distance(
|
||||
edit[o, pair] = edit_distance(
|
||||
edp, first_pixels, second_pixels, error=False)
|
||||
|
||||
return edit
|
||||
|
@ -183,10 +184,9 @@ def make_edit_distance(
|
|||
"""Write file containing (D)HGR edit distance matrix for a palette."""
|
||||
|
||||
dist = compute_edit_distance(edp, bitmap_cls, nominal_colours)
|
||||
data = "transcoder/data/%s_palette_%d_edit_distance.pickle.bz2" % (
|
||||
data = "transcoder/data/%s_palette_%d_edit_distance.npz" % (
|
||||
bitmap_cls.NAME, pal.ID.value)
|
||||
with bz2.open(data, "wb", compresslevel=9) as out:
|
||||
pickle.dump(dist, out, protocol=pickle.HIGHEST_PROTOCOL)
|
||||
np.savez_compressed(data, edit_distance=dist)
|
||||
|
||||
|
||||
def main():
|
||||
|
|
|
@ -342,15 +342,13 @@ class Bitmap:
|
|||
|
||||
@classmethod
|
||||
@functools.lru_cache(None)
|
||||
def edit_distances(cls, palette_id: pal.Palette) -> List[np.ndarray]:
|
||||
def edit_distances(cls, palette_id: pal.Palette) -> np.ndarray:
|
||||
"""Load edit distance matrices for masked, shifted byte values."""
|
||||
|
||||
data = "transcoder/data/%s_palette_%d_edit_distance.pickle.bz2" % (
|
||||
cls.NAME,
|
||||
palette_id.value
|
||||
data = "transcoder/data/%s_palette_%d_edit_distance.npz" % (
|
||||
cls.NAME, palette_id.value
|
||||
)
|
||||
with bz2.open(data, "rb") as ed:
|
||||
dist = pickle.load(ed) # type: List[np.ndarray]
|
||||
dist = np.load(data)['edit_distance']
|
||||
|
||||
# dist is an upper-triangular matrix of edit_distance(a, b)
|
||||
# encoded as dist[(a << N) + b] = edit_distance(a, b)
|
||||
|
@ -363,8 +361,8 @@ class Bitmap:
|
|||
(identity & np.uint64(2 ** cls.MASKED_BITS - 1)) <<
|
||||
cls.MASKED_BITS)
|
||||
|
||||
for i in range(len(dist)):
|
||||
dist[i][transpose] += dist[i][identity]
|
||||
for i in range(dist.shape[0]):
|
||||
dist[i, transpose] += dist[i, identity]
|
||||
|
||||
return dist
|
||||
|
||||
|
@ -741,6 +739,7 @@ class HGRBitmap(Bitmap):
|
|||
return double
|
||||
|
||||
@classmethod
|
||||
@functools.lru_cache(None)
|
||||
def to_dots(cls, masked_val: int, byte_offset: int) -> int:
|
||||
"""Convert masked representation to bit sequence of display dots.
|
||||
|
||||
|
|
Loading…
Reference in New Issue