diff --git a/transcoder/colours.py b/transcoder/colours.py
index 1ec8247..f08f5b4 100644
--- a/transcoder/colours.py
+++ b/transcoder/colours.py
@@ -1,7 +1,8 @@
 """Apple II nominal display colours, represented by 4-bit dot sequences.
 
-These are distinct from the effective colours that are actually displayed,
-e.g. due to white/black coalescing and NTSC artifacting.
+These are the "asymptotic" colours as displayed in e.g. continuous runs of
+pixels.  The effective colours that are actually displayed are not discrete,
+due to NTSC artifacting being a continuous process.
 """
 
 from typing import Tuple, Type
@@ -10,6 +11,66 @@ import enum
 import functools
 
 
+class NominalColours(enum.Enum):
+    pass
+
+
+class HGRColours(NominalColours):
+    """Map from 4-bit dot representation to HGR pixel colours.
+
+    Dots are in memory bit order (MSB -> LSB), which is opposite to screen
+    order (LSB -> MSB is ordered left-to-right on the screen)
+
+    Note that the DHGR mapping is right-rotated from these values, because of
+    a 1-tick phase difference in the colour reference signal for DHGR vs HGR
+    """
+    BLACK = 0b0000
+    MAGENTA = 0b0001
+    BROWN = 0b1000
+    ORANGE = 0b1001  # HGR colour
+    DARK_GREEN = 0b0100
+    GREY1 = 0b0101
+    GREEN = 0b1100  # HGR colour
+    YELLOW = 0b1101
+    DARK_BLUE = 0b0010
+    VIOLET = 0b0011  # HGR colour
+    GREY2 = 0b1010
+    PINK = 0b1011
+    MED_BLUE = 0b0110  # HGR colour
+    LIGHT_BLUE = 0b0111
+    AQUA = 0b1110
+    WHITE = 0b1111
+
+
+class DHGRColours(NominalColours):
+    """Map from 4-bit dot representation to DHGR pixel colours.
+
+    Dots are in memory bit order (MSB -> LSB), which is opposite to screen
+    order (LSB -> MSB is ordered left-to-right on the screen)
+
+    Note that these are right-rotated from the HGR mapping, because of a
+    1-tick phase difference in the colour reference signal for DHGR vs HGR
+    """
+
+    # Values are in the DHGR 4-bit memory representation.
+    BLACK = 0b0000
+    MAGENTA = 0b1000
+    BROWN = 0b0100
+    ORANGE = 0b1100  # HGR colour
+    DARK_GREEN = 0b0010
+    GREY1 = 0b1010
+    GREEN = 0b0110  # HGR colour
+    YELLOW = 0b1110
+    DARK_BLUE = 0b0001
+    VIOLET = 0b1001  # HGR colour
+    GREY2 = 0b0101
+    PINK = 0b1101
+    MED_BLUE = 0b0011  # HGR colour
+    LIGHT_BLUE = 0b1011
+    AQUA = 0b0111
+    WHITE = 0b1111
+
+
 def ror(int4: int, howmany: int) -> int:
     """Rotate-right an int4 some number of times."""
     res = int4
@@ -36,52 +97,6 @@ def _rol(int4: int) -> int:
     return ((int4 & 0b0111) << 1) ^ ((int4 & 0b1000) >> 3)
 
 
-class NominalColours(enum.Enum):
-    pass
-
-
-class HGRColours(NominalColours):
-    # Value is memory bit order, which is opposite to screen order (bits
-    # ordered Left to Right on screen)
-    BLACK = 0b0000
-    MAGENTA = 0b0001
-    BROWN = 0b1000
-    ORANGE = 0b1001  # HGR colour
-    DARK_GREEN = 0b0100
-    GREY1 = 0b0101
-    GREEN = 0b1100  # HGR colour
-    YELLOW = 0b1101
-    DARK_BLUE = 0b0010
-    VIOLET = 0b0011  # HGR colour
-    GREY2 = 0b1010
-    PINK = 0b1011
-    MED_BLUE = 0b0110  # HGR colour
-    LIGHT_BLUE = 0b0111
-    AQUA = 0b1110
-    WHITE = 0b1111
-
-
-class DHGRColours(NominalColours):
-    # DHGR 4-bit memory representation is right-rotated from the HGR video
-    # representation.
-    BLACK = 0b0000
-    MAGENTA = 0b1000
-    BROWN = 0b0100
-    ORANGE = 0b1100  # HGR colour
-    DARK_GREEN = 0b0010
-    GREY1 = 0b1010
-    GREEN = 0b0110  # HGR colour
-    YELLOW = 0b1110
-    DARK_BLUE = 0b0001
-    VIOLET = 0b1001  # HGR colour
-    GREY2 = 0b0101
-    PINK = 0b1101
-    MED_BLUE = 0b0011  # HGR colour
-    LIGHT_BLUE = 0b1011
-    AQUA = 0b0111
-    WHITE = 0b1111
-
-
 @functools.lru_cache(None)
 def dots_to_nominal_colour_pixels(
         num_bits: int,
@@ -92,10 +107,10 @@ def dots_to_nominal_colour_pixels(
     """Sequence of num_bits nominal colour pixels via sliding 4-bit window.
 
     Includes the 3-bit header that represents the trailing 3 bits of the
-    previous tuple body.  i.e. storing a byte in aux even columns will also
-    influence the colours of the previous main odd column.
+    previous tuple body.  e.g. for DHGR, storing a byte in aux even columns
+    will also influence the colours of the previous main odd column.
 
-    This naively models the NTSC colour artifacting.
+    This naively models (approximates) the NTSC colour artifacting.
 
     TODO: Use a more careful analogue colour composition model to produce
     effective pixel colours.
@@ -126,6 +141,8 @@ def dots_to_nominal_colour_pixel_values(
         colours: Type[NominalColours],
         init_phase: int = 1  # Such that phase = 0 at start of body
 ) -> Tuple[int]:
+    """Sequence of num_bits nominal colour values via sliding 4-bit window."""
+
     return tuple(p.value for p in dots_to_nominal_colour_pixels(
         num_bits, dots, colours, init_phase
     ))
diff --git a/transcoder/frame_grabber.py b/transcoder/frame_grabber.py
index 5a0fcb8..2940f1d 100644
--- a/transcoder/frame_grabber.py
+++ b/transcoder/frame_grabber.py
@@ -54,7 +54,7 @@ class FileFrameGrabber(FrameGrabber):
         return "P%d" % self.palette.value
 
     def frames(self) -> Iterator[screen.MemoryMap]:
-        """Encode frame to HGR using bmp2dhr.
+        """Encode frame to (D)HGR using bmp2dhr.
 
         We do the encoding in a background thread to parallelize.
         """
diff --git a/transcoder/machine.py b/transcoder/machine.py
index b714bcd..c07c2e9 100644
--- a/transcoder/machine.py
+++ b/transcoder/machine.py
@@ -3,12 +3,11 @@
 from typing import Iterator
 
 
+# TODO: screen memory changes should happen via Machine while emitting opcodes?
+
 class Machine:
     """Represents Apple II and player virtual machine state."""
 
-    def __init__(self):
-        self.page = 0x20  # type: int
-
     def emit(self, opcode: "Opcode") -> Iterator[int]:
         """
 
diff --git a/transcoder/main.py b/transcoder/main.py
index dca43ca..b816322 100644
--- a/transcoder/main.py
+++ b/transcoder/main.py
@@ -1,4 +1,4 @@
-"""Transcodes an input video file to ][Vision format."""
+"""Transcodes an input video file to ][-Vision format."""
 
 import argparse
 
@@ -7,7 +7,7 @@ import palette
 import video_mode
 
 parser = argparse.ArgumentParser(
-    description='Transcode videos to ][Vision format.')
+    description='Transcode videos to ][-Vision format.')
 parser.add_argument(
     'input', help='Path to input video file.')
 parser.add_argument(
diff --git a/transcoder/make_data_tables.py b/transcoder/make_data_tables.py
index 3792967..8aa78a8 100644
--- a/transcoder/make_data_tables.py
+++ b/transcoder/make_data_tables.py
@@ -15,62 +15,6 @@ import colours
 import palette
 import screen
 
-# The DHGR display encodes 7 pixels across interleaved 4-byte sequences
-# of AUX and MAIN memory, as follows:
-#
-#     PBBBAAAA PDDCCCCB PFEEEEDD PGGGGFFF
-#     Aux N    Main N   Aux N+1  Main N+1  (N even)
-#
-# Where A..G are the pixels, and P represents the (unused) palette bit.
-#
-# This layout makes more sense when written as a (little-endian) 32-bit integer:
-#
-#     33222222222211111111110000000000 <- bit pos in uint32
-#     10987654321098765432109876543210
-#     PGGGGFFFPFEEEEDDPDDCCCCBPBBBAAAA
-#
-# i.e. apart from the palette bits this is a linear ordering of pixels,
-# when read from LSB to MSB (i.e. right-to-left).  i.e. the screen layout order
-# of bits is opposite to the usual binary representation ordering.
-#
-# If we now look at the effect of storing a byte in each of the 4
-# byte-offset positions within this uint32,
-#
-#     PGGGGFFFPFEEEEDDPDDCCCCBPBBBAAAA
-#     33333333222222221111111100000000
-#
-# We see that these byte offsets cause changes to the following pixels:
-#
-# 0: A B
-# 1: B C D
-# 2: D E F
-# 3: F G
-#
-# i.e. DHGR byte stores to offsets 0 and 3 result in changing one 8-bit value
-# (2 DHGR pixels) into another; offsets 1 and 3 result in changing one 12-bit
-# value (3 DHGR pixels).
-#
-# We can simplify things by stripping out the palette bit and packing
-# down to a 28-bit integer representation:
-#
-#     33222222222211111111110000000000 <- bit pos in uint32
-#     10987654321098765432109876543210
-#
-#     0000GGGGFFFFEEEEDDDDCCCCBBBBAAAA <- pixel A..G
-#         3210321032103210321032103210 <- bit pos in A..G pixel
-#
-#         3333333222222211111110000000 <- byte offset 0.3
-#
-# With this representation, we can precompute an edit distance for the
-# pixel changes resulting from all possible DHGR byte stores.
-#
-# We further encode these (source, target) -> distance mappings by
-# concatenating source and target into 16- or 24-bit values.  This is
-# efficient to work with in the video transcoder.
-#
-# Since we are enumerating all such 16- or 24-bit values, these can be packed
-# contiguously into an array whose index is the (source, target) pair and
-# the value is the edit distance.
 
 PIXEL_CHARS = "0123456789ABCDEF"
 
@@ -85,6 +29,8 @@ def pixel_string(pixels: Iterable[int]) -> str:
 
 
 class EditDistanceParams:
+    """Data class for parameters to Damerau-Levenshtein edit distance."""
+
     # Don't even consider insertions and deletions into the string, they don't
     # make sense for comparing pixel strings
     insert_costs = np.ones(128, dtype=np.float64) * 100000
@@ -92,20 +38,26 @@ class EditDistanceParams:
 
     # Smallest substitution value is ~20 from palette.diff_matrices, i.e.
     # we always prefer to transpose 2 pixels rather than substituting colours.
-    transpose_costs = np.ones((128, 128), dtype=np.float64) * 10
+    # TODO: is quality really better allowing transposes?
+    transpose_costs = np.ones((128, 128), dtype=np.float64) * 100000  # 10
 
+    # These will be filled in later
     substitute_costs = np.zeros((128, 128), dtype=np.float64)
 
     # Substitution costs to use when evaluating other potential offsets at which
     # to store a content byte.  We penalize more harshly for introducing
     # errors that alter pixel colours, since these tend to be very
     # noticeable as visual noise.
+    #
+    # TODO: currently unused
     error_substitute_costs = np.zeros((128, 128), dtype=np.float64)
 
 
 def compute_diff_matrix(pal: Type[palette.BasePalette]):
-    # Compute matrix of CIE2000 delta values for this pal, representing
-    # perceptual distance between colours.
+    """Compute matrix of perceptual distance between colour pairs.
+
+    Specifically CIE2000 delta values for this palette.
+    """
     dm = np.ndarray(shape=(16, 16), dtype=np.int)
 
     for colour1, a in pal.RGB.items():
@@ -120,6 +72,8 @@ def compute_diff_matrix(pal: Type[palette.BasePalette]):
 
 
 def compute_substitute_costs(pal: Type[palette.BasePalette]):
+    """Compute costs for substituting one colour pixel for another."""
+
     edp = EditDistanceParams()
 
     diff_matrix = compute_diff_matrix(pal)
@@ -128,10 +82,10 @@ def compute_substitute_costs(pal: Type[palette.BasePalette]):
     for i, c in enumerate(PIXEL_CHARS):
         for j, d in enumerate(PIXEL_CHARS):
             cost = diff_matrix[i, j]
-            edp.substitute_costs[(ord(c), ord(d))] = cost  # / 20
-            edp.substitute_costs[(ord(d), ord(c))] = cost  # / 20
-            edp.error_substitute_costs[(ord(c), ord(d))] = 5 * cost  # / 4
-            edp.error_substitute_costs[(ord(d), ord(c))] = 5 * cost  # / 4
+            edp.substitute_costs[(ord(c), ord(d))] = cost
+            edp.substitute_costs[(ord(d), ord(c))] = cost
+            edp.error_substitute_costs[(ord(c), ord(d))] = 5 * cost
+            edp.error_substitute_costs[(ord(d), ord(c))] = 5 * cost
 
     return edp
 
@@ -141,6 +95,7 @@ def edit_distance(
         a: str,
         b: str,
         error: bool) -> np.float64:
+    """Damerau-Levenshtein edit distance between two pixel strings."""
     res = weighted_levenshtein.dam_lev(
         a, b,
         insert_costs=edp.insert_costs,
@@ -149,7 +104,8 @@ def edit_distance(
             edp.error_substitute_costs if error else edp.substitute_costs),
     )
 
-    assert res == 0 or (1 <= res < 2 ** 16), res
+    # Make sure result can fit in a uint16
+    assert (0 <= res < 2 ** 16), res
     return res
 
 
@@ -158,6 +114,19 @@ def compute_edit_distance(
         bitmap_cls: Type[screen.Bitmap],
         nominal_colours: Type[colours.NominalColours]
 ):
+    """Computes edit distance matrix between all pairs of pixel strings.
+
+    Enumerates all possible values of the masked bit representation from
+    bitmap_cls (assuming it is contiguous, i.e. we enumerate all
+    2**bitmap_cls.MASKED_BITS values).  These are mapped to the dot
+    representation, turned into coloured pixel strings, and we compute the
+    edit distance.
+
+    The effect of this is that we precompute the effect of storing all possible
+    byte values against all possible screen backgrounds (e.g. as
+    influencing/influenced by neighbouring bytes).
+    """
+
     bits = bitmap_cls.MASKED_BITS
 
     bitrange = np.uint64(2 ** bits)
@@ -171,7 +140,7 @@ def compute_edit_distance(
     # triangle
     bar = ProgressBar((bitrange * (bitrange - 1)) / 2, max_width=80)
 
-    num_dots = bitmap_cls.HEADER_BITS + bitmap_cls.BODY_BITS
+    num_dots = bitmap_cls.MASKED_DOTS
 
     cnt = 0
     for i in range(np.uint64(bitrange)):
@@ -211,6 +180,8 @@ def make_edit_distance(
         bitmap_cls: Type[screen.Bitmap],
         nominal_colours: Type[colours.NominalColours]
 ):
+    """Write file containing (D)HGR edit distance matrix for a palette."""
+
     dist = compute_edit_distance(edp, bitmap_cls, nominal_colours)
     data = "transcoder/data/%s_palette_%d_edit_distance.pickle.bz2" % (
         bitmap_cls.NAME, pal.ID.value)
@@ -223,7 +194,7 @@ def main():
         print("Processing palette %s" % p)
         edp = compute_substitute_costs(p)
 
-        # TODO: error distance matrices
+        # TODO: still worth using error distance matrices?
 
         make_edit_distance(p, edp, screen.HGRBitmap, colours.HGRColours)
         make_edit_distance(p, edp, screen.DHGRBitmap, colours.DHGRColours)
diff --git a/transcoder/make_data_tables_test.py b/transcoder/make_data_tables_test.py
index 689a651..92c4bc8 100644
--- a/transcoder/make_data_tables_test.py
+++ b/transcoder/make_data_tables_test.py
@@ -15,7 +15,8 @@ class TestMakeDataTables(unittest.TestCase):
         pixels = (HGRColours.BLACK, HGRColours.WHITE, HGRColours.ORANGE)
         self.assertEqual("0FC", make_data_tables.pixel_string(pixels))
 
-    def test_edit_distances(self):
+    def test_edit_distances_dhgr(self):
+        """Assert invariants and symmetries of the edit distance matrices."""
         for p in PALETTES:
             ed = screen.DHGRBitmap.edit_distances(p)
             print(p)
@@ -52,6 +53,8 @@ class TestMakeDataTables(unittest.TestCase):
                         self.assertGreaterEqual(ed[ph][(i << 13) + j], 0)
 
     def test_edit_distances_hgr(self):
+        """Assert invariants and symmetries of the edit distance matrices."""
+
         for p in PALETTES:
             ed = screen.HGRBitmap.edit_distances(p)
             print(p)
@@ -61,13 +64,17 @@ class TestMakeDataTables(unittest.TestCase):
             cnt = 0
             for ph in range(2):
 
-                # Only zero entries should be on diagonal, i.e. of form
+                # TODO: for HGR this invariant isn't true, all-0 and all-1
+                #  values for header/footer/body with/without palette bit can
+                #  also have zero difference
+                # # Only zero entries should be on diagonal, i.e. of form
                 # # i << 14 + i
                 # zeros = np.arange(len(ed[ph]))[ed[ph] == 0]
                 # for z in zeros:
                 #     z1 = z & (2**14-1)
                 #     z2 = (z >> 14) & (2**14-1)
-                #     self.assertEqual(z1, z2)
+                #     if z1 != z2:
+                #         self.assertEqual(z1, z2)
 
                 # Assert that matrix is symmetrical
                 for i in range(2 ** 14):
diff --git a/transcoder/movie.py b/transcoder/movie.py
index b75217e..263fd80 100644
--- a/transcoder/movie.py
+++ b/transcoder/movie.py
@@ -76,6 +76,7 @@ class Movie:
 
                     if aux:
                         aux_seq = self.video.encode_frame(aux, is_aux=True)
+
             # au has range -15 .. 16 (step=1)
             # Tick cycles are units of 2
             tick = au * 2  # -30 .. 32 (step=2)
@@ -86,18 +87,20 @@ class Movie:
 
             yield opcodes.TICK_OPCODES[(tick, page)](content, offsets)
 
-    def _emit_bytes(self, _op):
-        """
+    def _emit_bytes(self, _op: opcodes.Opcode) -> Iterable[int]:
+        """Emit compiled bytes corresponding to a player opcode.
 
-        :param _op:
-        :return:
+        Also tracks byte stream position.
         """
         for b in self.state.emit(_op):
             yield b
             self.stream_pos += 1
 
     def emit_stream(self, ops: Iterable[opcodes.Opcode]) -> Iterator[int]:
-        """
+        """Emit compiled byte stream corresponding to opcode stream.
+
+        Inserts padding opcodes at 2KB stream boundaries, to instruct player
+        to manage the TCP socket buffer.
 
         :param ops:
         :return:
@@ -124,7 +127,7 @@ class Movie:
         yield from self.done()
 
     def done(self) -> Iterator[int]:
-        """Terminate opcode stream.
+        """Terminate byte stream by emitting terminal opcode and padding to 2KB.
 
         :return:
         """
diff --git a/transcoder/palette.py b/transcoder/palette.py
index 77f7058..7076eaf 100644
--- a/transcoder/palette.py
+++ b/transcoder/palette.py
@@ -1,3 +1,5 @@
+"""RGB palette values for rendering NominalColour pixels."""
+
 import enum
 from typing import Dict, Type
 
@@ -14,7 +16,8 @@ def rgb(r, g, b):
 
 
 class Palette(enum.Enum):
-    """BMP2DHR palette numbers"""
+    """BMP2DHR palette numbers."""
+
     UNKNOWN = -1
     IIGS = 0
     NTSC = 5
diff --git a/transcoder/screen.py b/transcoder/screen.py
index 40f70fd..5029363 100644
--- a/transcoder/screen.py
+++ b/transcoder/screen.py
@@ -14,7 +14,7 @@ IntOrArray = Union[np.uint64, np.ndarray]
 
 
 def y_to_base_addr(y: int, page: int = 0) -> int:
-    """Maps y coordinate to base address on given screen page"""
+    """Maps y coordinate to base address on given screen page."""
     a = y // 64
     d = y - 64 * a
     b = d // 8
@@ -126,33 +126,44 @@ class MemoryMap:
 
 
 class Bitmap:
-    """Packed 28-bit bitmap representation of (D)HGR screen memory.
+    """Packed bitmap representation of (D)HGR screen memory.
 
-    XXX comments
-
-    The memory layout is still page-oriented, not linear y-x buffer but the
-    bit map is such that 20 consecutive entries linearly encode the 28*20 =
-    560-bit monochrome dot positions that underlie both Mono and Colour (
-    D)HGR screens.
-
-    For Colour display the (nominal) colours are encoded as 4-bit pixels.
+    Maintains a page-based array whose entries contain a packed representation
+    of multiple screen bytes, in a representation that supports efficiently
+    determining the visual effect of storing bytes at arbitrary screen offsets.
     """
 
+    # NOTE: See https://github.com/numpy/numpy/issues/2524 and related issues
+    # for why we have to cast things explicitly to np.uint64 - type promotion
+    # to uint64 is broken in numpy :(
+
+    # Name of bitmap type
     NAME = None  # type: str
 
-    # Size of packed representation
+    # Size of packed representation, consisting of header + body + footer
     HEADER_BITS = None  # type: np.uint64
     BODY_BITS = None  # type: np.uint64
     FOOTER_BITS = None  # type: np.uint64
 
+    # How many bits of packed representation are necessary to determine the
+    # effect of storing a memory byte, e.g. because they influence pixel
+    # colour or are influenced by other bits.
+    MASKED_BITS = None  # type: np.uint64
+
+    # How many coloured screen pixels we can extract from MASKED_BITS.  Note
+    # that this does not include the last 3 dots represented by the footer,
+    # since we don't have enough information to determine their colour (we
+    # would fall off the end of the 4-bit sliding window)
+    MASKED_DOTS = None  # type: np.uint64
+
+    # List of bitmasks for extracting the subset of packed data corresponding
+    # to bits influencing/influenced by a given byte offset.  These must be
+    # a contiguous bit mask, i.e. so that after shifting they are enumerated
+    # by 0..2**MASKED_BITS-1
     BYTE_MASKS = None  # type: List[np.uint64]
     BYTE_SHIFTS = None  # type: List[np.uint64]
 
-    # How many bits of packed representation are influenced when storing a
-    # memory byte
-    MASKED_BITS = None  # type: np.uint64
-
-    # XXX
+    # NTSC clock phase at first masked bit
     PHASES = None  # type: List[int]
 
     def __init__(
@@ -176,18 +187,21 @@ class Bitmap:
             shape=(32, 128), dtype=np.uint64)  # type: np.ndarray
         self._pack()
 
-    def _body(self) -> np.ndarray:
-        raise NotImplementedError
-
     # TODO: don't leak headers/footers across screen rows.  We should be using
     # x-y representation rather than page-offset
 
     @staticmethod
-    def _make_header(prev_col: IntOrArray) -> IntOrArray:
+    def _make_header(col: IntOrArray) -> IntOrArray:
+        """Extract values to use as header of next column."""
+        raise NotImplementedError
+
+    def _body(self) -> np.ndarray:
+        """Pack related screen bytes into an efficient representation."""
         raise NotImplementedError
 
     @staticmethod
-    def _make_footer(next_col: IntOrArray) -> IntOrArray:
+    def _make_footer(col: IntOrArray) -> IntOrArray:
+        """Extract values to use as footer of previous column."""
         raise NotImplementedError
 
     def _pack(self) -> None:
@@ -195,17 +209,15 @@ class Bitmap:
 
         body = self._body()
 
-        # XXX comments
-        # Prepend last 3 bits of previous main odd byte so we can correctly
-        # decode the effective colours at the beginning of the 28-bit
-        # tuple
+        # Prepend last 3 bits of previous odd byte so we can correctly
+        # decode the effective colours at the beginning of the packed tuple
         prev_col = np.roll(body, 1, axis=1).astype(np.uint64)
         header = self._make_header(prev_col)
         # Don't leak header across page boundaries
         header[:, 0] = 0
 
-        # Append first 3 bits of next aux even byte so we can correctly
-        # decode the effective colours at the end of the 28-bit tuple
+        # Append first 3 bits of next even byte so we can correctly
+        # decode the effective colours at the end of the packed tuple
         next_col = np.roll(body, -1, axis=1).astype(np.uint64)
         footer = self._make_footer(next_col)
         # Don't leak footer across page boundaries
@@ -218,20 +230,27 @@ class Bitmap:
             byte_offset: int,
             old_value: IntOrArray,
             new_value: np.uint8) -> IntOrArray:
+        """Update int/array to store new value at byte_offset in every entry.
+
+        Does not patch up headers/footers of neighbouring columns.
+        """
         raise NotImplementedError
 
     @staticmethod
     @functools.lru_cache(None)
-    def byte_offset(x_byte: int, is_aux: bool) -> int:
+    def byte_offset(page_offset: int, is_aux: bool) -> int:
+        """Map screen offset for aux/main into offset within packed data."""
         raise NotImplementedError
 
     @staticmethod
     @functools.lru_cache(None)
     def _byte_offsets(is_aux: bool) -> Tuple[int, int]:
+        """Return byte offsets within packed data for AUX/MAIN memory."""
         raise NotImplementedError
 
     @classmethod
     def to_dots(cls, masked_val: int, byte_offset: int) -> int:
+        """Convert masked representation to bit sequence of display dots."""
         raise NotImplementedError
 
     def apply(
@@ -254,6 +273,7 @@ class Bitmap:
             page: int,
             offset: int,
             byte_offset: int) -> None:
+        """Fix up column headers/footers when updating a (page, offset)."""
 
         if byte_offset == 0 and offset > 0:
             self.packed[page, offset - 1] = self._fix_column_left(
@@ -272,6 +292,8 @@ class Bitmap:
             column_left: IntOrArray,
             column: IntOrArray
     ) -> IntOrArray:
+        """Patch up the footer of the column to the left."""
+
         # Mask out footer(s)
         column_left &= np.uint64(2 ** (self.HEADER_BITS + self.BODY_BITS) - 1)
         column_left ^= self._make_footer(column)
@@ -283,6 +305,8 @@ class Bitmap:
             column_right: IntOrArray,
             column: IntOrArray
     ) -> IntOrArray:
+        """Patch up the header of the column to the right."""
+
         # Mask out header(s)
         column_right &= np.uint64(
             (2 ** (self.BODY_BITS + self.FOOTER_BITS) - 1)) << self.HEADER_BITS
@@ -295,15 +319,19 @@ class Bitmap:
             ary: np.ndarray,
             byte_offset: int
     ) -> None:
+        """Fix up column headers/footers for all array entries."""
+
+        # TODO: don't leak header/footer across page boundaries
+
         # Propagate new value into neighbouring byte headers/footers if
         # necessary
         if byte_offset == 0:
-            # Need to also update the 3-bit footer of the preceding column
+            # Need to also update the footer of the preceding column
             shifted_left = np.roll(ary, -1, axis=1)
             self._fix_column_left(ary, shifted_left)
 
-        elif byte_offset == 3:
-            # Need to also update the 3-bit header of the next column
+        elif byte_offset == (self.SCREEN_BYTES - 1):
+            # Need to also update the header of the next column
             shifted_right = np.roll(ary, 1, axis=1)
             self._fix_column_right(ary, shifted_right)
 
@@ -340,22 +368,25 @@ class Bitmap:
             cls,
             data: IntOrArray,
             byte_offset: int) -> IntOrArray:
-        """Masks and shifts data into the MASKED_BITS range."""
+        """Masks and shifts packed data into the MASKED_BITS range."""
         res = (data & cls.BYTE_MASKS[byte_offset]) >> (
             cls.BYTE_SHIFTS[byte_offset])
         assert np.all(res <= 2 ** cls.MASKED_BITS)
         return res
 
+    # Can't cache all possible values but this seems to give a good enough hit
+    # rate without costing too much memory
     # TODO: unit tests
-    @functools.lru_cache(None)
+    @functools.lru_cache(10 ** 6)
     def byte_pair_difference(
             self,
             byte_offset: int,
             old_packed: np.uint64,
             content: np.uint8
     ) -> np.uint16:
-        old_pixels = self.mask_and_shift_data(
-            old_packed, byte_offset)
+        """Compute effect of storing a new content byte within packed data."""
+
+        old_pixels = self.mask_and_shift_data(old_packed, byte_offset)
         new_pixels = self.mask_and_shift_data(
             self.masked_update(byte_offset, old_packed, content), byte_offset)
 
@@ -368,15 +399,24 @@ class Bitmap:
             source: "Bitmap",
             is_aux: bool
     ) -> np.ndarray:
+        """Compute edit distance matrix from source bitmap."""
         return self._diff_weights(source.packed, is_aux)
 
+    # TODO: unit test
     def _diff_weights(
             self,
             source_packed: np.ndarray,
             is_aux: bool,
             content: np.uint8 = None
     ) -> np.ndarray:
-        """Computes diff from source_packed to self.packed"""
+        """Computes edit distance matrix from source_packed to self.packed
+
+        If content is set, the distance will be computed as if this value
+        was stored into each offset position of source_packed, i.e. to
+        allow evaluating which offsets (if any) should be chosen for storing
+        this content byte.
+        """
+
         diff = np.ndarray((32, 256), dtype=np.int)
 
         offsets = self._byte_offsets(is_aux)
@@ -384,63 +424,173 @@ class Bitmap:
         dists = []
         for o in offsets:
             if content is not None:
-                source_packed = self.masked_update(o, source_packed, content)
-                self._fix_array_neighbours(source_packed, o)
+                compare_packed = self.masked_update(o, source_packed, content)
+                self._fix_array_neighbours(compare_packed, o)
+            else:
+                compare_packed = source_packed
 
             # Pixels influenced by byte offset o
-            source_pixels = self.mask_and_shift_data(source_packed, o)
+            source_pixels = self.mask_and_shift_data(compare_packed, o)
             target_pixels = self.mask_and_shift_data(self.packed, o)
 
-            # Concatenate 13-bit source and target into 26-bit values
+            # Concatenate N-bit source and target into 2N-bit values
             pair = (source_pixels << self.MASKED_BITS) + target_pixels
             dist = self.edit_distances(self.palette)[o][pair].reshape(
                 pair.shape)
             dists.append(dist)
 
+        # Interleave even/odd columns
         diff[:, 0::2] = dists[0]
         diff[:, 1::2] = dists[1]
 
         return diff
 
+    def _check_consistency(self):
+        """Sanity check that headers and footers are consistent."""
+
+        headers = np.roll(self._make_header(self.packed), 1, axis=1).astype(
+            np.uint64)
+
+        footers = np.roll(self._make_footer(self.packed), -1, axis=1).astype(
+            np.uint64)
+
+        mask_hf = np.uint64(0b1110000000000000000000000000000111)
+
+        res = (self.packed ^ headers ^ footers) & mask_hf
+        nz = np.transpose(np.nonzero(res))
+
+        ok = True
+        if nz.size != 0:
+            for p, o in nz.tolist():
+                if o == 0 or o == 127:
+                    continue
+                ok = False
+                print(p, o, bin(self.packed[p, o - 1]),
+                                bin(headers[p, o]),
+                      bin(self.packed[p, o]),
+                      bin(self.packed[p, o + 1]), bin(footers[p, o]),
+                      bin(res[p, o])
+                      )
+            assert ok
+
     # TODO: unit tests
     def compute_delta(
             self,
             content: int,
-            old: np.ndarray,
+            diff_weights: np.ndarray,
             is_aux: bool
     ) -> np.ndarray:
-        # TODO: use error edit distance
+        """Compute which content stores introduce the least additional error.
 
-        diff = self._diff_weights(self.packed, is_aux, content)
+        We compute the effect of storing content at all possible offsets
+        within self.packed, and then subtract the previous diff weights.
+
+        Negative values indicate that the new content value is closer to the
+        target than the current content.
+        """
+        # TODO: use error edit distance?
+
+        new_diff = self._diff_weights(self.packed, is_aux, content)
 
         # TODO: try different weightings
-        return (diff * 5) - old
+        return (new_diff * 5) - diff_weights
 
 
 class HGRBitmap(Bitmap):
+    """Packed bitmap representation of HGR screen memory.
+
+    The HGR display is encoded in a somewhat complicated way, so we have to
+    do a bit of work to turn it into a useful format.
+
+    Each screen byte consists of a palette bit (7) and 7 data bits (0..6)
+
+    Each non-palette bit turns on two consecutive display dots, with bit 6
+    repeated a third time.  This third dot may or may not be overwritten by the
+    effect of the next byte.
+
+    Turning on the palette bit shifts that byte's dots right by one
+    position.
+
+    Given two neighbouring screen bytes Aaaaaaaa, Bbbbbbbb (at even and odd
+    offsets), where capital letter indicates the position of the palette bit,
+    we use the following 22-bit packed representation:
+
+        2211111111110000000000  <-- bit position in uint22
+        1098765432109876543210
+        ffFbbbbbbbBAaaaaaaaHhh
+
+    h and f are headers/footers derived from the neighbouring screen bytes.
+
+    Since our colour artifact model (see colours.py) uses a sliding 4-bit window
+    onto the dot string, we need to also include a 3-bit header and footer
+    to account for the influence from/on neighbouring bytes, i.e. adjacent
+    packed values.  These are just the low/high 2 data bits of the 16-bit
+    body of those neighbouring columns, plus the corresponding palette bit.
+
+    This 22-bit packed representation is sufficient to compute the effects
+    (on pixel colours) of storing a byte at even or odd offsets.  From it we
+    can extract the bit stream of displayed HGR dots, and the mapping to pixel
+    colours follows the HGRColours bitmap, see colours.py.
+
+    We put the two A/B palette bits next to each other so that we can
+    mask a contiguous range of bits whose colours influence/are influenced by
+    storing a byte at a given offset.
+
+    We need to mask out bit subsequences of size 3+8+3=14, i.e. the 8-bits
+    corresponding to the byte being stored, plus the neighbouring 3 bits that
+    influence it/are influenced by it.
+
+    Note that the masked representation has the same size for both offsets
+    (14 bits), but different meaning, since the palette bit is in a different
+    position.
+
+    With this masked representation, we can precompute an edit distance for the
+    pixel changes resulting from all possible HGR byte stores, see
+    make_edit_distance.py.
+
+    The edit distance matrix is encoded by concatenating the 14-bit source
+    and target masked values into a 28-bit pair, which indexes into the
+    edit_distance array to give the corresponding edit distance.
+    """
     NAME = 'HGR'
 
-    # hhhbbbbbbbpPBBBBBBBfff
-    # 0000000011111111111111
-    # 1111111111111100000000
+    # Size of packed representation, consisting of header + body + footer
+    HEADER_BITS = np.uint64(3)
+    # 2x 8-bit screen bytes
+    BODY_BITS = np.uint64(16)
+    FOOTER_BITS = np.uint64(3)
 
-    # Header:
-    #    0000000010000011
-    # Footer:
-    #    1100000100000000
+    # How many bits of packed representation are necessary to determine the
+    # effect of storing a memory byte, e.g. because they influence pixel
+    # colour or are influenced by other bits.
+    MASKED_BITS = np.uint64(14)  # 3 + 8 + 3
 
+    # How many coloured screen pixels we can extract from MASKED_BITS.  Note
+    # that this does not include the last 3 dots represented by the footer,
+    # since we don't have enough information to determine their colour (we
+    # would fall off the end of the 4-bit sliding window)
+    #
+    # From header: 3 bits (2 HGR pixels but might be shifted right by palette)
+    # From body: 7 bits doubled, plus possible shift from palette bit
+    MASKED_DOTS = np.uint64(18)  # 3 + (2 * 7) + 1
+
+    # List of bitmasks for extracting the subset of packed data corresponding
+    # to bits influencing/influenced by a given byte offset.  These must be
+    # a contiguous bit mask, i.e. so that after shifting they are enumerated
+    # by 0..2**MASKED_BITS-1
     BYTE_MASKS = [
         np.uint64(0b0000000011111111111111),
         np.uint64(0b1111111111111100000000)
     ]
     BYTE_SHIFTS = [np.uint64(0), np.uint64(8)]
-    MASKED_BITS = np.uint64(14)  # 3 + 8 + 3
-
-    HEADER_BITS = np.uint64(3)
-    # 7-bits doubled, plus possible shift from palette bit
-    BODY_BITS = np.uint64(15)
-    FOOTER_BITS = np.uint64(3)
 
+    # NTSC clock phase at first masked bit
+    #
+    # Each HGR byte offset has the same range of uint14 possible
+    # values and nominal colour pixels, but with different initial
+    # phases:
+    #   even: 0 (1 at start of 3-bit header)
+    #   odd:  2 (3)
     PHASES = [1, 3]
 
     def __init__(self, palette: pal.Palette, main_memory: MemoryMap):
@@ -448,10 +598,11 @@ class HGRBitmap(Bitmap):
 
     @staticmethod
     def _make_header(col: IntOrArray) -> IntOrArray:
-        # Header format is bits 5,6,0 of previous byte
-        # i.e. offsets 16, 17, 11
+        """Extract values to use as header of next column.
 
-        # return (col & np.uint64(0b111 << 16)) >> np.uint64(16)
+        Header format is bits 5,6,7 of previous screen byte
+        i.e. offsets 17, 18, 11 in packed representation
+        """
 
         return (
                 (col & np.uint64(0b1 << 11)) >> np.uint64(9) ^ (
@@ -459,11 +610,13 @@ class HGRBitmap(Bitmap):
         )
 
     def _body(self) -> np.ndarray:
-        # Body is in order
-        # a0 a1 a2 a3 a4 a5 a6 a7 b7 b0 b1 b2 b3 b4 b5 b6
-        # so that a) the header and footer have the same order
-        # across the two byte offsets, and b) so that they
-        # can be extracted as contiguous bit ranges
+        """Pack related screen bytes into an efficient representation.
+
+        Body is of the form:
+            bbbbbbbBAaaaaaaa
+
+        where capital indicates the palette bit.
+        """
 
         even = self.main_memory.page_offset[:, 0::2].astype(np.uint64)
         odd = self.main_memory.page_offset[:, 1::2].astype(np.uint64)
@@ -474,133 +627,46 @@ class HGRBitmap(Bitmap):
 
     @staticmethod
     def _make_footer(col: IntOrArray) -> IntOrArray:
-        # Footer format is bits 7,0,1 of next byte
-        # i.e. offsets 10,3,4
+        """Extract values to use as footer of previous column.
+
+        Footer format is bits 7,0,1 of next screen byte
+        i.e. offsets 10,3,4 in packed representation
+        """
 
         return (
                        (col & np.uint64(0b1 << 10)) >> np.uint64(10) ^ (
                        (col & np.uint64(0b11 << 3)) >> np.uint64(2))
                ) << np.uint64(19)
 
-    # # XXX move to make_data_tables
-    # def _pack(self) -> None:
-    #     """Pack main memory into (28+3)-bit uint64 array"""
-    #
-    #     # 00000000001111111111222222222233
-    #     # 01234567890123456789012345678901
-    #     # AAAABBBBCCCCDDd
-    #     #  AAAABBBBCCCCDd
-    #     #               DDEEEEFFFFGGGGg
-    #     #               dDDEEEEFFFFGGGg
-    #
-    #     # Even, P0: store unshifted (0..14)
-    #     # Even, P1: store shifted << 1 (1..15) (only need 1..14)
-    #
-    #     # Odd, P0: store shifted << 14 (14 .. 28) - set bit 14 as bit 0 of next
-    #     #  byte
-    #     # Odd, p1: store shifted << 15 (15 .. 29) (only need 15 .. 28) - set
-    #     #  bit 13 as bit 0 of next byte
-    #
-    #     # Odd overflow only matters for even, P1
-    #     # - bit 0 is either bit 14 if odd, P0 or bit 13 if odd, P1
-    #     # - but these both come from the undoubled bit 6.
-    #
-    #     main = self.main_memory.page_offset.astype(np.uint64)
-    #
-    #     # Double 7-bit pixel data from a into 14-bit fat pixels, and extend MSB
-    #     # into 15-bits tohandle case when subsequent byte has palette bit set,
-    #     # i.e. is right-shifted by 1 dot.  This only matters for even bytes
-    #     # with P=0 that are followed by odd bytes with P=1; in other cases
-    #     # this extra bit will be overwritten.
-    #     double = (
-    #                  # Bit pos 6
-    #                      ((main & 0x40) << 8) + ((main & 0x40) << 7) + (
-    #                      (main & 0x40) << 6)) + (
-    #                  # Bit pos 5
-    #                      ((main & 0x20) << 6) + ((main & 0x20) << 5)) + (
-    #                  # Bit pos 4
-    #                      ((main & 0x10) << 5) + ((main & 0x10) << 4)) + (
-    #                  # Bit pos 3
-    #                      ((main & 0x08) << 4) + ((main & 0x08) << 3)) + (
-    #                  # Bit pos 2
-    #                      ((main & 0x04) << 3) + ((main & 0x04) << 2)) + (
-    #                  # Bit pos 1
-    #                      ((main & 0x02) << 2) + ((main & 0x02) << 1)) + (
-    #                  # Bit pos 0
-    #                      ((main & 0x01) << 1) + (main & 0x01))
-    #
-    #     a_even = main[:, ::2]
-    #     a_odd = main[:, 1::2]
-    #
-    #     double_even = double[:, ::2]
-    #     double_odd = double[:, 1::2]
-    #
-    #     # Place even offsets at bits 1..15 (P=1) or 0..14 (P=0)
-    #     packed = np.where(a_even & 0x80, double_even << 1, double_even)
-    #
-    #     # Place off offsets at bits 15..27 (P=1) or 14..27 (P=0)
-    #     packed = np.where(
-    #         a_odd & 0x80,
-    #         np.bitwise_xor(
-    #             np.bitwise_and(packed, (2 ** 15 - 1)),
-    #             double_odd << 15
-    #         ),
-    #         np.bitwise_xor(
-    #             np.bitwise_and(packed, (2 ** 14 - 1)),
-    #             double_odd << 14
-    #         )
-    #     )
-    #
-    #     # Patch up even offsets with P=1 with extended bit from previous odd
-    #     # column
-    #
-    #     previous_odd = np.roll(a_odd, 1, axis=1).astype(np.uint64)
-    #
-    #     packed = np.where(
-    #         a_even & 0x80,
-    #         # Truncate to 28-bits and set bit 0 from bit 6 of previous byte
-    #         np.bitwise_xor(
-    #             np.bitwise_and(packed, (2 ** 28 - 2)),
-    #             (previous_odd & (1 << 6)) >> 6
-    #         ),
-    #         # Truncate to 28-bits
-    #         np.bitwise_and(packed, (2 ** 28 - 1))
-    #     )
-    #
-    #     # Append first 3 bits of next even byte so we can correctly
-    #     # decode the effective colours at the end of the 28-bit tuple
-    #     trailing = np.roll(packed, -1, axis=1).astype(np.uint64)
-    #
-    #     packed = np.bitwise_xor(
-    #         packed,
-    #         (trailing & 0b111) << 28
-    #     )
-    #
-    #     self.packed = packed
-
     @staticmethod
     @functools.lru_cache(None)
-    def byte_offset(x_byte: int, is_aux: bool) -> int:
-        """Returns 0..1 offset in packed representation for a given x_byte."""
-        assert not is_aux
+    def byte_offset(page_offset: int, is_aux: bool) -> int:
+        """Returns 0..1 offset in packed representation for page_offset."""
 
-        is_odd = x_byte % 2 == 1
+        assert not is_aux
+        is_odd = page_offset % 2 == 1
 
         return 1 if is_odd else 0
 
     @staticmethod
     @functools.lru_cache(None)
     def _byte_offsets(is_aux: bool) -> Tuple[int, int]:
+        """Return byte offsets within packed data for AUX/MAIN memory."""
+
         assert not is_aux
         return 0, 1
 
     @staticmethod
     @functools.lru_cache(None)
     def _double_pixels(int7: int) -> int:
+        """Each bit 0..6 controls two hires dots.
 
-        # Input bit 6 is repeated 3 times in case the neighbouring byte is
-        # delayed (right-shifted by one dot) due to the palette bit being set.
-        # Care needs to be taken to mask this out when overwriting.
+        Input bit 6 is repeated 3 times in case the neighbouring byte is
+        delayed (right-shifted by one dot) due to the palette bit being set,
+        which means the effect of this byte is "extended" by an extra dot.
+
+        Care needs to be taken to mask this out when overwriting.
+        """
         double = (
             # Bit pos 6
                 ((int7 & 0x40) << 8) + ((int7 & 0x40) << 7) + (
@@ -608,32 +674,38 @@ class HGRBitmap(Bitmap):
                 # Bit pos 5
                 ((int7 & 0x20) << 6) + ((int7 & 0x20) << 5) +
                 # Bit pos 4
-                ((int7 & 0x10) << 5) + ((int7 & 0x10) << 4) + (
-                    # Bit pos 3
-                        ((int7 & 0x08) << 4) + ((int7 & 0x08) << 3) +
-                        # Bit pos 2
-                        ((int7 & 0x04) << 3) + ((int7 & 0x04) << 2) +
-                        # Bit pos 1
-                        ((int7 & 0x02) << 2) + ((int7 & 0x02) << 1) +
-                        # Bit pos 0
-                        ((int7 & 0x01) << 1) + (int7 & 0x01))
+                ((int7 & 0x10) << 5) + ((int7 & 0x10) << 4) +
+                # Bit pos 3
+                ((int7 & 0x08) << 4) + ((int7 & 0x08) << 3) +
+                # Bit pos 2
+                ((int7 & 0x04) << 3) + ((int7 & 0x04) << 2) +
+                # Bit pos 1
+                ((int7 & 0x02) << 2) + ((int7 & 0x02) << 1) +
+                # Bit pos 0
+                ((int7 & 0x01) << 1) + (int7 & 0x01)
         )
 
         return double
 
     @classmethod
     def to_dots(cls, masked_val: int, byte_offset: int) -> int:
+        """Convert masked representation to bit sequence of display dots.
+
+        Packed representation is of the form:
+            ffFbbbbbbbBAaaaaaaaHhh
+
+        where capital indicates the palette bit.
+
+        Each non-palette bit turns on two display dots, with bit 6 repeated
+        a third time.  This may or may not be overwritten by the next byte.
+
+        Turning on the palette bit shifts that byte's dots right by one
+        position.
+        """
 
         # Assert 14-bit representation
         assert (masked_val & (2 ** 14 - 1)) == masked_val
 
-        # Unpack hhHaaaaaaaABbbbbbbbFff
-
-        # --> hhhaaaaaaaaaaaaaabbbb (P=0, P=0, P=0)
-        #     hhhaaaaaaaaaaaaaabbbb (P=1, P=0, P=0)
-        #     hhhhaaaaaaaaaaaaabbbb (P=1, P=1, P=0)
-        #     hhhhaaaaaaaaaaaaaabbb (P=1, P=1, P=1)
-
         # Take top 3 bits from header (plus duplicated MSB) not 4, because if it
         # is palette-shifted then we don't know what is in bit 0
         h = (masked_val & 0b111) << 5
@@ -641,11 +713,11 @@ class HGRBitmap(Bitmap):
         res = cls._double_pixels(h & 0x7f) >> (11 - hp)
 
         if byte_offset == 0:
-            # Offset 0: hhHaaaaaaaABbb
+            # Offset 0: bbBAaaaaaaaHhh
             b = (masked_val >> 3) & 0xff
             bp = (b & 0x80) >> 7
         else:
-            # Offset 1: aaABbbbbbbbFff
+            # Offset 1: ffFbbbbbbbBAaa
             bp = (masked_val >> 3) & 0x01
             b = ((masked_val >> 4) & 0x7f) ^ (bp << 7)
 
@@ -664,7 +736,6 @@ class HGRBitmap(Bitmap):
         res ^= cls._double_pixels(f & 0x7f) << (17 + fp)
         return res & (2 ** 21 - 1)
 
-    # XXX test
     @staticmethod
     def masked_update(
             byte_offset: int,
@@ -694,37 +765,99 @@ class HGRBitmap(Bitmap):
 
 
 class DHGRBitmap(Bitmap):
-    # NOTE: See https://github.com/numpy/numpy/issues/2524 and related issues
-    # for why we have to cast things explicitly to np.uint64 - type promotion
-    # to uint64 is broken in numpy :(
+    """Packed bitmap representation of DHGR screen memory.
+
+    The DHGR display encodes 7 pixels across interleaved 4-byte sequences
+    of AUX and MAIN memory, as follows:
+
+        PBBBAAAA PDDCCCCB PFEEEEDD PGGGGFFF
+        Aux N    Main N   Aux N+1  Main N+1  (N even)
+
+    Where A..G are the pixels, and P represents the (unused) palette bit.
+
+    This layout makes more sense when written as a (little-endian) 32-bit
+    integer:
+
+        33222222222211111111110000000000 <- bit pos in uint32
+        10987654321098765432109876543210
+        PGGGGFFFPFEEEEDDPDDCCCCBPBBBAAAA
+
+    i.e. apart from the palette bits this is a linear ordering of pixels,
+    when read from LSB to MSB (i.e. right-to-left).  Thus the screen layout
+    order of bits is opposite to the usual binary representation ordering.
+
+    We can simplify things by stripping out the palette bit and packing
+    down to a 28-bit integer representation:
+
+        33222222222211111111110000000000 <- bit pos in uint32
+        10987654321098765432109876543210
+
+            GGGGFFFFEEEEDDDDCCCCBBBBAAAA <- pixel A..G
+            3210321032103210321032103210 <- bit pos in A..G pixel
+
+            3333333222222211111110000000 <- byte offset 0..3
+
+    Since our colour artifact model (see colours.py) uses a sliding 4-bit window
+    onto the dot string, we need to also include a 3-bit header and footer
+    to account for the influence from/on neighbouring bytes, i.e. adjacent
+    packed values.  These are just the low/high 3 bits of the 28-bit body of
+    those neighbouring columns.
+
+    This gives a 34-bit packed representation that is sufficient to compute
+    the effects (on pixel colours) of storing a byte at one of the 0..3 offsets.
+
+    Note that this representation is also 1:1 with the actual displayed
+    DHGR dots.  The mapping to pixel colours follows the DHGRColours
+    bitmap, see colours.py.
+
+    Because the packed representation is contiguous, we need to mask out bit
+    subsequences of size 3+7+3=13, i.e. the 7-bits corresponding to the
+    byte being stored, plus the neighbouring 3 bits that influence it/are
+    influenced by it.
+
+    With this masked representation, we can precompute an edit distance for the
+    pixel changes resulting from all possible DHGR byte stores, see
+    make_edit_distance.py.
+
+    The edit distance matrix is encoded by concatenating the 13-bit source
+    and target masked values into a 26-bit pair, which indexes into the
+    edit_distance array to give the corresponding edit distance.
+    """
 
     NAME = 'DHGR'
 
-    # 3-bit header + 28-bit body + 3-bit footer
-    BYTE_MASKS = [
-        #    3333333222222211111110000000    <- byte 0.3
-        #
-        #           3333222222222211111111110000000000 <- bit pos in uint64
-        #           3210987654321098765432109876543210
-        #           tttGGGGFFFFEEEEDDDDCCCCBBBBAAAAhhh <- pixel A..G
-        #              3210321032103210321032103210    <- bit pos in A..G pixel
-        np.uint64(0b0000000000000000000001111111111111),  # byte 0 int13 mask
-        np.uint64(0b0000000000000011111111111110000000),  # byte 1 int13 mask
-        np.uint64(0b0000000111111111111100000000000000),  # byte 2 int13 mask
-        np.uint64(0b1111111111111000000000000000000000),  # byte 3 int13 mask
-    ]
-
-    # How much to right-shift bits after masking to bring into int13 range
-    BYTE_SHIFTS = [np.uint64(0), np.uint64(7), np.uint64(14), np.uint64(21)]
-
+    # Packed representation is 3 + 28 + 3 = 34 bits
     HEADER_BITS = np.uint64(3)
     BODY_BITS = np.uint64(28)
     FOOTER_BITS = np.uint64(3)
 
-    MASKED_BITS = np.uint64(13)
+    # Masked representation selecting the influence of each byte offset
+    MASKED_BITS = np.uint64(13)  # 7-bit body + 3-bit header + 3-bit footer
+
+    # Masking is 1:1 with screen dots, but we can't compute the colour of the
+    # last 3 dots because we fall off the end of the 4-bit sliding window
+    MASKED_DOTS = np.uint64(10)
+
+    # 3-bit header + 28-bit body + 3-bit footer
+    BYTE_MASKS = [
+        #           3333222222222211111111110000000000 <- bit pos in uint64
+        #           3210987654321098765432109876543210
+        #           tttGGGGFFFFEEEEDDDDCCCCBBBBAAAAhhh <- pixel A..G
+        #              3210321032103210321032103210    <- bit pos in A..G pixel
+        #
+        #              3333333222222211111110000000    <- byte offset 0..3
+        np.uint64(0b0000000000000000000001111111111111),  # byte 0 uint13 mask
+        np.uint64(0b0000000000000011111111111110000000),  # byte 1 uint13 mask
+        np.uint64(0b0000000111111111111100000000000000),  # byte 2 uint13 mask
+        np.uint64(0b1111111111111000000000000000000000),  # byte 3 uint13 mask
+    ]
+
+    # How much to right-shift bits after masking, to bring into uint13 range
+    BYTE_SHIFTS = [np.uint64(0), np.uint64(7), np.uint64(14), np.uint64(21)]
 
     # NTSC clock phase at first masked bit
-    # Each DHGR byte offset has the same range of int13 possible
+    #
+    # Each DHGR byte offset has the same range of uint13 possible
     # values and nominal colour pixels, but with different initial
     # phases:
     # AUX 0: 0 (1 at start of 3-bit header)
@@ -733,19 +866,27 @@ class DHGRBitmap(Bitmap):
     # MAIN 1: 1 (2)
     PHASES = [1, 0, 3, 2]
 
+    @staticmethod
+    def _make_header(col: IntOrArray) -> IntOrArray:
+        """Extract upper 3 bits of body for header of next column."""
+        return (col & np.uint64(0b111 << 28)) >> np.uint64(28)
+
     def _body(self) -> np.ndarray:
+        """Pack related screen bytes into an efficient representation.
+
+        For DHGR we first strip off the (unused) palette bit to produce
+        7-bit values, then interleave aux and main memory columns and pack
+        these 7-bit values into 28-bits.  This sequentially encodes 7 4-bit
+        DHGR pixels, which is the "repeating unit" of the DHGR screen, and
+        in a form that is convenient to operate on.
+
+        We also shift to make room for the 3-bit header.
+        """
+
         # Palette bit is unused for DHGR so mask it out
         aux = (self.aux_memory.page_offset & 0x7f).astype(np.uint64)
         main = (self.main_memory.page_offset & 0x7f).astype(np.uint64)
 
-        # XXX update
-        # Interleave aux and main memory columns and pack 7-bit masked values
-        # into a 28-bit value, with 3-bit header and footer.  This
-        # sequentially encodes 7 4-bit DHGR pixels, together with the
-        # neighbouring 3 bits that are necessary to decode artifact colours.
-        #
-        # See make_data_tables.py for more discussion about this representation.
-
         return (
                 (aux[:, 0::2] << 3) +
                 (main[:, 0::2] << 10) +
@@ -753,11 +894,6 @@ class DHGRBitmap(Bitmap):
                 (main[:, 1::2] << 24)
         )
 
-    @staticmethod
-    def _make_header(col: IntOrArray) -> IntOrArray:
-        """Extract upper 3 bits of body for header of next column."""
-        return (col & np.uint64(0b111 << 28)) >> np.uint64(28)
-
     @staticmethod
     def _make_footer(col: IntOrArray) -> IntOrArray:
         """Extract lower 3 bits of body for footer of previous column."""
@@ -765,9 +901,10 @@ class DHGRBitmap(Bitmap):
 
     @staticmethod
     @functools.lru_cache(None)
-    def byte_offset(x_byte: int, is_aux: bool) -> int:
-        """Returns 0..3 packed byte offset for a given x_byte and is_aux"""
-        is_odd = x_byte % 2 == 1
+    def byte_offset(page_offset: int, is_aux: bool) -> int:
+        """Returns 0..3 packed byte offset for a given page_offset and is_aux"""
+
+        is_odd = page_offset % 2 == 1
         if is_aux:
             if is_odd:
                 return 2
@@ -781,6 +918,8 @@ class DHGRBitmap(Bitmap):
     @staticmethod
     @functools.lru_cache(None)
     def _byte_offsets(is_aux: bool) -> Tuple[int, int]:
+        """Return byte offsets within packed data for AUX/MAIN memory."""
+
         if is_aux:
             offsets = (0, 2)
         else:
@@ -790,8 +929,11 @@ class DHGRBitmap(Bitmap):
 
     @classmethod
     def to_dots(cls, masked_val: int, byte_offset: int) -> int:
-        # For DHGR the 13-bit masked value is already a 13-bit dot sequence
-        # so no need to transform it.
+        """Convert masked representation to bit sequence of display dots.
+
+        For DHGR the 13-bit masked value is already a 13-bit dot sequence
+        so no need to transform it.
+        """
 
         return masked_val
 
@@ -804,7 +946,6 @@ class DHGRBitmap(Bitmap):
 
         Does not patch up headers/footers of neighbouring columns.
         """
-
         # Mask out 7-bit value where update will go
         masked_value = old_value & (
             ~np.uint64(0x7f << (7 * byte_offset + 3)))
diff --git a/transcoder/screen_test.py b/transcoder/screen_test.py
index eaf4cff..2bcbb5b 100644
--- a/transcoder/screen_test.py
+++ b/transcoder/screen_test.py
@@ -9,12 +9,18 @@ import colours
 from palette import Palette
 
 
+def binary(a):
+    return np.vectorize("{:032b}".format)(a)
+
+
 class TestDHGRBitmap(unittest.TestCase):
     def setUp(self) -> None:
         self.aux = screen.MemoryMap(screen_page=1)
         self.main = screen.MemoryMap(screen_page=1)
 
     def test_make_header(self):
+        """Header extracted correctly from packed representation."""
+
         self.assertEqual(
             0b100,
             screen.DHGRBitmap._make_header(
@@ -22,6 +28,8 @@ class TestDHGRBitmap(unittest.TestCase):
         )
 
     def test_make_footer(self):
+        """Footer extracted correctly from packed representation."""
+
         self.assertEqual(
             0b1010000000000000000000000000000000,
             screen.DHGRBitmap._make_footer(
@@ -29,6 +37,8 @@ class TestDHGRBitmap(unittest.TestCase):
         )
 
     def test_pixel_packing_offset_0(self):
+        """Screen byte packing happens correctly at offset 0."""
+
         #                              PBBBAAAA
         self.aux.page_offset[0, 0] = 0b11110101
         #                               PDDCCCCB
@@ -57,6 +67,8 @@ class TestDHGRBitmap(unittest.TestCase):
         self.assertEqual(2, np.count_nonzero(dhgr.packed))
 
     def test_pixel_packing_offset_1(self):
+        """Screen byte packing happens correctly at offset 1."""
+
         #                              PBBBAAAA
         self.aux.page_offset[0, 2] = 0b11110101
         #                               PDDCCCCB
@@ -90,6 +102,8 @@ class TestDHGRBitmap(unittest.TestCase):
         self.assertEqual(3, np.count_nonzero(dhgr.packed))
 
     def test_pixel_packing_offset_127(self):
+        """Screen byte packing happens correctly at offset 127."""
+
         #                              PBBBAAAA
         self.aux.page_offset[0, 254] = 0b11110101
         #                               PDDCCCCB
@@ -118,16 +132,22 @@ class TestDHGRBitmap(unittest.TestCase):
         self.assertEqual(2, np.count_nonzero(dhgr.packed))
 
     def test_byte_offset(self):
+        """Test the byte_offset behaviour."""
+
         self.assertEqual(0, screen.DHGRBitmap.byte_offset(0, is_aux=True))
         self.assertEqual(1, screen.DHGRBitmap.byte_offset(0, is_aux=False))
         self.assertEqual(2, screen.DHGRBitmap.byte_offset(1, is_aux=True))
         self.assertEqual(3, screen.DHGRBitmap.byte_offset(1, is_aux=False))
 
     def test_byte_offsets(self):
+        """Test the _byte_offsets behaviour."""
+
         self.assertEqual((0, 2), screen.DHGRBitmap._byte_offsets(is_aux=True))
         self.assertEqual((1, 3), screen.DHGRBitmap._byte_offsets(is_aux=False))
 
     def test_mask_and_shift_data(self):
+        """Verify that mask_and_shift_data extracts the right bit positions."""
+
         int13_max = np.uint64(2 ** 13 - 1)
         int34_max = np.uint64(2 ** 34 - 1)
 
@@ -152,6 +172,8 @@ class TestDHGRBitmap(unittest.TestCase):
             )
 
     def test_masked_update(self):
+        """Verify that masked_update updates the expected bit positions."""
+
         self.assertEqual(
             0b0000000000000000000000001111111000,
             screen.DHGRBitmap.masked_update(
@@ -204,6 +226,8 @@ class TestDHGRBitmap(unittest.TestCase):
         ))
 
     def test_apply(self):
+        """Test that apply() correctly updates neighbours."""
+
         dhgr = screen.DHGRBitmap(
             main_memory=self.main, aux_memory=self.aux, palette=Palette.NTSC)
 
@@ -292,6 +316,8 @@ class TestDHGRBitmap(unittest.TestCase):
             dhgr.packed[12, 17])
 
     def test_fix_array_neighbours(self):
+        """Test that _fix_array_neighbours DTRT after masked_update."""
+
         dhgr = screen.DHGRBitmap(
             main_memory=self.main, aux_memory=self.aux, palette=Palette.NTSC)
 
@@ -326,15 +352,13 @@ class TestDHGRBitmap(unittest.TestCase):
         )
 
 
-def binary(a):
-    return np.vectorize("{:032b}".format)(a)
-
-
 class TestHGRBitmap(unittest.TestCase):
     def setUp(self) -> None:
         self.main = screen.MemoryMap(screen_page=1)
 
     def test_make_header(self):
+        """Header extracted correctly from packed representation."""
+
         self.assertEqual(
             0b111,
             screen.HGRBitmap._make_header(
@@ -349,6 +373,8 @@ class TestHGRBitmap(unittest.TestCase):
         )
 
     def test_make_footer(self):
+        """Footer extracted correctly from packed representation."""
+
         self.assertEqual(
             0b1110000000000000000000,
             screen.HGRBitmap._make_footer(
@@ -363,6 +389,8 @@ class TestHGRBitmap(unittest.TestCase):
         )
 
     def test_pixel_packing_p0_p0(self):
+        """Screen byte packing happens correctly with P=0, P=0 palette bits."""
+
         #                               PDCCBBAA
         self.main.page_offset[0, 0] = 0b01000011
         #                               PGGFFEED
@@ -378,6 +406,8 @@ class TestHGRBitmap(unittest.TestCase):
         )
 
     def test_pixel_packing_p0_p1(self):
+        """Screen byte packing happens correctly with P=0, P=1 palette bits."""
+
         #                               PDCCBBAA
         self.main.page_offset[0, 0] = 0b01000011
         #                               PGGFFEED
@@ -393,6 +423,8 @@ class TestHGRBitmap(unittest.TestCase):
         )
 
     def test_pixel_packing_p1_p0(self):
+        """Screen byte packing happens correctly with P=1, P=0 palette bits."""
+
         #                               PDCCBBAA
         self.main.page_offset[0, 0] = 0b11000011
         #                               PGGFFEED
@@ -408,6 +440,8 @@ class TestHGRBitmap(unittest.TestCase):
         )
 
     def test_pixel_packing_p1_p1(self):
+        """Screen byte packing happens correctly with P=1, P=1 palette bits."""
+
         #                               PDCCBBAA
         self.main.page_offset[0, 0] = 0b11000011
         #                               PGGFFEED
@@ -422,8 +456,8 @@ class TestHGRBitmap(unittest.TestCase):
             want, got, "\n%s\n%s" % (binary(want), binary(got))
         )
 
-    def test_masked_update(self):
-
+    def test_apply(self):
+        """Test that header, body and footer are placed correctly."""
         hgr = screen.HGRBitmap(main_memory=self.main, palette=Palette.NTSC)
 
         hgr.apply(0, 0, False, 0b11000011)
@@ -436,7 +470,25 @@ class TestHGRBitmap(unittest.TestCase):
             want, got, "\n%s\n%s" % (binary(want), binary(got))
         )
 
+        # Now check with 4 consecutive bytes, i.e. even/odd pair plus the
+        # neighbouring header/footer.
+        hgr = screen.HGRBitmap(main_memory=self.main, palette=Palette.NTSC)
+
+        hgr.apply(1, 197, False, 128)
+        hgr.apply(1, 198, False, 143)
+        hgr.apply(1, 199, False, 192)
+        hgr.apply(1, 200, False, 128)
+
+        want = 0b0011000000110001111100
+        got = hgr.packed[1, 199 // 2]
+
+        self.assertEqual(
+            want, got, "\n%s\n%s" % (binary(want), binary(got))
+        )
+
     def test_double_pixels(self):
+        """Verify behaviour of _double_pixels."""
+
         want = 0b111001100110011
         got = screen.HGRBitmap._double_pixels(0b1010101)
 
@@ -445,6 +497,8 @@ class TestHGRBitmap(unittest.TestCase):
         )
 
     def test_to_dots_offset_0(self):
+        """Verify to_dots behaviour with byte_offset=0"""
+
         # Header has P=0, Body has P=0
         want = 0b00000000000000000111
         got = screen.HGRBitmap.to_dots(0b00000000000011, 0)
@@ -510,6 +564,8 @@ class TestHGRBitmap(unittest.TestCase):
         )
 
     def test_to_dots_offset_1(self):
+        """Verify to_dots behaviour with byte_offset=1"""
+
         # Header has P=0, Body has P=0
         want = 0b000000000000000000111
         got = screen.HGRBitmap.to_dots(0b00000000000011, 1)
@@ -576,6 +632,8 @@ class TestHGRBitmap(unittest.TestCase):
 
 
 class TestNominalColours(unittest.TestCase):
+    """Tests that screen pixel values produce expected colour sequences."""
+
     def setUp(self) -> None:
         self.main = screen.MemoryMap(screen_page=1)
 
@@ -658,10 +716,12 @@ class TestNominalColours(unittest.TestCase):
                 init_phase=screen.HGRBitmap.PHASES[1])
         )
 
-    # See Figure 8.15 from Sather, "Understanding the Apple IIe"
+    # The following tests check for the extended/truncated behaviour across
+    # byte boundaries when palette bits mismatch.  See Figure 8.15 from
+    # Sather, "Understanding the Apple IIe".
 
     def test_nominal_colours_sather_even_1(self):
-        # Extend violet into light blue
+        """Extend violet into light blue."""
 
         #                               PDCCBBAA
         self.main.page_offset[0, 0] = 0b01000000
@@ -702,7 +762,7 @@ class TestNominalColours(unittest.TestCase):
         )
 
     def test_nominal_colours_sather_even_2(self):
-        # Cut off blue with black to produce dark blue
+        """Cut off blue with black to produce dark blue."""
 
         #                               PDCCBBAA
         self.main.page_offset[0, 0] = 0b11000000
@@ -742,7 +802,7 @@ class TestNominalColours(unittest.TestCase):
         )
 
     def test_nominal_colours_sather_even_3(self):
-        # Cut off blue with green to produce aqua
+        """Cut off blue with green to produce aqua."""
 
         #                               PDCCBBAA
         self.main.page_offset[0, 0] = 0b11000000
@@ -782,7 +842,7 @@ class TestNominalColours(unittest.TestCase):
         )
 
     def test_nominal_colours_sather_even_4(self):
-        # Cut off white with black to produce pink
+        """Cut off white with black to produce pink."""
 
         #                               PDCCBBAA
         self.main.page_offset[0, 0] = 0b11100000
@@ -822,10 +882,10 @@ class TestNominalColours(unittest.TestCase):
         )
 
     def test_nominal_colours_sather_even_5(self):
-        # Cut off orange-black with green to produce bright green
+        """Cut off orange-black with green to produce bright green.
 
-        # "Bright" here is because the sequence of pixels has high intensity
-        # Orange-Orange-Yellow-Yellow-Green-Green
+        "Bright" here is because the sequence of pixels has high intensity
+        Orange-Orange-Yellow-Yellow-Green-Green."""
 
         #                               PDCCBBAA
         self.main.page_offset[0, 0] = 0b10100000
@@ -865,7 +925,7 @@ class TestNominalColours(unittest.TestCase):
         )
 
     def test_nominal_colours_sather_odd_1(self):
-        # Extend green into light brown
+        """Extend green into light brown."""
 
         #                               PDCCBBAA
         self.main.page_offset[0, 1] = 0b01000000
@@ -905,7 +965,7 @@ class TestNominalColours(unittest.TestCase):
         )
 
     def test_nominal_colours_sather_odd_2(self):
-        # Cut off orange with black to produce dark brown
+        """Cut off orange with black to produce dark brown."""
 
         #                               PDCCBBAA
         self.main.page_offset[0, 1] = 0b11000000
@@ -945,7 +1005,7 @@ class TestNominalColours(unittest.TestCase):
         )
 
     def test_nominal_colours_sather_odd_3(self):
-        # Cut off orange with violet to produce pink
+        """Cut off orange with violet to produce pink."""
 
         #                               PDCCBBAA
         self.main.page_offset[0, 1] = 0b11000000
@@ -985,7 +1045,7 @@ class TestNominalColours(unittest.TestCase):
         )
 
     def test_nominal_colours_sather_odd_4(self):
-        # Cut off white with black to produce aqua
+        """Cut off white with black to produce aqua."""
 
         #                               PDCCBBAA
         self.main.page_offset[0, 1] = 0b11100000
@@ -1025,10 +1085,11 @@ class TestNominalColours(unittest.TestCase):
         )
 
     def test_nominal_colours_sather_odd_5(self):
-        # Cut off blue-black with violet to produce bright violet
+        """Cut off blue-black with violet to produce bright violet.
 
-        # "Bright" here is because the sequence of pixels has high intensity
-        # Blue-Blue-Light Blue-Light Blue-Violet-Violet
+        "Bright" here is because the sequence of pixels has high intensity
+        Blue-Blue-Light Blue-Light Blue-Violet-Violet.
+        """
 
         #                               PDCCBBAA
         self.main.page_offset[0, 1] = 0b10100000
diff --git a/transcoder/video.py b/transcoder/video.py
index 117bb7c..b8ffd12 100644
--- a/transcoder/video.py
+++ b/transcoder/video.py
@@ -14,7 +14,7 @@ from video_mode import VideoMode
 
 
 class Video:
-    """Apple II screen memory map encoding a bitmapped frame."""
+    """Encodes sequence of images into prioritized screen byte changes."""
 
     CLOCK_SPEED = 1024 * 1024  # type: int
 
@@ -58,6 +58,8 @@ class Video:
             self.aux_update_priority = np.zeros((32, 256), dtype=np.int)
 
     def tick(self, ticks: int) -> bool:
+        """Keep track of when it is time for a new image frame."""
+
         if ticks >= (self.ticks_per_frame * self.frame_number):
             self.frame_number += 1
             return True
@@ -68,7 +70,8 @@ class Video:
             target: screen.MemoryMap,
             is_aux: bool,
     ) -> Iterator[opcodes.Opcode]:
-        """Update to match content of frame within provided budget."""
+        """Converge towards target frame in priority order of edit distance."""
+
         if is_aux:
             memory_map = self.aux_memory_map
             update_priority = self.aux_update_priority
@@ -114,7 +117,6 @@ class Video:
             )
 
         diff_weights = target_pixelmap.diff_weights(self.pixelmap, is_aux)
-
         # Don't bother storing into screen holes
         diff_weights[screen.SCREEN_HOLES] = 0
 
@@ -123,8 +125,6 @@ class Video:
         update_priority[diff_weights == 0] = 0
         update_priority += diff_weights
 
-        assert np.count_nonzero(update_priority[screen.SCREEN_HOLES]) == 0
-
         priorities = self._heapify_priorities(update_priority)
 
         content_deltas = {}
@@ -172,13 +172,12 @@ class Video:
                     is_aux
             ):
                 assert o != offset
-
                 assert not screen.SCREEN_HOLES[page, o], (
                         "Attempted to store into screen hole at (%d, %d)" % (
                     page, o))
 
                 if update_priority[page, o] == 0:
-                    # print("Skipping page=%d, offset=%d" % (page, o))
+                    # Someone already resolved this diff.
                     continue
 
                 # Make sure we don't end up considering this (page, offset)
@@ -195,7 +194,7 @@ class Video:
                     byte_offset, old_packed, content)
 
                 # Update priority for the offset we're emitting
-                update_priority[page, o] = p  # 0
+                update_priority[page, o] = p
 
                 source.page_offset[page, o] = content
                 self.pixelmap.apply(page, o, is_aux, content)
@@ -205,7 +204,7 @@ class Video:
                     # heap in case we can get back to fixing it exactly
                     # during this frame.  Otherwise we'll get to it later.
                     heapq.heappush(
-                        priorities, (-p, random.getrandbits(16), page, o))
+                        priorities, (-p, random.getrandbits(8), page, o))
 
                 offsets.append(o)
                 if len(offsets) == 3:
@@ -216,19 +215,30 @@ class Video:
                 offsets.append(offsets[0])
             yield (page + 32, content, offsets)
 
-        # TODO: there is still a bug causing residual diffs when we have
-        # apparently run out of work to do
+        # TODO: there is still a bug causing residual diffs when we have
+        # apparently run out of work to do
         if not np.array_equal(source.page_offset, target.page_offset):
             diffs = np.nonzero(source.page_offset != target.page_offset)
             for i in range(len(diffs[0])):
                 diff_p = diffs[0][i]
                 diff_o = diffs[1][i]
 
+                # For HGR, 0x00 or 0x7f may be visually equivalent to the same
+                # bytes with the high bit set (depending on neighbours), so
+                # skip reporting them.
+                if (source.page_offset[diff_p, diff_o] & 0x7f) == 0 and \
+                        (target.page_offset[diff_p, diff_o] & 0x7f) == 0:
+                    continue
+
+                if (source.page_offset[diff_p, diff_o] & 0x7f) == 0x7f and \
+                        (target.page_offset[diff_p, diff_o] & 0x7f) == 0x7f:
+                    continue
+
                 print("Diff at (%d, %d): %d != %d" % (
                     diff_p, diff_o, source.page_offset[diff_p, diff_o],
                     target.page_offset[diff_p, diff_o]
                 ))
-            # assert False
+                # assert False
 
         # If we run out of things to do, pad forever
         content = target.page_offset[0, 0]
@@ -237,6 +247,10 @@ class Video:
 
     @staticmethod
     def _heapify_priorities(update_priority: np.array) -> List:
+        """Build priority queue of (page, offset) ordered by update priority."""
+
+        # Use numpy vectorization to efficiently compute the list of
+        # (priority, random nonce, page, offset) tuples to be heapified.
         pages, offsets = update_priority.nonzero()
         priorities = [tuple(data) for data in np.stack((
             -update_priority[pages, offsets],
@@ -251,13 +265,17 @@ class Video:
 
     _OFFSETS = np.arange(256)
 
-    def _compute_error(self, page, content, target_pixelmap, old_error,
+    def _compute_error(self, page, content, target_pixelmap, diff_weights,
                        content_deltas, is_aux):
+        """Build priority queue of other offsets at which to store content.
+
+        Ordered by offsets which are closest to the target content value.
+        """
         # TODO: move this up into parent
         delta_screen = content_deltas.get(content)
         if delta_screen is None:
             delta_screen = target_pixelmap.compute_delta(
-                content, old_error, is_aux)
+                content, diff_weights, is_aux)
             content_deltas[content] = delta_screen
 
         delta_page = delta_screen[page]
@@ -266,7 +284,7 @@ class Video:
         priorities = delta_page[cond]
 
         deltas = [
-            (priorities[i], random.getrandbits(16), candidate_offsets[i])
+            (priorities[i], random.getrandbits(8), candidate_offsets[i])
             for i in range(len(candidate_offsets))
         ]
         heapq.heapify(deltas)
diff --git a/transcoder/video_mode.py b/transcoder/video_mode.py
index 6e33e36..a243088 100644
--- a/transcoder/video_mode.py
+++ b/transcoder/video_mode.py
@@ -4,5 +4,5 @@ import enum
 
 
 class VideoMode(enum.Enum):
-    HGR = 0
-    DHGR = 1
+    HGR = 0  # Hi-Res
+    DHGR = 1  # Double Hi-Res
diff --git a/transcoder/video_test.py b/transcoder/video_test.py
index 3b98c6c..eefa04f 100644
--- a/transcoder/video_test.py
+++ b/transcoder/video_test.py
@@ -67,11 +67,11 @@ class TestVideo(unittest.TestCase):
 
         diff = target_pixelmap.diff_weights(v.pixelmap, is_aux=True)
 
-        # Expect byte 0 to map to 0b01111111 01101101 XXX
+        # Masked offset 0 changes from 0001111111000 to 0001101101000
         expect0 = target_pixelmap.edit_distances(pal.ID)[0][
             0b00011111110000001101101000]
 
-        # Expect byte 2 to map to 0b000101010100 000011011000
+        # Masked offset 2 changes from 0001010101000 to 0000110110000
         expect2 = target_pixelmap.edit_distances(pal.ID)[2][
             0b00010101010000000110110000]