diff --git a/audio.py b/audio.py
index 4368e98..68db7e8 100644
--- a/audio.py
+++ b/audio.py
@@ -9,23 +9,21 @@ import video
 
 
 class Audio:
-    def encode_audio(self, audio):
-        for a in audio:
-            a = max(-30, min(a * 2, 32)) + 34
-            page = random.randint(32, 56)
-            content = random.randint(0,255)
-            offsets = [random.randint(0, 255) for _ in range(4)]
-            yield opcodes.TICK_OPCODES[(a, page)](content, offsets)
+    def __init__(
+            self, filename: str, normalization: float = 1.0):
+        self.filename = filename
+        self.normalization = normalization
 
+        # TODO: take into account that the available range is slightly offset
+        # as fraction of total cycle count?
+        self._tick_range = [4, 66]
+        self.cycles_per_tick = 73
 
-def main():
-    filename = "Computer Chronicles - 06x05 - The Apple II.mp4"
+        # TODO: round to divisor of video frame rate
+        self.sample_rate = 14340  # ~1024 * 1024 / cycles_per_tick (= 14364)
 
-    s = video.Video(frame_rate=None)
-    au = Audio()
-
-    with audioread.audio_open(filename) as f:
-        with open("out.bin", "wb") as out:
+    def audio_stream(self):
+        with audioread.audio_open(self.filename) as f:
             for buf in f.read_data(128 * 1024):
                 print(f.channels, f.samplerate, f.duration)
 
@@ -33,24 +31,29 @@ def main():
                     'float32').reshape((f.channels, -1), order='F')
 
                 a = librosa.core.to_mono(data)
-                a = librosa.resample(a, f.samplerate, 14000).flatten()
+                a = librosa.resample(a, f.samplerate,
+                                     self.sample_rate).flatten()
 
-                # Normalize to 95%ile
-                # norm = max(
-                #    abs(np.percentile(a, 5, axis=0)),
-                #    abs(np.percentile(a, 95, axis=0))
-                # )
-                # print(min(a),max(a))
-                # print(norm)
+                a /= 16384  # normalize to -1.0 .. 1.0
+                a *= self.normalization
 
-                # XXX how to estimate normalization without reading whole file?
-                norm = 12000
+                # Scale by 16 and clamp to -15 .. 16
+                a = (a * 16).astype(int)
+                a = np.clip(a, -15, 16)
 
-                a /= norm  # librosa.util.normalize(a)
-                a = (a * 32).astype(np.int)
+                yield from a
 
-                out.write(bytes(s.emit_stream(au.encode_audio(a))))
-    out.write(bytes(s.done()))
+
+def main():
+    filename = "Computer Chronicles - 06x05 - The Apple II.mp4"
+
+    s = video.Video(frame_rate=None)
+    au = Audio(filename, normalization=3)
+    # NOTE(review): encode_audio/emit_stream/done look removed by this patch
+    with open("out.bin", "wb") as out:
+        for b in s.emit_stream(au.encode_audio()):
+            out.write(bytearray([b]))
+        out.write(bytes(s.done()))
 
 
 if __name__ == "__main__":
diff --git a/frame_grabber.py b/frame_grabber.py
index 8cac929..c4a66b3 100644
--- a/frame_grabber.py
+++ b/frame_grabber.py
@@ -1,3 +1,7 @@
+import os
+import threading
+import queue
+import subprocess
 from typing import Iterable
 
 from PIL import Image
@@ -7,22 +11,64 @@ import numpy as np
 import screen
 
 
+def frame_grabber(filename: str) -> Iterable[Image.Image]:
+    """Yield each decoded video frame as a PIL Image in original resolution."""
+    for frame_array in skvideo.io.vreader(filename):
+        yield Image.fromarray(frame_array)
+
+
 def hgr140_frame_grabber(filename: str) -> Iterable[screen.MemoryMap]:
     bm_cls = screen.HGR140Bitmap
-    for frame in skvideo.io.vreader(filename):
-        im = Image.fromarray(frame)
-        im = im.resize((bm_cls.XMAX, bm_cls.YMAX))
-        im = im.convert("1")
-        im = np.array(im)
+    for frame in frame_grabber(filename):
+        frame = frame.resize((bm_cls.XMAX, bm_cls.YMAX))  # bitmap dimensions
+        frame = frame.convert("1")  # PIL mode "1": 1-bit pixels
+        frame = np.array(frame)
 
-        yield bm_cls(im).to_bytemap().to_memory_map(screen_page=1)
+        yield bm_cls(frame).to_bytemap().to_memory_map(screen_page=1)
 
 
-def bmp_frame_grabber(filename: str) -> Iterable[screen.MemoryMap]:
-    idx = 0
+def bmp2dhr_frame_grabber(filename: str) -> Iterable[screen.MemoryMap]:
+    """Yield each video frame as an HGR memory map, encoded by bmp2dhr."""
+
+    frame_dir = os.path.splitext(filename)[0]
+    os.makedirs(frame_dir, exist_ok=True)
+
+    q = queue.Queue(maxsize=10)
+
+    def worker():
+        try:
+            for idx, frame in enumerate(frame_grabber(filename)):
+                outfile = "%s/%08dC.BIN" % (frame_dir, idx)
+                bmpfile = "%s/%08d.bmp" % (frame_dir, idx)
+
+                try:
+                    os.stat(outfile)
+                except FileNotFoundError:
+                    frame = frame.resize((280, 192))
+                    frame.save(bmpfile)
+
+                    subprocess.call(
+                        ["/usr/local/bin/bmp2dhr", bmpfile, "hgr", "D9"])
+
+                    os.remove(bmpfile)
+
+                frame = np.fromfile(outfile, dtype=np.uint8)
+                q.put(frame)
+        finally:
+            # Always send the sentinel so the consumer never blocks forever
+            q.put(None)
+
+    # Daemon thread: an abandoned generator must not keep the process alive
+    t = threading.Thread(target=worker, daemon=True)
+    t.start()
+
     while True:
-        fn = "%s-%08dC.BIN" % (filename, idx)
-        frame = np.fromfile(fn, dtype=np.uint8)
+        frame = q.get()
+
+        if frame is None:
+            break
 
         yield screen.FlatMemoryMap(screen_page=1, data=frame).to_memory_map()
-        idx += 1
+        q.task_done()
+
+    t.join()
\ No newline at end of file
diff --git a/main.py b/main.py
index 33d2e18..f930b63 100644
--- a/main.py
+++ b/main.py
@@ -1,53 +1,27 @@
 import frame_grabber
+import movie
 import opcodes
 import screen
 import video
 
-MAX_OUT = 100 * 1024 * 1024
+MAX_OUT = 10 * 1024 * 1024
 VIDEO_FPS = 30
-APPLE_FPS = 7
+APPLE_FPS = 30
 
 
 def main():
-    #frames = frame_grabber.hgr140_frame_grabber(
-    #    "Computer Chronicles - 06x05 - The Apple II.mp4")
+    filename = "Computer Chronicles - 06x05 - The Apple II.mp4"
 
-    frames = frame_grabber.bmp_frame_grabber("cc/CC")
-
-    bytes_out = 0
-    sims = []
-    out_frames = 0
-
-    s = video.Video(APPLE_FPS)
-
-    # Assert that the opcode stream reconstructs the same screen
-    ds = video.Video()
-    decoder = opcodes.Decoder(s.state)
+    m = movie.Movie(filename, audio_normalization=3.0)
 
     with open("out.bin", "wb") as out:
-        for idx, frame in enumerate(frames):
-            if idx % (VIDEO_FPS // APPLE_FPS):
-                continue
+        for bytes_out, b in enumerate(m.emit_stream(m.encode()), 1):
+            out.write(bytearray([b]))
+            # bytes_out is the number of bytes written so far (1-based)
-            stream = bytes(s.emit_stream(s.encode_frame(frame)))
-
-            bytes_out += len(stream)
-            bytes_left = MAX_OUT - bytes_out
-
-            sim = screen.bitmap_similarity(
-                screen.HGRBitmap.from_bytemap(s.memory_map.to_bytemap()).bitmap,
-                screen.HGRBitmap.from_bytemap(frame.to_bytemap()).bitmap)
-            sims.append(sim)
-            out_frames += 1
-            print("Frame %d, %d bytes, similarity = %f" % (
-                idx, len(stream), sim))
-            out.write(stream[:bytes_left])
-
-            if bytes_left <= 0:
-                out.write(bytes(s.done()))
+            if bytes_out >= MAX_OUT:
                 break
 
-    print("Median similarity: %f" % sorted(sims)[out_frames//2])
+        out.write(bytes(m.done()))
 
 
 if __name__ == "__main__":
diff --git a/video.py b/video.py
index 9e72eee..390b9d3 100644
--- a/video.py
+++ b/video.py
@@ -1,4 +1,6 @@
 import functools
+import heapq
+import random
 from typing import Iterator, Tuple, Iterable
 
 import numpy as np
@@ -72,9 +74,13 @@ class Video:
 
     CLOCK_SPEED = 1024 * 1024
 
-    def __init__(self, frame_rate: int = 15, screen_page: int = 1,
-                 opcode_scheduler: scheduler.OpcodeScheduler = None):
+    def __init__(
+            self,
+            frame_rate: int = 30,
+            screen_page: int = 1,
+            opcode_scheduler: scheduler.OpcodeScheduler = None):
         self.screen_page = screen_page
+        self.frame_rate = frame_rate
 
         # Initialize empty
         self.memory_map = screen.MemoryMap(
@@ -83,118 +89,17 @@ class Video:
         self.scheduler = (
                 opcode_scheduler or scheduler.HeuristicPageFirstScheduler())
 
-        self.cycle_counter = opcodes.CycleCounter()
-
         # Accumulates pending edit weights across frames
         self.update_priority = np.zeros((32, 256), dtype=np.int)
 
-        self.state = opcodes.State(
-            self.cycle_counter, self.memory_map, self.update_priority)
-
-        self.frame_rate = frame_rate
-        self.stream_pos = 0
-        if self.frame_rate:
-            self.cycles_per_frame = self.CLOCK_SPEED // self.frame_rate
-        else:
-            self.cycles_per_frame = None
-
-        self._last_op = opcodes.Nop()
 
     def encode_frame(self, target: screen.MemoryMap) -> Iterator[
         opcodes.Opcode]:
         """Update to match content of frame within provided budget.
 
-        Emits encoded byte stream for rendering the image.
-
-        XXX update
-
-        The byte stream consists of offsets against a selected page (e.g. $20xx)
-        at which to write a selected content byte.  Those selections are
-        controlled by special opcodes emitted to the stream
-
-        Opcodes:
-          SET_CONTENT - new byte to write to screen contents
-          SET_PAGE - set new page to offset against (e.g. $20xx)
-          TICK - tick the speaker
-          DONE - terminate the video decoding
-
-        We group by offsets from page boundary (cf some other more
-        optimal starting point) because STA (..),y has 1 extra cycle if
-        crossing the page boundary.  Though maybe this would be worthwhile if
-        it optimizes the bytestream.
         """
 
-        # TODO: changes should be a class
-        changes = self._index_changes(self.memory_map, target)
-
-        yield from self.scheduler.schedule(changes)
-
-    @functools.lru_cache()
-    def _rle_cycles(self, run_length):
-        return opcodes.RLE(0, run_length).cycles
-
-    def _index_page(self, bits_different, target_content):
-        byte_cycles = opcodes.Store(0).cycles
-
-        cur_content = None
-        run_length = 0
-        run = []
-
-        # Number of changes in run for which >0 bits differ
-        num_changes_in_run = 0
-
-        # Total weight of differences accumulated in run
-        total_update_priority_in_run = 0
-
-        def end_run():
-            # Decide if it's worth emitting as a run vs single stores
-            run_cost = self._rle_cycles(run_length)
-            single_cost = byte_cycles * num_changes_in_run
-            # print("Run of %d cheaper than %d singles" % (
-            #     run_length, num_changes_in_run))
-
-            if run_cost < single_cost:
-                start_offset = run[0][1]
-
-                # print("Found run of %d * %2x at %2x" % (
-                #     run_length, cur_content, offset - run_length)
-                #       )
-                # print(run)
-                yield (
-                    total_update_priority_in_run, start_offset, cur_content,
-                    run_length
-                )
-            else:
-                for ch in run:
-                    if ch[0]:
-                        yield ch
-
-        for offset in range(256):
-            bd = bits_different[offset]
-            tc = target_content[offset]
-            if run and cur_content != tc:
-                # End of run
-
-                yield from end_run()
-
-                run = []
-                run_length = 0
-                num_changes_in_run = 0
-                total_update_priority_in_run = 0
-                cur_content = tc
-
-            if cur_content is None:
-                cur_content = tc
-
-            run_length += 1
-            run.append((bd, offset, tc, 1))
-            if bd:
-                num_changes_in_run += 1
-                total_update_priority_in_run += bd
-
-        if run:
-            # End of run
-            yield from end_run()
+        yield from self._index_changes(self.memory_map, target)
 
     def _index_changes(
             self,
@@ -209,9 +114,13 @@ class Video:
         diff_weights = np.zeros((32, 256), dtype=np.uint8)
 
         it = np.nditer(
-            source.page_offset ^ target.page_offset,
-            flags=['multi_index'])
+            source.page_offset ^ target.page_offset, flags=['multi_index'])
         while not it.finished:
+            # If no diff, don't need to bother
+            if not it[0]:
+                it.iternext()
+                continue
+
             diff_weights[it.multi_index] = edit_weight(
                 source.page_offset[it.multi_index],
                 target.page_offset[it.multi_index],
@@ -225,51 +134,82 @@ class Video:
 
         self.update_priority += diff_weights
 
-        for page in range(32):
-            for change in self._index_page(
-                    self.update_priority[page], target.page_offset[page]):
-                (
-                    total_priority_in_run, start_offset, target_content,
-                    run_length
-                ) = change
+        # Iterate in descending order of update priority and emit tuples
+        # encoding (page, content, [offsets])
 
-                # TODO: handle screen page
-                yield (
-                    total_priority_in_run, page + 32, start_offset,
-                    target_content, run_length
+        priorities = []
+        it = np.nditer(self.update_priority, flags=['multi_index'])
+        while not it.finished:
+            priority = it[0]
+            if not priority:
+                it.iternext()
+                continue
+
+            page, offset = it.multi_index
+            # Don't use deterministic order for page, offset
+            nonce = random.randint(0,255)
+            heapq.heappush(priorities, (-priority, nonce, page, offset))
+            it.iternext()
+
+        while priorities:  # heappop() on an empty heap raises IndexError
+            priority, _, page, offset = heapq.heappop(priorities)
+            priority = -priority
+            if page > (56-32):
+                continue
+            offsets = [offset]
+            content = target.page_offset[page, offset]
+            #print("Priority %d: page %d offset %d content %d" % (
+            #    priority, page, offset, content))
+
+            # Clear priority for the offset we're emitting
+            self.update_priority[page, offset] = 0
+
+            # Need to find 3 more offsets to fill this opcode
+
+            # Minimize the update_priority delta that would result from
+            # emitting this offset
+
+            # Find offsets that would have largest reduction in diff weight
+            # with this content byte, then order by update priority
+            deltas = {}
+            for o, p in enumerate(self.update_priority[page]):
+                if p == 0:
+                    continue
+
+                # If we store content at this offset, what is the new
+                # edit_weight from this content byte to the target
+                delta = edit_weight(
+                    content,
+                    target.page_offset[page, o],
+                    o % 2 == 1
                 )
+                #print("Offset %d prio %d: %d -> %d = %d" % (
+                #    o, p, content,
+                #    target.page_offset[page, o],
+                #    delta
+                #))
+                deltas.setdefault(delta, list()).append((p, o))
 
-    def _emit_bytes(self, _op):
-        # print("%04X:" % self.stream_pos)
-        for b in self.state.emit(self._last_op, _op):
-            yield b
-            self.stream_pos += 1
-        self._last_op = _op
+            for d in sorted(deltas):
+                # Within one residual error, take highest priority first
+                po = sorted(deltas[d], reverse=True)
+                for _, o in po:
+                    offsets.append(o)
+                    # Clear priority for the offset we're emitting (o, not
+                    # the primary offset, which was already cleared above)
+                    self.update_priority[page, o] = 0
+                    if len(offsets) == 4:
+                        break
+                if len(offsets) == 4:
+                    break
+
+            # Pad to 4 if we didn't find anything
+            for _ in range(len(offsets), 4):
+                offsets.append(offsets[0])
+
+            #print("Page %d, content %d: offsets %s" % (page+32, content,
+            #                                           offsets))
+            yield (page+32, content, offsets)
 
-    def emit_stream(self, ops: Iterable[opcodes.Opcode]) -> Iterator[int]:
-        self.cycle_counter.reset()
-        for op in ops:
-            # Keep track of where we are in TCP client socket buffer
-            socket_pos = self.stream_pos % 2048
-            if socket_pos >= 2045:
-                # May be about to emit a 3-byte opcode, pad out to last byte
-                # in frame
-                nops = 2047 - socket_pos
-                # print("At position %04x, padding with %d nops" % (
-                #    socket_pos, nops))
-                for _ in range(nops):
-                    yield from self._emit_bytes(opcodes.Nop())
-                yield from self._emit_bytes(opcodes.Ack())
-                # Ack falls through to nop
-                self._last_op = opcodes.Nop()
-            yield from self._emit_bytes(op)
 
-            if self.cycles_per_frame and (
-                    self.cycle_counter.cycles > self.cycles_per_frame):
-                print("Out of cycle budget")
-                return
-        # TODO: pad to cycles_per_frame with NOPs
 
-    def done(self) -> Iterator[int]:
-        """Terminate opcode stream."""
-        yield from self._emit_bytes(opcodes.Terminate())