In HeuristicPageFirstScheduler, don't use a deterministic ordering

of pages and content, since we may never get around to some of them across many frames. Instead, weight by total xor weight for the page, the (page, content) tuple and the offset list. Add some other scheduler variants - prefer content first, then page. This turns out to introduce a lot of colour fringing, since we may not ever get back to fix up the hanging bit.
2025-03-11 04:37:03 +00:00 · 2019-02-27 14:09:42 +00:00 · 2019-02-27 14:09:42 +00:00 · 4840efc41e
commit 4840efc41e
parent 0ac905a7aa
1 changed files with 119 additions and 1 deletions
--- a/scheduler.py
+++ b/scheduler.py
@ -1,5 +1,6 @@
 """Opcode schedulers."""

+import collections
 from typing import Iterator

 import opcodes
@ -10,8 +11,125 @@ class OpcodeScheduler:
        raise NotImplementedError


+class HeuristicContentFirstScheduler(OpcodeScheduler):
+    """Group by content first then page.
+
+    This has a fair bit of colour fringing because we aren't guaranteed to
+    get back to fixing up hanging bits within our frame window.  In practice
+    this also does not deal well with fine detail at higher frame rates.
+    """
+
+    def schedule(self, changes):
+        """Yield opcodes for ``changes``, grouped by content and then by page.
+
+        Each item of ``changes`` is unpacked as a 5-tuple
+        ``(xor_weight, page, offset, content, run_length)``.
+
+        Contents are visited in decreasing order of their total xor_weight.
+        Within a content, pages are visited in decreasing order of the
+        xor_weight accumulated for that (content, page) pair.  Within a
+        (page, content) pair, offsets are visited in decreasing order of
+        their individual xor_weight.
+
+        Yields ``opcodes.SetContent`` once per content, ``opcodes.SetPage``
+        once per page within that content, and then one ``opcodes.RLE``
+        (run_length > 1) or ``opcodes.Store`` (otherwise) per change.
+        """
+        # (page, content) -> list of (xor_weight, run_length, offset)
+        data = {}
+
+        # content -> total xor_weight summed over every page
+        content_weights = collections.defaultdict(int)
+        # content -> {page -> total xor_weight for that (content, page)}
+        content_page_weights = {}
+        for ch in changes:
+            xor_weight, page, offset, content, run_length = ch
+            data.setdefault((page, content), list()).append(
+                (xor_weight, run_length, offset))
+            content_weights[content] += xor_weight
+            content_page_weights.setdefault(content, collections.defaultdict(
+                int))[page] += xor_weight
+
+        # Weight each content and page within content by total xor weight and
+        # traverse in this order
+
+        # NOTE: the lambda parameter is named `p` but it receives a content
+        # value here (the keys of content_weights).
+        contents = sorted(
+            list(content_weights.keys()),
+            key=lambda p: content_weights[p], reverse=True)
+        for content in contents:
+            yield opcodes.SetContent(content)
+
+            page_weights = content_page_weights[content]
+
+            # NOTE: the lambda parameter is named `c` but it receives a page
+            # value here (the keys of page_weights).
+            pages = sorted(
+                list(page_weights.keys()),
+                key=lambda c: page_weights[c],
+                reverse=True)
+            for page in pages:
+                yield opcodes.SetPage(page)
+                # Heaviest individual changes first; x[0] is the xor_weight.
+                offsets = sorted(data[(page, content)], key=lambda x: x[0],
+                                 reverse=True)
+
+                # print("page %d content %d offsets %s" % (page, content,
+                #                                        offsets))
+                for (_, run_length, offset) in offsets:
+                    if run_length > 1:
+                        # print("Offset %d run length %d" % (
+                        #     offset, run_length))
+                        yield opcodes.RLE(offset, run_length)
+                    else:
+                        yield opcodes.Store(offset)
+
+
 class HeuristicPageFirstScheduler(OpcodeScheduler):
-    """Group by page first then content byte."""
+    """Group by page first then content byte.
+
+    Grouping by page (rather than content) means that we'll reduce the window
+    of time during which we have violated a colour invariant due to bits
+    hanging across byte boundaries.
+    """
+
+    # Median similarity: 0.862798 @ 15 fps, 10M output
+    def schedule(self, changes):
+        """Yield opcodes for ``changes``, grouped by page and then by content.
+
+        Each item of ``changes`` is unpacked as a 5-tuple
+        ``(xor_weight, page, offset, content, run_length)``.
+
+        Pages are visited in decreasing order of their total xor_weight.
+        Within a page, contents are visited in decreasing order of the
+        xor_weight accumulated for that (page, content) pair.  Within a
+        (page, content) pair, offsets are visited in decreasing order of
+        their individual xor_weight.
+
+        Yields ``opcodes.SetPage`` once per page, ``opcodes.SetContent``
+        once per content within that page, and then one ``opcodes.RLE``
+        (run_length > 1) or ``opcodes.Store`` (otherwise) per change.
+        """
+        # (page, content) -> list of (xor_weight, run_length, offset)
+        data = {}
+
+        # page -> total xor_weight summed over every content
+        page_weights = collections.defaultdict(int)
+        # page -> {content -> total xor_weight for that (page, content)}
+        page_content_weights = {}
+        for ch in changes:
+            xor_weight, page, offset, content, run_length = ch
+            data.setdefault((page, content), list()).append(
+                (xor_weight, run_length, offset))
+            page_weights[page] += xor_weight
+            page_content_weights.setdefault(page, collections.defaultdict(
+                int))[content] += xor_weight
+
+        # Weight each page and content within page by total xor weight and
+        # traverse in this order
+
+        pages = sorted(
+            list(page_weights.keys()),
+            key=lambda p: page_weights[p], reverse=True)
+        for page in pages:
+            yield opcodes.SetPage(page)
+
+            content_weights = page_content_weights[page]
+            contents = sorted(
+                list(content_weights.keys()),
+                key=lambda c: content_weights[c],
+                reverse=True)
+
+            for content in contents:
+                yield opcodes.SetContent(content)
+                # Heaviest individual changes first; x[0] is the xor_weight.
+                offsets = sorted(
+                    data[(page, content)],
+                    key=lambda x: x[0],
+                    reverse=True)
+
+                # print("page %d content %d offsets %s" % (page, content,
+                #                                         offsets))
+                for (_, run_length, offset) in offsets:
+                    if run_length > 1:
+                        # print("Offset %d run length %d" % (
+                        #     offset, run_length))
+                        yield opcodes.RLE(offset, run_length)
+                    else:
+                        yield opcodes.Store(offset)
+
+
+class OldHeuristicPageFirstScheduler(OpcodeScheduler):
+    """Group by page first then content byte.
+
+    This uses a deterministic order of pages and content bytes, and ignores
+    xor_weight altogether
+    """
+
+    # Median similarity: 0.854613 ( @ 15 fps, 10M output)
+    # is almost as good as HeuristicPageFirstScheduler -- despite the fact
+    # that we consistently fail to update some pages.  That means we should
+    # be measuring some notion of error persistence rather than just
+    # similarity

    def schedule(self, changes):
        data = {}