From 4840efc41e0cca0f75b626df31edab0b7defabef Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Wed, 27 Feb 2019 14:09:42 +0000
Subject: [PATCH] In HeuristicPageFirstScheduler, don't use a deterministic
 ordering of pages and content, since we may never get around to some of them
 across many frames.  Instead weight by total xor weight for the page, (page,
 content) tuple and offset list

Add some other scheduler variants
- prefer content first, then page.  This turns out to introduce a lot
  of colour fringing since we may not ever get back to fix up the
  hanging bit
---
 scheduler.py | 120 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 119 insertions(+), 1 deletion(-)

diff --git a/scheduler.py b/scheduler.py
index 53fa387..32a12b7 100644
--- a/scheduler.py
+++ b/scheduler.py
@@ -1,5 +1,6 @@
 """Opcode schedulers."""
 
+import collections
 from typing import Iterator
 
 import opcodes
@@ -10,8 +11,125 @@ class OpcodeScheduler:
         raise NotImplementedError
 
 
+class HeuristicContentFirstScheduler(OpcodeScheduler):
+    """Group by content first then page.
+
+    This has a fair bit of colour fringing because we aren't guaranteed to
+    get back to fixing up hanging bits within our frame window.  In practice
+    this also does not deal well with fine detail at higher frame rates.
+    """
+
+    def schedule(self, changes):
+        data = {}
+
+        content_weights = collections.defaultdict(int)
+        content_page_weights = {}
+        for ch in changes:
+            xor_weight, page, offset, content, run_length = ch
+            data.setdefault((page, content), list()).append(
+                (xor_weight, run_length, offset))
+            content_weights[content] += xor_weight
+            content_page_weights.setdefault(content, collections.defaultdict(
+                int))[page] += xor_weight
+
+        # Weight each content byte and page within content by total xor weight
+        # and traverse in this order
+
+        contents = sorted(
+            list(content_weights.keys()),
+            key=lambda p: content_weights[p], reverse=True)
+        for content in contents:
+            yield opcodes.SetContent(content)
+
+            page_weights = content_page_weights[content]
+
+            pages = sorted(
+                list(page_weights.keys()),
+                key=lambda c: page_weights[c],
+                reverse=True)
+            for page in pages:
+                yield opcodes.SetPage(page)
+                offsets = sorted(data[(page, content)], key=lambda x: x[0],
+                                 reverse=True)
+
+                # print("page %d content %d offsets %s" % (page, content,
+                #                                        offsets))
+                for (_, run_length, offset) in offsets:
+                    if run_length > 1:
+                        # print("Offset %d run length %d" % (
+                        #     offset, run_length))
+                        yield opcodes.RLE(offset, run_length)
+                    else:
+                        yield opcodes.Store(offset)
+
+
 class HeuristicPageFirstScheduler(OpcodeScheduler):
-    """Group by page first then content byte."""
+    """Group by page first then content byte.
+
+    Grouping by page (rather than content) means that we'll reduce the window
+    of time during which we have violated a colour invariant due to bits
+    hanging across byte boundaries.
+    """
+
+    # Median similarity: 0.862798 @ 15 fps, 10M output
+    def schedule(self, changes):
+        data = {}
+
+        page_weights = collections.defaultdict(int)
+        page_content_weights = {}
+        for ch in changes:
+            xor_weight, page, offset, content, run_length = ch
+            data.setdefault((page, content), list()).append(
+                (xor_weight, run_length, offset))
+            page_weights[page] += xor_weight
+            page_content_weights.setdefault(page, collections.defaultdict(
+                int))[content] += xor_weight
+
+        # Weight each page and content within page by total xor weight and
+        # traverse in this order
+
+        pages = sorted(
+            list(page_weights.keys()),
+            key=lambda p: page_weights[p], reverse=True)
+        for page in pages:
+            yield opcodes.SetPage(page)
+
+            content_weights = page_content_weights[page]
+            contents = sorted(
+                list(content_weights.keys()),
+                key=lambda c: content_weights[c],
+                reverse=True)
+
+            for content in contents:
+                yield opcodes.SetContent(content)
+                offsets = sorted(
+                    data[(page, content)],
+                    key=lambda x: x[0],
+                    reverse=True)
+
+                # print("page %d content %d offsets %s" % (page, content,
+                #                                         offsets))
+                for (_, run_length, offset) in offsets:
+                    if run_length > 1:
+                        # print("Offset %d run length %d" % (
+                        #     offset, run_length))
+                        yield opcodes.RLE(offset, run_length)
+                    else:
+                        yield opcodes.Store(offset)
+
+
+class OldHeuristicPageFirstScheduler(OpcodeScheduler):
+    """Group by page first then content byte.
+
+    This uses a deterministic order of pages and content bytes, and ignores
+    xor_weight altogether
+    """
+
+    # Median similarity: 0.854613 (@ 15 fps, 10M output)
+    # is almost as good as HeuristicPageFirstScheduler -- despite the fact
+    # that we consistently fail to update some pages.  That means we should
+    # be measuring some notion of error persistence rather than just
+    # similarity
 
     def schedule(self, changes):
         data = {}