From 10fa4bc72dcaa5aa8686119d30a285a97ef3cc97 Mon Sep 17 00:00:00 2001
From: kris <kris.kennaway@gmail.com>
Date: Wed, 27 Mar 2019 21:37:06 +0000
Subject: [PATCH] Proof of concept DHGR encoding/playback

- Every time we process an ACK opcode, toggle page 1/page 2 soft
  switches to steer subsequent writes between MAIN and AUX memory
- While I'm here, squeeze out some unnecessary operations from the
  buffer management

On the player side, this is implemented by maintaining two screen
memory maps, and alternating between opcode streams for each of them.
This is using entirely the wrong colour model for errors, but
surprisingly it already works pretty well in practice (and the frame
rate is acceptable on test videos).

DHGR/HGR could be made runtime selectable by adding a header byte that
determines whether to set the DHGR soft switches before initiating
the decode loop.

While I'm in here, fix op_terminate to clear keyboard strobe before
waiting.
---
 player/main.s         | 72 +++++++++++++++++++++++--------------------
 transcoder/movie.py   | 17 +++++++---
 transcoder/opcodes.py | 11 +++++--
 transcoder/video.py   | 62 ++++++++++++++++++++++---------------
 4 files changed, 97 insertions(+), 65 deletions(-)

diff --git a/player/main.s b/player/main.s
index fb12426..6a98fb2 100644
--- a/player/main.s
+++ b/player/main.s
@@ -304,8 +304,23 @@ exit_parmtable:
 
 init_mainloop:
     JSR hgr ; nukes the startup code we placed in HGR segment
+
+    STA $C050 ; GRAPHICS
+    STA $C057 ; HIRES
+    STA $C05E ; DHR
+    STA $C00D ; 80 COLUMN MODE
+
+    STA $C001 ; 80STOREON
+
+    ; Clear aux screen
+    STA $C055 ; AUX memory active
+    LDA #$20
+    JSR $F3EA
+
     STA fullscr
 
+    STA $C054 ; MAIN memory active
+
     ; establish invariant expected by decode loop
     LDX #$00
 
@@ -1256,11 +1271,11 @@ op_tick_64 63
 op_tick_66 63
 
 op_terminate:
-    ; Wait for keypress
+    LDA KBDSTRB ; clear strobe
+@0: ; Wait for keypress
     LDA KBD
-    BMI @1 ; key pressed
-    BPL op_terminate
-@1: LDA KBDSTRB ; clear strobe
+    BPL @0
+@1: ; key pressed
     JMP exit
 
 ; Manage W5100 socket buffer and ACK TCP stream.
@@ -1268,30 +1283,35 @@ op_terminate:
 ; In order to simplify the buffer management we expect this ACK opcode to consume
 ; the last 4 bytes in a 2K "TCP frame".  i.e. we can assume that we need to consume
 ; exactly 2K from the W5100 socket buffer.
+;
+; TODO: actually we are underrunning by 2 bytes currently, we've only consumed 2
+; bytes of 4 by this point.
 op_ack:
     BIT tick ; 4
 
-    LDA WDATA ; 4 dummy read of second-last byte in TCP frame
+    ; allow flip-flopping the PAGE1/PAGE2 soft switches to steer writes to MAIN/AUX screens
+    ; actually this allows touching any $C0XX soft-switch, in case that is useful somehow
+    LDA WDATA ; 4
+    STA @D+1 ; 4
+@D:
+    STA $C054 ; 4 low-byte is modified
     LDA WDATA ; 4 dummy read of last byte in TCP frame
 
     CLC ; 2
     LDA #>S0RXRD ; 2 NEED HIGH BYTE HERE
     STA WADRH ; 4
-    LDA #<S0RXRD ; 2
+    LDX #<S0RXRD ; 2
+    STX WADRL ; 4
 
-    STA WADRL ; 4
+    NOP ; 2
+    BIT tick ; 4 (36) ; does not affect Carry bit
+
+    ; No need to read/modify low byte since it is always guaranteed to be 0 (since we are at the end of a 2K frame)
     LDA WDATA ; 4 HIGH BYTE
-    LDX WDATA ; 4 LOW BYTE ; not sure if needed -- but we have cycles to spare so who cares!
 
-    ADC #$08 ; 2 ADD HIGH BYTE OF RECEIVED SIZE
-    BIT tick ; 4 (36)
-    TAY ; 2 SAVE
-
-    LDA #<S0RXRD ; 2
-    STA WADRL ; 4 Might not be needed, but have cycles to spare
-
-    STY WDATA ; 4 SEND HIGH BYTE
-    STX WDATA ; 4 SEND LOW BYTE
+    ADC #$08 ; 2 Add high byte of received size (always constant)
+    STX WADRL ; 4 Reset address pointer, still have it in X
+    STA WDATA ; 4 Store high byte (no need to store low byte since it's 0)
 
 ; SEND THE RECV COMMAND
     LDA #<S0CR ; 2
@@ -1300,6 +1320,7 @@ op_ack:
     STA WDATA ; 4
 
     NOP ; 2 ; see, we even have cycles left over!
+    NOP ; 2
 
     JMP CHECKRECV ; 3 (37 with following BIT tick)
 
@@ -1313,21 +1334,4 @@ CLOSECONN:
     LDA #SCDISCON ; DISCONNECT
     STA WDATA ; SEND COMMAND
 
-; CHECK FOR CLOSED STATUS
-
-;CHECKCLOSED:
-;    LDX #0
-;@L:
-;    LDA #<S0SR
-;    STA WADRL
-;    LDA WDATA
-;    BEQ ISCLOSED
-;    NOP
-;    NOP
-;    NOP
-;    INX
-;    BNE @L  ; DON'T WAIT FOREVER
-;ISCLOSED:
-;    RTS ; SOCKET IS CLOSED
-
 .endproc
diff --git a/transcoder/movie.py b/transcoder/movie.py
index 6bee59f..32e132a 100644
--- a/transcoder/movie.py
+++ b/transcoder/movie.py
@@ -36,6 +36,8 @@ class Movie:
             self.video.update_priority
         )
 
+        self.aux_memory_bank = False
+
     def encode(self) -> Iterator[opcodes.Opcode]:
         """
 
@@ -47,18 +49,23 @@ class Movie:
         for au in self.audio.audio_stream():
             self.cycles += self.audio.cycles_per_tick
             if self.video.tick(self.cycles):
-                video_frame = next(video_frames)
+                main, aux = next(video_frames)
                 if ((self.video.frame_number - 1) % self.every_n_video_frames
                         == 0):
                     print("Starting frame %d" % self.video.frame_number)
-                    video_seq = self.video.encode_frame(video_frame)
+                    main_seq = self.video.encode_frame(
+                        main, self.video.memory_map, self.video.update_priority)
+                    aux_seq = self.video.encode_frame(
+                        aux, self.video.aux_memory_map,
+                        self.video.aux_update_priority)
 
             # au has range -15 .. 16 (step=1)
             # Tick cycles are units of 2
             tick = au * 2  # -30 .. 32 (step=2)
             tick += 34  # 4 .. 66 (step=2)
 
-            (page, content, offsets) = next(video_seq)
+            (page, content, offsets) = next(
+                        aux_seq if self.aux_memory_bank else main_seq)
 
             yield opcodes.TICK_OPCODES[(tick, page)](content, offsets)
 
@@ -86,7 +93,9 @@ class Movie:
             socket_pos = self.stream_pos % 2048
             if socket_pos >= 2044:
                 # 2 dummy bytes + 2 address bytes for next opcode
-                yield from self._emit_bytes(opcodes.Ack())
+                yield from self._emit_bytes(opcodes.Ack(self.aux_memory_bank))
+                # Flip-flop between MAIN and AUX banks
+                self.aux_memory_bank = not self.aux_memory_bank
             yield from self._emit_bytes(op)
 
         yield from self.done()
diff --git a/transcoder/opcodes.py b/transcoder/opcodes.py
index d942a96..e822cd1 100644
--- a/transcoder/opcodes.py
+++ b/transcoder/opcodes.py
@@ -76,13 +76,18 @@ class Ack(Opcode):
     """Instructs player to perform TCP stream + buffer management."""
     COMMAND = OpcodeCommand.ACK
 
+    def __init__(self, aux_active: bool):
+        self.aux_active = aux_active
+
     def emit_data(self) -> Iterator[int]:
-        # Dummy bytes to pad out TCP frame
-        yield 0xff
+        # Flip $C054 or $C055 soft-switches to steer subsequent writes to
+        # MAIN/AUX screen memory
+        yield 0x54 if self.aux_active else 0x55
+        # Dummy byte to pad out TCP frame
         yield 0xff
 
     def __data_eq__(self, other):
-        return True
+        return self.aux_active == other.aux_active
 
 
 class BaseTick(Opcode):
diff --git a/transcoder/video.py b/transcoder/video.py
index e3ad893..d51e659 100644
--- a/transcoder/video.py
+++ b/transcoder/video.py
@@ -45,10 +45,14 @@ class Video:
         # Initialize empty screen
         self.memory_map = screen.MemoryMap(
             screen_page=1)  # type: screen.MemoryMap
+        self.aux_memory_map = screen.MemoryMap(
+            screen_page=1)  # type: screen.MemoryMap
 
         # Accumulates pending edit weights across frames
         self.update_priority = np.zeros((32, 256), dtype=np.int64)
 
+        self.aux_update_priority = np.zeros((32, 256), dtype=np.int64)
+
     def tick(self, cycles: int) -> bool:
         if cycles > (self.cycles_per_frame * self.frame_number):
             self.frame_number += 1
@@ -110,51 +114,61 @@ class Video:
         def worker():
             """Invoke bmp2dhr to encode input image frames and push to queue."""
             for _idx, _frame in enumerate(self._frame_grabber()):
-                outfile = "%s/%08dC.BIN" % (frame_dir, _idx)
+                mainfile = "%s/%08d.BIN" % (frame_dir, _idx)
+                auxfile = "%s/%08d.AUX" % (frame_dir, _idx)
+
                 bmpfile = "%s/%08d.bmp" % (frame_dir, _idx)
 
                 try:
-                    os.stat(outfile)
+                    os.stat(mainfile)
+                    os.stat(auxfile)
                 except FileNotFoundError:
                     _frame = _frame.resize((280, 192), resample=Image.LANCZOS)
                     _frame.save(bmpfile)
 
                     subprocess.call(
-                        ["/usr/local/bin/bmp2dhr", bmpfile, "hgr", "D9"])
+                        ["/usr/local/bin/bmp2dhr", bmpfile, "dhgr", "P0", "A",
+                         "D9"])
 
                     os.remove(bmpfile)
 
-                _frame = np.fromfile(outfile, dtype=np.uint8)
-                q.put(_frame)
+                main = np.fromfile(mainfile, dtype=np.uint8)
+                aux = np.fromfile(auxfile, dtype=np.uint8)
+                q.put((main, aux))
 
-            q.put(None)
+            q.put((None, None))
 
         t = threading.Thread(target=worker, daemon=True)
         t.start()
 
         while True:
-            frame = q.get()
-            if frame is None:
+            main, aux = q.get()
+            if main is None:
                 break
 
-            yield screen.FlatMemoryMap(
-                screen_page=1, data=frame).to_memory_map()
+            yield (
+                screen.FlatMemoryMap(screen_page=1, data=main).to_memory_map(),
+                screen.FlatMemoryMap(screen_page=1, data=aux).to_memory_map()
+            )
             q.task_done()
 
         t.join()
 
     def encode_frame(
-            self, target: screen.MemoryMap
+            self, target: screen.MemoryMap,
+            memory_map: screen.MemoryMap,
+            update_priority: np.array,
     ) -> Iterator[opcodes.Opcode]:
         """Update to match content of frame within provided budget."""
 
-        print("Similarity %f" % (self.update_priority.mean()))
-        yield from self._index_changes(self.memory_map, target)
+        print("Similarity %f" % (update_priority.mean()))
+        yield from self._index_changes(memory_map, target, update_priority)
 
     def _index_changes(
             self,
             source: screen.MemoryMap,
-            target: screen.MemoryMap
+            target: screen.MemoryMap,
+            update_priority: np.array
     ) -> Iterator[Tuple[int, int, List[int]]]:
         """Transform encoded screen to sequence of change tuples."""
 
@@ -162,16 +176,16 @@ class Video:
 
         # Clear any update priority entries that have resolved themselves
         # with new frame
-        self.update_priority[diff_weights == 0] = 0
+        update_priority[diff_weights == 0] = 0
 
         # Halve existing weights to increase bias to new diffs.
         # In particular this means that existing updates with diff 1 will
         # become diff 0, i.e. will only be prioritized if they are still
         # diffs in the new frame.
         # self.update_priority >>= 1
-        self.update_priority += diff_weights
+        update_priority += diff_weights
 
-        priorities = self._heapify_priorities()
+        priorities = self._heapify_priorities(update_priority)
 
         content_deltas = {}
 
@@ -179,15 +193,15 @@ class Video:
             _, _, page, offset = heapq.heappop(priorities)
             # Check whether we've already cleared this diff while processing
             # an earlier opcode
-            if self.update_priority[page, offset] == 0:
+            if update_priority[page, offset] == 0:
                 continue
 
             offsets = [offset]
             content = target.page_offset[page, offset]
 
             # Clear priority for the offset we're emitting
-            self.update_priority[page, offset] = 0
-            self.memory_map.page_offset[page, offset] = content
+            update_priority[page, offset] = 0
+            source.page_offset[page, offset] = content
             diff_weights[page, offset] = 0
 
             # Make sure we don't emit this offset as a side-effect of some
@@ -212,9 +226,9 @@ class Video:
                     error=False)
 
                 # Update priority for the offset we're emitting
-                self.update_priority[page, o] = p  # 0
+                update_priority[page, o] = p  # 0
 
-                self.memory_map.page_offset[page, o] = content
+                source.page_offset[page, o] = content
 
                 if p:
                     # This content byte introduced an error, so put back on the
@@ -242,9 +256,9 @@ class Video:
         return edit_distance.screen_edit_distance(
             source.page_offset, target.page_offset)
 
-    def _heapify_priorities(self) -> List:
+    def _heapify_priorities(self, update_priority: np.array) -> List:
         priorities = []
-        it = np.nditer(self.update_priority, flags=['multi_index'])
+        it = np.nditer(update_priority, flags=['multi_index'])
         while not it.finished:
             priority = it[0]
             if not priority: