Merge pull request #3 from KrisKennaway/audio

Improvements to audio processing (better audio quality, and support for a better-quality video stream on //gs)
2024-06-26 00:29:29 +00:00 · 2019-07-14 22:08:04 +01:00 · 2019-07-14 22:08:04 +01:00 · 142dbe02fe
commit 142dbe02fe
parent 326ca62075 451523bdef
5 changed files with 45 additions and 16 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,4 @@
 __pycache__/
 venv/
 videos/
 *.a2m
--- a/player/main.s
+++ b/player/main.s
@ -437,10 +437,10 @@ recv: ; 15 cycles so far
 ; pad cycles to keep ticking on 36/37 cycle cadence
 ; TODO: what can we do with the luxury of 14 unused cycles?!
@2: ; 30 so far
    STA TICK ; 4 ; 34
    ; X will usually already be 0 from op_ack, except during the first frame when
    ; reading the header, but reset it unconditionally
    LDX #$00 ; 2
    STA TICK ; 4 ; 36
    NOP ; 2
    STA dummy ; 4
@ -453,7 +453,7 @@ op_nop:
    LDY WDATA ; 4
    STY @D+1 ; 4
@D:
-    JMP op_nop ; 3 ; 23 with following tick (37 in fallthrough case)
+    JMP op_nop ; 3 ; 23 with following tick (39 if we fell through from checkrecv case)
 ; Build macros for "fat" opcodes that do the following:
 ; - tick twice, N cycles apart (N = 4 .. 66 in steps of 2)
@ -1312,16 +1312,18 @@ op_ack:
    LDA #>S0RXRD ; 2
    STA WADRH ; 4
    LDX #<S0RXRD ; 2
-    STX WADRL ; 4
+    ; prepare for ADC below, but reordered to allow TICK at offset 34
    CLC ; 2
-    STA TICK ; 4 (36)
+    STA TICK ; 4 (34)
    STX WADRL ; 4
    LDA WDATA ; 4 Read high byte
    ; No need to read the low byte: it's guaranteed to be 0 because we're at the end of a 2K frame.
    ; Update new Received Read pointer
    ; We have received an additional 2KB
    CLC ; 2
    ADC #$08 ; 2
    STX WADRL ; 4 Reset address pointer, X still has #<S0RXRD
@ -1339,7 +1341,7 @@ op_ack:
    ; - used as the low byte for resetting the W5100 address pointer when we're ready to start processing more data
    LDX #$00 ; 2 restore invariant for dispatch loop
-    JMP checkrecv ; 3 (37 with following STA TICK)
+    JMP checkrecv ; 3 (39 with following STA TICK)
 ; Quit to ProDOS
 exit:
--- a/transcoder/audio.py
+++ b/transcoder/audio.py
@ -8,21 +8,38 @@ import numpy as np
 class Audio:
    """
    Decodes audio stream from input file and resamples.
    Notes on audio bitrate:
    At 73 cycles/tick, true audio playback sample rate is
    roughly 1024*1024/73 = 14364 Hz (ignoring ACK slow path).
    Typical audio encoding is 44100Hz which is close to 14700*3
    Downscaling by 3x gives better results than trying to resample
    to a non-divisor.  So we cheat a bit and play back the video a
    tiny bit (<2%) faster.
    For //gs playback at 2.8MHz, the effective speed increase is only about
    1.6x.  This is probably because accessing the I/O page is done at 1MHz
    to not mess up hardware timings.
    This is close (2.1%) to 22500Hz, which is in turn near 22050Hz, a simple
    divisor of the base frequency (1/2 of 44100Hz).
    """
    def __init__(
-            self, filename: str, normalization: float = None):
+            self,
            filename: str,
            bitrate: int = 14700,
            normalization: float = None):
        self.filename = filename  # type: str
        # TODO: take into account that the available range is slightly offset
        # as fraction of total cycle count?
        self._tick_range = [4, 66]
-        # At 73 cycles/tick, true audio playback sample rate is
+        self.sample_rate = float(bitrate)  # type: float
        # roughly 1024*1024/73 = 14364 Hz (ignoring ACK slow path).
        # Typical audio encoding is 44100Hz which is close to 14700*3
        # Downscaling by 3x gives better results than trying to resample
        # to a non-divisor.  So we cheat a bit and play back the video a tiny
        # bit (<2%) faster.
        self.sample_rate = 14700.  # type: float
        self.normalization = (
                normalization or self._normalization())  # type: float
@ -39,7 +56,8 @@ class Audio:
        a = librosa.core.to_mono(data)
        a = librosa.resample(a, f.samplerate,
-                             self.sample_rate).flatten()
+                             self.sample_rate,
                             res_type='scipy', scale=True).flatten()
        return a
--- a/transcoder/main.py
+++ b/transcoder/main.py
@ -20,6 +20,11 @@ parser.add_argument(
    '--audio_normalization', type=float, default=None,
    help='Override auto-detected multiplier for audio normalization.'
 )
 parser.add_argument(
    '--audio_bitrate', type=int, default=14700,
    help='Select output audio bitrate (Hz), controls video speed (Default: '
         '14700; try 22500 for //gs 2.8MHz mode)'
 )
 parser.add_argument(
    '--every_n_video_frames', type=int, default=2,
    help='Allows skipping frames of input video to lower effective output '
@ -42,6 +47,7 @@ def main(args):
    m = movie.Movie(
        filename,
        every_n_video_frames=args.every_n_video_frames,
        audio_bitrate=args.audio_bitrate,
        audio_normalization=args.audio_normalization,
        max_bytes_out=1024. * 1024 * args.max_output_mb,
        video_mode=video_mode.VideoMode[args.video_mode],
--- a/transcoder/movie.py
+++ b/transcoder/movie.py
@ -15,6 +15,7 @@ class Movie:
    def __init__(
            self, filename: str,
            every_n_video_frames: int = 1,
            audio_bitrate: int = 14700,
            audio_normalization: float = None,
            max_bytes_out: int = None,
            video_mode: VideoMode = VideoMode.HGR,
@ -27,7 +28,8 @@ class Movie:
        self.palette = palette  # type: Palette
        self.audio = audio.Audio(
-            filename, normalization=audio_normalization)  # type: audio.Audio
+            filename, bitrate=audio_bitrate,
            normalization=audio_normalization)  # type: audio.Audio
        self.frame_grabber = frame_grabber.FileFrameGrabber(
            filename, mode=video_mode, palette=self.palette)