mirror of
https://github.com/KrisKennaway/ii-vision.git
synced 2024-09-08 04:54:26 +00:00
101 lines
3.0 KiB
Python
101 lines
3.0 KiB
Python
"""Encodes input audio stream into sequence of speaker duty cycle counts."""
|
|
|
|
from typing import Iterator
|
|
|
|
import audioread
|
|
import librosa
|
|
import numpy as np
|
|
|
|
|
|
class Audio:
    """Decodes audio stream from input file and resamples.

    The decoded stream is resampled to ``bitrate`` Hz, scaled by a
    normalization factor and quantized to small signed integers that are
    yielded one sample at a time by :meth:`audio_stream`.

    Notes on audio bitrate:

    At 73 cycles/tick, true audio playback sample rate is
    roughly 1024*1024/73 = 14364 Hz (ignoring ACK slow path).
    Typical audio encoding is 44100Hz which is close to 14700*3
    Downscaling by 3x gives better results than trying to resample
    to a non-divisor.  So we cheat a bit and play back the video a
    tiny bit (<2%) faster.

    For //gs playback at 2.8MHz, the effective speed increase is only about
    1.6x.  This is probably because accessing the I/O page is done at 1MHz
    to not mess up hardware timings.

    This is close (2.1%) to 22500Hz which is again a simple divisor of the
    base frequency (1/2).

    NOTE(review): 44100/2 is 22050, not 22500 -- confirm which value the
    paragraph above intends.
    """

    def __init__(
            self,
            filename: str,
            bitrate: int = 14700,
            normalization: float = None):
        """
        :param filename: path of the audio file; opened with audioread.
        :param bitrate: target sample rate (Hz) that decoded audio is
            resampled to.
        :param normalization: multiplicative gain applied to samples before
            quantization.  When omitted (or any falsy value, including 0),
            the gain is estimated from the start of the file, which opens
            and decodes the file during construction.
        """
        self.filename = filename  # type: str

        # TODO: take into account that the available range is slightly offset
        # as fraction of total cycle count?
        self._tick_range = [4, 66]

        self.sample_rate = float(bitrate)  # type: float

        self.normalization = (
            normalization or self._normalization())  # type: float

    def _decode(self, f, buf) -> np.ndarray:
        """Converts a raw buffer of 16-bit PCM to resampled mono samples.

        :param f: open audioread file object; its ``channels`` and
            ``samplerate`` attributes describe the layout of ``buf``.
        :param buf: bytes-like buffer of interleaved int16 samples.
        :return: 1-D float array of mono samples at ``self.sample_rate``.
        """
        # Interleaved frames -> (channels, n_frames); Fortran order puts
        # consecutive input values in the same column (i.e. the same frame).
        data = np.frombuffer(buf, dtype='int16').astype(
            'float32').reshape((f.channels, -1), order='F')

        a = librosa.core.to_mono(data)
        a = librosa.resample(a, orig_sr=f.samplerate,
                             target_sr=self.sample_rate,
                             res_type='scipy', scale=True).flatten()

        return a

    @staticmethod
    def _normalization_from_samples(samples) -> float:
        """Computes the gain that maps the robust peak of samples to 16384.

        The peak is the larger magnitude of the 0.5th and 99.5th
        percentiles, so only <1% of samples will clip.

        :param samples: array of decoded sample values.
        :return: ``16384 / peak``, or 1.0 when there are no samples or the
            peak is zero / not finite (e.g. a silent lead-in), where the
            division would otherwise produce inf or nan.
        """
        samples = np.asarray(samples)
        if samples.size == 0:
            return 1.0

        norm = np.max(np.abs(np.percentile(samples, [0.5, 99.5])))
        if not np.isfinite(norm) or norm <= 0:
            return 1.0

        return float(16384. / norm)

    def _normalization(self, read_bytes=1024 * 1024 * 10):
        """Read first read_bytes of audio stream and compute normalization.

        We normalize based on the 0.5th and 99.5th percentiles, i.e. only <1%
        of samples will clip.

        :param read_bytes: reading stops once more than this many raw bytes
            have been accumulated (or the stream ends).
        :return: gain factor; see :meth:`_normalization_from_samples`.
        """
        raw = bytearray()
        with audioread.audio_open(self.filename) as f:
            for buf in f.read_data():
                raw.extend(bytearray(buf))
                if len(raw) > read_bytes:
                    break
            # Decode while the file object is still open so that its
            # channel count and sample rate are certainly available.
            a = self._decode(f, raw)

        return self._normalization_from_samples(a)

    def _quantize(self, samples) -> np.ndarray:
        """Scales decoded samples and quantizes them to clipped integers.

        :param samples: array of decoded sample values (not modified).
        :return: integer array with every value in [-15, 16].
        """
        scaled = np.asarray(samples) / 16384  # 16384 -> 1.0
        scaled *= self.normalization
        scaled *= 16

        # Builtin int replaces the np.int alias, which NumPy 1.24 removed;
        # the conversion truncates towards zero.
        quantized = scaled.astype(int)

        # Note the asymmetric range: values are limited to -15 .. 16.
        return np.clip(quantized, -15, 16)

    def audio_stream(self) -> Iterator[int]:
        """Decodes the whole file and yields quantized samples one by one.

        The file is read in 128KiB chunks; each chunk is decoded, resampled,
        normalized and quantized independently.

        :return: iterator over integer sample values in [-15, 16].
        """
        with audioread.audio_open(self.filename) as f:
            for buf in f.read_data(128 * 1024):
                yield from self._quantize(self._decode(f, buf))
|