Normalize audio by sampling the first 10 MB of the audio stream and
computing the 2.5th and 97.5th percentile values, i.e. so that <2.5% of
audio samples will clip.
This commit is contained in:
kris 2019-03-14 21:40:09 +00:00
parent 2d410a4b13
commit cd17dce267
2 changed files with 34 additions and 10 deletions

View File

@ -10,9 +10,8 @@ import video
class Audio:
def __init__(
self, filename: str, normalization: float = 1.0):
self, filename: str, normalization: float = None):
self.filename = filename
self.normalization = normalization
# TODO: take into account that the available range is slightly offset
# as fraction of total cycle count?
@ -22,16 +21,41 @@ class Audio:
# TODO: round to divisor of video frame rate
self.sample_rate = 14340 # int(1024. * 1024 / self.cycles_per_tick)
self.normalization = normalization or self._normalization()
print(self.normalization)
def _decode(self, f, buf) -> np.ndarray:
    """Decode a chunk of raw PCM bytes into mono samples at self.sample_rate.

    Args:
        f: the open audioread file the bytes came from; its ``channels``
            and ``samplerate`` attributes describe the layout of ``buf``.
        buf: bytes-like object interpreted as signed 16-bit samples.

    Returns:
        A flattened array of mono samples resampled to
        ``self.sample_rate``.  No amplitude scaling is applied here, so
        values remain on the int16 scale.
    """
    # order='F' fills the (channels, n) array column by column, so
    # consecutive input samples land in successive channel rows.
    data = np.frombuffer(buf, dtype='int16').astype(
        'float32').reshape((f.channels, -1), order='F')
    mono = librosa.core.to_mono(data)
    # Pass the rates by keyword: recent librosa releases made these
    # parameters keyword-only, and older releases accept the same names.
    return librosa.resample(
        mono, orig_sr=f.samplerate, target_sr=self.sample_rate).flatten()
def _normalization(self, read_bytes=1024*1024*10):
    """Read the start of the audio stream and compute a normalization factor.

    Roughly the first ``read_bytes`` bytes of raw audio are decoded and the
    2.5th and 97.5th percentiles of the samples are computed.  The factor
    returned scales the larger of those two magnitudes to 16384, so only
    samples outside that percentile range exceed it after scaling.

    Args:
        read_bytes: approximate number of raw bytes to examine; reading
            stops after the first buffer that pushes the total past this.

    Returns:
        The multiplicative normalization factor, or 1.0 (no adjustment)
        when the examined audio is empty or its percentile magnitude is
        zero, e.g. a silent lead-in.
    """
    raw = bytearray()
    with audioread.audio_open(self.filename) as f:
        for buf in f.read_data():
            raw.extend(buf)
            if len(raw) > read_bytes:
                break
        if not raw:
            # Nothing decodable: np.percentile would fail on empty input.
            return 1.0
        a = self._decode(f, raw)

    norm = np.max(np.abs(np.percentile(a, [2.5, 97.5])))
    if not norm:
        # Explicit check instead of `assert`, which is stripped under -O
        # and would let a division by zero produce an infinite factor.
        return 1.0
    return 16384. / norm
def audio_stream(self):
with audioread.audio_open(self.filename) as f:
for buf in f.read_data(128 * 1024):
data = np.frombuffer(buf, dtype='int16').astype(
'float32').reshape((f.channels, -1), order='F')
a = librosa.core.to_mono(data)
a = librosa.resample(a, f.samplerate,
self.sample_rate).flatten()
a = self._decode(f, buf)
a /= 16384 # normalize to -1.0 .. 1.0
a *= self.normalization

View File

@ -8,7 +8,7 @@ import video
class Movie:
def __init__(self, filename: str, audio_normalization: float = 1.0):
def __init__(self, filename: str, audio_normalization: float = None):
self.filename = filename # type: str
self.audio = audio.Audio(
filename, normalization=audio_normalization) # type: audio.Audio