Chunk the output resampling to limit memory use

2022-07-03 22:33:01 +01:00 · 2022-07-03 22:33:01 +01:00 · 2e3e51bb99
parent 18a16999b0
commit 2e3e51bb99
1 changed files with 57 additions and 12 deletions
--- a/encode_audio.py
+++ b/encode_audio.py
@ -112,7 +112,7 @@ def audio_bytestream(data: numpy.ndarray, step: int, lookahead_steps: int,
            opcodes_generated.PlayerOps.TICK_00)), dtype=numpy.float32)]))

    # Starting speaker applied voltage.
-    voltage1 = voltage2 = -1.0  # * 2.5
+    voltage1 = voltage2 = 1.0  # * 2.5

    toggles = 0

@ -152,7 +152,7 @@ def audio_bytestream(data: numpy.ndarray, step: int, lookahead_steps: int,
        # print(frame_offset, voltages.shape, voltages.nbytes)
        opcode_idx = lookahead.evolve_return_best(
            sp, y1, y2, voltage1, voltage2, voltage1 * voltages,
-            data[i:i+lookahead_steps])
+            data[i:i + lookahead_steps])

        opcode = next_candidate_opcodes[opcode_idx]

@ -222,6 +222,24 @@ def preprocess(
    return data


+def resample_output(output_buffer, input_audio, sample_rate, output_rate,
+                    noise_output=False):
+    resampled_output = librosa.resample(
+        numpy.array(output_buffer, dtype=numpy.float32),
+        orig_sr=sample_rate,
+        target_sr=output_rate)
+
+    resampled_noise = None
+    if noise_output:
+        noise_len = min(len(output_buffer), len(input_audio))
+        resampled_noise = librosa.resample(
+            numpy.array(output_buffer[:noise_len] - input_audio[:noise_len]),
+            orig_sr=sample_rate,
+            target_sr=output_rate)
+
+    return resampled_output, resampled_noise
+
+
 def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--clock", choices=['pal', 'ntsc'],
@ -259,24 +277,51 @@ def main():

    output_rate = 44100

-    output = numpy.array(list(
-        audio_bytestream(input_audio, args.step_size, args.lookahead_cycles,
-                         sample_rate)), dtype=numpy.float32)
+    resampled_output = []
+    resampled_noise = []
+    output_buffer = []
+    input_offset = 0
+    for idx, sample in enumerate(audio_bytestream(
+            input_audio, args.step_size, args.lookahead_cycles, sample_rate)):
+        output_buffer.append(sample)
+        input_offset += 1
+
+        # Keep accumulating as long as we have <10MB in the buffer, or are
+        # within 10MB from the end.  This ensures we have enough samples to
+        # resample including the last (partial) buffer
+        if len(output_buffer) < 10 * 1024 * 1024:
+            continue
+        if (len(input_audio) - input_offset) < 10 * 1024 * 1024:
+            continue
+        resampled_output_buffer, resampled_noise_buffer = resample_output(
+            output_buffer, input_audio[input_offset - len(output_buffer):],
+            sample_rate, output_rate, bool(args.noise_output)
+        )
+        resampled_output.extend(resampled_output_buffer)
+        if args.noise_output:
+            resampled_noise.extend(resampled_noise_buffer)
+
+        output_buffer = []
+
+    if output_buffer:
+        resampled_output_buffer, resampled_noise_buffer = resample_output(
+            output_buffer, input_audio[input_offset - len(output_buffer):],
+            sample_rate, output_rate, bool(args.noise_output)
+        )
+        resampled_output.extend(resampled_output_buffer)
+        if args.noise_output:
+            resampled_noise.extend(resampled_noise_buffer)
+
    if args.noise_output:
-        noise = numpy.array(output - input_audio[:len(output)])
-        noise = librosa.resample(noise, orig_sr=sample_rate,
-                                 target_sr=output_rate)
        with sf.SoundFile(
                args.noise_output, "w", output_rate, channels=1,
                format='WAV') as f:
-            f.write(noise)
+            f.write(resampled_noise)

-    output = librosa.resample(output, orig_sr=sample_rate,
-                              target_sr=output_rate)
    with sf.SoundFile(
            args.output, "w", output_rate, channels=1, format='WAV') \
            as f:
-        f.write(output)
+        f.write(resampled_output)

    # with open(args.output, "wb+") as f:
    #     for opcode in audio_bytestream(