Skip to content

Commit

Permalink
Merge pull request #1 from chcunningham/audio
Browse files Browse the repository at this point in the history
Render the audio
  • Loading branch information
padenot authored Oct 6, 2021
2 parents bbd2f43 + 0907260 commit 1880e6b
Show file tree
Hide file tree
Showing 7 changed files with 498 additions and 69 deletions.
222 changes: 170 additions & 52 deletions audio_renderer.js
Original file line number Diff line number Diff line change
@@ -1,20 +1,36 @@
// Playback buffering knobs.
// NOTE(review): this page is a diff; DATA_BUFFER_TARGET_SIZE appears to be the
// stale removed-line leftover from the previous revision (superseded by the
// duration-based constants below) — confirm before relying on it.
const DATA_BUFFER_TARGET_SIZE = 10;
// Seconds of decoded audio to keep buffered ahead of playback.
const DATA_BUFFER_DECODE_TARGET_DURATION = 0.3;
// Total ring-buffer capacity, in seconds of interleaved audio (sized in
// initialize() as duration * sampleRate * channelCount samples).
const DATA_BUFFER_DURATION = 0.5;
// Upper bound on in-flight decode requests before fillDataBuffer backs off.
const DECODER_QUEUE_SIZE_MAX = 5;
// Toggle for debugLog() output.
const ENABLE_DEBUG_LOGGING = false;

import {MP4PullDemuxer} from "./mp4_pull_demuxer.js";
import { MP4PullDemuxer } from "./mp4_pull_demuxer.js";
import { RingBuffer } from "./ringbuf.js";

// Log `msg` via console.debug, but only when verbose logging is enabled at
// the top of the file. (A duplicated conditional line — a diff-merge
// artifact — has been removed.)
function debugLog(msg) {
  if (!ENABLE_DEBUG_LOGGING) {
    return;
  }

  console.debug(msg);
}

// Fetch every file in `files`, concatenate their text contents in order, and
// return (a promise for) a blob: URL serving the combined script. Used to
// hand the AudioWorklet a single module built from several source files.
async function URLFromFiles(files) {
  const texts = await Promise.all(
    files.map(async (file) => {
      const response = await fetch(file);
      return response.text();
    })
  );

  const combined = texts.join("");
  const blob = new Blob([combined], { type: "application/javascript" });
  return URL.createObjectURL(blob);
}

export class AudioRenderer {
async initialize(fileUri) {
this.dataBuffer = [];
this.fillInProgress = false;
this.lastRenderedMediaTimestamp = 0;
this.playing = false;

this.demuxer = new MP4PullDemuxer(fileUri);

Expand All @@ -23,65 +39,113 @@ export class AudioRenderer {

this.decoder = new AudioDecoder({
output: this.bufferAudioData.bind(this),
error: e => console.error(e),
error: e => console.error(e)
});
const config = {
codec: trackInfo.codec,
sampleRate: trackInfo.sampleRate,
numberOfChannels: trackInfo.numberOfChannels,
description: trackInfo.extradata
};
console.assert(AudioDecoder.isConfigSupported(config))
this.sampleRate = trackInfo.sampleRate;
this.channelCount = trackInfo.numberOfChannels;

debugLog(config);

console.assert(AudioDecoder.isConfigSupported(config));
this.decoder.configure(config);

// Initialize the AudioWorkletProcessor
this.audioContext = new AudioContext({ sampleRate: trackInfo.sampleRate, latencyHint: "playback" });
this.audioContext.suspend();
// Initialize the ring buffer between the decoder and the real-time audio
// rendering thread. The AudioRenderer has buffer space for approximately
// 500ms of decoded audio ahead.
let sampleCountIn500ms =
DATA_BUFFER_DURATION * this.audioContext.sampleRate * trackInfo.numberOfChannels;
let sab = RingBuffer.getStorageForCapacity(
sampleCountIn500ms,
Float32Array
);
this.ringbuffer = new RingBuffer(sab, Float32Array);
this.interleavingBuffers = [];
// Get an instance of the AudioSink worklet, passing it the memory for a
// ringbuffer, connect it to a GainNode for volume. This GainNode is in
// turn connected to the destination.
var workletSource = await URLFromFiles(["ringbuf.js", "audiosink.js"]);
await this.audioContext.audioWorklet.addModule(workletSource);
this.audioSink = new AudioWorkletNode(this.audioContext, "AudioSink", {
processorOptions: { sab: sab, mediaChannelCount: this.channelCount },
outputChannelCount: [trackInfo.numberOfChannels]
});
this.volume = new GainNode(this.audioContext);
this.audioSink.connect(this.volume).connect(this.audioContext.destination);

this.init_resolver = null;
let promise = new Promise((resolver) => this.init_resolver = resolver);
let promise = new Promise(resolver => (this.init_resolver = resolver));

this.fillDataBuffer();
return promise;
}

// NOTE(review): this appears to be superseded code from the pre-merge
// revision — the surrounding diff removes fakeRendering(), and playback is
// started via play() below. Confirm whether any caller still uses this
// entry point.
startPlaying() {
this.fakeRendering();
}

// TODO(padenot): Replace this with calls to render() timed as needed to keep
// the AudioWorklet well fed.
fakeRendering() {
if (this.dataBuffer.length == 0) {
console.warn('audio data underflow');
window.setTimeout(this.fakeRendering.bind(this), 10);
setVolume(volume) {
if (volume < 0.0 && volume > 1.0) {
return;
}

let renderDurationMs = this.dataBuffer[0].duration / 1000;
this.render();
window.setTimeout(this.fakeRendering.bind(this), renderDurationMs);
// Smooth exponential volume ramps on change
this.volume.gain.setTargetAtTime(
volume,
this.audioContext.currentTime,
0.3
);
}

render(timestamp) {
if (this.dataBuffer.length == 0) {
console.warn('audio render(): no data ');
return;
}

// TODO(padenot): copy the AudioData samples to AudioWorklet and play out.
let data = this.dataBuffer.shift();
this.lastRenderedMediaTimestamp = data.timestamp;

debugLog('audio render()ing %d', data.timestamp);

play() {
// resolves when audio has effectively started: this can take some time if using
// bluetooth, for example.
debugLog("playback start");
this.playing = true;
this.fillDataBuffer();
return this.audioContext.resume();
}

pause() {
// resolves when audio has effectively stopped, this can take some time if using
// bluetooth, for example.
debugLog("playback stop");
this.playing = false;
return this.audioContext.suspend();
}

getMediaTime() {
return this.lastRenderedMediaTimestamp;
let totalOutputLatency = 0.0;
if (this.audioContext.outputLatency == undefined) {
// Put appropriate values for Chromium here, not sure what latencies are
// used. Likely OS-dependent, certainly hardware dependant. Assume 40ms.
totalOutputLatency += 0.04;
} else {
totalOutputLatency += this.audioContext.outputLatency;
}
// This looks supported by Chromium, always 128 / samplerate.
totalOutputLatency += this.audioContext.baseLatency;
// The currently rendered audio sample is the current time of the
// AudioContext, offset by the total output latency, that is composed of
// the internal buffering of the AudioContext (e.g., double buffering), and
// the inherent latency of the audio playback system: OS buffering,
// hardware buffering, etc. This starts out negative, because it takes some
// time to buffer, and crosses zero as the first audio sample is produced
// by the audio output device.
let time = Math.max(
this.audioContext.currentTime - totalOutputLatency,
0.0
);
return time * 1000 * 1000; // microseconds
}

makeChunk(sample) {
const type = sample.is_sync ? "key" : "delta";
const pts_us = sample.cts * 1000000 / sample.timescale;
const duration_us = sample.duration * 1000000 / sample.timescale;
const pts_us = (sample.cts * 1000000) / sample.timescale;
const duration_us = (sample.duration * 1000000) / sample.timescale;
return new EncodedAudioChunk({
type: type,
timestamp: pts_us,
Expand All @@ -90,16 +154,22 @@ export class AudioRenderer {
});
}

async fillDataBuffer() {
if (this.dataBuffer.length >= DATA_BUFFER_TARGET_SIZE) {
debugLog('AudioData buffer full');

if (this.init_resolver) {
this.init_resolver();
this.init_resolver = null;
}
// Returns the duration of audio that can be enqueued in the ring buffer.
// NOTE(review): available_write() counts interleaved samples, and the buffer
// is sized as duration * sampleRate * channelCount in initialize(), so
// dividing by sampleRate alone looks like it overstates the duration by a
// factor of channelCount — confirm the intended unit with the callers.
availableWrite() {
return this.ringbuffer.available_write() / this.sampleRate;
}

return;
async fillDataBuffer() {
let inBuffer = this.ringbuffer.capacity() - this.availableWrite();
if (inBuffer > DATA_BUFFER_DECODE_TARGET_DURATION ||
this.decoder.decodeQueueSize > DECODER_QUEUE_SIZE_MAX) {
debugLog(
`audio buffer full (target : ${DATA_BUFFER_DECODE_TARGET_DURATION}, current: ${inBuffer}), delaying decode`);
window.setTimeout(this.fillDataBuffer.bind(this), 1000 * inBuffer / this.sampleRate / 2);
}
if (this.init_resolver) {
this.init_resolver();
this.init_resolver = null;
}

// This method can be called from multiple places and we some may already
Expand All @@ -109,20 +179,68 @@ export class AudioRenderer {
}
this.fillInProgress = true;

while (this.dataBuffer.length < DATA_BUFFER_TARGET_SIZE &&
this.decoder.decodeQueueSize < DATA_BUFFER_TARGET_SIZE) {
// Decode up to the buffering target
while (this.availableWrite() > DATA_BUFFER_DECODE_TARGET_DURATION &&
this.decoder.decodeQueueSize < DECODER_QUEUE_SIZE_MAX) {
let sample = await this.demuxer.readSample();
this.decoder.decode(this.makeChunk(sample));
}

this.fillInProgress = false;
// Don't schedule more decoding operations when not playing.
if (this.playing) {
window.setTimeout(this.fillDataBuffer.bind(this), 0);
}
}

// Give decoder a chance to work, see if we saturated the pipeline.
window.setTimeout(this.fillDataBuffer.bind(this), 0);
bufferHealth() {
return (1 - this.ringbuffer.available_write() / this.ringbuffer.capacity()) * 100;
}

// From an array of Float32Arrays containing planar audio data (`inputs`),
// writes interleaved audio data to `output`:
// `inputOffset`: index of the input sample at which to start the copy
// `inputSamplesToCopy`: number of input samples to copy
// `output`: a Float32Array to write the interleaved samples to
// `outputSampleOffset`: offset in `output` at which to start writing
interleave(inputs, inputOffset, inputSamplesToCopy, output, outputSampleOffset) {
if (inputs.length * inputs[0].length < output.length) {
throw `not enough space in destination (${inputs.length * inputs[0].length} < ${output.length}})`
}
let channelCount = inputs.length;
let outIdx = outputSampleOffset;
let inputIdx = Math.floor(inputOffset / channelCount);
var channel = inputOffset % channelCount;
for (var i = 0; i < inputSamplesToCopy; i++) {
output[outIdx++] = inputs[channel][inputIdx];
if (++channel == inputs.length) {
channel = 0;
inputIdx++;
}
}
}

bufferAudioData(data) {
debugLog('bufferAudioData(%d)', data.timestamp);
this.dataBuffer.push(data);
if (this.interleavingBuffers.length != data.numberOfChannels) {
this.interleavingBuffers = new Array(this.channelCount);
for (var i = 0; i < this.interleavingBuffers.length; i++) {
this.interleavingBuffers[i] = new Float32Array(data.numberOfFrames);
}
}

debugLog("bufferAudioData(%d)", data.timestamp);
// Write to temporary planar arrays, and interleave into the ring buffer.
for (var i = 0; i < this.channelCount; i++) {
data.copyTo(this.interleavingBuffers[i], { planeIndex: i });
}
// Write the data to the ring buffer. Because it wraps around, there is
// potentially two copyTo to do.
this.ringbuffer.writeCallback(
data.numberOfFrames * data.numberOfChannels,
(first_part, second_part) => {
this.interleave(this.interleavingBuffers, 0, first_part.length, first_part, 0);
this.interleave(this.interleavingBuffers, first_part.length, second_part.length, second_part, 0);
}
);
}
}
29 changes: 29 additions & 0 deletions audiosink.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Real-time consumer side: an AudioWorkletProcessor that pops interleaved
// samples from the shared ring buffer and deinterleaves them into the
// output channels each 128-frame render quantum.
// Fixes: underrun message now uses console.warn (it is a warning), removed
// the unused `this.s` field, and replaced `var`/`!=` with `let`/`const`/`!==`.
registerProcessor("AudioSink", class AudioSink extends AudioWorkletProcessor {
  constructor(options) {
    super();
    // SharedArrayBuffer backing the ring buffer, passed from AudioRenderer.
    const sab = options.processorOptions.sab;
    this.consumerSide = new RingBuffer(sab, Float32Array);
    this.mediaChannelCount = options.processorOptions.mediaChannelCount;
    // Scratch space for one render quantum of interleaved audio.
    this.deinterleaveBuffer = new Float32Array(this.mediaChannelCount * 128);
  }

  // Deinterleave audio data from input (linear Float32Array) to output, an
  // array of Float32Array (one per channel).
  deinterleave(input, output) {
    let inputIdx = 0;
    const outputChannelCount = output.length;
    for (let i = 0; i < output[0].length; i++) {
      for (let j = 0; j < outputChannelCount; j++) {
        output[j][i] = input[inputIdx++];
      }
    }
  }

  process(inputs, outputs, params) {
    // A short pop means the producer fell behind: audible underrun.
    if (this.consumerSide.pop(this.deinterleaveBuffer) !== this.deinterleaveBuffer.length) {
      console.warn("Warning: audio underrun");
    }
    this.deinterleave(this.deinterleaveBuffer, outputs[0]);
    // Returning true keeps the processor alive.
    return true;
  }
});
4 changes: 4 additions & 0 deletions mp4box.all.min.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion rapid_video_painter.html
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<!doctype html>
<canvas style="outline: 1px solid"></canvas>

<script src="https://gpac.github.io/mp4box.js/dist/mp4box.all.min.js"></script>
<script src="mp4box.all.min.js"></script>

<script type="module">

Expand Down
Loading

0 comments on commit 1880e6b

Please sign in to comment.