Add Rodio audio pipeline as alternative to current LiveKit pipeline (#36607)

Rodio parts are well tested and need less configuration than the LiveKit parts. I suspect there is a bug in the LiveKit configuration regarding resampling. Rather than investigate that, it seemed faster & easier to swap in Rodio. This opens the door to using other Rodio parts like: - Decibel-based volume control - Limiter (prevents sound from becoming too loud) - Automatic gain control To use this, add to settings: ``` "audio": { "experimental.rodio_audio": true } ``` Release Notes: - N/A Co-authored-by: Mikayla <mikayla@zed.dev> Co-authored-by: Antonio Scandurra <me@as-cii.com>
2025-08-21 15:56:16 +02:00 · 2025-08-21 15:56:16 +02:00 · e0613cbd0f
commit e0613cbd0f
parent 1dd237139c
13 changed files with 226 additions and 112 deletions
--- a/crates/livekit_client/Cargo.toml
+++ b/crates/livekit_client/Cargo.toml
@ -25,6 +25,7 @@ async-trait.workspace = true
 collections.workspace = true
 cpal.workspace = true
 futures.workspace = true
+audio.workspace = true
 gpui = { workspace = true, features = ["screen-capture", "x11", "wayland", "windows-manifest"] }
 gpui_tokio.workspace = true
 http_client_tls.workspace = true
@ -35,6 +36,7 @@ nanoid.workspace = true
 parking_lot.workspace = true
 postage.workspace = true
 smallvec.workspace = true
+settings.workspace = true
 tokio-tungstenite.workspace = true
 util.workspace = true
 workspace-hack.workspace = true
--- a/crates/livekit_client/src/lib.rs
+++ b/crates/livekit_client/src/lib.rs
@ -24,8 +24,11 @@ mod livekit_client;
 )))]
 pub use livekit_client::*;

-// If you need proper LSP in livekit_client you've got to comment out
-// the mocks and test
+// If you need proper LSP in livekit_client you've got to comment out
+// - the cfg blocks above
+// - the mods: mock_client & test and their conditional blocks
+// - the pub use mock_client::* and their conditional blocks
+
 #[cfg(any(
    test,
    feature = "test-support",
--- a/crates/livekit_client/src/livekit_client.rs
+++ b/crates/livekit_client/src/livekit_client.rs
@ -1,15 +1,16 @@
 use std::sync::Arc;

 use anyhow::{Context as _, Result};
+use audio::AudioSettings;
 use collections::HashMap;
 use futures::{SinkExt, channel::mpsc};
 use gpui::{App, AsyncApp, ScreenCaptureSource, ScreenCaptureStream, Task};
 use gpui_tokio::Tokio;
+use log::info;
 use playback::capture_local_video_track;
+use settings::Settings;

 mod playback;
-#[cfg(feature = "record-microphone")]
-mod record;

 use crate::{LocalTrack, Participant, RemoteTrack, RoomEvent, TrackPublication};
 pub use playback::AudioStream;
@ -125,9 +126,14 @@ impl Room {
    pub fn play_remote_audio_track(
        &self,
        track: &RemoteAudioTrack,
-        _cx: &App,
+        cx: &mut App,
    ) -> Result<playback::AudioStream> {
-        Ok(self.playback.play_remote_audio_track(&track.0))
+        if AudioSettings::get_global(cx).rodio_audio {
+            info!("Using experimental.rodio_audio audio pipeline");
+            playback::play_remote_audio_track(&track.0, cx)
+        } else {
+            Ok(self.playback.play_remote_audio_track(&track.0))
+        }
    }
 }

--- a/crates/livekit_client/src/livekit_client/playback.rs
+++ b/crates/livekit_client/src/livekit_client/playback.rs
@ -18,13 +18,16 @@ use livekit::webrtc::{
    video_stream::native::NativeVideoStream,
 };
 use parking_lot::Mutex;
+use rodio::Source;
 use std::cell::RefCell;
 use std::sync::Weak;
-use std::sync::atomic::{self, AtomicI32};
+use std::sync::atomic::{AtomicBool, AtomicI32, Ordering};
 use std::time::Duration;
 use std::{borrow::Cow, collections::VecDeque, sync::Arc, thread};
 use util::{ResultExt as _, maybe};

+mod source;
+
 pub(crate) struct AudioStack {
    executor: BackgroundExecutor,
    apm: Arc<Mutex<apm::AudioProcessingModule>>,
@ -40,6 +43,29 @@ pub(crate) struct AudioStack {
 const SAMPLE_RATE: u32 = 48000;
 const NUM_CHANNELS: u32 = 2;

+pub(crate) fn play_remote_audio_track(
+    track: &livekit::track::RemoteAudioTrack,
+    cx: &mut gpui::App,
+) -> Result<AudioStream> {
+    let stop_handle = Arc::new(AtomicBool::new(false));
+    let stop_handle_clone = stop_handle.clone();
+    let stream = source::LiveKitStream::new(cx.background_executor(), track)
+        .stoppable()
+        .periodic_access(Duration::from_millis(50), move |s| {
+            if stop_handle.load(Ordering::Relaxed) {
+                s.stop();
+            }
+        });
+    audio::Audio::play_source(stream, cx).context("Could not play audio")?;
+
+    let on_drop = util::defer(move || {
+        stop_handle_clone.store(true, Ordering::Relaxed);
+    });
+    Ok(AudioStream::Output {
+        _drop: Box::new(on_drop),
+    })
+}
+
 impl AudioStack {
    pub(crate) fn new(executor: BackgroundExecutor) -> Self {
        let apm = Arc::new(Mutex::new(apm::AudioProcessingModule::new(
@ -61,7 +87,7 @@ impl AudioStack {
    ) -> AudioStream {
        let output_task = self.start_output();

-        let next_ssrc = self.next_ssrc.fetch_add(1, atomic::Ordering::Relaxed);
+        let next_ssrc = self.next_ssrc.fetch_add(1, Ordering::Relaxed);
        let source = AudioMixerSource {
            ssrc: next_ssrc,
            sample_rate: SAMPLE_RATE,
@ -97,6 +123,23 @@ impl AudioStack {
        }
    }

+    fn start_output(&self) -> Arc<Task<()>> {
+        if let Some(task) = self._output_task.borrow().upgrade() {
+            return task;
+        }
+        let task = Arc::new(self.executor.spawn({
+            let apm = self.apm.clone();
+            let mixer = self.mixer.clone();
+            async move {
+                Self::play_output(apm, mixer, SAMPLE_RATE, NUM_CHANNELS)
+                    .await
+                    .log_err();
+            }
+        }));
+        *self._output_task.borrow_mut() = Arc::downgrade(&task);
+        task
+    }
+
    pub(crate) fn capture_local_microphone_track(
        &self,
    ) -> Result<(crate::LocalAudioTrack, AudioStream)> {
@ -139,23 +182,6 @@ impl AudioStack {
        ))
    }

-    fn start_output(&self) -> Arc<Task<()>> {
-        if let Some(task) = self._output_task.borrow().upgrade() {
-            return task;
-        }
-        let task = Arc::new(self.executor.spawn({
-            let apm = self.apm.clone();
-            let mixer = self.mixer.clone();
-            async move {
-                Self::play_output(apm, mixer, SAMPLE_RATE, NUM_CHANNELS)
-                    .await
-                    .log_err();
-            }
-        }));
-        *self._output_task.borrow_mut() = Arc::downgrade(&task);
-        task
-    }
-
    async fn play_output(
        apm: Arc<Mutex<apm::AudioProcessingModule>>,
        mixer: Arc<Mutex<audio_mixer::AudioMixer>>,
--- a/crates/livekit_client/src/livekit_client/playback/source.rs
+++ b/crates/livekit_client/src/livekit_client/playback/source.rs
@ -0,0 +1,67 @@
+use futures::StreamExt;
+use libwebrtc::{audio_stream::native::NativeAudioStream, prelude::AudioFrame};
+use livekit::track::RemoteAudioTrack;
+use rodio::{Source, buffer::SamplesBuffer, conversions::SampleTypeConverter};
+
+use crate::livekit_client::playback::{NUM_CHANNELS, SAMPLE_RATE};
+
+fn frame_to_samplesbuffer(frame: AudioFrame) -> SamplesBuffer {
+    let samples = frame.data.iter().copied();
+    let samples = SampleTypeConverter::<_, _>::new(samples);
+    let samples: Vec<f32> = samples.collect();
+    SamplesBuffer::new(frame.num_channels as u16, frame.sample_rate, samples)
+}
+
+pub struct LiveKitStream {
+    // shared_buffer: SharedBuffer,
+    inner: rodio::queue::SourcesQueueOutput,
+    _receiver_task: gpui::Task<()>,
+}
+
+impl LiveKitStream {
+    pub fn new(executor: &gpui::BackgroundExecutor, track: &RemoteAudioTrack) -> Self {
+        let mut stream =
+            NativeAudioStream::new(track.rtc_track(), SAMPLE_RATE as i32, NUM_CHANNELS as i32);
+        let (queue_input, queue_output) = rodio::queue::queue(true);
+        // spawn rtc stream
+        let receiver_task = executor.spawn({
+            async move {
+                while let Some(frame) = stream.next().await {
+                    let samples = frame_to_samplesbuffer(frame);
+                    queue_input.append(samples);
+                }
+            }
+        });
+
+        LiveKitStream {
+            _receiver_task: receiver_task,
+            inner: queue_output,
+        }
+    }
+}
+
+impl Iterator for LiveKitStream {
+    type Item = rodio::Sample;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.inner.next()
+    }
+}
+
+impl Source for LiveKitStream {
+    fn current_span_len(&self) -> Option<usize> {
+        self.inner.current_span_len()
+    }
+
+    fn channels(&self) -> rodio::ChannelCount {
+        self.inner.channels()
+    }
+
+    fn sample_rate(&self) -> rodio::SampleRate {
+        self.inner.sample_rate()
+    }
+
+    fn total_duration(&self) -> Option<std::time::Duration> {
+        self.inner.total_duration()
+    }
+}