Add Rodio audio pipeline as alternative to current LiveKit pipeline (#36607)

Rodio parts are well tested and need less configuration than the livekit
parts. I suspect there is a bug in the livekit configuration regarding
resampling. Rather than investigating that, it seemed faster & easier to
swap in Rodio.

This opens the door to using other Rodio parts like:
 - Decibel based volume control
 - Limiter (prevents sound from becoming too loud)
 - Automatic gain control

To use this, add the following to your settings:
```
  "audio": {
    "experimental.rodio_audio": true
  }
```

Release Notes:

- N/A

Co-authored-by: Mikayla <mikayla@zed.dev>
Co-authored-by: Antonio Scandurra <me@as-cii.com>
This commit is contained in:
David Kleingeld 2025-08-21 15:56:16 +02:00 committed by GitHub
parent 1dd237139c
commit e0613cbd0f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 226 additions and 112 deletions

View file

@ -25,6 +25,7 @@ async-trait.workspace = true
collections.workspace = true
cpal.workspace = true
futures.workspace = true
audio.workspace = true
gpui = { workspace = true, features = ["screen-capture", "x11", "wayland", "windows-manifest"] }
gpui_tokio.workspace = true
http_client_tls.workspace = true
@ -35,6 +36,7 @@ nanoid.workspace = true
parking_lot.workspace = true
postage.workspace = true
smallvec.workspace = true
settings.workspace = true
tokio-tungstenite.workspace = true
util.workspace = true
workspace-hack.workspace = true

View file

@ -24,8 +24,11 @@ mod livekit_client;
)))]
pub use livekit_client::*;
// If you need proper LSP in livekit_client you've got to comment out
// the mocks and test
// If you need proper LSP in livekit_client you've got to comment out
// - the cfg blocks above
// - the mods: mock_client & test and their conditional blocks
// - the pub use mock_client::* and their conditional blocks
#[cfg(any(
test,
feature = "test-support",

View file

@ -1,15 +1,16 @@
use std::sync::Arc;
use anyhow::{Context as _, Result};
use audio::AudioSettings;
use collections::HashMap;
use futures::{SinkExt, channel::mpsc};
use gpui::{App, AsyncApp, ScreenCaptureSource, ScreenCaptureStream, Task};
use gpui_tokio::Tokio;
use log::info;
use playback::capture_local_video_track;
use settings::Settings;
mod playback;
#[cfg(feature = "record-microphone")]
mod record;
use crate::{LocalTrack, Participant, RemoteTrack, RoomEvent, TrackPublication};
pub use playback::AudioStream;
@ -125,9 +126,14 @@ impl Room {
pub fn play_remote_audio_track(
&self,
track: &RemoteAudioTrack,
_cx: &App,
cx: &mut App,
) -> Result<playback::AudioStream> {
Ok(self.playback.play_remote_audio_track(&track.0))
if AudioSettings::get_global(cx).rodio_audio {
info!("Using experimental.rodio_audio audio pipeline");
playback::play_remote_audio_track(&track.0, cx)
} else {
Ok(self.playback.play_remote_audio_track(&track.0))
}
}
}

View file

@ -18,13 +18,16 @@ use livekit::webrtc::{
video_stream::native::NativeVideoStream,
};
use parking_lot::Mutex;
use rodio::Source;
use std::cell::RefCell;
use std::sync::Weak;
use std::sync::atomic::{self, AtomicI32};
use std::sync::atomic::{AtomicBool, AtomicI32, Ordering};
use std::time::Duration;
use std::{borrow::Cow, collections::VecDeque, sync::Arc, thread};
use util::{ResultExt as _, maybe};
mod source;
pub(crate) struct AudioStack {
executor: BackgroundExecutor,
apm: Arc<Mutex<apm::AudioProcessingModule>>,
@ -40,6 +43,29 @@ pub(crate) struct AudioStack {
/// Sample rate requested for audio handled by this module; it is passed to
/// `play_output` and to `NativeAudioStream::new` in the `source` submodule.
const SAMPLE_RATE: u32 = 48000;
/// Channel count requested alongside `SAMPLE_RATE` at the same call sites.
const NUM_CHANNELS: u32 = 2;
/// Plays a remote LiveKit audio track through `audio::Audio::play_source`
/// (the experimental rodio pipeline) instead of the `AudioStack` mixer.
///
/// Returns an `AudioStream::Output` whose `_drop` guard raises a shared stop
/// flag; the playing source polls that flag and stops itself once it is set.
///
/// # Errors
///
/// Fails with the context "Could not play audio" when
/// `audio::Audio::play_source` returns an error.
pub(crate) fn play_remote_audio_track(
    track: &livekit::track::RemoteAudioTrack,
    cx: &mut gpui::App,
) -> Result<AudioStream> {
    // A single flag with two owners: the playback closure reads it, the
    // returned guard writes it.
    let should_stop = Arc::new(AtomicBool::new(false));
    let poll_flag = Arc::clone(&should_stop);

    let livekit_source = source::LiveKitStream::new(cx.background_executor(), track);
    let stoppable_source = livekit_source
        .stoppable()
        // Check the flag on a 50 ms period rather than on every sample.
        .periodic_access(Duration::from_millis(50), move |source| {
            if poll_flag.load(Ordering::Relaxed) {
                source.stop();
            }
        });
    audio::Audio::play_source(stoppable_source, cx).context("Could not play audio")?;

    // NOTE(review): relies on `util::defer` running its closure when the
    // returned guard is dropped — confirm against the `util` crate.
    let raise_stop_flag = util::defer(move || should_stop.store(true, Ordering::Relaxed));
    Ok(AudioStream::Output {
        _drop: Box::new(raise_stop_flag),
    })
}
impl AudioStack {
pub(crate) fn new(executor: BackgroundExecutor) -> Self {
let apm = Arc::new(Mutex::new(apm::AudioProcessingModule::new(
@ -61,7 +87,7 @@ impl AudioStack {
) -> AudioStream {
let output_task = self.start_output();
let next_ssrc = self.next_ssrc.fetch_add(1, atomic::Ordering::Relaxed);
let next_ssrc = self.next_ssrc.fetch_add(1, Ordering::Relaxed);
let source = AudioMixerSource {
ssrc: next_ssrc,
sample_rate: SAMPLE_RATE,
@ -97,6 +123,23 @@ impl AudioStack {
}
}
/// Returns the shared output task, spawning a new one when the cached weak
/// handle can no longer be upgraded.
fn start_output(&self) -> Arc<Task<()>> {
    // Reuse the existing task while something still holds a strong reference.
    let existing = self._output_task.borrow().upgrade();
    if let Some(running) = existing {
        return running;
    }

    let apm = self.apm.clone();
    let mixer = self.mixer.clone();
    let spawned = Arc::new(self.executor.spawn(async move {
        Self::play_output(apm, mixer, SAMPLE_RATE, NUM_CHANNELS)
            .await
            .log_err();
    }));

    // Cache only a weak handle so the callers' `Arc`s decide the task's lifetime.
    *self._output_task.borrow_mut() = Arc::downgrade(&spawned);
    spawned
}
pub(crate) fn capture_local_microphone_track(
&self,
) -> Result<(crate::LocalAudioTrack, AudioStream)> {
@ -139,23 +182,6 @@ impl AudioStack {
))
}
/// Returns the shared output task, spawning a new one when the cached weak
/// handle can no longer be upgraded.
fn start_output(&self) -> Arc<Task<()>> {
    // Reuse the existing task while something still holds a strong reference.
    let existing = self._output_task.borrow().upgrade();
    if let Some(running) = existing {
        return running;
    }

    let apm = self.apm.clone();
    let mixer = self.mixer.clone();
    let spawned = Arc::new(self.executor.spawn(async move {
        Self::play_output(apm, mixer, SAMPLE_RATE, NUM_CHANNELS)
            .await
            .log_err();
    }));

    // Cache only a weak handle so the callers' `Arc`s decide the task's lifetime.
    *self._output_task.borrow_mut() = Arc::downgrade(&spawned);
    spawned
}
async fn play_output(
apm: Arc<Mutex<apm::AudioProcessingModule>>,
mixer: Arc<Mutex<audio_mixer::AudioMixer>>,

View file

@ -0,0 +1,67 @@
use futures::StreamExt;
use libwebrtc::{audio_stream::native::NativeAudioStream, prelude::AudioFrame};
use livekit::track::RemoteAudioTrack;
use rodio::{Source, buffer::SamplesBuffer, conversions::SampleTypeConverter};
use crate::livekit_client::playback::{NUM_CHANNELS, SAMPLE_RATE};
/// Converts one WebRTC audio frame into a rodio `SamplesBuffer`.
///
/// The frame's samples are converted to `f32` via `SampleTypeConverter`, and
/// the buffer keeps the channel count and sample rate reported by the frame.
fn frame_to_samplesbuffer(frame: AudioFrame) -> SamplesBuffer {
    let channel_count = frame.num_channels as u16;
    let rate = frame.sample_rate;
    let converted: Vec<f32> =
        SampleTypeConverter::<_, _>::new(frame.data.iter().copied()).collect();
    SamplesBuffer::new(channel_count, rate, converted)
}
/// A rodio `Source` fed by the audio frames of a remote LiveKit track.
///
/// Built by [`LiveKitStream::new`], which spawns a task that appends every
/// received frame to a rodio queue; this type exposes the queue's output.
pub struct LiveKitStream {
    /// Output half of the rodio queue that the receiver task appends to.
    inner: rodio::queue::SourcesQueueOutput,
    /// Handle of the task forwarding frames into the queue. It is never read;
    /// NOTE(review): presumably held so the task lives as long as the stream —
    /// confirm gpui's `Task` drop semantics.
    _receiver_task: gpui::Task<()>,
}
impl LiveKitStream {
    /// Creates a stream that plays the audio of `track`.
    ///
    /// Opens a `NativeAudioStream` on the track using `SAMPLE_RATE` and
    /// `NUM_CHANNELS`, then spawns a task on `executor` that converts each
    /// incoming frame to a `SamplesBuffer` and appends it to a rodio queue.
    /// The returned value wraps that queue's output.
    pub fn new(executor: &gpui::BackgroundExecutor, track: &RemoteAudioTrack) -> Self {
        let mut frames =
            NativeAudioStream::new(track.rtc_track(), SAMPLE_RATE as i32, NUM_CHANNELS as i32);
        // `true` is rodio's keep-alive-if-empty flag for the queue.
        let (queue_input, inner) = rodio::queue::queue(true);

        // Forward frames until the native stream ends.
        let _receiver_task = executor.spawn(async move {
            while let Some(frame) = frames.next().await {
                queue_input.append(frame_to_samplesbuffer(frame));
            }
        });

        Self {
            _receiver_task,
            inner,
        }
    }
}
impl Iterator for LiveKitStream {
    type Item = rodio::Sample;

    /// Yields the next sample by delegating to the wrapped queue output.
    fn next(&mut self) -> Option<Self::Item> {
        let queue = &mut self.inner;
        queue.next()
    }
}
// Every `Source` property is forwarded unchanged from the wrapped queue output.
impl Source for LiveKitStream {
    /// Channel count reported by the inner queue.
    fn channels(&self) -> rodio::ChannelCount {
        let queue = &self.inner;
        queue.channels()
    }

    /// Sample rate reported by the inner queue.
    fn sample_rate(&self) -> rodio::SampleRate {
        let queue = &self.inner;
        queue.sample_rate()
    }

    /// Current span length reported by the inner queue.
    fn current_span_len(&self) -> Option<usize> {
        let queue = &self.inner;
        queue.current_span_len()
    }

    /// Total duration reported by the inner queue.
    fn total_duration(&self) -> Option<std::time::Duration> {
        let queue = &self.inner;
        queue.total_duration()
    }
}