From 53bc5714d645b9c5cf568f581ac51f29df3e7bca Mon Sep 17 00:00:00 2001 From: "gcp-cherry-pick-bot[bot]" <98988430+gcp-cherry-pick-bot[bot]@users.noreply.github.com> Date: Wed, 30 Jul 2025 12:56:03 +0300 Subject: [PATCH] Fix racy leaked extension server adapters handling (cherry-pick #35319) (#35321) Cherry-picked Kb/wasm panics (#35319) Follow-up of https://github.com/zed-industries/zed/pull/34208 Closes https://github.com/zed-industries/zed/issues/35185 Previous code assumed that extensions' language server wrappers may leak only in static data (e.g. fields that were not cleared on deinit), but we seem to have a race that breaks this assumption. 1. We do clean `all_lsp_adapters` field after https://github.com/zed-industries/zed/pull/34334 and it's called for every extension that is unregistered. 2. `LspStore::maintain_workspace_config` -> `LspStore::refresh_workspace_configurations` chain is triggered independently, apparently on `ToolchainStoreEvent::ToolchainActivated` event which means somewhere behind there's potentially a Python code that gets executed to activate the toolchian, making `refresh_workspace_configurations` start timings unpredictable. 3. Seems that toolchain activation overlaps with plugin reload, as `2025-07-28T12:16:19+03:00 INFO [extension_host] extensions updated. loading 0, reloading 1, unloading 0` suggests in the issue logs. The plugin reload seem to happen faster than workspace configuration refresh in https://github.com/zed-industries/zed/blob/c65da547c9aa5d798a1a71468bf253bf55d1cb09/crates/project/src/lsp_store.rs#L7426-L7456 as the language servers are just starting and take extra time to respond to the notification. At least one of the `.clone()`d `adapter`s there is the adapter that got removed during plugin reload and has its channel closed, which causes a panic later. ---------------------------- A good fix would be to re-architect the workspace refresh approach, same as other accesses to the language server collections. One way could be to use `Weak`-based structures instead, as definitely the extension server data belongs to extension, not the `LspStore`. This is quite a large undertaking near the extension core though, so is not done yet. Currently, to stop the excessive panics, no more `.expect` is done on the channel result, as indeed, it now can be closed very dynamically. This will result in more errors (and backtraces, presumably) printed in the logs and no panics. More logging and comments are added, and workspace querying is replaced to the concurrent one: no need to wait until a previous server had processed the notification to send the same to the next one. Release Notes: - Fixed warm-related panic happening during startup Co-authored-by: Kirill Bulatov --- crates/extension_host/src/wasm_host.rs | 51 +++++++++--------- crates/project/src/lsp_store.rs | 73 +++++++++++++++----------- 2 files changed, 69 insertions(+), 55 deletions(-) diff --git a/crates/extension_host/src/wasm_host.rs b/crates/extension_host/src/wasm_host.rs index b514c8ee43..fd1633126a 100644 --- a/crates/extension_host/src/wasm_host.rs +++ b/crates/extension_host/src/wasm_host.rs @@ -102,7 +102,7 @@ impl extension::Extension for WasmExtension { } .boxed() }) - .await + .await? } async fn language_server_initialization_options( @@ -127,7 +127,7 @@ impl extension::Extension for WasmExtension { } .boxed() }) - .await + .await? } async fn language_server_workspace_configuration( @@ -150,7 +150,7 @@ impl extension::Extension for WasmExtension { } .boxed() }) - .await + .await? } async fn language_server_additional_initialization_options( @@ -175,7 +175,7 @@ impl extension::Extension for WasmExtension { } .boxed() }) - .await + .await? } async fn language_server_additional_workspace_configuration( @@ -200,7 +200,7 @@ impl extension::Extension for WasmExtension { } .boxed() }) - .await + .await? } async fn labels_for_completions( @@ -226,7 +226,7 @@ impl extension::Extension for WasmExtension { } .boxed() }) - .await + .await? } async fn labels_for_symbols( @@ -252,7 +252,7 @@ impl extension::Extension for WasmExtension { } .boxed() }) - .await + .await? } async fn complete_slash_command_argument( @@ -271,7 +271,7 @@ impl extension::Extension for WasmExtension { } .boxed() }) - .await + .await? } async fn run_slash_command( @@ -297,7 +297,7 @@ impl extension::Extension for WasmExtension { } .boxed() }) - .await + .await? } async fn context_server_command( @@ -316,7 +316,7 @@ impl extension::Extension for WasmExtension { } .boxed() }) - .await + .await? } async fn context_server_configuration( @@ -343,7 +343,7 @@ impl extension::Extension for WasmExtension { } .boxed() }) - .await + .await? } async fn suggest_docs_packages(&self, provider: Arc) -> Result> { @@ -358,7 +358,7 @@ impl extension::Extension for WasmExtension { } .boxed() }) - .await + .await? } async fn index_docs( @@ -384,7 +384,7 @@ impl extension::Extension for WasmExtension { } .boxed() }) - .await + .await? } async fn get_dap_binary( @@ -406,7 +406,7 @@ impl extension::Extension for WasmExtension { } .boxed() }) - .await + .await? } async fn dap_request_kind( &self, @@ -423,7 +423,7 @@ impl extension::Extension for WasmExtension { } .boxed() }) - .await + .await? } async fn dap_config_to_scenario(&self, config: ZedDebugConfig) -> Result { @@ -437,7 +437,7 @@ impl extension::Extension for WasmExtension { } .boxed() }) - .await + .await? } async fn dap_locator_create_scenario( @@ -461,7 +461,7 @@ impl extension::Extension for WasmExtension { } .boxed() }) - .await + .await? } async fn run_dap_locator( &self, @@ -477,7 +477,7 @@ impl extension::Extension for WasmExtension { } .boxed() }) - .await + .await? } } @@ -739,7 +739,7 @@ impl WasmExtension { .with_context(|| format!("failed to load wasm extension {}", manifest.id)) } - pub async fn call(&self, f: Fn) -> T + pub async fn call(&self, f: Fn) -> Result where T: 'static + Send, Fn: 'static @@ -755,14 +755,15 @@ impl WasmExtension { } .boxed() })) - .unwrap_or_else(|_| { - panic!( + .map_err(|_| { + anyhow!( "wasm extension channel should not be closed yet, extension {} (id {})", - self.manifest.name, self.manifest.id, + self.manifest.name, + self.manifest.id, ) - }); - return_rx.await.unwrap_or_else(|_| { - panic!( + })?; + return_rx.await.with_context(|| { + format!( "wasm extension channel, extension {} (id {})", self.manifest.name, self.manifest.id, ) diff --git a/crates/project/src/lsp_store.rs b/crates/project/src/lsp_store.rs index d657c59a40..161b861dd0 100644 --- a/crates/project/src/lsp_store.rs +++ b/crates/project/src/lsp_store.rs @@ -7437,21 +7437,23 @@ impl LspStore { } pub(crate) async fn refresh_workspace_configurations( - this: &WeakEntity, + lsp_store: &WeakEntity, fs: Arc, cx: &mut AsyncApp, ) { maybe!(async move { - let servers = this - .update(cx, |this, cx| { - let Some(local) = this.as_local() else { + let mut refreshed_servers = HashSet::default(); + let servers = lsp_store + .update(cx, |lsp_store, cx| { + let toolchain_store = lsp_store.toolchain_store(cx); + let Some(local) = lsp_store.as_local() else { return Vec::default(); }; local .language_server_ids .iter() .flat_map(|((worktree_id, _), server_ids)| { - let worktree = this + let worktree = lsp_store .worktree_store .read(cx) .worktree_for_id(*worktree_id, cx); @@ -7467,43 +7469,54 @@ impl LspStore { ) }); - server_ids.iter().filter_map(move |server_id| { + let fs = fs.clone(); + let toolchain_store = toolchain_store.clone(); + server_ids.iter().filter_map(|server_id| { + let delegate = delegate.clone()? as Arc; let states = local.language_servers.get(server_id)?; match states { LanguageServerState::Starting { .. } => None, LanguageServerState::Running { adapter, server, .. - } => Some(( - adapter.adapter.clone(), - server.clone(), - delegate.clone()? as Arc, - )), + } => { + let fs = fs.clone(); + let toolchain_store = toolchain_store.clone(); + let adapter = adapter.clone(); + let server = server.clone(); + refreshed_servers.insert(server.name()); + Some(cx.spawn(async move |_, cx| { + let settings = + LocalLspStore::workspace_configuration_for_adapter( + adapter.adapter.clone(), + fs.as_ref(), + &delegate, + toolchain_store, + cx, + ) + .await + .ok()?; + server + .notify::( + &lsp::DidChangeConfigurationParams { settings }, + ) + .ok()?; + Some(()) + })) + } } - }) + }).collect::>() }) .collect::>() }) .ok()?; - let toolchain_store = this.update(cx, |this, cx| this.toolchain_store(cx)).ok()?; - for (adapter, server, delegate) in servers { - let settings = LocalLspStore::workspace_configuration_for_adapter( - adapter, - fs.as_ref(), - &delegate, - toolchain_store.clone(), - cx, - ) - .await - .ok()?; - - server - .notify::( - &lsp::DidChangeConfigurationParams { settings }, - ) - .ok(); - } + log::info!("Refreshing workspace configurations for servers {refreshed_servers:?}"); + // TODO this asynchronous job runs concurrently with extension (de)registration and may take enough time for a certain extension + // to stop and unregister its language server wrapper. + // This is racy : an extension might have already removed all `local.language_servers` state, but here we `.clone()` and hold onto it anyway. + // This now causes errors in the logs, we should find a way to remove such servers from the processing everywhere. + let _: Vec> = join_all(servers).await; Some(()) }) .await;