eval: Fix stalling on tool confirmation (#28786)

The `always_allow_tool_actions` setting would get overridden with the default when we loaded each example project, leading to examples stalling whenever they ran a tool that needed confirmation. There's now a separate `runner_settings.json` file where we can configure the environment for the eval.

Release Notes:

- N/A

---------

Co-authored-by: Oleksiy <oleksiy@zed.dev>
2025-04-15 10:53:45 -06:00 · 2025-04-15 10:53:45 -06:00 · ff4334efc7
commit ff4334efc7
parent b1e4e6048a
5 changed files with 36 additions and 13 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -4877,7 +4877,6 @@ version = "0.1.0"
 dependencies = [
 "agent",
 "anyhow",
- "assistant_settings",
 "assistant_tool",
 "assistant_tools",
 "async-watch",
--- a/crates/eval/Cargo.toml
+++ b/crates/eval/Cargo.toml
@ -8,7 +8,6 @@ edition.workspace = true
 agent.workspace = true
 anyhow.workspace = true
 async-watch.workspace = true
-assistant_settings.workspace = true
 assistant_tool.workspace = true
 assistant_tools.workspace = true
 chrono.workspace = true
--- a/crates/eval/runner_settings.json
+++ b/crates/eval/runner_settings.json
@ -0,0 +1,6 @@
+{
+  "assistant": {
+    "always_allow_tool_actions": true,
+    "version": "2"
+  }
+}
--- a/crates/eval/src/eval.rs
+++ b/crates/eval/src/eval.rs
@ -1,6 +1,5 @@
 mod example;

-use assistant_settings::AssistantSettings;
 use client::{Client, ProxySettings, UserStore};
 pub(crate) use example::*;

@ -10,7 +9,7 @@ use clap::Parser;
 use extension::ExtensionHostProxy;
 use futures::future;
 use gpui::http_client::{Uri, read_proxy_from_env};
-use gpui::{App, AppContext, Application, AsyncApp, Entity, SemanticVersion, Task};
+use gpui::{App, AppContext, Application, AsyncApp, Entity, SemanticVersion, Task, UpdateGlobal};
 use gpui_tokio::Tokio;
 use language::LanguageRegistry;
 use language_model::{
@ -390,13 +389,10 @@ pub fn init(cx: &mut App) -> Arc<AgentAppState> {
    let prompt_builder = PromptBuilder::load(fs.clone(), stdout_is_a_pty, cx);
    agent::init(fs.clone(), client.clone(), prompt_builder.clone(), cx);

-    AssistantSettings::override_global(
-        AssistantSettings {
-            always_allow_tool_actions: true,
-            ..AssistantSettings::get_global(cx).clone()
-        },
-        cx,
-    );
+    SettingsStore::update_global(cx, |store, cx| {
+        store.set_user_settings(include_str!("../runner_settings.json"), cx)
+    })
+    .unwrap();

    Arc::new(AgentAppState {
        languages,
--- a/crates/eval/src/example.rs
+++ b/crates/eval/src/example.rs
@ -330,7 +330,11 @@ impl Example {
                                Ok(StopReason::MaxTokens) => {
                                    return Err(anyhow!("Exceeded maximum tokens"));
                                }
-                                Ok(StopReason::ToolUse) => {}
+                                Ok(StopReason::ToolUse) => {
+                                    if std::env::var("ZED_EVAL_DEBUG").is_ok() {
+                                        println!("{}StopReason: Tool use", log_prefix);
+                                    }
+                                }
                                Err(error) => {
                                    return Err(anyhow!(error.clone()));
                                }
@ -371,7 +375,20 @@ impl Example {
                                    }
                                })?;
                            }
-                            _ => {}
+                            ThreadEvent::ToolConfirmationNeeded => {
+                                panic!("{}Bug: Tool confirmation should not be required in eval", log_prefix);
+                            },
+                            ThreadEvent::StreamedCompletion |
+                            ThreadEvent::MessageAdded(_) |
+                            ThreadEvent::MessageEdited(_) |
+                            ThreadEvent::MessageDeleted(_) |
+                            ThreadEvent::SummaryChanged |
+                            ThreadEvent::SummaryGenerated |
+                            ThreadEvent::CheckpointChanged => {
+                                if std::env::var("ZED_EVAL_DEBUG").is_ok() {
+                                    println!("{}Event: {:#?}", log_prefix, event);
+                                }
+                            }
                        }

                        output_file.flush().log_err();
@ -387,16 +404,22 @@ impl Example {

            event_handler_task.await?;

+            println!("{}Stopped", this.log_prefix);
+
            if let Some((_, lsp_store)) = lsp_open_handle_and_store.as_ref() {
                wait_for_lang_server(lsp_store, this.log_prefix.clone(), cx).await?;
            }

+            println!("{}Getting repository diff", this.log_prefix);
            let repository_diff = this.repository_diff().await?;
+
+            println!("{}Getting diagnostics", this.log_prefix);
            let diagnostics = cx
                .update(move |cx| {
                    cx.spawn(async move |cx| query_lsp_diagnostics(project, cx).await)
                })?
                .await?;
+            println!("{}Got diagnostics", this.log_prefix);

            drop(subscription);
            drop(lsp_open_handle_and_store);