From ff4334efc72c41723f776425a83c20f01a500b01 Mon Sep 17 00:00:00 2001 From: Agus Zubiaga Date: Tue, 15 Apr 2025 10:53:45 -0600 Subject: [PATCH] eval: Fix stalling on tool confirmation (#28786) The `always_allow_tool_actions` setting would get overridden with the default when we loaded each example project, leading to examples stalling when they run a tool that needed confirmation. There's now a separate `runner_settings.json` file where we can configure the environment for the eval. Release Notes: - N/A --------- Co-authored-by: Oleksiy --- Cargo.lock | 1 - crates/eval/Cargo.toml | 1 - crates/eval/runner_settings.json | 6 ++++++ crates/eval/src/eval.rs | 14 +++++--------- crates/eval/src/example.rs | 27 +++++++++++++++++++++++++-- 5 files changed, 36 insertions(+), 13 deletions(-) create mode 100644 crates/eval/runner_settings.json diff --git a/Cargo.lock b/Cargo.lock index e2f767145f..bb4d080ac2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4877,7 +4877,6 @@ version = "0.1.0" dependencies = [ "agent", "anyhow", - "assistant_settings", "assistant_tool", "assistant_tools", "async-watch", diff --git a/crates/eval/Cargo.toml b/crates/eval/Cargo.toml index 42597393a1..235ae22c84 100644 --- a/crates/eval/Cargo.toml +++ b/crates/eval/Cargo.toml @@ -8,7 +8,6 @@ edition.workspace = true agent.workspace = true anyhow.workspace = true async-watch.workspace = true -assistant_settings.workspace = true assistant_tool.workspace = true assistant_tools.workspace = true chrono.workspace = true diff --git a/crates/eval/runner_settings.json b/crates/eval/runner_settings.json new file mode 100644 index 0000000000..53d853023c --- /dev/null +++ b/crates/eval/runner_settings.json @@ -0,0 +1,6 @@ +{ + "assistant": { + "always_allow_tool_actions": true, + "version": "2" + } +} diff --git a/crates/eval/src/eval.rs b/crates/eval/src/eval.rs index 2e69b12845..f78293dbf1 100644 --- a/crates/eval/src/eval.rs +++ b/crates/eval/src/eval.rs @@ -1,6 +1,5 @@ mod example; -use assistant_settings::AssistantSettings; use client::{Client, ProxySettings, UserStore}; pub(crate) use example::*; @@ -10,7 +9,7 @@ use clap::Parser; use extension::ExtensionHostProxy; use futures::future; use gpui::http_client::{Uri, read_proxy_from_env}; -use gpui::{App, AppContext, Application, AsyncApp, Entity, SemanticVersion, Task}; +use gpui::{App, AppContext, Application, AsyncApp, Entity, SemanticVersion, Task, UpdateGlobal}; use gpui_tokio::Tokio; use language::LanguageRegistry; use language_model::{ @@ -390,13 +389,10 @@ pub fn init(cx: &mut App) -> Arc { let prompt_builder = PromptBuilder::load(fs.clone(), stdout_is_a_pty, cx); agent::init(fs.clone(), client.clone(), prompt_builder.clone(), cx); - AssistantSettings::override_global( - AssistantSettings { - always_allow_tool_actions: true, - ..AssistantSettings::get_global(cx).clone() - }, - cx, - ); + SettingsStore::update_global(cx, |store, cx| { + store.set_user_settings(include_str!("../runner_settings.json"), cx) + }) + .unwrap(); Arc::new(AgentAppState { languages, diff --git a/crates/eval/src/example.rs b/crates/eval/src/example.rs index 7f4b6de349..70140a360a 100644 --- a/crates/eval/src/example.rs +++ b/crates/eval/src/example.rs @@ -330,7 +330,11 @@ impl Example { Ok(StopReason::MaxTokens) => { return Err(anyhow!("Exceeded maximum tokens")); } - Ok(StopReason::ToolUse) => {} + Ok(StopReason::ToolUse) => { + if std::env::var("ZED_EVAL_DEBUG").is_ok() { + println!("{}StopReason: Tool use", log_prefix); + } + } Err(error) => { return Err(anyhow!(error.clone())); } @@ -371,7 +375,20 @@ impl Example { } })?; } - _ => {} + ThreadEvent::ToolConfirmationNeeded => { + panic!("{}Bug: Tool confirmation should not be required in eval", log_prefix); + }, + ThreadEvent::StreamedCompletion | + ThreadEvent::MessageAdded(_) | + ThreadEvent::MessageEdited(_) | + ThreadEvent::MessageDeleted(_) | + ThreadEvent::SummaryChanged | + ThreadEvent::SummaryGenerated | + ThreadEvent::CheckpointChanged => { + if std::env::var("ZED_EVAL_DEBUG").is_ok() { + println!("{}Event: {:#?}", log_prefix, event); + } + } } output_file.flush().log_err(); @@ -387,16 +404,22 @@ impl Example { event_handler_task.await?; + println!("{}Stopped", this.log_prefix); + if let Some((_, lsp_store)) = lsp_open_handle_and_store.as_ref() { wait_for_lang_server(lsp_store, this.log_prefix.clone(), cx).await?; } + println!("{}Getting repository diff", this.log_prefix); let repository_diff = this.repository_diff().await?; + + println!("{}Getting diagnostics", this.log_prefix); let diagnostics = cx .update(move |cx| { cx.spawn(async move |cx| query_lsp_diagnostics(project, cx).await) })? .await?; + println!("{}Got diagnostics", this.log_prefix); drop(subscription); drop(lsp_open_handle_and_store);