eval: Fix stalling on tool confirmation (#28786)

The `always_allow_tool_actions` setting was overridden with the
default each time we loaded an example project, causing examples to
stall whenever they ran a tool that needed confirmation. There's now a
separate `runner_settings.json` file where we can configure the
environment for the eval.

Release Notes:

- N/A

---------

Co-authored-by: Oleksiy <oleksiy@zed.dev>
This commit is contained in:
Agus Zubiaga 2025-04-15 10:53:45 -06:00 committed by GitHub
parent b1e4e6048a
commit ff4334efc7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 36 additions and 13 deletions

1
Cargo.lock generated
View file

@ -4877,7 +4877,6 @@ version = "0.1.0"
dependencies = [
"agent",
"anyhow",
"assistant_settings",
"assistant_tool",
"assistant_tools",
"async-watch",

View file

@ -8,7 +8,6 @@ edition.workspace = true
agent.workspace = true
anyhow.workspace = true
async-watch.workspace = true
assistant_settings.workspace = true
assistant_tool.workspace = true
assistant_tools.workspace = true
chrono.workspace = true

View file

@ -0,0 +1,6 @@
{
"assistant": {
"always_allow_tool_actions": true,
"version": "2"
}
}

View file

@ -1,6 +1,5 @@
mod example;
use assistant_settings::AssistantSettings;
use client::{Client, ProxySettings, UserStore};
pub(crate) use example::*;
@ -10,7 +9,7 @@ use clap::Parser;
use extension::ExtensionHostProxy;
use futures::future;
use gpui::http_client::{Uri, read_proxy_from_env};
use gpui::{App, AppContext, Application, AsyncApp, Entity, SemanticVersion, Task};
use gpui::{App, AppContext, Application, AsyncApp, Entity, SemanticVersion, Task, UpdateGlobal};
use gpui_tokio::Tokio;
use language::LanguageRegistry;
use language_model::{
@ -390,13 +389,10 @@ pub fn init(cx: &mut App) -> Arc<AgentAppState> {
let prompt_builder = PromptBuilder::load(fs.clone(), stdout_is_a_pty, cx);
agent::init(fs.clone(), client.clone(), prompt_builder.clone(), cx);
AssistantSettings::override_global(
AssistantSettings {
always_allow_tool_actions: true,
..AssistantSettings::get_global(cx).clone()
},
cx,
);
SettingsStore::update_global(cx, |store, cx| {
store.set_user_settings(include_str!("../runner_settings.json"), cx)
})
.unwrap();
Arc::new(AgentAppState {
languages,

View file

@ -330,7 +330,11 @@ impl Example {
Ok(StopReason::MaxTokens) => {
return Err(anyhow!("Exceeded maximum tokens"));
}
Ok(StopReason::ToolUse) => {}
Ok(StopReason::ToolUse) => {
if std::env::var("ZED_EVAL_DEBUG").is_ok() {
println!("{}StopReason: Tool use", log_prefix);
}
}
Err(error) => {
return Err(anyhow!(error.clone()));
}
@ -371,7 +375,20 @@ impl Example {
}
})?;
}
_ => {}
ThreadEvent::ToolConfirmationNeeded => {
panic!("{}Bug: Tool confirmation should not be required in eval", log_prefix);
},
ThreadEvent::StreamedCompletion |
ThreadEvent::MessageAdded(_) |
ThreadEvent::MessageEdited(_) |
ThreadEvent::MessageDeleted(_) |
ThreadEvent::SummaryChanged |
ThreadEvent::SummaryGenerated |
ThreadEvent::CheckpointChanged => {
if std::env::var("ZED_EVAL_DEBUG").is_ok() {
println!("{}Event: {:#?}", log_prefix, event);
}
}
}
output_file.flush().log_err();
@ -387,16 +404,22 @@ impl Example {
event_handler_task.await?;
println!("{}Stopped", this.log_prefix);
if let Some((_, lsp_store)) = lsp_open_handle_and_store.as_ref() {
wait_for_lang_server(lsp_store, this.log_prefix.clone(), cx).await?;
}
println!("{}Getting repository diff", this.log_prefix);
let repository_diff = this.repository_diff().await?;
println!("{}Getting diagnostics", this.log_prefix);
let diagnostics = cx
.update(move |cx| {
cx.spawn(async move |cx| query_lsp_diagnostics(project, cx).await)
})?
.await?;
println!("{}Got diagnostics", this.log_prefix);
drop(subscription);
drop(lsp_open_handle_and_store);