eval: New add_arg_to_trait_method example (#29297)

Release Notes:

- N/A

---------

Co-authored-by: Richard Feldman <oss@rtfeldman.com>
This commit is contained in:
Agus Zubiaga 2025-04-23 15:46:39 -03:00 committed by GitHub
parent 8366cd0b52
commit 45d3f5168a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 222 additions and 9 deletions

1
Cargo.lock generated
View file

@ -4961,6 +4961,7 @@ dependencies = [
"assistant_tools",
"async-trait",
"async-watch",
"buffer_diff",
"chrono",
"clap",
"client",

View file

@ -56,7 +56,10 @@ use crate::symbol_info_tool::SymbolInfoTool;
use crate::terminal_tool::TerminalTool;
use crate::thinking_tool::ThinkingTool;
pub use create_file_tool::CreateFileToolInput;
pub use edit_file_tool::EditFileToolInput;
pub use path_search_tool::PathSearchToolInput;
pub use read_file_tool::ReadFileToolInput;
pub fn init(http_client: Arc<HttpClientWithUrl>, cx: &mut App) {
assistant_tool::init(cx);

View file

@ -11,6 +11,7 @@ assistant_tool.workspace = true
assistant_tools.workspace = true
async-trait.workspace = true
async-watch.workspace = true
buffer_diff.workspace = true
chrono.workspace = true
clap.workspace = true
client.workspace = true

View file

@ -1,6 +1,7 @@
use std::{
error::Error,
fmt::{self, Debug},
path::Path,
sync::{Arc, Mutex},
time::Duration,
};
@ -12,6 +13,8 @@ use crate::{
use agent::ThreadEvent;
use anyhow::{Result, anyhow};
use async_trait::async_trait;
use buffer_diff::DiffHunkStatus;
use collections::HashMap;
use futures::{FutureExt as _, StreamExt, channel::mpsc, select_biased};
use gpui::{AppContext, AsyncApp, Entity};
use language_model::{LanguageModel, Role, StopReason};
@ -234,9 +237,9 @@ impl ExampleContext {
let mut tool_metrics = tool_metrics.lock().unwrap();
if let Some(tool_result) = thread.tool_result(&tool_use_id) {
let message = if tool_result.is_error {
format!("TOOL FAILED: {}", tool_use.name)
format!("✖︎ {}", tool_use.name)
} else {
format!("TOOL FINISHED: {}", tool_use.name)
format!("✔︎ {}", tool_use.name)
};
println!("{log_prefix}{message}");
tool_metrics
@ -320,6 +323,36 @@ impl ExampleContext {
Ok(response)
}
pub fn edits(&self) -> HashMap<Arc<Path>, FileEdits> {
self.app
.read_entity(&self.agent_thread, |thread, cx| {
let action_log = thread.action_log().read(cx);
HashMap::from_iter(action_log.changed_buffers(cx).into_iter().map(
|(buffer, diff)| {
let snapshot = buffer.read(cx).snapshot();
let file = snapshot.file().unwrap();
let diff = diff.read(cx);
let base_text = diff.base_text().text();
let hunks = diff
.hunks(&snapshot, cx)
.map(|hunk| FileEditHunk {
base_text: base_text[hunk.diff_base_byte_range.clone()].to_string(),
text: snapshot
.text_for_range(hunk.range.clone())
.collect::<String>(),
status: hunk.status(),
})
.collect();
(file.path().clone(), FileEdits { hunks })
},
))
})
.unwrap()
}
}
#[derive(Debug)]
@ -344,6 +377,10 @@ impl Response {
});
cx.assert_some(result, format!("called `{}`", tool_name))
}
pub fn tool_uses(&self) -> impl Iterator<Item = &ToolUse> {
self.messages.iter().flat_map(|msg| &msg.tool_use)
}
}
#[derive(Debug)]
@ -355,17 +392,37 @@ pub struct Message {
#[derive(Debug)]
pub struct ToolUse {
name: String,
pub name: String,
value: serde_json::Value,
}
impl ToolUse {
pub fn expect_input<Input>(&self, cx: &mut ExampleContext) -> Result<Input>
pub fn parse_input<Input>(&self) -> Result<Input>
where
Input: for<'de> serde::Deserialize<'de>,
{
let result =
serde_json::from_value::<Input>(self.value.clone()).map_err(|err| anyhow!(err));
cx.log_assertion(result, format!("valid `{}` input", &self.name))
serde_json::from_value::<Input>(self.value.clone()).map_err(|err| anyhow!(err))
}
}
#[derive(Debug)]
pub struct FileEdits {
hunks: Vec<FileEditHunk>,
}
#[derive(Debug)]
struct FileEditHunk {
base_text: String,
text: String,
status: DiffHunkStatus,
}
impl FileEdits {
pub fn has_added_line(&self, line: &str) -> bool {
self.hunks.iter().any(|hunk| {
hunk.status == DiffHunkStatus::added_none()
&& hunk.base_text.is_empty()
&& hunk.text.contains(line)
})
}
}

View file

@ -0,0 +1,147 @@
use std::{collections::HashSet, path::Path};
use anyhow::Result;
use assistant_tools::{CreateFileToolInput, EditFileToolInput, ReadFileToolInput};
use async_trait::async_trait;
use crate::example::{Example, ExampleContext, ExampleMetadata, JudgeAssertion, LanguageServer};
pub struct AddArgToTraitMethod;
#[async_trait(?Send)]
impl Example for AddArgToTraitMethod {
fn meta(&self) -> ExampleMetadata {
ExampleMetadata {
name: "add_arg_to_trait_method".to_string(),
url: "https://github.com/zed-industries/zed.git".to_string(),
revision: "f69aeb6311dde3c0b8979c293d019d66498d54f2".to_string(),
language_server: Some(LanguageServer {
file_extension: "rs".to_string(),
allow_preexisting_diagnostics: false,
}),
max_assertions: None,
}
}
async fn conversation(&self, cx: &mut ExampleContext) -> Result<()> {
const FILENAME: &str = "assistant_tool.rs";
cx.push_user_message(format!(
r#"
Add a `window: Option<gpui::AnyWindowHandle>` argument to the `Tool::run` trait method in {FILENAME},
and update all the implementations of the trait and call sites accordingly.
"#
));
let response = cx.run_to_end().await?;
// Reads files before it edits them
let mut read_files = HashSet::new();
for tool_use in response.tool_uses() {
match tool_use.name.as_str() {
"read_file" => {
if let Ok(input) = tool_use.parse_input::<ReadFileToolInput>() {
read_files.insert(input.path);
}
}
"create_file" => {
if let Ok(input) = tool_use.parse_input::<CreateFileToolInput>() {
read_files.insert(input.path);
}
}
"edit_file" => {
if let Ok(input) = tool_use.parse_input::<EditFileToolInput>() {
cx.assert(
read_files.contains(input.path.to_str().unwrap()),
format!(
"Read before edit: {}",
&input.path.file_stem().unwrap().to_str().unwrap()
),
)
.ok();
}
}
_ => {}
}
}
// Adds ignored argument to all but `batch_tool`
let add_ignored_window_paths = &[
"code_action_tool",
"code_symbols_tool",
"contents_tool",
"copy_path_tool",
"create_directory_tool",
"create_file_tool",
"delete_path_tool",
"diagnostics_tool",
"edit_file_tool",
"fetch_tool",
"grep_tool",
"list_directory_tool",
"move_path_tool",
"now_tool",
"open_tool",
"path_search_tool",
"read_file_tool",
"rename_tool",
"symbol_info_tool",
"terminal_tool",
"thinking_tool",
"web_search_tool",
];
let edits = cx.edits();
for tool_name in add_ignored_window_paths {
let path_str = format!("crates/assistant_tools/src/{}.rs", tool_name);
let edits = edits.get(Path::new(&path_str));
let ignored = edits.map_or(false, |edits| {
edits.has_added_line(" _window: Option<gpui::AnyWindowHandle>,\n")
});
let uningored = edits.map_or(false, |edits| {
edits.has_added_line(" window: Option<gpui::AnyWindowHandle>,\n")
});
cx.assert(ignored || uningored, format!("Argument: {}", tool_name))
.ok();
cx.assert(ignored, format!("`_` prefix: {}", tool_name))
.ok();
}
// Adds unignored argument to `batch_tool`
let batch_tool_edits = edits.get(Path::new("crates/assistant_tools/src/batch_tool.rs"));
cx.assert(
batch_tool_edits.map_or(false, |edits| {
edits.has_added_line(" window: Option<gpui::AnyWindowHandle>,\n")
}),
"Argument: batch_tool",
)
.ok();
Ok(())
}
fn diff_assertions(&self) -> Vec<JudgeAssertion> {
vec![
JudgeAssertion {
id: "batch tool passes window to each".to_string(),
description:
"batch_tool is modified to pass a clone of the window to each tool it calls."
.to_string(),
},
JudgeAssertion {
id: "tool tests updated".to_string(),
description:
"tool tests are updated to pass the new `window` argument (`None` is ok)."
.to_string(),
},
]
}
}

View file

@ -33,7 +33,7 @@ impl Example for FileSearchExample {
let response = cx.run_turn().await?;
let tool_use = response.expect_tool("path_search", cx)?;
let input = tool_use.expect_input::<PathSearchToolInput>(cx)?;
let input = tool_use.parse_input::<PathSearchToolInput>()?;
let glob = input.glob;
cx.assert(

View file

@ -11,10 +11,14 @@ use util::serde::default_true;
use crate::example::{Example, ExampleContext, ExampleMetadata, JudgeAssertion};
mod add_arg_to_trait_method;
mod file_search;
pub fn all(examples_dir: &Path) -> Vec<Rc<dyn Example>> {
let mut threads: Vec<Rc<dyn Example>> = vec![Rc::new(file_search::FileSearchExample)];
let mut threads: Vec<Rc<dyn Example>> = vec![
Rc::new(file_search::FileSearchExample),
Rc::new(add_arg_to_trait_method::AddArgToTraitMethod),
];
for example_path in list_declarative_examples(examples_dir).unwrap() {
threads.push(Rc::new(DeclarativeExample::load(&example_path).unwrap()));