eval: New add_arg_to_trait_method
example (#29297)
Release Notes: - N/A --------- Co-authored-by: Richard Feldman <oss@rtfeldman.com>
This commit is contained in:
parent
8366cd0b52
commit
45d3f5168a
7 changed files with 222 additions and 9 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -4961,6 +4961,7 @@ dependencies = [
|
||||||
"assistant_tools",
|
"assistant_tools",
|
||||||
"async-trait",
|
"async-trait",
|
||||||
"async-watch",
|
"async-watch",
|
||||||
|
"buffer_diff",
|
||||||
"chrono",
|
"chrono",
|
||||||
"clap",
|
"clap",
|
||||||
"client",
|
"client",
|
||||||
|
|
|
@ -56,7 +56,10 @@ use crate::symbol_info_tool::SymbolInfoTool;
|
||||||
use crate::terminal_tool::TerminalTool;
|
use crate::terminal_tool::TerminalTool;
|
||||||
use crate::thinking_tool::ThinkingTool;
|
use crate::thinking_tool::ThinkingTool;
|
||||||
|
|
||||||
|
pub use create_file_tool::CreateFileToolInput;
|
||||||
|
pub use edit_file_tool::EditFileToolInput;
|
||||||
pub use path_search_tool::PathSearchToolInput;
|
pub use path_search_tool::PathSearchToolInput;
|
||||||
|
pub use read_file_tool::ReadFileToolInput;
|
||||||
|
|
||||||
pub fn init(http_client: Arc<HttpClientWithUrl>, cx: &mut App) {
|
pub fn init(http_client: Arc<HttpClientWithUrl>, cx: &mut App) {
|
||||||
assistant_tool::init(cx);
|
assistant_tool::init(cx);
|
||||||
|
|
|
@ -11,6 +11,7 @@ assistant_tool.workspace = true
|
||||||
assistant_tools.workspace = true
|
assistant_tools.workspace = true
|
||||||
async-trait.workspace = true
|
async-trait.workspace = true
|
||||||
async-watch.workspace = true
|
async-watch.workspace = true
|
||||||
|
buffer_diff.workspace = true
|
||||||
chrono.workspace = true
|
chrono.workspace = true
|
||||||
clap.workspace = true
|
clap.workspace = true
|
||||||
client.workspace = true
|
client.workspace = true
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
use std::{
|
use std::{
|
||||||
error::Error,
|
error::Error,
|
||||||
fmt::{self, Debug},
|
fmt::{self, Debug},
|
||||||
|
path::Path,
|
||||||
sync::{Arc, Mutex},
|
sync::{Arc, Mutex},
|
||||||
time::Duration,
|
time::Duration,
|
||||||
};
|
};
|
||||||
|
@ -12,6 +13,8 @@ use crate::{
|
||||||
use agent::ThreadEvent;
|
use agent::ThreadEvent;
|
||||||
use anyhow::{Result, anyhow};
|
use anyhow::{Result, anyhow};
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
|
use buffer_diff::DiffHunkStatus;
|
||||||
|
use collections::HashMap;
|
||||||
use futures::{FutureExt as _, StreamExt, channel::mpsc, select_biased};
|
use futures::{FutureExt as _, StreamExt, channel::mpsc, select_biased};
|
||||||
use gpui::{AppContext, AsyncApp, Entity};
|
use gpui::{AppContext, AsyncApp, Entity};
|
||||||
use language_model::{LanguageModel, Role, StopReason};
|
use language_model::{LanguageModel, Role, StopReason};
|
||||||
|
@ -234,9 +237,9 @@ impl ExampleContext {
|
||||||
let mut tool_metrics = tool_metrics.lock().unwrap();
|
let mut tool_metrics = tool_metrics.lock().unwrap();
|
||||||
if let Some(tool_result) = thread.tool_result(&tool_use_id) {
|
if let Some(tool_result) = thread.tool_result(&tool_use_id) {
|
||||||
let message = if tool_result.is_error {
|
let message = if tool_result.is_error {
|
||||||
format!("TOOL FAILED: {}", tool_use.name)
|
format!("✖︎ {}", tool_use.name)
|
||||||
} else {
|
} else {
|
||||||
format!("TOOL FINISHED: {}", tool_use.name)
|
format!("✔︎ {}", tool_use.name)
|
||||||
};
|
};
|
||||||
println!("{log_prefix}{message}");
|
println!("{log_prefix}{message}");
|
||||||
tool_metrics
|
tool_metrics
|
||||||
|
@ -320,6 +323,36 @@ impl ExampleContext {
|
||||||
|
|
||||||
Ok(response)
|
Ok(response)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn edits(&self) -> HashMap<Arc<Path>, FileEdits> {
|
||||||
|
self.app
|
||||||
|
.read_entity(&self.agent_thread, |thread, cx| {
|
||||||
|
let action_log = thread.action_log().read(cx);
|
||||||
|
HashMap::from_iter(action_log.changed_buffers(cx).into_iter().map(
|
||||||
|
|(buffer, diff)| {
|
||||||
|
let snapshot = buffer.read(cx).snapshot();
|
||||||
|
|
||||||
|
let file = snapshot.file().unwrap();
|
||||||
|
let diff = diff.read(cx);
|
||||||
|
let base_text = diff.base_text().text();
|
||||||
|
|
||||||
|
let hunks = diff
|
||||||
|
.hunks(&snapshot, cx)
|
||||||
|
.map(|hunk| FileEditHunk {
|
||||||
|
base_text: base_text[hunk.diff_base_byte_range.clone()].to_string(),
|
||||||
|
text: snapshot
|
||||||
|
.text_for_range(hunk.range.clone())
|
||||||
|
.collect::<String>(),
|
||||||
|
status: hunk.status(),
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
(file.path().clone(), FileEdits { hunks })
|
||||||
|
},
|
||||||
|
))
|
||||||
|
})
|
||||||
|
.unwrap()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
|
@ -344,6 +377,10 @@ impl Response {
|
||||||
});
|
});
|
||||||
cx.assert_some(result, format!("called `{}`", tool_name))
|
cx.assert_some(result, format!("called `{}`", tool_name))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn tool_uses(&self) -> impl Iterator<Item = &ToolUse> {
|
||||||
|
self.messages.iter().flat_map(|msg| &msg.tool_use)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
|
@ -355,17 +392,37 @@ pub struct Message {
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct ToolUse {
|
pub struct ToolUse {
|
||||||
name: String,
|
pub name: String,
|
||||||
value: serde_json::Value,
|
value: serde_json::Value,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ToolUse {
|
impl ToolUse {
|
||||||
pub fn expect_input<Input>(&self, cx: &mut ExampleContext) -> Result<Input>
|
pub fn parse_input<Input>(&self) -> Result<Input>
|
||||||
where
|
where
|
||||||
Input: for<'de> serde::Deserialize<'de>,
|
Input: for<'de> serde::Deserialize<'de>,
|
||||||
{
|
{
|
||||||
let result =
|
serde_json::from_value::<Input>(self.value.clone()).map_err(|err| anyhow!(err))
|
||||||
serde_json::from_value::<Input>(self.value.clone()).map_err(|err| anyhow!(err));
|
}
|
||||||
cx.log_assertion(result, format!("valid `{}` input", &self.name))
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct FileEdits {
|
||||||
|
hunks: Vec<FileEditHunk>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
struct FileEditHunk {
|
||||||
|
base_text: String,
|
||||||
|
text: String,
|
||||||
|
status: DiffHunkStatus,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FileEdits {
|
||||||
|
pub fn has_added_line(&self, line: &str) -> bool {
|
||||||
|
self.hunks.iter().any(|hunk| {
|
||||||
|
hunk.status == DiffHunkStatus::added_none()
|
||||||
|
&& hunk.base_text.is_empty()
|
||||||
|
&& hunk.text.contains(line)
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
147
crates/eval/src/examples/add_arg_to_trait_method.rs
Normal file
147
crates/eval/src/examples/add_arg_to_trait_method.rs
Normal file
|
@ -0,0 +1,147 @@
|
||||||
|
use std::{collections::HashSet, path::Path};
|
||||||
|
|
||||||
|
use anyhow::Result;
|
||||||
|
use assistant_tools::{CreateFileToolInput, EditFileToolInput, ReadFileToolInput};
|
||||||
|
use async_trait::async_trait;
|
||||||
|
|
||||||
|
use crate::example::{Example, ExampleContext, ExampleMetadata, JudgeAssertion, LanguageServer};
|
||||||
|
|
||||||
|
pub struct AddArgToTraitMethod;
|
||||||
|
|
||||||
|
#[async_trait(?Send)]
|
||||||
|
impl Example for AddArgToTraitMethod {
|
||||||
|
fn meta(&self) -> ExampleMetadata {
|
||||||
|
ExampleMetadata {
|
||||||
|
name: "add_arg_to_trait_method".to_string(),
|
||||||
|
url: "https://github.com/zed-industries/zed.git".to_string(),
|
||||||
|
revision: "f69aeb6311dde3c0b8979c293d019d66498d54f2".to_string(),
|
||||||
|
language_server: Some(LanguageServer {
|
||||||
|
file_extension: "rs".to_string(),
|
||||||
|
allow_preexisting_diagnostics: false,
|
||||||
|
}),
|
||||||
|
max_assertions: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn conversation(&self, cx: &mut ExampleContext) -> Result<()> {
|
||||||
|
const FILENAME: &str = "assistant_tool.rs";
|
||||||
|
cx.push_user_message(format!(
|
||||||
|
r#"
|
||||||
|
Add a `window: Option<gpui::AnyWindowHandle>` argument to the `Tool::run` trait method in {FILENAME},
|
||||||
|
and update all the implementations of the trait and call sites accordingly.
|
||||||
|
"#
|
||||||
|
));
|
||||||
|
|
||||||
|
let response = cx.run_to_end().await?;
|
||||||
|
|
||||||
|
// Reads files before it edits them
|
||||||
|
|
||||||
|
let mut read_files = HashSet::new();
|
||||||
|
|
||||||
|
for tool_use in response.tool_uses() {
|
||||||
|
match tool_use.name.as_str() {
|
||||||
|
"read_file" => {
|
||||||
|
if let Ok(input) = tool_use.parse_input::<ReadFileToolInput>() {
|
||||||
|
read_files.insert(input.path);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"create_file" => {
|
||||||
|
if let Ok(input) = tool_use.parse_input::<CreateFileToolInput>() {
|
||||||
|
read_files.insert(input.path);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"edit_file" => {
|
||||||
|
if let Ok(input) = tool_use.parse_input::<EditFileToolInput>() {
|
||||||
|
cx.assert(
|
||||||
|
read_files.contains(input.path.to_str().unwrap()),
|
||||||
|
format!(
|
||||||
|
"Read before edit: {}",
|
||||||
|
&input.path.file_stem().unwrap().to_str().unwrap()
|
||||||
|
),
|
||||||
|
)
|
||||||
|
.ok();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Adds ignored argument to all but `batch_tool`
|
||||||
|
|
||||||
|
let add_ignored_window_paths = &[
|
||||||
|
"code_action_tool",
|
||||||
|
"code_symbols_tool",
|
||||||
|
"contents_tool",
|
||||||
|
"copy_path_tool",
|
||||||
|
"create_directory_tool",
|
||||||
|
"create_file_tool",
|
||||||
|
"delete_path_tool",
|
||||||
|
"diagnostics_tool",
|
||||||
|
"edit_file_tool",
|
||||||
|
"fetch_tool",
|
||||||
|
"grep_tool",
|
||||||
|
"list_directory_tool",
|
||||||
|
"move_path_tool",
|
||||||
|
"now_tool",
|
||||||
|
"open_tool",
|
||||||
|
"path_search_tool",
|
||||||
|
"read_file_tool",
|
||||||
|
"rename_tool",
|
||||||
|
"symbol_info_tool",
|
||||||
|
"terminal_tool",
|
||||||
|
"thinking_tool",
|
||||||
|
"web_search_tool",
|
||||||
|
];
|
||||||
|
|
||||||
|
let edits = cx.edits();
|
||||||
|
|
||||||
|
for tool_name in add_ignored_window_paths {
|
||||||
|
let path_str = format!("crates/assistant_tools/src/{}.rs", tool_name);
|
||||||
|
let edits = edits.get(Path::new(&path_str));
|
||||||
|
|
||||||
|
let ignored = edits.map_or(false, |edits| {
|
||||||
|
edits.has_added_line(" _window: Option<gpui::AnyWindowHandle>,\n")
|
||||||
|
});
|
||||||
|
let uningored = edits.map_or(false, |edits| {
|
||||||
|
edits.has_added_line(" window: Option<gpui::AnyWindowHandle>,\n")
|
||||||
|
});
|
||||||
|
|
||||||
|
cx.assert(ignored || uningored, format!("Argument: {}", tool_name))
|
||||||
|
.ok();
|
||||||
|
|
||||||
|
cx.assert(ignored, format!("`_` prefix: {}", tool_name))
|
||||||
|
.ok();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Adds unignored argument to `batch_tool`
|
||||||
|
|
||||||
|
let batch_tool_edits = edits.get(Path::new("crates/assistant_tools/src/batch_tool.rs"));
|
||||||
|
|
||||||
|
cx.assert(
|
||||||
|
batch_tool_edits.map_or(false, |edits| {
|
||||||
|
edits.has_added_line(" window: Option<gpui::AnyWindowHandle>,\n")
|
||||||
|
}),
|
||||||
|
"Argument: batch_tool",
|
||||||
|
)
|
||||||
|
.ok();
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn diff_assertions(&self) -> Vec<JudgeAssertion> {
|
||||||
|
vec![
|
||||||
|
JudgeAssertion {
|
||||||
|
id: "batch tool passes window to each".to_string(),
|
||||||
|
description:
|
||||||
|
"batch_tool is modified to pass a clone of the window to each tool it calls."
|
||||||
|
.to_string(),
|
||||||
|
},
|
||||||
|
JudgeAssertion {
|
||||||
|
id: "tool tests updated".to_string(),
|
||||||
|
description:
|
||||||
|
"tool tests are updated to pass the new `window` argument (`None` is ok)."
|
||||||
|
.to_string(),
|
||||||
|
},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
|
@ -33,7 +33,7 @@ impl Example for FileSearchExample {
|
||||||
|
|
||||||
let response = cx.run_turn().await?;
|
let response = cx.run_turn().await?;
|
||||||
let tool_use = response.expect_tool("path_search", cx)?;
|
let tool_use = response.expect_tool("path_search", cx)?;
|
||||||
let input = tool_use.expect_input::<PathSearchToolInput>(cx)?;
|
let input = tool_use.parse_input::<PathSearchToolInput>()?;
|
||||||
|
|
||||||
let glob = input.glob;
|
let glob = input.glob;
|
||||||
cx.assert(
|
cx.assert(
|
||||||
|
|
|
@ -11,10 +11,14 @@ use util::serde::default_true;
|
||||||
|
|
||||||
use crate::example::{Example, ExampleContext, ExampleMetadata, JudgeAssertion};
|
use crate::example::{Example, ExampleContext, ExampleMetadata, JudgeAssertion};
|
||||||
|
|
||||||
|
mod add_arg_to_trait_method;
|
||||||
mod file_search;
|
mod file_search;
|
||||||
|
|
||||||
pub fn all(examples_dir: &Path) -> Vec<Rc<dyn Example>> {
|
pub fn all(examples_dir: &Path) -> Vec<Rc<dyn Example>> {
|
||||||
let mut threads: Vec<Rc<dyn Example>> = vec![Rc::new(file_search::FileSearchExample)];
|
let mut threads: Vec<Rc<dyn Example>> = vec![
|
||||||
|
Rc::new(file_search::FileSearchExample),
|
||||||
|
Rc::new(add_arg_to_trait_method::AddArgToTraitMethod),
|
||||||
|
];
|
||||||
|
|
||||||
for example_path in list_declarative_examples(examples_dir).unwrap() {
|
for example_path in list_declarative_examples(examples_dir).unwrap() {
|
||||||
threads.push(Rc::new(DeclarativeExample::load(&example_path).unwrap()));
|
threads.push(Rc::new(DeclarativeExample::load(&example_path).unwrap()));
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue