From 9efc09c5a61d230c85dbe471a6c9f70a27542d38 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Fri, 2 May 2025 11:56:07 -0400 Subject: [PATCH] Add eval for open_tool (#29801) Also have its description say it should only be used on request Release Notes: - N/A --- crates/assistant_tools/src/assistant_tools.rs | 10 +-- .../src/open_tool/description.md | 3 + crates/eval/src/eval.rs | 13 ++-- crates/eval/src/examples/mod.rs | 2 + crates/eval/src/examples/planets.rs | 73 +++++++++++++++++++ 5 files changed, 90 insertions(+), 11 deletions(-) create mode 100644 crates/eval/src/examples/planets.rs diff --git a/crates/assistant_tools/src/assistant_tools.rs b/crates/assistant_tools/src/assistant_tools.rs index 410432343f..756a9271b1 100644 --- a/crates/assistant_tools/src/assistant_tools.rs +++ b/crates/assistant_tools/src/assistant_tools.rs @@ -48,27 +48,25 @@ use crate::code_action_tool::CodeActionTool; use crate::code_symbols_tool::CodeSymbolsTool; use crate::contents_tool::ContentsTool; use crate::create_directory_tool::CreateDirectoryTool; -use crate::create_file_tool::CreateFileTool; use crate::delete_path_tool::DeletePathTool; use crate::diagnostics_tool::DiagnosticsTool; -use crate::edit_file_tool::EditFileTool; use crate::fetch_tool::FetchTool; use crate::find_path_tool::FindPathTool; use crate::grep_tool::GrepTool; use crate::list_directory_tool::ListDirectoryTool; use crate::now_tool::NowTool; -use crate::open_tool::OpenTool; use crate::read_file_tool::ReadFileTool; use crate::rename_tool::RenameTool; use crate::streaming_edit_file_tool::StreamingEditFileTool; use crate::symbol_info_tool::SymbolInfoTool; -use crate::terminal_tool::TerminalTool; use crate::thinking_tool::ThinkingTool; -pub use create_file_tool::CreateFileToolInput; -pub use edit_file_tool::EditFileToolInput; +pub use create_file_tool::{CreateFileTool, CreateFileToolInput}; +pub use edit_file_tool::{EditFileTool, EditFileToolInput}; pub use find_path_tool::FindPathToolInput; +pub use open_tool::OpenTool; pub use read_file_tool::ReadFileToolInput; +pub use terminal_tool::TerminalTool; pub fn init(http_client: Arc, cx: &mut App) { assistant_tool::init(cx); diff --git a/crates/assistant_tools/src/open_tool/description.md b/crates/assistant_tools/src/open_tool/description.md index 1e7eb4127d..99ccbb0524 100644 --- a/crates/assistant_tools/src/open_tool/description.md +++ b/crates/assistant_tools/src/open_tool/description.md @@ -4,3 +4,6 @@ This tool opens a file or URL with the default application associated with it on - On Linux, it uses something like `xdg-open`, `gio open`, `gnome-open`, `kde-open`, `wslview` as appropriate For example, it can open a web browser with a URL, open a PDF file with the default PDF viewer, etc. + +You MUST ONLY use this tool when the user has explicitly requested opening something. You MUST NEVER assume that +the user would like for you to use this tool. diff --git a/crates/eval/src/eval.rs b/crates/eval/src/eval.rs index 646a8e23c0..00bc60c9ea 100644 --- a/crates/eval/src/eval.rs +++ b/crates/eval/src/eval.rs @@ -169,11 +169,14 @@ fn main() { continue; } - if meta.language_server.map_or(false, |language| { - !languages.contains(&language.file_extension) - }) { - skipped.push(meta.name); - continue; + if let Some(language) = meta.language_server { + if !languages.contains(&language.file_extension) { + panic!( + "Eval for {:?} could not be run because no language server was found for extension {:?}", + meta.name, + language.file_extension + ); + } } // TODO: This creates a worktree per repetition. Ideally these examples should diff --git a/crates/eval/src/examples/mod.rs b/crates/eval/src/examples/mod.rs index 7e451e4ff6..5487511ea0 100644 --- a/crates/eval/src/examples/mod.rs +++ b/crates/eval/src/examples/mod.rs @@ -14,12 +14,14 @@ use crate::example::{Example, ExampleContext, ExampleMetadata, JudgeAssertion}; mod add_arg_to_trait_method; mod code_block_citations; mod file_search; +mod planets; pub fn all(examples_dir: &Path) -> Vec> { let mut threads: Vec> = vec![ Rc::new(file_search::FileSearchExample), Rc::new(add_arg_to_trait_method::AddArgToTraitMethod), Rc::new(code_block_citations::CodeBlockCitations), + Rc::new(planets::Planets), ]; for example_path in list_declarative_examples(examples_dir).unwrap() { diff --git a/crates/eval/src/examples/planets.rs b/crates/eval/src/examples/planets.rs new file mode 100644 index 0000000000..c900ea87d9 --- /dev/null +++ b/crates/eval/src/examples/planets.rs @@ -0,0 +1,73 @@ +use anyhow::Result; +use assistant_tool::Tool; +use assistant_tools::{OpenTool, TerminalTool}; +use async_trait::async_trait; + +use crate::example::{Example, ExampleContext, ExampleMetadata, JudgeAssertion}; + +pub struct Planets; + +#[async_trait(?Send)] +impl Example for Planets { + fn meta(&self) -> ExampleMetadata { + ExampleMetadata { + name: "planets".to_string(), + url: "https://github.com/roc-lang/roc".to_string(), // This commit in this repo is just the Apache2 license, + revision: "59e49c75214f60b4dc4a45092292061c8c26ce27".to_string(), // so effectively a blank project. + language_server: None, + max_assertions: None, + } + } + + async fn conversation(&self, cx: &mut ExampleContext) -> Result<()> { + cx.push_user_message( + r#" + Make a plain JavaScript web page which renders an animated 3D solar system. + Let me drag to rotate the camera around. + Do not use npm. + "# + .to_string(), + ); + + let response = cx.run_to_end().await?; + let mut open_tool_uses = 0; + let mut terminal_tool_uses = 0; + + for tool_use in response.tool_uses() { + if tool_use.name == OpenTool.name() { + open_tool_uses += 1; + } else if tool_use.name == TerminalTool.name() { + terminal_tool_uses += 1; + } + } + + // The open tool should only be used when requested, which it was not. + cx.assert_eq(open_tool_uses, 0, "`open` tool was not used") + .ok(); + // No reason to use the terminal if not using npm. + cx.assert_eq(terminal_tool_uses, 0, "`terminal` tool was not used") + .ok(); + + Ok(()) + } + + fn diff_assertions(&self) -> Vec { + vec![ + JudgeAssertion { + id: "animated solar system".to_string(), + description: "This page should render a solar system, and it should be animated." + .to_string(), + }, + JudgeAssertion { + id: "drag to rotate camera".to_string(), + description: "The user can drag to rotate the camera around.".to_string(), + }, + JudgeAssertion { + id: "plain JavaScript".to_string(), + description: + "The code base uses plain JavaScript and no npm, along with HTML and CSS." + .to_string(), + }, + ] + } +}