Add eval for open_tool (#29801)

Also have its description say it should only be used on request

Release Notes:

- N/A
This commit is contained in:
Richard Feldman 2025-05-02 11:56:07 -04:00 committed by GitHub
parent e6f6b351b7
commit 9efc09c5a6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 90 additions and 11 deletions

View file

@ -48,27 +48,25 @@ use crate::code_action_tool::CodeActionTool;
use crate::code_symbols_tool::CodeSymbolsTool;
use crate::contents_tool::ContentsTool;
use crate::create_directory_tool::CreateDirectoryTool;
use crate::create_file_tool::CreateFileTool;
use crate::delete_path_tool::DeletePathTool;
use crate::diagnostics_tool::DiagnosticsTool;
use crate::edit_file_tool::EditFileTool;
use crate::fetch_tool::FetchTool;
use crate::find_path_tool::FindPathTool;
use crate::grep_tool::GrepTool;
use crate::list_directory_tool::ListDirectoryTool;
use crate::now_tool::NowTool;
use crate::open_tool::OpenTool;
use crate::read_file_tool::ReadFileTool;
use crate::rename_tool::RenameTool;
use crate::streaming_edit_file_tool::StreamingEditFileTool;
use crate::symbol_info_tool::SymbolInfoTool;
use crate::terminal_tool::TerminalTool;
use crate::thinking_tool::ThinkingTool;
pub use create_file_tool::CreateFileToolInput;
pub use edit_file_tool::EditFileToolInput;
pub use create_file_tool::{CreateFileTool, CreateFileToolInput};
pub use edit_file_tool::{EditFileTool, EditFileToolInput};
pub use find_path_tool::FindPathToolInput;
pub use open_tool::OpenTool;
pub use read_file_tool::ReadFileToolInput;
pub use terminal_tool::TerminalTool;
pub fn init(http_client: Arc<HttpClientWithUrl>, cx: &mut App) {
assistant_tool::init(cx);

View file

@ -4,3 +4,6 @@ This tool opens a file or URL with the default application associated with it on
- On Linux, it uses something like `xdg-open`, `gio open`, `gnome-open`, `kde-open`, `wslview` as appropriate
For example, it can open a web browser with a URL, open a PDF file with the default PDF viewer, etc.
You MUST ONLY use this tool when the user has explicitly requested opening something. You MUST NEVER assume that
the user would like for you to use this tool.

View file

@ -169,11 +169,14 @@ fn main() {
continue;
}
if meta.language_server.map_or(false, |language| {
!languages.contains(&language.file_extension)
}) {
skipped.push(meta.name);
continue;
if let Some(language) = meta.language_server {
if !languages.contains(&language.file_extension) {
panic!(
"Eval for {:?} could not be run because no language server was found for extension {:?}",
meta.name,
language.file_extension
);
}
}
// TODO: This creates a worktree per repetition. Ideally these examples should

View file

@ -14,12 +14,14 @@ use crate::example::{Example, ExampleContext, ExampleMetadata, JudgeAssertion};
mod add_arg_to_trait_method;
mod code_block_citations;
mod file_search;
mod planets;
pub fn all(examples_dir: &Path) -> Vec<Rc<dyn Example>> {
let mut threads: Vec<Rc<dyn Example>> = vec![
Rc::new(file_search::FileSearchExample),
Rc::new(add_arg_to_trait_method::AddArgToTraitMethod),
Rc::new(code_block_citations::CodeBlockCitations),
Rc::new(planets::Planets),
];
for example_path in list_declarative_examples(examples_dir).unwrap() {

View file

@ -0,0 +1,73 @@
use anyhow::Result;
use assistant_tool::Tool;
use assistant_tools::{OpenTool, TerminalTool};
use async_trait::async_trait;
use crate::example::{Example, ExampleContext, ExampleMetadata, JudgeAssertion};
/// Eval example that asks the agent for a plain-JavaScript animated solar system
/// page and then checks that neither the `open` tool nor the `terminal` tool was used.
pub struct Planets;
#[async_trait(?Send)]
impl Example for Planets {
    /// Metadata for this eval: its name plus the repository URL and revision it
    /// runs against. `language_server` and `max_assertions` are left unset.
    fn meta(&self) -> ExampleMetadata {
        // This commit in this repo is just the Apache2 license, so it is
        // effectively a blank project.
        let url = "https://github.com/roc-lang/roc".to_string();
        let revision = "59e49c75214f60b4dc4a45092292061c8c26ce27".to_string();

        ExampleMetadata {
            name: "planets".to_string(),
            url,
            revision,
            language_server: None,
            max_assertions: None,
        }
    }

    /// Sends a single user request, runs the conversation to completion, and
    /// asserts that the `open` and `terminal` tools were each used zero times.
    async fn conversation(&self, cx: &mut ExampleContext) -> Result<()> {
        let prompt = r#"
Make a plain JavaScript web page which renders an animated 3D solar system.
Let me drag to rotate the camera around.
Do not use npm.
"#;
        cx.push_user_message(prompt.to_string());

        let response = cx.run_to_end().await?;

        // Tally uses of the two tools of interest in a single pass; any other
        // tool leaves both counters unchanged.
        let open_tool_name = OpenTool.name();
        let terminal_tool_name = TerminalTool.name();
        let (open_tool_uses, terminal_tool_uses) =
            response
                .tool_uses()
                .fold((0, 0), |(open, terminal), tool_use| {
                    if tool_use.name == open_tool_name {
                        (open + 1, terminal)
                    } else if tool_use.name == terminal_tool_name {
                        (open, terminal + 1)
                    } else {
                        (open, terminal)
                    }
                });

        // Each result is dropped via `.ok()` rather than propagated, so a failed
        // check here does not make `conversation` itself return an error.

        // The user never asked for anything to be opened, so `open` must go unused.
        cx.assert_eq(open_tool_uses, 0, "`open` tool was not used")
            .ok();

        // Without npm there is no reason to reach for the terminal.
        cx.assert_eq(terminal_tool_uses, 0, "`terminal` tool was not used")
            .ok();

        Ok(())
    }

    /// Judge assertions evaluated against the resulting diff: the page is an
    /// animated solar system, the camera can be dragged, and the code is plain
    /// JavaScript without npm.
    fn diff_assertions(&self) -> Vec<JudgeAssertion> {
        // Small constructor so each assertion reads as an (id, description) pair.
        let assertion = |id: &str, description: &str| JudgeAssertion {
            id: id.to_string(),
            description: description.to_string(),
        };

        vec![
            assertion(
                "animated solar system",
                "This page should render a solar system, and it should be animated.",
            ),
            assertion(
                "drag to rotate camera",
                "The user can drag to rotate the camera around.",
            ),
            assertion(
                "plain JavaScript",
                "The code base uses plain JavaScript and no npm, along with HTML and CSS.",
            ),
        ]
    }
}