Add eval for open_tool (#29801)
Also have its description say it should only be used on request.

Release Notes:

- N/A
This commit is contained in:
parent
e6f6b351b7
commit
9efc09c5a6
5 changed files with 90 additions and 11 deletions
|
@ -48,27 +48,25 @@ use crate::code_action_tool::CodeActionTool;
|
||||||
use crate::code_symbols_tool::CodeSymbolsTool;
|
use crate::code_symbols_tool::CodeSymbolsTool;
|
||||||
use crate::contents_tool::ContentsTool;
|
use crate::contents_tool::ContentsTool;
|
||||||
use crate::create_directory_tool::CreateDirectoryTool;
|
use crate::create_directory_tool::CreateDirectoryTool;
|
||||||
use crate::create_file_tool::CreateFileTool;
|
|
||||||
use crate::delete_path_tool::DeletePathTool;
|
use crate::delete_path_tool::DeletePathTool;
|
||||||
use crate::diagnostics_tool::DiagnosticsTool;
|
use crate::diagnostics_tool::DiagnosticsTool;
|
||||||
use crate::edit_file_tool::EditFileTool;
|
|
||||||
use crate::fetch_tool::FetchTool;
|
use crate::fetch_tool::FetchTool;
|
||||||
use crate::find_path_tool::FindPathTool;
|
use crate::find_path_tool::FindPathTool;
|
||||||
use crate::grep_tool::GrepTool;
|
use crate::grep_tool::GrepTool;
|
||||||
use crate::list_directory_tool::ListDirectoryTool;
|
use crate::list_directory_tool::ListDirectoryTool;
|
||||||
use crate::now_tool::NowTool;
|
use crate::now_tool::NowTool;
|
||||||
use crate::open_tool::OpenTool;
|
|
||||||
use crate::read_file_tool::ReadFileTool;
|
use crate::read_file_tool::ReadFileTool;
|
||||||
use crate::rename_tool::RenameTool;
|
use crate::rename_tool::RenameTool;
|
||||||
use crate::streaming_edit_file_tool::StreamingEditFileTool;
|
use crate::streaming_edit_file_tool::StreamingEditFileTool;
|
||||||
use crate::symbol_info_tool::SymbolInfoTool;
|
use crate::symbol_info_tool::SymbolInfoTool;
|
||||||
use crate::terminal_tool::TerminalTool;
|
|
||||||
use crate::thinking_tool::ThinkingTool;
|
use crate::thinking_tool::ThinkingTool;
|
||||||
|
|
||||||
pub use create_file_tool::CreateFileToolInput;
|
pub use create_file_tool::{CreateFileTool, CreateFileToolInput};
|
||||||
pub use edit_file_tool::EditFileToolInput;
|
pub use edit_file_tool::{EditFileTool, EditFileToolInput};
|
||||||
pub use find_path_tool::FindPathToolInput;
|
pub use find_path_tool::FindPathToolInput;
|
||||||
|
pub use open_tool::OpenTool;
|
||||||
pub use read_file_tool::ReadFileToolInput;
|
pub use read_file_tool::ReadFileToolInput;
|
||||||
|
pub use terminal_tool::TerminalTool;
|
||||||
|
|
||||||
pub fn init(http_client: Arc<HttpClientWithUrl>, cx: &mut App) {
|
pub fn init(http_client: Arc<HttpClientWithUrl>, cx: &mut App) {
|
||||||
assistant_tool::init(cx);
|
assistant_tool::init(cx);
|
||||||
|
|
|
@ -4,3 +4,6 @@ This tool opens a file or URL with the default application associated with it on
|
||||||
- On Linux, it uses something like `xdg-open`, `gio open`, `gnome-open`, `kde-open`, `wslview` as appropriate
|
- On Linux, it uses something like `xdg-open`, `gio open`, `gnome-open`, `kde-open`, `wslview` as appropriate
|
||||||
|
|
||||||
For example, it can open a web browser with a URL, open a PDF file with the default PDF viewer, etc.
|
For example, it can open a web browser with a URL, open a PDF file with the default PDF viewer, etc.
|
||||||
|
|
||||||
|
You MUST ONLY use this tool when the user has explicitly requested opening something. You MUST NEVER assume that
|
||||||
|
the user would like for you to use this tool.
|
||||||
|
|
|
@ -169,11 +169,14 @@ fn main() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if meta.language_server.map_or(false, |language| {
|
if let Some(language) = meta.language_server {
|
||||||
!languages.contains(&language.file_extension)
|
if !languages.contains(&language.file_extension) {
|
||||||
}) {
|
panic!(
|
||||||
skipped.push(meta.name);
|
"Eval for {:?} could not be run because no language server was found for extension {:?}",
|
||||||
continue;
|
meta.name,
|
||||||
|
language.file_extension
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: This creates a worktree per repetition. Ideally these examples should
|
// TODO: This creates a worktree per repetition. Ideally these examples should
|
||||||
|
|
|
@ -14,12 +14,14 @@ use crate::example::{Example, ExampleContext, ExampleMetadata, JudgeAssertion};
|
||||||
mod add_arg_to_trait_method;
|
mod add_arg_to_trait_method;
|
||||||
mod code_block_citations;
|
mod code_block_citations;
|
||||||
mod file_search;
|
mod file_search;
|
||||||
|
mod planets;
|
||||||
|
|
||||||
pub fn all(examples_dir: &Path) -> Vec<Rc<dyn Example>> {
|
pub fn all(examples_dir: &Path) -> Vec<Rc<dyn Example>> {
|
||||||
let mut threads: Vec<Rc<dyn Example>> = vec![
|
let mut threads: Vec<Rc<dyn Example>> = vec![
|
||||||
Rc::new(file_search::FileSearchExample),
|
Rc::new(file_search::FileSearchExample),
|
||||||
Rc::new(add_arg_to_trait_method::AddArgToTraitMethod),
|
Rc::new(add_arg_to_trait_method::AddArgToTraitMethod),
|
||||||
Rc::new(code_block_citations::CodeBlockCitations),
|
Rc::new(code_block_citations::CodeBlockCitations),
|
||||||
|
Rc::new(planets::Planets),
|
||||||
];
|
];
|
||||||
|
|
||||||
for example_path in list_declarative_examples(examples_dir).unwrap() {
|
for example_path in list_declarative_examples(examples_dir).unwrap() {
|
||||||
|
|
73
crates/eval/src/examples/planets.rs
Normal file
73
crates/eval/src/examples/planets.rs
Normal file
|
@ -0,0 +1,73 @@
|
||||||
|
use anyhow::Result;
|
||||||
|
use assistant_tool::Tool;
|
||||||
|
use assistant_tools::{OpenTool, TerminalTool};
|
||||||
|
use async_trait::async_trait;
|
||||||
|
|
||||||
|
use crate::example::{Example, ExampleContext, ExampleMetadata, JudgeAssertion};
|
||||||
|
|
||||||
|
pub struct Planets;
|
||||||
|
|
||||||
|
#[async_trait(?Send)]
|
||||||
|
impl Example for Planets {
|
||||||
|
fn meta(&self) -> ExampleMetadata {
|
||||||
|
ExampleMetadata {
|
||||||
|
name: "planets".to_string(),
|
||||||
|
url: "https://github.com/roc-lang/roc".to_string(), // This commit in this repo is just the Apache2 license,
|
||||||
|
revision: "59e49c75214f60b4dc4a45092292061c8c26ce27".to_string(), // so effectively a blank project.
|
||||||
|
language_server: None,
|
||||||
|
max_assertions: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn conversation(&self, cx: &mut ExampleContext) -> Result<()> {
|
||||||
|
cx.push_user_message(
|
||||||
|
r#"
|
||||||
|
Make a plain JavaScript web page which renders an animated 3D solar system.
|
||||||
|
Let me drag to rotate the camera around.
|
||||||
|
Do not use npm.
|
||||||
|
"#
|
||||||
|
.to_string(),
|
||||||
|
);
|
||||||
|
|
||||||
|
let response = cx.run_to_end().await?;
|
||||||
|
let mut open_tool_uses = 0;
|
||||||
|
let mut terminal_tool_uses = 0;
|
||||||
|
|
||||||
|
for tool_use in response.tool_uses() {
|
||||||
|
if tool_use.name == OpenTool.name() {
|
||||||
|
open_tool_uses += 1;
|
||||||
|
} else if tool_use.name == TerminalTool.name() {
|
||||||
|
terminal_tool_uses += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// The open tool should only be used when requested, which it was not.
|
||||||
|
cx.assert_eq(open_tool_uses, 0, "`open` tool was not used")
|
||||||
|
.ok();
|
||||||
|
// No reason to use the terminal if not using npm.
|
||||||
|
cx.assert_eq(terminal_tool_uses, 0, "`terminal` tool was not used")
|
||||||
|
.ok();
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn diff_assertions(&self) -> Vec<JudgeAssertion> {
|
||||||
|
vec![
|
||||||
|
JudgeAssertion {
|
||||||
|
id: "animated solar system".to_string(),
|
||||||
|
description: "This page should render a solar system, and it should be animated."
|
||||||
|
.to_string(),
|
||||||
|
},
|
||||||
|
JudgeAssertion {
|
||||||
|
id: "drag to rotate camera".to_string(),
|
||||||
|
description: "The user can drag to rotate the camera around.".to_string(),
|
||||||
|
},
|
||||||
|
JudgeAssertion {
|
||||||
|
id: "plain JavaScript".to_string(),
|
||||||
|
description:
|
||||||
|
"The code base uses plain JavaScript and no npm, along with HTML and CSS."
|
||||||
|
.to_string(),
|
||||||
|
},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
Loading…
Add table
Add a link
Reference in a new issue