diff --git a/assets/prompts/assistant_system_prompt.hbs b/assets/prompts/assistant_system_prompt.hbs index a155dea19d..b4545f5a74 100644 --- a/assets/prompts/assistant_system_prompt.hbs +++ b/assets/prompts/assistant_system_prompt.hbs @@ -27,11 +27,11 @@ If you are unsure how to fulfill the user's request, gather more information wit If appropriate, use tool calls to explore the current project, which contains the following root directories: {{#each worktrees}} -- `{{root_name}}` +- `{{abs_path}}` {{/each}} - Bias towards not asking the user for help if you can find the answer yourself. -- When providing paths to tools, the path should always begin with a path that starts with a project root directory listed above. +- When providing paths to tools, the path should always start with the name of a project root directory listed above. - Before you read or edit a file, you must first find the full path. DO NOT ever guess a file path! {{# if (has_tool 'grep') }} - When looking for symbols in the project, prefer the `grep` tool. diff --git a/crates/agent/src/thread_store.rs b/crates/agent/src/thread_store.rs index db87bdd3a5..9ac50444c5 100644 --- a/crates/agent/src/thread_store.rs +++ b/crates/agent/src/thread_store.rs @@ -305,17 +305,19 @@ impl ThreadStore { project: Entity, cx: &mut App, ) -> Task<(WorktreeContext, Option)> { - let root_name = worktree.read(cx).root_name().into(); + let tree = worktree.read(cx); + let root_name = tree.root_name().into(); + let abs_path = tree.abs_path(); + + let mut context = WorktreeContext { + root_name, + abs_path, + rules_file: None, + }; let rules_task = Self::load_worktree_rules_file(worktree, project, cx); let Some(rules_task) = rules_task else { - return Task::ready(( - WorktreeContext { - root_name, - rules_file: None, - }, - None, - )); + return Task::ready((context, None)); }; cx.spawn(async move |_| { @@ -328,11 +330,8 @@ impl ThreadStore { }), ), }; - let worktree_info = WorktreeContext { - root_name, - rules_file, - }; - (worktree_info, rules_file_error) + context.rules_file = rules_file; + (context, rules_file_error) }) } @@ -341,12 +340,12 @@ impl ThreadStore { project: Entity, cx: &mut App, ) -> Option>> { - let worktree_ref = worktree.read(cx); - let worktree_id = worktree_ref.id(); + let worktree = worktree.read(cx); + let worktree_id = worktree.id(); let selected_rules_file = RULES_FILE_NAMES .into_iter() .filter_map(|name| { - worktree_ref + worktree .entry_for_path(name) .filter(|entry| entry.is_file()) .map(|entry| entry.path.clone()) diff --git a/crates/assistant_tools/src/edit_agent/evals.rs b/crates/assistant_tools/src/edit_agent/evals.rs index 567bfecc0a..20be2f01b8 100644 --- a/crates/assistant_tools/src/edit_agent/evals.rs +++ b/crates/assistant_tools/src/edit_agent/evals.rs @@ -26,6 +26,7 @@ use std::{ cmp::Reverse, fmt::{self, Display}, io::Write as _, + path::Path, str::FromStr, sync::mpsc, }; @@ -38,10 +39,11 @@ fn eval_extract_handle_command_output() { // // Model | Pass rate // ----------------------------|---------- - // claude-3.7-sonnet | 0.98 - // gemini-2.5-pro-06-05 | 0.77 - // gemini-2.5-flash | 0.11 - // gpt-4.1 | 1.00 + // claude-3.7-sonnet | 0.99 (2025-06-14) + // claude-sonnet-4 | 0.97 (2025-06-14) + // gemini-2.5-pro-06-05 | 0.77 (2025-05-22) + // gemini-2.5-flash | 0.11 (2025-05-22) + // gpt-4.1 | 1.00 (2025-05-22) let input_file_path = "root/blame.rs"; let input_file_content = include_str!("evals/fixtures/extract_handle_command_output/before.rs"); @@ -110,6 +112,13 @@ fn eval_extract_handle_command_output() { #[test] #[cfg_attr(not(feature = "eval"), ignore)] fn eval_delete_run_git_blame() { + // Model | Pass rate + // ----------------------------|---------- + // claude-3.7-sonnet | 1.0 (2025-06-14) + // claude-sonnet-4 | 0.96 (2025-06-14) + // gemini-2.5-pro-06-05 | + // gemini-2.5-flash | + // gpt-4.1 | let input_file_path = "root/blame.rs"; let input_file_content = include_str!("evals/fixtures/delete_run_git_blame/before.rs"); let output_file_content = include_str!("evals/fixtures/delete_run_git_blame/after.rs"); @@ -165,13 +174,12 @@ fn eval_delete_run_git_blame() { #[test] #[cfg_attr(not(feature = "eval"), ignore)] fn eval_translate_doc_comments() { - // Results for 2025-05-22 - // // Model | Pass rate // ============================================ // - // claude-3.7-sonnet | - // gemini-2.5-pro-preview-03-25 | 1.0 + // claude-3.7-sonnet | 1.0 (2025-06-14) + // claude-sonnet-4 | 1.0 (2025-06-14) + // gemini-2.5-pro-preview-03-25 | 1.0 (2025-05-22) // gemini-2.5-flash-preview-04-17 | // gpt-4.1 | let input_file_path = "root/canvas.rs"; @@ -228,13 +236,12 @@ fn eval_translate_doc_comments() { #[test] #[cfg_attr(not(feature = "eval"), ignore)] fn eval_use_wasi_sdk_in_compile_parser_to_wasm() { - // Results for 2025-05-22 - // // Model | Pass rate // ============================================ // - // claude-3.7-sonnet | 0.98 - // gemini-2.5-pro-preview-03-25 | 0.99 + // claude-3.7-sonnet | 0.96 (2025-06-14) + // claude-sonnet-4 | 0.11 (2025-06-14) + // gemini-2.5-pro-preview-03-25 | 0.99 (2025-05-22) // gemini-2.5-flash-preview-04-17 | // gpt-4.1 | let input_file_path = "root/lib.rs"; @@ -354,13 +361,12 @@ fn eval_use_wasi_sdk_in_compile_parser_to_wasm() { #[test] #[cfg_attr(not(feature = "eval"), ignore)] fn eval_disable_cursor_blinking() { - // Results for 2025-05-22 - // // Model | Pass rate // ============================================ // - // claude-3.7-sonnet | - // gemini-2.5-pro-preview-03-25 | 1.0 + // claude-3.7-sonnet | 0.99 (2025-06-14) + // claude-sonnet-4 | 0.85 (2025-06-14) + // gemini-2.5-pro-preview-03-25 | 1.0 (2025-05-22) // gemini-2.5-flash-preview-04-17 | // gpt-4.1 | let input_file_path = "root/editor.rs"; @@ -462,7 +468,7 @@ fn eval_from_pixels_constructor() { 0.95, // For whatever reason, this eval produces more mismatched tags. // Increasing for now, let's see if we can bring this down. - 0.2, + 0.25, EvalInput::from_conversation( vec![ message( @@ -648,15 +654,14 @@ fn eval_from_pixels_constructor() { #[test] #[cfg_attr(not(feature = "eval"), ignore)] fn eval_zode() { - // Results for 2025-05-22 - // // Model | Pass rate // ============================================ // - // claude-3.7-sonnet | 1.0 - // gemini-2.5-pro-preview-03-25 | 1.0 - // gemini-2.5-flash-preview-04-17 | 1.0 - // gpt-4.1 | 1.0 + // claude-3.7-sonnet | 1.0 (2025-06-14) + // claude-sonnet-4 | 1.0 (2025-06-14) + // gemini-2.5-pro-preview-03-25 | 1.0 (2025-05-22) + // gemini-2.5-flash-preview-04-17 | 1.0 (2025-05-22) + // gpt-4.1 | 1.0 (2025-05-22) let input_file_path = "root/zode.py"; let input_content = None; let edit_description = "Create the main Zode CLI script"; @@ -755,13 +760,12 @@ fn eval_zode() { #[test] #[cfg_attr(not(feature = "eval"), ignore)] fn eval_add_overwrite_test() { - // Results for 2025-05-22 - // // Model | Pass rate // ============================================ // - // claude-3.7-sonnet | 0.16 - // gemini-2.5-pro-preview-03-25 | 0.35 + // claude-3.7-sonnet | 0.65 (2025-06-14) + // claude-sonnet-4 | 0.07 (2025-06-14) + // gemini-2.5-pro-preview-03-25 | 0.35 (2025-05-22) // gemini-2.5-flash-preview-04-17 | // gpt-4.1 | let input_file_path = "root/action_log.rs"; @@ -991,15 +995,14 @@ fn eval_create_empty_file() { // thoughts into it. This issue is not specific to empty files, but // it's easier to reproduce with them. // - // Results for 2025-05-21: - // // Model | Pass rate // ============================================ // - // claude-3.7-sonnet | 1.00 - // gemini-2.5-pro-preview-03-25 | 1.00 - // gemini-2.5-flash-preview-04-17 | 1.00 - // gpt-4.1 | 1.00 + // claude-3.7-sonnet | 1.00 (2025-06-14) + // claude-sonnet-4 | 1.00 (2025-06-14) + // gemini-2.5-pro-preview-03-25 | 1.00 (2025-05-21) + // gemini-2.5-flash-preview-04-17 | 1.00 (2025-05-21) + // gpt-4.1 | 1.00 (2025-05-21) // // // TODO: gpt-4.1-mini errored 38 times: @@ -1556,6 +1559,7 @@ impl EditAgentTest { .collect::>(); let worktrees = vec![WorktreeContext { root_name: "root".to_string(), + abs_path: Path::new("/path/to/root").into(), rules_file: None, }]; let prompt_builder = PromptBuilder::new(None)?; @@ -1650,7 +1654,7 @@ async fn retry_on_rate_limit(mut request: impl AsyncFnMut() -> Result) -> Ok(err) => match err { LanguageModelCompletionError::RateLimit(duration) => { // Wait for the duration supplied, with some jitter to avoid all requests being made at the same time. - let jitter = duration.mul_f64(rand::thread_rng().gen_range(0.0..0.5)); + let jitter = duration.mul_f64(rand::thread_rng().gen_range(0.0..1.0)); eprintln!( "Attempt #{attempt}: Rate limit exceeded. Retry after {duration:?} + jitter of {jitter:?}" ); diff --git a/crates/assistant_tools/src/edit_file_tool.rs b/crates/assistant_tools/src/edit_file_tool.rs index 7f21843b71..d3dba5cb61 100644 --- a/crates/assistant_tools/src/edit_file_tool.rs +++ b/crates/assistant_tools/src/edit_file_tool.rs @@ -69,13 +69,13 @@ pub struct EditFileToolInput { /// start each path with one of the project's root directories. /// /// The following examples assume we have two root directories in the project: - /// - backend - /// - frontend + /// - /a/b/backend + /// - /c/d/frontend /// /// /// `backend/src/main.rs` /// - /// Notice how the file path starts with root-1. Without that, the path + /// Notice how the file path starts with `backend`. Without that, the path /// would be ambiguous and the call would fail! /// /// diff --git a/crates/assistant_tools/src/read_file_tool.rs b/crates/assistant_tools/src/read_file_tool.rs index 33cbf9f557..4d40fc6a7c 100644 --- a/crates/assistant_tools/src/read_file_tool.rs +++ b/crates/assistant_tools/src/read_file_tool.rs @@ -31,8 +31,8 @@ pub struct ReadFileToolInput { /// /// If the project has the following root directories: /// - /// - directory1 - /// - directory2 + /// - /a/b/directory1 + /// - /c/d/directory2 /// /// If you want to access `file.txt` in `directory1`, you should use the path `directory1/file.txt`. /// If you want to access `file.txt` in `directory2`, you should use the path `directory2/file.txt`. diff --git a/crates/prompt_store/src/prompts.rs b/crates/prompt_store/src/prompts.rs index 1e53df70eb..d737ef9246 100644 --- a/crates/prompt_store/src/prompts.rs +++ b/crates/prompt_store/src/prompts.rs @@ -74,6 +74,7 @@ pub struct UserRulesContext { #[derive(Debug, Clone, Serialize)] pub struct WorktreeContext { pub root_name: String, + pub abs_path: Arc, pub rules_file: Option, } @@ -455,6 +456,7 @@ mod test { fn test_assistant_system_prompt_renders() { let worktrees = vec![WorktreeContext { root_name: "path".into(), + abs_path: Path::new("/path/to/root").into(), rules_file: Some(RulesFileContext { path_in_worktree: Path::new(".rules").into(), text: "".into(), @@ -484,6 +486,7 @@ mod test { fn test_assistant_system_prompt_depends_on_enabled_tools() { let worktrees = vec![WorktreeContext { root_name: "path".into(), + abs_path: Path::new("/path/to/root").into(), rules_file: None, }]; let default_user_rules = vec![];