Include full abs paths of worktrees in system prompt (#32725)
Some MCP servers expose tools that take absolute paths as arguments. To interact with these, the agent needs to know the absolute path to the project directories, not just their names. This PR changes the system prompt to include the full path to each worktree, and updates some tool descriptions to reflect this. Todo: * [x] Run evals, make sure the assistant still understands how to specify paths for tools, now that we include abs paths in the system prompt. Release Notes: - Improved the agent's ability to use MCP tools that require absolute paths to files and directories in the project. --------- Co-authored-by: Ben Brandt <benjamin.j.brandt@gmail.com>
This commit is contained in:
parent
0433b8859d
commit
a994666888
6 changed files with 63 additions and 57 deletions
|
@ -27,11 +27,11 @@ If you are unsure how to fulfill the user's request, gather more information wit
|
||||||
If appropriate, use tool calls to explore the current project, which contains the following root directories:
|
If appropriate, use tool calls to explore the current project, which contains the following root directories:
|
||||||
|
|
||||||
{{#each worktrees}}
|
{{#each worktrees}}
|
||||||
- `{{root_name}}`
|
- `{{abs_path}}`
|
||||||
{{/each}}
|
{{/each}}
|
||||||
|
|
||||||
- Bias towards not asking the user for help if you can find the answer yourself.
|
- Bias towards not asking the user for help if you can find the answer yourself.
|
||||||
- When providing paths to tools, the path should always begin with a path that starts with a project root directory listed above.
|
- When providing paths to tools, the path should always start with the name of a project root directory listed above.
|
||||||
- Before you read or edit a file, you must first find the full path. DO NOT ever guess a file path!
|
- Before you read or edit a file, you must first find the full path. DO NOT ever guess a file path!
|
||||||
{{# if (has_tool 'grep') }}
|
{{# if (has_tool 'grep') }}
|
||||||
- When looking for symbols in the project, prefer the `grep` tool.
|
- When looking for symbols in the project, prefer the `grep` tool.
|
||||||
|
|
|
@ -305,17 +305,19 @@ impl ThreadStore {
|
||||||
project: Entity<Project>,
|
project: Entity<Project>,
|
||||||
cx: &mut App,
|
cx: &mut App,
|
||||||
) -> Task<(WorktreeContext, Option<RulesLoadingError>)> {
|
) -> Task<(WorktreeContext, Option<RulesLoadingError>)> {
|
||||||
let root_name = worktree.read(cx).root_name().into();
|
let tree = worktree.read(cx);
|
||||||
|
let root_name = tree.root_name().into();
|
||||||
|
let abs_path = tree.abs_path();
|
||||||
|
|
||||||
|
let mut context = WorktreeContext {
|
||||||
|
root_name,
|
||||||
|
abs_path,
|
||||||
|
rules_file: None,
|
||||||
|
};
|
||||||
|
|
||||||
let rules_task = Self::load_worktree_rules_file(worktree, project, cx);
|
let rules_task = Self::load_worktree_rules_file(worktree, project, cx);
|
||||||
let Some(rules_task) = rules_task else {
|
let Some(rules_task) = rules_task else {
|
||||||
return Task::ready((
|
return Task::ready((context, None));
|
||||||
WorktreeContext {
|
|
||||||
root_name,
|
|
||||||
rules_file: None,
|
|
||||||
},
|
|
||||||
None,
|
|
||||||
));
|
|
||||||
};
|
};
|
||||||
|
|
||||||
cx.spawn(async move |_| {
|
cx.spawn(async move |_| {
|
||||||
|
@ -328,11 +330,8 @@ impl ThreadStore {
|
||||||
}),
|
}),
|
||||||
),
|
),
|
||||||
};
|
};
|
||||||
let worktree_info = WorktreeContext {
|
context.rules_file = rules_file;
|
||||||
root_name,
|
(context, rules_file_error)
|
||||||
rules_file,
|
|
||||||
};
|
|
||||||
(worktree_info, rules_file_error)
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -341,12 +340,12 @@ impl ThreadStore {
|
||||||
project: Entity<Project>,
|
project: Entity<Project>,
|
||||||
cx: &mut App,
|
cx: &mut App,
|
||||||
) -> Option<Task<Result<RulesFileContext>>> {
|
) -> Option<Task<Result<RulesFileContext>>> {
|
||||||
let worktree_ref = worktree.read(cx);
|
let worktree = worktree.read(cx);
|
||||||
let worktree_id = worktree_ref.id();
|
let worktree_id = worktree.id();
|
||||||
let selected_rules_file = RULES_FILE_NAMES
|
let selected_rules_file = RULES_FILE_NAMES
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.filter_map(|name| {
|
.filter_map(|name| {
|
||||||
worktree_ref
|
worktree
|
||||||
.entry_for_path(name)
|
.entry_for_path(name)
|
||||||
.filter(|entry| entry.is_file())
|
.filter(|entry| entry.is_file())
|
||||||
.map(|entry| entry.path.clone())
|
.map(|entry| entry.path.clone())
|
||||||
|
|
|
@ -26,6 +26,7 @@ use std::{
|
||||||
cmp::Reverse,
|
cmp::Reverse,
|
||||||
fmt::{self, Display},
|
fmt::{self, Display},
|
||||||
io::Write as _,
|
io::Write as _,
|
||||||
|
path::Path,
|
||||||
str::FromStr,
|
str::FromStr,
|
||||||
sync::mpsc,
|
sync::mpsc,
|
||||||
};
|
};
|
||||||
|
@ -38,10 +39,11 @@ fn eval_extract_handle_command_output() {
|
||||||
//
|
//
|
||||||
// Model | Pass rate
|
// Model | Pass rate
|
||||||
// ----------------------------|----------
|
// ----------------------------|----------
|
||||||
// claude-3.7-sonnet | 0.98
|
// claude-3.7-sonnet | 0.99 (2025-06-14)
|
||||||
// gemini-2.5-pro-06-05 | 0.77
|
// claude-sonnet-4 | 0.97 (2025-06-14)
|
||||||
// gemini-2.5-flash | 0.11
|
// gemini-2.5-pro-06-05 | 0.77 (2025-05-22)
|
||||||
// gpt-4.1 | 1.00
|
// gemini-2.5-flash | 0.11 (2025-05-22)
|
||||||
|
// gpt-4.1 | 1.00 (2025-05-22)
|
||||||
|
|
||||||
let input_file_path = "root/blame.rs";
|
let input_file_path = "root/blame.rs";
|
||||||
let input_file_content = include_str!("evals/fixtures/extract_handle_command_output/before.rs");
|
let input_file_content = include_str!("evals/fixtures/extract_handle_command_output/before.rs");
|
||||||
|
@ -110,6 +112,13 @@ fn eval_extract_handle_command_output() {
|
||||||
#[test]
|
#[test]
|
||||||
#[cfg_attr(not(feature = "eval"), ignore)]
|
#[cfg_attr(not(feature = "eval"), ignore)]
|
||||||
fn eval_delete_run_git_blame() {
|
fn eval_delete_run_git_blame() {
|
||||||
|
// Model | Pass rate
|
||||||
|
// ----------------------------|----------
|
||||||
|
// claude-3.7-sonnet | 1.0 (2025-06-14)
|
||||||
|
// claude-sonnet-4 | 0.96 (2025-06-14)
|
||||||
|
// gemini-2.5-pro-06-05 |
|
||||||
|
// gemini-2.5-flash |
|
||||||
|
// gpt-4.1 |
|
||||||
let input_file_path = "root/blame.rs";
|
let input_file_path = "root/blame.rs";
|
||||||
let input_file_content = include_str!("evals/fixtures/delete_run_git_blame/before.rs");
|
let input_file_content = include_str!("evals/fixtures/delete_run_git_blame/before.rs");
|
||||||
let output_file_content = include_str!("evals/fixtures/delete_run_git_blame/after.rs");
|
let output_file_content = include_str!("evals/fixtures/delete_run_git_blame/after.rs");
|
||||||
|
@ -165,13 +174,12 @@ fn eval_delete_run_git_blame() {
|
||||||
#[test]
|
#[test]
|
||||||
#[cfg_attr(not(feature = "eval"), ignore)]
|
#[cfg_attr(not(feature = "eval"), ignore)]
|
||||||
fn eval_translate_doc_comments() {
|
fn eval_translate_doc_comments() {
|
||||||
// Results for 2025-05-22
|
|
||||||
//
|
|
||||||
// Model | Pass rate
|
// Model | Pass rate
|
||||||
// ============================================
|
// ============================================
|
||||||
//
|
//
|
||||||
// claude-3.7-sonnet |
|
// claude-3.7-sonnet | 1.0 (2025-06-14)
|
||||||
// gemini-2.5-pro-preview-03-25 | 1.0
|
// claude-sonnet-4 | 1.0 (2025-06-14)
|
||||||
|
// gemini-2.5-pro-preview-03-25 | 1.0 (2025-05-22)
|
||||||
// gemini-2.5-flash-preview-04-17 |
|
// gemini-2.5-flash-preview-04-17 |
|
||||||
// gpt-4.1 |
|
// gpt-4.1 |
|
||||||
let input_file_path = "root/canvas.rs";
|
let input_file_path = "root/canvas.rs";
|
||||||
|
@ -228,13 +236,12 @@ fn eval_translate_doc_comments() {
|
||||||
#[test]
|
#[test]
|
||||||
#[cfg_attr(not(feature = "eval"), ignore)]
|
#[cfg_attr(not(feature = "eval"), ignore)]
|
||||||
fn eval_use_wasi_sdk_in_compile_parser_to_wasm() {
|
fn eval_use_wasi_sdk_in_compile_parser_to_wasm() {
|
||||||
// Results for 2025-05-22
|
|
||||||
//
|
|
||||||
// Model | Pass rate
|
// Model | Pass rate
|
||||||
// ============================================
|
// ============================================
|
||||||
//
|
//
|
||||||
// claude-3.7-sonnet | 0.98
|
// claude-3.7-sonnet | 0.96 (2025-06-14)
|
||||||
// gemini-2.5-pro-preview-03-25 | 0.99
|
// claude-sonnet-4 | 0.11 (2025-06-14)
|
||||||
|
// gemini-2.5-pro-preview-03-25 | 0.99 (2025-05-22)
|
||||||
// gemini-2.5-flash-preview-04-17 |
|
// gemini-2.5-flash-preview-04-17 |
|
||||||
// gpt-4.1 |
|
// gpt-4.1 |
|
||||||
let input_file_path = "root/lib.rs";
|
let input_file_path = "root/lib.rs";
|
||||||
|
@ -354,13 +361,12 @@ fn eval_use_wasi_sdk_in_compile_parser_to_wasm() {
|
||||||
#[test]
|
#[test]
|
||||||
#[cfg_attr(not(feature = "eval"), ignore)]
|
#[cfg_attr(not(feature = "eval"), ignore)]
|
||||||
fn eval_disable_cursor_blinking() {
|
fn eval_disable_cursor_blinking() {
|
||||||
// Results for 2025-05-22
|
|
||||||
//
|
|
||||||
// Model | Pass rate
|
// Model | Pass rate
|
||||||
// ============================================
|
// ============================================
|
||||||
//
|
//
|
||||||
// claude-3.7-sonnet |
|
// claude-3.7-sonnet | 0.99 (2025-06-14)
|
||||||
// gemini-2.5-pro-preview-03-25 | 1.0
|
// claude-sonnet-4 | 0.85 (2025-06-14)
|
||||||
|
// gemini-2.5-pro-preview-03-25 | 1.0 (2025-05-22)
|
||||||
// gemini-2.5-flash-preview-04-17 |
|
// gemini-2.5-flash-preview-04-17 |
|
||||||
// gpt-4.1 |
|
// gpt-4.1 |
|
||||||
let input_file_path = "root/editor.rs";
|
let input_file_path = "root/editor.rs";
|
||||||
|
@ -462,7 +468,7 @@ fn eval_from_pixels_constructor() {
|
||||||
0.95,
|
0.95,
|
||||||
// For whatever reason, this eval produces more mismatched tags.
|
// For whatever reason, this eval produces more mismatched tags.
|
||||||
// Increasing for now, let's see if we can bring this down.
|
// Increasing for now, let's see if we can bring this down.
|
||||||
0.2,
|
0.25,
|
||||||
EvalInput::from_conversation(
|
EvalInput::from_conversation(
|
||||||
vec![
|
vec![
|
||||||
message(
|
message(
|
||||||
|
@ -648,15 +654,14 @@ fn eval_from_pixels_constructor() {
|
||||||
#[test]
|
#[test]
|
||||||
#[cfg_attr(not(feature = "eval"), ignore)]
|
#[cfg_attr(not(feature = "eval"), ignore)]
|
||||||
fn eval_zode() {
|
fn eval_zode() {
|
||||||
// Results for 2025-05-22
|
|
||||||
//
|
|
||||||
// Model | Pass rate
|
// Model | Pass rate
|
||||||
// ============================================
|
// ============================================
|
||||||
//
|
//
|
||||||
// claude-3.7-sonnet | 1.0
|
// claude-3.7-sonnet | 1.0 (2025-06-14)
|
||||||
// gemini-2.5-pro-preview-03-25 | 1.0
|
// claude-sonnet-4 | 1.0 (2025-06-14)
|
||||||
// gemini-2.5-flash-preview-04-17 | 1.0
|
// gemini-2.5-pro-preview-03-25 | 1.0 (2025-05-22)
|
||||||
// gpt-4.1 | 1.0
|
// gemini-2.5-flash-preview-04-17 | 1.0 (2025-05-22)
|
||||||
|
// gpt-4.1 | 1.0 (2025-05-22)
|
||||||
let input_file_path = "root/zode.py";
|
let input_file_path = "root/zode.py";
|
||||||
let input_content = None;
|
let input_content = None;
|
||||||
let edit_description = "Create the main Zode CLI script";
|
let edit_description = "Create the main Zode CLI script";
|
||||||
|
@ -755,13 +760,12 @@ fn eval_zode() {
|
||||||
#[test]
|
#[test]
|
||||||
#[cfg_attr(not(feature = "eval"), ignore)]
|
#[cfg_attr(not(feature = "eval"), ignore)]
|
||||||
fn eval_add_overwrite_test() {
|
fn eval_add_overwrite_test() {
|
||||||
// Results for 2025-05-22
|
|
||||||
//
|
|
||||||
// Model | Pass rate
|
// Model | Pass rate
|
||||||
// ============================================
|
// ============================================
|
||||||
//
|
//
|
||||||
// claude-3.7-sonnet | 0.16
|
// claude-3.7-sonnet | 0.65 (2025-06-14)
|
||||||
// gemini-2.5-pro-preview-03-25 | 0.35
|
// claude-sonnet-4 | 0.07 (2025-06-14)
|
||||||
|
// gemini-2.5-pro-preview-03-25 | 0.35 (2025-05-22)
|
||||||
// gemini-2.5-flash-preview-04-17 |
|
// gemini-2.5-flash-preview-04-17 |
|
||||||
// gpt-4.1 |
|
// gpt-4.1 |
|
||||||
let input_file_path = "root/action_log.rs";
|
let input_file_path = "root/action_log.rs";
|
||||||
|
@ -991,15 +995,14 @@ fn eval_create_empty_file() {
|
||||||
// thoughts into it. This issue is not specific to empty files, but
|
// thoughts into it. This issue is not specific to empty files, but
|
||||||
// it's easier to reproduce with them.
|
// it's easier to reproduce with them.
|
||||||
//
|
//
|
||||||
// Results for 2025-05-21:
|
|
||||||
//
|
|
||||||
// Model | Pass rate
|
// Model | Pass rate
|
||||||
// ============================================
|
// ============================================
|
||||||
//
|
//
|
||||||
// claude-3.7-sonnet | 1.00
|
// claude-3.7-sonnet | 1.00 (2025-06-14)
|
||||||
// gemini-2.5-pro-preview-03-25 | 1.00
|
// claude-sonnet-4 | 1.00 (2025-06-14)
|
||||||
// gemini-2.5-flash-preview-04-17 | 1.00
|
// gemini-2.5-pro-preview-03-25 | 1.00 (2025-05-21)
|
||||||
// gpt-4.1 | 1.00
|
// gemini-2.5-flash-preview-04-17 | 1.00 (2025-05-21)
|
||||||
|
// gpt-4.1 | 1.00 (2025-05-21)
|
||||||
//
|
//
|
||||||
//
|
//
|
||||||
// TODO: gpt-4.1-mini errored 38 times:
|
// TODO: gpt-4.1-mini errored 38 times:
|
||||||
|
@ -1556,6 +1559,7 @@ impl EditAgentTest {
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
let worktrees = vec![WorktreeContext {
|
let worktrees = vec![WorktreeContext {
|
||||||
root_name: "root".to_string(),
|
root_name: "root".to_string(),
|
||||||
|
abs_path: Path::new("/path/to/root").into(),
|
||||||
rules_file: None,
|
rules_file: None,
|
||||||
}];
|
}];
|
||||||
let prompt_builder = PromptBuilder::new(None)?;
|
let prompt_builder = PromptBuilder::new(None)?;
|
||||||
|
@ -1650,7 +1654,7 @@ async fn retry_on_rate_limit<R>(mut request: impl AsyncFnMut() -> Result<R>) ->
|
||||||
Ok(err) => match err {
|
Ok(err) => match err {
|
||||||
LanguageModelCompletionError::RateLimit(duration) => {
|
LanguageModelCompletionError::RateLimit(duration) => {
|
||||||
// Wait for the duration supplied, with some jitter to avoid all requests being made at the same time.
|
// Wait for the duration supplied, with some jitter to avoid all requests being made at the same time.
|
||||||
let jitter = duration.mul_f64(rand::thread_rng().gen_range(0.0..0.5));
|
let jitter = duration.mul_f64(rand::thread_rng().gen_range(0.0..1.0));
|
||||||
eprintln!(
|
eprintln!(
|
||||||
"Attempt #{attempt}: Rate limit exceeded. Retry after {duration:?} + jitter of {jitter:?}"
|
"Attempt #{attempt}: Rate limit exceeded. Retry after {duration:?} + jitter of {jitter:?}"
|
||||||
);
|
);
|
||||||
|
|
|
@ -69,13 +69,13 @@ pub struct EditFileToolInput {
|
||||||
/// start each path with one of the project's root directories.
|
/// start each path with one of the project's root directories.
|
||||||
///
|
///
|
||||||
/// The following examples assume we have two root directories in the project:
|
/// The following examples assume we have two root directories in the project:
|
||||||
/// - backend
|
/// - /a/b/backend
|
||||||
/// - frontend
|
/// - /c/d/frontend
|
||||||
///
|
///
|
||||||
/// <example>
|
/// <example>
|
||||||
/// `backend/src/main.rs`
|
/// `backend/src/main.rs`
|
||||||
///
|
///
|
||||||
/// Notice how the file path starts with root-1. Without that, the path
|
/// Notice how the file path starts with `backend`. Without that, the path
|
||||||
/// would be ambiguous and the call would fail!
|
/// would be ambiguous and the call would fail!
|
||||||
/// </example>
|
/// </example>
|
||||||
///
|
///
|
||||||
|
|
|
@ -31,8 +31,8 @@ pub struct ReadFileToolInput {
|
||||||
/// <example>
|
/// <example>
|
||||||
/// If the project has the following root directories:
|
/// If the project has the following root directories:
|
||||||
///
|
///
|
||||||
/// - directory1
|
/// - /a/b/directory1
|
||||||
/// - directory2
|
/// - /c/d/directory2
|
||||||
///
|
///
|
||||||
/// If you want to access `file.txt` in `directory1`, you should use the path `directory1/file.txt`.
|
/// If you want to access `file.txt` in `directory1`, you should use the path `directory1/file.txt`.
|
||||||
/// If you want to access `file.txt` in `directory2`, you should use the path `directory2/file.txt`.
|
/// If you want to access `file.txt` in `directory2`, you should use the path `directory2/file.txt`.
|
||||||
|
|
|
@ -74,6 +74,7 @@ pub struct UserRulesContext {
|
||||||
#[derive(Debug, Clone, Serialize)]
|
#[derive(Debug, Clone, Serialize)]
|
||||||
pub struct WorktreeContext {
|
pub struct WorktreeContext {
|
||||||
pub root_name: String,
|
pub root_name: String,
|
||||||
|
pub abs_path: Arc<Path>,
|
||||||
pub rules_file: Option<RulesFileContext>,
|
pub rules_file: Option<RulesFileContext>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -455,6 +456,7 @@ mod test {
|
||||||
fn test_assistant_system_prompt_renders() {
|
fn test_assistant_system_prompt_renders() {
|
||||||
let worktrees = vec![WorktreeContext {
|
let worktrees = vec![WorktreeContext {
|
||||||
root_name: "path".into(),
|
root_name: "path".into(),
|
||||||
|
abs_path: Path::new("/path/to/root").into(),
|
||||||
rules_file: Some(RulesFileContext {
|
rules_file: Some(RulesFileContext {
|
||||||
path_in_worktree: Path::new(".rules").into(),
|
path_in_worktree: Path::new(".rules").into(),
|
||||||
text: "".into(),
|
text: "".into(),
|
||||||
|
@ -484,6 +486,7 @@ mod test {
|
||||||
fn test_assistant_system_prompt_depends_on_enabled_tools() {
|
fn test_assistant_system_prompt_depends_on_enabled_tools() {
|
||||||
let worktrees = vec![WorktreeContext {
|
let worktrees = vec![WorktreeContext {
|
||||||
root_name: "path".into(),
|
root_name: "path".into(),
|
||||||
|
abs_path: Path::new("/path/to/root").into(),
|
||||||
rules_file: None,
|
rules_file: None,
|
||||||
}];
|
}];
|
||||||
let default_user_rules = vec![];
|
let default_user_rules = vec![];
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue