diff --git a/crates/agent/src/agent.rs b/crates/agent/src/agent.rs index 9bf99b0a87..2a0b9ebc65 100644 --- a/crates/agent/src/agent.rs +++ b/crates/agent/src/agent.rs @@ -49,7 +49,7 @@ pub use crate::context::{ContextLoadResult, LoadedContext}; pub use crate::inline_assistant::InlineAssistant; use crate::slash_command_settings::SlashCommandSettings; pub use crate::thread::{Message, MessageSegment, Thread, ThreadEvent}; -pub use crate::thread_store::{TextThreadStore, ThreadStore}; +pub use crate::thread_store::{SerializedThread, TextThreadStore, ThreadStore}; pub use agent_diff::{AgentDiffPane, AgentDiffToolbar}; pub use context_store::ContextStore; pub use ui::preview::{all_agent_previews, get_agent_preview}; diff --git a/crates/agent/src/thread.rs b/crates/agent/src/thread.rs index 50ef8b256b..5790be6644 100644 --- a/crates/agent/src/thread.rs +++ b/crates/agent/src/thread.rs @@ -458,7 +458,7 @@ impl Thread { tools: Entity, prompt_builder: Arc, project_context: SharedProjectContext, - window: &mut Window, + window: Option<&mut Window>, // None in headless mode cx: &mut Context, ) -> Self { let next_message_id = MessageId( diff --git a/crates/agent/src/thread_store.rs b/crates/agent/src/thread_store.rs index c43e452152..6095a30ff8 100644 --- a/crates/agent/src/thread_store.rs +++ b/crates/agent/src/thread_store.rs @@ -386,6 +386,25 @@ impl ThreadStore { }) } + pub fn create_thread_from_serialized( + &mut self, + serialized: SerializedThread, + cx: &mut Context, + ) -> Entity { + cx.new(|cx| { + Thread::deserialize( + ThreadId::new(), + serialized, + self.project.clone(), + self.tools.clone(), + self.prompt_builder.clone(), + self.project_context.clone(), + None, + cx, + ) + }) + } + pub fn open_thread( &self, id: &ThreadId, @@ -411,7 +430,7 @@ impl ThreadStore { this.tools.clone(), this.prompt_builder.clone(), this.project_context.clone(), - window, + Some(window), cx, ) }) diff --git a/crates/agent/src/tool_use.rs b/crates/agent/src/tool_use.rs index 5ed330b29d..7808ffb145 100644 --- a/crates/agent/src/tool_use.rs +++ b/crates/agent/src/tool_use.rs @@ -54,15 +54,19 @@ impl ToolUseState { /// Constructs a [`ToolUseState`] from the given list of [`SerializedMessage`]s. /// /// Accepts a function to filter the tools that should be used to populate the state. + /// + /// If `window` is `None` (e.g., when in headless mode or when running evals), + /// tool cards won't be deserialized pub fn from_serialized_messages( tools: Entity, messages: &[SerializedMessage], project: Entity, - window: &mut Window, + window: Option<&mut Window>, // None in headless mode cx: &mut App, ) -> Self { let mut this = Self::new(tools); let mut tool_names_by_id = HashMap::default(); + let mut window = window; for message in messages { match message.role { @@ -107,12 +111,17 @@ impl ToolUseState { }, ); - if let Some(tool) = this.tools.read(cx).tool(tool_use, cx) { - if let Some(output) = tool_result.output.clone() { - if let Some(card) = - tool.deserialize_card(output, project.clone(), window, cx) - { - this.tool_result_cards.insert(tool_use_id, card); + if let Some(window) = &mut window { + if let Some(tool) = this.tools.read(cx).tool(tool_use, cx) { + if let Some(output) = tool_result.output.clone() { + if let Some(card) = tool.deserialize_card( + output, + project.clone(), + window, + cx, + ) { + this.tool_result_cards.insert(tool_use_id, card); + } } } } diff --git a/crates/assistant_tools/src/assistant_tools.rs b/crates/assistant_tools/src/assistant_tools.rs index 8a9d4bf6fb..f8ba3418b7 100644 --- a/crates/assistant_tools/src/assistant_tools.rs +++ b/crates/assistant_tools/src/assistant_tools.rs @@ -42,7 +42,7 @@ use crate::list_directory_tool::ListDirectoryTool; use crate::now_tool::NowTool; use crate::thinking_tool::ThinkingTool; -pub use edit_file_tool::EditFileToolInput; +pub use edit_file_tool::{EditFileMode, EditFileToolInput}; pub use find_path_tool::FindPathToolInput; pub use open_tool::OpenTool; pub use read_file_tool::{ReadFileTool, ReadFileToolInput}; diff --git a/crates/assistant_tools/src/edit_agent/evals.rs b/crates/assistant_tools/src/edit_agent/evals.rs index 9b7d3e8aca..2af9c30434 100644 --- a/crates/assistant_tools/src/edit_agent/evals.rs +++ b/crates/assistant_tools/src/edit_agent/evals.rs @@ -1,5 +1,9 @@ use super::*; -use crate::{ReadFileToolInput, edit_file_tool::EditFileToolInput, grep_tool::GrepToolInput}; +use crate::{ + ReadFileToolInput, + edit_file_tool::{EditFileMode, EditFileToolInput}, + grep_tool::GrepToolInput, +}; use Role::*; use anyhow::anyhow; use assistant_tool::ToolRegistry; @@ -71,7 +75,7 @@ fn eval_extract_handle_command_output() { EditFileToolInput { display_description: edit_description.into(), path: input_file_path.into(), - create_or_overwrite: false, + mode: EditFileMode::Edit, }, )], ), @@ -127,7 +131,7 @@ fn eval_delete_run_git_blame() { EditFileToolInput { display_description: edit_description.into(), path: input_file_path.into(), - create_or_overwrite: false, + mode: EditFileMode::Edit, }, )], ), @@ -182,7 +186,7 @@ fn eval_translate_doc_comments() { EditFileToolInput { display_description: edit_description.into(), path: input_file_path.into(), - create_or_overwrite: false, + mode: EditFileMode::Edit, }, )], ), @@ -297,7 +301,7 @@ fn eval_use_wasi_sdk_in_compile_parser_to_wasm() { EditFileToolInput { display_description: edit_description.into(), path: input_file_path.into(), - create_or_overwrite: false, + mode: EditFileMode::Edit, }, )], ), @@ -372,7 +376,7 @@ fn eval_disable_cursor_blinking() { EditFileToolInput { display_description: edit_description.into(), path: input_file_path.into(), - create_or_overwrite: false, + mode: EditFileMode::Edit, }, )], ), @@ -566,7 +570,7 @@ fn eval_from_pixels_constructor() { EditFileToolInput { display_description: edit_description.into(), path: input_file_path.into(), - create_or_overwrite: false, + mode: EditFileMode::Edit, }, )], ), @@ -643,7 +647,7 @@ fn eval_zode() { EditFileToolInput { display_description: edit_description.into(), path: input_file_path.into(), - create_or_overwrite: true, + mode: EditFileMode::Create, }, ), ], @@ -888,7 +892,7 @@ fn eval_add_overwrite_test() { EditFileToolInput { display_description: edit_description.into(), path: input_file_path.into(), - create_or_overwrite: false, + mode: EditFileMode::Edit, }, ), ], diff --git a/crates/assistant_tools/src/edit_file_tool.rs b/crates/assistant_tools/src/edit_file_tool.rs index 8c38534bee..08f319b4e3 100644 --- a/crates/assistant_tools/src/edit_file_tool.rs +++ b/crates/assistant_tools/src/edit_file_tool.rs @@ -76,12 +76,22 @@ pub struct EditFileToolInput { /// pub path: PathBuf, - /// If true, this tool will recreate the file from scratch. - /// If false, this tool will produce granular edits to an existing file. + /// The mode of operation on the file. Possible values: + /// - 'edit': Make granular edits to an existing file. + /// - 'create': Create a new file if it doesn't exist. + /// - 'overwrite': Replace the entire contents of an existing file. /// - /// When a file already exists or you just created it, always prefer editing + /// When a file already exists or you just created it, prefer editing /// it as opposed to recreating it from scratch. - pub create_or_overwrite: bool, + pub mode: EditFileMode, +} + +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "lowercase")] +pub enum EditFileMode { + Edit, + Create, + Overwrite, } #[derive(Debug, Serialize, Deserialize, JsonSchema)] @@ -195,7 +205,11 @@ impl Tool for EditFileTool { .as_ref() .map_or(false, |file| file.disk_state().exists()) })?; - if !input.create_or_overwrite && !exists { + let create_or_overwrite = match input.mode { + EditFileMode::Create | EditFileMode::Overwrite => true, + _ => false, + }; + if !create_or_overwrite && !exists { return Err(anyhow!("{} not found", input.path.display())); } @@ -207,7 +221,7 @@ impl Tool for EditFileTool { }) .await; - let (output, mut events) = if input.create_or_overwrite { + let (output, mut events) = if create_or_overwrite { edit_agent.overwrite( buffer.clone(), input.display_description.clone(), @@ -876,7 +890,7 @@ mod tests { let input = serde_json::to_value(EditFileToolInput { display_description: "Some edit".into(), path: "root/nonexistent_file.txt".into(), - create_or_overwrite: false, + mode: EditFileMode::Edit, }) .unwrap(); Arc::new(EditFileTool) diff --git a/crates/eval/src/eval.rs b/crates/eval/src/eval.rs index 1a73c88563..d69ec5d9c9 100644 --- a/crates/eval/src/eval.rs +++ b/crates/eval/src/eval.rs @@ -711,9 +711,9 @@ fn print_report( .values() .flat_map(|results| { results.iter().map(|(example, _)| { - let absolute_path = example.run_directory.join("last.messages.json"); - pathdiff::diff_paths(&absolute_path, run_dir) - .unwrap_or_else(|| absolute_path.clone()) + let absolute_path = run_dir.join(example.run_directory.join("last.messages.json")); + let cwd = std::env::current_dir().expect("Can't get current dir"); + pathdiff::diff_paths(&absolute_path, cwd).unwrap_or_else(|| absolute_path.clone()) }) }) .collect::>(); diff --git a/crates/eval/src/example.rs b/crates/eval/src/example.rs index d2478e89e3..f1fb2b2513 100644 --- a/crates/eval/src/example.rs +++ b/crates/eval/src/example.rs @@ -48,6 +48,7 @@ pub struct ExampleMetadata { pub language_server: Option, pub max_assertions: Option, pub profile_id: AgentProfileId, + pub existing_thread_json: Option, } #[derive(Clone, Debug)] @@ -477,12 +478,16 @@ impl Response { tool_name: &'static str, cx: &mut ExampleContext, ) -> Result<&ToolUse> { - let result = self.messages.iter().find_map(|msg| { + let result = self.find_tool_call(tool_name); + cx.assert_some(result, format!("called `{}`", tool_name)) + } + + pub fn find_tool_call(&self, tool_name: &str) -> Option<&ToolUse> { + self.messages.iter().rev().find_map(|msg| { msg.tool_use .iter() .find(|tool_use| tool_use.name == tool_name) - }); - cx.assert_some(result, format!("called `{}`", tool_name)) + }) } #[allow(dead_code)] diff --git a/crates/eval/src/examples/add_arg_to_trait_method.rs b/crates/eval/src/examples/add_arg_to_trait_method.rs index dbf2e8bd35..19cfc44d18 100644 --- a/crates/eval/src/examples/add_arg_to_trait_method.rs +++ b/crates/eval/src/examples/add_arg_to_trait_method.rs @@ -21,6 +21,7 @@ impl Example for AddArgToTraitMethod { }), max_assertions: None, profile_id: AgentProfileId::default(), + existing_thread_json: None, } } diff --git a/crates/eval/src/examples/code_block_citations.rs b/crates/eval/src/examples/code_block_citations.rs index 13fb346bf9..4de69ecaa4 100644 --- a/crates/eval/src/examples/code_block_citations.rs +++ b/crates/eval/src/examples/code_block_citations.rs @@ -22,6 +22,7 @@ impl Example for CodeBlockCitations { }), max_assertions: None, profile_id: AgentProfileId::default(), + existing_thread_json: None, } } diff --git a/crates/eval/src/examples/comment_translation.rs b/crates/eval/src/examples/comment_translation.rs index 72a3e865a8..f4a7db1f94 100644 --- a/crates/eval/src/examples/comment_translation.rs +++ b/crates/eval/src/examples/comment_translation.rs @@ -1,7 +1,7 @@ use crate::example::{Example, ExampleContext, ExampleMetadata, JudgeAssertion}; use anyhow::Result; use assistant_settings::AgentProfileId; -use assistant_tools::EditFileToolInput; +use assistant_tools::{EditFileMode, EditFileToolInput}; use async_trait::async_trait; pub struct CommentTranslation; @@ -16,6 +16,7 @@ impl Example for CommentTranslation { language_server: None, max_assertions: Some(1), profile_id: AgentProfileId::default(), + existing_thread_json: None, } } @@ -35,7 +36,7 @@ impl Example for CommentTranslation { for tool_use in thread.tool_uses_for_message(message.id, cx) { if tool_use.name == "edit_file" { let input: EditFileToolInput = serde_json::from_value(tool_use.input)?; - if input.create_or_overwrite { + if !matches!(input.mode, EditFileMode::Edit) { create_or_overwrite_count += 1; } } diff --git a/crates/eval/src/examples/file_search.rs b/crates/eval/src/examples/file_search.rs index 5da0d03f37..b6334710c9 100644 --- a/crates/eval/src/examples/file_search.rs +++ b/crates/eval/src/examples/file_search.rs @@ -18,6 +18,7 @@ impl Example for FileSearchExample { language_server: None, max_assertions: Some(3), profile_id: AgentProfileId::default(), + existing_thread_json: None, } } diff --git a/crates/eval/src/examples/mod.rs b/crates/eval/src/examples/mod.rs index d7604170d3..b11f62ab76 100644 --- a/crates/eval/src/examples/mod.rs +++ b/crates/eval/src/examples/mod.rs @@ -16,6 +16,7 @@ mod add_arg_to_trait_method; mod code_block_citations; mod comment_translation; mod file_search; +mod overwrite_file; mod planets; pub fn all(examples_dir: &Path) -> Vec> { @@ -25,6 +26,7 @@ pub fn all(examples_dir: &Path) -> Vec> { Rc::new(code_block_citations::CodeBlockCitations), Rc::new(planets::Planets), Rc::new(comment_translation::CommentTranslation), + Rc::new(overwrite_file::FileOverwriteExample), ]; for example_path in list_declarative_examples(examples_dir).unwrap() { @@ -45,6 +47,7 @@ impl DeclarativeExample { pub fn load(example_path: &Path) -> Result { let name = Self::name_from_path(example_path); let base: ExampleToml = toml::from_str(&fs::read_to_string(&example_path)?)?; + let example_dir = example_path.parent().unwrap(); let language_server = if base.require_lsp { Some(crate::example::LanguageServer { @@ -63,6 +66,14 @@ impl DeclarativeExample { AgentProfileId::default() }; + let existing_thread_json = if let Some(path) = base.existing_thread_path { + let content = fs::read_to_string(example_dir.join(&path)) + .unwrap_or_else(|_| panic!("Failed to read existing thread file: {}", path)); + Some(content) + } else { + None + }; + let metadata = ExampleMetadata { name, url: base.url, @@ -70,6 +81,7 @@ impl DeclarativeExample { language_server, max_assertions: None, profile_id, + existing_thread_json, }; Ok(DeclarativeExample { @@ -110,6 +122,8 @@ pub struct ExampleToml { pub diff_assertions: BTreeMap, #[serde(default)] pub thread_assertions: BTreeMap, + #[serde(default)] + pub existing_thread_path: Option, } #[async_trait(?Send)] diff --git a/crates/eval/src/examples/overwrite_file.rs b/crates/eval/src/examples/overwrite_file.rs new file mode 100644 index 0000000000..368ebd5cea --- /dev/null +++ b/crates/eval/src/examples/overwrite_file.rs @@ -0,0 +1,49 @@ +use anyhow::Result; +use assistant_settings::AgentProfileId; +use assistant_tools::{EditFileMode, EditFileToolInput}; +use async_trait::async_trait; + +use crate::example::{Example, ExampleContext, ExampleMetadata}; + +pub struct FileOverwriteExample; + +/* +This eval tests a fix for a destructive behavior of the `edit_file` tool. +Previously, it would rewrite existing files too aggressively, which often +resulted in content loss. + +Pass rate before the fix: 10% +Pass rate after the fix: 100% +*/ + +#[async_trait(?Send)] +impl Example for FileOverwriteExample { + fn meta(&self) -> ExampleMetadata { + let thread_json = include_str!("threads/overwrite-file.json"); + + ExampleMetadata { + name: "file_overwrite".to_string(), + url: "https://github.com/zed-industries/zed.git".to_string(), + revision: "023a60806a8cc82e73bd8d88e63b4b07fc7a0040".to_string(), + language_server: None, + max_assertions: Some(1), + profile_id: AgentProfileId::default(), + existing_thread_json: Some(thread_json.to_string()), + } + } + + async fn conversation(&self, cx: &mut ExampleContext) -> Result<()> { + let response = cx.run_turns(1).await?; + let file_overwritten = if let Some(tool_use) = response.find_tool_call("edit_file") { + let input = tool_use.parse_input::()?; + match input.mode { + EditFileMode::Edit => false, + EditFileMode::Create | EditFileMode::Overwrite => true, + } + } else { + false + }; + + cx.assert(!file_overwritten, "File should be edited, not overwritten") + } +} diff --git a/crates/eval/src/examples/planets.rs b/crates/eval/src/examples/planets.rs index f1b361652a..53e926332b 100644 --- a/crates/eval/src/examples/planets.rs +++ b/crates/eval/src/examples/planets.rs @@ -18,6 +18,7 @@ impl Example for Planets { language_server: None, max_assertions: None, profile_id: AgentProfileId::default(), + existing_thread_json: None, } } diff --git a/crates/eval/src/examples/threads/overwrite-file.json b/crates/eval/src/examples/threads/overwrite-file.json new file mode 100644 index 0000000000..ffef258193 --- /dev/null +++ b/crates/eval/src/examples/threads/overwrite-file.json @@ -0,0 +1,262 @@ +{ + "completion_mode": "normal", + "cumulative_token_usage": { + "cache_creation_input_tokens": 18383, + "cache_read_input_tokens": 97250, + "input_tokens": 45, + "output_tokens": 776 + }, + "detailed_summary_state": "NotGenerated", + "exceeded_window_error": null, + "initial_project_snapshot": { + "timestamp": "2025-05-08T14:31:16.701157512Z", + "unsaved_buffer_paths": [], + "worktree_snapshots": [ + { + "git_state": { + "current_branch": null, + "diff": "diff --git a/crates/language_model_selector/src/language_model_selector.rs b/crates/language_model_selector/src/language_model_selector.rs\nindex 6775bee98a..e25c9e1415 100644\n--- a/crates/language_model_selector/src/language_model_selector.rs\n+++ b/crates/language_model_selector/src/language_model_selector.rs\n@@ -410,7 +410,8 @@ impl ModelMatcher {\n }\n \n pub fn is_match(self: &Self, info: &ModelInfo) -> bool {\n- self.matched_ids.contains(&info.model.id().0)\n+ let q = (info.model.provider_id(), info.model.id());\n+ self.matched_models.contains(&q)\n }\n }\n \n", + "head_sha": "9245656485e58a5d6d717d82209bc8c57cb9c539", + "remote_url": "git@github.com:zed-industries/zed.git" + }, + "worktree_path": "/home/silver/develop/zed" + } + ] + }, + "messages": [ + { + "context": "\n\nThe following items were attached by the user. They are up-to-date and don't need to be re-read.\n\n\n```rs zed/crates/language_model_selector/src/language_model_selector.rs\nconst TRY_ZED_PRO_URL [L28]\ntype OnModelChanged [L30]\ntype GetActiveModel [L31]\npub struct LanguageModelSelector [L33-37]\n picker [L34]\n _authenticate_all_providers_task [L35]\n _subscriptions [L36]\nimpl LanguageModelSelector [L39-231]\n pub fn new [L40-81]\n fn handle_language_model_registry_event [L83-104]\n fn authenticate_all_providers [L110-154]\n fn all_models [L156-204]\n pub fn active_model [L206-208]\n fn get_active_model_index [L210-230]\nimpl EventEmitter for LanguageModelSelector [L233]\nimpl Focusable for LanguageModelSelector [L235-239]\n fn focus_handle [L236-238]\nimpl Render for LanguageModelSelector [L241-245]\n fn render [L242-244]\npub struct LanguageModelSelectorPopoverMenu [L248-258]\n language_model_selector [L253]\n trigger [L254]\n tooltip [L255]\n handle [L256]\n anchor [L257]\nimpl LanguageModelSelectorPopoverMenu [L260-284]\n pub fn new [L265-278]\n pub fn with_handle [L280-283]\nimpl RenderOnce for LanguageModelSelectorPopoverMenu [L286-304]\n fn render [L291-303]\nstruct ModelInfo [L307-310]\n model [L308]\n icon [L309]\npub struct LanguageModelPickerDelegate [L312-319]\n language_model_selector [L313]\n on_model_changed [L314]\n get_active_model [L315]\n all_models [L316]\n filtered_entries [L317]\n selected_index [L318]\nstruct GroupedModels [L321-324]\n recommended [L322]\n other [L323]\nimpl GroupedModels [L326-385]\n pub fn new [L327-342]\n fn entries [L344-370]\n fn model_infos [L372-384]\nenum LanguageModelPickerEntry [L387-390]\n Model [L388]\n Separator [L389]\nstruct ModelMatcher [L392-396]\n models [L393]\n bg_executor [L394]\n candidates [L395]\nimpl ModelMatcher [L398-452]\n fn new [L399-406]\n pub fn fuzzy_search [L408-424]\n pub fn exact_search [L426-438]\n fn make_match_candidates [L440-451]\nimpl PickerDelegate for LanguageModelPickerDelegate [L454-693]\n type ListItem [L455]\n fn match_count [L457-459]\n fn selected_index [L461-463]\n fn set_selected_index [L465-468]\n fn can_select [L470-480]\n fn placeholder_text [L482-484]\n fn update_matches [L486-546]\n fn confirm [L548-560]\n fn dismissed [L562-566]\n fn render_match [L568-636]\n fn render_footer [L638-692]\n\nShowing symbols 1-71 (total symbols: 71)\n\n```\n\n\n", + "creases": [ + { + "end": 140, + "icon_path": "icons/file_icons/rust.svg", + "label": "language_model_selector.rs", + "start": 38 + } + ], + "id": 0, + "role": "user", + "segments": [ + { + "text": "I recently introduced fuzzy search in [@language_model_selector.rs](@file:zed/crates/language_model_selector/src/language_model_selector.rs). Can you write unit-test that check it? You can find a test list of models on images. ", + "type": "text" + } + ], + "tool_results": [], + "tool_uses": [] + }, + { + "context": "", + "creases": [], + "id": 1, + "role": "assistant", + "segments": [ + { + "text": "I'll help you write unit tests for the fuzzy search functionality in the language model selector. I see that you want to test the fuzzy search with the models shown in the images - both Claude models (3, 3.5, 3.7) and GPT models (GPT-3.5, GPT-4, GPT-4o, etc.).\n\nLet's start by examining the code structure to understand how the fuzzy search works.", + "type": "text" + } + ], + "tool_results": [ + { + "content": "zed/crates/language_model_selector/Cargo.toml\nzed/crates/language_model_selector/LICENSE-GPL\nzed/crates/language_model_selector/src\n", + "is_error": false, + "output": null, + "tool_use_id": "toolu_01FvYMtHs763GL1wA5mpTpqn" + } + ], + "tool_uses": [ + { + "id": "toolu_01FvYMtHs763GL1wA5mpTpqn", + "input": { + "path": "zed/crates/language_model_selector" + }, + "name": "list_directory" + } + ] + }, + { + "context": "", + "creases": [], + "id": 2, + "role": "assistant", + "segments": [ + { + "text": "Let's look at the structure of the tests directory or see if there are existing tests:", + "type": "text" + } + ], + "tool_results": [ + { + "content": "zed/crates/language_model_selector/src/language_model_selector.rs\n", + "is_error": false, + "output": null, + "tool_use_id": "toolu_01KSLDsLLrf7tgixB1sge3cm" + } + ], + "tool_uses": [ + { + "id": "toolu_01KSLDsLLrf7tgixB1sge3cm", + "input": { + "path": "zed/crates/language_model_selector/src" + }, + "name": "list_directory" + } + ] + }, + { + "context": "", + "creases": [], + "id": 3, + "role": "assistant", + "segments": [ + { + "text": "Let's check the Cargo.toml to understand the dependencies:", + "type": "text" + } + ], + "tool_results": [ + { + "content": "[package]\nname = \"language_model_selector\"\nversion = \"0.1.0\"\nedition.workspace = true\npublish.workspace = true\nlicense = \"GPL-3.0-or-later\"\n\n[lints]\nworkspace = true\n\n[lib]\npath = \"src/language_model_selector.rs\"\n\n[dependencies]\ncollections.workspace = true\nfeature_flags.workspace = true\nfuzzy.workspace = true\ngpui.workspace = true\nlanguage_model.workspace = true\nlog.workspace = true\npicker.workspace = true\nproto.workspace = true\nui.workspace = true\nworkspace-hack.workspace = true\nzed_actions.workspace = true\n", + "is_error": false, + "output": null, + "tool_use_id": "toolu_019Je2MLfJhpJr93g5igoRAH" + } + ], + "tool_uses": [ + { + "id": "toolu_019Je2MLfJhpJr93g5igoRAH", + "input": { + "path": "zed/crates/language_model_selector/Cargo.toml" + }, + "name": "read_file" + } + ] + }, + { + "context": "", + "creases": [], + "id": 4, + "role": "assistant", + "segments": [ + { + "text": "Now let's look at the fuzzy search implementation in detail:", + "type": "text" + } + ], + "tool_results": [ + { + "content": "struct ModelMatcher {\n models: Vec,\n bg_executor: BackgroundExecutor,\n candidates: Vec,\n}\n\nimpl ModelMatcher {\n fn new(models: Vec, bg_executor: BackgroundExecutor) -> ModelMatcher {\n let candidates = Self::make_match_candidates(&models);\n Self {\n models,\n bg_executor,\n candidates,\n }\n }\n\n pub fn fuzzy_search(&self, query: &str) -> Vec {\n let matches = self.bg_executor.block(match_strings(\n &self.candidates,\n &query,\n false,\n 100,\n &Default::default(),\n self.bg_executor.clone(),\n ));\n\n let matched_models: Vec<_> = matches\n .into_iter()\n .map(|m| self.models[m.candidate_id].clone())\n .collect();\n\n matched_models\n }\n\n pub fn exact_search(&self, query: &str) -> Vec {\n self.models\n .iter()\n .filter(|m| {\n m.model\n .name()\n .0\n .to_lowercase()\n .contains(&query.to_lowercase())\n })\n .cloned()\n .collect::>()\n }\n\n fn make_match_candidates(model_infos: &Vec) -> Vec {\n model_infos\n .iter()\n .enumerate()\n .map(|(index, model)| {\n StringMatchCandidate::new(\n index,\n &format!(\"{}/{}\", &model.model.provider_id().0, &model.model.name().0),\n )\n })\n .collect::>()\n }\n}", + "is_error": false, + "output": null, + "tool_use_id": "toolu_01GpKFEn6hYrXGiS5yBF3urk" + } + ], + "tool_uses": [ + { + "id": "toolu_01GpKFEn6hYrXGiS5yBF3urk", + "input": { + "end_line": 452, + "path": "zed/crates/language_model_selector/src/language_model_selector.rs", + "start_line": 392 + }, + "name": "read_file" + } + ] + }, + { + "context": "", + "creases": [], + "id": 5, + "role": "assistant", + "segments": [ + { + "text": "Let's now check if there's an existing test module:", + "type": "text" + } + ], + "tool_results": [ + { + "content": "No matches found", + "is_error": false, + "output": null, + "tool_use_id": "toolu_01RZAvFvpMq16Dwh6d6cnVzy" + } + ], + "tool_uses": [ + { + "id": "toolu_01RZAvFvpMq16Dwh6d6cnVzy", + "input": { + "include_pattern": "zed/crates/language_model_selector/**/*.rs", + "regex": "mod tests" + }, + "name": "grep" + } + ] + }, + { + "context": "", + "creases": [], + "id": 6, + "role": "assistant", + "segments": [ + { + "text": "Now I'll create a test module for the ModelMatcher's fuzzy search functionality. Let's create this test module:", + "type": "text" + } + ], + "tool_results": [], + "tool_uses": [] + } + ], + "model": { + "model": "claude-3-7-sonnet-latest", + "provider": "zed.dev" + }, + "profile": "Write", + "request_token_usage": [ + { + "cache_creation_input_tokens": 9581, + "input_tokens": 4, + "output_tokens": 5 + }, + { + "cache_creation_input_tokens": 204, + "cache_read_input_tokens": 9581, + "input_tokens": 6, + "output_tokens": 5 + }, + { + "cache_creation_input_tokens": 104, + "cache_read_input_tokens": 9785, + "input_tokens": 6, + "output_tokens": 5 + }, + { + "cache_creation_input_tokens": 239, + "cache_read_input_tokens": 9889, + "input_tokens": 6, + "output_tokens": 5 + }, + { + "cache_creation_input_tokens": 569, + "cache_read_input_tokens": 10128, + "input_tokens": 5, + "output_tokens": 5 + }, + { + "cache_creation_input_tokens": 87, + "cache_read_input_tokens": 10697, + "input_tokens": 5, + "output_tokens": 2 + }, + { + "cache_creation_input_tokens": 7355, + "cache_read_input_tokens": 10784, + "input_tokens": 5, + "output_tokens": 3 + } + ], + "summary": "Fuzzy Search Testing Language Model Selector", + "updated_at": "2025-05-08T18:20:34.205405751Z", + "version": "0.2.0" +} diff --git a/crates/eval/src/instance.rs b/crates/eval/src/instance.rs index f7ba4a43ad..6baeda8fa7 100644 --- a/crates/eval/src/instance.rs +++ b/crates/eval/src/instance.rs @@ -1,4 +1,4 @@ -use agent::{Message, MessageSegment, ThreadStore}; +use agent::{Message, MessageSegment, SerializedThread, ThreadStore}; use anyhow::{Context, Result, anyhow, bail}; use assistant_tool::ToolWorkingSet; use client::proto::LspWorkProgress; @@ -312,7 +312,14 @@ impl ExampleInstance { thread_store.update(cx, |thread_store, cx| thread_store.load_profile_by_id(profile_id, cx)).expect("Failed to load profile"); let thread = - thread_store.update(cx, |thread_store, cx| thread_store.create_thread(cx))?; + thread_store.update(cx, |thread_store, cx| { + if let Some(json) = &meta.existing_thread_json { + let serialized = SerializedThread::from_json(json.as_bytes()).expect("Can't read serialized thread"); + thread_store.create_thread_from_serialized(serialized, cx) + } else { + thread_store.create_thread(cx) + } + })?; thread.update(cx, |thread, _cx| {