diff --git a/Cargo.lock b/Cargo.lock index a2e9fc26ca..3950871688 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -779,6 +779,7 @@ dependencies = [ "collections", "component", "derive_more 0.99.19", + "diffy", "editor", "feature_flags", "fs", diff --git a/crates/assistant_tools/Cargo.toml b/crates/assistant_tools/Cargo.toml index 2b8958feb1..e234b62b14 100644 --- a/crates/assistant_tools/Cargo.toml +++ b/crates/assistant_tools/Cargo.toml @@ -63,6 +63,7 @@ which.workspace = true workspace-hack.workspace = true workspace.workspace = true zed_llm_client.workspace = true +diffy = "0.4.2" [dev-dependencies] lsp = { workspace = true, features = ["test-support"] } diff --git a/crates/assistant_tools/src/project_notifications_tool.rs b/crates/assistant_tools/src/project_notifications_tool.rs index 1b926bb446..ec315d9ab1 100644 --- a/crates/assistant_tools/src/project_notifications_tool.rs +++ b/crates/assistant_tools/src/project_notifications_tool.rs @@ -6,7 +6,7 @@ use language_model::{LanguageModel, LanguageModelRequest, LanguageModelToolSchem use project::Project; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -use std::sync::Arc; +use std::{fmt::Write, sync::Arc}; use ui::IconName; #[derive(Debug, Serialize, Deserialize, JsonSchema)] @@ -59,7 +59,9 @@ impl Tool for ProjectNotificationsTool { // NOTE: Changes to this prompt require a symmetric update in the LLM Worker const HEADER: &str = include_str!("./project_notifications_tool/prompt_header.txt"); - result(&format!("{HEADER}\n\n```diff\n{user_edits_diff}\n```\n").replace("\r\n", "\n")) + const MAX_BYTES: usize = 8000; + let diff = fit_patch_to_size(&user_edits_diff, MAX_BYTES); + result(&format!("{HEADER}\n\n```diff\n{diff}\n```\n").replace("\r\n", "\n")) } } @@ -67,11 +69,95 @@ fn result(response: &str) -> ToolResult { Task::ready(Ok(response.to_string().into())).into() } +/// Make sure that the patch fits into the size limit (in bytes). +/// Compress the patch by omitting some parts if needed. +/// Unified diff format is assumed. +fn fit_patch_to_size(patch: &str, max_size: usize) -> String { + if patch.len() <= max_size { + return patch.to_string(); + } + + // Compression level 1: remove context lines in diff bodies, but + // leave the counts and positions of inserted/deleted lines + let mut current_size = patch.len(); + let mut file_patches = split_patch(&patch); + file_patches.sort_by_key(|patch| patch.len()); + let compressed_patches = file_patches + .iter() + .rev() + .map(|patch| { + if current_size > max_size { + let compressed = compress_patch(patch).unwrap_or_else(|_| patch.to_string()); + current_size -= patch.len() - compressed.len(); + compressed + } else { + patch.to_string() + } + }) + .collect::>(); + + if current_size <= max_size { + return compressed_patches.join("\n\n"); + } + + // Compression level 2: list paths of the changed files only + let filenames = file_patches + .iter() + .map(|patch| { + let patch = diffy::Patch::from_str(patch).unwrap(); + let path = patch + .modified() + .and_then(|path| path.strip_prefix("b/")) + .unwrap_or_default(); + format!("- {path}\n") + }) + .collect::>(); + + filenames.join("") +} + +/// Split a potentially multi-file patch into multiple single-file patches +fn split_patch(patch: &str) -> Vec { + let mut result = Vec::new(); + let mut current_patch = String::new(); + + for line in patch.lines() { + if line.starts_with("---") && !current_patch.is_empty() { + result.push(current_patch.trim_end_matches('\n').into()); + current_patch = String::new(); + } + current_patch.push_str(line); + current_patch.push('\n'); + } + + if !current_patch.is_empty() { + result.push(current_patch.trim_end_matches('\n').into()); + } + + result +} + +fn compress_patch(patch: &str) -> anyhow::Result { + let patch = diffy::Patch::from_str(patch)?; + let mut out = String::new(); + + writeln!(out, "--- {}", patch.original().unwrap_or("a"))?; + writeln!(out, "+++ {}", patch.modified().unwrap_or("b"))?; + + for hunk in patch.hunks() { + writeln!(out, "@@ -{} +{} @@", hunk.old_range(), hunk.new_range())?; + writeln!(out, "[...skipped...]")?; + } + + Ok(out) +} + #[cfg(test)] mod tests { use super::*; use assistant_tool::ToolResultContent; use gpui::{AppContext, TestAppContext}; + use indoc::indoc; use language_model::{LanguageModelRequest, fake_provider::FakeLanguageModelProvider}; use project::{FakeFs, Project}; use serde_json::json; @@ -206,6 +292,61 @@ mod tests { ); } + #[test] + fn test_patch_compression() { + // Given a patch that doesn't fit into the size budget + let patch = indoc! {" + --- a/dir/test.txt + +++ b/dir/test.txt + @@ -1,3 +1,3 @@ + line 1 + -line 2 + +CHANGED + line 3 + @@ -10,2 +10,2 @@ + line 10 + -line 11 + +line eleven + + + --- a/dir/another.txt + +++ b/dir/another.txt + @@ -100,1 +1,1 @@ + -before + +after + "}; + + // When the size deficit can be compensated by dropping the body, + // then the body should be trimmed for larger files first + let limit = patch.len() - 10; + let compressed = fit_patch_to_size(patch, limit); + let expected = indoc! {" + --- a/dir/test.txt + +++ b/dir/test.txt + @@ -1,3 +1,3 @@ + [...skipped...] + @@ -10,2 +10,2 @@ + [...skipped...] + + + --- a/dir/another.txt + +++ b/dir/another.txt + @@ -100,1 +1,1 @@ + -before + +after"}; + assert_eq!(compressed, expected); + + // When the size deficit is too large, then only file paths + // should be returned + let limit = 10; + let compressed = fit_patch_to_size(patch, limit); + let expected = indoc! {" + - dir/another.txt + - dir/test.txt + "}; + assert_eq!(compressed, expected); + } + fn init_test(cx: &mut TestAppContext) { cx.update(|cx| { let settings_store = SettingsStore::test(cx);