From b696a3251873908fa2542fb03153b3fdf2ea0c23 Mon Sep 17 00:00:00 2001 From: Michael Sloan Date: Mon, 25 Aug 2025 21:56:22 -0600 Subject: [PATCH 1/6] zeta: Record recently active files when data collection is enabled --- .../cloud_llm_client/src/cloud_llm_client.rs | 8 + .../zed/src/zed/edit_prediction_registry.rs | 5 +- crates/zeta/src/zeta.rs | 171 +++++++++++++----- crates/zeta_cli/src/main.rs | 2 + 4 files changed, 140 insertions(+), 46 deletions(-) diff --git a/crates/cloud_llm_client/src/cloud_llm_client.rs b/crates/cloud_llm_client/src/cloud_llm_client.rs index 741945af10..7d1548b322 100644 --- a/crates/cloud_llm_client/src/cloud_llm_client.rs +++ b/crates/cloud_llm_client/src/cloud_llm_client.rs @@ -152,6 +152,8 @@ pub struct PredictEditsBody { /// Info about the git repository state, only present when can_collect_data is true. #[serde(skip_serializing_if = "Option::is_none", default)] pub git_info: Option, + #[serde(skip_serializing_if = "Option::is_none", default)] + pub recent_files: Option>, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -167,6 +169,12 @@ pub struct PredictEditsGitInfo { pub remote_upstream_url: Option, } +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PredictEditsRecentFile { + pub repo_path: String, + pub active_to_now_ms: u32, +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct PredictEditsResponse { pub request_id: Uuid, diff --git a/crates/zed/src/zed/edit_prediction_registry.rs b/crates/zed/src/zed/edit_prediction_registry.rs index bc2d757fd1..06ed524e65 100644 --- a/crates/zed/src/zed/edit_prediction_registry.rs +++ b/crates/zed/src/zed/edit_prediction_registry.rs @@ -204,10 +204,7 @@ fn assign_edit_prediction_provider( } } - let workspace = window - .root::() - .flatten() - .map(|workspace| workspace.downgrade()); + let workspace = window.root::().flatten(); let zeta = zeta::Zeta::register(workspace, worktree, client.clone(), user_store, cx); diff --git a/crates/zeta/src/zeta.rs b/crates/zeta/src/zeta.rs index 7b14d12796..45cd7b1614 100644 --- a/crates/zeta/src/zeta.rs +++ b/crates/zeta/src/zeta.rs @@ -6,19 +6,21 @@ mod onboarding_modal; mod onboarding_telemetry; mod rate_completion_modal; +use arrayvec::ArrayVec; pub(crate) use completion_diff_element::*; use db::kvp::{Dismissable, KEY_VALUE_STORE}; use edit_prediction::DataCollectionState; pub use init::*; use license_detection::LicenseDetectionWatcher; +use project::git_store::Repository; pub use rate_completion_modal::*; use anyhow::{Context as _, Result, anyhow}; -use arrayvec::ArrayVec; use client::{Client, EditPredictionUsage, UserStore}; use cloud_llm_client::{ AcceptEditPredictionBody, EXPIRED_LLM_TOKEN_HEADER_NAME, MINIMUM_REQUIRED_VERSION_HEADER_NAME, - PredictEditsBody, PredictEditsGitInfo, PredictEditsResponse, ZED_VERSION_HEADER_NAME, + PredictEditsBody, PredictEditsGitInfo, PredictEditsRecentFile, PredictEditsResponse, + ZED_VERSION_HEADER_NAME, }; use collections::{HashMap, HashSet, VecDeque}; use futures::AsyncReadExt; @@ -32,7 +34,7 @@ use language::{ Anchor, Buffer, BufferSnapshot, EditPreview, OffsetRangeExt, ToOffset, ToPoint, text_diff, }; use language_model::{LlmApiToken, RefreshLlmTokenListener}; -use project::{Project, ProjectPath}; +use project::{Project, ProjectEntryId, ProjectPath}; use release_channel::AppVersion; use settings::WorktreeId; use std::str::FromStr; @@ -70,6 +72,12 @@ const MAX_DIAGNOSTIC_GROUPS: usize = 10; /// Maximum number of events to track. const MAX_EVENT_COUNT: usize = 16; +/// Maximum number of recent files to track. +const MAX_RECENT_PROJECT_ENTRIES_COUNT: usize = 16; + +/// Maximum number of edit predictions to store for feedback. +const MAX_SHOWN_COMPLETION_COUNT: usize = 50; + actions!( edit_prediction, [ @@ -212,7 +220,7 @@ impl std::fmt::Debug for EditPrediction { } pub struct Zeta { - workspace: Option>, + workspace: WeakEntity, client: Arc, events: VecDeque, registered_buffers: HashMap, @@ -225,6 +233,7 @@ pub struct Zeta { update_required: bool, user_store: Entity, license_detection_watchers: HashMap>, + recent_project_entries: VecDeque<(ProjectEntryId, Instant)>, } impl Zeta { @@ -233,7 +242,7 @@ impl Zeta { } pub fn register( - workspace: Option>, + workspace: Option>, worktree: Option>, client: Arc, user_store: Entity, @@ -266,7 +275,7 @@ impl Zeta { } fn new( - workspace: Option>, + workspace: Option>, client: Arc, user_store: Entity, cx: &mut Context, @@ -276,11 +285,27 @@ impl Zeta { let data_collection_choice = Self::load_data_collection_choices(); let data_collection_choice = cx.new(|_| data_collection_choice); + if let Some(workspace) = &workspace { + cx.subscribe( + &workspace.read(cx).project().clone(), + |this, _workspace, event, _cx| match event { + project::Event::ActiveEntryChanged(Some(project_entry_id)) => { + this.push_recent_project_entry(*project_entry_id) + } + _ => {} + }, + ) + .detach(); + } + Self { - workspace, + workspace: workspace.map_or_else( + || WeakEntity::new_invalid(), + |workspace| workspace.downgrade(), + ), client, - events: VecDeque::new(), - shown_completions: VecDeque::new(), + events: VecDeque::with_capacity(MAX_EVENT_COUNT), + shown_completions: VecDeque::with_capacity(MAX_SHOWN_COMPLETION_COUNT), rated_completions: HashSet::default(), registered_buffers: HashMap::default(), data_collection_choice, @@ -300,6 +325,7 @@ impl Zeta { update_required: false, license_detection_watchers: HashMap::default(), user_store, + recent_project_entries: VecDeque::with_capacity(MAX_RECENT_PROJECT_ENTRIES_COUNT), } } @@ -327,11 +353,12 @@ impl Zeta { } } - self.events.push_back(event); if self.events.len() >= MAX_EVENT_COUNT { // These are halved instead of popping to improve prompt caching. self.events.drain(..MAX_EVENT_COUNT / 2); } + + self.events.push_back(event); } pub fn register_buffer(&mut self, buffer: &Entity, cx: &mut Context) { @@ -393,12 +420,17 @@ impl Zeta { let llm_token = self.llm_token.clone(); let app_version = AppVersion::global(cx); - let git_info = if let (true, Some(project), Some(file)) = + let (git_info, recent_files) = if let (true, Some(project), Some(file)) = (can_collect_data, project, snapshot.file()) + && let Some(repository) = + git_repository_for_file(project, &ProjectPath::from_file(file.as_ref(), cx), cx) { - git_info_for_file(project, &ProjectPath::from_file(file.as_ref(), cx), cx) + let repository = repository.read(cx); + let git_info = make_predict_edits_git_info(repository); + let recent_files = self.recent_files(&buffer_snapshotted_at, repository, cx); + (git_info, Some(recent_files)) } else { - None + (None, None) }; let full_path: Arc = snapshot @@ -417,6 +449,7 @@ impl Zeta { make_events_prompt, can_collect_data, git_info, + recent_files, cx, ); @@ -702,12 +735,8 @@ and then another can_collect_data: bool, cx: &mut Context, ) -> Task>> { - let workspace = self - .workspace - .as_ref() - .and_then(|workspace| workspace.upgrade()); self.request_completion_impl( - workspace, + self.workspace.upgrade(), project, buffer, position, @@ -1021,11 +1050,11 @@ and then another } pub fn completion_shown(&mut self, completion: &EditPrediction, cx: &mut Context) { - self.shown_completions.push_front(completion.clone()); - if self.shown_completions.len() > 50 { + if self.shown_completions.len() >= MAX_SHOWN_COMPLETION_COUNT { let completion = self.shown_completions.pop_back().unwrap(); self.rated_completions.remove(&completion.id); } + self.shown_completions.push_front(completion.clone()); cx.notify(); } @@ -1099,6 +1128,63 @@ and then another None => DataCollectionChoice::NotAnswered, } } + + fn push_recent_project_entry(&mut self, project_entry_id: ProjectEntryId) { + let now = Instant::now(); + if let Some(existing_ix) = self + .recent_project_entries + .iter() + .rposition(|(id, _)| *id == project_entry_id) + { + self.recent_project_entries.remove(existing_ix); + } + if self.recent_project_entries.len() >= MAX_RECENT_PROJECT_ENTRIES_COUNT { + self.recent_project_entries.pop_front(); + } + self.recent_project_entries + .push_back((project_entry_id, now)); + } + + fn recent_files( + &mut self, + now: &Instant, + repository: &Repository, + cx: &Context, + ) -> Vec { + let Ok(project) = self + .workspace + .read_with(cx, |workspace, _cx| workspace.project().clone()) + else { + return Vec::new(); + }; + let mut results = Vec::new(); + for ix in (0..self.recent_project_entries.len()).rev() { + let (id, last_active_at) = &self.recent_project_entries[ix]; + let Some(project_path) = project.read(cx).path_for_entry(*id, cx) else { + self.recent_project_entries.remove(ix); + continue; + }; + let Some(repo_path) = repository.project_path_to_repo_path(&project_path, cx) else { + // entry not removed since queries involving other repositories might occur later + continue; + }; + let Some(repo_path) = repo_path.to_str() else { + // paths may not be valid UTF-8 + self.recent_project_entries.remove(ix); + continue; + }; + let Ok(active_to_now_ms) = now.duration_since(*last_active_at).as_millis().try_into() + else { + self.recent_project_entries.remove(ix); + continue; + }; + results.push(PredictEditsRecentFile { + repo_path: repo_path.to_string(), + active_to_now_ms, + }); + } + results + } } pub struct PerformPredictEditsParams { @@ -1123,33 +1209,32 @@ fn common_prefix, T2: Iterator>(a: T1, b: .sum() } -fn git_info_for_file( +fn git_repository_for_file( project: &Entity, project_path: &ProjectPath, cx: &App, -) -> Option { +) -> Option> { let git_store = project.read(cx).git_store().read(cx); - if let Some((repository, _repo_path)) = - git_store.repository_and_path_for_project_path(project_path, cx) - { - let repository = repository.read(cx); - let head_sha = repository - .head_commit - .as_ref() - .map(|head_commit| head_commit.sha.to_string()); - let remote_origin_url = repository.remote_origin_url.clone(); - let remote_upstream_url = repository.remote_upstream_url.clone(); - if head_sha.is_none() && remote_origin_url.is_none() && remote_upstream_url.is_none() { - return None; - } - Some(PredictEditsGitInfo { - head_sha, - remote_origin_url, - remote_upstream_url, - }) - } else { - None + git_store + .repository_and_path_for_project_path(project_path, cx) + .map(|(repo, _repo_path)| repo) +} + +fn make_predict_edits_git_info(repository: &Repository) -> Option { + let head_sha = repository + .head_commit + .as_ref() + .map(|head_commit| head_commit.sha.to_string()); + let remote_origin_url = repository.remote_origin_url.clone(); + let remote_upstream_url = repository.remote_upstream_url.clone(); + if head_sha.is_none() && remote_origin_url.is_none() && remote_upstream_url.is_none() { + return None; } + Some(PredictEditsGitInfo { + head_sha, + remote_origin_url, + remote_upstream_url, + }) } pub struct GatherContextOutput { @@ -1165,6 +1250,7 @@ pub fn gather_context( make_events_prompt: impl FnOnce() -> String + Send + 'static, can_collect_data: bool, git_info: Option, + recent_files: Option>, cx: &App, ) -> Task> { let local_lsp_store = @@ -1216,6 +1302,7 @@ pub fn gather_context( git_info, outline: None, speculated_output: None, + recent_files, }; Ok(GatherContextOutput { diff --git a/crates/zeta_cli/src/main.rs b/crates/zeta_cli/src/main.rs index 5b2d4cf615..76f638057a 100644 --- a/crates/zeta_cli/src/main.rs +++ b/crates/zeta_cli/src/main.rs @@ -174,6 +174,7 @@ async fn get_context( // Enable gathering extra data not currently needed for edit predictions let can_collect_data = true; let git_info = None; + let recent_files = None; let mut gather_context_output = cx .update(|cx| { gather_context( @@ -184,6 +185,7 @@ async fn get_context( move || events, can_collect_data, git_info, + recent_files, cx, ) })? From b40794d413bca1c454ed693e191ae6214cae3274 Mon Sep 17 00:00:00 2001 From: Michael Sloan Date: Mon, 25 Aug 2025 22:01:36 -0600 Subject: [PATCH 2/6] Refactor `Repository::status_for_path` to return `FileStatus` instead of `StatusEntry` --- crates/collab/src/tests/integration_tests.rs | 7 +--- crates/editor/src/items.rs | 2 +- crates/outline_panel/src/outline_panel.rs | 4 +- crates/project/src/git_store.rs | 11 +++--- crates/project/src/project_tests.rs | 40 ++++++++------------ 5 files changed, 24 insertions(+), 40 deletions(-) diff --git a/crates/collab/src/tests/integration_tests.rs b/crates/collab/src/tests/integration_tests.rs index 5c73253048..c82b50a8b0 100644 --- a/crates/collab/src/tests/integration_tests.rs +++ b/crates/collab/src/tests/integration_tests.rs @@ -3072,12 +3072,7 @@ async fn test_git_status_sync( .collect::>(); assert_eq!(repos.len(), 1); let repo = repos.into_iter().next().unwrap(); - assert_eq!( - repo.read(cx) - .status_for_path(&file.into()) - .map(|entry| entry.status), - status - ); + assert_eq!(repo.read(cx).status_for_path(&file.into()), status); } project_local.read_with(cx_a, |project, cx| { diff --git a/crates/editor/src/items.rs b/crates/editor/src/items.rs index b7110190fd..9f02e7de5c 100644 --- a/crates/editor/src/items.rs +++ b/crates/editor/src/items.rs @@ -685,7 +685,7 @@ impl Item for Editor { .git_store() .read(cx) .repository_and_path_for_buffer_id(buffer_id, cx)?; - let status = repo.read(cx).status_for_path(&repo_path)?.status; + let status = repo.read(cx).status_for_path(&repo_path)?; Some(entry_git_aware_label_color( status.summary(), diff --git a/crates/outline_panel/src/outline_panel.rs b/crates/outline_panel/src/outline_panel.rs index 10698cead8..a82dd5dd4b 100644 --- a/crates/outline_panel/src/outline_panel.rs +++ b/crates/outline_panel/src/outline_panel.rs @@ -2693,9 +2693,7 @@ impl OutlinePanel { let status = git_store .read(cx) .repository_and_path_for_buffer_id(buffer_id, cx) - .and_then(|(repo, path)| { - Some(repo.read(cx).status_for_path(&path)?.status) - }); + .and_then(|(repo, path)| Some(repo.read(cx).status_for_path(&path)?)); buffer_excerpts .entry(buffer_id) .or_insert_with(|| { diff --git a/crates/project/src/git_store.rs b/crates/project/src/git_store.rs index 5cf298a8bf..9b51d7b732 100644 --- a/crates/project/src/git_store.rs +++ b/crates/project/src/git_store.rs @@ -809,7 +809,7 @@ impl GitStore { cx: &App, ) -> Option { let (repo, repo_path) = self.repository_and_path_for_project_path(project_path, cx)?; - Some(repo.read(cx).status_for_path(&repo_path)?.status) + Some(repo.read(cx).status_for_path(&repo_path)?) } pub fn checkpoint(&self, cx: &mut App) -> Task> { @@ -1391,8 +1391,7 @@ impl GitStore { pub fn status_for_buffer_id(&self, buffer_id: BufferId, cx: &App) -> Option { let (repo, path) = self.repository_and_path_for_buffer_id(buffer_id, cx)?; - let status = repo.read(cx).snapshot.status_for_path(&path)?; - Some(status.status) + repo.read(cx).snapshot.status_for_path(&path) } pub fn repository_and_path_for_buffer_id( @@ -2844,10 +2843,10 @@ impl RepositorySnapshot { self.statuses_by_path.summary().item_summary } - pub fn status_for_path(&self, path: &RepoPath) -> Option { + pub fn status_for_path(&self, path: &RepoPath) -> Option { self.statuses_by_path .get(&PathKey(path.0.clone()), &()) - .cloned() + .map(|entry| entry.status.clone()) } pub fn abs_path_to_repo_path(&self, abs_path: &Path) -> Option { @@ -2874,7 +2873,7 @@ impl RepositorySnapshot { self.merge.conflicted_paths.contains(repo_path); let has_conflict_currently = self .status_for_path(repo_path) - .is_some_and(|entry| entry.status.is_conflicted()); + .is_some_and(|status| status.is_conflicted()); had_conflict_on_last_merge_head_change || has_conflict_currently } diff --git a/crates/project/src/project_tests.rs b/crates/project/src/project_tests.rs index 6dcd07482e..084eae8af1 100644 --- a/crates/project/src/project_tests.rs +++ b/crates/project/src/project_tests.rs @@ -8246,7 +8246,7 @@ async fn test_repository_subfolder_git_status( assert_eq!(repository.status_for_path(&C_TXT.into()), None); assert_eq!( - repository.status_for_path(&E_TXT.into()).unwrap().status, + repository.status_for_path(&E_TXT.into()).unwrap(), FileStatus::Untracked ); }); @@ -8459,15 +8459,11 @@ async fn test_rename_work_directory(cx: &mut gpui::TestAppContext) { root_path.join("projects/project1").as_path() ); assert_eq!( - repository - .status_for_path(&"a".into()) - .map(|entry| entry.status), + repository.status_for_path(&"a".into()), Some(StatusCode::Modified.worktree()), ); assert_eq!( - repository - .status_for_path(&"b".into()) - .map(|entry| entry.status), + repository.status_for_path(&"b".into()), Some(FileStatus::Untracked), ); }); @@ -8485,11 +8481,11 @@ async fn test_rename_work_directory(cx: &mut gpui::TestAppContext) { root_path.join("projects/project2").as_path() ); assert_eq!( - repository.status_for_path(&"a".into()).unwrap().status, + repository.status_for_path(&"a".into()).unwrap(), StatusCode::Modified.worktree(), ); assert_eq!( - repository.status_for_path(&"b".into()).unwrap().status, + repository.status_for_path(&"b".into()).unwrap(), FileStatus::Untracked, ); }); @@ -8562,11 +8558,11 @@ async fn test_file_status(cx: &mut gpui::TestAppContext) { ); assert_eq!( - repository.status_for_path(&B_TXT.into()).unwrap().status, + repository.status_for_path(&B_TXT.into()).unwrap(), FileStatus::Untracked, ); assert_eq!( - repository.status_for_path(&F_TXT.into()).unwrap().status, + repository.status_for_path(&F_TXT.into()).unwrap(), FileStatus::Untracked, ); }); @@ -8582,7 +8578,7 @@ async fn test_file_status(cx: &mut gpui::TestAppContext) { // The worktree detects that the file's git status has changed. repository.read_with(cx, |repository, _| { assert_eq!( - repository.status_for_path(&A_TXT.into()).unwrap().status, + repository.status_for_path(&A_TXT.into()).unwrap(), StatusCode::Modified.worktree(), ); }); @@ -8600,7 +8596,7 @@ async fn test_file_status(cx: &mut gpui::TestAppContext) { // The worktree detects that the files' git status have changed. repository.read_with(cx, |repository, _cx| { assert_eq!( - repository.status_for_path(&F_TXT.into()).unwrap().status, + repository.status_for_path(&F_TXT.into()).unwrap(), FileStatus::Untracked, ); assert_eq!(repository.status_for_path(&B_TXT.into()), None); @@ -8623,11 +8619,11 @@ async fn test_file_status(cx: &mut gpui::TestAppContext) { repository.read_with(cx, |repository, _cx| { assert_eq!(repository.status_for_path(&A_TXT.into()), None); assert_eq!( - repository.status_for_path(&B_TXT.into()).unwrap().status, + repository.status_for_path(&B_TXT.into()).unwrap(), FileStatus::Untracked, ); assert_eq!( - repository.status_for_path(&E_TXT.into()).unwrap().status, + repository.status_for_path(&E_TXT.into()).unwrap(), StatusCode::Modified.worktree(), ); }); @@ -8666,8 +8662,7 @@ async fn test_file_status(cx: &mut gpui::TestAppContext) { assert_eq!( repository .status_for_path(&Path::new(renamed_dir_name).join(RENAMED_FILE).into()) - .unwrap() - .status, + .unwrap(), FileStatus::Untracked, ); }); @@ -8690,8 +8685,7 @@ async fn test_file_status(cx: &mut gpui::TestAppContext) { assert_eq!( repository .status_for_path(&Path::new(renamed_dir_name).join(RENAMED_FILE).into()) - .unwrap() - .status, + .unwrap(), FileStatus::Untracked, ); }); @@ -9000,7 +8994,7 @@ async fn test_git_worktrees_and_submodules(cx: &mut gpui::TestAppContext) { barrier.await.unwrap(); worktree_repo.update(cx, |repo, _| { pretty_assertions::assert_eq!( - repo.status_for_path(&"src/b.txt".into()).unwrap().status, + repo.status_for_path(&"src/b.txt".into()).unwrap(), StatusCode::Modified.worktree(), ); }); @@ -9039,7 +9033,7 @@ async fn test_git_worktrees_and_submodules(cx: &mut gpui::TestAppContext) { barrier.await.unwrap(); submodule_repo.update(cx, |repo, _| { pretty_assertions::assert_eq!( - repo.status_for_path(&"c.txt".into()).unwrap().status, + repo.status_for_path(&"c.txt".into()).unwrap(), StatusCode::Modified.worktree(), ); }); @@ -9300,9 +9294,7 @@ fn assert_entry_git_state( let entry = tree .entry_for_path(path) .unwrap_or_else(|| panic!("entry {path} not found")); - let status = repository - .status_for_path(&path.into()) - .map(|entry| entry.status); + let status = repository.status_for_path(&path.into()).map(|entry| entry); let expected = index_status.map(|index_status| { TrackedStatus { index_status, From ee6a8a20e263d96cd61b5f316f9ae44ab72cc603 Mon Sep 17 00:00:00 2001 From: Michael Sloan Date: Mon, 25 Aug 2025 22:44:08 -0600 Subject: [PATCH 3/6] Use worktree status and git status when filtering recent files list --- crates/zeta/src/zeta.rs | 65 +++++++++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 22 deletions(-) diff --git a/crates/zeta/src/zeta.rs b/crates/zeta/src/zeta.rs index 45cd7b1614..73c6cfb914 100644 --- a/crates/zeta/src/zeta.rs +++ b/crates/zeta/src/zeta.rs @@ -1159,29 +1159,50 @@ and then another }; let mut results = Vec::new(); for ix in (0..self.recent_project_entries.len()).rev() { - let (id, last_active_at) = &self.recent_project_entries[ix]; - let Some(project_path) = project.read(cx).path_for_entry(*id, cx) else { + let (entry_id, last_active_at) = &self.recent_project_entries[ix]; + if let Some(worktree) = project.read(cx).worktree_for_entry(*entry_id, cx) + && let worktree = worktree.read(cx) + && let Some(entry) = worktree.entry_for_id(*entry_id) + && entry.is_file() + && entry.is_created() + && !entry.is_ignored + && !entry.is_private + && !entry.is_external + && !entry.is_fifo + { + let project_path = ProjectPath { + worktree_id: worktree.id(), + path: entry.path.clone(), + }; + let Some(repo_path) = repository.project_path_to_repo_path(&project_path, cx) + else { + // entry not removed since queries involving other repositories might occur later + continue; + }; + let Some(repo_path_str) = repo_path.to_str() else { + // paths may not be valid UTF-8 + self.recent_project_entries.remove(ix); + continue; + }; + if let Some(file_status) = repository.status_for_path(&repo_path) { + if file_status.is_ignored() || file_status.is_untracked() { + // entry not removed because it may belong to a nested repository + continue; + } + } + let Ok(active_to_now_ms) = + now.duration_since(*last_active_at).as_millis().try_into() + else { + self.recent_project_entries.remove(ix); + continue; + }; + results.push(PredictEditsRecentFile { + repo_path: repo_path_str.to_string(), + active_to_now_ms, + }); + } else { self.recent_project_entries.remove(ix); - continue; - }; - let Some(repo_path) = repository.project_path_to_repo_path(&project_path, cx) else { - // entry not removed since queries involving other repositories might occur later - continue; - }; - let Some(repo_path) = repo_path.to_str() else { - // paths may not be valid UTF-8 - self.recent_project_entries.remove(ix); - continue; - }; - let Ok(active_to_now_ms) = now.duration_since(*last_active_at).as_millis().try_into() - else { - self.recent_project_entries.remove(ix); - continue; - }; - results.push(PredictEditsRecentFile { - repo_path: repo_path.to_string(), - active_to_now_ms, - }); + } } results } From 87609557f008e31c31f164512b3824ed9ccd74a5 Mon Sep 17 00:00:00 2001 From: Michael Sloan Date: Mon, 25 Aug 2025 22:47:00 -0600 Subject: [PATCH 4/6] Filter out excessively long paths --- crates/zeta/src/zeta.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/crates/zeta/src/zeta.rs b/crates/zeta/src/zeta.rs index 73c6cfb914..a415c95d3b 100644 --- a/crates/zeta/src/zeta.rs +++ b/crates/zeta/src/zeta.rs @@ -75,6 +75,9 @@ const MAX_EVENT_COUNT: usize = 16; /// Maximum number of recent files to track. const MAX_RECENT_PROJECT_ENTRIES_COUNT: usize = 16; +/// Maximum file path length to include in recent files list. +const MAX_RECENT_FILE_PATH_LENGTH: usize = 512; + /// Maximum number of edit predictions to store for feedback. const MAX_SHOWN_COMPLETION_COUNT: usize = 50; @@ -1184,6 +1187,10 @@ and then another self.recent_project_entries.remove(ix); continue; }; + if repo_path_str.len() > MAX_RECENT_FILE_PATH_LENGTH { + self.recent_project_entries.remove(ix); + continue; + } if let Some(file_status) = repository.status_for_path(&repo_path) { if file_status.is_ignored() || file_status.is_untracked() { // entry not removed because it may belong to a nested repository From b9cd8f5d2a4c6504f4b40983bff16d484d30c399 Mon Sep 17 00:00:00 2001 From: Michael Sloan Date: Mon, 25 Aug 2025 23:52:23 -0600 Subject: [PATCH 5/6] Cleanup + only record git info if current file may be in repo --- .../cloud_llm_client/src/cloud_llm_client.rs | 4 +- crates/zeta/src/zeta.rs | 141 ++++++++++-------- crates/zeta_cli/src/main.rs | 7 +- 3 files changed, 80 insertions(+), 72 deletions(-) diff --git a/crates/cloud_llm_client/src/cloud_llm_client.rs b/crates/cloud_llm_client/src/cloud_llm_client.rs index 7d1548b322..eca725bef1 100644 --- a/crates/cloud_llm_client/src/cloud_llm_client.rs +++ b/crates/cloud_llm_client/src/cloud_llm_client.rs @@ -152,8 +152,6 @@ pub struct PredictEditsBody { /// Info about the git repository state, only present when can_collect_data is true. #[serde(skip_serializing_if = "Option::is_none", default)] pub git_info: Option, - #[serde(skip_serializing_if = "Option::is_none", default)] - pub recent_files: Option>, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -167,6 +165,8 @@ pub struct PredictEditsGitInfo { /// URL of the remote called `upstream`. #[serde(skip_serializing_if = "Option::is_none", default)] pub remote_upstream_url: Option, + #[serde(skip_serializing_if = "Option::is_none", default)] + pub recent_files: Option>, } #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/crates/zeta/src/zeta.rs b/crates/zeta/src/zeta.rs index a415c95d3b..6dbb8e317c 100644 --- a/crates/zeta/src/zeta.rs +++ b/crates/zeta/src/zeta.rs @@ -404,7 +404,7 @@ impl Zeta { project: Option<&Entity>, buffer: &Entity, cursor: language::Anchor, - can_collect_data: bool, + can_collect_data: CanCollectData, cx: &mut Context, perform_predict_edits: F, ) -> Task>> @@ -423,17 +423,10 @@ impl Zeta { let llm_token = self.llm_token.clone(); let app_version = AppVersion::global(cx); - let (git_info, recent_files) = if let (true, Some(project), Some(file)) = - (can_collect_data, project, snapshot.file()) - && let Some(repository) = - git_repository_for_file(project, &ProjectPath::from_file(file.as_ref(), cx), cx) - { - let repository = repository.read(cx); - let git_info = make_predict_edits_git_info(repository); - let recent_files = self.recent_files(&buffer_snapshotted_at, repository, cx); - (git_info, Some(recent_files)) + let git_info = if matches!(can_collect_data, CanCollectData(true)) { + self.gather_git_info(project.clone(), &buffer_snapshotted_at, &snapshot, cx) } else { - (None, None) + None }; let full_path: Arc = snapshot @@ -452,7 +445,6 @@ impl Zeta { make_events_prompt, can_collect_data, git_info, - recent_files, cx, ); @@ -725,9 +717,15 @@ and then another ) -> Task>> { use std::future::ready; - self.request_completion_impl(None, project, buffer, position, false, cx, |_params| { - ready(Ok((response, None))) - }) + self.request_completion_impl( + None, + project, + buffer, + position, + CanCollectData(false), + cx, + |_params| ready(Ok((response, None))), + ) } pub fn request_completion( @@ -735,7 +733,7 @@ and then another project: Option<&Entity>, buffer: &Entity, position: language::Anchor, - can_collect_data: bool, + can_collect_data: CanCollectData, cx: &mut Context, ) -> Task>> { self.request_completion_impl( @@ -1132,6 +1130,46 @@ and then another } } + fn gather_git_info( + &mut self, + project: Option<&Entity>, + buffer_snapshotted_at: &Instant, + snapshot: &BufferSnapshot, + cx: &Context, + ) -> Option { + let project = project?.read(cx); + let file = snapshot.file()?; + let project_path = ProjectPath::from_file(file.as_ref(), cx); + let entry = project.entry_for_path(&project_path, cx)?; + if !worktree_entry_eligible_for_collection(&entry) { + return None; + } + + let git_store = project.git_store().read(cx); + let (repository, _repo_path) = + git_store.repository_and_path_for_project_path(&project_path, cx)?; + + let repository = repository.read(cx); + let head_sha = repository + .head_commit + .as_ref() + .map(|head_commit| head_commit.sha.to_string()); + let remote_origin_url = repository.remote_origin_url.clone(); + let remote_upstream_url = repository.remote_upstream_url.clone(); + if head_sha.is_none() && remote_origin_url.is_none() && remote_upstream_url.is_none() { + return None; + } + + let recent_files = self.recent_files(&buffer_snapshotted_at, repository, cx); + + Some(PredictEditsGitInfo { + head_sha, + remote_origin_url, + remote_upstream_url, + recent_files: Some(recent_files), + }) + } + fn push_recent_project_entry(&mut self, project_entry_id: ProjectEntryId) { let now = Instant::now(); if let Some(existing_ix) = self @@ -1166,12 +1204,7 @@ and then another if let Some(worktree) = project.read(cx).worktree_for_entry(*entry_id, cx) && let worktree = worktree.read(cx) && let Some(entry) = worktree.entry_for_id(*entry_id) - && entry.is_file() - && entry.is_created() - && !entry.is_ignored - && !entry.is_private - && !entry.is_external - && !entry.is_fifo + && worktree_entry_eligible_for_collection(entry) { let project_path = ProjectPath { worktree_id: worktree.id(), @@ -1191,12 +1224,6 @@ and then another self.recent_project_entries.remove(ix); continue; } - if let Some(file_status) = repository.status_for_path(&repo_path) { - if file_status.is_ignored() || file_status.is_untracked() { - // entry not removed because it may belong to a nested repository - continue; - } - } let Ok(active_to_now_ms) = now.duration_since(*last_active_at).as_millis().try_into() else { @@ -1215,6 +1242,15 @@ and then another } } +fn worktree_entry_eligible_for_collection(entry: &worktree::Entry) -> bool { + entry.is_file() + && entry.is_created() + && !entry.is_ignored + && !entry.is_private + && !entry.is_external + && !entry.is_fifo +} + pub struct PerformPredictEditsParams { pub client: Arc, pub llm_token: LlmApiToken, @@ -1237,34 +1273,6 @@ fn common_prefix, T2: Iterator>(a: T1, b: .sum() } -fn git_repository_for_file( - project: &Entity, - project_path: &ProjectPath, - cx: &App, -) -> Option> { - let git_store = project.read(cx).git_store().read(cx); - git_store - .repository_and_path_for_project_path(project_path, cx) - .map(|(repo, _repo_path)| repo) -} - -fn make_predict_edits_git_info(repository: &Repository) -> Option { - let head_sha = repository - .head_commit - .as_ref() - .map(|head_commit| head_commit.sha.to_string()); - let remote_origin_url = repository.remote_origin_url.clone(); - let remote_upstream_url = repository.remote_upstream_url.clone(); - if head_sha.is_none() && remote_origin_url.is_none() && remote_upstream_url.is_none() { - return None; - } - Some(PredictEditsGitInfo { - head_sha, - remote_origin_url, - remote_upstream_url, - }) -} - pub struct GatherContextOutput { pub body: PredictEditsBody, pub editable_range: Range, @@ -1276,15 +1284,16 @@ pub fn gather_context( snapshot: &BufferSnapshot, cursor_point: language::Point, make_events_prompt: impl FnOnce() -> String + Send + 'static, - can_collect_data: bool, + can_collect_data: CanCollectData, git_info: Option, - recent_files: Option>, cx: &App, ) -> Task> { let local_lsp_store = project.and_then(|project| project.read(cx).lsp_store().read(cx).as_local()); let diagnostic_groups: Vec<(String, serde_json::Value)> = - if can_collect_data && let Some(local_lsp_store) = local_lsp_store { + if matches!(can_collect_data, CanCollectData(true)) + && let Some(local_lsp_store) = local_lsp_store + { snapshot .diagnostic_groups(None) .into_iter() @@ -1325,12 +1334,11 @@ pub fn gather_context( let body = PredictEditsBody { input_events, input_excerpt: input_excerpt.prompt, - can_collect_data, + can_collect_data: can_collect_data.0, diagnostic_groups, git_info, outline: None, speculated_output: None, - recent_files, }; Ok(GatherContextOutput { @@ -1491,6 +1499,9 @@ pub struct ProviderDataCollection { license_detection_watcher: Option>, } +#[derive(Debug, Clone, Copy)] +pub struct CanCollectData(pub bool); + impl ProviderDataCollection { pub fn new(zeta: Entity, buffer: Option>, cx: &mut App) -> Self { let choice_and_watcher = buffer.and_then(|buffer| { @@ -1524,8 +1535,8 @@ impl ProviderDataCollection { } } - pub fn can_collect_data(&self, cx: &App) -> bool { - self.is_data_collection_enabled(cx) && self.is_project_open_source() + pub fn can_collect_data(&self, cx: &App) -> CanCollectData { + CanCollectData(self.is_data_collection_enabled(cx) && self.is_project_open_source()) } pub fn is_data_collection_enabled(&self, cx: &App) -> bool { @@ -2149,7 +2160,7 @@ mod tests { let buffer = cx.new(|cx| Buffer::local(buffer_content, cx)); let cursor = buffer.read_with(cx, |buffer, _| buffer.anchor_before(Point::new(1, 0))); let completion_task = zeta.update(cx, |zeta, cx| { - zeta.request_completion(None, &buffer, cursor, false, cx) + zeta.request_completion(None, &buffer, cursor, CanCollectData(false), cx) }); let completion = completion_task.await.unwrap().unwrap(); @@ -2214,7 +2225,7 @@ mod tests { let snapshot = buffer.read_with(cx, |buffer, _| buffer.snapshot()); let cursor = buffer.read_with(cx, |buffer, _| buffer.anchor_before(Point::new(1, 0))); let completion_task = zeta.update(cx, |zeta, cx| { - zeta.request_completion(None, &buffer, cursor, false, cx) + zeta.request_completion(None, &buffer, cursor, CanCollectData(false), cx) }); let completion = completion_task.await.unwrap().unwrap(); diff --git a/crates/zeta_cli/src/main.rs b/crates/zeta_cli/src/main.rs index 76f638057a..7ffbd68898 100644 --- a/crates/zeta_cli/src/main.rs +++ b/crates/zeta_cli/src/main.rs @@ -18,7 +18,7 @@ use std::process::exit; use std::str::FromStr; use std::sync::Arc; use std::time::Duration; -use zeta::{GatherContextOutput, PerformPredictEditsParams, Zeta, gather_context}; +use zeta::{CanCollectData, GatherContextOutput, PerformPredictEditsParams, Zeta, gather_context}; use crate::headless::ZetaCliAppState; @@ -172,9 +172,7 @@ async fn get_context( None => String::new(), }; // Enable gathering extra data not currently needed for edit predictions - let can_collect_data = true; let git_info = None; - let recent_files = None; let mut gather_context_output = cx .update(|cx| { gather_context( @@ -183,9 +181,8 @@ async fn get_context( &snapshot, clipped_cursor, move || events, - can_collect_data, + CanCollectData(true), git_info, - recent_files, cx, ) })? From aaaba05ef73ebae474543be7b39ede2dee5c25bd Mon Sep 17 00:00:00 2001 From: Michael Sloan Date: Tue, 26 Aug 2025 00:03:46 -0600 Subject: [PATCH 6/6] Also record repo relative file path --- crates/cloud_llm_client/src/cloud_llm_client.rs | 8 +++++++- crates/zeta/src/zeta.rs | 6 ++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/crates/cloud_llm_client/src/cloud_llm_client.rs b/crates/cloud_llm_client/src/cloud_llm_client.rs index eca725bef1..f0edef6e27 100644 --- a/crates/cloud_llm_client/src/cloud_llm_client.rs +++ b/crates/cloud_llm_client/src/cloud_llm_client.rs @@ -156,6 +156,9 @@ pub struct PredictEditsBody { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct PredictEditsGitInfo { + /// Repo-relative path to the file that contains the input excerpt. + #[serde(skip_serializing_if = "Option::is_none", default)] + pub input_file_path: Option, /// SHA of git HEAD commit at time of prediction. #[serde(skip_serializing_if = "Option::is_none", default)] pub head_sha: Option, @@ -165,13 +168,16 @@ pub struct PredictEditsGitInfo { /// URL of the remote called `upstream`. #[serde(skip_serializing_if = "Option::is_none", default)] pub remote_upstream_url: Option, + /// Recently active files that may be within this repository. #[serde(skip_serializing_if = "Option::is_none", default)] pub recent_files: Option>, } #[derive(Debug, Clone, Serialize, Deserialize)] pub struct PredictEditsRecentFile { - pub repo_path: String, + /// Path to a file within the repository. + pub path: String, + /// Milliseconds between the editor for this file being active and the request time. pub active_to_now_ms: u32, } diff --git a/crates/zeta/src/zeta.rs b/crates/zeta/src/zeta.rs index 6dbb8e317c..331949b9a2 100644 --- a/crates/zeta/src/zeta.rs +++ b/crates/zeta/src/zeta.rs @@ -1146,8 +1146,9 @@ and then another } let git_store = project.git_store().read(cx); - let (repository, _repo_path) = + let (repository, repo_path) = git_store.repository_and_path_for_project_path(&project_path, cx)?; + let repo_path_str = repo_path.to_str()?; let repository = repository.read(cx); let head_sha = repository @@ -1163,6 +1164,7 @@ and then another let recent_files = self.recent_files(&buffer_snapshotted_at, repository, cx); Some(PredictEditsGitInfo { + input_file_path: Some(repo_path_str.to_string()), head_sha, remote_origin_url, remote_upstream_url, @@ -1231,7 +1233,7 @@ and then another continue; }; results.push(PredictEditsRecentFile { - repo_path: repo_path_str.to_string(), + path: repo_path_str.to_string(), active_to_now_ms, }); } else {