use std::ops::Range; use std::path::{Path, PathBuf}; use std::sync::Arc; use anyhow::{Result, anyhow, bail}; use collections::{BTreeMap, HashMap, HashSet}; use futures::{self, Future, FutureExt, future}; use gpui::{App, AppContext as _, AsyncApp, Context, Entity, SharedString, Task, WeakEntity}; use language::Buffer; use project::{ProjectItem, ProjectPath, Worktree}; use rope::Rope; use text::{Anchor, BufferId, OffsetRangeExt}; use util::maybe; use workspace::Workspace; use crate::context::{ AssistantContext, ContextBuffer, ContextId, ContextSnapshot, ContextSymbol, ContextSymbolId, DirectoryContext, FetchedUrlContext, FileContext, SymbolContext, ThreadContext, }; use crate::context_strip::SuggestedContext; use crate::thread::{Thread, ThreadId}; pub struct ContextStore { workspace: WeakEntity, context: Vec, // TODO: If an EntityId is used for all context types (like BufferId), can remove ContextId. next_context_id: ContextId, files: BTreeMap, directories: HashMap, symbols: HashMap, symbol_buffers: HashMap>, symbols_by_path: HashMap>, threads: HashMap, fetched_urls: HashMap, } impl ContextStore { pub fn new(workspace: WeakEntity) -> Self { Self { workspace, context: Vec::new(), next_context_id: ContextId(0), files: BTreeMap::default(), directories: HashMap::default(), symbols: HashMap::default(), symbol_buffers: HashMap::default(), symbols_by_path: HashMap::default(), threads: HashMap::default(), fetched_urls: HashMap::default(), } } pub fn snapshot<'a>(&'a self, cx: &'a App) -> impl Iterator + 'a { self.context() .iter() .flat_map(|context| context.snapshot(cx)) } pub fn context(&self) -> &Vec { &self.context } pub fn context_for_id(&self, id: ContextId) -> Option<&AssistantContext> { self.context().iter().find(|context| context.id() == id) } pub fn clear(&mut self) { self.context.clear(); self.files.clear(); self.directories.clear(); self.threads.clear(); self.fetched_urls.clear(); } pub fn add_file_from_path( &mut self, project_path: ProjectPath, remove_if_exists: bool, cx: &mut Context, ) -> Task> { let workspace = self.workspace.clone(); let Some(project) = workspace .upgrade() .map(|workspace| workspace.read(cx).project().clone()) else { return Task::ready(Err(anyhow!("failed to read project"))); }; cx.spawn(async move |this, cx| { let open_buffer_task = project.update(cx, |project, cx| { project.open_buffer(project_path.clone(), cx) })?; let buffer_entity = open_buffer_task.await?; let buffer_id = this.update(cx, |_, cx| buffer_entity.read(cx).remote_id())?; let already_included = this.update(cx, |this, _cx| { match this.will_include_buffer(buffer_id, &project_path.path) { Some(FileInclusion::Direct(context_id)) => { if remove_if_exists { this.remove_context(context_id); } true } Some(FileInclusion::InDirectory(_)) => true, None => false, } })?; if already_included { return anyhow::Ok(()); } let (buffer_info, text_task) = this.update(cx, |_, cx| { let buffer = buffer_entity.read(cx); collect_buffer_info_and_text( project_path.path.clone(), buffer_entity, buffer, None, cx.to_async(), ) })?; let text = text_task.await; this.update(cx, |this, _cx| { this.insert_file(make_context_buffer(buffer_info, text)); })?; anyhow::Ok(()) }) } pub fn add_file_from_buffer( &mut self, buffer_entity: Entity, cx: &mut Context, ) -> Task> { cx.spawn(async move |this, cx| { let (buffer_info, text_task) = this.update(cx, |_, cx| { let buffer = buffer_entity.read(cx); let Some(file) = buffer.file() else { return Err(anyhow!("Buffer has no path.")); }; Ok(collect_buffer_info_and_text( file.path().clone(), buffer_entity, buffer, None, cx.to_async(), )) })??; let text = text_task.await; this.update(cx, |this, _cx| { this.insert_file(make_context_buffer(buffer_info, text)) })?; anyhow::Ok(()) }) } fn insert_file(&mut self, context_buffer: ContextBuffer) { let id = self.next_context_id.post_inc(); self.files.insert(context_buffer.id, id); self.context .push(AssistantContext::File(FileContext { id, context_buffer })); } pub fn add_directory( &mut self, project_path: ProjectPath, remove_if_exists: bool, cx: &mut Context, ) -> Task> { let workspace = self.workspace.clone(); let Some(project) = workspace .upgrade() .map(|workspace| workspace.read(cx).project().clone()) else { return Task::ready(Err(anyhow!("failed to read project"))); }; let already_included = if let Some(context_id) = self.includes_directory(&project_path.path) { if remove_if_exists { self.remove_context(context_id); } true } else { false }; if already_included { return Task::ready(Ok(())); } let worktree_id = project_path.worktree_id; cx.spawn(async move |this, cx| { let worktree = project.update(cx, |project, cx| { project .worktree_for_id(worktree_id, cx) .ok_or_else(|| anyhow!("no worktree found for {worktree_id:?}")) })??; let files = worktree.update(cx, |worktree, _cx| { collect_files_in_path(worktree, &project_path.path) })?; let open_buffers_task = project.update(cx, |project, cx| { let tasks = files.iter().map(|file_path| { project.open_buffer( ProjectPath { worktree_id, path: file_path.clone(), }, cx, ) }); future::join_all(tasks) })?; let buffers = open_buffers_task.await; let mut buffer_infos = Vec::new(); let mut text_tasks = Vec::new(); this.update(cx, |_, cx| { for (path, buffer_entity) in files.into_iter().zip(buffers) { // Skip all binary files and other non-UTF8 files if let Ok(buffer_entity) = buffer_entity { let buffer = buffer_entity.read(cx); let (buffer_info, text_task) = collect_buffer_info_and_text( path, buffer_entity, buffer, None, cx.to_async(), ); buffer_infos.push(buffer_info); text_tasks.push(text_task); } } anyhow::Ok(()) })??; let buffer_texts = future::join_all(text_tasks).await; let context_buffers = buffer_infos .into_iter() .zip(buffer_texts) .map(|(info, text)| make_context_buffer(info, text)) .collect::>(); if context_buffers.is_empty() { bail!("No text files found in {}", &project_path.path.display()); } this.update(cx, |this, _| { this.insert_directory(project_path, context_buffers); })?; anyhow::Ok(()) }) } fn insert_directory(&mut self, project_path: ProjectPath, context_buffers: Vec) { let id = self.next_context_id.post_inc(); self.directories.insert(project_path.path.to_path_buf(), id); self.context .push(AssistantContext::Directory(DirectoryContext::new( id, project_path, context_buffers, ))); } pub fn add_symbol( &mut self, buffer: Entity, symbol_name: SharedString, symbol_range: Range, symbol_enclosing_range: Range, remove_if_exists: bool, cx: &mut Context, ) -> Task> { let buffer_ref = buffer.read(cx); let Some(file) = buffer_ref.file() else { return Task::ready(Err(anyhow!("Buffer has no path."))); }; let Some(project_path) = buffer_ref.project_path(cx) else { return Task::ready(Err(anyhow!("Buffer has no project path."))); }; if let Some(symbols_for_path) = self.symbols_by_path.get(&project_path) { let mut matching_symbol_id = None; for symbol in symbols_for_path { if &symbol.name == &symbol_name { let snapshot = buffer_ref.snapshot(); if symbol.range.to_offset(&snapshot) == symbol_range.to_offset(&snapshot) { matching_symbol_id = self.symbols.get(symbol).cloned(); break; } } } if let Some(id) = matching_symbol_id { if remove_if_exists { self.remove_context(id); } return Task::ready(Ok(false)); } } let (buffer_info, collect_content_task) = collect_buffer_info_and_text( file.path().clone(), buffer, buffer_ref, Some(symbol_enclosing_range.clone()), cx.to_async(), ); cx.spawn(async move |this, cx| { let content = collect_content_task.await; this.update(cx, |this, _cx| { this.insert_symbol(make_context_symbol( buffer_info, project_path, symbol_name, symbol_range, symbol_enclosing_range, content, )) })?; anyhow::Ok(true) }) } fn insert_symbol(&mut self, context_symbol: ContextSymbol) { let id = self.next_context_id.post_inc(); self.symbols.insert(context_symbol.id.clone(), id); self.symbols_by_path .entry(context_symbol.id.path.clone()) .or_insert_with(Vec::new) .push(context_symbol.id.clone()); self.symbol_buffers .insert(context_symbol.id.clone(), context_symbol.buffer.clone()); self.context.push(AssistantContext::Symbol(SymbolContext { id, context_symbol, })); } pub fn add_thread( &mut self, thread: Entity, remove_if_exists: bool, cx: &mut Context, ) { if let Some(context_id) = self.includes_thread(&thread.read(cx).id()) { if remove_if_exists { self.remove_context(context_id); } } else { self.insert_thread(thread, cx); } } fn insert_thread(&mut self, thread: Entity, cx: &App) { let id = self.next_context_id.post_inc(); let text = thread.read(cx).text().into(); self.threads.insert(thread.read(cx).id().clone(), id); self.context .push(AssistantContext::Thread(ThreadContext { id, thread, text })); } pub fn add_fetched_url(&mut self, url: String, text: impl Into) { if self.includes_url(&url).is_none() { self.insert_fetched_url(url, text); } } fn insert_fetched_url(&mut self, url: String, text: impl Into) { let id = self.next_context_id.post_inc(); self.fetched_urls.insert(url.clone(), id); self.context .push(AssistantContext::FetchedUrl(FetchedUrlContext { id, url: url.into(), text: text.into(), })); } pub fn accept_suggested_context( &mut self, suggested: &SuggestedContext, cx: &mut Context, ) -> Task> { match suggested { SuggestedContext::File { buffer, icon_path: _, name: _, } => { if let Some(buffer) = buffer.upgrade() { return self.add_file_from_buffer(buffer, cx); }; } SuggestedContext::Thread { thread, name: _ } => { if let Some(thread) = thread.upgrade() { self.insert_thread(thread, cx); }; } } Task::ready(Ok(())) } pub fn remove_context(&mut self, id: ContextId) { let Some(ix) = self.context.iter().position(|context| context.id() == id) else { return; }; match self.context.remove(ix) { AssistantContext::File(_) => { self.files.retain(|_, context_id| *context_id != id); } AssistantContext::Directory(_) => { self.directories.retain(|_, context_id| *context_id != id); } AssistantContext::Symbol(symbol) => { if let Some(symbols_in_path) = self.symbols_by_path.get_mut(&symbol.context_symbol.id.path) { symbols_in_path.retain(|s| { self.symbols .get(s) .map_or(false, |context_id| *context_id != id) }); } self.symbol_buffers.remove(&symbol.context_symbol.id); self.symbols.retain(|_, context_id| *context_id != id); } AssistantContext::FetchedUrl(_) => { self.fetched_urls.retain(|_, context_id| *context_id != id); } AssistantContext::Thread(_) => { self.threads.retain(|_, context_id| *context_id != id); } } } /// Returns whether the buffer is already included directly in the context, or if it will be /// included in the context via a directory. Directory inclusion is based on paths rather than /// buffer IDs as the directory will be re-scanned. pub fn will_include_buffer(&self, buffer_id: BufferId, path: &Path) -> Option { if let Some(context_id) = self.files.get(&buffer_id) { return Some(FileInclusion::Direct(*context_id)); } self.will_include_file_path_via_directory(path) } /// Returns whether this file path is already included directly in the context, or if it will be /// included in the context via a directory. pub fn will_include_file_path(&self, path: &Path, cx: &App) -> Option { if !self.files.is_empty() { let found_file_context = self.context.iter().find(|context| match &context { AssistantContext::File(file_context) => { let buffer = file_context.context_buffer.buffer.read(cx); if let Some(file_path) = buffer_path_log_err(buffer) { *file_path == *path } else { false } } _ => false, }); if let Some(context) = found_file_context { return Some(FileInclusion::Direct(context.id())); } } self.will_include_file_path_via_directory(path) } fn will_include_file_path_via_directory(&self, path: &Path) -> Option { if self.directories.is_empty() { return None; } let mut buf = path.to_path_buf(); while buf.pop() { if let Some(_) = self.directories.get(&buf) { return Some(FileInclusion::InDirectory(buf)); } } None } pub fn includes_directory(&self, path: &Path) -> Option { self.directories.get(path).copied() } pub fn included_symbol(&self, symbol_id: &ContextSymbolId) -> Option { self.symbols.get(symbol_id).copied() } pub fn included_symbols_by_path(&self) -> &HashMap> { &self.symbols_by_path } pub fn buffer_for_symbol(&self, symbol_id: &ContextSymbolId) -> Option> { self.symbol_buffers.get(symbol_id).cloned() } pub fn includes_thread(&self, thread_id: &ThreadId) -> Option { self.threads.get(thread_id).copied() } pub fn includes_url(&self, url: &str) -> Option { self.fetched_urls.get(url).copied() } /// Replaces the context that matches the ID of the new context, if any match. fn replace_context(&mut self, new_context: AssistantContext) { let id = new_context.id(); for context in self.context.iter_mut() { if context.id() == id { *context = new_context; break; } } } pub fn file_paths(&self, cx: &App) -> HashSet { self.context .iter() .filter_map(|context| match context { AssistantContext::File(file) => { let buffer = file.context_buffer.buffer.read(cx); buffer_path_log_err(buffer).map(|p| p.to_path_buf()) } AssistantContext::Directory(_) | AssistantContext::Symbol(_) | AssistantContext::FetchedUrl(_) | AssistantContext::Thread(_) => None, }) .collect() } pub fn thread_ids(&self) -> HashSet { self.threads.keys().cloned().collect() } } pub enum FileInclusion { Direct(ContextId), InDirectory(PathBuf), } // ContextBuffer without text. struct BufferInfo { buffer_entity: Entity, id: BufferId, version: clock::Global, } fn make_context_buffer(info: BufferInfo, text: SharedString) -> ContextBuffer { ContextBuffer { id: info.id, buffer: info.buffer_entity, version: info.version, text, } } fn make_context_symbol( info: BufferInfo, path: ProjectPath, name: SharedString, range: Range, enclosing_range: Range, text: SharedString, ) -> ContextSymbol { ContextSymbol { id: ContextSymbolId { name, range, path }, buffer_version: info.version, enclosing_range, buffer: info.buffer_entity, text, } } fn collect_buffer_info_and_text( path: Arc, buffer_entity: Entity, buffer: &Buffer, range: Option>, cx: AsyncApp, ) -> (BufferInfo, Task) { let buffer_info = BufferInfo { id: buffer.remote_id(), buffer_entity, version: buffer.version(), }; // Important to collect version at the same time as content so that staleness logic is correct. let content = if let Some(range) = range { buffer.text_for_range(range).collect::() } else { buffer.as_rope().clone() }; let text_task = cx.background_spawn(async move { to_fenced_codeblock(&path, content) }); (buffer_info, text_task) } pub fn buffer_path_log_err(buffer: &Buffer) -> Option> { if let Some(file) = buffer.file() { Some(file.path().clone()) } else { log::error!("Buffer that had a path unexpectedly no longer has a path."); None } } fn to_fenced_codeblock(path: &Path, content: Rope) -> SharedString { let path_extension = path.extension().and_then(|ext| ext.to_str()); let path_string = path.to_string_lossy(); let capacity = 3 + path_extension.map_or(0, |extension| extension.len() + 1) + path_string.len() + 1 + content.len() + 5; let mut buffer = String::with_capacity(capacity); buffer.push_str("```"); if let Some(extension) = path_extension { buffer.push_str(extension); buffer.push(' '); } buffer.push_str(&path_string); buffer.push('\n'); for chunk in content.chunks() { buffer.push_str(&chunk); } if !buffer.ends_with('\n') { buffer.push('\n'); } buffer.push_str("```\n"); debug_assert!( buffer.len() == capacity - 1 || buffer.len() == capacity, "to_fenced_codeblock calculated capacity of {}, but length was {}", capacity, buffer.len(), ); buffer.into() } fn collect_files_in_path(worktree: &Worktree, path: &Path) -> Vec> { let mut files = Vec::new(); for entry in worktree.child_entries(path) { if entry.is_dir() { files.extend(collect_files_in_path(worktree, &entry.path)); } else if entry.is_file() { files.push(entry.path.clone()); } } files } pub fn refresh_context_store_text( context_store: Entity, changed_buffers: &HashSet>, cx: &App, ) -> impl Future> + use<> { let mut tasks = Vec::new(); for context in &context_store.read(cx).context { let id = context.id(); let task = maybe!({ match context { AssistantContext::File(file_context) => { if changed_buffers.is_empty() || changed_buffers.contains(&file_context.context_buffer.buffer) { let context_store = context_store.clone(); return refresh_file_text(context_store, file_context, cx); } } AssistantContext::Directory(directory_context) => { let should_refresh = changed_buffers.is_empty() || changed_buffers.iter().any(|buffer| { let buffer = buffer.read(cx); buffer_path_log_err(&buffer).map_or(false, |path| { path.starts_with(&directory_context.path.path) }) }); if should_refresh { let context_store = context_store.clone(); return refresh_directory_text(context_store, directory_context, cx); } } AssistantContext::Symbol(symbol_context) => { if changed_buffers.is_empty() || changed_buffers.contains(&symbol_context.context_symbol.buffer) { let context_store = context_store.clone(); return refresh_symbol_text(context_store, symbol_context, cx); } } AssistantContext::Thread(thread_context) => { if changed_buffers.is_empty() { let context_store = context_store.clone(); return Some(refresh_thread_text(context_store, thread_context, cx)); } } // Intentionally omit refreshing fetched URLs as it doesn't seem all that useful, // and doing the caching properly could be tricky (unless it's already handled by // the HttpClient?). AssistantContext::FetchedUrl(_) => {} } None }); if let Some(task) = task { tasks.push(task.map(move |_| id)); } } future::join_all(tasks) } fn refresh_file_text( context_store: Entity, file_context: &FileContext, cx: &App, ) -> Option> { let id = file_context.id; let task = refresh_context_buffer(&file_context.context_buffer, cx); if let Some(task) = task { Some(cx.spawn(async move |cx| { let context_buffer = task.await; context_store .update(cx, |context_store, _| { let new_file_context = FileContext { id, context_buffer }; context_store.replace_context(AssistantContext::File(new_file_context)); }) .ok(); })) } else { None } } fn refresh_directory_text( context_store: Entity, directory_context: &DirectoryContext, cx: &App, ) -> Option> { let mut stale = false; let futures = directory_context .context_buffers .iter() .map(|context_buffer| { if let Some(refresh_task) = refresh_context_buffer(context_buffer, cx) { stale = true; future::Either::Left(refresh_task) } else { future::Either::Right(future::ready((*context_buffer).clone())) } }) .collect::>(); if !stale { return None; } let context_buffers = future::join_all(futures); let id = directory_context.snapshot.id; let path = directory_context.path.clone(); Some(cx.spawn(async move |cx| { let context_buffers = context_buffers.await; context_store .update(cx, |context_store, _| { let new_directory_context = DirectoryContext::new(id, path, context_buffers); context_store.replace_context(AssistantContext::Directory(new_directory_context)); }) .ok(); })) } fn refresh_symbol_text( context_store: Entity, symbol_context: &SymbolContext, cx: &App, ) -> Option> { let id = symbol_context.id; let task = refresh_context_symbol(&symbol_context.context_symbol, cx); if let Some(task) = task { Some(cx.spawn(async move |cx| { let context_symbol = task.await; context_store .update(cx, |context_store, _| { let new_symbol_context = SymbolContext { id, context_symbol }; context_store.replace_context(AssistantContext::Symbol(new_symbol_context)); }) .ok(); })) } else { None } } fn refresh_thread_text( context_store: Entity, thread_context: &ThreadContext, cx: &App, ) -> Task<()> { let id = thread_context.id; let thread = thread_context.thread.clone(); cx.spawn(async move |cx| { context_store .update(cx, |context_store, cx| { let text = thread.read(cx).text().into(); context_store.replace_context(AssistantContext::Thread(ThreadContext { id, thread, text, })); }) .ok(); }) } fn refresh_context_buffer( context_buffer: &ContextBuffer, cx: &App, ) -> Option + use<>> { let buffer = context_buffer.buffer.read(cx); let path = buffer_path_log_err(buffer)?; if buffer.version.changed_since(&context_buffer.version) { let (buffer_info, text_task) = collect_buffer_info_and_text( path, context_buffer.buffer.clone(), buffer, None, cx.to_async(), ); Some(text_task.map(move |text| make_context_buffer(buffer_info, text))) } else { None } } fn refresh_context_symbol( context_symbol: &ContextSymbol, cx: &App, ) -> Option + use<>> { let buffer = context_symbol.buffer.read(cx); let path = buffer_path_log_err(buffer)?; let project_path = buffer.project_path(cx)?; if buffer.version.changed_since(&context_symbol.buffer_version) { let (buffer_info, text_task) = collect_buffer_info_and_text( path, context_symbol.buffer.clone(), buffer, Some(context_symbol.enclosing_range.clone()), cx.to_async(), ); let name = context_symbol.id.name.clone(); let range = context_symbol.id.range.clone(); let enclosing_range = context_symbol.enclosing_range.clone(); Some(text_task.map(move |text| { make_context_symbol( buffer_info, project_path, name, range, enclosing_range, text, ) })) } else { None } }