project: Track manifest locations per unique manifest locator (#27194)

This pull request paves the way for exposing manifest tracking to
extensions.
- Project tree was renamed to manifest tree to better reflect its
intent (and avoid confusion).
- Language server adapters now provide the name of their *manifest
locator*. If multiple language servers refer to the same locator, the
locating code will run just once for a given path.

Release Notes:

- N/A *or* Added/Fixed/Improved ...

---------

Co-authored-by: Anthony <anthony@zed.dev>
This commit is contained in:
Piotr Osiewicz 2025-03-21 15:22:36 +01:00 committed by GitHub
parent 6bced3a834
commit 05aa8880a4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 241 additions and 154 deletions

View file

@ -0,0 +1,48 @@
use collections::HashMap;
use gpui::{App, Global, SharedString};
use parking_lot::RwLock;
use std::{ops::Deref, sync::Arc};
use language::{ManifestName, ManifestProvider};
/// Interior state of [`ManifestProviders`]: the registered providers, keyed by manifest name.
#[derive(Default)]
struct ManifestProvidersState {
// Each provider is stored under the key returned by its own `name()` (see `ManifestProviders::register`).
providers: HashMap<ManifestName, Arc<dyn ManifestProvider>>,
}
/// A registry of [`ManifestProvider`]s.
///
/// The state lives behind an `Arc<RwLock<_>>`, so clones of this handle share the same registry.
#[derive(Clone, Default)]
pub struct ManifestProviders(Arc<RwLock<ManifestProvidersState>>);
/// Newtype around [`ManifestProviders`] used as the type under which the registry is stored as an app global.
#[derive(Default)]
struct GlobalManifestProvider(ManifestProviders);
impl Deref for GlobalManifestProvider {
type Target = ManifestProviders;
fn deref(&self) -> &Self::Target {
&self.0
}
}
// Marker impl: lets `GlobalManifestProvider` be used with `App::default_global` (see `ManifestProviders::global`).
impl Global for GlobalManifestProvider {}
impl ManifestProviders {
/// Returns the global [`ManifestProviders`].
///
/// Inserts a default [`ManifestProviders`] if one does not yet exist.
pub fn global(cx: &mut App) -> Self {
cx.default_global::<GlobalManifestProvider>().0.clone()
}
/// Registers `provider` under the name it reports via `name()`.
///
/// The entry is written with a map `insert`, so a provider already stored under the same name is overwritten.
pub fn register(&self, provider: Arc<dyn ManifestProvider>) {
self.0.write().providers.insert(provider.name(), provider);
}
/// Removes the provider registered under `name`, if there is one.
pub fn unregister(&self, name: &SharedString) {
self.0.write().providers.remove(name);
}
/// Returns a cloned handle to the provider registered under `name`, or `None` if there is no such provider.
pub(super) fn get(&self, name: &SharedString) -> Option<Arc<dyn ManifestProvider>> {
self.0.read().providers.get(name).cloned()
}
}

View file

@ -0,0 +1,240 @@
use std::{
collections::{btree_map::Entry, BTreeMap},
ffi::OsStr,
ops::ControlFlow,
path::{Path, PathBuf},
sync::Arc,
};
/// [RootPathTrie] is a workhorse of [super::ManifestTree]. It is responsible for determining the closest known project root for a given path.
/// It also determines how much of a given path is unexplored, thus letting callers fill in that gap if needed.
/// Conceptually, it allows one to annotate Worktree entries with arbitrary extra metadata and run closest-ancestor searches.
///
/// A path is unexplored when the closest ancestor of a path is not the path itself; that means that we have not yet run the scan on that path.
/// For example, if there's a project root at path `python/project` and we query for a path `python/project/subdir/another_subdir/file.py`, there is
/// a known root at `python/project` and the unexplored part is `subdir/another_subdir` - we need to run a scan on these 2 directories.
pub(super) struct RootPathTrie<Label> {
// Path of this node, built by joining the keys on the way from the trie root (the root itself uses the empty path).
worktree_relative_path: Arc<Path>,
// Labels attached to this node, each with its presence marker.
labels: BTreeMap<Label, LabelPresence>,
// Child nodes, keyed by a single path component.
children: BTreeMap<Arc<OsStr>, RootPathTrie<Label>>,
}
/// Label presence is a marker that allows to optimize searches within [RootPathTrie]; node label can be:
/// - Present; we know there's definitely a project root at this node and it is the only label of that kind on the path to the root of a worktree
/// (none of its ancestors or descendants can contain the same present label)
/// - Known Absent - we know there's definitely no project root at this node and none of its ancestors are Present (descendants can be present though!).
/// - Forbidden - we know there's definitely no project root at this node and none of its ancestors or descendants can be Present.
/// The distinction is there to optimize searching; when we encounter a node with unknown status, we don't need to look at its full path
/// to the root of the worktree; it's sufficient to explore only the path between last node with a KnownAbsent state and the directory of a path, since we run searches
/// from the leaf up to the root of the worktree. When any of the ancestors is forbidden, we don't need to look at the node or its ancestors.
/// When there's a present labeled node on the path to the root, we don't need to ask the adapter to run the search at all.
///
/// In practical terms, it means that by storing label presence we don't need to do a project discovery on a given folder more than once
/// (unless the node is invalidated, which can happen when FS entries are renamed/removed).
///
/// Storing project absence allows us to recognize which paths have already been scanned for a project root unsuccessfully. This way we don't need to run
/// such scan more than once.
///
/// NOTE(review): the `Forbidden` state described above has no corresponding variant in this enum; only
/// `KnownAbsent` and `Present` exist. Either the description is stale or the variant is still to be added - confirm.
#[derive(Clone, Copy, Debug, PartialOrd, PartialEq, Ord, Eq)]
pub(super) enum LabelPresence {
// Variant order matters for the derived `Ord`/`PartialOrd`: `KnownAbsent` compares less than `Present`.
KnownAbsent,
Present,
}
impl<Label: Ord + Clone> RootPathTrie<Label> {
    /// Creates an empty trie whose root node corresponds to the empty worktree-relative path.
    pub(super) fn new() -> Self {
        Self::new_with_key(Path::new("").into())
    }

    /// Creates a node for `worktree_relative_path` that has no labels and no children yet.
    fn new_with_key(worktree_relative_path: Arc<Path>) -> Self {
        Self {
            worktree_relative_path,
            labels: BTreeMap::new(),
            children: BTreeMap::new(),
        }
    }

    /// Internal implementation of [`Self::insert`] that additionally hands back the node
    /// the label was attached to, so that callers can keep working from the insertion point.
    ///
    /// Nodes missing along `path` are created on the way down. In debug builds this panics
    /// when the target node already carries `value`.
    fn insert_inner(
        &mut self,
        path: &TriePath,
        value: Label,
        presence: LabelPresence,
    ) -> &mut Self {
        let mut node = self;
        // Accumulates the worktree-relative path of the node we are about to descend into.
        let mut prefix = PathBuf::new();
        for component in path.0.iter() {
            prefix.push(Path::new(component));
            node = match node.children.entry(component.clone()) {
                Entry::Occupied(existing) => existing.into_mut(),
                Entry::Vacant(vacancy) => {
                    vacancy.insert(Self::new_with_key(Arc::from(prefix.as_path())))
                }
            };
        }
        let _displaced = node.labels.insert(value, presence);
        debug_assert_eq!(_displaced, None);
        node
    }

    /// Attaches `value` with the given `presence` to the node at `path`, creating any missing nodes.
    pub(super) fn insert(&mut self, path: &TriePath, value: Label, presence: LabelPresence) {
        let _ = self.insert_inner(path, value, presence);
    }

    /// Walks from the root of the trie down along `path`, invoking `callback` with the
    /// worktree-relative path and the labels of every visited node that has at least one label.
    ///
    /// The walk ends early when `callback` returns [`ControlFlow::Break`] or when `path`
    /// leaves the part of the tree that is stored in the trie.
    pub(super) fn walk<'a>(
        &'a self,
        path: &TriePath,
        callback: &mut dyn for<'b> FnMut(
            &'b Arc<Path>,
            &'a BTreeMap<Label, LabelPresence>,
        ) -> ControlFlow<()>,
    ) {
        let mut node = self;
        for component in path.0.iter() {
            // Unlabeled nodes are skipped silently; labeled ones may stop the walk.
            if !node.labels.is_empty()
                && callback(&node.worktree_relative_path, &node.labels).is_break()
            {
                return;
            }
            let Some(child) = node.children.get(component) else {
                return;
            };
            node = child;
        }
        // The node `path` points at is visited last; its verdict no longer matters.
        if !node.labels.is_empty() {
            let _ = callback(&node.worktree_relative_path, &node.labels);
        }
    }

    /// Removes the node at `path` together with all of its descendants.
    ///
    /// Does nothing when `path` is empty or when any of its components is not in the trie.
    pub(super) fn remove(&mut self, path: &TriePath) {
        let Some((leaf, ancestors)) = path.0.split_last() else {
            return;
        };
        let mut node = self;
        for component in ancestors {
            let Some(child) = node.children.get_mut(component) else {
                return;
            };
            node = child;
        }
        node.children.remove(leaf);
    }
}
/// [TriePath] is a [Path] preprocessed for amortizing the cost of doing multiple lookups in distinct [RootPathTrie]s.
///
/// It holds the components of the path as a shared slice, so cloning it does not copy the components.
#[derive(Clone)]
pub(super) struct TriePath(Arc<[Arc<OsStr>]>);
impl From<&Path> for TriePath {
fn from(value: &Path) -> Self {
TriePath(value.components().map(|c| c.as_os_str().into()).collect())
}
}
#[cfg(test)]
mod tests {
use std::collections::BTreeSet;
use super::*;
// Exercises insertion, root-to-leaf walking, early termination of a walk and subtree removal.
#[test]
fn test_insert_and_lookup() {
let mut trie = RootPathTrie::<()>::new();
trie.insert(
&TriePath::from(Path::new("a/b/c")),
(),
LabelPresence::Present,
);
// Only `a/b/c` carries a label at this point, so it is the only node the callback may see.
trie.walk(&TriePath::from(Path::new("a/b/c")), &mut |path, nodes| {
assert_eq!(nodes.get(&()), Some(&LabelPresence::Present));
assert_eq!(path.as_ref(), Path::new("a/b/c"));
ControlFlow::Continue(())
});
// Now let's annotate a parent with "Known missing" node.
trie.insert(
&TriePath::from(Path::new("a")),
(),
LabelPresence::KnownAbsent,
);
// Ensure that we walk from the root to the leaf.
let mut visited_paths = BTreeSet::new();
trie.walk(&TriePath::from(Path::new("a/b/c")), &mut |path, nodes| {
if path.as_ref() == Path::new("a/b/c") {
assert_eq!(
visited_paths,
BTreeSet::from_iter([Arc::from(Path::new("a/"))])
);
assert_eq!(nodes.get(&()), Some(&LabelPresence::Present));
} else if path.as_ref() == Path::new("a/") {
assert!(visited_paths.is_empty());
assert_eq!(nodes.get(&()), Some(&LabelPresence::KnownAbsent));
} else {
panic!("Unknown path");
}
// Assert that we only ever visit a path once.
assert!(visited_paths.insert(path.clone()));
ControlFlow::Continue(())
});
// One can also pass a path whose prefix is in the tree, but not that path itself.
let mut visited_paths = BTreeSet::new();
trie.walk(
&TriePath::from(Path::new("a/b/c/d/e/f/g")),
&mut |path, nodes| {
if path.as_ref() == Path::new("a/b/c") {
assert_eq!(
visited_paths,
BTreeSet::from_iter([Arc::from(Path::new("a/"))])
);
assert_eq!(nodes.get(&()), Some(&LabelPresence::Present));
} else if path.as_ref() == Path::new("a/") {
assert!(visited_paths.is_empty());
assert_eq!(nodes.get(&()), Some(&LabelPresence::KnownAbsent));
} else {
panic!("Unknown path");
}
// Assert that we only ever visit a path once.
assert!(visited_paths.insert(path.clone()));
ControlFlow::Continue(())
},
);
// Test breaking from the tree-walk.
let mut visited_paths = BTreeSet::new();
trie.walk(&TriePath::from(Path::new("a/b/c")), &mut |path, nodes| {
if path.as_ref() == Path::new("a/") {
assert!(visited_paths.is_empty());
assert_eq!(nodes.get(&()), Some(&LabelPresence::KnownAbsent));
} else {
panic!("Unknown path");
}
// Assert that we only ever visit a path once.
assert!(visited_paths.insert(path.clone()));
ControlFlow::Break(())
});
assert_eq!(visited_paths.len(), 1);
// Entry removal.
trie.insert(
&TriePath::from(Path::new("a/b")),
(),
LabelPresence::KnownAbsent,
);
// With `a`, `a/b` and `a/b/c` all labeled, a full walk reports three nodes.
let mut visited_paths = BTreeSet::new();
trie.walk(&TriePath::from(Path::new("a/b/c")), &mut |path, _nodes| {
// Assert that we only ever visit a path once.
assert!(visited_paths.insert(path.clone()));
ControlFlow::Continue(())
});
assert_eq!(visited_paths.len(), 3);
// Removing `a/b` also drops its descendant `a/b/c`, leaving `a` as the only labeled node.
trie.remove(&TriePath::from(Path::new("a/b/")));
let mut visited_paths = BTreeSet::new();
trie.walk(&TriePath::from(Path::new("a/b/c")), &mut |path, _nodes| {
// Assert that we only ever visit a path once.
assert!(visited_paths.insert(path.clone()));
ControlFlow::Continue(())
});
assert_eq!(visited_paths.len(), 1);
assert_eq!(
visited_paths.into_iter().next().unwrap().as_ref(),
Path::new("a/")
);
}
}

View file

@ -0,0 +1,444 @@
//! This module defines an LSP Tree.
//!
//! An LSP Tree is responsible for determining which language servers apply to a given project path.
//!
//! ## RPC
//! LSP Tree is transparent to RPC peers; when clients ask the host to spawn a new language server, the host will perform an LSP Tree lookup for the provided path; it may decide
//! to reuse an existing language server. The client maintains its own LSP Tree that is a subset of the host's LSP Tree. Done this way, the client does not need to
//! ask about a suitable language server for each path it interacts with; it can resolve most of the queries locally.
use std::{
collections::{BTreeMap, BTreeSet},
path::Path,
sync::{Arc, Weak},
};
use collections::{HashMap, IndexMap};
use gpui::{App, AppContext as _, Entity, Subscription};
use language::{
language_settings::AllLanguageSettings, Attach, CachedLspAdapter, LanguageName,
LanguageRegistry, LspAdapterDelegate,
};
use lsp::LanguageServerName;
use settings::{Settings, SettingsLocation, WorktreeId};
use std::sync::OnceLock;
use crate::{project_settings::LspSettings, LanguageServerId, ProjectPath};
use super::{ManifestTree, ManifestTreeEvent};
/// Language server nodes known for a single worktree.
#[derive(Debug, Default)]
struct ServersForWorktree {
// Keyed by root path; for each root, maps a server name to its node and to the set of
// languages that node has been requested for.
roots: BTreeMap<
Arc<Path>,
BTreeMap<LanguageServerName, (Arc<InnerTreeNode>, BTreeSet<LanguageName>)>,
>,
}
/// Tracks, per worktree and root path, which language server nodes exist.
pub struct LanguageServerTree {
// Queried for the roots of a path whenever nodes are looked up.
manifest_tree: Entity<ManifestTree>,
// Nodes of the tree, grouped by the worktree their root path belongs to.
instances: BTreeMap<WorktreeId, ServersForWorktree>,
// NOTE(review): within the code visible here this cache is only ever cleared (when a rebase starts),
// never populated or read - confirm whether it is still needed.
attach_kind_cache: HashMap<LanguageServerName, Attach>,
// Source of LSP adapters.
languages: Arc<LanguageRegistry>,
// Keeps the subscription to `manifest_tree` events alive.
_subscriptions: Subscription,
}
/// A node in language server tree represents either:
/// - A language server that has already been initialized/updated for a given project
/// - A soon-to-be-initialized language server.
///
/// The node is held weakly; once the tree drops the underlying node, this handle no longer yields a server ID.
#[derive(Clone)]
pub(crate) struct LanguageServerTreeNode(Weak<InnerTreeNode>);
/// Describes a request to launch a language server.
///
/// Built from a tree node; the server name is borrowed from that node, the remaining fields are copies.
#[derive(Debug)]
pub(crate) struct LaunchDisposition<'a> {
// Name of the language server to launch.
pub(crate) server_name: &'a LanguageServerName,
// Attach kind recorded on the node this request was built from.
pub(crate) attach: Attach,
// Root path recorded on the node this request was built from.
pub(crate) path: ProjectPath,
// Settings recorded on the node this request was built from.
pub(crate) settings: Arc<LspSettings>,
}
impl<'a> From<&'a InnerTreeNode> for LaunchDisposition<'a> {
fn from(value: &'a InnerTreeNode) -> Self {
LaunchDisposition {
server_name: &value.name,
attach: value.attach,
path: value.path.clone(),
settings: value.settings.clone(),
}
}
}
impl LanguageServerTreeNode {
/// Returns a language server ID for this node if there is one.
/// Returns None if this node has not been initialized yet or it is no longer in the tree.
pub(crate) fn server_id(&self) -> Option<LanguageServerId> {
self.0.upgrade()?.id.get().copied()
}
/// Returns a language server ID for this node if it has already been initialized; otherwise runs the provided closure to initialize the language server node in a tree.
/// May return None if the node no longer belongs to the server tree it was created in.
pub(crate) fn server_id_or_init(
&self,
init: impl FnOnce(LaunchDisposition) -> LanguageServerId,
) -> Option<LanguageServerId> {
let this = self.0.upgrade()?;
Some(
*this
.id
.get_or_init(|| init(LaunchDisposition::from(&*this))),
)
}
}
impl From<Weak<InnerTreeNode>> for LanguageServerTreeNode {
fn from(weak: Weak<InnerTreeNode>) -> Self {
LanguageServerTreeNode(weak)
}
}
/// Data stored in the tree for a single language server at a single root path.
#[derive(Debug)]
struct InnerTreeNode {
// Set at most once: either by `LanguageServerTreeNode::server_id_or_init` or when a rebase reuses an old server's ID.
id: OnceLock<LanguageServerId>,
// Name of the language server (taken from its adapter).
name: LanguageServerName,
// Attach kind reported by the adapter when the node was created.
attach: Attach,
// Root path this node was created for.
path: ProjectPath,
// Settings the node was created with.
settings: Arc<LspSettings>,
}
impl InnerTreeNode {
fn new(
name: LanguageServerName,
attach: Attach,
path: ProjectPath,
settings: impl Into<Arc<LspSettings>>,
) -> Self {
InnerTreeNode {
id: Default::default(),
name,
attach,
path,
settings: settings.into(),
}
}
}
/// Determines how the list of adapters to query should be constructed.
pub(crate) enum AdapterQuery<'a> {
/// Search for roots of all adapters associated with a given language name.
///
/// Adapters are picked according to the language settings that apply to the queried path.
Language(&'a LanguageName),
/// Search for roots of adapter with a given name.
///
/// The adapter is paired with default `LspSettings` and an empty set of languages.
Adapter(&'a LanguageServerName),
}
impl LanguageServerTree {
/// Creates an empty tree entity that subscribes to events of `manifest_tree`.
pub(crate) fn new(
manifest_tree: Entity<ManifestTree>,
languages: Arc<LanguageRegistry>,
cx: &mut App,
) -> Entity<Self> {
cx.new(|cx| Self {
// NOTE(review): the handler compares the event against `Cleared` but the body of the `if` is empty,
// so the subscription currently has no effect - confirm whether handling is still to be implemented.
_subscriptions: cx.subscribe(&manifest_tree, |_: &mut Self, _, event, _| {
if event == &ManifestTreeEvent::Cleared {}
}),
manifest_tree,
instances: Default::default(),
attach_kind_cache: Default::default(),
languages,
})
}
/// Get all language server root points for a given path and language; the language servers might already be initialized at a given path.
pub(crate) fn get<'a>(
&'a mut self,
path: ProjectPath,
query: AdapterQuery<'_>,
delegate: Arc<dyn LspAdapterDelegate>,
cx: &mut App,
) -> impl Iterator<Item = LanguageServerTreeNode> + 'a {
let settings_location = SettingsLocation {
worktree_id: path.worktree_id,
path: &path.path,
};
let adapters = match query {
AdapterQuery::Language(language_name) => {
self.adapters_for_language(settings_location, language_name, cx)
}
// A by-name query yields at most one adapter, with default settings and no languages attached.
AdapterQuery::Adapter(language_server_name) => {
IndexMap::from_iter(self.adapter_for_name(language_server_name).map(|adapter| {
(
adapter.name(),
(LspSettings::default(), BTreeSet::new(), adapter),
)
}))
}
};
self.get_with_adapters(path, adapters, delegate, cx)
}
/// Resolves a root for each of `adapters` and returns a weak handle to the node stored for
/// that (worktree, root path, server name) triple, creating the node if it does not exist yet.
///
/// The returned iterator is lazy: nodes are looked up/created as it is advanced.
fn get_with_adapters<'a>(
&'a mut self,
path: ProjectPath,
adapters: IndexMap<
LanguageServerName,
(LspSettings, BTreeSet<LanguageName>, Arc<CachedLspAdapter>),
>,
delegate: Arc<dyn LspAdapterDelegate>,
cx: &mut App,
) -> impl Iterator<Item = LanguageServerTreeNode> + 'a {
let worktree_id = path.worktree_id;
// Group adapters by manifest name, so that each distinct manifest name is passed to the manifest tree once.
let mut manifest_to_adapters = BTreeMap::default();
for (_, _, adapter) in adapters.values() {
if let Some(manifest_name) = adapter.manifest_name() {
manifest_to_adapters
.entry(manifest_name)
.or_insert_with(Vec::default)
.push(adapter.clone());
}
}
let roots = self.manifest_tree.update(cx, |this, cx| {
this.root_for_path(
path,
&mut manifest_to_adapters.keys().cloned(),
delegate,
cx,
)
});
// Fallback root (the empty path within the worktree), only constructed if some adapter needs it.
let root_path = std::cell::LazyCell::new(move || ProjectPath {
worktree_id,
path: Arc::from("".as_ref()),
});
adapters
.into_iter()
.map(move |(_, (settings, new_languages, adapter))| {
// Backwards-compat: Fill in any adapters for which we did not detect the root as having the project root at the root of a worktree.
let root_path = adapter
.manifest_name()
.and_then(|name| roots.get(&name))
.cloned()
.unwrap_or_else(|| root_path.clone());
let attach = adapter.attach_kind();
let inner_node = self
.instances
.entry(root_path.worktree_id)
.or_default()
.roots
.entry(root_path.path.clone())
.or_default()
.entry(adapter.name());
// An existing node is kept as is; `attach` and `settings` are only used when a new node is created.
let (node, languages) = inner_node.or_insert_with(|| {
(
Arc::new(InnerTreeNode::new(
adapter.name(),
attach,
root_path.clone(),
settings.clone(),
)),
Default::default(),
)
});
languages.extend(new_languages.iter().cloned());
Arc::downgrade(&node).into()
})
}
/// Looks up an adapter by the name of its language server in the language registry.
fn adapter_for_name(&self, name: &LanguageServerName) -> Option<Arc<CachedLspAdapter>> {
self.languages.adapter_for_name(name)
}
/// Collects the adapters to use for `language_name` at `settings_location`, each paired with its
/// settings and a set containing `language_name`.
///
/// Returns an empty map when language servers are disabled by the settings that apply to the location.
fn adapters_for_language(
&self,
settings_location: SettingsLocation,
language_name: &LanguageName,
cx: &App,
) -> IndexMap<LanguageServerName, (LspSettings, BTreeSet<LanguageName>, Arc<CachedLspAdapter>)>
{
let settings = AllLanguageSettings::get(Some(settings_location), cx).language(
Some(settings_location),
Some(language_name),
cx,
);
if !settings.enable_language_server {
return Default::default();
}
let available_lsp_adapters = self.languages.lsp_adapters(&language_name);
let available_language_servers = available_lsp_adapters
.iter()
.map(|lsp_adapter| lsp_adapter.name.clone())
.collect::<Vec<_>>();
let desired_language_servers =
settings.customized_language_servers(&available_language_servers);
let adapters_with_settings = desired_language_servers
.into_iter()
.filter_map(|desired_adapter| {
// Prefer an adapter already available for the language; otherwise try to load one by name and
// register it for the language. Desired servers that cannot be resolved either way are skipped.
let adapter = if let Some(adapter) = available_lsp_adapters
.iter()
.find(|adapter| adapter.name == desired_adapter)
{
Some(adapter.clone())
} else if let Some(adapter) =
self.languages.load_available_lsp_adapter(&desired_adapter)
{
self.languages
.register_lsp_adapter(language_name.clone(), adapter.adapter.clone());
Some(adapter)
} else {
None
}?;
// Settings default to `LspSettings::default()` when none are found for this server.
let adapter_settings = crate::lsp_store::language_server_settings_for(
settings_location,
&adapter.name,
cx,
)
.cloned()
.unwrap_or_default();
Some((
adapter.name(),
(
adapter_settings,
BTreeSet::from_iter([language_name.clone()]),
adapter,
),
))
})
.collect::<IndexMap<_, _>>();
// After starting all the language servers, reorder them to reflect the desired order
// based on the settings.
//
// This is done, in part, to ensure that language servers loaded at different points
// (e.g., native vs extension) still end up in the right order at the end, rather than
// it being based on which language server happened to be loaded in first.
self.languages.reorder_language_servers(
&language_name,
adapters_with_settings
.values()
.map(|(_, _, adapter)| adapter.clone())
.collect(),
);
adapters_with_settings
}
// Rebasing a tree:
// - Clears it out
// - Provides you with the indirect access to the old tree while you're reinitializing a new one (by querying it).
pub(crate) fn rebase(&mut self) -> ServerTreeRebase<'_> {
ServerTreeRebase::new(self)
}
/// Remove nodes with a given ID from the tree.
///
/// Nodes that have no ID assigned yet are always kept.
pub(crate) fn remove_nodes(&mut self, ids: &BTreeSet<LanguageServerId>) {
for (_, servers) in &mut self.instances {
for (_, nodes) in &mut servers.roots {
nodes.retain(|_, (node, _)| node.id.get().map_or(true, |id| !ids.contains(&id)));
}
}
}
}
/// An in-progress rebase of a [`LanguageServerTree`]: the tree has been emptied and is being repopulated
/// through [`ServerTreeRebase::get`], while its previous contents are kept around for server reuse.
pub(crate) struct ServerTreeRebase<'a> {
/// Contents of the tree at the time the rebase was started.
old_contents: BTreeMap<WorktreeId, ServersForWorktree>,
/// The tree being repopulated.
new_tree: &'a mut LanguageServerTree,
/// All server IDs seen in the old tree.
all_server_ids: BTreeMap<LanguageServerId, LanguageServerName>,
/// Server IDs we've preserved for a new iteration of the tree. `all_server_ids - rebased_server_ids` is the
/// set of server IDs that can be shut down.
rebased_server_ids: BTreeSet<LanguageServerId>,
}
impl<'tree> ServerTreeRebase<'tree> {
    /// Starts a rebase: takes the current contents out of `new_tree` (leaving it empty),
    /// clears its attach kind cache and records the ID and name of every server that had
    /// already been initialized.
    fn new(new_tree: &'tree mut LanguageServerTree) -> Self {
        let old_contents = std::mem::take(&mut new_tree.instances);
        new_tree.attach_kind_cache.clear();

        let mut all_server_ids = BTreeMap::new();
        for worktree in old_contents.values() {
            for servers in worktree.roots.values() {
                for (node, _) in servers.values() {
                    if let Some(id) = node.id.get().copied() {
                        all_server_ids.insert(id, node.name.clone());
                    }
                }
            }
        }

        Self {
            old_contents,
            new_tree,
            all_server_ids,
            rebased_server_ids: BTreeSet::new(),
        }
    }

    /// Queries the tree being rebuilt, like [`LanguageServerTree::get`] does.
    ///
    /// Additionally, every returned node that has no server ID yet inherits the ID of the
    /// node the old tree had for the same worktree, root path and server name - provided
    /// that the attach kind and the settings are unchanged. IDs inherited this way are
    /// remembered, so that [`Self::finish`] does not report them as unused.
    pub(crate) fn get<'a>(
        &'a mut self,
        path: ProjectPath,
        query: AdapterQuery<'_>,
        delegate: Arc<dyn LspAdapterDelegate>,
        cx: &mut App,
    ) -> impl Iterator<Item = LanguageServerTreeNode> + 'a {
        let adapters = match query {
            AdapterQuery::Language(language_name) => {
                let settings_location = SettingsLocation {
                    worktree_id: path.worktree_id,
                    path: &path.path,
                };
                self.new_tree
                    .adapters_for_language(settings_location, language_name, cx)
            }
            AdapterQuery::Adapter(language_server_name) => self
                .new_tree
                .adapter_for_name(language_server_name)
                .map(|adapter| {
                    (
                        adapter.name(),
                        (LspSettings::default(), BTreeSet::new(), adapter),
                    )
                })
                .into_iter()
                .collect(),
        };

        self.new_tree
            .get_with_adapters(path, adapters, delegate, cx)
            .filter_map(|node| {
                // Nodes that are gone are dropped from the output; nodes that already
                // have an ID are passed through untouched.
                let fresh = node.0.upgrade()?;
                if fresh.id.get().is_some() {
                    return Some(node);
                }

                // Look for a counterpart in the old tree that was launched with the very
                // same attach kind and settings, and take over its ID if it has one.
                let reusable_id = self
                    .old_contents
                    .get(&fresh.path.worktree_id)
                    .and_then(|worktree| worktree.roots.get(&fresh.path.path))
                    .and_then(|servers| servers.get(&fresh.name))
                    .filter(|(previous, _)| {
                        fresh.attach == previous.attach && fresh.settings == previous.settings
                    })
                    .and_then(|(previous, _)| previous.id.get());
                if let Some(existing_id) = reusable_id {
                    self.rebased_server_ids.insert(*existing_id);
                    fresh.id.set(*existing_id).ok();
                }
                Some(node)
            })
    }

    /// Returns IDs of servers that are no longer referenced (and can be shut down).
    pub(crate) fn finish(self) -> BTreeMap<LanguageServerId, LanguageServerName> {
        let Self {
            mut all_server_ids,
            rebased_server_ids,
            ..
        } = self;
        all_server_ids.retain(|id, _| !rebased_server_ids.contains(id));
        all_server_ids
    }
}