Lay some groundwork for language plugins (#3837)

This PR adds undocumented functionality for loading custom language
plugins at runtime. I don't intend to expose the functionality to end
users yet, but this will allow the team to test the capability
internally.

### Implementation

There isn't much new code in Zed. Most of the work here is within
Tree-sitter, in PRs https://github.com/tree-sitter/tree-sitter/pull/1864
and https://github.com/tree-sitter/tree-sitter/pull/2840, which allow
Tree-sitter to load languages from WASM blobs. I've tested the
functionality in Tree-sitter's test suite and via its CLI, but having it
wired into Zed allows us to test the functionality more fully.

### Details

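Now, on startup, Zed will look for subdirectories inside of
`~/Library/Application Support/Zed/plugins`. These subdirectories are expected to
look similar to the per-language subdirectories in
[`crates/zed2/src/languages`](https://github.com/zed-industries/zed/tree/main/crates/zed2/src/languages),
except that they also contain a `.wasm` file for the parser itself.
Concretely, each subdirectory needs a `config.toml` that sets `grammar_name`,
and the parser is loaded from `<grammar_name>.wasm` in that same subdirectory.
For now, `highlights.scm` is the only query file that is read for these plugins.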

I'll add more details here as I go.
This commit is contained in:
Max Brunsfeld 2024-01-03 09:37:42 -08:00 committed by GitHub
commit b594e59134
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 518 additions and 95 deletions

View file

@ -1169,7 +1169,7 @@ impl Language {
indents_config: None,
injection_config: None,
override_config: None,
error_query: Query::new(ts_language, "(ERROR) @error").unwrap(),
error_query: Query::new(&ts_language, "(ERROR) @error").unwrap(),
ts_language,
highlight_map: Default::default(),
})
@ -1230,13 +1230,13 @@ impl Language {
pub fn with_highlights_query(mut self, source: &str) -> Result<Self> {
let grammar = self.grammar_mut();
grammar.highlights_query = Some(Query::new(grammar.ts_language, source)?);
grammar.highlights_query = Some(Query::new(&grammar.ts_language, source)?);
Ok(self)
}
pub fn with_outline_query(mut self, source: &str) -> Result<Self> {
let grammar = self.grammar_mut();
let query = Query::new(grammar.ts_language, source)?;
let query = Query::new(&grammar.ts_language, source)?;
let mut item_capture_ix = None;
let mut name_capture_ix = None;
let mut context_capture_ix = None;
@ -1264,7 +1264,7 @@ impl Language {
pub fn with_embedding_query(mut self, source: &str) -> Result<Self> {
let grammar = self.grammar_mut();
let query = Query::new(grammar.ts_language, source)?;
let query = Query::new(&grammar.ts_language, source)?;
let mut item_capture_ix = None;
let mut name_capture_ix = None;
let mut context_capture_ix = None;
@ -1295,7 +1295,7 @@ impl Language {
pub fn with_brackets_query(mut self, source: &str) -> Result<Self> {
let grammar = self.grammar_mut();
let query = Query::new(grammar.ts_language, source)?;
let query = Query::new(&grammar.ts_language, source)?;
let mut open_capture_ix = None;
let mut close_capture_ix = None;
get_capture_indices(
@ -1317,7 +1317,7 @@ impl Language {
pub fn with_indents_query(mut self, source: &str) -> Result<Self> {
let grammar = self.grammar_mut();
let query = Query::new(grammar.ts_language, source)?;
let query = Query::new(&grammar.ts_language, source)?;
let mut indent_capture_ix = None;
let mut start_capture_ix = None;
let mut end_capture_ix = None;
@ -1345,7 +1345,7 @@ impl Language {
pub fn with_injection_query(mut self, source: &str) -> Result<Self> {
let grammar = self.grammar_mut();
let query = Query::new(grammar.ts_language, source)?;
let query = Query::new(&grammar.ts_language, source)?;
let mut language_capture_ix = None;
let mut content_capture_ix = None;
get_capture_indices(
@ -1384,7 +1384,7 @@ impl Language {
}
pub fn with_override_query(mut self, source: &str) -> anyhow::Result<Self> {
let query = Query::new(self.grammar_mut().ts_language, source)?;
let query = Query::new(&self.grammar_mut().ts_language, source)?;
let mut override_configs_by_id = HashMap::default();
for (ix, name) in query.capture_names().iter().copied().enumerate() {
@ -1695,7 +1695,7 @@ impl Grammar {
PARSER.with(|parser| {
let mut parser = parser.borrow_mut();
parser
.set_language(self.ts_language)
.set_language(&self.ts_language)
.expect("incompatible grammar");
let mut chunks = text.chunks_in_range(0..text.len());
parser

View file

@ -1166,7 +1166,7 @@ fn parse_text(
.set_included_ranges(&ranges)
.expect("overlapping ranges");
parser
.set_language(grammar.ts_language)
.set_language(&grammar.ts_language)
.expect("incompatible grammar");
parser
.parse_with(

View file

@ -44,7 +44,7 @@ use std::{
};
use syntax_map::SyntaxSnapshot;
use theme::{SyntaxTheme, Theme};
use tree_sitter::{self, Query};
use tree_sitter::{self, wasmtime, Query, WasmStore};
use unicase::UniCase;
use util::{http::HttpClient, paths::PathExt};
use util::{post_inc, ResultExt, TryFutureExt as _, UnwrapFuture};
@ -84,10 +84,15 @@ impl LspBinaryStatusSender {
}
thread_local! {
static PARSER: RefCell<Parser> = RefCell::new(Parser::new());
static PARSER: RefCell<Parser> = {
let mut parser = Parser::new();
parser.set_wasm_store(WasmStore::new(WASM_ENGINE.clone()).unwrap()).unwrap();
RefCell::new(parser)
};
}
lazy_static! {
pub static ref WASM_ENGINE: wasmtime::Engine = wasmtime::Engine::default();
pub static ref NEXT_GRAMMAR_ID: AtomicUsize = Default::default();
pub static ref PLAIN_TEXT: Arc<Language> = Arc::new(Language::new(
LanguageConfig {
@ -360,6 +365,7 @@ pub struct CodeLabel {
#[derive(Clone, Deserialize)]
pub struct LanguageConfig {
pub name: Arc<str>,
pub grammar_name: Option<Arc<str>>,
pub path_suffixes: Vec<String>,
pub brackets: BracketPairConfig,
#[serde(default, deserialize_with = "deserialize_regex")]
@ -446,6 +452,7 @@ impl Default for LanguageConfig {
fn default() -> Self {
Self {
name: "".into(),
grammar_name: None,
path_suffixes: Default::default(),
brackets: Default::default(),
auto_indent_using_last_non_empty_line: auto_indent_using_last_non_empty_line_default(),
@ -620,14 +627,25 @@ type AvailableLanguageId = usize;
#[derive(Clone)]
struct AvailableLanguage {
id: AvailableLanguageId,
path: &'static str,
config: LanguageConfig,
grammar: tree_sitter::Language,
grammar: AvailableGrammar,
lsp_adapters: Vec<Arc<dyn LspAdapter>>,
get_queries: fn(&str) -> LanguageQueries,
loaded: bool,
}
// Where the grammar for a registered-but-not-yet-loaded language comes from.
#[derive(Clone)]
enum AvailableGrammar {
// A grammar that is already available in-process as a `tree_sitter::Language`.
Native {
grammar: tree_sitter::Language,
// Passed to `get_queries` when the language is loaded.
asset_dir: &'static str,
// Produces the language's queries from `asset_dir`.
get_queries: fn(&str) -> LanguageQueries,
},
// A grammar that is loaded from a WASM file on disk when the language is loaded.
Wasm {
// Name of the grammar; the loader reads `<path>/<grammar_name>.wasm`.
grammar_name: Arc<str>,
// Directory containing the `.wasm` file (and, optionally, `highlights.scm`).
path: Arc<Path>,
},
}
pub struct LanguageRegistry {
state: RwLock<LanguageRegistryState>,
language_server_download_dir: Option<Arc<Path>>,
@ -699,7 +717,7 @@ impl LanguageRegistry {
pub fn register(
&self,
path: &'static str,
asset_dir: &'static str,
config: LanguageConfig,
grammar: tree_sitter::Language,
lsp_adapters: Vec<Arc<dyn LspAdapter>>,
@ -708,11 +726,24 @@ impl LanguageRegistry {
let state = &mut *self.state.write();
state.available_languages.push(AvailableLanguage {
id: post_inc(&mut state.next_available_language_id),
path,
config,
grammar,
grammar: AvailableGrammar::Native {
grammar,
get_queries,
asset_dir,
},
lsp_adapters,
get_queries,
loaded: false,
});
}
/// Registers a language whose grammar will be loaded from a WASM file.
///
/// `path` is the directory holding the plugin's files and `grammar_name` is the
/// name of the grammar inside it. Nothing is read from disk here: the entry is
/// only recorded as available (with no LSP adapters and `loaded: false`).
pub fn register_wasm(&self, path: Arc<Path>, grammar_name: Arc<str>, config: LanguageConfig) {
    // Reborrow through the guard so that two fields of the state can be
    // borrowed independently below.
    let registry_state = &mut *self.state.write();
    let id = post_inc(&mut registry_state.next_available_language_id);
    let grammar = AvailableGrammar::Wasm { grammar_name, path };
    let entry = AvailableLanguage {
        id,
        config,
        grammar,
        lsp_adapters: Vec::new(),
        loaded: false,
    };
    registry_state.available_languages.push(entry);
}
@ -837,13 +868,43 @@ impl LanguageRegistry {
executor
.spawn(async move {
let id = language.id;
let queries = (language.get_queries)(&language.path);
let language =
Language::new(language.config, Some(language.grammar))
let name = language.config.name.clone();
let language = async {
let (grammar, queries) = match language.grammar {
AvailableGrammar::Native {
grammar,
asset_dir,
get_queries,
} => (grammar, (get_queries)(asset_dir)),
AvailableGrammar::Wasm { grammar_name, path } => {
let mut wasm_path = path.join(grammar_name.as_ref());
wasm_path.set_extension("wasm");
let wasm_bytes = std::fs::read(&wasm_path)?;
let grammar = PARSER.with(|parser| {
let mut parser = parser.borrow_mut();
let mut store = parser.take_wasm_store().unwrap();
let grammar =
store.load_language(&grammar_name, &wasm_bytes);
parser.set_wasm_store(store).unwrap();
grammar
})?;
let mut queries = LanguageQueries::default();
if let Ok(contents) = std::fs::read_to_string(
&path.join("highlights.scm"),
) {
queries.highlights = Some(contents.into());
}
(grammar, queries)
}
};
Language::new(language.config, Some(grammar))
.with_lsp_adapters(language.lsp_adapters)
.await;
let name = language.name();
match language.with_queries(queries) {
.await
.with_queries(queries)
}
.await;
match language {
Ok(language) => {
let language = Arc::new(language);
let mut state = this.state.write();
@ -1175,7 +1236,7 @@ impl Language {
indents_config: None,
injection_config: None,
override_config: None,
error_query: Query::new(ts_language, "(ERROR) @error").unwrap(),
error_query: Query::new(&ts_language, "(ERROR) @error").unwrap(),
ts_language,
highlight_map: Default::default(),
})
@ -1236,13 +1297,13 @@ impl Language {
pub fn with_highlights_query(mut self, source: &str) -> Result<Self> {
let grammar = self.grammar_mut();
grammar.highlights_query = Some(Query::new(grammar.ts_language, source)?);
grammar.highlights_query = Some(Query::new(&grammar.ts_language, source)?);
Ok(self)
}
pub fn with_outline_query(mut self, source: &str) -> Result<Self> {
let grammar = self.grammar_mut();
let query = Query::new(grammar.ts_language, source)?;
let query = Query::new(&grammar.ts_language, source)?;
let mut item_capture_ix = None;
let mut name_capture_ix = None;
let mut context_capture_ix = None;
@ -1270,7 +1331,7 @@ impl Language {
pub fn with_embedding_query(mut self, source: &str) -> Result<Self> {
let grammar = self.grammar_mut();
let query = Query::new(grammar.ts_language, source)?;
let query = Query::new(&grammar.ts_language, source)?;
let mut item_capture_ix = None;
let mut name_capture_ix = None;
let mut context_capture_ix = None;
@ -1301,7 +1362,7 @@ impl Language {
pub fn with_brackets_query(mut self, source: &str) -> Result<Self> {
let grammar = self.grammar_mut();
let query = Query::new(grammar.ts_language, source)?;
let query = Query::new(&grammar.ts_language, source)?;
let mut open_capture_ix = None;
let mut close_capture_ix = None;
get_capture_indices(
@ -1323,7 +1384,7 @@ impl Language {
pub fn with_indents_query(mut self, source: &str) -> Result<Self> {
let grammar = self.grammar_mut();
let query = Query::new(grammar.ts_language, source)?;
let query = Query::new(&grammar.ts_language, source)?;
let mut indent_capture_ix = None;
let mut start_capture_ix = None;
let mut end_capture_ix = None;
@ -1351,7 +1412,7 @@ impl Language {
pub fn with_injection_query(mut self, source: &str) -> Result<Self> {
let grammar = self.grammar_mut();
let query = Query::new(grammar.ts_language, source)?;
let query = Query::new(&grammar.ts_language, source)?;
let mut language_capture_ix = None;
let mut content_capture_ix = None;
get_capture_indices(
@ -1390,7 +1451,7 @@ impl Language {
}
pub fn with_override_query(mut self, source: &str) -> anyhow::Result<Self> {
let query = Query::new(self.grammar_mut().ts_language, source)?;
let query = Query::new(&self.grammar_mut().ts_language, source)?;
let mut override_configs_by_id = HashMap::default();
for (ix, name) in query.capture_names().iter().enumerate() {
@ -1701,7 +1762,7 @@ impl Grammar {
PARSER.with(|parser| {
let mut parser = parser.borrow_mut();
parser
.set_language(self.ts_language)
.set_language(&self.ts_language)
.expect("incompatible grammar");
let mut chunks = text.chunks_in_range(0..text.len());
parser

View file

@ -7,7 +7,6 @@ use futures::FutureExt;
use parking_lot::Mutex;
use std::{
borrow::Cow,
cell::RefCell,
cmp::{self, Ordering, Reverse},
collections::BinaryHeap,
fmt, iter,
@ -16,13 +15,9 @@ use std::{
};
use sum_tree::{Bias, SeekTarget, SumTree};
use text::{Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset, ToPoint};
use tree_sitter::{
Node, Parser, Query, QueryCapture, QueryCaptures, QueryCursor, QueryMatches, Tree,
};
use tree_sitter::{Node, Query, QueryCapture, QueryCaptures, QueryCursor, QueryMatches, Tree};
thread_local! {
static PARSER: RefCell<Parser> = RefCell::new(Parser::new());
}
use super::PARSER;
static QUERY_CURSORS: Mutex<Vec<QueryCursor>> = Mutex::new(vec![]);
@ -1166,7 +1161,7 @@ fn parse_text(
.set_included_ranges(&ranges)
.expect("overlapping ranges");
parser
.set_language(grammar.ts_language)
.set_language(&grammar.ts_language)
.expect("incompatible grammar");
parser
.parse_with(

View file

@ -169,7 +169,7 @@ impl CodeContextRetriever {
.embedding_config
.as_ref()
.ok_or_else(|| anyhow!("no embedding queries"))?;
self.parser.set_language(grammar.ts_language).unwrap();
self.parser.set_language(&grammar.ts_language).unwrap();
let tree = self
.parser

View file

@ -169,7 +169,7 @@ impl CodeContextRetriever {
.embedding_config
.as_ref()
.ok_or_else(|| anyhow!("no embedding queries"))?;
self.parser.set_language(grammar.ts_language).unwrap();
self.parser.set_language(&grammar.ts_language).unwrap();
let tree = self
.parser

View file

@ -679,14 +679,14 @@ fn replace_value_in_json_text(
lazy_static! {
static ref PAIR_QUERY: tree_sitter::Query = tree_sitter::Query::new(
tree_sitter_json::language(),
&tree_sitter_json::language(),
"(pair key: (string) @key value: (_) @value)",
)
.unwrap();
}
let mut parser = tree_sitter::Parser::new();
parser.set_language(tree_sitter_json::language()).unwrap();
parser.set_language(&tree_sitter_json::language()).unwrap();
let syntax_tree = parser.parse(text, None).unwrap();
let mut cursor = tree_sitter::QueryCursor::new();

View file

@ -715,14 +715,14 @@ fn replace_value_in_json_text(
lazy_static! {
static ref PAIR_QUERY: tree_sitter::Query = tree_sitter::Query::new(
tree_sitter_json::language(),
&tree_sitter_json::language(),
"(pair key: (string) @key value: (_) @value)",
)
.unwrap();
}
let mut parser = tree_sitter::Parser::new();
parser.set_language(tree_sitter_json::language()).unwrap();
parser.set_language(&tree_sitter_json::language()).unwrap();
let syntax_tree = parser.parse(text, None).unwrap();
let mut cursor = tree_sitter::QueryCursor::new();

View file

@ -10,6 +10,7 @@ lazy_static::lazy_static! {
pub static ref EMBEDDINGS_DIR: PathBuf = HOME.join(".config/zed/embeddings");
pub static ref LOGS_DIR: PathBuf = HOME.join("Library/Logs/Zed");
pub static ref SUPPORT_DIR: PathBuf = HOME.join("Library/Application Support/Zed");
pub static ref PLUGINS_DIR: PathBuf = HOME.join("Library/Application Support/Zed/plugins");
pub static ref LANGUAGES_DIR: PathBuf = HOME.join("Library/Application Support/Zed/languages");
pub static ref COPILOT_DIR: PathBuf = HOME.join("Library/Application Support/Zed/copilot");
pub static ref DEFAULT_PRETTIER_DIR: PathBuf = HOME.join("Library/Application Support/Zed/prettier");

View file

@ -5,7 +5,7 @@ use node_runtime::NodeRuntime;
use rust_embed::RustEmbed;
use settings::Settings;
use std::{borrow::Cow, str, sync::Arc};
use util::asset_str;
use util::{asset_str, paths::PLUGINS_DIR};
use self::elixir::ElixirSettings;
@ -228,6 +228,21 @@ pub fn init(
tree_sitter_uiua::language(),
vec![Arc::new(uiua::UiuaLanguageServer {})],
);
// Discover WASM language plugins. Every entry of `PLUGINS_DIR` that contains a
// readable `config.toml` naming a grammar is registered as a WASM-backed language.
// Discovery is best-effort: a missing plugins directory, unreadable entries, and
// unreadable or malformed configs are skipped rather than aborting startup.
if let Ok(children) = std::fs::read_dir(&*PLUGINS_DIR) {
    // `flatten` drops directory entries that failed to be read.
    for child in children.flatten() {
        let path = child.path();
        let config_path = path.join("config.toml");
        if let Ok(config) = std::fs::read(&config_path) {
            // The config is user-provided, so a parse failure must not panic;
            // the plugin is simply ignored.
            let parsed: Result<LanguageConfig, _> = toml::from_slice(&config);
            if let Ok(config) = parsed {
                // Only configs that name a grammar can be loaded from WASM.
                if let Some(grammar_name) = config.grammar_name.clone() {
                    languages.register_wasm(path.into(), grammar_name, config);
                }
            }
        }
    }
}
}
#[cfg(any(test, feature = "test-support"))]