//! The `language` crate provides a large chunk of Zed's language-related //! features (the other big contributors being project and lsp crates that revolve around LSP features). //! Namely, this crate: //! - Provides [`Language`], [`Grammar`] and [`LanguageRegistry`] types that //! use Tree-sitter to provide syntax highlighting to the editor; note though that `language` doesn't perform the highlighting by itself. It only maps ranges in a buffer to colors. Treesitter is also used for buffer outlines (lists of symbols in a buffer) //! - Exposes [`LanguageConfig`] that describes how constructs (like brackets or line comments) should be handled by the editor for a source file of a particular language. //! //! Notably we do *not* assign a single language to a single file; in real world a single file can consist of multiple programming languages - HTML is a good example of that - and `language` crate tends to reflect that status quo in its API. mod buffer; mod diagnostic_set; mod highlight_map; mod language_registry; pub mod language_settings; mod outline; pub mod proto; mod syntax_map; mod task_context; mod toolchain; #[cfg(test)] pub mod buffer_tests; pub mod markdown; use crate::language_settings::SoftWrap; use anyhow::{anyhow, Context, Result}; use async_trait::async_trait; use collections::{HashMap, HashSet}; use futures::Future; use gpui::{AppContext, AsyncAppContext, Model, SharedString, Task}; pub use highlight_map::HighlightMap; use http_client::HttpClient; pub use language_registry::{LanguageName, LoadedLanguage}; use lsp::{CodeActionKind, LanguageServerBinary, LanguageServerBinaryOptions, LanguageServerName}; use parking_lot::Mutex; use regex::Regex; use schemars::{ gen::SchemaGenerator, schema::{InstanceType, Schema, SchemaObject}, JsonSchema, }; use serde::{de, Deserialize, Deserializer, Serialize, Serializer}; use serde_json::Value; use settings::WorktreeId; use smol::future::FutureExt as _; use std::num::NonZeroU32; use std::{ any::Any, ffi::OsStr, 
fmt::Debug, hash::Hash, mem, ops::{DerefMut, Range}, path::{Path, PathBuf}, pin::Pin, str, sync::{ atomic::{AtomicU64, AtomicUsize, Ordering::SeqCst}, Arc, LazyLock, }, }; use syntax_map::{QueryCursorHandle, SyntaxSnapshot}; use task::RunnableTag; pub use task_context::{ContextProvider, RunnableRange}; use theme::SyntaxTheme; pub use toolchain::{LanguageToolchainStore, Toolchain, ToolchainList, ToolchainLister}; use tree_sitter::{self, wasmtime, Query, QueryCursor, WasmStore}; use util::serde::default_true; pub use buffer::Operation; pub use buffer::*; pub use diagnostic_set::DiagnosticEntry; pub use language_registry::{ AvailableLanguage, LanguageNotFound, LanguageQueries, LanguageRegistry, LanguageServerBinaryStatus, QUERY_FILENAME_PREFIXES, }; pub use lsp::LanguageServerId; pub use outline::*; pub use syntax_map::{OwnedSyntaxLayer, SyntaxLayer}; pub use text::{AnchorRangeExt, LineEnding}; pub use tree_sitter::{Node, Parser, Tree, TreeCursor};
/// Initializes the `language` crate.
///
/// This should be called before making use of items from the crate.
pub fn init(cx: &mut AppContext) { language_settings::init(cx); } static QUERY_CURSORS: Mutex> = Mutex::new(vec![]); static PARSERS: Mutex> = Mutex::new(vec![]); pub fn with_parser(func: F) -> R where F: FnOnce(&mut Parser) -> R, { let mut parser = PARSERS.lock().pop().unwrap_or_else(|| { let mut parser = Parser::new(); parser .set_wasm_store(WasmStore::new(&WASM_ENGINE).unwrap()) .unwrap(); parser }); parser.set_included_ranges(&[]).unwrap(); let result = func(&mut parser); PARSERS.lock().push(parser); result } pub fn with_query_cursor(func: F) -> R where F: FnOnce(&mut QueryCursor) -> R, { let mut cursor = QueryCursorHandle::new(); func(cursor.deref_mut()) } static NEXT_LANGUAGE_ID: LazyLock = LazyLock::new(Default::default); static NEXT_GRAMMAR_ID: LazyLock = LazyLock::new(Default::default); static WASM_ENGINE: LazyLock = LazyLock::new(|| { wasmtime::Engine::new(&wasmtime::Config::new()).expect("Failed to create Wasmtime engine") }); /// A shared grammar for plain text, exposed for reuse by downstream crates. pub static PLAIN_TEXT: LazyLock> = LazyLock::new(|| { Arc::new(Language::new( LanguageConfig { name: "Plain Text".into(), soft_wrap: Some(SoftWrap::EditorWidth), ..Default::default() }, None, )) }); /// Types that represent a position in a buffer, and can be converted into /// an LSP position, to send to a language server. pub trait ToLspPosition { /// Converts the value into an LSP position. fn to_lsp_position(self) -> lsp::Position; } #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Location { pub buffer: Model, pub range: Range, } /// Represents a Language Server, with certain cached sync properties. /// Uses [`LspAdapter`] under the hood, but calls all 'static' methods /// once at startup, and caches the results. 
pub struct CachedLspAdapter { pub name: LanguageServerName, pub disk_based_diagnostic_sources: Vec, pub disk_based_diagnostics_progress_token: Option, language_ids: HashMap, pub adapter: Arc, pub reinstall_attempt_count: AtomicU64, cached_binary: futures::lock::Mutex>, } impl Debug for CachedLspAdapter { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("CachedLspAdapter") .field("name", &self.name) .field( "disk_based_diagnostic_sources", &self.disk_based_diagnostic_sources, ) .field( "disk_based_diagnostics_progress_token", &self.disk_based_diagnostics_progress_token, ) .field("language_ids", &self.language_ids) .field("reinstall_attempt_count", &self.reinstall_attempt_count) .finish_non_exhaustive() } } impl CachedLspAdapter { pub fn new(adapter: Arc) -> Arc { let name = adapter.name(); let disk_based_diagnostic_sources = adapter.disk_based_diagnostic_sources(); let disk_based_diagnostics_progress_token = adapter.disk_based_diagnostics_progress_token(); let language_ids = adapter.language_ids(); Arc::new(CachedLspAdapter { name, disk_based_diagnostic_sources, disk_based_diagnostics_progress_token, language_ids, adapter, cached_binary: Default::default(), reinstall_attempt_count: AtomicU64::new(0), }) } pub fn name(&self) -> LanguageServerName { self.adapter.name().clone() } pub async fn get_language_server_command( self: Arc, delegate: Arc, binary_options: LanguageServerBinaryOptions, cx: &mut AsyncAppContext, ) -> Result { let cached_binary = self.cached_binary.lock().await; self.adapter .clone() .get_language_server_command(delegate, binary_options, cached_binary, cx) .await } pub fn code_action_kinds(&self) -> Option> { self.adapter.code_action_kinds() } pub fn process_diagnostics(&self, params: &mut lsp::PublishDiagnosticsParams) { self.adapter.process_diagnostics(params) } pub async fn process_completions(&self, completion_items: &mut [lsp::CompletionItem]) { self.adapter.process_completions(completion_items).await } pub 
async fn labels_for_completions( &self, completion_items: &[lsp::CompletionItem], language: &Arc, ) -> Result>> { self.adapter .clone() .labels_for_completions(completion_items, language) .await } pub async fn labels_for_symbols( &self, symbols: &[(String, lsp::SymbolKind)], language: &Arc, ) -> Result>> { self.adapter .clone() .labels_for_symbols(symbols, language) .await } pub fn language_id(&self, language_name: &LanguageName) -> String { self.language_ids .get(language_name.0.as_ref()) .cloned() .unwrap_or_else(|| language_name.lsp_id()) } } /// [`LspAdapterDelegate`] allows [`LspAdapter]` implementations to interface with the application // e.g. to display a notification or fetch data from the web. #[async_trait] pub trait LspAdapterDelegate: Send + Sync { fn show_notification(&self, message: &str, cx: &mut AppContext); fn http_client(&self) -> Arc; fn worktree_id(&self) -> WorktreeId; fn worktree_root_path(&self) -> &Path; fn update_status(&self, language: LanguageServerName, status: LanguageServerBinaryStatus); async fn language_server_download_dir(&self, name: &LanguageServerName) -> Option>; async fn npm_package_installed_version( &self, package_name: &str, ) -> Result>; async fn which(&self, command: &OsStr) -> Option; async fn shell_env(&self) -> HashMap; async fn read_text_file(&self, path: PathBuf) -> Result; async fn try_exec(&self, binary: LanguageServerBinary) -> Result<()>; } #[async_trait(?Send)] pub trait LspAdapter: 'static + Send + Sync { fn name(&self) -> LanguageServerName; fn get_language_server_command<'a>( self: Arc, delegate: Arc, binary_options: LanguageServerBinaryOptions, mut cached_binary: futures::lock::MutexGuard<'a, Option>, cx: &'a mut AsyncAppContext, ) -> Pin>>> { async move { // First we check whether the adapter can give us a user-installed binary. 
// If so, we do *not* want to cache that, because each worktree might give us a different // binary: // // worktree 1: user-installed at `.bin/gopls` // worktree 2: user-installed at `~/bin/gopls` // worktree 3: no gopls found in PATH -> fallback to Zed installation // // We only want to cache when we fall back to the global one, // because we don't want to download and overwrite our global one // for each worktree we might have open. if binary_options.allow_path_lookup { if let Some(binary) = self.check_if_user_installed(delegate.as_ref(), cx).await { log::info!( "found user-installed language server for {}. path: {:?}, arguments: {:?}", self.name().0, binary.path, binary.arguments ); return Ok(binary); } } if !binary_options.allow_binary_download { return Err(anyhow!("downloading language servers disabled")); } if let Some(cached_binary) = cached_binary.as_ref() { return Ok(cached_binary.clone()); } let Some(container_dir) = delegate.language_server_download_dir(&self.name()).await else { anyhow::bail!("no language server download dir defined") }; let mut binary = try_fetch_server_binary(self.as_ref(), &delegate, container_dir.to_path_buf(), cx).await; if let Err(error) = binary.as_ref() { if let Some(prev_downloaded_binary) = self .cached_server_binary(container_dir.to_path_buf(), delegate.as_ref()) .await { log::info!( "failed to fetch newest version of language server {:?}. 
error: {:?}, falling back to using {:?}", self.name(), error, prev_downloaded_binary.path ); binary = Ok(prev_downloaded_binary); } else { delegate.update_status( self.name(), LanguageServerBinaryStatus::Failed { error: format!("{error:?}"), }, ); } } if let Ok(binary) = &binary { *cached_binary = Some(binary.clone()); } binary } .boxed_local() } async fn check_if_user_installed( &self, _: &dyn LspAdapterDelegate, _: &AsyncAppContext, ) -> Option { None } async fn fetch_latest_server_version( &self, delegate: &dyn LspAdapterDelegate, ) -> Result>; fn will_fetch_server( &self, _: &Arc, _: &mut AsyncAppContext, ) -> Option>> { None } async fn fetch_server_binary( &self, latest_version: Box, container_dir: PathBuf, delegate: &dyn LspAdapterDelegate, ) -> Result; async fn cached_server_binary( &self, container_dir: PathBuf, delegate: &dyn LspAdapterDelegate, ) -> Option; fn process_diagnostics(&self, _: &mut lsp::PublishDiagnosticsParams) {} /// Post-processes completions provided by the language server. 
async fn process_completions(&self, _: &mut [lsp::CompletionItem]) {} async fn labels_for_completions( self: Arc, completions: &[lsp::CompletionItem], language: &Arc, ) -> Result>> { let mut labels = Vec::new(); for (ix, completion) in completions.iter().enumerate() { let label = self.label_for_completion(completion, language).await; if let Some(label) = label { labels.resize(ix + 1, None); *labels.last_mut().unwrap() = Some(label); } } Ok(labels) } async fn label_for_completion( &self, _: &lsp::CompletionItem, _: &Arc, ) -> Option { None } async fn labels_for_symbols( self: Arc, symbols: &[(String, lsp::SymbolKind)], language: &Arc, ) -> Result>> { let mut labels = Vec::new(); for (ix, (name, kind)) in symbols.iter().enumerate() { let label = self.label_for_symbol(name, *kind, language).await; if let Some(label) = label { labels.resize(ix + 1, None); *labels.last_mut().unwrap() = Some(label); } } Ok(labels) } async fn label_for_symbol( &self, _: &str, _: lsp::SymbolKind, _: &Arc, ) -> Option { None } /// Returns initialization options that are going to be sent to a LSP server as a part of [`lsp::InitializeParams`] async fn initialization_options( self: Arc, _: &Arc, ) -> Result> { Ok(None) } async fn workspace_configuration( self: Arc, _: &Arc, _: Arc, _cx: &mut AsyncAppContext, ) -> Result { Ok(serde_json::json!({})) } /// Returns a list of code actions supported by a given LspAdapter fn code_action_kinds(&self) -> Option> { Some(vec![ CodeActionKind::EMPTY, CodeActionKind::QUICKFIX, CodeActionKind::REFACTOR, CodeActionKind::REFACTOR_EXTRACT, CodeActionKind::SOURCE, ]) } fn disk_based_diagnostic_sources(&self) -> Vec { Default::default() } fn disk_based_diagnostics_progress_token(&self) -> Option { None } fn language_ids(&self) -> HashMap { Default::default() } } async fn try_fetch_server_binary( adapter: &L, delegate: &Arc, container_dir: PathBuf, cx: &mut AsyncAppContext, ) -> Result { if let Some(task) = adapter.will_fetch_server(delegate, cx) { task.await?; } 
let name = adapter.name(); log::info!("fetching latest version of language server {:?}", name.0); delegate.update_status(name.clone(), LanguageServerBinaryStatus::CheckingForUpdate); let latest_version = adapter .fetch_latest_server_version(delegate.as_ref()) .await?; log::info!("downloading language server {:?}", name.0); delegate.update_status(adapter.name(), LanguageServerBinaryStatus::Downloading); let binary = adapter .fetch_server_binary(latest_version, container_dir, delegate.as_ref()) .await; delegate.update_status(name.clone(), LanguageServerBinaryStatus::None); binary } #[derive(Clone, Debug, Default, PartialEq, Eq)] pub struct CodeLabel { /// The text to display. pub text: String, /// Syntax highlighting runs. pub runs: Vec<(Range, HighlightId)>, /// The portion of the text that should be used in fuzzy filtering. pub filter_range: Range, } #[derive(Clone, Deserialize, JsonSchema)] pub struct LanguageConfig { /// Human-readable name of the language. pub name: LanguageName, /// The name of this language for a Markdown code fence block pub code_fence_block_name: Option>, // The name of the grammar in a WASM bundle (experimental). pub grammar: Option>, /// The criteria for matching this language to a given file. #[serde(flatten)] pub matcher: LanguageMatcher, /// List of bracket types in a language. #[serde(default)] #[schemars(schema_with = "bracket_pair_config_json_schema")] pub brackets: BracketPairConfig, /// If set to true, auto indentation uses last non empty line to determine /// the indentation level for a new line. #[serde(default = "auto_indent_using_last_non_empty_line_default")] pub auto_indent_using_last_non_empty_line: bool, // Whether indentation of pasted content should be adjusted based on the context. #[serde(default)] pub auto_indent_on_paste: Option, /// A regex that is used to determine whether the indentation level should be /// increased in the following line. 
#[serde(default, deserialize_with = "deserialize_regex")] #[schemars(schema_with = "regex_json_schema")] pub increase_indent_pattern: Option, /// A regex that is used to determine whether the indentation level should be /// decreased in the following line. #[serde(default, deserialize_with = "deserialize_regex")] #[schemars(schema_with = "regex_json_schema")] pub decrease_indent_pattern: Option, /// A list of characters that trigger the automatic insertion of a closing /// bracket when they immediately precede the point where an opening /// bracket is inserted. #[serde(default)] pub autoclose_before: String, /// A placeholder used internally by Semantic Index. #[serde(default)] pub collapsed_placeholder: String, /// A line comment string that is inserted in e.g. `toggle comments` action. /// A language can have multiple flavours of line comments. All of the provided line comments are /// used for comment continuations on the next line, but only the first one is used for Editor::ToggleComments. #[serde(default)] pub line_comments: Vec>, /// Starting and closing characters of a block comment. #[serde(default)] pub block_comment: Option<(Arc, Arc)>, /// A list of language servers that are allowed to run on subranges of a given language. #[serde(default)] pub scope_opt_in_language_servers: Vec, #[serde(default)] pub overrides: HashMap, /// A list of characters that Zed should treat as word characters for the /// purpose of features that operate on word boundaries, like 'move to next word end' /// or a whole-word search in buffer search. #[serde(default)] pub word_characters: HashSet, /// Whether to indent lines using tab characters, as opposed to multiple /// spaces. #[serde(default)] pub hard_tabs: Option, /// How many columns a tab should occupy. #[serde(default)] pub tab_size: Option, /// How to soft-wrap long lines of text. #[serde(default)] pub soft_wrap: Option, /// The name of a Prettier parser that will be used for this language when no file path is available. 
/// If there's a parser name in the language settings, that will be used instead. #[serde(default)] pub prettier_parser_name: Option, /// If true, this language is only for syntax highlighting via an injection into other /// languages, but should not appear to the user as a distinct language. #[serde(default)] pub hidden: bool, } #[derive(Clone, Debug, Serialize, Deserialize, Default, JsonSchema)] pub struct LanguageMatcher { /// Given a list of `LanguageConfig`'s, the language of a file can be determined based on the path extension matching any of the `path_suffixes`. #[serde(default)] pub path_suffixes: Vec, /// A regex pattern that determines whether the language should be assigned to a file or not. #[serde( default, serialize_with = "serialize_regex", deserialize_with = "deserialize_regex" )] #[schemars(schema_with = "regex_json_schema")] pub first_line_pattern: Option, } /// Represents a language for the given range. Some languages (e.g. HTML) /// interleave several languages together, thus a single buffer might actually contain /// several nested scopes. 
#[derive(Clone, Debug)] pub struct LanguageScope { language: Arc, override_id: Option, } #[derive(Clone, Deserialize, Default, Debug, JsonSchema)] pub struct LanguageConfigOverride { #[serde(default)] pub line_comments: Override>>, #[serde(default)] pub block_comment: Override<(Arc, Arc)>, #[serde(skip_deserializing)] #[schemars(skip)] pub disabled_bracket_ixs: Vec, #[serde(default)] pub word_characters: Override>, #[serde(default)] pub opt_into_language_servers: Vec, } #[derive(Clone, Deserialize, Debug, Serialize, JsonSchema)] #[serde(untagged)] pub enum Override { Remove { remove: bool }, Set(T), } impl Default for Override { fn default() -> Self { Override::Remove { remove: false } } } impl Override { fn as_option<'a>(this: Option<&'a Self>, original: Option<&'a T>) -> Option<&'a T> { match this { Some(Self::Set(value)) => Some(value), Some(Self::Remove { remove: true }) => None, Some(Self::Remove { remove: false }) | None => original, } } } impl Default for LanguageConfig { fn default() -> Self { Self { name: LanguageName::new(""), code_fence_block_name: None, grammar: None, matcher: LanguageMatcher::default(), brackets: Default::default(), auto_indent_using_last_non_empty_line: auto_indent_using_last_non_empty_line_default(), auto_indent_on_paste: None, increase_indent_pattern: Default::default(), decrease_indent_pattern: Default::default(), autoclose_before: Default::default(), line_comments: Default::default(), block_comment: Default::default(), scope_opt_in_language_servers: Default::default(), overrides: Default::default(), word_characters: Default::default(), collapsed_placeholder: Default::default(), hard_tabs: None, tab_size: None, soft_wrap: None, prettier_parser_name: None, hidden: false, } } } fn auto_indent_using_last_non_empty_line_default() -> bool { true } fn deserialize_regex<'de, D: Deserializer<'de>>(d: D) -> Result, D::Error> { let source = Option::::deserialize(d)?; if let Some(source) = source { 
Ok(Some(regex::Regex::new(&source).map_err(de::Error::custom)?)) } else { Ok(None) } } fn regex_json_schema(_: &mut SchemaGenerator) -> Schema { Schema::Object(SchemaObject { instance_type: Some(InstanceType::String.into()), ..Default::default() }) } fn serialize_regex(regex: &Option, serializer: S) -> Result where S: Serializer, { match regex { Some(regex) => serializer.serialize_str(regex.as_str()), None => serializer.serialize_none(), } } #[doc(hidden)] #[cfg(any(test, feature = "test-support"))] pub struct FakeLspAdapter { pub name: &'static str, pub initialization_options: Option, pub prettier_plugins: Vec<&'static str>, pub disk_based_diagnostics_progress_token: Option, pub disk_based_diagnostics_sources: Vec, pub language_server_binary: LanguageServerBinary, pub capabilities: lsp::ServerCapabilities, pub initializer: Option>, } /// Configuration of handling bracket pairs for a given language. /// /// This struct includes settings for defining which pairs of characters are considered brackets and /// also specifies any language-specific scopes where these pairs should be ignored for bracket matching purposes. #[derive(Clone, Debug, Default, JsonSchema)] pub struct BracketPairConfig { /// A list of character pairs that should be treated as brackets in the context of a given language. pub pairs: Vec, /// A list of tree-sitter scopes for which a given bracket should not be active. 
/// N-th entry in `[Self::disabled_scopes_by_bracket_ix]` contains a list of disabled scopes for an n-th entry in `[Self::pairs]` #[schemars(skip)] pub disabled_scopes_by_bracket_ix: Vec>, } fn bracket_pair_config_json_schema(gen: &mut SchemaGenerator) -> Schema { Option::>::json_schema(gen) } #[derive(Deserialize, JsonSchema)] pub struct BracketPairContent { #[serde(flatten)] pub bracket_pair: BracketPair, #[serde(default)] pub not_in: Vec, } impl<'de> Deserialize<'de> for BracketPairConfig { fn deserialize(deserializer: D) -> std::result::Result where D: Deserializer<'de>, { let result = Vec::::deserialize(deserializer)?; let mut brackets = Vec::with_capacity(result.len()); let mut disabled_scopes_by_bracket_ix = Vec::with_capacity(result.len()); for entry in result { brackets.push(entry.bracket_pair); disabled_scopes_by_bracket_ix.push(entry.not_in); } Ok(BracketPairConfig { pairs: brackets, disabled_scopes_by_bracket_ix, }) } } /// Describes a single bracket pair and how an editor should react to e.g. inserting /// an opening bracket or to a newline character insertion in between `start` and `end` characters. #[derive(Clone, Debug, Default, Deserialize, PartialEq, JsonSchema)] pub struct BracketPair { /// Starting substring for a bracket. pub start: String, /// Ending substring for a bracket. pub end: String, /// True if `end` should be automatically inserted right after `start` characters. pub close: bool, /// True if selected text should be surrounded by `start` and `end` characters. #[serde(default = "default_true")] pub surround: bool, /// True if an extra newline should be inserted while the cursor is in the middle /// of that bracket pair. 
pub newline: bool, } #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy)] pub(crate) struct LanguageId(usize); impl LanguageId { pub(crate) fn new() -> Self { Self(NEXT_LANGUAGE_ID.fetch_add(1, SeqCst)) } } pub struct Language { pub(crate) id: LanguageId, pub(crate) config: LanguageConfig, pub(crate) grammar: Option>, pub(crate) context_provider: Option>, pub(crate) toolchain: Option>, } #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy)] pub struct GrammarId(pub usize); impl GrammarId { pub(crate) fn new() -> Self { Self(NEXT_GRAMMAR_ID.fetch_add(1, SeqCst)) } } pub struct Grammar { id: GrammarId, pub ts_language: tree_sitter::Language, pub(crate) error_query: Query, pub(crate) highlights_query: Option, pub(crate) brackets_config: Option, pub(crate) redactions_config: Option, pub(crate) runnable_config: Option, pub(crate) indents_config: Option, pub outline_config: Option, pub embedding_config: Option, pub(crate) injection_config: Option, pub(crate) override_config: Option, pub(crate) highlight_map: Mutex, } struct IndentConfig { query: Query, indent_capture_ix: u32, start_capture_ix: Option, end_capture_ix: Option, outdent_capture_ix: Option, } pub struct OutlineConfig { pub query: Query, pub item_capture_ix: u32, pub name_capture_ix: u32, pub context_capture_ix: Option, pub extra_context_capture_ix: Option, pub open_capture_ix: Option, pub close_capture_ix: Option, pub annotation_capture_ix: Option, } #[derive(Debug)] pub struct EmbeddingConfig { pub query: Query, pub item_capture_ix: u32, pub name_capture_ix: Option, pub context_capture_ix: Option, pub collapse_capture_ix: Option, pub keep_capture_ix: Option, } struct InjectionConfig { query: Query, content_capture_ix: u32, language_capture_ix: Option, patterns: Vec, } struct RedactionConfig { pub query: Query, pub redaction_capture_ix: u32, } #[derive(Clone, Debug, PartialEq)] enum RunnableCapture { Named(SharedString), Run, } struct RunnableConfig { pub query: Query, /// A 
mapping from capture indice to capture kind pub extra_captures: Vec, } struct OverrideConfig { query: Query, values: HashMap, } #[derive(Debug)] struct OverrideEntry { name: String, range_is_inclusive: bool, value: LanguageConfigOverride, } #[derive(Default, Clone)] struct InjectionPatternConfig { language: Option>, combined: bool, } struct BracketConfig { query: Query, open_capture_ix: u32, close_capture_ix: u32, } impl Language { pub fn new(config: LanguageConfig, ts_language: Option) -> Self { Self::new_with_id(LanguageId::new(), config, ts_language) } fn new_with_id( id: LanguageId, config: LanguageConfig, ts_language: Option, ) -> Self { Self { id, config, grammar: ts_language.map(|ts_language| { Arc::new(Grammar { id: GrammarId::new(), highlights_query: None, brackets_config: None, outline_config: None, embedding_config: None, indents_config: None, injection_config: None, override_config: None, redactions_config: None, runnable_config: None, error_query: Query::new(&ts_language, "(ERROR) @error").unwrap(), ts_language, highlight_map: Default::default(), }) }), context_provider: None, toolchain: None, } } pub fn with_context_provider(mut self, provider: Option>) -> Self { self.context_provider = provider; self } pub fn with_toolchain_lister(mut self, provider: Option>) -> Self { self.toolchain = provider; self } pub fn with_queries(mut self, queries: LanguageQueries) -> Result { if let Some(query) = queries.highlights { self = self .with_highlights_query(query.as_ref()) .context("Error loading highlights query")?; } if let Some(query) = queries.brackets { self = self .with_brackets_query(query.as_ref()) .context("Error loading brackets query")?; } if let Some(query) = queries.indents { self = self .with_indents_query(query.as_ref()) .context("Error loading indents query")?; } if let Some(query) = queries.outline { self = self .with_outline_query(query.as_ref()) .context("Error loading outline query")?; } if let Some(query) = queries.embedding { self = self 
.with_embedding_query(query.as_ref()) .context("Error loading embedding query")?; } if let Some(query) = queries.injections { self = self .with_injection_query(query.as_ref()) .context("Error loading injection query")?; } if let Some(query) = queries.overrides { self = self .with_override_query(query.as_ref()) .context("Error loading override query")?; } if let Some(query) = queries.redactions { self = self .with_redaction_query(query.as_ref()) .context("Error loading redaction query")?; } if let Some(query) = queries.runnables { self = self .with_runnable_query(query.as_ref()) .context("Error loading tests query")?; } Ok(self) } pub fn with_highlights_query(mut self, source: &str) -> Result { let grammar = self .grammar_mut() .ok_or_else(|| anyhow!("cannot mutate grammar"))?; grammar.highlights_query = Some(Query::new(&grammar.ts_language, source)?); Ok(self) } pub fn with_runnable_query(mut self, source: &str) -> Result { let grammar = self .grammar_mut() .ok_or_else(|| anyhow!("cannot mutate grammar"))?; let query = Query::new(&grammar.ts_language, source)?; let mut extra_captures = Vec::with_capacity(query.capture_names().len()); for name in query.capture_names().iter() { let kind = if *name == "run" { RunnableCapture::Run } else { RunnableCapture::Named(name.to_string().into()) }; extra_captures.push(kind); } grammar.runnable_config = Some(RunnableConfig { extra_captures, query, }); Ok(self) } pub fn with_outline_query(mut self, source: &str) -> Result { let grammar = self .grammar_mut() .ok_or_else(|| anyhow!("cannot mutate grammar"))?; let query = Query::new(&grammar.ts_language, source)?; let mut item_capture_ix = None; let mut name_capture_ix = None; let mut context_capture_ix = None; let mut extra_context_capture_ix = None; let mut open_capture_ix = None; let mut close_capture_ix = None; let mut annotation_capture_ix = None; get_capture_indices( &query, &mut [ ("item", &mut item_capture_ix), ("name", &mut name_capture_ix), ("context", &mut 
context_capture_ix), ("context.extra", &mut extra_context_capture_ix), ("open", &mut open_capture_ix), ("close", &mut close_capture_ix), ("annotation", &mut annotation_capture_ix), ], ); if let Some((item_capture_ix, name_capture_ix)) = item_capture_ix.zip(name_capture_ix) { grammar.outline_config = Some(OutlineConfig { query, item_capture_ix, name_capture_ix, context_capture_ix, extra_context_capture_ix, open_capture_ix, close_capture_ix, annotation_capture_ix, }); } Ok(self) } pub fn with_embedding_query(mut self, source: &str) -> Result { let grammar = self .grammar_mut() .ok_or_else(|| anyhow!("cannot mutate grammar"))?; let query = Query::new(&grammar.ts_language, source)?; let mut item_capture_ix = None; let mut name_capture_ix = None; let mut context_capture_ix = None; let mut collapse_capture_ix = None; let mut keep_capture_ix = None; get_capture_indices( &query, &mut [ ("item", &mut item_capture_ix), ("name", &mut name_capture_ix), ("context", &mut context_capture_ix), ("keep", &mut keep_capture_ix), ("collapse", &mut collapse_capture_ix), ], ); if let Some(item_capture_ix) = item_capture_ix { grammar.embedding_config = Some(EmbeddingConfig { query, item_capture_ix, name_capture_ix, context_capture_ix, collapse_capture_ix, keep_capture_ix, }); } Ok(self) } pub fn with_brackets_query(mut self, source: &str) -> Result { let grammar = self .grammar_mut() .ok_or_else(|| anyhow!("cannot mutate grammar"))?; let query = Query::new(&grammar.ts_language, source)?; let mut open_capture_ix = None; let mut close_capture_ix = None; get_capture_indices( &query, &mut [ ("open", &mut open_capture_ix), ("close", &mut close_capture_ix), ], ); if let Some((open_capture_ix, close_capture_ix)) = open_capture_ix.zip(close_capture_ix) { grammar.brackets_config = Some(BracketConfig { query, open_capture_ix, close_capture_ix, }); } Ok(self) } pub fn with_indents_query(mut self, source: &str) -> Result { let grammar = self .grammar_mut() .ok_or_else(|| anyhow!("cannot mutate 
grammar"))?; let query = Query::new(&grammar.ts_language, source)?; let mut indent_capture_ix = None; let mut start_capture_ix = None; let mut end_capture_ix = None; let mut outdent_capture_ix = None; get_capture_indices( &query, &mut [ ("indent", &mut indent_capture_ix), ("start", &mut start_capture_ix), ("end", &mut end_capture_ix), ("outdent", &mut outdent_capture_ix), ], ); if let Some(indent_capture_ix) = indent_capture_ix { grammar.indents_config = Some(IndentConfig { query, indent_capture_ix, start_capture_ix, end_capture_ix, outdent_capture_ix, }); } Ok(self) } pub fn with_injection_query(mut self, source: &str) -> Result { let grammar = self .grammar_mut() .ok_or_else(|| anyhow!("cannot mutate grammar"))?; let query = Query::new(&grammar.ts_language, source)?; let mut language_capture_ix = None; let mut content_capture_ix = None; get_capture_indices( &query, &mut [ ("language", &mut language_capture_ix), ("content", &mut content_capture_ix), ], ); let patterns = (0..query.pattern_count()) .map(|ix| { let mut config = InjectionPatternConfig::default(); for setting in query.property_settings(ix) { match setting.key.as_ref() { "language" => { config.language.clone_from(&setting.value); } "combined" => { config.combined = true; } _ => {} } } config }) .collect(); if let Some(content_capture_ix) = content_capture_ix { grammar.injection_config = Some(InjectionConfig { query, language_capture_ix, content_capture_ix, patterns, }); } Ok(self) } pub fn with_override_query(mut self, source: &str) -> anyhow::Result { let query = { let grammar = self .grammar .as_ref() .ok_or_else(|| anyhow!("no grammar for language"))?; Query::new(&grammar.ts_language, source)? 
};

        // Maps a capture index of the query to the override it stands for.
        let mut override_configs_by_id = HashMap::default();
        for (ix, mut name) in query.capture_names().iter().copied().enumerate() {
            let mut range_is_inclusive = false;
            // Captures whose name starts with `_` never become overrides.
            if name.starts_with('_') {
                continue;
            }
            // `foo.inclusive` refers to the override `foo`, flagged as inclusive.
            if let Some(prefix) = name.strip_suffix(".inclusive") {
                name = prefix;
                range_is_inclusive = true;
            }
            // A capture without a matching config entry gets the default override value.
            let value = self.config.overrides.get(name).cloned().unwrap_or_default();
            // Every server an override opts into must also be listed in
            // `scope_opt_in_language_servers`.
            // NOTE(review): `util::debug_panic!` is defined outside this file —
            // presumably it only panics in debug builds; confirm.
            for server_name in &value.opt_into_language_servers {
                if !self
                    .config
                    .scope_opt_in_language_servers
                    .contains(server_name)
                {
                    util::debug_panic!("Server {server_name:?} has been opted-in by scope {name:?} but has not been marked as an opt-in server");
                }
            }
            override_configs_by_id.insert(
                ix as u32,
                OverrideEntry {
                    name: name.to_string(),
                    range_is_inclusive,
                    value,
                },
            );
        }

        // Every override name mentioned by the config — as a key of `overrides` or
        // inside `brackets.disabled_scopes_by_bracket_ix` — must be a capture of the query.
        let referenced_override_names = self.config.overrides.keys().chain(
            self.config
                .brackets
                .disabled_scopes_by_bracket_ix
                .iter()
                .flatten(),
        );
        for referenced_name in referenced_override_names {
            if !override_configs_by_id
                .values()
                .any(|entry| entry.name == *referenced_name)
            {
                // `Err(..)?` returns from the function with this error.
                Err(anyhow!(
                    "language {:?} has overrides in config not in query: {referenced_name:?}",
                    self.config.name
                ))?;
            }
        }

        // For each override, collect the indices of the bracket pairs whose list of
        // disabled scopes names it. `enumerate` yields indices in ascending order, so
        // the collected indices are ascending as well.
        for entry in override_configs_by_id.values_mut() {
            entry.value.disabled_bracket_ixs = self
                .config
                .brackets
                .disabled_scopes_by_bracket_ix
                .iter()
                .enumerate()
                .filter_map(|(ix, disabled_scope_names)| {
                    if disabled_scope_names.contains(&entry.name) {
                        Some(ix as u16)
                    } else {
                        None
                    }
                })
                .collect();
        }
        // The per-bracket scope lists now live on the override entries; drop the originals.
        self.config.brackets.disabled_scopes_by_bracket_ix.clear();

        let grammar = self
            .grammar_mut()
            .ok_or_else(|| anyhow!("cannot mutate grammar"))?;
        grammar.override_config = Some(OverrideConfig {
            query,
            values: override_configs_by_id,
        });
        Ok(self)
    }

    /// Compiles `source` as a tree-sitter query for this language's grammar and
    /// stores it as the grammar's redaction configuration.
    ///
    /// The configuration is only stored when the query defines a `redact` capture.
    ///
    /// # Errors
    ///
    /// Fails with "cannot mutate grammar" when `grammar_mut()` returns `None`, or
    /// with the error from `Query::new` when `source` is rejected.
    // NOTE(review): the return type reads as a bare `anyhow::Result`; a type argument
    // appears to be missing in this copy of the file (presumably `Self`) — confirm.
    pub fn with_redaction_query(mut self, source: &str) -> anyhow::Result {
        let grammar = self
            .grammar_mut()
            .ok_or_else(|| anyhow!("cannot mutate grammar"))?;
        let query = Query::new(&grammar.ts_language, source)?;
        let mut redaction_capture_ix = None;
        get_capture_indices(&query, &mut [("redact", &mut
redaction_capture_ix)]);

        // Without a `redact` capture the query is silently ignored.
        if let Some(redaction_capture_ix) = redaction_capture_ix {
            grammar.redactions_config = Some(RedactionConfig {
                query,
                redaction_capture_ix,
            });
        }
        Ok(self)
    }

    /// Returns a mutable reference to the grammar, or `None` when the language has
    /// no grammar or when `Arc::get_mut` refuses access because the `Arc` is not
    /// uniquely owned.
    fn grammar_mut(&mut self) -> Option<&mut Grammar> {
        Arc::get_mut(self.grammar.as_mut()?)
    }

    /// Returns a clone of the language name from the config.
    pub fn name(&self) -> LanguageName {
        self.config.name.clone()
    }

    /// Returns the configured `code_fence_block_name`, falling back to the
    /// lowercased language name when none is configured.
    // NOTE(review): the return type reads as a bare `Arc`; its type argument appears
    // to be missing in this copy of the file — restore from the original source.
    pub fn code_fence_block_name(&self) -> Arc {
        self.config
            .code_fence_block_name
            .clone()
            .unwrap_or_else(|| self.config.name.0.to_lowercase().into())
    }

    /// Returns a clone of this language's `context_provider` field.
    // NOTE(review): `Option>` is unbalanced — generic arguments were evidently lost
    // in this copy of the file; restore from the original source.
    pub fn context_provider(&self) -> Option> {
        self.context_provider.clone()
    }

    /// Returns a clone of this language's `toolchain` field.
    // NOTE(review): `Option>` is unbalanced — see the note on `context_provider`.
    pub fn toolchain_lister(&self) -> Option> {
        self.toolchain.clone()
    }

    /// Parses `text` from scratch with this language's grammar and returns the
    /// ranges that received a non-default syntax highlight, paired with the
    /// highlight id.
    ///
    /// Returns an empty vector when the language has no grammar.
    // NOTE(review): `offset` starts at 0 and only advances by the length of each
    // yielded chunk, so the returned ranges are relative to the first chunk that
    // `BufferChunks` produces — presumably `range.start`, not the start of `text`.
    // Confirm that callers passing a `range` that does not start at 0 expect this.
    // NOTE(review): `Arc` / `Range` appear without type arguments in this signature;
    // they seem to have been lost in this copy of the file.
    pub fn highlight_text<'a>(
        self: &'a Arc,
        text: &'a Rope,
        range: Range,
    ) -> Vec<(Range, HighlightId)> {
        let mut result = Vec::new();
        if let Some(grammar) = &self.grammar {
            // No previous tree is supplied, so this is a full parse of `text`.
            let tree = grammar.parse_text(text, None);
            let captures =
                SyntaxSnapshot::single_tree_captures(range.clone(), text, &tree, self, |grammar| {
                    grammar.highlights_query.as_ref()
                });
            let highlight_maps = vec![grammar.highlight_map()];
            let mut offset = 0;
            for chunk in
                BufferChunks::new(text, range, Some((captures, highlight_maps)), false, None)
            {
                let end_offset = offset + chunk.text.len();
                if let Some(highlight_id) = chunk.syntax_highlight_id {
                    // Chunks carrying the default highlight are not reported.
                    if !highlight_id.is_default() {
                        result.push((offset..end_offset, highlight_id));
                    }
                }
                offset = end_offset;
            }
        }
        result
    }

    /// Returns the path suffixes from this language's matcher config.
    pub fn path_suffixes(&self) -> &[String] {
        &self.config.matcher.path_suffixes
    }

    /// Returns `true` when `c` is whitespace or is contained in the config's
    /// `autoclose_before` set.
    pub fn should_autoclose_before(&self, c: char) -> bool {
        c.is_whitespace() || self.config.autoclose_before.contains(c)
    }

    /// Rebuilds the grammar's highlight map from the capture names of its
    /// highlights query and `theme`.
    ///
    /// Does nothing when the language has no grammar or the grammar has no
    /// highlights query.
    pub fn set_theme(&self, theme: &SyntaxTheme) {
        if let Some(grammar) = self.grammar.as_ref() {
            if let Some(highlights_query) = &grammar.highlights_query {
                // The map sits behind a lock, so it can be replaced through `&self`.
                *grammar.highlight_map.lock() =
                    HighlightMap::new(highlights_query.capture_names(), theme);
            }
        }
    }

    /// Returns the grammar of this language, if it has one.
    pub fn grammar(&self) -> Option<&Arc> {
        self.grammar.as_ref()
    }

    /// Returns a scope for this language with no override selected
    /// (`override_id` is `None`).
    pub fn default_scope(self: &Arc) -> LanguageScope {
        LanguageScope {
            language: self.clone(),
            override_id: None,
        }
}

    /// Returns the LSP identifier derived from the language name
    /// (delegates to `LanguageName::lsp_id`).
    pub fn lsp_id(&self) -> String {
        self.config.name.lsp_id()
    }

    /// Returns the configured Prettier parser name, if any.
    pub fn prettier_parser_name(&self) -> Option<&str> {
        self.config.prettier_parser_name.as_deref()
    }
}

impl LanguageScope {
    /// Returns the path suffixes of the scope's language.
    pub fn path_suffixes(&self) -> &[String] {
        // NOTE(review): the leading `&` is redundant (the call already returns a
        // slice reference) and only works through auto-deref.
        &self.language.path_suffixes()
    }

    /// Returns a clone of the name of the scope's language.
    pub fn language_name(&self) -> LanguageName {
        self.language.config.name.clone()
    }

    /// Returns the `collapsed_placeholder` string from the language config.
    pub fn collapsed_placeholder(&self) -> &str {
        self.language.config.collapsed_placeholder.as_ref()
    }

    /// Returns the line-comment prefixes that are inserted e.g. in line
    /// continuations or by the `toggle comments` action.
    ///
    /// The value is resolved through `Override::as_option` from the scope's
    /// override (if any) and the language-level `line_comments`; an empty slice is
    /// returned when that yields `None`.
    // NOTE(review): `Override::as_option` is defined outside this view — presumably a
    // value set on the override takes precedence over the language default; confirm.
    // NOTE(review): `&[Arc]` lacks a type argument; it appears to have been lost in
    // this copy of the file.
    pub fn line_comment_prefixes(&self) -> &[Arc] {
        Override::as_option(
            self.config_override().map(|o| &o.line_comments),
            Some(&self.language.config.line_comments),
        )
        .map_or(&[] as &[_], |e| e.as_slice())
    }

    /// Returns the two block-comment delimiters as a pair of references to the
    /// fields `.0` and `.1` of the configured value, resolved through
    /// `Override::as_option` from the scope's override (if any) and the
    /// language-level `block_comment`.
    pub fn block_comment_delimiters(&self) -> Option<(&Arc, &Arc)> {
        Override::as_option(
            self.config_override().map(|o| &o.block_comment),
            self.language.config.block_comment.as_ref(),
        )
        .map(|e| (&e.0, &e.1))
    }

    /// Returns the set of language-specific word characters.
    ///
    /// By default, Zed treats alphanumeric characters (and `_`) as word characters
    /// for the purpose of actions like `move to next word end` or whole-word
    /// search. The characters returned here are the language's additions to that
    /// default set.
    pub fn word_characters(&self) -> Option<&HashSet> {
        Override::as_option(
            self.config_override().map(|o| &o.word_characters),
            Some(&self.language.config.word_characters),
        )
    }

    /// Returns the bracket pairs configured for the language, each paired with a
    /// flag telling whether that pair is enabled in this scope (i.e. its index is
    /// not listed in the scope override's `disabled_bracket_ixs`).
pub fn brackets(&self) -> impl Iterator {
        // NOTE(review): `impl Iterator` carries no `Item` type here; it appears to have
        // been lost in this copy of the file (the closure yields `(bracket, is_enabled)`).
        // Indices of the pairs disabled by the active override; empty without one.
        let mut disabled_ids = self
            .config_override()
            .map_or(&[] as _, |o| o.disabled_bracket_ixs.as_slice());
        self.language
            .config
            .brackets
            .pairs
            .iter()
            .enumerate()
            .map(move |(ix, bracket)| {
                let mut is_enabled = true;
                // Only the front of `disabled_ids` is compared and then popped, so the
                // slice is assumed to be sorted in ascending order.
                if let Some(next_disabled_ix) = disabled_ids.first() {
                    if ix == *next_disabled_ix as usize {
                        disabled_ids = &disabled_ids[1..];
                        is_enabled = false;
                    }
                }
                (bracket, is_enabled)
            })
    }

    /// Returns `true` when `c` is whitespace or is contained in the language
    /// config's `autoclose_before` set.
    pub fn should_autoclose_before(&self, c: char) -> bool {
        c.is_whitespace() || self.language.config.autoclose_before.contains(c)
    }

    /// Returns whether the language server `name` may be used in this scope.
    ///
    /// A server listed in the language's `scope_opt_in_language_servers` is allowed
    /// only when the scope has an override whose `opt_into_language_servers`
    /// contains it; every other server is always allowed.
    pub fn language_allowed(&self, name: &LanguageServerName) -> bool {
        let config = &self.language.config;
        let opt_in_servers = &config.scope_opt_in_language_servers;
        if opt_in_servers.iter().any(|o| *o == *name) {
            if let Some(over) = self.config_override() {
                over.opt_into_language_servers.iter().any(|o| *o == *name)
            } else {
                // Opt-in server, but this scope has no override to opt in with.
                false
            }
        } else {
            true
        }
    }

    /// Returns the name of the override selected by this scope, or `None` when the
    /// scope has no `override_id`, the language has no grammar, the grammar has no
    /// override config, or the id is unknown to it.
    pub fn override_name(&self) -> Option<&str> {
        let id = self.override_id?;
        let grammar = self.language.grammar.as_ref()?;
        let override_config = grammar.override_config.as_ref()?;
        override_config.values.get(&id).map(|e| e.name.as_str())
    }

    /// Returns the config override selected by this scope; `None` under the same
    /// conditions as `override_name`.
    fn config_override(&self) -> Option<&LanguageConfigOverride> {
        let id = self.override_id?;
        let grammar = self.language.grammar.as_ref()?;
        let override_config = grammar.override_config.as_ref()?;
        override_config.values.get(&id).map(|e| &e.value)
    }
}

// Hashing and equality of `Language` consider only its `id`.
impl Hash for Language {
    // NOTE(review): `H` is used but not declared on `hash`; the generic parameter
    // list appears to have been lost in this copy of the file.
    fn hash(&self, state: &mut H) {
        self.id.hash(state)
    }
}

impl PartialEq for Language {
    /// Two languages are equal when their ids are equal.
    fn eq(&self, other: &Self) -> bool {
        self.id.eq(&other.id)
    }
}

impl Eq for Language {}

impl Debug for Language {
    /// Formats as `Language { name: .. }`; no other field is printed.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("Language")
            .field("name", &self.config.name)
            .finish()
    }
}

impl Grammar {
    /// Returns the id of this grammar.
    pub fn id(&self) -> GrammarId {
        self.id
    }

    /// Parses the whole of `text` with this grammar, passing `old_tree` (if any)
    /// to the parser, and returns the resulting tree.
    ///
    /// # Panics
    ///
    /// Panics with "incompatible grammar" when the parser rejects the grammar's
    /// language, and when the parser returns no tree.
    // NOTE(review): `old_tree: Option` lacks a type argument; it appears to have been
    // lost in this copy of the file (it is passed on via `old_tree.as_ref()`).
    fn parse_text(&self, text: &Rope, old_tree: Option) -> Tree {
        with_parser(|parser| {
            parser
                .set_language(&self.ts_language)
                .expect("incompatible grammar");
            let mut
chunks = text.chunks_in_range(0..text.len());
            parser
                .parse_with(
                    // Input callback: position the chunk iterator at the offset the
                    // parser asks for and hand back the next chunk, or an empty slice
                    // once the text is exhausted.
                    &mut move |offset, _| {
                        chunks.seek(offset);
                        chunks.next().unwrap_or("").as_bytes()
                    },
                    old_tree.as_ref(),
                )
                // Panics if the parser did not produce a tree.
                .unwrap()
        })
    }

    /// Returns a clone of the current highlight map, taken under its lock.
    pub fn highlight_map(&self) -> HighlightMap {
        self.highlight_map.lock().clone()
    }

    /// Looks up the highlight id mapped to the capture called `name`.
    ///
    /// Returns `None` when the grammar has no highlights query or the query has no
    /// capture with that name.
    // NOTE(review): the return type reads as a bare `Option`; its type argument
    // appears to have been lost in this copy of the file.
    pub fn highlight_id_for_name(&self, name: &str) -> Option {
        let capture_id = self
            .highlights_query
            .as_ref()?
            .capture_index_for_name(name)?;
        Some(self.highlight_map.lock().get(capture_id))
    }
}

impl CodeLabel {
    /// Creates a label without highlighted runs.
    ///
    /// `filter_range` covers the whole text by default. When `filter_text` is
    /// given and occurs in `text`, it is narrowed to the byte range of the first
    /// occurrence; when it does not occur, the whole-text range is kept.
    pub fn plain(text: String, filter_text: Option<&str>) -> Self {
        let mut result = Self {
            runs: Vec::new(),
            filter_range: 0..text.len(),
            text,
        };
        if let Some(filter_text) = filter_text {
            if let Some(ix) = result.text.find(filter_text) {
                result.filter_range = ix..ix + filter_text.len();
            }
        }
        result
    }

    /// Appends `text` to the label and, when `highlight` is `Some`, records a run
    /// covering exactly the appended byte range with that highlight.
    // NOTE(review): `highlight: Option` lacks a type argument; it appears to have
    // been lost in this copy of the file.
    pub fn push_str(&mut self, text: &str, highlight: Option) {
        let start_ix = self.text.len();
        self.text.push_str(text);
        let end_ix = self.text.len();
        if let Some(highlight) = highlight {
            self.runs.push((start_ix..end_ix, highlight));
        }
    }

    /// Returns the label text.
    pub fn text(&self) -> &str {
        self.text.as_str()
    }
}

// NOTE(review): `impl From for CodeLabel` lacks the source type; judging by the
// parameter `value: String` it was presumably `From<String>` — confirm.
impl From for CodeLabel {
    /// Builds a plain label (no runs, no filter text) from an owned string.
    fn from(value: String) -> Self {
        Self::plain(value, None)
    }
}

impl From<&str> for CodeLabel {
    /// Builds a plain label (no runs, no filter text) from a string slice.
    fn from(value: &str) -> Self {
        Self::plain(value.to_string(), None)
    }
}

impl Ord for LanguageMatcher {
    /// Orders matchers by `path_suffixes` first and, on a tie, by the source text
    /// of `first_line_pattern` (an absent pattern sorts before a present one, as
    /// `None < Some(_)`).
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.path_suffixes.cmp(&other.path_suffixes).then_with(|| {
            self.first_line_pattern
                .as_ref()
                .map(Regex::as_str)
                .cmp(&other.first_line_pattern.as_ref().map(Regex::as_str))
        })
    }
}

impl PartialOrd for LanguageMatcher {
    /// Delegates to `Ord::cmp`, so the ordering is total.
    // NOTE(review): the return type reads as a bare `Option`; its type argument
    // appears to have been lost in this copy of the file.
    fn partial_cmp(&self, other: &Self) -> Option {
        Some(self.cmp(other))
    }
}

impl Eq for LanguageMatcher {}

impl PartialEq for LanguageMatcher {
    /// Compares the same two fields as `Ord::cmp`: `path_suffixes` and the source
    /// text of `first_line_pattern`.
    fn eq(&self, other: &Self) -> bool {
        self.path_suffixes == other.path_suffixes
            && self.first_line_pattern.as_ref().map(Regex::as_str)
                == other.first_line_pattern.as_ref().map(Regex::as_str)
    }
}

// Only compiled for tests or with the `test-support` feature.
#[cfg(any(test, feature = "test-support"))]
impl Default for FakeLspAdapter {
    /// Returns a fake adapter named "the-fake-language-server" with fixed
    /// placeholder values (see the struct literal in the body).
    fn default() -> Self {
Self {
            name: "the-fake-language-server",
            capabilities: lsp::LanguageServer::full_capabilities(),
            initializer: None,
            disk_based_diagnostics_progress_token: None,
            initialization_options: None,
            disk_based_diagnostics_sources: Vec::new(),
            prettier_plugins: Vec::new(),
            // Placeholder binary description, returned as-is by the adapter methods.
            language_server_binary: LanguageServerBinary {
                path: "/the/fake/lsp/path".into(),
                arguments: vec![],
                env: Default::default(),
            },
        }
    }
}

// `LspAdapter` implementation for the fake adapter; only compiled for tests or
// with the `test-support` feature.
// NOTE(review): several signatures below contain unbalanced or bare generics
// (`Option`, `Arc`, `Pin>>>`, `Result>`, `Box`, `Vec`); the type arguments appear
// to have been lost in this copy of the file and must be restored from the
// `LspAdapter` trait definition.
#[cfg(any(test, feature = "test-support"))]
#[async_trait(?Send)]
impl LspAdapter for FakeLspAdapter {
    /// Returns the adapter's `name` wrapped in a `LanguageServerName`.
    fn name(&self) -> LanguageServerName {
        LanguageServerName(self.name.into())
    }

    /// Always reports the configured fake binary; both arguments are ignored.
    async fn check_if_user_installed(
        &self,
        _: &dyn LspAdapterDelegate,
        _: &AsyncAppContext,
    ) -> Option {
        Some(self.language_server_binary.clone())
    }

    /// Returns a future that resolves to `Ok` with a clone of the configured fake
    /// binary; all arguments are ignored.
    fn get_language_server_command<'a>(
        self: Arc,
        _: Arc,
        _: LanguageServerBinaryOptions,
        _: futures::lock::MutexGuard<'a, Option>,
        _: &'a mut AsyncAppContext,
    ) -> Pin>>> {
        async move { Ok(self.language_server_binary.clone()) }.boxed_local()
    }

    /// Not expected to be called on the fake adapter; panics via `unreachable!`.
    async fn fetch_latest_server_version(
        &self,
        _: &dyn LspAdapterDelegate,
    ) -> Result> {
        unreachable!();
    }

    /// Not expected to be called on the fake adapter; panics via `unreachable!`.
    async fn fetch_server_binary(
        &self,
        _: Box,
        _: PathBuf,
        _: &dyn LspAdapterDelegate,
    ) -> Result {
        unreachable!();
    }

    /// Not expected to be called on the fake adapter; panics via `unreachable!`.
    async fn cached_server_binary(
        &self,
        _: PathBuf,
        _: &dyn LspAdapterDelegate,
    ) -> Option {
        unreachable!();
    }

    /// Leaves the diagnostics untouched.
    fn process_diagnostics(&self, _: &mut lsp::PublishDiagnosticsParams) {}

    /// Returns a clone of the configured `disk_based_diagnostics_sources`.
    fn disk_based_diagnostic_sources(&self) -> Vec {
        self.disk_based_diagnostics_sources.clone()
    }

    /// Returns a clone of the configured `disk_based_diagnostics_progress_token`.
    fn disk_based_diagnostics_progress_token(&self) -> Option {
        self.disk_based_diagnostics_progress_token.clone()
    }

    /// Returns `Ok` with a clone of the configured `initialization_options`; the
    /// delegate argument is ignored.
    async fn initialization_options(
        self: Arc,
        _: &Arc,
    ) -> Result> {
        Ok(self.initialization_options.clone())
    }
}

/// Looks up the indices of named captures in `query`.
///
/// For every capture name defined by `query`, the first entry of `captures` with
/// the same name gets its slot set to `Some(index)`. Slots whose name the query
/// does not define are left untouched, so callers initialise them to `None`.
// NOTE(review): `&mut Option` lacks a type argument; judging by `Some(ix as u32)`
// it was presumably `Option<u32>` — confirm.
fn get_capture_indices(query: &Query, captures: &mut [(&str, &mut Option)]) {
    for (ix, name) in query.capture_names().iter().enumerate() {
        for (capture_name, index) in captures.iter_mut() {
            if capture_name == name {
                **index = Some(ix as u32);
                // Only the first matching entry is updated for a capture name.
                break;
            }
        }
    }
}

/// Converts a `PointUtf16` into an LSP position: `row` becomes the line and
/// `column` the character.
pub fn point_to_lsp(point: PointUtf16) ->
lsp::Position {
    lsp::Position::new(point.row, point.column)
}

/// Converts an LSP position into an `Unclipped` `PointUtf16`: `line` becomes the
/// row and `character` the column. No bounds checking is performed here.
// NOTE(review): the return type reads as a bare `Unclipped`; its type argument
// appears to have been lost in this copy of the file.
pub fn point_from_lsp(point: lsp::Position) -> Unclipped {
    Unclipped(PointUtf16::new(point.line, point.character))
}

/// Converts a range of points into an LSP range by converting both endpoints
/// with `point_to_lsp`.
// NOTE(review): `range: Range` lacks a type argument; since the endpoints are
// passed to `point_to_lsp`, it was presumably `Range<PointUtf16>` — confirm.
pub fn range_to_lsp(range: Range) -> lsp::Range {
    lsp::Range {
        start: point_to_lsp(range.start),
        end: point_to_lsp(range.end),
    }
}

/// Converts an LSP range into a range of unclipped points.
///
/// If the incoming range is inverted (`start > end`), the endpoints are swapped
/// so that the returned range always satisfies `start <= end`.
// NOTE(review): `Range>` is unbalanced — generic arguments were evidently lost in
// this copy of the file.
pub fn range_from_lsp(range: lsp::Range) -> Range> {
    let mut start = point_from_lsp(range.start);
    let mut end = point_from_lsp(range.end);
    if start > end {
        mem::swap(&mut start, &mut end);
    }
    start..end
}

#[cfg(test)]
mod tests {
    use super::*;
    use gpui::TestAppContext;

    // Registers two test languages and checks that the registry lists them before,
    // during and after loading, and that concurrent loads share one instance.
    #[gpui::test(iterations = 10)]
    async fn test_language_loading(cx: &mut TestAppContext) {
        let languages = LanguageRegistry::test(cx.executor());
        let languages = Arc::new(languages);
        languages.register_native_grammars([
            ("json", tree_sitter_json::LANGUAGE),
            ("rust", tree_sitter_rust::LANGUAGE),
        ]);
        languages.register_test_language(LanguageConfig {
            name: "JSON".into(),
            grammar: Some("json".into()),
            matcher: LanguageMatcher {
                path_suffixes: vec!["json".into()],
                ..Default::default()
            },
            ..Default::default()
        });
        languages.register_test_language(LanguageConfig {
            name: "Rust".into(),
            grammar: Some("rust".into()),
            matcher: LanguageMatcher {
                path_suffixes: vec!["rs".into()],
                ..Default::default()
            },
            ..Default::default()
        });
        // Both registered languages are listed, plus "Plain Text", which this test
        // does not register itself.
        assert_eq!(
            languages.language_names(),
            &[
                "JSON".to_string(),
                "Plain Text".to_string(),
                "Rust".to_string(),
            ]
        );

        // Request the same language twice without awaiting either future yet.
        let rust1 = languages.language_for_name("Rust");
        let rust2 = languages.language_for_name("Rust");

        // Ensure the language is still listed while it is being loaded.
        assert_eq!(
            languages.language_names(),
            &[
                "JSON".to_string(),
                "Plain Text".to_string(),
                "Rust".to_string(),
            ]
        );

        // Both requests must resolve to the very same `Arc` (pointer equality).
        let (rust1, rust2) = futures::join!(rust1, rust2);
        assert!(Arc::ptr_eq(&rust1.unwrap(), &rust2.unwrap()));

        // Ensure the language is still listed after it has been loaded.
assert_eq!(
            languages.language_names(),
            &[
                "JSON".to_string(),
                "Plain Text".to_string(),
                "Rust".to_string(),
            ]
        );

        // Loading a language that was never registered returns an error.
        assert!(languages.language_for_name("Unknown").await.is_err());
    }
}