language: Update block_comment and documentation comment (#34861)

As suggested in https://github.com/zed-industries/zed/pull/34418, this
proposes various changes to language configs to make block comments and
doc-block-style comments more similar. In doing so, it introduces some
breaking changes into the extension schema.

This change is needed to support the changes I'm working on in #34418,
to be able to support `rewrap` in block comments like `/* really long
comment ... */`. As is, we can do this in C-style doc-block comments (eg
`/** ... */`) because of the config in `documentation`, but we can't do
this in regular block comments because we lack the info about what the
line prefix and indentation should be.

And while I was here, I did various other clean-ups, many of which feel
nice but are optional.

I would love special attention on the changes to the schema, version and
related changes; I'm totally unfamiliar with that part of Zed.

**Summary of changes**
- break: changes type of `block_comment` to same type as
`documentation_comment` (**this is the important change**)
- break: rename `documentation` to `documentation_comment` (optional,
but improves consistency w/ `line_comments` and `block_comment`)
- break/refactor?: removes some whitespace in the declaration of
`block_comment` delimiters (optional, may break things, need input; some
langs had no spaces, others did)
- refactor: change `tab_size` from `NonZeroU32` to just a `u32` (some
block comments don't seem to need/want indent past the initial
delimiter, so we need this to be 0 sometimes)
- refactor: moves the `documentation_comment` declarations to appear
next to `block_comment`, rearranges the order of the fields in the TOML
for `documentation_comment`, rename backing `struct` (all optional)

**Future scope**
I believe that this will also allow us to extend regular block comments
on newline – as we do for doc-block comments – but I haven't looked into
this yet. (e.g., in JS try pressing enter in both of these: `/* */` and
`/** */`; the latter should extend w/ a `*` prefixed line, while the
former does not.)

Release Notes:

- BREAKING CHANGE: update extension schema version from 1 to 2, change
format of `block_comment` and rename `documentation` to `documentation_comment`

/cc @smitbarmase
This commit is contained in:
claytonrcarter 2025-07-23 11:08:52 -04:00 committed by GitHub
parent 14171e0721
commit 1f4c9b9427
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 249 additions and 74 deletions

View file

@ -92,6 +92,7 @@ tree-sitter-python.workspace = true
tree-sitter-ruby.workspace = true
tree-sitter-rust.workspace = true
tree-sitter-typescript.workspace = true
toml.workspace = true
unindent.workspace = true
util = { workspace = true, features = ["test-support"] }
zlog.workspace = true

View file

@ -2273,7 +2273,12 @@ fn test_language_scope_at_with_javascript(cx: &mut App) {
LanguageConfig {
name: "JavaScript".into(),
line_comments: vec!["// ".into()],
block_comment: Some(("/*".into(), "*/".into())),
block_comment: Some(BlockCommentConfig {
start: "/*".into(),
end: "*/".into(),
prefix: "* ".into(),
tab_size: 1,
}),
brackets: BracketPairConfig {
pairs: vec![
BracketPair {
@ -2300,7 +2305,12 @@ fn test_language_scope_at_with_javascript(cx: &mut App) {
"element".into(),
LanguageConfigOverride {
line_comments: Override::Remove { remove: true },
block_comment: Override::Set(("{/*".into(), "*/}".into())),
block_comment: Override::Set(BlockCommentConfig {
start: "{/*".into(),
prefix: "".into(),
end: "*/}".into(),
tab_size: 0,
}),
..Default::default()
},
)]
@ -2338,9 +2348,15 @@ fn test_language_scope_at_with_javascript(cx: &mut App) {
let config = snapshot.language_scope_at(0).unwrap();
assert_eq!(config.line_comment_prefixes(), &[Arc::from("// ")]);
assert_eq!(
config.block_comment_delimiters(),
Some((&"/*".into(), &"*/".into()))
config.block_comment(),
Some(&BlockCommentConfig {
start: "/*".into(),
prefix: "* ".into(),
end: "*/".into(),
tab_size: 1,
})
);
// Both bracket pairs are enabled
assert_eq!(
config.brackets().map(|e| e.1).collect::<Vec<_>>(),
@ -2360,8 +2376,13 @@ fn test_language_scope_at_with_javascript(cx: &mut App) {
.unwrap();
assert_eq!(string_config.line_comment_prefixes(), &[Arc::from("// ")]);
assert_eq!(
string_config.block_comment_delimiters(),
Some((&"/*".into(), &"*/".into()))
string_config.block_comment(),
Some(&BlockCommentConfig {
start: "/*".into(),
prefix: "* ".into(),
end: "*/".into(),
tab_size: 1,
})
);
// Second bracket pair is disabled
assert_eq!(
@ -2391,8 +2412,13 @@ fn test_language_scope_at_with_javascript(cx: &mut App) {
.unwrap();
assert_eq!(tag_config.line_comment_prefixes(), &[Arc::from("// ")]);
assert_eq!(
tag_config.block_comment_delimiters(),
Some((&"/*".into(), &"*/".into()))
tag_config.block_comment(),
Some(&BlockCommentConfig {
start: "/*".into(),
prefix: "* ".into(),
end: "*/".into(),
tab_size: 1,
})
);
assert_eq!(
tag_config.brackets().map(|e| e.1).collect::<Vec<_>>(),
@ -2408,8 +2434,13 @@ fn test_language_scope_at_with_javascript(cx: &mut App) {
&[Arc::from("// ")]
);
assert_eq!(
expression_in_element_config.block_comment_delimiters(),
Some((&"/*".into(), &"*/".into()))
expression_in_element_config.block_comment(),
Some(&BlockCommentConfig {
start: "/*".into(),
prefix: "* ".into(),
end: "*/".into(),
tab_size: 1,
})
);
assert_eq!(
expression_in_element_config
@ -2528,13 +2559,18 @@ fn test_language_scope_at_with_combined_injections(cx: &mut App) {
let html_config = snapshot.language_scope_at(Point::new(2, 4)).unwrap();
assert_eq!(html_config.line_comment_prefixes(), &[]);
assert_eq!(
html_config.block_comment_delimiters(),
Some((&"<!--".into(), &"-->".into()))
html_config.block_comment(),
Some(&BlockCommentConfig {
start: "<!--".into(),
end: "-->".into(),
prefix: "".into(),
tab_size: 0,
})
);
let ruby_config = snapshot.language_scope_at(Point::new(3, 12)).unwrap();
assert_eq!(ruby_config.line_comment_prefixes(), &[Arc::from("# ")]);
assert_eq!(ruby_config.block_comment_delimiters(), None);
assert_eq!(ruby_config.block_comment(), None);
buffer
});
@ -3490,7 +3526,12 @@ fn html_lang() -> Language {
Language::new(
LanguageConfig {
name: LanguageName::new("HTML"),
block_comment: Some(("<!--".into(), "-->".into())),
block_comment: Some(BlockCommentConfig {
start: "<!--".into(),
prefix: "".into(),
end: "-->".into(),
tab_size: 0,
}),
..Default::default()
},
Some(tree_sitter_html::LANGUAGE.into()),
@ -3521,7 +3562,12 @@ fn erb_lang() -> Language {
path_suffixes: vec!["erb".to_string()],
..Default::default()
},
block_comment: Some(("<%#".into(), "%>".into())),
block_comment: Some(BlockCommentConfig {
start: "<%#".into(),
prefix: "".into(),
end: "%>".into(),
tab_size: 0,
}),
..Default::default()
},
Some(tree_sitter_embedded_template::LANGUAGE.into()),

View file

@ -727,9 +727,12 @@ pub struct LanguageConfig {
/// used for comment continuations on the next line, but only the first one is used for Editor::ToggleComments.
#[serde(default)]
pub line_comments: Vec<Arc<str>>,
/// Starting and closing characters of a block comment.
/// Delimiters and configuration for recognizing and formatting block comments.
#[serde(default)]
pub block_comment: Option<(Arc<str>, Arc<str>)>,
pub block_comment: Option<BlockCommentConfig>,
/// Delimiters and configuration for recognizing and formatting documentation comments.
#[serde(default, alias = "documentation")]
pub documentation_comment: Option<BlockCommentConfig>,
/// A list of additional regex patterns that should be treated as prefixes
/// for creating boundaries during rewrapping, ensuring content from one
/// prefixed section doesn't merge with another (e.g., markdown list items).
@ -774,10 +777,6 @@ pub struct LanguageConfig {
/// A list of preferred debuggers for this language.
#[serde(default)]
pub debuggers: IndexSet<SharedString>,
/// Whether to treat documentation comment of this language differently by
/// auto adding prefix on new line, adjusting the indenting , etc.
#[serde(default)]
pub documentation: Option<DocumentationConfig>,
}
#[derive(Clone, Debug, Deserialize, Default, JsonSchema)]
@ -837,17 +836,56 @@ pub struct JsxTagAutoCloseConfig {
pub erroneous_close_tag_name_node_name: Option<String>,
}
/// The configuration for documentation block for this language.
#[derive(Clone, Deserialize, JsonSchema)]
pub struct DocumentationConfig {
/// A start tag of documentation block.
/// The configuration for block comments for this language.
#[derive(Clone, Debug, JsonSchema, PartialEq)]
pub struct BlockCommentConfig {
/// A start tag of block comment.
pub start: Arc<str>,
/// A end tag of documentation block.
/// An end tag of block comment.
pub end: Arc<str>,
/// A character to add as a prefix when a new line is added to a documentation block.
/// A character to add as a prefix when a new line is added to a block comment.
pub prefix: Arc<str>,
/// An indent to add for prefix and end line upon new line.
pub tab_size: NonZeroU32,
pub tab_size: u32,
}
/// Deserializes a [`BlockCommentConfig`] from either of two shapes: a table with
/// `start`, `end`, `prefix` and `tab_size` keys, or a two-element `[start, end]`
/// array, which yields an empty `prefix` and a `tab_size` of 0.
impl<'de> Deserialize<'de> for BlockCommentConfig {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        // Intermediate shape covering both accepted inputs. With `untagged`, serde
        // tries the variants in declaration order, so the table form is attempted
        // before the two-element array form.
        #[derive(Deserialize)]
        #[serde(untagged)]
        enum BlockCommentConfigHelper {
            New {
                start: Arc<str>,
                end: Arc<str>,
                prefix: Arc<str>,
                tab_size: u32,
            },
            Old([Arc<str>; 2]),
        }

        let parsed = BlockCommentConfigHelper::deserialize(deserializer)?;
        let config = match parsed {
            // Array form carries only the delimiters; there is no line prefix
            // and no extra indent.
            BlockCommentConfigHelper::Old([start, end]) => BlockCommentConfig {
                start,
                end,
                prefix: Arc::from(""),
                tab_size: 0,
            },
            // Table form maps field-for-field onto the config.
            BlockCommentConfigHelper::New {
                start,
                end,
                prefix,
                tab_size,
            } => BlockCommentConfig {
                start,
                end,
                prefix,
                tab_size,
            },
        };
        Ok(config)
    }
}
/// Represents a language for the given range. Some languages (e.g. HTML)
@ -864,7 +902,7 @@ pub struct LanguageConfigOverride {
#[serde(default)]
pub line_comments: Override<Vec<Arc<str>>>,
#[serde(default)]
pub block_comment: Override<(Arc<str>, Arc<str>)>,
pub block_comment: Override<BlockCommentConfig>,
#[serde(skip)]
pub disabled_bracket_ixs: Vec<u16>,
#[serde(default)]
@ -916,6 +954,7 @@ impl Default for LanguageConfig {
autoclose_before: Default::default(),
line_comments: Default::default(),
block_comment: Default::default(),
documentation_comment: Default::default(),
rewrap_prefixes: Default::default(),
scope_opt_in_language_servers: Default::default(),
overrides: Default::default(),
@ -929,7 +968,6 @@ impl Default for LanguageConfig {
jsx_tag_auto_close: None,
completion_query_characters: Default::default(),
debuggers: Default::default(),
documentation: None,
}
}
}
@ -1847,12 +1885,17 @@ impl LanguageScope {
.map_or([].as_slice(), |e| e.as_slice())
}
pub fn block_comment_delimiters(&self) -> Option<(&Arc<str>, &Arc<str>)> {
/// Config for block comments for this language.
pub fn block_comment(&self) -> Option<&BlockCommentConfig> {
Override::as_option(
self.config_override().map(|o| &o.block_comment),
self.language.config.block_comment.as_ref(),
)
.map(|e| (&e.0, &e.1))
}
/// Returns the configuration for documentation-style block comments of this
/// language, if one is set.
///
/// Used for documentation styles that require a leading character on each line,
/// such as the asterisk in JSDoc, Javadoc, etc. The value is read straight from
/// the language's config; no scope override is consulted here.
pub fn documentation_comment(&self) -> Option<&BlockCommentConfig> {
    let config = &self.language.config;
    config.documentation_comment.as_ref()
}
/// Returns additional regex patterns that act as prefix markers for creating
@ -1897,14 +1940,6 @@ impl LanguageScope {
.unwrap_or(false)
}
/// Returns config to documentation block for this language.
///
/// Used for documentation styles that require a leading character on each line,
/// such as the asterisk in JSDoc, Javadoc, etc.
pub fn documentation(&self) -> Option<&DocumentationConfig> {
self.language.config.documentation.as_ref()
}
/// Returns a list of bracket pairs for a given language with an additional
/// piece of information about whether the particular bracket pair is currently active for a given language.
pub fn brackets(&self) -> impl Iterator<Item = (&BracketPair, bool)> {
@ -2299,6 +2334,7 @@ pub fn range_from_lsp(range: lsp::Range) -> Range<Unclipped<PointUtf16>> {
mod tests {
use super::*;
use gpui::TestAppContext;
use pretty_assertions::assert_matches;
#[gpui::test(iterations = 10)]
async fn test_language_loading(cx: &mut TestAppContext) {
@ -2460,4 +2496,75 @@ mod tests {
"LSP completion items with duplicate label and detail, should omit the detail"
);
}
/// Checks that `LanguageConfig` deserializes the current TOML form of the comment
/// settings as well as the two older spellings: the `documentation` key and the
/// two-element array form of `block_comment`.
#[test]
fn test_deserializing_comments_backwards_compat() {
// Current format: `block_comment` and `documentation_comment` are both tables
// with `start`, `end`, `prefix` and `tab_size` keys.
{
let config: LanguageConfig = ::toml::from_str(
r#"
name = "Foo"
block_comment = { start = "a", end = "b", prefix = "c", tab_size = 1 }
documentation_comment = { start = "d", end = "e", prefix = "f", tab_size = 2 }
"#,
)
.unwrap();
assert_matches!(config.block_comment, Some(BlockCommentConfig { .. }));
assert_matches!(
config.documentation_comment,
Some(BlockCommentConfig { .. })
);
// Each field must round-trip unchanged for both settings.
let block_config = config.block_comment.unwrap();
assert_eq!(block_config.start.as_ref(), "a");
assert_eq!(block_config.end.as_ref(), "b");
assert_eq!(block_config.prefix.as_ref(), "c");
assert_eq!(block_config.tab_size, 1);
let doc_config = config.documentation_comment.unwrap();
assert_eq!(doc_config.start.as_ref(), "d");
assert_eq!(doc_config.end.as_ref(), "e");
assert_eq!(doc_config.prefix.as_ref(), "f");
assert_eq!(doc_config.tab_size, 2);
}
// Former key name: a `documentation` table must be read into the
// `documentation_comment` field.
{
let config: LanguageConfig = ::toml::from_str(
r#"
name = "Foo"
documentation = { start = "a", end = "b", prefix = "c", tab_size = 1}
"#,
)
.unwrap();
assert_matches!(
config.documentation_comment,
Some(BlockCommentConfig { .. })
);
let config = config.documentation_comment.unwrap();
assert_eq!(config.start.as_ref(), "a");
assert_eq!(config.end.as_ref(), "b");
assert_eq!(config.prefix.as_ref(), "c");
assert_eq!(config.tab_size, 1);
}
// Legacy format: `block_comment` given as a two-element `[start, end]` array
// must produce a `BlockCommentConfig` with an empty prefix and a `tab_size` of 0.
{
let config: LanguageConfig = ::toml::from_str(
r#"
name = "Foo"
block_comment = ["a", "b"]
"#,
)
.unwrap();
assert_matches!(config.block_comment, Some(BlockCommentConfig { .. }));
let config = config.block_comment.unwrap();
assert_eq!(config.start.as_ref(), "a");
assert_eq!(config.end.as_ref(), "b");
assert_eq!(config.prefix.as_ref(), "");
assert_eq!(config.tab_size, 0);
}
}
}