Merge pull request #1404 from zed-industries/html-support

Basic html support
Max Brunsfeld 2022-10-06 10:32:44 -07:00 committed by GitHub
commit 51fa06cc8d
29 changed files with 1538 additions and 623 deletions

View file

@ -64,6 +64,8 @@ util = { path = "../util", features = ["test-support"] }
ctor = "0.1"
env_logger = "0.9"
rand = "0.8.3"
tree-sitter-html = "*"
tree-sitter-javascript = "*"
tree-sitter-json = "*"
tree-sitter-rust = "*"
tree-sitter-python = "*"

View file

@ -95,14 +95,15 @@ pub struct BufferSnapshot {
parse_count: usize,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
pub struct IndentSize {
pub len: u32,
pub kind: IndentKind,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
pub enum IndentKind {
#[default]
Space,
Tab,
}
@ -247,7 +248,6 @@ pub enum AutoindentMode {
struct AutoindentRequest {
before_edit: BufferSnapshot,
entries: Vec<AutoindentRequestEntry>,
indent_size: IndentSize,
is_block_mode: bool,
}
@ -260,6 +260,7 @@ struct AutoindentRequestEntry {
/// only be adjusted if the suggested indentation level has *changed*
/// since the edit was made.
first_line_is_new: bool,
indent_size: IndentSize,
original_indent_column: Option<u32>,
}
@ -719,6 +720,16 @@ impl Buffer {
self.language.as_ref()
}
pub fn language_at<D: ToOffset>(&self, position: D) -> Option<Arc<Language>> {
let offset = position.to_offset(self);
self.syntax_map
.lock()
.layers_for_range(offset..offset, &self.text)
.last()
.map(|info| info.language.clone())
.or_else(|| self.language.clone())
}
pub fn parse_count(&self) -> usize {
self.parse_count
}
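
The new `Buffer::language_at` walks the syntax layers covering a position and returns the deepest injected language, falling back to the buffer's base language when no injection applies. A minimal usage sketch (hypothetical helper, not part of this commit; it assumes the crate's `Buffer`, `Language`, and `Arc` are in scope):

    // Hypothetical helper: pick the line-comment prefix for whatever language
    // is active at `offset`, e.g. "//" inside an injected JavaScript block
    // (if its config defines one) and the HTML setting elsewhere.
    fn comment_prefix_at(buffer: &Buffer, offset: usize) -> Option<Arc<str>> {
        buffer
            .language_at(offset)
            .and_then(|language| language.line_comment_prefix().cloned())
    }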
@ -866,10 +877,13 @@ impl Buffer {
// buffer before this batch of edits.
let mut row_ranges = Vec::new();
let mut old_to_new_rows = BTreeMap::new();
let mut language_indent_sizes_by_new_row = Vec::new();
for entry in &request.entries {
let position = entry.range.start;
let new_row = position.to_point(&snapshot).row;
let new_end_row = entry.range.end.to_point(&snapshot).row + 1;
language_indent_sizes_by_new_row.push((new_row, entry.indent_size));
if !entry.first_line_is_new {
let old_row = position.to_point(&request.before_edit).row;
old_to_new_rows.insert(old_row, new_row);
@ -883,6 +897,8 @@ impl Buffer {
let mut old_suggestions = BTreeMap::<u32, IndentSize>::default();
let old_edited_ranges =
contiguous_ranges(old_to_new_rows.keys().copied(), max_rows_between_yields);
let mut language_indent_sizes = language_indent_sizes_by_new_row.iter().peekable();
let mut language_indent_size = IndentSize::default();
for old_edited_range in old_edited_ranges {
let suggestions = request
.before_edit
@ -891,6 +907,17 @@ impl Buffer {
.flatten();
for (old_row, suggestion) in old_edited_range.zip(suggestions) {
if let Some(suggestion) = suggestion {
let new_row = *old_to_new_rows.get(&old_row).unwrap();
// Find the indent size based on the language for this row.
while let Some((row, size)) = language_indent_sizes.peek() {
if *row > new_row {
break;
}
language_indent_size = *size;
language_indent_sizes.next();
}
let suggested_indent = old_to_new_rows
.get(&suggestion.basis_row)
.and_then(|from_row| old_suggestions.get(from_row).copied())
@ -899,9 +926,8 @@ impl Buffer {
.before_edit
.indent_size_for_line(suggestion.basis_row)
})
.with_delta(suggestion.delta, request.indent_size);
old_suggestions
.insert(*old_to_new_rows.get(&old_row).unwrap(), suggested_indent);
.with_delta(suggestion.delta, language_indent_size);
old_suggestions.insert(new_row, suggested_indent);
}
}
yield_now().await;
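
`language_indent_sizes_by_new_row` is built in row order, so each pass keeps a peekable cursor over it and advances until the next entry would start past the row being processed, carrying the last seen size forward. A standalone sketch of that lookup pattern (illustrative only; plain integers stand in for `IndentSize`):

    // Entries are (first_row, size) pairs sorted by row; `rows` must also be
    // ascending, mirroring the order in which the autoindent pass visits rows.
    fn size_for_rows(sizes_by_row: &[(u32, u32)], rows: &[u32]) -> Vec<u32> {
        let mut sizes = sizes_by_row.iter().peekable();
        let mut current = 0;
        rows.iter()
            .map(|&row| {
                while let Some(&&(start_row, size)) = sizes.peek() {
                    if start_row > row {
                        break;
                    }
                    current = size;
                    sizes.next();
                }
                current
            })
            .collect()
    }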
@ -922,6 +948,8 @@ impl Buffer {
// Compute new suggestions for each line, but only include them in the result
// if they differ from the old suggestion for that line.
let mut language_indent_sizes = language_indent_sizes_by_new_row.iter().peekable();
let mut language_indent_size = IndentSize::default();
for new_edited_row_range in new_edited_row_ranges {
let suggestions = snapshot
.suggest_autoindents(new_edited_row_range.clone())
@ -929,13 +957,22 @@ impl Buffer {
.flatten();
for (new_row, suggestion) in new_edited_row_range.zip(suggestions) {
if let Some(suggestion) = suggestion {
// Find the indent size based on the language for this row.
while let Some((row, size)) = language_indent_sizes.peek() {
if *row > new_row {
break;
}
language_indent_size = *size;
language_indent_sizes.next();
}
let suggested_indent = indent_sizes
.get(&suggestion.basis_row)
.copied()
.unwrap_or_else(|| {
snapshot.indent_size_for_line(suggestion.basis_row)
})
.with_delta(suggestion.delta, request.indent_size);
.with_delta(suggestion.delta, language_indent_size);
if old_suggestions
.get(&new_row)
.map_or(true, |old_indentation| {
@ -1266,7 +1303,6 @@ impl Buffer {
let edit_id = edit_operation.local_timestamp();
if let Some((before_edit, mode)) = autoindent_request {
let indent_size = before_edit.single_indent_size(cx);
let (start_columns, is_block_mode) = match mode {
AutoindentMode::Block {
original_indent_columns: start_columns,
@ -1315,6 +1351,7 @@ impl Buffer {
AutoindentRequestEntry {
first_line_is_new,
original_indent_column: start_column,
indent_size: before_edit.language_indent_size_at(range.start, cx),
range: self.anchor_before(new_start + range_of_insertion_to_indent.start)
..self.anchor_after(new_start + range_of_insertion_to_indent.end),
}
@ -1324,7 +1361,6 @@ impl Buffer {
self.autoindent_requests.push(Arc::new(AutoindentRequest {
before_edit,
entries,
indent_size,
is_block_mode,
}));
}
@ -1642,8 +1678,8 @@ impl BufferSnapshot {
indent_size_for_line(self, row)
}
pub fn single_indent_size(&self, cx: &AppContext) -> IndentSize {
let language_name = self.language().map(|language| language.name());
pub fn language_indent_size_at<T: ToOffset>(&self, position: T, cx: &AppContext) -> IndentSize {
let language_name = self.language_at(position).map(|language| language.name());
let settings = cx.global::<Settings>();
if settings.hard_tabs(language_name.as_deref()) {
IndentSize::tab()
@ -1713,6 +1749,8 @@ impl BufferSnapshot {
if capture.index == config.indent_capture_ix {
start.get_or_insert(Point::from_ts_point(capture.node.start_position()));
end.get_or_insert(Point::from_ts_point(capture.node.end_position()));
} else if Some(capture.index) == config.start_capture_ix {
start = Some(Point::from_ts_point(capture.node.end_position()));
} else if Some(capture.index) == config.end_capture_ix {
end = Some(Point::from_ts_point(capture.node.start_position()));
}
@ -1902,8 +1940,14 @@ impl BufferSnapshot {
}
}
pub fn language(&self) -> Option<&Arc<Language>> {
self.language.as_ref()
pub fn language_at<D: ToOffset>(&self, position: D) -> Option<&Arc<Language>> {
let offset = position.to_offset(self);
self.syntax
.layers_for_range(offset..offset, &self.text)
.filter(|l| l.node.end_byte() > offset)
.last()
.map(|info| info.language)
.or(self.language.as_ref())
}
pub fn surrounding_word<T: ToOffset>(&self, start: T) -> (Range<usize>, Option<CharKind>) {
@ -1938,8 +1982,8 @@ impl BufferSnapshot {
pub fn range_for_syntax_ancestor<T: ToOffset>(&self, range: Range<T>) -> Option<Range<usize>> {
let range = range.start.to_offset(self)..range.end.to_offset(self);
let mut result: Option<Range<usize>> = None;
'outer: for (_, _, node) in self.syntax.layers_for_range(range.clone(), &self.text) {
let mut cursor = node.walk();
'outer: for layer in self.syntax.layers_for_range(range.clone(), &self.text) {
let mut cursor = layer.node.walk();
// Descend to the first leaf that touches the start of the range,
// and if the range is non-empty, extends beyond the start.
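
Because `language_indent_size_at` resolves tab settings through `language_at`, an injected block can use a different indent size than its host file. A rough caller sketch (hypothetical helper, not in this commit; `BufferSnapshot`, `AppContext`, `ToOffset`, and `IndentKind` come from the surrounding crates):

    // Build the indent text for the language at `position`, honoring the
    // per-language tab_size / hard_tabs settings.
    fn indent_text_at<T: ToOffset>(
        snapshot: &BufferSnapshot,
        position: T,
        cx: &AppContext,
    ) -> String {
        let size = snapshot.language_indent_size_at(position, cx);
        match size.kind {
            IndentKind::Space => " ".repeat(size.len as usize),
            IndentKind::Tab => "\t".repeat(size.len as usize),
        }
    }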

View file

@ -26,6 +26,7 @@ use serde_json::Value;
use std::{
any::Any,
cell::RefCell,
fmt::Debug,
mem,
ops::Range,
path::{Path, PathBuf},
@ -135,7 +136,7 @@ impl CachedLspAdapter {
pub async fn label_for_completion(
&self,
completion_item: &lsp::CompletionItem,
language: &Language,
language: &Arc<Language>,
) -> Option<CodeLabel> {
self.adapter
.label_for_completion(completion_item, language)
@ -146,7 +147,7 @@ impl CachedLspAdapter {
&self,
name: &str,
kind: lsp::SymbolKind,
language: &Language,
language: &Arc<Language>,
) -> Option<CodeLabel> {
self.adapter.label_for_symbol(name, kind, language).await
}
@ -175,7 +176,7 @@ pub trait LspAdapter: 'static + Send + Sync {
async fn label_for_completion(
&self,
_: &lsp::CompletionItem,
_: &Language,
_: &Arc<Language>,
) -> Option<CodeLabel> {
None
}
@ -184,7 +185,7 @@ pub trait LspAdapter: 'static + Send + Sync {
&self,
_: &str,
_: lsp::SymbolKind,
_: &Language,
_: &Arc<Language>,
) -> Option<CodeLabel> {
None
}
@ -230,7 +231,10 @@ pub struct LanguageConfig {
pub decrease_indent_pattern: Option<Regex>,
#[serde(default)]
pub autoclose_before: String,
pub line_comment: Option<String>,
#[serde(default)]
pub line_comment: Option<Arc<str>>,
#[serde(default)]
pub block_comment: Option<(Arc<str>, Arc<str>)>,
}
impl Default for LanguageConfig {
@ -244,6 +248,7 @@ impl Default for LanguageConfig {
decrease_indent_pattern: Default::default(),
autoclose_before: Default::default(),
line_comment: Default::default(),
block_comment: Default::default(),
}
}
}
@ -270,7 +275,7 @@ pub struct FakeLspAdapter {
pub disk_based_diagnostics_sources: Vec<String>,
}
#[derive(Clone, Debug, Deserialize)]
#[derive(Clone, Debug, Default, Deserialize)]
pub struct BracketPair {
pub start: String,
pub end: String,
@ -304,6 +309,7 @@ pub struct Grammar {
struct IndentConfig {
query: Query,
indent_capture_ix: u32,
start_capture_ix: Option<u32>,
end_capture_ix: Option<u32>,
}
@ -661,11 +667,13 @@ impl Language {
let grammar = self.grammar_mut();
let query = Query::new(grammar.ts_language, source)?;
let mut indent_capture_ix = None;
let mut start_capture_ix = None;
let mut end_capture_ix = None;
get_capture_indices(
&query,
&mut [
("indent", &mut indent_capture_ix),
("start", &mut start_capture_ix),
("end", &mut end_capture_ix),
],
);
@ -673,6 +681,7 @@ impl Language {
grammar.indents_config = Some(IndentConfig {
query,
indent_capture_ix,
start_capture_ix,
end_capture_ix,
});
}
@ -763,8 +772,15 @@ impl Language {
self.config.name.clone()
}
pub fn line_comment_prefix(&self) -> Option<&str> {
self.config.line_comment.as_deref()
pub fn line_comment_prefix(&self) -> Option<&Arc<str>> {
self.config.line_comment.as_ref()
}
pub fn block_comment_delimiters(&self) -> Option<(&Arc<str>, &Arc<str>)> {
self.config
.block_comment
.as_ref()
.map(|(start, end)| (start, end))
}
pub async fn disk_based_diagnostic_sources(&self) -> &[String] {
@ -789,7 +805,7 @@ impl Language {
}
pub async fn label_for_completion(
&self,
self: &Arc<Self>,
completion: &lsp::CompletionItem,
) -> Option<CodeLabel> {
self.adapter
@ -798,7 +814,11 @@ impl Language {
.await
}
pub async fn label_for_symbol(&self, name: &str, kind: lsp::SymbolKind) -> Option<CodeLabel> {
pub async fn label_for_symbol(
self: &Arc<Self>,
name: &str,
kind: lsp::SymbolKind,
) -> Option<CodeLabel> {
self.adapter
.as_ref()?
.label_for_symbol(name, kind, self)
@ -806,20 +826,17 @@ impl Language {
}
pub fn highlight_text<'a>(
&'a self,
self: &'a Arc<Self>,
text: &'a Rope,
range: Range<usize>,
) -> Vec<(Range<usize>, HighlightId)> {
let mut result = Vec::new();
if let Some(grammar) = &self.grammar {
let tree = grammar.parse_text(text, None);
let captures = SyntaxSnapshot::single_tree_captures(
range.clone(),
text,
&tree,
grammar,
|grammar| grammar.highlights_query.as_ref(),
);
let captures =
SyntaxSnapshot::single_tree_captures(range.clone(), text, &tree, self, |grammar| {
grammar.highlights_query.as_ref()
});
let highlight_maps = vec![grammar.highlight_map()];
let mut offset = 0;
for chunk in BufferChunks::new(text, range, Some((captures, highlight_maps)), vec![]) {
@ -861,6 +878,14 @@ impl Language {
}
}
impl Debug for Language {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Language")
.field("name", &self.config.name)
.finish()
}
}
impl Grammar {
pub fn id(&self) -> usize {
self.id
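
With `line_comment` and `block_comment` both optional and defaulted, a language config can declare either comment flavor or neither. A sketch of an HTML-style config built in code (field names follow `LanguageConfig` above; the delimiters are illustrative, not taken from this commit):

    // Illustrative only: block comments but no line comment; all other fields
    // fall back to LanguageConfig's Default impl.
    fn html_like_config() -> LanguageConfig {
        LanguageConfig {
            name: "HTML".into(),
            line_comment: None,
            block_comment: Some(("<!-- ".into(), " -->".into())),
            ..Default::default()
        }
    }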

View file

@ -92,6 +92,13 @@ struct SyntaxLayer {
language: Arc<Language>,
}
#[derive(Debug)]
pub struct SyntaxLayerInfo<'a> {
pub depth: usize,
pub node: Node<'a>,
pub language: &'a Arc<Language>,
}
#[derive(Debug, Clone)]
struct SyntaxLayerSummary {
min_depth: usize,
@ -473,13 +480,18 @@ impl SyntaxSnapshot {
range: Range<usize>,
text: &'a Rope,
tree: &'a Tree,
grammar: &'a Grammar,
language: &'a Arc<Language>,
query: fn(&Grammar) -> Option<&Query>,
) -> SyntaxMapCaptures<'a> {
SyntaxMapCaptures::new(
range.clone(),
text,
[(grammar, 0, tree.root_node())].into_iter(),
[SyntaxLayerInfo {
language,
depth: 0,
node: tree.root_node(),
}]
.into_iter(),
query,
)
}
@ -513,19 +525,19 @@ impl SyntaxSnapshot {
}
#[cfg(test)]
pub fn layers(&self, buffer: &BufferSnapshot) -> Vec<(&Grammar, usize, Node)> {
self.layers_for_range(0..buffer.len(), buffer)
pub fn layers<'a>(&'a self, buffer: &'a BufferSnapshot) -> Vec<SyntaxLayerInfo> {
self.layers_for_range(0..buffer.len(), buffer).collect()
}
pub fn layers_for_range<'a, T: ToOffset>(
&self,
&'a self,
range: Range<T>,
buffer: &BufferSnapshot,
) -> Vec<(&Grammar, usize, Node)> {
buffer: &'a BufferSnapshot,
) -> impl 'a + Iterator<Item = SyntaxLayerInfo> {
let start = buffer.anchor_before(range.start.to_offset(buffer));
let end = buffer.anchor_after(range.end.to_offset(buffer));
let mut cursor = self.layers.filter::<_, ()>(|summary| {
let mut cursor = self.layers.filter::<_, ()>(move |summary| {
if summary.max_depth > summary.min_depth {
true
} else {
@ -535,23 +547,26 @@ impl SyntaxSnapshot {
}
});
let mut result = Vec::new();
// let mut result = Vec::new();
cursor.next(buffer);
while let Some(layer) = cursor.item() {
if let Some(grammar) = &layer.language.grammar {
result.push((
grammar.as_ref(),
layer.depth,
layer.tree.root_node_with_offset(
std::iter::from_fn(move || {
if let Some(layer) = cursor.item() {
let info = SyntaxLayerInfo {
language: &layer.language,
depth: layer.depth,
node: layer.tree.root_node_with_offset(
layer.range.start.to_offset(buffer),
layer.range.start.to_point(buffer).to_ts_point(),
),
));
};
cursor.next(buffer);
Some(info)
} else {
None
}
cursor.next(buffer)
}
})
result
// result
}
}
@ -559,7 +574,7 @@ impl<'a> SyntaxMapCaptures<'a> {
fn new(
range: Range<usize>,
text: &'a Rope,
layers: impl Iterator<Item = (&'a Grammar, usize, Node<'a>)>,
layers: impl Iterator<Item = SyntaxLayerInfo<'a>>,
query: fn(&Grammar) -> Option<&Query>,
) -> Self {
let mut result = Self {
@ -567,11 +582,19 @@ impl<'a> SyntaxMapCaptures<'a> {
grammars: Vec::new(),
active_layer_count: 0,
};
for (grammar, depth, node) in layers {
let query = if let Some(query) = query(grammar) {
query
} else {
continue;
for SyntaxLayerInfo {
language,
depth,
node,
} in layers
{
let grammar = match &language.grammar {
Some(grammar) => grammar,
None => continue,
};
let query = match query(&grammar) {
Some(query) => query,
None => continue,
};
let mut query_cursor = QueryCursorHandle::new();
@ -678,15 +701,23 @@ impl<'a> SyntaxMapMatches<'a> {
fn new(
range: Range<usize>,
text: &'a Rope,
layers: impl Iterator<Item = (&'a Grammar, usize, Node<'a>)>,
layers: impl Iterator<Item = SyntaxLayerInfo<'a>>,
query: fn(&Grammar) -> Option<&Query>,
) -> Self {
let mut result = Self::default();
for (grammar, depth, node) in layers {
let query = if let Some(query) = query(grammar) {
query
} else {
continue;
for SyntaxLayerInfo {
language,
depth,
node,
} in layers
{
let grammar = match &language.grammar {
Some(grammar) => grammar,
None => continue,
};
let query = match query(&grammar) {
Some(query) => query,
None => continue,
};
let mut query_cursor = QueryCursorHandle::new();
@ -1624,8 +1655,8 @@ mod tests {
let reference_layers = reference_syntax_map.layers(&buffer);
for (edited_layer, reference_layer) in layers.into_iter().zip(reference_layers.into_iter())
{
assert_eq!(edited_layer.2.to_sexp(), reference_layer.2.to_sexp());
assert_eq!(edited_layer.2.range(), reference_layer.2.range());
assert_eq!(edited_layer.node.to_sexp(), reference_layer.node.to_sexp());
assert_eq!(edited_layer.node.range(), reference_layer.node.range());
}
}
@ -1770,13 +1801,13 @@ mod tests {
mutated_layers.into_iter().zip(reference_layers.into_iter())
{
assert_eq!(
edited_layer.2.to_sexp(),
reference_layer.2.to_sexp(),
edited_layer.node.to_sexp(),
reference_layer.node.to_sexp(),
"different layer at step {i}"
);
assert_eq!(
edited_layer.2.range(),
reference_layer.2.range(),
edited_layer.node.range(),
reference_layer.node.range(),
"different layer at step {i}"
);
}
@ -1822,13 +1853,15 @@ mod tests {
range: Range<Point>,
expected_layers: &[&str],
) {
let layers = syntax_map.layers_for_range(range, &buffer);
let layers = syntax_map
.layers_for_range(range, &buffer)
.collect::<Vec<_>>();
assert_eq!(
layers.len(),
expected_layers.len(),
"wrong number of layers"
);
for (i, ((_, _, node), expected_s_exp)) in
for (i, (SyntaxLayerInfo { node, .. }, expected_s_exp)) in
layers.iter().zip(expected_layers.iter()).enumerate()
{
let actual_s_exp = node.to_sexp();
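
`layers_for_range` now yields `SyntaxLayerInfo` values lazily instead of returning a collected `Vec` of `(grammar, depth, node)` tuples, so callers can stop at the first interesting layer. A small consumer sketch (hypothetical function; lifetimes follow the new signature):

    // Illustrative only: list the depth and language of every syntax layer
    // intersecting `range`, shallowest first.
    fn layer_languages<'a>(
        syntax: &'a SyntaxSnapshot,
        buffer: &'a BufferSnapshot,
        range: Range<usize>,
    ) -> Vec<(usize, &'a Arc<Language>)> {
        syntax
            .layers_for_range(range, buffer)
            .map(|layer| (layer.depth, layer.language))
            .collect()
    }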

View file

@ -14,7 +14,7 @@ use std::{
};
use text::network::Network;
use unindent::Unindent as _;
use util::post_inc;
use util::{post_inc, test::marked_text_ranges};
#[cfg(test)]
#[ctor::ctor]
@ -1035,6 +1035,120 @@ fn test_autoindent_language_without_indents_query(cx: &mut MutableAppContext) {
});
}
#[gpui::test]
fn test_autoindent_with_injected_languages(cx: &mut MutableAppContext) {
cx.set_global({
let mut settings = Settings::test(cx);
settings.language_overrides.extend([
(
"HTML".into(),
settings::EditorSettings {
tab_size: Some(2.try_into().unwrap()),
..Default::default()
},
),
(
"JavaScript".into(),
settings::EditorSettings {
tab_size: Some(8.try_into().unwrap()),
..Default::default()
},
),
]);
settings
});
let html_language = Arc::new(
Language::new(
LanguageConfig {
name: "HTML".into(),
..Default::default()
},
Some(tree_sitter_html::language()),
)
.with_indents_query(
"
(element
(start_tag) @start
(end_tag)? @end) @indent
",
)
.unwrap()
.with_injection_query(
r#"
(script_element
(raw_text) @content
(#set! "language" "javascript"))
"#,
)
.unwrap(),
);
let javascript_language = Arc::new(
Language::new(
LanguageConfig {
name: "JavaScript".into(),
..Default::default()
},
Some(tree_sitter_javascript::language()),
)
.with_indents_query(
r#"
(object "}" @end) @indent
"#,
)
.unwrap(),
);
let language_registry = Arc::new(LanguageRegistry::test());
language_registry.add(html_language.clone());
language_registry.add(javascript_language.clone());
cx.add_model(|cx| {
let (text, ranges) = marked_text_ranges(
&"
<div>ˇ
</div>
<script>
init({ˇ
})
</script>
<span>ˇ
</span>
"
.unindent(),
false,
);
let mut buffer = Buffer::new(0, text, cx);
buffer.set_language_registry(language_registry);
buffer.set_language(Some(html_language), cx);
buffer.edit(
ranges.into_iter().map(|range| (range, "\na")),
Some(AutoindentMode::EachLine),
cx,
);
assert_eq!(
buffer.text(),
"
<div>
a
</div>
<script>
init({
a
})
</script>
<span>
a
</span>
"
.unindent()
);
buffer
});
}
#[gpui::test]
fn test_serialization(cx: &mut gpui::MutableAppContext) {
let mut now = Instant::now();
@ -1449,7 +1563,7 @@ fn get_tree_sexp(buffer: &ModelHandle<Buffer>, cx: &gpui::TestAppContext) -> Str
buffer.read_with(cx, |buffer, _| {
let snapshot = buffer.snapshot();
let layers = snapshot.syntax.layers(buffer.as_text_snapshot());
layers[0].2.to_sexp()
layers[0].node.to_sexp()
})
}
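
The injected-language test above only checks the resulting indentation; a natural follow-up check, shown here as a hypothetical sketch that is not part of this commit, is that `language_at` reports the injected language inside the script element:

    // Hypothetical extra assertion, reusing the test's buffer: inside the
    // <script> element the injected JavaScript layer should win; elsewhere the
    // base HTML language applies.
    let snapshot = buffer.snapshot();
    let js_offset = snapshot.text().find("init").unwrap();
    assert_eq!(
        snapshot.language_at(js_offset).map(|l| l.name()),
        Some("JavaScript".into())
    );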