diff --git a/Cargo.lock b/Cargo.lock index 19c73433ed..5caef13355 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4813,6 +4813,7 @@ dependencies = [ "pretty_assertions", "project", "rand 0.8.5", + "regex", "release_channel", "rpc", "schemars", diff --git a/crates/editor/Cargo.toml b/crates/editor/Cargo.toml index c42b58729e..bea83b1df8 100644 --- a/crates/editor/Cargo.toml +++ b/crates/editor/Cargo.toml @@ -61,6 +61,7 @@ parking_lot.workspace = true pretty_assertions.workspace = true project.workspace = true rand.workspace = true +regex.workspace = true rpc.workspace = true schemars.workspace = true serde.workspace = true diff --git a/crates/editor/src/editor.rs b/crates/editor/src/editor.rs index 66f5f2d5fa..fe904ab4ec 100644 --- a/crates/editor/src/editor.rs +++ b/crates/editor/src/editor.rs @@ -11541,66 +11541,90 @@ impl Editor { let language_settings = buffer.language_settings_at(selection.head(), cx); let language_scope = buffer.language_scope_at(selection.head()); + let indent_and_prefix_for_row = + |row: u32| -> (IndentSize, Option, Option) { + let indent = buffer.indent_size_for_line(MultiBufferRow(row)); + let (comment_prefix, rewrap_prefix) = + if let Some(language_scope) = &language_scope { + let indent_end = Point::new(row, indent.len); + let comment_prefix = language_scope + .line_comment_prefixes() + .iter() + .find(|prefix| buffer.contains_str_at(indent_end, prefix)) + .map(|prefix| prefix.to_string()); + let line_end = Point::new(row, buffer.line_len(MultiBufferRow(row))); + let line_text_after_indent = buffer + .text_for_range(indent_end..line_end) + .collect::(); + let rewrap_prefix = language_scope + .rewrap_prefixes() + .iter() + .find_map(|prefix_regex| { + prefix_regex.find(&line_text_after_indent).map(|mat| { + if mat.start() == 0 { + Some(mat.as_str().to_string()) + } else { + None + } + }) + }) + .flatten(); + (comment_prefix, rewrap_prefix) + } else { + (None, None) + }; + (indent, comment_prefix, rewrap_prefix) + }; + let mut ranges = 
Vec::new(); - let mut current_range_start = first_row; let from_empty_selection = selection.is_empty(); + let mut current_range_start = first_row; let mut prev_row = first_row; - let mut prev_indent = buffer.indent_size_for_line(MultiBufferRow(first_row)); - let mut prev_comment_prefix = if let Some(language_scope) = &language_scope { - let indent = buffer.indent_size_for_line(MultiBufferRow(first_row)); - let indent_end = Point::new(first_row, indent.len); - language_scope - .line_comment_prefixes() - .iter() - .find(|prefix| buffer.contains_str_at(indent_end, prefix)) - .cloned() - } else { - None - }; + let ( + mut current_range_indent, + mut current_range_comment_prefix, + mut current_range_rewrap_prefix, + ) = indent_and_prefix_for_row(first_row); for row in non_blank_rows_iter.skip(1) { let has_paragraph_break = row > prev_row + 1; - let row_indent = buffer.indent_size_for_line(MultiBufferRow(row)); - let row_comment_prefix = if let Some(language_scope) = &language_scope { - let indent = buffer.indent_size_for_line(MultiBufferRow(row)); - let indent_end = Point::new(row, indent.len); - language_scope - .line_comment_prefixes() - .iter() - .find(|prefix| buffer.contains_str_at(indent_end, prefix)) - .cloned() - } else { - None - }; + let (row_indent, row_comment_prefix, row_rewrap_prefix) = + indent_and_prefix_for_row(row); - let has_boundary_change = - row_indent != prev_indent || row_comment_prefix != prev_comment_prefix; + let has_indent_change = row_indent != current_range_indent; + let has_comment_change = row_comment_prefix != current_range_comment_prefix; + + let has_boundary_change = has_comment_change + || row_rewrap_prefix.is_some() + || (has_indent_change && current_range_comment_prefix.is_some()); if has_paragraph_break || has_boundary_change { ranges.push(( language_settings.clone(), Point::new(current_range_start, 0) ..Point::new(prev_row, buffer.line_len(MultiBufferRow(prev_row))), - prev_indent, - prev_comment_prefix.clone(), + 
current_range_indent, + current_range_comment_prefix.clone(), + current_range_rewrap_prefix.clone(), from_empty_selection, )); current_range_start = row; + current_range_indent = row_indent; + current_range_comment_prefix = row_comment_prefix; + current_range_rewrap_prefix = row_rewrap_prefix; } - prev_row = row; - prev_indent = row_indent; - prev_comment_prefix = row_comment_prefix; } ranges.push(( language_settings.clone(), Point::new(current_range_start, 0) ..Point::new(prev_row, buffer.line_len(MultiBufferRow(prev_row))), - prev_indent, - prev_comment_prefix, + current_range_indent, + current_range_comment_prefix, + current_range_rewrap_prefix, from_empty_selection, )); @@ -11610,8 +11634,14 @@ impl Editor { let mut edits = Vec::new(); let mut rewrapped_row_ranges = Vec::>::new(); - for (language_settings, wrap_range, indent_size, comment_prefix, from_empty_selection) in - wrap_ranges + for ( + language_settings, + wrap_range, + indent_size, + comment_prefix, + rewrap_prefix, + from_empty_selection, + ) in wrap_ranges { let mut start_row = wrap_range.start.row; let mut end_row = wrap_range.end.row; @@ -11627,12 +11657,16 @@ impl Editor { let tab_size = language_settings.tab_size; - let mut line_prefix = indent_size.chars().collect::(); + let indent_prefix = indent_size.chars().collect::(); + let mut line_prefix = indent_prefix.clone(); let mut inside_comment = false; if let Some(prefix) = &comment_prefix { line_prefix.push_str(prefix); inside_comment = true; } + if let Some(prefix) = &rewrap_prefix { + line_prefix.push_str(prefix); + } let allow_rewrap_based_on_language = match language_settings.allow_rewrap { RewrapBehavior::InComments => inside_comment, @@ -11679,12 +11713,18 @@ impl Editor { let selection_text = buffer.text_for_range(start..end).collect::(); let Some(lines_without_prefixes) = selection_text .lines() - .map(|line| { - line.strip_prefix(&line_prefix) - .or_else(|| line.trim_start().strip_prefix(&line_prefix.trim_start())) - .with_context(|| { 
- format!("line did not start with prefix {line_prefix:?}: {line:?}") - }) + .enumerate() + .map(|(ix, line)| { + let line_trimmed = line.trim_start(); + if rewrap_prefix.is_some() && ix > 0 { + Ok(line_trimmed) + } else { + line_trimmed + .strip_prefix(&line_prefix.trim_start()) + .with_context(|| { + format!("line did not start with prefix {line_prefix:?}: {line:?}") + }) + } }) .collect::, _>>() .log_err() @@ -11697,8 +11737,16 @@ impl Editor { .language_settings_at(Point::new(start_row, 0), cx) .preferred_line_length as usize }); + + let subsequent_lines_prefix = if let Some(rewrap_prefix_str) = &rewrap_prefix { + format!("{}{}", indent_prefix, " ".repeat(rewrap_prefix_str.len())) + } else { + line_prefix.clone() + }; + let wrapped_text = wrap_with_prefix( line_prefix, + subsequent_lines_prefix, lines_without_prefixes.join("\n"), wrap_column, tab_size, @@ -21200,18 +21248,22 @@ fn test_word_breaking_tokenizer() { } fn wrap_with_prefix( - line_prefix: String, + first_line_prefix: String, + subsequent_lines_prefix: String, unwrapped_text: String, wrap_column: usize, tab_size: NonZeroU32, preserve_existing_whitespace: bool, ) -> String { - let line_prefix_len = char_len_with_expanded_tabs(0, &line_prefix, tab_size); + let first_line_prefix_len = char_len_with_expanded_tabs(0, &first_line_prefix, tab_size); + let subsequent_lines_prefix_len = + char_len_with_expanded_tabs(0, &subsequent_lines_prefix, tab_size); let mut wrapped_text = String::new(); - let mut current_line = line_prefix.clone(); + let mut current_line = first_line_prefix.clone(); + let mut is_first_line = true; let tokenizer = WordBreakingTokenizer::new(&unwrapped_text); - let mut current_line_len = line_prefix_len; + let mut current_line_len = first_line_prefix_len; let mut in_whitespace = false; for token in tokenizer { let have_preceding_whitespace = in_whitespace; @@ -21221,13 +21273,19 @@ fn wrap_with_prefix( grapheme_len, } => { in_whitespace = false; + let current_prefix_len = if is_first_line 
{ + first_line_prefix_len + } else { + subsequent_lines_prefix_len + }; if current_line_len + grapheme_len > wrap_column - && current_line_len != line_prefix_len + && current_line_len != current_prefix_len { wrapped_text.push_str(current_line.trim_end()); wrapped_text.push('\n'); - current_line.truncate(line_prefix.len()); - current_line_len = line_prefix_len; + is_first_line = false; + current_line = subsequent_lines_prefix.clone(); + current_line_len = subsequent_lines_prefix_len; } current_line.push_str(token); current_line_len += grapheme_len; @@ -21244,32 +21302,46 @@ fn wrap_with_prefix( token = " "; grapheme_len = 1; } + let current_prefix_len = if is_first_line { + first_line_prefix_len + } else { + subsequent_lines_prefix_len + }; if current_line_len + grapheme_len > wrap_column { wrapped_text.push_str(current_line.trim_end()); wrapped_text.push('\n'); - current_line.truncate(line_prefix.len()); - current_line_len = line_prefix_len; - } else if current_line_len != line_prefix_len || preserve_existing_whitespace { + is_first_line = false; + current_line = subsequent_lines_prefix.clone(); + current_line_len = subsequent_lines_prefix_len; + } else if current_line_len != current_prefix_len || preserve_existing_whitespace { current_line.push_str(token); current_line_len += grapheme_len; } } WordBreakToken::Newline => { in_whitespace = true; + let current_prefix_len = if is_first_line { + first_line_prefix_len + } else { + subsequent_lines_prefix_len + }; if preserve_existing_whitespace { wrapped_text.push_str(current_line.trim_end()); wrapped_text.push('\n'); - current_line.truncate(line_prefix.len()); - current_line_len = line_prefix_len; + is_first_line = false; + current_line = subsequent_lines_prefix.clone(); + current_line_len = subsequent_lines_prefix_len; } else if have_preceding_whitespace { continue; - } else if current_line_len + 1 > wrap_column && current_line_len != line_prefix_len + } else if current_line_len + 1 > wrap_column + && current_line_len 
!= current_prefix_len { wrapped_text.push_str(current_line.trim_end()); wrapped_text.push('\n'); - current_line.truncate(line_prefix.len()); - current_line_len = line_prefix_len; - } else if current_line_len != line_prefix_len { + is_first_line = false; + current_line = subsequent_lines_prefix.clone(); + current_line_len = subsequent_lines_prefix_len; + } else if current_line_len != current_prefix_len { current_line.push(' '); current_line_len += 1; } @@ -21287,6 +21359,7 @@ fn wrap_with_prefix( fn test_wrap_with_prefix() { assert_eq!( wrap_with_prefix( + "# ".to_string(), "# ".to_string(), "abcdefg".to_string(), 4, @@ -21297,6 +21370,7 @@ fn test_wrap_with_prefix() { ); assert_eq!( wrap_with_prefix( + "".to_string(), "".to_string(), "\thello world".to_string(), 8, @@ -21307,6 +21381,7 @@ fn test_wrap_with_prefix() { ); assert_eq!( wrap_with_prefix( + "// ".to_string(), "// ".to_string(), "xx \nyy zz aa bb cc".to_string(), 12, @@ -21317,6 +21392,7 @@ fn test_wrap_with_prefix() { ); assert_eq!( wrap_with_prefix( + String::new(), String::new(), "这是什么 \n 钢笔".to_string(), 3, diff --git a/crates/editor/src/editor_tests.rs b/crates/editor/src/editor_tests.rs index 404284c4b0..4f3a9bcd35 100644 --- a/crates/editor/src/editor_tests.rs +++ b/crates/editor/src/editor_tests.rs @@ -5210,6 +5210,10 @@ async fn test_rewrap(cx: &mut TestAppContext) { let markdown_language = Arc::new(Language::new( LanguageConfig { name: "Markdown".into(), + rewrap_prefixes: vec![ + regex::Regex::new("\\d+\\.\\s+").unwrap(), + regex::Regex::new("[-*+]\\s+").unwrap(), + ], ..LanguageConfig::default() }, None, @@ -5372,7 +5376,82 @@ async fn test_rewrap(cx: &mut TestAppContext) { A long long long line of markdown text to wrap.ˇ "}, - markdown_language, + markdown_language.clone(), + &mut cx, + ); + + // Test that rewrapping boundary works and preserves relative indent for Markdown documents + assert_rewrap( + indoc! {" + «1. 
This is a numbered list item that is very long and needs to be wrapped properly. + 2. This is a numbered list item that is very long and needs to be wrapped properly. + - This is an unordered list item that is also very long and should not merge with the numbered item.ˇ» + "}, + indoc! {" + «1. This is a numbered list item that is + very long and needs to be wrapped + properly. + 2. This is a numbered list item that is + very long and needs to be wrapped + properly. + - This is an unordered list item that is + also very long and should not merge + with the numbered item.ˇ» + "}, + markdown_language.clone(), + &mut cx, + ); + + // Test that rewrapping adds indents for a rewrap boundary if they do not already exist. + assert_rewrap( + indoc! {" + «1. This is a numbered list item that is + very long and needs to be wrapped + properly. + 2. This is a numbered list item that is + very long and needs to be wrapped + properly. + - This is an unordered list item that is + also very long and should not merge with + the numbered item.ˇ» + "}, + indoc! {" + «1. This is a numbered list item that is + very long and needs to be wrapped + properly. + 2. This is a numbered list item that is + very long and needs to be wrapped + properly. + - This is an unordered list item that is + also very long and should not merge + with the numbered item.ˇ» + "}, + markdown_language.clone(), + &mut cx, + ); + + // Test that rewrapping maintains indents even when they already exist. + assert_rewrap( + indoc! {" + «1. This is a numbered list + item that is very long and needs to be wrapped properly. + 2. This is a numbered list + item that is very long and needs to be wrapped properly. + - This is an unordered list item that is also very long and + should not merge with the numbered item.ˇ» + "}, + indoc! {" + «1. This is a numbered list item that is + very long and needs to be wrapped + properly. + 2. This is a numbered list item that is + very long and needs to be wrapped + properly. 
+ - This is an unordered list item that is + also very long and should not merge + with the numbered item.ˇ» + "}, + markdown_language.clone(), &mut cx, ); diff --git a/crates/language/src/language.rs b/crates/language/src/language.rs index 951a0dbddc..1ad057ff41 100644 --- a/crates/language/src/language.rs +++ b/crates/language/src/language.rs @@ -39,7 +39,7 @@ use lsp::{CodeActionKind, InitializeParams, LanguageServerBinary, LanguageServer pub use manifest::{ManifestDelegate, ManifestName, ManifestProvider, ManifestQuery}; use parking_lot::Mutex; use regex::Regex; -use schemars::{JsonSchema, json_schema}; +use schemars::{JsonSchema, SchemaGenerator, json_schema}; use serde::{Deserialize, Deserializer, Serialize, Serializer, de}; use serde_json::Value; use settings::WorktreeId; @@ -730,6 +730,13 @@ pub struct LanguageConfig { /// Starting and closing characters of a block comment. #[serde(default)] pub block_comment: Option<(Arc, Arc)>, + /// A list of additional regex patterns that should be treated as prefixes + /// for creating boundaries during rewrapping, ensuring content from one + /// prefixed section doesn't merge with another (e.g., markdown list items). + /// By default, Zed treats paragraph breaks and comment prefixes as boundaries. + #[serde(default, deserialize_with = "deserialize_regex_vec")] + #[schemars(schema_with = "regex_vec_json_schema")] + pub rewrap_prefixes: Vec, /// A list of language servers that are allowed to run on subranges of a given language. 
#[serde(default)] pub scope_opt_in_language_servers: Vec, @@ -909,6 +916,7 @@ impl Default for LanguageConfig { autoclose_before: Default::default(), line_comments: Default::default(), block_comment: Default::default(), + rewrap_prefixes: Default::default(), scope_opt_in_language_servers: Default::default(), overrides: Default::default(), word_characters: Default::default(), @@ -955,6 +963,22 @@ where } } +fn deserialize_regex_vec<'de, D: Deserializer<'de>>(d: D) -> Result, D::Error> { + let sources = Vec::::deserialize(d)?; + let mut regexes = Vec::new(); + for source in sources { + regexes.push(regex::Regex::new(&source).map_err(de::Error::custom)?); + } + Ok(regexes) +} + +fn regex_vec_json_schema(_: &mut SchemaGenerator) -> schemars::Schema { + json_schema!({ + "type": "array", + "items": { "type": "string" } + }) +} + #[doc(hidden)] #[cfg(any(test, feature = "test-support"))] pub struct FakeLspAdapter { @@ -1831,6 +1855,14 @@ impl LanguageScope { .map(|e| (&e.0, &e.1)) } + /// Returns additional regex patterns that act as prefix markers for creating + /// boundaries during rewrapping. + /// + /// By default, Zed treats paragraph breaks and comment prefixes as boundaries. + pub fn rewrap_prefixes(&self) -> &[Regex] { + &self.language.config.rewrap_prefixes + } + /// Returns a list of language-specific word characters. /// /// By default, Zed treats alphanumeric characters (and '_') as word characters for diff --git a/crates/languages/src/markdown/config.toml b/crates/languages/src/markdown/config.toml index 00c4fafecd..fec542c4d6 100644 --- a/crates/languages/src/markdown/config.toml +++ b/crates/languages/src/markdown/config.toml @@ -13,6 +13,12 @@ brackets = [ { start = "'", end = "'", close = false, newline = false }, { start = "`", end = "`", close = false, newline = false }, ] +rewrap_prefixes = [ + "[-*+]\\s+", + "\\d+\\.\\s+", + ">\\s*", + "[-*+]\\s+\\[[\\sx]\\]\\s+" +] auto_indent_on_paste = false auto_indent_using_last_non_empty_line = false