diff --git a/Cargo.lock b/Cargo.lock index 310c9d1fdd..17518e119a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4729,7 +4729,6 @@ dependencies = [ "tree-sitter-rust", "tree-sitter-typescript", "ui", - "unicode-script", "unicode-segmentation", "unindent", "url", @@ -17106,6 +17105,8 @@ dependencies = [ "tempfile", "tendril", "unicase", + "unicode-script", + "unicode-segmentation", "util_macros", "walkdir", "workspace-hack", diff --git a/assets/settings/default.json b/assets/settings/default.json index e9032e9c19..22cc6a753e 100644 --- a/assets/settings/default.json +++ b/assets/settings/default.json @@ -1431,7 +1431,9 @@ "language_servers": ["erlang-ls", "!elp", "..."] }, "Git Commit": { - "allow_rewrap": "anywhere" + "allow_rewrap": "anywhere", + "preferred_line_length": 72, + "soft_wrap": "bounded" }, "Go": { "code_actions_on_format": { diff --git a/crates/editor/Cargo.toml b/crates/editor/Cargo.toml index cd1db877e7..ccaeee9dd6 100644 --- a/crates/editor/Cargo.toml +++ b/crates/editor/Cargo.toml @@ -82,7 +82,6 @@ tree-sitter-rust = { workspace = true, optional = true } tree-sitter-typescript = { workspace = true, optional = true } tree-sitter-python = { workspace = true, optional = true } unicode-segmentation.workspace = true -unicode-script.workspace = true unindent = { workspace = true, optional = true } ui.workspace = true url.workspace = true diff --git a/crates/editor/src/editor.rs b/crates/editor/src/editor.rs index 355bcb0bd6..366df3b97d 100644 --- a/crates/editor/src/editor.rs +++ b/crates/editor/src/editor.rs @@ -201,7 +201,7 @@ use ui::{ ButtonSize, ButtonStyle, ContextMenu, Disclosure, IconButton, IconButtonShape, IconName, IconSize, Indicator, Key, Tooltip, h_flex, prelude::*, }; -use util::{RangeExt, ResultExt, TryFutureExt, maybe, post_inc}; +use util::{RangeExt, ResultExt, TryFutureExt, maybe, post_inc, wrap_with_prefix}; use workspace::{ CollaboratorId, Item as WorkspaceItem, ItemId, ItemNavHistory, OpenInTerminal, OpenTerminal, 
RestoreOnStartupBehavior, SERIALIZATION_THROTTLE_TIME, SplitDirection, TabBarSettings, Toast, @@ -19440,347 +19440,6 @@ fn update_uncommitted_diff_for_buffer( }) } -fn char_len_with_expanded_tabs(offset: usize, text: &str, tab_size: NonZeroU32) -> usize { - let tab_size = tab_size.get() as usize; - let mut width = offset; - - for ch in text.chars() { - width += if ch == '\t' { - tab_size - (width % tab_size) - } else { - 1 - }; - } - - width - offset -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_string_size_with_expanded_tabs() { - let nz = |val| NonZeroU32::new(val).unwrap(); - assert_eq!(char_len_with_expanded_tabs(0, "", nz(4)), 0); - assert_eq!(char_len_with_expanded_tabs(0, "hello", nz(4)), 5); - assert_eq!(char_len_with_expanded_tabs(0, "\thello", nz(4)), 9); - assert_eq!(char_len_with_expanded_tabs(0, "abc\tab", nz(4)), 6); - assert_eq!(char_len_with_expanded_tabs(0, "hello\t", nz(4)), 8); - assert_eq!(char_len_with_expanded_tabs(0, "\t\t", nz(8)), 16); - assert_eq!(char_len_with_expanded_tabs(0, "x\t", nz(8)), 8); - assert_eq!(char_len_with_expanded_tabs(7, "x\t", nz(8)), 9); - } -} - -/// Tokenizes a string into runs of text that should stick together, or that is whitespace. -struct WordBreakingTokenizer<'a> { - input: &'a str, -} - -impl<'a> WordBreakingTokenizer<'a> { - fn new(input: &'a str) -> Self { - Self { input } - } -} - -fn is_char_ideographic(ch: char) -> bool { - use unicode_script::Script::*; - use unicode_script::UnicodeScript; - matches!(ch.script(), Han | Tangut | Yi) -} - -fn is_grapheme_ideographic(text: &str) -> bool { - text.chars().any(is_char_ideographic) -} - -fn is_grapheme_whitespace(text: &str) -> bool { - text.chars().any(|x| x.is_whitespace()) -} - -fn should_stay_with_preceding_ideograph(text: &str) -> bool { - text.chars().next().map_or(false, |ch| { - matches!(ch, '。' | '、' | ',' | '?' | '!' 
| ':' | ';' | '…') - }) -} - -#[derive(PartialEq, Eq, Debug, Clone, Copy)] -enum WordBreakToken<'a> { - Word { token: &'a str, grapheme_len: usize }, - InlineWhitespace { token: &'a str, grapheme_len: usize }, - Newline, -} - -impl<'a> Iterator for WordBreakingTokenizer<'a> { - /// Yields a span, the count of graphemes in the token, and whether it was - /// whitespace. Note that it also breaks at word boundaries. - type Item = WordBreakToken<'a>; - - fn next(&mut self) -> Option { - use unicode_segmentation::UnicodeSegmentation; - if self.input.is_empty() { - return None; - } - - let mut iter = self.input.graphemes(true).peekable(); - let mut offset = 0; - let mut grapheme_len = 0; - if let Some(first_grapheme) = iter.next() { - let is_newline = first_grapheme == "\n"; - let is_whitespace = is_grapheme_whitespace(first_grapheme); - offset += first_grapheme.len(); - grapheme_len += 1; - if is_grapheme_ideographic(first_grapheme) && !is_whitespace { - if let Some(grapheme) = iter.peek().copied() { - if should_stay_with_preceding_ideograph(grapheme) { - offset += grapheme.len(); - grapheme_len += 1; - } - } - } else { - let mut words = self.input[offset..].split_word_bound_indices().peekable(); - let mut next_word_bound = words.peek().copied(); - if next_word_bound.map_or(false, |(i, _)| i == 0) { - next_word_bound = words.next(); - } - while let Some(grapheme) = iter.peek().copied() { - if next_word_bound.map_or(false, |(i, _)| i == offset) { - break; - }; - if is_grapheme_whitespace(grapheme) != is_whitespace - || (grapheme == "\n") != is_newline - { - break; - }; - offset += grapheme.len(); - grapheme_len += 1; - iter.next(); - } - } - let token = &self.input[..offset]; - self.input = &self.input[offset..]; - if token == "\n" { - Some(WordBreakToken::Newline) - } else if is_whitespace { - Some(WordBreakToken::InlineWhitespace { - token, - grapheme_len, - }) - } else { - Some(WordBreakToken::Word { - token, - grapheme_len, - }) - } - } else { - None - } - } -} - 
-#[test] -fn test_word_breaking_tokenizer() { - let tests: &[(&str, &[WordBreakToken<'static>])] = &[ - ("", &[]), - (" ", &[whitespace(" ", 2)]), - ("Ʒ", &[word("Ʒ", 1)]), - ("Ǽ", &[word("Ǽ", 1)]), - ("⋑", &[word("⋑", 1)]), - ("⋑⋑", &[word("⋑⋑", 2)]), - ( - "原理,进而", - &[word("原", 1), word("理,", 2), word("进", 1), word("而", 1)], - ), - ( - "hello world", - &[word("hello", 5), whitespace(" ", 1), word("world", 5)], - ), - ( - "hello, world", - &[word("hello,", 6), whitespace(" ", 1), word("world", 5)], - ), - ( - " hello world", - &[ - whitespace(" ", 2), - word("hello", 5), - whitespace(" ", 1), - word("world", 5), - ], - ), - ( - "这是什么 \n 钢笔", - &[ - word("这", 1), - word("是", 1), - word("什", 1), - word("么", 1), - whitespace(" ", 1), - newline(), - whitespace(" ", 1), - word("钢", 1), - word("笔", 1), - ], - ), - (" mutton", &[whitespace(" ", 1), word("mutton", 6)]), - ]; - - fn word(token: &'static str, grapheme_len: usize) -> WordBreakToken<'static> { - WordBreakToken::Word { - token, - grapheme_len, - } - } - - fn whitespace(token: &'static str, grapheme_len: usize) -> WordBreakToken<'static> { - WordBreakToken::InlineWhitespace { - token, - grapheme_len, - } - } - - fn newline() -> WordBreakToken<'static> { - WordBreakToken::Newline - } - - for (input, result) in tests { - assert_eq!( - WordBreakingTokenizer::new(input) - .collect::>() - .as_slice(), - *result, - ); - } -} - -fn wrap_with_prefix( - line_prefix: String, - unwrapped_text: String, - wrap_column: usize, - tab_size: NonZeroU32, - preserve_existing_whitespace: bool, -) -> String { - let line_prefix_len = char_len_with_expanded_tabs(0, &line_prefix, tab_size); - let mut wrapped_text = String::new(); - let mut current_line = line_prefix.clone(); - - let tokenizer = WordBreakingTokenizer::new(&unwrapped_text); - let mut current_line_len = line_prefix_len; - let mut in_whitespace = false; - for token in tokenizer { - let have_preceding_whitespace = in_whitespace; - match token { - WordBreakToken::Word { - 
token, - grapheme_len, - } => { - in_whitespace = false; - if current_line_len + grapheme_len > wrap_column - && current_line_len != line_prefix_len - { - wrapped_text.push_str(current_line.trim_end()); - wrapped_text.push('\n'); - current_line.truncate(line_prefix.len()); - current_line_len = line_prefix_len; - } - current_line.push_str(token); - current_line_len += grapheme_len; - } - WordBreakToken::InlineWhitespace { - mut token, - mut grapheme_len, - } => { - in_whitespace = true; - if have_preceding_whitespace && !preserve_existing_whitespace { - continue; - } - if !preserve_existing_whitespace { - token = " "; - grapheme_len = 1; - } - if current_line_len + grapheme_len > wrap_column { - wrapped_text.push_str(current_line.trim_end()); - wrapped_text.push('\n'); - current_line.truncate(line_prefix.len()); - current_line_len = line_prefix_len; - } else if current_line_len != line_prefix_len || preserve_existing_whitespace { - current_line.push_str(token); - current_line_len += grapheme_len; - } - } - WordBreakToken::Newline => { - in_whitespace = true; - if preserve_existing_whitespace { - wrapped_text.push_str(current_line.trim_end()); - wrapped_text.push('\n'); - current_line.truncate(line_prefix.len()); - current_line_len = line_prefix_len; - } else if have_preceding_whitespace { - continue; - } else if current_line_len + 1 > wrap_column && current_line_len != line_prefix_len - { - wrapped_text.push_str(current_line.trim_end()); - wrapped_text.push('\n'); - current_line.truncate(line_prefix.len()); - current_line_len = line_prefix_len; - } else if current_line_len != line_prefix_len { - current_line.push(' '); - current_line_len += 1; - } - } - } - } - - if !current_line.is_empty() { - wrapped_text.push_str(&current_line); - } - wrapped_text -} - -#[test] -fn test_wrap_with_prefix() { - assert_eq!( - wrap_with_prefix( - "# ".to_string(), - "abcdefg".to_string(), - 4, - NonZeroU32::new(4).unwrap(), - false, - ), - "# abcdefg" - ); - assert_eq!( - wrap_with_prefix(
- "".to_string(), - "\thello world".to_string(), - 8, - NonZeroU32::new(4).unwrap(), - false, - ), - "hello\nworld" - ); - assert_eq!( - wrap_with_prefix( - "// ".to_string(), - "xx \nyy zz aa bb cc".to_string(), - 12, - NonZeroU32::new(4).unwrap(), - false, - ), - "// xx yy zz\n// aa bb cc" - ); - assert_eq!( - wrap_with_prefix( - String::new(), - "这是什么 \n 钢笔".to_string(), - 3, - NonZeroU32::new(4).unwrap(), - false, - ), - "这是什\n么 钢\n笔" - ); -} - pub trait CollaborationHub { fn collaborators<'a>(&self, cx: &'a App) -> &'a HashMap; fn user_participant_indices<'a>(&self, cx: &'a App) -> &'a HashMap; diff --git a/crates/editor/src/element.rs b/crates/editor/src/element.rs index 368b79dbc7..be29ff624c 100644 --- a/crates/editor/src/element.rs +++ b/crates/editor/src/element.rs @@ -7396,10 +7396,7 @@ impl Element for EditorElement { editor.gutter_dimensions = gutter_dimensions; editor.set_visible_line_count(bounds.size.height / line_height, window, cx); - if matches!( - editor.mode, - EditorMode::AutoHeight { .. } | EditorMode::Minimap { .. } - ) { + if matches!(editor.mode, EditorMode::Minimap { .. 
}) { snapshot } else { let wrap_width_for = |column: u32| (column as f32 * em_advance).ceil(); @@ -9390,6 +9387,7 @@ fn compute_auto_height_layout( let font_size = style.text.font_size.to_pixels(window.rem_size()); let line_height = style.text.line_height_in_pixels(window.rem_size()); let em_width = window.text_system().em_width(font_id, font_size).unwrap(); + let em_advance = window.text_system().em_advance(font_id, font_size).unwrap(); let mut snapshot = editor.snapshot(window, cx); let gutter_dimensions = snapshot @@ -9406,10 +9404,18 @@ fn compute_auto_height_layout( let overscroll = size(em_width, px(0.)); let editor_width = text_width - gutter_dimensions.margin - overscroll.width - em_width; - if !matches!(editor.soft_wrap_mode(cx), SoftWrap::None) { - if editor.set_wrap_width(Some(editor_width), cx) { - snapshot = editor.snapshot(window, cx); - } + let content_offset = point(gutter_dimensions.margin, Pixels::ZERO); + let editor_content_width = editor_width - content_offset.x; + let wrap_width_for = |column: u32| (column as f32 * em_advance).ceil(); + let wrap_width = match editor.soft_wrap_mode(cx) { + SoftWrap::GitDiff => None, + SoftWrap::None => Some(wrap_width_for(MAX_LINE_LEN as u32 / 2)), + SoftWrap::EditorWidth => Some(editor_content_width), + SoftWrap::Column(column) => Some(wrap_width_for(column)), + SoftWrap::Bounded(column) => Some(editor_content_width.min(wrap_width_for(column))), + }; + if editor.set_wrap_width(wrap_width, cx) { + snapshot = editor.snapshot(window, cx); } let scroll_height = (snapshot.max_point().row().next_row().0 as f32) * line_height; diff --git a/crates/git_ui/src/git_panel.rs b/crates/git_ui/src/git_panel.rs index 4946bd0ecd..dad4c96478 100644 --- a/crates/git_ui/src/git_panel.rs +++ b/crates/git_ui/src/git_panel.rs @@ -54,6 +54,7 @@ use project::{ use serde::{Deserialize, Serialize}; use settings::{Settings as _, SettingsStore}; use std::future::Future; +use std::num::NonZeroU32; use std::path::{Path, PathBuf}; use 
std::{collections::HashSet, sync::Arc, time::Duration, usize}; use strum::{IntoEnumIterator, VariantNames}; @@ -62,7 +63,7 @@ use ui::{ Checkbox, ContextMenu, ElevationIndex, PopoverMenu, Scrollbar, ScrollbarState, SplitButton, Tooltip, prelude::*, }; -use util::{ResultExt, TryFutureExt, maybe}; +use util::{ResultExt, TryFutureExt, maybe, wrap_with_prefix}; use workspace::AppState; use notifications::status_toast::{StatusToast, ToastIcon}; @@ -382,7 +383,6 @@ pub(crate) fn commit_message_editor( commit_editor.set_show_gutter(false, cx); commit_editor.set_show_wrap_guides(false, cx); commit_editor.set_show_indent_guides(false, cx); - commit_editor.set_hard_wrap(Some(72), cx); let placeholder = placeholder.unwrap_or("Enter commit message".into()); commit_editor.set_placeholder_text(placeholder, cx); commit_editor @@ -1484,8 +1484,22 @@ impl GitPanel { fn custom_or_suggested_commit_message(&self, cx: &mut Context) -> Option { let message = self.commit_editor.read(cx).text(cx); + let width = self + .commit_editor + .read(cx) + .buffer() + .read(cx) + .language_settings(cx) + .preferred_line_length as usize; if !message.trim().is_empty() { + let message = wrap_with_prefix( + String::new(), + message, + width, + NonZeroU32::new(8).unwrap(), // tab size doesn't matter when prefix is empty + false, + ); return Some(message); } diff --git a/crates/language/src/language.rs b/crates/language/src/language.rs index 77884634fc..3da6101f41 100644 --- a/crates/language/src/language.rs +++ b/crates/language/src/language.rs @@ -666,7 +666,7 @@ pub struct CodeLabel { pub filter_range: Range, } -#[derive(Clone, Deserialize, JsonSchema)] +#[derive(Clone, Debug, Deserialize, JsonSchema)] pub struct LanguageConfig { /// Human-readable name of the language. pub name: LanguageName, @@ -777,7 +777,7 @@ pub struct LanguageMatcher { } /// The configuration for JSX tag auto-closing. 
-#[derive(Clone, Deserialize, JsonSchema)] +#[derive(Clone, Debug, Deserialize, JsonSchema)] pub struct JsxTagAutoCloseConfig { /// The name of the node for a opening tag pub open_tag_node_name: String, @@ -810,7 +810,7 @@ pub struct JsxTagAutoCloseConfig { } /// The configuration for documentation block for this language. -#[derive(Clone, Deserialize, JsonSchema)] +#[derive(Clone, Debug, Deserialize, JsonSchema)] pub struct DocumentationConfig { /// A start tag of documentation block. pub start: Arc, diff --git a/crates/util/Cargo.toml b/crates/util/Cargo.toml index f6fc4b5164..3b5ffcb24c 100644 --- a/crates/util/Cargo.toml +++ b/crates/util/Cargo.toml @@ -37,6 +37,8 @@ smol.workspace = true take-until.workspace = true tempfile.workspace = true unicase.workspace = true +unicode-script.workspace = true +unicode-segmentation.workspace = true util_macros = { workspace = true, optional = true } walkdir.workspace = true workspace-hack.workspace = true diff --git a/crates/util/src/util.rs b/crates/util/src/util.rs index d726b5aae8..40f67cd62e 100644 --- a/crates/util/src/util.rs +++ b/crates/util/src/util.rs @@ -14,6 +14,7 @@ use anyhow::Result; use futures::Future; use itertools::Either; use regex::Regex; +use std::num::NonZeroU32; use std::sync::{LazyLock, OnceLock}; use std::{ borrow::Cow, @@ -183,29 +184,208 @@ pub fn truncate_lines_to_byte_limit(s: &str, max_bytes: usize) -> &str { truncate_to_byte_limit(s, max_bytes) } -#[test] -fn test_truncate_lines_to_byte_limit() { - let text = "Line 1\nLine 2\nLine 3\nLine 4"; +fn char_len_with_expanded_tabs(offset: usize, text: &str, tab_size: NonZeroU32) -> usize { + let tab_size = tab_size.get() as usize; + let mut width = offset; - // Limit that includes all lines - assert_eq!(truncate_lines_to_byte_limit(text, 100), text); + for ch in text.chars() { + width += if ch == '\t' { + tab_size - (width % tab_size) + } else { + 1 + }; + } - // Exactly the first line - assert_eq!(truncate_lines_to_byte_limit(text, 7), "Line 
1\n"); + width - offset +} - // Limit between lines - assert_eq!(truncate_lines_to_byte_limit(text, 13), "Line 1\n"); - assert_eq!(truncate_lines_to_byte_limit(text, 20), "Line 1\nLine 2\n"); +/// Tokenizes a string into runs of text that should stick together, or that is whitespace. +struct WordBreakingTokenizer<'a> { + input: &'a str, +} - // Limit before first newline - assert_eq!(truncate_lines_to_byte_limit(text, 6), "Line "); +impl<'a> WordBreakingTokenizer<'a> { + fn new(input: &'a str) -> Self { + Self { input } + } +} - // Test with non-ASCII characters - let text_utf8 = "Line 1\nLíne 2\nLine 3"; - assert_eq!( - truncate_lines_to_byte_limit(text_utf8, 15), - "Line 1\nLíne 2\n" - ); +fn is_char_ideographic(ch: char) -> bool { + use unicode_script::Script::*; + use unicode_script::UnicodeScript; + matches!(ch.script(), Han | Tangut | Yi) +} + +fn is_grapheme_ideographic(text: &str) -> bool { + text.chars().any(is_char_ideographic) +} + +fn is_grapheme_whitespace(text: &str) -> bool { + text.chars().any(|x| x.is_whitespace()) +} + +fn should_stay_with_preceding_ideograph(text: &str) -> bool { + text.chars().next().map_or(false, |ch| { + matches!(ch, '。' | '、' | ',' | '?' | '!' | ':' | ';' | '…') + }) +} + +#[derive(PartialEq, Eq, Debug, Clone, Copy)] +enum WordBreakToken<'a> { + Word { token: &'a str, grapheme_len: usize }, + InlineWhitespace { token: &'a str, grapheme_len: usize }, + Newline, +} + +impl<'a> Iterator for WordBreakingTokenizer<'a> { + /// Yields a span, the count of graphemes in the token, and whether it was + /// whitespace. Note that it also breaks at word boundaries. 
+ type Item = WordBreakToken<'a>; + + fn next(&mut self) -> Option { + use unicode_segmentation::UnicodeSegmentation; + if self.input.is_empty() { + return None; + } + + let mut iter = self.input.graphemes(true).peekable(); + let mut offset = 0; + let mut grapheme_len = 0; + if let Some(first_grapheme) = iter.next() { + let is_newline = first_grapheme == "\n"; + let is_whitespace = is_grapheme_whitespace(first_grapheme); + offset += first_grapheme.len(); + grapheme_len += 1; + if is_grapheme_ideographic(first_grapheme) && !is_whitespace { + if let Some(grapheme) = iter.peek().copied() { + if should_stay_with_preceding_ideograph(grapheme) { + offset += grapheme.len(); + grapheme_len += 1; + } + } + } else { + let mut words = self.input[offset..].split_word_bound_indices().peekable(); + let mut next_word_bound = words.peek().copied(); + if next_word_bound.map_or(false, |(i, _)| i == 0) { + next_word_bound = words.next(); + } + while let Some(grapheme) = iter.peek().copied() { + if next_word_bound.map_or(false, |(i, _)| i == offset) { + break; + }; + if is_grapheme_whitespace(grapheme) != is_whitespace + || (grapheme == "\n") != is_newline + { + break; + }; + offset += grapheme.len(); + grapheme_len += 1; + iter.next(); + } + } + let token = &self.input[..offset]; + self.input = &self.input[offset..]; + if token == "\n" { + Some(WordBreakToken::Newline) + } else if is_whitespace { + Some(WordBreakToken::InlineWhitespace { + token, + grapheme_len, + }) + } else { + Some(WordBreakToken::Word { + token, + grapheme_len, + }) + } + } else { + None + } + } +} + +pub fn wrap_with_prefix( + line_prefix: String, + unwrapped_text: String, + wrap_column: usize, + tab_size: NonZeroU32, + preserve_existing_whitespace: bool, +) -> String { + let line_prefix_len = char_len_with_expanded_tabs(0, &line_prefix, tab_size); + let mut wrapped_text = String::new(); + let mut current_line = line_prefix.clone(); + + let tokenizer = WordBreakingTokenizer::new(&unwrapped_text); + let mut 
current_line_len = line_prefix_len; + let mut in_whitespace = false; + for token in tokenizer { + let have_preceding_whitespace = in_whitespace; + match token { + WordBreakToken::Word { + token, + grapheme_len, + } => { + in_whitespace = false; + if current_line_len + grapheme_len > wrap_column + && current_line_len != line_prefix_len + { + wrapped_text.push_str(current_line.trim_end()); + wrapped_text.push('\n'); + current_line.truncate(line_prefix.len()); + current_line_len = line_prefix_len; + } + current_line.push_str(token); + current_line_len += grapheme_len; + } + WordBreakToken::InlineWhitespace { + mut token, + mut grapheme_len, + } => { + in_whitespace = true; + if have_preceding_whitespace && !preserve_existing_whitespace { + continue; + } + if !preserve_existing_whitespace { + token = " "; + grapheme_len = 1; + } + if current_line_len + grapheme_len > wrap_column { + wrapped_text.push_str(current_line.trim_end()); + wrapped_text.push('\n'); + current_line.truncate(line_prefix.len()); + current_line_len = line_prefix_len; + } else if current_line_len != line_prefix_len || preserve_existing_whitespace { + current_line.push_str(token); + current_line_len += grapheme_len; + } + } + WordBreakToken::Newline => { + in_whitespace = true; + if preserve_existing_whitespace { + wrapped_text.push_str(current_line.trim_end()); + wrapped_text.push('\n'); + current_line.truncate(line_prefix.len()); + current_line_len = line_prefix_len; + } else if have_preceding_whitespace { + continue; + } else if current_line_len + 1 > wrap_column && current_line_len != line_prefix_len + { + wrapped_text.push_str(current_line.trim_end()); + wrapped_text.push('\n'); + current_line.truncate(line_prefix.len()); + current_line_len = line_prefix_len; + } else if current_line_len != line_prefix_len { + current_line.push(' '); + current_line_len += 1; + } + } + } + } + + if !current_line.is_empty() { + wrapped_text.push_str(&current_line); + } + wrapped_text } pub fn post_inc<T: From<u8> + AddAssign<T> + 
Copy>(value: &mut T) -> T { @@ -1302,4 +1482,161 @@ Line 3"# (0..8).collect::>() ); } + + #[test] + fn test_truncate_lines_to_byte_limit() { + let text = "Line 1\nLine 2\nLine 3\nLine 4"; + + // Limit that includes all lines + assert_eq!(truncate_lines_to_byte_limit(text, 100), text); + + // Exactly the first line + assert_eq!(truncate_lines_to_byte_limit(text, 7), "Line 1\n"); + + // Limit between lines + assert_eq!(truncate_lines_to_byte_limit(text, 13), "Line 1\n"); + assert_eq!(truncate_lines_to_byte_limit(text, 20), "Line 1\nLine 2\n"); + + // Limit before first newline + assert_eq!(truncate_lines_to_byte_limit(text, 6), "Line "); + + // Test with non-ASCII characters + let text_utf8 = "Line 1\nLíne 2\nLine 3"; + assert_eq!( + truncate_lines_to_byte_limit(text_utf8, 15), + "Line 1\nLíne 2\n" + ); + } + + #[test] + fn test_string_size_with_expanded_tabs() { + let nz = |val| NonZeroU32::new(val).unwrap(); + assert_eq!(char_len_with_expanded_tabs(0, "", nz(4)), 0); + assert_eq!(char_len_with_expanded_tabs(0, "hello", nz(4)), 5); + assert_eq!(char_len_with_expanded_tabs(0, "\thello", nz(4)), 9); + assert_eq!(char_len_with_expanded_tabs(0, "abc\tab", nz(4)), 6); + assert_eq!(char_len_with_expanded_tabs(0, "hello\t", nz(4)), 8); + assert_eq!(char_len_with_expanded_tabs(0, "\t\t", nz(8)), 16); + assert_eq!(char_len_with_expanded_tabs(0, "x\t", nz(8)), 8); + assert_eq!(char_len_with_expanded_tabs(7, "x\t", nz(8)), 9); + } + + #[test] + fn test_word_breaking_tokenizer() { + let tests: &[(&str, &[WordBreakToken<'static>])] = &[ + ("", &[]), + (" ", &[whitespace(" ", 2)]), + ("Ʒ", &[word("Ʒ", 1)]), + ("Ǽ", &[word("Ǽ", 1)]), + ("⋑", &[word("⋑", 1)]), + ("⋑⋑", &[word("⋑⋑", 2)]), + ( + "原理,进而", + &[word("原", 1), word("理,", 2), word("进", 1), word("而", 1)], + ), + ( + "hello world", + &[word("hello", 5), whitespace(" ", 1), word("world", 5)], + ), + ( + "hello, world", + &[word("hello,", 6), whitespace(" ", 1), word("world", 5)], + ), + ( + " hello world", + &[ + whitespace(" 
", 2), + word("hello", 5), + whitespace(" ", 1), + word("world", 5), + ], + ), + ( + "这是什么 \n 钢笔", + &[ + word("这", 1), + word("是", 1), + word("什", 1), + word("么", 1), + whitespace(" ", 1), + newline(), + whitespace(" ", 1), + word("钢", 1), + word("笔", 1), + ], + ), + (" mutton", &[whitespace(" ", 1), word("mutton", 6)]), + ]; + + fn word(token: &'static str, grapheme_len: usize) -> WordBreakToken<'static> { + WordBreakToken::Word { + token, + grapheme_len, + } + } + + fn whitespace(token: &'static str, grapheme_len: usize) -> WordBreakToken<'static> { + WordBreakToken::InlineWhitespace { + token, + grapheme_len, + } + } + + fn newline() -> WordBreakToken<'static> { + WordBreakToken::Newline + } + + for (input, result) in tests { + assert_eq!( + WordBreakingTokenizer::new(input) + .collect::>() + .as_slice(), + *result, + ); + } + } + + #[test] + fn test_wrap_with_prefix() { + assert_eq!( + wrap_with_prefix( + "# ".to_string(), + "abcdefg".to_string(), + 4, + NonZeroU32::new(4).unwrap(), + false, + ), + "# abcdefg" + ); + assert_eq!( + wrap_with_prefix( + "".to_string(), + "\thello world".to_string(), + 8, + NonZeroU32::new(4).unwrap(), + false, + ), + "hello\nworld" + ); + assert_eq!( + wrap_with_prefix( + "// ".to_string(), + "xx \nyy zz aa bb cc".to_string(), + 12, + NonZeroU32::new(4).unwrap(), + false, + ), + "// xx yy zz\n// aa bb cc" + ); + assert_eq!( + wrap_with_prefix( + String::new(), + "这是什么 \n 钢笔".to_string(), + 3, + NonZeroU32::new(4).unwrap(), + false, + ), + "这是什\n么 钢\n笔" + ); + } }