diff --git a/Cargo.lock b/Cargo.lock index 6e674784ad..412008d161 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -416,7 +416,6 @@ dependencies = [ "serde", "serde_json_lenient", "settings", - "similar", "smol", "streaming_diff", "telemetry", @@ -482,7 +481,6 @@ dependencies = [ "serde", "serde_json", "settings", - "similar", "smol", "streaming_diff", "telemetry_events", @@ -4064,7 +4062,6 @@ dependencies = [ "serde", "serde_json", "settings", - "similar", "smallvec", "smol", "snippet", @@ -6376,6 +6373,15 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edcd27d72f2f071c64249075f42e205ff93c9a4c5f6c6da53e79ed9f9832c285" +[[package]] +name = "imara-diff" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17d34b7d42178945f775e84bc4c36dde7c1c6cdfea656d3354d009056f2bb3d2" +dependencies = [ + "hashbrown 0.15.2", +] + [[package]] name = "imgref" version = "1.11.0" @@ -6868,6 +6874,7 @@ dependencies = [ "globset", "gpui", "http_client", + "imara-diff", "indoc", "itertools 0.14.0", "log", @@ -6882,7 +6889,6 @@ dependencies = [ "serde", "serde_json", "settings", - "similar", "smallvec", "smol", "streaming-iterator", @@ -10129,7 +10135,6 @@ dependencies = [ "sha2", "shellexpand 2.1.2", "shlex", - "similar", "smol", "snippet", "snippet_provider", @@ -12270,12 +12275,6 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" -[[package]] -name = "similar" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ad1d488a557b235fc46dae55512ffbfc429d2482b08b4d9435ab07384ca8aec" - [[package]] name = "simple_asn1" version = "0.6.2" @@ -17140,7 +17139,6 @@ dependencies = [ "serde", "serde_json", "settings", - "similar", "telemetry", "telemetry_events", "theme", diff --git a/Cargo.toml b/Cargo.toml index 4b787d5e75..c93c56486d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -421,6 +421,7 @@ hyper = "0.14" http = "1.1" ignore = "0.4.22" image = "0.25.1" +imara-diff = "0.1.8" indexmap = { version = "2.7.0", features = ["serde"] } indoc = "2" inventory = "0.3.19" @@ -499,7 +500,6 @@ sha2 = "0.10" shellexpand = "2.1.0" shlex = "1.3.0" signal-hook = "0.3.17" -similar = "1.3" simplelog = "0.12.2" smallvec = { version = "1.6", features = ["union"] } smol = "2.0" diff --git a/crates/assistant/Cargo.toml b/crates/assistant/Cargo.toml index 2ad0e7e082..1bb7cbc1ae 100644 --- a/crates/assistant/Cargo.toml +++ b/crates/assistant/Cargo.toml @@ -59,7 +59,6 @@ search.workspace = true semantic_index.workspace = true serde.workspace = true settings.workspace = true -similar.workspace = true smol.workspace = true streaming_diff.workspace = true telemetry.workspace = true diff --git a/crates/assistant/src/inline_assistant.rs b/crates/assistant/src/inline_assistant.rs index 554db12645..cb4ada5f08 100644 --- a/crates/assistant/src/inline_assistant.rs +++ b/crates/assistant/src/inline_assistant.rs @@ -30,7 +30,7 @@ use gpui::{ EventEmitter, FocusHandle, Focusable, FontWeight, Global, HighlightStyle, Subscription, Task, TextStyle, UpdateGlobal, WeakEntity, Window, }; -use language::{Buffer, IndentKind, Point, Selection, TransactionId}; +use language::{line_diff, Buffer, IndentKind, Point, Selection, TransactionId}; use language_model::{ LanguageModel, LanguageModelRegistry, LanguageModelRequest, LanguageModelRequestMessage, LanguageModelTextStream, Role, @@ -3350,52 +3350,29 @@ impl CodegenAlternative { ) .collect::(); - let mut old_row = old_range.start.row; - let mut new_row = new_range.start.row; - let batch_diff = - similar::TextDiff::from_lines(old_text.as_str(), new_text.as_str()); - + let old_start_row = old_range.start.row; + let new_start_row = new_range.start.row; let mut deleted_row_ranges: Vec<(Anchor, RangeInclusive)> = Vec::new(); let mut inserted_row_ranges = Vec::new(); - for change in batch_diff.iter_all_changes() { - let line_count = change.value().lines().count() as u32; - match change.tag() { - similar::ChangeTag::Equal => { - old_row += line_count; - new_row += line_count; - } - similar::ChangeTag::Delete => { - let old_end_row = old_row + line_count - 1; - let new_row = new_snapshot.anchor_before(Point::new(new_row, 0)); - - if let Some((_, last_deleted_row_range)) = - deleted_row_ranges.last_mut() - { - if *last_deleted_row_range.end() + 1 == old_row { - *last_deleted_row_range = - *last_deleted_row_range.start()..=old_end_row; - } else { - deleted_row_ranges.push((new_row, old_row..=old_end_row)); - } - } else { - deleted_row_ranges.push((new_row, old_row..=old_end_row)); - } - - old_row += line_count; - } - similar::ChangeTag::Insert => { - let new_end_row = new_row + line_count - 1; - let start = new_snapshot.anchor_before(Point::new(new_row, 0)); - let end = new_snapshot.anchor_before(Point::new( - new_end_row, - new_snapshot.line_len(MultiBufferRow(new_end_row)), - )); - inserted_row_ranges.push(start..end); - new_row += line_count; - } + for (old_rows, new_rows) in line_diff(&old_text, &new_text) { + let old_rows = old_start_row + old_rows.start..old_start_row + old_rows.end; + let new_rows = new_start_row + new_rows.start..new_start_row + new_rows.end; + if !old_rows.is_empty() { + deleted_row_ranges.push(( + new_snapshot.anchor_before(Point::new(new_rows.start, 0)), + old_rows.start..=old_rows.end - 1, + )); + } + if !new_rows.is_empty() { + let start = new_snapshot.anchor_before(Point::new(new_rows.start, 0)); + let new_end_row = new_rows.end - 1; + let end = new_snapshot.anchor_before(Point::new( + new_end_row, + new_snapshot.line_len(MultiBufferRow(new_end_row)), + )); + inserted_row_ranges.push(start..end); } } - (deleted_row_ranges, inserted_row_ranges) }) .await; diff --git a/crates/assistant2/Cargo.toml b/crates/assistant2/Cargo.toml index f53595cd0c..9a74a5e2fe 100644 --- a/crates/assistant2/Cargo.toml +++ b/crates/assistant2/Cargo.toml @@ -62,7 +62,6 @@ rope.workspace = true serde.workspace = true serde_json.workspace = true settings.workspace = true -similar.workspace = true smol.workspace = true streaming_diff.workspace = true telemetry_events.workspace = true @@ -79,5 +78,10 @@ workspace.workspace = true zed_actions.workspace = true [dev-dependencies] +editor = { workspace = true, features = ["test-support"] } +gpui = { workspace = true, "features" = ["test-support"] } +language = { workspace = true, "features" = ["test-support"] } +language_model = { workspace = true, "features" = ["test-support"] } +project = { workspace = true, features = ["test-support"] } rand.workspace = true indoc.workspace = true diff --git a/crates/assistant2/src/buffer_codegen.rs b/crates/assistant2/src/buffer_codegen.rs index 9c36cc69dc..4e62f9549d 100644 --- a/crates/assistant2/src/buffer_codegen.rs +++ b/crates/assistant2/src/buffer_codegen.rs @@ -7,7 +7,7 @@ use collections::HashSet; use editor::{Anchor, AnchorRangeExt, MultiBuffer, MultiBufferSnapshot, ToOffset as _, ToPoint}; use futures::{channel::mpsc, future::LocalBoxFuture, join, SinkExt, Stream, StreamExt}; use gpui::{App, AppContext as _, Context, Entity, EventEmitter, Subscription, Task}; -use language::{Buffer, IndentKind, Point, TransactionId}; +use language::{line_diff, Buffer, IndentKind, Point, TransactionId}; use language_model::{ LanguageModel, LanguageModelRegistry, LanguageModelRequest, LanguageModelRequestMessage, LanguageModelTextStream, Role, @@ -827,52 +827,29 @@ impl CodegenAlternative { ) .collect::(); - let mut old_row = old_range.start.row; - let mut new_row = new_range.start.row; - let batch_diff = - similar::TextDiff::from_lines(old_text.as_str(), new_text.as_str()); - + let old_start_row = old_range.start.row; + let new_start_row = new_range.start.row; let mut deleted_row_ranges: Vec<(Anchor, RangeInclusive)> = Vec::new(); let mut inserted_row_ranges = Vec::new(); - for change in batch_diff.iter_all_changes() { - let line_count = change.value().lines().count() as u32; - match change.tag() { - similar::ChangeTag::Equal => { - old_row += line_count; - new_row += line_count; - } - similar::ChangeTag::Delete => { - let old_end_row = old_row + line_count - 1; - let new_row = new_snapshot.anchor_before(Point::new(new_row, 0)); - - if let Some((_, last_deleted_row_range)) = - deleted_row_ranges.last_mut() - { - if *last_deleted_row_range.end() + 1 == old_row { - *last_deleted_row_range = - *last_deleted_row_range.start()..=old_end_row; - } else { - deleted_row_ranges.push((new_row, old_row..=old_end_row)); - } - } else { - deleted_row_ranges.push((new_row, old_row..=old_end_row)); - } - - old_row += line_count; - } - similar::ChangeTag::Insert => { - let new_end_row = new_row + line_count - 1; - let start = new_snapshot.anchor_before(Point::new(new_row, 0)); - let end = new_snapshot.anchor_before(Point::new( - new_end_row, - new_snapshot.line_len(MultiBufferRow(new_end_row)), - )); - inserted_row_ranges.push(start..end); - new_row += line_count; - } + for (old_rows, new_rows) in line_diff(&old_text, &new_text) { + let old_rows = old_start_row + old_rows.start..old_start_row + old_rows.end; + let new_rows = new_start_row + new_rows.start..new_start_row + new_rows.end; + if !old_rows.is_empty() { + deleted_row_ranges.push(( + new_snapshot.anchor_before(Point::new(new_rows.start, 0)), + old_rows.start..=old_rows.end - 1, + )); + } + if !new_rows.is_empty() { + let start = new_snapshot.anchor_before(Point::new(new_rows.start, 0)); + let new_end_row = new_rows.end - 1; + let end = new_snapshot.anchor_before(Point::new( + new_end_row, + new_snapshot.line_len(MultiBufferRow(new_end_row)), + )); + inserted_row_ranges.push(start..end); } } - (deleted_row_ranges, inserted_row_ranges) }) .await; diff --git a/crates/editor/Cargo.toml b/crates/editor/Cargo.toml index abc8f0b6f0..08d626c25f 100644 --- a/crates/editor/Cargo.toml +++ b/crates/editor/Cargo.toml @@ -66,7 +66,6 @@ schemars.workspace = true serde.workspace = true serde_json.workspace = true settings.workspace = true -similar.workspace = true smallvec.workspace = true smol.workspace = true snippet.workspace = true diff --git a/crates/editor/src/editor.rs b/crates/editor/src/editor.rs index 1afd49fba2..e30b369bd3 100644 --- a/crates/editor/src/editor.rs +++ b/crates/editor/src/editor.rs @@ -100,10 +100,10 @@ use language::{ language_settings::{ self, all_language_settings, language_settings, InlayHintSettings, RewrapBehavior, }, - point_from_lsp, AutoindentMode, BracketPair, Buffer, Capability, CharKind, CodeLabel, - CursorShape, Diagnostic, DiskState, EditPredictionsMode, EditPreview, HighlightedText, - IndentKind, IndentSize, Language, OffsetRangeExt, Point, Selection, SelectionGoal, TextObject, - TransactionId, TreeSitterOptions, + point_from_lsp, text_diff_with_options, AutoindentMode, BracketPair, Buffer, Capability, + CharKind, CodeLabel, CursorShape, Diagnostic, DiffOptions, DiskState, EditPredictionsMode, + EditPreview, HighlightedText, IndentKind, IndentSize, Language, OffsetRangeExt, Point, + Selection, SelectionGoal, TextObject, TransactionId, TreeSitterOptions, }; use language::{point_to_lsp, BufferRow, CharClassifier, Runnable, RunnableRange}; use linked_editing_ranges::refresh_linked_ranges; @@ -112,7 +112,6 @@ use persistence::DB; pub use proposed_changes_editor::{ ProposedChangeLocation, ProposedChangesEditor, ProposedChangesEditorToolbar, }; -use similar::{ChangeTag, TextDiff}; use std::iter::Peekable; use task::{ResolvedTask, TaskTemplate, TaskVariables}; @@ -202,7 +201,7 @@ pub(crate) const CURSORS_VISIBLE_FOR: Duration = Duration::from_millis(2000); #[doc(hidden)] pub const CODE_ACTIONS_DEBOUNCE_TIMEOUT: Duration = Duration::from_millis(250); -pub(crate) const FORMAT_TIMEOUT: Duration = Duration::from_secs(2); +pub(crate) const FORMAT_TIMEOUT: Duration = Duration::from_secs(5); pub(crate) const SCROLL_CENTER_TOP_BOTTOM_DEBOUNCE_TIMEOUT: Duration = Duration::from_secs(1); pub(crate) const EDIT_PREDICTION_KEY_CONTEXT: &str = "edit_prediction"; @@ -7829,6 +7828,7 @@ impl Editor { } let start = Point::new(start_row, 0); + let start_offset = start.to_offset(&buffer); let end = Point::new(end_row, buffer.line_len(MultiBufferRow(end_row))); let selection_text = buffer.text_for_range(start..end).collect::(); let Some(lines_without_prefixes) = selection_text @@ -7858,44 +7858,21 @@ impl Editor { // TODO: should always use char-based diff while still supporting cursor behavior that // matches vim. - let diff = match is_vim_mode { - IsVimMode::Yes => TextDiff::from_lines(&selection_text, &wrapped_text), - IsVimMode::No => TextDiff::from_chars(&selection_text, &wrapped_text), - }; - let mut offset = start.to_offset(&buffer); - let mut moved_since_edit = true; + let mut diff_options = DiffOptions::default(); + if is_vim_mode == IsVimMode::Yes { + diff_options.max_word_diff_len = 0; + diff_options.max_word_diff_line_count = 0; + } else { + diff_options.max_word_diff_len = usize::MAX; + diff_options.max_word_diff_line_count = usize::MAX; + } - for change in diff.iter_all_changes() { - let value = change.value(); - match change.tag() { - ChangeTag::Equal => { - offset += value.len(); - moved_since_edit = true; - } - ChangeTag::Delete => { - let start = buffer.anchor_after(offset); - let end = buffer.anchor_before(offset + value.len()); - - if moved_since_edit { - edits.push((start..end, String::new())); - } else { - edits.last_mut().unwrap().0.end = end; - } - - offset += value.len(); - moved_since_edit = false; - } - ChangeTag::Insert => { - if moved_since_edit { - let anchor = buffer.anchor_after(offset); - edits.push((anchor..anchor, value.to_string())); - } else { - edits.last_mut().unwrap().1.push_str(value); - } - - moved_since_edit = false; - } - } + for (old_range, new_text) in + text_diff_with_options(&selection_text, &wrapped_text, diff_options) + { + let edit_start = buffer.anchor_after(start_offset + old_range.start); + let edit_end = buffer.anchor_after(start_offset + old_range.end); + edits.push((edit_start..edit_end, new_text)); } rewrapped_row_ranges.push(start_row..=end_row); diff --git a/crates/editor/src/element.rs b/crates/editor/src/element.rs index 93d60624e2..ec99900ba8 100644 --- a/crates/editor/src/element.rs +++ b/crates/editor/src/element.rs @@ -8409,7 +8409,6 @@ mod tests { use gpui::{TestAppContext, VisualTestContext}; use language::language_settings; use log::info; - use similar::DiffableStr; use std::num::NonZeroU32; use util::test::sample_text; @@ -8709,7 +8708,7 @@ mod tests { state .line_numbers .get(&MultiBufferRow(0)) - .and_then(|line_number| line_number.shaped_line.text.as_str()), + .map(|line_number| line_number.shaped_line.text.as_ref()), Some("1") ); } diff --git a/crates/language/Cargo.toml b/crates/language/Cargo.toml index 9e5fd55c96..c2e4c35454 100644 --- a/crates/language/Cargo.toml +++ b/crates/language/Cargo.toml @@ -37,6 +37,7 @@ fuzzy.workspace = true globset.workspace = true gpui.workspace = true http_client.workspace = true +imara-diff.workspace = true itertools.workspace = true log.workspace = true lsp.workspace = true @@ -49,7 +50,6 @@ schemars.workspace = true serde.workspace = true serde_json.workspace = true settings.workspace = true -similar.workspace = true smallvec.workspace = true smol.workspace = true streaming-iterator.workspace = true diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs index c23eeae533..192519367b 100644 --- a/crates/language/src/buffer.rs +++ b/crates/language/src/buffer.rs @@ -12,6 +12,7 @@ use crate::{ SyntaxMapMatches, SyntaxSnapshot, ToTreeSitterPoint, }, task_context::RunnableRange, + text_diff::text_diff, LanguageScope, Outline, OutlineConfig, RunnableCapture, RunnableTag, TextObject, TreeSitterOptions, }; @@ -32,7 +33,6 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use serde_json::Value; use settings::WorktreeId; -use similar::{ChangeTag, TextDiff}; use smallvec::SmallVec; use smol::future::yield_now; use std::{ @@ -1792,61 +1792,7 @@ impl Buffer { let old_text = old_text.to_string(); let line_ending = LineEnding::detect(&new_text); LineEnding::normalize(&mut new_text); - - let diff = TextDiff::from_chars(old_text.as_str(), new_text.as_str()); - let empty: Arc = Arc::default(); - - let mut edits = Vec::new(); - let mut old_offset = 0; - let mut new_offset = 0; - let mut last_edit: Option<(Range, Range)> = None; - for change in diff.iter_all_changes().map(Some).chain([None]) { - if let Some(change) = &change { - let len = change.value().len(); - match change.tag() { - ChangeTag::Equal => { - old_offset += len; - new_offset += len; - } - ChangeTag::Delete => { - let old_end_offset = old_offset + len; - if let Some((last_old_range, _)) = &mut last_edit { - last_old_range.end = old_end_offset; - } else { - last_edit = - Some((old_offset..old_end_offset, new_offset..new_offset)); - } - old_offset = old_end_offset; - } - ChangeTag::Insert => { - let new_end_offset = new_offset + len; - if let Some((_, last_new_range)) = &mut last_edit { - last_new_range.end = new_end_offset; - } else { - last_edit = - Some((old_offset..old_offset, new_offset..new_end_offset)); - } - new_offset = new_end_offset; - } - } - } - - if let Some((old_range, new_range)) = &last_edit { - if old_offset > old_range.end - || new_offset > new_range.end - || change.is_none() - { - let text = if new_range.is_empty() { - empty.clone() - } else { - new_text[new_range.clone()].into() - }; - edits.push((old_range.clone(), text)); - last_edit.take(); - } - } - } - + let edits = text_diff(&old_text, &new_text); Diff { base_version, line_ending, diff --git a/crates/language/src/buffer_tests.rs b/crates/language/src/buffer_tests.rs index 84aef374ca..1b3b39dcc4 100644 --- a/crates/language/src/buffer_tests.rs +++ b/crates/language/src/buffer_tests.rs @@ -25,6 +25,7 @@ use text::{BufferId, LineEnding}; use text::{Point, ToPoint}; use theme::ActiveTheme; use unindent::Unindent as _; +use util::test::marked_text_offsets; use util::{assert_set_eq, post_inc, test::marked_text_ranges, RandomCharIter}; pub static TRAILING_WHITESPACE_REGEX: LazyLock = LazyLock::new(|| { @@ -354,24 +355,44 @@ fn test_edit_events(cx: &mut gpui::App) { #[gpui::test] async fn test_apply_diff(cx: &mut TestAppContext) { - let text = "a\nbb\nccc\ndddd\neeeee\nffffff\n"; + let (text, offsets) = marked_text_offsets( + "one two three\nfour fiˇve six\nseven eightˇ nine\nten eleven twelve\n", + ); let buffer = cx.new(|cx| Buffer::local(text, cx)); - let anchor = buffer.update(cx, |buffer, _| buffer.anchor_before(Point::new(3, 3))); - - let text = "a\nccc\ndddd\nffffff\n"; - let diff = buffer.update(cx, |b, cx| b.diff(text.into(), cx)).await; - buffer.update(cx, |buffer, cx| { - buffer.apply_diff(diff, cx).unwrap(); - assert_eq!(buffer.text(), text); - assert_eq!(anchor.to_point(buffer), Point::new(2, 3)); + let anchors = buffer.update(cx, |buffer, _| { + offsets + .iter() + .map(|offset| buffer.anchor_before(offset)) + .collect::>() }); - let text = "a\n1\n\nccc\ndd2dd\nffffff\n"; - let diff = buffer.update(cx, |b, cx| b.diff(text.into(), cx)).await; + let (text, offsets) = marked_text_offsets( + "one two three\n{\nfour FIVEˇ six\n}\nseven AND EIGHTˇ nine\nten eleven twelve\n", + ); + + let diff = buffer.update(cx, |b, cx| b.diff(text.clone(), cx)).await; buffer.update(cx, |buffer, cx| { buffer.apply_diff(diff, cx).unwrap(); assert_eq!(buffer.text(), text); - assert_eq!(anchor.to_point(buffer), Point::new(4, 4)); + let actual_offsets = anchors + .iter() + .map(|anchor| anchor.to_offset(buffer)) + .collect::>(); + assert_eq!(actual_offsets, offsets); + }); + + let (text, offsets) = + marked_text_offsets("one two three\n{\nˇ}\nseven AND EIGHTEENˇ nine\nten eleven twelve\n"); + + let diff = buffer.update(cx, |b, cx| b.diff(text.clone(), cx)).await; + buffer.update(cx, |buffer, cx| { + buffer.apply_diff(diff, cx).unwrap(); + assert_eq!(buffer.text(), text); + let actual_offsets = anchors + .iter() + .map(|anchor| anchor.to_offset(buffer)) + .collect::>(); + assert_eq!(actual_offsets, offsets); }); } diff --git a/crates/language/src/language.rs b/crates/language/src/language.rs index b6c5bf6225..89efb27ec4 100644 --- a/crates/language/src/language.rs +++ b/crates/language/src/language.rs @@ -15,6 +15,7 @@ mod outline; pub mod proto; mod syntax_map; mod task_context; +mod text_diff; mod toolchain; #[cfg(test)] @@ -62,6 +63,7 @@ use std::{num::NonZeroU32, sync::OnceLock}; use syntax_map::{QueryCursorHandle, SyntaxSnapshot}; use task::RunnableTag; pub use task_context::{ContextProvider, RunnableRange}; +pub use text_diff::{line_diff, text_diff, text_diff_with_options, unified_diff, DiffOptions}; use theme::SyntaxTheme; pub use toolchain::{LanguageToolchainStore, Toolchain, ToolchainList, ToolchainLister}; use tree_sitter::{self, wasmtime, Query, QueryCursor, WasmStore}; diff --git a/crates/language/src/text_diff.rs b/crates/language/src/text_diff.rs new file mode 100644 index 0000000000..83c4aba3bb --- /dev/null +++ b/crates/language/src/text_diff.rs @@ -0,0 +1,274 @@ +use crate::{CharClassifier, CharKind, LanguageScope}; +use imara_diff::{ + diff, + intern::{InternedInput, Token}, + sources::lines_with_terminator, + Algorithm, UnifiedDiffBuilder, +}; +use std::{iter, ops::Range, sync::Arc}; + +const MAX_WORD_DIFF_LEN: usize = 512; +const MAX_WORD_DIFF_LINE_COUNT: usize = 8; + +/// Computes a diff between two strings, returning a unified diff string. +pub fn unified_diff(old_text: &str, new_text: &str) -> String { + let input = InternedInput::new(old_text, new_text); + diff( + Algorithm::Histogram, + &input, + UnifiedDiffBuilder::new(&input), + ) +} + +/// Computes a diff between two strings, returning a vector of old and new row +/// ranges. +pub fn line_diff(old_text: &str, new_text: &str) -> Vec<(Range, Range)> { + let mut edits = Vec::new(); + let input = InternedInput::new( + lines_with_terminator(old_text), + lines_with_terminator(new_text), + ); + diff_internal(&input, |_, _, old_rows, new_rows| { + edits.push((old_rows, new_rows)); + }); + edits +} + +/// Computes a diff between two strings, returning a vector of edits. +/// +/// The edits are represented as tuples of byte ranges and replacement strings. +/// +/// Internally, this function first performs a line-based diff, and then performs a second +/// word-based diff within hunks that replace small numbers of lines. +pub fn text_diff(old_text: &str, new_text: &str) -> Vec<(Range, Arc)> { + text_diff_with_options(old_text, new_text, DiffOptions::default()) +} + +pub struct DiffOptions { + pub language_scope: Option, + pub max_word_diff_len: usize, + pub max_word_diff_line_count: usize, +} + +impl Default for DiffOptions { + fn default() -> Self { + Self { + language_scope: Default::default(), + max_word_diff_len: MAX_WORD_DIFF_LEN, + max_word_diff_line_count: MAX_WORD_DIFF_LINE_COUNT, + } + } +} + +/// Computes a diff between two strings, using a specific language scope's +/// word characters for word-level diffing. +pub fn text_diff_with_options( + old_text: &str, + new_text: &str, + options: DiffOptions, +) -> Vec<(Range, Arc)> { + let empty: Arc = Arc::default(); + let mut edits = Vec::new(); + let mut hunk_input = InternedInput::default(); + let input = InternedInput::new( + lines_with_terminator(old_text), + lines_with_terminator(new_text), + ); + diff_internal( + &input, + |old_byte_range, new_byte_range, old_rows, new_rows| { + if should_perform_word_diff_within_hunk( + &old_rows, + &old_byte_range, + &new_rows, + &new_byte_range, + &options, + ) { + let old_offset = old_byte_range.start; + let new_offset = new_byte_range.start; + hunk_input.clear(); + hunk_input.update_before(tokenize( + &old_text[old_byte_range.clone()], + options.language_scope.clone(), + )); + hunk_input.update_after(tokenize( + &new_text[new_byte_range.clone()], + options.language_scope.clone(), + )); + diff_internal(&hunk_input, |old_byte_range, new_byte_range, _, _| { + let old_byte_range = + old_offset + old_byte_range.start..old_offset + old_byte_range.end; + let new_byte_range = + new_offset + new_byte_range.start..new_offset + new_byte_range.end; + let replacement_text = if new_byte_range.is_empty() { + empty.clone() + } else { + new_text[new_byte_range.clone()].into() + }; + edits.push((old_byte_range, replacement_text)); + }); + } else { + let replacement_text = if new_byte_range.is_empty() { + empty.clone() + } else { + new_text[new_byte_range.clone()].into() + }; + edits.push((old_byte_range.clone(), replacement_text)); + } + }, + ); + edits +} + +fn should_perform_word_diff_within_hunk( + old_row_range: &Range, + old_byte_range: &Range, + new_row_range: &Range, + new_byte_range: &Range, + options: &DiffOptions, +) -> bool { + !old_byte_range.is_empty() + && !new_byte_range.is_empty() + && old_byte_range.len() <= options.max_word_diff_len + && new_byte_range.len() <= options.max_word_diff_len + && old_row_range.len() <= options.max_word_diff_line_count + && new_row_range.len() <= options.max_word_diff_line_count +} + +fn diff_internal( + input: &InternedInput<&str>, + mut on_change: impl FnMut(Range, Range, Range, Range), +) { + let mut old_offset = 0; + let mut new_offset = 0; + let mut old_token_ix = 0; + let mut new_token_ix = 0; + diff( + Algorithm::Histogram, + input, + |old_tokens: Range, new_tokens: Range| { + old_offset += token_len( + &input, + &input.before[old_token_ix as usize..old_tokens.start as usize], + ); + new_offset += token_len( + &input, + &input.after[new_token_ix as usize..new_tokens.start as usize], + ); + let old_len = token_len( + &input, + &input.before[old_tokens.start as usize..old_tokens.end as usize], + ); + let new_len = token_len( + &input, + &input.after[new_tokens.start as usize..new_tokens.end as usize], + ); + let old_byte_range = old_offset..old_offset + old_len; + let new_byte_range = new_offset..new_offset + new_len; + old_token_ix = old_tokens.end; + new_token_ix = new_tokens.end; + old_offset = old_byte_range.end; + new_offset = new_byte_range.end; + on_change(old_byte_range, new_byte_range, old_tokens, new_tokens); + }, + ); +} + +fn tokenize(text: &str, language_scope: Option) -> impl Iterator { + let classifier = CharClassifier::new(language_scope).for_completion(true); + let mut chars = text.char_indices(); + let mut prev = None; + let mut start_ix = 0; + iter::from_fn(move || { + while let Some((ix, c)) = chars.next() { + let mut token = None; + let kind = classifier.kind(c); + if let Some((prev_char, prev_kind)) = prev { + if kind != prev_kind || (kind == CharKind::Punctuation && c != prev_char) { + token = Some(&text[start_ix..ix]); + start_ix = ix; + } + } + prev = Some((c, kind)); + if token.is_some() { + return token; + } + } + if start_ix < text.len() { + let token = &text[start_ix..]; + start_ix = text.len(); + return Some(token); + } + None + }) +} + +fn token_len(input: &InternedInput<&str>, tokens: &[Token]) -> usize { + tokens + .iter() + .map(|token| input.interner[*token].len()) + .sum() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_tokenize() { + let text = ""; + assert_eq!(tokenize(text, None).collect::>(), Vec::<&str>::new()); + + let text = " "; + assert_eq!(tokenize(text, None).collect::>(), vec![" "]); + + let text = "one"; + assert_eq!(tokenize(text, None).collect::>(), vec!["one"]); + + let text = "one\n"; + assert_eq!(tokenize(text, None).collect::>(), vec!["one", "\n"]); + + let text = "one.two(three)"; + assert_eq!( + tokenize(text, None).collect::>(), + vec!["one", ".", "two", "(", "three", ")"] + ); + + let text = "one two three()"; + assert_eq!( + tokenize(text, None).collect::>(), + vec!["one", " ", "two", " ", "three", "(", ")"] + ); + + let text = " one\n two three"; + assert_eq!( + tokenize(text, None).collect::>(), + vec![" ", "one", "\n ", "two", " ", "three"] + ); + } + + #[test] + fn test_text_diff() { + let old_text = "one two three"; + let new_text = "one TWO three"; + assert_eq!(text_diff(old_text, new_text), [(4..7, "TWO".into()),]); + + let old_text = "one\ntwo\nthree\n"; + let new_text = "one\ntwo\nAND\nTHEN\nthree\n"; + assert_eq!( + text_diff(old_text, new_text), + [(8..8, "AND\nTHEN\n".into()),] + ); + + let old_text = "one two\nthree four five\nsix seven eight nine\nten\n"; + let new_text = "one two\nthree FOUR five\nsix SEVEN eight nine\nten\nELEVEN\n"; + assert_eq!( + text_diff(old_text, new_text), + [ + (14..18, "FOUR".into()), + (28..33, "SEVEN".into()), + (49..49, "ELEVEN\n".into()) + ] + ); + } +} diff --git a/crates/project/Cargo.toml b/crates/project/Cargo.toml index ea5a852f78..d2d571a81c 100644 --- a/crates/project/Cargo.toml +++ b/crates/project/Cargo.toml @@ -64,7 +64,6 @@ settings.workspace = true sha2.workspace = true shellexpand.workspace = true shlex.workspace = true -similar = "1.3" smol.workspace = true snippet.workspace = true snippet_provider.workspace = true diff --git a/crates/project/src/lsp_store.rs b/crates/project/src/lsp_store.rs index 427ebd1b59..632b96c8b7 100644 --- a/crates/project/src/lsp_store.rs +++ b/crates/project/src/lsp_store.rs @@ -63,7 +63,6 @@ use rpc::{ use serde::Serialize; use settings::{Settings, SettingsLocation, SettingsStore}; use sha2::{Digest, Sha256}; -use similar::{ChangeTag, TextDiff}; use smol::channel::Sender; use snippet::Snippet; use std::{ @@ -110,15 +109,13 @@ pub enum LspFormatTarget { Ranges(BTreeMap>>), } -// proto::RegisterBufferWithLanguageServer {} - pub type OpenLspBufferHandle = Entity>; // Currently, formatting operations are represented differently depending on // whether they come from a language server or an external command. #[derive(Debug)] pub enum FormatOperation { - Lsp(Vec<(Range, String)>), + Lsp(Vec<(Range, Arc)>), External(Diff), Prettier(Diff), } @@ -1388,7 +1385,7 @@ impl LocalLspStore { language_server: &Arc, settings: &LanguageSettings, cx: &mut AsyncApp, - ) -> Result, String)>> { + ) -> Result, Arc)>> { let capabilities = &language_server.capabilities(); let range_formatting_provider = capabilities.document_range_formatting_provider.as_ref(); if range_formatting_provider.map_or(false, |provider| provider == &OneOf::Left(false)) { @@ -1459,7 +1456,7 @@ impl LocalLspStore { language_server: &Arc, settings: &LanguageSettings, cx: &mut AsyncApp, - ) -> Result, String)>> { + ) -> Result, Arc)>> { let uri = lsp::Url::from_file_path(abs_path) .map_err(|_| anyhow!("failed to convert abs path to uri"))?; let text_document = lsp::TextDocumentIdentifier::new(uri); @@ -2144,7 +2141,7 @@ impl LocalLspStore { server_id: LanguageServerId, version: Option, cx: &mut Context, - ) -> Task, String)>>> { + ) -> Task, Arc)>>> { let snapshot = self.buffer_snapshot_for_lsp_version(buffer, server_id, version, cx); cx.background_spawn(async move { let snapshot = snapshot?; @@ -2192,48 +2189,23 @@ impl LocalLspStore { // we can identify the changes more precisely, preserving the locations // of any anchors positioned in the unchanged regions. if range.end.row > range.start.row { - let mut offset = range.start.to_offset(&snapshot); + let offset = range.start.to_offset(&snapshot); let old_text = snapshot.text_for_range(range).collect::(); - - let diff = TextDiff::from_lines(old_text.as_str(), &new_text); - let mut moved_since_edit = true; - for change in diff.iter_all_changes() { - let tag = change.tag(); - let value = change.value(); - match tag { - ChangeTag::Equal => { - offset += value.len(); - moved_since_edit = true; - } - ChangeTag::Delete => { - let start = snapshot.anchor_after(offset); - let end = snapshot.anchor_before(offset + value.len()); - if moved_since_edit { - edits.push((start..end, String::new())); - } else { - edits.last_mut().unwrap().0.end = end; - } - offset += value.len(); - moved_since_edit = false; - } - ChangeTag::Insert => { - if moved_since_edit { - let anchor = snapshot.anchor_after(offset); - edits.push((anchor..anchor, value.to_string())); - } else { - edits.last_mut().unwrap().1.push_str(value); - } - moved_since_edit = false; - } - } - } + let range_edits = language::text_diff(old_text.as_str(), &new_text); + edits.extend(range_edits.into_iter().map(|(range, replacement)| { + ( + snapshot.anchor_after(offset + range.start) + ..snapshot.anchor_before(offset + range.end), + replacement, + ) + })); } else if range.end == range.start { let anchor = snapshot.anchor_after(range.start); - edits.push((anchor..anchor, new_text)); + edits.push((anchor..anchor, new_text.into())); } else { let edit_start = snapshot.anchor_after(range.start); let edit_end = snapshot.anchor_before(range.end); - edits.push((edit_start..edit_end, new_text)); + edits.push((edit_start..edit_end, new_text.into())); } } diff --git a/crates/project/src/project_tests.rs b/crates/project/src/project_tests.rs index 1a3d753a21..150fc9fa56 100644 --- a/crates/project/src/project_tests.rs +++ b/crates/project/src/project_tests.rs @@ -25,7 +25,11 @@ use std::{mem, num::NonZeroU32, ops::Range, task::Poll}; use task::{ResolvedTask, TaskContext}; use unindent::Unindent as _; use util::{ - assert_set_eq, path, paths::PathMatcher, separator, test::TempTree, uri, TryFutureExt as _, + assert_set_eq, path, + paths::PathMatcher, + separator, + test::{marked_text_offsets, TempTree}, + uri, TryFutureExt as _, }; #[gpui::test] @@ -3625,7 +3629,8 @@ async fn test_buffer_is_dirty(cx: &mut gpui::TestAppContext) { async fn test_buffer_file_changes_on_disk(cx: &mut gpui::TestAppContext) { init_test(cx); - let initial_contents = "aaa\nbbbbb\nc\n"; + let (initial_contents, initial_offsets) = + marked_text_offsets("one twoˇ\nthree ˇfourˇ five\nsixˇ seven\n"); let fs = FakeFs::new(cx.executor()); fs.insert_tree( path!("/dir"), @@ -3640,8 +3645,9 @@ async fn test_buffer_file_changes_on_disk(cx: &mut gpui::TestAppContext) { .await .unwrap(); - let anchors = (0..3) - .map(|row| buffer.update(cx, |b, _| b.anchor_before(Point::new(row, 1)))) + let anchors = initial_offsets + .iter() + .map(|offset| buffer.update(cx, |b, _| b.anchor_before(offset))) .collect::>(); // Change the file on disk, adding two new lines of text, and removing @@ -3650,10 +3656,12 @@ async fn test_buffer_file_changes_on_disk(cx: &mut gpui::TestAppContext) { assert!(!buffer.is_dirty()); assert!(!buffer.has_conflict()); }); - let new_contents = "AAAA\naaa\nBB\nbbbbb\n"; + + let (new_contents, new_offsets) = + marked_text_offsets("oneˇ\nthree ˇFOURˇ five\nsixtyˇ seven\n"); fs.save( path!("/dir/the-file").as_ref(), - &new_contents.into(), + &new_contents.as_str().into(), LineEnding::Unix, ) .await @@ -3668,14 +3676,11 @@ async fn test_buffer_file_changes_on_disk(cx: &mut gpui::TestAppContext) { assert!(!buffer.is_dirty()); assert!(!buffer.has_conflict()); - let anchor_positions = anchors + let anchor_offsets = anchors .iter() - .map(|anchor| anchor.to_point(&*buffer)) + .map(|anchor| anchor.to_offset(&*buffer)) .collect::>(); - assert_eq!( - anchor_positions, - [Point::new(1, 1), Point::new(3, 1), Point::new(3, 5)] - ); + assert_eq!(anchor_offsets, new_offsets); }); // Modify the buffer @@ -3698,6 +3703,7 @@ async fn test_buffer_file_changes_on_disk(cx: &mut gpui::TestAppContext) { // marked as having a conflict. cx.executor().run_until_parked(); buffer.update(cx, |buffer, _| { + assert_eq!(buffer.text(), " ".to_string() + &new_contents); assert!(buffer.has_conflict()); }); } diff --git a/crates/zeta/Cargo.toml b/crates/zeta/Cargo.toml index f8f0bb4da3..515624962a 100644 --- a/crates/zeta/Cargo.toml +++ b/crates/zeta/Cargo.toml @@ -45,7 +45,6 @@ release_channel.workspace = true serde.workspace = true serde_json.workspace = true settings.workspace = true -similar.workspace = true telemetry.workspace = true telemetry_events.workspace = true theme.workspace = true diff --git a/crates/zeta/src/zeta.rs b/crates/zeta/src/zeta.rs index f7f335469c..82fd0e9991 100644 --- a/crates/zeta/src/zeta.rs +++ b/crates/zeta/src/zeta.rs @@ -29,8 +29,7 @@ use gpui::{ use http_client::{HttpClient, Method}; use input_excerpt::excerpt_for_cursor_position; use language::{ - Anchor, Buffer, BufferSnapshot, CharClassifier, CharKind, EditPreview, OffsetRangeExt, - ToOffset, ToPoint, + text_diff, Anchor, Buffer, BufferSnapshot, EditPreview, OffsetRangeExt, ToOffset, ToPoint, }; use language_models::LlmApiToken; use postage::watch; @@ -919,77 +918,18 @@ and then another offset: usize, snapshot: &BufferSnapshot, ) -> Vec<(Range, String)> { - fn tokenize(text: &str) -> Vec<&str> { - let classifier = CharClassifier::new(None).for_completion(true); - let mut chars = text.chars().peekable(); - let mut prev_ch = chars.peek().copied(); - let mut tokens = Vec::new(); - let mut start = 0; - let mut end = 0; - while let Some(ch) = chars.next() { - let prev_kind = prev_ch.map(|ch| classifier.kind(ch)); - let kind = classifier.kind(ch); - if Some(kind) != prev_kind || (kind == CharKind::Punctuation && Some(ch) != prev_ch) - { - tokens.push(&text[start..end]); - start = end; - } - end += ch.len_utf8(); - prev_ch = Some(ch); - } - tokens.push(&text[start..end]); - tokens - } - - let old_tokens = tokenize(&old_text); - let new_tokens = tokenize(new_text); - - let diff = similar::TextDiffConfig::default() - .algorithm(similar::Algorithm::Patience) - .diff_slices(&old_tokens, &new_tokens); - let mut edits: Vec<(Range, String)> = Vec::new(); - let mut old_start = offset; - for change in diff.iter_all_changes() { - let value = change.value(); - match change.tag() { - similar::ChangeTag::Equal => { - old_start += value.len(); - } - similar::ChangeTag::Delete => { - let old_end = old_start + value.len(); - if let Some((last_old_range, _)) = edits.last_mut() { - if last_old_range.end == old_start { - last_old_range.end = old_end; - } else { - edits.push((old_start..old_end, String::new())); - } - } else { - edits.push((old_start..old_end, String::new())); - } - old_start = old_end; - } - similar::ChangeTag::Insert => { - if let Some((last_old_range, last_new_text)) = edits.last_mut() { - if last_old_range.end == old_start { - last_new_text.push_str(value); - } else { - edits.push((old_start..old_start, value.into())); - } - } else { - edits.push((old_start..old_start, value.into())); - } - } - } - } - - edits + text_diff(&old_text, &new_text) .into_iter() .map(|(mut old_range, new_text)| { + old_range.start += offset; + old_range.end += offset; + let prefix_len = common_prefix( snapshot.chars_for_range(old_range.clone()), new_text.chars(), ); old_range.start += prefix_len; + let suffix_len = common_prefix( snapshot.reversed_chars_for_range(old_range.clone()), new_text[prefix_len..].chars().rev(), @@ -1248,10 +1188,7 @@ impl Event { writeln!(prompt, "User renamed {:?} to {:?}\n", old_path, new_path).unwrap(); } - let diff = - similar::TextDiff::from_lines(&old_snapshot.text(), &new_snapshot.text()) - .unified_diff() - .to_string(); + let diff = language::unified_diff(&old_snapshot.text(), &new_snapshot.text()); if !diff.is_empty() { write!( prompt,