From 1f52aab7c7d22a7ef2d32391598dc346945df7e3 Mon Sep 17 00:00:00 2001 From: smit <0xtimsb@gmail.com> Date: Thu, 27 Feb 2025 16:27:07 +0530 Subject: [PATCH] buffer: Fix panic when multi-byte character is used in languages like Swift (#25739) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #25471 In languages like Swift, names can be concatenated in a form like `class Example: UI`; notice that here `Example` and `:` are two different words. Before, the `name_ranges` translation of the above text would look like: ``` "class" -> [0..5] " Example" -> [5..13] (Spaces are intentional) "e:" -> [12..14] (This is incorrect, and should be ":" -> [13..14]) " UI" -> [14..17] ``` Because this translation does not account for concatenated words, this might affect queries, but most importantly this panics when a multi-byte character (`ф`) is used in place of `e`, as it then tries to access an index which lies inside that multi-byte character. For example, it panics on `class Examplф: UI`. --- This PR fixes this by handling concatenated words when calculating `name_ranges`. Now, the corrected ranges will look like: ``` "class" -> [0..5] " Example" -> [5..13] ":" -> [13..14] (Now it's correct) " UI" -> [14..17] ``` and for a multi-byte character ``` "class" -> [0..5] " Examplф" -> [5..14] (Notice ф takes two bytes) ":" -> [14..15] " UI" -> [15..18] ``` This way, it no longer tries to access a previous index, preventing a panic when that index contains a multi-byte character. Release Notes: - Fixed a panic when Cyrillic characters are used in languages like Swift. 
--- crates/language/src/buffer.rs | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs index c9719e8e6b..2d946dae16 100644 --- a/crates/language/src/buffer.rs +++ b/crates/language/src/buffer.rs @@ -3507,24 +3507,13 @@ impl BufferSnapshot { true, ); let mut last_buffer_range_end = 0; + for (buffer_range, is_name) in buffer_ranges { - if !text.is_empty() && buffer_range.start > last_buffer_range_end { + let space_added = !text.is_empty() && buffer_range.start > last_buffer_range_end; + if space_added { text.push(' '); } - last_buffer_range_end = buffer_range.end; - if is_name { - let mut start = text.len(); - let end = start + buffer_range.len(); - - // When multiple names are captured, then the matchable text - // includes the whitespace in between the names. - if !name_ranges.is_empty() { - start -= 1; - } - - name_ranges.push(start..end); - } - + let before_append_len = text.len(); let mut offset = buffer_range.start; chunks.seek(buffer_range.clone()); for mut chunk in chunks.by_ref() { @@ -3548,6 +3537,16 @@ impl BufferSnapshot { break; } } + if is_name { + let after_append_len = text.len(); + let start = if space_added && !name_ranges.is_empty() { + before_append_len - 1 + } else { + before_append_len + }; + name_ranges.push(start..after_append_len); + } + last_buffer_range_end = buffer_range.end; } Some(OutlineItem {