ZIm/crates/fuzzy/src/strings.rs
Smit Barmase 131f2857a5
editor: Improve code completion filtering to provide fewer and more accurate suggestions (#32928)
Closes #32756

- Uses `filter_text` from the LSP source to filter items in the completion list.
This fixes noisy lists: for example, typing `await` in Rust would suggest
`await.or`, `await.and`, etc., which are bad suggestions. Falls back to the
label.
- Adds a `penalize_length` flag to the fuzzy matcher. Penalizing length was
previously the default behavior everywhere. Now, this flag is set to `false`
just for code completion fuzzy matching. This fixes the case where, if the
query is `unreac` and the completion items are `unreachable` and
`unreachable!()`, the shorter item would get a larger score than the other
one, which is not right in an auto-complete context. Now these two items
have the same fuzzy score, and LSP `sort_text` takes over in finalizing
their ranking.
- Updates the tests to be utility-based rather than example-based. This
makes it faster to iterate on and verify the matching logic.

Before/After:

await: 
<img width="600" alt="before-await"
src="https://github.com/user-attachments/assets/384138dd-a90d-4942-a430-6ae15df37268"
/>
<img width="600" alt="after-await"
src="https://github.com/user-attachments/assets/d05a10fa-bae5-49bd-9fe7-9933ff215f29"
/>

iter:
<img width="600" alt="before-iter"
src="https://github.com/user-attachments/assets/6e57ffe9-007d-4b17-9cc2-d48fc0176c8e"
/>
<img width="600" alt="after-iter"
src="https://github.com/user-attachments/assets/a8577a9f-dcc8-4fd6-9ba0-b7590584ec31"
/>

opt:
<img width="600" alt="opt-before"
src="https://github.com/user-attachments/assets/d45b6c52-c9ee-4bf3-8552-d5e3fdbecbff"
/>
<img width="600" alt="opt-after"
src="https://github.com/user-attachments/assets/daac11a8-9699-48f8-b441-19fe9803848d"
/>

Release Notes:

- Improved code completion filtering to provide fewer and more accurate
suggestions.
2025-06-18 16:01:28 +05:30

199 lines
6 KiB
Rust

use crate::{
CharBag,
matcher::{MatchCandidate, Matcher},
};
use gpui::BackgroundExecutor;
use std::{
borrow::{Borrow, Cow},
cmp::{self, Ordering},
iter,
ops::Range,
sync::atomic::{self, AtomicBool},
};
#[derive(Clone, Debug)]
/// A string that can be fuzzy-matched, stored together with the `CharBag`
/// that `has_chars` checks against.
pub struct StringMatchCandidate {
/// Caller-chosen identifier; copied into `StringMatch::candidate_id` for every match produced.
pub id: usize,
/// The text that queries are matched against.
pub string: String,
/// Character bag for this candidate; `StringMatchCandidate::new` builds it from `string`.
pub char_bag: CharBag,
}
impl StringMatchCandidate {
pub fn new(id: usize, string: &str) -> Self {
Self {
id,
string: string.into(),
char_bag: string.into(),
}
}
}
impl<'a> MatchCandidate for &'a StringMatchCandidate {
    /// Reports whether this candidate's character bag is a superset of `bag`.
    fn has_chars(&self, bag: CharBag) -> bool {
        let candidate: &StringMatchCandidate = self;
        candidate.char_bag.is_superset(bag)
    }

    /// Returns the candidate's text as a borrowed `Cow`; the string is not copied.
    fn to_string(&self) -> Cow<'a, str> {
        let candidate: &'a StringMatchCandidate = *self;
        Cow::Borrowed(candidate.string.as_str())
    }
}
#[derive(Clone, Debug)]
/// One successful match of a query against a `StringMatchCandidate`.
pub struct StringMatch {
/// The `id` of the candidate this match was produced from.
pub candidate_id: usize,
/// Score assigned by the matcher; the `Ord` impl compares this first.
/// `match_strings` uses `0.` for every candidate when the query is empty.
/// NOTE(review): presumably a larger score means a better match — confirm against `Matcher`.
pub score: f64,
/// Byte offsets into `string`; `ranges` expects each one to sit on a UTF-8
/// character boundary and treats it as the start of a matched character.
pub positions: Vec<usize>,
/// Owned copy of the matched candidate's text.
pub string: String,
}
impl StringMatch {
    /// Iterates over the matched byte ranges of `string`.
    ///
    /// Each entry of `positions` is widened to cover its whole UTF-8 character,
    /// and positions whose characters are directly adjacent are merged into a
    /// single range. If a position is out of range or not on a character
    /// boundary, an error is logged and the iterator yields `None`.
    pub fn ranges(&self) -> impl '_ + Iterator<Item = Range<usize>> {
        let mut positions = self.positions.iter().peekable();
        iter::from_fn(move || {
            // No more positions: the iteration is finished.
            let start = positions.next().copied()?;

            let Some(char_len) = self.char_len_at_index(start) else {
                log::error!(
                    "Invariant violation: Index {start} out of range or not on a utf-8 boundary in string {:?}",
                    self.string
                );
                return None;
            };
            let mut end = start + char_len;

            // Extend the range while the next position starts exactly where the
            // current range ends. The position is only peeked here; it is
            // consumed after its character length has been resolved.
            while positions.peek().is_some_and(|next_start| **next_start == end) {
                let Some(char_len) = self.char_len_at_index(end) else {
                    log::error!(
                        "Invariant violation: Index {end} out of range or not on a utf-8 boundary in string {:?}",
                        self.string
                    );
                    return None;
                };
                end += char_len;
                positions.next();
            }

            Some(start..end)
        })
    }

    /// Gets the byte length of the utf-8 character that starts at byte offset `ix`.
    ///
    /// Returns `None` when `ix` is out of range, is not on a utf-8 boundary, or
    /// points at the very end of the string (where no character starts).
    fn char_len_at_index(&self, ix: usize) -> Option<usize> {
        let tail = self.string.get(ix..)?;
        let first_char = tail.chars().next()?;
        Some(first_char.len_utf8())
    }
}
impl PartialEq for StringMatch {
fn eq(&self, other: &Self) -> bool {
self.cmp(other).is_eq()
}
}
// Marker impl with no methods: equality itself comes from the `PartialEq`
// impl, which defers to `Ord::cmp`.
impl Eq for StringMatch {}
impl PartialOrd for StringMatch {
    /// Always returns `Some`: the comparison is delegated to the `Ord` impl so
    /// the partial and total orderings stay in agreement.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}
impl Ord for StringMatch {
    /// Orders matches by `score`, breaking ties with `candidate_id`.
    ///
    /// `f64` is only partially ordered: comparing against NaN yields no
    /// ordering. Treating that case as `Ordering::Equal` would make a NaN score
    /// "equal" to every other score, which breaks the transitivity that `Ord`
    /// (and therefore sorting) relies on. Instead, a NaN score ranks below every
    /// non-NaN score, and two NaN scores compare equal to each other. The
    /// ordering of non-NaN scores is exactly what `partial_cmp` reports.
    fn cmp(&self, other: &Self) -> Ordering {
        self.score
            .partial_cmp(&other.score)
            // `partial_cmp` is `None` only when at least one score is NaN.
            // Comparing the flags in this order puts the NaN side first (lowest).
            .unwrap_or_else(|| other.score.is_nan().cmp(&self.score.is_nan()))
            .then_with(|| self.candidate_id.cmp(&other.candidate_id))
    }
}
/// Fuzzy-matches `query` against `candidates`, splitting the candidates into
/// segments that are matched as separate tasks on `executor`.
///
/// # Parameters
/// - `candidates`: the items to search; anything that borrows as a
///   `StringMatchCandidate`.
/// - `query`: the text to look for.
/// - `smart_case`, `penalize_length`: forwarded unchanged to `Matcher::new`.
/// - `max_results`: upper bound on the number of matches returned for a
///   non-empty query.
/// - `cancel_flag`: handed to the matcher; if it is set once all segments have
///   finished, the collected matches are discarded.
/// - `executor`: supplies the CPU count and runs the per-segment tasks.
///
/// # Returns
/// - An empty vector when `candidates` is empty, `max_results` is `0`, or
///   `cancel_flag` is set after matching.
/// - When `query` is empty: one `StringMatch` per candidate, with score `0.`
///   and no positions. Note that this path is *not* truncated to `max_results`.
/// - Otherwise: the merged matches from all segments, reduced to at most
///   `max_results` entries by `util::truncate_to_bottom_n_sorted_by` using a
///   reversed comparison (presumably highest-ranked first — confirm against
///   that helper).
pub async fn match_strings<T>(
    candidates: &[T],
    query: &str,
    smart_case: bool,
    penalize_length: bool,
    max_results: usize,
    cancel_flag: &AtomicBool,
    executor: BackgroundExecutor,
) -> Vec<StringMatch>
where
    T: Borrow<StringMatchCandidate> + Sync,
{
    if candidates.is_empty() || max_results == 0 {
        return Vec::new();
    }

    // An empty query matches everything: return all candidates unscored.
    if query.is_empty() {
        return candidates
            .iter()
            .map(|candidate| {
                let candidate: &StringMatchCandidate = candidate.borrow();
                StringMatch {
                    candidate_id: candidate.id,
                    score: 0.,
                    positions: Vec::new(),
                    string: candidate.string.clone(),
                }
            })
            .collect();
    }

    let lowercase_query = query.to_lowercase().chars().collect::<Vec<_>>();
    let query = query.chars().collect::<Vec<_>>();

    // Shadow with references so every `async move` task below can capture a
    // cheap copy instead of taking ownership of the vectors.
    let lowercase_query = &lowercase_query;
    let query = &query;
    let query_char_bag = CharBag::from(&lowercase_query[..]);

    // Never use more workers than candidates, and never fewer than one so the
    // `div_ceil` below cannot divide by zero.
    let num_cpus = executor.num_cpus().min(candidates.len()).max(1);
    let segment_size = candidates.len().div_ceil(num_cpus);

    // One result buffer per segment, so tasks never share mutable state.
    let mut segment_results = (0..num_cpus)
        .map(|_| Vec::with_capacity(max_results.min(candidates.len())))
        .collect::<Vec<_>>();

    executor
        .scoped(|scope| {
            for (segment_idx, results) in segment_results.iter_mut().enumerate() {
                let cancel_flag = &cancel_flag;
                scope.spawn(async move {
                    // Clamp both ends: the last segments may be short or empty.
                    let segment_start = cmp::min(segment_idx * segment_size, candidates.len());
                    let segment_end = cmp::min(segment_start + segment_size, candidates.len());
                    let mut matcher = Matcher::new(
                        query,
                        lowercase_query,
                        query_char_bag,
                        smart_case,
                        penalize_length,
                    );

                    matcher.match_candidates(
                        &[],
                        &[],
                        candidates[segment_start..segment_end]
                            .iter()
                            .map(|c| c.borrow()),
                        results,
                        cancel_flag,
                        |candidate: &&StringMatchCandidate, score, positions| StringMatch {
                            candidate_id: candidate.id,
                            score,
                            positions: positions.clone(),
                            string: candidate.string.clone(),
                        },
                    );
                });
            }
        })
        .await;

    // Results gathered before a cancellation may be incomplete; drop them.
    if cancel_flag.load(atomic::Ordering::Relaxed) {
        return Vec::new();
    }

    // Move the per-segment matches into one vector; unlike `concat`, this does
    // not clone each match's string and positions.
    let mut results: Vec<StringMatch> = segment_results.into_iter().flatten().collect();
    util::truncate_to_bottom_n_sorted_by(&mut results, max_results, &|a, b| b.cmp(a));
    results
}