Case Insensitive Unicode Text Search: Fallback To Regex (#28752)

Closes #9980

Release Notes:

- Fixed case-insensitive text search for queries containing non-ASCII (Unicode) characters.
This commit is contained in:
François Mockers 2025-04-15 13:12:37 +02:00 committed by GitHub
parent 616d17f517
commit d4a985a6e3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 96 additions and 0 deletions

View file

@ -5425,6 +5425,87 @@ async fn test_search_in_gitignored_dirs(cx: &mut gpui::TestAppContext) {
);
}
// Exercises text search with a non-ASCII (Cyrillic) query in both case-sensitive and
// case-insensitive modes. All expected ranges are UTF-8 byte offsets: every fixture starts
// with the 3-byte prefix "// ", and each Cyrillic letter occupies 2 bytes.
#[gpui::test]
async fn test_search_with_unicode(cx: &mut gpui::TestAppContext) {
    init_test(cx);

    let fake_fs = FakeFs::new(cx.executor());
    fake_fs
        .insert_tree(
            path!("/dir"),
            json!({
                "one.rs": "// ПРИВЕТ? привет!",
                "two.rs": "// ПРИВЕТ.",
                "three.rs": "// привет",
            }),
        )
        .await;
    let project = Project::test(fake_fs.clone(), [path!("/dir").as_ref()], cx).await;

    // Every query in this test differs only in its needle and its case sensitivity;
    // whole-word matching, ignored files, path filters and buffer restrictions stay off.
    let text_query = |needle: &str, case_sensitive: bool| {
        SearchQuery::text(
            needle,
            false,
            case_sensitive,
            false,
            Default::default(),
            Default::default(),
            None,
        )
    };

    // Case-sensitive: the query stays a plain text query and only the lowercase
    // occurrences are reported.
    let case_sensitive_query = text_query("привет", true);
    assert_matches!(case_sensitive_query, Ok(SearchQuery::Text { .. }));
    let case_sensitive_hits = search(&project, case_sensitive_query.unwrap(), cx)
        .await
        .unwrap();
    assert_eq!(
        case_sensitive_hits,
        HashMap::from_iter([
            (separator!("dir/one.rs").to_string(), vec![17..29]),
            (separator!("dir/three.rs").to_string(), vec![3..15]),
        ])
    );

    // Case-insensitive with a non-ASCII needle: the query is expected to be built as a
    // regex query, and both the uppercase and lowercase spellings are reported.
    let case_insensitive_query = text_query("привет", false);
    assert_matches!(case_insensitive_query, Ok(SearchQuery::Regex { .. }));
    let case_insensitive_hits = search(&project, case_insensitive_query.unwrap(), cx)
        .await
        .unwrap();
    assert_eq!(
        case_insensitive_hits,
        HashMap::from_iter([
            (separator!("dir/one.rs").to_string(), vec![3..15, 17..29]),
            (separator!("dir/two.rs").to_string(), vec![3..15]),
            (separator!("dir/three.rs").to_string(), vec![3..15]),
        ])
    );

    // The trailing "." must be matched literally: only "two.rs" has the word directly
    // followed by a period, whereas "one.rs" has "?" and "!" in those positions.
    let literal_dot_hits = search(&project, text_query("привет.", false).unwrap(), cx)
        .await
        .unwrap();
    assert_eq!(
        literal_dot_hits,
        HashMap::from_iter([(separator!("dir/two.rs").to_string(), vec![3..16]),])
    );
}
#[gpui::test]
async fn test_create_entry(cx: &mut gpui::TestAppContext) {
init_test(cx);

View file

@ -93,6 +93,21 @@ impl SearchQuery {
buffers: Option<Vec<Entity<Buffer>>>,
) -> Result<Self> {
let query = query.to_string();
if !case_sensitive && !query.is_ascii() {
// AhoCorasickBuilder only supports ASCII case-insensitive matching, so it cannot match
// non-ASCII queries case-insensitively. Fall back to regex search, as recommended by
// https://docs.rs/aho-corasick/1.1/aho_corasick/struct.AhoCorasickBuilder.html#method.ascii_case_insensitive
return Self::regex(
regex::escape(&query),
whole_word,
case_sensitive,
include_ignored,
false,
files_to_include,
files_to_exclude,
buffers,
);
}
let search = AhoCorasickBuilder::new()
.ascii_case_insensitive(!case_sensitive)
.build([&query])?;