Improve workflow step pruning and symbol similarity matching (#16036)

This PR improves workflow step management and symbol matching. We've
optimized step pruning to remove any step that intersects an edit and
switched to normalized Levenshtein distance for more accurate symbol
matching.

Release Notes:

- N/A
This commit is contained in:
Antonio Scandurra 2024-08-12 11:09:07 +02:00 committed by GitHub
parent 355aebd0e4
commit 48f6193628
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 124 additions and 20 deletions

View file

@ -84,13 +84,24 @@ impl<T> Outline<T> {
}
}
/// Find the most similar symbol to the provided query according to the Jaro-Winkler distance measure.
/// Find the most similar symbol to the provided query using normalized Levenshtein distance.
pub fn find_most_similar(&self, query: &str) -> Option<&OutlineItem<T>> {
let candidate = self.path_candidates.iter().max_by(|a, b| {
strsim::jaro_winkler(&a.string, query)
.total_cmp(&strsim::jaro_winkler(&b.string, query))
})?;
Some(&self.items[candidate.id])
const SIMILARITY_THRESHOLD: f64 = 0.6;
let (item, similarity) = self
.items
.iter()
.map(|item| {
let similarity = strsim::normalized_levenshtein(&item.text, query);
(item, similarity)
})
.max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())?;
if similarity >= SIMILARITY_THRESHOLD {
Some(item)
} else {
None
}
}
/// Find all outline symbols according to a longest subsequence match with the query, ordered descending by match score.
@ -208,3 +219,46 @@ pub fn render_item<T>(
StyledText::new(outline_item.text.clone()).with_highlights(&text_style, highlights)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_find_most_similar_with_low_similarity() {
let outline = Outline::new(vec![
OutlineItem {
depth: 0,
range: Point::new(0, 0)..Point::new(5, 0),
text: "fn process".to_string(),
highlight_ranges: vec![],
name_ranges: vec![3..10],
body_range: None,
annotation_range: None,
},
OutlineItem {
depth: 0,
range: Point::new(7, 0)..Point::new(12, 0),
text: "struct DataProcessor".to_string(),
highlight_ranges: vec![],
name_ranges: vec![7..20],
body_range: None,
annotation_range: None,
},
]);
assert_eq!(
outline.find_most_similar("pub fn process"),
Some(&outline.items[0])
);
assert_eq!(
outline.find_most_similar("async fn process"),
Some(&outline.items[0])
);
assert_eq!(
outline.find_most_similar("struct Processor"),
Some(&outline.items[1])
);
assert_eq!(outline.find_most_similar("struct User"), None);
assert_eq!(outline.find_most_similar("struct"), None);
}
}