Use new char_kind (parameterized by language)

This commit is contained in:
Piotr Osiewicz 2023-08-21 17:39:38 +02:00
parent 3e8522b5f2
commit ab5bd0ac5a
9 changed files with 93 additions and 47 deletions

View file

@ -1028,7 +1028,7 @@ impl SearchableItem for Editor {
if let Some((_, _, excerpt_buffer)) = buffer.as_singleton() { if let Some((_, _, excerpt_buffer)) = buffer.as_singleton() {
ranges.extend( ranges.extend(
query query
.search(excerpt_buffer.as_rope()) .search(excerpt_buffer, None)
.await .await
.into_iter() .into_iter()
.map(|range| { .map(|range| {
@ -1038,17 +1038,22 @@ impl SearchableItem for Editor {
} else { } else {
for excerpt in buffer.excerpt_boundaries_in_range(0..buffer.len()) { for excerpt in buffer.excerpt_boundaries_in_range(0..buffer.len()) {
let excerpt_range = excerpt.range.context.to_offset(&excerpt.buffer); let excerpt_range = excerpt.range.context.to_offset(&excerpt.buffer);
let rope = excerpt.buffer.as_rope().slice(excerpt_range.clone()); ranges.extend(
ranges.extend(query.search(&rope).await.into_iter().map(|range| { query
let start = excerpt .search(&excerpt.buffer, Some(excerpt_range.clone()))
.buffer .await
.anchor_after(excerpt_range.start + range.start); .into_iter()
let end = excerpt .map(|range| {
.buffer let start = excerpt
.anchor_before(excerpt_range.start + range.end); .buffer
buffer.anchor_in_excerpt(excerpt.id.clone(), start) .anchor_after(excerpt_range.start + range.start);
..buffer.anchor_in_excerpt(excerpt.id.clone(), end) let end = excerpt
})); .buffer
.anchor_before(excerpt_range.start + range.end);
buffer.anchor_in_excerpt(excerpt.id.clone(), start)
..buffer.anchor_in_excerpt(excerpt.id.clone(), end)
}),
);
} }
} }
ranges ranges

View file

@ -176,7 +176,9 @@ pub fn line_end(
} }
pub fn previous_word_start(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint { pub fn previous_word_start(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint {
let language = map.buffer_snapshot.language_at(point); let raw_point = point.to_point(map);
let language = map.buffer_snapshot.language_at(raw_point);
find_preceding_boundary(map, point, |left, right| { find_preceding_boundary(map, point, |left, right| {
(char_kind(language, left) != char_kind(language, right) && !right.is_whitespace()) (char_kind(language, left) != char_kind(language, right) && !right.is_whitespace())
|| left == '\n' || left == '\n'
@ -184,7 +186,8 @@ pub fn previous_word_start(map: &DisplaySnapshot, point: DisplayPoint) -> Displa
} }
pub fn previous_subword_start(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint { pub fn previous_subword_start(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint {
let language = map.buffer_snapshot.language_at(point); let raw_point = point.to_point(map);
let language = map.buffer_snapshot.language_at(raw_point);
find_preceding_boundary(map, point, |left, right| { find_preceding_boundary(map, point, |left, right| {
let is_word_start = let is_word_start =
char_kind(language, left) != char_kind(language, right) && !right.is_whitespace(); char_kind(language, left) != char_kind(language, right) && !right.is_whitespace();
@ -195,14 +198,20 @@ pub fn previous_subword_start(map: &DisplaySnapshot, point: DisplayPoint) -> Dis
} }
pub fn next_word_end(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint { pub fn next_word_end(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint {
let raw_point = point.to_point(map);
let language = map.buffer_snapshot.language_at(raw_point);
find_boundary(map, point, |left, right| { find_boundary(map, point, |left, right| {
(char_kind(left) != char_kind(right) && !left.is_whitespace()) || right == '\n' (char_kind(language, left) != char_kind(language, right) && !left.is_whitespace())
|| right == '\n'
}) })
} }
pub fn next_subword_end(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint { pub fn next_subword_end(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint {
let raw_point = point.to_point(map);
let language = map.buffer_snapshot.language_at(raw_point);
find_boundary(map, point, |left, right| { find_boundary(map, point, |left, right| {
let is_word_end = (char_kind(left) != char_kind(right)) && !left.is_whitespace(); let is_word_end =
(char_kind(language, left) != char_kind(language, right)) && !left.is_whitespace();
let is_subword_end = let is_subword_end =
left != '_' && right == '_' || left.is_lowercase() && right.is_uppercase(); left != '_' && right == '_' || left.is_lowercase() && right.is_uppercase();
is_word_end || is_subword_end || right == '\n' is_word_end || is_subword_end || right == '\n'
@ -389,10 +398,15 @@ pub fn find_boundary_in_line(
} }
pub fn is_inside_word(map: &DisplaySnapshot, point: DisplayPoint) -> bool { pub fn is_inside_word(map: &DisplaySnapshot, point: DisplayPoint) -> bool {
let raw_point = point.to_point(map);
let language = map.buffer_snapshot.language_at(raw_point);
let ix = map.clip_point(point, Bias::Left).to_offset(map, Bias::Left); let ix = map.clip_point(point, Bias::Left).to_offset(map, Bias::Left);
let text = &map.buffer_snapshot; let text = &map.buffer_snapshot;
let next_char_kind = text.chars_at(ix).next().map(char_kind); let next_char_kind = text.chars_at(ix).next().map(|c| char_kind(language, c));
let prev_char_kind = text.reversed_chars_at(ix).next().map(char_kind); let prev_char_kind = text
.reversed_chars_at(ix)
.next()
.map(|c| char_kind(language, c));
prev_char_kind.zip(next_char_kind) == Some((CharKind::Word, CharKind::Word)) prev_char_kind.zip(next_char_kind) == Some((CharKind::Word, CharKind::Word))
} }

View file

@ -2989,12 +2989,16 @@ pub fn contiguous_ranges(
pub fn char_kind(language: Option<&Arc<Language>>, c: char) -> CharKind { pub fn char_kind(language: Option<&Arc<Language>>, c: char) -> CharKind {
if c.is_whitespace() { if c.is_whitespace() {
CharKind::Whitespace return CharKind::Whitespace;
} else if c.is_alphanumeric() || c == '_' || c == '$' { } else if c.is_alphanumeric() || c == '_' {
CharKind::Word return CharKind::Word;
} else {
CharKind::Punctuation
} }
if let Some(language) = language {
if language.config.word_boundaries.contains(&c) {
return CharKind::Word;
}
}
CharKind::Punctuation
} }
/// Find all of the ranges of whitespace that occur at the ends of lines /// Find all of the ranges of whitespace that occur at the ends of lines

View file

@ -11,7 +11,7 @@ mod buffer_tests;
use anyhow::{anyhow, Context, Result}; use anyhow::{anyhow, Context, Result};
use async_trait::async_trait; use async_trait::async_trait;
use collections::HashMap; use collections::{HashMap, HashSet};
use futures::{ use futures::{
channel::oneshot, channel::oneshot,
future::{BoxFuture, Shared}, future::{BoxFuture, Shared},
@ -344,6 +344,8 @@ pub struct LanguageConfig {
pub block_comment: Option<(Arc<str>, Arc<str>)>, pub block_comment: Option<(Arc<str>, Arc<str>)>,
#[serde(default)] #[serde(default)]
pub overrides: HashMap<String, LanguageConfigOverride>, pub overrides: HashMap<String, LanguageConfigOverride>,
#[serde(default)]
pub word_boundaries: HashSet<char>,
} }
#[derive(Debug, Default)] #[derive(Debug, Default)]
@ -411,6 +413,7 @@ impl Default for LanguageConfig {
block_comment: Default::default(), block_comment: Default::default(),
overrides: Default::default(), overrides: Default::default(),
collapsed_placeholder: Default::default(), collapsed_placeholder: Default::default(),
word_boundaries: Default::default(),
} }
} }
} }

View file

@ -5170,7 +5170,7 @@ impl Project {
snapshot.file().map(|file| file.path().as_ref()), snapshot.file().map(|file| file.path().as_ref()),
) { ) {
query query
.search(snapshot.as_rope()) .search(&snapshot, None)
.await .await
.iter() .iter()
.map(|range| { .map(|range| {

View file

@ -3,7 +3,7 @@ use anyhow::{Context, Result};
use client::proto; use client::proto;
use globset::{Glob, GlobMatcher}; use globset::{Glob, GlobMatcher};
use itertools::Itertools; use itertools::Itertools;
use language::{char_kind, Rope}; use language::{char_kind, BufferSnapshot};
use regex::{Regex, RegexBuilder}; use regex::{Regex, RegexBuilder};
use smol::future::yield_now; use smol::future::yield_now;
use std::{ use std::{
@ -215,13 +215,23 @@ impl SearchQuery {
} }
} }
pub async fn search(&self, rope: &Rope) -> Vec<Range<usize>> { pub async fn search(
&self,
buffer: &BufferSnapshot,
subrange: Option<Range<usize>>,
) -> Vec<Range<usize>> {
const YIELD_INTERVAL: usize = 20000; const YIELD_INTERVAL: usize = 20000;
if self.as_str().is_empty() { if self.as_str().is_empty() {
return Default::default(); return Default::default();
} }
let language = rope.language(cx); let language = buffer.language_at(0);
let rope = if let Some(range) = subrange {
buffer.as_rope().slice(range)
} else {
buffer.as_rope().clone()
};
let kind = |c| char_kind(language, c); let kind = |c| char_kind(language, c);
let mut matches = Vec::new(); let mut matches = Vec::new();

View file

@ -439,11 +439,12 @@ pub(crate) fn next_word_start(
ignore_punctuation: bool, ignore_punctuation: bool,
times: usize, times: usize,
) -> DisplayPoint { ) -> DisplayPoint {
let language = map.buffer_snapshot.language_at(point.to_point(map));
for _ in 0..times { for _ in 0..times {
let mut crossed_newline = false; let mut crossed_newline = false;
point = movement::find_boundary(map, point, |left, right| { point = movement::find_boundary(map, point, |left, right| {
let left_kind = char_kind(left).coerce_punctuation(ignore_punctuation); let left_kind = char_kind(language, left).coerce_punctuation(ignore_punctuation);
let right_kind = char_kind(right).coerce_punctuation(ignore_punctuation); let right_kind = char_kind(language, right).coerce_punctuation(ignore_punctuation);
let at_newline = right == '\n'; let at_newline = right == '\n';
let found = (left_kind != right_kind && right_kind != CharKind::Whitespace) let found = (left_kind != right_kind && right_kind != CharKind::Whitespace)
@ -463,11 +464,12 @@ fn next_word_end(
ignore_punctuation: bool, ignore_punctuation: bool,
times: usize, times: usize,
) -> DisplayPoint { ) -> DisplayPoint {
let language = map.buffer_snapshot.language_at(point.to_point(map));
for _ in 0..times { for _ in 0..times {
*point.column_mut() += 1; *point.column_mut() += 1;
point = movement::find_boundary(map, point, |left, right| { point = movement::find_boundary(map, point, |left, right| {
let left_kind = char_kind(left).coerce_punctuation(ignore_punctuation); let left_kind = char_kind(language, left).coerce_punctuation(ignore_punctuation);
let right_kind = char_kind(right).coerce_punctuation(ignore_punctuation); let right_kind = char_kind(language, right).coerce_punctuation(ignore_punctuation);
left_kind != right_kind && left_kind != CharKind::Whitespace left_kind != right_kind && left_kind != CharKind::Whitespace
}); });
@ -493,12 +495,13 @@ fn previous_word_start(
ignore_punctuation: bool, ignore_punctuation: bool,
times: usize, times: usize,
) -> DisplayPoint { ) -> DisplayPoint {
let language = map.buffer_snapshot.language_at(point.to_point(map));
for _ in 0..times { for _ in 0..times {
// This works even though find_preceding_boundary is called for every character in the line containing // This works even though find_preceding_boundary is called for every character in the line containing
// cursor because the newline is checked only once. // cursor because the newline is checked only once.
point = movement::find_preceding_boundary(map, point, |left, right| { point = movement::find_preceding_boundary(map, point, |left, right| {
let left_kind = char_kind(left).coerce_punctuation(ignore_punctuation); let left_kind = char_kind(language, left).coerce_punctuation(ignore_punctuation);
let right_kind = char_kind(right).coerce_punctuation(ignore_punctuation); let right_kind = char_kind(language, right).coerce_punctuation(ignore_punctuation);
(left_kind != right_kind && !right.is_whitespace()) || left == '\n' (left_kind != right_kind && !right.is_whitespace()) || left == '\n'
}); });
@ -508,6 +511,7 @@ fn previous_word_start(
fn first_non_whitespace(map: &DisplaySnapshot, from: DisplayPoint) -> DisplayPoint { fn first_non_whitespace(map: &DisplaySnapshot, from: DisplayPoint) -> DisplayPoint {
let mut last_point = DisplayPoint::new(from.row(), 0); let mut last_point = DisplayPoint::new(from.row(), 0);
let language = map.buffer_snapshot.language_at(from.to_point(map));
for (ch, point) in map.chars_at(last_point) { for (ch, point) in map.chars_at(last_point) {
if ch == '\n' { if ch == '\n' {
return from; return from;
@ -515,7 +519,7 @@ fn first_non_whitespace(map: &DisplaySnapshot, from: DisplayPoint) -> DisplayPoi
last_point = point; last_point = point;
if char_kind(ch) != CharKind::Whitespace { if char_kind(language, ch) != CharKind::Whitespace {
break; break;
} }
} }

View file

@ -82,16 +82,19 @@ fn expand_changed_word_selection(
ignore_punctuation: bool, ignore_punctuation: bool,
) -> bool { ) -> bool {
if times.is_none() || times.unwrap() == 1 { if times.is_none() || times.unwrap() == 1 {
let language = map
.buffer_snapshot
.language_at(selection.start.to_point(map));
let in_word = map let in_word = map
.chars_at(selection.head()) .chars_at(selection.head())
.next() .next()
.map(|(c, _)| char_kind(c) != CharKind::Whitespace) .map(|(c, _)| char_kind(language, c) != CharKind::Whitespace)
.unwrap_or_default(); .unwrap_or_default();
if in_word { if in_word {
selection.end = movement::find_boundary(map, selection.end, |left, right| { selection.end = movement::find_boundary(map, selection.end, |left, right| {
let left_kind = char_kind(left).coerce_punctuation(ignore_punctuation); let left_kind = char_kind(language, left).coerce_punctuation(ignore_punctuation);
let right_kind = char_kind(right).coerce_punctuation(ignore_punctuation); let right_kind = char_kind(language, right).coerce_punctuation(ignore_punctuation);
left_kind != right_kind && left_kind != CharKind::Whitespace left_kind != right_kind && left_kind != CharKind::Whitespace
}); });

View file

@ -122,17 +122,18 @@ fn in_word(
ignore_punctuation: bool, ignore_punctuation: bool,
) -> Option<Range<DisplayPoint>> { ) -> Option<Range<DisplayPoint>> {
// Use motion::right so that we consider the character under the cursor when looking for the start // Use motion::right so that we consider the character under the cursor when looking for the start
let language = map.buffer_snapshot.language_at(relative_to.to_point(map));
let start = movement::find_preceding_boundary_in_line( let start = movement::find_preceding_boundary_in_line(
map, map,
right(map, relative_to, 1), right(map, relative_to, 1),
|left, right| { |left, right| {
char_kind(left).coerce_punctuation(ignore_punctuation) char_kind(language, left).coerce_punctuation(ignore_punctuation)
!= char_kind(right).coerce_punctuation(ignore_punctuation) != char_kind(language, right).coerce_punctuation(ignore_punctuation)
}, },
); );
let end = movement::find_boundary_in_line(map, relative_to, |left, right| { let end = movement::find_boundary_in_line(map, relative_to, |left, right| {
char_kind(left).coerce_punctuation(ignore_punctuation) char_kind(language, left).coerce_punctuation(ignore_punctuation)
!= char_kind(right).coerce_punctuation(ignore_punctuation) != char_kind(language, right).coerce_punctuation(ignore_punctuation)
}); });
Some(start..end) Some(start..end)
@ -155,10 +156,11 @@ fn around_word(
relative_to: DisplayPoint, relative_to: DisplayPoint,
ignore_punctuation: bool, ignore_punctuation: bool,
) -> Option<Range<DisplayPoint>> { ) -> Option<Range<DisplayPoint>> {
let language = map.buffer_snapshot.language_at(relative_to.to_point(map));
let in_word = map let in_word = map
.chars_at(relative_to) .chars_at(relative_to)
.next() .next()
.map(|(c, _)| char_kind(c) != CharKind::Whitespace) .map(|(c, _)| char_kind(language, c) != CharKind::Whitespace)
.unwrap_or(false); .unwrap_or(false);
if in_word { if in_word {
@ -182,20 +184,21 @@ fn around_next_word(
relative_to: DisplayPoint, relative_to: DisplayPoint,
ignore_punctuation: bool, ignore_punctuation: bool,
) -> Option<Range<DisplayPoint>> { ) -> Option<Range<DisplayPoint>> {
let language = map.buffer_snapshot.language_at(relative_to.to_point(map));
// Get the start of the word // Get the start of the word
let start = movement::find_preceding_boundary_in_line( let start = movement::find_preceding_boundary_in_line(
map, map,
right(map, relative_to, 1), right(map, relative_to, 1),
|left, right| { |left, right| {
char_kind(left).coerce_punctuation(ignore_punctuation) char_kind(language, left).coerce_punctuation(ignore_punctuation)
!= char_kind(right).coerce_punctuation(ignore_punctuation) != char_kind(language, right).coerce_punctuation(ignore_punctuation)
}, },
); );
let mut word_found = false; let mut word_found = false;
let end = movement::find_boundary(map, relative_to, |left, right| { let end = movement::find_boundary(map, relative_to, |left, right| {
let left_kind = char_kind(left).coerce_punctuation(ignore_punctuation); let left_kind = char_kind(language, left).coerce_punctuation(ignore_punctuation);
let right_kind = char_kind(right).coerce_punctuation(ignore_punctuation); let right_kind = char_kind(language, right).coerce_punctuation(ignore_punctuation);
let found = (word_found && left_kind != right_kind) || right == '\n' && left == '\n'; let found = (word_found && left_kind != right_kind) || right == '\n' && left == '\n';