Strip carriage returns from all text in text::Buffer

* Moving the logic from Rope to text::Buffer makes it easier
  to keep the Rope in sync with the fragment tree.
* Removing carriage return characters is lossier, but is much
  simpler than incrementally maintaining the invariant that
  there are no carriage returns followed by newlines. We may
  want to do something smarter in the future.

Co-authored-by: Keith Simmons <keith@zed.dev>
This commit is contained in:
Max Brunsfeld 2022-07-05 17:14:12 -07:00
parent 116fa92e84
commit 7e9beaf4bb
9 changed files with 132 additions and 100 deletions

View file

@ -53,7 +53,6 @@ pub struct Buffer {
saved_version: clock::Global, saved_version: clock::Global,
saved_version_fingerprint: String, saved_version_fingerprint: String,
saved_mtime: SystemTime, saved_mtime: SystemTime,
line_ending: LineEnding,
transaction_depth: usize, transaction_depth: usize,
was_dirty_before_starting_transaction: Option<bool>, was_dirty_before_starting_transaction: Option<bool>,
language: Option<Arc<Language>>, language: Option<Arc<Language>>,
@ -98,12 +97,6 @@ pub enum IndentKind {
Tab, Tab,
} }
#[derive(Copy, Debug, Clone, PartialEq, Eq)]
pub enum LineEnding {
Unix,
Windows,
}
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
struct SelectionSet { struct SelectionSet {
line_mode: bool, line_mode: bool,
@ -319,12 +312,9 @@ impl Buffer {
base_text: T, base_text: T,
cx: &mut ModelContext<Self>, cx: &mut ModelContext<Self>,
) -> Self { ) -> Self {
let base_text = base_text.into();
let line_ending = LineEnding::detect(&base_text);
Self::build( Self::build(
TextBuffer::new(replica_id, cx.model_id() as u64, base_text), TextBuffer::new(replica_id, cx.model_id() as u64, base_text.into()),
None, None,
line_ending,
) )
} }
@ -334,12 +324,9 @@ impl Buffer {
file: Arc<dyn File>, file: Arc<dyn File>,
cx: &mut ModelContext<Self>, cx: &mut ModelContext<Self>,
) -> Self { ) -> Self {
let base_text = base_text.into();
let line_ending = LineEnding::detect(&base_text);
Self::build( Self::build(
TextBuffer::new(replica_id, cx.model_id() as u64, base_text), TextBuffer::new(replica_id, cx.model_id() as u64, base_text.into()),
Some(file), Some(file),
line_ending,
) )
} }
@ -350,9 +337,11 @@ impl Buffer {
cx: &mut ModelContext<Self>, cx: &mut ModelContext<Self>,
) -> Result<Self> { ) -> Result<Self> {
let buffer = TextBuffer::new(replica_id, message.id, message.base_text); let buffer = TextBuffer::new(replica_id, message.id, message.base_text);
let line_ending = proto::LineEnding::from_i32(message.line_ending) let mut this = Self::build(buffer, file);
.ok_or_else(|| anyhow!("missing line_ending"))?; this.text.set_line_ending(proto::deserialize_line_ending(
let mut this = Self::build(buffer, file, LineEnding::from_proto(line_ending)); proto::LineEnding::from_i32(message.line_ending)
.ok_or_else(|| anyhow!("missing line_ending"))?,
));
let ops = message let ops = message
.operations .operations
.into_iter() .into_iter()
@ -417,7 +406,7 @@ impl Buffer {
diagnostics: proto::serialize_diagnostics(self.diagnostics.iter()), diagnostics: proto::serialize_diagnostics(self.diagnostics.iter()),
diagnostics_timestamp: self.diagnostics_timestamp.value, diagnostics_timestamp: self.diagnostics_timestamp.value,
completion_triggers: self.completion_triggers.clone(), completion_triggers: self.completion_triggers.clone(),
line_ending: self.line_ending.to_proto() as i32, line_ending: proto::serialize_line_ending(self.line_ending()) as i32,
} }
} }
@ -426,7 +415,7 @@ impl Buffer {
self self
} }
fn build(buffer: TextBuffer, file: Option<Arc<dyn File>>, line_ending: LineEnding) -> Self { fn build(buffer: TextBuffer, file: Option<Arc<dyn File>>) -> Self {
let saved_mtime; let saved_mtime;
if let Some(file) = file.as_ref() { if let Some(file) = file.as_ref() {
saved_mtime = file.mtime(); saved_mtime = file.mtime();
@ -442,7 +431,6 @@ impl Buffer {
was_dirty_before_starting_transaction: None, was_dirty_before_starting_transaction: None,
text: buffer, text: buffer,
file, file,
line_ending,
syntax_tree: Mutex::new(None), syntax_tree: Mutex::new(None),
parsing_in_background: false, parsing_in_background: false,
parse_count: 0, parse_count: 0,
@ -503,7 +491,7 @@ impl Buffer {
self.remote_id(), self.remote_id(),
text, text,
version, version,
self.line_ending, self.line_ending(),
cx.as_mut(), cx.as_mut(),
); );
cx.spawn(|this, mut cx| async move { cx.spawn(|this, mut cx| async move {
@ -559,7 +547,7 @@ impl Buffer {
this.did_reload( this.did_reload(
this.version(), this.version(),
this.as_rope().fingerprint(), this.as_rope().fingerprint(),
this.line_ending, this.line_ending(),
new_mtime, new_mtime,
cx, cx,
); );
@ -584,14 +572,14 @@ impl Buffer {
) { ) {
self.saved_version = version; self.saved_version = version;
self.saved_version_fingerprint = fingerprint; self.saved_version_fingerprint = fingerprint;
self.line_ending = line_ending; self.text.set_line_ending(line_ending);
self.saved_mtime = mtime; self.saved_mtime = mtime;
if let Some(file) = self.file.as_ref().and_then(|f| f.as_local()) { if let Some(file) = self.file.as_ref().and_then(|f| f.as_local()) {
file.buffer_reloaded( file.buffer_reloaded(
self.remote_id(), self.remote_id(),
&self.saved_version, &self.saved_version,
self.saved_version_fingerprint.clone(), self.saved_version_fingerprint.clone(),
self.line_ending, self.line_ending(),
self.saved_mtime, self.saved_mtime,
cx, cx,
); );
@ -970,13 +958,13 @@ impl Buffer {
} }
} }
pub(crate) fn diff(&self, new_text: String, cx: &AppContext) -> Task<Diff> { pub(crate) fn diff(&self, mut new_text: String, cx: &AppContext) -> Task<Diff> {
let old_text = self.as_rope().clone(); let old_text = self.as_rope().clone();
let base_version = self.version(); let base_version = self.version();
cx.background().spawn(async move { cx.background().spawn(async move {
let old_text = old_text.to_string(); let old_text = old_text.to_string();
let line_ending = LineEnding::detect(&new_text); let line_ending = LineEnding::detect(&new_text);
let new_text = new_text.replace("\r\n", "\n").replace('\r', "\n"); LineEnding::strip_carriage_returns(&mut new_text);
let changes = TextDiff::from_lines(old_text.as_str(), new_text.as_str()) let changes = TextDiff::from_lines(old_text.as_str(), new_text.as_str())
.iter_all_changes() .iter_all_changes()
.map(|c| (c.tag(), c.value().len())) .map(|c| (c.tag(), c.value().len()))
@ -999,7 +987,7 @@ impl Buffer {
if self.version == diff.base_version { if self.version == diff.base_version {
self.finalize_last_transaction(); self.finalize_last_transaction();
self.start_transaction(); self.start_transaction();
self.line_ending = diff.line_ending; self.text.set_line_ending(diff.line_ending);
let mut offset = diff.start_offset; let mut offset = diff.start_offset;
for (tag, len) in diff.changes { for (tag, len) in diff.changes {
let range = offset..(offset + len); let range = offset..(offset + len);
@ -1514,10 +1502,6 @@ impl Buffer {
pub fn completion_triggers(&self) -> &[String] { pub fn completion_triggers(&self) -> &[String] {
&self.completion_triggers &self.completion_triggers
} }
pub fn line_ending(&self) -> LineEnding {
self.line_ending
}
} }
#[cfg(any(test, feature = "test-support"))] #[cfg(any(test, feature = "test-support"))]
@ -2538,52 +2522,6 @@ impl std::ops::SubAssign for IndentSize {
} }
} }
impl LineEnding {
pub fn from_proto(style: proto::LineEnding) -> Self {
match style {
proto::LineEnding::Unix => Self::Unix,
proto::LineEnding::Windows => Self::Windows,
}
}
fn detect(text: &str) -> Self {
let text = &text[..cmp::min(text.len(), 1000)];
if let Some(ix) = text.find('\n') {
if ix == 0 || text.as_bytes()[ix - 1] != b'\r' {
Self::Unix
} else {
Self::Windows
}
} else {
Default::default()
}
}
pub fn as_str(self) -> &'static str {
match self {
LineEnding::Unix => "\n",
LineEnding::Windows => "\r\n",
}
}
pub fn to_proto(self) -> proto::LineEnding {
match self {
LineEnding::Unix => proto::LineEnding::Unix,
LineEnding::Windows => proto::LineEnding::Windows,
}
}
}
impl Default for LineEnding {
fn default() -> Self {
#[cfg(unix)]
return Self::Unix;
#[cfg(not(unix))]
return Self::Windows;
}
}
impl Completion { impl Completion {
pub fn sort_key(&self) -> (usize, &str) { pub fn sort_key(&self) -> (usize, &str) {
let kind_key = match self.lsp_completion.kind { let kind_key = match self.lsp_completion.kind {

View file

@ -11,6 +11,20 @@ use text::*;
pub use proto::{Buffer, BufferState, LineEnding, SelectionSet}; pub use proto::{Buffer, BufferState, LineEnding, SelectionSet};
pub fn deserialize_line_ending(message: proto::LineEnding) -> text::LineEnding {
match message {
LineEnding::Unix => text::LineEnding::Unix,
LineEnding::Windows => text::LineEnding::Windows,
}
}
pub fn serialize_line_ending(message: text::LineEnding) -> proto::LineEnding {
match message {
text::LineEnding::Unix => proto::LineEnding::Unix,
text::LineEnding::Windows => proto::LineEnding::Windows,
}
}
pub fn serialize_operation(operation: &Operation) -> proto::Operation { pub fn serialize_operation(operation: &Operation) -> proto::Operation {
proto::Operation { proto::Operation {
variant: Some(match operation { variant: Some(match operation {

View file

@ -421,7 +421,7 @@ async fn test_outline(cx: &mut gpui::TestAppContext) {
async fn search<'a>( async fn search<'a>(
outline: &'a Outline<Anchor>, outline: &'a Outline<Anchor>,
query: &str, query: &str,
cx: &gpui::TestAppContext, cx: &'a gpui::TestAppContext,
) -> Vec<(&'a str, Vec<usize>)> { ) -> Vec<(&'a str, Vec<usize>)> {
let matches = cx let matches = cx
.read(|cx| outline.search(query, cx.background().clone())) .read(|cx| outline.search(query, cx.background().clone()))

View file

@ -20,12 +20,14 @@ use gpui::{
}; };
use language::{ use language::{
point_to_lsp, point_to_lsp,
proto::{deserialize_anchor, deserialize_version, serialize_anchor, serialize_version}, proto::{
deserialize_anchor, deserialize_line_ending, deserialize_version, serialize_anchor,
serialize_version,
},
range_from_lsp, range_to_lsp, Anchor, Bias, Buffer, CharKind, CodeAction, CodeLabel, range_from_lsp, range_to_lsp, Anchor, Bias, Buffer, CharKind, CodeAction, CodeLabel,
Completion, Diagnostic, DiagnosticEntry, DiagnosticSet, Event as BufferEvent, File as _, Completion, Diagnostic, DiagnosticEntry, DiagnosticSet, Event as BufferEvent, File as _,
Language, LanguageRegistry, LanguageServerName, LineEnding, LocalFile, LspAdapter, Language, LanguageRegistry, LanguageServerName, LocalFile, LspAdapter, OffsetRangeExt,
OffsetRangeExt, Operation, Patch, PointUtf16, TextBufferSnapshot, ToOffset, ToPointUtf16, Operation, Patch, PointUtf16, TextBufferSnapshot, ToOffset, ToPointUtf16, Transaction,
Transaction,
}; };
use lsp::{ use lsp::{
DiagnosticSeverity, DiagnosticTag, DocumentHighlightKind, LanguageServer, LanguageString, DiagnosticSeverity, DiagnosticTag, DocumentHighlightKind, LanguageServer, LanguageString,
@ -5542,7 +5544,7 @@ impl Project {
) -> Result<()> { ) -> Result<()> {
let payload = envelope.payload; let payload = envelope.payload;
let version = deserialize_version(payload.version); let version = deserialize_version(payload.version);
let line_ending = LineEnding::from_proto( let line_ending = deserialize_line_ending(
proto::LineEnding::from_i32(payload.line_ending) proto::LineEnding::from_i32(payload.line_ending)
.ok_or_else(|| anyhow!("missing line ending"))?, .ok_or_else(|| anyhow!("missing line ending"))?,
); );

View file

@ -23,7 +23,7 @@ use gpui::{
Task, Task,
}; };
use language::{ use language::{
proto::{deserialize_version, serialize_version}, proto::{deserialize_version, serialize_line_ending, serialize_version},
Buffer, DiagnosticEntry, LineEnding, PointUtf16, Rope, Buffer, DiagnosticEntry, LineEnding, PointUtf16, Rope,
}; };
use lazy_static::lazy_static; use lazy_static::lazy_static;
@ -1750,7 +1750,7 @@ impl language::LocalFile for File {
version: serialize_version(&version), version: serialize_version(&version),
mtime: Some(mtime.into()), mtime: Some(mtime.into()),
fingerprint, fingerprint,
line_ending: line_ending.to_proto() as i32, line_ending: serialize_line_ending(line_ending) as i32,
}) })
.log_err(); .log_err();
} }

View file

@ -22,7 +22,7 @@ impl<T: Rng> Iterator for RandomCharIter<T> {
match self.0.gen_range(0..100) { match self.0.gen_range(0..100) {
// whitespace // whitespace
0..=19 => [' ', '\n', '\t'].choose(&mut self.0).copied(), 0..=19 => [' ', '\n', '\r', '\t'].choose(&mut self.0).copied(),
// two-byte greek letters // two-byte greek letters
20..=32 => char::from_u32(self.0.gen_range(('α' as u32)..('ω' as u32 + 1))), 20..=32 => char::from_u32(self.0.gen_range(('α' as u32)..('ω' as u32 + 1))),
// // three-byte characters // // three-byte characters

View file

@ -58,19 +58,12 @@ impl Rope {
pub fn push(&mut self, text: &str) { pub fn push(&mut self, text: &str) {
let mut new_chunks = SmallVec::<[_; 16]>::new(); let mut new_chunks = SmallVec::<[_; 16]>::new();
let mut new_chunk = ArrayString::new(); let mut new_chunk = ArrayString::new();
let mut chars = text.chars().peekable(); for ch in text.chars() {
while let Some(mut ch) = chars.next() {
if new_chunk.len() + ch.len_utf8() > 2 * CHUNK_BASE { if new_chunk.len() + ch.len_utf8() > 2 * CHUNK_BASE {
new_chunks.push(Chunk(new_chunk)); new_chunks.push(Chunk(new_chunk));
new_chunk = ArrayString::new(); new_chunk = ArrayString::new();
} }
if ch == '\r' {
ch = '\n';
if chars.peek().copied() == Some('\n') {
chars.next();
}
}
new_chunk.push(ch); new_chunk.push(ch);
} }
if !new_chunk.is_empty() { if !new_chunk.is_empty() {

View file

@ -43,6 +43,8 @@ fn test_random_edits(mut rng: StdRng) {
.take(reference_string_len) .take(reference_string_len)
.collect::<String>(); .collect::<String>();
let mut buffer = Buffer::new(0, 0, reference_string.clone().into()); let mut buffer = Buffer::new(0, 0, reference_string.clone().into());
reference_string = reference_string.replace("\r", "");
buffer.history.group_interval = Duration::from_millis(rng.gen_range(0..=200)); buffer.history.group_interval = Duration::from_millis(rng.gen_range(0..=200));
let mut buffer_versions = Vec::new(); let mut buffer_versions = Vec::new();
log::info!( log::info!(
@ -56,6 +58,8 @@ fn test_random_edits(mut rng: StdRng) {
for (old_range, new_text) in edits.iter().rev() { for (old_range, new_text) in edits.iter().rev() {
reference_string.replace_range(old_range.clone(), &new_text); reference_string.replace_range(old_range.clone(), &new_text);
} }
reference_string = reference_string.replace("\r", "");
assert_eq!(buffer.text(), reference_string); assert_eq!(buffer.text(), reference_string);
log::info!( log::info!(
"buffer text {:?}, version: {:?}", "buffer text {:?}, version: {:?}",
@ -148,6 +152,20 @@ fn test_random_edits(mut rng: StdRng) {
} }
} }
#[test]
fn test_line_endings() {
let mut buffer = Buffer::new(0, 0, "one\r\ntwo".into());
assert_eq!(buffer.text(), "one\ntwo");
assert_eq!(buffer.line_ending(), LineEnding::Windows);
buffer.check_invariants();
buffer.edit([(buffer.len()..buffer.len(), "\r\nthree")]);
buffer.edit([(0..0, "zero\r\n")]);
assert_eq!(buffer.text(), "zero\none\ntwo\nthree");
assert_eq!(buffer.line_ending(), LineEnding::Windows);
buffer.check_invariants();
}
#[test] #[test]
fn test_line_len() { fn test_line_len() {
let mut buffer = Buffer::new(0, 0, "".into()); let mut buffer = Buffer::new(0, 0, "".into());

View file

@ -63,6 +63,7 @@ pub struct BufferSnapshot {
remote_id: u64, remote_id: u64,
visible_text: Rope, visible_text: Rope,
deleted_text: Rope, deleted_text: Rope,
line_ending: LineEnding,
undo_map: UndoMap, undo_map: UndoMap,
fragments: SumTree<Fragment>, fragments: SumTree<Fragment>,
insertions: SumTree<InsertionFragment>, insertions: SumTree<InsertionFragment>,
@ -86,6 +87,12 @@ pub struct Transaction {
pub ranges: Vec<Range<FullOffset>>, pub ranges: Vec<Range<FullOffset>>,
} }
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum LineEnding {
Unix,
Windows,
}
impl HistoryEntry { impl HistoryEntry {
pub fn transaction_id(&self) -> TransactionId { pub fn transaction_id(&self) -> TransactionId {
self.transaction.id self.transaction.id
@ -539,7 +546,10 @@ pub struct UndoOperation {
} }
impl Buffer { impl Buffer {
pub fn new(replica_id: u16, remote_id: u64, base_text: String) -> Buffer { pub fn new(replica_id: u16, remote_id: u64, mut base_text: String) -> Buffer {
let line_ending = LineEnding::detect(&base_text);
LineEnding::strip_carriage_returns(&mut base_text);
let history = History::new(base_text.into()); let history = History::new(base_text.into());
let mut fragments = SumTree::new(); let mut fragments = SumTree::new();
let mut insertions = SumTree::new(); let mut insertions = SumTree::new();
@ -547,6 +557,7 @@ impl Buffer {
let mut local_clock = clock::Local::new(replica_id); let mut local_clock = clock::Local::new(replica_id);
let mut lamport_clock = clock::Lamport::new(replica_id); let mut lamport_clock = clock::Lamport::new(replica_id);
let mut version = clock::Global::new(); let mut version = clock::Global::new();
let visible_text = Rope::from(history.base_text.as_ref()); let visible_text = Rope::from(history.base_text.as_ref());
if visible_text.len() > 0 { if visible_text.len() > 0 {
let insertion_timestamp = InsertionTimestamp { let insertion_timestamp = InsertionTimestamp {
@ -577,6 +588,7 @@ impl Buffer {
remote_id, remote_id,
visible_text, visible_text,
deleted_text: Rope::new(), deleted_text: Rope::new(),
line_ending,
fragments, fragments,
insertions, insertions,
version, version,
@ -659,7 +671,7 @@ impl Buffer {
let mut new_insertions = Vec::new(); let mut new_insertions = Vec::new();
let mut insertion_offset = 0; let mut insertion_offset = 0;
let mut ranges = edits let mut edits = edits
.map(|(range, new_text)| (range.to_offset(&*self), new_text)) .map(|(range, new_text)| (range.to_offset(&*self), new_text))
.peekable(); .peekable();
@ -667,12 +679,12 @@ impl Buffer {
RopeBuilder::new(self.visible_text.cursor(0), self.deleted_text.cursor(0)); RopeBuilder::new(self.visible_text.cursor(0), self.deleted_text.cursor(0));
let mut old_fragments = self.fragments.cursor::<FragmentTextSummary>(); let mut old_fragments = self.fragments.cursor::<FragmentTextSummary>();
let mut new_fragments = let mut new_fragments =
old_fragments.slice(&ranges.peek().unwrap().0.start, Bias::Right, &None); old_fragments.slice(&edits.peek().unwrap().0.start, Bias::Right, &None);
new_ropes.push_tree(new_fragments.summary().text); new_ropes.push_tree(new_fragments.summary().text);
let mut fragment_start = old_fragments.start().visible; let mut fragment_start = old_fragments.start().visible;
for (range, new_text) in ranges { for (range, new_text) in edits {
let new_text = new_text.into(); let new_text = LineEnding::strip_carriage_returns_from_arc(new_text.into());
let fragment_end = old_fragments.end(&None).visible; let fragment_end = old_fragments.end(&None).visible;
// If the current fragment ends before this range, then jump ahead to the first fragment // If the current fragment ends before this range, then jump ahead to the first fragment
@ -715,6 +727,7 @@ impl Buffer {
// Insert the new text before any existing fragments within the range. // Insert the new text before any existing fragments within the range.
if !new_text.is_empty() { if !new_text.is_empty() {
let new_start = new_fragments.summary().text.visible; let new_start = new_fragments.summary().text.visible;
edits_patch.push(Edit { edits_patch.push(Edit {
old: fragment_start..fragment_start, old: fragment_start..fragment_start,
new: new_start..new_start + new_text.len(), new: new_start..new_start + new_text.len(),
@ -806,6 +819,10 @@ impl Buffer {
edit_op edit_op
} }
pub fn set_line_ending(&mut self, line_ending: LineEnding) {
self.snapshot.line_ending = line_ending;
}
pub fn apply_ops<I: IntoIterator<Item = Operation>>(&mut self, ops: I) -> Result<()> { pub fn apply_ops<I: IntoIterator<Item = Operation>>(&mut self, ops: I) -> Result<()> {
let mut deferred_ops = Vec::new(); let mut deferred_ops = Vec::new();
for op in ops { for op in ops {
@ -1413,6 +1430,8 @@ impl Buffer {
fragment_summary.text.deleted, fragment_summary.text.deleted,
self.snapshot.deleted_text.len() self.snapshot.deleted_text.len()
); );
assert!(!self.text().contains("\r\n"));
} }
pub fn set_group_interval(&mut self, group_interval: Duration) { pub fn set_group_interval(&mut self, group_interval: Duration) {
@ -1550,6 +1569,10 @@ impl BufferSnapshot {
self.visible_text.to_string() self.visible_text.to_string()
} }
pub fn line_ending(&self) -> LineEnding {
self.line_ending
}
pub fn deleted_text(&self) -> String { pub fn deleted_text(&self) -> String {
self.deleted_text.to_string() self.deleted_text.to_string()
} }
@ -2311,6 +2334,50 @@ impl operation_queue::Operation for Operation {
} }
} }
impl Default for LineEnding {
fn default() -> Self {
#[cfg(unix)]
return Self::Unix;
#[cfg(not(unix))]
return Self::CRLF;
}
}
impl LineEnding {
pub fn as_str(&self) -> &'static str {
match self {
LineEnding::Unix => "\n",
LineEnding::Windows => "\r\n",
}
}
pub fn detect(text: &str) -> Self {
if let Some(ix) = text[..cmp::min(text.len(), 1000)].find(&['\n']) {
let text = text.as_bytes();
if ix > 0 && text[ix - 1] == b'\r' {
Self::Windows
} else {
Self::Unix
}
} else {
Self::default()
}
}
pub fn strip_carriage_returns(text: &mut String) {
text.retain(|c| c != '\r')
}
fn strip_carriage_returns_from_arc(text: Arc<str>) -> Arc<str> {
if text.contains('\r') {
text.replace('\r', "").into()
} else {
text
}
}
}
pub trait ToOffset { pub trait ToOffset {
fn to_offset<'a>(&self, snapshot: &BufferSnapshot) -> usize; fn to_offset<'a>(&self, snapshot: &BufferSnapshot) -> usize;
} }