Introduce a new fingerprint field to TextSummary

The fingerprint is calculated in `Rope` using the `bromberg_sl2` homomorphic
hash function: each chunk is hashed on its own, and the per-chunk
fingerprints are then composed into a single fingerprint for the entire
rope, which is equivalent to hashing all of the rope's bytes at once.
This commit is contained in:
Antonio Scandurra 2022-06-17 11:45:26 +02:00
parent cef85f5d84
commit c31a233aad
6 changed files with 33 additions and 22 deletions

20
Cargo.lock generated
View file

@ -561,6 +561,18 @@ dependencies = [
"workspace", "workspace",
] ]
[[package]]
name = "bromberg_sl2"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2ed88064f69518b7e3ea50ecfc1b61d43f19248618a377b95ae5c8b611134d4d"
dependencies = [
"digest 0.9.0",
"lazy_static",
"rayon",
"seq-macro",
]
[[package]] [[package]]
name = "bstr" name = "bstr"
version = "0.2.17" version = "0.2.17"
@ -4156,6 +4168,12 @@ dependencies = [
"pest", "pest",
] ]
[[package]]
name = "seq-macro"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a9f47faea3cad316faa914d013d24f471cd90bfca1a0c70f05a3f42c6441e99"
[[package]] [[package]]
name = "serde" name = "serde"
version = "1.0.137" version = "1.0.137"
@ -4806,9 +4824,11 @@ version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"arrayvec 0.7.2", "arrayvec 0.7.2",
"bromberg_sl2",
"clock", "clock",
"collections", "collections",
"ctor", "ctor",
"digest 0.9.0",
"env_logger", "env_logger",
"gpui", "gpui",
"lazy_static", "lazy_static",

View file

@ -370,22 +370,10 @@ impl FoldMap {
if fold.end > fold.start { if fold.end > fold.start {
let output_text = ""; let output_text = "";
let chars = output_text.chars().count() as u32;
let lines = Point::new(0, output_text.len() as u32);
let lines_utf16 =
PointUtf16::new(0, output_text.encode_utf16().count() as u32);
new_transforms.push( new_transforms.push(
Transform { Transform {
summary: TransformSummary { summary: TransformSummary {
output: TextSummary { output: TextSummary::from(output_text),
bytes: output_text.len(),
lines,
lines_utf16,
first_line_chars: chars,
last_line_chars: chars,
longest_row: 0,
longest_row_chars: chars,
},
input: new_buffer.text_summary_for_range(fold.start..fold.end), input: new_buffer.text_summary_for_range(fold.start..fold.end),
}, },
output_text: Some(output_text), output_text: Some(output_text),

View file

@ -1923,15 +1923,7 @@ impl MultiBufferSnapshot {
); );
if range.end > end_before_newline { if range.end > end_before_newline {
summary.add_assign(&D::from_text_summary(&TextSummary { summary.add_assign(&D::from_text_summary(&TextSummary::from("\n")));
bytes: 1,
lines: Point::new(1 as u32, 0),
lines_utf16: PointUtf16::new(1 as u32, 0),
first_line_chars: 0,
last_line_chars: 0,
longest_row: 0,
longest_row_chars: 0,
}));
} }
cursor.next(&()); cursor.next(&());

View file

@ -16,6 +16,8 @@ collections = { path = "../collections" }
sum_tree = { path = "../sum_tree" } sum_tree = { path = "../sum_tree" }
anyhow = "1.0.38" anyhow = "1.0.38"
arrayvec = "0.7.1" arrayvec = "0.7.1"
digest = { version = "0.9", features = ["std"] }
bromberg_sl2 = "0.6"
lazy_static = "1.4" lazy_static = "1.4"
log = { version = "0.4.16", features = ["kv_unstable_serde"] } log = { version = "0.4.16", features = ["kv_unstable_serde"] }
parking_lot = "0.11" parking_lot = "0.11"

View file

@ -2,6 +2,7 @@ use crate::PointUtf16;
use super::Point; use super::Point;
use arrayvec::ArrayString; use arrayvec::ArrayString;
use bromberg_sl2::HashMatrix;
use smallvec::SmallVec; use smallvec::SmallVec;
use std::{cmp, fmt, io, mem, ops::Range, str}; use std::{cmp, fmt, io, mem, ops::Range, str};
use sum_tree::{Bias, Dimension, SumTree}; use sum_tree::{Bias, Dimension, SumTree};
@ -725,6 +726,7 @@ pub struct TextSummary {
pub last_line_chars: u32, pub last_line_chars: u32,
pub longest_row: u32, pub longest_row: u32,
pub longest_row_chars: u32, pub longest_row_chars: u32,
pub fingerprint: HashMatrix,
} }
impl<'a> From<&'a str> for TextSummary { impl<'a> From<&'a str> for TextSummary {
@ -764,6 +766,7 @@ impl<'a> From<&'a str> for TextSummary {
last_line_chars, last_line_chars,
longest_row, longest_row,
longest_row_chars, longest_row_chars,
fingerprint: bromberg_sl2::hash_strict(text.as_bytes()),
} }
} }
} }
@ -810,6 +813,7 @@ impl<'a> std::ops::AddAssign<&'a Self> for TextSummary {
self.bytes += other.bytes; self.bytes += other.bytes;
self.lines += other.lines; self.lines += other.lines;
self.lines_utf16 += other.lines_utf16; self.lines_utf16 += other.lines_utf16;
self.fingerprint = self.fingerprint * other.fingerprint;
} }
} }

View file

@ -226,6 +226,7 @@ fn test_text_summary_for_range() {
last_line_chars: 0, last_line_chars: 0,
longest_row: 0, longest_row: 0,
longest_row_chars: 1, longest_row_chars: 1,
fingerprint: bromberg_sl2::hash_strict(b"b\n")
} }
); );
assert_eq!( assert_eq!(
@ -238,6 +239,7 @@ fn test_text_summary_for_range() {
last_line_chars: 0, last_line_chars: 0,
longest_row: 2, longest_row: 2,
longest_row_chars: 4, longest_row_chars: 4,
fingerprint: bromberg_sl2::hash_strict(b"b\nefg\nhklm\n")
} }
); );
assert_eq!( assert_eq!(
@ -250,6 +252,7 @@ fn test_text_summary_for_range() {
last_line_chars: 1, last_line_chars: 1,
longest_row: 3, longest_row: 3,
longest_row_chars: 6, longest_row_chars: 6,
fingerprint: bromberg_sl2::hash_strict(b"ab\nefg\nhklm\nnopqrs\nt")
} }
); );
assert_eq!( assert_eq!(
@ -262,6 +265,7 @@ fn test_text_summary_for_range() {
last_line_chars: 3, last_line_chars: 3,
longest_row: 3, longest_row: 3,
longest_row_chars: 6, longest_row_chars: 6,
fingerprint: bromberg_sl2::hash_strict(b"ab\nefg\nhklm\nnopqrs\ntuv")
} }
); );
assert_eq!( assert_eq!(
@ -274,6 +278,7 @@ fn test_text_summary_for_range() {
last_line_chars: 3, last_line_chars: 3,
longest_row: 1, longest_row: 1,
longest_row_chars: 6, longest_row_chars: 6,
fingerprint: bromberg_sl2::hash_strict(b"hklm\nnopqrs\ntuv")
} }
); );
} }