Support multi-byte characters in diff
This commit is contained in:
parent
a9871a7a70
commit
c2935056e8
1 changed file with 37 additions and 20 deletions
|
@@ -1,6 +1,7 @@
|
|||
use std::{
|
||||
cmp,
|
||||
fmt::{self, Debug},
|
||||
ops::Range,
|
||||
};
|
||||
|
||||
use collections::BinaryHeap;
|
||||
|
@@ -71,8 +72,8 @@ pub enum Hunk {
|
|||
}
|
||||
|
||||
pub struct Diff {
|
||||
old: String,
|
||||
new: String,
|
||||
old: Vec<char>,
|
||||
new: Vec<char>,
|
||||
scores: Matrix,
|
||||
old_text_ix: usize,
|
||||
new_text_ix: usize,
|
||||
|
@@ -84,6 +85,7 @@ impl Diff {
|
|||
const EQUALITY_SCORE: isize = 5;
|
||||
|
||||
pub fn new(old: String) -> Self {
|
||||
let old = old.chars().collect::<Vec<_>>();
|
||||
let mut scores = Matrix::new();
|
||||
scores.resize(old.len() + 1, 1);
|
||||
for i in 0..=old.len() {
|
||||
|
@@ -91,7 +93,7 @@ impl Diff {
|
|||
}
|
||||
Self {
|
||||
old,
|
||||
new: String::new(),
|
||||
new: Vec::new(),
|
||||
scores,
|
||||
old_text_ix: 0,
|
||||
new_text_ix: 0,
|
||||
|
@@ -99,7 +101,7 @@ impl Diff {
|
|||
}
|
||||
|
||||
pub fn push_new(&mut self, text: &str) -> Vec<Hunk> {
|
||||
self.new.push_str(text);
|
||||
self.new.extend(text.chars());
|
||||
self.scores.resize(self.old.len() + 1, self.new.len() + 1);
|
||||
|
||||
for j in self.new_text_ix + 1..=self.new.len() {
|
||||
|
@@ -107,7 +109,7 @@ impl Diff {
|
|||
for i in 1..=self.old.len() {
|
||||
let insertion_score = self.scores.get(i, j - 1) + Self::INSERTION_SCORE;
|
||||
let deletion_score = self.scores.get(i - 1, j) + Self::DELETION_SCORE;
|
||||
let equality_score = if self.old.as_bytes()[i - 1] == self.new.as_bytes()[j - 1] {
|
||||
let equality_score = if self.old[i - 1] == self.new[j - 1] {
|
||||
self.scores.get(i - 1, j - 1) + Self::EQUALITY_SCORE
|
||||
} else {
|
||||
isize::MIN
|
||||
|
@@ -138,6 +140,7 @@ impl Diff {
|
|||
}
|
||||
|
||||
fn backtrack(&self, old_text_ix: usize, new_text_ix: usize) -> Vec<Hunk> {
|
||||
let mut pending_insert: Option<Range<usize>> = None;
|
||||
let mut hunks = Vec::new();
|
||||
let mut i = old_text_ix;
|
||||
let mut j = new_text_ix;
|
||||
|
@@ -153,7 +156,7 @@ impl Diff {
|
|||
None
|
||||
};
|
||||
let equality_score = if i > self.old_text_ix && j > self.new_text_ix {
|
||||
if self.old.as_bytes()[i - 1] == self.new.as_bytes()[j - 1] {
|
||||
if self.old[i - 1] == self.new[j - 1] {
|
||||
Some((i - 1, j - 1))
|
||||
} else {
|
||||
None
|
||||
|
@@ -169,30 +172,44 @@ impl Diff {
|
|||
.unwrap();
|
||||
|
||||
if prev_i == i && prev_j == j - 1 {
|
||||
if let Some(Hunk::Insert { text }) = hunks.last_mut() {
|
||||
text.insert_str(0, &self.new[prev_j..j]);
|
||||
if let Some(pending_insert) = pending_insert.as_mut() {
|
||||
pending_insert.start = prev_j;
|
||||
} else {
|
||||
hunks.push(Hunk::Insert {
|
||||
text: self.new[prev_j..j].to_string(),
|
||||
})
|
||||
}
|
||||
} else if prev_i == i - 1 && prev_j == j {
|
||||
if let Some(Hunk::Remove { len }) = hunks.last_mut() {
|
||||
*len += 1;
|
||||
} else {
|
||||
hunks.push(Hunk::Remove { len: 1 })
|
||||
pending_insert = Some(prev_j..j);
|
||||
}
|
||||
} else {
|
||||
if let Some(Hunk::Keep { len }) = hunks.last_mut() {
|
||||
*len += 1;
|
||||
if let Some(range) = pending_insert.take() {
|
||||
hunks.push(Hunk::Insert {
|
||||
text: self.new[range].iter().collect(),
|
||||
});
|
||||
}
|
||||
|
||||
let char_len = self.old[i - 1].len_utf8();
|
||||
if prev_i == i - 1 && prev_j == j {
|
||||
if let Some(Hunk::Remove { len }) = hunks.last_mut() {
|
||||
*len += char_len;
|
||||
} else {
|
||||
hunks.push(Hunk::Remove { len: char_len })
|
||||
}
|
||||
} else {
|
||||
hunks.push(Hunk::Keep { len: 1 })
|
||||
if let Some(Hunk::Keep { len }) = hunks.last_mut() {
|
||||
*len += char_len;
|
||||
} else {
|
||||
hunks.push(Hunk::Keep { len: char_len })
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
i = prev_i;
|
||||
j = prev_j;
|
||||
}
|
||||
|
||||
if let Some(range) = pending_insert.take() {
|
||||
hunks.push(Hunk::Insert {
|
||||
text: self.new[range].iter().collect(),
|
||||
});
|
||||
}
|
||||
|
||||
hunks.reverse();
|
||||
hunks
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue