123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333 |
- ////////////////////////////////////////////////////////////////////////////////////////////////////
- // Diffs
- #[derive(Debug, Copy, Clone, Eq, PartialEq)]
- pub enum LineDiff<E> {
- Del,
- Ins(E),
- Ovr(E),
- }
- ////////////////////////////////////////////////////////////////////////////////////////////////////
- // Generating diffs
- /// Although this looks just like `Eq`, the we are not using `Eq` because we want to support more
- /// general diffing, supporting treating two different values as "similar"
- pub trait Cmp<R: ?Sized> {
- fn similar(&self, rhs: &R) -> bool;
- }
- impl Cmp<char> for char {
- fn similar(&self, rhs: &char) -> bool {
- *self == *rhs
- }
- }
- /// Generate a diff from two sequences. Takes ownership of elements in the right-hand sequence (the
- /// "new" data).
- pub fn gen<L, R>(lhs: &[L], rhs: Vec<R>)
- -> impl Iterator<Item=LineDiff<R>>
- where L: Cmp<R> {
- let c = core(lhs, &*rhs);
- uncover_diffs(rhs, c)
- }
- /// Generate a diff from two slices. Returns references to elements in the resulting patch.
- pub fn gen_ref<'a, 'b, L, R>(lhs: &'a [L], rhs: &'b [R])
- -> impl Iterator<Item=LineDiff<&'b R>>
- where L: Cmp<R> {
- let c = core(lhs, rhs);
- uncover_diffs(rhs.iter(), c)
- }
- /// Generate a diff from two slices. Returns unit values in the resulting patch.
- pub fn gen_unit_diffs<L, R>(lhs: &[L], rhs: &[R])
- -> impl Iterator<Item=LineDiff<()>>
- where L: Cmp<R> {
- core(lhs, rhs)
- }
- /// Core LCS algorithm, takes only slices and outputs "unit diffs". Unit diffs do not contain any
- /// information about what data is being applied by the patch, instead just tell if a certain
- /// operations is an insert, delete, or overwrite.
- fn core<L, R>(lhs: &[L], rhs: &[R]) -> impl Iterator<Item=LineDiff<()>>
- where L: Cmp<R> {
- let n = lhs.len();
- let m = rhs.len();
- let mut memo = vec![vec![0; m+1]; n+1];
- for i in 1..(n+1) {
- for j in 1..(m+1) {
- use std::cmp::max;
- memo[i][j] =
- if lhs[i-1].similar(&rhs[j-1]) {
- 1 + memo[i-1][j-1]
- } else {
- max(memo[i-1][j], memo[i][j-1])
- };
- }
- }
- let mut backtrack = Vec::new();
- let mut i = n;
- let mut j = m;
- while (i, j) != (0, 0) {
- if j == 0 {
- backtrack.push(LineDiff::Del);
- i -= 1;
- } else if i == 0 {
- backtrack.push(LineDiff::Ins(()));
- j -= 1;
- } else if lhs[i-1].similar(&rhs[j-1]) {
- backtrack.push(LineDiff::Ovr(()));
- i -= 1;
- j -= 1;
- } else if memo[i-1][j] > memo[i][j-1] {
- backtrack.push(LineDiff::Del);
- i -= 1;
- } else {
- backtrack.push(LineDiff::Ins(()));
- j -= 1;
- }
- }
- backtrack.into_iter().rev()
- }
- /// Uncover the data at each insert/overwrite in a diff by consuming data from an input stream.
- fn uncover_diffs<I, J>(data: I, diffs: J) -> UncoverDiffs<I::IntoIter, J::IntoIter>
- where I: IntoIterator,
- J: IntoIterator<Item=LineDiff<()>> {
- UncoverDiffs(data.into_iter(), diffs.into_iter())
- }
- struct UncoverDiffs<I, J>(I, J);
- impl<I, J> Iterator for UncoverDiffs<I, J>
- where I: Iterator,
- J: Iterator<Item=LineDiff<()>> {
- type Item = LineDiff<I::Item>;
- fn next(&mut self) -> Option<LineDiff<I::Item>> {
- self.1.next()
- .and_then(|x| match x {
- LineDiff::Del => Some(LineDiff::Del),
- LineDiff::Ins(()) => self.0.next().map(LineDiff::Ins),
- LineDiff::Ovr(()) => self.0.next().map(LineDiff::Ovr),
- })
- }
- }
- #[cfg(test)]
- mod test_gen {
- use super::{LineDiff, gen};
- use super::LineDiff::*;
- fn gen_str(a: &str, b: &str) -> Vec<LineDiff<char>> {
- let av: Vec<_> = a.chars().collect();
- let bv: Vec<_> = b.chars().collect();
- gen(&*av, bv).collect()
- }
- #[test]
- fn test_same() {
- assert_eq!(gen_str("O_o", "O_o"), "O_o".chars().map(Ovr).collect::<Vec<_>>())
- }
- #[test]
- fn test_del() {
- assert_eq!(gen_str("UwU", ""), [Del; 3])
- }
- #[test]
- fn test_ins() {
- assert_eq!(gen_str("", "owo"), "owo".chars().map(Ins).collect::<Vec<_>>())
- }
- #[test]
- fn test_modify() {
- assert_eq!(gen_str("far", "bare"), [Del, Ins('b'), Ovr('a'), Ovr('r'), Ins('e')]);
- }
- #[test]
- fn test_modify_2() {
- assert_eq!(gen_str("hello", "handle"),
- [Ovr('h'), Del, Del, Ins('a'), Ins('n'), Ins('d'),
- Ovr('l'), Del, Ins('e')]);
- }
- }
- ////////////////////////////////////////////////////////////////////////////////////////////////////
- // Applying patches
- pub use self::apply::{ApplyPatch, Error as ApplyError};
- mod apply {
- use std;
- use super::{LineDiff};
- #[derive(Debug, Clone, Eq, PartialEq)]
- pub enum Error {
- Overrun,
- Unfinished(usize),
- Reject(usize),
- }
- impl std::fmt::Display for Error {
- fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
- match *self {
- Error::Overrun => write!(f, "destination overrun while applying diff"),
- Error::Unfinished(_) => write!(f, "diff did not cover all characters"),
- Error::Reject(_) => write!(f, "diff overwrite rejected"),
- }
- }
- }
- impl std::error::Error for Error {}
- pub type Result<T> = std::result::Result<T, Error>;
- pub trait ApplyPatch<E> {
- fn len(&self) -> usize;
- fn remove(&mut self, i: usize);
- fn insert(&mut self, i: usize, elem: E);
- fn overwrite(&mut self, i: usize, elem: E) -> bool {
- self.remove(i);
- self.insert(i, elem);
- true
- }
- fn apply_patch_offset<PATCH>(&mut self, mut off: usize, patch: PATCH)
- -> Result<usize>
- where PATCH: IntoIterator<Item=LineDiff<E>> {
- for ld in patch {
- match ld {
- LineDiff::Del =>
- if off >= self.len() {
- return Err(Error::Overrun)
- } else {
- self.remove(off)
- },
- LineDiff::Ins(e) => {
- self.insert(off, e);
- off += 1;
- }
- LineDiff::Ovr(e) =>
- if off >= self.len() {
- return Err(Error::Overrun)
- } else if !self.overwrite(off, e) {
- return Err(Error::Reject(off))
- } else {
- off += 1;
- },
- }
- }
- Ok(off)
- }
- fn apply_patch<PATCH>(&mut self, patch: PATCH) -> Result<()>
- where PATCH: IntoIterator<Item=LineDiff<E>> {
- let len = self.apply_patch_offset(0, patch)?;
- if len != self.len() {
- Err(Error::Unfinished(len))
- } else {
- Ok(())
- }
- }
- }
- impl<E> ApplyPatch<E> for Vec<E> {
- fn len(&self) -> usize { self.len() }
- fn remove(&mut self, i: usize) { self.remove(i); }
- fn insert(&mut self, i: usize, elem: E) { self.insert(i, elem); }
- }
- impl ApplyPatch<char> for String {
- fn len(&self) -> usize { self.len() }
- fn remove(&mut self, i: usize) { self.remove(i); }
- fn insert(&mut self, i: usize, elem: char) { self.insert(i, elem); }
- fn overwrite(&mut self, i: usize, elem: char) -> bool { self.chars().nth(i) == Some(elem) }
- }
- }
- #[cfg(test)]
- mod test_apply {
- use std::iter::{once};
- use super::{ApplyPatch, ApplyError};
- use super::LineDiff::*;
- #[test]
- fn test_apply_erase() {
- let mut s = String::from("uwu");
- assert_eq!(s.apply_patch((0..3).map(|_| Del)), Ok(()));
- assert_eq!(s, "");
- }
- #[test]
- fn test_apply_overwrite_del() {
- let mut s = String::from("hello");
- assert_eq!(
- s.apply_patch("hell"
- .chars()
- .map(Ovr)
- .chain(vec![Del])),
- Ok(()));
- assert_eq!(s, "hell");
- }
- #[test]
- fn test_apply_overwrite_insert() {
- let mut s = String::from("hello");
- assert_eq!(
- s.apply_patch("hell"
- .chars()
- .map(Ovr)
- .chain(" w".chars().map(Ins))
- .chain(once(Ovr('o')))
- .chain("rld".chars().map(Ins))),
- Ok(()));
- assert_eq!(s, "hell world");
- }
- #[test]
- fn test_far_bare() {
- let mut s = String::from("far");
- assert_eq!(
- s.apply_patch(vec![Del, Ins('b'), Ovr('a'), Ovr('r'), Ins('e')]),
- Ok(()));
- assert_eq!(s, "bare");
- }
- #[test]
- fn test_overrun() {
- let mut s = String::from("hi");
- assert_eq!(
- s.apply_patch((0..3).map(|_| Del)),
- Err(ApplyError::Overrun));
- }
- #[test]
- fn test_apply_overwrite_reject() {
- let mut s = String::from("hello");
- assert_eq!(
- s.apply_patch("henlo".chars().map(Ovr)),
- Err(ApplyError::Reject(2)));
- }
- #[test]
- fn test_unfinished() {
- let mut s = String::from("hello");
- assert_eq!(
- s.apply_patch(once(Ovr('h'))
- .chain((0..3).map(|_| Del))),
- Err(ApplyError::Unfinished(1)));
- }
- }
|