diff.rs 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333
  1. ////////////////////////////////////////////////////////////////////////////////////////////////////
  2. // Diffs
  3. #[derive(Debug, Copy, Clone, Eq, PartialEq)]
  4. pub enum LineDiff<E> {
  5. Del,
  6. Ins(E),
  7. Ovr(E),
  8. }
  9. ////////////////////////////////////////////////////////////////////////////////////////////////////
  10. // Generating diffs
  11. /// Although this looks just like `Eq`, the we are not using `Eq` because we want to support more
  12. /// general diffing, supporting treating two different values as "similar"
  13. pub trait Cmp<R: ?Sized> {
  14. fn similar(&self, rhs: &R) -> bool;
  15. }
  16. impl Cmp<char> for char {
  17. fn similar(&self, rhs: &char) -> bool {
  18. *self == *rhs
  19. }
  20. }
  21. /// Generate a diff from two sequences. Takes ownership of elements in the right-hand sequence (the
  22. /// "new" data).
  23. pub fn gen<L, R>(lhs: &[L], rhs: Vec<R>)
  24. -> impl Iterator<Item=LineDiff<R>>
  25. where L: Cmp<R> {
  26. let c = core(lhs, &*rhs);
  27. uncover_diffs(rhs, c)
  28. }
  29. /// Generate a diff from two slices. Returns references to elements in the resulting patch.
  30. pub fn gen_ref<'a, 'b, L, R>(lhs: &'a [L], rhs: &'b [R])
  31. -> impl Iterator<Item=LineDiff<&'b R>>
  32. where L: Cmp<R> {
  33. let c = core(lhs, rhs);
  34. uncover_diffs(rhs.iter(), c)
  35. }
  36. /// Generate a diff from two slices. Returns unit values in the resulting patch.
  37. pub fn gen_unit_diffs<L, R>(lhs: &[L], rhs: &[R])
  38. -> impl Iterator<Item=LineDiff<()>>
  39. where L: Cmp<R> {
  40. core(lhs, rhs)
  41. }
  42. /// Core LCS algorithm, takes only slices and outputs "unit diffs". Unit diffs do not contain any
  43. /// information about what data is being applied by the patch, instead just tell if a certain
  44. /// operations is an insert, delete, or overwrite.
  45. fn core<L, R>(lhs: &[L], rhs: &[R]) -> impl Iterator<Item=LineDiff<()>>
  46. where L: Cmp<R> {
  47. let n = lhs.len();
  48. let m = rhs.len();
  49. let mut memo = vec![vec![0; m+1]; n+1];
  50. for i in 1..(n+1) {
  51. for j in 1..(m+1) {
  52. use std::cmp::max;
  53. memo[i][j] =
  54. if lhs[i-1].similar(&rhs[j-1]) {
  55. 1 + memo[i-1][j-1]
  56. } else {
  57. max(memo[i-1][j], memo[i][j-1])
  58. };
  59. }
  60. }
  61. let mut backtrack = Vec::new();
  62. let mut i = n;
  63. let mut j = m;
  64. while (i, j) != (0, 0) {
  65. if j == 0 {
  66. backtrack.push(LineDiff::Del);
  67. i -= 1;
  68. } else if i == 0 {
  69. backtrack.push(LineDiff::Ins(()));
  70. j -= 1;
  71. } else if lhs[i-1].similar(&rhs[j-1]) {
  72. backtrack.push(LineDiff::Ovr(()));
  73. i -= 1;
  74. j -= 1;
  75. } else if memo[i-1][j] > memo[i][j-1] {
  76. backtrack.push(LineDiff::Del);
  77. i -= 1;
  78. } else {
  79. backtrack.push(LineDiff::Ins(()));
  80. j -= 1;
  81. }
  82. }
  83. backtrack.into_iter().rev()
  84. }
  85. /// Uncover the data at each insert/overwrite in a diff by consuming data from an input stream.
  86. fn uncover_diffs<I, J>(data: I, diffs: J) -> UncoverDiffs<I::IntoIter, J::IntoIter>
  87. where I: IntoIterator,
  88. J: IntoIterator<Item=LineDiff<()>> {
  89. UncoverDiffs(data.into_iter(), diffs.into_iter())
  90. }
  91. struct UncoverDiffs<I, J>(I, J);
  92. impl<I, J> Iterator for UncoverDiffs<I, J>
  93. where I: Iterator,
  94. J: Iterator<Item=LineDiff<()>> {
  95. type Item = LineDiff<I::Item>;
  96. fn next(&mut self) -> Option<LineDiff<I::Item>> {
  97. self.1.next()
  98. .and_then(|x| match x {
  99. LineDiff::Del => Some(LineDiff::Del),
  100. LineDiff::Ins(()) => self.0.next().map(LineDiff::Ins),
  101. LineDiff::Ovr(()) => self.0.next().map(LineDiff::Ovr),
  102. })
  103. }
  104. }
  105. #[cfg(test)]
  106. mod test_gen {
  107. use super::{LineDiff, gen};
  108. use super::LineDiff::*;
  109. fn gen_str(a: &str, b: &str) -> Vec<LineDiff<char>> {
  110. let av: Vec<_> = a.chars().collect();
  111. let bv: Vec<_> = b.chars().collect();
  112. gen(&*av, bv).collect()
  113. }
  114. #[test]
  115. fn test_same() {
  116. assert_eq!(gen_str("O_o", "O_o"), "O_o".chars().map(Ovr).collect::<Vec<_>>())
  117. }
  118. #[test]
  119. fn test_del() {
  120. assert_eq!(gen_str("UwU", ""), [Del; 3])
  121. }
  122. #[test]
  123. fn test_ins() {
  124. assert_eq!(gen_str("", "owo"), "owo".chars().map(Ins).collect::<Vec<_>>())
  125. }
  126. #[test]
  127. fn test_modify() {
  128. assert_eq!(gen_str("far", "bare"), [Del, Ins('b'), Ovr('a'), Ovr('r'), Ins('e')]);
  129. }
  130. #[test]
  131. fn test_modify_2() {
  132. assert_eq!(gen_str("hello", "handle"),
  133. [Ovr('h'), Del, Del, Ins('a'), Ins('n'), Ins('d'),
  134. Ovr('l'), Del, Ins('e')]);
  135. }
  136. }
  137. ////////////////////////////////////////////////////////////////////////////////////////////////////
  138. // Applying patches
  139. pub use self::apply::{ApplyPatch, Error as ApplyError};
  140. mod apply {
  141. use std;
  142. use super::{LineDiff};
  143. #[derive(Debug, Clone, Eq, PartialEq)]
  144. pub enum Error {
  145. Overrun,
  146. Unfinished(usize),
  147. Reject(usize),
  148. }
  149. impl std::fmt::Display for Error {
  150. fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
  151. match *self {
  152. Error::Overrun => write!(f, "destination overrun while applying diff"),
  153. Error::Unfinished(_) => write!(f, "diff did not cover all characters"),
  154. Error::Reject(_) => write!(f, "diff overwrite rejected"),
  155. }
  156. }
  157. }
  158. impl std::error::Error for Error {}
  159. pub type Result<T> = std::result::Result<T, Error>;
  160. pub trait ApplyPatch<E> {
  161. fn len(&self) -> usize;
  162. fn remove(&mut self, i: usize);
  163. fn insert(&mut self, i: usize, elem: E);
  164. fn overwrite(&mut self, i: usize, elem: E) -> bool {
  165. self.remove(i);
  166. self.insert(i, elem);
  167. true
  168. }
  169. fn apply_patch_offset<PATCH>(&mut self, mut off: usize, patch: PATCH)
  170. -> Result<usize>
  171. where PATCH: IntoIterator<Item=LineDiff<E>> {
  172. for ld in patch {
  173. match ld {
  174. LineDiff::Del =>
  175. if off >= self.len() {
  176. return Err(Error::Overrun)
  177. } else {
  178. self.remove(off)
  179. },
  180. LineDiff::Ins(e) => {
  181. self.insert(off, e);
  182. off += 1;
  183. }
  184. LineDiff::Ovr(e) =>
  185. if off >= self.len() {
  186. return Err(Error::Overrun)
  187. } else if !self.overwrite(off, e) {
  188. return Err(Error::Reject(off))
  189. } else {
  190. off += 1;
  191. },
  192. }
  193. }
  194. Ok(off)
  195. }
  196. fn apply_patch<PATCH>(&mut self, patch: PATCH) -> Result<()>
  197. where PATCH: IntoIterator<Item=LineDiff<E>> {
  198. let len = self.apply_patch_offset(0, patch)?;
  199. if len != self.len() {
  200. Err(Error::Unfinished(len))
  201. } else {
  202. Ok(())
  203. }
  204. }
  205. }
  206. impl<E> ApplyPatch<E> for Vec<E> {
  207. fn len(&self) -> usize { self.len() }
  208. fn remove(&mut self, i: usize) { self.remove(i); }
  209. fn insert(&mut self, i: usize, elem: E) { self.insert(i, elem); }
  210. }
  211. impl ApplyPatch<char> for String {
  212. fn len(&self) -> usize { self.len() }
  213. fn remove(&mut self, i: usize) { self.remove(i); }
  214. fn insert(&mut self, i: usize, elem: char) { self.insert(i, elem); }
  215. fn overwrite(&mut self, i: usize, elem: char) -> bool { self.chars().nth(i) == Some(elem) }
  216. }
  217. }
  218. #[cfg(test)]
  219. mod test_apply {
  220. use std::iter::{once};
  221. use super::{ApplyPatch, ApplyError};
  222. use super::LineDiff::*;
  223. #[test]
  224. fn test_apply_erase() {
  225. let mut s = String::from("uwu");
  226. assert_eq!(s.apply_patch((0..3).map(|_| Del)), Ok(()));
  227. assert_eq!(s, "");
  228. }
  229. #[test]
  230. fn test_apply_overwrite_del() {
  231. let mut s = String::from("hello");
  232. assert_eq!(
  233. s.apply_patch("hell"
  234. .chars()
  235. .map(Ovr)
  236. .chain(vec![Del])),
  237. Ok(()));
  238. assert_eq!(s, "hell");
  239. }
  240. #[test]
  241. fn test_apply_overwrite_insert() {
  242. let mut s = String::from("hello");
  243. assert_eq!(
  244. s.apply_patch("hell"
  245. .chars()
  246. .map(Ovr)
  247. .chain(" w".chars().map(Ins))
  248. .chain(once(Ovr('o')))
  249. .chain("rld".chars().map(Ins))),
  250. Ok(()));
  251. assert_eq!(s, "hell world");
  252. }
  253. #[test]
  254. fn test_far_bare() {
  255. let mut s = String::from("far");
  256. assert_eq!(
  257. s.apply_patch(vec![Del, Ins('b'), Ovr('a'), Ovr('r'), Ins('e')]),
  258. Ok(()));
  259. assert_eq!(s, "bare");
  260. }
  261. #[test]
  262. fn test_overrun() {
  263. let mut s = String::from("hi");
  264. assert_eq!(
  265. s.apply_patch((0..3).map(|_| Del)),
  266. Err(ApplyError::Overrun));
  267. }
  268. #[test]
  269. fn test_apply_overwrite_reject() {
  270. let mut s = String::from("hello");
  271. assert_eq!(
  272. s.apply_patch("henlo".chars().map(Ovr)),
  273. Err(ApplyError::Reject(2)));
  274. }
  275. #[test]
  276. fn test_unfinished() {
  277. let mut s = String::from("hello");
  278. assert_eq!(
  279. s.apply_patch(once(Ovr('h'))
  280. .chain((0..3).map(|_| Del))),
  281. Err(ApplyError::Unfinished(1)));
  282. }
  283. }