diffreg_new.c 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334
  1. /*
  2. * Copyright (c) 2018 Martin Pieuchot
  3. * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
  4. *
  5. * Permission to use, copy, modify, and distribute this software for any
  6. * purpose with or without fee is hereby granted, provided that the above
  7. * copyright notice and this permission notice appear in all copies.
  8. *
  9. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  10. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  11. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  12. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  13. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  14. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  15. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  16. */
  17. #include <sys/types.h>
  18. #include <sys/capsicum.h>
  19. #ifndef DIFF_NO_MMAP
  20. #include <sys/mman.h>
  21. #endif
  22. #include <sys/stat.h>
  23. #include <capsicum_helpers.h>
  24. #include <err.h>
  25. #include <fcntl.h>
  26. #include <stdbool.h>
  27. #include <stdint.h>
  28. #include <stdio.h>
  29. #include <stdlib.h>
  30. #include <string.h>
  31. #include <time.h>
  32. #include <unistd.h>
  33. #include "diff.h"
  34. #include <arraylist.h>
  35. #include <diff_main.h>
  36. #include <diff_output.h>
  37. const char *format_label(const char *, struct stat *);
  38. enum diffreg_algo {
  39. DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE = 0,
  40. DIFFREG_ALGO_MYERS_THEN_PATIENCE = 1,
  41. DIFFREG_ALGO_PATIENCE = 2,
  42. DIFFREG_ALGO_NONE = 3,
  43. };
  44. int diffreg_new(char *, char *, int, int);
  45. FILE * openfile(const char *, char **, struct stat *);
  46. static const struct diff_algo_config myers_then_patience;
  47. static const struct diff_algo_config myers_then_myers_divide;
  48. static const struct diff_algo_config patience;
  49. static const struct diff_algo_config myers_divide;
  50. static const struct diff_algo_config myers_then_patience = (struct diff_algo_config){
  51. .impl = diff_algo_myers,
  52. .permitted_state_size = 1024 * 1024 * sizeof(int),
  53. .fallback_algo = &patience,
  54. };
  55. static const struct diff_algo_config myers_then_myers_divide =
  56. (struct diff_algo_config){
  57. .impl = diff_algo_myers,
  58. .permitted_state_size = 1024 * 1024 * sizeof(int),
  59. .fallback_algo = &myers_divide,
  60. };
  61. static const struct diff_algo_config patience = (struct diff_algo_config){
  62. .impl = diff_algo_patience,
  63. /* After subdivision, do Patience again: */
  64. .inner_algo = &patience,
  65. /* If subdivision failed, do Myers Divide et Impera: */
  66. .fallback_algo = &myers_then_myers_divide,
  67. };
  68. static const struct diff_algo_config myers_divide = (struct diff_algo_config){
  69. .impl = diff_algo_myers_divide,
  70. /* When division succeeded, start from the top: */
  71. .inner_algo = &myers_then_myers_divide,
  72. /* (fallback_algo = NULL implies diff_algo_none). */
  73. };
  74. static const struct diff_algo_config no_algo = (struct diff_algo_config){
  75. .impl = diff_algo_none,
  76. };
  77. /* If the state for a forward-Myers is small enough, use Myers, otherwise first
  78. * do a Myers-divide. */
  79. static const struct diff_config diff_config_myers_then_myers_divide = {
  80. .atomize_func = diff_atomize_text_by_line,
  81. .algo = &myers_then_myers_divide,
  82. };
  83. /* If the state for a forward-Myers is small enough, use Myers, otherwise first
  84. * do a Patience. */
  85. static const struct diff_config diff_config_myers_then_patience = {
  86. .atomize_func = diff_atomize_text_by_line,
  87. .algo = &myers_then_patience,
  88. };
  89. /* Directly force Patience as a first divider of the source file. */
  90. static const struct diff_config diff_config_patience = {
  91. .atomize_func = diff_atomize_text_by_line,
  92. .algo = &patience,
  93. };
  94. /* Directly force Patience as a first divider of the source file. */
  95. static const struct diff_config diff_config_no_algo = {
  96. .atomize_func = diff_atomize_text_by_line,
  97. };
  98. const char *
  99. format_label(const char *oldlabel, struct stat *stb)
  100. {
  101. const char *time_format = "%Y-%m-%d %H:%M:%S";
  102. char *newlabel;
  103. char buf[256];
  104. char end[10];
  105. struct tm tm, *tm_ptr;
  106. int nsec = stb->st_mtim.tv_nsec;
  107. size_t newlabellen, timelen, endlen;
  108. tm_ptr = localtime_r(&stb->st_mtime, &tm);
  109. timelen = strftime(buf, 256, time_format, tm_ptr);
  110. endlen = strftime(end, 10, "%z", tm_ptr);
  111. /*
  112. * The new label is the length of the time, old label, timezone,
  113. * 9 characters for nanoseconds, and 4 characters for a period
  114. * and for formatting.
  115. */
  116. newlabellen = timelen + strlen(oldlabel) + endlen + 9 + 4;
  117. newlabel = calloc(newlabellen, sizeof(char));
  118. snprintf(newlabel, newlabellen ,"%s\t%s.%.9d %s\n",
  119. oldlabel, buf, nsec, end);
  120. return newlabel;
  121. }
  122. int
  123. diffreg_new(char *file1, char *file2, int flags, int capsicum)
  124. {
  125. char *str1, *str2;
  126. FILE *f1, *f2;
  127. struct stat st1, st2;
  128. struct diff_input_info info;
  129. struct diff_data left = {}, right = {};
  130. struct diff_result *result = NULL;
  131. bool force_text, have_binary;
  132. int rc, atomizer_flags, rflags, diff_flags = 0;
  133. int context_lines = diff_context;
  134. const struct diff_config *cfg;
  135. enum diffreg_algo algo;
  136. cap_rights_t rights_ro;
  137. algo = DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE;
  138. switch (algo) {
  139. default:
  140. case DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE:
  141. cfg = &diff_config_myers_then_myers_divide;
  142. break;
  143. case DIFFREG_ALGO_MYERS_THEN_PATIENCE:
  144. cfg = &diff_config_myers_then_patience;
  145. break;
  146. case DIFFREG_ALGO_PATIENCE:
  147. cfg = &diff_config_patience;
  148. break;
  149. case DIFFREG_ALGO_NONE:
  150. cfg = &diff_config_no_algo;
  151. break;
  152. }
  153. f1 = openfile(file1, &str1, &st1);
  154. f2 = openfile(file2, &str2, &st2);
  155. if (capsicum) {
  156. cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK);
  157. if (caph_rights_limit(fileno(f1), &rights_ro) < 0)
  158. err(2, "unable to limit rights on: %s", file1);
  159. if (caph_rights_limit(fileno(f2), &rights_ro) < 0)
  160. err(2, "unable to limit rights on: %s", file2);
  161. if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) {
  162. /* stdin has already been limited */
  163. if (caph_limit_stderr() == -1)
  164. err(2, "unable to limit stderr");
  165. if (caph_limit_stdout() == -1)
  166. err(2, "unable to limit stdout");
  167. } else if (caph_limit_stdio() == -1)
  168. err(2, "unable to limit stdio");
  169. caph_cache_catpages();
  170. caph_cache_tzdata();
  171. if (caph_enter() < 0)
  172. err(2, "unable to enter capability mode");
  173. }
  174. /*
  175. * If we have been given a label use that for the paths, if not format
  176. * the path with the files modification time.
  177. */
  178. info.flags = 0;
  179. info.left_path = (label[0] != NULL) ?
  180. label[0] : format_label(file1, &stb1);
  181. info.right_path = (label[1] != NULL) ?
  182. label[1] : format_label(file2, &stb2);
  183. if (flags & D_FORCEASCII)
  184. diff_flags |= DIFF_FLAG_FORCE_TEXT_DATA;
  185. if (flags & D_IGNOREBLANKS)
  186. diff_flags |= DIFF_FLAG_IGNORE_WHITESPACE;
  187. if (flags & D_PROTOTYPE)
  188. diff_flags |= DIFF_FLAG_SHOW_PROTOTYPES;
  189. if (diff_atomize_file(&left, cfg, f1, (uint8_t *)str1, st1.st_size, diff_flags)) {
  190. rc = D_ERROR;
  191. goto done;
  192. }
  193. if (left.atomizer_flags & DIFF_ATOMIZER_FILE_TRUNCATED)
  194. warnx("%s truncated", file1);
  195. if (diff_atomize_file(&right, cfg, f2, (uint8_t *)str2, st2.st_size, diff_flags)) {
  196. rc = D_ERROR;
  197. goto done;
  198. }
  199. if (right.atomizer_flags & DIFF_ATOMIZER_FILE_TRUNCATED)
  200. warnx("%s truncated", file2);
  201. result = diff_main(cfg, &left, &right);
  202. if (result->rc != DIFF_RC_OK) {
  203. rc = D_ERROR;
  204. status |= 2;
  205. goto done;
  206. }
  207. /*
  208. * If there wasn't an error, but we don't have any printable chunks
  209. * then the files must match.
  210. */
  211. if (!diff_result_contains_printable_chunks(result)) {
  212. rc = D_SAME;
  213. goto done;
  214. }
  215. atomizer_flags = (result->left->atomizer_flags | result->right->atomizer_flags);
  216. rflags = (result->left->root->diff_flags | result->right->root->diff_flags);
  217. force_text = (rflags & DIFF_FLAG_FORCE_TEXT_DATA);
  218. have_binary = (atomizer_flags & DIFF_ATOMIZER_FOUND_BINARY_DATA);
  219. if (have_binary && !force_text) {
  220. rc = D_BINARY;
  221. status |= 1;
  222. goto done;
  223. }
  224. if (diff_format == D_NORMAL) {
  225. rc = diff_output_plain(NULL, stdout, &info, result, false);
  226. } else if (diff_format == D_EDIT) {
  227. rc = diff_output_edscript(NULL, stdout, &info, result);
  228. } else {
  229. rc = diff_output_unidiff(NULL, stdout, &info, result,
  230. context_lines);
  231. }
  232. if (rc != DIFF_RC_OK) {
  233. rc = D_ERROR;
  234. status |= 2;
  235. } else {
  236. rc = D_DIFFER;
  237. status |= 1;
  238. }
  239. done:
  240. diff_result_free(result);
  241. diff_data_free(&left);
  242. diff_data_free(&right);
  243. #ifndef DIFF_NO_MMAP
  244. if (str1)
  245. munmap(str1, st1.st_size);
  246. if (str2)
  247. munmap(str2, st2.st_size);
  248. #endif
  249. fclose(f1);
  250. fclose(f2);
  251. return rc;
  252. }
  253. FILE *
  254. openfile(const char *path, char **p, struct stat *st)
  255. {
  256. FILE *f = NULL;
  257. if (strcmp(path, "-") == 0)
  258. f = stdin;
  259. else
  260. f = fopen(path, "r");
  261. if (f == NULL)
  262. err(2, "%s", path);
  263. if (fstat(fileno(f), st) == -1)
  264. err(2, "%s", path);
  265. #ifndef DIFF_NO_MMAP
  266. *p = mmap(NULL, st->st_size, PROT_READ, MAP_PRIVATE, fileno(f), 0);
  267. if (*p == MAP_FAILED)
  268. #endif
  269. *p = NULL; /* fall back on file I/O */
  270. return f;
  271. }
  272. bool
  273. can_libdiff(int flags)
  274. {
  275. /* We can't use fifos with libdiff yet */
  276. if (S_ISFIFO(stb1.st_mode) || S_ISFIFO(stb2.st_mode))
  277. return false;
  278. /* Is this one of the supported input/output modes for diffreg_new? */
  279. if ((flags == 0 || !(flags & ~D_NEWALGO_FLAGS)) &&
  280. ignore_pats == NULL && (
  281. diff_format == D_NORMAL ||
  282. #if 0
  283. diff_format == D_EDIT ||
  284. #endif
  285. diff_format == D_UNIFIED) &&
  286. (diff_algorithm == D_DIFFMYERS || diff_algorithm == D_DIFFPATIENCE)) {
  287. return true;
  288. }
  289. /* Fallback to using stone. */
  290. return false;
  291. }