matching.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485
  1. /*-
  2. * Copyright 2006-2025 Tarsnap Backup Inc.
  3. * All rights reserved.
  4. *
  5. * Portions of the file below are covered by the following license:
  6. *
  7. * Copyright (c) 2003-2007 Tim Kientzle
  8. * All rights reserved.
  9. *
  10. * Redistribution and use in source and binary forms, with or without
  11. * modification, are permitted provided that the following conditions
  12. * are met:
  13. * 1. Redistributions of source code must retain the above copyright
  14. * notice, this list of conditions and the following disclaimer.
  15. * 2. Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in the
  17. * documentation and/or other materials provided with the distribution.
  18. *
  19. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
  20. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  21. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  22. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
  23. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  24. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  28. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29. */
  30. #include "bsdtar_platform.h"
  31. __FBSDID("$FreeBSD: src/usr.bin/tar/matching.c,v 1.16 2008/08/18 18:13:40 kientzle Exp $");
  32. #ifdef HAVE_ERRNO_H
  33. #include <errno.h>
  34. #endif
  35. #ifdef HAVE_STDLIB_H
  36. #include <stdlib.h>
  37. #endif
  38. #ifdef HAVE_STRING_H
  39. #include <string.h>
  40. #endif
  41. #include "bsdtar.h"
  42. struct match {
  43. struct match *next;
  44. int matches;
  45. char pattern[1];
  46. };
  47. struct matching {
  48. struct match *exclusions;
  49. int exclusions_count;
  50. struct match *inclusions;
  51. int inclusions_count;
  52. int inclusions_unmatched_count;
  53. };
  54. static void add_pattern(struct bsdtar *, struct match **list,
  55. const char *pattern);
  56. static int bsdtar_fnmatch(const char *p, const char *s);
  57. static void initialize_matching(struct bsdtar *);
  58. static int match_exclusion(struct match *, const char *pathname);
  59. static int match_inclusion(struct match *, const char *pathname);
  60. static int pathmatch(const char *p, const char *s);
  61. /*
  62. * The matching logic here needs to be re-thought. I started out to
  63. * try to mimic gtar's matching logic, but it's not entirely
  64. * consistent. In particular 'tar -t' and 'tar -x' interpret patterns
  65. * on the command line as anchored, but --exclude doesn't.
  66. */
  67. /*
  68. * Utility functions to manage exclusion/inclusion patterns
  69. */
  70. int
  71. exclude(struct bsdtar *bsdtar, const char *pattern)
  72. {
  73. struct matching *matching;
  74. if (bsdtar->matching == NULL)
  75. initialize_matching(bsdtar);
  76. matching = bsdtar->matching;
  77. add_pattern(bsdtar, &(matching->exclusions), pattern);
  78. matching->exclusions_count++;
  79. return (0);
  80. }
  81. int
  82. exclude_from_file(struct bsdtar *bsdtar, const char *pathname)
  83. {
  84. return (process_lines(bsdtar, pathname, &exclude,
  85. bsdtar->option_null_input));
  86. }
  87. int
  88. include(struct bsdtar *bsdtar, const char *pattern)
  89. {
  90. struct matching *matching;
  91. if (bsdtar->matching == NULL)
  92. initialize_matching(bsdtar);
  93. matching = bsdtar->matching;
  94. add_pattern(bsdtar, &(matching->inclusions), pattern);
  95. matching->inclusions_count++;
  96. matching->inclusions_unmatched_count++;
  97. return (0);
  98. }
  99. int
  100. include_from_file(struct bsdtar *bsdtar, const char *pathname)
  101. {
  102. return (process_lines(bsdtar, pathname, &include,
  103. bsdtar->option_null_input));
  104. }
  105. static void
  106. add_pattern(struct bsdtar *bsdtar, struct match **list, const char *pattern)
  107. {
  108. struct match *match;
  109. size_t len;
  110. len = strlen(pattern);
  111. match = malloc(sizeof(*match) + len + 1);
  112. if (match == NULL)
  113. bsdtar_errc(bsdtar, 1, errno, "Out of memory");
  114. strcpy(match->pattern, pattern);
  115. /* Both "foo/" and "foo" should match "foo/bar". */
  116. if (len && match->pattern[len - 1] == '/')
  117. match->pattern[len - 1] = '\0';
  118. match->next = *list;
  119. *list = match;
  120. match->matches = 0;
  121. }
  122. int
  123. excluded(struct bsdtar *bsdtar, const char *pathname)
  124. {
  125. struct matching *matching;
  126. struct match *match;
  127. struct match *matched;
  128. matching = bsdtar->matching;
  129. if (matching == NULL)
  130. return (0);
  131. /* Exclusions take priority */
  132. for (match = matching->exclusions; match != NULL; match = match->next){
  133. if (match_exclusion(match, pathname))
  134. return (1);
  135. }
  136. /* Then check for inclusions */
  137. matched = NULL;
  138. for (match = matching->inclusions; match != NULL; match = match->next){
  139. if (match_inclusion(match, pathname)) {
  140. /*
  141. * If this pattern has never been matched,
  142. * then we're done.
  143. */
  144. if (match->matches == 0) {
  145. match->matches++;
  146. matching->inclusions_unmatched_count--;
  147. return (0);
  148. }
  149. /*
  150. * Otherwise, remember the match but keep checking
  151. * in case we can tick off an unmatched pattern.
  152. */
  153. matched = match;
  154. }
  155. }
  156. /*
  157. * We didn't find a pattern that had never been matched, but
  158. * we did find a match, so count it and exit.
  159. */
  160. if (matched != NULL) {
  161. matched->matches++;
  162. return (0);
  163. }
  164. /* If there were inclusions, default is to exclude. */
  165. if (matching->inclusions != NULL)
  166. return (1);
  167. /* No explicit inclusions, default is to match. */
  168. return (0);
  169. }
  170. /*
  171. * This is a little odd, but it matches the default behavior of
  172. * gtar. In particular, 'a*b' will match 'foo/a1111/222b/bar'
  173. *
  174. */
  175. static int
  176. match_exclusion(struct match *match, const char *pathname)
  177. {
  178. const char *p;
  179. if (*match->pattern == '*' || *match->pattern == '/')
  180. return (pathmatch(match->pattern, pathname) == 0);
  181. for (p = pathname; p != NULL; p = strchr(p, '/')) {
  182. if (*p == '/')
  183. p++;
  184. if (pathmatch(match->pattern, p) == 0)
  185. return (1);
  186. }
  187. return (0);
  188. }
  189. /*
  190. * Again, mimic gtar: inclusions are always anchored (have to match
  191. * the beginning of the path) even though exclusions are not anchored.
  192. */
  193. int
  194. match_inclusion(struct match *match, const char *pathname)
  195. {
  196. return (pathmatch(match->pattern, pathname) == 0);
  197. }
  198. void
  199. cleanup_exclusions(struct bsdtar *bsdtar)
  200. {
  201. struct match *p, *q;
  202. if (bsdtar->matching) {
  203. p = bsdtar->matching->inclusions;
  204. while (p != NULL) {
  205. q = p;
  206. p = p->next;
  207. free(q);
  208. }
  209. p = bsdtar->matching->exclusions;
  210. while (p != NULL) {
  211. q = p;
  212. p = p->next;
  213. free(q);
  214. }
  215. free(bsdtar->matching);
  216. }
  217. }
  218. static void
  219. initialize_matching(struct bsdtar *bsdtar)
  220. {
  221. bsdtar->matching = malloc(sizeof(*bsdtar->matching));
  222. if (bsdtar->matching == NULL)
  223. bsdtar_errc(bsdtar, 1, errno, "No memory");
  224. memset(bsdtar->matching, 0, sizeof(*bsdtar->matching));
  225. }
  226. int
  227. unmatched_inclusions(struct bsdtar *bsdtar)
  228. {
  229. struct matching *matching;
  230. matching = bsdtar->matching;
  231. if (matching == NULL)
  232. return (0);
  233. return (matching->inclusions_unmatched_count);
  234. }
  235. int
  236. unmatched_inclusions_warn(struct bsdtar *bsdtar, const char *msg)
  237. {
  238. struct matching *matching;
  239. struct match *p;
  240. matching = bsdtar->matching;
  241. if (matching == NULL)
  242. return (0);
  243. p = matching->inclusions;
  244. while (p != NULL) {
  245. if (p->matches == 0) {
  246. bsdtar->return_value = 1;
  247. bsdtar_warnc(bsdtar, 0, "%s: %s",
  248. p->pattern, msg);
  249. }
  250. p = p->next;
  251. }
  252. return (matching->inclusions_unmatched_count);
  253. }
  254. /*
  255. * TODO: Extend this so that the following matches work:
  256. * "foo//bar" == "foo/bar"
  257. * "foo/./bar" == "foo/bar"
  258. * "./foo" == "foo"
  259. *
  260. * The POSIX fnmatch() function doesn't handle any of these, but
  261. * all are common situations that arise when paths are generated within
  262. * large scripts. E.g., the following is quite common:
  263. * MYPATH=foo/ TARGET=$MYPATH/bar
  264. * It may be worthwhile to edit such paths at write time as well,
  265. * especially when such editing may avoid the need for long pathname
  266. * extensions.
  267. */
  268. static int
  269. pathmatch(const char *pattern, const char *string)
  270. {
  271. /*
  272. * Strip leading "./" or ".//" so that, e.g.,
  273. * "foo" matches "./foo". In particular, this
  274. * opens up an optimization for the writer to
  275. * elide leading "./".
  276. */
  277. if (pattern[0] == '.' && pattern[1] == '/') {
  278. pattern += 2;
  279. while (pattern[0] == '/')
  280. ++pattern;
  281. }
  282. if (string[0] == '.' && string[1] == '/') {
  283. string += 2;
  284. while (string[0] == '/')
  285. ++string;
  286. }
  287. return (bsdtar_fnmatch(pattern, string));
  288. }
  289. #if defined(HAVE_FNMATCH) && defined(HAVE_FNM_LEADING_DIR)
  290. /* Use system fnmatch() if it suits our needs. */
  291. #include <fnmatch.h>
  292. static int
  293. bsdtar_fnmatch(const char *pattern, const char *string)
  294. {
  295. return (fnmatch(pattern, string, FNM_LEADING_DIR));
  296. }
  297. #else
  298. /*
  299. * The following was hacked from BSD C library
  300. * code: src/lib/libc/gen/fnmatch.c,v 1.15 2002/02/01
  301. *
  302. * In particular, most of the flags were ripped out: this always
  303. * behaves like FNM_LEADING_DIR is set and other flags specified
  304. * by POSIX are unset.
  305. *
  306. * Normally, I would not conditionally compile something like this: If
  307. * I have to support it anyway, everyone may as well use it. ;-)
  308. * However, the full POSIX spec for fnmatch() includes a lot of
  309. * advanced character handling that I'm not ready to put in here, so
  310. * it's probably best if people use a local version when it's available.
  311. */
  312. /*
  313. * Copyright (c) 1989, 1993, 1994
  314. * The Regents of the University of California. All rights reserved.
  315. *
  316. * This code is derived from software contributed to Berkeley by
  317. * Guido van Rossum.
  318. *
  319. * Redistribution and use in source and binary forms, with or without
  320. * modification, are permitted provided that the following conditions
  321. * are met:
  322. * 1. Redistributions of source code must retain the above copyright
  323. * notice, this list of conditions and the following disclaimer.
  324. * 2. Redistributions in binary form must reproduce the above copyright
  325. * notice, this list of conditions and the following disclaimer in the
  326. * documentation and/or other materials provided with the distribution.
  327. * 4. Neither the name of the University nor the names of its contributors
  328. * may be used to endorse or promote products derived from this software
  329. * without specific prior written permission.
  330. *
  331. * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  332. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  333. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  334. * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  335. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  336. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  337. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  338. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  339. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  340. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  341. * SUCH DAMAGE.
  342. */
  343. static int
  344. bsdtar_fnmatch(const char *pattern, const char *string)
  345. {
  346. const char *saved_pattern;
  347. int negate, matched;
  348. char c;
  349. for (;;) {
  350. switch (c = *pattern++) {
  351. case '\0':
  352. if (*string == '/' || *string == '\0')
  353. return (0);
  354. return (1);
  355. case '?':
  356. if (*string == '\0')
  357. return (1);
  358. ++string;
  359. break;
  360. case '*':
  361. c = *pattern;
  362. /* Collapse multiple stars. */
  363. while (c == '*')
  364. c = *++pattern;
  365. /* Optimize for pattern with * at end. */
  366. if (c == '\0')
  367. return (0);
  368. /* General case, use recursion. */
  369. while (*string != '\0') {
  370. if (!bsdtar_fnmatch(pattern, string))
  371. return (0);
  372. ++string;
  373. }
  374. return (1);
  375. case '[':
  376. if (*string == '\0')
  377. return (1);
  378. saved_pattern = pattern;
  379. if (*pattern == '!' || *pattern == '^') {
  380. negate = 1;
  381. ++pattern;
  382. } else
  383. negate = 0;
  384. matched = 0;
  385. c = *pattern++;
  386. do {
  387. if (c == '\\')
  388. c = *pattern++;
  389. if (c == '\0') {
  390. pattern = saved_pattern;
  391. c = '[';
  392. goto norm;
  393. }
  394. if (*pattern == '-') {
  395. char c2 = *(pattern + 1);
  396. if (c2 == '\0') {
  397. pattern = saved_pattern;
  398. c = '[';
  399. goto norm;
  400. }
  401. if (c2 == ']') {
  402. /* [a-] is not a range. */
  403. if (c == *string
  404. || '-' == *string)
  405. matched = 1;
  406. pattern ++;
  407. } else {
  408. if (c <= *string
  409. && *string <= c2)
  410. matched = 1;
  411. pattern += 2;
  412. }
  413. } else if (c == *string)
  414. matched = 1;
  415. c = *pattern++;
  416. } while (c != ']');
  417. if (matched == negate)
  418. return (1);
  419. ++string;
  420. break;
  421. case '\\':
  422. if ((c = *pattern++) == '\0') {
  423. c = '\\';
  424. --pattern;
  425. }
  426. /* FALLTHROUGH */
  427. default:
  428. norm:
  429. if (c != *string)
  430. return (1);
  431. string++;
  432. break;
  433. }
  434. }
  435. /* NOTREACHED */
  436. }
  437. #endif