123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485 |
- /*-
- * Copyright 2006-2025 Tarsnap Backup Inc.
- * All rights reserved.
- *
- * Portions of the file below are covered by the following license:
- *
- * Copyright (c) 2003-2007 Tim Kientzle
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
- #include "bsdtar_platform.h"
- __FBSDID("$FreeBSD: src/usr.bin/tar/matching.c,v 1.16 2008/08/18 18:13:40 kientzle Exp $");
- #ifdef HAVE_ERRNO_H
- #include <errno.h>
- #endif
- #ifdef HAVE_STDLIB_H
- #include <stdlib.h>
- #endif
- #ifdef HAVE_STRING_H
- #include <string.h>
- #endif
- #include "bsdtar.h"
- struct match {
- struct match *next;
- int matches;
- char pattern[1];
- };
- struct matching {
- struct match *exclusions;
- int exclusions_count;
- struct match *inclusions;
- int inclusions_count;
- int inclusions_unmatched_count;
- };
- static void add_pattern(struct bsdtar *, struct match **list,
- const char *pattern);
- static int bsdtar_fnmatch(const char *p, const char *s);
- static void initialize_matching(struct bsdtar *);
- static int match_exclusion(struct match *, const char *pathname);
- static int match_inclusion(struct match *, const char *pathname);
- static int pathmatch(const char *p, const char *s);
- /*
- * The matching logic here needs to be re-thought. I started out to
- * try to mimic gtar's matching logic, but it's not entirely
- * consistent. In particular 'tar -t' and 'tar -x' interpret patterns
- * on the command line as anchored, but --exclude doesn't.
- */
- /*
- * Utility functions to manage exclusion/inclusion patterns
- */
- int
- exclude(struct bsdtar *bsdtar, const char *pattern)
- {
- struct matching *matching;
- if (bsdtar->matching == NULL)
- initialize_matching(bsdtar);
- matching = bsdtar->matching;
- add_pattern(bsdtar, &(matching->exclusions), pattern);
- matching->exclusions_count++;
- return (0);
- }
- int
- exclude_from_file(struct bsdtar *bsdtar, const char *pathname)
- {
- return (process_lines(bsdtar, pathname, &exclude,
- bsdtar->option_null_input));
- }
- int
- include(struct bsdtar *bsdtar, const char *pattern)
- {
- struct matching *matching;
- if (bsdtar->matching == NULL)
- initialize_matching(bsdtar);
- matching = bsdtar->matching;
- add_pattern(bsdtar, &(matching->inclusions), pattern);
- matching->inclusions_count++;
- matching->inclusions_unmatched_count++;
- return (0);
- }
- int
- include_from_file(struct bsdtar *bsdtar, const char *pathname)
- {
- return (process_lines(bsdtar, pathname, &include,
- bsdtar->option_null_input));
- }
- static void
- add_pattern(struct bsdtar *bsdtar, struct match **list, const char *pattern)
- {
- struct match *match;
- size_t len;
- len = strlen(pattern);
- match = malloc(sizeof(*match) + len + 1);
- if (match == NULL)
- bsdtar_errc(bsdtar, 1, errno, "Out of memory");
- strcpy(match->pattern, pattern);
- /* Both "foo/" and "foo" should match "foo/bar". */
- if (len && match->pattern[len - 1] == '/')
- match->pattern[len - 1] = '\0';
- match->next = *list;
- *list = match;
- match->matches = 0;
- }
- int
- excluded(struct bsdtar *bsdtar, const char *pathname)
- {
- struct matching *matching;
- struct match *match;
- struct match *matched;
- matching = bsdtar->matching;
- if (matching == NULL)
- return (0);
- /* Exclusions take priority */
- for (match = matching->exclusions; match != NULL; match = match->next){
- if (match_exclusion(match, pathname))
- return (1);
- }
- /* Then check for inclusions */
- matched = NULL;
- for (match = matching->inclusions; match != NULL; match = match->next){
- if (match_inclusion(match, pathname)) {
- /*
- * If this pattern has never been matched,
- * then we're done.
- */
- if (match->matches == 0) {
- match->matches++;
- matching->inclusions_unmatched_count--;
- return (0);
- }
- /*
- * Otherwise, remember the match but keep checking
- * in case we can tick off an unmatched pattern.
- */
- matched = match;
- }
- }
- /*
- * We didn't find a pattern that had never been matched, but
- * we did find a match, so count it and exit.
- */
- if (matched != NULL) {
- matched->matches++;
- return (0);
- }
- /* If there were inclusions, default is to exclude. */
- if (matching->inclusions != NULL)
- return (1);
- /* No explicit inclusions, default is to match. */
- return (0);
- }
- /*
- * This is a little odd, but it matches the default behavior of
- * gtar. In particular, 'a*b' will match 'foo/a1111/222b/bar'
- *
- */
- static int
- match_exclusion(struct match *match, const char *pathname)
- {
- const char *p;
- if (*match->pattern == '*' || *match->pattern == '/')
- return (pathmatch(match->pattern, pathname) == 0);
- for (p = pathname; p != NULL; p = strchr(p, '/')) {
- if (*p == '/')
- p++;
- if (pathmatch(match->pattern, p) == 0)
- return (1);
- }
- return (0);
- }
- /*
- * Again, mimic gtar: inclusions are always anchored (have to match
- * the beginning of the path) even though exclusions are not anchored.
- */
- int
- match_inclusion(struct match *match, const char *pathname)
- {
- return (pathmatch(match->pattern, pathname) == 0);
- }
- void
- cleanup_exclusions(struct bsdtar *bsdtar)
- {
- struct match *p, *q;
- if (bsdtar->matching) {
- p = bsdtar->matching->inclusions;
- while (p != NULL) {
- q = p;
- p = p->next;
- free(q);
- }
- p = bsdtar->matching->exclusions;
- while (p != NULL) {
- q = p;
- p = p->next;
- free(q);
- }
- free(bsdtar->matching);
- }
- }
- static void
- initialize_matching(struct bsdtar *bsdtar)
- {
- bsdtar->matching = malloc(sizeof(*bsdtar->matching));
- if (bsdtar->matching == NULL)
- bsdtar_errc(bsdtar, 1, errno, "No memory");
- memset(bsdtar->matching, 0, sizeof(*bsdtar->matching));
- }
- int
- unmatched_inclusions(struct bsdtar *bsdtar)
- {
- struct matching *matching;
- matching = bsdtar->matching;
- if (matching == NULL)
- return (0);
- return (matching->inclusions_unmatched_count);
- }
- int
- unmatched_inclusions_warn(struct bsdtar *bsdtar, const char *msg)
- {
- struct matching *matching;
- struct match *p;
- matching = bsdtar->matching;
- if (matching == NULL)
- return (0);
- p = matching->inclusions;
- while (p != NULL) {
- if (p->matches == 0) {
- bsdtar->return_value = 1;
- bsdtar_warnc(bsdtar, 0, "%s: %s",
- p->pattern, msg);
- }
- p = p->next;
- }
- return (matching->inclusions_unmatched_count);
- }
- /*
- * TODO: Extend this so that the following matches work:
- * "foo//bar" == "foo/bar"
- * "foo/./bar" == "foo/bar"
- * "./foo" == "foo"
- *
- * The POSIX fnmatch() function doesn't handle any of these, but
- * all are common situations that arise when paths are generated within
- * large scripts. E.g., the following is quite common:
- * MYPATH=foo/ TARGET=$MYPATH/bar
- * It may be worthwhile to edit such paths at write time as well,
- * especially when such editing may avoid the need for long pathname
- * extensions.
- */
- static int
- pathmatch(const char *pattern, const char *string)
- {
- /*
- * Strip leading "./" or ".//" so that, e.g.,
- * "foo" matches "./foo". In particular, this
- * opens up an optimization for the writer to
- * elide leading "./".
- */
- if (pattern[0] == '.' && pattern[1] == '/') {
- pattern += 2;
- while (pattern[0] == '/')
- ++pattern;
- }
- if (string[0] == '.' && string[1] == '/') {
- string += 2;
- while (string[0] == '/')
- ++string;
- }
- return (bsdtar_fnmatch(pattern, string));
- }
- #if defined(HAVE_FNMATCH) && defined(HAVE_FNM_LEADING_DIR)
- /* Use system fnmatch() if it suits our needs. */
- #include <fnmatch.h>
- static int
- bsdtar_fnmatch(const char *pattern, const char *string)
- {
- return (fnmatch(pattern, string, FNM_LEADING_DIR));
- }
- #else
- /*
- * The following was hacked from BSD C library
- * code: src/lib/libc/gen/fnmatch.c,v 1.15 2002/02/01
- *
- * In particular, most of the flags were ripped out: this always
- * behaves like FNM_LEADING_DIR is set and other flags specified
- * by POSIX are unset.
- *
- * Normally, I would not conditionally compile something like this: If
- * I have to support it anyway, everyone may as well use it. ;-)
- * However, the full POSIX spec for fnmatch() includes a lot of
- * advanced character handling that I'm not ready to put in here, so
- * it's probably best if people use a local version when it's available.
- */
- /*
- * Copyright (c) 1989, 1993, 1994
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Guido van Rossum.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
- static int
- bsdtar_fnmatch(const char *pattern, const char *string)
- {
- const char *saved_pattern;
- int negate, matched;
- char c;
- for (;;) {
- switch (c = *pattern++) {
- case '\0':
- if (*string == '/' || *string == '\0')
- return (0);
- return (1);
- case '?':
- if (*string == '\0')
- return (1);
- ++string;
- break;
- case '*':
- c = *pattern;
- /* Collapse multiple stars. */
- while (c == '*')
- c = *++pattern;
- /* Optimize for pattern with * at end. */
- if (c == '\0')
- return (0);
- /* General case, use recursion. */
- while (*string != '\0') {
- if (!bsdtar_fnmatch(pattern, string))
- return (0);
- ++string;
- }
- return (1);
- case '[':
- if (*string == '\0')
- return (1);
- saved_pattern = pattern;
- if (*pattern == '!' || *pattern == '^') {
- negate = 1;
- ++pattern;
- } else
- negate = 0;
- matched = 0;
- c = *pattern++;
- do {
- if (c == '\\')
- c = *pattern++;
- if (c == '\0') {
- pattern = saved_pattern;
- c = '[';
- goto norm;
- }
- if (*pattern == '-') {
- char c2 = *(pattern + 1);
- if (c2 == '\0') {
- pattern = saved_pattern;
- c = '[';
- goto norm;
- }
- if (c2 == ']') {
- /* [a-] is not a range. */
- if (c == *string
- || '-' == *string)
- matched = 1;
- pattern ++;
- } else {
- if (c <= *string
- && *string <= c2)
- matched = 1;
- pattern += 2;
- }
- } else if (c == *string)
- matched = 1;
- c = *pattern++;
- } while (c != ']');
- if (matched == negate)
- return (1);
- ++string;
- break;
- case '\\':
- if ((c = *pattern++) == '\0') {
- c = '\\';
- --pattern;
- }
- /* FALLTHROUGH */
- default:
- norm:
- if (c != *string)
- return (1);
- string++;
- break;
- }
- }
- /* NOTREACHED */
- }
- #endif
|