| /*- |
| * Copyright (c) 2003-2007 Tim Kientzle |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR |
| * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, |
| * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #include "bsdtar_platform.h" |
| __FBSDID("$FreeBSD: src/usr.bin/tar/matching.c,v 1.16 2008/08/18 18:13:40 kientzle Exp $"); |
| |
| #ifdef HAVE_ERRNO_H |
| #include <errno.h> |
| #endif |
| #ifdef HAVE_STDLIB_H |
| #include <stdlib.h> |
| #endif |
| #ifdef HAVE_STRING_H |
| #include <string.h> |
| #endif |
| |
| #include "bsdtar.h" |
| |
| struct match { |
| struct match *next; |
| int matches; |
| char pattern[1]; |
| }; |
| |
| struct matching { |
| struct match *exclusions; |
| int exclusions_count; |
| struct match *inclusions; |
| int inclusions_count; |
| int inclusions_unmatched_count; |
| }; |
| |
| |
| static void add_pattern(struct bsdtar *, struct match **list, |
| const char *pattern); |
| static int bsdtar_fnmatch(const char *p, const char *s); |
| static void initialize_matching(struct bsdtar *); |
| static int match_exclusion(struct match *, const char *pathname); |
| static int match_inclusion(struct match *, const char *pathname); |
| static int pathmatch(const char *p, const char *s); |
| |
| /* |
| * The matching logic here needs to be re-thought. I started out to |
| * try to mimic gtar's matching logic, but it's not entirely |
| * consistent. In particular 'tar -t' and 'tar -x' interpret patterns |
| * on the command line as anchored, but --exclude doesn't. |
| */ |
| |
| /* |
| * Utility functions to manage exclusion/inclusion patterns |
| */ |
| |
| int |
| exclude(struct bsdtar *bsdtar, const char *pattern) |
| { |
| struct matching *matching; |
| |
| if (bsdtar->matching == NULL) |
| initialize_matching(bsdtar); |
| matching = bsdtar->matching; |
| add_pattern(bsdtar, &(matching->exclusions), pattern); |
| matching->exclusions_count++; |
| return (0); |
| } |
| |
| int |
| exclude_from_file(struct bsdtar *bsdtar, const char *pathname) |
| { |
| return (process_lines(bsdtar, pathname, &exclude)); |
| } |
| |
| int |
| include(struct bsdtar *bsdtar, const char *pattern) |
| { |
| struct matching *matching; |
| |
| if (bsdtar->matching == NULL) |
| initialize_matching(bsdtar); |
| matching = bsdtar->matching; |
| add_pattern(bsdtar, &(matching->inclusions), pattern); |
| matching->inclusions_count++; |
| matching->inclusions_unmatched_count++; |
| return (0); |
| } |
| |
| int |
| include_from_file(struct bsdtar *bsdtar, const char *pathname) |
| { |
| return (process_lines(bsdtar, pathname, &include)); |
| } |
| |
| static void |
| add_pattern(struct bsdtar *bsdtar, struct match **list, const char *pattern) |
| { |
| struct match *match; |
| |
| match = malloc(sizeof(*match) + strlen(pattern) + 1); |
| if (match == NULL) |
| bsdtar_errc(bsdtar, 1, errno, "Out of memory"); |
| strcpy(match->pattern, pattern); |
| /* Both "foo/" and "foo" should match "foo/bar". */ |
| if (match->pattern[strlen(match->pattern)-1] == '/') |
| match->pattern[strlen(match->pattern)-1] = '\0'; |
| match->next = *list; |
| *list = match; |
| match->matches = 0; |
| } |
| |
| |
| int |
| excluded(struct bsdtar *bsdtar, const char *pathname) |
| { |
| struct matching *matching; |
| struct match *match; |
| struct match *matched; |
| |
| matching = bsdtar->matching; |
| if (matching == NULL) |
| return (0); |
| |
| /* Exclusions take priority */ |
| for (match = matching->exclusions; match != NULL; match = match->next){ |
| if (match_exclusion(match, pathname)) |
| return (1); |
| } |
| |
| /* Then check for inclusions */ |
| matched = NULL; |
| for (match = matching->inclusions; match != NULL; match = match->next){ |
| if (match_inclusion(match, pathname)) { |
| /* |
| * If this pattern has never been matched, |
| * then we're done. |
| */ |
| if (match->matches == 0) { |
| match->matches++; |
| matching->inclusions_unmatched_count--; |
| return (0); |
| } |
| /* |
| * Otherwise, remember the match but keep checking |
| * in case we can tick off an unmatched pattern. |
| */ |
| matched = match; |
| } |
| } |
| /* |
| * We didn't find a pattern that had never been matched, but |
| * we did find a match, so count it and exit. |
| */ |
| if (matched != NULL) { |
| matched->matches++; |
| return (0); |
| } |
| |
| /* If there were inclusions, default is to exclude. */ |
| if (matching->inclusions != NULL) |
| return (1); |
| |
| /* No explicit inclusions, default is to match. */ |
| return (0); |
| } |
| |
| /* |
| * This is a little odd, but it matches the default behavior of |
| * gtar. In particular, 'a*b' will match 'foo/a1111/222b/bar' |
| * |
| */ |
| static int |
| match_exclusion(struct match *match, const char *pathname) |
| { |
| const char *p; |
| |
| if (*match->pattern == '*' || *match->pattern == '/') |
| return (pathmatch(match->pattern, pathname) == 0); |
| |
| for (p = pathname; p != NULL; p = strchr(p, '/')) { |
| if (*p == '/') |
| p++; |
| if (pathmatch(match->pattern, p) == 0) |
| return (1); |
| } |
| return (0); |
| } |
| |
| /* |
| * Again, mimic gtar: inclusions are always anchored (have to match |
| * the beginning of the path) even though exclusions are not anchored. |
| */ |
| int |
| match_inclusion(struct match *match, const char *pathname) |
| { |
| return (pathmatch(match->pattern, pathname) == 0); |
| } |
| |
| void |
| cleanup_exclusions(struct bsdtar *bsdtar) |
| { |
| struct match *p, *q; |
| |
| if (bsdtar->matching) { |
| p = bsdtar->matching->inclusions; |
| while (p != NULL) { |
| q = p; |
| p = p->next; |
| free(q); |
| } |
| p = bsdtar->matching->exclusions; |
| while (p != NULL) { |
| q = p; |
| p = p->next; |
| free(q); |
| } |
| free(bsdtar->matching); |
| } |
| } |
| |
| static void |
| initialize_matching(struct bsdtar *bsdtar) |
| { |
| bsdtar->matching = malloc(sizeof(*bsdtar->matching)); |
| if (bsdtar->matching == NULL) |
| bsdtar_errc(bsdtar, 1, errno, "No memory"); |
| memset(bsdtar->matching, 0, sizeof(*bsdtar->matching)); |
| } |
| |
| int |
| unmatched_inclusions(struct bsdtar *bsdtar) |
| { |
| struct matching *matching; |
| |
| matching = bsdtar->matching; |
| if (matching == NULL) |
| return (0); |
| return (matching->inclusions_unmatched_count); |
| } |
| |
| |
| int |
| unmatched_inclusions_warn(struct bsdtar *bsdtar, const char *msg) |
| { |
| struct matching *matching; |
| struct match *p; |
| |
| matching = bsdtar->matching; |
| if (matching == NULL) |
| return (0); |
| |
| p = matching->inclusions; |
| while (p != NULL) { |
| if (p->matches == 0) { |
| bsdtar->return_value = 1; |
| bsdtar_warnc(bsdtar, 0, "%s: %s", |
| p->pattern, msg); |
| } |
| p = p->next; |
| } |
| return (matching->inclusions_unmatched_count); |
| } |
| |
| /* |
| * TODO: Extend this so that the following matches work: |
| * "foo//bar" == "foo/bar" |
| * "foo/./bar" == "foo/bar" |
| * "./foo" == "foo" |
| * |
| * The POSIX fnmatch() function doesn't handle any of these, but |
| * all are common situations that arise when paths are generated within |
| * large scripts. E.g., the following is quite common: |
| * MYPATH=foo/ TARGET=$MYPATH/bar |
| * It may be worthwhile to edit such paths at write time as well, |
| * especially when such editing may avoid the need for long pathname |
| * extensions. |
| */ |
| static int |
| pathmatch(const char *pattern, const char *string) |
| { |
| /* |
| * Strip leading "./" or ".//" so that, e.g., |
| * "foo" matches "./foo". In particular, this |
| * opens up an optimization for the writer to |
| * elide leading "./". |
| */ |
| if (pattern[0] == '.' && pattern[1] == '/') { |
| pattern += 2; |
| while (pattern[0] == '/') |
| ++pattern; |
| } |
| if (string[0] == '.' && string[1] == '/') { |
| string += 2; |
| while (string[0] == '/') |
| ++string; |
| } |
| return (bsdtar_fnmatch(pattern, string)); |
| } |
| |
| |
| #if defined(HAVE_FNMATCH) && defined(HAVE_FNM_LEADING_DIR) |
| |
| /* Use system fnmatch() if it suits our needs. */ |
| /* On Linux, _GNU_SOURCE must be defined to get FNM_LEADING_DIR. */ |
| #define _GNU_SOURCE |
| #include <fnmatch.h> |
| static int |
| bsdtar_fnmatch(const char *pattern, const char *string) |
| { |
| return (fnmatch(pattern, string, FNM_LEADING_DIR)); |
| } |
| |
| #else |
| /* |
| * The following was hacked from BSD C library |
| * code: src/lib/libc/gen/fnmatch.c,v 1.15 2002/02/01 |
| * |
| * In particular, most of the flags were ripped out: this always |
| * behaves like FNM_LEADING_DIR is set and other flags specified |
| * by POSIX are unset. |
| * |
| * Normally, I would not conditionally compile something like this: If |
| * I have to support it anyway, everyone may as well use it. ;-) |
| * However, the full POSIX spec for fnmatch() includes a lot of |
| * advanced character handling that I'm not ready to put in here, so |
| * it's probably best if people use a local version when it's available. |
| */ |
| |
| /* |
| * Copyright (c) 1989, 1993, 1994 |
| * The Regents of the University of California. All rights reserved. |
| * |
| * This code is derived from software contributed to Berkeley by |
| * Guido van Rossum. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * 4. Neither the name of the University nor the names of its contributors |
| * may be used to endorse or promote products derived from this software |
| * without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
| * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
| * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| * SUCH DAMAGE. |
| */ |
| |
| static int |
| bsdtar_fnmatch(const char *pattern, const char *string) |
| { |
| const char *saved_pattern; |
| int negate, matched; |
| char c; |
| |
| for (;;) { |
| switch (c = *pattern++) { |
| case '\0': |
| if (*string == '/' || *string == '\0') |
| return (0); |
| return (1); |
| case '?': |
| if (*string == '\0') |
| return (1); |
| ++string; |
| break; |
| case '*': |
| c = *pattern; |
| /* Collapse multiple stars. */ |
| while (c == '*') |
| c = *++pattern; |
| |
| /* Optimize for pattern with * at end. */ |
| if (c == '\0') |
| return (0); |
| |
| /* General case, use recursion. */ |
| while (*string != '\0') { |
| if (!bsdtar_fnmatch(pattern, string)) |
| return (0); |
| ++string; |
| } |
| return (1); |
| case '[': |
| if (*string == '\0') |
| return (1); |
| saved_pattern = pattern; |
| if (*pattern == '!' || *pattern == '^') { |
| negate = 1; |
| ++pattern; |
| } else |
| negate = 0; |
| matched = 0; |
| c = *pattern++; |
| do { |
| if (c == '\\') |
| c = *pattern++; |
| if (c == '\0') { |
| pattern = saved_pattern; |
| c = '['; |
| goto norm; |
| } |
| if (*pattern == '-') { |
| char c2 = *(pattern + 1); |
| if (c2 == '\0') { |
| pattern = saved_pattern; |
| c = '['; |
| goto norm; |
| } |
| if (c2 == ']') { |
| /* [a-] is not a range. */ |
| if (c == *string |
| || '-' == *string) |
| matched = 1; |
| pattern ++; |
| } else { |
| if (c <= *string |
| && *string <= c2) |
| matched = 1; |
| pattern += 2; |
| } |
| } else if (c == *string) |
| matched = 1; |
| c = *pattern++; |
| } while (c != ']'); |
| if (matched == negate) |
| return (1); |
| ++string; |
| break; |
| case '\\': |
| if ((c = *pattern++) == '\0') { |
| c = '\\'; |
| --pattern; |
| } |
| /* FALLTHROUGH */ |
| default: |
| norm: |
| if (c != *string) |
| return (1); |
| string++; |
| break; |
| } |
| } |
| /* NOTREACHED */ |
| } |
| |
| #endif |