123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473 |
- --- match.c 2011-09-07 23:00:58.037644003 +0200
- +++ match.final.c 2011-09-07 23:32:19.433644002 +0200
- @@ -27,16 +27,14 @@
-
- ---------------------------------------------------------------------------
-
- - Copyright on recmatch() from Zip's util.c (although recmatch() was almost
- - certainly written by Mark Adler...ask me how I can tell :-) ):
- + Copyright on recmatch() from Zip's util.c
- + Copyright (c) 1990-2005 Info-ZIP. All rights reserved.
-
- - Copyright (C) 1990-1992 Mark Adler, Richard B. Wales, Jean-loup Gailly,
- - Kai Uwe Rommel and Igor Mandrichenko.
- + See the accompanying file LICENSE, version 2004-May-22 or later
- + for terms of use.
- + If, for some reason, both of these files are missing, the Info-ZIP license
- + also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html
-
- - Permission is granted to any individual or institution to use, copy,
- - or redistribute this software so long as all of the original files are
- - included unmodified, that it is not sold for profit, and that this copy-
- - right notice is retained.
-
- ---------------------------------------------------------------------------
-
- @@ -53,7 +51,7 @@
-
- A set is composed of characters or ranges; a range looks like ``character
- hyphen character'' (as in 0-9 or A-Z). [0-9a-zA-Z_] is the minimal set of
- - characters allowed in the [..] pattern construct. Other characters are
- + characters allowed in the [..] pattern construct. Other characters are
- allowed (i.e., 8-bit characters) if your system will support them.
-
- To suppress the special syntactic significance of any of ``[]*?!^-\'', in-
- @@ -101,8 +99,32 @@
- # define WILDCHAR '?'
- # define BEG_RANGE '['
- # define END_RANGE ']'
- +# define WILDCHR_SINGLE '?'
- +# define DIRSEP_CHR '/'
- +# define WILDCHR_MULTI '*'
- #endif
-
- +#ifdef WILD_STOP_AT_DIR
- + int wild_stop_at_dir = 1; /* default: wildcards do not match '/' */
- +#else
- + int wild_stop_at_dir = 0; /* default: wildcards may match '/' */
- +#endif
- +
- +
- +
- +/*
- + * case mapping functions. case_map is used to ignore case in comparisons,
- + * to_up is used to force upper case even on Unix (for dosify option).
- + */
- +#ifdef USE_CASE_MAP
- +# define case_map(c) upper[(c) & 0xff]
- +# define to_up(c) upper[(c) & 0xff]
- +#else
- +# define case_map(c) (c)
- +# define to_up(c) ((c) >= 'a' && (c) <= 'z' ? (c)-'a'+'A' : (c))
- +#endif /* USE_CASE_MAP */
- +
- +
- #if 0 /* GRR: add this to unzip.h someday... */
- #if !(defined(MSDOS) && defined(DOSWILD))
- #ifdef WILD_STOP_AT_DIR
- @@ -114,8 +136,8 @@
- int ignore_case __WDLPRO));
- #endif
- #endif /* 0 */
- -static int recmatch OF((ZCONST uch *pattern, ZCONST uch *string,
- - int ignore_case __WDLPRO));
- +static int recmatch OF((ZCONST char *, ZCONST char *,
- + int));
- static char *isshexp OF((ZCONST char *p));
- static int namecmp OF((ZCONST char *s1, ZCONST char *s2));
-
- @@ -154,192 +176,236 @@
- }
- dospattern[j-1] = '\0'; /* nuke the end "." */
- }
- - j = recmatch((uch *)dospattern, (uch *)string, ignore_case __WDL);
- + j = recmatch(dospattern, string, ignore_case);
- free(dospattern);
- return j == 1;
- } else
- #endif /* MSDOS && DOSWILD */
- - return recmatch((uch *)pattern, (uch *)string, ignore_case __WDL) == 1;
- + return recmatch(pattern, string, ignore_case) == 1;
- }
-
-
-
- -static int recmatch(p, s, ic __WDL)
- - ZCONST uch *p; /* sh pattern to match */
- - ZCONST uch *s; /* string to which to match it */
- - int ic; /* true for case insensitivity */
- - __WDLDEF /* directory sepchar for WildStopAtDir mode, or 0 */
- +static int recmatch(p, s, cs)
- +ZCONST char *p; /* sh pattern to match */
- +ZCONST char *s; /* string to match it to */
- +int cs; /* flag: force case-sensitive matching. NOTE(review): match() passes ignore_case here -- confirm the sense is not inverted */
- /* Recursively compare the sh pattern p with the string s and return 1 if
- - * they match, and 0 or 2 if they don't or if there is a syntax error in the
- - * pattern. This routine recurses on itself no more deeply than the number
- - * of characters in the pattern. */
- + they match, and 0 or 2 if they don't or if there is a syntax error in the
- + pattern. This routine recurses on itself no deeper than the number of
- + characters in the pattern. */
- {
- - unsigned int c; /* pattern char or start of range in [-] loop */
- + int c; /* pattern char or start of range in [-] loop */
- + /* Get first character, the pattern for new recmatch calls follows */
- + /* borrowed from Zip's global.c */
- + int no_wild = 0;
- + int allow_regex=1;
- + /* This fix provided by akt@m5.dion.ne.jp for Japanese.
- + See 21 July 2006 mail.
- + It only applies when p is pointing to a doublebyte character and
- + things like / and wildcards are not doublebyte. This probably
- + should not be needed. */
-
- - /* Get first character, the pattern for new recmatch calls follows */
- - c = *p; INCSTR(p);
- +#ifdef _MBCS
- + if (CLEN(p) == 2) {
- + if (CLEN(s) == 2) {
- + return (*p == *s && *(p+1) == *(s+1)) ?
- + recmatch(p + 2, s + 2, cs) : 0;
- + } else {
- + return 0;
- + }
- + }
- +#endif /* ?_MBCS */
-
- - /* If that was the end of the pattern, match if string empty too */
- - if (c == 0)
- - return *s == 0;
- + c = *POSTINCSTR(p);
-
- - /* '?' (or '%') matches any character (but not an empty string). */
- - if (c == WILDCHAR)
- -#ifdef WILD_STOP_AT_DIR
- - /* If uO.W_flag is non-zero, it won't match '/' */
- - return (*s && (!sepc || *s != (uch)sepc))
- - ? recmatch(p, s + CLEN(s), ic, sepc) : 0;
- -#else
- - return *s ? recmatch(p, s + CLEN(s), ic) : 0;
- -#endif
- + /* If that was the end of the pattern, match if string empty too */
- + if (c == 0)
- + return *s == 0;
- +
- + /* '?' (or '%' or '#') matches any character (but not an empty string) */
- + if (c == WILDCHR_SINGLE) {
- + if (wild_stop_at_dir)
- + return (*s && *s != DIRSEP_CHR) ? recmatch(p, s + CLEN(s), cs) : 0;
- + else
- + return *s ? recmatch(p, s + CLEN(s), cs) : 0;
- + }
-
- - /* '*' matches any number of characters, including zero */
- + /* WILDCHR_MULTI ('*') matches any number of characters, including zero */
- #ifdef AMIGA
- - if (c == '#' && *p == '?') /* "#?" is Amiga-ese for "*" */
- - c = '*', p++;
- + if (!no_wild && c == '#' && *p == '?') /* "#?" is Amiga-ese for "*" */
- + c = WILDCHR_MULTI, p++;
- #endif /* AMIGA */
- - if (c == '*') {
- -#ifdef WILD_STOP_AT_DIR
- - if (sepc) {
- - /* check for single "*" or double "**" */
- -# ifdef AMIGA
- - if ((c = p[0]) == '#' && p[1] == '?') /* "#?" is Amiga-ese for "*" */
- - c = '*', p++;
- - if (c != '*') {
- -# else /* !AMIGA */
- - if (*p != '*') {
- -# endif /* ?AMIGA */
- - /* single "*": this doesn't match the dirsep character */
- - for (; *s && *s != (uch)sepc; INCSTR(s))
- - if ((c = recmatch(p, s, ic, sepc)) != 0)
- - return (int)c;
- - /* end of pattern: matched if at end of string, else continue */
- - if (*p == '\0')
- - return (*s == 0);
- - /* continue to match if at sepc in pattern, else give up */
- - return (*p == (uch)sepc || (*p == '\\' && p[1] == (uch)sepc))
- - ? recmatch(p, s, ic, sepc) : 2;
- - }
- - /* "**": this matches slashes */
- - ++p; /* move p behind the second '*' */
- - /* and continue with the non-W_flag code variant */
- - }
- -#endif /* WILD_STOP_AT_DIR */
- + if (!no_wild && c == WILDCHR_MULTI)
- + {
- + if (wild_stop_at_dir) {
- + /* Check for an immediately following WILDCHR_MULTI */
- +# ifdef AMIGA
- + if ((c = p[0]) == '#' && p[1] == '?') /* "#?" is Amiga-ese for "*" */
- + c = WILDCHR_MULTI, p++;
- + if (c != WILDCHR_MULTI) {
- +# else /* !AMIGA */
- + if (*p != WILDCHR_MULTI) {
- +# endif /* ?AMIGA */
- + /* Single WILDCHR_MULTI ('*'): this doesn't match slashes */
- + for (; *s && *s != DIRSEP_CHR; INCSTR(s))
- + if ((c = recmatch(p, s, cs)) != 0)
- + return c;
- + /* end of pattern: matched if at end of string, else continue */
- if (*p == 0)
- - return 1;
- - if (isshexp((ZCONST char *)p) == NULL) {
- - /* Optimization for rest of pattern being a literal string:
- - * If there are no other shell expression chars in the rest
- - * of the pattern behind the multi-char wildcard, then just
- - * compare the literal string tail.
- - */
- - ZCONST uch *srest;
- -
- - srest = s + (strlen((ZCONST char *)s) - strlen((ZCONST char *)p));
- - if (srest - s < 0)
- - /* remaining literal string from pattern is longer than rest
- - * of test string, there can't be a match
- - */
- - return 0;
- - else
- - /* compare the remaining literal pattern string with the last
- - * bytes of the test string to check for a match
- - */
- + return (*s == 0);
- + /* continue to match if at DIRSEP_CHR in pattern, else give up */
- + return (*p == DIRSEP_CHR || (*p == '\\' && p[1] == DIRSEP_CHR))
- + ? recmatch(p, s, cs) : 2;
- + }
- + /* Two consecutive WILDCHR_MULTI ("**"): this matches DIRSEP_CHR ('/') */
- + p++; /* move p past the second WILDCHR_MULTI */
- + /* continue with the normal non-WILD_STOP_AT_DIR code */
- + } /* wild_stop_at_dir */
- +
- + /* Not wild_stop_at_dir */
- + if (*p == 0)
- + return 1;
- + if (!isshexp((char *)p))
- + {
- + /* optimization for rest of pattern being a literal string */
- +
- + /* optimization to handle patterns like *.txt */
- + /* if the first char in the pattern is '*' and there */
- + /* are no other shell expression chars, i.e. a literal string */
- + /* then just compare the literal string at the end */
- +
- + ZCONST char *srest;
- +
- + srest = s + (strlen(s) - strlen(p));
- + if (srest - s < 0)
- + /* remaining literal string from pattern is longer than rest of
- + test string, there can't be a match
- + */
- + return 0;
- + else
- + /* compare the remaining literal pattern string with the last bytes
- + of the test string to check for a match */
- #ifdef _MBCS
- - {
- - ZCONST uch *q = s;
- + {
- + ZCONST char *q = s;
-
- - /* MBCS-aware code must not scan backwards into a string from
- - * the end.
- - * So, we have to move forward by character from our well-known
- - * character position s in the test string until we have
- - * advanced to the srest position.
- - */
- - while (q < srest)
- - INCSTR(q);
- - /* In case the byte *srest is a trailing byte of a multibyte
- - * character in the test string s, we have actually advanced
- - * past the position (srest).
- - * For this case, the match has failed!
- - */
- - if (q != srest)
- - return 0;
- - return ((ic
- - ? namecmp((ZCONST char *)p, (ZCONST char *)q)
- - : strcmp((ZCONST char *)p, (ZCONST char *)q)
- - ) == 0);
- - }
- + /* MBCS-aware code must not scan backwards into a string from
- + * the end.
- + * So, we have to move forward by character from our well-known
- + * character position s in the test string until we have advanced
- + * to the srest position.
- + */
- + while (q < srest)
- + INCSTR(q);
- + /* In case the byte *srest is a trailing byte of a multibyte
- + * character, we have actually advanced past the position (srest).
- + * For this case, the match has failed!
- + */
- + if (q != srest)
- + return 0;
- + return ((cs ? strcmp(p, q) : namecmp(p, q)) == 0);
- + }
- #else /* !_MBCS */
- - return ((ic
- - ? namecmp((ZCONST char *)p, (ZCONST char *)srest)
- - : strcmp((ZCONST char *)p, (ZCONST char *)srest)
- - ) == 0);
- + return ((cs ? strcmp(p, srest) : namecmp(p, srest)) == 0);
- #endif /* ?_MBCS */
- - } else {
- - /* pattern contains more wildcards, continue with recursion... */
- - for (; *s; INCSTR(s))
- - if ((c = recmatch(p, s, ic __WDL)) != 0)
- - return (int)c;
- - return 2; /* 2 means give up--match will return false */
- - }
- }
- -
- - /* Parse and process the list of characters and ranges in brackets */
- - if (c == BEG_RANGE) {
- - int e; /* flag true if next char to be taken literally */
- - ZCONST uch *q; /* pointer to end of [-] group */
- - int r; /* flag true to match anything but the range */
- -
- - if (*s == 0) /* need a character to match */
- - return 0;
- - p += (r = (*p == '!' || *p == '^')); /* see if reverse */
- - for (q = p, e = 0; *q; INCSTR(q)) /* find closing bracket */
- - if (e)
- - e = 0;
- - else
- - if (*q == '\\') /* GRR: change to ^ for MS-DOS, OS/2? */
- - e = 1;
- - else if (*q == END_RANGE)
- - break;
- - if (*q != END_RANGE) /* nothing matches if bad syntax */
- - return 0;
- - for (c = 0, e = (*p == '-'); p < q; INCSTR(p)) {
- - /* go through the list */
- - if (!e && *p == '\\') /* set escape flag if \ */
- - e = 1;
- - else if (!e && *p == '-') /* set start of range if - */
- - c = *(p-1);
- - else {
- - unsigned int cc = Case(*s);
- -
- - if (*(p+1) != '-')
- - for (c = c ? c : *p; c <= *p; c++) /* compare range */
- - if ((unsigned)Case(c) == cc) /* typecast for MSC bug */
- - return r ? 0 : recmatch(q + 1, s + 1, ic __WDL);
- - c = e = 0; /* clear range, escape flags */
- - }
- - }
- - return r ? recmatch(q + CLEN(q), s + CLEN(s), ic __WDL) : 0;
- - /* bracket match failed */
- + else
- + {
- + /* pattern contains more wildcards, continue with recursion... */
- + for (; *s; INCSTR(s))
- + if ((c = recmatch(p, s, cs)) != 0)
- + return c;
- + return 2; /* 2 means give up--match will return false */
- }
- + }
-
- - /* if escape ('\\'), just compare next character */
- - if (c == '\\' && (c = *p++) == 0) /* if \ at end, then syntax error */
- - return 0;
- +#ifndef VMS /* No bracket matching in VMS */
- + /* Parse and process the list of characters and ranges in brackets */
- + if (!no_wild && allow_regex && c == '[')
- + {
- + int e; /* flag true if next char to be taken literally */
- + ZCONST char *q; /* pointer to end of [-] group */
- + int r; /* flag true to match anything but the range */
- +
- + if (*s == 0) /* need a character to match */
- + return 0;
- + p += (r = (*p == '!' || *p == '^')); /* see if reverse */
- + for (q = p, e = 0; *q; q++) /* find closing bracket */
- + if (e)
- + e = 0;
- + else
- + if (*q == '\\')
- + e = 1;
- + else if (*q == ']')
- + break;
- + if (*q != ']') /* nothing matches if bad syntax */
- + return 0;
- + for (c = 0, e = *p == '-'; p < q; p++) /* go through the list */
- + {
- + if (e == 0 && *p == '\\') /* set escape flag if \ */
- + e = 1;
- + else if (e == 0 && *p == '-') /* set start of range if - */
- + c = *(p-1);
- + else
- + {
- + uch cc = (cs ? (uch)*s : case_map((uch)*s));
- + uch uc = (uch) c;
- + if (*(p+1) != '-')
- + for (uc = uc ? uc : (uch)*p; uc <= (uch)*p; uc++)
- + /* compare range */
- + if ((cs ? uc : case_map(uc)) == cc)
- + return r ? 0 : recmatch(q + CLEN(q), s + CLEN(s), cs);
- + c = e = 0; /* clear range, escape flags */
- + }
- + }
- + return r ? recmatch(q + CLEN(q), s + CLEN(s), cs) : 0;
- + /* bracket match failed */
- + }
- +#endif /* !VMS */
-
- - /* just a character--compare it */
- -#ifdef QDOS
- - return QMatch(Case((uch)c), Case(*s)) ?
- - recmatch(p, s + CLEN(s), ic __WDL) : 0;
- -#else
- - return Case((uch)c) == Case(*s) ?
- - recmatch(p, s + CLEN(s), ic __WDL) : 0;
- -#endif
- + /* If escape ('\'), just compare next character */
- + if (!no_wild && c == '\\')
- + if ((c = *p++) == '\0') /* if \ at end, then syntax error */
- + return 0;
- +
- +#ifdef VMS
- + /* 2005-11-06 SMS.
- + Handle "..." wildcard in p with "." or "]" in s.
- + */
- + if ((c == '.') && (*p == '.') && (*(p+ CLEN( p)) == '.') &&
- + ((*s == '.') || (*s == ']')))
- + {
- + /* Match "...]" with "]". Continue after "]" in both. */
- + if ((*(p+ 2* CLEN( p)) == ']') && (*s == ']'))
- + return recmatch( (p+ 3* CLEN( p)), (s+ CLEN( s)), cs);
- +
- + /* Else, look for a reduced match in s, until "]" in or end of s. */
- + for (; *s && (*s != ']'); INCSTR(s))
- + if (*s == '.')
- + /* If reduced match, then continue after "..." in p, "." in s. */
- + if ((c = recmatch( (p+ CLEN( p)), s, cs)) != 0)
- + return (int)c;
- +
- + /* Match "...]" with "]". Continue after "]" in both. */
- + if ((*(p+ 2* CLEN( p)) == ']') && (*s == ']'))
- + return recmatch( (p+ 3* CLEN( p)), (s+ CLEN( s)), cs);
- +
- + /* No reduced match. Quit. */
- + return 2;
- + }
- +
- +#endif /* def VMS */
- +
- + /* Just a character--compare it */
- + return (cs ? c == *s : case_map((uch)c) == case_map((uch)*s)) ?
- + recmatch(p, s + CLEN(s), cs) : 0;
- +}
-
- -} /* end function recmatch() */
-
-
-
- +/*************************************************************************************************/
- static char *isshexp(p)
- ZCONST char *p;
- /* If p is a sh expression, a pointer to the first special character is
|