unzip-6.0-fix-recmatch.patch 18 KB


  1. diff -up unzip60/match.c.recmatch unzip60/match.c
  2. --- unzip60/match.c.recmatch 2005-08-14 13:00:36.000000000 -0400
  3. +++ unzip60/match.c 2013-05-28 10:29:57.949077543 -0400
  4. @@ -27,16 +27,14 @@
  5. ---------------------------------------------------------------------------
  6. - Copyright on recmatch() from Zip's util.c (although recmatch() was almost
  7. - certainly written by Mark Adler...ask me how I can tell :-) ):
  8. + Copyright on recmatch() from Zip's util.c
  9. + Copyright (c) 1990-2005 Info-ZIP. All rights reserved.
  10. - Copyright (C) 1990-1992 Mark Adler, Richard B. Wales, Jean-loup Gailly,
  11. - Kai Uwe Rommel and Igor Mandrichenko.
  12. + See the accompanying file LICENSE, version 2004-May-22 or later
  13. + for terms of use.
  14. + If, for some reason, both of these files are missing, the Info-ZIP license
  15. + also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html
  16. - Permission is granted to any individual or institution to use, copy,
  17. - or redistribute this software so long as all of the original files are
  18. - included unmodified, that it is not sold for profit, and that this copy-
  19. - right notice is retained.
  20. ---------------------------------------------------------------------------
  21. @@ -53,7 +51,7 @@
  22. A set is composed of characters or ranges; a range looks like ``character
  23. hyphen character'' (as in 0-9 or A-Z). [0-9a-zA-Z_] is the minimal set of
  24. - characters allowed in the [..] pattern construct. Other characters are
  25. + characters allowed in the [..] pattern construct. Other characters are
  26. allowed (i.e., 8-bit characters) if your system will support them.
  27. To suppress the special syntactic significance of any of ``[]*?!^-\'', in-
  28. @@ -101,8 +99,32 @@
  29. # define WILDCHAR '?'
  30. # define BEG_RANGE '['
  31. # define END_RANGE ']'
  32. +# define WILDCHR_SINGLE '?'
  33. +# define DIRSEP_CHR '/'
  34. +# define WILDCHR_MULTI '*'
  35. #endif
  36. +#ifdef WILD_STOP_AT_DIR
  37. + int wild_stop_at_dir = 1; /* default wildcards do not include / in matches */
  38. +#else
  39. + int wild_stop_at_dir = 0; /* default wildcards do include / in matches */
  40. +#endif
  41. +
  42. +
  43. +
  44. +/*
  45. + * case mapping functions. case_map is used to ignore case in comparisons,
  46. + * to_up is used to force upper case even on Unix (for dosify option).
  47. + */
  48. +#ifdef USE_CASE_MAP
  49. +# define case_map(c) upper[(c) & 0xff]
  50. +# define to_up(c) upper[(c) & 0xff]
  51. +#else
  52. +# define case_map(c) (c)
  53. +# define to_up(c) ((c) >= 'a' && (c) <= 'z' ? (c)-'a'+'A' : (c))
  54. +#endif /* USE_CASE_MAP */
  55. +
  56. +
  57. #if 0 /* GRR: add this to unzip.h someday... */
  58. #if !(defined(MSDOS) && defined(DOSWILD))
  59. #ifdef WILD_STOP_AT_DIR
  60. @@ -114,8 +136,8 @@ int recmatch OF((ZCONST uch *pattern, ZC
  61. int ignore_case __WDLPRO));
  62. #endif
  63. #endif /* 0 */
  64. -static int recmatch OF((ZCONST uch *pattern, ZCONST uch *string,
  65. - int ignore_case __WDLPRO));
  66. +static int recmatch OF((ZCONST char *, ZCONST char *,
  67. + int));
  68. static char *isshexp OF((ZCONST char *p));
  69. static int namecmp OF((ZCONST char *s1, ZCONST char *s2));
  70. @@ -154,192 +176,240 @@ int match(string, pattern, ignore_case _
  71. }
  72. dospattern[j-1] = '\0'; /* nuke the end "." */
  73. }
  74. - j = recmatch((uch *)dospattern, (uch *)string, ignore_case __WDL);
  75. + j = recmatch(dospattern, string, ignore_case);
  76. free(dospattern);
  77. return j == 1;
  78. } else
  79. #endif /* MSDOS && DOSWILD */
  80. - return recmatch((uch *)pattern, (uch *)string, ignore_case __WDL) == 1;
  81. + return recmatch(pattern, string, ignore_case) == 1;
  82. }
  83. +#ifdef _MBCS
  84. +
  85. +char *___tmp_ptr;
  86. +#endif
  87. -static int recmatch(p, s, ic __WDL)
  88. - ZCONST uch *p; /* sh pattern to match */
  89. - ZCONST uch *s; /* string to which to match it */
  90. - int ic; /* true for case insensitivity */
  91. - __WDLDEF /* directory sepchar for WildStopAtDir mode, or 0 */
  92. +static int recmatch(p, s, cs)
  93. +ZCONST char *p; /* sh pattern to match */
  94. +ZCONST char *s; /* string to match it to */
  95. +int cs; /* flag: nonzero forces case-sensitive matching; NOTE(review): match() passes ignore_case here, so the sense looks inverted -- verify */
  96. /* Recursively compare the sh pattern p with the string s and return 1 if
  97. - * they match, and 0 or 2 if they don't or if there is a syntax error in the
  98. - * pattern. This routine recurses on itself no more deeply than the number
  99. - * of characters in the pattern. */
  100. + they match, and 0 or 2 if they don't or if there is a syntax error in the
  101. + pattern. This routine recurses on itself no deeper than the number of
  102. + characters in the pattern. */
  103. {
  104. - unsigned int c; /* pattern char or start of range in [-] loop */
  105. + int c; /* pattern char or start of range in [-] loop */
  106. + /* Get first character, the pattern for new recmatch calls follows */
  107. + /* borrowed from Zip's global.c */
  108. + int no_wild = 0;
  109. + int allow_regex=1;
  110. + /* This fix provided by akt@m5.dion.ne.jp for Japanese.
  111. + See 21 July 2006 mail.
  112. + It only applies when p is pointing to a doublebyte character and
  113. + things like / and wildcards are not doublebyte. This probably
  114. + should not be needed. */
  115. - /* Get first character, the pattern for new recmatch calls follows */
  116. - c = *p; INCSTR(p);
  117. +#ifdef _MBCS
  118. + if (CLEN(p) == 2) {
  119. + if (CLEN(s) == 2) {
  120. + return (*p == *s && *(p+1) == *(s+1)) ?
  121. + recmatch(p + 2, s + 2, cs) : 0;
  122. + } else {
  123. + return 0;
  124. + }
  125. + }
  126. +#endif /* ?_MBCS */
  127. - /* If that was the end of the pattern, match if string empty too */
  128. - if (c == 0)
  129. - return *s == 0;
  130. + c = *POSTINCSTR(p);
  131. - /* '?' (or '%') matches any character (but not an empty string). */
  132. - if (c == WILDCHAR)
  133. -#ifdef WILD_STOP_AT_DIR
  134. - /* If uO.W_flag is non-zero, it won't match '/' */
  135. - return (*s && (!sepc || *s != (uch)sepc))
  136. - ? recmatch(p, s + CLEN(s), ic, sepc) : 0;
  137. -#else
  138. - return *s ? recmatch(p, s + CLEN(s), ic) : 0;
  139. -#endif
  140. + /* If that was the end of the pattern, match if string empty too */
  141. + if (c == 0)
  142. + return *s == 0;
  143. +
  144. + /* '?' (or '%' or '#') matches any character (but not an empty string) */
  145. + if (c == WILDCHR_SINGLE) {
  146. + if (wild_stop_at_dir)
  147. + return (*s && *s != DIRSEP_CHR) ? recmatch(p, s + CLEN(s), cs) : 0;
  148. + else
  149. + return *s ? recmatch(p, s + CLEN(s), cs) : 0;
  150. + }
  151. - /* '*' matches any number of characters, including zero */
  152. + /* WILDCHR_MULTI ('*') matches any number of characters, including zero */
  153. #ifdef AMIGA
  154. - if (c == '#' && *p == '?') /* "#?" is Amiga-ese for "*" */
  155. - c = '*', p++;
  156. + if (!no_wild && c == '#' && *p == '?') /* "#?" is Amiga-ese for "*" */
  157. + c = WILDCHR_MULTI, p++;
  158. #endif /* AMIGA */
  159. - if (c == '*') {
  160. -#ifdef WILD_STOP_AT_DIR
  161. - if (sepc) {
  162. - /* check for single "*" or double "**" */
  163. -# ifdef AMIGA
  164. - if ((c = p[0]) == '#' && p[1] == '?') /* "#?" is Amiga-ese for "*" */
  165. - c = '*', p++;
  166. - if (c != '*') {
  167. -# else /* !AMIGA */
  168. - if (*p != '*') {
  169. -# endif /* ?AMIGA */
  170. - /* single "*": this doesn't match the dirsep character */
  171. - for (; *s && *s != (uch)sepc; INCSTR(s))
  172. - if ((c = recmatch(p, s, ic, sepc)) != 0)
  173. - return (int)c;
  174. - /* end of pattern: matched if at end of string, else continue */
  175. - if (*p == '\0')
  176. - return (*s == 0);
  177. - /* continue to match if at sepc in pattern, else give up */
  178. - return (*p == (uch)sepc || (*p == '\\' && p[1] == (uch)sepc))
  179. - ? recmatch(p, s, ic, sepc) : 2;
  180. - }
  181. - /* "**": this matches slashes */
  182. - ++p; /* move p behind the second '*' */
  183. - /* and continue with the non-W_flag code variant */
  184. - }
  185. -#endif /* WILD_STOP_AT_DIR */
  186. + if (!no_wild && c == WILDCHR_MULTI)
  187. + {
  188. + if (wild_stop_at_dir) {
  189. + /* Check for an immediately following WILDCHR_MULTI */
  190. +# ifdef AMIGA
  191. + if ((c = p[0]) == '#' && p[1] == '?') /* "#?" is Amiga-ese for "*" */
  192. + c = WILDCHR_MULTI, p++;
  193. + if (c != WILDCHR_MULTI) {
  194. +# else /* !AMIGA */
  195. + if (*p != WILDCHR_MULTI) {
  196. +# endif /* ?AMIGA */
  197. + /* Single WILDCHR_MULTI ('*'): this doesn't match slashes */
  198. + for (; *s && *s != DIRSEP_CHR; INCSTR(s))
  199. + if ((c = recmatch(p, s, cs)) != 0)
  200. + return c;
  201. + /* end of pattern: matched if at end of string, else continue */
  202. if (*p == 0)
  203. - return 1;
  204. - if (isshexp((ZCONST char *)p) == NULL) {
  205. - /* Optimization for rest of pattern being a literal string:
  206. - * If there are no other shell expression chars in the rest
  207. - * of the pattern behind the multi-char wildcard, then just
  208. - * compare the literal string tail.
  209. - */
  210. - ZCONST uch *srest;
  211. -
  212. - srest = s + (strlen((ZCONST char *)s) - strlen((ZCONST char *)p));
  213. - if (srest - s < 0)
  214. - /* remaining literal string from pattern is longer than rest
  215. - * of test string, there can't be a match
  216. - */
  217. - return 0;
  218. - else
  219. - /* compare the remaining literal pattern string with the last
  220. - * bytes of the test string to check for a match
  221. - */
  222. + return (*s == 0);
  223. + /* continue to match if at DIRSEP_CHR in pattern, else give up */
  224. + return (*p == DIRSEP_CHR || (*p == '\\' && p[1] == DIRSEP_CHR))
  225. + ? recmatch(p, s, cs) : 2;
  226. + }
  227. + /* Two consecutive WILDCHR_MULTI ("**"): this matches DIRSEP_CHR ('/') */
  228. + p++; /* move p past the second WILDCHR_MULTI */
  229. + /* continue with the normal non-WILD_STOP_AT_DIR code */
  230. + } /* wild_stop_at_dir */
  231. +
  232. + /* Not wild_stop_at_dir */
  233. + if (*p == 0)
  234. + return 1;
  235. + if (!isshexp((char *)p))
  236. + {
  237. + /* optimization for rest of pattern being a literal string */
  238. +
  239. + /* optimization to handle patterns like *.txt */
  240. + /* if the first char in the pattern is '*' and there */
  241. + /* are no other shell expression chars, i.e. a literal string */
  242. + /* then just compare the literal string at the end */
  243. +
  244. + ZCONST char *srest;
  245. +
  246. + srest = s + (strlen(s) - strlen(p));
  247. + if (srest - s < 0)
  248. + /* remaining literal string from pattern is longer than rest of
  249. + test string, there can't be a match
  250. + */
  251. + return 0;
  252. + else
  253. + /* compare the remaining literal pattern string with the last bytes
  254. + of the test string to check for a match */
  255. #ifdef _MBCS
  256. - {
  257. - ZCONST uch *q = s;
  258. + {
  259. + ZCONST char *q = s;
  260. - /* MBCS-aware code must not scan backwards into a string from
  261. - * the end.
  262. - * So, we have to move forward by character from our well-known
  263. - * character position s in the test string until we have
  264. - * advanced to the srest position.
  265. - */
  266. - while (q < srest)
  267. - INCSTR(q);
  268. - /* In case the byte *srest is a trailing byte of a multibyte
  269. - * character in the test string s, we have actually advanced
  270. - * past the position (srest).
  271. - * For this case, the match has failed!
  272. - */
  273. - if (q != srest)
  274. - return 0;
  275. - return ((ic
  276. - ? namecmp((ZCONST char *)p, (ZCONST char *)q)
  277. - : strcmp((ZCONST char *)p, (ZCONST char *)q)
  278. - ) == 0);
  279. - }
  280. + /* MBCS-aware code must not scan backwards into a string from
  281. + * the end.
  282. + * So, we have to move forward by character from our well-known
  283. + * character position s in the test string until we have advanced
  284. + * to the srest position.
  285. + */
  286. + while (q < srest)
  287. + INCSTR(q);
  288. + /* In case the byte *srest is a trailing byte of a multibyte
  289. + * character, we have actually advanced past the position (srest).
  290. + * For this case, the match has failed!
  291. + */
  292. + if (q != srest)
  293. + return 0;
  294. + return ((cs ? strcmp(p, q) : namecmp(p, q)) == 0);
  295. + }
  296. #else /* !_MBCS */
  297. - return ((ic
  298. - ? namecmp((ZCONST char *)p, (ZCONST char *)srest)
  299. - : strcmp((ZCONST char *)p, (ZCONST char *)srest)
  300. - ) == 0);
  301. + return ((cs ? strcmp(p, srest) : namecmp(p, srest)) == 0);
  302. #endif /* ?_MBCS */
  303. - } else {
  304. - /* pattern contains more wildcards, continue with recursion... */
  305. - for (; *s; INCSTR(s))
  306. - if ((c = recmatch(p, s, ic __WDL)) != 0)
  307. - return (int)c;
  308. - return 2; /* 2 means give up--match will return false */
  309. - }
  310. }
  311. -
  312. - /* Parse and process the list of characters and ranges in brackets */
  313. - if (c == BEG_RANGE) {
  314. - int e; /* flag true if next char to be taken literally */
  315. - ZCONST uch *q; /* pointer to end of [-] group */
  316. - int r; /* flag true to match anything but the range */
  317. -
  318. - if (*s == 0) /* need a character to match */
  319. - return 0;
  320. - p += (r = (*p == '!' || *p == '^')); /* see if reverse */
  321. - for (q = p, e = 0; *q; INCSTR(q)) /* find closing bracket */
  322. - if (e)
  323. - e = 0;
  324. - else
  325. - if (*q == '\\') /* GRR: change to ^ for MS-DOS, OS/2? */
  326. - e = 1;
  327. - else if (*q == END_RANGE)
  328. - break;
  329. - if (*q != END_RANGE) /* nothing matches if bad syntax */
  330. - return 0;
  331. - for (c = 0, e = (*p == '-'); p < q; INCSTR(p)) {
  332. - /* go through the list */
  333. - if (!e && *p == '\\') /* set escape flag if \ */
  334. - e = 1;
  335. - else if (!e && *p == '-') /* set start of range if - */
  336. - c = *(p-1);
  337. - else {
  338. - unsigned int cc = Case(*s);
  339. -
  340. - if (*(p+1) != '-')
  341. - for (c = c ? c : *p; c <= *p; c++) /* compare range */
  342. - if ((unsigned)Case(c) == cc) /* typecast for MSC bug */
  343. - return r ? 0 : recmatch(q + 1, s + 1, ic __WDL);
  344. - c = e = 0; /* clear range, escape flags */
  345. - }
  346. - }
  347. - return r ? recmatch(q + CLEN(q), s + CLEN(s), ic __WDL) : 0;
  348. - /* bracket match failed */
  349. + else
  350. + {
  351. + /* pattern contains more wildcards, continue with recursion... */
  352. + for (; *s; INCSTR(s))
  353. + if ((c = recmatch(p, s, cs)) != 0)
  354. + return c;
  355. + return 2; /* 2 means give up--match() will return false */
  356. }
  357. + }
  358. - /* if escape ('\\'), just compare next character */
  359. - if (c == '\\' && (c = *p++) == 0) /* if \ at end, then syntax error */
  360. - return 0;
  361. +#ifndef VMS /* No bracket matching in VMS */
  362. + /* Parse and process the list of characters and ranges in brackets */
  363. + if (!no_wild && allow_regex && c == '[')
  364. + {
  365. + int e; /* flag true if next char to be taken literally */
  366. + ZCONST char *q; /* pointer to end of [-] group */
  367. + int r; /* flag true to match anything but the range */
  368. +
  369. + if (*s == 0) /* need a character to match */
  370. + return 0;
  371. + p += (r = (*p == '!' || *p == '^')); /* see if reverse */
  372. + for (q = p, e = 0; *q; q++) /* find closing bracket */
  373. + if (e)
  374. + e = 0;
  375. + else
  376. + if (*q == '\\')
  377. + e = 1;
  378. + else if (*q == ']')
  379. + break;
  380. + if (*q != ']') /* nothing matches if bad syntax */
  381. + return 0;
  382. + for (c = 0, e = *p == '-'; p < q; p++) /* go through the list */
  383. + {
  384. + if (e == 0 && *p == '\\') /* set escape flag if \ */
  385. + e = 1;
  386. + else if (e == 0 && *p == '-') /* set start of range if - */
  387. + c = *(p-1);
  388. + else
  389. + {
  390. + uch cc = (cs ? (uch)*s : case_map((uch)*s));
  391. + uch uc = (uch) c;
  392. + if (*(p+1) != '-')
  393. + for (uc = uc ? uc : (uch)*p; uc <= (uch)*p; uc++)
  394. + /* compare range */
  395. + if ((cs ? uc : case_map(uc)) == cc)
  396. + return r ? 0 : recmatch(q + CLEN(q), s + CLEN(s), cs);
  397. + c = e = 0; /* clear range, escape flags */
  398. + }
  399. + }
  400. + return r ? recmatch(q + CLEN(q), s + CLEN(s), cs) : 0;
  401. + /* bracket match failed */
  402. + }
  403. +#endif /* !VMS */
  404. - /* just a character--compare it */
  405. -#ifdef QDOS
  406. - return QMatch(Case((uch)c), Case(*s)) ?
  407. - recmatch(p, s + CLEN(s), ic __WDL) : 0;
  408. -#else
  409. - return Case((uch)c) == Case(*s) ?
  410. - recmatch(p, s + CLEN(s), ic __WDL) : 0;
  411. -#endif
  412. + /* If escape ('\'), just compare next character */
  413. + if (!no_wild && c == '\\')
  414. + if ((c = *p++) == '\0') /* if \ at end, then syntax error */
  415. + return 0;
  416. +
  417. +#ifdef VMS
  418. + /* 2005-11-06 SMS.
  419. + Handle "..." wildcard in p with "." or "]" in s.
  420. + */
  421. + if ((c == '.') && (*p == '.') && (*(p+ CLEN( p)) == '.') &&
  422. + ((*s == '.') || (*s == ']')))
  423. + {
  424. + /* Match "...]" with "]". Continue after "]" in both. */
  425. + if ((*(p+ 2* CLEN( p)) == ']') && (*s == ']'))
  426. + return recmatch( (p+ 3* CLEN( p)), (s+ CLEN( s)), cs);
  427. +
  428. + /* Else, look for a reduced match in s, up to a "]" in s or the end of s. */
  429. + for (; *s && (*s != ']'); INCSTR(s))
  430. + if (*s == '.')
  431. + /* If reduced match, then continue after "..." in p, "." in s. */
  432. + if ((c = recmatch( (p+ CLEN( p)), s, cs)) != 0)
  433. + return (int)c;
  434. +
  435. + /* Match "...]" with "]". Continue after "]" in both. */
  436. + if ((*(p+ 2* CLEN( p)) == ']') && (*s == ']'))
  437. + return recmatch( (p+ 3* CLEN( p)), (s+ CLEN( s)), cs);
  438. +
  439. + /* No reduced match. Quit. */
  440. + return 2;
  441. + }
  442. +
  443. +#endif /* def VMS */
  444. +
  445. + /* Just a character--compare it */
  446. + return (cs ? c == *s : case_map((uch)c) == case_map((uch)*s)) ?
  447. + recmatch(p, s + CLEN(s), cs) : 0;
  448. +}
  449. -} /* end function recmatch() */
  450. +/*************************************************************************************************/
  451. static char *isshexp(p)
  452. ZCONST char *p;
  453. /* If p is a sh expression, a pointer to the first special character is