wildmatch.c 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369
  1. /*
  2. ** Do shell-style pattern matching for ?, \, [], and * characters.
  3. ** It is 8bit clean.
  4. **
  5. ** Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
  6. ** Rich $alz is now <rsalz@bbn.com>.
  7. **
  8. ** Modified by Wayne Davison to special-case '/' matching, to make '**'
  9. ** work differently than '*', and to fix the character-class code.
  10. */
  11. #include "rsync.h"
  12. /* What character marks an inverted character class? */
  13. #define NEGATE_CLASS '!'
  14. #define NEGATE_CLASS2 '^'
  15. #define FALSE 0
  16. #define TRUE 1
  17. #define ABORT_ALL -1
  18. #define ABORT_TO_STARSTAR -2
  19. #define CC_EQ(class, len, litmatch) ((len) == sizeof (litmatch)-1 \
  20. && *(class) == *(litmatch) \
  21. && strncmp((char*)class, litmatch, len) == 0)
  22. #if defined STDC_HEADERS || !defined isascii
  23. # define ISASCII(c) 1
  24. #else
  25. # define ISASCII(c) isascii(c)
  26. #endif
  27. #ifdef isblank
  28. # define ISBLANK(c) (ISASCII(c) && isblank(c))
  29. #else
  30. # define ISBLANK(c) ((c) == ' ' || (c) == '\t')
  31. #endif
  32. #ifdef isgraph
  33. # define ISGRAPH(c) (ISASCII(c) && isgraph(c))
  34. #else
  35. # define ISGRAPH(c) (ISASCII(c) && isprint(c) && !isspace(c))
  36. #endif
  37. #define ISPRINT(c) (ISASCII(c) && isprint(c))
  38. #define ISDIGIT(c) (ISASCII(c) && isdigit(c))
  39. #define ISALNUM(c) (ISASCII(c) && isalnum(c))
  40. #define ISALPHA(c) (ISASCII(c) && isalpha(c))
  41. #define ISCNTRL(c) (ISASCII(c) && iscntrl(c))
  42. #define ISLOWER(c) (ISASCII(c) && islower(c))
  43. #define ISPUNCT(c) (ISASCII(c) && ispunct(c))
  44. #define ISSPACE(c) (ISASCII(c) && isspace(c))
  45. #define ISUPPER(c) (ISASCII(c) && isupper(c))
  46. #define ISXDIGIT(c) (ISASCII(c) && isxdigit(c))
  47. #ifdef WILD_TEST_ITERATIONS
  48. int wildmatch_iteration_count;
  49. #endif
  50. static int force_lower_case = 0;
  51. /* Match pattern "p" against the a virtually-joined string consisting
  52. * of "text" and any strings in array "a". */
  53. static int dowild(const uchar *p, const uchar *text, const uchar*const *a)
  54. {
  55. uchar p_ch;
  56. #ifdef WILD_TEST_ITERATIONS
  57. wildmatch_iteration_count++;
  58. #endif
  59. for ( ; (p_ch = *p) != '\0'; text++, p++) {
  60. int matched, special;
  61. uchar t_ch, prev_ch;
  62. while ((t_ch = *text) == '\0') {
  63. if (*a == NULL) {
  64. if (p_ch != '*')
  65. return ABORT_ALL;
  66. break;
  67. }
  68. text = *a++;
  69. }
  70. if (force_lower_case && ISUPPER(t_ch))
  71. t_ch = tolower(t_ch);
  72. switch (p_ch) {
  73. case '\\':
  74. /* Literal match with following character. Note that the test
  75. * in "default" handles the p[1] == '\0' failure case. */
  76. p_ch = *++p;
  77. /* FALLTHROUGH */
  78. default:
  79. if (t_ch != p_ch)
  80. return FALSE;
  81. continue;
  82. case '?':
  83. /* Match anything but '/'. */
  84. if (t_ch == '/')
  85. return FALSE;
  86. continue;
  87. case '*':
  88. if (*++p == '*') {
  89. while (*++p == '*') {}
  90. special = TRUE;
  91. } else
  92. special = FALSE;
  93. if (*p == '\0') {
  94. /* Trailing "**" matches everything. Trailing "*" matches
  95. * only if there are no more slash characters. */
  96. if (!special) {
  97. do {
  98. if (strchr((char*)text, '/') != NULL)
  99. return FALSE;
  100. } while ((text = *a++) != NULL);
  101. }
  102. return TRUE;
  103. }
  104. while (1) {
  105. if (t_ch == '\0') {
  106. if ((text = *a++) == NULL)
  107. break;
  108. t_ch = *text;
  109. continue;
  110. }
  111. if ((matched = dowild(p, text, a)) != FALSE) {
  112. if (!special || matched != ABORT_TO_STARSTAR)
  113. return matched;
  114. } else if (!special && t_ch == '/')
  115. return ABORT_TO_STARSTAR;
  116. t_ch = *++text;
  117. }
  118. return ABORT_ALL;
  119. case '[':
  120. p_ch = *++p;
  121. #ifdef NEGATE_CLASS2
  122. if (p_ch == NEGATE_CLASS2)
  123. p_ch = NEGATE_CLASS;
  124. #endif
  125. /* Assign literal TRUE/FALSE because of "matched" comparison. */
  126. special = p_ch == NEGATE_CLASS? TRUE : FALSE;
  127. if (special) {
  128. /* Inverted character class. */
  129. p_ch = *++p;
  130. }
  131. prev_ch = 0;
  132. matched = FALSE;
  133. do {
  134. if (!p_ch)
  135. return ABORT_ALL;
  136. if (p_ch == '\\') {
  137. p_ch = *++p;
  138. if (!p_ch)
  139. return ABORT_ALL;
  140. if (t_ch == p_ch)
  141. matched = TRUE;
  142. } else if (p_ch == '-' && prev_ch && p[1] && p[1] != ']') {
  143. p_ch = *++p;
  144. if (p_ch == '\\') {
  145. p_ch = *++p;
  146. if (!p_ch)
  147. return ABORT_ALL;
  148. }
  149. if (t_ch <= p_ch && t_ch >= prev_ch)
  150. matched = TRUE;
  151. p_ch = 0; /* This makes "prev_ch" get set to 0. */
  152. } else if (p_ch == '[' && p[1] == ':') {
  153. const uchar *s;
  154. int i;
  155. for (s = p += 2; (p_ch = *p) && p_ch != ']'; p++) {} /*SHARED ITERATOR*/
  156. if (!p_ch)
  157. return ABORT_ALL;
  158. i = p - s - 1;
  159. if (i < 0 || p[-1] != ':') {
  160. /* Didn't find ":]", so treat like a normal set. */
  161. p = s - 2;
  162. p_ch = '[';
  163. if (t_ch == p_ch)
  164. matched = TRUE;
  165. continue;
  166. }
  167. if (CC_EQ(s,i, "alnum")) {
  168. if (ISALNUM(t_ch))
  169. matched = TRUE;
  170. } else if (CC_EQ(s,i, "alpha")) {
  171. if (ISALPHA(t_ch))
  172. matched = TRUE;
  173. } else if (CC_EQ(s,i, "blank")) {
  174. if (ISBLANK(t_ch))
  175. matched = TRUE;
  176. } else if (CC_EQ(s,i, "cntrl")) {
  177. if (ISCNTRL(t_ch))
  178. matched = TRUE;
  179. } else if (CC_EQ(s,i, "digit")) {
  180. if (ISDIGIT(t_ch))
  181. matched = TRUE;
  182. } else if (CC_EQ(s,i, "graph")) {
  183. if (ISGRAPH(t_ch))
  184. matched = TRUE;
  185. } else if (CC_EQ(s,i, "lower")) {
  186. if (ISLOWER(t_ch))
  187. matched = TRUE;
  188. } else if (CC_EQ(s,i, "print")) {
  189. if (ISPRINT(t_ch))
  190. matched = TRUE;
  191. } else if (CC_EQ(s,i, "punct")) {
  192. if (ISPUNCT(t_ch))
  193. matched = TRUE;
  194. } else if (CC_EQ(s,i, "space")) {
  195. if (ISSPACE(t_ch))
  196. matched = TRUE;
  197. } else if (CC_EQ(s,i, "upper")) {
  198. if (ISUPPER(t_ch))
  199. matched = TRUE;
  200. } else if (CC_EQ(s,i, "xdigit")) {
  201. if (ISXDIGIT(t_ch))
  202. matched = TRUE;
  203. } else /* malformed [:class:] string */
  204. return ABORT_ALL;
  205. p_ch = 0; /* This makes "prev_ch" get set to 0. */
  206. } else if (t_ch == p_ch)
  207. matched = TRUE;
  208. } while (prev_ch = p_ch, (p_ch = *++p) != ']');
  209. if (matched == special || t_ch == '/')
  210. return FALSE;
  211. continue;
  212. }
  213. }
  214. do {
  215. if (*text)
  216. return FALSE;
  217. } while ((text = *a++) != NULL);
  218. return TRUE;
  219. }
  220. /* Match literal string "s" against the a virtually-joined string consisting
  221. * of "text" and any strings in array "a". */
  222. static int doliteral(const uchar *s, const uchar *text, const uchar*const *a)
  223. {
  224. for ( ; *s != '\0'; text++, s++) {
  225. while (*text == '\0') {
  226. if ((text = *a++) == NULL)
  227. return FALSE;
  228. }
  229. if (*text != *s)
  230. return FALSE;
  231. }
  232. do {
  233. if (*text)
  234. return FALSE;
  235. } while ((text = *a++) != NULL);
  236. return TRUE;
  237. }
  238. /* Return the last "count" path elements from the concatenated string.
  239. * We return a string pointer to the start of the string, and update the
  240. * array pointer-pointer to point to any remaining string elements. */
  241. static const uchar *trailing_N_elements(const uchar*const **a_ptr, int count)
  242. {
  243. const uchar*const *a = *a_ptr;
  244. const uchar*const *first_a = a;
  245. while (*a)
  246. a++;
  247. while (a != first_a) {
  248. const uchar *s = *--a;
  249. s += strlen((char*)s);
  250. while (--s >= *a) {
  251. if (*s == '/' && !--count) {
  252. *a_ptr = a+1;
  253. return s+1;
  254. }
  255. }
  256. }
  257. if (count == 1) {
  258. *a_ptr = a+1;
  259. return *a;
  260. }
  261. return NULL;
  262. }
  263. /* Match the "pattern" against the "text" string. */
  264. int wildmatch(const char *pattern, const char *text)
  265. {
  266. static const uchar *nomore[1]; /* A NULL pointer. */
  267. #ifdef WILD_TEST_ITERATIONS
  268. wildmatch_iteration_count = 0;
  269. #endif
  270. return dowild((const uchar*)pattern, (const uchar*)text, nomore) == TRUE;
  271. }
  272. /* Match the "pattern" against the forced-to-lower-case "text" string. */
  273. int iwildmatch(const char *pattern, const char *text)
  274. {
  275. static const uchar *nomore[1]; /* A NULL pointer. */
  276. int ret;
  277. #ifdef WILD_TEST_ITERATIONS
  278. wildmatch_iteration_count = 0;
  279. #endif
  280. force_lower_case = 1;
  281. ret = dowild((const uchar*)pattern, (const uchar*)text, nomore) == TRUE;
  282. force_lower_case = 0;
  283. return ret;
  284. }
  285. /* Match pattern "p" against the a virtually-joined string consisting
  286. * of all the pointers in array "texts" (which has a NULL pointer at the
  287. * end). The int "where" can be 0 (normal matching), > 0 (match only
  288. * the trailing N slash-separated filename components of "texts"), or < 0
  289. * (match the "pattern" at the start or after any slash in "texts"). */
  290. int wildmatch_array(const char *pattern, const char*const *texts, int where)
  291. {
  292. const uchar *p = (const uchar*)pattern;
  293. const uchar*const *a = (const uchar*const*)texts;
  294. const uchar *text;
  295. int matched;
  296. #ifdef WILD_TEST_ITERATIONS
  297. wildmatch_iteration_count = 0;
  298. #endif
  299. if (where > 0)
  300. text = trailing_N_elements(&a, where);
  301. else
  302. text = *a++;
  303. if (!text)
  304. return FALSE;
  305. if ((matched = dowild(p, text, a)) != TRUE && where < 0
  306. && matched != ABORT_ALL) {
  307. while (1) {
  308. if (*text == '\0') {
  309. if ((text = (uchar*)*a++) == NULL)
  310. return FALSE;
  311. continue;
  312. }
  313. if (*text++ == '/' && (matched = dowild(p, text, a)) != FALSE
  314. && matched != ABORT_TO_STARSTAR)
  315. break;
  316. }
  317. }
  318. return matched == TRUE;
  319. }
  320. /* Match literal string "s" against the a virtually-joined string consisting
  321. * of all the pointers in array "texts" (which has a NULL pointer at the
  322. * end). The int "where" can be 0 (normal matching), or > 0 (match
  323. * only the trailing N slash-separated filename components of "texts"). */
  324. int litmatch_array(const char *string, const char*const *texts, int where)
  325. {
  326. const uchar *s = (const uchar*)string;
  327. const uchar*const *a = (const uchar* const*)texts;
  328. const uchar *text;
  329. if (where > 0)
  330. text = trailing_N_elements(&a, where);
  331. else
  332. text = *a++;
  333. if (!text)
  334. return FALSE;
  335. return doliteral(s, text, a) == TRUE;
  336. }