regex.m4 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397
  1. # serial 73
  2. # Copyright (C) 1996-2001, 2003-2023 Free Software Foundation, Inc.
  3. #
  4. # This file is free software; the Free Software Foundation
  5. # gives unlimited permission to copy and/or distribute it,
  6. # with or without modifications, as long as this notice is preserved.
  7. dnl Initially derived from code in GNU grep.
  8. dnl Mostly written by Jim Meyering.
  dnl Refuse to process this file with an Autoconf older than 2.50.
  9. AC_PREREQ([2.50])
  # gl_REGEX
  # --------
  # Decide whether the system regex implementation can be used or whether
  # the included replacement must be compiled.
  #
  # --with-included-regex=yes|no forces the decision.  With no option, a
  # test program exercising a series of known regex bugs is compiled and
  # run; its exit status is zero only if every check passes.  The outcome
  # is cached in gl_cv_func_re_compile_pattern_working.  When cross
  # compiling, the result is guessed: "no" on mingw, otherwise the value
  # of $gl_cross_guess_normal.
  #
  # When the included regex is selected, _REGEX_INCLUDE_LIMITS_H and
  # _REGEX_LARGE_OFFSETS are defined and every public re_* / reg* entry
  # point is redirected to its rpl_-prefixed replacement.
  AC_DEFUN([gl_REGEX],
  [
    AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles
    AC_ARG_WITH([included-regex],
      [AS_HELP_STRING([--without-included-regex],
                      [don't compile regex; this is the default on systems
                       with recent-enough versions of the GNU C Library
                       (use with caution on other systems).])])

    case $with_included_regex in #(
    yes|no) ac_use_included_regex=$with_included_regex
          ;;
    '')
      # If the system regex support is good enough that it passes the
      # following run test, then default to *not* using the included regex.c.
      # If cross compiling, assume the test would fail and use the included
      # regex.c.
      AC_CHECK_DECLS_ONCE([alarm])
      AC_CHECK_HEADERS_ONCE([malloc.h])
      AC_CACHE_CHECK([for working re_compile_pattern],
                     [gl_cv_func_re_compile_pattern_working],
        [AC_RUN_IFELSE(
          [AC_LANG_PROGRAM(
            [[#include <regex.h>

              #include <locale.h>
              #include <limits.h>
              /* Declares free, which the checks below call.  Do not rely
                 on <malloc.h> or another header declaring it.  */
              #include <stdlib.h>
              #include <string.h>

              #if defined M_CHECK_ACTION || HAVE_DECL_ALARM
              # include <signal.h>
              # include <unistd.h>
              #endif

              #if HAVE_MALLOC_H
              # include <malloc.h>
              #endif

              #ifdef M_CHECK_ACTION
              /* Exit with distinguishable exit code.  */
              static void sigabrt_no_core (int sig) { raise (SIGTERM); }
              #endif
            ]],
            [[/* Exit status: each failed check sets one bit.
                   1  checks run in the en_US.UTF-8 locale
                   2  glibc bug 3957
                   4  Spencer ere test number 75
                   8  empty range, "{1", gawk report, negative RANGE
                  16  RE_ICASE ignored
                  32  bug reported by Vin Shelton
                  64  REG_STARTEND, invalid back reference, glibc bug 11053  */
              int result = 0;
              static struct re_pattern_buffer regex;
              unsigned char folded_chars[UCHAR_MAX + 1];
              int i;
              const char *s;
              struct re_registers regs;

              /* Some builds of glibc go into an infinite loop on this
                 test.  Use alarm to force death, and mallopt to avoid
                 malloc recursion in diagnosing the corrupted heap. */
  #if HAVE_DECL_ALARM
              signal (SIGALRM, SIG_DFL);
              alarm (2);
  #endif
  #ifdef M_CHECK_ACTION
              signal (SIGABRT, sigabrt_no_core);
              mallopt (M_CHECK_ACTION, 2);
  #endif

              if (setlocale (LC_ALL, "en_US.UTF-8"))
                {
                  {
                    /* https://sourceware.org/ml/libc-hacker/2006-09/msg00008.html
                       This test needs valgrind to catch the bug on Debian
                       GNU/Linux 3.1 x86, but it might catch the bug better
                       on other platforms and it shouldn't hurt to try the
                       test here.  */
                    static char const pat[] = "insert into";
                    static char const data[] =
                      "\xFF\0\x12\xA2\xAA\xC4\xB1,K\x12\xC4\xB1*\xACK";
                    re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE
                                   | RE_ICASE);
                    memset (&regex, 0, sizeof regex);
                    s = re_compile_pattern (pat, sizeof pat - 1, &regex);
                    if (s)
                      result |= 1;
                    else
                      {
                        if (re_search (&regex, data, sizeof data - 1,
                                       0, sizeof data - 1, &regs)
                            != -1)
                          result |= 1;
                        regfree (&regex);
                      }
                  }

                  {
                    /* This test is from glibc bug 15078.
                       The test case is from Andreas Schwab in
                       <https://sourceware.org/ml/libc-alpha/2013-01/msg00967.html>.
                       */
                    static char const pat[] = "[^x]x";
                    static char const data[] =
                      /* <U1000><U103B><U103D><U1014><U103A><U102F><U1015><U103A> */
                      "\xe1\x80\x80"
                      "\xe1\x80\xbb"
                      "\xe1\x80\xbd"
                      "\xe1\x80\x94"
                      "\xe1\x80\xba"
                      "\xe1\x80\xaf"
                      "\xe1\x80\x95"
                      "\xe1\x80\xba"
                      "x";
                    re_set_syntax (0);
                    memset (&regex, 0, sizeof regex);
                    s = re_compile_pattern (pat, sizeof pat - 1, &regex);
                    if (s)
                      result |= 1;
                    else
                      {
                        i = re_search (&regex, data, sizeof data - 1,
                                       0, sizeof data - 1, 0);
                        if (i != 0 && i != 21)
                          result |= 1;
                        regfree (&regex);
                      }
                  }

                  if (! setlocale (LC_ALL, "C"))
                    return 1;
                }

              /* This test is from glibc bug 3957, reported by Andrew Mackey.  */
              re_set_syntax (RE_SYNTAX_EGREP | RE_HAT_LISTS_NOT_NEWLINE);
              memset (&regex, 0, sizeof regex);
              s = re_compile_pattern ("a[^x]b", 6, &regex);
              if (s)
                result |= 2;
              else
                {
                  /* This should fail, but succeeds for glibc-2.5.  */
                  if (re_search (&regex, "a\nb", 3, 0, 3, &regs) != -1)
                    result |= 2;
                  regfree (&regex);
                }

              /* This regular expression is from Spencer ere test number 75
                 in grep-2.3.  */
              re_set_syntax (RE_SYNTAX_POSIX_EGREP);
              memset (&regex, 0, sizeof regex);
              for (i = 0; i <= UCHAR_MAX; i++)
                folded_chars[i] = i;
              regex.translate = folded_chars;
              s = re_compile_pattern ("a[[:@:>@:]]b\n", 11, &regex);
              /* This should fail with _Invalid character class name_ error.  */
              if (!s)
                {
                  result |= 4;
                  regfree (&regex);
                }

              /* Ensure that [b-a] is diagnosed as invalid, when
                 using RE_NO_EMPTY_RANGES. */
              re_set_syntax (RE_SYNTAX_POSIX_EGREP | RE_NO_EMPTY_RANGES);
              memset (&regex, 0, sizeof regex);
              s = re_compile_pattern ("a[b-a]", 6, &regex);
              if (s == 0)
                {
                  result |= 8;
                  regfree (&regex);
                }

              /* This should succeed, but does not for glibc-2.1.3.  */
              memset (&regex, 0, sizeof regex);
              s = re_compile_pattern ("{1", 2, &regex);
              if (s)
                result |= 8;
              else
                regfree (&regex);

              /* The following example is derived from a problem report
                 against gawk from Jorge Stolfi <stolfi@ic.unicamp.br>.  */
              memset (&regex, 0, sizeof regex);
              s = re_compile_pattern ("[an\371]*n", 7, &regex);
              if (s)
                result |= 8;
              else
                {
                  /* This should match, but does not for glibc-2.2.1.  */
                  if (re_match (&regex, "an", 2, 0, &regs) != 2)
                    result |= 8;
                  else
                    {
                      free (regs.start);
                      free (regs.end);
                    }
                  regfree (&regex);
                }

              memset (&regex, 0, sizeof regex);
              s = re_compile_pattern ("x", 1, &regex);
              if (s)
                result |= 8;
              else
                {
                  /* glibc-2.2.93 does not work with a negative RANGE argument.  */
                  if (re_search (&regex, "wxy", 3, 2, -2, &regs) != 1)
                    result |= 8;
                  else
                    {
                      free (regs.start);
                      free (regs.end);
                    }
                  regfree (&regex);
                }

              /* The version of regex.c in older versions of gnulib
                 ignored RE_ICASE.  Detect that problem too.  */
              re_set_syntax (RE_SYNTAX_EMACS | RE_ICASE);
              memset (&regex, 0, sizeof regex);
              s = re_compile_pattern ("x", 1, &regex);
              if (s)
                result |= 16;
              else
                {
                  if (re_search (&regex, "WXY", 3, 0, 3, &regs) < 0)
                    result |= 16;
                  else
                    {
                      free (regs.start);
                      free (regs.end);
                    }
                  regfree (&regex);
                }

              /* Catch a bug reported by Vin Shelton in
                 https://lists.gnu.org/r/bug-coreutils/2007-06/msg00089.html
                 */
              re_set_syntax (RE_SYNTAX_POSIX_BASIC
                             & ~RE_CONTEXT_INVALID_DUP
                             & ~RE_NO_EMPTY_RANGES);
              memset (&regex, 0, sizeof regex);
              s = re_compile_pattern ("[[:alnum:]_-]\\\\+\$", 16, &regex);
              if (s)
                result |= 32;
              else
                regfree (&regex);

              /* REG_STARTEND was added to glibc on 2004-01-15.
                 Reject older versions.  */
              if (! REG_STARTEND)
                result |= 64;

              /* Matching with the compiled form of this regexp would provoke
                 an assertion failure prior to glibc-2.28:
                   regexec.c:1375: pop_fail_stack: Assertion 'num >= 0' failed
                 With glibc-2.28, compilation fails and reports the invalid
                 back reference.  */
              re_set_syntax (RE_SYNTAX_POSIX_EGREP);
              memset (&regex, 0, sizeof regex);
              s = re_compile_pattern ("0|()0|\\\\1|0", 10, &regex);
              if (!s)
                {
                  memset (&regs, 0, sizeof regs);
                  i = re_search (&regex, "x", 1, 0, 1, &regs);
                  if (i != -1)
                    result |= 64;
                  if (0 <= i)
                    {
                      free (regs.start);
                      free (regs.end);
                    }
                  regfree (&regex);
                }
              else
                {
                  if (strcmp (s, "Invalid back reference"))
                    result |= 64;
                }

              /* glibc bug 11053.  */
              re_set_syntax (RE_SYNTAX_POSIX_BASIC);
              memset (&regex, 0, sizeof regex);
              static char const pat_sub2[] = "\\\\(a*\\\\)*a*\\\\1";
              s = re_compile_pattern (pat_sub2, sizeof pat_sub2 - 1, &regex);
              if (s)
                result |= 64;
              else
                {
                  memset (&regs, 0, sizeof regs);
                  static char const data[] = "a";
                  int datalen = sizeof data - 1;
                  i = re_search (&regex, data, datalen, 0, datalen, &regs);
                  if (i != 0)
                    result |= 64;
                  else if (regs.num_regs < 2)
                    result |= 64;
                  else if (! (regs.start[0] == 0 && regs.end[0] == 1))
                    result |= 64;
                  else if (! (regs.start[1] == 0 && regs.end[1] == 0))
                    result |= 64;
                  regfree (&regex);
                  /* regs was zeroed above, so these are safe even when
                     the search stored no registers.  */
                  free (regs.start);
                  free (regs.end);
                }

  #if 0
              /* It would be nice to reject hosts whose regoff_t values are too
                 narrow (including glibc on hosts with 64-bit ptrdiff_t and
                 32-bit int), but we should wait until glibc implements this
                 feature.  Otherwise, support for equivalence classes and
                 multibyte collation symbols would always be broken except
                 when compiling --without-included-regex.   */
              if (sizeof (regoff_t) < sizeof (ptrdiff_t)
                  || sizeof (regoff_t) < sizeof (ssize_t))
                result |= 64;
  #endif

              return result;
            ]])],
          [gl_cv_func_re_compile_pattern_working=yes],
          [gl_cv_func_re_compile_pattern_working=no],
          [case "$host_os" in
                     # Guess no on native Windows.
             mingw*) gl_cv_func_re_compile_pattern_working="guessing no" ;;
                     # Otherwise obey --enable-cross-guesses.
             *)      gl_cv_func_re_compile_pattern_working="$gl_cross_guess_normal" ;;
           esac
          ])
        ])
      case "$gl_cv_func_re_compile_pattern_working" in #(
        *yes) ac_use_included_regex=no;; #(
        *no) ac_use_included_regex=yes;;
      esac
      ;;
    *) AC_MSG_ERROR([Invalid value for --with-included-regex: $with_included_regex])
      ;;
    esac

    # Quoted so that an unexpected cache value, which leaves the variable
    # unset, does not turn into a syntax error in the test command.
    if test "$ac_use_included_regex" = yes; then
      AC_DEFINE([_REGEX_INCLUDE_LIMITS_H], [1],
        [Define if you want <regex.h> to include <limits.h>, so that it
         consistently overrides <limits.h>'s RE_DUP_MAX.])
      AC_DEFINE([_REGEX_LARGE_OFFSETS], [1],
        [Define if you want regoff_t to be at least as wide POSIX requires.])
      AC_DEFINE([re_syntax_options], [rpl_re_syntax_options],
        [Define to rpl_re_syntax_options if the replacement should be used.])
      AC_DEFINE([re_set_syntax], [rpl_re_set_syntax],
        [Define to rpl_re_set_syntax if the replacement should be used.])
      AC_DEFINE([re_compile_pattern], [rpl_re_compile_pattern],
        [Define to rpl_re_compile_pattern if the replacement should be used.])
      AC_DEFINE([re_compile_fastmap], [rpl_re_compile_fastmap],
        [Define to rpl_re_compile_fastmap if the replacement should be used.])
      AC_DEFINE([re_search], [rpl_re_search],
        [Define to rpl_re_search if the replacement should be used.])
      AC_DEFINE([re_search_2], [rpl_re_search_2],
        [Define to rpl_re_search_2 if the replacement should be used.])
      AC_DEFINE([re_match], [rpl_re_match],
        [Define to rpl_re_match if the replacement should be used.])
      AC_DEFINE([re_match_2], [rpl_re_match_2],
        [Define to rpl_re_match_2 if the replacement should be used.])
      AC_DEFINE([re_set_registers], [rpl_re_set_registers],
        [Define to rpl_re_set_registers if the replacement should be used.])
      AC_DEFINE([re_comp], [rpl_re_comp],
        [Define to rpl_re_comp if the replacement should be used.])
      AC_DEFINE([re_exec], [rpl_re_exec],
        [Define to rpl_re_exec if the replacement should be used.])
      AC_DEFINE([regcomp], [rpl_regcomp],
        [Define to rpl_regcomp if the replacement should be used.])
      AC_DEFINE([regexec], [rpl_regexec],
        [Define to rpl_regexec if the replacement should be used.])
      AC_DEFINE([regerror], [rpl_regerror],
        [Define to rpl_regerror if the replacement should be used.])
      AC_DEFINE([regfree], [rpl_regfree],
        [Define to rpl_regfree if the replacement should be used.])
    fi
  ])
  # gl_PREREQ_REGEX
  # ---------------
  # Configure-time prerequisites of lib/regex.c and lib/regex_internal.c.
  AC_DEFUN([gl_PREREQ_REGEX],
  [
    dnl Macros that must have been expanded before this one.
    AC_REQUIRE([AC_USE_SYSTEM_EXTENSIONS])
    AC_REQUIRE([AC_C_INLINE])
    AC_REQUIRE([AC_C_RESTRICT])
    AC_REQUIRE([AC_TYPE_MBSTATE_T])
    AC_REQUIRE([gl_EEMALLOC])

    dnl Optional header, functions and declaration probed for the sources.
    AC_CHECK_HEADERS([libintl.h])
    AC_CHECK_FUNCS_ONCE([isblank iswctype])
    AC_CHECK_DECLS(
      [isblank],
      [],
      [],
      [[#include <ctype.h>]])
  ])