regexp.test 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251
  1. ;;;; regexp.test --- test Guile's regular expression functions -*- scheme -*-
  2. ;;;; Jim Blandy <jimb@red-bean.com> --- September 1999
  3. ;;;;
  4. ;;;; Copyright (C) 1999, 2004, 2006, 2007, 2008 Free Software Foundation, Inc.
  5. ;;;;
  6. ;;;; This program is free software; you can redistribute it and/or modify
  7. ;;;; it under the terms of the GNU General Public License as published by
  8. ;;;; the Free Software Foundation; either version 2, or (at your option)
  9. ;;;; any later version.
  10. ;;;;
  11. ;;;; This program is distributed in the hope that it will be useful,
  12. ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. ;;;; GNU General Public License for more details.
  15. ;;;;
  16. ;;;; You should have received a copy of the GNU General Public License
  17. ;;;; along with this software; see the file COPYING. If not, write to
  18. ;;;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  19. ;;;; Boston, MA 02110-1301 USA
  20. (define-module (test-suite test-regexp)
  21. #:use-module (test-suite lib)
  22. #:use-module (ice-9 regex))
  23. ;;; Run a regexp-substitute or regexp-substitute/global test, once
  24. ;;; providing a real port and once providing #f, requesting direct
  25. ;;; string output.
  26. (define (vary-port func expected . args)
  27. (pass-if "port is string port"
  28. (equal? expected
  29. (call-with-output-string
  30. (lambda (port)
  31. (apply func port args)))))
  32. (pass-if "port is #f"
  33. (equal? expected
  34. (apply func #f args))))
  35. (define (object->string obj)
  36. (call-with-output-string
  37. (lambda (port)
  38. (write obj port))))
  39. ;;;
  40. ;;; make-regexp
  41. ;;;
  42. (with-test-prefix "make-regexp"
  43. (pass-if-exception "no args" exception:wrong-num-args
  44. (make-regexp))
  45. (pass-if-exception "bad pat arg" exception:wrong-type-arg
  46. (make-regexp 'blah))
  47. ;; in guile prior to 1.6.5 make-regex didn't validate its flags args
  48. (pass-if-exception "bad arg 2" exception:wrong-type-arg
  49. (make-regexp "xyz" 'abc))
  50. (pass-if-exception "bad arg 3" exception:wrong-type-arg
  51. (make-regexp "xyz" regexp/icase 'abc)))
  52. ;;;
  53. ;;; match:string
  54. ;;;
  55. (with-test-prefix "match:string"
  56. (pass-if "foo"
  57. (string=? "foo" (match:string (string-match ".*" "foo"))))
  58. (pass-if "foo offset 1"
  59. (string=? "foo" (match:string (string-match ".*" "foo" 1)))))
  60. ;;;
  61. ;;; regexp-exec
  62. ;;;
  63. (with-test-prefix "regexp-exec"
  64. (pass-if-exception "non-integer offset" exception:wrong-type-arg
  65. (let ((re (make-regexp "ab+")))
  66. (regexp-exec re "aaaabbbb" 1.5 'bogus-flags-arg)))
  67. (pass-if-exception "non-string input" exception:wrong-type-arg
  68. (let ((re (make-regexp "ab+")))
  69. (regexp-exec re 'not-a-string)))
  70. (pass-if-exception "non-string input, with offset" exception:wrong-type-arg
  71. (let ((re (make-regexp "ab+")))
  72. (regexp-exec re 'not-a-string 5)))
  73. ;; in guile 1.8.1 and earlier, a #\nul character in the input string was
  74. ;; only detected in a critical section, and the resulting error throw
  75. ;; abort()ed the program
  76. (pass-if-exception "nul in input" exception:string-contains-nul
  77. (let ((re (make-regexp "ab+")))
  78. (regexp-exec re (string #\a #\b (integer->char 0)))))
  79. ;; in guile 1.8.1 and earlier, a bogus flags argument was only detected
  80. ;; inside a critical section, and the resulting error throw abort()ed the
  81. ;; program
  82. (pass-if-exception "non-integer flags" exception:wrong-type-arg
  83. (let ((re (make-regexp "ab+")))
  84. (regexp-exec re "aaaabbbb" 0 'bogus-flags-arg))))
  85. ;;;
  86. ;;; fold-matches
  87. ;;;
  88. (with-test-prefix "fold-matches"
  89. (pass-if "without flags"
  90. (equal? '("hello")
  91. (fold-matches "^[a-z]+$" "hello" '()
  92. (lambda (match result)
  93. (cons (match:substring match)
  94. result)))))
  95. (pass-if "with flags"
  96. ;; Prior to 1.8.6, passing an additional flag would not work.
  97. (null?
  98. (fold-matches "^[a-z]+$" "hello" '()
  99. (lambda (match result)
  100. (cons (match:substring match)
  101. result))
  102. (logior regexp/notbol regexp/noteol)))))
  103. ;;;
  104. ;;; regexp-quote
  105. ;;;
  106. (with-test-prefix "regexp-quote"
  107. (pass-if-exception "no args" exception:wrong-num-args
  108. (regexp-quote))
  109. (pass-if-exception "bad string arg" exception:wrong-type-arg
  110. (regexp-quote 'blah))
  111. (let ((lst `((regexp/basic ,regexp/basic)
  112. (regexp/extended ,regexp/extended)))
  113. ;; string of all characters, except #\nul which doesn't work because
  114. ;; it's the usual end-of-string for the underlying C regexec()
  115. (allchars (list->string (map integer->char
  116. (cdr (iota char-code-limit))))))
  117. (for-each
  118. (lambda (elem)
  119. (let ((name (car elem))
  120. (flag (cadr elem)))
  121. (with-test-prefix name
  122. ;; try on each individual character, except #\nul
  123. (do ((i 1 (1+ i)))
  124. ((>= i char-code-limit))
  125. (let* ((c (integer->char i))
  126. (s (string c))
  127. (q (regexp-quote s)))
  128. (pass-if (list "char" i c s q)
  129. (let ((m (regexp-exec (make-regexp q flag) s)))
  130. (and (= 0 (match:start m))
  131. (= 1 (match:end m)))))))
  132. ;; try on pattern "aX" where X is each character, except #\nul
  133. ;; this exposes things like "?" which are special only when they
  134. ;; follow a pattern to repeat or whatever ("a" in this case)
  135. (do ((i 1 (1+ i)))
  136. ((>= i char-code-limit))
  137. (let* ((c (integer->char i))
  138. (s (string #\a c))
  139. (q (regexp-quote s)))
  140. (pass-if (list "string \"aX\"" i c s q)
  141. (let ((m (regexp-exec (make-regexp q flag) s)))
  142. (and (= 0 (match:start m))
  143. (= 2 (match:end m)))))))
  144. (pass-if "string of all chars"
  145. (let ((m (regexp-exec (make-regexp (regexp-quote allchars)
  146. flag) allchars)))
  147. (and (= 0 (match:start m))
  148. (= (string-length allchars) (match:end m))))))))
  149. lst)))
  150. ;;;
  151. ;;; regexp-substitute
  152. ;;;
  153. (with-test-prefix "regexp-substitute"
  154. (let ((match
  155. (string-match "patleft(sub1)patmid(sub2)patright"
  156. "contleftpatleftsub1patmidsub2patrightcontright")))
  157. (define (try expected . args)
  158. (with-test-prefix (object->string args)
  159. (apply vary-port regexp-substitute expected match args)))
  160. (try "")
  161. (try "string1" "string1")
  162. (try "string1string2" "string1" "string2")
  163. (try "patleftsub1patmidsub2patright" 0)
  164. (try "hi-patleftsub1patmidsub2patright-bye" "hi-" 0 "-bye")
  165. (try "sub1" 1)
  166. (try "hi-sub1-bye" "hi-" 1 "-bye")
  167. (try "hi-sub2-bye" "hi-" 2 "-bye")
  168. (try "contleft" 'pre)
  169. (try "contright" 'post)
  170. (try "contrightcontleft" 'post 'pre)
  171. (try "contrightcontleftcontrightcontleft" 'post 'pre 'post 'pre)
  172. (try "contrightsub2sub1contleft" 'post 2 1 'pre)
  173. (try "foosub1sub1sub1sub1bar" "foo" 1 1 1 1 "bar")))
  174. (with-test-prefix "regexp-substitute/global"
  175. (define (try expected . args)
  176. (with-test-prefix (object->string args)
  177. (apply vary-port regexp-substitute/global expected args)))
  178. (try "hi" "a(x*)b" "ab" "hi")
  179. (try "" "a(x*)b" "ab" 1)
  180. (try "xx" "a(x*)b" "axxb" 1)
  181. (try "xx" "a(x*)b" "_axxb_" 1)
  182. (try "pre" "a(x*)b" "preaxxbpost" 'pre)
  183. (try "post" "a(x*)b" "preaxxbpost" 'post)
  184. (try "string" "x" "string" 'pre "y" 'post)
  185. (try "4" "a(x*)b" "_axxb_" (lambda (m)
  186. (number->string (match:end m 1))))
  187. (try "_aybycyd_" "x+" "_axbxxcxxxd_" 'pre "y" 'post)
  188. ;; This should not go into an infinite loop, just because the regexp
  189. ;; can match the empty string. This test also kind of beats on our
  190. ;; definition of where a null string can match.
  191. (try "y_yaybycydy_y" "x*" "_axbxxcxxxd_" 'pre "y" 'post)
  192. ;; These kind of bother me. The extension from regexp-substitute to
  193. ;; regexp-substitute/global is only natural if your item list
  194. ;; includes both pre and post. If those are required, why bother
  195. ;; to include them at all?
  196. (try "4:7:12:_" "a(x*)b" "_axxbaxbaxxxb_"
  197. (lambda (m) (number->string (match:end m 1))) ":"
  198. 'post)
  199. (try "4:10:19:_:19:10:4" "a(x*)b" "_axxbaxxxxbaxxxxxxxb_"
  200. (lambda (m) (number->string (match:end m 1))) ":"
  201. 'post
  202. ":" (lambda (m) (number->string (match:end m 1))))
  203. ;; Jan Nieuwenhuizen's bug, 2 Sep 1999
  204. (try "" "_" (make-string 500 #\_)
  205. 'post))