regex.scm 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197
  1. ;;;; Copyright (C) 1997, 1999 Free Software Foundation, Inc.
  2. ;;;;
  3. ;;;; This program is free software; you can redistribute it and/or modify
  4. ;;;; it under the terms of the GNU General Public License as published by
  5. ;;;; the Free Software Foundation; either version 2, or (at your option)
  6. ;;;; any later version.
  7. ;;;;
  8. ;;;; This program is distributed in the hope that it will be useful,
  9. ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. ;;;; GNU General Public License for more details.
  12. ;;;;
  13. ;;;; You should have received a copy of the GNU General Public License
  14. ;;;; along with this software; see the file COPYING. If not, write to
  15. ;;;; the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
  16. ;;;; Boston, MA 02111-1307 USA
  17. ;;;;
  18. ;;;; POSIX regex support functions.
  19. (define-module (ice-9 regex))
  20. ;;; FIXME:
  21. ;;; It is not clear what should happen if a `match' function
  22. ;;; is passed a `match number' which is out of bounds for the
  23. ;;; regexp match: return #f, or throw an error? These routines
  24. ;;; throw an out-of-range error.
  25. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  26. ;;;; These procedures are not defined in SCSH, but I found them useful.
  27. (define-public (match:count match)
  28. (- (vector-length match) 1))
  29. (define-public (match:string match)
  30. (vector-ref match 0))
  31. (define-public (match:prefix match)
  32. (make-shared-substring (match:string match)
  33. 0
  34. (match:start match 0)))
  35. (define-public (match:suffix match)
  36. (make-shared-substring (match:string match)
  37. (match:end match 0)))
  38. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  39. ;;;; SCSH compatibility routines.
  40. (define-public (regexp-match? match)
  41. (and (vector? match)
  42. (string? (vector-ref match 0))
  43. (let loop ((i 1))
  44. (cond ((>= i (vector-length match)) #t)
  45. ((and (pair? (vector-ref match i))
  46. (integer? (car (vector-ref match i)))
  47. (integer? (cdr (vector-ref match i))))
  48. (loop (+ 1 i)))
  49. (else #f)))))
  50. (define-public (regexp-quote regexp)
  51. (call-with-output-string
  52. (lambda (p)
  53. (let loop ((i 0))
  54. (and (< i (string-length regexp))
  55. (begin
  56. (case (string-ref regexp i)
  57. ((#\* #\. #\( #\) #\+ #\? #\\ #\^ #\$ #\{ #\})
  58. (write-char #\\ p)))
  59. (write-char (string-ref regexp i) p)
  60. (loop (1+ i))))))))
  61. (define-public (match:start match . args)
  62. (let* ((matchnum (if (pair? args)
  63. (+ 1 (car args))
  64. 1))
  65. (start (car (vector-ref match matchnum))))
  66. (if (= start -1) #f start)))
  67. (define-public (match:end match . args)
  68. (let* ((matchnum (if (pair? args)
  69. (+ 1 (car args))
  70. 1))
  71. (end (cdr (vector-ref match matchnum))))
  72. (if (= end -1) #f end)))
  73. (define-public (match:substring match . args)
  74. (let* ((matchnum (if (pair? args)
  75. (car args)
  76. 0))
  77. (start (match:start match matchnum))
  78. (end (match:end match matchnum)))
  79. (and start end (make-shared-substring (match:string match)
  80. start
  81. end))))
  82. (define-public (string-match pattern str . args)
  83. (let ((rx (make-regexp pattern))
  84. (start (if (pair? args) (car args) 0)))
  85. (regexp-exec rx str start)))
  86. (define-public (regexp-substitute port match . items)
  87. ;; If `port' is #f, send output to a string.
  88. (if (not port)
  89. (call-with-output-string
  90. (lambda (p)
  91. (apply regexp-substitute p match items)))
  92. ;; Otherwise, process each substitution argument in `items'.
  93. (for-each (lambda (obj)
  94. (cond ((string? obj) (display obj port))
  95. ((integer? obj) (display (match:substring match obj) port))
  96. ((eq? 'pre obj) (display (match:prefix match) port))
  97. ((eq? 'post obj) (display (match:suffix match) port))
  98. (else (error 'wrong-type-arg obj))))
  99. items)))
  100. ;;; If we call fold-matches, below, with a regexp that can match the
  101. ;;; empty string, it's not obvious what "all the matches" means. How
  102. ;;; many empty strings are there in the string "a"? Our answer:
  103. ;;;
  104. ;;; This function applies PROC to every non-overlapping, maximal
  105. ;;; match of REGEXP in STRING.
  106. ;;;
  107. ;;; "non-overlapping": There are two non-overlapping matches of "" in
  108. ;;; "a" --- one before the `a', and one after. There are three
  109. ;;; non-overlapping matches of "q|x*" in "aqb": the empty strings
  110. ;;; before `a' and after `b', and `q'. The two empty strings before
  111. ;;; and after `q' don't count, because they overlap with the match of
  112. ;;; "q".
  113. ;;;
  114. ;;; "maximal": There are three distinct maximal matches of "x*" in
  115. ;;; "axxxb": one before the `a', one covering `xxx', and one after the
  116. ;;; `b'. Around or within `xxx', only the match covering all three
  117. ;;; x's counts, because the rest are not maximal.
  118. (define-public (fold-matches regexp string init proc . flags)
  119. (let ((regexp (if (regexp? regexp) regexp (make-regexp regexp)))
  120. (flags (if (null? flags) 0 flags)))
  121. (let loop ((start 0)
  122. (value init)
  123. (abuts #f)) ; True if start abuts a previous match.
  124. (let ((m (if (> start (string-length string)) #f
  125. (regexp-exec regexp string start flags))))
  126. (cond
  127. ((not m) value)
  128. ((and (= (match:start m) (match:end m)) abuts)
  129. ;; We matched an empty string, but that would overlap the
  130. ;; match immediately before. Try again at a position
  131. ;; further to the right.
  132. (loop (+ start 1) value #f))
  133. (else
  134. (loop (match:end m) (proc m value) #t)))))))
  135. (define-public (list-matches regexp string . flags)
  136. (reverse! (apply fold-matches regexp string '() cons flags)))
  137. (define-public (regexp-substitute/global port regexp string . items)
  138. ;; If `port' is #f, send output to a string.
  139. (if (not port)
  140. (call-with-output-string
  141. (lambda (p)
  142. (apply regexp-substitute/global p regexp string items)))
  143. ;; Walk the set of non-overlapping, maximal matches.
  144. (let next-match ((matches (list-matches regexp string))
  145. (start 0))
  146. (if (null? matches)
  147. (display (make-shared-substring string start) port)
  148. (let ((m (car matches)))
  149. ;; Process all of the items for this match. Don't use
  150. ;; for-each, because we need to make sure 'post at the
  151. ;; end of the item list is a tail call.
  152. (let next-item ((items items))
  153. (define (do-item item)
  154. (cond
  155. ((string? item) (display item port))
  156. ((integer? item) (display (match:substring m item) port))
  157. ((procedure? item) (display (item m) port))
  158. ((eq? item 'pre)
  159. (display
  160. (make-shared-substring string start (match:start m))
  161. port))
  162. ((eq? item 'post)
  163. (next-match (cdr matches) (match:end m)))
  164. (else (error 'wrong-type-arg item))))
  165. (if (pair? items)
  166. (if (null? (cdr items))
  167. (do-item (car items)) ; This is a tail call.
  168. (begin
  169. (do-item (car items)) ; This is not.
  170. (next-item (cdr items)))))))))))