string-fun.scm 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
  1. ;;;; string-fun.scm --- string manipulation functions
  2. ;;;;
  3. ;;;; Copyright (C) 1995, 1996, 1997, 1999, 2001, 2006 Free Software Foundation, Inc.
  4. ;;;;
  5. ;;;; This library is free software; you can redistribute it and/or
  6. ;;;; modify it under the terms of the GNU Lesser General Public
  7. ;;;; License as published by the Free Software Foundation; either
  8. ;;;; version 2.1 of the License, or (at your option) any later version.
  9. ;;;;
  10. ;;;; This library is distributed in the hope that it will be useful,
  11. ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. ;;;; Lesser General Public License for more details.
  14. ;;;;
  15. ;;;; You should have received a copy of the GNU Lesser General Public
  16. ;;;; License along with this library; if not, write to the Free Software
  17. ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  18. ;;;;
  19. (define-module (ice-9 string-fun)
  20. :export (split-after-char split-before-char split-discarding-char
  21. split-after-char-last split-before-char-last
  22. split-discarding-char-last split-before-predicate
  23. split-after-predicate split-discarding-predicate
  24. separate-fields-discarding-char separate-fields-after-char
  25. separate-fields-before-char string-prefix-predicate string-prefix=?
  26. sans-surrounding-whitespace sans-trailing-whitespace
  27. sans-leading-whitespace sans-final-newline has-trailing-newline?))
  28. ;;;;
  29. ;;;
  30. ;;; Various string funcitons, particularly those that take
  31. ;;; advantage of the "shared substring" capability.
  32. ;;;
  33. ;;; {String Fun: Dividing Strings Into Fields}
  34. ;;;
  35. ;;; The names of these functions are very regular.
  36. ;;; Here is a grammar of a call to one of these:
  37. ;;;
  38. ;;; <string-function-invocation>
  39. ;;; := (<action>-<seperator-disposition>-<seperator-determination> <seperator-param> <str> <ret>)
  40. ;;;
  41. ;;; <str> = the string
  42. ;;;
  43. ;;; <ret> = The continuation. String functions generally return
  44. ;;; multiple values by passing them to this procedure.
  45. ;;;
  46. ;;; <action> = split
  47. ;;; | separate-fields
  48. ;;;
  49. ;;; "split" means to divide a string into two parts.
  50. ;;; <ret> will be called with two arguments.
  51. ;;;
  52. ;;; "separate-fields" means to divide a string into as many
  53. ;;; parts as possible. <ret> will be called with
  54. ;;; however many fields are found.
  55. ;;;
  56. ;;; <seperator-disposition> = before
  57. ;;; | after
  58. ;;; | discarding
  59. ;;;
  60. ;;; "before" means to leave the seperator attached to
  61. ;;; the beginning of the field to its right.
  62. ;;; "after" means to leave the seperator attached to
  63. ;;; the end of the field to its left.
  64. ;;; "discarding" means to discard seperators.
  65. ;;;
  66. ;;; Other dispositions might be handy. For example, "isolate"
  67. ;;; could mean to treat the separator as a field unto itself.
  68. ;;;
  69. ;;; <seperator-determination> = char
  70. ;;; | predicate
  71. ;;;
  72. ;;; "char" means to use a particular character as field seperator.
  73. ;;; "predicate" means to check each character using a particular predicate.
  74. ;;;
  75. ;;; Other determinations might be handy. For example, "character-set-member".
  76. ;;;
  77. ;;; <seperator-param> = A parameter that completes the meaning of the determinations.
  78. ;;; For example, if the determination is "char", then this parameter
  79. ;;; says which character. If it is "predicate", the parameter is the
  80. ;;; predicate.
  81. ;;;
  82. ;;;
  83. ;;; For example:
  84. ;;;
  85. ;;; (separate-fields-discarding-char #\, "foo, bar, baz, , bat" list)
  86. ;;; => ("foo" " bar" " baz" " " " bat")
  87. ;;;
  88. ;;; (split-after-char #\- 'an-example-of-split list)
  89. ;;; => ("an-" "example-of-split")
  90. ;;;
  91. ;;; As an alternative to using a determination "predicate", or to trying to do anything
  92. ;;; complicated with these functions, consider using regular expressions.
  93. ;;;
  94. (define (split-after-char char str ret)
  95. (let ((end (cond
  96. ((string-index str char) => 1+)
  97. (else (string-length str)))))
  98. (ret (substring str 0 end)
  99. (substring str end))))
  100. (define (split-before-char char str ret)
  101. (let ((end (or (string-index str char)
  102. (string-length str))))
  103. (ret (substring str 0 end)
  104. (substring str end))))
  105. (define (split-discarding-char char str ret)
  106. (let ((end (string-index str char)))
  107. (if (not end)
  108. (ret str "")
  109. (ret (substring str 0 end)
  110. (substring str (1+ end))))))
  111. (define (split-after-char-last char str ret)
  112. (let ((end (cond
  113. ((string-rindex str char) => 1+)
  114. (else 0))))
  115. (ret (substring str 0 end)
  116. (substring str end))))
  117. (define (split-before-char-last char str ret)
  118. (let ((end (or (string-rindex str char) 0)))
  119. (ret (substring str 0 end)
  120. (substring str end))))
  121. (define (split-discarding-char-last char str ret)
  122. (let ((end (string-rindex str char)))
  123. (if (not end)
  124. (ret str "")
  125. (ret (substring str 0 end)
  126. (substring str (1+ end))))))
  127. (define (split-before-predicate pred str ret)
  128. (let loop ((n 0))
  129. (cond
  130. ((= n (string-length str)) (ret str ""))
  131. ((not (pred (string-ref str n))) (loop (1+ n)))
  132. (else (ret (substring str 0 n)
  133. (substring str n))))))
  134. (define (split-after-predicate pred str ret)
  135. (let loop ((n 0))
  136. (cond
  137. ((= n (string-length str)) (ret str ""))
  138. ((not (pred (string-ref str n))) (loop (1+ n)))
  139. (else (ret (substring str 0 (1+ n))
  140. (substring str (1+ n)))))))
  141. (define (split-discarding-predicate pred str ret)
  142. (let loop ((n 0))
  143. (cond
  144. ((= n (string-length str)) (ret str ""))
  145. ((not (pred (string-ref str n))) (loop (1+ n)))
  146. (else (ret (substring str 0 n)
  147. (substring str (1+ n)))))))
  148. (define (separate-fields-discarding-char ch str ret)
  149. (let loop ((fields '())
  150. (str str))
  151. (cond
  152. ((string-rindex str ch)
  153. => (lambda (w) (loop (cons (substring str (+ 1 w)) fields)
  154. (substring str 0 w))))
  155. (else (apply ret str fields)))))
  156. (define (separate-fields-after-char ch str ret)
  157. (reverse
  158. (let loop ((fields '())
  159. (str str))
  160. (cond
  161. ((string-index str ch)
  162. => (lambda (w) (loop (cons (substring str 0 (+ 1 w)) fields)
  163. (substring str (+ 1 w)))))
  164. (else (apply ret str fields))))))
  165. (define (separate-fields-before-char ch str ret)
  166. (let loop ((fields '())
  167. (str str))
  168. (cond
  169. ((string-rindex str ch)
  170. => (lambda (w) (loop (cons (substring str w) fields)
  171. (substring str 0 w))))
  172. (else (apply ret str fields)))))
  173. ;;; {String Fun: String Prefix Predicates}
  174. ;;;
  175. ;;; Very simple:
  176. ;;;
  177. ;;; (define-public ((string-prefix-predicate pred?) prefix str)
  178. ;;; (and (<= (string-length prefix) (string-length str))
  179. ;;; (pred? prefix (substring str 0 (string-length prefix)))))
  180. ;;;
  181. ;;; (define-public string-prefix=? (string-prefix-predicate string=?))
  182. ;;;
  183. (define ((string-prefix-predicate pred?) prefix str)
  184. (and (<= (string-length prefix) (string-length str))
  185. (pred? prefix (substring str 0 (string-length prefix)))))
  186. (define string-prefix=? (string-prefix-predicate string=?))
  187. ;;; {String Fun: Strippers}
  188. ;;;
  189. ;;; <stripper> = sans-<removable-part>
  190. ;;;
  191. ;;; <removable-part> = surrounding-whitespace
  192. ;;; | trailing-whitespace
  193. ;;; | leading-whitespace
  194. ;;; | final-newline
  195. ;;;
  196. (define (sans-surrounding-whitespace s)
  197. (let ((st 0)
  198. (end (string-length s)))
  199. (while (and (< st (string-length s))
  200. (char-whitespace? (string-ref s st)))
  201. (set! st (1+ st)))
  202. (while (and (< 0 end)
  203. (char-whitespace? (string-ref s (1- end))))
  204. (set! end (1- end)))
  205. (if (< end st)
  206. ""
  207. (substring s st end))))
  208. (define (sans-trailing-whitespace s)
  209. (let ((st 0)
  210. (end (string-length s)))
  211. (while (and (< 0 end)
  212. (char-whitespace? (string-ref s (1- end))))
  213. (set! end (1- end)))
  214. (if (< end st)
  215. ""
  216. (substring s st end))))
  217. (define (sans-leading-whitespace s)
  218. (let ((st 0)
  219. (end (string-length s)))
  220. (while (and (< st (string-length s))
  221. (char-whitespace? (string-ref s st)))
  222. (set! st (1+ st)))
  223. (if (< end st)
  224. ""
  225. (substring s st end))))
  226. (define (sans-final-newline str)
  227. (cond
  228. ((= 0 (string-length str))
  229. str)
  230. ((char=? #\nl (string-ref str (1- (string-length str))))
  231. (substring str 0 (1- (string-length str))))
  232. (else str)))
  233. ;;; {String Fun: has-trailing-newline?}
  234. ;;;
  235. (define (has-trailing-newline? str)
  236. (and (< 0 (string-length str))
  237. (char=? #\nl (string-ref str (1- (string-length str))))))
  238. ;;; {String Fun: with-regexp-parts}
  239. ;;; This relies on the older, hairier regexp interface, which we don't
  240. ;;; particularly want to implement, and it's not used anywhere, so
  241. ;;; we're just going to drop it for now.
  242. ;;; (define-public (with-regexp-parts regexp fields str return fail)
  243. ;;; (let ((parts (regexec regexp str fields)))
  244. ;;; (if (number? parts)
  245. ;;; (fail parts)
  246. ;;; (apply return parts))))