string-fun.scm 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316
  1. ;;;; string-fun.scm --- string manipulation functions
  2. ;;;;
  3. ;;;; Copyright (C) 1995, 1996, 1997, 1999, 2001, 2006 Free Software Foundation, Inc.
  4. ;;;;
  5. ;;;; This library is free software; you can redistribute it and/or
  6. ;;;; modify it under the terms of the GNU Lesser General Public
  7. ;;;; License as published by the Free Software Foundation; either
  8. ;;;; version 3 of the License, or (at your option) any later version.
  9. ;;;;
  10. ;;;; This library is distributed in the hope that it will be useful,
  11. ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. ;;;; Lesser General Public License for more details.
  14. ;;;;
  15. ;;;; You should have received a copy of the GNU Lesser General Public
  16. ;;;; License along with this library; if not, write to the Free Software
  17. ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  18. ;;;;
  19. (define-module (ice-9 string-fun)
  20. :export (split-after-char split-before-char split-discarding-char
  21. split-after-char-last split-before-char-last
  22. split-discarding-char-last split-before-predicate
  23. split-after-predicate split-discarding-predicate
  24. separate-fields-discarding-char separate-fields-after-char
  25. separate-fields-before-char string-prefix-predicate string-prefix=?
  26. sans-surrounding-whitespace sans-trailing-whitespace
  27. sans-leading-whitespace sans-final-newline has-trailing-newline?
  28. string-replace-substring))
  29. ;;;;
  30. ;;;
  31. ;;; Various string functions, particularly those that take
  32. ;;; advantage of the "shared substring" capability.
  33. ;;; FIXME Document these functions in Miscellaneous String Operations::
  34. ;;; in doc/ref/api-data.texi.
  35. ;;;
  36. ;;; {String Fun: Dividing Strings Into Fields}
  37. ;;;
  38. ;;; The names of these functions are very regular.
  39. ;;; Here is a grammar of a call to one of these:
  40. ;;;
  41. ;;; <string-function-invocation>
  42. ;;; := (<action>-<seperator-disposition>-<seperator-determination> <seperator-param> <str> <ret>)
  43. ;;;
  44. ;;; <str> = the string
  45. ;;;
  46. ;;; <ret> = The continuation. String functions generally return
  47. ;;; multiple values by passing them to this procedure.
  48. ;;;
  49. ;;; <action> = split
  50. ;;; | separate-fields
  51. ;;;
  52. ;;; "split" means to divide a string into two parts.
  53. ;;; <ret> will be called with two arguments.
  54. ;;;
  55. ;;; "separate-fields" means to divide a string into as many
  56. ;;; parts as possible. <ret> will be called with
  57. ;;; however many fields are found.
  58. ;;;
  59. ;;; <seperator-disposition> = before
  60. ;;; | after
  61. ;;; | discarding
  62. ;;;
  63. ;;; "before" means to leave the seperator attached to
  64. ;;; the beginning of the field to its right.
  65. ;;; "after" means to leave the seperator attached to
  66. ;;; the end of the field to its left.
  67. ;;; "discarding" means to discard seperators.
  68. ;;;
  69. ;;; Other dispositions might be handy. For example, "isolate"
  70. ;;; could mean to treat the separator as a field unto itself.
  71. ;;;
  72. ;;; <seperator-determination> = char
  73. ;;; | predicate
  74. ;;;
  75. ;;; "char" means to use a particular character as field seperator.
  76. ;;; "predicate" means to check each character using a particular predicate.
  77. ;;;
  78. ;;; Other determinations might be handy. For example, "character-set-member".
  79. ;;;
  80. ;;; <seperator-param> = A parameter that completes the meaning of the determinations.
  81. ;;; For example, if the determination is "char", then this parameter
  82. ;;; says which character. If it is "predicate", the parameter is the
  83. ;;; predicate.
  84. ;;;
  85. ;;;
  86. ;;; For example:
  87. ;;;
  88. ;;; (separate-fields-discarding-char #\, "foo, bar, baz, , bat" list)
  89. ;;; => ("foo" " bar" " baz" " " " bat")
  90. ;;;
  91. ;;; (split-after-char #\- 'an-example-of-split list)
  92. ;;; => ("an-" "example-of-split")
  93. ;;;
  94. ;;; As an alternative to using a determination "predicate", or to trying to do anything
  95. ;;; complicated with these functions, consider using regular expressions.
  96. ;;;
  97. (define (split-after-char char str ret)
  98. (let ((end (cond
  99. ((string-index str char) => 1+)
  100. (else (string-length str)))))
  101. (ret (substring str 0 end)
  102. (substring str end))))
  103. (define (split-before-char char str ret)
  104. (let ((end (or (string-index str char)
  105. (string-length str))))
  106. (ret (substring str 0 end)
  107. (substring str end))))
  108. (define (split-discarding-char char str ret)
  109. (let ((end (string-index str char)))
  110. (if (not end)
  111. (ret str "")
  112. (ret (substring str 0 end)
  113. (substring str (1+ end))))))
  114. (define (split-after-char-last char str ret)
  115. (let ((end (cond
  116. ((string-rindex str char) => 1+)
  117. (else 0))))
  118. (ret (substring str 0 end)
  119. (substring str end))))
  120. (define (split-before-char-last char str ret)
  121. (let ((end (or (string-rindex str char) 0)))
  122. (ret (substring str 0 end)
  123. (substring str end))))
  124. (define (split-discarding-char-last char str ret)
  125. (let ((end (string-rindex str char)))
  126. (if (not end)
  127. (ret str "")
  128. (ret (substring str 0 end)
  129. (substring str (1+ end))))))
  130. (define (split-before-predicate pred str ret)
  131. (let loop ((n 0))
  132. (cond
  133. ((= n (string-length str)) (ret str ""))
  134. ((not (pred (string-ref str n))) (loop (1+ n)))
  135. (else (ret (substring str 0 n)
  136. (substring str n))))))
  137. (define (split-after-predicate pred str ret)
  138. (let loop ((n 0))
  139. (cond
  140. ((= n (string-length str)) (ret str ""))
  141. ((not (pred (string-ref str n))) (loop (1+ n)))
  142. (else (ret (substring str 0 (1+ n))
  143. (substring str (1+ n)))))))
  144. (define (split-discarding-predicate pred str ret)
  145. (let loop ((n 0))
  146. (cond
  147. ((= n (string-length str)) (ret str ""))
  148. ((not (pred (string-ref str n))) (loop (1+ n)))
  149. (else (ret (substring str 0 n)
  150. (substring str (1+ n)))))))
  151. (define (separate-fields-discarding-char ch str ret)
  152. (let loop ((fields '())
  153. (str str))
  154. (cond
  155. ((string-rindex str ch)
  156. => (lambda (w) (loop (cons (substring str (+ 1 w)) fields)
  157. (substring str 0 w))))
  158. (else (apply ret str fields)))))
  159. (define (separate-fields-after-char ch str ret)
  160. (reverse
  161. (let loop ((fields '())
  162. (str str))
  163. (cond
  164. ((string-index str ch)
  165. => (lambda (w) (loop (cons (substring str 0 (+ 1 w)) fields)
  166. (substring str (+ 1 w)))))
  167. (else (apply ret str fields))))))
  168. (define (separate-fields-before-char ch str ret)
  169. (let loop ((fields '())
  170. (str str))
  171. (cond
  172. ((string-rindex str ch)
  173. => (lambda (w) (loop (cons (substring str w) fields)
  174. (substring str 0 w))))
  175. (else (apply ret str fields)))))
  176. ;;; {String Fun: String Prefix Predicates}
  177. ;;;
  178. ;;; Very simple:
  179. ;;;
  180. ;;; (define-public ((string-prefix-predicate pred?) prefix str)
  181. ;;; (and (<= (string-length prefix) (string-length str))
  182. ;;; (pred? prefix (substring str 0 (string-length prefix)))))
  183. ;;;
  184. ;;; (define-public string-prefix=? (string-prefix-predicate string=?))
  185. ;;;
  186. (define (string-prefix-predicate pred?)
  187. (lambda (prefix str)
  188. (and (<= (string-length prefix) (string-length str))
  189. (pred? prefix (substring str 0 (string-length prefix))))))
  190. (define string-prefix=? (string-prefix-predicate string=?))
  191. ;;; {String Fun: Strippers}
  192. ;;;
  193. ;;; <stripper> = sans-<removable-part>
  194. ;;;
  195. ;;; <removable-part> = surrounding-whitespace
  196. ;;; | trailing-whitespace
  197. ;;; | leading-whitespace
  198. ;;; | final-newline
  199. ;;;
  200. (define (sans-surrounding-whitespace s)
  201. (let ((st 0)
  202. (end (string-length s)))
  203. (while (and (< st (string-length s))
  204. (char-whitespace? (string-ref s st)))
  205. (set! st (1+ st)))
  206. (while (and (< 0 end)
  207. (char-whitespace? (string-ref s (1- end))))
  208. (set! end (1- end)))
  209. (if (< end st)
  210. ""
  211. (substring s st end))))
  212. (define (sans-trailing-whitespace s)
  213. (let ((st 0)
  214. (end (string-length s)))
  215. (while (and (< 0 end)
  216. (char-whitespace? (string-ref s (1- end))))
  217. (set! end (1- end)))
  218. (if (< end st)
  219. ""
  220. (substring s st end))))
  221. (define (sans-leading-whitespace s)
  222. (let ((st 0)
  223. (end (string-length s)))
  224. (while (and (< st (string-length s))
  225. (char-whitespace? (string-ref s st)))
  226. (set! st (1+ st)))
  227. (if (< end st)
  228. ""
  229. (substring s st end))))
  230. (define (sans-final-newline str)
  231. (cond
  232. ((= 0 (string-length str))
  233. str)
  234. ((char=? #\nl (string-ref str (1- (string-length str))))
  235. (substring str 0 (1- (string-length str))))
  236. (else str)))
  237. ;;; {String Fun: has-trailing-newline?}
  238. ;;;
  239. (define (has-trailing-newline? str)
  240. (and (< 0 (string-length str))
  241. (char=? #\nl (string-ref str (1- (string-length str))))))
  242. ;;; {String Fun: with-regexp-parts}
  243. ;;; This relies on the older, hairier regexp interface, which we don't
  244. ;;; particularly want to implement, and it's not used anywhere, so
  245. ;;; we're just going to drop it for now.
  246. ;;; (define-public (with-regexp-parts regexp fields str return fail)
  247. ;;; (let ((parts (regexec regexp str fields)))
  248. ;;; (if (number? parts)
  249. ;;; (fail parts)
  250. ;;; (apply return parts))))
  251. ;;; {String Fun: string-replace-substring}
  252. ;;;
  253. ;; string-replace-substring By A. Wingo in
  254. ;; https://lists.gnu.org/archive/html/guile-devel/2014-03/msg00058.html
  255. ;; also in string-replace-substring guix:guix/utils.scm.
  256. (define (string-replace-substring str substring replacement)
  257. "Return a new string where every instance of @var{substring} in string
  258. @var{str} has been replaced by @var{replacement}. For example:
  259. @lisp
  260. (string-replace-substring \"a ring of strings\" \"ring\" \"rut\")
  261. @result{} \"a rut of struts\"
  262. @end lisp
  263. "
  264. (let ((sublen (string-length substring)))
  265. (with-output-to-string
  266. (lambda ()
  267. (let lp ((start 0))
  268. (cond
  269. ((string-contains str substring start)
  270. => (lambda (end)
  271. (display (substring/shared str start end))
  272. (display replacement)
  273. (lp (+ end sublen))))
  274. (else
  275. (display (substring/shared str start)))))))))