123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281 |
- ;;;; string-fun.scm --- string manipulation functions
- ;;;;
- ;;;; Copyright (C) 1995, 1996, 1997, 1999, 2001, 2006 Free Software Foundation, Inc.
- ;;;;
- ;;;; This library is free software; you can redistribute it and/or
- ;;;; modify it under the terms of the GNU Lesser General Public
- ;;;; License as published by the Free Software Foundation; either
- ;;;; version 3 of the License, or (at your option) any later version.
- ;;;;
- ;;;; This library is distributed in the hope that it will be useful,
- ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
- ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- ;;;; Lesser General Public License for more details.
- ;;;;
- ;;;; You should have received a copy of the GNU Lesser General Public
- ;;;; License along with this library; if not, write to the Free Software
- ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ;;;;
- (define-module (ice-9 string-fun)
- :export (split-after-char split-before-char split-discarding-char
- split-after-char-last split-before-char-last
- split-discarding-char-last split-before-predicate
- split-after-predicate split-discarding-predicate
- separate-fields-discarding-char separate-fields-after-char
- separate-fields-before-char string-prefix-predicate string-prefix=?
- sans-surrounding-whitespace sans-trailing-whitespace
- sans-leading-whitespace sans-final-newline has-trailing-newline?))
- ;;;;
- ;;;
- ;;; Various string funcitons, particularly those that take
- ;;; advantage of the "shared substring" capability.
- ;;;
- ;;; {String Fun: Dividing Strings Into Fields}
- ;;;
- ;;; The names of these functions are very regular.
- ;;; Here is a grammar of a call to one of these:
- ;;;
- ;;; <string-function-invocation>
- ;;; := (<action>-<seperator-disposition>-<seperator-determination> <seperator-param> <str> <ret>)
- ;;;
- ;;; <str> = the string
- ;;;
- ;;; <ret> = The continuation. String functions generally return
- ;;; multiple values by passing them to this procedure.
- ;;;
- ;;; <action> = split
- ;;; | separate-fields
- ;;;
- ;;; "split" means to divide a string into two parts.
- ;;; <ret> will be called with two arguments.
- ;;;
- ;;; "separate-fields" means to divide a string into as many
- ;;; parts as possible. <ret> will be called with
- ;;; however many fields are found.
- ;;;
- ;;; <seperator-disposition> = before
- ;;; | after
- ;;; | discarding
- ;;;
- ;;; "before" means to leave the seperator attached to
- ;;; the beginning of the field to its right.
- ;;; "after" means to leave the seperator attached to
- ;;; the end of the field to its left.
- ;;; "discarding" means to discard seperators.
- ;;;
- ;;; Other dispositions might be handy. For example, "isolate"
- ;;; could mean to treat the separator as a field unto itself.
- ;;;
- ;;; <seperator-determination> = char
- ;;; | predicate
- ;;;
- ;;; "char" means to use a particular character as field seperator.
- ;;; "predicate" means to check each character using a particular predicate.
- ;;;
- ;;; Other determinations might be handy. For example, "character-set-member".
- ;;;
- ;;; <seperator-param> = A parameter that completes the meaning of the determinations.
- ;;; For example, if the determination is "char", then this parameter
- ;;; says which character. If it is "predicate", the parameter is the
- ;;; predicate.
- ;;;
- ;;;
- ;;; For example:
- ;;;
- ;;; (separate-fields-discarding-char #\, "foo, bar, baz, , bat" list)
- ;;; => ("foo" " bar" " baz" " " " bat")
- ;;;
- ;;; (split-after-char #\- 'an-example-of-split list)
- ;;; => ("an-" "example-of-split")
- ;;;
- ;;; As an alternative to using a determination "predicate", or to trying to do anything
- ;;; complicated with these functions, consider using regular expressions.
- ;;;
- (define (split-after-char char str ret)
- (let ((end (cond
- ((string-index str char) => 1+)
- (else (string-length str)))))
- (ret (substring str 0 end)
- (substring str end))))
- (define (split-before-char char str ret)
- (let ((end (or (string-index str char)
- (string-length str))))
- (ret (substring str 0 end)
- (substring str end))))
- (define (split-discarding-char char str ret)
- (let ((end (string-index str char)))
- (if (not end)
- (ret str "")
- (ret (substring str 0 end)
- (substring str (1+ end))))))
- (define (split-after-char-last char str ret)
- (let ((end (cond
- ((string-rindex str char) => 1+)
- (else 0))))
- (ret (substring str 0 end)
- (substring str end))))
- (define (split-before-char-last char str ret)
- (let ((end (or (string-rindex str char) 0)))
- (ret (substring str 0 end)
- (substring str end))))
- (define (split-discarding-char-last char str ret)
- (let ((end (string-rindex str char)))
- (if (not end)
- (ret str "")
- (ret (substring str 0 end)
- (substring str (1+ end))))))
- (define (split-before-predicate pred str ret)
- (let loop ((n 0))
- (cond
- ((= n (string-length str)) (ret str ""))
- ((not (pred (string-ref str n))) (loop (1+ n)))
- (else (ret (substring str 0 n)
- (substring str n))))))
- (define (split-after-predicate pred str ret)
- (let loop ((n 0))
- (cond
- ((= n (string-length str)) (ret str ""))
- ((not (pred (string-ref str n))) (loop (1+ n)))
- (else (ret (substring str 0 (1+ n))
- (substring str (1+ n)))))))
- (define (split-discarding-predicate pred str ret)
- (let loop ((n 0))
- (cond
- ((= n (string-length str)) (ret str ""))
- ((not (pred (string-ref str n))) (loop (1+ n)))
- (else (ret (substring str 0 n)
- (substring str (1+ n)))))))
- (define (separate-fields-discarding-char ch str ret)
- (let loop ((fields '())
- (str str))
- (cond
- ((string-rindex str ch)
- => (lambda (w) (loop (cons (substring str (+ 1 w)) fields)
- (substring str 0 w))))
- (else (apply ret str fields)))))
- (define (separate-fields-after-char ch str ret)
- (reverse
- (let loop ((fields '())
- (str str))
- (cond
- ((string-index str ch)
- => (lambda (w) (loop (cons (substring str 0 (+ 1 w)) fields)
- (substring str (+ 1 w)))))
- (else (apply ret str fields))))))
- (define (separate-fields-before-char ch str ret)
- (let loop ((fields '())
- (str str))
- (cond
- ((string-rindex str ch)
- => (lambda (w) (loop (cons (substring str w) fields)
- (substring str 0 w))))
- (else (apply ret str fields)))))
- ;;; {String Fun: String Prefix Predicates}
- ;;;
- ;;; Very simple:
- ;;;
- ;;; (define-public ((string-prefix-predicate pred?) prefix str)
- ;;; (and (<= (string-length prefix) (string-length str))
- ;;; (pred? prefix (substring str 0 (string-length prefix)))))
- ;;;
- ;;; (define-public string-prefix=? (string-prefix-predicate string=?))
- ;;;
- (define (string-prefix-predicate pred?)
- (lambda (prefix str)
- (and (<= (string-length prefix) (string-length str))
- (pred? prefix (substring str 0 (string-length prefix))))))
- (define string-prefix=? (string-prefix-predicate string=?))
- ;;; {String Fun: Strippers}
- ;;;
- ;;; <stripper> = sans-<removable-part>
- ;;;
- ;;; <removable-part> = surrounding-whitespace
- ;;; | trailing-whitespace
- ;;; | leading-whitespace
- ;;; | final-newline
- ;;;
- (define (sans-surrounding-whitespace s)
- (let ((st 0)
- (end (string-length s)))
- (while (and (< st (string-length s))
- (char-whitespace? (string-ref s st)))
- (set! st (1+ st)))
- (while (and (< 0 end)
- (char-whitespace? (string-ref s (1- end))))
- (set! end (1- end)))
- (if (< end st)
- ""
- (substring s st end))))
- (define (sans-trailing-whitespace s)
- (let ((st 0)
- (end (string-length s)))
- (while (and (< 0 end)
- (char-whitespace? (string-ref s (1- end))))
- (set! end (1- end)))
- (if (< end st)
- ""
- (substring s st end))))
- (define (sans-leading-whitespace s)
- (let ((st 0)
- (end (string-length s)))
- (while (and (< st (string-length s))
- (char-whitespace? (string-ref s st)))
- (set! st (1+ st)))
- (if (< end st)
- ""
- (substring s st end))))
- (define (sans-final-newline str)
- (cond
- ((= 0 (string-length str))
- str)
- ((char=? #\nl (string-ref str (1- (string-length str))))
- (substring str 0 (1- (string-length str))))
- (else str)))
- ;;; {String Fun: has-trailing-newline?}
- ;;;
- (define (has-trailing-newline? str)
- (and (< 0 (string-length str))
- (char=? #\nl (string-ref str (1- (string-length str))))))
- ;;; {String Fun: with-regexp-parts}
- ;;; This relies on the older, hairier regexp interface, which we don't
- ;;; particularly want to implement, and it's not used anywhere, so
- ;;; we're just going to drop it for now.
- ;;; (define-public (with-regexp-parts regexp fields str return fail)
- ;;; (let ((parts (regexec regexp str fields)))
- ;;; (if (number? parts)
- ;;; (fail parts)
- ;;; (apply return parts))))
|