123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375 |
- (define-module (data-mining util)
- #:use-module (srfi srfi-1)
- #:use-module (srfi srfi-26)
- #:use-module (ice-9 receive)
- #:use-module (ice-9 threads) ;par-map
- #:export (scan
- successive-apply
- successive-filter
- take-indices
- gather
- list-insert
- list-split
- list-mask
- list-indices
- borders
- combine
- recursive-map
- for-each/key+value
- map/key+value
- filter-map/key+value
- substitute-bindings
- substitute-map
- alist-merge
- reverse-map
- max*
- min*
- extremum*
- max+value*
- min+value*
- extremum+value*
- par-extremum+value*
- permutations
- k-combinations
- powerset))
- ;; Utility Procedures and generics
- ;; ===============================
- (define (scan proc lst)
- "Prefix scan across LST. Apply PROC as (proc p e) for each e in
- LST, and P being the result of the previous application of PROC,
- except for the first application, when p is the head of LST."
- (reverse!
- (fold
- (lambda (e l) (cons (proc (car l) e) l))
- (list (car lst))
- (cdr lst))))
- (define (successive-apply procs init)
- "Apply successive procedures from PROCS to LST, accumulating
- intermediate results. The first element of the returned list is the
- result of applying the first procedure in PROCS to INIT.
- e.g.:
- > (define ps '(p1 p2 p3 p4))
- > (successive-apply ps l)
- => ((p1 l) (p2 (p1 l)) (p3 (p2 (p1 l))) (p4 (p3 (p2 (p1 l)))))"
- (reverse!
- (fold
- (lambda (p l) (cons (p (car l)) l))
- (list ((car procs) init))
- (cdr procs))))
- (define (successive-filter preds lst)
- "Apply successive filter predicates to LST"
- (successive-apply (map (lambda (p)
- (cut filter p <>))
- preds)
- lst))
- (define (list-insert l i e)
- "Insert into L at position I the element E"
- (cond ((null? l) (list e))
- ((<= i 0) (cons e l))
- (else (cons (car l)
- (list-insert (cdr l) (1- i) e)))))
- (define (take-indices lst ind)
- "Return a subset of elements from LST. The elements in the
- resulting list are those which occupied indices in IND. The indices
- in IND need not be sorted."
- (define (take-indices-iter i inds l acc)
- (if (or (null? inds) (null? l))
- (reverse! acc)
- (if (= i (car inds))
- (take-indices-iter (1+ i) (cdr inds) (cdr l) (cons (car l) acc))
- (take-indices-iter (1+ i) inds (cdr l) acc))))
- (let ((sind (sort ind <)))
- (take-indices-iter 0 sind lst '())))
- (define (gather lst ind)
- (map (cut list-ref lst <>) ind))
- (define* (list-mask lst mask #:optional (dflt #f))
- "Return a new list of the same length as LST. For each element
- index i in MASK the returned list has a copy of the corresponding
- element from LST. Otherwise DFLT.
- E.g. (list-mask '(0 1 2 3 4 5) '(1 2 5))
- => '(#f 1 2 #f #f 5)"
- (fold
- (lambda (i result)
- (list-set! result i (list-ref lst i))
- result)
- (make-list (length lst) dflt)
- mask))
- (define (list-indices pred . lsts)
- "Returns a list of all indices for which (pred lst1 lst2 ...) is
- satisfied. Similar to list-index."
- (let loop ((count 0)
- (result '())
- (lsts lsts))
- (if (any null? lsts)
- (reverse! result)
- (loop (1+ count)
- (if (apply pred (map car lsts))
- (cons count result)
- result)
- (map cdr lsts)))))
- (define (list-split lst n)
- "Split a list into n sublists of roughly equal size, and return a
- new list containing those sublists as elements.
- e.g.:
- > (list-split (iota 10) 3)
- => ((0 1 2) (3 4 5) (6 7 8 9))"
- (if (or (null? lst) (= n 0))
- '()
- (let ((s (quotient (length lst) n)))
- (cons (list-head lst s)
- (list-split (list-tail lst s)
- (1- n))))))
- (define* (borders lst #:optional (pred eq?))
- "Return the pairs of consecutive values (a . b) from LST for
- which (PRED a b) does not return #t"
- (remove (lambda (e) (pred (car e) (cdr e)))
- (combine cons lst 2)))
- (define (combine proc lst n)
- "Applies procedure PROC to groups of N consecutive elements from LST,
- resulting in a new list with (- (length LST) N) elements."
- (define (combine-iter g l acc)
- (if (< (length l) n)
- (append acc (list (apply proc g))) ;do the last group
- (combine-iter
- (append (cdr g) (list (list-ref l (1- n))))
- (cdr l)
- (append acc (list (apply proc g))))))
- (combine-iter (take lst n) (cdr lst) '()))
- (define (recursive-map proc lst)
- "Apply procedure PROC to elements in LST. If an element, ELEM, is
- itself a list, then recursively apply PROC to the elements of ELEM."
- (map (lambda (e)
- (cond ((list? e) (recursive-map proc e))
- ((pair? e) (list (proc (car e)) (proc (cdr e))))
- (else (proc e))))
- lst))
- (define (apply-pair proc)
- "Return a procedure that takes as argument a pair and applies the
- arity-2 procedure PROC to the car and cdr of that pair."
- (lambda (p) (proc (car p) (cdr p))))
- (define (for-each/key+value proc lst)
- "Where LST is a list of key+value pairs, call (PROC key value)"
- (for-each (apply-pair proc) lst))
- (define (map/key+value proc lst)
- "Where LST is a list of key+value pairs, call (PROC key value) and
- return a list of the results."
- (map (apply-pair proc) lst))
- (define (filter-map/key+value proc lst)
- "Map PROC on LST but leave out applications that result in #f"
- (filter-map (apply-pair proc) lst))
- (define (substitute-bindings lst bindings)
- "For every symbol that is a key in BINDINGS, substitute the
- corresponding value into the resulting list."
- (recursive-map (lambda (a)
- (if (symbol? a)
- (let ((binding (assq a bindings)))
- (if binding
- (cdr binding)
- a))
- a))
- lst))
- (define (substitute-map lst mapping)
- "More general form of substitute-bindings that replaces matching key
- from MAPPING into LST. I.e. the keys in the MAPPING alist need not be
- symbols."
- (recursive-map (lambda (a)
- (let ((binding (assoc a mapping)))
- (if binding
- (cdr binding)
- a)))
- lst))
- (define (alist-merge proc . rest)
- "Merge a number of alists together. If two alists share a common key,
- then the corresponding values are merged by applying PROC to those
- values."
- (define (merge-iter ks acc)
- (if (null? ks) acc
- (let* ((key (car ks))
- (values (map cdr (filter-map (cut assoc key <>) rest))))
- (merge-iter (cdr ks) (acons key (apply proc values) acc)))))
- (merge-iter (apply (cut lset-union equal? <...>)
- (map (cut map car <>) rest))
- '()))
- (define (reverse-map alst)
- "Reverse the mapping in ALST such that the values now point to the
- keys. The values in ALST must be unique
- E.g: (reverse-map '((\"a\" . 1) (\"b\" . 2)))
- => ((1 . \"a\") (2 . \"b\"))"
- (define (swap-pair p) (cons (cdr p) (car p)))
- (map swap-pair alst))
- (define (memoized-extremum+value* proc cmp x x* . rest)
- (if (null? rest)
- (values x x*)
- (let* ((y (car rest))
- (y* (proc y)))
- (if (cmp x* y*)
- (apply memoized-extremum+value* proc cmp x x* (cdr rest))
- (apply memoized-extremum+value* proc cmp y y* (cdr rest))))))
- (define extremum*
- (case-lambda
- ((proc cmp x) x)
- ((proc cmp x . rest)
- (receive (extremum value)
- (apply memoized-extremum+value* proc cmp x (proc x) rest)
- extremum))))
- (define (extremum+value* proc cmp x . rest)
- (apply memoized-extremum+value* proc cmp x (proc x) rest))
- (define (par-extremum+value* proc cmp . args)
- "Apply extremum+value* in parallel."
- ;; Do extremum+value* on chunks of args, then aggregate results.
- (let ((result
- (apply extremum* cdr cmp
- (par-map
- (lambda (lst)
- (receive (x x*)
- (apply extremum+value* proc cmp lst)
- (cons x x*)))
- (list-split args
- ;; min ensures that each there's at least
- ;; one element in each resulting list.
- (min (length args)
- (current-processor-count)))))))
- (values (car result) (cdr result))))
- (define (max* proc . args)
- "Return the maximal element of the arguments given, where comparison
- is based on the value returned by applying PROC to each. PROC is not
- called more than once for each argument."
- (apply extremum* proc > args))
- (define (max+value* proc . args)
- "Like max* but returns two values: The maximal element, max, and the
- value of (PROC MAX)."
- (apply extremum+value* proc > args))
- (define (min* proc . args)
- "Like max* but return the minimal argument."
- (apply extremum* proc < args))
- (define (min+value* proc . args)
- "Like max+value* but for the minimal argument."
- (apply extremum+value* proc < args))
- (define (permutations lst)
- "Return a list where each element of the list is a permutation of the
- input list LST."
- (cond ((null? lst) '(()))
- ((= (length lst) 1) (list lst))
- (else
- (let ((head (car lst))
- (rec-perms (permutations (cdr lst))))
- (concatenate
- (map (lambda (p)
- (map (cut list-insert p <> head)
- (iota (1+ (length p)))))
- rec-perms))))))
- ;; This is SUPER expensive!!
- (define (k-combinations lst k)
- ;; TODO: Try an iterative approach. Get rid of 'drop' and 'iota',
- ;; which become expensive.
- (if (> k 0)
- (concatenate (map (lambda (e i)
- (map (cut cons e <>)
- (k-combinations (drop lst i)
- (1- k))))
- lst
- (iota (length lst) 1)))
- '(())))
- (define* (powerset lst #:optional (lower 0) (upper (length lst)))
- "Return the poweset of LST whose subsets have cardinality at least
- LOWER and at most UPPER."
- (concatenate
- (map (cut k-combinations lst <>)
- (iota (- upper lower -1) lower))))
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; Tests
- (use-modules (srfi srfi-64))
- (use-modules (ice-9 format))
- (test-begin "util-test")
- ;;; Check min*, max*, and extremum*
- (test-equal '(4 . "a") (max* car '(1 . "c") '(4 . "a")))
- (receive (max value)
- (max+value* car '(1 . 20) '(4 . 2))
- (test-equal '(4 . 2) max)
- (test-equal 4 value))
- (test-equal '(1 . "c") (min* car '(1 . "c") '(4 . "a")))
- (receive (min value)
- (min+value* car '(1 . 20) '(4 . 2))
- (test-equal '(1 . 20) min)
- (test-equal 1 value))
- (receive (min value)
- (extremum+value* cdr string<? '(1 . "c") '(4 . "a") '(0 . "b"))
- (test-equal '(4 . "a") min)
- (test-equal "a" value))
- ;;; Check substitute-bindings and substitute-map
- (define vals '(("r" . 2) (foo . 20) ("bar" . "baz")))
- (test-equal "substitute-bindings"
- '(+ 20 20)
- (substitute-bindings '(+ foo foo) vals))
- (test-equal "substitute-map"
- '(format #t "~a: ~a\n" "baz" (/ 2 20))
- (substitute-map '(format #t "~a: ~a\n" "bar" (/ "r" foo)) vals))
- ;;; Check list-mask
- (test-equal "list-mask"
- '(#f 1 2 #f #f 5)
- (list-mask '(0 1 2 3 4 5) '(1 2 5)))
- (test-equal "list-mask with dflt"
- '(0 1 2 0 0 5)
- (list-mask '(0 1 2 3 4 5) '(1 2 5) 0))
- ;;; Check list-indices
- (test-equal "list-indices"
- '(0 2 4)
- (list-indices even? '(0 1 2 3 4 5)))
- (test-equal "list-indices"
- '(1 3 5)
- (list-indices odd? '(0 1 2 3 4 5)))
- (test-equal "list-indices"
- '(0 4)
- (list-indices identity '(#t #f #f #f #t)))
- (test-end "util-test")
|