util.scm 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375
  1. (define-module (data-mining util)
  2. #:use-module (srfi srfi-1)
  3. #:use-module (srfi srfi-26)
  4. #:use-module (ice-9 receive)
  5. #:use-module (ice-9 threads) ;par-map
  6. #:export (scan
  7. successive-apply
  8. successive-filter
  9. take-indices
  10. gather
  11. list-insert
  12. list-split
  13. list-mask
  14. list-indices
  15. borders
  16. combine
  17. recursive-map
  18. for-each/key+value
  19. map/key+value
  20. filter-map/key+value
  21. substitute-bindings
  22. substitute-map
  23. alist-merge
  24. reverse-map
  25. max*
  26. min*
  27. extremum*
  28. max+value*
  29. min+value*
  30. extremum+value*
  31. par-extremum+value*
  32. permutations
  33. k-combinations
  34. powerset))
  35. ;; Utility Procedures and generics
  36. ;; ===============================
  37. (define (scan proc lst)
  38. "Prefix scan across LST. Apply PROC as (proc p e) for each e in
  39. LST, and P being the result of the previous application of PROC,
  40. except for the first application, when p is the head of LST."
  41. (reverse!
  42. (fold
  43. (lambda (e l) (cons (proc (car l) e) l))
  44. (list (car lst))
  45. (cdr lst))))
  46. (define (successive-apply procs init)
  47. "Apply successive procedures from PROCS to LST, accumulating
  48. intermediate results. The first element of the returned list is the
  49. result of applying the first procedure in PROCS to INIT.
  50. e.g.:
  51. > (define ps '(p1 p2 p3 p4))
  52. > (successive-apply ps l)
  53. => ((p1 l) (p2 (p1 l)) (p3 (p2 (p1 l))) (p4 (p3 (p2 (p1 l)))))"
  54. (reverse!
  55. (fold
  56. (lambda (p l) (cons (p (car l)) l))
  57. (list ((car procs) init))
  58. (cdr procs))))
  59. (define (successive-filter preds lst)
  60. "Apply successive filter predicates to LST"
  61. (successive-apply (map (lambda (p)
  62. (cut filter p <>))
  63. preds)
  64. lst))
  65. (define (list-insert l i e)
  66. "Insert into L at position I the element E"
  67. (cond ((null? l) (list e))
  68. ((<= i 0) (cons e l))
  69. (else (cons (car l)
  70. (list-insert (cdr l) (1- i) e)))))
  71. (define (take-indices lst ind)
  72. "Return a subset of elements from LST. The elements in the
  73. resulting list are those which occupied indices in IND. The indices
  74. in IND need not be sorted."
  75. (define (take-indices-iter i inds l acc)
  76. (if (or (null? inds) (null? l))
  77. (reverse! acc)
  78. (if (= i (car inds))
  79. (take-indices-iter (1+ i) (cdr inds) (cdr l) (cons (car l) acc))
  80. (take-indices-iter (1+ i) inds (cdr l) acc))))
  81. (let ((sind (sort ind <)))
  82. (take-indices-iter 0 sind lst '())))
  83. (define (gather lst ind)
  84. (map (cut list-ref lst <>) ind))
  85. (define* (list-mask lst mask #:optional (dflt #f))
  86. "Return a new list of the same length as LST. For each element
  87. index i in MASK the returned list has a copy of the corresponding
  88. element from LST. Otherwise DFLT.
  89. E.g. (list-mask '(0 1 2 3 4 5) '(1 2 5))
  90. => '(#f 1 2 #f #f 5)"
  91. (fold
  92. (lambda (i result)
  93. (list-set! result i (list-ref lst i))
  94. result)
  95. (make-list (length lst) dflt)
  96. mask))
  97. (define (list-indices pred . lsts)
  98. "Returns a list of all indices for which (pred lst1 lst2 ...) is
  99. satisfied. Similar to list-index."
  100. (let loop ((count 0)
  101. (result '())
  102. (lsts lsts))
  103. (if (any null? lsts)
  104. (reverse! result)
  105. (loop (1+ count)
  106. (if (apply pred (map car lsts))
  107. (cons count result)
  108. result)
  109. (map cdr lsts)))))
  110. (define (list-split lst n)
  111. "Split a list into n sublists of roughly equal size, and return a
  112. new list containing those sublists as elements.
  113. e.g.:
  114. > (list-split (iota 10) 3)
  115. => ((0 1 2) (3 4 5) (6 7 8 9))"
  116. (if (or (null? lst) (= n 0))
  117. '()
  118. (let ((s (quotient (length lst) n)))
  119. (cons (list-head lst s)
  120. (list-split (list-tail lst s)
  121. (1- n))))))
  122. (define* (borders lst #:optional (pred eq?))
  123. "Return the pairs of consecutive values (a . b) from LST for
  124. which (PRED a b) does not return #t"
  125. (remove (lambda (e) (pred (car e) (cdr e)))
  126. (combine cons lst 2)))
  127. (define (combine proc lst n)
  128. "Applies procedure PROC to groups of N consecutive elements from LST,
  129. resulting in a new list with (- (length LST) N) elements."
  130. (define (combine-iter g l acc)
  131. (if (< (length l) n)
  132. (append acc (list (apply proc g))) ;do the last group
  133. (combine-iter
  134. (append (cdr g) (list (list-ref l (1- n))))
  135. (cdr l)
  136. (append acc (list (apply proc g))))))
  137. (combine-iter (take lst n) (cdr lst) '()))
  138. (define (recursive-map proc lst)
  139. "Apply procedure PROC to elements in LST. If an element, ELEM, is
  140. itself a list, then recursively apply PROC to the elements of ELEM."
  141. (map (lambda (e)
  142. (cond ((list? e) (recursive-map proc e))
  143. ((pair? e) (list (proc (car e)) (proc (cdr e))))
  144. (else (proc e))))
  145. lst))
  146. (define (apply-pair proc)
  147. "Return a procedure that takes as argument a pair and applies the
  148. arity-2 procedure PROC to the car and cdr of that pair."
  149. (lambda (p) (proc (car p) (cdr p))))
  150. (define (for-each/key+value proc lst)
  151. "Where LST is a list of key+value pairs, call (PROC key value)"
  152. (for-each (apply-pair proc) lst))
  153. (define (map/key+value proc lst)
  154. "Where LST is a list of key+value pairs, call (PROC key value) and
  155. return a list of the results."
  156. (map (apply-pair proc) lst))
  157. (define (filter-map/key+value proc lst)
  158. "Map PROC on LST but leave out applications that result in #f"
  159. (filter-map (apply-pair proc) lst))
  160. (define (substitute-bindings lst bindings)
  161. "For every symbol that is a key in BINDINGS, substitute the
  162. corresponding value into the resulting list."
  163. (recursive-map (lambda (a)
  164. (if (symbol? a)
  165. (let ((binding (assq a bindings)))
  166. (if binding
  167. (cdr binding)
  168. a))
  169. a))
  170. lst))
  171. (define (substitute-map lst mapping)
  172. "More general form of substitute-bindings that replaces matching key
  173. from MAPPING into LST. I.e. the keys in the MAPPING alist need not be
  174. symbols."
  175. (recursive-map (lambda (a)
  176. (let ((binding (assoc a mapping)))
  177. (if binding
  178. (cdr binding)
  179. a)))
  180. lst))
  181. (define (alist-merge proc . rest)
  182. "Merge a number of alists together. If two alists share a common key,
  183. then the corresponding values are merged by applying PROC to those
  184. values."
  185. (define (merge-iter ks acc)
  186. (if (null? ks) acc
  187. (let* ((key (car ks))
  188. (values (map cdr (filter-map (cut assoc key <>) rest))))
  189. (merge-iter (cdr ks) (acons key (apply proc values) acc)))))
  190. (merge-iter (apply (cut lset-union equal? <...>)
  191. (map (cut map car <>) rest))
  192. '()))
  193. (define (reverse-map alst)
  194. "Reverse the mapping in ALST such that the values now point to the
  195. keys. The values in ALST must be unique
  196. E.g: (reverse-map '((\"a\" . 1) (\"b\" . 2)))
  197. => ((1 . \"a\") (2 . \"b\"))"
  198. (define (swap-pair p) (cons (cdr p) (car p)))
  199. (map swap-pair alst))
  200. (define (memoized-extremum+value* proc cmp x x* . rest)
  201. (if (null? rest)
  202. (values x x*)
  203. (let* ((y (car rest))
  204. (y* (proc y)))
  205. (if (cmp x* y*)
  206. (apply memoized-extremum+value* proc cmp x x* (cdr rest))
  207. (apply memoized-extremum+value* proc cmp y y* (cdr rest))))))
  208. (define extremum*
  209. (case-lambda
  210. ((proc cmp x) x)
  211. ((proc cmp x . rest)
  212. (receive (extremum value)
  213. (apply memoized-extremum+value* proc cmp x (proc x) rest)
  214. extremum))))
  215. (define (extremum+value* proc cmp x . rest)
  216. (apply memoized-extremum+value* proc cmp x (proc x) rest))
  217. (define (par-extremum+value* proc cmp . args)
  218. "Apply extremum+value* in parallel."
  219. ;; Do extremum+value* on chunks of args, then aggregate results.
  220. (let ((result
  221. (apply extremum* cdr cmp
  222. (par-map
  223. (lambda (lst)
  224. (receive (x x*)
  225. (apply extremum+value* proc cmp lst)
  226. (cons x x*)))
  227. (list-split args
  228. ;; min ensures that each there's at least
  229. ;; one element in each resulting list.
  230. (min (length args)
  231. (current-processor-count)))))))
  232. (values (car result) (cdr result))))
  233. (define (max* proc . args)
  234. "Return the maximal element of the arguments given, where comparison
  235. is based on the value returned by applying PROC to each. PROC is not
  236. called more than once for each argument."
  237. (apply extremum* proc > args))
  238. (define (max+value* proc . args)
  239. "Like max* but returns two values: The maximal element, max, and the
  240. value of (PROC MAX)."
  241. (apply extremum+value* proc > args))
  242. (define (min* proc . args)
  243. "Like max* but return the minimal argument."
  244. (apply extremum* proc < args))
  245. (define (min+value* proc . args)
  246. "Like max+value* but for the minimal argument."
  247. (apply extremum+value* proc < args))
  248. (define (permutations lst)
  249. "Return a list where each element of the list is a permutation of the
  250. input list LST."
  251. (cond ((null? lst) '(()))
  252. ((= (length lst) 1) (list lst))
  253. (else
  254. (let ((head (car lst))
  255. (rec-perms (permutations (cdr lst))))
  256. (concatenate
  257. (map (lambda (p)
  258. (map (cut list-insert p <> head)
  259. (iota (1+ (length p)))))
  260. rec-perms))))))
  261. ;; This is SUPER expensive!!
  262. (define (k-combinations lst k)
  263. ;; TODO: Try an iterative approach. Get rid of 'drop' and 'iota',
  264. ;; which become expensive.
  265. (if (> k 0)
  266. (concatenate (map (lambda (e i)
  267. (map (cut cons e <>)
  268. (k-combinations (drop lst i)
  269. (1- k))))
  270. lst
  271. (iota (length lst) 1)))
  272. '(())))
  273. (define* (powerset lst #:optional (lower 0) (upper (length lst)))
  274. "Return the poweset of LST whose subsets have cardinality at least
  275. LOWER and at most UPPER."
  276. (concatenate
  277. (map (cut k-combinations lst <>)
  278. (iota (- upper lower -1) lower))))
  279. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  280. ;;; Tests
  281. (use-modules (srfi srfi-64))
  282. (use-modules (ice-9 format))
  283. (test-begin "util-test")
  284. ;;; Check min*, max*, and extremum*
  285. (test-equal '(4 . "a") (max* car '(1 . "c") '(4 . "a")))
  286. (receive (max value)
  287. (max+value* car '(1 . 20) '(4 . 2))
  288. (test-equal '(4 . 2) max)
  289. (test-equal 4 value))
  290. (test-equal '(1 . "c") (min* car '(1 . "c") '(4 . "a")))
  291. (receive (min value)
  292. (min+value* car '(1 . 20) '(4 . 2))
  293. (test-equal '(1 . 20) min)
  294. (test-equal 1 value))
  295. (receive (min value)
  296. (extremum+value* cdr string<? '(1 . "c") '(4 . "a") '(0 . "b"))
  297. (test-equal '(4 . "a") min)
  298. (test-equal "a" value))
  299. ;;; Check substitute-bindings and substitute-map
  300. (define vals '(("r" . 2) (foo . 20) ("bar" . "baz")))
  301. (test-equal "substitute-bindings"
  302. '(+ 20 20)
  303. (substitute-bindings '(+ foo foo) vals))
  304. (test-equal "substitute-map"
  305. '(format #t "~a: ~a\n" "baz" (/ 2 20))
  306. (substitute-map '(format #t "~a: ~a\n" "bar" (/ "r" foo)) vals))
  307. ;;; Check list-mask
  308. (test-equal "list-mask"
  309. '(#f 1 2 #f #f 5)
  310. (list-mask '(0 1 2 3 4 5) '(1 2 5)))
  311. (test-equal "list-mask with dflt"
  312. '(0 1 2 0 0 5)
  313. (list-mask '(0 1 2 3 4 5) '(1 2 5) 0))
  314. ;;; Check list-indices
  315. (test-equal "list-indices"
  316. '(0 2 4)
  317. (list-indices even? '(0 1 2 3 4 5)))
  318. (test-equal "list-indices"
  319. '(1 3 5)
  320. (list-indices odd? '(0 1 2 3 4 5)))
  321. (test-equal "list-indices"
  322. '(0 4)
  323. (list-indices identity '(#t #f #f #f #t)))
  324. (test-end "util-test")