indexed-matrix.scm 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410
  1. #!/cray/css/libsci/bavier/bin/guile -s
  2. !#
  3. (define-module (data-mining indexed-matrix)
  4. #:use-module (srfi srfi-9)
  5. #:use-module (srfi srfi-26)
  6. #:use-module (data-mining hash-util)
  7. #:export (make-indexed-matrix
  8. make-indexed-matrix/shared
  9. indexed-matrix-set!
  10. indexed-matrix-ref
  11. indexed-matrix-handle
  12. indexed-matrix-length
  13. indexed-matrix-width
  14. indexed-matrix-row-tags
  15. indexed-matrix-column-tags
  16. indexed-matrix-for-each-row
  17. indexed-matrix-for-each-column
  18. indexed-matrix-for-each-entry
  19. indexed-matrix-map-rows
  20. indexed-matrix-map-columns
  21. indexed-matrix-map-indexed-rows
  22. indexed-matrix-map-indexed-columns
  23. indexed-matrix->row-list
  24. indexed-matrix->column-list
  25. indexed-matrix->array
  26. indexed-matrix-row-entries
  27. indexed-matrix-indexed-row
  28. indexed-matrix-column-entries
  29. indexed-matrix-indexed-column
  30. indexed-matrix-reindex-row!
  31. indexed-matrix-reindex-column!
  32. ))
  33. (define-record-type indexed-matrix
  34. (make-indexed-matrix* row-tags-table col-tags-table rows cols)
  35. indexed-matrix?
  36. (row-tags-table row-tags-table set-row-tags-table!)
  37. (col-tags-table col-tags-table set-col-tags-table!)
  38. (rows indexed-matrix-rows set-indexed-matrix-rows!)
  39. (cols indexed-matrix-cols set-indexed-matrix-cols!))
  40. (define* (make-indexed-matrix #:key
  41. (row-tags '())
  42. (column-tags '()))
  43. (let ((m (make-indexed-matrix*
  44. ;; We use hash-table's as sets for row-tags and col-tags
  45. (make-hash-table) ;row-tags-table
  46. (make-hash-table) ;col-tags-table
  47. (make-hash-table) ;rows
  48. (make-hash-table) ;cols
  49. )))
  50. (set-indexed-matrix-row-tags! m row-tags)
  51. (set-indexed-matrix-column-tags! m column-tags)
  52. m))
  53. ;;; Make a new shared indexed-matrix. The new indexed-matrix will
  54. ;;; share underlying storage with M, but will only operate on the row
  55. ;;; indices in I and column indices in J. New entries added to the
  56. ;;; created indexed-matrix will not be seen by M, but changing an
  57. ;;; existing entry *will* be seen by M. If the I and J arguments are
  58. ;;; not given then all entries of the row or column, respecitively,
  59. ;;; will be shared in the new indexed-matrix.
  60. (define* (make-indexed-matrix/shared
  61. m
  62. #:key
  63. (row-indices (indexed-matrix-row-tags m))
  64. (column-indices (indexed-matrix-column-tags m)))
  65. (let ((n (make-indexed-matrix
  66. #:row-tags row-indices
  67. #:column-tags column-indices)))
  68. (set-indexed-matrix-rows! n (indexed-matrix-rows m))
  69. (set-indexed-matrix-cols! n (indexed-matrix-cols m))
  70. n))
  71. (define-public (indexed-matrix-set! m e i j)
  72. (let ((rh (hash-create-handle! (indexed-matrix-rows m) i (make-hash-table)))
  73. (ch (hash-create-handle! (indexed-matrix-cols m) j (make-hash-table))))
  74. (begin
  75. (hash-set! (row-tags-table m) i i)
  76. (hash-set! (col-tags-table m) j j)
  77. (hash-set! (cdr rh) j e)
  78. (hash-set! (cdr ch) i e))))
  79. ;;; Return the tuple (i j . e) or #f if no entry
  80. (define-public (indexed-matrix-handle m i j)
  81. (let ((rh (hash-get-handle (indexed-matrix-rows m) i)))
  82. (and rh (let ((ch (hash-get-handle (cdr rh) j)))
  83. (and ch (cons (car rh) ch))))))
  84. (define-public (indexed-matrix-ref m i j)
  85. (let ((rh (hash-get-handle (indexed-matrix-rows m) i)))
  86. (and rh (hash-ref (cdr rh) j))))
  87. (define-public (indexed-matrix-for-each-row proc m)
  88. ;; For each row i of the indexed matrix, call (proc i <entries>),
  89. ;; where <entries> is the list of entries in that row. The order in
  90. ;; which is the rows are traversed is not guaranteed.
  91. (for-each ;TODO: formulate in terms of hash-for-each
  92. (lambda (i)
  93. (proc i (indexed-matrix-row-entries m i)))
  94. (indexed-matrix-row-tags m)))
  95. (define-public (indexed-matrix-for-each-column proc m)
  96. (for-each ;TODO: formulate in terms of hash-for-each
  97. (lambda (j)
  98. (proc j (indexed-matrix-column-entries m j)))
  99. (indexed-matrix-column-tags m)))
  100. ;;; proc is called as (i j e), where i is the row tag and j is the column tag
  101. ;;; associated with the entry e.
  102. (define-public (indexed-matrix-for-each-entry proc m)
  103. (for-each
  104. (lambda (i)
  105. (let ((rh (hash-get-handle (indexed-matrix-rows m) i)))
  106. (if rh
  107. (for-each
  108. (lambda (j)
  109. (let ((ch (hash-get-handle (cdr rh) j)))
  110. (if ch
  111. (proc i j (cdr ch)))))
  112. (indexed-matrix-column-tags m)))))
  113. (indexed-matrix-row-tags m)))
  114. (define-public (indexed-matrix-map-rows proc m)
  115. (map
  116. (lambda (i)
  117. (proc i (indexed-matrix-row-entries m i)))
  118. (indexed-matrix-row-tags m)))
  119. (define-public (indexed-matrix-map-columns proc m)
  120. (map
  121. (lambda (j)
  122. (proc j (indexed-matrix-column-entries m j)))
  123. (indexed-matrix-column-tags m)))
  124. (define-public (indexed-matrix-map-indexed-rows proc m)
  125. (map
  126. (lambda (i)
  127. (proc i (indexed-matrix-indexed-row m i)))
  128. (indexed-matrix-row-tags m)))
  129. (define-public (indexed-matrix-map-indexed-columns proc m)
  130. (map
  131. (lambda (j)
  132. (proc j (indexed-matrix-indexed-column m j)))
  133. (indexed-matrix-column-tags m)))
  134. ;;; Return a dense representation of this indexed-matrix in row-major list
  135. ;;; form. Index information is lost through this transformation.
  136. ;;; Non-existent entries appear as #f, which will conflict with explicit #f
  137. ;;; entries if the matrix is not already known to be dense/completely filled.
  138. (define-public (indexed-matrix->row-list m)
  139. (map
  140. (lambda (col)
  141. (map (cut hash-ref col <>)
  142. (indexed-matrix-column-tags m)))
  143. (map (cut hash-ref (indexed-matrix-rows m) <>)
  144. (indexed-matrix-row-tags m))))
  145. (define-public (indexed-matrix->column-list m)
  146. (map
  147. (lambda (row)
  148. (map (cut hash-ref row <>)
  149. (indexed-matrix-row-tags m)))
  150. (map (cut hash-ref (indexed-matrix-cols m) <>)
  151. (indexed-matrix-column-tags m))))
  152. ;;; Return a list of the entries in the given row. Returns #f if I is an
  153. ;;; invalid row index.
  154. (define-public (indexed-matrix-row-entries m i)
  155. (let ((rh (hash-get-handle (indexed-matrix-rows m) i)))
  156. (and rh (map (cut hash-ref (cdr rh) <>)
  157. (indexed-matrix-column-tags m)))))
  158. ;;; Return an association list whose keys are the column tags and values are
  159. ;;; the entries those tags map to.
  160. ;;;
  161. ;;; E.g. If M is ::
  162. ;;;
  163. ;;; a b c
  164. ;;; - - -
  165. ;;; 1 : 1 2 3
  166. ;;; 2 : 4 5 6
  167. ;;;
  168. ;;; then
  169. ;;;
  170. ;;; guile> (indexed-matrix-indexed-row m 2)
  171. ;;; => ((a . 4) (b . 5) (c . 6))
  172. ;;;
  173. (define-public (indexed-matrix-indexed-row m i)
  174. (let ((rh (hash-get-handle (indexed-matrix-rows m) i)))
  175. (and rh (map (lambda (c)
  176. (cons c (hash-ref (cdr rh) c)))
  177. (indexed-matrix-column-tags m)))))
  178. ;;; Return a list of the entries in the given column. Returns #f if J is an
  179. ;;; invalid column index.
  180. (define-public (indexed-matrix-column-entries m j)
  181. (let ((ch (hash-get-handle (indexed-matrix-cols m) j)))
  182. (and ch (map (cut hash-ref (cdr ch) <>)
  183. (indexed-matrix-row-tags m)))))
  184. ;;; Analogous to indexed-matrix-indexed-row, but column-wise
  185. (define-public (indexed-matrix-indexed-column m j)
  186. (let ((ch (hash-get-handle (indexed-matrix-cols m) j)))
  187. (and ch (map (lambda (c)
  188. (cons c (hash-ref (cdr ch) c)))
  189. (indexed-matrix-row-tags m)))))
  190. ;;; Change all references to the row index I to use NI instead.
  191. (define-public (indexed-matrix-reindex-row! m i ni)
  192. (hash-move-key (row-tags-table m) i ni (make-hash-table))
  193. (hash-move-key (indexed-matrix-rows m) i ni (make-hash-table))
  194. ;; Update references in the column hashes
  195. (hash-for-each (lambda (j jv) (hash-move-key jv i ni))
  196. (indexed-matrix-cols m)))
  197. ;;; Change all references to the column index J to use NJ instead.
  198. (define-public (indexed-matrix-reindex-column! m j nj)
  199. (hash-move-key (col-tags-table m) j nj (make-hash-table))
  200. (hash-move-key (indexed-matrix-cols m) j nj (make-hash-table))
  201. ;; Update reference in the row hashes
  202. (hash-for-each (lambda (i iv) (hash-move-key iv j nj))
  203. (indexed-matrix-rows m)))
  204. ;;; Transform the indexed-matrix into a 2-dimensional scheme array. The row
  205. ;;; and column permutations are based on the order of the indices as they are
  206. ;;; found in (row-tags M) and (col-tags M), as this information cannot be
  207. ;;; stored in the resulting array.
  208. (define-public (indexed-matrix->array m)
  209. (let ((a (make-array #f
  210. (indexed-matrix-length m)
  211. (indexed-matrix-width m)))
  212. (row-map (list->index-map (indexed-matrix-row-tags m)))
  213. (col-map (list->index-map (indexed-matrix-column-tags m))))
  214. (begin
  215. (indexed-matrix-for-each-entry
  216. (lambda (i j e)
  217. (array-set! a e (assoc-ref row-map i) (assoc-ref col-map j)))
  218. m)
  219. a)))
  220. ;;; Given a list, create an association list which maps each element of the
  221. ;;; given list to the index at which that element resides in the list.
  222. (define-public (list->index-map lst)
  223. ;; An iterative algorithm allows us to traverse the list only once. The
  224. ;; other alternative is (map cons lst (iota (length lst))), but requires two
  225. ;; list traversals.
  226. (define (iter c l a)
  227. (if (null? l)
  228. a
  229. (iter (1+ c) (cdr l) (cons (cons (car l) c) a))))
  230. (iter 0 lst '()))
  231. (define-public (indexed-matrix-length m)
  232. (hash-table-size (row-tags-table m)))
  233. (define-public (indexed-matrix-width m)
  234. (hash-table-size (col-tags-table m)))
  235. (define (first-arg a . b) a)
  236. (define (second-arg a b . c) b)
  237. (define (third-arg a b c . d) c)
  238. ;; Provide transparent list-based access to internal hash sets.
  239. (define (indexed-matrix-row-tags m)
  240. (hash-map->list second-arg (row-tags-table m)))
  241. (define (set-indexed-matrix-row-tags! m rt)
  242. (let ((rtt (make-hash-table)))
  243. (for-each (lambda (t) (hash-set! rtt t t)) rt)
  244. (set-row-tags-table! m rtt)))
  245. (define (indexed-matrix-column-tags m)
  246. (hash-map->list second-arg (col-tags-table m)))
  247. (define (set-indexed-matrix-column-tags! m ct)
  248. (let ((ctt (make-hash-table)))
  249. (for-each (lambda (t) (hash-set! ctt t t)) ct)
  250. (set-col-tags-table! m ctt)))
  251. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  252. ;;; Tests
  253. (use-modules (srfi srfi-64)
  254. (srfi srfi-1)
  255. (ice-9 format)
  256. (data-mining test-util))
  257. (test-begin "indexed-matrix-test")
  258. (define im (make-indexed-matrix))
  259. (test-assert (indexed-matrix? im))
  260. ;;; If we set and then ref, we should get back what we set
  261. ;;; 1 2
  262. ;;; "a" 10 "hi"
  263. ;;; "b" 20 "there"
  264. (indexed-matrix-set! im 10 "a" 1)
  265. (test-assert (eq? (indexed-matrix-ref im "a" 1) 10))
  266. (indexed-matrix-set! im "hi" "a" 2)
  267. (test-assert (eq? (indexed-matrix-ref im "a" 2) "hi"))
  268. (indexed-matrix-set! im 20 "b" 1)
  269. (test-assert (eq? (indexed-matrix-ref im "b" 1) 20))
  270. (indexed-matrix-set! im "there" "b" 2)
  271. (test-assert (eq? (indexed-matrix-ref im "b" 2) "there"))
  272. ;;; Check dimensions
  273. (test-assert (eq? (indexed-matrix-width im) 2))
  274. (test-assert (eq? (indexed-matrix-length im) 2))
  275. ;;; Check indexed-matrix-handle
  276. (define im_a1-handle (indexed-matrix-handle im "a" 1))
  277. (test-assert (eq? (car im_a1-handle) "a"))
  278. (test-assert (eq? (cadr im_a1-handle) 1))
  279. (test-assert (eq? (cddr im_a1-handle) 10))
  280. (test-assert (not (indexed-matrix-handle im "foo" 9000))) ;return #f for invalid
  281. ;;; XXX: This test is fragile because it depends on the order in which
  282. ;;; row entries are returned by indexed-matrix-row-entries.
  283. (define row-globs (make-list (indexed-matrix-length im) #f))
  284. (indexed-matrix-for-each-row
  285. (let ((row-num 0))
  286. (lambda (i es)
  287. (list-set! row-globs row-num
  288. (format #f "~{~a~}" es))
  289. (set! row-num (1+ row-num))))
  290. im)
  291. (test-assert (member "hi10" row-globs))
  292. (test-assert (member "there20" row-globs))
  293. ;;; Check row-tags, col-tags
  294. (test-assert ((list-permutation? '("a" "b")) (indexed-matrix-row-tags im)))
  295. (test-assert ((list-permutation? '(1 2)) (indexed-matrix-column-tags im)))
  296. ;;; Check for-each-entry
  297. (define entries-seen '())
  298. (indexed-matrix-for-each-entry
  299. (lambda (i j e)
  300. (test-assert (indexed-matrix-handle im i j)) ;there is an entry at this point
  301. (test-assert (indexed-matrix-ref im i j) e) ;that entry is the same
  302. (set! entries-seen (cons e entries-seen)))
  303. im)
  304. (test-assert (null? (lset-difference equal? '(10 20 "hi" "there") entries-seen)))
  305. ;;; Check row-entries
  306. (test-assert (null? (lset-difference equal? '(10 "hi")
  307. (indexed-matrix-row-entries im "a"))))
  308. (test-assert (null? (lset-difference equal? '(20 "there")
  309. (indexed-matrix-row-entries im "b"))))
  310. ;;; Check row-list and column-list
  311. (define rl (indexed-matrix->row-list im))
  312. (test-assert (find (list-permutation? '(10 "hi")) rl))
  313. (test-assert (find (list-permutation? '(20 "there")) rl))
  314. (define cl (indexed-matrix->column-list im))
  315. (test-assert (find (list-permutation? '(10 20)) cl))
  316. (test-assert (find (list-permutation? '("hi" "there")) cl))
  317. ;;; Check indexed-row and indexed-colum
  318. (test-assert (same-map? (indexed-matrix-indexed-row im "a")
  319. '((1 . 10) (2 . "hi"))))
  320. (test-assert (same-map? (indexed-matrix-indexed-row im "b")
  321. '((1 . 20) (2 . "there"))))
  322. (test-assert (same-map? (indexed-matrix-indexed-column im 1)
  323. '(("a" . 10) ("b" . 20))))
  324. (test-assert (same-map? (indexed-matrix-indexed-column im 2)
  325. '(("a" . "hi") ("b" . "there"))))
  326. ;;; Test internal helper routine hash-move-key
  327. (define h (make-hash-table))
  328. (hash-set! h "foo" 9001)
  329. (test-eq "hash size" 1 (hash-table-size h))
  330. (test-assert (hash-ref h "foo"))
  331. (test-assert (equal? (hash-ref h "foo") 9001))
  332. (hash-move-key h "foo" "bar")
  333. (test-eq "hash size after move" 1 (hash-table-size h))
  334. (test-assert (hash-ref h "bar"))
  335. (test-assert (equal? (hash-ref h "bar") 9001))
  336. ;;; Test make-indexed-matrix/shared
  337. (define im' (make-indexed-matrix/shared im))
  338. (test-eq "shared width" 2 (indexed-matrix-width im'))
  339. (test-eq "shared length" 2 (indexed-matrix-length im'))
  340. (indexed-matrix-set! im' "you" "c" 2)
  341. (test-eq "original width retained" 2 (indexed-matrix-width im))
  342. (test-eq "original length retained" 2 (indexed-matrix-length im))
  343. (test-eq "new length" 3 (indexed-matrix-length im'))
  344. (test-assert "original column tags remain"
  345. ((list-permutation? '(1 2))
  346. (indexed-matrix-column-tags im)))
  347. (test-assert "original row tags remain"
  348. ((list-permutation? '("a" "b"))
  349. (indexed-matrix-row-tags im)))
  350. (test-assert "new row tags"
  351. ((list-permutation? '("a" "b" "c"))
  352. (indexed-matrix-row-tags im')))
  353. (indexed-matrix-set! im' 13 "a" 1)
  354. (test-eq "shared changes propagate"
  355. 13 (indexed-matrix-ref im "a" 1))
  356. (test-end "indexed-matrix-test")