123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410 |
- #!/cray/css/libsci/bavier/bin/guile -s
- !#
- (define-module (data-mining indexed-matrix)
- #:use-module (srfi srfi-9)
- #:use-module (srfi srfi-26)
- #:use-module (data-mining hash-util)
- #:export (make-indexed-matrix
- make-indexed-matrix/shared
- indexed-matrix-set!
- indexed-matrix-ref
- indexed-matrix-handle
- indexed-matrix-length
- indexed-matrix-width
- indexed-matrix-row-tags
- indexed-matrix-column-tags
- indexed-matrix-for-each-row
- indexed-matrix-for-each-column
- indexed-matrix-for-each-entry
- indexed-matrix-map-rows
- indexed-matrix-map-columns
- indexed-matrix-map-indexed-rows
- indexed-matrix-map-indexed-columns
- indexed-matrix->row-list
- indexed-matrix->column-list
- indexed-matrix->array
- indexed-matrix-row-entries
- indexed-matrix-indexed-row
- indexed-matrix-column-entries
- indexed-matrix-indexed-column
- indexed-matrix-reindex-row!
- indexed-matrix-reindex-column!
- ))
- (define-record-type indexed-matrix
- (make-indexed-matrix* row-tags-table col-tags-table rows cols)
- indexed-matrix?
- (row-tags-table row-tags-table set-row-tags-table!)
- (col-tags-table col-tags-table set-col-tags-table!)
- (rows indexed-matrix-rows set-indexed-matrix-rows!)
- (cols indexed-matrix-cols set-indexed-matrix-cols!))
- (define* (make-indexed-matrix #:key
- (row-tags '())
- (column-tags '()))
- (let ((m (make-indexed-matrix*
- ;; We use hash-table's as sets for row-tags and col-tags
- (make-hash-table) ;row-tags-table
- (make-hash-table) ;col-tags-table
- (make-hash-table) ;rows
- (make-hash-table) ;cols
- )))
- (set-indexed-matrix-row-tags! m row-tags)
- (set-indexed-matrix-column-tags! m column-tags)
- m))
- ;;; Make a new shared indexed-matrix. The new indexed-matrix will
- ;;; share underlying storage with M, but will only operate on the row
- ;;; indices in I and column indices in J. New entries added to the
- ;;; created indexed-matrix will not be seen by M, but changing an
- ;;; existing entry *will* be seen by M. If the I and J arguments are
- ;;; not given then all entries of the row or column, respecitively,
- ;;; will be shared in the new indexed-matrix.
- (define* (make-indexed-matrix/shared
- m
- #:key
- (row-indices (indexed-matrix-row-tags m))
- (column-indices (indexed-matrix-column-tags m)))
- (let ((n (make-indexed-matrix
- #:row-tags row-indices
- #:column-tags column-indices)))
- (set-indexed-matrix-rows! n (indexed-matrix-rows m))
- (set-indexed-matrix-cols! n (indexed-matrix-cols m))
- n))
- (define-public (indexed-matrix-set! m e i j)
- (let ((rh (hash-create-handle! (indexed-matrix-rows m) i (make-hash-table)))
- (ch (hash-create-handle! (indexed-matrix-cols m) j (make-hash-table))))
- (begin
- (hash-set! (row-tags-table m) i i)
- (hash-set! (col-tags-table m) j j)
- (hash-set! (cdr rh) j e)
- (hash-set! (cdr ch) i e))))
- ;;; Return the tuple (i j . e) or #f if no entry
- (define-public (indexed-matrix-handle m i j)
- (let ((rh (hash-get-handle (indexed-matrix-rows m) i)))
- (and rh (let ((ch (hash-get-handle (cdr rh) j)))
- (and ch (cons (car rh) ch))))))
- (define-public (indexed-matrix-ref m i j)
- (let ((rh (hash-get-handle (indexed-matrix-rows m) i)))
- (and rh (hash-ref (cdr rh) j))))
- (define-public (indexed-matrix-for-each-row proc m)
- ;; For each row i of the indexed matrix, call (proc i <entries>),
- ;; where <entries> is the list of entries in that row. The order in
- ;; which is the rows are traversed is not guaranteed.
- (for-each ;TODO: formulate in terms of hash-for-each
- (lambda (i)
- (proc i (indexed-matrix-row-entries m i)))
- (indexed-matrix-row-tags m)))
- (define-public (indexed-matrix-for-each-column proc m)
- (for-each ;TODO: formulate in terms of hash-for-each
- (lambda (j)
- (proc j (indexed-matrix-column-entries m j)))
- (indexed-matrix-column-tags m)))
- ;;; proc is called as (i j e), where i is the row tag and j is the column tag
- ;;; associated with the entry e.
- (define-public (indexed-matrix-for-each-entry proc m)
- (for-each
- (lambda (i)
- (let ((rh (hash-get-handle (indexed-matrix-rows m) i)))
- (if rh
- (for-each
- (lambda (j)
- (let ((ch (hash-get-handle (cdr rh) j)))
- (if ch
- (proc i j (cdr ch)))))
- (indexed-matrix-column-tags m)))))
- (indexed-matrix-row-tags m)))
- (define-public (indexed-matrix-map-rows proc m)
- (map
- (lambda (i)
- (proc i (indexed-matrix-row-entries m i)))
- (indexed-matrix-row-tags m)))
- (define-public (indexed-matrix-map-columns proc m)
- (map
- (lambda (j)
- (proc j (indexed-matrix-column-entries m j)))
- (indexed-matrix-column-tags m)))
- (define-public (indexed-matrix-map-indexed-rows proc m)
- (map
- (lambda (i)
- (proc i (indexed-matrix-indexed-row m i)))
- (indexed-matrix-row-tags m)))
- (define-public (indexed-matrix-map-indexed-columns proc m)
- (map
- (lambda (j)
- (proc j (indexed-matrix-indexed-column m j)))
- (indexed-matrix-column-tags m)))
- ;;; Return a dense representation of this indexed-matrix in row-major list
- ;;; form. Index information is lost through this transformation.
- ;;; Non-existent entries appear as #f, which will conflict with explicit #f
- ;;; entries if the matrix is not already known to be dense/completely filled.
- (define-public (indexed-matrix->row-list m)
- (map
- (lambda (col)
- (map (cut hash-ref col <>)
- (indexed-matrix-column-tags m)))
- (map (cut hash-ref (indexed-matrix-rows m) <>)
- (indexed-matrix-row-tags m))))
- (define-public (indexed-matrix->column-list m)
- (map
- (lambda (row)
- (map (cut hash-ref row <>)
- (indexed-matrix-row-tags m)))
- (map (cut hash-ref (indexed-matrix-cols m) <>)
- (indexed-matrix-column-tags m))))
- ;;; Return a list of the entries in the given row. Returns #f if I is an
- ;;; invalid row index.
- (define-public (indexed-matrix-row-entries m i)
- (let ((rh (hash-get-handle (indexed-matrix-rows m) i)))
- (and rh (map (cut hash-ref (cdr rh) <>)
- (indexed-matrix-column-tags m)))))
- ;;; Return an association list whose keys are the column tags and values are
- ;;; the entries those tags map to.
- ;;;
- ;;; E.g. If M is ::
- ;;;
- ;;; a b c
- ;;; - - -
- ;;; 1 : 1 2 3
- ;;; 2 : 4 5 6
- ;;;
- ;;; then
- ;;;
- ;;; guile> (indexed-matrix-indexed-row m 2)
- ;;; => ((a . 4) (b . 5) (c . 6))
- ;;;
- (define-public (indexed-matrix-indexed-row m i)
- (let ((rh (hash-get-handle (indexed-matrix-rows m) i)))
- (and rh (map (lambda (c)
- (cons c (hash-ref (cdr rh) c)))
- (indexed-matrix-column-tags m)))))
- ;;; Return a list of the entries in the given column. Returns #f if J is an
- ;;; invalid column index.
- (define-public (indexed-matrix-column-entries m j)
- (let ((ch (hash-get-handle (indexed-matrix-cols m) j)))
- (and ch (map (cut hash-ref (cdr ch) <>)
- (indexed-matrix-row-tags m)))))
- ;;; Analogous to indexed-matrix-indexed-row, but column-wise
- (define-public (indexed-matrix-indexed-column m j)
- (let ((ch (hash-get-handle (indexed-matrix-cols m) j)))
- (and ch (map (lambda (c)
- (cons c (hash-ref (cdr ch) c)))
- (indexed-matrix-row-tags m)))))
- ;;; Change all references to the row index I to use NI instead.
- (define-public (indexed-matrix-reindex-row! m i ni)
- (hash-move-key (row-tags-table m) i ni (make-hash-table))
- (hash-move-key (indexed-matrix-rows m) i ni (make-hash-table))
- ;; Update references in the column hashes
- (hash-for-each (lambda (j jv) (hash-move-key jv i ni))
- (indexed-matrix-cols m)))
- ;;; Change all references to the column index J to use NJ instead.
- (define-public (indexed-matrix-reindex-column! m j nj)
- (hash-move-key (col-tags-table m) j nj (make-hash-table))
- (hash-move-key (indexed-matrix-cols m) j nj (make-hash-table))
- ;; Update reference in the row hashes
- (hash-for-each (lambda (i iv) (hash-move-key iv j nj))
- (indexed-matrix-rows m)))
- ;;; Transform the indexed-matrix into a 2-dimensional scheme array. The row
- ;;; and column permutations are based on the order of the indices as they are
- ;;; found in (row-tags M) and (col-tags M), as this information cannot be
- ;;; stored in the resulting array.
- (define-public (indexed-matrix->array m)
- (let ((a (make-array #f
- (indexed-matrix-length m)
- (indexed-matrix-width m)))
- (row-map (list->index-map (indexed-matrix-row-tags m)))
- (col-map (list->index-map (indexed-matrix-column-tags m))))
- (begin
- (indexed-matrix-for-each-entry
- (lambda (i j e)
- (array-set! a e (assoc-ref row-map i) (assoc-ref col-map j)))
- m)
- a)))
- ;;; Given a list, create an association list which maps each element of the
- ;;; given list to the index at which that element resides in the list.
- (define-public (list->index-map lst)
- ;; An iterative algorithm allows us to traverse the list only once. The
- ;; other alternative is (map cons lst (iota (length lst))), but requires two
- ;; list traversals.
- (define (iter c l a)
- (if (null? l)
- a
- (iter (1+ c) (cdr l) (cons (cons (car l) c) a))))
- (iter 0 lst '()))
- (define-public (indexed-matrix-length m)
- (hash-table-size (row-tags-table m)))
- (define-public (indexed-matrix-width m)
- (hash-table-size (col-tags-table m)))
- (define (first-arg a . b) a)
- (define (second-arg a b . c) b)
- (define (third-arg a b c . d) c)
- ;; Provide transparent list-based access to internal hash sets.
- (define (indexed-matrix-row-tags m)
- (hash-map->list second-arg (row-tags-table m)))
- (define (set-indexed-matrix-row-tags! m rt)
- (let ((rtt (make-hash-table)))
- (for-each (lambda (t) (hash-set! rtt t t)) rt)
- (set-row-tags-table! m rtt)))
- (define (indexed-matrix-column-tags m)
- (hash-map->list second-arg (col-tags-table m)))
- (define (set-indexed-matrix-column-tags! m ct)
- (let ((ctt (make-hash-table)))
- (for-each (lambda (t) (hash-set! ctt t t)) ct)
- (set-col-tags-table! m ctt)))
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; Tests
- (use-modules (srfi srfi-64)
- (srfi srfi-1)
- (ice-9 format)
- (data-mining test-util))
- (test-begin "indexed-matrix-test")
- (define im (make-indexed-matrix))
- (test-assert (indexed-matrix? im))
- ;;; If we set and then ref, we should get back what we set
- ;;; 1 2
- ;;; "a" 10 "hi"
- ;;; "b" 20 "there"
- (indexed-matrix-set! im 10 "a" 1)
- (test-assert (eq? (indexed-matrix-ref im "a" 1) 10))
- (indexed-matrix-set! im "hi" "a" 2)
- (test-assert (eq? (indexed-matrix-ref im "a" 2) "hi"))
- (indexed-matrix-set! im 20 "b" 1)
- (test-assert (eq? (indexed-matrix-ref im "b" 1) 20))
- (indexed-matrix-set! im "there" "b" 2)
- (test-assert (eq? (indexed-matrix-ref im "b" 2) "there"))
- ;;; Check dimensions
- (test-assert (eq? (indexed-matrix-width im) 2))
- (test-assert (eq? (indexed-matrix-length im) 2))
- ;;; Check indexed-matrix-handle
- (define im_a1-handle (indexed-matrix-handle im "a" 1))
- (test-assert (eq? (car im_a1-handle) "a"))
- (test-assert (eq? (cadr im_a1-handle) 1))
- (test-assert (eq? (cddr im_a1-handle) 10))
- (test-assert (not (indexed-matrix-handle im "foo" 9000))) ;return #f for invalid
- ;;; XXX: This test is fragile because it depends on the order in which
- ;;; row entries are returned by indexed-matrix-row-entries.
- (define row-globs (make-list (indexed-matrix-length im) #f))
- (indexed-matrix-for-each-row
- (let ((row-num 0))
- (lambda (i es)
- (list-set! row-globs row-num
- (format #f "~{~a~}" es))
- (set! row-num (1+ row-num))))
- im)
- (test-assert (member "hi10" row-globs))
- (test-assert (member "there20" row-globs))
- ;;; Check row-tags, col-tags
- (test-assert ((list-permutation? '("a" "b")) (indexed-matrix-row-tags im)))
- (test-assert ((list-permutation? '(1 2)) (indexed-matrix-column-tags im)))
- ;;; Check for-each-entry
- (define entries-seen '())
- (indexed-matrix-for-each-entry
- (lambda (i j e)
- (test-assert (indexed-matrix-handle im i j)) ;there is an entry at this point
- (test-assert (indexed-matrix-ref im i j) e) ;that entry is the same
- (set! entries-seen (cons e entries-seen)))
- im)
- (test-assert (null? (lset-difference equal? '(10 20 "hi" "there") entries-seen)))
- ;;; Check row-entries
- (test-assert (null? (lset-difference equal? '(10 "hi")
- (indexed-matrix-row-entries im "a"))))
- (test-assert (null? (lset-difference equal? '(20 "there")
- (indexed-matrix-row-entries im "b"))))
- ;;; Check row-list and column-list
- (define rl (indexed-matrix->row-list im))
- (test-assert (find (list-permutation? '(10 "hi")) rl))
- (test-assert (find (list-permutation? '(20 "there")) rl))
- (define cl (indexed-matrix->column-list im))
- (test-assert (find (list-permutation? '(10 20)) cl))
- (test-assert (find (list-permutation? '("hi" "there")) cl))
- ;;; Check indexed-row and indexed-colum
- (test-assert (same-map? (indexed-matrix-indexed-row im "a")
- '((1 . 10) (2 . "hi"))))
- (test-assert (same-map? (indexed-matrix-indexed-row im "b")
- '((1 . 20) (2 . "there"))))
- (test-assert (same-map? (indexed-matrix-indexed-column im 1)
- '(("a" . 10) ("b" . 20))))
- (test-assert (same-map? (indexed-matrix-indexed-column im 2)
- '(("a" . "hi") ("b" . "there"))))
- ;;; Test internal helper routine hash-move-key
- (define h (make-hash-table))
- (hash-set! h "foo" 9001)
- (test-eq "hash size" 1 (hash-table-size h))
- (test-assert (hash-ref h "foo"))
- (test-assert (equal? (hash-ref h "foo") 9001))
- (hash-move-key h "foo" "bar")
- (test-eq "hash size after move" 1 (hash-table-size h))
- (test-assert (hash-ref h "bar"))
- (test-assert (equal? (hash-ref h "bar") 9001))
- ;;; Test make-indexed-matrix/shared
- (define im' (make-indexed-matrix/shared im))
- (test-eq "shared width" 2 (indexed-matrix-width im'))
- (test-eq "shared length" 2 (indexed-matrix-length im'))
- (indexed-matrix-set! im' "you" "c" 2)
- (test-eq "original width retained" 2 (indexed-matrix-width im))
- (test-eq "original length retained" 2 (indexed-matrix-length im))
- (test-eq "new length" 3 (indexed-matrix-length im'))
- (test-assert "original column tags remain"
- ((list-permutation? '(1 2))
- (indexed-matrix-column-tags im)))
- (test-assert "original row tags remain"
- ((list-permutation? '("a" "b"))
- (indexed-matrix-row-tags im)))
- (test-assert "new row tags"
- ((list-permutation? '("a" "b" "c"))
- (indexed-matrix-row-tags im')))
- (indexed-matrix-set! im' 13 "a" 1)
- (test-eq "shared changes propagate"
- 13 (indexed-matrix-ref im "a" 1))
- (test-end "indexed-matrix-test")
|