123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361 |
- (define-module (data-mining attributes)
- #:use-module (srfi srfi-9) ;define-record-type
- #:use-module (srfi srfi-1) ;any
- #:use-module (srfi srfi-26) ;cut
- #:use-module (data-mining util)
- #:export (make-attribute
- make-string-attribute
- make-numeric-attribute
- make-real-attribute ;alias to make-numeric-attribute
- make-nominal-attribute
- make-ordinal-attribute
- make-integer-attribute ;alias to make-ordinal-attribute
- symbol->attribute
- attribute?
- attribute-make-value
- attribute-value->string
- attribute-name
- set-attribute-name!
- attribute-domain
- set-attribute-domain!
- attribute-extend-domain!
- value-in-attribute-domain?
- attribute-dissectors
- nominal-dissector
- ordinal-dissector
- integer-dissector
- numeric-dissector
- true
- domain-union))
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; Attribute
- (define-record-type attribute
- (make-attribute*
- name ;This attribute's name.
- read-value ;Read a value of this
- ;attribute type from a string
- value->string ;Return a string
- ;representation of an
- ;attribute value
- domain-union ;A procedure that unites a
- ;domain to this attribute's
- ;curren domain. Called as
- ;(domain-union domain1 domain2)
- domain ;Allowed values
- dissector-gen ;Given an attribute and a list
- ;of values from the domain,
- ;create a list of possible
- ;dissectors
- )
- attribute?
- (name attribute-name set-attribute-name!)
- (read-value attribute-reader)
- (value->string attribute-stringerizer)
- (domain-union attribute-domain-union)
- (domain attribute-domain set-attribute-domain!)
- (dissector-gen attribute-dissector-generator))
- (define make-attribute
- (let ((count 0)) ;for default attribute names
- (lambda* (#:key
- (name #f)
- (read-value identity)
- (value->string (cut format #f "~a" <>))
- (domain-union domain-union)
- (domain #t) ;Default to accepting all values
- (dissector-gen nominal-dissector))
- (let ((name (or name
- (let ((next-count (1+ count)))
- (set! count next-count)
- (string->symbol
- (string-append "attr" (number->string count)))))))
- (make-attribute* name read-value value->string
- domain-union domain dissector-gen)))))
- ;;; Convenience constructors
- (define make-string-attribute make-attribute)
- (define* (make-numeric-attribute #:key
- (read-value string->number)
- (dissector-gen numeric-dissector)
- #:allow-other-keys #:rest args)
- (apply make-attribute
- (append args
- `(#:read-value ,read-value
- #:dissector-gen ,dissector-gen))))
- (define make-real-attribute make-numeric-attribute)
- (define* (make-nominal-attribute #:key
- (dissector-gen nominal-dissector)
- #:allow-other-keys #:rest args)
- (apply make-attribute
- (append args
- `(#:dissector-gen ,dissector-gen))))
- (define* (make-ordinal-attribute #:key
- (read-value string->number)
- (dissector-gen ordinal-dissector)
- #:allow-other-keys #:rest args)
- (apply make-attribute
- (append args
- `(#:read-value ,read-value
- #:dissector-gen ,dissector-gen))))
- (define make-integer-attribute make-ordinal-attribute)
- (define (symbol->attribute sym . args)
- (let ((ctor (case sym
- ((string) make-string-attribute)
- ((numeric real) make-numeric-attribute)
- ((integer) make-integer-attribute)
- ((nominal) make-nominal-attribute)
- ((ordinal) make-ordinal-attribute)
- (else make-attribute))))
- (apply ctor args)))
- (define (attribute-make-value attr str)
- ((attribute-reader attr) str))
- (define (attribute-value->string attr v)
- ((attribute-stringerizer attr) v))
- (define (domain-union d1 d2)
- ;; Naive union of two domains
- ;;
- ;; TODO: Optimize for different domain types
- (lambda (v)
- (or (value-in-domain? d1 v)
- (value-in-domain? d2 v))))
- (define (attribute-extend-domain! attr domain)
- ;; Extend this attribute's current domain with domain. Returns the
- ;; new domain, or #f if the union of the domains could not be formed
- ;; according to (attribute-domain-union attr).
- (let ((new-domain ((attribute-domain-union attr)
- (attribute-domain attr) domain)))
- (and=> new-domain
- (lambda (d) (set-attribute-domain! attr d)))))
- (define (value-in-domain? domain value)
- ;; Return #t if the domain includes the given value
- (cond
- ;; Universal acceptance
- ((eq? domain #t) #t)
- ;; A procedure that determines inclusion
- ((procedure? domain) (domain value))
- ;; A discrete set of acceptable values
- ((list? domain) (member value domain))
- ;; A pair should represent some sort of lower and upper bound
- ((pair? domain) (and (> value (car domain))
- (< value (cdr domain))))
- ;; A discrete set of acceptable values stored in a hash set
- ((hash-table? domain) (hash-ref domain value))))
- (define (value-in-attribute-domain? attr value)
- (value-in-domain? (attribute-domain attr) value))
- ;;; TODO: Would it be best to put the dissector procedures in the
- ;;; decision-tree module?
- (define* (nominal-dissector attr values #:optional (= equal?))
- (list (map (lambda (e)
- `(,= ,(attribute-name attr) ,e))
- (cond
- ;; If the domain is true, then dissect the given values
- ((boolean? (attribute-domain attr))
- (delete-duplicates values))
- (else (attribute-domain attr))))))
- (define* (ordinal-dissector attr values
- #:optional
- (< <)
- (= equal?)
- (which? cdr))
- ;; Every point that separates two unique values is potentially a
- ;; dissector
- (map (lambda (e)
- `((,< ,(attribute-name attr) ,(which? e)) (,true)))
- (borders (sort values <) =)))
- (define integer-dissector ordinal-dissector) ;convenience alias
- (define (pair-geometric-mean p)
- "Return the geometric mean of the numbers in the given pair."
- (let ((a (car p)) (b (cdr p)))
- (/ (+ a b) 2)))
- (define* (numeric-dissector attr values
- #:optional
- (which? pair-geometric-mean))
- (ordinal-dissector attr values < = which?))
- (define (attribute-dissectors attr values)
- ;; Return a list of dissectors according to
- ;; attribute-dissector-generator.
- (begin
- ;; First check that all values are in this attribute's domain
- (if (any (lambda (v)
- (not (value-in-attribute-domain? attr v)))
- values)
- (error "Cannot dissect values that are not in the domain!"))
- ((attribute-dissector-generator attr) attr values)))
- ;;; A routine that always return #t.
- (define true (const #t))
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;; Tests
- (use-modules (srfi srfi-64)
- (ice-9 format))
- (test-begin "attributes-test")
- (define a1 (make-attribute #:name "foo"))
- (test-assert (attribute? a1))
- (define num-attr (make-attribute #:name "num1"
- #:read-value string->number))
- (test-assert (number? (attribute-make-value num-attr "9")))
- (test-assert (number? (attribute-make-value num-attr "2.71828")))
- (test-assert (attribute-domain num-attr)) ;Domain must always be non-#f
- (define n1 (make-attribute #:name "n1"
- #:read-value string->number
- #:domain (lambda (n) (> n 0))))
- (test-assert (value-in-attribute-domain? n1 1.0))
- (test-assert (not (value-in-attribute-domain? n1 (- 2.0))))
- (define n2 (make-attribute #:name "n2"
- #:read-value string->number
- #:domain '(-2.5 . 3.1415926535)))
- (test-assert (not (value-in-attribute-domain? n2 -2.6)))
- (test-assert (value-in-attribute-domain? n2 -1.0))
- (test-assert (value-in-attribute-domain? n2 1.2))
- (test-assert (not (value-in-attribute-domain? n2 4)))
- (define n3 (make-attribute #:name "n3"
- #:read-value string->number
- #:domain '(1 2 4 8 16)))
- (for-each
- (lambda (n)
- (test-assert (value-in-attribute-domain? n3 n)))
- '(1 2 4 8 16))
- (for-each
- (lambda (n)
- (test-assert (not (value-in-attribute-domain? n3 n))))
- '(-2 0 3 9 13 25))
- (define domain-hash (make-hash-table))
- (for-each
- (lambda (n)
- (hash-set! domain-hash n n))
- '(-1.25 -.25 0. .25 1.25))
- (define n4 (make-attribute #:name "n4"
- #:read-value string->number
- #:domain domain-hash))
- (for-each
- (lambda (n)
- (test-assert (value-in-attribute-domain? n4 n)))
- '(-1.25 -.25 0. .25 1.25))
- (for-each
- (lambda (n)
- (test-assert (not (value-in-attribute-domain? n4 n))))
- '(-20 .23 0.01 100 9001))
- ;;; Test extend-domain
- (attribute-extend-domain! n1 '(-4 -2))
- (for-each
- (lambda (n)
- (test-assert (value-in-attribute-domain? n1 n)))
- '(-4 -2 1 2 3 4 5))
- (for-each
- (lambda (n)
- (test-assert (not (value-in-attribute-domain? n1 n))))
- '(-5 -1 0))
- (attribute-extend-domain! n1 (lambda (n) (< n -30)))
- (for-each
- (lambda (n)
- (test-assert (value-in-attribute-domain? n1 n)))
- '(-33 -32 -31 -4 -2 1 2 3 4 5))
- (for-each
- (lambda (n)
- (test-assert (not (value-in-attribute-domain? n1 n))))
- '(-30 -10))
- ;;; Test symbolic attributes
- (define s1 (make-attribute #:name "s1"
- #:read-value string->symbol
- #:dissector-gen (cut nominal-dissector <> <> eq?)))
- (test-assert (symbol? (attribute-make-value s1 "foo")))
- (test-assert (symbol? (attribute-make-value s1 "baR")))
- (set-attribute-domain! s1 '(foo bar baz))
- (for-each
- (lambda (s)
- (test-assert (value-in-attribute-domain? s1 s)))
- '(foo bar))
- (for-each
- (lambda (s)
- (test-assert (not (value-in-attribute-domain? s1 s))))
- '("biz " "bz" "foo" 40))
- (define s1-dissectors (attribute-dissectors s1 '()))
- ;; (format #t "~a\n" s1-dissectors)
- (test-eq "symbolic attr trivial dissector" 1 (length s1-dissectors))
- ;;; Test nominal attributes
- (define str1 (make-attribute #:name 'str
- #:read-value identity
- #:dissector-gen (cut ordinal-dissector
- <> <>
- string<? string=?)))
- (test-assert (string? (attribute-make-value str1 "foo")))
- (set-attribute-domain! str1 '("foo" "bar" "baz" "fit"))
- (for-each
- (lambda (s)
- (test-assert (value-in-attribute-domain? str1 s)))
- '("foo" "bar"))
- (for-each
- (lambda (s)
- (test-assert (not (value-in-attribute-domain? str1 s))))
- '("biz " "bz" foo 40))
- (define str1-dissectors (attribute-dissectors
- str1 '("bar" "foo" "bar" "baz" "foo" "foo" "fit")))
- (test-eq 3 (length str1-dissectors)) ;4 distinct values, 3 places to
- ;split along the ordinal range
- (define n3 (make-attribute #:name 'n3
- #:read-value string->number
- #:domain (iota 20)
- #:dissector-gen (cut ordinal-dissector <> <> < =)))
- (define n3-dissectors (attribute-dissectors
- n3 (concatenate (list (iota 10) (iota 5) (iota 10 5)))))
- (define n4 (make-attribute #:name 'n4
- #:read-value string->number
- #:domain (iota 20)
- #:dissector-gen numeric-dissector))
- (define n4-dissectors (attribute-dissectors
- n4 (concatenate (list (iota 10) (iota 5) (iota 10 5)))))
- ;;; Check symbol->attribute
- (test-assert (attribute? (symbol->attribute 'integer)))
- (test-assert (attribute? (symbol->attribute 'nominal)))
- (test-assert (attribute? (symbol->attribute 'string)))
- (test-assert (attribute? (symbol->attribute 'real)))
- ;;; Check convenience constructors
- (define nom1 (make-nominal-attribute #:name 'nom1))
- (test-eq 'nom1 (attribute-name nom1))
- (define nom2 (make-nominal-attribute #:read-value string->symbol))
- (test-eq 'foo (attribute-make-value nom2 "foo"))
- (test-eq 'bar (attribute-make-value nom2 "bar"))
- (test-end "attributes-test")
|