attributes.scm 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361
  1. (define-module (data-mining attributes)
  2. #:use-module (srfi srfi-9) ;define-record-type
  3. #:use-module (srfi srfi-1) ;any
  4. #:use-module (srfi srfi-26) ;cut
  5. #:use-module (data-mining util)
  6. #:export (make-attribute
  7. make-string-attribute
  8. make-numeric-attribute
  9. make-real-attribute ;alias to make-numeric-attribute
  10. make-nominal-attribute
  11. make-ordinal-attribute
  12. make-integer-attribute ;alias to make-ordinal-attribute
  13. symbol->attribute
  14. attribute?
  15. attribute-make-value
  16. attribute-value->string
  17. attribute-name
  18. set-attribute-name!
  19. attribute-domain
  20. set-attribute-domain!
  21. attribute-extend-domain!
  22. value-in-attribute-domain?
  23. attribute-dissectors
  24. nominal-dissector
  25. ordinal-dissector
  26. integer-dissector
  27. numeric-dissector
  28. true
  29. domain-union))
  30. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  31. ;;; Attribute
  32. (define-record-type attribute
  33. (make-attribute*
  34. name ;This attribute's name.
  35. read-value ;Read a value of this
  36. ;attribute type from a string
  37. value->string ;Return a string
  38. ;representation of an
  39. ;attribute value
  40. domain-union ;A procedure that unites a
  41. ;domain to this attribute's
  42. ;curren domain. Called as
  43. ;(domain-union domain1 domain2)
  44. domain ;Allowed values
  45. dissector-gen ;Given an attribute and a list
  46. ;of values from the domain,
  47. ;create a list of possible
  48. ;dissectors
  49. )
  50. attribute?
  51. (name attribute-name set-attribute-name!)
  52. (read-value attribute-reader)
  53. (value->string attribute-stringerizer)
  54. (domain-union attribute-domain-union)
  55. (domain attribute-domain set-attribute-domain!)
  56. (dissector-gen attribute-dissector-generator))
  57. (define make-attribute
  58. (let ((count 0)) ;for default attribute names
  59. (lambda* (#:key
  60. (name #f)
  61. (read-value identity)
  62. (value->string (cut format #f "~a" <>))
  63. (domain-union domain-union)
  64. (domain #t) ;Default to accepting all values
  65. (dissector-gen nominal-dissector))
  66. (let ((name (or name
  67. (let ((next-count (1+ count)))
  68. (set! count next-count)
  69. (string->symbol
  70. (string-append "attr" (number->string count)))))))
  71. (make-attribute* name read-value value->string
  72. domain-union domain dissector-gen)))))
  73. ;;; Convenience constructors
  74. (define make-string-attribute make-attribute)
  75. (define* (make-numeric-attribute #:key
  76. (read-value string->number)
  77. (dissector-gen numeric-dissector)
  78. #:allow-other-keys #:rest args)
  79. (apply make-attribute
  80. (append args
  81. `(#:read-value ,read-value
  82. #:dissector-gen ,dissector-gen))))
  83. (define make-real-attribute make-numeric-attribute)
  84. (define* (make-nominal-attribute #:key
  85. (dissector-gen nominal-dissector)
  86. #:allow-other-keys #:rest args)
  87. (apply make-attribute
  88. (append args
  89. `(#:dissector-gen ,dissector-gen))))
  90. (define* (make-ordinal-attribute #:key
  91. (read-value string->number)
  92. (dissector-gen ordinal-dissector)
  93. #:allow-other-keys #:rest args)
  94. (apply make-attribute
  95. (append args
  96. `(#:read-value ,read-value
  97. #:dissector-gen ,dissector-gen))))
  98. (define make-integer-attribute make-ordinal-attribute)
  99. (define (symbol->attribute sym . args)
  100. (let ((ctor (case sym
  101. ((string) make-string-attribute)
  102. ((numeric real) make-numeric-attribute)
  103. ((integer) make-integer-attribute)
  104. ((nominal) make-nominal-attribute)
  105. ((ordinal) make-ordinal-attribute)
  106. (else make-attribute))))
  107. (apply ctor args)))
  108. (define (attribute-make-value attr str)
  109. ((attribute-reader attr) str))
  110. (define (attribute-value->string attr v)
  111. ((attribute-stringerizer attr) v))
  112. (define (domain-union d1 d2)
  113. ;; Naive union of two domains
  114. ;;
  115. ;; TODO: Optimize for different domain types
  116. (lambda (v)
  117. (or (value-in-domain? d1 v)
  118. (value-in-domain? d2 v))))
  119. (define (attribute-extend-domain! attr domain)
  120. ;; Extend this attribute's current domain with domain. Returns the
  121. ;; new domain, or #f if the union of the domains could not be formed
  122. ;; according to (attribute-domain-union attr).
  123. (let ((new-domain ((attribute-domain-union attr)
  124. (attribute-domain attr) domain)))
  125. (and=> new-domain
  126. (lambda (d) (set-attribute-domain! attr d)))))
  127. (define (value-in-domain? domain value)
  128. ;; Return #t if the domain includes the given value
  129. (cond
  130. ;; Universal acceptance
  131. ((eq? domain #t) #t)
  132. ;; A procedure that determines inclusion
  133. ((procedure? domain) (domain value))
  134. ;; A discrete set of acceptable values
  135. ((list? domain) (member value domain))
  136. ;; A pair should represent some sort of lower and upper bound
  137. ((pair? domain) (and (> value (car domain))
  138. (< value (cdr domain))))
  139. ;; A discrete set of acceptable values stored in a hash set
  140. ((hash-table? domain) (hash-ref domain value))))
  141. (define (value-in-attribute-domain? attr value)
  142. (value-in-domain? (attribute-domain attr) value))
  143. ;;; TODO: Would it be best to put the dissector procedures in the
  144. ;;; decision-tree module?
  145. (define* (nominal-dissector attr values #:optional (= equal?))
  146. (list (map (lambda (e)
  147. `(,= ,(attribute-name attr) ,e))
  148. (cond
  149. ;; If the domain is true, then dissect the given values
  150. ((boolean? (attribute-domain attr))
  151. (delete-duplicates values))
  152. (else (attribute-domain attr))))))
  153. (define* (ordinal-dissector attr values
  154. #:optional
  155. (< <)
  156. (= equal?)
  157. (which? cdr))
  158. ;; Every point that separates two unique values is potentially a
  159. ;; dissector
  160. (map (lambda (e)
  161. `((,< ,(attribute-name attr) ,(which? e)) (,true)))
  162. (borders (sort values <) =)))
  163. (define integer-dissector ordinal-dissector) ;convenience alias
  164. (define (pair-geometric-mean p)
  165. "Return the geometric mean of the numbers in the given pair."
  166. (let ((a (car p)) (b (cdr p)))
  167. (/ (+ a b) 2)))
  168. (define* (numeric-dissector attr values
  169. #:optional
  170. (which? pair-geometric-mean))
  171. (ordinal-dissector attr values < = which?))
  172. (define (attribute-dissectors attr values)
  173. ;; Return a list of dissectors according to
  174. ;; attribute-dissector-generator.
  175. (begin
  176. ;; First check that all values are in this attribute's domain
  177. (if (any (lambda (v)
  178. (not (value-in-attribute-domain? attr v)))
  179. values)
  180. (error "Cannot dissect values that are not in the domain!"))
  181. ((attribute-dissector-generator attr) attr values)))
  182. ;;; A routine that always return #t.
  183. (define true (const #t))
  184. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  185. ;;; Tests
  186. (use-modules (srfi srfi-64)
  187. (ice-9 format))
  188. (test-begin "attributes-test")
  189. (define a1 (make-attribute #:name "foo"))
  190. (test-assert (attribute? a1))
  191. (define num-attr (make-attribute #:name "num1"
  192. #:read-value string->number))
  193. (test-assert (number? (attribute-make-value num-attr "9")))
  194. (test-assert (number? (attribute-make-value num-attr "2.71828")))
  195. (test-assert (attribute-domain num-attr)) ;Domain must always be non-#f
  196. (define n1 (make-attribute #:name "n1"
  197. #:read-value string->number
  198. #:domain (lambda (n) (> n 0))))
  199. (test-assert (value-in-attribute-domain? n1 1.0))
  200. (test-assert (not (value-in-attribute-domain? n1 (- 2.0))))
  201. (define n2 (make-attribute #:name "n2"
  202. #:read-value string->number
  203. #:domain '(-2.5 . 3.1415926535)))
  204. (test-assert (not (value-in-attribute-domain? n2 -2.6)))
  205. (test-assert (value-in-attribute-domain? n2 -1.0))
  206. (test-assert (value-in-attribute-domain? n2 1.2))
  207. (test-assert (not (value-in-attribute-domain? n2 4)))
  208. (define n3 (make-attribute #:name "n3"
  209. #:read-value string->number
  210. #:domain '(1 2 4 8 16)))
  211. (for-each
  212. (lambda (n)
  213. (test-assert (value-in-attribute-domain? n3 n)))
  214. '(1 2 4 8 16))
  215. (for-each
  216. (lambda (n)
  217. (test-assert (not (value-in-attribute-domain? n3 n))))
  218. '(-2 0 3 9 13 25))
  219. (define domain-hash (make-hash-table))
  220. (for-each
  221. (lambda (n)
  222. (hash-set! domain-hash n n))
  223. '(-1.25 -.25 0. .25 1.25))
  224. (define n4 (make-attribute #:name "n4"
  225. #:read-value string->number
  226. #:domain domain-hash))
  227. (for-each
  228. (lambda (n)
  229. (test-assert (value-in-attribute-domain? n4 n)))
  230. '(-1.25 -.25 0. .25 1.25))
  231. (for-each
  232. (lambda (n)
  233. (test-assert (not (value-in-attribute-domain? n4 n))))
  234. '(-20 .23 0.01 100 9001))
  235. ;;; Test extend-domain
  236. (attribute-extend-domain! n1 '(-4 -2))
  237. (for-each
  238. (lambda (n)
  239. (test-assert (value-in-attribute-domain? n1 n)))
  240. '(-4 -2 1 2 3 4 5))
  241. (for-each
  242. (lambda (n)
  243. (test-assert (not (value-in-attribute-domain? n1 n))))
  244. '(-5 -1 0))
  245. (attribute-extend-domain! n1 (lambda (n) (< n -30)))
  246. (for-each
  247. (lambda (n)
  248. (test-assert (value-in-attribute-domain? n1 n)))
  249. '(-33 -32 -31 -4 -2 1 2 3 4 5))
  250. (for-each
  251. (lambda (n)
  252. (test-assert (not (value-in-attribute-domain? n1 n))))
  253. '(-30 -10))
  254. ;;; Test symbolic attributes
  255. (define s1 (make-attribute #:name "s1"
  256. #:read-value string->symbol
  257. #:dissector-gen (cut nominal-dissector <> <> eq?)))
  258. (test-assert (symbol? (attribute-make-value s1 "foo")))
  259. (test-assert (symbol? (attribute-make-value s1 "baR")))
  260. (set-attribute-domain! s1 '(foo bar baz))
  261. (for-each
  262. (lambda (s)
  263. (test-assert (value-in-attribute-domain? s1 s)))
  264. '(foo bar))
  265. (for-each
  266. (lambda (s)
  267. (test-assert (not (value-in-attribute-domain? s1 s))))
  268. '("biz " "bz" "foo" 40))
  269. (define s1-dissectors (attribute-dissectors s1 '()))
  270. ;; (format #t "~a\n" s1-dissectors)
  271. (test-eq "symbolic attr trivial dissector" 1 (length s1-dissectors))
  272. ;;; Test nominal attributes
  273. (define str1 (make-attribute #:name 'str
  274. #:read-value identity
  275. #:dissector-gen (cut ordinal-dissector
  276. <> <>
  277. string<? string=?)))
  278. (test-assert (string? (attribute-make-value str1 "foo")))
  279. (set-attribute-domain! str1 '("foo" "bar" "baz" "fit"))
  280. (for-each
  281. (lambda (s)
  282. (test-assert (value-in-attribute-domain? str1 s)))
  283. '("foo" "bar"))
  284. (for-each
  285. (lambda (s)
  286. (test-assert (not (value-in-attribute-domain? str1 s))))
  287. '("biz " "bz" foo 40))
  288. (define str1-dissectors (attribute-dissectors
  289. str1 '("bar" "foo" "bar" "baz" "foo" "foo" "fit")))
  290. (test-eq 3 (length str1-dissectors)) ;4 distinct values, 3 places to
  291. ;split along the ordinal range
  292. (define n3 (make-attribute #:name 'n3
  293. #:read-value string->number
  294. #:domain (iota 20)
  295. #:dissector-gen (cut ordinal-dissector <> <> < =)))
  296. (define n3-dissectors (attribute-dissectors
  297. n3 (concatenate (list (iota 10) (iota 5) (iota 10 5)))))
  298. (define n4 (make-attribute #:name 'n4
  299. #:read-value string->number
  300. #:domain (iota 20)
  301. #:dissector-gen numeric-dissector))
  302. (define n4-dissectors (attribute-dissectors
  303. n4 (concatenate (list (iota 10) (iota 5) (iota 10 5)))))
  304. ;;; Check symbol->attribute
  305. (test-assert (attribute? (symbol->attribute 'integer)))
  306. (test-assert (attribute? (symbol->attribute 'nominal)))
  307. (test-assert (attribute? (symbol->attribute 'string)))
  308. (test-assert (attribute? (symbol->attribute 'real)))
  309. ;;; Check convenience constructors
  310. (define nom1 (make-nominal-attribute #:name 'nom1))
  311. (test-eq 'nom1 (attribute-name nom1))
  312. (define nom2 (make-nominal-attribute #:read-value string->symbol))
  313. (test-eq 'foo (attribute-make-value nom2 "foo"))
  314. (test-eq 'bar (attribute-make-value nom2 "bar"))
  315. (test-end "attributes-test")