dataset.scm 43 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113
  1. #!/cray/css/users/bavier/bin/guile -s
  2. !#
  3. ;;
  4. ;; Procedures for manipulating data stored in a
  5. ;; 2D-like structure, where a row represents a
  6. ;; "record", and records may have a number of
  7. ;; "attributes" or columns
  8. ;;
  9. (define-module (data-mining dataset)
  10. #:use-module (data-mining type-conversions)
  11. #:use-module (data-mining util)
  12. #:use-module (data-mining hash-util)
  13. #:use-module (data-mining attributes)
  14. #:use-module (data-mining indexed-matrix)
  15. #:use-module (srfi srfi-1) ;;autoload (srfi srfi-1) (first second third last every)
  16. #:use-module (srfi srfi-9)
  17. #:use-module (srfi srfi-19) ;;(date->string time-utc->date)
  18. #:use-module (srfi srfi-26) ;;(cut cute)
  19. #:use-module (ice-9 receive) ;;(receive)
  20. #:use-module (ice-9 rdelim) ;;(read-line write-line)
  21. #:use-module (ice-9 regex) ;;(string-match make-regexp regexp-exec)
  22. #:export (make-dataset
  23. make-dataset/shared
  24. dataset-length
  25. dataset-width
  26. dataset-empty?
  27. ;; arff->dataset
  28. delimited->dataset
  29. dataset->delimited
  30. dataset-set!
  31. dataset-ref
  32. dataset-filter
  33. dataset-partition-records
  34. dataset-attributes
  35. dataset-attribute
  36. dataset-attribute-ref
  37. dataset-attribute-set!
  38. dataset-attribute-indices
  39. dataset-attribute-values
  40. dataset-label-idx
  41. dataset-label-attribute-values
  42. dataset-record-indices
  43. dataset-derive-attribute!
  44. dataset-entry-value-alist))
  45. (define author "Eric Bavier <bavier@member.fsf.org>")
  46. (define date "2014 June 3")
  47. (define copyright "GPLv3+")
  48. ;;
  49. ;; This is free software released under the GPLv3, or later
  50. ;;
  51. ;; =================================================
  52. ;; Dataset Object
  53. ;; =================================================
  54. (define-record-type dataset
  55. (make-dataset*
  56. entries ;2D matrix containing dataset
  57. ;values
  58. attribute-table ;A collection of the attributes
  59. ;describing each column of the
  60. ;dataset
  61. label-idx ;The name of the label attribute
  62. )
  63. dataset?
  64. (entries dataset-entries)
  65. (attribute-table dataset-attribute-table)
  66. (label-idx dataset-label-idx set-dataset-label-idx!))
  67. (define (attribute-list->attributes lst)
  68. (fold
  69. ;; Allow a mixing of simple symbol declarations
  70. ;; for attributes and concrete attribute types.
  71. (lambda (e hash)
  72. (cond
  73. ((symbol? e)
  74. (let ((attr (symbol->attribute e)))
  75. (hash-set!
  76. hash (attribute-name attr) attr)))
  77. ((attribute? e)
  78. (hash-set! hash (attribute-name e) e)))
  79. hash)
  80. (make-hash-table)
  81. lst))
  82. (define* (make-dataset attributes
  83. label-idx
  84. #:optional
  85. (entries (make-indexed-matrix)))
  86. (let ((attributes* (cond
  87. ((hash-table? attributes) attributes)
  88. (else (attribute-list->attributes attributes)))))
  89. (make-dataset* entries attributes* label-idx)))
  90. (define* (make-dataset/shared dataset
  91. #:key
  92. (rows (dataset-record-indices dataset))
  93. (columns (dataset-attribute-indices dataset)))
  94. ;; Make sure the dataset label attribute is included in the columns
  95. (let* ((columns* (lset-adjoin equal?
  96. columns
  97. (dataset-label-idx dataset)))
  98. (attribute-table* (hash-subset (dataset-attribute-table dataset)
  99. columns*)))
  100. (make-dataset
  101. attribute-table*
  102. (dataset-label-idx dataset)
  103. (make-indexed-matrix/shared (dataset-entries dataset)
  104. #:row-indices rows
  105. #:column-indices columns*))))
  106. ;;; Set the value of an entry in DS. We assume that ENTRY has already
  107. ;;; been vetted by the attribute it is being set for (i.e. no scrubbing
  108. ;;; takes place).
  109. (define (dataset-set! ds entry rec-idx attr-idx)
  110. (indexed-matrix-set!
  111. (dataset-entries ds)
  112. entry rec-idx attr-idx))
  113. (define (dataset-ref ds rec-idx attr-idx)
  114. (indexed-matrix-ref (dataset-entries ds) rec-idx attr-idx))
  115. ;;; Filter the entries of DS simultaneously on the contents of each record
  116. ;;; (row) as well as the contents of each attribute (column).
  117. ;;;
  118. ;;; The procedure RECORD-PRED is applied as (record-pred row-idx entry-alist)
  119. ;;; where row-idx is the unique identifier for the current row, and
  120. ;;; entry-alist is an association list where the keys are the attribute
  121. ;;; names and the values are the values each attribute assumes for the current
  122. ;;; record. Analogously for ATTRIBUTE-PRED. RECORD-PRED and ATTRIBUTE-PRED
  123. ;;; may be #t, in which case all rows or columns, respectively, are returned
  124. ;;; in the resulting dataset.
  125. ;;;
  126. ;;; Note: It is not possible to filter out the label attribute
  127. ;;;
  128. (define* (dataset-filter ds
  129. #:key
  130. (record-pred #t)
  131. (attribute-pred #t))
  132. (let ((recs
  133. (if (eq? record-pred #t)
  134. (dataset-record-indices ds)
  135. (filter identity ;filter out those that return #f
  136. (indexed-matrix-map-indexed-rows
  137. (lambda (i e) (and (record-pred i e) i))
  138. (dataset-entries ds)))))
  139. (attrs
  140. (if (eq? attribute-pred #t)
  141. (dataset-attribute-indices ds)
  142. (filter identity ;filter out those that return #f
  143. (indexed-matrix-map-indexed-columns
  144. (lambda (j e) (and (attribute-pred j e) j))
  145. (dataset-entries ds))))))
  146. (make-dataset/shared ds #:rows recs #:columns attrs)))
  147. ;;; Returns a list of datasets of length (1+ (length
  148. ;;; preds)). The records in d1 are those records
  149. ;;; which satisfy p1, the records in d2 are those
  150. ;;; which satisfay p2 but not p1, etc. The last
  151. ;;; dataset in the result contains those records
  152. ;;; which do not satisfy any of the predicates in
  153. ;;; PREDS.
  154. ;;;
  155. ;;; Each predicate in PREDS is applied as in
  156. ;;; dataset-filter
  157. (define (dataset-partition-records preds ds)
  158. (if (null? preds) (list ds)
  159. (reverse!
  160. (map
  161. (lambda (alist-ds)
  162. (make-dataset/shared ds #:rows (map car alist-ds)))
  163. (fold (lambda (p l)
  164. (let* ((part (fold (lambda (e acc)
  165. (if (p (car e) (cdr e))
  166. (cons (cons e (car acc))
  167. (cdr acc))
  168. (cons (car acc)
  169. (cons e (cdr acc)))))
  170. '(() . ())
  171. (car l)))
  172. (in (reverse! (car part)))
  173. (out (reverse! (cdr part))))
  174. (cons out (cons in (cdr l)))))
  175. (list (indexed-matrix-map-indexed-rows
  176. cons (dataset-entries ds)))
  177. preds)))))
  178. ;; Lookup an attribute ATTR in DS by name or index. Return #f if none found
  179. ;; by the given index.
  180. (define (dataset-attribute-ref ds attr-idx)
  181. (hash-ref (dataset-attribute-table ds) attr-idx))
  182. (define (dataset-attribute-set! ds attr-idx attr)
  183. (hash-set! (dataset-attribute-table ds) attr-idx attr))
  184. (define dataset-attribute
  185. (make-procedure-with-setter dataset-attribute-ref
  186. dataset-attribute-set!))
  187. ;;; Return a reference to the label attribute
  188. ;; (define (dataset-label-attribute-ref ds)
  189. ;; (dataset-attribute-ref ds (dataset-label-idx ds)))
  190. ;; (define (dataset-label-attribute-set! ds attr)
  191. ;; (let ((label-idx (dataset-label-idx ds))
  192. ;; (label-idx* (attribute-name attr)))
  193. ;; (hash-remove! (dataset-attribute-table ds) label-idx)
  194. ;; (hash-set! (dataset-attribute-table ds) lavel-idx* attr)
  195. ;; ;; Swap row references to labels from the old tag to the new
  196. ;; (indexed-matrix-reindex-column! (entries ds) oldtag newtag)
  197. ;; (set-dataset-label-idx! ds label-idx*)))
  198. ;; (define dataset-label-attribute
  199. ;; (make-procedure-with-setter dataset-label-attribute-ref
  200. ;; dataset-label-attribute-set!))
  201. ;;; Return a list of the concrete attribute validators of DS.
  202. (define (dataset-attributes ds)
  203. (hash-map->list (lambda (_ b) b) (dataset-attribute-table ds))
  204. (map (cute hash-ref (dataset-attribute-table ds) <>)
  205. (dataset-attribute-indices ds)))
  206. (define* (dataset-attribute-indices ds
  207. #:key
  208. (with-label #t))
  209. (let ((indices (hash-map->list (lambda (i _) i)
  210. (dataset-attribute-table ds))))
  211. (if with-label
  212. indices
  213. (remove (cute equal? <> (dataset-label-idx ds))
  214. indices))))
  215. ;;; Return a list of values belonging to the named attribute
  216. (define (dataset-attribute-values ds attr-idx)
  217. (indexed-matrix-column-entries (dataset-entries ds) attr-idx))
  218. ;;; Return a list of the label values
  219. (define (dataset-label-attribute-values ds)
  220. (dataset-attribute-values ds (dataset-label-idx ds)))
  221. ;;; Return a list of the record tag/index values
  222. (define (dataset-record-indices ds)
  223. (indexed-matrix-row-tags (dataset-entries ds)))
  224. ;;; Return the number of records this dataset has.
  225. (define (dataset-length ds)
  226. (indexed-matrix-length (dataset-entries ds)))
  227. ;;; Return the number of attributes this dataset has
  228. ;;; (does not including the label attribute)
  229. (define (dataset-width ds)
  230. (1- (hash-table-size (dataset-attribute-table ds))))
  231. (define (dataset-empty? ds)
  232. (or (= (dataset-length ds) 0)
  233. (= (dataset-width ds) 0)))
  234. ;; Add a new attribute ATTR to DS according to the
  235. ;; expression in PROC-EXP, which may contain
  236. ;; attribute tag names from DS. PROC-EXP will be
  237. ;; applied record-wise over DS, and the value for
  238. ;; ATTR at that record will be the result of
  239. ;; evaluating PROC-EXP with tag names replaced by
  240. ;; the attribute value of that record.'
  241. ;;
  242. ;; E.g. ::
  243. ;;
  244. ;; (dataset-derive-attribute!
  245. ;; d
  246. ;; (make-numeric-attribute #:name 'foo)
  247. ;; '(/ bar baz))
  248. ;;
  249. (define (dataset-derive-attribute! ds attr proc-exp)
  250. (let ((name (attribute-name attr))
  251. (ents (dataset-entries ds)))
  252. (begin
  253. (dataset-attribute-set! ds name attr)
  254. (for-each
  255. (lambda (i)
  256. (indexed-matrix-set!
  257. ents
  258. (eval (substitute-map proc-exp (dataset-entry-value-alist ds i))
  259. (interaction-environment))
  260. i name))
  261. (indexed-matrix-row-tags ents))
  262. (set-attribute-domain! attr (dataset-attribute-values ds name)))))
  263. (define (dataset-entry-value-alist ds rec-idx)
  264. (indexed-matrix-indexed-row (dataset-entries ds) rec-idx))
  265. ;; =================================================
  266. ;; Reading a dataset from an ARFF file
  267. ;; =================================================
  268. ;; Produces a new attribute according to the ARFF
  269. ;; @attribute tag-line
  270. (define (arff->attribute port)
  271. (define (attr-from-def name def)
  272. (cond ((equal? def "string") (make-string-attribute #:name name))
  273. ((or (equal? def "numeric")
  274. (equal? def "real")) (make-numeric-attribute #:name name))
  275. ((equal? def "integer") (make-integer-attribute #:name name))
  276. ((char=? #\{ (string-ref def 0))
  277. (let ((domain (string-split
  278. (substring def 1 (1- (string-length def)))
  279. #\,)))
  280. (make-nominal-attribute #:name name #:domain domain)))
  281. ((char=? #\[ (string-ref def 0))
  282. (let ((domain (string-split
  283. (substring def 1 (1- (string-length def)))
  284. #\,)))
  285. (make-ordinal-attribute #:name name #:domain domain)))))
  286. (define (bad-input in)
  287. (error (format #f "Cannot construct attribute from input: ~s\n" in)))
  288. (let* ((line (read-line port 'trim))
  289. (pieces (remove string-null? (string-split line #\ ))))
  290. (if (= (length pieces) 3)
  291. (let ((arff-tag (first pieces))
  292. (attr-name (string->symbol (second pieces)))
  293. (attr-def (third pieces)))
  294. (if (string-ci=? arff-tag "@attribute")
  295. (attr-from-def attr-name attr-def)
  296. (bad-input line))))))
  297. ;; Reads an ARFF-formatted data stream from PORT and
  298. ;; returns a new dataset with the contained data
  299. ;; (define* (arff->dataset #:optional (port (current-input-port)) . rest)
  300. ;; (let ((ds (make <dataset>)))
  301. ;; (begin
  302. ;; ((cut set-from-arff! ds port <...>) rest)
  303. ;; ds)))
  304. ;;; IGNORE-ATTRIBUTES and SELECT-ATTRIBUTES should be lists of symbols, which
  305. ;;; specify the attributes to ignore or select, respectively.
  306. ;;; SELECT-ATTRIBUTES may also be #t, in which case all attributes are
  307. ;;; selected.
  308. ;; (define* (arff->dataset #:optional
  309. ;; (port (current-input-port))
  310. ;; #:key
  311. ;; (ignore-attributes '())
  312. ;; (select-attributes #t)
  313. ;; (tag-index first)
  314. ;; (label-index last))
  315. ;; (let ((ds (make <dataset>)))
  316. ;; (begin
  317. ;; (skip-comments-and-whitespace port)
  318. ;; ;; Read relation tag and discard it
  319. ;; (let ((line (read-line port)))
  320. ;; (unless (string-match "@relation.*" line)
  321. ;; (error (format #f "Expecting '@relation' tag but got ~s" line))))
  322. ;; (skip-comments-and-whitespace port)
  323. ;; ;; Now we expect a block of '@attribute' statements
  324. ;; (let ((start-of-data? (let ((r (make-regexp "^@data.*$" regexp/icase)))
  325. ;; (cut regexp-exec r <>)))
  326. ;; (attrs '()))
  327. ;; (begin
  328. ;; ;; First, read all the attribute definitions
  329. ;; (do ((line (read-line port)
  330. ;; (begin (skip-comments-and-whitespace port)(read-line port))))
  331. ;; ;; Stop once the '@data' tag has been read
  332. ;; ((start-of-data? line))
  333. ;; (let ((a (arff->attribute (open-input-string line))))
  334. ;; (if (or (eq? select-attributes #t)
  335. ;; (member (tag a) select))
  336. ;; (begin
  337. ;; (set! attrs (append! attrs (list a)))))))
  338. ;; ;; Then, pick out those we're interested in
  339. ;; (let* ((all-tags (map tag attrs))
  340. ;; (select (cond
  341. ;; ((eq? select-attributes #t) all-tags)
  342. ;; ((eq? select-attributes #f) '())
  343. ;; (else select-attributes)))
  344. ;; (ignore (cond
  345. ;; ((eq? ignore-attributes #f) '())
  346. ;; (else ignore-attributes)))
  347. ;; (selected-tags
  348. ;; (map (lambda (s)
  349. ;; (cond ((integer? s) (list-ref all-tags s))
  350. ;; (else
  351. ;; (let ((ss (as-symbol s)))
  352. ;; (if (member ss all-tags)
  353. ;; ss
  354. ;; (error
  355. ;; (format #f
  356. ;; "Selected tag ~s not an attribute"
  357. ;; s)))))))
  358. ;; (lset-difference equal? select ignore)))
  359. ;; (selected-indices
  360. ;; (map (let ((tag-map (list->index-map all-tags)))
  361. ;; (cut assq-ref tag-map <>))
  362. ;; selected-tags))
  363. ;; (ti (cond
  364. ;; ((procedure? tag-index) (tag-index selected-indices))
  365. ;; ((symbol? tag-index)
  366. ;; (list-index (cut eq? tag-index <>)
  367. ;; all-tags))
  368. ;; (else tag-index)))
  369. ;; (li (cond
  370. ;; ((procedure? label-index) (label-index selected-indices))
  371. ;; ((symbol? label-index)
  372. ;; (list-index (cut eq? label-index <>)
  373. ;; all-tags))
  374. ;; (else label-index)))
  375. ;; (label-attr (list-ref attrs li))
  376. ;; (tag-label-list (if tag-index (list li ti) (list li)))
  377. ;; (mask (lset-union! = selected-indices tag-label-list))
  378. ;; (entry-attributes (take-indices
  379. ;; attrs
  380. ;; (lset-difference = mask (if tag-index
  381. ;; (list ti) '())))))
  382. ;; (slot-set! (entries ds) 'col-tags
  383. ;; (map tag entry-attributes))
  384. ;; (for-each (lambda (a) (dataset-attribute-set! ds (tag a) a))
  385. ;; entry-attributes)
  386. ;; (set! (dataset-label-attribute ds) label-attr)
  387. ;; ;; Now all the attributes are loaded in ATTRS,
  388. ;; ;; create a dataset and set its values from
  389. ;; ;; the block after '@data'
  390. ;; (set-delimited! ds mask port #:delimiter #\,
  391. ;; #:tag-index ti #:label-index li))))
  392. ;; ds)))
  393. ;;; Shorthand for reading lines from delimited input that may contain
  394. ;;; whitespace lines and comments
  395. (define* (next-line port #:optional (handle-delim 'trim))
  396. (begin
  397. (skip-comments-and-whitespace port)
  398. (read-line port handle-delim)))
  399. ;;; Read a dataset from delimited text.
  400. ;;;
  401. ;;; ATTRIBUTES should be a list of attributes that describe the columns
  402. ;;; of the input data. The length of ATTRIBUTES should be the same as
  403. ;;; the number of columns in the input data. If any element of
  404. ;;; ATTRIBUTES is #f, then that column in the input will be ignored.
  405. ;;;
  406. ;;; If HEADER is #t, then assume there is a header line and read
  407. ;;; attribute indices from that. Indices read in such a way will
  408. ;;; override any names/indices that the attributes in ATTRIBUTES already
  409. ;;; had. If HEADER is #f then we assume that attributes already have
  410. ;;; names set.
  411. (define* (delimited->dataset attributes
  412. label-idx
  413. #:optional
  414. (port (current-input-port))
  415. #:key
  416. (delimiter #\,)
  417. (header #t)
  418. (rec-idx #f))
  419. (let* ((attribute-columns (list-indices attribute? attributes))
  420. (attributes* (filter attribute? attributes))
  421. (attribute-indices
  422. (if header
  423. ;; Read attribute indices from header line
  424. (let* ((line (next-line port))
  425. (pieces (map (cute list-ref
  426. (string-split line delimiter)
  427. <>)
  428. attribute-columns))
  429. (indices (map string->symbol pieces)))
  430. (map (lambda (attr idx)
  431. (set-attribute-name! attr idx))
  432. attributes*
  433. indices)
  434. indices)
  435. (map attribute-name attributes)))
  436. ;; We need an input attribute for the record index column, but
  437. ;; that attribute should not be added to the dataset.
  438. (dataset-attrs (if rec-idx
  439. (remove (lambda (a)
  440. (equal? (attribute-name a) rec-idx))
  441. attributes*)
  442. attributes*))
  443. (attribute-map (map cons attribute-indices attribute-columns))
  444. (dataset (make-dataset dataset-attrs label-idx)))
  445. (set-delimited! dataset attribute-map port
  446. #:delimiter delimiter
  447. #:rec-idx rec-idx)
  448. dataset))
  449. (define* (dataset->delimited dataset
  450. #:optional
  451. (port (current-output-port)))
  452. (let ((attr-indices (dataset-attribute-indices dataset)))
  453. (begin
  454. (format port "rec,~{~a~^,~}\n"
  455. attr-indices)
  456. (for-each
  457. (lambda (rec-idx)
  458. (let ((values (dataset-entry-value-alist dataset rec-idx)))
  459. (format port "~a,~{~a~^,~}\n"
  460. (symbol->string rec-idx)
  461. (map (lambda (ai)
  462. (assoc-ref values ai))
  463. attr-indices))))
  464. (dataset-record-indices dataset)))))
  465. ;;; ATTRIBUTE-MAP must be a list of pairs (attr-idx . attr-col) where
  466. ;;; ATTR-IDX is the index of an attribute in DATASET (if not part of
  467. ;;; DATASET it will be ignored) and ATTR-COL is the 0-based index at
  468. ;;; which values for that attribute reside in the delimited input.
  469. ;;; REC-IDX, if given, should name one of the attributes in
  470. ;;; ATTRIBUTE-MAP that is to be used to assign indices to records. If
  471. ;;; not given each record will be assigned a "random" index.
  472. (define* (set-delimited! dataset
  473. ;; Maps from attribute index to the column in
  474. ;; the delimited input where that attribute's
  475. ;; values are found. Should include the label
  476. ;; attribute.
  477. attribute-map ;((attr-idx . attr-col) ...)
  478. #:optional
  479. (port (current-input-port))
  480. #:key
  481. (delimiter #\,)
  482. (rec-idx #f))
  483. (let record-loop ((count 0))
  484. (let ((line (next-line port)))
  485. (if (not (eof-object? line))
  486. (let* ((str-values (map string-trim (string-split line delimiter)))
  487. ;; Transform those string values into attribute values,
  488. ;; and construct input suitable for
  489. ;; set-dataset-entry-values!
  490. (values (filter-map/key+value
  491. (lambda (attr-idx attr-col)
  492. (and=> (dataset-attribute dataset attr-idx)
  493. (lambda (attr)
  494. (cons attr-idx
  495. (attribute-make-value
  496. attr
  497. (list-ref str-values attr-col))))))
  498. attribute-map))
  499. (rec-name (string->symbol
  500. (if rec-idx
  501. (list-ref str-values
  502. (assoc-ref attribute-map rec-idx))
  503. (string-append "rec" (number->string count))))))
  504. (set-dataset-entry-values! dataset rec-name values)
  505. (record-loop (1+ count)))))))
  506. ;;; For the record with index REC-IDX, set the values in VALUES, which
  507. ;;; must be an alist whose keys are attribute names/indices and whose
  508. ;;; values are the associated entry values for that attribute. An entry
  509. ;;; with index REC-IDX may or may not already exist in DATASET. VALUES
  510. ;;; must not necessarily contain a value for each attribute in DATASET,
  511. ;;; though if there are attribute indices in VALUES that are not part of
  512. ;;; DATASET then they will be ignored.
  513. (define (set-dataset-entry-values! dataset rec-idx values)
  514. (let ((attributes (dataset-attribute-table dataset)))
  515. (for-each/key+value
  516. (lambda (attr-idx value)
  517. (if (hash-ref attributes attr-idx)
  518. (dataset-set! dataset value rec-idx attr-idx)))
  519. values)))
  520. (define (skip-comments-and-whitespace port)
  521. (cond
  522. ((eof-object? (peek-char port)) (noop)) ;Nothing to be done
  523. ((char=? (peek-char port) #\%)
  524. (begin
  525. ;; Discard the comment line and continue
  526. (read-line port)
  527. (skip-comments-and-whitespace port)))
  528. ;; else check for whitespace-only lines
  529. (else
  530. (let ((line (read-line port 'concat)))
  531. (if (string-every char-set:whitespace line)
  532. ;; Discard this line and continue
  533. (skip-comments-and-whitespace port)
  534. ;; Else put the line back in port
  535. (unread-string line port))))))
  536. ;; Writing a dataset to an ARFF file
  537. ;; =================================
  538. ;; Write an attribute tag line for the given
  539. ;; attribute to PORT
  540. (define (attribute->arff attr port)
  541. (display (string-append
  542. (format #f "@attribute ~a " (attribute-name attr))
  543. (let ((domain (attribute-domain attr)))
  544. (cond
  545. ;; Guess the arff attribute type from
  546. ;; the characteristics of the domain
  547. ;; values.
  548. ;;
  549. ;; TODO: So far this only works if
  550. ;; domain is a list.
  551. ((every string=? domain) "string")
  552. ((every symbol? domain)
  553. (format #f "{~{~a~^,~}}" domain))
  554. (else "string")))
  555. "\n")
  556. port))
  557. ;; (define-method (attribute->arff (attr <attribute>) port)
  558. ;; (format port "@attribute ~a string\n" (tag attr)))
  559. ;; (define-method (attribute->arff (attr <string-attr>) port)
  560. ;; (format port "@attribute ~a string\n" (tag attr)))
  561. ;; (define-method (attribute->arff (attr <numeric-attr>) port)
  562. ;; (format port "@attribute ~a numeric\n" (tag attr)))
  563. ;; (define-method (attribute->arff (attr <nominal-attr>) port)
  564. ;; (format port "@attribute ~a {~a}\n" (tag attr)
  565. ;; (string-join (map as-string
  566. ;; (or (domain attr) '()))
  567. ;; ",")))
  568. ;; (define-method (attribute->arff (attr <ordinal-attr>) port)
  569. ;; (format port "@attribute ~a [~a]\n" (tag attr)
  570. ;; (string-join (map as-string
  571. ;; (or (domain attr) '()))
  572. ;; ",")))
  573. ;; (define* (dataset->arff ds #:optional (port (current-output-port)))
  574. ;; (begin
  575. ;; (when (slot-ref ds 'set-attr-domains)
  576. ;; (dataset-set-domains! ds))
  577. ;; (format port "% Dataset output by dataset.scm ~a\n"
  578. ;; (date->string (time-utc->date (current-time))))
  579. ;; (format port "@relation ~a\n\n"
  580. ;; (let ((fn (and port (port-filename port))))
  581. ;; (if fn
  582. ;; ;; Pull out just the basename without extension
  583. ;; (match:substring
  584. ;; (string-match "([^ /]+).arff" fn)
  585. ;; 1)
  586. ;; "foo")))
  587. ;; (format port "@attribute tag string\n")
  588. ;; (map (cut attribute->arff <> port)
  589. ;; (dataset-attributes ds #:with-label #t))
  590. ;; (format port "\n@data\n")
  591. ;; (indexed-matrix-for-each-row
  592. ;; (lambda (i elst)
  593. ;; (format port "~a,~{~a~^,~}\n"
  594. ;; (as-string i) (map as-string elst)))
  595. ;; (entries ds))))
  596. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  597. ;;; Tests
  598. (use-modules (srfi srfi-64)
  599. (srfi srfi-1)
  600. (ice-9 format)
  601. (srfi srfi-43) ;vector library
  602. (data-mining test-util))
  603. (test-begin "dataset-test")
  604. ;;; Check helper routine attribute-list->attributes
  605. (define attr-hash (attribute-list->attributes
  606. `(integer
  607. nominal
  608. ,(make-string-attribute))))
  609. (hash-for-each
  610. (lambda (key value)
  611. (test-assert (attribute? value)))
  612. attr-hash)
  613. ;;; Check creating a dataset with symbols for attributes
  614. (define d0 (make-dataset `(integer string ,(make-nominal-attribute #:name 'class))
  615. 'class))
  616. (test-assert (dataset? d0))
  617. (test-eq "width with no entries" 2 (dataset-width d0))
  618. (test-eq "length with no entries" 0 (dataset-length d0))
  619. (test-eq "label-idx set" 'class (dataset-label-idx d0))
  620. (define d1 (make-dataset '(nominal) 'class))
  621. (test-assert (dataset-empty? d1))
  622. (test-eq "width of empty dataset" 0 (dataset-width d1))
  623. (define d2 (make-dataset `(,(make-integer-attribute #:name "i")
  624. ,(make-string-attribute #:name "s")
  625. ,(make-real-attribute #:name "r")
  626. ,(make-nominal-attribute #:name "n"))
  627. "n"))
  628. (define d2/s0 (make-dataset/shared d2))
  629. (test-assert "shared has all attributes"
  630. ((list-permutation? '("i" "s" "r" "n"))
  631. (dataset-attribute-indices d2/s0)))
  632. (test-eq "width of shared same" 3 (dataset-width d2/s0))
  633. (define d2/s1 (make-dataset/shared d2 #:columns '("i" "r" "n")))
  634. (test-assert "shared has subset of attributes"
  635. ((list-permutation? '("i" "r" "n"))
  636. (dataset-attribute-indices d2/s1)))
  637. (test-eq "width of narrower shared" 2 (dataset-width d2/s1))
  638. ;;; Add some entries and make sure it doesn't affect the original
  639. (dataset-set! d2/s1 2 "r0" "i")
  640. (dataset-set! d2/s1 2.71 "r0" "r")
  641. (dataset-set! d2/s1 'foo "r0" "n")
  642. (test-eq "ref" 2 (dataset-ref d2/s1 "r0" "i"))
  643. (test-eq "ref" 2.71 (dataset-ref d2/s1 "r0" "r"))
  644. (test-eq "ref" 'foo (dataset-ref d2/s1 "r0" "n"))
  645. (test-eq "length of single record dataset" 1 (dataset-length d2/s1))
  646. (test-eq "length of original dataset" 0 (dataset-length d2))
  647. (test-eq "length of record indices" 1 (length (dataset-record-indices d2/s1)))
  648. (test-assert "row indices"
  649. ((list-permutation? '("r0")) (dataset-record-indices d2/s1)))
  650. (test-assert "label values"
  651. ((list-permutation? '(foo)) (dataset-label-attribute-values d2/s1)))
  652. (test-assert "record values"
  653. ((list-permutation? '(("i" . 2) ("r" . 2.71) ("n" . foo)))
  654. (dataset-entry-value-alist d2/s1 "r0")))
  655. ;;; Create a new attribute and derive some values for it
  656. (dataset-derive-attribute!
  657. d2/s1
  658. (make-numeric-attribute #:name "s*")
  659. '(/ "r" "i"))
  660. (test-eq "length with derived" 1 (dataset-length d2/s1))
  661. (test-eq "width with derived" 3 (dataset-width d2/s1))
  662. (test-eq "width of original after shared derive"
  663. 3 (dataset-width d2))
  664. (define derived-value (assoc-ref (dataset-entry-value-alist d2/s1 "r0") "s*"))
  665. (test-eqv "derived value" 1.355 derived-value)
  666. (test-assert "original does not know about derived attribute"
  667. ((list-permutation? '("i" "r" "s" "n"))
  668. (dataset-attribute-indices d2)))
  ;;; Check set-dataset-entry-values!
  ;; Step 1: give d2 a record "r1".  d2/s0 and d2/s1 are expected to
  ;; keep the lengths they had before.
  (set-dataset-entry-values!
   d2 "r1"
   '(("i" . 3) ("r" . 3.14) ("s" . "bust") ("n" . bar)))
  (test-eq "shared datasets unchanged"
    0
    (dataset-length d2/s0))
  (test-eq "shared datasets unchanged"
    1
    (dataset-length d2/s1))
  (test-eq "set values for new record changes length"
    1
    (dataset-length d2))
  (test-assert "new record indices"
    ((list-permutation? '("r1"))
     (dataset-record-indices d2)))

  ;;; Update the entry for the "i" attribute
  ;; Step 2: overwrite a single value of the existing record; the
  ;; other values of "r1" are expected to be retained.
  (set-dataset-entry-values! d2 "r1" '(("i" . 4)))
  (test-eq "updating values does not change dataset length"
    1
    (dataset-length d2))
  (test-assert "updated record values"
    ((list-permutation?
      '(("i" . 4) ("r" . 3.14) ("s" . "bust") ("n" . bar)))
     (dataset-entry-value-alist d2 "r1")))

  ;;; Values for "unknown" attribute indices should be ignored
  ;; Step 3: "blaz" is not one of d2's attribute indices; the "i"
  ;; update in the same call is still expected to take effect.
  (set-dataset-entry-values! d2 "r1" '(("i" . 5) ("blaz" . flib)))
  (test-eq "no change for unknown attribute index"
    1
    (dataset-length d2))
  (test-eq "no change for unknown attribute index"
    3
    (dataset-width d2))
  (test-assert "updated record values"
    ((list-permutation?
      '(("i" . 5) ("r" . 3.14) ("s" . "bust") ("n" . bar)))
     (dataset-entry-value-alist d2 "r1")))
  ;;; Check set-delimited!
  ;; Four datasets with different attribute subsets and label
  ;; attributes are filled from the same two-row, seven-column
  ;; comma-delimited text.  The loops below take (index element ...)
  ;; arguments, i.e. they assume the SRFI-43 flavour of
  ;; vector-for-each, which also stops at the shortest vector.
  (let* ((data "
a,classy,1,4,blurp,qu,0.2
b,classx,2,6,blub,qa,0.3")
         ;; One attribute object per column of the text.
         (foo (make-ordinal-attribute
               #:name 'foo
               #:read-value identity
               #:dissector-gen (lambda (x y)
                                 (ordinal-dissector x y
                                                    string<? string=?))))
         (bar (make-string-attribute #:name 'bar))
         (baz (make-integer-attribute #:name 'baz))
         (bin (make-integer-attribute #:name 'bin))
         (bit (make-nominal-attribute
               #:name 'bit
               #:read-value string->symbol))
         (bug (make-string-attribute #:name 'bug))
         (baf (make-numeric-attribute #:name 'baf))
         ;; Attribute name -> zero-based column position in the text.
         (attribute-map '((foo . 0) (bar . 1) (baz . 2)
                          (bin . 3) (bit . 4) (bug . 5) (baf . 6)))
         (datasets
          (vector
           (make-dataset (list foo bar baz bin bit bug baf) 'bar)
           (make-dataset (list foo bar bin) 'bar)
           (make-dataset (list foo bar bit baf) 'bit)
           (make-dataset (list bar baz bin bug) 'bug))))
    ;; Load the text into every dataset; only the last one is given
    ;; extra keyword arguments (#:rec-idx foo).
    (vector-for-each
     (lambda (idx ds extra-args)
       (apply set-delimited!
              ds
              attribute-map
              (open-input-string data)
              extra-args))
     datasets
     '#(() () () (#:rec-idx foo)))
    ;; Check that the rec-idx-col option worked correctly
    (test-assert "rec-idx-col record indices"
      ((list-permutation? '(a b))
       (dataset-record-indices (vector-ref datasets 3))))
    ;; Every dataset should have received both rows.
    (vector-for-each
     (lambda (idx ds)
       (test-eq (format #f "length of dataset ~a" idx)
         2
         (dataset-length ds)))
     datasets)
    ;; Expected dataset-width per dataset.
    (vector-for-each
     (lambda (idx ds expected-width)
       (test-eq (format #f "delimited width for dataset ~a" idx)
         expected-width
         (dataset-width ds)))
     datasets
     '#(6 2 3 3))
    ;; Spot-check column contents.  Each expectation entry is an alist
    ;; of (attribute-index . expected-values).  Only three entries are
    ;; listed, so the fourth dataset is not visited by this loop.
    (vector-for-each
     (lambda (idx ds expected-columns)
       (for-each
        (lambda (column)
          (let ((attribute-index (car column))
                (expected-values (cdr column)))
            (test-assert (format #f "column values of ~a for dataset ~a"
                                 attribute-index idx)
              ((list-permutation? expected-values)
               (dataset-attribute-values ds attribute-index)))))
        expected-columns))
     datasets
     '#(((foo . ("a" "b")) (bin . (4 6)) (bit . (blurp blub)))
        ((bar . ("classy" "classx")) (bin . (4 6)))
        ((baf . (0.2 0.3))))))
  ;;; Check delimited->dataset
  ;; Five datasets are read from the same headed, comma-delimited text
  ;; with different label attributes, record-index columns and
  ;; attribute subsets.  The loops below take (index element ...)
  ;; arguments, i.e. they assume the SRFI-43 flavour of
  ;; vector-for-each, which also stops at the shortest vector.
  ;;
  ;; Every test in this block carries a name that includes the dataset
  ;; index, so a failure inside a loop can be traced to its dataset
  ;; (matching the naming used by the set-delimited! checks).
  (let* ((data "
foo,bar,baz,bin,bit,bug,baf
a,classy,1,4,blurp,qu,0.2
b,classx,2,6,blub,qa,0.3")
         ;; Attribute names should be derived from the header
         (attributes `(,(make-ordinal-attribute
                         #:read-value identity
                         #:dissector-gen (cut ordinal-dissector
                                              <> <>
                                              string<? string=?))
                       ,(make-string-attribute)
                       ,(make-integer-attribute)
                       ,(make-integer-attribute)
                       ,(make-nominal-attribute
                         #:read-value string->symbol)
                       ,(make-string-attribute)
                       ,(make-numeric-attribute)))
         ;; The same attribute objects are passed to every call.
         (datasets `#(,(delimited->dataset attributes
                                           'foo ;label-idx
                                           (open-input-string data))
                      ,(delimited->dataset attributes
                                           'bar ;label-idx
                                           (open-input-string data))
                      ,(delimited->dataset attributes
                                           'baf
                                           (open-input-string data)
                                           #:rec-idx 'foo)
                      ,(delimited->dataset (list-mask attributes '(0 1 3 5))
                                           'bar
                                           (open-input-string data)
                                           #:rec-idx 'foo)
                      ,(delimited->dataset (list-mask attributes '(0 1 3 5 6))
                                           'bar
                                           (open-input-string data)
                                           #:rec-idx 'foo))))
    ;; Expected dataset-width per dataset.
    (vector-for-each
     (lambda (i d w)
       (test-eq (format #f "delimited->dataset width for dataset ~a" i)
         w (dataset-width d)))
     datasets
     '#(6 6 5 2 3))
    ;; Both data rows should end up in every dataset.
    (vector-for-each
     (lambda (i d)
       (test-eq (format #f "delimited->dataset length for dataset ~a" i)
         2 (dataset-length d)))
     datasets)
    ;; Attribute indices and attribute names should both match the
    ;; header names.
    ;; NOTE(review): dataset 2 is built with #:rec-idx 'foo yet is
    ;; expected to list foo, whereas datasets 3 and 4 (also
    ;; #:rec-idx 'foo, attribute 0 included) are expected not to --
    ;; confirm against delimited->dataset.
    (vector-for-each
     (lambda (i d c)
       (test-assert (format #f "dataset ~a column names" i)
         ((list-permutation? c)
          (dataset-attribute-indices d)))
       (test-assert (format #f "dataset ~a column attributes" i)
         ((list-permutation? c)
          (map attribute-name
               (dataset-attributes d)))))
     datasets
     '#((foo bar baz bin bit bug baf)
        (foo bar baz bin bit bug baf)
        (foo bar baz bin bit bug baf)
        (bug bin bar)
        (baf bug bin bar)))
    ;; Spot-check column contents; each expectation entry is an alist
    ;; of (attribute-index . expected-values).
    (vector-for-each
     (lambda (i d v)
       (for-each
        (lambda (c)
          (test-assert (format #f "column values of ~a for dataset ~a"
                               (car c) i)
            ((list-permutation? (cdr c))
             (dataset-attribute-values d (car c)))))
        v))
     datasets
     '#(((foo . ("a" "b")) (bin . (4 6)) (bit . (blurp blub)))
        ((bar . ("classy" "classx")) (bin . (4 6)))
        ((baf . (0.2 0.3)))
        ((bug . ("qu" "qa")) (bin . (4 6)) (bar . ("classy" "classx")))
        ((baf . (0.2 0.3)) (bug . ("qu" "qa")))))
    ;; Values of the label attribute per dataset.
    (vector-for-each
     (lambda (i d l)
       (test-assert (format #f "label values for dataset ~a" i)
         ((list-permutation? l)
          (dataset-label-attribute-values d))))
     datasets
     '#(("a" "b")
        ("classy" "classx")
        (0.2 0.3)
        ("classy" "classx")
        ("classy" "classx")))
    ;; Check dataset-filter
    ;; The expectation vector below has a single entry, so only
    ;; dataset 0 is filtered.  Filters and assertions are numbered
    ;; from 0 in the generated test names.
    (vector-for-each
     (lambda (i d filters+assertions)
       (for-each
        ;; f has the shape ((row-pred . col-pred) assertion ...);
        ;; each assertion is a procedure applied to the filtered
        ;; dataset.
        (lambda (f filter-no)
          (let* ((row-pred (caar f))
                 (col-pred (cdar f))
                 (assertions (cdr f))
                 (ds/f (dataset-filter d
                                       #:record-pred row-pred
                                       #:attribute-pred col-pred)))
            (for-each
             (lambda (assertion assertion-no)
               (test-assert
                   (format #f
                           "dataset-filter: dataset ~a, filter ~a, assertion ~a"
                           i filter-no assertion-no)
                 (assertion ds/f)))
             assertions
             (iota (length assertions)))))
        filters+assertions
        (iota (length filters+assertions))))
     datasets
     `#((((#t #|row-pred|# . #t #|col-pred|#)
          ,(lambda (d) ((list-permutation? '(foo bar baz bin bit bug baf))
                        (dataset-attribute-indices d)))
          ,(lambda (d) ((list-permutation? '("a" "b"))
                        (dataset-label-attribute-values d))))
         ((,(lambda (ri vals) (string=? (assoc-ref vals 'bar) "classy")) . #t)
          ,(lambda (d) ((list-permutation? '("a"))
                        (dataset-label-attribute-values d)))
          ,(lambda (d) (= 1 (dataset-length d))))
         ((#t . ,(lambda (ai vals) (memq ai '(foo bar bin bit))))
          ,(lambda (d) ((list-permutation? '(foo bar bin bit))
                        (dataset-attribute-indices d))))
         ((,(lambda (ri vals) (< (assoc-ref vals 'baf) 0.25)) . #t)
          ,(lambda (d) (= 1 (dataset-length d))))))))
  ;;; Check dataset-partition-records
  ;; Nine records are read with 'class as label and 'rec as record
  ;; index, then partitioned twice by thresholds on the 'data value.
  ;; NOTE(review): two attributes are supplied for a three-column
  ;; header; how delimited->dataset pairs them with the columns is not
  ;; visible here -- confirm.
  (let* ((data "
rec,data,class
0,0.1,a
1,0.12,a
2,0.09,a
3,0.21,b
4,0.18,b
5,0.11,a
6,0.121,a
7,0.23,b
8,0.04,c")
         (attributes (list (make-nominal-attribute)
                           (make-numeric-attribute)))
         (dataset (delimited->dataset attributes
                                      'class
                                      (open-input-string data)
                                      #:rec-idx 'rec))
         ;; Record predicate: true when the record's 'data value lies
         ;; strictly below LIMIT.
         (data-below (lambda (limit)
                       (lambda (ri vals)
                         (< (assoc-ref vals 'data) limit))))
         ;; Two partitionings: one with two predicates, one with a
         ;; single predicate.
         (parts (list
                 (dataset-partition-records
                  (list (data-below 0.06)
                        (data-below 0.15))
                  dataset)
                 (dataset-partition-records
                  (list (data-below 0.12))
                  dataset))))
    ;; Each partitioning is expected to hold one more dataset than it
    ;; has predicates.
    (for-each
     (lambda (partitioning expected-count)
       (test-eq "partition parts"
         expected-count
         (length partitioning)))
     parts
     '(3 2))
    ;; Expected record count of every dataset in every partitioning.
    (for-each
     (lambda (partitioning expected-sizes)
       (for-each
        (lambda (subset expected-size)
          (test-eq "partition size"
            expected-size
            (dataset-length subset)))
        partitioning
        expected-sizes))
     parts
     '((1 5 3) (4 5))))
  (test-end "dataset-test")
  918. ;; (define (test-dataset->arff)
  919. ;; (begin
  920. ;; (let ((d (make <dataset>))
  921. ;; (d1 (make <dataset> #:attributes (list 'real 'integer 'ordinal 'nominal)))
  922. ;; (counter 0))
  923. ;; (begin
  924. ;; (for-each
  925. ;; (lambda (i)
  926. ;; (for-each
  927. ;; (lambda (j)
  928. ;; (begin
  929. ;; (dataset-set! d1 counter i j)
  930. ;; (set! counter (1+ counter))))
  931. ;; (col-tags (entries d1))))
  932. ;; (list 'e0 'e1 'e2 'e3 'e4)))
  933. ;; (dataset->arff d1))
  934. ;; (newline)))
  935. ;; Try reading in a dataset from arff. Output it to verify the contents.
  936. ;; (define (test-arff->dataset)
  937. ;; (begin
  938. ;; (let ((d (arff->dataset (open-input-string "
  939. ;; @relation bar
  940. ;; % This is a test dataset
  941. ;; @attribute tag string
  942. ;; @attribute funk string
  943. ;; @attribute foo numeric
  944. ;; @attribute bin numeric
  945. ;; @attribute bork [quick,quack]
  946. ;; @attribute frob {blurb,blip,blup}
  947. ;; @attribute label {a,b,c}
  948. ;; @data
  949. ;; e4,\"friz\",1.0,2.0,quick,blip,a
  950. ;; e3,\"fruz\",0.7,2.5,quick,blip,a
  951. ;; e2,\"frum\",1.2,2.0,quack,blurp,c
  952. ;; e1,\"fraz\",1.1,2.3,quack,blup,c
  953. ;; e0,\"frim\",1.6,2.9,quack,blup,b"))))
  954. ;; (begin
  955. ;; (dataset->arff d)
  956. ;; (newline)))
  957. ;; (let ((d (arff->dataset (open-input-string "
  958. ;; @relation bar
  959. ;; @attribute name string
  960. ;; @attribute class {a,b,c}
  961. ;; @attribute funk string
  962. ;; @attribute ignored [blarney,quack,silly]
  963. ;; @attribute val numeric
  964. ;; @data
  965. ;; e4,a,foo,friz,1.0
  966. ;; e3,b,bar,fratz,3.2
  967. ;; e2,b,biz,frumble,0.75
  968. ;; e1,c,bur,fram,0.01
  969. ;; e0,a,fit,frobble,10")
  970. ;; #:ignore-attributes (list 'ignored)
  971. ;; #:tag-index 'name
  972. ;; #:label-index 'class)))
  973. ;; (begin
  974. ;; (dataset->arff d)
  975. ;; (newline)))))
  976. ;; (define (test-set-delimited!)
  977. ;; (begin
  978. ;; (let ((d (make <dataset> #:attributes (list 'real 'integer))))
  979. ;; (begin
  980. ;; (set-delimited! d (iota 4) (open-input-string "
  981. ;; e1,1.0,2,a
  982. ;; e3,2.0,3,b
  983. ;; e8,3.0,4,a
  984. ;; e4,4.0,1,r"))
  985. ;; (dataset-set-domains! d)
  986. ;; (dataset->arff d)
  987. ;; (newline)))
  988. ;; ;; Test another invocation, with a non-trivial column-mask, alternate
  989. ;; ;; delimiter, and out-of-order tag and label indices.
  990. ;; (let ((d (make <dataset> #:attributes (list 'real 'integer))))
  991. ;; (begin
  992. ;; (set-delimited! d (list 0 1 2 4) (open-input-string "
  993. ;; a:1.0:2:fuzz:e1
  994. ;; b:2.0:3:fizz:e3
  995. ;; a:3.0:4:fizz:e8
  996. ;; r:4.0:1:fuzz:e4")
  997. ;; #:tag-index 4 #:label-index 0
  998. ;; #:delimiter #\:)
  999. ;; (dataset-set-domains! d)
  1000. ;; (dataset->arff d)
  1001. ;; (newline)))
  1002. ;; ;; This invocation will create record tags
  1003. ;; (let ((d (make <dataset> #:attributes (list 'nominal 'ordinal 'integer))))
  1004. ;; (begin
  1005. ;; (set-delimited! d (list 5 1 2 4) (open-input-string "
  1006. ;; a,b,1,foo,10,classy
  1007. ;; a,d,3,bar,20,classx
  1008. ;; t,a,2,biz,5,classe")
  1009. ;; #:tag-index #f #:label-index 5)
  1010. ;; (dataset-set-domains! d)
  1011. ;; (dataset->arff d)
  1012. ;; (newline)))))
  1013. ;; (define (test-dataset-filter)
  1014. ;; (let ((d (make-test-dataset)))
  1015. ;; (dataset->arff (dataset-filter
  1016. ;; d
  1017. ;; #:record-pred (lambda (rt elts)
  1018. ;; (eq? (assoc-ref elts 'frob) 'blup))
  1019. ;; #:attribute-pred #t))))
  1020. ;; (define (make-test-dataset)
  1021. ;; (arff->dataset (open-input-string "
  1022. ;; @relation bar
  1023. ;; % This is a test dataset
  1024. ;; @attribute tag string
  1025. ;; @attribute funk string
  1026. ;; @attribute foo numeric
  1027. ;; @attribute bin numeric
  1028. ;; @attribute bork [quick,quack]
  1029. ;; @attribute frob {blurp,blip,blup}
  1030. ;; @attribute label {a,b,c}
  1031. ;; @data
  1032. ;; e4,\"friz\",1.0,2.0,quick,blip,a
  1033. ;; e3,\"fruz\",0.7,2.5,quick,blip,a
  1034. ;; e2,\"frum\",1.2,2.0,quack,blurp,c
  1035. ;; e1,\"fraz\",1.1,2.3,quack,blup,c
  1036. ;; e0,\"frim\",1.6,2.9,quack,blup,b")))
  1037. ;; Local Variables:
  1038. ;; fill-column: 72
  1039. ;; End: