123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108 |
- (use-modules
- (utils csv)
- (decision-tree)
- (dataset)
- (tree)
- (utils string)
- (utils display)
- (prediction)
- ;; ice-9 format for format procedure
- (ice-9 format))
;; File name of the CSV file with the banknote authentication data.
(define FILE-PATH "data_banknote_authentication.csv")
;; One entry per CSV column.  Each entry is a list holding the procedure which
;; turns the string read from the CSV file into the data type used for that
;; column in the program.
;;
;; Columns 0 to 3 are converted with plain string->number.  The value of
;; column 4 is trimmed on both sides before it is converted (presumably to get
;; rid of trailing whitespace or line-ending characters -- TODO confirm).
(define COLUMN-CONVERTERS
  (let ([trimmed-string->number
         ;; Debugging variant of this converter, kept disabled:
         #;(lambda (val)
             (display (simple-format #f "converting: ~a\n" val))
             (display (simple-format #f "converted: ~a\n" (string->number val)))
             (string->number val))
         (lambda (val)
           (string->number (string-trim-both val)))])
    (append
     ;; columns 0 - 3: a fresh one-element list per column
     (map (lambda (column-index) (list string->number))
          (iota 4))
     ;; column 4
     (list (list trimmed-string->number)))))
;; The data set used by the program: all rows of the CSV file named by
;; FILE-PATH, read with all-rows and converted using COLUMN-CONVERTERS.
;; FILE-PATH is referenced instead of repeating the file name literal, so the
;; file name is defined in exactly one place.
(define banking-dataset
  (all-rows FILE-PATH #:converters COLUMN-CONVERTERS))
;; A leftover from development: a tiny example data set for interactive
;; experiments and for runs which should take less time than with a whole
;; larger data set.
;;
;; Every row is a vector of three numbers.  The last element is 0 for the first
;; five rows and 1 for the remaining five (presumably the class label).
(define dev-dataset
  (let ([rows-ending-in-0
         (list #(2.771244718 1.784783929 0)
               #(1.728571309 1.169761413 0)
               #(3.678319846 2.81281357 0)
               #(3.961043357 2.61995032 0)
               #(2.999208922 2.209014212 0))]
        [rows-ending-in-1
         (list #(7.497545867 3.162953546 1)
               #(9.00220326 3.339047188 1)
               #(7.444542326 0.476683375 1)
               #(10.12493903 3.234550982 1)
               #(6.642287351 3.319983761 1))])
    ;; keep the original row order: rows ending in 0 first, then rows ending in 1
    (append rows-ending-in-0 rows-ending-in-1)))
;; Displays a string representation of a learned decision tree.
;;
;; tree               - the root node of the tree to display
;; label-column-index - handed to dataset-majority-prediction when a leaf node
;;                      is rendered
;;
;; A leaf node becomes the line "[<majority prediction of the node's data>]".
;; Any other node becomes the line
;; "[feature:<split feature index> < <split value>]", followed by the lines of
;; its left subtree and then those of its right subtree, rendered one level
;; deeper.  Every line starts with (n-times-string " " depth).
(define-public (print-tree tree label-column-index)
  ;; Builds the string for NODE and everything below it; LEVEL is the depth of
  ;; NODE, counted from 0 at the root.
  (define (render node level)
    (let ([indentation (n-times-string " " level)])
      (if (leaf-node? node)
          (string-append indentation
                         "["
                         (number->string
                          (dataset-majority-prediction (node-data node)
                                                       label-column-index))
                         "]\n")
          (let ([child-level (+ level 1)])
            (string-append indentation
                           "[feature:"
                           (number->string (node-split-feature-index node))
                           " < "
                           (number->string (node-split-value node))
                           "]\n"
                           (render (node-left node) child-level)
                           (render (node-right node) child-level))))))
  (displayln (render tree 0)))
;; Evaluate the algorithm on the shuffled banking data set and display the
;; result.  Each number in the list returned by evaluate-algorithm is formatted
;; with three decimal places and a trailing newline; the resulting list of
;; strings is then displayed as a whole, followed by a newline.
(let* ([shuffled-dataset (shuffle-dataset banking-dataset #:seed 12345)]
       [results
        (evaluate-algorithm #:dataset shuffled-dataset
                            #:n-folds 10
                            #:feature-column-indices '(0 1 2 3)
                            #:label-column-index 4
                            #:max-depth 6
                            #:min-data-points 12
                            #:min-data-points-ratio 0.02
                            #:min-impurity-split (expt 10 -7)
                            #:stop-at-no-impurity-improvement #t
                            #:random-seed 12345)]
       [formatted-results
        (map (lambda (result) (format #f "~,3f\n" result))
             results)])
  (display (simple-format #f "~a\n" formatted-results)))
- ;; (define tree
- ;; (fit #:train-data (shuffle-dataset banking-dataset #:seed 12345)
- ;; #:feature-column-indices (list 0 1 2 3)
- ;; #:label-column-index 4
- ;; #:max-depth 5
- ;; #:min-data-points 12
- ;; #:min-data-points-ratio 0.02
- ;; #:min-impurity-split (expt 10 -7)
- ;; #:stop-at-no-impurity-improvement #t))
- ;; (print-tree tree 4)
|