123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130 |
;; This module provides functionality for reading CSV files. It assumes that
;; the CSV file really is a comma-separated values file.
;; Note that leading and trailing whitespace is stripped from the values in
;; the CSV.
;; What do we need?
#|
(define-public DEFAULT-DATA-READER-MAKER
  (make-csv-reader-maker
   '((separator-chars #\,)
     (strip-leading-whitespace . true)
     (strip-trailing-whitespace . true))))
|#
- (define-module (utils csv))
- (use-modules
- ;; dsv - delimiter separated values library for reading CSV files
- (dsv)
- (utils list)
- ;; R6RS for assertions
- ((rnrs) #:version (6) #:prefix rnrs:))
- ;; Maybe these should be put in the data.scm file as abstractions over data
- ;; structures used to represent data sets, because they deal with data sets
- ;; (export).
- ;; =============================
- ;; LIBRARY INTERFACE ABSTRACTION
- ;; =============================
(define* (read-dsv-from-file file-path
                             #:optional (delimiter #\,)
                             #:key
                             (format 'unix)
                             (comment-prefix 'default)
                             (encoding "UTF-8"))
  "Open the file at FILE-PATH, set the port encoding to ENCODING and return
the result of parsing its content with `dsv->scm`.

DELIMITER (default: comma), FORMAT (default: 'unix) and COMMENT-PREFIX
(default: 'default) are passed through to `dsv->scm` unchanged."
  ;; Handler run by `call-with-input-file` on the freshly opened port.
  (define (parse-port in)
    ;; The encoding has to be set before anything is read from the port.
    (set-port-encoding! in encoding)
    (dsv->scm in
              delimiter
              #:format format
              #:comment-prefix comment-prefix))
  (call-with-input-file file-path parse-port))
(export read-dsv-from-file)
(define* (read-dsv-from-string input
                               #:optional (delimiter #\,)
                               #:key
                               (format 'unix)
                               (comment-prefix 'default))
  "Parse the DSV text INPUT with `dsv-string->scm` and return its result.

DELIMITER (default: comma), FORMAT (default: 'unix) and COMMENT-PREFIX
(default: 'default) are passed through to `dsv-string->scm` unchanged."
  (dsv-string->scm input
                   delimiter
                   #:format format
                   #:comment-prefix comment-prefix))
(export read-dsv-from-string)
(define-public write-scm-dsv-to-file
  (lambda* (scm-output
            file-path
            #:optional (delimiter #\,)
            #:key
            (format 'unix)
            (comment-prefix 'default)
            (encoding "UTF-8"))
    "Write SCM-OUTPUT as DSV text to the file at FILE-PATH.

SCM-OUTPUT must be a list; otherwise an R6RS assertion violation is raised
before the file is touched.  DELIMITER (default: comma) and FORMAT
(default: 'unix) are passed through to `scm->dsv`.  The port encoding is set
to ENCODING (default: \"UTF-8\") before writing.  COMMENT-PREFIX is accepted
but not used by this procedure."
    ;; Validate and convert BEFORE opening the file: opening for output
    ;; truncates an existing file, so failing afterwards would destroy its
    ;; previous content without writing anything useful.
    (rnrs:assert (list? scm-output))
    ;; For some unknown reason scm->dsv expects everything inside the
    ;; list to be strings already. This means we need to convert to
    ;; strings before giving the data to scm->dsv.
    ;; WARNING: There is no representation for symbols in DSV files, so
    ;; symbols and strings are not distinguished within such a
    ;; file. That means the conversion is lossy for some types of data.
    ;; NOTE(review): assumes stringify* has no side effects, so calling it
    ;; before the file is opened is safe -- confirm against (utils list).
    (let ([rows (stringify* scm-output)])
      (call-with-output-file file-path
        (lambda (port)
          (set-port-encoding! port encoding)
          (scm->dsv rows
                    port
                    delimiter
                    #:format format))))))
(define* (write-scm-dsv-to-string scm-output
                                  #:optional (delimiter #\,)
                                  #:key
                                  (format 'unix)
                                  (comment-prefix 'default))
  "Convert SCM-OUTPUT with `stringify*` and return the result of rendering it
with `scm->dsv-string`.

DELIMITER (default: comma) and FORMAT (default: 'unix) are passed through to
`scm->dsv-string`.  COMMENT-PREFIX is accepted but not used by this
procedure."
  ;; The data is passed through stringify* first, then handed to
  ;; scm->dsv-string.
  (let ([stringified-rows (stringify* scm-output)])
    (scm->dsv-string stringified-rows
                     delimiter
                     #:format format)))
(export write-scm-dsv-to-string)
(define-public all-rows
  (lambda* (a-file-path
            #:key
            (converters '()))
    "Read the DSV file at A-FILE-PATH with `read-dsv-from-file` (default
settings) and return all of its rows as a list of vectors.

CONVERTERS is a list with one entry per leading column.  Each entry is
passed, together with the cell of that column, to `apply-multiple`.  Columns
beyond the end of CONVERTERS are returned unconverted.

An empty file yields the empty list.  Otherwise an R6RS assertion violation
is raised when the first row has no columns or when CONVERTERS has more
entries than the first row has columns."
    (define convert-cell
      (lambda (cell cell-converters)
        ;; NOTE(review): presumably applies each converter in turn to the
        ;; cell -- confirm against apply-multiple in (utils list).
        (apply-multiple cell-converters cell)))
    (define convert-row
      (lambda (row row-converters)
        (let iter-cells ([remaining-cells row]
                         [remaining-converters row-converters])
          (cond
           ;; No converters left: keep the rest of the row untouched.
           [(null? remaining-converters) remaining-cells]
           ;; Row is shorter than the converter list: nothing more to do.
           [(null? remaining-cells) '()]
           [else
            ;; convert the current cell and recur with the remaining cells
            (cons (convert-cell (car remaining-cells) (car remaining-converters))
                  (iter-cells (cdr remaining-cells)
                              (cdr remaining-converters)))]))))
    (let ([dataset (read-dsv-from-file a-file-path)])
      (cond
       ;; An empty file has no first row to inspect; taking its car would
       ;; fail with a wrong-type error, so return "no rows" instead.
       [(null? dataset) '()]
       [else
        ;; Only the first row is used to sanity-check the column count.
        (rnrs:assert (>= (length (car dataset)) 1))
        (rnrs:assert (<= (length converters) (length (car dataset))))
        ;; list of vectors
        (map (lambda (row) (list->vector (convert-row row converters)))
             dataset)]))))
|