csv.scm 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. ;; This module provides functionality for reading in CSV files. The assumption
  2. ;; is that the CSV file really is a comma-separated values file.
  3. ;; Another detail is that leading and trailing whitespace is stripped from the
  4. ;; values in the CSV.
  5. ;; What do we need?
  6. #|
  7. (define-public DEFAULT-DATA-READER-MAKER
  8. (make-csv-reader-maker
  9. '((seperator-chars #\,)
  10. (strip-leading-whitespace . true)
  11. (strip-trailing-whitespace . true))))
  12. |#
  13. (define-module (utils csv))
  14. (use-modules
  15. ;; dsv - delimiter separated values library for reading CSV files
  16. (dsv)
  17. (utils list)
  18. ;; R6RS for assertions
  19. ((rnrs) #:version (6) #:prefix rnrs:))
  20. ;; Maybe these should be put in the data.scm file as abstractions over data
  21. ;; structures used to represent data sets, because they deal with data sets
  22. ;; (export).
  23. ;; =============================
  24. ;; LIBRARY INTERFACE ABSTRACTION
  25. ;; =============================
  (define-public read-dsv-from-file
    (lambda* (file-path
              #:optional (delimiter #\,)
              #:key
              (format 'unix)
              (comment-prefix 'default)
              (encoding "UTF-8"))
      "Open FILE-PATH for reading, decode it as ENCODING and return whatever
  `dsv->scm` produces for its contents.

  DELIMITER (default: comma), FORMAT (default: 'unix) and COMMENT-PREFIX
  (default: 'default) are handed to `dsv->scm` unchanged."
      ;; The encoding is set on the port before the parser reads from it.
      (define (parse-from-port port)
        (set-port-encoding! port encoding)
        (dsv->scm port
                  delimiter
                  #:format format
                  #:comment-prefix comment-prefix))
      (call-with-input-file file-path parse-from-port)))
  (define-public read-dsv-from-string
    (lambda* (input
              #:optional (delimiter #\,)
              #:key (format 'unix) (comment-prefix 'default))
      "Parse the DSV text INPUT and return the result of `dsv-string->scm`.

  DELIMITER (default: comma), FORMAT (default: 'unix) and COMMENT-PREFIX
  (default: 'default) are handed to `dsv-string->scm` unchanged."
      (dsv-string->scm input delimiter
                       #:format format
                       #:comment-prefix comment-prefix)))
  (define-public write-scm-dsv-to-file
    (lambda* (scm-output
              file-path
              #:optional (delimiter #\,)
              #:key
              (format 'unix)
              (comment-prefix 'default)
              (encoding "UTF-8"))
      "Write SCM-OUTPUT, which must be a list, to FILE-PATH as DSV text.

  Every element is passed through `stringify*` before being handed to
  `scm->dsv`. The output port uses ENCODING (default: UTF-8). DELIMITER
  (default: comma) and FORMAT (default: 'unix) are forwarded to `scm->dsv`.
  COMMENT-PREFIX is accepted but not used by this procedure.

  Fails an assertion, before the file is opened, if SCM-OUTPUT is not a list."
      ;; Validate before opening the file: opening it for output may already
      ;; create or truncate it, and invalid input must not destroy existing
      ;; data.
      (rnrs:assert (list? scm-output))
      ;; For some unknown reason scm->dsv expects everything inside the
      ;; list to be strings already. This means we need to convert to
      ;; strings before giving the data to scm->dsv.
      ;; WARNING: There is no representation for symbols in DSV files, so
      ;; symbols and strings are not distinguished within such a
      ;; file. That means the conversion is lossy for some types of data.
      (call-with-output-file file-path
        (lambda (port)
          (set-port-encoding! port encoding)
          (scm->dsv (stringify* scm-output)
                    port
                    delimiter
                    #:format format)))))
  (define-public write-scm-dsv-to-string
    (lambda* (scm-output
              #:optional (delimiter #\,)
              #:key
              (format 'unix)
              (comment-prefix 'default))
      "Return the result of `scm->dsv-string` for SCM-OUTPUT.

  SCM-OUTPUT is first passed through `stringify*`. DELIMITER (default: comma)
  and FORMAT (default: 'unix) are forwarded to `scm->dsv-string`.
  COMMENT-PREFIX is accepted but not used by this procedure."
      (let ([rows-as-strings (stringify* scm-output)])
        (scm->dsv-string rows-as-strings
                         delimiter
                         #:format format))))
  (define-public all-rows
    (lambda* (a-file-path
              #:key
              (converters '()))
      "Read the DSV file at A-FILE-PATH with the default reader settings and
  return all of its rows as a list of vectors.

  CONVERTERS is a list with one entry per leading column. The entry at
  position N is handed, together with the cell in column N of every row, to
  `apply-multiple` (presumably a list of procedures applied to the cell in
  turn -- see (utils list)). Columns beyond the end of CONVERTERS are kept
  unchanged.

  Returns the empty list if the file contains no rows. Fails an assertion if
  the first row has no cells or if there are more converter entries than cells
  in the first row."
      ;; Convert a single cell with the converter entry for its column.
      (define convert-cell
        (lambda (cell cell-converters)
          (apply-multiple cell-converters cell)))
      ;; Walk the cells of a row and the converter entries in lockstep.
      (define convert-row
        (lambda (row row-converters)
          (let iter-cells ([remaining-cells row]
                           [remaining-converters row-converters])
            (cond
             ;; No converter entries left: keep the rest of the row as it is.
             [(null? remaining-converters) remaining-cells]
             ;; The row is shorter than the converter list: nothing left to do.
             [(null? remaining-cells) '()]
             [else
              ;; Convert the current cell and recur with the remaining cells.
              (cons (convert-cell (car remaining-cells) (car remaining-converters))
                    (iter-cells (cdr remaining-cells)
                                (cdr remaining-converters)))]))))
      (let ([dataset (read-dsv-from-file a-file-path)])
        (cond
         ;; Without this guard, (car dataset) below would raise a type error
         ;; when the parser returns no rows at all.
         [(null? dataset) '()]
         [else
          (let ([column-count (length (car dataset))])
            (rnrs:assert (>= column-count 1))
            (rnrs:assert (<= (length converters) column-count))
            ;; list of vectors
            (map (lambda (row) (list->vector (convert-row row converters)))
                 dataset))]))))