text-codec.scm 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109
  1. ; Copyright (c) 1993-2008 by Richard Kelsey and Jonathan Rees. See file COPYING.
  2. ; Encoders/decoders from text to bytes and vice versa, for use by the
  3. ; the ports subsystem.
  4. ; Note that encoders and decoders must operate on buffers only
  5. ; provisionally.
  6. (define-record-type text-codec :text-codec
  7. (really-make-text-codec names
  8. builtin-code
  9. encode-char-proc
  10. decode-char-proc)
  11. text-codec?
  12. (names text-codec-names)
  13. ;; either #f or an integer from enum TEXT-ENCODING-OPTION
  14. ;; for encodings built into the VM
  15. (builtin-code text-codec-builtin-code)
  16. ;; (char buffer start count) -> (ok? #f or #bytes consumed or #bytes needed)
  17. (encode-char-proc text-codec-encode-char-proc)
  18. ;; (buffer start count) -> (char #bytes consumed)
  19. ;; or (#f #total bytes needed at least)
  20. ;; or (#f #f) (failure)
  21. (decode-char-proc text-codec-decode-char-proc))
  22. (define (make-builtin-text-codec names code)
  23. (really-make-text-codec names
  24. code
  25. (lambda (char buffer start count)
  26. (encode-char code char buffer start count))
  27. (lambda (buffer start count)
  28. (decode-char code buffer start count))))
  29. (define (make-text-codec names encode-char-proc decode-char-proc)
  30. (really-make-text-codec names #f encode-char-proc decode-char-proc))
  31. (define-record-discloser :text-codec
  32. (lambda (r)
  33. (cons 'text-codec (text-codec-names r))))
  34. (define *builtin-text-codecs*
  35. (make-vector (+ (max (enum text-encoding-option us-ascii)
  36. (enum text-encoding-option utf-8)
  37. (enum text-encoding-option utf-16le)
  38. (enum text-encoding-option utf-16be)
  39. (enum text-encoding-option utf-32le)
  40. (enum text-encoding-option utf-32be))
  41. 1)))
  42. (define (spec->text-codec spec)
  43. (if (text-codec? spec)
  44. spec
  45. (vector-ref *builtin-text-codecs* spec)))
  46. (define (text-codec->spec codec)
  47. (or (text-codec-builtin-code codec)
  48. codec))
  49. (define *text-codecs* '())
  50. (define (register-text-codec! codec)
  51. (set! *text-codecs* (cons codec *text-codecs*)))
  52. (define (find-text-codec name)
  53. (let loop ((codecs *text-codecs*))
  54. (cond
  55. ((null? codecs) #f)
  56. ((member name (text-codec-names (car codecs)))
  57. (car codecs))
  58. (else (loop (cdr codecs))))))
  59. (define-syntax define-text-codec
  60. (syntax-rules ()
  61. ((define-text-codec ?id (?name ...) ?encode-proc ?decode-proc)
  62. (begin
  63. (define ?id (make-text-codec '(?name ...) ?encode-proc ?decode-proc))
  64. (register-text-codec! ?id)))
  65. ((define-text-codec ?id ?name ?encode-proc ?decode-proc)
  66. (define-text-codec ?id (?name) ?encode-proc ?decode-proc))))
  67. (define-syntax define-builtin-text-codec
  68. (syntax-rules ()
  69. ((define-builtin-text-codec ?id (?name ...) ?enumerand)
  70. (begin
  71. (define ?id (make-builtin-text-codec '(?name ...) (enum text-encoding-option ?enumerand)))
  72. (register-text-codec! ?id)
  73. (vector-set! *builtin-text-codecs* (enum text-encoding-option ?enumerand)
  74. ?id)))
  75. ((define-builtin-text-codec ?id ?name ?enumerand)
  76. (define-builtin-text-codec ?id (?name) ?enumerand))))
  77. (define-text-codec null-text-codec "null"
  78. (lambda (char buffer start count)
  79. #f)
  80. (lambda (buffer start count)
  81. (values #f #f)))
  82. (define-builtin-text-codec us-ascii-codec
  83. ("US-ASCII"
  84. "ANSI_X3.4-1968" ; apparently, the POSIX locale on some Linux systems returns this
  85. )
  86. us-ascii)
  87. (define-builtin-text-codec latin-1-codec "ISO8859-1" latin-1)
  88. (define-builtin-text-codec utf-8-codec "UTF-8" utf-8)
  89. (define-builtin-text-codec utf-16le-codec "UTF-16LE" utf-16le)
  90. (define-builtin-text-codec utf-16be-codec "UTF-16BE" utf-16be)
  91. (define-builtin-text-codec utf-32le-codec "UTF-32LE" utf-32le)
  92. (define-builtin-text-codec utf-32be-codec "UTF-32BE" utf-32be)