os-string.scm 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
  1. ; Copyright (c) 1993-2008 by Richard Kelsey and Jonathan Rees. See file COPYING.
  2. ; You may think that file names / environment variables / user names
  3. ; etc. are just text, but on most platforms, that assumption is wrong:
  4. ; They are usually NUL-terminated byte strings in some format. The
  5. ; bytes are invariant, but the corresponding text may depend on the
  6. ; locale. Also, byte sequences without a textual representation are
  7. ; possible.
  8. ; We assume that OS strings are encoded in some conservative extension
  9. ; of NUL-terminated ASCII. On Unix, this assumption pretty much has
  10. ; to hold true because of the various constraints of locale handling
  11. ; there. The Windows API uses an extension of UTF-16 that includes
  12. ; unpaired surrogates. For this, we use a synthetic extension of
  13. ; UTF-8 called UTF-8of16 that also deals with unpaired surrogates.
  14. ; #### lossiness
  15. (define-record-type os-string :os-string
  16. (make-os-string text-codec string byte-vector)
  17. os-string?
  18. (text-codec os-string-text-codec)
  19. ; may be #f, will get cached value
  20. (string os-string-string set-os-string-string!)
  21. ; may be #f, will get cached value
  22. (byte-vector os-string-byte-vector set-os-string-byte-vector!))
  23. (define-record-discloser :os-string
  24. (lambda (oss)
  25. (list "OS-string"
  26. (text-codec-names (os-string-text-codec oss))
  27. (os-string->string oss))))
  28. (define *initial-os-string-text-codec* #f)
  29. (define (initialize-os-string-text-codec!)
  30. (set! *initial-os-string-text-codec*
  31. (or (find-text-codec
  32. (system-parameter (enum system-parameter-option os-string-encoding)))
  33. us-ascii-codec)))
  34. (define $os-string-text-codec
  35. (make-fluid
  36. (lambda () *initial-os-string-text-codec*)))
  37. (define (current-os-string-text-codec)
  38. ((fluid $os-string-text-codec)))
  39. (define (call-with-os-string-text-codec codec thunk)
  40. (let-fluid $os-string-text-codec (lambda () codec)
  41. thunk))
  42. (define (string->os-string s)
  43. (let ((c (string-copy s)))
  44. (make-immutable! c)
  45. (make-os-string (current-os-string-text-codec)
  46. c #f)))
  47. (define (byte-vector->os-string b)
  48. (let ((c (byte-vector-copy b)))
  49. (make-immutable! b)
  50. (make-os-string (current-os-string-text-codec)
  51. #f c)))
  52. (define (os-string->byte-vector oss)
  53. (or (os-string-byte-vector oss)
  54. (let* ((string (os-string-string oss))
  55. (codec (os-string-text-codec oss))
  56. (size (string-encoding-length codec
  57. string
  58. 0
  59. (string-length string)))
  60. (bytes (make-byte-vector (+ size 1) 0))) ; NUL termination
  61. (encode-string codec
  62. string 0 (string-length string)
  63. bytes 0 size)
  64. (set-os-string-byte-vector! oss bytes)
  65. (make-immutable! bytes)
  66. bytes)))
  67. (define (os-string->string oss)
  68. (or (os-string-string oss)
  69. (let* ((bytes (os-string-byte-vector oss))
  70. (size (- (byte-vector-length bytes) 1))
  71. (codec (os-string-text-codec oss)))
  72. (call-with-values
  73. (lambda ()
  74. (bytes-string-size codec bytes 0 size #f))
  75. (lambda (status consumed-count decoded-count)
  76. (let ((string (make-string decoded-count)))
  77. (decode-string codec bytes 0 size
  78. string 0 decoded-count
  79. #\?)
  80. (set-os-string-string! oss string)
  81. (make-immutable! string)
  82. string))))))
  83. (define (x->os-string x)
  84. (cond
  85. ((os-string? x) x)
  86. ((string? x) (string->os-string x))
  87. ((byte-vector? x) (byte-vector->os-string x))))
  88. ; doesn't really belong here
  89. (define (byte-vector-copy b)
  90. (let* ((size (byte-vector-length b))
  91. (result (make-byte-vector size 0)))
  92. (copy-bytes! b 0 result 0 size)
  93. result))
  94. (initialize-os-string-text-codec!)