text-codec-util.scm 1.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546
  1. ; Part of Scheme 48 1.9. See file COPYING for notices and license.
  2. ; Authors: Mike Sperber, Robert Ransom
  3. ; Copyright (c) 2005-2006 by Basis Technology Corporation.
  4. ; Utilities for setting and constructing text codecs
  5. ; This follows Richard Gillam: Unicode Demystified, Chapter 6.
  6. ; The BOM is U+FEFF---we look for one at the beginning of the stream.
  7. ; Note if this fails, you better re-open the port from the start
  8. ; Note that the UTF-32 detection suggested in Gillam's book is not
  9. ; practical, as it may confuse valid UTF-16 with UTF-32.
  10. (define (guess-port-text-codec-according-to-bom port)
  11. (let ((first (peek-byte port)))
  12. (case first
  13. ((#xfe)
  14. (read-byte port)
  15. (if (eqv? #xff (read-byte port))
  16. utf-16be-codec
  17. #f))
  18. ((#xff)
  19. (read-byte port)
  20. (if (eqv? #xfe (read-byte port))
  21. utf-16le-codec
  22. #f))
  23. ((#xef)
  24. (read-byte port)
  25. (if (and (eqv? #xbb (read-byte port))
  26. (eqv? #xbf (read-byte port)))
  27. utf-8-codec
  28. #f))
  29. (else
  30. #f))))
  31. ; The caller really should check the return code
  32. (define (set-port-text-codec-according-to-bom! port)
  33. (cond
  34. ((guess-port-text-codec-according-to-bom port)
  35. => (lambda (text-codec)
  36. (set-port-text-codec! port text-codec)
  37. #t))
  38. (else
  39. #f)))