reader.go 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. // Package quotedprintable implements quoted-printable encoding as specified by
  2. // RFC 2045.
  3. package quotedprintable
  4. import (
  5. "bufio"
  6. "bytes"
  7. "fmt"
  8. "io"
  9. )
  // Reader decodes quoted-printable data pulled from a wrapped io.Reader.
  // Construct one with NewReader.
  type Reader struct {
  	// br buffers the wrapped reader so input can be fetched a line at a time.
  	br *bufio.Reader

  	// rerr is the last error produced while reading from br.
  	rerr error

  	// line holds bytes already fetched from br; they must be consumed
  	// before any more of br is read.
  	line []byte
  }
  16. // NewReader returns a quoted-printable reader, decoding from r.
  17. func NewReader(r io.Reader) *Reader {
  18. return &Reader{
  19. br: bufio.NewReader(r),
  20. }
  21. }
  // fromHex converts a single ASCII hex digit to its numeric value (0-15).
  // It returns an error if b is not one of 0-9, A-F or a-f.
  func fromHex(b byte) (byte, error) {
  	if '0' <= b && b <= '9' {
  		return b - '0', nil
  	}
  	if 'A' <= b && b <= 'F' {
  		return b - 'A' + 10, nil
  	}
  	// Lowercase digits come from badly behaved encoders; accept them anyway.
  	if 'a' <= b && b <= 'f' {
  		return b - 'a' + 10, nil
  	}
  	return 0, fmt.Errorf("quotedprintable: invalid hex byte 0x%02x", b)
  }
  34. func readHexByte(a, b byte) (byte, error) {
  35. var hb, lb byte
  36. var err error
  37. if hb, err = fromHex(a); err != nil {
  38. return 0, err
  39. }
  40. if lb, err = fromHex(b); err != nil {
  41. return 0, err
  42. }
  43. return hb<<4 | lb, nil
  44. }
  // isQPDiscardWhitespace reports whether r is a space, tab, carriage return
  // or line feed. Read uses it to trim the right-hand end of each input line.
  func isQPDiscardWhitespace(r rune) bool {
  	return r == ' ' || r == '\t' || r == '\r' || r == '\n'
  }
  // Byte sequences Read matches against the ends of input lines.
  var (
  	crlf       = []byte{'\r', '\n'} // CRLF line terminator
  	lf         = []byte{'\n'}       // bare LF line terminator
  	softSuffix = []byte{'='}        // trailing '=' that marks a soft line break
  )
  57. // Read reads and decodes quoted-printable data from the underlying reader.
  58. func (r *Reader) Read(p []byte) (n int, err error) {
  59. // Deviations from RFC 2045:
  60. // 1. in addition to "=\r\n", "=\n" is also treated as soft line break.
  61. // 2. it will pass through a '\r' or '\n' not preceded by '=', consistent
  62. // with other broken QP encoders & decoders.
  63. for len(p) > 0 {
  64. if len(r.line) == 0 {
  65. if r.rerr != nil {
  66. return n, r.rerr
  67. }
  68. r.line, r.rerr = r.br.ReadSlice('\n')
  69. // Does the line end in CRLF instead of just LF?
  70. hasLF := bytes.HasSuffix(r.line, lf)
  71. hasCR := bytes.HasSuffix(r.line, crlf)
  72. wholeLine := r.line
  73. r.line = bytes.TrimRightFunc(wholeLine, isQPDiscardWhitespace)
  74. if bytes.HasSuffix(r.line, softSuffix) {
  75. rightStripped := wholeLine[len(r.line):]
  76. r.line = r.line[:len(r.line)-1]
  77. if !bytes.HasPrefix(rightStripped, lf) && !bytes.HasPrefix(rightStripped, crlf) {
  78. r.rerr = fmt.Errorf("quotedprintable: invalid bytes after =: %q", rightStripped)
  79. }
  80. } else if hasLF {
  81. if hasCR {
  82. r.line = append(r.line, '\r', '\n')
  83. } else {
  84. r.line = append(r.line, '\n')
  85. }
  86. }
  87. continue
  88. }
  89. b := r.line[0]
  90. switch {
  91. case b == '=':
  92. if len(r.line[1:]) < 2 {
  93. return n, io.ErrUnexpectedEOF
  94. }
  95. b, err = readHexByte(r.line[1], r.line[2])
  96. if err != nil {
  97. return n, err
  98. }
  99. r.line = r.line[2:] // 2 of the 3; other 1 is done below
  100. case b == '\t' || b == '\r' || b == '\n':
  101. break
  102. case b < ' ' || b > '~':
  103. return n, fmt.Errorf("quotedprintable: invalid unescaped byte 0x%02x in body", b)
  104. }
  105. p[0] = b
  106. p = p[1:]
  107. r.line = r.line[1:]
  108. n++
  109. }
  110. return n, nil
  111. }