mediatype.go 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359
  1. // Copyright 2010 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package mime
  5. import (
  6. "bytes"
  7. "errors"
  8. "fmt"
  9. "sort"
  10. "strings"
  11. "unicode"
  12. )
  13. // FormatMediaType serializes mediatype t and the parameters
  14. // param as a media type conforming to RFC 2045 and RFC 2616.
  15. // The type and parameter names are written in lower-case.
  16. // When any of the arguments result in a standard violation then
  17. // FormatMediaType returns the empty string.
  18. func FormatMediaType(t string, param map[string]string) string {
  19. slash := strings.Index(t, "/")
  20. if slash == -1 {
  21. return ""
  22. }
  23. major, sub := t[:slash], t[slash+1:]
  24. if !isToken(major) || !isToken(sub) {
  25. return ""
  26. }
  27. var b bytes.Buffer
  28. b.WriteString(strings.ToLower(major))
  29. b.WriteByte('/')
  30. b.WriteString(strings.ToLower(sub))
  31. attrs := make([]string, 0, len(param))
  32. for a := range param {
  33. attrs = append(attrs, a)
  34. }
  35. sort.Strings(attrs)
  36. for _, attribute := range attrs {
  37. value := param[attribute]
  38. b.WriteByte(';')
  39. b.WriteByte(' ')
  40. if !isToken(attribute) {
  41. return ""
  42. }
  43. b.WriteString(strings.ToLower(attribute))
  44. b.WriteByte('=')
  45. if isToken(value) {
  46. b.WriteString(value)
  47. continue
  48. }
  49. b.WriteByte('"')
  50. offset := 0
  51. for index, character := range value {
  52. if character == '"' || character == '\\' {
  53. b.WriteString(value[offset:index])
  54. offset = index
  55. b.WriteByte('\\')
  56. }
  57. if character&0x80 != 0 {
  58. return ""
  59. }
  60. }
  61. b.WriteString(value[offset:])
  62. b.WriteByte('"')
  63. }
  64. return b.String()
  65. }
  66. func checkMediaTypeDisposition(s string) error {
  67. typ, rest := consumeToken(s)
  68. if typ == "" {
  69. return errors.New("mime: no media type")
  70. }
  71. if rest == "" {
  72. return nil
  73. }
  74. if !strings.HasPrefix(rest, "/") {
  75. return errors.New("mime: expected slash after first token")
  76. }
  77. subtype, rest := consumeToken(rest[1:])
  78. if subtype == "" {
  79. return errors.New("mime: expected token after slash")
  80. }
  81. if rest != "" {
  82. return errors.New("mime: unexpected content after media subtype")
  83. }
  84. return nil
  85. }
  86. // ParseMediaType parses a media type value and any optional
  87. // parameters, per RFC 1521. Media types are the values in
  88. // Content-Type and Content-Disposition headers (RFC 2183).
  89. // On success, ParseMediaType returns the media type converted
  90. // to lowercase and trimmed of white space and a non-nil map.
  91. // The returned map, params, maps from the lowercase
  92. // attribute to the attribute value with its case preserved.
  93. func ParseMediaType(v string) (mediatype string, params map[string]string, err error) {
  94. i := strings.Index(v, ";")
  95. if i == -1 {
  96. i = len(v)
  97. }
  98. mediatype = strings.TrimSpace(strings.ToLower(v[0:i]))
  99. err = checkMediaTypeDisposition(mediatype)
  100. if err != nil {
  101. return "", nil, err
  102. }
  103. params = make(map[string]string)
  104. // Map of base parameter name -> parameter name -> value
  105. // for parameters containing a '*' character.
  106. // Lazily initialized.
  107. var continuation map[string]map[string]string
  108. v = v[i:]
  109. for len(v) > 0 {
  110. v = strings.TrimLeftFunc(v, unicode.IsSpace)
  111. if len(v) == 0 {
  112. break
  113. }
  114. key, value, rest := consumeMediaParam(v)
  115. if key == "" {
  116. if strings.TrimSpace(rest) == ";" {
  117. // Ignore trailing semicolons.
  118. // Not an error.
  119. return
  120. }
  121. // Parse error.
  122. return "", nil, errors.New("mime: invalid media parameter")
  123. }
  124. pmap := params
  125. if idx := strings.Index(key, "*"); idx != -1 {
  126. baseName := key[:idx]
  127. if continuation == nil {
  128. continuation = make(map[string]map[string]string)
  129. }
  130. var ok bool
  131. if pmap, ok = continuation[baseName]; !ok {
  132. continuation[baseName] = make(map[string]string)
  133. pmap = continuation[baseName]
  134. }
  135. }
  136. if _, exists := pmap[key]; exists {
  137. // Duplicate parameter name is bogus.
  138. return "", nil, errors.New("mime: duplicate parameter name")
  139. }
  140. pmap[key] = value
  141. v = rest
  142. }
  143. // Stitch together any continuations or things with stars
  144. // (i.e. RFC 2231 things with stars: "foo*0" or "foo*")
  145. var buf bytes.Buffer
  146. for key, pieceMap := range continuation {
  147. singlePartKey := key + "*"
  148. if v, ok := pieceMap[singlePartKey]; ok {
  149. decv := decode2231Enc(v)
  150. params[key] = decv
  151. continue
  152. }
  153. buf.Reset()
  154. valid := false
  155. for n := 0; ; n++ {
  156. simplePart := fmt.Sprintf("%s*%d", key, n)
  157. if v, ok := pieceMap[simplePart]; ok {
  158. valid = true
  159. buf.WriteString(v)
  160. continue
  161. }
  162. encodedPart := simplePart + "*"
  163. if v, ok := pieceMap[encodedPart]; ok {
  164. valid = true
  165. if n == 0 {
  166. buf.WriteString(decode2231Enc(v))
  167. } else {
  168. decv, _ := percentHexUnescape(v)
  169. buf.WriteString(decv)
  170. }
  171. } else {
  172. break
  173. }
  174. }
  175. if valid {
  176. params[key] = buf.String()
  177. }
  178. }
  179. return
  180. }
  181. func decode2231Enc(v string) string {
  182. sv := strings.SplitN(v, "'", 3)
  183. if len(sv) != 3 {
  184. return ""
  185. }
  186. // TODO: ignoring lang in sv[1] for now. If anybody needs it we'll
  187. // need to decide how to expose it in the API. But I'm not sure
  188. // anybody uses it in practice.
  189. charset := strings.ToLower(sv[0])
  190. if charset != "us-ascii" && charset != "utf-8" {
  191. // TODO: unsupported encoding
  192. return ""
  193. }
  194. encv, _ := percentHexUnescape(sv[2])
  195. return encv
  196. }
  197. func isNotTokenChar(r rune) bool {
  198. return !isTokenChar(r)
  199. }
  200. // consumeToken consumes a token from the beginning of provided
  201. // string, per RFC 2045 section 5.1 (referenced from 2183), and return
  202. // the token consumed and the rest of the string. Returns ("", v) on
  203. // failure to consume at least one character.
  204. func consumeToken(v string) (token, rest string) {
  205. notPos := strings.IndexFunc(v, isNotTokenChar)
  206. if notPos == -1 {
  207. return v, ""
  208. }
  209. if notPos == 0 {
  210. return "", v
  211. }
  212. return v[0:notPos], v[notPos:]
  213. }
  214. // consumeValue consumes a "value" per RFC 2045, where a value is
  215. // either a 'token' or a 'quoted-string'. On success, consumeValue
  216. // returns the value consumed (and de-quoted/escaped, if a
  217. // quoted-string) and the rest of the string. On failure, returns
  218. // ("", v).
  219. func consumeValue(v string) (value, rest string) {
  220. if !strings.HasPrefix(v, `"`) && !strings.HasPrefix(v, `'`) {
  221. return consumeToken(v)
  222. }
  223. leadQuote := rune(v[0])
  224. // parse a quoted-string
  225. rest = v[1:] // consume the leading quote
  226. buffer := new(bytes.Buffer)
  227. var idx int
  228. var r rune
  229. var nextIsLiteral bool
  230. for idx, r = range rest {
  231. switch {
  232. case nextIsLiteral:
  233. buffer.WriteRune(r)
  234. nextIsLiteral = false
  235. case r == leadQuote:
  236. return buffer.String(), rest[idx+1:]
  237. case r == '\\':
  238. nextIsLiteral = true
  239. case r != '\r' && r != '\n':
  240. buffer.WriteRune(r)
  241. default:
  242. return "", v
  243. }
  244. }
  245. return "", v
  246. }
  247. func consumeMediaParam(v string) (param, value, rest string) {
  248. rest = strings.TrimLeftFunc(v, unicode.IsSpace)
  249. if !strings.HasPrefix(rest, ";") {
  250. return "", "", v
  251. }
  252. rest = rest[1:] // consume semicolon
  253. rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
  254. param, rest = consumeToken(rest)
  255. param = strings.ToLower(param)
  256. if param == "" {
  257. return "", "", v
  258. }
  259. rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
  260. if !strings.HasPrefix(rest, "=") {
  261. return "", "", v
  262. }
  263. rest = rest[1:] // consume equals sign
  264. rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
  265. value, rest = consumeValue(rest)
  266. if value == "" {
  267. return "", "", v
  268. }
  269. return param, value, rest
  270. }
  271. func percentHexUnescape(s string) (string, error) {
  272. // Count %, check that they're well-formed.
  273. percents := 0
  274. for i := 0; i < len(s); {
  275. if s[i] != '%' {
  276. i++
  277. continue
  278. }
  279. percents++
  280. if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
  281. s = s[i:]
  282. if len(s) > 3 {
  283. s = s[0:3]
  284. }
  285. return "", fmt.Errorf("mime: bogus characters after %%: %q", s)
  286. }
  287. i += 3
  288. }
  289. if percents == 0 {
  290. return s, nil
  291. }
  292. t := make([]byte, len(s)-2*percents)
  293. j := 0
  294. for i := 0; i < len(s); {
  295. switch s[i] {
  296. case '%':
  297. t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
  298. j++
  299. i += 3
  300. default:
  301. t[j] = s[i]
  302. j++
  303. i++
  304. }
  305. }
  306. return string(t), nil
  307. }
  308. func ishex(c byte) bool {
  309. switch {
  310. case '0' <= c && c <= '9':
  311. return true
  312. case 'a' <= c && c <= 'f':
  313. return true
  314. case 'A' <= c && c <= 'F':
  315. return true
  316. }
  317. return false
  318. }
  319. func unhex(c byte) byte {
  320. switch {
  321. case '0' <= c && c <= '9':
  322. return c - '0'
  323. case 'a' <= c && c <= 'f':
  324. return c - 'a' + 10
  325. case 'A' <= c && c <= 'F':
  326. return c - 'A' + 10
  327. }
  328. return 0
  329. }