cache.go 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  1. package amp
  2. import (
  3. "crypto/sha256"
  4. "encoding/base32"
  5. "fmt"
  6. "net"
  7. "net/url"
  8. "path"
  9. "strings"
  10. "golang.org/x/net/idna"
  11. )
  12. // domainPrefixBasic does the basic domain prefix conversion. Does not do any
  13. // IDNA mapping, such as https://www.unicode.org/reports/tr46/.
  14. //
  15. // https://amp.dev/documentation/guides-and-tutorials/learn/amp-caches-and-cors/amp-cache-urls/#basic-algorithm
  16. func domainPrefixBasic(domain string) (string, error) {
  17. // 1. Punycode Decode the publisher domain.
  18. prefix, err := idna.ToUnicode(domain)
  19. if err != nil {
  20. return "", err
  21. }
  22. // 2. Replace any "-" (hyphen) character in the output of step 1 with
  23. // "--" (two hyphens).
  24. prefix = strings.Replace(prefix, "-", "--", -1)
  25. // 3. Replace any "." (dot) character in the output of step 2 with "-"
  26. // (hyphen).
  27. prefix = strings.Replace(prefix, ".", "-", -1)
  28. // 4. If the output of step 3 has a "-" (hyphen) at both positions 3 and
  29. // 4, then to the output of step 3, add a prefix of "0-" and add a
  30. // suffix of "-0".
  31. if len(prefix) >= 4 && prefix[2] == '-' && prefix[3] == '-' {
  32. prefix = "0-" + prefix + "-0"
  33. }
  34. // 5. Punycode Encode the output of step 3.
  35. return idna.ToASCII(prefix)
  36. }
  // fallbackBase32Encoding is base32 over a lower-case alphabet ("a"-"z" followed
  // by "2"-"7") that emits no "=" padding.
  var fallbackBase32Encoding = base32.
  	NewEncoding("abcdefghijklmnopqrstuvwxyz234567").
  	WithPadding(base32.NoPadding)

  // domainPrefixFallback applies the "fallback" AMP cache domain prefix algorithm
  // to domain: the SHA-256 digest of the domain, encoded as lower-case base32
  // without padding.
  //
  // https://amp.dev/documentation/guides-and-tutorials/learn/amp-caches-and-cors/amp-cache-urls/#fallback-algorithm
  func domainPrefixFallback(domain string) string {
  	// The specification leaves open exactly which bytes get hashed; domain
  	// is notionally an abstract Unicode string rather than a byte sequence.
  	// The reference implementation's comment at
  	// https://github.com/ampproject/amp-toolbox/blob/84cb3057e5f6c54d64369ddd285db1cb36237ee8/packages/cache-url/lib/AmpCurlUrlGenerator.js#L62
  	// reads "Take the SHA256 of the punycode view of the domain," yet the
  	// code actually hashes the UTF-8 encoding of the domain, with no
  	// Punycode step:
  	// https://github.com/ampproject/amp-toolbox/blob/84cb3057e5f6c54d64369ddd285db1cb36237ee8/packages/cache-url/lib/AmpCurlUrlGenerator.js#L141
  	// https://github.com/ampproject/amp-toolbox/blob/84cb3057e5f6c54d64369ddd285db1cb36237ee8/packages/cache-url/lib/browser/Sha256.js#L24
  	// This function follows the code, not the comment: it hashes the raw
  	// bytes of domain, which are presumed to be UTF-8.

  	// Step 1: SHA-256 of the publisher's domain.
  	hasher := sha256.New()
  	hasher.Write([]byte(domain))
  	digest := hasher.Sum(nil)

  	// Steps 2 and 3: base32-encode the digest and drop the four trailing
  	// "=" characters. Using an encoding with padding disabled means those
  	// characters are never produced in the first place.
  	return fallbackBase32Encoding.EncodeToString(digest)
  }
  61. // domainPrefix computes the domain prefix of an AMP cache URL.
  62. //
  63. // https://amp.dev/documentation/guides-and-tutorials/learn/amp-caches-and-cors/amp-cache-urls/#domain-name-prefix
  64. func domainPrefix(domain string) string {
  65. // https://amp.dev/documentation/guides-and-tutorials/learn/amp-caches-and-cors/amp-cache-urls/#combined-algorithm
  66. // 1. Run the Basic Algorithm. If the output is a valid DNS label,
  67. // [append the Cache domain suffix and] return. Otherwise continue to
  68. // step 2.
  69. prefix, err := domainPrefixBasic(domain)
  70. // "A domain prefix is not a valid DNS label if it is longer than 63
  71. // characters"
  72. if err == nil && len(prefix) <= 63 {
  73. return prefix
  74. }
  75. // 2. Run the Fallback Algorithm. [Append the Cache domain suffix and]
  76. // return.
  77. return domainPrefixFallback(domain)
  78. }
  79. // CacheURL computes the AMP cache URL for the publisher URL pubURL, using the
  80. // AMP cache at cacheURL. contentType is a string such as "c" or "i" that
  81. // indicates what type of serving the AMP cache is to perform. The Scheme of
  82. // pubURL must be "http" or "https". The Port of pubURL, if any, must match the
  83. // default for the scheme. cacheURL may not have RawQuery, Fragment, or
  84. // RawFragment set, because the resulting URL's query and fragment are taken
  85. // from the publisher URL.
  86. //
  87. // https://amp.dev/documentation/guides-and-tutorials/learn/amp-caches-and-cors/amp-cache-urls/
  88. func CacheURL(pubURL, cacheURL *url.URL, contentType string) (*url.URL, error) {
  89. // The cache URL subdomain, including the domain prefix corresponding to
  90. // the publisher URL's domain.
  91. resultHost := domainPrefix(pubURL.Hostname()) + "." + cacheURL.Hostname()
  92. if cacheURL.Port() != "" {
  93. resultHost = net.JoinHostPort(resultHost, cacheURL.Port())
  94. }
  95. // https://amp.dev/documentation/guides-and-tutorials/learn/amp-caches-and-cors/amp-cache-urls/#url-path
  96. // The first part of the path is the cache URL's own path, if any.
  97. pathComponents := []string{cacheURL.EscapedPath()}
  98. // The next path component is the content type. We cannot encode an
  99. // empty content type, because it would result in consecutive path
  100. // separators, which would semantically combine into a single separator.
  101. if contentType == "" {
  102. return nil, fmt.Errorf("invalid content type %+q", contentType)
  103. }
  104. pathComponents = append(pathComponents, url.PathEscape(contentType))
  105. // Then, we add an "s" path component, if the publisher URL scheme is
  106. // "https".
  107. switch pubURL.Scheme {
  108. case "http":
  109. // Do nothing.
  110. case "https":
  111. pathComponents = append(pathComponents, "s")
  112. default:
  113. return nil, fmt.Errorf("invalid scheme %+q in publisher URL", pubURL.Scheme)
  114. }
  115. // The next path component is the publisher URL's host. The AMP cache
  116. // URL format specification is not clear about whether other
  117. // subcomponents of the authority (namely userinfo and port) may appear
  118. // here. We adopt a policy of forbidding userinfo, and requiring that
  119. // the port be the default for the scheme (and then we omit the port
  120. // entirely from the returned URL).
  121. if pubURL.User != nil {
  122. return nil, fmt.Errorf("publisher URL may not contain userinfo")
  123. }
  124. if port := pubURL.Port(); port != "" {
  125. if !((pubURL.Scheme == "http" && port == "80") || (pubURL.Scheme == "https" && port == "443")) {
  126. return nil, fmt.Errorf("publisher URL port %+q is not the default for scheme %+q", port, pubURL.Scheme)
  127. }
  128. }
  129. // As with the content type, we cannot encode an empty host, because
  130. // that would result in an empty path component.
  131. if pubURL.Hostname() == "" {
  132. return nil, fmt.Errorf("invalid host %+q in publisher URL", pubURL.Hostname())
  133. }
  134. pathComponents = append(pathComponents, url.PathEscape(pubURL.Hostname()))
  135. // Finally, we append the remainder of the original escaped path from
  136. // the publisher URL.
  137. pathComponents = append(pathComponents, pubURL.EscapedPath())
  138. resultRawPath := path.Join(pathComponents...)
  139. resultPath, err := url.PathUnescape(resultRawPath)
  140. if err != nil {
  141. return nil, err
  142. }
  143. // The query and fragment of the returned URL always come from pubURL.
  144. // Any query or fragment of cacheURL would be ignored. Return an error
  145. // if either is set.
  146. if cacheURL.RawQuery != "" {
  147. return nil, fmt.Errorf("cache URL may not contain a query")
  148. }
  149. if cacheURL.Fragment != "" {
  150. return nil, fmt.Errorf("cache URL may not contain a fragment")
  151. }
  152. return &url.URL{
  153. Scheme: cacheURL.Scheme,
  154. User: cacheURL.User,
  155. Host: resultHost,
  156. Path: resultPath,
  157. RawPath: resultRawPath,
  158. RawQuery: pubURL.RawQuery,
  159. Fragment: pubURL.Fragment,
  160. }, nil
  161. }