http.go 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  1. // This file is subject to a 1-clause BSD license.
  2. // Its contents can be found in the enclosed LICENSE file.
  3. package url
  4. import (
  5. "bytes"
  6. "fmt"
  7. "html"
  8. "io"
  9. "net/http"
  10. "net/url"
  11. "regexp"
  12. "strings"
  13. "notabug.org/mouz/bot/irc"
  14. "notabug.org/mouz/bot/irc/proto"
  15. "notabug.org/mouz/bot/plugins/url/youtube"
  16. )
  17. var (
  18. // regUrl is used by readUrl to extract web page URLs from incoming
  19. // PRIVMSG contents.
  20. regUrl = regexp.MustCompile(`\b[Hh][Tt][Tt][Pp][Ss]?\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]+(\:[0-9]+)?(/\S*)?\b`)
  21. // These values are used to extract title contents from HTML.
  22. bOpenTitle1 = []byte("<title>")
  23. bOpenTitle2 = []byte("<title ")
  24. bCloseTitle = []byte("</title>")
  25. bCloseTag = []byte(">")
  26. )
  27. // fetchTitle attempts to retrieve the title element for a given url.
  28. func fetchTitle(w irc.ResponseWriter, r *irc.Request, url, apiKey string) {
  29. // Ensure the url targets a HTML page. We do this by issueing a HEAD
  30. // request and checking its content type header.
  31. resp, err := http.Head(url)
  32. if err != nil {
  33. return
  34. }
  35. resp.Body.Close()
  36. ctype := strings.ToLower(resp.Header.Get("Content-Type"))
  37. if strings.Index(ctype, "text/html") == -1 &&
  38. strings.Index(ctype, "text/xhtml") == -1 {
  39. return
  40. }
  41. // We have an HTML document -- Fetch its contents.
  42. resp, err = http.Get(url)
  43. if err != nil {
  44. return
  45. }
  46. // buf defines the maximum amount of data we will be reading from a page,
  47. // before stopping our search for the <title> tag.
  48. //
  49. // 16kB is a chunky buffer, but some sites packa a ludicrous amount of
  50. // crud in their page headers, before getting to the <title> tag.
  51. var buf [1024 * 16]byte
  52. // Read the body.
  53. n, err := io.ReadFull(resp.Body, buf[:])
  54. resp.Body.Close()
  55. if err != nil && n <= 0 {
  56. return // Exit only if no data was read at all.
  57. }
  58. body := buf[:n]
  59. // Extract the title.
  60. s := bytes.Index(bytes.ToLower(body), bOpenTitle1)
  61. if s == -1 {
  62. // title could be something like:
  63. //
  64. // <title xml:lang="en-US">....</title>
  65. //
  66. s = bytes.Index(bytes.ToLower(body), bOpenTitle2)
  67. if s == -1 {
  68. return
  69. }
  70. body = body[s+len(bOpenTitle2):]
  71. s = bytes.Index(body, bCloseTag)
  72. if s == -1 {
  73. return
  74. }
  75. body = body[s+1:]
  76. } else {
  77. body = body[s+len(bOpenTitle1):]
  78. }
  79. e := bytes.Index(bytes.ToLower(body), bCloseTitle)
  80. if e == -1 {
  81. e = len(body) - 1
  82. }
  83. body = bytes.TrimSpace(body[:e])
  84. if len(body) == 0 {
  85. return
  86. }
  87. title := html.UnescapeString(string(body))
  88. // If we are dealing with a youtube link, try to fetch the
  89. // video duration and append it to our response.
  90. if id := isYoutube(url); len(id) > 0 {
  91. info, err := youtube.GetVideoInfo(apiKey, id)
  92. if err == nil {
  93. title += fmt.Sprintf(TextYoutubeDuration, info.Duration)
  94. }
  95. }
  96. // If the title matches one of the titles that we want to ignore,
  97. // do not show it.
  98. if Ignore[title] {
  99. return
  100. }
  101. // Show the title to the channel from whence the URL came.
  102. proto.PrivMsg(w, r.Target, TextDisplay, r.SenderName, title)
  103. }
  104. // isYoutube returns a video ID and true if v denotes a recognized youtube
  105. // video URL. Returns an empty string otherwise.
  106. func isYoutube(v string) string {
  107. u, err := url.Parse(v)
  108. if err != nil {
  109. return ""
  110. }
  111. if strings.EqualFold(u.Host, "youtube.com") ||
  112. strings.EqualFold(u.Host, "www.youtube.com") {
  113. id := strings.TrimSpace(u.Query().Get("v"))
  114. return id
  115. }
  116. if strings.EqualFold(u.Host, "youtu.be") ||
  117. strings.EqualFold(u.Host, "www.youtu.be") {
  118. id := u.RequestURI()
  119. if strings.HasPrefix(id, "/") {
  120. id = id[1:]
  121. }
  122. return id
  123. }
  124. return ""
  125. }