scrape.go 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  1. package main
  2. import (
  3. "log"
  4. "net/http"
  5. "time"
  6. "net"
  7. "fmt"
  8. "strings"
  9. "github.com/gocolly/colly"
  10. )
  11. func scrape (gurl string) []Route {
  12. ua := "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
  13. sc := colly.NewCollector(
  14. colly.AllowURLRevisit(),
  15. colly.Async(true),
  16. )
  17. sc.WithTransport(&http.Transport {
  18. Proxy: http.ProxyFromEnvironment,
  19. DialContext: (&net.Dialer{
  20. Timeout: 30 * time.Second,
  21. KeepAlive: 30 * time.Second,
  22. DualStack: true,
  23. }).DialContext,
  24. ForceAttemptHTTP2: true,
  25. MaxIdleConns: 100,
  26. IdleConnTimeout: 90 * time.Second,
  27. TLSHandshakeTimeout: 10 * time.Second,
  28. ExpectContinueTimeout: 1 * time.Second,
  29. })
  30. sc.OnRequest(func(r *colly.Request) {
  31. r.Headers.Set("User-Agent", ua)
  32. r.Headers.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
  33. r.Headers.Set("Accept-Language", "en-US,en;q=0.5")
  34. })
  35. sc.OnError(func(_ *colly.Response, err error) {
  36. log.Fatal("エラー:", err)
  37. })
  38. var routeArr []Route
  39. for i := 1; i <= 3; i++ {
  40. route := fmt.Sprintf("div#route%02d", i)
  41. sc.OnHTML("div.elmRouteDetail " + route, func (e *colly.HTMLElement) {
  42. Routes := Route{}
  43. e.ForEach("div.routeSummary div ul.priority li span", func (j int, el *colly.HTMLElement) {
  44. if el.Attr("class") == "icnPriTime" {
  45. Routes.Badges = append(Routes.Badges, 1)
  46. }
  47. if el.Attr("class") == "icnPriFare" {
  48. Routes.Badges = append(Routes.Badges, 2)
  49. }
  50. if el.Attr("class") == "icnPriTrans" {
  51. Routes.Badges = append(Routes.Badges, 3)
  52. }
  53. })
  54. base := e.ChildText("ul.summary li.time span")
  55. time := strings.ReplaceAll(base, e.ChildText("ul.summary li.time span.small"), "")
  56. time2 := strings.Split(time, "着")
  57. Routes.Time = time2[0] + "着"
  58. durabase := e.ChildText("ul.summary li.time")
  59. durasi := strings.Index(durabase, "着") + len("着")
  60. duraei := strings.Index(durabase[durasi:], "分") + len("分") + durasi
  61. Routes.Duration = durabase[durasi:duraei]
  62. Routes.TransitCunt = strings.ReplaceAll(e.ChildText("ul.summary li.transfer"), "乗換:", "")
  63. Routes.Fare = strings.ReplaceAll(e.ChildText("ul.summary li.fare"), "[priic]IC優先:", "")
  64. Stations := Station{}
  65. Fares := Fare{}
  66. Stops := Stop{}
  67. e.ForEach("div.routeDetail div.station", func (j int, el *colly.HTMLElement) {
  68. Stations.Time = el.ChildText("ul.time li")
  69. if el.ChildText("p.icon span") == "[dep]" { Stations.Time += "発" }
  70. if el.ChildText("p.icon span") == "[arr]" { Stations.Time += "着" }
  71. Stations.Name = el.ChildText("dl dt a")
  72. e.ForEach("div.routeDetail div.fareSection div.access", func (jf int, elf *colly.HTMLElement) {
  73. Fares.Stops = nil
  74. if jf == j {
  75. Fares.Train = strings.ReplaceAll(strings.ReplaceAll(strings.ReplaceAll(elf.ChildText("li.transport div"), "[train]", "【電車】"), "[bus]", "【バス】"), "[air]", "【空路】")
  76. Fares.Platform = elf.ChildText("li.platform")
  77. Fares.Color = strings.ReplaceAll(elf.ChildAttr("span", "style"), "border-color:#", "")
  78. elf.ForEach("li.stop ul", func (js int, els *colly.HTMLElement) {
  79. Stops.Time = els.ChildText("li dl dt")
  80. Stops.Name = strings.ReplaceAll(els.ChildText("li dl dd"), "○", "")
  81. Fares.Stops = append(Fares.Stops, Stops)
  82. })
  83. Stations.Fares = append(Stations.Fares, Fares)
  84. }
  85. })
  86. e.ForEach("div.routeDetail div.walk ul.info", func (jw int, elw *colly.HTMLElement) {
  87. if jw == j {
  88. Fares.Train = strings.ReplaceAll(elw.ChildText("li.transport"), "[line][walk]", "")
  89. Fares.Platform = ""
  90. Fares.Color = "a8a8a8"
  91. Stations.Fares = append(Stations.Fares, Fares)
  92. }
  93. })
  94. Routes.Stations = append(Routes.Stations, Stations)
  95. })
  96. routeArr = append(routeArr, Routes)
  97. })
  98. }
  99. sc.Visit(gurl)
  100. sc.Wait()
  101. return routeArr
  102. }