main.go 1.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. package main
  2. import (
  3. "crypto/sha1"
  4. "fmt"
  5. "io"
  6. "net/http"
  7. "os"
  8. "strings"
  9. "sync"
  10. "github.com/PuerkitoBio/goquery"
  11. "github.com/cryptix/go/logging"
  12. )
  13. var (
  14. log = logging.Logger("linkExt")
  15. hashes *os.File
  16. )
  17. const url = `http://www.spiegel.de/international/germany/inside-the-nsa-s-war-on-internet-security-a-1010361.html`
  18. func main() {
  19. var wg sync.WaitGroup
  20. doc, err := goquery.NewDocument(url)
  21. logging.CheckFatal(err)
  22. hashes, err = os.Create("hashes")
  23. logging.CheckFatal(err)
  24. defer hashes.Close()
  25. doc.Find("a").Each(func(i int, s *goquery.Selection) {
  26. link, found := s.Attr("href")
  27. title, _ := s.Attr("title")
  28. if found && strings.HasSuffix(link, ".pdf") {
  29. wg.Add(1)
  30. go fetchPDF(&wg, link, title)
  31. }
  32. })
  33. wg.Wait()
  34. log.Notice("Done")
  35. }
  36. func fetchPDF(wg *sync.WaitGroup, l, t string) (err error) {
  37. s := sha1.New()
  38. fname := l[7:len(l)-4] + "-" + strings.TrimSpace(t) + ".pdf"
  39. fname = strings.Replace(fname, "/", "-", -1)
  40. fname = "pdfs/" + fname
  41. log.Noticef("fetching: %s", fname)
  42. defer func() {
  43. if err != nil {
  44. fetchPDF(wg, l, t)
  45. } else {
  46. fmt.Fprintf(hashes, "%x %s\n", s.Sum(nil), fname)
  47. wg.Done()
  48. }
  49. }()
  50. resp, err := http.Get("https://www.spiegel.de/" + l)
  51. if err != nil {
  52. log.Critical(err)
  53. return
  54. }
  55. defer resp.Body.Close()
  56. if resp.StatusCode != http.StatusOK {
  57. log.Criticalf("http.Get %q", resp.Status)
  58. return
  59. }
  60. f, err := os.Create(fname)
  61. if err != nil {
  62. log.Critical(err)
  63. return
  64. }
  65. multi := io.MultiWriter(s, f)
  66. _, err = io.Copy(multi, resp.Body)
  67. if err != nil {
  68. log.Critical(err)
  69. os.Remove(fname)
  70. return err
  71. }
  72. log.Noticef("Saved: %s", fname)
  73. return nil
  74. }