# ripfeed.py
  1. #!/usr/bin/python3
  2. import atomparser
  3. import logging, os, requests, time
  4. from argparse import ArgumentParser
  5. from base64 import urlsafe_b64encode
  6. parser = ArgumentParser()
  7. parser.add_argument('-u', '--url', help='URL to Atom feed to rip')
  8. parser.add_argument('-i', '--interval', default=5, help='seconds to wait between each fetch')
  9. parser.add_argument('-p', '--path', default='data', help='directory to put stuff in (will be created)')
  10. parser.add_argument('-d', '--debug', action='store_true', help='print debug logging output to console')
  11. args = parser.parse_args()
  12. if args.debug:
  13. logging.basicConfig(level=logging.DEBUG)
  14. log = logging.getLogger()
  15. if not os.path.isdir(args.path):
  16. os.mkdir(args.path)
  17. url = args.url
  18. while True:
  19. filename = urlsafe_b64encode(url.encode()).decode('utf-8')
  20. filepath = os.path.join(args.path, filename)
  21. retrieved = False
  22. if not os.path.isfile(filepath):
  23. print('downloading to {}'.format(filepath))
  24. log.debug('downloading to {}'.format(filepath))
  25. with open(filepath, 'wb') as xml:
  26. xml.write(requests.get(url).content)
  27. retrieved = True
  28. feed = atomparser.AtomFeed(filepath)
  29. try:
  30. url = feed.next.get('href')
  31. except AttributeError:
  32. log.debug('no more "next" in feed XML')
  33. break
  34. if not url:
  35. log.debug('url empty despite no exception thrown')
  36. break
  37. if retrieved:
  38. log.debug('sleeping {}s before next iteration')
  39. time.sleep(args.interval)