bulbapedia-news.py 1.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849
#!/usr/bin/env python
# Script for Bulbapedia RSS in RSS Guard
# Post-processing:
# https://bulbagarden.net
import html
import json
import re
import sys
from datetime import datetime

import requests
from bs4 import BeautifulSoup
  11. sys.stdin.reconfigure(encoding="utf-8")
  12. input_data = sys.stdin.read()
  13. soup = BeautifulSoup(input_data, "html.parser")
  14. articles = soup.find("div", class_="news-article-list")
  15. articles = articles.find_all("article", {"class": "news-article-card"})
  16. items = []
  17. for article in articles:
  18. date = article.find("div", {"class": "news-article-card-date"}).text
  19. date = datetime.strptime(date, "%b%d").isoformat()
  20. title = article.find("h2", {"class": "news-article-card-title"}).text
  21. thumbnail = article.find("a", {"class": "news-article-card-thumbnail"})
  22. url = thumbnail.get("href")
  23. image = thumbnail.find("img").get("src")
  24. description = article.find("div", {"class": "news-article-card-excerpt news-content"}).text
  25. author = article.find("li", {"class": "news-meta-item author"}).find("a").text.replace("\n", "")
  26. html = f"""
  27. <img src='{image}'>
  28. <p>{description}</p>
  29. """
  30. item = {
  31. "author": {"name": author},
  32. "title": title,
  33. "id": url,
  34. "content_html": html,
  35. "url": url,
  36. "date_published": date
  37. }
  38. items.append(item)
  39. json_feed = {
  40. "title": "Pokemon News",
  41. "items": items
  42. }
  43. print(json.dumps(json_feed))