magazine.json 1.1 KB

1234567891011121314151617181920212223242526272829
#!/usr/bin/env python3
import datetime
import json
import re
import sys
import urllib.parse

import bs4
import requests
  7. print('Content-Type: application/json')
  8. print()
  9. cricinfo = []
  10. soup = bs4.BeautifulSoup(requests.get('http://www.espncricinfo.com/ci/content/story/features.html').text, 'html.parser')
  11. for article in map(lambda title: title.parent, soup.find_all("h2", class_="story-title")):
  12. cricinfo.append({
  13. 'title': article.find('h2', class_='story-title').get_text(),
  14. 'url': 'http://www.espncricinfo.com' + article.find('h2', class_='story-title').a['href'],
  15. 'description': article.find('p', class_='story-brief').contents[1].strip(),
  16. 'date': (datetime.datetime.strptime(article.find('strong', class_='story-date').get_text(), "%b %d, %Y") - datetime.datetime.fromtimestamp(0)).total_seconds() * 1000,
  17. 'author': article.find('div', class_='author').get_text().strip()
  18. })
  19. imageParentTag = article.find('figure', class_='story-img')
  20. if imageParentTag:
  21. cricinfo[-1]['image'] = imageParentTag.img['src']
  22. print(json.dumps(cricinfo))