#!/usr/bin/env python3
"""CGI script: scrape ESPNcricinfo feature stories and emit them as JSON.

Output is a CGI response whose body is a JSON array of objects with the keys
``title``, ``url``, ``description``, ``date`` (milliseconds since the Unix
epoch, as a float), ``author`` and, when the story has a picture, ``image``.
"""
import datetime
import json
import re

import bs4
import requests

BASE_URL = 'http://www.espncricinfo.com'
FEATURES_URL = BASE_URL + '/ci/content/story/features.html'

# Without a timeout ``requests`` can block forever and hang the CGI worker.
REQUEST_TIMEOUT_SECONDS = 10


def _date_to_epoch_ms(text):
    """Convert a date such as ``"Jan 5, 2017"`` to epoch milliseconds.

    The page gives a calendar date with no time zone, so it is interpreted
    as midnight UTC. This keeps the result independent of the time zone (and
    DST rules) of the machine running the script.

    Raises:
        ValueError: if ``text`` does not match the ``%b %d, %Y`` format.
    """
    parsed = datetime.datetime.strptime(text.strip(), "%b %d, %Y")
    return parsed.replace(tzinfo=datetime.timezone.utc).timestamp() * 1000


def _parse_story(title_tag):
    """Build one story dict from an ``<h2 class="story-title">`` tag.

    Sibling elements (brief, date, author, image) are looked up inside the
    title's parent element. Returns ``None`` when any required piece is
    missing or malformed, so a single odd entry cannot abort the response.
    """
    container = title_tag.parent
    link = title_tag.a
    if container is None or link is None or not link.get('href'):
        return None

    brief = container.find('p', class_='story-brief')
    date_tag = container.find('strong', class_='story-date')
    author_tag = container.find('div', class_='author')
    if brief is None or date_tag is None or author_tag is None:
        return None

    # The description is taken from the second child of the brief paragraph
    # (presumably the text following a leading date element -- verify against
    # the live markup). It must be a text node, not a tag.
    if len(brief.contents) < 2 or not isinstance(brief.contents[1], str):
        return None

    try:
        date_ms = _date_to_epoch_ms(date_tag.get_text())
    except ValueError:
        return None

    story = {
        'title': title_tag.get_text(),
        'url': BASE_URL + link['href'],
        'description': brief.contents[1].strip(),
        'date': date_ms,
        'author': author_tag.get_text().strip(),
    }

    # The image is optional; only add the key when a usable <img src> exists.
    figure = container.find('figure', class_='story-img')
    if figure is not None and figure.img is not None and figure.img.get('src'):
        story['image'] = figure.img['src']

    return story


def fetch_stories():
    """Download the features page and return the list of parsed stories.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-success HTTP status from the upstream site.
    """
    response = requests.get(FEATURES_URL, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    soup = bs4.BeautifulSoup(response.text, 'html.parser')

    stories = []
    for title_tag in soup.find_all("h2", class_="story-title"):
        story = _parse_story(title_tag)
        if story is not None:
            stories.append(story)
    return stories


def main():
    """Write the CGI response (headers, blank line, JSON body) to stdout."""
    # Fetch before printing any header so an upstream failure can still be
    # reported through the CGI ``Status`` header instead of a truncated body.
    try:
        stories = fetch_stories()
    except requests.RequestException:
        print('Status: 502 Bad Gateway')
        stories = []

    print('Content-Type: application/json')
    print()
    print(json.dumps(stories))


if __name__ == '__main__':
    main()