reddit.py 2.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """Reddit
  3. """
  4. import json
  5. from datetime import datetime
  6. from urllib.parse import urlencode, urljoin, urlparse
  7. # about
  8. about = {
  9. "website": 'https://www.reddit.com/',
  10. "wikidata_id": 'Q1136',
  11. "official_api_documentation": 'https://www.reddit.com/dev/api',
  12. "use_official_api": True,
  13. "require_api_key": False,
  14. "results": 'JSON',
  15. }
  16. # engine dependent config
  17. categories = ['social media']
  18. page_size = 25
  19. # search-url
  20. base_url = 'https://www.reddit.com/'
  21. search_url = base_url + 'search.json?{query}'
  22. def request(query, params):
  23. query = urlencode({'q': query, 'limit': page_size})
  24. params['url'] = search_url.format(query=query)
  25. return params
  26. def response(resp):
  27. img_results = []
  28. text_results = []
  29. search_results = json.loads(resp.text)
  30. # return empty array if there are no results
  31. if 'data' not in search_results:
  32. return []
  33. posts = search_results.get('data', {}).get('children', [])
  34. # process results
  35. for post in posts:
  36. data = post['data']
  37. # extract post information
  38. params = {'url': urljoin(base_url, data['permalink']), 'title': data['title']}
  39. # if thumbnail field contains a valid URL, we need to change template
  40. thumbnail = data['thumbnail']
  41. url_info = urlparse(thumbnail)
  42. # netloc & path
  43. if url_info[1] != '' and url_info[2] != '':
  44. params['img_src'] = data['url']
  45. params['thumbnail_src'] = thumbnail
  46. params['template'] = 'images.html'
  47. img_results.append(params)
  48. else:
  49. created = datetime.fromtimestamp(data['created_utc'])
  50. content = data['selftext']
  51. if len(content) > 500:
  52. content = content[:500] + '...'
  53. params['content'] = content
  54. params['publishedDate'] = created
  55. text_results.append(params)
  56. # show images first and text results second
  57. return img_results + text_results