reddit.py 2.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """
  3. Reddit
  4. """
  5. import json
  6. from datetime import datetime
  7. from urllib.parse import urlencode, urljoin, urlparse
  8. # about
  9. about = {
  10. "website": 'https://www.reddit.com/',
  11. "wikidata_id": 'Q1136',
  12. "official_api_documentation": 'https://www.reddit.com/dev/api',
  13. "use_official_api": True,
  14. "require_api_key": False,
  15. "results": 'JSON',
  16. }
  17. # engine dependent config
  18. categories = ['general', 'images', 'news', 'social media']
  19. page_size = 25
  20. # search-url
  21. base_url = 'https://www.reddit.com/'
  22. search_url = base_url + 'search.json?{query}'
  23. # do search-request
  24. def request(query, params):
  25. query = urlencode({'q': query, 'limit': page_size})
  26. params['url'] = search_url.format(query=query)
  27. return params
  28. # get response from search-request
  29. def response(resp):
  30. img_results = []
  31. text_results = []
  32. search_results = json.loads(resp.text)
  33. # return empty array if there are no results
  34. if 'data' not in search_results:
  35. return []
  36. posts = search_results.get('data', {}).get('children', [])
  37. # process results
  38. for post in posts:
  39. data = post['data']
  40. # extract post information
  41. params = {
  42. 'url': urljoin(base_url, data['permalink']),
  43. 'title': data['title']
  44. }
  45. # if thumbnail field contains a valid URL, we need to change template
  46. thumbnail = data['thumbnail']
  47. url_info = urlparse(thumbnail)
  48. # netloc & path
  49. if url_info[1] != '' and url_info[2] != '':
  50. params['img_src'] = data['url']
  51. params['thumbnail_src'] = thumbnail
  52. params['template'] = 'images.html'
  53. img_results.append(params)
  54. else:
  55. created = datetime.fromtimestamp(data['created_utc'])
  56. content = data['selftext']
  57. if len(content) > 500:
  58. content = content[:500] + '...'
  59. params['content'] = content
  60. params['publishedDate'] = created
  61. text_results.append(params)
  62. # show images first and text results second
  63. return img_results + text_results