digg.py
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Digg (News, Social media)
"""
# pylint: disable=missing-function-docstring

from json import loads
from urllib.parse import urlencode
from datetime import datetime

from lxml import html

# about
about = {
    "website": 'https://digg.com',
    "wikidata_id": 'Q270478',
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

# engine dependent config
categories = ['news', 'social media']
paging = True
base_url = 'https://digg.com'

# search-url
search_url = base_url + (
    '/api/search/'
    '?{query}'
    '&from={position}'
    '&size=20'
    '&format=html'
)
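
# e.g. with query='climate' and pageno=2 the template above formats to:
#   https://digg.com/api/search/?q=climate&from=20&size=20&format=html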


def request(query, params):
    offset = (params['pageno'] - 1) * 20
    params['url'] = search_url.format(
        query=urlencode({'q': query}),
        position=offset,
    )
    return params


def response(resp):
    results = []

    # parse results
    for result in loads(resp.text)['mapped']:

        # strip html tags and superfluous quotation marks from content
        content = html.document_fromstring(
            result['excerpt']
        ).text_content()
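        # e.g. result['excerpt'] = '<b>Digg</b> &amp; friends'
        #      -> content == 'Digg & friends'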

        # 'created': {'ISO': '2020-10-16T14:09:55Z', ...}
        published = datetime.strptime(
            result['created']['ISO'], '%Y-%m-%dT%H:%M:%SZ'
        )
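        # e.g. '2020-10-16T14:09:55Z' -> datetime(2020, 10, 16, 14, 9, 55)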

        results.append({
            'url': result['url'],
            'title': result['title'],
            'content': content,
            'template': 'videos.html',
            'publishedDate': published,
            'thumbnail': result['images']['thumbImage'],
        })

    return results
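

# ---------------------------------------------------------------------------
# Hedged smoke test, not part of the engine: in production SearXNG itself
# drives request() and response(). The JSON below is a made-up payload that
# only mirrors the fields this module actually reads ('mapped', 'url',
# 'title', 'excerpt', 'created.ISO', 'images.thumbImage'); the real API
# response contains more.
if __name__ == '__main__':
    from types import SimpleNamespace

    print(request('climate', {'pageno': 2})['url'])
    # -> https://digg.com/api/search/?q=climate&from=20&size=20&format=html

    fake_resp = SimpleNamespace(text='''{
        "mapped": [{
            "url": "https://digg.com/2020/example-story",
            "title": "Example story",
            "excerpt": "<p>&quot;Some teaser text&quot;</p>",
            "created": {"ISO": "2020-10-16T14:09:55Z"},
            "images": {"thumbImage": "https://digg.com/thumb.jpg"}
        }]
    }''')
    for item in response(fake_resp):
        print(item['title'], '|', item['content'], '|', item['publishedDate'])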