# SPDX-License-Identifier: AGPL-3.0-or-later
"""ARD: `Tagesschau API`_

The Tagesschau is a news program of the ARD.  Via the `Tagesschau API`_,
current news and media reports are available in JSON format.  The
`Bundesstelle für Open Data`_ offers an `OpenAPI`_ portal at bundDEV_ where
APIs are documented and can be tested.

This SearXNG engine uses the `/api2u/search`_ API.

.. _/api2u/search: http://tagesschau.api.bund.dev/
.. _bundDEV: https://bund.dev/apis
.. _Bundesstelle für Open Data: https://github.com/bundesAPI
.. _Tagesschau API: https://github.com/AndreasFischer1985/tagesschau-api/blob/main/README_en.md
.. _OpenAPI: https://swagger.io/specification/
"""
  14. from typing import TYPE_CHECKING
  15. from datetime import datetime
  16. from urllib.parse import urlencode
  17. import re
  18. if TYPE_CHECKING:
  19. import logging
  20. logger: logging.Logger
  21. about = {
  22. 'website': "https://tagesschau.de",
  23. 'wikidata_id': "Q703907",
  24. 'official_api_documentation': None,
  25. 'use_official_api': True,
  26. 'require_api_key': False,
  27. 'results': 'JSON',
  28. 'language': 'de',
  29. }
  30. categories = ['general', 'news']
  31. paging = True
  32. results_per_page = 10
  33. base_url = "https://www.tagesschau.de"
  34. use_source_url = True
  35. """When set to false, display URLs from Tagesschau, and not the actual source
  36. (e.g. NDR, WDR, SWR, HR, ...)
  37. .. note::
  38. The actual source may contain additional content, such as commentary, that is
  39. not displayed in the Tagesschau.
  40. """
  41. def request(query, params):
  42. args = {
  43. 'searchText': query,
  44. 'pageSize': results_per_page,
  45. 'resultPage': params['pageno'] - 1,
  46. }
  47. params['url'] = f"{base_url}/api2u/search?{urlencode(args)}"
  48. return params
  49. def response(resp):
  50. results = []
  51. json = resp.json()
  52. for item in json['searchResults']:
  53. item_type = item.get('type')
  54. if item_type in ('story', 'webview'):
  55. results.append(_story(item))
  56. elif item_type == 'video':
  57. results.append(_video(item))
  58. else:
  59. logger.error("unknow result type: %s", item_type)
  60. return results
  61. def _story(item):
  62. return {
  63. 'title': item['title'],
  64. 'thumbnail': item.get('teaserImage', {}).get('imageVariants', {}).get('16x9-256'),
  65. 'publishedDate': datetime.strptime(item['date'][:19], '%Y-%m-%dT%H:%M:%S'),
  66. 'content': item['firstSentence'],
  67. 'url': item['shareURL'] if use_source_url else item['detailsweb'],
  68. }
  69. def _video(item):
  70. streams = item['streams']
  71. video_url = streams.get('h264s') or streams.get('h264m') or streams.get('h264l') or streams.get('h264xl')
  72. title = item['title']
  73. if "_vapp.mxf" in title:
  74. title = title.replace("_vapp.mxf", "")
  75. title = re.sub(r"APP\d+ (FC-)?", "", title, count=1)
  76. return {
  77. 'template': 'videos.html',
  78. 'title': title,
  79. 'thumbnail': item.get('teaserImage', {}).get('imageVariants', {}).get('16x9-256'),
  80. 'publishedDate': datetime.strptime(item['date'][:19], '%Y-%m-%dT%H:%M:%S'),
  81. 'content': item.get('firstSentence', ''),
  82. 'iframe_src': video_url,
  83. 'url': video_url,
  84. }