peertube.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """Peertube and :py:obj:`SepiaSearch <searx.engines.sepiasearch>` do share
  3. (more or less) the same REST API and the schema of the JSON result is identical.
  4. """
  5. import re
  6. from urllib.parse import urlencode
  7. from datetime import datetime
  8. from dateutil.parser import parse
  9. from dateutil.relativedelta import relativedelta
  10. import babel
  11. from searx.network import get # see https://github.com/searxng/searxng/issues/762
  12. from searx.locales import language_tag
  13. from searx.utils import html_to_text, humanize_number
  14. from searx.enginelib.traits import EngineTraits
  15. traits: EngineTraits
  16. about = {
  17. # pylint: disable=line-too-long
  18. "website": 'https://joinpeertube.org',
  19. "wikidata_id": 'Q50938515',
  20. "official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos',
  21. "use_official_api": True,
  22. "require_api_key": False,
  23. "results": 'JSON',
  24. }
  25. # engine dependent config
  26. categories = ["videos"]
  27. paging = True
  28. base_url = "https://peer.tube"
  29. """Base URL of the Peertube instance. A list of instances is available at:
  30. - https://instances.joinpeertube.org/instances
  31. """
  32. time_range_support = True
  33. time_range_table = {
  34. 'day': relativedelta(),
  35. 'week': relativedelta(weeks=-1),
  36. 'month': relativedelta(months=-1),
  37. 'year': relativedelta(years=-1),
  38. }
  39. safesearch = True
  40. safesearch_table = {0: 'both', 1: 'false', 2: 'false'}
  41. def minute_to_hm(minute):
  42. if isinstance(minute, int):
  43. return "%d:%02d" % (divmod(minute, 60))
  44. return None
  45. def request(query, params):
  46. """Assemble request for the Peertube API"""
  47. if not query:
  48. return False
  49. # eng_region = traits.get_region(params['searxng_locale'], 'en_US')
  50. eng_lang = traits.get_language(params['searxng_locale'], None)
  51. params['url'] = (
  52. base_url.rstrip("/")
  53. + "/api/v1/search/videos?"
  54. + urlencode(
  55. {
  56. 'search': query,
  57. 'searchTarget': 'search-index', # Vidiversum
  58. 'resultType': 'videos',
  59. 'start': (params['pageno'] - 1) * 10,
  60. 'count': 10,
  61. # -createdAt: sort by date ascending / createdAt: date descending
  62. 'sort': '-match', # sort by *match descending*
  63. 'nsfw': safesearch_table[params['safesearch']],
  64. }
  65. )
  66. )
  67. if eng_lang is not None:
  68. params['url'] += '&languageOneOf[]=' + eng_lang
  69. params['url'] += '&boostLanguages[]=' + eng_lang
  70. if params['time_range'] in time_range_table:
  71. time = datetime.now().date() + time_range_table[params['time_range']]
  72. params['url'] += '&startDate=' + time.isoformat()
  73. return params
  74. def response(resp):
  75. return video_response(resp)
  76. def video_response(resp):
  77. """Parse video response from SepiaSearch and Peertube instances."""
  78. results = []
  79. json_data = resp.json()
  80. if 'data' not in json_data:
  81. return []
  82. for result in json_data['data']:
  83. metadata = [
  84. x
  85. for x in [
  86. result.get('channel', {}).get('displayName'),
  87. result.get('channel', {}).get('name') + '@' + result.get('channel', {}).get('host'),
  88. ', '.join(result.get('tags', [])),
  89. ]
  90. if x
  91. ]
  92. results.append(
  93. {
  94. 'url': result['url'],
  95. 'title': result['name'],
  96. 'content': html_to_text(result.get('description') or ''),
  97. 'author': result.get('account', {}).get('displayName'),
  98. 'length': minute_to_hm(result.get('duration')),
  99. 'views': humanize_number(result['views']),
  100. 'template': 'videos.html',
  101. 'publishedDate': parse(result['publishedAt']),
  102. 'iframe_src': result.get('embedUrl'),
  103. 'thumbnail': result.get('thumbnailUrl') or result.get('previewUrl'),
  104. 'metadata': ' | '.join(metadata),
  105. }
  106. )
  107. return results
  108. def fetch_traits(engine_traits: EngineTraits):
  109. """Fetch languages from peertube's search-index source code.
  110. See videoLanguages_ in commit `8ed5c729 - Refactor and redesign client`_
  111. .. _8ed5c729 - Refactor and redesign client:
  112. https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729
  113. .. _videoLanguages:
  114. https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729#3d8747f9a60695c367c70bb64efba8f403721fad_0_291
  115. """
  116. resp = get(
  117. 'https://framagit.org/framasoft/peertube/search-index/-/raw/master/client/src/components/Filters.vue',
  118. # the response from search-index repository is very slow
  119. timeout=60,
  120. )
  121. if not resp.ok: # type: ignore
  122. print("ERROR: response from peertube is not OK.")
  123. return
  124. js_lang = re.search(r"videoLanguages \(\)[^\n]+(.*?)\]", resp.text, re.DOTALL) # type: ignore
  125. if not js_lang:
  126. print("ERROR: can't determine languages from peertube")
  127. return
  128. for lang in re.finditer(r"\{ id: '([a-z]+)', label:", js_lang.group(1)):
  129. eng_tag = lang.group(1)
  130. if eng_tag == 'oc':
  131. # Occitanis not known by babel, its closest relative is Catalan
  132. # but 'ca' is already in the list of engine_traits.languages -->
  133. # 'oc' will be ignored.
  134. continue
  135. try:
  136. sxng_tag = language_tag(babel.Locale.parse(eng_tag))
  137. except babel.UnknownLocaleError:
  138. print("ERROR: %s is unknown by babel" % eng_tag)
  139. continue
  140. conflict = engine_traits.languages.get(sxng_tag)
  141. if conflict:
  142. if conflict != eng_tag:
  143. print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
  144. continue
  145. engine_traits.languages[sxng_tag] = eng_tag
  146. engine_traits.languages['zh_Hans'] = 'zh'
  147. engine_traits.languages['zh_Hant'] = 'zh'