yacy.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204
  # SPDX-License-Identifier: AGPL-3.0-or-later
  """YaCy_ is a free distributed search engine, built on the principles of
  peer-to-peer (P2P) networks.

  API: Dev:APIyacysearch_

  Releases:

  - https://github.com/yacy/yacy_search_server/tags
  - https://download.yacy.net/

  .. _Yacy: https://yacy.net/
  .. _Dev:APIyacysearch: https://wiki.yacy.net/index.php/Dev:APIyacysearch

  Configuration
  =============

  The engine has the following (additional) settings:

  - :py:obj:`http_digest_auth_user`
  - :py:obj:`http_digest_auth_pass`
  - :py:obj:`search_mode`
  - :py:obj:`search_type`

  The :py:obj:`base_url` has to be set in the engine named `yacy` and is used by
  all yacy engines.

  .. code:: yaml

    - name: yacy
      engine: yacy
      categories: general
      search_type: text
      shortcut: ya
      base_url:
        - https://yacy.searchlab.eu
        - https://search.lomig.me
        - https://yacy.ecosys.eu
        - https://search.webproject.link

    - name: yacy images
      engine: yacy
      categories: images
      search_type: image
      shortcut: yai
      disabled: true

  Implementations
  ===============
  """
  39. # pylint: disable=fixme
  40. from __future__ import annotations
  41. import random
  42. from json import loads
  43. from urllib.parse import urlencode
  44. from dateutil import parser
  45. from httpx import DigestAuth
  46. from searx.utils import html_to_text
  # engine metadata
  about = {
      "website": "https://yacy.net/",
      "wikidata_id": "Q1759675",
      "official_api_documentation": "https://wiki.yacy.net/index.php/Dev:API",
      "use_official_api": True,
      "require_api_key": False,
      "results": "JSON",
  }

  # engine dependent config
  categories = ["general"]
  paging = True
  number_of_results = 10

  http_digest_auth_user = ""
  """User name for HTTP digest authentication at the local YaCy instance."""

  http_digest_auth_pass = ""
  """Password for HTTP digest authentication at the local YaCy instance."""

  search_mode = "global"
  """YaCy search mode, either ``global`` or ``local``.  ``global`` is the
  default.

  ``global``
    Peer-to-Peer search

  ``local``
    Privacy or Stealth mode, restricts the search to the local YaCy instance.
  """

  search_type = "text"
  """One of ``text`` or ``image``.  The search types ``app``, ``audio`` and
  ``video`` are not yet implemented (pull requests are welcome).
  """

  base_url: list | str = "https://yacy.searchlab.eu"
  """Either a single URL or a list of URLs.  When a list is given, the instance
  to query is selected randomly.
  """
  def init(_):
      """Check that :py:obj:`search_type` names an implemented search type.

      The positional argument is accepted but not used.

      :raises ValueError: if :py:obj:`search_type` is neither ``text`` nor
          ``image``.
      """
      # 'app', 'audio' and 'video' are not implemented yet
      valid_types = ['text', 'image']

      if search_type in valid_types:
          return
      raise ValueError(f'search_type "{search_type}" is not one of {valid_types}')
  def _base_url() -> str:
      """Return the base URL to query, with one trailing ``/`` removed.

      The value is read from the ``base_url`` of the engine registered under the
      name ``yacy``.  If that value is a list, one entry is picked at random.
      """
      from searx.engines import engines  # pylint: disable=import-outside-toplevel

      configured = engines['yacy'].base_url  # type: ignore
      instance = random.choice(configured) if isinstance(configured, list) else configured
      return instance[:-1] if instance.endswith("/") else instance
  def request(query, params):
      """Store the ``yacysearch.json`` request URL for ``query`` in ``params``.

      The start record is derived from ``params['pageno']`` and
      :py:obj:`number_of_results`.  A language restriction is added unless
      ``params['language']`` is ``all``.  If both HTTP digest credentials are
      configured, a ``DigestAuth`` object is stored in ``params['auth']``.

      :returns: the same ``params`` dict, updated in place.
      """
      args = {
          'query': query,
          'startRecord': (params['pageno'] - 1) * number_of_results,
          'maximumRecords': number_of_results,
          'contentdom': search_type,
          'resource': search_mode,
      }

      language = params['language']
      if language != 'all':
          # only the part before the first '-' is sent (e.g. ``en`` for ``en-US``)
          args['lr'] = 'lang_' + language.partition('-')[0]

      endpoint = f"{_base_url()}/yacysearch.json"
      params["url"] = f"{endpoint}?{urlencode(args)}"

      credentials = (http_digest_auth_user, http_digest_auth_pass)
      if all(credentials):
          params['auth'] = DigestAuth(*credentials)

      return params
  def response(resp):
      """Convert the JSON reply of ``yacysearch.json`` into a list of results.

      ``resp.text`` is parsed as JSON; the items of the first entry in
      ``channels`` are turned into result dicts.

      - With :py:obj:`search_type` ``image`` every item becomes an
        ``images.html`` result.  Items having neither ``url`` nor ``link`` are
        skipped.
      - Otherwise every item becomes a text result whose ``publishedDate`` is
        parsed from ``pubDate``.  It is ``None`` when ``pubDate`` is missing or
        cannot be parsed, so one malformed date does not discard the results of
        the whole response.

      :returns: a list of result dicts, empty if the reply has no channels.
      """
      raw_search_results = loads(resp.text)

      # return empty array if there are no results
      if not raw_search_results:
          return []

      channels = raw_search_results.get('channels', [])
      if not channels:
          return []

      results = []
      for result in channels[0].get('items', []):

          # parse image results
          if search_type == 'image':
              if 'url' in result:
                  result_url = result['url']
              elif 'link' in result:
                  result_url = result['link']
              else:
                  continue

              results.append(
                  {
                      'url': result_url,
                      'title': result['title'],
                      'content': '',
                      'img_src': result['image'],
                      'template': 'images.html',
                  }
              )
              continue

          # parse general results
          published_date = None
          if 'pubDate' in result:
              try:
                  published_date = parser.parse(result['pubDate'])
              except (ValueError, OverflowError, TypeError):
                  # unparsable date: keep the result, just without a date
                  published_date = None

          results.append(
              {
                  # a missing link is treated like an empty one
                  'url': result.get('link') or '',
                  'title': result['title'],
                  'content': html_to_text(result['description']),
                  'publishedDate': published_date,
              }
          )

          # TODO parse video, audio and file results

      return results