yacy.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """
  3. Yacy (Web, Images, Videos, Music, Files)
  4. """
  5. from json import loads
  6. from dateutil import parser
  7. from urllib.parse import urlencode
  8. from requests.auth import HTTPDigestAuth
  9. from searx.utils import html_to_text
  10. # about
  11. about = {
  12. "website": 'https://yacy.net/',
  13. "wikidata_id": 'Q1759675',
  14. "official_api_documentation": 'https://wiki.yacy.net/index.php/Dev:API',
  15. "use_official_api": True,
  16. "require_api_key": False,
  17. "results": 'JSON',
  18. }
  19. # engine dependent config
  20. categories = ['general', 'images'] # TODO , 'music', 'videos', 'files'
  21. paging = True
  22. number_of_results = 5
  23. http_digest_auth_user = ""
  24. http_digest_auth_pass = ""
  25. # search-url
  26. base_url = 'http://localhost:8090'
  27. search_url = '/yacysearch.json?{query}'\
  28. '&startRecord={offset}'\
  29. '&maximumRecords={limit}'\
  30. '&contentdom={search_type}'\
  31. '&resource=global'
  32. # yacy specific type-definitions
  33. search_types = {'general': 'text',
  34. 'images': 'image',
  35. 'files': 'app',
  36. 'music': 'audio',
  37. 'videos': 'video'}
  38. # do search-request
  39. def request(query, params):
  40. offset = (params['pageno'] - 1) * number_of_results
  41. search_type = search_types.get(params.get('category'), '0')
  42. params['url'] = base_url +\
  43. search_url.format(query=urlencode({'query': query}),
  44. offset=offset,
  45. limit=number_of_results,
  46. search_type=search_type)
  47. if http_digest_auth_user and http_digest_auth_pass:
  48. params['auth'] = HTTPDigestAuth(http_digest_auth_user, http_digest_auth_pass)
  49. # add language tag if specified
  50. if params['language'] != 'all':
  51. params['url'] += '&lr=lang_' + params['language'].split('-')[0]
  52. return params
  53. # get response from search-request
  54. def response(resp):
  55. results = []
  56. raw_search_results = loads(resp.text)
  57. # return empty array if there are no results
  58. if not raw_search_results:
  59. return []
  60. search_results = raw_search_results.get('channels', [])
  61. if len(search_results) == 0:
  62. return []
  63. for result in search_results[0].get('items', []):
  64. # parse image results
  65. if resp.search_params.get('category') == 'images':
  66. result_url = ''
  67. if 'url' in result:
  68. result_url = result['url']
  69. elif 'link' in result:
  70. result_url = result['link']
  71. else:
  72. continue
  73. # append result
  74. results.append({'url': result_url,
  75. 'title': result['title'],
  76. 'content': '',
  77. 'img_src': result['image'],
  78. 'template': 'images.html'})
  79. # parse general results
  80. else:
  81. publishedDate = parser.parse(result['pubDate'])
  82. # append result
  83. results.append({'url': result['link'],
  84. 'title': result['title'],
  85. 'content': html_to_text(result['description']),
  86. 'publishedDate': publishedDate})
  87. # TODO parse video, audio and file results
  88. return results