solr.py 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """.. sidebar:: info
  3. - :origin:`solr.py <searx/engines/solr.py>`
  4. - `Solr <https://solr.apache.org>`_
  5. - `Solr Resources <https://solr.apache.org/resources.html>`_
  6. - `Install Solr <https://solr.apache.org/guide/installing-solr.html>`_
  7. Solr_ is a popular search engine based on Lucene, just like Elasticsearch_. But
  8. instead of searching in indices, you can search in collections.
  9. Example
  10. =======
This is an example configuration for searching in the collection
``my-collection`` and getting the results in ascending order.
.. code:: yaml

  - name: solr
    engine: solr
    shortcut: slr
    base_url: http://localhost:8983
    collection: my-collection
    sort: asc
    enable_http: true
  21. """
  22. # pylint: disable=global-statement
  23. from json import loads
  24. from urllib.parse import urlencode
  25. from searx.exceptions import SearxEngineAPIException
  26. base_url = 'http://localhost:8983'
  27. collection = ''
  28. rows = 10
  29. sort = '' # sorting: asc or desc
  30. field_list = 'name' # list of field names to display on the UI
  31. default_fields = '' # default field to query
  32. query_fields = '' # query fields
  33. _search_url = ''
  34. paging = True
  35. def init(_):
  36. if collection == '':
  37. raise ValueError('collection cannot be empty')
  38. global _search_url
  39. _search_url = base_url + '/solr/' + collection + '/select?{params}'
  40. def request(query, params):
  41. query_params = {'q': query, 'rows': rows}
  42. if field_list != '':
  43. query_params['fl'] = field_list
  44. if query_fields != '':
  45. query_params['qf'] = query_fields
  46. if default_fields != '':
  47. query_params['df'] = default_fields
  48. if sort != '':
  49. query_params['sort'] = sort
  50. if 'pageno' in params:
  51. query_params['start'] = rows * (params['pageno'] - 1)
  52. params['url'] = _search_url.format(params=urlencode(query_params))
  53. return params
  54. def response(resp):
  55. resp_json = __get_response(resp)
  56. results = []
  57. for result in resp_json['response']['docs']:
  58. r = {key: str(value) for key, value in result.items()}
  59. if len(r) == 0:
  60. continue
  61. r['template'] = 'key-value.html'
  62. results.append(r)
  63. return results
  64. def __get_response(resp):
  65. try:
  66. resp_json = loads(resp.text)
  67. except Exception as e:
  68. raise SearxEngineAPIException("failed to parse response") from e
  69. if 'error' in resp_json:
  70. raise SearxEngineAPIException(resp_json['error']['msg'])
  71. return resp_json