elasticsearch.py 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """.. sidebar:: info
  3. - :origin:`elasticsearch.py <searx/engines/elasticsearch.py>`
  4. - `Elasticsearch <https://www.elastic.co/elasticsearch/>`_
  5. - `Elasticsearch Guide
  6. <https://www.elastic.co/guide/en/elasticsearch/reference/current/index.html>`_
  7. - `Install Elasticsearch
  8. <https://www.elastic.co/guide/en/elasticsearch/reference/current/install-elasticsearch.html>`_
  9. Elasticsearch_ supports numerous ways to query the data it is storing. At the
  10. moment the engine supports the most popular search methods (``query_type``):
  11. - ``match``,
  12. - ``simple_query_string``,
  13. - ``term`` and
  14. - ``terms``.
  15. If none of the methods fit your use case, you can select ``custom`` query type
  16. and provide the JSON payload to submit to Elasticsearch in
  17. ``custom_query_json``.
  18. Example
  19. =======
  20. The following is an example configuration for an Elasticsearch_ instance with
  21. authentication configured to read from ``my-index`` index.
  22. .. code:: yaml
  23. - name: elasticsearch
  24. shortcut: els
  25. engine: elasticsearch
  26. base_url: http://localhost:9200
  27. username: elastic
  28. password: changeme
  29. index: my-index
  30. query_type: match
  31. # custom_query_json: '{ ... }'
  32. enable_http: true
  33. """
  34. from json import loads, dumps
  35. from searx.exceptions import SearxEngineAPIException
  36. from searx.result_types import EngineResults
  37. from searx.extended_types import SXNG_Response
  38. categories = ['general']
  39. paging = True
  40. about = {
  41. 'website': 'https://www.elastic.co',
  42. 'wikidata_id': 'Q3050461',
  43. 'official_api_documentation': 'https://www.elastic.co/guide/en/elasticsearch/reference/current/search-search.html',
  44. 'use_official_api': True,
  45. 'require_api_key': False,
  46. 'format': 'JSON',
  47. }
  48. base_url = 'http://localhost:9200'
  49. username = ''
  50. password = ''
  51. index = ''
  52. query_type = 'match'
  53. custom_query_json = {}
  54. show_metadata = False
  55. page_size = 10
  56. def init(engine_settings):
  57. if 'query_type' in engine_settings and engine_settings['query_type'] not in _available_query_types:
  58. raise ValueError('unsupported query type', engine_settings['query_type'])
  59. if index == '':
  60. raise ValueError('index cannot be empty')
  61. def request(query, params):
  62. if query_type not in _available_query_types:
  63. return params
  64. if username and password:
  65. params['auth'] = (username, password)
  66. args = {
  67. 'from': (params['pageno'] - 1) * page_size,
  68. 'size': page_size,
  69. }
  70. data = _available_query_types[query_type](query)
  71. data.update(args)
  72. params['url'] = f"{base_url}/{index}/_search"
  73. params['method'] = 'GET'
  74. params['data'] = dumps(data)
  75. params['headers']['Content-Type'] = 'application/json'
  76. return params
  77. def _match_query(query):
  78. """
  79. The standard for full text queries.
  80. searx format: "key:value" e.g. city:berlin
  81. REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-query.html
  82. """
  83. try:
  84. key, value = query.split(':')
  85. except Exception as e:
  86. raise ValueError('query format must be "key:value"') from e
  87. return {"query": {"match": {key: {'query': value}}}}
  88. def _simple_query_string_query(query):
  89. """
  90. Accepts query strings, but it is less strict than query_string
  91. The field used can be specified in index.query.default_field in Elasticsearch.
  92. REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html
  93. """
  94. return {'query': {'simple_query_string': {'query': query}}}
  95. def _term_query(query):
  96. """
  97. Accepts one term and the name of the field.
  98. searx format: "key:value" e.g. city:berlin
  99. REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-term-query.html
  100. """
  101. try:
  102. key, value = query.split(':')
  103. except Exception as e:
  104. raise ValueError('query format must be key:value') from e
  105. return {'query': {'term': {key: value}}}
  106. def _terms_query(query):
  107. """
  108. Accepts multiple terms and the name of the field.
  109. searx format: "key:value1,value2" e.g. city:berlin,paris
  110. REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-terms-query.html
  111. """
  112. try:
  113. key, values = query.split(':')
  114. except Exception as e:
  115. raise ValueError('query format must be key:value1,value2') from e
  116. return {'query': {'terms': {key: values.split(',')}}}
  117. def _custom_query(query):
  118. key, value = query.split(':')
  119. custom_query = custom_query_json
  120. for query_key, query_value in custom_query.items():
  121. if query_key == '{{KEY}}':
  122. custom_query[key] = custom_query.pop(query_key)
  123. if query_value == '{{VALUE}}':
  124. custom_query[query_key] = value
  125. return custom_query
  126. def response(resp: SXNG_Response) -> EngineResults:
  127. res = EngineResults()
  128. resp_json = loads(resp.text)
  129. if 'error' in resp_json:
  130. raise SearxEngineAPIException(resp_json["error"])
  131. for result in resp_json["hits"]["hits"]:
  132. kvmap = {key: str(value) if not key.startswith("_") else value for key, value in result["_source"].items()}
  133. if show_metadata:
  134. kvmap["metadata"] = {"index": result["_index"], "id": result["_id"], "score": result["_score"]}
  135. res.add(res.types.KeyValue(kvmap=kvmap))
  136. return res
  137. _available_query_types = {
  138. # Full text queries
  139. # https://www.elastic.co/guide/en/elasticsearch/reference/current/full-text-queries.html
  140. 'match': _match_query,
  141. 'simple_query_string': _simple_query_string_query,
  142. # Term-level queries
  143. # https://www.elastic.co/guide/en/elasticsearch/reference/current/term-level-queries.html
  144. 'term': _term_query,
  145. 'terms': _terms_query,
  146. # Query JSON defined by the instance administrator.
  147. 'custom': _custom_query,
  148. }