command.py 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """With *command engines* administrators can run engines to integrate arbitrary
  3. shell commands.
  4. .. attention::
  5. When creating and enabling a ``command`` engine on a public instance, you
  6. must be careful to avoid leaking private data.
  7. The easiest solution is to limit the access by setting ``tokens`` as described
  8. in section :ref:`private engines`. The engine base is flexible. Only your
  9. imagination can limit the power of this engine (and maybe security concerns).
  10. Configuration
  11. =============
  12. The following options are available:
  13. ``command``:
  14. A comma separated list of the elements of the command. A special token
  15. ``{{QUERY}}`` tells where to put the search terms of the user. Example:
  16. .. code:: yaml
  17. ['ls', '-l', '-h', '{{QUERY}}']
  18. ``delimiter``:
  19. A mapping containing the delimiter string ``chars`` and the *titles* of each
  20. element in ``keys``.
  21. ``parse_regex``:
  22. A dict containing the regular expressions for each result key.
  23. ``query_type``:
  24. The expected type of user search terms. Possible values: ``path`` and
  25. ``enum``.
  26. ``path``:
  27. Checks if the user provided path is inside the working directory. If not,
  28. the query is not executed.
  29. ``enum``:
  30. Is a list of allowed search terms. If the user submits something which is
  31. not included in the list, the query returns an error.
  32. ``query_enum``:
  33. A list containing allowed search terms if ``query_type`` is set to ``enum``.
  34. ``working_dir``:
  35. The directory where the command has to be executed. Default: ``./``.
  36. ``result_separator``:
  37. The character that separates results. Default: ``\\n``.
  38. Example
  39. =======
  40. The example engine below can be used to find files with a specific name in the
  41. configured working directory:
  42. .. code:: yaml
  43. - name: find
  44. engine: command
  45. command: ['find', '.', '-name', '{{QUERY}}']
  46. query_type: path
  47. shortcut: fnd
  48. delimiter:
  49. chars: ' '
  50. keys: ['line']
  51. Implementations
  52. ===============
  53. """
  54. import re
  55. from os.path import expanduser, isabs, realpath, commonprefix
  56. from shlex import split as shlex_split
  57. from subprocess import Popen, PIPE
  58. from threading import Thread
  59. from searx import logger
  60. engine_type = 'offline'
  61. paging = True
  62. command = []
  63. delimiter = {}
  64. parse_regex = {}
  65. query_type = ''
  66. query_enum = []
  67. environment_variables = {}
  68. working_dir = realpath('.')
  69. result_separator = '\n'
  70. result_template = 'key-value.html'
  71. timeout = 4.0
  72. _command_logger = logger.getChild('command')
  73. _compiled_parse_regex = {}
  74. def init(engine_settings):
  75. check_parsing_options(engine_settings)
  76. if 'command' not in engine_settings:
  77. raise ValueError('engine command : missing configuration key: command')
  78. global command, working_dir, delimiter, parse_regex, environment_variables # pylint: disable=global-statement
  79. command = engine_settings['command']
  80. if 'working_dir' in engine_settings:
  81. working_dir = engine_settings['working_dir']
  82. if not isabs(engine_settings['working_dir']):
  83. working_dir = realpath(working_dir)
  84. if 'parse_regex' in engine_settings:
  85. parse_regex = engine_settings['parse_regex']
  86. for result_key, regex in parse_regex.items():
  87. _compiled_parse_regex[result_key] = re.compile(regex, flags=re.MULTILINE)
  88. if 'delimiter' in engine_settings:
  89. delimiter = engine_settings['delimiter']
  90. if 'environment_variables' in engine_settings:
  91. environment_variables = engine_settings['environment_variables']
  92. def search(query, params):
  93. cmd = _get_command_to_run(query)
  94. if not cmd:
  95. return []
  96. results = []
  97. reader_thread = Thread(target=_get_results_from_process, args=(results, cmd, params['pageno']))
  98. reader_thread.start()
  99. reader_thread.join(timeout=timeout)
  100. return results
  101. def _get_command_to_run(query):
  102. params = shlex_split(query)
  103. __check_query_params(params)
  104. cmd = []
  105. for c in command:
  106. if c == '{{QUERY}}':
  107. cmd.extend(params)
  108. else:
  109. cmd.append(c)
  110. return cmd
  111. def _get_results_from_process(results, cmd, pageno):
  112. leftover = ''
  113. count = 0
  114. start, end = __get_results_limits(pageno)
  115. with Popen(cmd, stdout=PIPE, stderr=PIPE, env=environment_variables) as process:
  116. line = process.stdout.readline()
  117. while line:
  118. buf = leftover + line.decode('utf-8')
  119. raw_results = buf.split(result_separator)
  120. if raw_results[-1]:
  121. leftover = raw_results[-1]
  122. raw_results = raw_results[:-1]
  123. for raw_result in raw_results:
  124. result = __parse_single_result(raw_result)
  125. if result is None:
  126. _command_logger.debug('skipped result:', raw_result)
  127. continue
  128. if start <= count and count <= end: # pylint: disable=chained-comparison
  129. result['template'] = result_template
  130. results.append(result)
  131. count += 1
  132. if end < count:
  133. return results
  134. line = process.stdout.readline()
  135. return_code = process.wait(timeout=timeout)
  136. if return_code != 0:
  137. raise RuntimeError('non-zero return code when running command', cmd, return_code)
  138. return None
  139. def __get_results_limits(pageno):
  140. start = (pageno - 1) * 10
  141. end = start + 9
  142. return start, end
  143. def __check_query_params(params):
  144. if not query_type:
  145. return
  146. if query_type == 'path':
  147. query_path = params[-1]
  148. query_path = expanduser(query_path)
  149. if commonprefix([realpath(query_path), working_dir]) != working_dir:
  150. raise ValueError('requested path is outside of configured working directory')
  151. elif query_type == 'enum' and len(query_enum) > 0:
  152. for param in params:
  153. if param not in query_enum:
  154. raise ValueError('submitted query params is not allowed', param, 'allowed params:', query_enum)
  155. def check_parsing_options(engine_settings):
  156. """Checks if delimiter based parsing or regex parsing is configured correctly"""
  157. if 'delimiter' not in engine_settings and 'parse_regex' not in engine_settings:
  158. raise ValueError('failed to init settings for parsing lines: missing delimiter or parse_regex')
  159. if 'delimiter' in engine_settings and 'parse_regex' in engine_settings:
  160. raise ValueError('failed to init settings for parsing lines: too many settings')
  161. if 'delimiter' in engine_settings:
  162. if 'chars' not in engine_settings['delimiter'] or 'keys' not in engine_settings['delimiter']:
  163. raise ValueError
  164. def __parse_single_result(raw_result):
  165. """Parses command line output based on configuration"""
  166. result = {}
  167. if delimiter:
  168. elements = raw_result.split(delimiter['chars'], maxsplit=len(delimiter['keys']) - 1)
  169. if len(elements) != len(delimiter['keys']):
  170. return {}
  171. for i in range(len(elements)): # pylint: disable=consider-using-enumerate
  172. result[delimiter['keys'][i]] = elements[i]
  173. if parse_regex:
  174. for result_key, regex in _compiled_parse_regex.items():
  175. found = regex.search(raw_result)
  176. if not found:
  177. return {}
  178. result[result_key] = raw_result[found.start() : found.end()]
  179. return result