_core.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # pylint: disable=too-few-public-methods,missing-module-docstring
  3. from __future__ import annotations
  4. __all__ = ["PluginInfo", "Plugin", "PluginStorage"]
  5. import abc
  6. import importlib
  7. import logging
  8. import pathlib
  9. import types
  10. import typing
  11. import warnings
  12. from dataclasses import dataclass, field
  13. import flask
  14. import searx
  15. from searx.utils import load_module
  16. from searx.extended_types import SXNG_Request
  17. from searx.result_types import Result
  18. if typing.TYPE_CHECKING:
  19. from searx.search import SearchWithPlugins
  20. _default = pathlib.Path(__file__).parent
  21. log: logging.Logger = logging.getLogger("searx.plugins")
  22. @dataclass
  23. class PluginInfo:
  24. """Object that holds informations about a *plugin*, these infos are shown to
  25. the user in the Preferences menu.
  26. To be able to translate the information into other languages, the text must
  27. be written in English and translated with :py:obj:`flask_babel.gettext`.
  28. """
  29. id: str
  30. """The ID-selector in HTML/CSS `#<id>`."""
  31. name: str
  32. """Name of the *plugin*."""
  33. description: str
  34. """Short description of the *answerer*."""
  35. preference_section: typing.Literal["general", "ui", "privacy", "query"] | None = "general"
  36. """Section (tab/group) in the preferences where this plugin is shown to the
  37. user.
  38. The value ``query`` is reserved for plugins that are activated via a
  39. *keyword* as part of a search query, see:
  40. - :py:obj:`PluginInfo.examples`
  41. - :py:obj:`Plugin.keywords`
  42. Those plugins are shown in the preferences in tab *Special Queries*.
  43. """
  44. examples: list[str] = field(default_factory=list)
  45. """List of short examples of the usage / of query terms."""
  46. keywords: list[str] = field(default_factory=list)
  47. """See :py:obj:`Plugin.keywords`"""
  48. class Plugin(abc.ABC):
  49. """Abstract base class of all Plugins."""
  50. id: typing.ClassVar[str]
  51. """The ID (suffix) in the HTML form."""
  52. default_on: typing.ClassVar[bool]
  53. """Plugin is enabled/disabled by default."""
  54. keywords: list[str] = []
  55. """Keywords in the search query that activate the plugin. The *keyword* is
  56. the first word in a search query. If a plugin should be executed regardless
  57. of the search query, the list of keywords should be empty (which is also the
  58. default in the base class for Plugins)."""
  59. log: logging.Logger
  60. """A logger object, is automatically initialized when calling the
  61. constructor (if not already set in the subclass)."""
  62. info: PluginInfo
  63. """Informations about the *plugin*, see :py:obj:`PluginInfo`."""
  64. def __init__(self) -> None:
  65. super().__init__()
  66. for attr in ["id", "default_on"]:
  67. if getattr(self, attr, None) is None:
  68. raise NotImplementedError(f"plugin {self} is missing attribute {attr}")
  69. if not self.id:
  70. self.id = f"{self.__class__.__module__}.{self.__class__.__name__}"
  71. if not getattr(self, "log", None):
  72. self.log = log.getChild(self.id)
  73. def __hash__(self) -> int:
  74. """The hash value is used in :py:obj:`set`, for example, when an object
  75. is added to the set. The hash value is also used in other contexts,
  76. e.g. when checking for equality to identify identical plugins from
  77. different sources (name collisions)."""
  78. return id(self)
  79. def __eq__(self, other):
  80. """py:obj:`Plugin` objects are equal if the hash values of the two
  81. objects are equal."""
  82. return hash(self) == hash(other)
  83. def init(self, app: flask.Flask) -> bool: # pylint: disable=unused-argument
  84. """Initialization of the plugin, the return value decides whether this
  85. plugin is active or not. Initialization only takes place once, at the
  86. time the WEB application is set up. The base methode always returns
  87. ``True``, the methode can be overwritten in the inheritances,
  88. - ``True`` plugin is active
  89. - ``False`` plugin is inactive
  90. """
  91. return True
  92. # pylint: disable=unused-argument
  93. def pre_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> bool:
  94. """Runs BEFORE the search request and returns a boolean:
  95. - ``True`` to continue the search
  96. - ``False`` to stop the search
  97. """
  98. return True
  99. def on_result(self, request: SXNG_Request, search: "SearchWithPlugins", result: Result) -> bool:
  100. """Runs for each result of each engine and returns a boolean:
  101. - ``True`` to keep the result
  102. - ``False`` to remove the result from the result list
  103. The ``result`` can be modified to the needs.
  104. .. hint::
  105. If :py:obj:`Result.url` is modified, :py:obj:`Result.parsed_url` must
  106. be changed accordingly:
  107. .. code:: python
  108. result["parsed_url"] = urlparse(result["url"])
  109. """
  110. return True
  111. def post_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> None | typing.Sequence[Result]:
  112. """Runs AFTER the search request. Can return a list of :py:obj:`Result`
  113. objects to be added to the final result list."""
  114. return
  115. class ModulePlugin(Plugin):
  116. """A wrapper class for legacy *plugins*.
  117. .. note::
  118. For internal use only!
  119. In a module plugin, the follwing names are mapped:
  120. - `module.query_keywords` --> :py:obj:`Plugin.keywords`
  121. - `module.plugin_id` --> :py:obj:`Plugin.id`
  122. - `module.logger` --> :py:obj:`Plugin.log`
  123. """
  124. _required_attrs = (("name", str), ("description", str), ("default_on", bool))
  125. def __init__(self, mod: types.ModuleType):
  126. """In case of missing attributes in the module or wrong types are given,
  127. a :py:obj:`TypeError` exception is raised."""
  128. self.module = mod
  129. self.id = getattr(self.module, "plugin_id", self.module.__name__)
  130. self.log = logging.getLogger(self.module.__name__)
  131. self.keywords = getattr(self.module, "query_keywords", [])
  132. for attr, attr_type in self._required_attrs:
  133. if not hasattr(self.module, attr):
  134. msg = f"missing attribute {attr}, cannot load plugin"
  135. self.log.critical(msg)
  136. raise TypeError(msg)
  137. if not isinstance(getattr(self.module, attr), attr_type):
  138. msg = f"attribute {attr} is not of type {attr_type}"
  139. self.log.critical(msg)
  140. raise TypeError(msg)
  141. self.default_on = mod.default_on
  142. self.info = PluginInfo(
  143. id=self.id,
  144. name=self.module.name,
  145. description=self.module.description,
  146. preference_section=getattr(self.module, "preference_section", None),
  147. examples=getattr(self.module, "query_examples", []),
  148. keywords=self.keywords,
  149. )
  150. # monkeypatch module
  151. self.module.logger = self.log # type: ignore
  152. super().__init__()
  153. def init(self, app: flask.Flask) -> bool:
  154. if not hasattr(self.module, "init"):
  155. return True
  156. return self.module.init(app)
  157. def pre_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> bool:
  158. if not hasattr(self.module, "pre_search"):
  159. return True
  160. return self.module.pre_search(request, search)
  161. def on_result(self, request: SXNG_Request, search: "SearchWithPlugins", result: Result) -> bool:
  162. if not hasattr(self.module, "on_result"):
  163. return True
  164. return self.module.on_result(request, search, result)
  165. def post_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> None | list[Result]:
  166. if not hasattr(self.module, "post_search"):
  167. return None
  168. return self.module.post_search(request, search)
  169. class PluginStorage:
  170. """A storage for managing the *plugins* of SearXNG."""
  171. plugin_list: set[Plugin]
  172. """The list of :py:obj:`Plugins` in this storage."""
  173. legacy_plugins = [
  174. "ahmia_filter",
  175. "calculator",
  176. "hostnames",
  177. "oa_doi_rewrite",
  178. "tor_check",
  179. "tracker_url_remover",
  180. "unit_converter",
  181. ]
  182. """Internal plugins implemented in the legacy style (as module / deprecated!)."""
  183. def __init__(self):
  184. self.plugin_list = set()
  185. def __iter__(self):
  186. yield from self.plugin_list
  187. def __len__(self):
  188. return len(self.plugin_list)
  189. @property
  190. def info(self) -> list[PluginInfo]:
  191. return [p.info for p in self.plugin_list]
  192. def load_builtins(self):
  193. """Load plugin modules from:
  194. - the python packages in :origin:`searx/plugins` and
  195. - the external plugins from :ref:`settings plugins`.
  196. """
  197. for f in _default.iterdir():
  198. if f.name.startswith("_"):
  199. continue
  200. if f.stem not in self.legacy_plugins:
  201. self.register_by_fqn(f"searx.plugins.{f.stem}.SXNGPlugin")
  202. continue
  203. # for backward compatibility
  204. mod = load_module(f.name, str(f.parent))
  205. self.register(ModulePlugin(mod))
  206. for fqn in searx.get_setting("plugins"): # type: ignore
  207. self.register_by_fqn(fqn)
  208. def register(self, plugin: Plugin):
  209. """Register a :py:obj:`Plugin`. In case of name collision (if two
  210. plugins have same ID) a :py:obj:`KeyError` exception is raised.
  211. """
  212. if plugin in self.plugin_list:
  213. msg = f"name collision '{plugin.id}'"
  214. plugin.log.critical(msg)
  215. raise KeyError(msg)
  216. self.plugin_list.add(plugin)
  217. plugin.log.debug("plugin has been loaded")
  218. def register_by_fqn(self, fqn: str):
  219. """Register a :py:obj:`Plugin` via its fully qualified class name (FQN).
  220. The FQNs of external plugins could be read from a configuration, for
  221. example, and registered using this method
  222. """
  223. mod_name, _, obj_name = fqn.rpartition('.')
  224. if not mod_name:
  225. # for backward compatibility
  226. code_obj = importlib.import_module(fqn)
  227. else:
  228. mod = importlib.import_module(mod_name)
  229. code_obj = getattr(mod, obj_name, None)
  230. if code_obj is None:
  231. msg = f"plugin {fqn} is not implemented"
  232. log.critical(msg)
  233. raise ValueError(msg)
  234. if isinstance(code_obj, types.ModuleType):
  235. # for backward compatibility
  236. warnings.warn(
  237. f"plugin {fqn} is implemented in a legacy module / migrate to searx.plugins.Plugin", DeprecationWarning
  238. )
  239. self.register(ModulePlugin(code_obj))
  240. return
  241. self.register(code_obj())
  242. def init(self, app: flask.Flask) -> None:
  243. """Calls the method :py:obj:`Plugin.init` of each plugin in this
  244. storage. Depending on its return value, the plugin is removed from
  245. *this* storage or not."""
  246. for plg in self.plugin_list.copy():
  247. if not plg.init(app):
  248. self.plugin_list.remove(plg)
  249. def pre_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> bool:
  250. ret = True
  251. for plugin in [p for p in self.plugin_list if p.id in search.user_plugins]:
  252. try:
  253. ret = bool(plugin.pre_search(request=request, search=search))
  254. except Exception: # pylint: disable=broad-except
  255. plugin.log.exception("Exception while calling pre_search")
  256. continue
  257. if not ret:
  258. # skip this search on the first False from a plugin
  259. break
  260. return ret
  261. def on_result(self, request: SXNG_Request, search: "SearchWithPlugins", result: Result) -> bool:
  262. ret = True
  263. for plugin in [p for p in self.plugin_list if p.id in search.user_plugins]:
  264. try:
  265. ret = bool(plugin.on_result(request=request, search=search, result=result))
  266. except Exception: # pylint: disable=broad-except
  267. plugin.log.exception("Exception while calling on_result")
  268. continue
  269. if not ret:
  270. # ignore this result item on the first False from a plugin
  271. break
  272. return ret
  273. def post_search(self, request: SXNG_Request, search: "SearchWithPlugins") -> None:
  274. """Extend :py:obj:`search.result_container
  275. <searx.results.ResultContainer`> with result items from plugins listed
  276. in :py:obj:`search.user_plugins <SearchWithPlugins.user_plugins>`.
  277. """
  278. keyword = None
  279. for keyword in search.search_query.query.split():
  280. if keyword:
  281. break
  282. for plugin in [p for p in self.plugin_list if p.id in search.user_plugins]:
  283. if plugin.keywords:
  284. # plugin with keywords: skip plugin if no keyword match
  285. if keyword and keyword not in plugin.keywords:
  286. continue
  287. try:
  288. results = plugin.post_search(request=request, search=search) or []
  289. except Exception: # pylint: disable=broad-except
  290. plugin.log.exception("Exception while calling post_search")
  291. continue
  292. # In case of *plugins* prefix ``plugin:`` is set, see searx.result_types.Result
  293. search.result_container.extend(f"plugin: {plugin.id}", results)