proxy.py 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """Implementations for a favicon proxy"""
  3. from __future__ import annotations
  4. from typing import Callable
  5. import importlib
  6. import base64
  7. import pathlib
  8. import urllib.parse
  9. import flask
  10. from httpx import HTTPError
  11. import msgspec
  12. from searx import get_setting
  13. from searx.webutils import new_hmac, is_hmac_of
  14. from searx.exceptions import SearxEngineResponseException
  15. from searx.extended_types import sxng_request
  16. from .resolvers import DEFAULT_RESOLVER_MAP
  17. from . import cache
# Module-wide cache of default-favicon data URLs.  It is filled and read by
# ``FaviconProxyConfig.favicon_data_url``; the key is a string built from the
# replacement keyword arguments passed to that method.
DEFAULT_FAVICON_URL: dict[str, str] = {}

# The active favicon proxy configuration.  It stays ``None`` until ``init`` is
# called, so the functions of this module must not be used before that.
CFG: FaviconProxyConfig = None  # type: ignore
def init(cfg: FaviconProxyConfig) -> None:
    """Install ``cfg`` as the module-wide favicon proxy configuration.

    The object is stored in the global ``CFG``, which is read by the other
    functions of this module.
    """
    global CFG  # pylint: disable=global-statement
    CFG = cfg
  23. def _initial_resolver_map():
  24. d = {}
  25. name: str = get_setting("search.favicon_resolver", None) # type: ignore
  26. if name:
  27. func = DEFAULT_RESOLVER_MAP.get(name)
  28. if func:
  29. d = {name: f"searx.favicons.resolvers.{func.__name__}"}
  30. return d
  31. class FaviconProxyConfig(msgspec.Struct):
  32. """Configuration of the favicon proxy."""
  33. max_age: int = 60 * 60 * 24 * 7 # seven days
  34. """HTTP header Cache-Control_ ``max-age``
  35. .. _Cache-Control: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control
  36. """
  37. secret_key: str = get_setting("server.secret_key") # type: ignore
  38. """By default, the value from :ref:`server.secret_key <settings server>`
  39. setting is used."""
  40. resolver_timeout: int = get_setting("outgoing.request_timeout") # type: ignore
  41. """Timeout which the resolvers should not exceed, is usually passed to the
  42. outgoing request of the resolver. By default, the value from
  43. :ref:`outgoing.request_timeout <settings outgoing>` setting is used."""
  44. resolver_map: dict[str, str] = msgspec.field(default_factory=_initial_resolver_map)
  45. """The resolver_map is a key / value dictionary where the key is the name of
  46. the resolver and the value is the fully qualifying name (fqn) of resolver's
  47. function (the callable). The resolvers from the python module
  48. :py:obj:`searx.favicons.resolver` are available by default."""
  49. def get_resolver(self, name: str) -> Callable | None:
  50. """Returns the callable object (function) of the resolver with the
  51. ``name``. If no resolver is registered for the ``name``, ``None`` is
  52. returned.
  53. """
  54. fqn = self.resolver_map.get(name)
  55. if fqn is None:
  56. return None
  57. mod_name, _, func_name = fqn.rpartition('.')
  58. mod = importlib.import_module(mod_name)
  59. func = getattr(mod, func_name)
  60. if func is None:
  61. raise ValueError(f"resolver {fqn} is not implemented")
  62. return func
  63. favicon_path: str = get_setting("ui.static_path") + "/themes/{theme}/img/empty_favicon.svg" # type: ignore
  64. favicon_mime_type: str = "image/svg+xml"
  65. def favicon(self, **replacements):
  66. """Returns pathname and mimetype of the default favicon."""
  67. return (
  68. pathlib.Path(self.favicon_path.format(**replacements)),
  69. self.favicon_mime_type,
  70. )
  71. def favicon_data_url(self, **replacements):
  72. """Returns data image URL of the default favicon."""
  73. cache_key = ", ".join(f"{x}:{replacements[x]}" for x in sorted(list(replacements.keys()), key=str))
  74. data_url = DEFAULT_FAVICON_URL.get(cache_key)
  75. if data_url is not None:
  76. return data_url
  77. fav, mimetype = CFG.favicon(**replacements)
  78. # hint: encoding utf-8 limits favicons to be a SVG image
  79. with fav.open("r", encoding="utf-8") as f:
  80. data_url = f.read()
  81. data_url = urllib.parse.quote(data_url)
  82. data_url = f"data:{mimetype};utf8,{data_url}"
  83. DEFAULT_FAVICON_URL[cache_key] = data_url
  84. return data_url
  85. def favicon_proxy():
  86. """REST API of SearXNG's favicon proxy service
  87. ::
  88. /favicon_proxy?authority=<...>&h=<...>
  89. ``authority``:
  90. Domain name :rfc:`3986` / see :py:obj:`favicon_url`
  91. ``h``:
  92. HMAC :rfc:`2104`, build up from the :ref:`server.secret_key <settings
  93. server>` setting.
  94. """
  95. authority = sxng_request.args.get('authority')
  96. # malformed request or RFC 3986 authority
  97. if not authority or "/" in authority:
  98. return '', 400
  99. # malformed request / does not have authorisation
  100. if not is_hmac_of(
  101. CFG.secret_key,
  102. authority.encode(),
  103. sxng_request.args.get('h', ''),
  104. ):
  105. return '', 400
  106. resolver = sxng_request.preferences.get_value('favicon_resolver') # type: ignore
  107. # if resolver is empty or not valid, just return HTTP 400.
  108. if not resolver or resolver not in CFG.resolver_map.keys():
  109. return "", 400
  110. data, mime = search_favicon(resolver, authority)
  111. if data is not None and mime is not None:
  112. resp = flask.Response(data, mimetype=mime) # type: ignore
  113. resp.headers['Cache-Control'] = f"max-age={CFG.max_age}"
  114. return resp
  115. # return default favicon from static path
  116. theme = sxng_request.preferences.get_value("theme") # type: ignore
  117. fav, mimetype = CFG.favicon(theme=theme)
  118. return flask.send_from_directory(fav.parent, fav.name, mimetype=mimetype)
  119. def search_favicon(resolver: str, authority: str) -> tuple[None | bytes, None | str]:
  120. """Sends the request to the favicon resolver and returns a tuple for the
  121. favicon. The tuple consists of ``(data, mime)``, if the resolver has not
  122. determined a favicon, both values are ``None``.
  123. ``data``:
  124. Binary data of the favicon.
  125. ``mime``:
  126. Mime type of the favicon.
  127. """
  128. data, mime = (None, None)
  129. func = CFG.get_resolver(resolver)
  130. if func is None:
  131. return data, mime
  132. # to avoid superfluous requests to the resolver, first look in the cache
  133. data_mime = cache.CACHE(resolver, authority)
  134. if data_mime is not None:
  135. return data_mime
  136. try:
  137. data, mime = func(authority, timeout=CFG.resolver_timeout)
  138. if data is None or mime is None:
  139. data, mime = (None, None)
  140. except (HTTPError, SearxEngineResponseException):
  141. pass
  142. cache.CACHE.set(resolver, authority, mime, data)
  143. return data, mime
  144. def favicon_url(authority: str) -> str:
  145. """Function to generate the image URL used for favicons in SearXNG's result
  146. lists. The ``authority`` argument (aka netloc / :rfc:`3986`) is usually a
  147. (sub-) domain name. This function is used in the HTML (jinja) templates.
  148. .. code:: html
  149. <div class="favicon">
  150. <img src="{{ favicon_url(result.parsed_url.netloc) }}">
  151. </div>
  152. The returned URL is a route to :py:obj:`favicon_proxy` REST API.
  153. If the favicon is already in the cache, the returned URL is a `data URL`_
  154. (something like ``data:image/png;base64,...``). By generating a data url from
  155. the :py:obj:`.cache.FaviconCache`, additional HTTP roundtripps via the
  156. :py:obj:`favicon_proxy` are saved. However, it must also be borne in mind
  157. that data urls are not cached in the client (web browser).
  158. .. _data URL: https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs
  159. """
  160. resolver = sxng_request.preferences.get_value('favicon_resolver') # type: ignore
  161. # if resolver is empty or not valid, just return nothing.
  162. if not resolver or resolver not in CFG.resolver_map.keys():
  163. return ""
  164. data_mime = cache.CACHE(resolver, authority)
  165. if data_mime == (None, None):
  166. # we have already checked, the resolver does not have a favicon
  167. theme = sxng_request.preferences.get_value("theme") # type: ignore
  168. return CFG.favicon_data_url(theme=theme)
  169. if data_mime is not None:
  170. data, mime = data_mime
  171. return f"data:{mime};base64,{str(base64.b64encode(data), 'utf-8')}" # type: ignore
  172. h = new_hmac(CFG.secret_key, authority.encode())
  173. proxy_url = flask.url_for('favicon_proxy')
  174. query = urllib.parse.urlencode({"authority": authority, "h": h})
  175. return f"{proxy_url}?{query}"