error_recorder.py 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # pylint: disable=missing-module-docstring, invalid-name
  3. import typing
  4. import inspect
  5. from json import JSONDecodeError
  6. from urllib.parse import urlparse
  7. from httpx import HTTPError, HTTPStatusError
  8. from searx.exceptions import (
  9. SearxXPathSyntaxException,
  10. SearxEngineXPathException,
  11. SearxEngineAPIException,
  12. SearxEngineAccessDeniedException,
  13. )
  14. from searx import searx_parent_dir, settings
  15. from searx.engines import engines
# Module-level registry of recorded errors.  add_error_context() fills it as
# {engine name: {ErrorContext: number of occurrences}}.
errors_per_engines: typing.Dict[str, typing.Dict['ErrorContext', int]] = {}
  17. class ErrorContext: # pylint: disable=missing-class-docstring
  18. __slots__ = (
  19. 'filename',
  20. 'function',
  21. 'line_no',
  22. 'code',
  23. 'exception_classname',
  24. 'log_message',
  25. 'log_parameters',
  26. 'secondary',
  27. )
  28. def __init__( # pylint: disable=too-many-arguments
  29. self, filename, function, line_no, code, exception_classname, log_message, log_parameters, secondary
  30. ):
  31. self.filename = filename
  32. self.function = function
  33. self.line_no = line_no
  34. self.code = code
  35. self.exception_classname = exception_classname
  36. self.log_message = log_message
  37. self.log_parameters = log_parameters
  38. self.secondary = secondary
  39. def __eq__(self, o) -> bool: # pylint: disable=invalid-name
  40. if not isinstance(o, ErrorContext):
  41. return False
  42. return (
  43. self.filename == o.filename
  44. and self.function == o.function
  45. and self.line_no == o.line_no
  46. and self.code == o.code
  47. and self.exception_classname == o.exception_classname
  48. and self.log_message == o.log_message
  49. and self.log_parameters == o.log_parameters
  50. and self.secondary == o.secondary
  51. )
  52. def __hash__(self):
  53. return hash(
  54. (
  55. self.filename,
  56. self.function,
  57. self.line_no,
  58. self.code,
  59. self.exception_classname,
  60. self.log_message,
  61. self.log_parameters,
  62. self.secondary,
  63. )
  64. )
  65. def __repr__(self):
  66. return "ErrorContext({!r}, {!r}, {!r}, {!r}, {!r}, {!r}) {!r}".format(
  67. self.filename,
  68. self.line_no,
  69. self.code,
  70. self.exception_classname,
  71. self.log_message,
  72. self.log_parameters,
  73. self.secondary,
  74. )
  75. def add_error_context(engine_name: str, error_context: ErrorContext) -> None:
  76. errors_for_engine = errors_per_engines.setdefault(engine_name, {})
  77. errors_for_engine[error_context] = errors_for_engine.get(error_context, 0) + 1
  78. engines[engine_name].logger.warning('%s', str(error_context))
  79. def get_trace(traces):
  80. for trace in reversed(traces):
  81. split_filename = trace.filename.split('/')
  82. if '/'.join(split_filename[-3:-1]) == 'searx/engines':
  83. return trace
  84. if '/'.join(split_filename[-4:-1]) == 'searx/search/processors':
  85. return trace
  86. return traces[-1]
  87. def get_hostname(exc: HTTPError) -> typing.Optional[None]:
  88. url = exc.request.url
  89. if url is None and exc.response is not None:
  90. url = exc.response.url
  91. return urlparse(url).netloc
  92. def get_request_exception_messages(
  93. exc: HTTPError,
  94. ) -> typing.Tuple[typing.Optional[str], typing.Optional[str], typing.Optional[str]]:
  95. url = None
  96. status_code = None
  97. reason = None
  98. hostname = None
  99. if hasattr(exc, '_request') and exc._request is not None: # pylint: disable=protected-access
  100. # exc.request is property that raise an RuntimeException
  101. # if exc._request is not defined.
  102. url = exc.request.url
  103. if url is None and hasattr(exc, 'response') and exc.response is not None:
  104. url = exc.response.url
  105. if url is not None:
  106. hostname = url.host
  107. if isinstance(exc, HTTPStatusError):
  108. status_code = str(exc.response.status_code)
  109. reason = exc.response.reason_phrase
  110. return (status_code, reason, hostname)
  111. def get_messages(exc, filename) -> typing.Tuple: # pylint: disable=too-many-return-statements
  112. if isinstance(exc, JSONDecodeError):
  113. return (exc.msg,)
  114. if isinstance(exc, TypeError):
  115. return (str(exc),)
  116. if isinstance(exc, ValueError) and 'lxml' in filename:
  117. return (str(exc),)
  118. if isinstance(exc, HTTPError):
  119. return get_request_exception_messages(exc)
  120. if isinstance(exc, SearxXPathSyntaxException):
  121. return (exc.xpath_str, exc.message)
  122. if isinstance(exc, SearxEngineXPathException):
  123. return (exc.xpath_str, exc.message)
  124. if isinstance(exc, SearxEngineAPIException):
  125. return (str(exc.args[0]),)
  126. if isinstance(exc, SearxEngineAccessDeniedException):
  127. return (exc.message,)
  128. return ()
  129. def get_exception_classname(exc: Exception) -> str:
  130. exc_class = exc.__class__
  131. exc_name = exc_class.__qualname__
  132. exc_module = exc_class.__module__
  133. if exc_module is None or exc_module == str.__class__.__module__:
  134. return exc_name
  135. return exc_module + '.' + exc_name
  136. def get_error_context(framerecords, exception_classname, log_message, log_parameters, secondary) -> ErrorContext:
  137. searx_frame = get_trace(framerecords)
  138. filename = searx_frame.filename
  139. if filename.startswith(searx_parent_dir):
  140. filename = filename[len(searx_parent_dir) + 1 :]
  141. function = searx_frame.function
  142. line_no = searx_frame.lineno
  143. code = searx_frame.code_context[0].strip()
  144. del framerecords
  145. return ErrorContext(filename, function, line_no, code, exception_classname, log_message, log_parameters, secondary)
  146. def count_exception(engine_name: str, exc: Exception, secondary: bool = False) -> None:
  147. if not settings['general']['enable_metrics']:
  148. return
  149. framerecords = inspect.trace()
  150. try:
  151. exception_classname = get_exception_classname(exc)
  152. log_parameters = get_messages(exc, framerecords[-1][1])
  153. error_context = get_error_context(framerecords, exception_classname, None, log_parameters, secondary)
  154. add_error_context(engine_name, error_context)
  155. finally:
  156. del framerecords
  157. def count_error(
  158. engine_name: str, log_message: str, log_parameters: typing.Optional[typing.Tuple] = None, secondary: bool = False
  159. ) -> None:
  160. if not settings['general']['enable_metrics']:
  161. return
  162. framerecords = list(reversed(inspect.stack()[1:]))
  163. try:
  164. error_context = get_error_context(framerecords, None, log_message, log_parameters or (), secondary)
  165. add_error_context(engine_name, error_context)
  166. finally:
  167. del framerecords