error_recorder.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. import typing
  2. import inspect
  3. import logging
  4. from json import JSONDecodeError
  5. from urllib.parse import urlparse
  6. from requests.exceptions import RequestException
  7. from searx.exceptions import (SearxXPathSyntaxException, SearxEngineXPathException, SearxEngineAPIException,
  8. SearxEngineAccessDeniedException)
  9. from searx import logger
# Configure the root logger at import time with the INFO threshold.
logging.basicConfig(level=logging.INFO)
# Error counters filled by add_error_context():
# engine name -> {ErrorContext: number of times that context was recorded}.
errors_per_engines = {}
  12. class ErrorContext:
  13. __slots__ = 'filename', 'function', 'line_no', 'code', 'exception_classname', 'log_message', 'log_parameters'
  14. def __init__(self, filename, function, line_no, code, exception_classname, log_message, log_parameters):
  15. self.filename = filename
  16. self.function = function
  17. self.line_no = line_no
  18. self.code = code
  19. self.exception_classname = exception_classname
  20. self.log_message = log_message
  21. self.log_parameters = log_parameters
  22. def __eq__(self, o) -> bool:
  23. if not isinstance(o, ErrorContext):
  24. return False
  25. return self.filename == o.filename and self.function == o.function and self.line_no == o.line_no\
  26. and self.code == o.code and self.exception_classname == o.exception_classname\
  27. and self.log_message == o.log_message and self.log_parameters == o.log_parameters
  28. def __hash__(self):
  29. return hash((self.filename, self.function, self.line_no, self.code, self.exception_classname, self.log_message,
  30. self.log_parameters))
  31. def __repr__(self):
  32. return "ErrorContext({!r}, {!r}, {!r}, {!r}, {!r}, {!r})".\
  33. format(self.filename, self.line_no, self.code, self.exception_classname, self.log_message,
  34. self.log_parameters)
  35. def add_error_context(engine_name: str, error_context: ErrorContext) -> None:
  36. errors_for_engine = errors_per_engines.setdefault(engine_name, {})
  37. errors_for_engine[error_context] = errors_for_engine.get(error_context, 0) + 1
  38. logger.debug('%s: %s', engine_name, str(error_context))
  39. def get_trace(traces):
  40. for trace in reversed(traces):
  41. split_filename = trace.filename.split('/')
  42. if '/'.join(split_filename[-3:-1]) == 'searx/engines':
  43. return trace
  44. if '/'.join(split_filename[-4:-1]) == 'searx/search/processors':
  45. return trace
  46. return traces[-1]
  47. def get_hostname(exc: RequestException) -> typing.Optional[None]:
  48. url = exc.request.url
  49. if url is None and exc.response is not None:
  50. url = exc.response.url
  51. return urlparse(url).netloc
  52. def get_request_exception_messages(exc: RequestException)\
  53. -> typing.Tuple[typing.Optional[str], typing.Optional[str], typing.Optional[str]]:
  54. url = None
  55. status_code = None
  56. reason = None
  57. hostname = None
  58. if exc.request is not None:
  59. url = exc.request.url
  60. if url is None and exc.response is not None:
  61. url = exc.response.url
  62. if url is not None:
  63. hostname = str(urlparse(url).netloc)
  64. if exc.response is not None:
  65. status_code = str(exc.response.status_code)
  66. reason = exc.response.reason
  67. return (status_code, reason, hostname)
  68. def get_messages(exc, filename) -> typing.Tuple:
  69. if isinstance(exc, JSONDecodeError):
  70. return (exc.msg, )
  71. if isinstance(exc, TypeError):
  72. return (str(exc), )
  73. if isinstance(exc, ValueError) and 'lxml' in filename:
  74. return (str(exc), )
  75. if isinstance(exc, RequestException):
  76. return get_request_exception_messages(exc)
  77. if isinstance(exc, SearxXPathSyntaxException):
  78. return (exc.xpath_str, exc.message)
  79. if isinstance(exc, SearxEngineXPathException):
  80. return (exc.xpath_str, exc.message)
  81. if isinstance(exc, SearxEngineAPIException):
  82. return (str(exc.args[0]), )
  83. if isinstance(exc, SearxEngineAccessDeniedException):
  84. return (exc.message, )
  85. return ()
  86. def get_exception_classname(exc: Exception) -> str:
  87. exc_class = exc.__class__
  88. exc_name = exc_class.__qualname__
  89. exc_module = exc_class.__module__
  90. if exc_module is None or exc_module == str.__class__.__module__:
  91. return exc_name
  92. return exc_module + '.' + exc_name
  93. def get_error_context(framerecords, exception_classname, log_message, log_parameters) -> ErrorContext:
  94. searx_frame = get_trace(framerecords)
  95. filename = searx_frame.filename
  96. function = searx_frame.function
  97. line_no = searx_frame.lineno
  98. code = searx_frame.code_context[0].strip()
  99. del framerecords
  100. return ErrorContext(filename, function, line_no, code, exception_classname, log_message, log_parameters)
  101. def record_exception(engine_name: str, exc: Exception) -> None:
  102. framerecords = inspect.trace()
  103. try:
  104. exception_classname = get_exception_classname(exc)
  105. log_parameters = get_messages(exc, framerecords[-1][1])
  106. error_context = get_error_context(framerecords, exception_classname, None, log_parameters)
  107. add_error_context(engine_name, error_context)
  108. finally:
  109. del framerecords
  110. def record_error(engine_name: str, log_message: str, log_parameters: typing.Optional[typing.Tuple] = None) -> None:
  111. framerecords = list(reversed(inspect.stack()[1:]))
  112. try:
  113. error_context = get_error_context(framerecords, None, log_message, log_parameters or ())
  114. add_error_context(engine_name, error_context)
  115. finally:
  116. del framerecords