# tracker_url_remover.py (1.2 KB)

  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # pylint: disable=missing-module-docstring
  3. from __future__ import annotations
  4. import re
  5. from urllib.parse import urlunparse, parse_qsl, urlencode
  6. from flask_babel import gettext
  7. regexes = {
  8. re.compile(r'utm_[^&]+'),
  9. re.compile(r'(wkey|wemail)[^&]*'),
  10. re.compile(r'(_hsenc|_hsmi|hsCtaTracking|__hssc|__hstc|__hsfp)[^&]*'),
  11. re.compile(r'&$'),
  12. }
  13. name = gettext('Tracker URL remover')
  14. description = gettext('Remove trackers arguments from the returned URL')
  15. default_on = True
  16. preference_section = 'privacy'
  17. def on_result(_request, _search, result) -> bool:
  18. parsed_url = getattr(result, "parsed_url", None)
  19. if not parsed_url:
  20. return True
  21. if parsed_url.query == "":
  22. return True
  23. parsed_query = parse_qsl(parsed_url.query)
  24. changes = 0
  25. for i, (param_name, _) in enumerate(list(parsed_query)):
  26. for reg in regexes:
  27. if reg.match(param_name):
  28. parsed_query.pop(i - changes)
  29. changes += 1
  30. result.parsed_url = result.parsed_url._replace(query=urlencode(parsed_query))
  31. result.url = urlunparse(result.parsed_url)
  32. break
  33. return True