tracker_url_remover.py 1.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # pylint: disable=missing-module-docstring
  3. import re
  4. from urllib.parse import urlunparse, parse_qsl, urlencode
  5. from flask_babel import gettext
  6. regexes = {
  7. re.compile(r'utm_[^&]+'),
  8. re.compile(r'(wkey|wemail)[^&]*'),
  9. re.compile(r'(_hsenc|_hsmi|hsCtaTracking|__hssc|__hstc|__hsfp)[^&]*'),
  10. re.compile(r'&$'),
  11. }
  12. name = gettext('Tracker URL remover')
  13. description = gettext('Remove trackers arguments from the returned URL')
  14. default_on = True
  15. preference_section = 'privacy'
  16. def on_result(_request, _search, result):
  17. if 'parsed_url' not in result:
  18. return True
  19. query = result['parsed_url'].query
  20. if query == "":
  21. return True
  22. parsed_query = parse_qsl(query)
  23. changes = 0
  24. for i, (param_name, _) in enumerate(list(parsed_query)):
  25. for reg in regexes:
  26. if reg.match(param_name):
  27. parsed_query.pop(i - changes)
  28. changes += 1
  29. result['parsed_url'] = result['parsed_url']._replace(query=urlencode(parsed_query))
  30. result['url'] = urlunparse(result['parsed_url'])
  31. break
  32. return True