geizhals.py 2.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
"""Geizhals is a German website to compare the price of a product on the
most common German shopping sites and find the lowest price.

The sorting of the search results can be influenced by adding one of the
following ``sort:<order>`` directives to the search term:

``sort:asc`` or ``sort:price``
  To sort by price in ascending order.

``sort:desc``
  To sort by price in descending order.

"""
  11. import re
  12. from urllib.parse import urlencode
  13. from lxml import html
  14. from searx.utils import eval_xpath, eval_xpath_list, extract_text
  15. about = {
  16. 'website': 'https://geizhals.de',
  17. 'wikidata_id': 'Q15977657',
  18. 'use_official_api': False,
  19. 'official_api_documentation': None,
  20. 'require_api_key': False,
  21. 'results': 'HTML',
  22. 'language': 'de',
  23. }
  24. paging = True
  25. categories = ['shopping']
  26. base_url = "https://geizhals.de"
  27. sort_order = 'relevance'
  28. SORT_RE = re.compile(r"sort:(\w+)")
  29. sort_order_map = {
  30. 'relevance': None,
  31. 'price': 'p',
  32. 'asc': 'p',
  33. 'desc': '-p',
  34. }
  35. def request(query, params):
  36. sort = None
  37. sort_order_path = SORT_RE.search(query)
  38. if sort_order_path:
  39. sort = sort_order_map.get(sort_order_path.group(1))
  40. query = SORT_RE.sub("", query)
  41. logger.debug(query)
  42. args = {
  43. 'fs': query,
  44. 'pg': params['pageno'],
  45. 'toggle_all': 1, # load item specs
  46. 'sort': sort,
  47. }
  48. params['url'] = f"{base_url}/?{urlencode(args)}"
  49. return params
  50. def response(resp):
  51. results = []
  52. dom = html.fromstring(resp.text)
  53. for result in eval_xpath_list(dom, "//article[contains(@class, 'listview__item')]"):
  54. content = []
  55. for spec in eval_xpath_list(result, ".//div[contains(@class, 'specs-grid__item')]"):
  56. content.append(f"{extract_text(eval_xpath(spec, './dt'))}: {extract_text(eval_xpath(spec, './dd'))}")
  57. metadata = [
  58. extract_text(eval_xpath(result, ".//div[contains(@class, 'stars-rating-label')]")),
  59. extract_text(eval_xpath(result, ".//div[contains(@class, 'listview__offercount')]")),
  60. ]
  61. item = {
  62. 'template': 'products.html',
  63. 'url': (
  64. base_url + "/" + extract_text(eval_xpath(result, ".//a[contains(@class, 'listview__name-link')]/@href"))
  65. ),
  66. 'title': extract_text(eval_xpath(result, ".//h3[contains(@class, 'listview__name')]")),
  67. 'content': ' | '.join(content),
  68. 'thumbnail': extract_text(eval_xpath(result, ".//img[contains(@class, 'listview__image')]/@src")),
  69. 'metadata': ', '.join(item for item in metadata if item),
  70. }
  71. best_price = extract_text(eval_xpath(result, ".//a[contains(@class, 'listview__price-link')]")).split(" ")
  72. if len(best_price) > 1:
  73. item["price"] = f"Bestes Angebot: {best_price[1]}€"
  74. results.append(item)
  75. return results