seznam.py 1.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """
  3. Seznam
  4. """
  5. from urllib.parse import urlencode, urlparse
  6. from lxml import html
  7. from searx.poolrequests import get
  8. from searx.exceptions import SearxEngineAccessDeniedException
  9. from searx.utils import (
  10. extract_text,
  11. eval_xpath_list,
  12. eval_xpath_getindex,
  13. eval_xpath,
  14. )
  15. # about
  16. about = {
  17. "website": "https://www.seznam.cz/",
  18. "wikidata_id": "Q3490485",
  19. "official_api_documentation": "https://api.sklik.cz/",
  20. "use_official_api": False,
  21. "require_api_key": False,
  22. "results": "HTML",
  23. }
  24. base_url = 'https://search.seznam.cz/'
  25. def request(query, params):
  26. response_index = get(base_url, headers=params['headers'], raise_for_httperror=True)
  27. dom = html.fromstring(response_index.text)
  28. url_params = {
  29. 'q': query,
  30. 'oq': query,
  31. }
  32. for e in eval_xpath_list(dom, '//input[@type="hidden"]'):
  33. name = e.get('name')
  34. value = e.get('value')
  35. url_params[name] = value
  36. params['url'] = base_url + '?' + urlencode(url_params)
  37. params['cookies'] = response_index.cookies
  38. return params
  39. def response(resp):
  40. resp_url = urlparse(resp.url)
  41. if resp_url.path.startswith('/verify'):
  42. raise SearxEngineAccessDeniedException()
  43. results = []
  44. dom = html.fromstring(resp.content.decode())
  45. for result_element in eval_xpath_list(dom, '//div[@data-dot="results"]/div'):
  46. result_data = eval_xpath_getindex(result_element, './/div[contains(@class, "bec586")]', 0, default=None)
  47. if result_data is None:
  48. continue
  49. title_element = eval_xpath_getindex(result_element, './/h3/a', 0)
  50. results.append({
  51. 'url': title_element.get('href'),
  52. 'title': extract_text(title_element),
  53. 'content': extract_text(eval_xpath(result_data, './/div[@class="_3eded7"]')),
  54. })
  55. return results