onesearch.py 1.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. """Onesearch
  2. """
import re
from urllib.parse import quote_plus, unquote

from lxml.html import fromstring

from searx.utils import (
    eval_xpath,
    extract_text,
)
  10. # about
  11. about = {
  12. "website": 'https://www.onesearch.com/',
  13. "wikidata_id": None,
  14. "use_official_api": False,
  15. "require_api_key": False,
  16. "results": 'HTML',
  17. }
  18. # engine dependent config
  19. categories = ['general']
  20. paging = True
  21. # search-url
  22. URL = 'https://www.onesearch.com/yhs/search;?p=%s&b=%d'
  23. def request(query, params):
  24. starting_from = (params['pageno'] * 10) - 9
  25. params['url'] = URL % (query, starting_from)
  26. return params
  27. # get response from search-request
  28. def response(resp):
  29. results = []
  30. doc = fromstring(resp.text)
  31. titles_tags = eval_xpath(doc, '//div[contains(@class, "algo")]//h3[contains(@class, "title")]')
  32. contents = eval_xpath(doc, '//div[contains(@class, "algo")]/div[contains(@class, "compText")]/p')
  33. onesearch_urls = eval_xpath(doc, '//div[contains(@class, "algo")]//h3[contains(@class, "title")]/a/@href')
  34. for title_tag, content, onesearch_url in zip(titles_tags, contents, onesearch_urls):
  35. matches = re.search(r'RU=(.*?)\/', onesearch_url)
  36. results.append({
  37. 'title': title_tag.text_content(),
  38. 'content': extract_text(content),
  39. 'url': unquote(matches.group(1)),
  40. })
  41. return results