deviantart.py

# SPDX-License-Identifier: AGPL-3.0-or-later
"""Deviantart (Images)
"""

import urllib.parse
from lxml import html

from searx.utils import extract_text, eval_xpath, eval_xpath_list

# about
about = {
    "website": 'https://www.deviantart.com/',
    "wikidata_id": 'Q46523',
    "official_api_documentation": 'https://www.deviantart.com/developers/',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

# engine dependent config
categories = ['images']
paging = True

# search-url
base_url = 'https://www.deviantart.com'

results_xpath = '//div[@class="_2pZkk"]/div/div/a'
url_xpath = './@href'
thumbnail_src_xpath = './div/img/@src'
img_src_xpath = './div/img/@srcset'
title_xpath = './@aria-label'
premium_xpath = '../div/div/div/text()'
premium_keytext = 'Watch the artist to view this deviation'
cursor_xpath = '(//a[@class="_1OGeq"]/@href)[last()]'
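
# Paging works through SearXNG's engine_data mechanism: response() extracts the
# "next page" link via cursor_xpath and returns it under the key 'nextpage';
# SearXNG passes it back to request() in params['engine_data'] on the next call.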


def request(query, params):

    # https://www.deviantart.com/search?q=foo

    nextpage_url = params['engine_data'].get('nextpage')
    # don't use nextpage when user selected to jump back to page 1
    if params['pageno'] > 1 and nextpage_url is not None:
        params['url'] = nextpage_url
    else:
        params['url'] = f"{base_url}/search?{urllib.parse.urlencode({'q': query})}"

    return params
  38. def response(resp):
  39. results = []
  40. dom = html.fromstring(resp.text)
  41. for result in eval_xpath_list(dom, results_xpath):
  42. # skip images that are blurred
  43. _text = extract_text(eval_xpath(result, premium_xpath))
  44. if _text and premium_keytext in _text:
  45. continue
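
        # srcset lists "<url> <width>" candidates; take the URL of the first
        # candidate and cut the '/v1/...' transformation suffix from its path
        # to get the untransformed image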
        img_src = extract_text(eval_xpath(result, img_src_xpath))
        if img_src:
            img_src = img_src.split(' ')[0]
            parsed_url = urllib.parse.urlparse(img_src)
            img_src = parsed_url._replace(path=parsed_url.path.split('/v1')[0]).geturl()

        results.append(
            {
                'template': 'images.html',
                'url': extract_text(eval_xpath(result, url_xpath)),
                'img_src': img_src,
                'thumbnail_src': extract_text(eval_xpath(result, thumbnail_src_xpath)),
                'title': extract_text(eval_xpath(result, title_xpath)),
            }
        )
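
    # hand the "next page" URL back to SearXNG (key 'nextpage') so request()
    # can reuse it for the following page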
    nextpage_url = extract_text(eval_xpath(dom, cursor_xpath))
    if nextpage_url:
        results.append(
            {
                'engine_data': nextpage_url.replace("http://", "https://"),
                'key': 'nextpage',
            }
        )

    return results