google_play.py 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """Google Play Apps & Google Play Movies
  3. """
  4. from urllib.parse import urlencode
  5. from lxml import html
  6. from searx.utils import (
  7. eval_xpath,
  8. extract_url,
  9. extract_text,
  10. eval_xpath_list,
  11. eval_xpath_getindex,
  12. )
  13. about = {
  14. "website": "https://play.google.com/",
  15. "wikidata_id": "Q79576",
  16. "use_official_api": False,
  17. "require_api_key": False,
  18. "results": "HTML",
  19. }
  20. send_accept_language_header = True
  21. play_categ = None # apps|movies
  22. base_url = 'https://play.google.com'
  23. search_url = base_url + "/store/search?{query}&c={play_categ}"
  24. def request(query, params):
  25. if play_categ not in ('movies', 'apps'):
  26. raise ValueError(f"unknown google play category: {play_categ}")
  27. params["url"] = search_url.format(
  28. query=urlencode({"q": query}),
  29. play_categ=play_categ,
  30. )
  31. params['cookies']['CONSENT'] = "YES+"
  32. return params
  33. def response(resp):
  34. if play_categ == 'movies':
  35. return response_movies(resp)
  36. if play_categ == 'apps':
  37. return response_apps(resp)
  38. raise ValueError(f"Unsupported play category: {play_categ}")
  39. def response_movies(resp):
  40. results = []
  41. dom = html.fromstring(resp.text)
  42. for section in eval_xpath(dom, '//c-wiz/section/header/..'):
  43. sec_name = extract_text(eval_xpath(section, './header'))
  44. for item in eval_xpath(section, './/a'):
  45. url = base_url + item.get('href')
  46. div_1, div_2 = eval_xpath(item, './div')[:2]
  47. title = extract_text(eval_xpath(div_2, './div[@title]'))
  48. metadata = extract_text(eval_xpath(div_2, './div[@class]'))
  49. img = eval_xpath(div_1, './/img')[0]
  50. thumbnail = img.get('src')
  51. results.append(
  52. {
  53. "url": url,
  54. "title": title,
  55. "content": sec_name,
  56. "thumbnail": thumbnail,
  57. 'metadata': metadata,
  58. 'template': 'videos.html',
  59. }
  60. )
  61. return results
  62. def response_apps(resp):
  63. results = []
  64. dom = html.fromstring(resp.text)
  65. if eval_xpath(dom, '//div[@class="v6DsQb"]'):
  66. return []
  67. spot = eval_xpath_getindex(dom, '//div[@class="ipRz4"]', 0, None)
  68. if spot is not None:
  69. url = extract_url(eval_xpath(spot, './a[@class="Qfxief"]/@href'), search_url)
  70. title = extract_text(eval_xpath(spot, './/div[@class="vWM94c"]'))
  71. content = extract_text(eval_xpath(spot, './/div[@class="LbQbAe"]'))
  72. img = extract_text(eval_xpath(spot, './/img[@class="T75of bzqKMd"]/@src'))
  73. results.append({"url": url, "title": title, "content": content, "img_src": img})
  74. more = eval_xpath_list(dom, '//c-wiz[@jsrenderer="RBsfwb"]//div[@role="listitem"]', min_len=1)
  75. for result in more:
  76. url = extract_url(eval_xpath(result, ".//a/@href"), search_url)
  77. title = extract_text(eval_xpath(result, './/span[@class="DdYX5"]'))
  78. content = extract_text(eval_xpath(result, './/span[@class="wMUdtb"]'))
  79. img = extract_text(
  80. eval_xpath(
  81. result,
  82. './/img[@class="T75of stzEZd" or @class="T75of etjhNc Q8CSx "]/@src',
  83. )
  84. )
  85. results.append({"url": url, "title": title, "content": content, "img_src": img})
  86. for suggestion in eval_xpath_list(dom, '//c-wiz[@jsrenderer="qyd4Kb"]//div[@class="ULeU3b neq64b"]'):
  87. results.append({"suggestion": extract_text(eval_xpath(suggestion, './/div[@class="Epkrse "]'))})
  88. return results