# yggtorrent.py
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """
  3. Yggtorrent (Videos, Music, Files)
  4. """
  5. from lxml import html
  6. from operator import itemgetter
  7. from datetime import datetime
  8. from urllib.parse import quote
  9. from searx.utils import extract_text, get_torrent_size
  10. from searx.poolrequests import get as http_get
  11. # about
  12. about = {
  13. "website": 'https://www4.yggtorrent.li/',
  14. "wikidata_id": None,
  15. "official_api_documentation": None,
  16. "use_official_api": False,
  17. "require_api_key": False,
  18. "results": 'HTML',
  19. }
  20. # engine dependent config
  21. categories = ['videos', 'music', 'files']
  22. paging = True
  23. # search-url
  24. url = 'https://www4.yggtorrent.li/'
  25. search_url = url + 'engine/search?name={search_term}&do=search&page={pageno}&category={search_type}'
  26. # yggtorrent specific type-definitions
  27. search_types = {'files': 'all',
  28. 'music': '2139',
  29. 'videos': '2145'}
  30. cookies = dict()
  31. def init(engine_settings=None):
  32. global cookies # pylint: disable=global-variable-not-assigned
  33. # initial cookies
  34. resp = http_get(url)
  35. if resp.ok:
  36. for r in resp.history:
  37. cookies.update(r.cookies)
  38. cookies.update(resp.cookies)
  39. # do search-request
  40. def request(query, params):
  41. search_type = search_types.get(params['category'], 'all')
  42. pageno = (params['pageno'] - 1) * 50
  43. params['url'] = search_url.format(search_term=quote(query),
  44. search_type=search_type,
  45. pageno=pageno)
  46. params['cookies'] = cookies
  47. return params
  48. # get response from search-request
  49. def response(resp):
  50. results = []
  51. dom = html.fromstring(resp.text)
  52. search_res = dom.xpath('//section[@id="#torrents"]/div/table/tbody/tr')
  53. # return empty array if nothing is found
  54. if not search_res:
  55. return []
  56. # parse results
  57. for result in search_res:
  58. link = result.xpath('.//a[@id="torrent_name"]')[0]
  59. href = link.attrib.get('href')
  60. title = extract_text(link)
  61. seed = result.xpath('.//td[8]/text()')[0]
  62. leech = result.xpath('.//td[9]/text()')[0]
  63. # convert seed to int if possible
  64. if seed.isdigit():
  65. seed = int(seed)
  66. else:
  67. seed = 0
  68. # convert leech to int if possible
  69. if leech.isdigit():
  70. leech = int(leech)
  71. else:
  72. leech = 0
  73. params = {'url': href,
  74. 'title': title,
  75. 'seed': seed,
  76. 'leech': leech,
  77. 'template': 'torrent.html'}
  78. # let's try to calculate the torrent size
  79. try:
  80. filesize_info = result.xpath('.//td[6]/text()')[0]
  81. filesize = filesize_info[:-2]
  82. filesize_multiplier = filesize_info[-2:].lower()
  83. multiplier_french_to_english = {
  84. 'to': 'TiB',
  85. 'go': 'GiB',
  86. 'mo': 'MiB',
  87. 'ko': 'KiB'
  88. }
  89. filesize = get_torrent_size(filesize, multiplier_french_to_english[filesize_multiplier])
  90. params['filesize'] = filesize
  91. except:
  92. pass
  93. # extract and convert creation date
  94. try:
  95. date_ts = result.xpath('.//td[5]/div/text()')[0]
  96. date = datetime.fromtimestamp(float(date_ts))
  97. params['publishedDate'] = date
  98. except:
  99. pass
  100. # append result
  101. results.append(params)
  102. # return results sorted by seeder
  103. return sorted(results, key=itemgetter('seed'), reverse=True)