nyaa.py 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
"""
 Nyaa.se (Anime Bittorrent tracker)

 @website http://www.nyaa.se/
 @provide-api no
 @using-api no
 @results HTML
 @stable no (HTML can change)
 @parse url, title, content, seed, leech, torrentfile
"""
  10. from lxml import html
  11. from searx.engines.xpath import extract_text
  12. from searx.url_utils import urlencode
  13. # engine dependent config
  14. categories = ['files', 'images', 'videos', 'music']
  15. paging = True
  16. # search-url
  17. base_url = 'http://www.nyaa.se/'
  18. search_url = base_url + '?page=search&{query}&offset={offset}'
  19. # xpath queries
  20. xpath_results = '//table[@class="tlist"]//tr[contains(@class, "tlistrow")]'
  21. xpath_category = './/td[@class="tlisticon"]/a'
  22. xpath_title = './/td[@class="tlistname"]/a'
  23. xpath_torrent_file = './/td[@class="tlistdownload"]/a'
  24. xpath_filesize = './/td[@class="tlistsize"]/text()'
  25. xpath_seeds = './/td[@class="tlistsn"]/text()'
  26. xpath_leeches = './/td[@class="tlistln"]/text()'
  27. xpath_downloads = './/td[@class="tlistdn"]/text()'
  28. # convert a variable to integer or return 0 if it's not a number
  29. def int_or_zero(num):
  30. if isinstance(num, list):
  31. if len(num) < 1:
  32. return 0
  33. num = num[0]
  34. if num.isdigit():
  35. return int(num)
  36. return 0
  37. # get multiplier to convert torrent size to bytes
  38. def get_filesize_mul(suffix):
  39. return {
  40. 'KB': 1024,
  41. 'MB': 1024 ** 2,
  42. 'GB': 1024 ** 3,
  43. 'TB': 1024 ** 4,
  44. 'KIB': 1024,
  45. 'MIB': 1024 ** 2,
  46. 'GIB': 1024 ** 3,
  47. 'TIB': 1024 ** 4
  48. }[str(suffix).upper()]
  49. # do search-request
  50. def request(query, params):
  51. query = urlencode({'term': query})
  52. params['url'] = search_url.format(query=query, offset=params['pageno'])
  53. return params
  54. # get response from search-request
  55. def response(resp):
  56. results = []
  57. dom = html.fromstring(resp.text)
  58. for result in dom.xpath(xpath_results):
  59. # category in which our torrent belongs
  60. category = result.xpath(xpath_category)[0].attrib.get('title')
  61. # torrent title
  62. page_a = result.xpath(xpath_title)[0]
  63. title = extract_text(page_a)
  64. # link to the page
  65. href = page_a.attrib.get('href')
  66. # link to the torrent file
  67. torrent_link = result.xpath(xpath_torrent_file)[0].attrib.get('href')
  68. # torrent size
  69. try:
  70. file_size, suffix = result.xpath(xpath_filesize)[0].split(' ')
  71. file_size = int(float(file_size) * get_filesize_mul(suffix))
  72. except:
  73. file_size = None
  74. # seed count
  75. seed = int_or_zero(result.xpath(xpath_seeds))
  76. # leech count
  77. leech = int_or_zero(result.xpath(xpath_leeches))
  78. # torrent downloads count
  79. downloads = int_or_zero(result.xpath(xpath_downloads))
  80. # content string contains all information not included into template
  81. content = 'Category: "{category}". Downloaded {downloads} times.'
  82. content = content.format(category=category, downloads=downloads)
  83. results.append({'url': href,
  84. 'title': title,
  85. 'content': content,
  86. 'seed': seed,
  87. 'leech': leech,
  88. 'filesize': file_size,
  89. 'torrentfile': torrent_link,
  90. 'template': 'torrent.html'})
  91. return results