torznab.py 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
"""Torznab_ is an API specification that provides a standardized way to query
torrent sites for content. It is used by a number of torrent applications,
  4. including Prowlarr_ and Jackett_.
  5. Using this engine together with Prowlarr_ or Jackett_ allows you to search
  6. a huge number of torrent sites which are not directly supported.
  7. Configuration
  8. =============
  9. The engine has the following settings:
  10. ``base_url``:
  11. Torznab endpoint URL.
  12. ``api_key``:
  13. The API key to use for authentication.
  14. ``torznab_categories``:
  15. The categories to use for searching. This is a list of category IDs. See
  16. Prowlarr-categories_ or Jackett-categories_ for more information.
  17. ``show_torrent_files``:
  18. Whether to show the torrent file in the search results. Be careful as using
  19. this with Prowlarr_ or Jackett_ leaks the API key. This should be used only
  20. if you are querying a Torznab endpoint without authentication or if the
  21. instance is private. Be aware that private trackers may ban you if you share
  22. the torrent file. Defaults to ``false``.
  23. ``show_magnet_links``:
  24. Whether to show the magnet link in the search results. Be aware that private
  25. trackers may ban you if you share the magnet link. Defaults to ``true``.
  26. .. _Torznab:
  27. https://torznab.github.io/spec-1.3-draft/index.html
  28. .. _Prowlarr:
  29. https://github.com/Prowlarr/Prowlarr
  30. .. _Jackett:
  31. https://github.com/Jackett/Jackett
  32. .. _Prowlarr-categories:
  33. https://wiki.servarr.com/en/prowlarr/cardigann-yml-definition#categories
  34. .. _Jackett-categories:
  35. https://github.com/Jackett/Jackett/wiki/Jackett-Categories
  36. Implementations
  37. ===============
  38. """
  39. from __future__ import annotations
  40. from typing import TYPE_CHECKING
  41. from typing import List, Dict, Any
  42. from datetime import datetime
  43. from urllib.parse import quote
  44. from lxml import etree # type: ignore
  45. from searx.exceptions import SearxEngineAPIException
  46. from searx.utils import humanize_bytes
  47. if TYPE_CHECKING:
  48. import httpx
  49. import logging
  50. logger: logging.Logger
# engine settings

# Descriptive metadata about this engine: no dedicated website or wikidata
# entry, results are delivered as XML via the Torznab API.
about: Dict[str, Any] = {
    "website": None,
    "wikidata_id": None,
    "official_api_documentation": "https://torznab.github.io/spec-1.3-draft",
    "use_official_api": True,
    "require_api_key": False,
    "results": 'XML',
}
# Result categories of this engine.
categories: List[str] = ['files']
# Paging and time-range filtering are switched off for this engine.
paging: bool = False
time_range_support: bool = False

# defined in settings.yml
# example (Jackett): "http://localhost:9117/api/v2.0/indexers/all/results/torznab"
# Torznab endpoint URL; init() rejects an empty value.
base_url: str = ''
# API key used for authentication; only sent (as ``apikey``) when non-empty.
api_key: str = ''
# https://newznab.readthedocs.io/en/latest/misc/api/#predefined-categories
# Category IDs to search; sent comma-joined as ``cat`` when the list is non-empty.
torznab_categories: List[str] = []
# Whether results expose the torrent file URL (may leak the API key, see the
# module docstring).
show_torrent_files: bool = False
# Whether results expose the magnet link.
show_magnet_links: bool = True
  71. def init(engine_settings=None): # pylint: disable=unused-argument
  72. """Initialize the engine."""
  73. if len(base_url) < 1:
  74. raise ValueError('missing torznab base_url')
  75. def request(query: str, params: Dict[str, Any]) -> Dict[str, Any]:
  76. """Build the request params."""
  77. search_url: str = base_url + '?t=search&q={search_query}'
  78. if len(api_key) > 0:
  79. search_url += '&apikey={api_key}'
  80. if len(torznab_categories) > 0:
  81. search_url += '&cat={torznab_categories}'
  82. params['url'] = search_url.format(
  83. search_query=quote(query), api_key=api_key, torznab_categories=",".join([str(x) for x in torznab_categories])
  84. )
  85. return params
  86. def response(resp: httpx.Response) -> List[Dict[str, Any]]:
  87. """Parse the XML response and return a list of results."""
  88. results = []
  89. search_results = etree.XML(resp.content)
  90. # handle errors: https://newznab.readthedocs.io/en/latest/misc/api/#newznab-error-codes
  91. if search_results.tag == "error":
  92. raise SearxEngineAPIException(search_results.get("description"))
  93. channel: etree.Element = search_results[0]
  94. item: etree.Element
  95. for item in channel.iterfind('item'):
  96. result: Dict[str, Any] = build_result(item)
  97. results.append(result)
  98. return results
  99. def build_result(item: etree.Element) -> Dict[str, Any]:
  100. """Build a result from a XML item."""
  101. # extract attributes from XML
  102. # see https://torznab.github.io/spec-1.3-draft/torznab/Specification-v1.3.html#predefined-attributes
  103. enclosure: etree.Element | None = item.find('enclosure')
  104. enclosure_url: str | None = None
  105. if enclosure is not None:
  106. enclosure_url = enclosure.get('url')
  107. filesize = get_attribute(item, 'size')
  108. if not filesize and enclosure:
  109. filesize = enclosure.get('length')
  110. guid = get_attribute(item, 'guid')
  111. comments = get_attribute(item, 'comments')
  112. pubDate = get_attribute(item, 'pubDate')
  113. seeders = get_torznab_attribute(item, 'seeders')
  114. leechers = get_torznab_attribute(item, 'leechers')
  115. peers = get_torznab_attribute(item, 'peers')
  116. # map attributes to searx result
  117. result: Dict[str, Any] = {
  118. 'template': 'torrent.html',
  119. 'title': get_attribute(item, 'title'),
  120. 'filesize': humanize_bytes(int(filesize)) if filesize else None,
  121. 'files': get_attribute(item, 'files'),
  122. 'seed': seeders,
  123. 'leech': _map_leechers(leechers, seeders, peers),
  124. 'url': _map_result_url(guid, comments),
  125. 'publishedDate': _map_published_date(pubDate),
  126. 'torrentfile': None,
  127. 'magnetlink': None,
  128. }
  129. link = get_attribute(item, 'link')
  130. if show_torrent_files:
  131. result['torrentfile'] = _map_torrent_file(link, enclosure_url)
  132. if show_magnet_links:
  133. magneturl = get_torznab_attribute(item, 'magneturl')
  134. result['magnetlink'] = _map_magnet_link(magneturl, guid, enclosure_url, link)
  135. return result
  136. def _map_result_url(guid: str | None, comments: str | None) -> str | None:
  137. if guid and guid.startswith('http'):
  138. return guid
  139. if comments and comments.startswith('http'):
  140. return comments
  141. return None
  142. def _map_leechers(leechers: str | None, seeders: str | None, peers: str | None) -> str | None:
  143. if leechers:
  144. return leechers
  145. if seeders and peers:
  146. return str(int(peers) - int(seeders))
  147. return None
  148. def _map_published_date(pubDate: str | None) -> datetime | None:
  149. if pubDate is not None:
  150. try:
  151. return datetime.strptime(pubDate, '%a, %d %b %Y %H:%M:%S %z')
  152. except (ValueError, TypeError) as e:
  153. logger.debug("ignore exception (publishedDate): %s", e)
  154. return None
  155. def _map_torrent_file(link: str | None, enclosure_url: str | None) -> str | None:
  156. if link and link.startswith('http'):
  157. return link
  158. if enclosure_url and enclosure_url.startswith('http'):
  159. return enclosure_url
  160. return None
  161. def _map_magnet_link(
  162. magneturl: str | None,
  163. guid: str | None,
  164. enclosure_url: str | None,
  165. link: str | None,
  166. ) -> str | None:
  167. if magneturl and magneturl.startswith('magnet'):
  168. return magneturl
  169. if guid and guid.startswith('magnet'):
  170. return guid
  171. if enclosure_url and enclosure_url.startswith('magnet'):
  172. return enclosure_url
  173. if link and link.startswith('magnet'):
  174. return link
  175. return None
  176. def get_attribute(item: etree.Element, property_name: str) -> str | None:
  177. """Get attribute from item."""
  178. property_element: etree.Element | None = item.find(property_name)
  179. if property_element is not None:
  180. return property_element.text
  181. return None
  182. def get_torznab_attribute(item: etree.Element, attribute_name: str) -> str | None:
  183. """Get torznab special attribute from item."""
  184. element: etree.Element | None = item.find(
  185. './/torznab:attr[@name="{attribute_name}"]'.format(attribute_name=attribute_name),
  186. {'torznab': 'http://torznab.com/schemas/2015/feed'},
  187. )
  188. if element is not None:
  189. return element.get("value")
  190. return None