ivi.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266
  1. import json
  2. import re
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. ExtractorError,
  6. int_or_none,
  7. qualities,
  8. )
  class IviIE(InfoExtractor):
      # Extractor for single ivi.ru / ivi.tv videos (movies and serial episodes).
      # Metadata and media URLs come from the JSON "light" API at _LIGHT_URL;
      # season/episode numbers and the description are scraped from the web page.
      IE_DESC = 'ivi.ru'
      IE_NAME = 'ivi'
      _VALID_URL = r'https?://(?:www\.)?ivi\.(?:ru|tv)/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
      _EMBED_REGEX = [r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1']
      _GEO_BYPASS = False
      _GEO_COUNTRIES = ['RU']
      # Key used to CMAC-sign requests sent on behalf of site 353.
      _LIGHT_KEY = b'\xf1\x02\x32\xb7\xbc\x5c\x7a\xe8\xf7\x96\xc1\x33\x2b\x27\xa1\x8c'
      _LIGHT_URL = 'https://api.ivi.ru/light/'

      _TESTS = [
          # Single movie
          {
              'url': 'http://www.ivi.ru/watch/53141',
              'md5': '6ff5be2254e796ed346251d117196cf4',
              'info_dict': {
                  'id': '53141',
                  'ext': 'mp4',
                  'title': 'Иван Васильевич меняет профессию',
                  'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
                  'duration': 5498,
                  'thumbnail': r're:^https?://.*\.jpg$',
              },
              'skip': 'Only works from Russia',
          },
          # Serial's series
          {
              'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/9549',
              'md5': '221f56b35e3ed815fde2df71032f4b3e',
              'info_dict': {
                  'id': '9549',
                  'ext': 'mp4',
                  'title': 'Двое из ларца - Дело Гольдберга (1 часть)',
                  'series': 'Двое из ларца',
                  'season': 'Сезон 1',
                  'season_number': 1,
                  'episode': 'Дело Гольдберга (1 часть)',
                  'episode_number': 1,
                  'duration': 2655,
                  'thumbnail': r're:^https?://.*\.jpg$',
              },
              'skip': 'Only works from Russia',
          },
          {
              # with MP4-HD720 format
              'url': 'http://www.ivi.ru/watch/146500',
              'md5': 'd63d35cdbfa1ea61a5eafec7cc523e1e',
              'info_dict': {
                  'id': '146500',
                  'ext': 'mp4',
                  'title': 'Кукла',
                  'description': 'md5:ffca9372399976a2d260a407cc74cce6',
                  'duration': 5599,
                  'thumbnail': r're:^https?://.*\.jpg$',
              },
              'skip': 'Only works from Russia',
          },
          {
              'url': 'https://www.ivi.tv/watch/33560/',
              'only_matching': True,
          },
      ]

      # Sorted by quality
      _KNOWN_FORMATS = (
          'MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi',
          'MP4-SHQ', 'MP4-HD720', 'MP4-HD1080')

      def _real_extract(self, url):
          """Extract the info dict for a single ivi video.

          The content request is tried for site 353 first, signed with a
          Blowfish CMAC over the server timestamp plus the request body, and
          then for site 183 without a signature. Season/episode details and
          the description are then scraped from the video web page.

          Raises ExtractorError when the API reports an error for the last
          site tried (or a "does not exist" error for any site), and a
          geo-restriction error when the API reports 'NotAllowedForLocation'.
          """
          video_id = self._match_id(url)

          # 'site' keeps a literal '%d' placeholder that is filled in per site
          # inside the loop below.
          data = json.dumps({
              'method': 'da.content.get',
              'params': [
                  video_id, {
                      'site': 's%d',
                      'referrer': 'http://www.ivi.ru/watch/%s' % video_id,
                      'contentid': video_id
                  }
              ]
          })

          for site in (353, 183):
              content_data = (data % site).encode()
              if site == 353:
                  # Signing needs pycryptodome, available under either the
                  # Cryptodome or the Crypto package name.
                  try:
                      from Cryptodome.Cipher import Blowfish
                      from Cryptodome.Hash import CMAC
                      pycryptodome_found = True
                  except ImportError:
                      try:
                          from Crypto.Cipher import Blowfish
                          from Crypto.Hash import CMAC
                          pycryptodome_found = True
                      except ImportError:
                          # Cannot sign: fall through to the unsigned site.
                          pycryptodome_found = False
                          continue

                  timestamp = (self._download_json(
                      self._LIGHT_URL, video_id,
                      'Downloading timestamp JSON', data=json.dumps({
                          'method': 'da.timestamp.get',
                          'params': []
                      }).encode(), fatal=False) or {}).get('result')
                  if not timestamp:
                      continue

                  query = {
                      'ts': timestamp,
                      'sign': CMAC.new(self._LIGHT_KEY, timestamp.encode() + content_data, Blowfish).hexdigest(),
                  }
              else:
                  query = {}

              video_json = self._download_json(
                  self._LIGHT_URL, video_id,
                  'Downloading video JSON', data=content_data, query=query)

              error = video_json.get('error')
              if error:
                  origin = error.get('origin')
                  message = error.get('message') or error.get('user_message')
                  extractor_msg = 'Unable to download video %s'
                  if origin == 'NotAllowedForLocation':
                      self.raise_geo_restricted(message, self._GEO_COUNTRIES)
                  elif origin == 'NoRedisValidData':
                      extractor_msg = 'Video %s does not exist'
                  elif site == 353:
                      # Other errors on the signed site: retry unsigned.
                      continue
                  elif not pycryptodome_found:
                      raise ExtractorError('pycryptodome not found. Please install', expected=True)
                  elif message:
                      extractor_msg += ': ' + message
                  raise ExtractorError(extractor_msg % video_id, expected=True)
              else:
                  break

          result = video_json['result']
          title = result['title']

          quality = qualities(self._KNOWN_FORMATS)
          allow_unplayable = self.get_param('allow_unplayable_formats')

          formats = []
          # `or []` also covers an explicit null value for 'files'.
          for f in result.get('files') or []:
              f_url = f.get('url')
              if not f_url:
                  continue
              content_format = f.get('content_format')
              # A missing content_format must not break the substring checks.
              format_tag = content_format or ''
              if not allow_unplayable and ('-MDRM-' in format_tag or '-FPS-' in format_tag):
                  continue
              formats.append({
                  'url': f_url,
                  'format_id': content_format,
                  'quality': quality(content_format),
                  'filesize': int_or_none(f.get('size_in_bytes')),
              })

          # Use one truthiness test for both fields so an empty compilation
          # name never yields a ' - <title>' title without an episode.
          compilation = result.get('compilation') or None
          if compilation:
              episode = title
              title = '%s - %s' % (compilation, title)
          else:
              episode = None

          thumbnails = [{
              'url': preview['url'],
              'id': preview.get('content_format'),
          } for preview in result.get('preview') or [] if preview.get('url')]

          webpage = self._download_webpage(url, video_id)

          season = self._search_regex(
              r'<li[^>]+class="season active"[^>]*><a[^>]+>([^<]+)',
              webpage, 'season', default=None)
          season_number = int_or_none(self._search_regex(
              r'<li[^>]+class="season active"[^>]*><a[^>]+data-season(?:-index)?="(\d+)"',
              webpage, 'season number', default=None))

          episode_number = int_or_none(self._search_regex(
              r'[^>]+itemprop="episode"[^>]*>\s*<meta[^>]+itemprop="episodeNumber"[^>]+content="(\d+)',
              webpage, 'episode number', default=None))

          description = self._og_search_description(webpage, default=None) or self._html_search_meta(
              'description', webpage, 'description', default=None)

          return {
              'id': video_id,
              'title': title,
              'series': compilation,
              'season': season,
              'season_number': season_number,
              'episode': episode,
              'episode_number': episode_number,
              'thumbnails': thumbnails,
              'description': description,
              'duration': int_or_none(result.get('duration')),
              'formats': formats,
          }
  class IviCompilationIE(InfoExtractor):
      # Playlist extractor for ivi.ru compilations (serials), either the
      # whole compilation or a single season of it.
      IE_DESC = 'ivi.ru compilations'
      IE_NAME = 'ivi:compilation'
      _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
      _TESTS = [{
          'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa',
          'info_dict': {
              'id': 'dvoe_iz_lartsa',
              'title': 'Двое из ларца (2006 - 2008)',
          },
          'playlist_mincount': 24,
      }, {
          'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/season1',
          'info_dict': {
              'id': 'dvoe_iz_lartsa/season1',
              'title': 'Двое из ларца (2006 - 2008) 1 сезон',
          },
          'playlist_mincount': 12,
      }]

      def _extract_entries(self, html, compilation_id):
          """Return a url_result entry for every episode link found in html."""
          episode_link_re = r'<a\b[^>]+\bhref=["\']/watch/%s/(\d+)["\']' % compilation_id
          found = []
          for episode_id in re.findall(episode_link_re, html):
              episode_url = 'http://www.ivi.ru/watch/%s/%s' % (compilation_id, episode_id)
              found.append(self.url_result(episode_url, IviIE.ie_key()))
          return found

      def _real_extract(self, url):
          """Build a playlist for a season URL or for a whole compilation URL."""
          match = self._match_valid_url(url)
          compilation_id, requested_season = match.group('compilationid', 'seasonid')

          # Season link: the page itself lists every episode of that season.
          if requested_season is not None:
              page = self._download_webpage(
                  url, compilation_id, 'Downloading season %s web page' % requested_season)
              season_title = self._html_search_meta('title', page, 'title')
              return self.playlist_result(
                  self._extract_entries(page, compilation_id),
                  '%s/season%s' % (compilation_id, requested_season),
                  season_title)

          # Compilation link
          page = self._download_webpage(url, compilation_id, 'Downloading compilation web page')
          compilation_title = self._html_search_meta('title', page, 'title')
          season_numbers = re.findall(
              r'<a href="/watch/%s/season(\d+)' % compilation_id, page)

          if season_numbers:
              # Episodes are spread over one page per season; collect them all.
              episodes = []
              for number in season_numbers:
                  season_html = self._download_webpage(
                      'http://www.ivi.ru/watch/%s/season%s' % (compilation_id, number),
                      compilation_id, 'Downloading season %s web page' % number)
                  episodes.extend(self._extract_entries(season_html, compilation_id))
          else:
              # No seasons in this compilation: episodes are on this page.
              episodes = self._extract_entries(page, compilation_id)

          return self.playlist_result(episodes, compilation_id, compilation_title)