# vrv.py (11 KB)
import base64
import hashlib
import hmac
import json
import random
import secrets
import string
import time
import urllib.parse

from .common import InfoExtractor
from ..compat import compat_HTTPError, compat_urllib_parse_urlencode
from ..utils import (
    ExtractorError,
    float_or_none,
    int_or_none,
    join_nonempty,
    traverse_obj,
)
  18. class VRVBaseIE(InfoExtractor):
  19. _API_DOMAIN = None
  20. _API_PARAMS = {}
  21. _CMS_SIGNING = {}
  22. _TOKEN = None
  23. _TOKEN_SECRET = ''
  24. def _call_api(self, path, video_id, note, data=None):
  25. # https://tools.ietf.org/html/rfc5849#section-3
  26. base_url = self._API_DOMAIN + '/core/' + path
  27. query = [
  28. ('oauth_consumer_key', self._API_PARAMS['oAuthKey']),
  29. ('oauth_nonce', ''.join([random.choice(string.ascii_letters) for _ in range(32)])),
  30. ('oauth_signature_method', 'HMAC-SHA1'),
  31. ('oauth_timestamp', int(time.time())),
  32. ]
  33. if self._TOKEN:
  34. query.append(('oauth_token', self._TOKEN))
  35. encoded_query = compat_urllib_parse_urlencode(query)
  36. headers = self.geo_verification_headers()
  37. if data:
  38. data = json.dumps(data).encode()
  39. headers['Content-Type'] = 'application/json'
  40. base_string = '&'.join([
  41. 'POST' if data else 'GET',
  42. urllib.parse.quote(base_url, ''),
  43. urllib.parse.quote(encoded_query, '')])
  44. oauth_signature = base64.b64encode(hmac.new(
  45. (self._API_PARAMS['oAuthSecret'] + '&' + self._TOKEN_SECRET).encode('ascii'),
  46. base_string.encode(), hashlib.sha1).digest()).decode()
  47. encoded_query += '&oauth_signature=' + urllib.parse.quote(oauth_signature, '')
  48. try:
  49. return self._download_json(
  50. '?'.join([base_url, encoded_query]), video_id,
  51. note='Downloading %s JSON metadata' % note, headers=headers, data=data)
  52. except ExtractorError as e:
  53. if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
  54. raise ExtractorError(json.loads(e.cause.read().decode())['message'], expected=True)
  55. raise
  56. def _call_cms(self, path, video_id, note):
  57. if not self._CMS_SIGNING:
  58. index = self._call_api('index', video_id, 'CMS Signing')
  59. self._CMS_SIGNING = index.get('cms_signing') or {}
  60. if not self._CMS_SIGNING:
  61. for signing_policy in index.get('signing_policies', []):
  62. signing_path = signing_policy.get('path')
  63. if signing_path and signing_path.startswith('/cms/'):
  64. name, value = signing_policy.get('name'), signing_policy.get('value')
  65. if name and value:
  66. self._CMS_SIGNING[name] = value
  67. return self._download_json(
  68. self._API_DOMAIN + path, video_id, query=self._CMS_SIGNING,
  69. note='Downloading %s JSON metadata' % note, headers=self.geo_verification_headers())
  70. def _get_cms_resource(self, resource_key, video_id):
  71. return self._call_api(
  72. 'cms_resource', video_id, 'resource path', data={
  73. 'resource_key': resource_key,
  74. })['__links__']['cms_resource']['href']
  75. def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang):
  76. if not url or stream_format not in ('hls', 'dash', 'adaptive_hls'):
  77. return []
  78. format_id = join_nonempty(
  79. stream_format,
  80. audio_lang and 'audio-%s' % audio_lang,
  81. hardsub_lang and 'hardsub-%s' % hardsub_lang)
  82. if 'hls' in stream_format:
  83. adaptive_formats = self._extract_m3u8_formats(
  84. url, video_id, 'mp4', m3u8_id=format_id,
  85. note='Downloading %s information' % format_id,
  86. fatal=False)
  87. elif stream_format == 'dash':
  88. adaptive_formats = self._extract_mpd_formats(
  89. url, video_id, mpd_id=format_id,
  90. note='Downloading %s information' % format_id,
  91. fatal=False)
  92. if audio_lang:
  93. for f in adaptive_formats:
  94. if f.get('acodec') != 'none':
  95. f['language'] = audio_lang
  96. return adaptive_formats
  97. def _set_api_params(self):
  98. webpage = self._download_webpage(
  99. 'https://vrv.co/', None, headers=self.geo_verification_headers())
  100. self._API_PARAMS = self._parse_json(self._search_regex(
  101. [
  102. r'window\.__APP_CONFIG__\s*=\s*({.+?})(?:</script>|;)',
  103. r'window\.__APP_CONFIG__\s*=\s*({.+})'
  104. ], webpage, 'app config'), None)['cxApiParams']
  105. self._API_DOMAIN = self._API_PARAMS.get('apiDomain', 'https://api.vrv.co')
  106. class VRVIE(VRVBaseIE):
  107. IE_NAME = 'vrv'
  108. _VALID_URL = r'https?://(?:www\.)?vrv\.co/watch/(?P<id>[A-Z0-9]+)'
  109. _TESTS = [{
  110. 'url': 'https://vrv.co/watch/GR9PNZ396/Hidden-America-with-Jonah-Ray:BOSTON-WHERE-THE-PAST-IS-THE-PRESENT',
  111. 'info_dict': {
  112. 'id': 'GR9PNZ396',
  113. 'ext': 'mp4',
  114. 'title': 'BOSTON: WHERE THE PAST IS THE PRESENT',
  115. 'description': 'md5:4ec8844ac262ca2df9e67c0983c6b83f',
  116. 'uploader_id': 'seeso',
  117. },
  118. 'params': {
  119. # m3u8 download
  120. 'skip_download': True,
  121. },
  122. }, {
  123. # movie listing
  124. 'url': 'https://vrv.co/watch/G6NQXZ1J6/Lily-CAT',
  125. 'info_dict': {
  126. 'id': 'G6NQXZ1J6',
  127. 'title': 'Lily C.A.T',
  128. 'description': 'md5:988b031e7809a6aeb60968be4af7db07',
  129. },
  130. 'playlist_count': 2,
  131. }]
  132. _NETRC_MACHINE = 'vrv'
  133. def _perform_login(self, username, password):
  134. token_credentials = self._call_api(
  135. 'authenticate/by:credentials', None, 'Token Credentials', data={
  136. 'email': username,
  137. 'password': password,
  138. })
  139. self._TOKEN = token_credentials['oauth_token']
  140. self._TOKEN_SECRET = token_credentials['oauth_token_secret']
  141. def _initialize_pre_login(self):
  142. return self._set_api_params()
  143. def _real_extract(self, url):
  144. video_id = self._match_id(url)
  145. object_data = self._call_cms(self._get_cms_resource(
  146. 'cms:/objects/' + video_id, video_id), video_id, 'object')['items'][0]
  147. resource_path = object_data['__links__']['resource']['href']
  148. video_data = self._call_cms(resource_path, video_id, 'video')
  149. title = video_data['title']
  150. description = video_data.get('description')
  151. if video_data.get('__class__') == 'movie_listing':
  152. items = self._call_cms(
  153. video_data['__links__']['movie_listing/movies']['href'],
  154. video_id, 'movie listing').get('items') or []
  155. if len(items) != 1:
  156. entries = []
  157. for item in items:
  158. item_id = item.get('id')
  159. if not item_id:
  160. continue
  161. entries.append(self.url_result(
  162. 'https://vrv.co/watch/' + item_id,
  163. self.ie_key(), item_id, item.get('title')))
  164. return self.playlist_result(entries, video_id, title, description)
  165. video_data = items[0]
  166. streams_path = video_data['__links__'].get('streams', {}).get('href')
  167. if not streams_path:
  168. self.raise_login_required()
  169. streams_json = self._call_cms(streams_path, video_id, 'streams')
  170. audio_locale = streams_json.get('audio_locale')
  171. formats = []
  172. for stream_type, streams in streams_json.get('streams', {}).items():
  173. if stream_type in ('adaptive_hls', 'adaptive_dash'):
  174. for stream in streams.values():
  175. formats.extend(self._extract_vrv_formats(
  176. stream.get('url'), video_id, stream_type.split('_')[1],
  177. audio_locale, stream.get('hardsub_locale')))
  178. subtitles = {}
  179. for k in ('captions', 'subtitles'):
  180. for subtitle in streams_json.get(k, {}).values():
  181. subtitle_url = subtitle.get('url')
  182. if not subtitle_url:
  183. continue
  184. subtitles.setdefault(subtitle.get('locale', 'en-US'), []).append({
  185. 'url': subtitle_url,
  186. 'ext': subtitle.get('format', 'ass'),
  187. })
  188. thumbnails = []
  189. for thumbnail in traverse_obj(video_data, ('images', 'thumbnail', ..., ...)) or []:
  190. thumbnail_url = thumbnail.get('source')
  191. if not thumbnail_url:
  192. continue
  193. thumbnails.append({
  194. 'url': thumbnail_url,
  195. 'width': int_or_none(thumbnail.get('width')),
  196. 'height': int_or_none(thumbnail.get('height')),
  197. })
  198. return {
  199. 'id': video_id,
  200. 'title': title,
  201. 'formats': formats,
  202. 'subtitles': subtitles,
  203. 'thumbnails': thumbnails,
  204. 'description': description,
  205. 'duration': float_or_none(video_data.get('duration_ms'), 1000),
  206. 'uploader_id': video_data.get('channel_id'),
  207. 'series': video_data.get('series_title'),
  208. 'season': video_data.get('season_title'),
  209. 'season_number': int_or_none(video_data.get('season_number')),
  210. 'season_id': video_data.get('season_id'),
  211. 'episode': title,
  212. 'episode_number': int_or_none(video_data.get('episode_number')),
  213. 'episode_id': video_data.get('production_episode_id'),
  214. }
  215. class VRVSeriesIE(VRVBaseIE):
  216. IE_NAME = 'vrv:series'
  217. _VALID_URL = r'https?://(?:www\.)?vrv\.co/series/(?P<id>[A-Z0-9]+)'
  218. _TEST = {
  219. 'url': 'https://vrv.co/series/G68VXG3G6/The-Perfect-Insider',
  220. 'info_dict': {
  221. 'id': 'G68VXG3G6',
  222. },
  223. 'playlist_mincount': 11,
  224. }
  225. def _initialize_pre_login(self):
  226. return self._set_api_params()
  227. def _real_extract(self, url):
  228. series_id = self._match_id(url)
  229. seasons_path = self._get_cms_resource(
  230. 'cms:/seasons?series_id=' + series_id, series_id)
  231. seasons_data = self._call_cms(seasons_path, series_id, 'seasons')
  232. entries = []
  233. for season in seasons_data.get('items', []):
  234. episodes_path = season['__links__']['season/episodes']['href']
  235. episodes = self._call_cms(episodes_path, series_id, 'episodes')
  236. for episode in episodes.get('items', []):
  237. episode_id = episode['id']
  238. entries.append(self.url_result(
  239. 'https://vrv.co/watch/' + episode_id,
  240. 'VRV', episode_id, episode.get('title')))
  241. return self.playlist_result(entries, series_id)