vidio.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310
  1. from .common import InfoExtractor
  2. from ..utils import (
  3. clean_html,
  4. ExtractorError,
  5. format_field,
  6. get_element_by_class,
  7. int_or_none,
  8. parse_iso8601,
  9. smuggle_url,
  10. str_or_none,
  11. strip_or_none,
  12. try_get,
  13. unsmuggle_url,
  14. urlencode_postdata,
  15. )
  16. class VidioBaseIE(InfoExtractor):
  17. _LOGIN_URL = 'https://www.vidio.com/users/login'
  18. _NETRC_MACHINE = 'vidio'
  19. def _perform_login(self, username, password):
  20. def is_logged_in():
  21. res = self._download_json(
  22. 'https://www.vidio.com/interactions.json', None, 'Checking if logged in', fatal=False) or {}
  23. return bool(res.get('current_user'))
  24. if is_logged_in():
  25. return
  26. login_page = self._download_webpage(
  27. self._LOGIN_URL, None, 'Downloading log in page')
  28. login_form = self._form_hidden_inputs("login-form", login_page)
  29. login_form.update({
  30. 'user[login]': username,
  31. 'user[password]': password,
  32. })
  33. login_post, login_post_urlh = self._download_webpage_handle(
  34. self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(login_form), expected_status=[302, 401])
  35. if login_post_urlh.status == 401:
  36. if get_element_by_class('onboarding-content-register-popup__title', login_post):
  37. raise ExtractorError(
  38. 'Unable to log in: The provided email has not registered yet.', expected=True)
  39. reason = get_element_by_class('onboarding-form__general-error', login_post) or get_element_by_class('onboarding-modal__title', login_post)
  40. if 'Akun terhubung ke' in reason:
  41. raise ExtractorError(
  42. 'Unable to log in: Your account is linked to a social media account. '
  43. 'Use --cookies to provide account credentials instead', expected=True)
  44. elif reason:
  45. subreason = get_element_by_class('onboarding-modal__description-text', login_post) or ''
  46. raise ExtractorError(
  47. 'Unable to log in: %s. %s' % (reason, clean_html(subreason)), expected=True)
  48. raise ExtractorError('Unable to log in')
  49. def _initialize_pre_login(self):
  50. self._api_key = self._download_json(
  51. 'https://www.vidio.com/auth', None, data=b'')['api_key']
  52. def _call_api(self, url, video_id, note=None):
  53. return self._download_json(url, video_id, note=note, headers={
  54. 'Content-Type': 'application/vnd.api+json',
  55. 'X-API-KEY': self._api_key,
  56. })
  57. class VidioIE(VidioBaseIE):
  58. _VALID_URL = r'https?://(?:www\.)?vidio\.com/(watch|embed)/(?P<id>\d+)-(?P<display_id>[^/?#&]+)'
  59. _TESTS = [{
  60. 'url': 'http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015',
  61. 'md5': 'abac81b1a205a8d94c609a473b5ea62a',
  62. 'info_dict': {
  63. 'id': '165683',
  64. 'display_id': 'dj_ambred-booyah-live-2015',
  65. 'ext': 'mp4',
  66. 'title': 'DJ_AMBRED - Booyah (Live 2015)',
  67. 'description': 'md5:27dc15f819b6a78a626490881adbadf8',
  68. 'thumbnail': r're:^https?://.*\.jpg$',
  69. 'duration': 149,
  70. 'like_count': int,
  71. 'uploader': 'TWELVE Pic',
  72. 'timestamp': 1444902800,
  73. 'upload_date': '20151015',
  74. 'uploader_id': 'twelvepictures',
  75. 'channel': 'Cover Music Video',
  76. 'channel_id': '280236',
  77. 'view_count': int,
  78. 'dislike_count': int,
  79. 'comment_count': int,
  80. 'tags': 'count:3',
  81. 'uploader_url': 'https://www.vidio.com/@twelvepictures',
  82. },
  83. }, {
  84. 'url': 'https://www.vidio.com/watch/77949-south-korea-test-fires-missile-that-can-strike-all-of-the-north',
  85. 'only_matching': True,
  86. }, {
  87. # Premier-exclusive video
  88. 'url': 'https://www.vidio.com/watch/1550718-stand-by-me-doraemon',
  89. 'only_matching': True
  90. }, {
  91. # embed url from https://enamplus.liputan6.com/read/5033648/video-fakta-temuan-suspek-cacar-monyet-di-jawa-tengah
  92. 'url': 'https://www.vidio.com/embed/7115874-fakta-temuan-suspek-cacar-monyet-di-jawa-tengah',
  93. 'info_dict': {
  94. 'id': '7115874',
  95. 'ext': 'mp4',
  96. 'channel_id': '40172876',
  97. 'comment_count': int,
  98. 'uploader_id': 'liputan6',
  99. 'view_count': int,
  100. 'dislike_count': int,
  101. 'upload_date': '20220804',
  102. 'uploader': 'Liputan6.com',
  103. 'display_id': 'fakta-temuan-suspek-cacar-monyet-di-jawa-tengah',
  104. 'channel': 'ENAM PLUS 165',
  105. 'timestamp': 1659605520,
  106. 'title': 'Fakta Temuan Suspek Cacar Monyet di Jawa Tengah',
  107. 'duration': 59,
  108. 'like_count': int,
  109. 'tags': ['monkeypox indonesia', 'cacar monyet menyebar', 'suspek cacar monyet di indonesia', 'fakta', 'hoax atau bukan?', 'jawa tengah'],
  110. 'thumbnail': 'https://thumbor.prod.vidiocdn.com/83PN-_BKm5sS7emLtRxl506MLqQ=/640x360/filters:quality(70)/vidio-web-prod-video/uploads/video/image/7115874/fakta-suspek-cacar-monyet-di-jawa-tengah-24555a.jpg',
  111. 'uploader_url': 'https://www.vidio.com/@liputan6',
  112. 'description': 'md5:6d595a18d3b19ee378e335a6f288d5ac',
  113. },
  114. }]
  115. def _real_extract(self, url):
  116. match = self._match_valid_url(url).groupdict()
  117. video_id, display_id = match.get('id'), match.get('display_id')
  118. data = self._call_api('https://api.vidio.com/videos/' + video_id, display_id)
  119. video = data['videos'][0]
  120. title = video['title'].strip()
  121. is_premium = video.get('is_premium')
  122. if is_premium:
  123. sources = self._download_json(
  124. 'https://www.vidio.com/interactions_stream.json?video_id=%s&type=videos' % video_id,
  125. display_id, note='Downloading premier API JSON')
  126. if not (sources.get('source') or sources.get('source_dash')):
  127. self.raise_login_required('This video is only available for registered users with the appropriate subscription')
  128. formats, subs = [], {}
  129. if sources.get('source'):
  130. hls_formats, hls_subs = self._extract_m3u8_formats_and_subtitles(
  131. sources['source'], display_id, 'mp4', 'm3u8_native')
  132. formats.extend(hls_formats)
  133. subs.update(hls_subs)
  134. if sources.get('source_dash'): # TODO: Find video example with source_dash
  135. dash_formats, dash_subs = self._extract_mpd_formats_and_subtitles(
  136. sources['source_dash'], display_id, 'dash')
  137. formats.extend(dash_formats)
  138. subs.update(dash_subs)
  139. else:
  140. hls_url = data['clips'][0]['hls_url']
  141. formats, subs = self._extract_m3u8_formats_and_subtitles(
  142. hls_url, display_id, 'mp4', 'm3u8_native')
  143. get_first = lambda x: try_get(data, lambda y: y[x + 's'][0], dict) or {}
  144. channel = get_first('channel')
  145. user = get_first('user')
  146. username = user.get('username')
  147. get_count = lambda x: int_or_none(video.get('total_' + x))
  148. return {
  149. 'id': video_id,
  150. 'display_id': display_id,
  151. 'title': title,
  152. 'description': strip_or_none(video.get('description')),
  153. 'thumbnail': video.get('image_url_medium'),
  154. 'duration': int_or_none(video.get('duration')),
  155. 'like_count': get_count('likes'),
  156. 'formats': formats,
  157. 'subtitles': subs,
  158. 'uploader': user.get('name'),
  159. 'timestamp': parse_iso8601(video.get('created_at')),
  160. 'uploader_id': username,
  161. 'uploader_url': format_field(username, None, 'https://www.vidio.com/@%s'),
  162. 'channel': channel.get('name'),
  163. 'channel_id': str_or_none(channel.get('id')),
  164. 'view_count': get_count('view_count'),
  165. 'dislike_count': get_count('dislikes'),
  166. 'comment_count': get_count('comments'),
  167. 'tags': video.get('tag_list'),
  168. }
  169. class VidioPremierIE(VidioBaseIE):
  170. _VALID_URL = r'https?://(?:www\.)?vidio\.com/premier/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
  171. _TESTS = [{
  172. 'url': 'https://www.vidio.com/premier/2885/badai-pasti-berlalu',
  173. 'playlist_mincount': 14,
  174. }, {
  175. # Series with both free and premier-exclusive videos
  176. 'url': 'https://www.vidio.com/premier/2567/sosmed',
  177. 'only_matching': True,
  178. }]
  179. def _playlist_entries(self, playlist_url, display_id):
  180. index = 1
  181. while playlist_url:
  182. playlist_json = self._call_api(playlist_url, display_id, 'Downloading API JSON page %s' % index)
  183. for video_json in playlist_json.get('data', []):
  184. link = video_json['links']['watchpage']
  185. yield self.url_result(link, 'Vidio', video_json['id'])
  186. playlist_url = try_get(playlist_json, lambda x: x['links']['next'])
  187. index += 1
  188. def _real_extract(self, url):
  189. url, idata = unsmuggle_url(url, {})
  190. playlist_id, display_id = self._match_valid_url(url).groups()
  191. playlist_url = idata.get('url')
  192. if playlist_url: # Smuggled data contains an API URL. Download only that playlist
  193. playlist_id = idata['id']
  194. return self.playlist_result(
  195. self._playlist_entries(playlist_url, playlist_id),
  196. playlist_id=playlist_id, playlist_title=idata.get('title'))
  197. playlist_data = self._call_api('https://api.vidio.com/content_profiles/%s/playlists' % playlist_id, display_id)
  198. return self.playlist_from_matches(
  199. playlist_data.get('data', []), playlist_id=playlist_id, ie=self.ie_key(),
  200. getter=lambda data: smuggle_url(url, {
  201. 'url': data['relationships']['videos']['links']['related'],
  202. 'id': data['id'],
  203. 'title': try_get(data, lambda x: x['attributes']['name'])
  204. }))
  205. class VidioLiveIE(VidioBaseIE):
  206. _VALID_URL = r'https?://(?:www\.)?vidio\.com/live/(?P<id>\d+)-(?P<display_id>[^/?#&]+)'
  207. _TESTS = [{
  208. 'url': 'https://www.vidio.com/live/204-sctv',
  209. 'info_dict': {
  210. 'id': '204',
  211. 'title': 'SCTV',
  212. 'uploader': 'SCTV',
  213. 'uploader_id': 'sctv',
  214. 'thumbnail': r're:^https?://.*\.jpg$',
  215. },
  216. }, {
  217. # Premier-exclusive livestream
  218. 'url': 'https://www.vidio.com/live/6362-tvn',
  219. 'only_matching': True,
  220. }, {
  221. # DRM premier-exclusive livestream
  222. 'url': 'https://www.vidio.com/live/6299-bein-1',
  223. 'only_matching': True,
  224. }]
  225. def _real_extract(self, url):
  226. video_id, display_id = self._match_valid_url(url).groups()
  227. stream_data = self._call_api(
  228. 'https://www.vidio.com/api/livestreamings/%s/detail' % video_id, display_id)
  229. stream_meta = stream_data['livestreamings'][0]
  230. user = stream_data.get('users', [{}])[0]
  231. title = stream_meta.get('title')
  232. username = user.get('username')
  233. formats = []
  234. if stream_meta.get('is_drm'):
  235. if not self.get_param('allow_unplayable_formats'):
  236. self.report_drm(video_id)
  237. if stream_meta.get('is_premium'):
  238. sources = self._download_json(
  239. 'https://www.vidio.com/interactions_stream.json?video_id=%s&type=livestreamings' % video_id,
  240. display_id, note='Downloading premier API JSON')
  241. if not (sources.get('source') or sources.get('source_dash')):
  242. self.raise_login_required('This video is only available for registered users with the appropriate subscription')
  243. if str_or_none(sources.get('source')):
  244. token_json = self._download_json(
  245. 'https://www.vidio.com/live/%s/tokens' % video_id,
  246. display_id, note='Downloading HLS token JSON', data=b'')
  247. formats.extend(self._extract_m3u8_formats(
  248. sources['source'] + '?' + token_json.get('token', ''), display_id, 'mp4', 'm3u8_native'))
  249. if str_or_none(sources.get('source_dash')):
  250. pass
  251. else:
  252. if stream_meta.get('stream_token_url'):
  253. token_json = self._download_json(
  254. 'https://www.vidio.com/live/%s/tokens' % video_id,
  255. display_id, note='Downloading HLS token JSON', data=b'')
  256. formats.extend(self._extract_m3u8_formats(
  257. stream_meta['stream_token_url'] + '?' + token_json.get('token', ''),
  258. display_id, 'mp4', 'm3u8_native'))
  259. if stream_meta.get('stream_dash_url'):
  260. pass
  261. if stream_meta.get('stream_url'):
  262. formats.extend(self._extract_m3u8_formats(
  263. stream_meta['stream_url'], display_id, 'mp4', 'm3u8_native'))
  264. return {
  265. 'id': video_id,
  266. 'display_id': display_id,
  267. 'title': title,
  268. 'is_live': True,
  269. 'description': strip_or_none(stream_meta.get('description')),
  270. 'thumbnail': stream_meta.get('image'),
  271. 'like_count': int_or_none(stream_meta.get('like')),
  272. 'dislike_count': int_or_none(stream_meta.get('dislike')),
  273. 'formats': formats,
  274. 'uploader': user.get('name'),
  275. 'timestamp': parse_iso8601(stream_meta.get('start_time')),
  276. 'uploader_id': username,
  277. 'uploader_url': format_field(username, None, 'https://www.vidio.com/@%s'),
  278. }