vice.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322
  1. import functools
  2. import hashlib
  3. import json
  4. import random
  5. import time
  6. from .adobepass import AdobePassIE
  7. from .common import InfoExtractor
  8. from .youtube import YoutubeIE
  9. from ..compat import (
  10. compat_HTTPError,
  11. compat_str,
  12. )
  13. from ..utils import (
  14. clean_html,
  15. ExtractorError,
  16. int_or_none,
  17. OnDemandPagedList,
  18. parse_age_limit,
  19. str_or_none,
  20. try_get,
  21. )
  22. class ViceBaseIE(InfoExtractor):
  23. def _call_api(self, resource, resource_key, resource_id, locale, fields, args=''):
  24. return self._download_json(
  25. 'https://video.vice.com/api/v1/graphql', resource_id, query={
  26. 'query': '''{
  27. %s(locale: "%s", %s: "%s"%s) {
  28. %s
  29. }
  30. }''' % (resource, locale, resource_key, resource_id, args, fields),
  31. })['data'][resource]
  32. class ViceIE(ViceBaseIE, AdobePassIE):
  33. IE_NAME = 'vice'
  34. _VALID_URL = r'https?://(?:(?:video|vms)\.vice|(?:www\.)?vice(?:land|tv))\.com/(?P<locale>[^/]+)/(?:video/[^/]+|embed)/(?P<id>[\da-f]{24})'
  35. _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//video\.vice\.com/[^/]+/embed/[\da-f]{24})']
  36. _TESTS = [{
  37. 'url': 'https://video.vice.com/en_us/video/pet-cremator/58c69e38a55424f1227dc3f7',
  38. 'info_dict': {
  39. 'id': '58c69e38a55424f1227dc3f7',
  40. 'ext': 'mp4',
  41. 'title': '10 Questions You Always Wanted To Ask: Pet Cremator',
  42. 'description': 'md5:fe856caacf61fe0e74fab15ce2b07ca5',
  43. 'uploader': 'vice',
  44. 'uploader_id': '57a204088cb727dec794c67b',
  45. 'timestamp': 1489664942,
  46. 'upload_date': '20170316',
  47. 'age_limit': 14,
  48. },
  49. 'params': {
  50. # m3u8 download
  51. 'skip_download': True,
  52. },
  53. }, {
  54. # geo restricted to US
  55. 'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56',
  56. 'info_dict': {
  57. 'id': '5816510690b70e6c5fd39a56',
  58. 'ext': 'mp4',
  59. 'uploader': 'vice',
  60. 'title': 'The Signal From Tölva',
  61. 'description': 'md5:3927e3c79f9e8094606a2b3c5b5e55d5',
  62. 'uploader_id': '57a204088cb727dec794c67b',
  63. 'timestamp': 1477941983,
  64. 'upload_date': '20161031',
  65. },
  66. 'params': {
  67. # m3u8 download
  68. 'skip_download': True,
  69. },
  70. }, {
  71. 'url': 'https://video.vice.com/alps/video/ulfs-wien-beruchtigste-grafitti-crew-part-1/581b12b60a0e1f4c0fb6ea2f',
  72. 'info_dict': {
  73. 'id': '581b12b60a0e1f4c0fb6ea2f',
  74. 'ext': 'mp4',
  75. 'title': 'ULFs - Wien berüchtigste Grafitti Crew - Part 1',
  76. 'description': 'Zwischen Hinterzimmer-Tattoos und U-Bahnschächten erzählen uns die Ulfs, wie es ist, "süchtig nach Sachbeschädigung" zu sein.',
  77. 'uploader': 'vice',
  78. 'uploader_id': '57a204088cb727dec794c67b',
  79. 'timestamp': 1485368119,
  80. 'upload_date': '20170125',
  81. 'age_limit': 14,
  82. },
  83. 'params': {
  84. # AES-encrypted m3u8
  85. 'skip_download': True,
  86. },
  87. }, {
  88. 'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4',
  89. 'only_matching': True,
  90. }, {
  91. 'url': 'https://video.vice.com/en_us/embed/57f41d3556a0a80f54726060',
  92. 'only_matching': True,
  93. }, {
  94. 'url': 'https://vms.vice.com/en_us/video/preplay/58c69e38a55424f1227dc3f7',
  95. 'only_matching': True,
  96. }, {
  97. 'url': 'https://www.viceland.com/en_us/video/thursday-march-1-2018/5a8f2d7ff1cdb332dd446ec1',
  98. 'only_matching': True,
  99. }]
  100. def _real_extract(self, url):
  101. locale, video_id = self._match_valid_url(url).groups()
  102. video = self._call_api('videos', 'id', video_id, locale, '''body
  103. locked
  104. rating
  105. thumbnail_url
  106. title''')[0]
  107. title = video['title'].strip()
  108. rating = video.get('rating')
  109. query = {}
  110. if video.get('locked'):
  111. resource = self._get_mvpd_resource(
  112. 'VICELAND', title, video_id, rating)
  113. query['tvetoken'] = self._extract_mvpd_auth(
  114. url, video_id, 'VICELAND', resource)
  115. # signature generation algorithm is reverse engineered from signatureGenerator in
  116. # webpack:///../shared/~/vice-player/dist/js/vice-player.js in
  117. # https://www.viceland.com/assets/common/js/web.vendor.bundle.js
  118. # new JS is located here https://vice-web-statics-cdn.vice.com/vice-player/player-embed.js
  119. exp = int(time.time()) + 1440
  120. query.update({
  121. 'exp': exp,
  122. 'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(),
  123. 'skipadstitching': 1,
  124. 'platform': 'desktop',
  125. 'rn': random.randint(10000, 100000),
  126. })
  127. try:
  128. preplay = self._download_json(
  129. 'https://vms.vice.com/%s/video/preplay/%s' % (locale, video_id),
  130. video_id, query=query)
  131. except ExtractorError as e:
  132. if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401):
  133. error = json.loads(e.cause.read().decode())
  134. error_message = error.get('error_description') or error['details']
  135. raise ExtractorError('%s said: %s' % (
  136. self.IE_NAME, error_message), expected=True)
  137. raise
  138. video_data = preplay['video']
  139. formats = self._extract_m3u8_formats(
  140. preplay['playURL'], video_id, 'mp4', 'm3u8_native')
  141. episode = video_data.get('episode') or {}
  142. channel = video_data.get('channel') or {}
  143. season = video_data.get('season') or {}
  144. subtitles = {}
  145. for subtitle in preplay.get('subtitleURLs', []):
  146. cc_url = subtitle.get('url')
  147. if not cc_url:
  148. continue
  149. language_code = try_get(subtitle, lambda x: x['languages'][0]['language_code'], compat_str) or 'en'
  150. subtitles.setdefault(language_code, []).append({
  151. 'url': cc_url,
  152. })
  153. return {
  154. 'formats': formats,
  155. 'id': video_id,
  156. 'title': title,
  157. 'description': clean_html(video.get('body')),
  158. 'thumbnail': video.get('thumbnail_url'),
  159. 'duration': int_or_none(video_data.get('video_duration')),
  160. 'timestamp': int_or_none(video_data.get('created_at'), 1000),
  161. 'age_limit': parse_age_limit(video_data.get('video_rating') or rating),
  162. 'series': try_get(video_data, lambda x: x['show']['base']['display_title'], compat_str),
  163. 'episode_number': int_or_none(episode.get('episode_number')),
  164. 'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')),
  165. 'season_number': int_or_none(season.get('season_number')),
  166. 'season_id': str_or_none(season.get('id') or video_data.get('season_id')),
  167. 'uploader': channel.get('name'),
  168. 'uploader_id': str_or_none(channel.get('id')),
  169. 'subtitles': subtitles,
  170. }
  171. class ViceShowIE(ViceBaseIE):
  172. IE_NAME = 'vice:show'
  173. _VALID_URL = r'https?://(?:video\.vice|(?:www\.)?vice(?:land|tv))\.com/(?P<locale>[^/]+)/show/(?P<id>[^/?#&]+)'
  174. _PAGE_SIZE = 25
  175. _TESTS = [{
  176. 'url': 'https://video.vice.com/en_us/show/fck-thats-delicious',
  177. 'info_dict': {
  178. 'id': '57a2040c8cb727dec794c901',
  179. 'title': 'F*ck, That’s Delicious',
  180. 'description': 'The life and eating habits of rap’s greatest bon vivant, Action Bronson.',
  181. },
  182. 'playlist_mincount': 64,
  183. }, {
  184. 'url': 'https://www.vicetv.com/en_us/show/fck-thats-delicious',
  185. 'only_matching': True,
  186. }]
  187. def _fetch_page(self, locale, show_id, page):
  188. videos = self._call_api('videos', 'show_id', show_id, locale, '''body
  189. id
  190. url''', ', page: %d, per_page: %d' % (page + 1, self._PAGE_SIZE))
  191. for video in videos:
  192. yield self.url_result(
  193. video['url'], ViceIE.ie_key(), video.get('id'))
  194. def _real_extract(self, url):
  195. locale, display_id = self._match_valid_url(url).groups()
  196. show = self._call_api('shows', 'slug', display_id, locale, '''dek
  197. id
  198. title''')[0]
  199. show_id = show['id']
  200. entries = OnDemandPagedList(
  201. functools.partial(self._fetch_page, locale, show_id),
  202. self._PAGE_SIZE)
  203. return self.playlist_result(
  204. entries, show_id, show.get('title'), show.get('dek'))
  205. class ViceArticleIE(ViceBaseIE):
  206. IE_NAME = 'vice:article'
  207. _VALID_URL = r'https://(?:www\.)?vice\.com/(?P<locale>[^/]+)/article/(?:[0-9a-z]{6}/)?(?P<id>[^?#]+)'
  208. _TESTS = [{
  209. 'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah',
  210. 'info_dict': {
  211. 'id': '58dc0a3dee202d2a0ccfcbd8',
  212. 'ext': 'mp4',
  213. 'title': 'Mormon War on Porn',
  214. 'description': 'md5:1c5d91fe25fa8aa304f9def118b92dbf',
  215. 'uploader': 'vice',
  216. 'uploader_id': '57a204088cb727dec794c67b',
  217. 'timestamp': 1491883129,
  218. 'upload_date': '20170411',
  219. 'age_limit': 17,
  220. },
  221. 'params': {
  222. # AES-encrypted m3u8
  223. 'skip_download': True,
  224. },
  225. 'add_ie': [ViceIE.ie_key()],
  226. }, {
  227. 'url': 'https://www.vice.com/en_us/article/how-to-hack-a-car',
  228. 'md5': '13010ee0bc694ea87ec40724397c2349',
  229. 'info_dict': {
  230. 'id': '3jstaBeXgAs',
  231. 'ext': 'mp4',
  232. 'title': 'How to Hack a Car: Phreaked Out (Episode 2)',
  233. 'description': 'md5:ee95453f7ff495db8efe14ae8bf56f30',
  234. 'uploader': 'Motherboard',
  235. 'uploader_id': 'MotherboardTV',
  236. 'upload_date': '20140529',
  237. },
  238. 'add_ie': [YoutubeIE.ie_key()],
  239. }, {
  240. 'url': 'https://www.vice.com/en_us/article/znm9dx/karley-sciortino-slutever-reloaded',
  241. 'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
  242. 'info_dict': {
  243. 'id': '57f41d3556a0a80f54726060',
  244. 'ext': 'mp4',
  245. 'title': "Making The World's First Male Sex Doll",
  246. 'description': 'md5:19b00b215b99961cf869c40fbe9df755',
  247. 'uploader': 'vice',
  248. 'uploader_id': '57a204088cb727dec794c67b',
  249. 'timestamp': 1476919911,
  250. 'upload_date': '20161019',
  251. 'age_limit': 17,
  252. },
  253. 'params': {
  254. 'skip_download': True,
  255. },
  256. 'add_ie': [ViceIE.ie_key()],
  257. }, {
  258. 'url': 'https://www.vice.com/en_us/article/cowboy-capitalists-part-1',
  259. 'only_matching': True,
  260. }, {
  261. 'url': 'https://www.vice.com/ru/article/big-night-out-ibiza-clive-martin-229',
  262. 'only_matching': True,
  263. }]
  264. def _real_extract(self, url):
  265. locale, display_id = self._match_valid_url(url).groups()
  266. article = self._call_api('articles', 'slug', display_id, locale, '''body
  267. embed_code''')[0]
  268. body = article['body']
  269. def _url_res(video_url, ie_key):
  270. return {
  271. '_type': 'url_transparent',
  272. 'url': video_url,
  273. 'display_id': display_id,
  274. 'ie_key': ie_key,
  275. }
  276. vice_url = ViceIE._extract_url(body)
  277. if vice_url:
  278. return _url_res(vice_url, ViceIE.ie_key())
  279. embed_code = self._search_regex(
  280. r'embedCode=([^&\'"]+)', body,
  281. 'ooyala embed code', default=None)
  282. if embed_code:
  283. return _url_res('ooyala:%s' % embed_code, 'Ooyala')
  284. youtube_url = YoutubeIE._extract_url(body)
  285. if youtube_url:
  286. return _url_res(youtube_url, YoutubeIE.ie_key())
  287. video_url = self._html_search_regex(
  288. r'data-video-url="([^"]+)"',
  289. article['embed_code'], 'video URL')
  290. return _url_res(video_url, ViceIE.ie_key())