espn.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411
  1. import base64
  2. import json
  3. import re
  4. import urllib.parse
  5. from .adobepass import AdobePassIE
  6. from .common import InfoExtractor
  7. from .once import OnceIE
  8. from ..utils import (
  9. determine_ext,
  10. dict_get,
  11. int_or_none,
  12. traverse_obj,
  13. unified_strdate,
  14. unified_timestamp,
  15. )
  class ESPNIE(OnceIE):
      """Extractor for ESPN / ESPN FC clip pages addressed by a numeric clip id."""

      # Verbose-mode pattern; the trailing run of digits is captured as ``id``.
      _VALID_URL = r'''(?x)
                      https?://
                          (?:
                              (?:
                                  (?:
                                      (?:(?:\w+\.)+)?espn\.go|
                                      (?:www\.)?espn
                                  )\.com/
                                  (?:
                                      (?:
                                          video/(?:clip|iframe/twitter)|
                                      )
                                      (?:
                                          .*?\?.*?\bid=|
                                          /_/id/
                                      )|
                                      [^/]+/video/
                                  )
                              )|
                              (?:www\.)espnfc\.(?:com|us)/(?:video/)?[^/]+/\d+/video/
                          )
                          (?P<id>\d+)
                      '''

      _TESTS = [
          {
              'url': 'http://espn.go.com/video/clip?id=10365079',
              'info_dict': {
                  'id': '10365079',
                  'ext': 'mp4',
                  'title': '30 for 30 Shorts: Judging Jewell',
                  'description': 'md5:39370c2e016cb4ecf498ffe75bef7f0f',
                  'timestamp': 1390936111,
                  'upload_date': '20140128',
                  'duration': 1302,
                  'thumbnail': r're:https://.+\.jpg',
              },
              'params': {'skip_download': True},
          },
          {
              'url': 'https://broadband.espn.go.com/video/clip?id=18910086',
              'info_dict': {
                  'id': '18910086',
                  'ext': 'mp4',
                  'title': 'Kyrie spins around defender for two',
                  'description': 'md5:2b0f5bae9616d26fba8808350f0d2b9b',
                  'timestamp': 1489539155,
                  'upload_date': '20170315',
              },
              'params': {'skip_download': True},
              'expected_warnings': ['Unable to download f4m manifest'],
          },
          {
              'url': 'http://nonredline.sports.espn.go.com/video/clip?id=19744672',
              'only_matching': True,
          },
          {
              'url': 'https://cdn.espn.go.com/video/clip/_/id/19771774',
              'only_matching': True,
          },
          {
              'url': 'http://www.espn.com/video/clip?id=10365079',
              'only_matching': True,
          },
          {
              'url': 'http://www.espn.com/video/clip/_/id/17989860',
              'only_matching': True,
          },
          {
              'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
              'only_matching': True,
          },
          {
              'url': 'http://www.espnfc.us/video/espn-fc-tv/86/video/3319154/nashville-unveiled-as-the-newest-club-in-mls',
              'only_matching': True,
          },
          {
              'url': 'http://www.espnfc.com/english-premier-league/23/video/3324163/premier-league-in-90-seconds-golden-tweets',
              'only_matching': True,
          },
          {
              'url': 'http://www.espn.com/espnw/video/26066627/arkansas-gibson-completes-hr-cycle-four-innings',
              'only_matching': True,
          },
          {
              'url': 'http://www.espn.com/watch/player?id=19141491',
              'only_matching': True,
          },
          {
              'url': 'http://www.espn.com/watch/player?bucketId=257&id=19505875',
              'only_matching': True,
          },
      ]

      def _real_extract(self, url):
          """Fetch the clip JSON for the matched id and build the info dict.

          Formats are gathered from the ``source`` and ``mobile`` entries of the
          clip's ``links`` mapping; each distinct URL is processed once.
          """
          video_id = self._match_id(url)

          api_response = self._download_json(
              'http://api-app.espn.com/v1/video/clips/%s' % video_id,
              video_id)
          clip = api_response['videos'][0]

          # Mandatory field: a missing headline raises before any format work.
          title = clip['headline']

          formats = []
          seen_urls = set()

          def add_source(media_url, format_id=None):
              # Each URL contributes formats only the first time it is seen.
              if media_url in seen_urls:
                  return
              seen_urls.add(media_url)

              ext = determine_ext(media_url)
              if OnceIE.suitable(media_url):
                  formats.extend(self._extract_once_formats(media_url))
                  return
              if ext == 'smil':
                  formats.extend(self._extract_smil_formats(
                      media_url, video_id, fatal=False))
                  return
              if ext == 'f4m':
                  formats.extend(self._extract_f4m_formats(
                      media_url, video_id, f4m_id=format_id, fatal=False))
                  return
              if ext == 'm3u8':
                  formats.extend(self._extract_m3u8_formats(
                      media_url, video_id, 'mp4', entry_protocol='m3u8_native',
                      m3u8_id=format_id, fatal=False))
                  return

              # Anything else is added as a single format entry.
              fmt = {
                  'url': media_url,
                  'format_id': format_id,
              }
              # File names such as "...720p30_2896k.mp4" carry height/fps/bitrate.
              match = re.search(r'(\d+)p(\d+)_(\d+)k\.', media_url)
              if match:
                  height, fps, tbr = (int(part) for part in match.groups())
                  fmt.update({
                      'height': height,
                      'fps': fps,
                      'tbr': tbr,
                  })
              if format_id == 'mezzanine':
                  fmt['quality'] = 1
              formats.append(fmt)

          def walk_links(node, prefix=None):
              # String leaves are media URLs; nested dicts extend the
              # dash-joined format id; 'alert' entries are ignored.
              for key, value in node.items():
                  if key == 'alert':
                      continue
                  if isinstance(value, str):
                      add_source(value, prefix)
                  elif isinstance(value, dict):
                      child_prefix = '%s-%s' % (prefix, key) if prefix else key
                      walk_links(value, child_prefix)

          links = clip.get('links', {})
          for section in ('source', 'mobile'):
              walk_links(links.get(section, {}))

          return {
              'id': video_id,
              'title': title,
              'description': clip.get('caption') or clip.get('description'),
              'thumbnail': clip.get('thumbnail'),
              'timestamp': unified_timestamp(clip.get('originalPublishDate')),
              'duration': int_or_none(clip.get('duration')),
              'formats': formats,
          }
  class ESPNArticleIE(InfoExtractor):
      """Resolve an ESPN article page to the clip referenced by its play button."""

      _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
      _TESTS = [
          {
              'url': 'http://espn.go.com/nba/recap?gameId=400793786',
              'only_matching': True,
          },
          {
              'url': 'http://espn.go.com/blog/golden-state-warriors/post/_/id/593/how-warriors-rapidly-regained-a-winning-edge',
              'only_matching': True,
          },
          {
              'url': 'http://espn.go.com/sports/endurance/story/_/id/12893522/dzhokhar-tsarnaev-sentenced-role-boston-marathon-bombings',
              'only_matching': True,
          },
          {
              'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return',
              'only_matching': True,
          },
      ]

      @classmethod
      def suitable(cls, url):
          """Return False for URLs ESPNIE or WatchESPNIE accept; otherwise defer to the base check."""
          if ESPNIE.suitable(url) or WatchESPNIE.suitable(url):
              return False
          return super().suitable(url)

      def _real_extract(self, url):
          """Download the page, read the play button's ``data-id`` and hand off to ESPNIE."""
          page_id = self._match_id(url)
          page = self._download_webpage(url, page_id)

          clip_id = self._search_regex(
              r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P<id>\d+)',
              page, 'video id', group='id')
          clip_url = 'http://espn.go.com/video/clip?id=%s' % clip_id

          return self.url_result(clip_url, ESPNIE.ie_key())
  class FiveThirtyEightIE(InfoExtractor):
      """Resolve a fivethirtyeight.com feature page to its embedded ABC News video."""

      _VALID_URL = r'https?://(?:www\.)?fivethirtyeight\.com/features/(?P<id>[^/?#]+)'
      _TEST = {
          'url': 'http://fivethirtyeight.com/features/how-the-6-8-raiders-can-still-make-the-playoffs/',
          'info_dict': {
              'id': '56032156',
              'ext': 'flv',
              'title': 'FiveThirtyEight: The Raiders can still make the playoffs',
              'description': 'Neil Paine breaks down the simplest scenario that will put the Raiders into the playoffs at 8-8.',
          },
          'params': {'skip_download': True},
      }

      def _real_extract(self, url):
          """Find the embed iframe URL in the page and delegate to 'AbcNewsVideo'."""
          slug = self._match_id(url)
          page = self._download_webpage(url, slug)

          iframe_pattern = r'<iframe[^>]+src=["\'](https?://fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/\d+)'
          embed_url = self._search_regex(iframe_pattern, page, 'embed url')

          return self.url_result(embed_url, 'AbcNewsVideo')
  class ESPNCricInfoIE(InfoExtractor):
      """Extractor for espncricinfo.com video pages, backed by the video-details JSON endpoint."""

      _VALID_URL = r'https?://(?:www\.)?espncricinfo\.com/video/[^#$&?/]+-(?P<id>\d+)'
      _TESTS = [{
          'url': 'https://www.espncricinfo.com/video/finch-chasing-comes-with-risks-despite-world-cup-trend-1289135',
          'info_dict': {
              'id': '1289135',
              'ext': 'mp4',
              'title': 'Finch: Chasing comes with \'risks\' despite World Cup trend',
              'description': 'md5:ea32373303e25efbb146efdfc8a37829',
              'upload_date': '20211113',
              'duration': 96,
          },
          'params': {'skip_download': True}
      }]

      def _real_extract(self, url):
          """Download the video-details JSON and build the info dict.

          ``HLS`` playbacks are expanded into formats and subtitles; ``AUDIO``
          playbacks are added as audio-only formats. Playbacks without a URL,
          or of any other type, are skipped.
          """
          # Named video_id (not ``id``) so the builtin is not shadowed.
          video_id = self._match_id(url)
          data_json = self._download_json(
              f'https://hs-consumer-api.espncricinfo.com/v1/pages/video/video-details?videoId={video_id}',
              video_id)['video']

          formats, subtitles = [], {}
          for item in data_json.get('playbacks') or []:
              playback_url = item.get('url')
              if not playback_url:
                  continue
              playback_type = item.get('type')
              if playback_type == 'HLS':
                  m3u8_frmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(playback_url, video_id)
                  formats.extend(m3u8_frmts)
                  subtitles = self._merge_subtitles(subtitles, m3u8_subs)
              elif playback_type == 'AUDIO':
                  formats.append({
                      'url': playback_url,
                      'vcodec': 'none',
                  })

          return {
              'id': video_id,
              'title': data_json.get('title'),
              'description': data_json.get('summary'),
              'upload_date': unified_strdate(dict_get(data_json, ('publishedAt', 'recordedAt'))),
              # Optional numeric field: coerce safely, as ESPNIE does for its duration.
              'duration': int_or_none(data_json.get('duration')),
              'formats': formats,
              'subtitles': subtitles,
          }
  class WatchESPNIE(AdobePassIE):
      """Extractor for espn.com ``watch`` / ``espnplus`` player pages addressed by a UUID."""

      _VALID_URL = r'https?://(?:www\.)?espn\.com/(?:watch|espnplus)/player/_/id/(?P<id>[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})'
      _TESTS = [{
          'url': 'https://www.espn.com/watch/player/_/id/dbbc6b1d-c084-4b47-9878-5f13c56ce309',
          'info_dict': {
              'id': 'dbbc6b1d-c084-4b47-9878-5f13c56ce309',
              'ext': 'mp4',
              'title': 'Huddersfield vs. Burnley',
              'duration': 7500,
              'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/dbbc6b1d-c084-4b47-9878-5f13c56ce309/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
          },
          'params': {
              'skip_download': True,
          },
      }, {
          'url': 'https://www.espn.com/watch/player/_/id/a049a56e-a7ce-477e-aef3-c7e48ef8221c',
          'info_dict': {
              'id': 'a049a56e-a7ce-477e-aef3-c7e48ef8221c',
              'ext': 'mp4',
              'title': 'Dynamo Dresden vs. VfB Stuttgart (Round #1) (German Cup)',
              'duration': 8335,
              'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/bd1f3d12-0654-47d9-852e-71b85ea695c7/16x9.jpg?timestamp=202201112217&showBadge=true&cb=12&package=ESPN_PLUS',
          },
          'params': {
              'skip_download': True,
          },
      }, {
          'url': 'https://www.espn.com/espnplus/player/_/id/317f5fd1-c78a-4ebe-824a-129e0d348421',
          'info_dict': {
              'id': '317f5fd1-c78a-4ebe-824a-129e0d348421',
              'ext': 'mp4',
              'title': 'The Wheel - Episode 10',
              'duration': 3352,
              'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/317f5fd1-c78a-4ebe-824a-129e0d348421/16x9.jpg?timestamp=202205031523&showBadge=true&cb=12&package=ESPN_PLUS',
          },
          'params': {
              'skip_download': True,
          },
      }]

      _API_KEY = 'ZXNwbiZicm93c2VyJjEuMC4w.ptUt7QxsteaRruuPmGZFaJByOoqKvDP2a5YkInHrc7c'

      def _call_bamgrid_api(self, path, video_id, payload=None, headers=None):
          """POST ``payload`` to the bamgrid edge API at ``path`` and return the parsed JSON.

          The payload is form-encoded when ``path`` is ``'token'`` and JSON-encoded
          otherwise. An ``Authorization: Bearer <_API_KEY>`` header is added unless
          the caller already supplied an ``Authorization`` header.
          """
          # Work on a copy: neither the caller's dict nor a shared default may be mutated.
          headers = dict(headers or {})
          headers.setdefault('Authorization', f'Bearer {self._API_KEY}')
          parse = urllib.parse.urlencode if path == 'token' else json.dumps
          return self._download_json(
              f'https://espn.api.edge.bamgrid.com/{path}', video_id, headers=headers, data=parse(payload).encode())

      def _exchange_bamgrid_token(self, video_id, assertion, subject):
          """Exchange ``assertion`` for an access token; ``subject`` is 'device' or 'account'."""
          return self._call_bamgrid_api(
              'token', video_id, payload={
                  'subject_token': assertion,
                  'subject_token_type': f'urn:bamtech:params:oauth:token-type:{subject}',
                  'platform': 'android',
                  'grant_type': 'urn:ietf:params:oauth:grant-type:token-exchange',
              })['access_token']

      def _real_extract(self, url):
          """Resolve the HLS manifest for the event and build the info dict.

          The playback state's ``sourceId`` selects the route: ids containing
          ``DTC`` use the cookie-based token flow, ``ESPN_FREE`` needs no login,
          and everything else goes through TV-provider (Adobe Pass) auth.
          """
          video_id = self._match_id(url)
          cdn_data = self._download_json(
              f'https://watch-cdn.product.api.espn.com/api/product/v3/watchespn/web/playback/event?id={video_id}',
              video_id)
          video_data = cdn_data['playbackState']

          # A missing sourceId would make the substring test raise TypeError;
          # treat it as empty so it falls through to the TV-provider branch,
          # exactly as the equality test against 'ESPN_FREE' already did.
          source_id = video_data.get('sourceId') or ''
          asset_url = f'https://watch.auth.api.espn.com/video/auth/media/{video_id}/asset?apikey=uiqlbgzdwuru14v627vdusswb'

          # ESPN+ subscription required, through cookies
          if 'DTC' in source_id:
              cookie = self._get_cookies(url).get('ESPN-ONESITE.WEB-PROD.token')
              if not cookie:
                  self.raise_login_required(method='cookies')

              # Step 1: register a device and trade its assertion for a device token.
              device_assertion = self._call_bamgrid_api(
                  'devices', video_id,
                  headers={'Content-Type': 'application/json; charset=UTF-8'},
                  payload={
                      'deviceFamily': 'android',
                      'applicationRuntime': 'android',
                      'deviceProfile': 'tv',
                      'attributes': {},
                  })['assertion']
              device_token = self._exchange_bamgrid_token(video_id, device_assertion, 'device')

              # Step 2: grant the account using the second '|'-separated field
              # of the cookie value, then trade that assertion for an account token.
              account_assertion = self._call_bamgrid_api(
                  'accounts/grant', video_id, payload={'id_token': cookie.value.split('|')[1]},
                  headers={
                      'Authorization': device_token,
                      'Content-Type': 'application/json; charset=UTF-8',
                  })['assertion']
              token = self._exchange_bamgrid_token(video_id, account_assertion, 'account')

              playback = self._download_json(
                  video_data['videoHref'].format(scenario='browser~ssai'), video_id,
                  headers={
                      'Accept': 'application/vnd.media-service+json; version=5',
                      'Authorization': token,
                  })
              m3u8_url, headers = playback['stream']['complete'][0]['url'], {'authorization': token}

          # No login required
          elif source_id == 'ESPN_FREE':
              asset = self._download_json(asset_url, video_id)
              m3u8_url, headers = asset['stream'], {}

          # TV Provider required
          else:
              resource = self._get_mvpd_resource('ESPN', video_data['name'], video_id, None)
              auth = self._extract_mvpd_auth(url, video_id, 'ESPN', resource).encode()

              asset = self._download_json(
                  asset_url, video_id,
                  data=f'adobeToken={urllib.parse.quote_plus(base64.b64encode(auth))}&drmSupport=HLS'.encode())
              m3u8_url, headers = asset['stream'], {}

          formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')

          return {
              'id': video_id,
              'duration': traverse_obj(cdn_data, ('tracking', 'duration')),
              'title': video_data.get('name'),
              'formats': formats,
              'subtitles': subtitles,
              'thumbnail': video_data.get('posterHref'),
              'http_headers': headers,
          }
  367. }