limelight.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359
  1. import re
  2. from .common import InfoExtractor
  3. from ..compat import compat_HTTPError
  4. from ..utils import (
  5. determine_ext,
  6. float_or_none,
  7. int_or_none,
  8. smuggle_url,
  9. try_get,
  10. unsmuggle_url,
  11. ExtractorError,
  12. )
  13. class LimelightBaseIE(InfoExtractor):
  14. _PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
  15. @classmethod
  16. def _extract_embed_urls(cls, url, webpage):
  17. lm = {
  18. 'Media': 'media',
  19. 'Channel': 'channel',
  20. 'ChannelList': 'channel_list',
  21. }
  22. def smuggle(url):
  23. return smuggle_url(url, {'source_url': url})
  24. entries = []
  25. for kind, video_id in re.findall(
  26. r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})',
  27. webpage):
  28. entries.append(cls.url_result(
  29. smuggle('limelight:%s:%s' % (lm[kind], video_id)),
  30. 'Limelight%s' % kind, video_id))
  31. for mobj in re.finditer(
  32. # As per [1] class attribute should be exactly equal to
  33. # LimelightEmbeddedPlayerFlash but numerous examples seen
  34. # that don't exactly match it (e.g. [2]).
  35. # 1. http://support.3playmedia.com/hc/en-us/articles/227732408-Limelight-Embedding-the-Captions-Plugin-with-the-Limelight-Player-on-Your-Webpage
  36. # 2. http://www.sedona.com/FacilitatorTraining2017
  37. r'''(?sx)
  38. <object[^>]+class=(["\'])(?:(?!\1).)*\bLimelightEmbeddedPlayerFlash\b(?:(?!\1).)*\1[^>]*>.*?
  39. <param[^>]+
  40. name=(["\'])flashVars\2[^>]+
  41. value=(["\'])(?:(?!\3).)*(?P<kind>media|channel(?:List)?)Id=(?P<id>[a-z0-9]{32})
  42. ''', webpage):
  43. kind, video_id = mobj.group('kind'), mobj.group('id')
  44. entries.append(cls.url_result(
  45. smuggle('limelight:%s:%s' % (kind, video_id)),
  46. 'Limelight%s' % kind.capitalize(), video_id))
  47. # http://support.3playmedia.com/hc/en-us/articles/115009517327-Limelight-Embedding-the-Audio-Description-Plugin-with-the-Limelight-Player-on-Your-Web-Page)
  48. for video_id in re.findall(
  49. r'(?s)LimelightPlayerUtil\.embed\s*\(\s*{.*?\bmediaId["\']\s*:\s*["\'](?P<id>[a-z0-9]{32})',
  50. webpage):
  51. entries.append(cls.url_result(
  52. smuggle('limelight:media:%s' % video_id),
  53. LimelightMediaIE.ie_key(), video_id))
  54. return entries
  55. def _call_playlist_service(self, item_id, method, fatal=True, referer=None):
  56. headers = {}
  57. if referer:
  58. headers['Referer'] = referer
  59. try:
  60. return self._download_json(
  61. self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method),
  62. item_id, 'Downloading PlaylistService %s JSON' % method,
  63. fatal=fatal, headers=headers)
  64. except ExtractorError as e:
  65. if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
  66. error = self._parse_json(e.cause.read().decode(), item_id)['detail']['contentAccessPermission']
  67. if error == 'CountryDisabled':
  68. self.raise_geo_restricted()
  69. raise ExtractorError(error, expected=True)
  70. raise
  71. def _extract(self, item_id, pc_method, mobile_method, referer=None):
  72. pc = self._call_playlist_service(item_id, pc_method, referer=referer)
  73. mobile = self._call_playlist_service(
  74. item_id, mobile_method, fatal=False, referer=referer)
  75. return pc, mobile
  76. def _extract_info(self, pc, mobile, i, referer):
  77. get_item = lambda x, y: try_get(x, lambda x: x[y][i], dict) or {}
  78. pc_item = get_item(pc, 'playlistItems')
  79. mobile_item = get_item(mobile, 'mediaList')
  80. video_id = pc_item.get('mediaId') or mobile_item['mediaId']
  81. title = pc_item.get('title') or mobile_item['title']
  82. formats = []
  83. urls = []
  84. for stream in pc_item.get('streams', []):
  85. stream_url = stream.get('url')
  86. if not stream_url or stream_url in urls:
  87. continue
  88. if not self.get_param('allow_unplayable_formats') and stream.get('drmProtected'):
  89. continue
  90. urls.append(stream_url)
  91. ext = determine_ext(stream_url)
  92. if ext == 'f4m':
  93. formats.extend(self._extract_f4m_formats(
  94. stream_url, video_id, f4m_id='hds', fatal=False))
  95. else:
  96. fmt = {
  97. 'url': stream_url,
  98. 'abr': float_or_none(stream.get('audioBitRate')),
  99. 'fps': float_or_none(stream.get('videoFrameRate')),
  100. 'ext': ext,
  101. }
  102. width = int_or_none(stream.get('videoWidthInPixels'))
  103. height = int_or_none(stream.get('videoHeightInPixels'))
  104. vbr = float_or_none(stream.get('videoBitRate'))
  105. if width or height or vbr:
  106. fmt.update({
  107. 'width': width,
  108. 'height': height,
  109. 'vbr': vbr,
  110. })
  111. else:
  112. fmt['vcodec'] = 'none'
  113. rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', stream_url)
  114. if rtmp:
  115. format_id = 'rtmp'
  116. if stream.get('videoBitRate'):
  117. format_id += '-%d' % int_or_none(stream['videoBitRate'])
  118. http_format_id = format_id.replace('rtmp', 'http')
  119. CDN_HOSTS = (
  120. ('delvenetworks.com', 'cpl.delvenetworks.com'),
  121. ('video.llnw.net', 's2.content.video.llnw.net'),
  122. )
  123. for cdn_host, http_host in CDN_HOSTS:
  124. if cdn_host not in rtmp.group('host').lower():
  125. continue
  126. http_url = 'http://%s/%s' % (http_host, rtmp.group('playpath')[4:])
  127. urls.append(http_url)
  128. if self._is_valid_url(http_url, video_id, http_format_id):
  129. http_fmt = fmt.copy()
  130. http_fmt.update({
  131. 'url': http_url,
  132. 'format_id': http_format_id,
  133. })
  134. formats.append(http_fmt)
  135. break
  136. fmt.update({
  137. 'url': rtmp.group('url'),
  138. 'play_path': rtmp.group('playpath'),
  139. 'app': rtmp.group('app'),
  140. 'ext': 'flv',
  141. 'format_id': format_id,
  142. })
  143. formats.append(fmt)
  144. for mobile_url in mobile_item.get('mobileUrls', []):
  145. media_url = mobile_url.get('mobileUrl')
  146. format_id = mobile_url.get('targetMediaPlatform')
  147. if not media_url or media_url in urls:
  148. continue
  149. if (format_id in ('Widevine', 'SmoothStreaming')
  150. and not self.get_param('allow_unplayable_formats', False)):
  151. continue
  152. urls.append(media_url)
  153. ext = determine_ext(media_url)
  154. if ext == 'm3u8':
  155. formats.extend(self._extract_m3u8_formats(
  156. media_url, video_id, 'mp4', 'm3u8_native',
  157. m3u8_id=format_id, fatal=False))
  158. elif ext == 'f4m':
  159. formats.extend(self._extract_f4m_formats(
  160. stream_url, video_id, f4m_id=format_id, fatal=False))
  161. else:
  162. formats.append({
  163. 'url': media_url,
  164. 'format_id': format_id,
  165. 'quality': -10,
  166. 'ext': ext,
  167. })
  168. subtitles = {}
  169. for flag in mobile_item.get('flags'):
  170. if flag == 'ClosedCaptions':
  171. closed_captions = self._call_playlist_service(
  172. video_id, 'getClosedCaptionsDetailsByMediaId',
  173. False, referer) or []
  174. for cc in closed_captions:
  175. cc_url = cc.get('webvttFileUrl')
  176. if not cc_url:
  177. continue
  178. lang = cc.get('languageCode') or self._search_regex(r'/([a-z]{2})\.vtt', cc_url, 'lang', default='en')
  179. subtitles.setdefault(lang, []).append({
  180. 'url': cc_url,
  181. })
  182. break
  183. get_meta = lambda x: pc_item.get(x) or mobile_item.get(x)
  184. return {
  185. 'id': video_id,
  186. 'title': title,
  187. 'description': get_meta('description'),
  188. 'formats': formats,
  189. 'duration': float_or_none(get_meta('durationInMilliseconds'), 1000),
  190. 'thumbnail': get_meta('previewImageUrl') or get_meta('thumbnailImageUrl'),
  191. 'subtitles': subtitles,
  192. }
  193. class LimelightMediaIE(LimelightBaseIE):
  194. IE_NAME = 'limelight'
  195. _VALID_URL = r'''(?x)
  196. (?:
  197. limelight:media:|
  198. https?://
  199. (?:
  200. link\.videoplatform\.limelight\.com/media/|
  201. assets\.delvenetworks\.com/player/loader\.swf
  202. )
  203. \?.*?\bmediaId=
  204. )
  205. (?P<id>[a-z0-9]{32})
  206. '''
  207. _TESTS = [{
  208. 'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86',
  209. 'info_dict': {
  210. 'id': '3ffd040b522b4485b6d84effc750cd86',
  211. 'ext': 'mp4',
  212. 'title': 'HaP and the HB Prince Trailer',
  213. 'description': 'md5:8005b944181778e313d95c1237ddb640',
  214. 'thumbnail': r're:^https?://.*\.jpeg$',
  215. 'duration': 144.23,
  216. },
  217. 'params': {
  218. # m3u8 download
  219. 'skip_download': True,
  220. },
  221. }, {
  222. # video with subtitles
  223. 'url': 'limelight:media:a3e00274d4564ec4a9b29b9466432335',
  224. 'md5': '2fa3bad9ac321e23860ca23bc2c69e3d',
  225. 'info_dict': {
  226. 'id': 'a3e00274d4564ec4a9b29b9466432335',
  227. 'ext': 'mp4',
  228. 'title': '3Play Media Overview Video',
  229. 'thumbnail': r're:^https?://.*\.jpeg$',
  230. 'duration': 78.101,
  231. # TODO: extract all languages that were accessible via API
  232. # 'subtitles': 'mincount:9',
  233. 'subtitles': 'mincount:1',
  234. },
  235. }, {
  236. 'url': 'https://assets.delvenetworks.com/player/loader.swf?mediaId=8018a574f08d416e95ceaccae4ba0452',
  237. 'only_matching': True,
  238. }]
  239. _PLAYLIST_SERVICE_PATH = 'media'
  240. def _real_extract(self, url):
  241. url, smuggled_data = unsmuggle_url(url, {})
  242. video_id = self._match_id(url)
  243. source_url = smuggled_data.get('source_url')
  244. self._initialize_geo_bypass({
  245. 'countries': smuggled_data.get('geo_countries'),
  246. })
  247. pc, mobile = self._extract(
  248. video_id, 'getPlaylistByMediaId',
  249. 'getMobilePlaylistByMediaId', source_url)
  250. return self._extract_info(pc, mobile, 0, source_url)
  251. class LimelightChannelIE(LimelightBaseIE):
  252. IE_NAME = 'limelight:channel'
  253. _VALID_URL = r'''(?x)
  254. (?:
  255. limelight:channel:|
  256. https?://
  257. (?:
  258. link\.videoplatform\.limelight\.com/media/|
  259. assets\.delvenetworks\.com/player/loader\.swf
  260. )
  261. \?.*?\bchannelId=
  262. )
  263. (?P<id>[a-z0-9]{32})
  264. '''
  265. _TESTS = [{
  266. 'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082',
  267. 'info_dict': {
  268. 'id': 'ab6a524c379342f9b23642917020c082',
  269. 'title': 'Javascript Sample Code',
  270. 'description': 'Javascript Sample Code - http://www.delvenetworks.com/sample-code/playerCode-demo.html',
  271. },
  272. 'playlist_mincount': 3,
  273. }, {
  274. 'url': 'http://assets.delvenetworks.com/player/loader.swf?channelId=ab6a524c379342f9b23642917020c082',
  275. 'only_matching': True,
  276. }]
  277. _PLAYLIST_SERVICE_PATH = 'channel'
  278. def _real_extract(self, url):
  279. url, smuggled_data = unsmuggle_url(url, {})
  280. channel_id = self._match_id(url)
  281. source_url = smuggled_data.get('source_url')
  282. pc, mobile = self._extract(
  283. channel_id, 'getPlaylistByChannelId',
  284. 'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1',
  285. source_url)
  286. entries = [
  287. self._extract_info(pc, mobile, i, source_url)
  288. for i in range(len(pc['playlistItems']))]
  289. return self.playlist_result(
  290. entries, channel_id, pc.get('title'), mobile.get('description'))
  291. class LimelightChannelListIE(LimelightBaseIE):
  292. IE_NAME = 'limelight:channel_list'
  293. _VALID_URL = r'''(?x)
  294. (?:
  295. limelight:channel_list:|
  296. https?://
  297. (?:
  298. link\.videoplatform\.limelight\.com/media/|
  299. assets\.delvenetworks\.com/player/loader\.swf
  300. )
  301. \?.*?\bchannelListId=
  302. )
  303. (?P<id>[a-z0-9]{32})
  304. '''
  305. _TESTS = [{
  306. 'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b',
  307. 'info_dict': {
  308. 'id': '301b117890c4465c8179ede21fd92e2b',
  309. 'title': 'Website - Hero Player',
  310. },
  311. 'playlist_mincount': 2,
  312. }, {
  313. 'url': 'https://assets.delvenetworks.com/player/loader.swf?channelListId=301b117890c4465c8179ede21fd92e2b',
  314. 'only_matching': True,
  315. }]
  316. _PLAYLIST_SERVICE_PATH = 'channel_list'
  317. def _real_extract(self, url):
  318. channel_list_id = self._match_id(url)
  319. channel_list = self._call_playlist_service(
  320. channel_list_id, 'getMobileChannelListById')
  321. entries = [
  322. self.url_result('limelight:channel:%s' % channel['id'], 'LimelightChannel')
  323. for channel in channel_list['channelList']]
  324. return self.playlist_result(
  325. entries, channel_list_id, channel_list['title'])