arkena.py 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151
  1. from .common import InfoExtractor
  2. from ..utils import (
  3. ExtractorError,
  4. float_or_none,
  5. int_or_none,
  6. parse_iso8601,
  7. parse_qs,
  8. try_get,
  9. )
  10. class ArkenaIE(InfoExtractor):
  11. _VALID_URL = r'''(?x)
  12. https?://
  13. (?:
  14. video\.(?:arkena|qbrick)\.com/play2/embed/player\?|
  15. play\.arkena\.com/(?:config|embed)/avp/v\d/player/media/(?P<id>[^/]+)/[^/]+/(?P<account_id>\d+)
  16. )
  17. '''
  18. # See https://support.arkena.com/display/PLAY/Ways+to+embed+your+video
  19. _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//play\.arkena\.com/embed/avp/.+?)\1']
  20. _TESTS = [{
  21. 'url': 'https://video.qbrick.com/play2/embed/player?accountId=1034090&mediaId=d8ab4607-00090107-aab86310',
  22. 'md5': '97f117754e5f3c020f5f26da4a44ebaf',
  23. 'info_dict': {
  24. 'id': 'd8ab4607-00090107-aab86310',
  25. 'ext': 'mp4',
  26. 'title': 'EM_HT20_117_roslund_v2.mp4',
  27. 'timestamp': 1608285912,
  28. 'upload_date': '20201218',
  29. 'duration': 1429.162667,
  30. 'subtitles': {
  31. 'sv': 'count:3',
  32. },
  33. },
  34. }, {
  35. 'url': 'https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411',
  36. 'only_matching': True,
  37. }, {
  38. 'url': 'https://play.arkena.com/config/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411/?callbackMethod=jQuery1111023664739129262213_1469227693893',
  39. 'only_matching': True,
  40. }, {
  41. 'url': 'http://play.arkena.com/config/avp/v1/player/media/327336/darkmatter/131064/?callbackMethod=jQuery1111002221189684892677_1469227595972',
  42. 'only_matching': True,
  43. }, {
  44. 'url': 'http://play.arkena.com/embed/avp/v1/player/media/327336/darkmatter/131064/',
  45. 'only_matching': True,
  46. }, {
  47. 'url': 'http://video.arkena.com/play2/embed/player?accountId=472718&mediaId=35763b3b-00090078-bf604299&pageStyling=styled',
  48. 'only_matching': True,
  49. }]
  50. def _real_extract(self, url):
  51. mobj = self._match_valid_url(url)
  52. video_id = mobj.group('id')
  53. account_id = mobj.group('account_id')
  54. # Handle http://video.arkena.com/play2/embed/player URL
  55. if not video_id:
  56. qs = parse_qs(url)
  57. video_id = qs.get('mediaId', [None])[0]
  58. account_id = qs.get('accountId', [None])[0]
  59. if not video_id or not account_id:
  60. raise ExtractorError('Invalid URL', expected=True)
  61. media = self._download_json(
  62. 'https://video.qbrick.com/api/v1/public/accounts/%s/medias/%s' % (account_id, video_id),
  63. video_id, query={
  64. # https://video.qbrick.com/docs/api/examples/library-api.html
  65. 'fields': 'asset/resources/*/renditions/*(height,id,language,links/*(href,mimeType),type,size,videos/*(audios/*(codec,sampleRate),bitrate,codec,duration,height,width),width),created,metadata/*(title,description),tags',
  66. })
  67. metadata = media.get('metadata') or {}
  68. title = metadata['title']
  69. duration = None
  70. formats = []
  71. thumbnails = []
  72. subtitles = {}
  73. for resource in media['asset']['resources']:
  74. for rendition in (resource.get('renditions') or []):
  75. rendition_type = rendition.get('type')
  76. for i, link in enumerate(rendition.get('links') or []):
  77. href = link.get('href')
  78. if not href:
  79. continue
  80. if rendition_type == 'image':
  81. thumbnails.append({
  82. 'filesize': int_or_none(rendition.get('size')),
  83. 'height': int_or_none(rendition.get('height')),
  84. 'id': rendition.get('id'),
  85. 'url': href,
  86. 'width': int_or_none(rendition.get('width')),
  87. })
  88. elif rendition_type == 'subtitle':
  89. subtitles.setdefault(rendition.get('language') or 'en', []).append({
  90. 'url': href,
  91. })
  92. elif rendition_type == 'video':
  93. f = {
  94. 'filesize': int_or_none(rendition.get('size')),
  95. 'format_id': rendition.get('id'),
  96. 'url': href,
  97. }
  98. video = try_get(rendition, lambda x: x['videos'][i], dict)
  99. if video:
  100. if not duration:
  101. duration = float_or_none(video.get('duration'))
  102. f.update({
  103. 'height': int_or_none(video.get('height')),
  104. 'tbr': int_or_none(video.get('bitrate'), 1000),
  105. 'vcodec': video.get('codec'),
  106. 'width': int_or_none(video.get('width')),
  107. })
  108. audio = try_get(video, lambda x: x['audios'][0], dict)
  109. if audio:
  110. f.update({
  111. 'acodec': audio.get('codec'),
  112. 'asr': int_or_none(audio.get('sampleRate')),
  113. })
  114. formats.append(f)
  115. elif rendition_type == 'index':
  116. mime_type = link.get('mimeType')
  117. if mime_type == 'application/smil+xml':
  118. formats.extend(self._extract_smil_formats(
  119. href, video_id, fatal=False))
  120. elif mime_type == 'application/x-mpegURL':
  121. formats.extend(self._extract_m3u8_formats(
  122. href, video_id, 'mp4', 'm3u8_native',
  123. m3u8_id='hls', fatal=False))
  124. elif mime_type == 'application/hds+xml':
  125. formats.extend(self._extract_f4m_formats(
  126. href, video_id, f4m_id='hds', fatal=False))
  127. elif mime_type == 'application/dash+xml':
  128. formats.extend(self._extract_f4m_formats(
  129. href, video_id, f4m_id='hds', fatal=False))
  130. elif mime_type == 'application/vnd.ms-sstr+xml':
  131. formats.extend(self._extract_ism_formats(
  132. href, video_id, ism_id='mss', fatal=False))
  133. return {
  134. 'id': video_id,
  135. 'title': title,
  136. 'description': metadata.get('description'),
  137. 'timestamp': parse_iso8601(media.get('created')),
  138. 'thumbnails': thumbnails,
  139. 'subtitles': subtitles,
  140. 'duration': duration,
  141. 'tags': media.get('tags'),
  142. 'formats': formats,
  143. }