# berufetv.py (3.0 KB)

  1. from .common import InfoExtractor
  2. from ..utils import float_or_none, mimetype2ext, traverse_obj
  3. class BerufeTVIE(InfoExtractor):
  4. _VALID_URL = r'https?://(?:www\.)?web\.arbeitsagentur\.de/berufetv/[^?#]+/film;filmId=(?P<id>[\w-]+)'
  5. _TESTS = [{
  6. 'url': 'https://web.arbeitsagentur.de/berufetv/studienberufe/wirtschaftswissenschaften/wirtschaftswissenschaften-volkswirtschaft/film;filmId=DvKC3DUpMKvUZ_6fEnfg3u',
  7. 'md5': '041b6432ec8e6838f84a5c30f31cc795',
  8. 'info_dict': {
  9. 'id': 'DvKC3DUpMKvUZ_6fEnfg3u',
  10. 'ext': 'mp4',
  11. 'title': 'Volkswirtschaftslehre',
  12. 'description': 'md5:6bd87d0c63163480a6489a37526ee1c1',
  13. 'categories': ['Studien&shy;beruf'],
  14. 'tags': ['Studienfilm'],
  15. 'duration': 602.440,
  16. 'thumbnail': r're:^https://asset-out-cdn\.video-cdn\.net/private/videos/DvKC3DUpMKvUZ_6fEnfg3u/thumbnails/793063\?quality=thumbnail&__token__=[^\s]+$',
  17. }
  18. }]
  19. def _real_extract(self, url):
  20. video_id = self._match_id(url)
  21. movie_metadata = self._download_json(
  22. 'https://rest.arbeitsagentur.de/infosysbub/berufetv/pc/v1/film-metadata',
  23. video_id, 'Downloading JSON metadata',
  24. headers={'X-API-Key': '79089773-4892-4386-86e6-e8503669f426'}, fatal=False)
  25. meta = traverse_obj(
  26. movie_metadata, ('metadaten', lambda _, i: video_id == i['miId']),
  27. get_all=False, default={})
  28. video = self._download_json(
  29. f'https://d.video-cdn.net/play/player/8YRzUk6pTzmBdrsLe9Y88W/video/{video_id}',
  30. video_id, 'Downloading video JSON')
  31. formats, subtitles = [], {}
  32. for key, source in video['videoSources']['html'].items():
  33. if key == 'auto':
  34. fmts, subs = self._extract_m3u8_formats_and_subtitles(source[0]['source'], video_id)
  35. formats += fmts
  36. subtitles = subs
  37. else:
  38. formats.append({
  39. 'url': source[0]['source'],
  40. 'ext': mimetype2ext(source[0]['mimeType']),
  41. 'format_id': key,
  42. })
  43. for track in video.get('videoTracks') or []:
  44. if track.get('type') != 'SUBTITLES':
  45. continue
  46. subtitles.setdefault(track['language'], []).append({
  47. 'url': track['source'],
  48. 'name': track.get('label'),
  49. 'ext': 'vtt'
  50. })
  51. return {
  52. 'id': video_id,
  53. 'title': meta.get('titel') or traverse_obj(video, ('videoMetaData', 'title')),
  54. 'description': meta.get('beschreibung'),
  55. 'thumbnail': meta.get('thumbnail') or f'https://asset-out-cdn.video-cdn.net/private/videos/{video_id}/thumbnails/active',
  56. 'duration': float_or_none(video.get('duration'), scale=1000),
  57. 'categories': [meta['kategorie']] if meta.get('kategorie') else None,
  58. 'tags': meta.get('themengebiete'),
  59. 'subtitles': subtitles,
  60. 'formats': formats,
  61. }