cybrary.py 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. from .common import InfoExtractor
  2. from ..utils import (
  3. ExtractorError,
  4. smuggle_url,
  5. str_or_none,
  6. traverse_obj,
  7. urlencode_postdata,
  8. )
  9. class CybraryBaseIE(InfoExtractor):
  10. _API_KEY = 'AIzaSyCX9ru6j70PX2My1Eq6Q1zoMAhuTdXlzSw'
  11. _ENDPOINTS = {
  12. 'course': 'https://app.cybrary.it/courses/api/catalog/browse/course/{}',
  13. 'course_enrollment': 'https://app.cybrary.it/courses/api/catalog/{}/enrollment',
  14. 'enrollment': 'https://app.cybrary.it/courses/api/enrollment/{}',
  15. 'launch': 'https://app.cybrary.it/courses/api/catalog/{}/launch',
  16. 'vimeo_oembed': 'https://vimeo.com/api/oembed.json?url=https://vimeo.com/{}',
  17. }
  18. _NETRC_MACHINE = 'cybrary'
  19. _TOKEN = None
  20. def _perform_login(self, username, password):
  21. CybraryBaseIE._TOKEN = self._download_json(
  22. f'https://identitytoolkit.googleapis.com/v1/accounts:signInWithPassword?key={self._API_KEY}',
  23. None, data=urlencode_postdata({'email': username, 'password': password, 'returnSecureToken': True}),
  24. note='Logging in')['idToken']
  25. def _real_initialize(self):
  26. if not self._TOKEN:
  27. self.raise_login_required(method='password')
  28. def _call_api(self, endpoint, item_id):
  29. return self._download_json(
  30. self._ENDPOINTS[endpoint].format(item_id), item_id,
  31. note=f'Downloading {endpoint} JSON metadata',
  32. headers={'Authorization': f'Bearer {self._TOKEN}'})
  33. def _get_vimeo_id(self, activity_id):
  34. launch_api = self._call_api('launch', activity_id)
  35. if launch_api.get('url'):
  36. return self._search_regex(r'https?://player\.vimeo\.com/video/(?P<vimeo_id>[0-9]+)', launch_api['url'], 'vimeo_id')
  37. return traverse_obj(launch_api, ('vendor_data', 'content', ..., 'videoId'), get_all=False)
  38. class CybraryIE(CybraryBaseIE):
  39. _VALID_URL = r'https?://app.cybrary.it/immersive/(?P<enrollment>[0-9]+)/activity/(?P<id>[0-9]+)'
  40. _TESTS = [{
  41. 'url': 'https://app.cybrary.it/immersive/12487950/activity/63102',
  42. 'md5': '9ae12d37e555cb2ed554223a71a701d0',
  43. 'info_dict': {
  44. 'id': '646609770',
  45. 'ext': 'mp4',
  46. 'title': 'Getting Started',
  47. 'thumbnail': 'https://i.vimeocdn.com/video/1301817996-76a268f0c56cff18a5cecbbdc44131eb9dda0c80eb0b3a036_1280',
  48. 'series_id': '63111',
  49. 'uploader_url': 'https://vimeo.com/user30867300',
  50. 'duration': 88,
  51. 'uploader_id': 'user30867300',
  52. 'series': 'Cybrary Orientation',
  53. 'uploader': 'Cybrary',
  54. 'chapter': 'Cybrary Orientation Series',
  55. 'chapter_id': '63110'
  56. },
  57. 'expected_warnings': ['No authenticators for vimeo']
  58. }, {
  59. 'url': 'https://app.cybrary.it/immersive/12747143/activity/52686',
  60. 'md5': '62f26547dccc59c44363e2a13d4ad08d',
  61. 'info_dict': {
  62. 'id': '445638073',
  63. 'ext': 'mp4',
  64. 'title': 'Azure Virtual Network IP Addressing',
  65. 'thumbnail': 'https://i.vimeocdn.com/video/936667051-1647ace66c627d4a2382185e0dae8deb830309bfddd53f8b2367b2f91e92ed0e-d_1280',
  66. 'series_id': '52733',
  67. 'uploader_url': 'https://vimeo.com/user30867300',
  68. 'duration': 426,
  69. 'uploader_id': 'user30867300',
  70. 'series': 'AZ-500: Microsoft Azure Security Technologies',
  71. 'uploader': 'Cybrary',
  72. 'chapter': 'Implement Network Security',
  73. 'chapter_id': '52693'
  74. },
  75. 'expected_warnings': ['No authenticators for vimeo']
  76. }]
  77. def _real_extract(self, url):
  78. activity_id, enrollment_id = self._match_valid_url(url).group('id', 'enrollment')
  79. course = self._call_api('enrollment', enrollment_id)['content']
  80. activity = traverse_obj(course, ('learning_modules', ..., 'activities', lambda _, v: int(activity_id) == v['id']), get_all=False)
  81. if activity.get('type') not in ['Video Activity', 'Lesson Activity']:
  82. raise ExtractorError('The activity is not a video', expected=True)
  83. module = next((m for m in course.get('learning_modules') or []
  84. if int(activity_id) in traverse_obj(m, ('activities', ..., 'id') or [])), None)
  85. vimeo_id = self._get_vimeo_id(activity_id)
  86. return {
  87. '_type': 'url_transparent',
  88. 'series': traverse_obj(course, ('content_description', 'title')),
  89. 'series_id': str_or_none(traverse_obj(course, ('content_description', 'id'))),
  90. 'id': vimeo_id,
  91. 'chapter': module.get('title'),
  92. 'chapter_id': str_or_none(module.get('id')),
  93. 'title': activity.get('title'),
  94. 'url': smuggle_url(f'https://player.vimeo.com/video/{vimeo_id}', {'http_headers': {'Referer': 'https://api.cybrary.it'}})
  95. }
  96. class CybraryCourseIE(CybraryBaseIE):
  97. _VALID_URL = r'https://app.cybrary.it/browse/course/(?P<id>[\w-]+)/?(?:$|[#?])'
  98. _TESTS = [{
  99. 'url': 'https://app.cybrary.it/browse/course/az-500-microsoft-azure-security-technologies',
  100. 'info_dict': {
  101. 'id': 898,
  102. 'title': 'AZ-500: Microsoft Azure Security Technologies',
  103. 'description': 'md5:69549d379c0fc1dec92926d4e8b6fbd4'
  104. },
  105. 'playlist_count': 59
  106. }, {
  107. 'url': 'https://app.cybrary.it/browse/course/cybrary-orientation',
  108. 'info_dict': {
  109. 'id': 1245,
  110. 'title': 'Cybrary Orientation',
  111. 'description': 'md5:9e69ff66b32fe78744e0ad4babe2e88e'
  112. },
  113. 'playlist_count': 4
  114. }]
  115. def _real_extract(self, url):
  116. course_id = self._match_id(url)
  117. course = self._call_api('course', course_id)
  118. enrollment_info = self._call_api('course_enrollment', course['id'])
  119. entries = [self.url_result(
  120. f'https://app.cybrary.it/immersive/{enrollment_info["id"]}/activity/{activity["id"]}')
  121. for activity in traverse_obj(course, ('content_item', 'learning_modules', ..., 'activities', ...))]
  122. return self.playlist_result(
  123. entries,
  124. traverse_obj(course, ('content_item', 'id'), expected_type=str_or_none),
  125. course.get('title'), course.get('short_description'))