raywenderlich.py 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. import re
  2. from .common import InfoExtractor
  3. from .vimeo import VimeoIE
  4. from ..compat import compat_str
  5. from ..utils import (
  6. ExtractorError,
  7. int_or_none,
  8. merge_dicts,
  9. try_get,
  10. unescapeHTML,
  11. unified_timestamp,
  12. urljoin,
  13. )
  14. class RayWenderlichIE(InfoExtractor):
  15. _VALID_URL = r'''(?x)
  16. https?://
  17. (?:
  18. videos\.raywenderlich\.com/courses|
  19. (?:www\.)?raywenderlich\.com
  20. )/
  21. (?P<course_id>[^/]+)/lessons/(?P<id>\d+)
  22. '''
  23. _TESTS = [{
  24. 'url': 'https://www.raywenderlich.com/3530-testing-in-ios/lessons/1',
  25. 'info_dict': {
  26. 'id': '248377018',
  27. 'ext': 'mp4',
  28. 'title': 'Introduction',
  29. 'description': 'md5:804d031b3efa9fcb49777d512d74f722',
  30. 'timestamp': 1513906277,
  31. 'upload_date': '20171222',
  32. 'duration': 133,
  33. 'uploader': 'Ray Wenderlich',
  34. 'uploader_id': 'user3304672',
  35. },
  36. 'params': {
  37. 'noplaylist': True,
  38. 'skip_download': True,
  39. },
  40. 'add_ie': [VimeoIE.ie_key()],
  41. 'expected_warnings': ['HTTP Error 403: Forbidden'],
  42. }, {
  43. 'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
  44. 'only_matching': True,
  45. }]
  46. @staticmethod
  47. def _extract_video_id(data, lesson_id):
  48. if not data:
  49. return
  50. groups = try_get(data, lambda x: x['groups'], list) or []
  51. if not groups:
  52. return
  53. for group in groups:
  54. if not isinstance(group, dict):
  55. continue
  56. contents = try_get(data, lambda x: x['contents'], list) or []
  57. for content in contents:
  58. if not isinstance(content, dict):
  59. continue
  60. ordinal = int_or_none(content.get('ordinal'))
  61. if ordinal != lesson_id:
  62. continue
  63. video_id = content.get('identifier')
  64. if video_id:
  65. return compat_str(video_id)
  66. def _real_extract(self, url):
  67. mobj = self._match_valid_url(url)
  68. course_id, lesson_id = mobj.group('course_id', 'id')
  69. display_id = '%s/%s' % (course_id, lesson_id)
  70. webpage = self._download_webpage(url, display_id)
  71. thumbnail = self._og_search_thumbnail(
  72. webpage, default=None) or self._html_search_meta(
  73. 'twitter:image', webpage, 'thumbnail')
  74. if '>Subscribe to unlock' in webpage:
  75. raise ExtractorError(
  76. 'This content is only available for subscribers',
  77. expected=True)
  78. info = {
  79. 'thumbnail': thumbnail,
  80. }
  81. vimeo_id = self._search_regex(
  82. r'data-vimeo-id=["\'](\d+)', webpage, 'vimeo id', default=None)
  83. if not vimeo_id:
  84. data = self._parse_json(
  85. self._search_regex(
  86. r'data-collection=(["\'])(?P<data>{.+?})\1', webpage,
  87. 'data collection', default='{}', group='data'),
  88. display_id, transform_source=unescapeHTML, fatal=False)
  89. video_id = self._extract_video_id(
  90. data, lesson_id) or self._search_regex(
  91. r'/videos/(\d+)/', thumbnail, 'video id')
  92. headers = {
  93. 'Referer': url,
  94. 'X-Requested-With': 'XMLHttpRequest',
  95. }
  96. csrf_token = self._html_search_meta(
  97. 'csrf-token', webpage, 'csrf token', default=None)
  98. if csrf_token:
  99. headers['X-CSRF-Token'] = csrf_token
  100. video = self._download_json(
  101. 'https://videos.raywenderlich.com/api/v1/videos/%s.json'
  102. % video_id, display_id, headers=headers)['video']
  103. vimeo_id = video['clips'][0]['provider_id']
  104. info.update({
  105. '_type': 'url_transparent',
  106. 'title': video.get('name'),
  107. 'description': video.get('description') or video.get(
  108. 'meta_description'),
  109. 'duration': int_or_none(video.get('duration')),
  110. 'timestamp': unified_timestamp(video.get('created_at')),
  111. })
  112. return merge_dicts(info, self.url_result(
  113. VimeoIE._smuggle_referrer(
  114. 'https://player.vimeo.com/video/%s' % vimeo_id, url),
  115. ie=VimeoIE.ie_key(), video_id=vimeo_id))
  116. class RayWenderlichCourseIE(InfoExtractor):
  117. _VALID_URL = r'''(?x)
  118. https?://
  119. (?:
  120. videos\.raywenderlich\.com/courses|
  121. (?:www\.)?raywenderlich\.com
  122. )/
  123. (?P<id>[^/]+)
  124. '''
  125. _TEST = {
  126. 'url': 'https://www.raywenderlich.com/3530-testing-in-ios',
  127. 'info_dict': {
  128. 'title': 'Testing in iOS',
  129. 'id': '3530-testing-in-ios',
  130. },
  131. 'params': {
  132. 'noplaylist': False,
  133. },
  134. 'playlist_count': 29,
  135. }
  136. @classmethod
  137. def suitable(cls, url):
  138. return False if RayWenderlichIE.suitable(url) else super(
  139. RayWenderlichCourseIE, cls).suitable(url)
  140. def _real_extract(self, url):
  141. course_id = self._match_id(url)
  142. webpage = self._download_webpage(url, course_id)
  143. entries = []
  144. lesson_urls = set()
  145. for lesson_url in re.findall(
  146. r'<a[^>]+\bhref=["\'](/%s/lessons/\d+)' % course_id, webpage):
  147. if lesson_url in lesson_urls:
  148. continue
  149. lesson_urls.add(lesson_url)
  150. entries.append(self.url_result(
  151. urljoin(url, lesson_url), ie=RayWenderlichIE.ie_key()))
  152. title = self._og_search_title(
  153. webpage, default=None) or self._html_search_meta(
  154. 'twitter:title', webpage, 'title', default=None)
  155. return self.playlist_result(entries, course_id, title)