tubetugraz.py 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. from .common import InfoExtractor
  2. from ..utils import (
  3. float_or_none,
  4. parse_resolution,
  5. traverse_obj,
  6. urlencode_postdata,
  7. variadic,
  8. )
  9. class TubeTuGrazBaseIE(InfoExtractor):
  10. _NETRC_MACHINE = 'tubetugraz'
  11. _API_EPISODE = 'https://tube.tugraz.at/search/episode.json'
  12. _FORMAT_TYPES = ('presentation', 'presenter')
  13. def _perform_login(self, username, password):
  14. urlh = self._request_webpage(
  15. 'https://tube.tugraz.at/Shibboleth.sso/Login?target=/paella/ui/index.html',
  16. None, fatal=False, note='downloading login page', errnote='unable to fetch login page')
  17. if not urlh:
  18. return
  19. urlh = self._request_webpage(
  20. urlh.geturl(), None, fatal=False, headers={'referer': urlh.geturl()},
  21. note='logging in', errnote='unable to log in', data=urlencode_postdata({
  22. 'lang': 'de',
  23. '_eventId_proceed': '',
  24. 'j_username': username,
  25. 'j_password': password
  26. }))
  27. if urlh and urlh.geturl() != 'https://tube.tugraz.at/paella/ui/index.html':
  28. self.report_warning('unable to login: incorrect password')
  29. def _extract_episode(self, episode_info):
  30. id = episode_info.get('id')
  31. formats = list(self._extract_formats(
  32. traverse_obj(episode_info, ('mediapackage', 'media', 'track')), id))
  33. title = traverse_obj(episode_info, ('mediapackage', 'title'), 'dcTitle')
  34. series_title = traverse_obj(episode_info, ('mediapackage', 'seriestitle'))
  35. creator = ', '.join(variadic(traverse_obj(
  36. episode_info, ('mediapackage', 'creators', 'creator'), 'dcCreator', default='')))
  37. return {
  38. 'id': id,
  39. 'title': title,
  40. 'creator': creator or None,
  41. 'duration': traverse_obj(episode_info, ('mediapackage', 'duration'), 'dcExtent'),
  42. 'series': series_title,
  43. 'series_id': traverse_obj(episode_info, ('mediapackage', 'series'), 'dcIsPartOf'),
  44. 'episode': series_title and title,
  45. 'formats': formats
  46. }
  47. def _set_format_type(self, formats, type):
  48. for f in formats:
  49. f['format_note'] = type
  50. if not type.startswith(self._FORMAT_TYPES[0]):
  51. f['preference'] = -2
  52. return formats
  53. def _extract_formats(self, format_list, id):
  54. has_hls, has_dash = False, False
  55. for format_info in format_list or []:
  56. url = traverse_obj(format_info, ('tags', 'url'), 'url')
  57. if url is None:
  58. continue
  59. type = format_info.get('type') or 'unknown'
  60. transport = (format_info.get('transport') or 'https').lower()
  61. if transport == 'https':
  62. formats = [{
  63. 'url': url,
  64. 'abr': float_or_none(traverse_obj(format_info, ('audio', 'bitrate')), 1000),
  65. 'vbr': float_or_none(traverse_obj(format_info, ('video', 'bitrate')), 1000),
  66. 'fps': traverse_obj(format_info, ('video', 'framerate')),
  67. **parse_resolution(traverse_obj(format_info, ('video', 'resolution'))),
  68. }]
  69. elif transport == 'hls':
  70. has_hls, formats = True, self._extract_m3u8_formats(
  71. url, id, 'mp4', fatal=False, note=f'downloading {type} HLS manifest')
  72. elif transport == 'dash':
  73. has_dash, formats = True, self._extract_mpd_formats(
  74. url, id, fatal=False, note=f'downloading {type} DASH manifest')
  75. else:
  76. # RTMP, HDS, SMOOTH, and unknown formats
  77. # - RTMP url fails on every tested entry until now
  78. # - HDS url 404's on every tested entry until now
  79. # - SMOOTH url 404's on every tested entry until now
  80. continue
  81. yield from self._set_format_type(formats, type)
  82. # TODO: Add test for these
  83. for type in self._FORMAT_TYPES:
  84. if not has_hls:
  85. hls_formats = self._extract_m3u8_formats(
  86. f'https://wowza.tugraz.at/matterhorn_engage/smil:engage-player_{id}_{type}.smil/playlist.m3u8',
  87. id, 'mp4', fatal=False, note=f'Downloading {type} HLS manifest', errnote=False) or []
  88. yield from self._set_format_type(hls_formats, type)
  89. if not has_dash:
  90. dash_formats = self._extract_mpd_formats(
  91. f'https://wowza.tugraz.at/matterhorn_engage/smil:engage-player_{id}_{type}.smil/manifest_mpm4sav_mvlist.mpd',
  92. id, fatal=False, note=f'Downloading {type} DASH manifest', errnote=False)
  93. yield from self._set_format_type(dash_formats, type)
  94. class TubeTuGrazIE(TubeTuGrazBaseIE):
  95. IE_DESC = 'tube.tugraz.at'
  96. _VALID_URL = r'''(?x)
  97. https?://tube\.tugraz\.at/paella/ui/watch.html\?id=
  98. (?P<id>[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12})
  99. '''
  100. _TESTS = [
  101. {
  102. 'url': 'https://tube.tugraz.at/paella/ui/watch.html?id=f2634392-e40e-4ac7-9ddc-47764aa23d40',
  103. 'md5': 'a23a3d5c9aaca2b84932fdba66e17145',
  104. 'info_dict': {
  105. 'id': 'f2634392-e40e-4ac7-9ddc-47764aa23d40',
  106. 'ext': 'mp4',
  107. 'title': '#6 (23.11.2017)',
  108. 'episode': '#6 (23.11.2017)',
  109. 'series': '[INB03001UF] Einführung in die strukturierte Programmierung',
  110. 'creator': 'Safran C',
  111. 'duration': 3295818,
  112. 'series_id': 'b1192fff-2aa7-4bf0-a5cf-7b15c3bd3b34',
  113. }
  114. }, {
  115. 'url': 'https://tube.tugraz.at/paella/ui/watch.html?id=2df6d787-e56a-428d-8ef4-d57f07eef238',
  116. 'md5': 'de0d854a56bf7318d2b693fe1adb89a5',
  117. 'info_dict': {
  118. 'id': '2df6d787-e56a-428d-8ef4-d57f07eef238',
  119. 'title': 'TubeTuGraz video #2df6d787-e56a-428d-8ef4-d57f07eef238',
  120. 'ext': 'mp4',
  121. },
  122. 'expected_warnings': ['Extractor failed to obtain "title"'],
  123. }
  124. ]
  125. def _real_extract(self, url):
  126. video_id = self._match_id(url)
  127. episode_data = self._download_json(
  128. self._API_EPISODE, video_id, query={'id': video_id, 'limit': 1}, note='Downloading episode metadata')
  129. episode_info = traverse_obj(episode_data, ('search-results', 'result'), default={'id': video_id})
  130. return self._extract_episode(episode_info)
  131. class TubeTuGrazSeriesIE(TubeTuGrazBaseIE):
  132. _VALID_URL = r'''(?x)
  133. https?://tube\.tugraz\.at/paella/ui/browse\.html\?series=
  134. (?P<id>[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12})
  135. '''
  136. _TESTS = [{
  137. 'url': 'https://tube.tugraz.at/paella/ui/browse.html?series=0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
  138. 'id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
  139. 'info_dict': {
  140. 'id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
  141. 'title': '[209351] Strassenwesen',
  142. },
  143. 'playlist': [
  144. {
  145. 'info_dict': {
  146. 'id': 'ee17ce5d-34e2-48b7-a76a-fed148614e11',
  147. 'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
  148. 'ext': 'mp4',
  149. 'title': '#4 Detailprojekt',
  150. 'episode': '#4 Detailprojekt',
  151. 'series': '[209351] Strassenwesen',
  152. 'creator': 'Neuhold R',
  153. 'duration': 6127024,
  154. }
  155. },
  156. {
  157. 'info_dict': {
  158. 'id': '87350498-799a-44d3-863f-d1518a98b114',
  159. 'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
  160. 'ext': 'mp4',
  161. 'title': '#3 Generelles Projekt',
  162. 'episode': '#3 Generelles Projekt',
  163. 'series': '[209351] Strassenwesen',
  164. 'creator': 'Neuhold R',
  165. 'duration': 5374422,
  166. }
  167. },
  168. {
  169. 'info_dict': {
  170. 'id': '778599ea-489e-4189-9e05-3b4888e19bcd',
  171. 'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
  172. 'ext': 'mp4',
  173. 'title': '#2 Vorprojekt',
  174. 'episode': '#2 Vorprojekt',
  175. 'series': '[209351] Strassenwesen',
  176. 'creator': 'Neuhold R',
  177. 'duration': 5566404,
  178. }
  179. },
  180. {
  181. 'info_dict': {
  182. 'id': '75e4c71c-d99d-4e56-b0e6-4f2bcdf11f29',
  183. 'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
  184. 'ext': 'mp4',
  185. 'title': '#1 Variantenstudium',
  186. 'episode': '#1 Variantenstudium',
  187. 'series': '[209351] Strassenwesen',
  188. 'creator': 'Neuhold R',
  189. 'duration': 5420200,
  190. }
  191. }
  192. ],
  193. 'min_playlist_count': 4
  194. }]
  195. def _real_extract(self, url):
  196. id = self._match_id(url)
  197. episodes_data = self._download_json(self._API_EPISODE, id, query={'sid': id}, note='Downloading episode list')
  198. series_data = self._download_json(
  199. 'https://tube.tugraz.at/series/series.json', id, fatal=False,
  200. note='downloading series metadata', errnote='failed to download series metadata',
  201. query={
  202. 'seriesId': id,
  203. 'count': 1,
  204. 'sort': 'TITLE'
  205. })
  206. return self.playlist_result(
  207. map(self._extract_episode, episodes_data['search-results']['result']), id,
  208. traverse_obj(series_data, ('catalogs', 0, 'http://purl.org/dc/terms/', 'title', 0, 'value')))