123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234 |
- from .common import InfoExtractor
- from ..utils import (
- float_or_none,
- parse_resolution,
- traverse_obj,
- urlencode_postdata,
- variadic,
- )
- class TubeTuGrazBaseIE(InfoExtractor):
- _NETRC_MACHINE = 'tubetugraz'
- _API_EPISODE = 'https://tube.tugraz.at/search/episode.json'
- _FORMAT_TYPES = ('presentation', 'presenter')
- def _perform_login(self, username, password):
- urlh = self._request_webpage(
- 'https://tube.tugraz.at/Shibboleth.sso/Login?target=/paella/ui/index.html',
- None, fatal=False, note='downloading login page', errnote='unable to fetch login page')
- if not urlh:
- return
- urlh = self._request_webpage(
- urlh.geturl(), None, fatal=False, headers={'referer': urlh.geturl()},
- note='logging in', errnote='unable to log in', data=urlencode_postdata({
- 'lang': 'de',
- '_eventId_proceed': '',
- 'j_username': username,
- 'j_password': password
- }))
- if urlh and urlh.geturl() != 'https://tube.tugraz.at/paella/ui/index.html':
- self.report_warning('unable to login: incorrect password')
- def _extract_episode(self, episode_info):
- id = episode_info.get('id')
- formats = list(self._extract_formats(
- traverse_obj(episode_info, ('mediapackage', 'media', 'track')), id))
- title = traverse_obj(episode_info, ('mediapackage', 'title'), 'dcTitle')
- series_title = traverse_obj(episode_info, ('mediapackage', 'seriestitle'))
- creator = ', '.join(variadic(traverse_obj(
- episode_info, ('mediapackage', 'creators', 'creator'), 'dcCreator', default='')))
- return {
- 'id': id,
- 'title': title,
- 'creator': creator or None,
- 'duration': traverse_obj(episode_info, ('mediapackage', 'duration'), 'dcExtent'),
- 'series': series_title,
- 'series_id': traverse_obj(episode_info, ('mediapackage', 'series'), 'dcIsPartOf'),
- 'episode': series_title and title,
- 'formats': formats
- }
- def _set_format_type(self, formats, type):
- for f in formats:
- f['format_note'] = type
- if not type.startswith(self._FORMAT_TYPES[0]):
- f['preference'] = -2
- return formats
- def _extract_formats(self, format_list, id):
- has_hls, has_dash = False, False
- for format_info in format_list or []:
- url = traverse_obj(format_info, ('tags', 'url'), 'url')
- if url is None:
- continue
- type = format_info.get('type') or 'unknown'
- transport = (format_info.get('transport') or 'https').lower()
- if transport == 'https':
- formats = [{
- 'url': url,
- 'abr': float_or_none(traverse_obj(format_info, ('audio', 'bitrate')), 1000),
- 'vbr': float_or_none(traverse_obj(format_info, ('video', 'bitrate')), 1000),
- 'fps': traverse_obj(format_info, ('video', 'framerate')),
- **parse_resolution(traverse_obj(format_info, ('video', 'resolution'))),
- }]
- elif transport == 'hls':
- has_hls, formats = True, self._extract_m3u8_formats(
- url, id, 'mp4', fatal=False, note=f'downloading {type} HLS manifest')
- elif transport == 'dash':
- has_dash, formats = True, self._extract_mpd_formats(
- url, id, fatal=False, note=f'downloading {type} DASH manifest')
- else:
- # RTMP, HDS, SMOOTH, and unknown formats
- # - RTMP url fails on every tested entry until now
- # - HDS url 404's on every tested entry until now
- # - SMOOTH url 404's on every tested entry until now
- continue
- yield from self._set_format_type(formats, type)
- # TODO: Add test for these
- for type in self._FORMAT_TYPES:
- if not has_hls:
- hls_formats = self._extract_m3u8_formats(
- f'https://wowza.tugraz.at/matterhorn_engage/smil:engage-player_{id}_{type}.smil/playlist.m3u8',
- id, 'mp4', fatal=False, note=f'Downloading {type} HLS manifest', errnote=False) or []
- yield from self._set_format_type(hls_formats, type)
- if not has_dash:
- dash_formats = self._extract_mpd_formats(
- f'https://wowza.tugraz.at/matterhorn_engage/smil:engage-player_{id}_{type}.smil/manifest_mpm4sav_mvlist.mpd',
- id, fatal=False, note=f'Downloading {type} DASH manifest', errnote=False)
- yield from self._set_format_type(dash_formats, type)
- class TubeTuGrazIE(TubeTuGrazBaseIE):
- IE_DESC = 'tube.tugraz.at'
- _VALID_URL = r'''(?x)
- https?://tube\.tugraz\.at/paella/ui/watch.html\?id=
- (?P<id>[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12})
- '''
- _TESTS = [
- {
- 'url': 'https://tube.tugraz.at/paella/ui/watch.html?id=f2634392-e40e-4ac7-9ddc-47764aa23d40',
- 'md5': 'a23a3d5c9aaca2b84932fdba66e17145',
- 'info_dict': {
- 'id': 'f2634392-e40e-4ac7-9ddc-47764aa23d40',
- 'ext': 'mp4',
- 'title': '#6 (23.11.2017)',
- 'episode': '#6 (23.11.2017)',
- 'series': '[INB03001UF] Einführung in die strukturierte Programmierung',
- 'creator': 'Safran C',
- 'duration': 3295818,
- 'series_id': 'b1192fff-2aa7-4bf0-a5cf-7b15c3bd3b34',
- }
- }, {
- 'url': 'https://tube.tugraz.at/paella/ui/watch.html?id=2df6d787-e56a-428d-8ef4-d57f07eef238',
- 'md5': 'de0d854a56bf7318d2b693fe1adb89a5',
- 'info_dict': {
- 'id': '2df6d787-e56a-428d-8ef4-d57f07eef238',
- 'title': 'TubeTuGraz video #2df6d787-e56a-428d-8ef4-d57f07eef238',
- 'ext': 'mp4',
- },
- 'expected_warnings': ['Extractor failed to obtain "title"'],
- }
- ]
- def _real_extract(self, url):
- video_id = self._match_id(url)
- episode_data = self._download_json(
- self._API_EPISODE, video_id, query={'id': video_id, 'limit': 1}, note='Downloading episode metadata')
- episode_info = traverse_obj(episode_data, ('search-results', 'result'), default={'id': video_id})
- return self._extract_episode(episode_info)
- class TubeTuGrazSeriesIE(TubeTuGrazBaseIE):
- _VALID_URL = r'''(?x)
- https?://tube\.tugraz\.at/paella/ui/browse\.html\?series=
- (?P<id>[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12})
- '''
- _TESTS = [{
- 'url': 'https://tube.tugraz.at/paella/ui/browse.html?series=0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
- 'id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
- 'info_dict': {
- 'id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
- 'title': '[209351] Strassenwesen',
- },
- 'playlist': [
- {
- 'info_dict': {
- 'id': 'ee17ce5d-34e2-48b7-a76a-fed148614e11',
- 'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
- 'ext': 'mp4',
- 'title': '#4 Detailprojekt',
- 'episode': '#4 Detailprojekt',
- 'series': '[209351] Strassenwesen',
- 'creator': 'Neuhold R',
- 'duration': 6127024,
- }
- },
- {
- 'info_dict': {
- 'id': '87350498-799a-44d3-863f-d1518a98b114',
- 'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
- 'ext': 'mp4',
- 'title': '#3 Generelles Projekt',
- 'episode': '#3 Generelles Projekt',
- 'series': '[209351] Strassenwesen',
- 'creator': 'Neuhold R',
- 'duration': 5374422,
- }
- },
- {
- 'info_dict': {
- 'id': '778599ea-489e-4189-9e05-3b4888e19bcd',
- 'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
- 'ext': 'mp4',
- 'title': '#2 Vorprojekt',
- 'episode': '#2 Vorprojekt',
- 'series': '[209351] Strassenwesen',
- 'creator': 'Neuhold R',
- 'duration': 5566404,
- }
- },
- {
- 'info_dict': {
- 'id': '75e4c71c-d99d-4e56-b0e6-4f2bcdf11f29',
- 'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
- 'ext': 'mp4',
- 'title': '#1 Variantenstudium',
- 'episode': '#1 Variantenstudium',
- 'series': '[209351] Strassenwesen',
- 'creator': 'Neuhold R',
- 'duration': 5420200,
- }
- }
- ],
- 'min_playlist_count': 4
- }]
- def _real_extract(self, url):
- id = self._match_id(url)
- episodes_data = self._download_json(self._API_EPISODE, id, query={'sid': id}, note='Downloading episode list')
- series_data = self._download_json(
- 'https://tube.tugraz.at/series/series.json', id, fatal=False,
- note='downloading series metadata', errnote='failed to download series metadata',
- query={
- 'seriesId': id,
- 'count': 1,
- 'sort': 'TITLE'
- })
- return self.playlist_result(
- map(self._extract_episode, episodes_data['search-results']['result']), id,
- traverse_obj(series_data, ('catalogs', 0, 'http://purl.org/dc/terms/', 'title', 0, 'value')))
|