123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174 |
- from .common import InfoExtractor
- from ..utils import (
- try_get,
- ExtractorError,
- )
- import json
- import random
- import re
class WPPilotBaseIE(InfoExtractor):
    """Common helpers for the WP Pilot extractors.

    Provides the API endpoint templates, the request headers sent to the
    API, retrieval (and caching) of the channel list, and conversion of a raw
    channel entry into an info dict.
    """

    # Channel endpoint template; formatted with the channel id.
    _VIDEO_URL = 'https://pilot.wp.pl/api/v1/channel/%s'
    # Guest variant of the channel endpoint; formatted with the channel id.
    _VIDEO_GUEST_URL = 'https://pilot.wp.pl/api/v1/guest/channel/%s'
    # Headers passed along with API requests.
    _HEADERS_WEB = {
        'Content-Type': 'application/json; charset=UTF-8',
        'Referer': 'https://pilot.wp.pl/tv/',
    }

    def _get_channel_list(self, cache=True):
        """Return a ``(channel_list, from_cache)`` tuple.

        When *cache* is ``True`` and a non-empty list is stored in the
        'wppilot' cache section, that list is returned with ``from_cache``
        set to ``True``. Otherwise the list is looked up on the website,
        stored in the cache and returned with ``from_cache`` set to ``False``.

        Raises ExtractorError if none of the static query results contains a
        channel list.
        """
        if cache is True:
            cached_channels = self.cache.load('wppilot', 'channel-list')
            if cached_channels:
                return cached_channels, True

        webpage = self._download_webpage(
            'https://pilot.wp.pl/tv/', None, 'Downloading webpage')
        # The script URL embeds the current build version of the site.
        build_url = self._search_regex(
            r'<script src="(https://wp-pilot-gatsby\.wpcdn\.pl/v[\d.-]+/desktop)',
            webpage, 'gatsby build version')
        page_data_base_url = build_url + '/page-data'

        page_data = self._download_json(
            f'{page_data_base_url}/tv/page-data.json', None,
            'Downloading page data')

        # Probe each static query result until one exposes the channel nodes.
        for qhash in page_data['staticQueryHashes']:
            query_result = self._download_json(
                f'{page_data_base_url}/sq/d/{qhash}.json', None,
                'Searching for channel list')
            channels = try_get(
                query_result, lambda x: x['data']['allChannels']['nodes'])
            if channels is not None:
                self.cache.store('wppilot', 'channel-list', channels)
                return channels, False

        raise ExtractorError('Unable to find the channel list')

    def _parse_channel(self, chan):
        """Build an info dict (id, title, live flag, thumbnails) from a raw channel entry.

        Only the image keys that are present and truthy in *chan* become
        thumbnails.
        """
        channel_id = str(chan['id'])
        title = chan['name']

        thumbnails = []
        for image_key in ('thumbnail', 'thumbnail_mobile', 'icon'):
            if chan.get(image_key):
                thumbnails.append({
                    'id': image_key,
                    'url': chan[image_key],
                })

        return {
            'id': channel_id,
            'title': title,
            'is_live': True,
            'thumbnails': thumbnails,
        }
class WPPilotIE(WPPilotBaseIE):
    """Extractor for a single WP Pilot live channel.

    Accepts ``https://pilot.wp.pl/tv/#<slug>`` URLs as well as the internal
    ``wppilot:<id or slug>`` form.
    """

    _VALID_URL = r'(?:https?://pilot\.wp\.pl/tv/?#|wppilot:)(?P<id>[a-z\d-]+)'
    IE_NAME = 'wppilot'

    _TESTS = [{
        'url': 'https://pilot.wp.pl/tv/#telewizja-wp-hd',
        'info_dict': {
            'id': '158',
            'ext': 'mp4',
            'title': 'Telewizja WP HD',
        },
        'params': {
            'format': 'bestvideo',
        },
    }, {
        # audio only
        'url': 'https://pilot.wp.pl/tv/#radio-nowy-swiat',
        'info_dict': {
            'id': '238',
            'ext': 'm4a',
            'title': 'Radio Nowy Świat',
        },
        'params': {
            'format': 'bestaudio',
        },
    }, {
        'url': 'wppilot:9',
        'only_matching': True,
    }]

    @staticmethod
    def _find_channel(channel_list, key, value):
        """Return the first raw channel entry whose *key* matches *value*, or None.

        *value* is always a string (it comes from the URL). Stored values are
        compared by their string form so that an id kept as a number in the
        channel list still matches; entries lacking *key* are skipped.
        """
        for chan in channel_list:
            candidate = chan.get(key)
            if candidate is not None and str(candidate) == value:
                return chan
        return None

    def _get_channel(self, id_or_slug):
        """Look up a channel by numeric id or slug and return its parsed info dict.

        The cached channel list is consulted first. If the channel is not in
        it, a fresh list is downloaded and searched once more.

        Raises ExtractorError if the channel cannot be found.
        """
        channel_list, is_cached = self._get_channel_list(cache=True)
        # An all-digit identifier is treated as an id, anything else as a slug.
        key = 'id' if re.match(r'^\d+$', id_or_slug) else 'slug'

        chan = self._find_channel(channel_list, key, id_or_slug)
        if chan is None and is_cached:
            # if cached channel not found, download and retry
            channel_list, _ = self._get_channel_list(cache=False)
            chan = self._find_channel(channel_list, key, id_or_slug)

        if chan is None:
            raise ExtractorError('Channel not found')
        return self._parse_channel(chan)

    def _real_extract(self, url):
        """Resolve the channel, request its stream data and return the info dict with formats.

        If the API response carries a stream token of a previous session,
        that session is closed and, on success, extraction is restarted by
        returning a URL result pointing at the same URL.

        Raises ExtractorError if the channel is unknown or the API response
        contains no stream list.
        """
        video_id = self._match_id(url)
        channel = self._get_channel(video_id)
        video_id = str(channel['id'])

        session_cookie = next(
            (c for c in self.cookiejar if c.name == 'netviapisessid'), None)
        # cookies starting with "g:" are assigned to guests
        is_authorized = (
            session_cookie is not None
            and not session_cookie.value.startswith('g:'))

        # 422 is accepted so that the error payload (which may hold the
        # stream token of a previous session) can be inspected below.
        video = self._download_json(
            (self._VIDEO_URL if is_authorized else self._VIDEO_GUEST_URL) % video_id,
            video_id, query={
                'device_type': 'web',
            }, headers=self._HEADERS_WEB,
            expected_status=(200, 422))

        stream_token = try_get(video, lambda x: x['_meta']['error']['info']['stream_token'])
        if stream_token:
            close = self._download_json(
                'https://pilot.wp.pl/api/v1/channels/close', video_id,
                'Invalidating previous stream session', headers=self._HEADERS_WEB,
                data=json.dumps({
                    'channelId': video_id,
                    't': stream_token,
                }).encode('utf-8'))
            if try_get(close, lambda x: x['data']['status']) == 'ok':
                # Previous session closed; run the extraction again.
                return self.url_result(url, ie=WPPilotIE.ie_key())

        # An error payload has no stream list; report that as an extraction
        # error instead of failing with a bare KeyError/TypeError.
        streams = try_get(video, lambda x: x['data']['stream_channel']['streams'])
        if streams is None:
            raise ExtractorError('Unable to find stream data in the API response')

        formats = []
        for fmt in streams:
            # live DASH ('dash@live:abr') does not work for now, only HLS is extracted
            if fmt['type'] == 'hls@live:abr':
                formats.extend(
                    self._extract_m3u8_formats(
                        random.choice(fmt['url']),
                        video_id, live=True))

        channel['formats'] = formats
        return channel
class WPPilotChannelsIE(WPPilotBaseIE):
    """Playlist extractor listing every channel found in the WP Pilot channel list."""

    _VALID_URL = r'(?:https?://pilot\.wp\.pl/(?:tv/?)?(?:\?[^#]*)?#?|wppilot:)$'
    IE_NAME = 'wppilot:channels'

    _TESTS = [{
        'url': 'wppilot:',
        'info_dict': {
            'id': 'wppilot',
            'title': 'WP Pilot',
        },
        'playlist_mincount': 100,
    }, {
        'url': 'https://pilot.wp.pl/',
        'only_matching': True,
    }]

    def _entries(self):
        """Yield one ``url_transparent`` entry per channel, delegating to WPPilotIE."""
        channels, _ = self._get_channel_list()
        for chan in channels:
            # Parsed channel metadata first, then the routing fields on top.
            yield {
                **self._parse_channel(chan),
                '_type': 'url_transparent',
                'url': f'wppilot:{chan["id"]}',
                'ie_key': WPPilotIE.ie_key(),
            }

    def _real_extract(self, url):
        """Return the playlist of all channels; *url* itself carries no further information."""
        entries = self._entries()
        return self.playlist_result(entries, 'wppilot', 'WP Pilot')
|